svn commit: r340436 - in head/sys/dev: netmap virtio/network
Rodney W. Grimes
freebsd at pdx.rh.CN85.dnsmgr.net
Wed Nov 14 19:44:39 UTC 2018
> Author: vmaffione
> Date: Wed Nov 14 15:39:48 2018
> New Revision: 340436
> URL: https://svnweb.freebsd.org/changeset/base/340436
>
> Log:
> vtnet: fix netmap support
>
> netmap(4) support for vtnet(4) was incomplete and had multiple bugs.
> This commit fixes those bugs to bring netmap on vtnet in a functional state.
>
> Changelist:
> - handle errors returned by virtqueue_enqueue() properly (they were
> previously ignored)
> - make sure that each virtqueue is accessed either by netmap or by the rest
> of the kernel, but never both.
> - compute the number of netmap slots for TX and RX separately, according to
> whether indirect descriptors are used or not for a given virtqueue.
> - make sure sglists are freed according to their type (mbufs or netmap
> buffers)
> - add support for multiqueue and netmap host (aka sw) rings.
> - intercept VQ interrupts directly instead of intercepting them in txq_eof
> and rxq_eof. This simplifies the code and makes it easier to make sure
> taskqueues are not running for a VQ while it is in netmap mode.
> - implement vtnet_netmap_config() to cope with changes in the number of queues.
>
> Reviewed by: bryanv
> Approved by: gnn (mentor)
> MFC after: 3 days
> Sponsored by: Sunny Valley Networks
> Differential Revision: https://reviews.freebsd.org/D17916
I would like to get wider testing of this in ^/head,
specifically with use in a bhyve guest, before we do
an early merge to stable/12 so that this can be in
the next build.
If you are able to test this within a bhyve guest,
please do so and provide feedback; a rough sketch of
one way to exercise it follows below. The host does
not need to be running ^/head, just the guest needs
to be using vtnet NICs.
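For example (a rough sketch, assuming the netmap pkt-gen tool from
src/tools/tools/netmap is built in the guest, and that the guest's
interface is vtnet0; adjust the interface name as needed):

  # transmit on vtnet0 in netmap mode
  pkt-gen -i vtnet0 -f tx
  # or, on the receiving side, capture in netmap mode
  pkt-gen -i vtnet0 -f rx

Any panics, hangs, or stalls while the interface is in netmap mode,
with one or more queue pairs in use, would be useful feedback.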
Thanks,
Rod
>
> Modified:
> head/sys/dev/netmap/if_vtnet_netmap.h
> head/sys/dev/virtio/network/if_vtnet.c
> head/sys/dev/virtio/network/if_vtnetvar.h
>
> Modified: head/sys/dev/netmap/if_vtnet_netmap.h
> ==============================================================================
> --- head/sys/dev/netmap/if_vtnet_netmap.h Wed Nov 14 15:23:39 2018 (r340435)
> +++ head/sys/dev/netmap/if_vtnet_netmap.h Wed Nov 14 15:39:48 2018 (r340436)
> @@ -1,5 +1,5 @@
> /*
> - * Copyright (C) 2014 Vincenzo Maffione, Luigi Rizzo. All rights reserved.
> + * Copyright (C) 2014-2018 Vincenzo Maffione, Luigi Rizzo.
> *
> * Redistribution and use in source and binary forms, with or without
> * modification, are permitted provided that the following conditions
> @@ -33,74 +33,148 @@
> #include <vm/pmap.h> /* vtophys ? */
> #include <dev/netmap/netmap_kern.h>
>
> +/*
> + * Return 1 if the queue identified by 't' and 'idx' is in netmap mode.
> + */
> +static int
> +vtnet_netmap_queue_on(struct vtnet_softc *sc, enum txrx t, int idx)
> +{
> + struct netmap_adapter *na = NA(sc->vtnet_ifp);
>
> -#define SOFTC_T vtnet_softc
> + if (!nm_native_on(na))
> + return 0;
>
> -/* Free all the unused buffer in all the RX virtqueues.
> - * This function is called when entering and exiting netmap mode.
> - * - buffers queued by the virtio driver return skbuf/mbuf pointer
> - * and need to be freed;
> - * - buffers queued by netmap return the txq/rxq, and do not need work
> - */
> + if (t == NR_RX)
> + return !!(idx < na->num_rx_rings &&
> + na->rx_rings[idx]->nr_mode == NKR_NETMAP_ON);
> +
> + return !!(idx < na->num_tx_rings &&
> + na->tx_rings[idx]->nr_mode == NKR_NETMAP_ON);
> +}
> +
> static void
> -vtnet_netmap_free_bufs(struct SOFTC_T* sc)
> +vtnet_free_used(struct virtqueue *vq, int netmap_bufs, enum txrx t, int idx)
> {
> - int i, nmb = 0, n = 0, last;
> + void *cookie;
> + int deq = 0;
>
> - for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
> - struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
> - struct virtqueue *vq;
> - struct mbuf *m;
> - struct vtnet_txq *txq = &sc->vtnet_txqs[i];
> - struct vtnet_tx_header *txhdr;
> + while ((cookie = virtqueue_dequeue(vq, NULL)) != NULL) {
> + if (netmap_bufs) {
> + /* These are netmap buffers: there is nothing to do. */
> + } else {
> + /* These are mbufs that we need to free. */
> + struct mbuf *m;
>
> - last = 0;
> - vq = rxq->vtnrx_vq;
> - while ((m = virtqueue_drain(vq, &last)) != NULL) {
> - n++;
> - if (m != (void *)rxq)
> + if (t == NR_TX) {
> + struct vtnet_tx_header *txhdr = cookie;
> + m = txhdr->vth_mbuf;
> m_freem(m);
> - else
> - nmb++;
> - }
> -
> - last = 0;
> - vq = txq->vtntx_vq;
> - while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
> - n++;
> - if (txhdr != (void *)txq) {
> - m_freem(txhdr->vth_mbuf);
> uma_zfree(vtnet_tx_header_zone, txhdr);
> - } else
> - nmb++;
> + } else {
> + m = cookie;
> + m_freem(m);
> + }
> }
> + deq++;
> }
> - D("freed %d mbufs, %d netmap bufs on %d queues",
> - n - nmb, nmb, i);
> +
> + if (deq)
> + nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n",
> + deq, nm_txrx2str(t), idx, netmap_bufs);
> }
>
> /* Register and unregister. */
> static int
> -vtnet_netmap_reg(struct netmap_adapter *na, int onoff)
> +vtnet_netmap_reg(struct netmap_adapter *na, int state)
> {
> - struct ifnet *ifp = na->ifp;
> - struct SOFTC_T *sc = ifp->if_softc;
> + struct ifnet *ifp = na->ifp;
> + struct vtnet_softc *sc = ifp->if_softc;
> + int success;
> + enum txrx t;
> + int i;
>
> + /* Drain the taskqueues to make sure that there are no worker threads
> + * accessing the virtqueues. */
> + vtnet_drain_taskqueues(sc);
> +
> VTNET_CORE_LOCK(sc);
> - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
> - /* enable or disable flags and callbacks in na and ifp */
> - if (onoff) {
> +
> + /* We need nm_netmap_on() to return true when called by
> + * vtnet_init_locked() below. */
> + if (state)
> nm_set_native_flags(na);
> +
> + /* We need to trigger a device reset in order to unexpose guest buffers
> + * published to the host. */
> + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
> + /* Get pending used buffers. The way they are freed depends on whether
> + * they are netmap buffer or they are mbufs. We can tell apart the two
> + * cases by looking at kring->nr_mode, before this is possibly updated
> + * in the loop below. */
> + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
> + struct vtnet_txq *txq = &sc->vtnet_txqs[i];
> + struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
> + struct netmap_kring *kring;
> +
> + VTNET_TXQ_LOCK(txq);
> + kring = NMR(na, NR_TX)[i];
> + vtnet_free_used(txq->vtntx_vq,
> + kring->nr_mode == NKR_NETMAP_ON, NR_TX, i);
> + VTNET_TXQ_UNLOCK(txq);
> +
> + VTNET_RXQ_LOCK(rxq);
> + kring = NMR(na, NR_RX)[i];
> + vtnet_free_used(rxq->vtnrx_vq,
> + kring->nr_mode == NKR_NETMAP_ON, NR_RX, i);
> + VTNET_RXQ_UNLOCK(rxq);
> + }
> + vtnet_init_locked(sc);
> + success = (ifp->if_drv_flags & IFF_DRV_RUNNING) ? 0 : ENXIO;
> +
> + if (state) {
> + for_rx_tx(t) {
> + /* Hardware rings. */
> + for (i = 0; i < nma_get_nrings(na, t); i++) {
> + struct netmap_kring *kring = NMR(na, t)[i];
> +
> + if (nm_kring_pending_on(kring))
> + kring->nr_mode = NKR_NETMAP_ON;
> + }
> +
> + /* Host rings. */
> + for (i = 0; i < nma_get_host_nrings(na, t); i++) {
> + struct netmap_kring *kring =
> + NMR(na, t)[nma_get_nrings(na, t) + i];
> +
> + if (nm_kring_pending_on(kring))
> + kring->nr_mode = NKR_NETMAP_ON;
> + }
> + }
> } else {
> nm_clear_native_flags(na);
> + for_rx_tx(t) {
> + /* Hardware rings. */
> + for (i = 0; i < nma_get_nrings(na, t); i++) {
> + struct netmap_kring *kring = NMR(na, t)[i];
> +
> + if (nm_kring_pending_off(kring))
> + kring->nr_mode = NKR_NETMAP_OFF;
> + }
> +
> + /* Host rings. */
> + for (i = 0; i < nma_get_host_nrings(na, t); i++) {
> + struct netmap_kring *kring =
> + NMR(na, t)[nma_get_nrings(na, t) + i];
> +
> + if (nm_kring_pending_off(kring))
> + kring->nr_mode = NKR_NETMAP_OFF;
> + }
> + }
> }
> - /* drain queues so netmap and native drivers
> - * do not interfere with each other
> - */
> - vtnet_netmap_free_bufs(sc);
> - vtnet_init_locked(sc); /* also enable intr */
> - VTNET_CORE_UNLOCK(sc);
> - return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1);
> +
> + VTNET_CORE_UNLOCK(sc);
> +
> + return success;
> }
>
>
> @@ -109,20 +183,19 @@ static int
> vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
> {
> struct netmap_adapter *na = kring->na;
> - struct ifnet *ifp = na->ifp;
> + struct ifnet *ifp = na->ifp;
> struct netmap_ring *ring = kring->ring;
> u_int ring_nr = kring->ring_id;
> u_int nm_i; /* index into the netmap ring */
> - u_int nic_i; /* index into the NIC ring */
> - u_int n;
> u_int const lim = kring->nkr_num_slots - 1;
> u_int const head = kring->rhead;
>
> /* device-specific */
> - struct SOFTC_T *sc = ifp->if_softc;
> + struct vtnet_softc *sc = ifp->if_softc;
> struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
> struct virtqueue *vq = txq->vtntx_vq;
> int interrupts = !(kring->nr_kflags & NKR_NOINTR);
> + u_int n;
>
> /*
> * First part: process new packets to send.
> @@ -133,15 +206,13 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int fl
> if (nm_i != head) { /* we have new packets to send */
> struct sglist *sg = txq->vtntx_sg;
>
> - nic_i = netmap_idx_k2n(kring, nm_i);
> - for (n = 0; nm_i != head; n++) {
> + for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
> /* we use an empty header here */
> - static struct virtio_net_hdr_mrg_rxbuf hdr;
> struct netmap_slot *slot = &ring->slot[nm_i];
> u_int len = slot->len;
> uint64_t paddr;
> void *addr = PNMB(na, slot, &paddr);
> - int err;
> + int err;
>
> NM_CHECK_ADDR_LEN(na, addr, len);
>
> @@ -150,88 +221,63 @@ vtnet_netmap_txsync(struct netmap_kring *kring, int fl
> * and kick the hypervisor (if necessary).
> */
> sglist_reset(sg); // cheap
> - // if vtnet_hdr_size > 0 ...
> - err = sglist_append(sg, &hdr, sc->vtnet_hdr_size);
> - // XXX later, support multi segment
> - err = sglist_append_phys(sg, paddr, len);
> - /* use na as the cookie */
> - err = virtqueue_enqueue(vq, txq, sg, sg->sg_nseg, 0);
> - if (unlikely(err < 0)) {
> - D("virtqueue_enqueue failed");
> - break;
> - }
> -
> - nm_i = nm_next(nm_i, lim);
> - nic_i = nm_next(nic_i, lim);
> + err = sglist_append(sg, &txq->vtntx_shrhdr, sc->vtnet_hdr_size);
> + err |= sglist_append_phys(sg, paddr, len);
> + KASSERT(err == 0, ("%s: cannot append to sglist %d",
> + __func__, err));
> + err = virtqueue_enqueue(vq, /*cookie=*/txq, sg,
> + /*readable=*/sg->sg_nseg,
> + /*writeable=*/0);
> + if (unlikely(err)) {
> + if (err != ENOSPC)
> + nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
> + kring->name, err);
> + break;
> + }
> }
> - /* Update hwcur depending on where we stopped. */
> - kring->nr_hwcur = nm_i; /* note we migth break early */
>
> - /* No more free TX slots? Ask the hypervisor for notifications,
> - * possibly only when a considerable amount of work has been
> - * done.
> - */
> - ND(3,"sent %d packets, hwcur %d", n, nm_i);
> - virtqueue_disable_intr(vq);
> virtqueue_notify(vq);
> - } else {
> - if (ring->head != ring->tail)
> - ND(5, "pure notify ? head %d tail %d nused %d %d",
> - ring->head, ring->tail, virtqueue_nused(vq),
> - (virtqueue_dump(vq), 1));
> - virtqueue_notify(vq);
> - if (interrupts) {
> - virtqueue_enable_intr(vq); // like postpone with 0
> - }
> +
> + /* Update hwcur depending on where we stopped. */
> + kring->nr_hwcur = nm_i; /* note we migth break early */
> }
>
> -
> - /* Free used slots. We only consider our own used buffers, recognized
> - * by the token we passed to virtqueue_add_outbuf.
> + /* Free used slots. We only consider our own used buffers, recognized
> + * by the token we passed to virtqueue_enqueue.
> */
> - n = 0;
> - for (;;) {
> - struct vtnet_tx_header *txhdr = virtqueue_dequeue(vq, NULL);
> - if (txhdr == NULL)
> - break;
> - if (likely(txhdr == (void *)txq)) {
> - n++;
> - if (virtqueue_nused(vq) < 32) { // XXX slow release
> - break;
> - }
> - } else { /* leftover from previous transmission */
> - m_freem(txhdr->vth_mbuf);
> - uma_zfree(vtnet_tx_header_zone, txhdr);
> - }
> - }
> - if (n) {
> + n = 0;
> + for (;;) {
> + void *token = virtqueue_dequeue(vq, NULL);
> + if (token == NULL)
> + break;
> + if (unlikely(token != (void *)txq))
> + nm_prerr("BUG: TX token mismatch\n");
> + else
> + n++;
> + }
> + if (n > 0) {
> kring->nr_hwtail += n;
> if (kring->nr_hwtail > lim)
> kring->nr_hwtail -= lim + 1;
> }
> - if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0*/) {
> - ND(3, "disable intr, hwcur %d", nm_i);
> - virtqueue_disable_intr(vq);
> - } else if (interrupts) {
> - ND(3, "enable intr, hwcur %d", nm_i);
> - virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
> - }
>
> - return 0;
> + if (interrupts && virtqueue_nfree(vq) < 32)
> + virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG);
> +
> + return 0;
> }
>
> static int
> -vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_i, u_int head)
> +vtnet_netmap_kring_refill(struct netmap_kring *kring, u_int nm_i, u_int head)
> {
> struct netmap_adapter *na = kring->na;
> - struct ifnet *ifp = na->ifp;
> + struct ifnet *ifp = na->ifp;
> struct netmap_ring *ring = kring->ring;
> u_int ring_nr = kring->ring_id;
> u_int const lim = kring->nkr_num_slots - 1;
> - u_int n;
>
> /* device-specific */
> - struct SOFTC_T *sc = ifp->if_softc;
> + struct vtnet_softc *sc = ifp->if_softc;
> struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
> struct virtqueue *vq = rxq->vtnrx_vq;
>
> @@ -239,12 +285,11 @@ vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_
> struct sglist_seg ss[2];
> struct sglist sg = { ss, 0, 0, 2 };
>
> - for (n = 0; nm_i != head; n++) {
> - static struct virtio_net_hdr_mrg_rxbuf hdr;
> + for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
> struct netmap_slot *slot = &ring->slot[nm_i];
> uint64_t paddr;
> void *addr = PNMB(na, slot, &paddr);
> - int err = 0;
> + int err;
>
> if (addr == NETMAP_BUF_BASE(na)) { /* bad buf */
> if (netmap_ring_reinit(kring))
> @@ -252,99 +297,134 @@ vtnet_refill_rxq(struct netmap_kring *kring, u_int nm_
> }
>
> slot->flags &= ~NS_BUF_CHANGED;
> - sglist_reset(&sg); // cheap
> - err = sglist_append(&sg, &hdr, sc->vtnet_hdr_size);
> - err = sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
> + sglist_reset(&sg);
> + err = sglist_append(&sg, &rxq->vtnrx_shrhdr, sc->vtnet_hdr_size);
> + err |= sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
> + KASSERT(err == 0, ("%s: cannot append to sglist %d",
> + __func__, err));
> /* writable for the host */
> - err = virtqueue_enqueue(vq, rxq, &sg, 0, sg.sg_nseg);
> - if (err < 0) {
> - D("virtqueue_enqueue failed");
> + err = virtqueue_enqueue(vq, /*cookie=*/rxq, &sg,
> + /*readable=*/0, /*writeable=*/sg.sg_nseg);
> + if (unlikely(err)) {
> + if (err != ENOSPC)
> + nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
> + kring->name, err);
> break;
> }
> - nm_i = nm_next(nm_i, lim);
> }
> +
> return nm_i;
> }
>
> +/*
> + * Publish netmap buffers on a RX virtqueue.
> + * Returns -1 if this virtqueue is not being opened in netmap mode.
> + * If the virtqueue is being opened in netmap mode, return 0 on success and
> + * a positive error code on failure.
> + */
> +static int
> +vtnet_netmap_rxq_populate(struct vtnet_rxq *rxq)
> +{
> + struct netmap_adapter *na = NA(rxq->vtnrx_sc->vtnet_ifp);
> + struct netmap_kring *kring;
> + int error;
> +
> + if (!nm_native_on(na) || rxq->vtnrx_id >= na->num_rx_rings)
> + return -1;
> +
> + kring = na->rx_rings[rxq->vtnrx_id];
> + if (!(nm_kring_pending_on(kring) ||
> + kring->nr_pending_mode == NKR_NETMAP_ON))
> + return -1;
> +
> + /* Expose all the RX netmap buffers. Note that the number of
> + * netmap slots in the RX ring matches the maximum number of
> + * 2-elements sglist that the RX virtqueue can accommodate. */
> + error = vtnet_netmap_kring_refill(kring, 0, na->num_rx_desc);
> + virtqueue_notify(rxq->vtnrx_vq);
> +
> + return error < 0 ? ENXIO : 0;
> +}
> +
> /* Reconcile kernel and user view of the receive ring. */
> static int
> vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
> {
> struct netmap_adapter *na = kring->na;
> - struct ifnet *ifp = na->ifp;
> + struct ifnet *ifp = na->ifp;
> struct netmap_ring *ring = kring->ring;
> u_int ring_nr = kring->ring_id;
> u_int nm_i; /* index into the netmap ring */
> - // u_int nic_i; /* index into the NIC ring */
> - u_int n;
> u_int const lim = kring->nkr_num_slots - 1;
> u_int const head = kring->rhead;
> - int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
> + int force_update = (flags & NAF_FORCE_READ) ||
> + (kring->nr_kflags & NKR_PENDINTR);
> int interrupts = !(kring->nr_kflags & NKR_NOINTR);
>
> /* device-specific */
> - struct SOFTC_T *sc = ifp->if_softc;
> + struct vtnet_softc *sc = ifp->if_softc;
> struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
> struct virtqueue *vq = rxq->vtnrx_vq;
>
> - /* XXX netif_carrier_ok ? */
> -
> - if (head > lim)
> - return netmap_ring_reinit(kring);
> -
> rmb();
> /*
> * First part: import newly received packets.
> - * Only accept our
> - * own buffers (matching the token). We should only get
> - * matching buffers, because of vtnet_netmap_free_rx_unused_bufs()
> - * and vtnet_netmap_init_buffers().
> + * Only accept our own buffers (matching the token). We should only get
> + * matching buffers. We may need to stop early to avoid hwtail to overrun
> + * hwcur.
> */
> if (netmap_no_pendintr || force_update) {
> - struct netmap_adapter *token;
> + uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
> + void *token;
>
> - nm_i = kring->nr_hwtail;
> - n = 0;
> - for (;;) {
> + vtnet_rxq_disable_intr(rxq);
> +
> + nm_i = kring->nr_hwtail;
> + while (nm_i != hwtail_lim) {
> int len;
> - token = virtqueue_dequeue(vq, &len);
> - if (token == NULL)
> - break;
> - if (likely(token == (void *)rxq)) {
> - ring->slot[nm_i].len = len;
> - ring->slot[nm_i].flags = 0;
> - nm_i = nm_next(nm_i, lim);
> - n++;
> - } else {
> - D("This should not happen");
> - }
> + token = virtqueue_dequeue(vq, &len);
> + if (token == NULL) {
> + if (interrupts && vtnet_rxq_enable_intr(rxq)) {
> + vtnet_rxq_disable_intr(rxq);
> + continue;
> + }
> + break;
> + }
> + if (unlikely(token != (void *)rxq)) {
> + nm_prerr("BUG: RX token mismatch\n");
> + } else {
> + /* Skip the virtio-net header. */
> + len -= sc->vtnet_hdr_size;
> + if (unlikely(len < 0)) {
> + RD(1, "Truncated virtio-net-header, "
> + "missing %d bytes", -len);
> + len = 0;
> + }
> + ring->slot[nm_i].len = len;
> + ring->slot[nm_i].flags = 0;
> + nm_i = nm_next(nm_i, lim);
> + }
> }
> kring->nr_hwtail = nm_i;
> kring->nr_kflags &= ~NKR_PENDINTR;
> }
> - ND("[B] h %d c %d hwcur %d hwtail %d",
> - ring->head, ring->cur, kring->nr_hwcur,
> - kring->nr_hwtail);
> + ND("[B] h %d c %d hwcur %d hwtail %d", ring->head, ring->cur,
> + kring->nr_hwcur, kring->nr_hwtail);
>
> /*
> * Second part: skip past packets that userspace has released.
> */
> nm_i = kring->nr_hwcur; /* netmap ring index */
> if (nm_i != head) {
> - int err = vtnet_refill_rxq(kring, nm_i, head);
> - if (err < 0)
> - return 1;
> - kring->nr_hwcur = err;
> + int nm_j = vtnet_netmap_kring_refill(kring, nm_i, head);
> + if (nm_j < 0)
> + return nm_j;
> + kring->nr_hwcur = nm_j;
> virtqueue_notify(vq);
> - /* After draining the queue may need an intr from the hypervisor */
> - if (interrupts) {
> - vtnet_rxq_enable_intr(rxq);
> - }
> }
>
> - ND("[C] h %d c %d t %d hwcur %d hwtail %d",
> - ring->head, ring->cur, ring->tail,
> - kring->nr_hwcur, kring->nr_hwtail);
> + ND("[C] h %d c %d t %d hwcur %d hwtail %d", ring->head, ring->cur,
> + ring->tail, kring->nr_hwcur, kring->nr_hwtail);
>
> return 0;
> }
> @@ -352,9 +432,9 @@ vtnet_netmap_rxsync(struct netmap_kring *kring, int fl
>
> /* Enable/disable interrupts on all virtqueues. */
> static void
> -vtnet_netmap_intr(struct netmap_adapter *na, int onoff)
> +vtnet_netmap_intr(struct netmap_adapter *na, int state)
> {
> - struct SOFTC_T *sc = na->ifp->if_softc;
> + struct vtnet_softc *sc = na->ifp->if_softc;
> int i;
>
> for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
> @@ -362,7 +442,7 @@ vtnet_netmap_intr(struct netmap_adapter *na, int onoff
> struct vtnet_txq *txq = &sc->vtnet_txqs[i];
> struct virtqueue *txvq = txq->vtntx_vq;
>
> - if (onoff) {
> + if (state) {
> vtnet_rxq_enable_intr(rxq);
> virtqueue_enable_intr(txvq);
> } else {
> @@ -372,60 +452,88 @@ vtnet_netmap_intr(struct netmap_adapter *na, int onoff
> }
> }
>
> -/* Make RX virtqueues buffers pointing to netmap buffers. */
> static int
> -vtnet_netmap_init_rx_buffers(struct SOFTC_T *sc)
> +vtnet_netmap_tx_slots(struct vtnet_softc *sc)
> {
> - struct ifnet *ifp = sc->vtnet_ifp;
> - struct netmap_adapter* na = NA(ifp);
> - unsigned int r;
> + int div;
>
> - if (!nm_native_on(na))
> - return 0;
> - for (r = 0; r < na->num_rx_rings; r++) {
> - struct netmap_kring *kring = na->rx_rings[r];
> - struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
> - struct virtqueue *vq = rxq->vtnrx_vq;
> - struct netmap_slot* slot;
> - int err = 0;
> + /* We need to prepend a virtio-net header to each netmap buffer to be
> + * transmitted, therefore calling virtqueue_enqueue() passing sglist
> + * with 2 elements.
> + * TX virtqueues use indirect descriptors if the feature was negotiated
> + * with the host, and if sc->vtnet_tx_nsegs > 1. With indirect
> + * descriptors, a single virtio descriptor is sufficient to reference
> + * each TX sglist. Without them, we need two separate virtio descriptors
> + * for each TX sglist. We therefore compute the number of netmap TX
> + * slots according to these assumptions.
> + */
> + if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_tx_nsegs > 1)
> + div = 1;
> + else
> + div = 2;
>
> - slot = netmap_reset(na, NR_RX, r, 0);
> - if (!slot) {
> - D("strange, null netmap ring %d", r);
> - return 0;
> - }
> - /* Add up to na>-num_rx_desc-1 buffers to this RX virtqueue.
> - * It's important to leave one virtqueue slot free, otherwise
> - * we can run into ring->cur/ring->tail wraparounds.
> - */
> - err = vtnet_refill_rxq(kring, 0, na->num_rx_desc-1);
> - if (err < 0)
> - return 0;
> - virtqueue_notify(vq);
> - }
> + return virtqueue_size(sc->vtnet_txqs[0].vtntx_vq) / div;
> +}
>
> - return 1;
> +static int
> +vtnet_netmap_rx_slots(struct vtnet_softc *sc)
> +{
> + int div;
> +
> + /* We need to prepend a virtio-net header to each netmap buffer to be
> + * received, therefore calling virtqueue_enqueue() passing sglist
> + * with 2 elements.
> + * RX virtqueues use indirect descriptors if the feature was negotiated
> + * with the host, and if sc->vtnet_rx_nsegs > 1. With indirect
> + * descriptors, a single virtio descriptor is sufficient to reference
> + * each RX sglist. Without them, we need two separate virtio descriptors
> + * for each RX sglist. We therefore compute the number of netmap RX
> + * slots according to these assumptions.
> + */
> + if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_rx_nsegs > 1)
> + div = 1;
> + else
> + div = 2;
> +
> + return virtqueue_size(sc->vtnet_rxqs[0].vtnrx_vq) / div;
> }
>
> +static int
> +vtnet_netmap_config(struct netmap_adapter *na, struct nm_config_info *info)
> +{
> + struct vtnet_softc *sc = na->ifp->if_softc;
> +
> + info->num_tx_rings = sc->vtnet_act_vq_pairs;
> + info->num_rx_rings = sc->vtnet_act_vq_pairs;
> + info->num_tx_descs = vtnet_netmap_tx_slots(sc);
> + info->num_rx_descs = vtnet_netmap_rx_slots(sc);
> + info->rx_buf_maxsize = NETMAP_BUF_SIZE(na);
> +
> + return 0;
> +}
> +
> static void
> -vtnet_netmap_attach(struct SOFTC_T *sc)
> +vtnet_netmap_attach(struct vtnet_softc *sc)
> {
> struct netmap_adapter na;
>
> bzero(&na, sizeof(na));
>
> na.ifp = sc->vtnet_ifp;
> - na.num_tx_desc = 1024;// sc->vtnet_rx_nmbufs;
> - na.num_rx_desc = 1024; // sc->vtnet_rx_nmbufs;
> + na.na_flags = 0;
> + na.num_tx_desc = vtnet_netmap_tx_slots(sc);
> + na.num_rx_desc = vtnet_netmap_rx_slots(sc);
> + na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
> + na.rx_buf_maxsize = 0;
> na.nm_register = vtnet_netmap_reg;
> na.nm_txsync = vtnet_netmap_txsync;
> na.nm_rxsync = vtnet_netmap_rxsync;
> na.nm_intr = vtnet_netmap_intr;
> - na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
> - D("max rings %d", sc->vtnet_max_vq_pairs);
> + na.nm_config = vtnet_netmap_config;
> +
> netmap_attach(&na);
>
> - D("virtio attached txq=%d, txd=%d rxq=%d, rxd=%d",
> + nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
> na.num_tx_rings, na.num_tx_desc,
> na.num_tx_rings, na.num_rx_desc);
> }
>
> Modified: head/sys/dev/virtio/network/if_vtnet.c
> ==============================================================================
> --- head/sys/dev/virtio/network/if_vtnet.c Wed Nov 14 15:23:39 2018 (r340435)
> +++ head/sys/dev/virtio/network/if_vtnet.c Wed Nov 14 15:39:48 2018 (r340436)
> @@ -1192,6 +1192,12 @@ vtnet_rxq_populate(struct vtnet_rxq *rxq)
> struct virtqueue *vq;
> int nbufs, error;
>
> +#ifdef DEV_NETMAP
> + error = vtnet_netmap_rxq_populate(rxq);
> + if (error >= 0)
> + return (error);
> +#endif /* DEV_NETMAP */
> +
> vq = rxq->vtnrx_vq;
> error = ENOSPC;
>
> @@ -1221,12 +1227,20 @@ vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
> struct virtqueue *vq;
> struct mbuf *m;
> int last;
> +#ifdef DEV_NETMAP
> + int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX,
> + rxq->vtnrx_id);
> +#else /* !DEV_NETMAP */
> + int netmap_bufs = 0;
> +#endif /* !DEV_NETMAP */
>
> vq = rxq->vtnrx_vq;
> last = 0;
>
> - while ((m = virtqueue_drain(vq, &last)) != NULL)
> - m_freem(m);
> + while ((m = virtqueue_drain(vq, &last)) != NULL) {
> + if (!netmap_bufs)
> + m_freem(m);
> + }
>
> KASSERT(virtqueue_empty(vq),
> ("%s: mbufs remaining in rx queue %p", __func__, rxq));
> @@ -1772,12 +1786,6 @@ vtnet_rxq_eof(struct vtnet_rxq *rxq)
>
> VTNET_RXQ_LOCK_ASSERT(rxq);
>
> -#ifdef DEV_NETMAP
> - if (netmap_rx_irq(ifp, 0, &deq)) {
> - return (FALSE);
> - }
> -#endif /* DEV_NETMAP */
> -
> while (count-- > 0) {
> m = virtqueue_dequeue(vq, &len);
> if (m == NULL)
> @@ -1871,6 +1879,11 @@ vtnet_rx_vq_intr(void *xrxq)
> return;
> }
>
> +#ifdef DEV_NETMAP
> + if (netmap_rx_irq(ifp, rxq->vtnrx_id, &more) != NM_IRQ_PASS)
> + return;
> +#endif /* DEV_NETMAP */
> +
> VTNET_RXQ_LOCK(rxq);
>
> again:
> @@ -1971,13 +1984,21 @@ vtnet_txq_free_mbufs(struct vtnet_txq *txq)
> struct virtqueue *vq;
> struct vtnet_tx_header *txhdr;
> int last;
> +#ifdef DEV_NETMAP
> + int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX,
> + txq->vtntx_id);
> +#else /* !DEV_NETMAP */
> + int netmap_bufs = 0;
> +#endif /* !DEV_NETMAP */
>
> vq = txq->vtntx_vq;
> last = 0;
>
> while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
> - m_freem(txhdr->vth_mbuf);
> - uma_zfree(vtnet_tx_header_zone, txhdr);
> + if (!netmap_bufs) {
> + m_freem(txhdr->vth_mbuf);
> + uma_zfree(vtnet_tx_header_zone, txhdr);
> + }
> }
>
> KASSERT(virtqueue_empty(vq),
> @@ -2465,13 +2486,6 @@ vtnet_txq_eof(struct vtnet_txq *txq)
> deq = 0;
> VTNET_TXQ_LOCK_ASSERT(txq);
>
> -#ifdef DEV_NETMAP
> - if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
> - virtqueue_disable_intr(vq); // XXX luigi
> - return 0; // XXX or 1 ?
> - }
> -#endif /* DEV_NETMAP */
> -
> while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
> m = txhdr->vth_mbuf;
> deq++;
> @@ -2513,6 +2527,11 @@ vtnet_tx_vq_intr(void *xtxq)
> return;
> }
>
> +#ifdef DEV_NETMAP
> + if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
> + return;
> +#endif /* DEV_NETMAP */
> +
> VTNET_TXQ_LOCK(txq);
>
> if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
> @@ -2769,11 +2788,6 @@ vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
> struct vtnet_txq *txq;
> int i;
>
> -#ifdef DEV_NETMAP
> - if (nm_native_on(NA(sc->vtnet_ifp)))
> - return;
> -#endif /* DEV_NETMAP */
> -
> for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
> rxq = &sc->vtnet_rxqs[i];
> vtnet_rxq_free_mbufs(rxq);
> @@ -2938,11 +2952,6 @@ vtnet_init_rx_queues(struct vtnet_softc *sc)
> ("%s: too many rx mbufs %d for %d segments", __func__,
> sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
>
> -#ifdef DEV_NETMAP
> - if (vtnet_netmap_init_rx_buffers(sc))
> - return 0;
> -#endif /* DEV_NETMAP */
> -
> for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
> rxq = &sc->vtnet_rxqs[i];
>
> @@ -3092,13 +3101,6 @@ vtnet_init(void *xsc)
> struct vtnet_softc *sc;
>
> sc = xsc;
> -
> -#ifdef DEV_NETMAP
> - if (!NA(sc->vtnet_ifp)) {
> - D("try to attach again");
> - vtnet_netmap_attach(sc);
> - }
> -#endif /* DEV_NETMAP */
>
> VTNET_CORE_LOCK(sc);
> vtnet_init_locked(sc);
>
> Modified: head/sys/dev/virtio/network/if_vtnetvar.h
> ==============================================================================
> --- head/sys/dev/virtio/network/if_vtnetvar.h Wed Nov 14 15:23:39 2018 (r340435)
> +++ head/sys/dev/virtio/network/if_vtnetvar.h Wed Nov 14 15:39:48 2018 (r340436)
> @@ -79,6 +79,9 @@ struct vtnet_rxq {
> struct vtnet_rxq_stats vtnrx_stats;
> struct taskqueue *vtnrx_tq;
> struct task vtnrx_intrtask;
> +#ifdef DEV_NETMAP
> + struct virtio_net_hdr_mrg_rxbuf vtnrx_shrhdr;
> +#endif /* DEV_NETMAP */
> char vtnrx_name[16];
> } __aligned(CACHE_LINE_SIZE);
>
> @@ -114,6 +117,9 @@ struct vtnet_txq {
> #ifndef VTNET_LEGACY_TX
> struct task vtntx_defrtask;
> #endif
> +#ifdef DEV_NETMAP
> + struct virtio_net_hdr_mrg_rxbuf vtntx_shrhdr;
> +#endif /* DEV_NETMAP */
> char vtntx_name[16];
> } __aligned(CACHE_LINE_SIZE);
>
>
>
--
Rod Grimes rgrimes at freebsd.org