svn commit: r246355 - in stable/9/sys: dev/netmap net

Luigi Rizzo <luigi at FreeBSD.org>
Tue Feb 5 09:40:32 UTC 2013


Author: luigi
Date: Tue Feb  5 09:40:31 2013
New Revision: 246355
URL: http://svnweb.freebsd.org/changeset/base/246355

Log:
  MFH: sync netmap with the version in HEAD

Deleted:
  stable/9/sys/dev/netmap/netmap_mem1.c
Modified:
  stable/9/sys/dev/netmap/if_em_netmap.h
  stable/9/sys/dev/netmap/if_igb_netmap.h
  stable/9/sys/dev/netmap/if_lem_netmap.h
  stable/9/sys/dev/netmap/if_re_netmap.h
  stable/9/sys/dev/netmap/netmap.c
  stable/9/sys/dev/netmap/netmap_kern.h
  stable/9/sys/dev/netmap/netmap_mem2.c
  stable/9/sys/net/netmap.h
  stable/9/sys/net/netmap_user.h

Modified: stable/9/sys/dev/netmap/if_em_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_em_netmap.h	Tue Feb  5 05:16:02 2013	(r246354)
+++ stable/9/sys/dev/netmap/if_em_netmap.h	Tue Feb  5 09:40:31 2013	(r246355)
@@ -171,7 +171,7 @@ em_netmap_txsync(struct ifnet *ifp, u_in
 	u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
 	/* generate an interrupt approximately every half ring */
-	int report_frequency = kring->nkr_num_slots >> 1;
+	u_int report_frequency = kring->nkr_num_slots >> 1;
 
 	k = ring->cur;
 	if (k > lim)
@@ -292,6 +292,8 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
 	l = rxr->next_to_check;
 	j = netmap_idx_n2k(kring, l);
 	if (netmap_no_pendintr || force_update) {
+		uint16_t slot_flags = kring->nkr_slot_flags;
+
 		for (n = 0; ; n++) {
 			struct e1000_rx_desc *curr = &rxr->rx_base[l];
 			uint32_t staterr = le32toh(curr->status);
@@ -299,6 +301,7 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
 			if ((staterr & E1000_RXD_STAT_DD) == 0)
 				break;
 			ring->slot[j].len = le16toh(curr->length);
+			ring->slot[j].flags = slot_flags;
 			bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
 				BUS_DMASYNC_POSTREAD);
 			j = (j == lim) ? 0 : j + 1;
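
The two-line rxsync change above is the heart of this hunk: kring->nkr_slot_flags
is latched once per sync and stamped on every slot handed to userspace, so
per-ring flags reach the application without a per-packet lookup. The igb, lem
and re hunks below repeat the same idiom; a driver-neutral sketch of it
(rxr_ready() and rxr_len() are invented stand-ins for the per-NIC descriptor
tests):

    /* sketch of the idiom, not tied to a specific NIC */
    if (netmap_no_pendintr || force_update) {
        uint16_t slot_flags = kring->nkr_slot_flags;    /* latched once */

        for (n = 0; ; n++) {
            if (!rxr_ready(rxr, l))                     /* hypothetical */
                break;
            ring->slot[j].len = rxr_len(rxr, l);        /* hypothetical */
            ring->slot[j].flags = slot_flags;           /* stamped per slot */
            j = (j == lim) ? 0 : j + 1;
            l = (l == lim) ? 0 : l + 1;
        }
    }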

Modified: stable/9/sys/dev/netmap/if_igb_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_igb_netmap.h	Tue Feb  5 05:16:02 2013	(r246354)
+++ stable/9/sys/dev/netmap/if_igb_netmap.h	Tue Feb  5 09:40:31 2013	(r246355)
@@ -125,7 +125,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
 	u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
 
 	/* generate an interrupt approximately every half ring */
-	int report_frequency = kring->nkr_num_slots >> 1;
+	u_int report_frequency = kring->nkr_num_slots >> 1;
 
 	k = ring->cur;
 	if (k > lim)
@@ -263,6 +263,8 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
 	l = rxr->next_to_check;
 	j = netmap_idx_n2k(kring, l);
 	if (netmap_no_pendintr || force_update) {
+		uint16_t slot_flags = kring->nkr_slot_flags;
+
 		for (n = 0; ; n++) {
 			union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
 			uint32_t staterr = le32toh(curr->wb.upper.status_error);
@@ -270,6 +272,7 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
 			if ((staterr & E1000_RXD_STAT_DD) == 0)
 				break;
 			ring->slot[j].len = le16toh(curr->wb.upper.length);
+			ring->slot[j].flags = slot_flags;
 			bus_dmamap_sync(rxr->ptag,
 				rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
 			j = (j == lim) ? 0 : j + 1;
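
The report_frequency change from int to u_int here and in the em hunk above is
not just cosmetic: nkr_num_slots is unsigned, and C's usual arithmetic
conversions turn a mixed signed/unsigned comparison into an all-unsigned one.
A standalone illustration of the pitfall the new type avoids:

    #include <stdio.h>

    int
    main(void)
    {
        unsigned int nkr_num_slots = 1024;
        int n = -1;                     /* e.g. a miscomputed counter */

        /* n is converted to unsigned int, so -1 becomes UINT_MAX and
         * the comparison takes the wrong branch. */
        if (n > nkr_num_slots >> 1)
            printf("-1 compares greater than %u\n", nkr_num_slots >> 1);
        return (0);
    }

Compilers with -Wsign-compare would also warn about the old mix; the type
change silences both problems.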

Modified: stable/9/sys/dev/netmap/if_lem_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_lem_netmap.h	Tue Feb  5 05:16:02 2013	(r246354)
+++ stable/9/sys/dev/netmap/if_lem_netmap.h	Tue Feb  5 09:40:31 2013	(r246355)
@@ -253,6 +253,8 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
 	l = adapter->next_rx_desc_to_check;
 	j = netmap_idx_n2k(kring, l);
 	if (netmap_no_pendintr || force_update) {
+		uint16_t slot_flags = kring->nkr_slot_flags;
+
 		for (n = 0; ; n++) {
 			struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
 			uint32_t staterr = le32toh(curr->status);
@@ -266,6 +268,7 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
 				len = 0;
 			}
 			ring->slot[j].len = len;
+			ring->slot[j].flags = slot_flags;
 			bus_dmamap_sync(adapter->rxtag,
 				adapter->rx_buffer_area[l].map,
 				    BUS_DMASYNC_POSTREAD);

Modified: stable/9/sys/dev/netmap/if_re_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_re_netmap.h	Tue Feb  5 05:16:02 2013	(r246354)
+++ stable/9/sys/dev/netmap/if_re_netmap.h	Tue Feb  5 09:40:31 2013	(r246355)
@@ -245,6 +245,8 @@ re_netmap_rxsync(struct ifnet *ifp, u_in
 	l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
 	j = netmap_idx_n2k(kring, l); /* the kring index */
 	if (netmap_no_pendintr || force_update) {
+		uint16_t slot_flags = kring->nkr_slot_flags;
+
 		for (n = kring->nr_hwavail; n < lim ; n++) {
 			struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
 			uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
@@ -256,6 +258,7 @@ re_netmap_rxsync(struct ifnet *ifp, u_in
 			/* XXX subtract crc */
 			total_len = (total_len < 4) ? 0 : total_len - 4;
 			kring->ring->slot[j].len = total_len;
+			kring->ring->slot[j].flags = slot_flags;
 			/*  sync was in re_newbuf() */
 			bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
 			    rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);

Modified: stable/9/sys/dev/netmap/netmap.c
==============================================================================
--- stable/9/sys/dev/netmap/netmap.c	Tue Feb  5 05:16:02 2013	(r246354)
+++ stable/9/sys/dev/netmap/netmap.c	Tue Feb  5 09:40:31 2013	(r246355)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -23,6 +23,8 @@
  * SUCH DAMAGE.
  */
 
+#define NM_BRIDGE
+
 /*
  * This module supports memory mapped access to network devices,
  * see netmap(4).
@@ -52,6 +54,16 @@
  *    transmit or receive queues (or all queues for a given interface).
  */
 
+#ifdef linux
+#include "bsd_glue.h"
+static netdev_tx_t linux_netmap_start(struct sk_buff *skb, struct net_device *dev);
+#endif /* linux */
+
+#ifdef __APPLE__
+#include "osx_glue.h"
+#endif /* __APPLE__ */
+
+#ifdef __FreeBSD__
 #include <sys/cdefs.h> /* prerequisite */
 __FBSDID("$FreeBSD$");
 
@@ -78,21 +90,16 @@ __FBSDID("$FreeBSD$");
 #include <net/if.h>
 #include <net/bpf.h>		/* BIOCIMMEDIATE */
 #include <net/vnet.h>
-#include <net/netmap.h>
-#include <dev/netmap/netmap_kern.h>
 #include <machine/bus.h>	/* bus_dmamap_* */
 
 MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
+#endif /* __FreeBSD__ */
 
-/*
- * lock and unlock for the netmap memory allocator
- */
-#define NMA_LOCK()	mtx_lock(&nm_mem->nm_mtx);
-#define NMA_UNLOCK()	mtx_unlock(&nm_mem->nm_mtx);
-struct netmap_mem_d;
-static struct netmap_mem_d *nm_mem;	/* Our memory allocator. */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
 
 u_int netmap_total_buffers;
+u_int netmap_buf_size;
 char *netmap_buffer_base;	/* address of an invalid buffer */
 
 /* user-controlled variables */
@@ -105,16 +112,215 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbos
     CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
 SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
     CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
-int netmap_buf_size = 2048;
-TUNABLE_INT("hw.netmap.buf_size", &netmap_buf_size);
-SYSCTL_INT(_dev_netmap, OID_AUTO, buf_size,
-    CTLFLAG_RD, &netmap_buf_size, 0, "Size of packet buffers");
 int netmap_mitigate = 1;
 SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
 int netmap_no_pendintr = 1;
 SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
     CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
 
+int netmap_drop = 0;	/* debugging */
+int netmap_flags = 0;	/* debug flags */
+int netmap_fwd = 0;	/* force transparent mode */
+int netmap_copy = 0;	/* debugging, copy content */
+
+SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
+
+#ifdef NM_BRIDGE /* support for netmap bridge */
+
+/*
+ * system parameters.
+ *
+ * All switched ports have prefix NM_NAME.
+ * The switch has a max of NM_BDG_MAXPORTS ports (often stored in a bitmap,
+ * so a practical upper bound is 64).
+ * Each tx ring is read-write, whereas rx rings are readonly (XXX not done yet).
+ * The virtual interfaces use per-queue lock instead of core lock.
+ * In the tx loop, we aggregate traffic in batches to make all operations
+ * faster. The batch size is NM_BDG_BATCH
+ */
+#define	NM_NAME			"vale"	/* prefix for the interface */
+#define NM_BDG_MAXPORTS		16	/* up to 64 ? */
+#define NM_BRIDGE_RINGSIZE	1024	/* in the device */
+#define NM_BDG_HASH		1024	/* forwarding table entries */
+#define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
+#define	NM_BRIDGES		4	/* number of bridges */
+int netmap_bridge = NM_BDG_BATCH; /* bridge batch size */
+SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , "");
+
+#ifdef linux
+#define	ADD_BDG_REF(ifp)	(NA(ifp)->if_refcount++)
+#define	DROP_BDG_REF(ifp)	(NA(ifp)->if_refcount-- <= 1)
+#else /* !linux */
+#define	ADD_BDG_REF(ifp)	(ifp)->if_refcount++
+#define	DROP_BDG_REF(ifp)	refcount_release(&(ifp)->if_refcount)
+#ifdef __FreeBSD__
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#endif /* __FreeBSD__ */
+#define prefetch(x)	__builtin_prefetch(x)
+#endif /* !linux */
+
+static void bdg_netmap_attach(struct ifnet *ifp);
+static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
+/* per-tx-queue entry */
+struct nm_bdg_fwd {	/* forwarding entry for a bridge */
+	void *buf;
+	uint64_t dst;	/* dst mask */
+	uint32_t src;	/* src index ? */
+	uint16_t len;	/* src len */
+};
+
+struct nm_hash_ent {
+	uint64_t	mac;	/* the top 2 bytes are the epoch */
+	uint64_t	ports;
+};
+
+/*
+ * Interfaces for a bridge are all in ports[].
+ * The array has fixed size, an empty entry does not terminate
+ * the search.
+ */
+struct nm_bridge {
+	struct ifnet *bdg_ports[NM_BDG_MAXPORTS];
+	int n_ports;
+	uint64_t act_ports;
+	int freelist;	/* first buffer index */
+	NM_SELINFO_T si;	/* poll/select wait queue */
+	NM_LOCK_T bdg_lock;	/* protect the selinfo ? */
+
+	/* the forwarding table, MAC+ports */
+	struct nm_hash_ent ht[NM_BDG_HASH];
+
+	int namelen;	/* 0 means free */
+	char basename[IFNAMSIZ];
+};
+
+struct nm_bridge nm_bridges[NM_BRIDGES];
+
+#define BDG_LOCK(b)	mtx_lock(&(b)->bdg_lock)
+#define BDG_UNLOCK(b)	mtx_unlock(&(b)->bdg_lock)
+
+/*
+ * NA(ifp)->bdg_port	port index
+ */
+
+// XXX only for multiples of 64 bytes, non overlapped.
+static inline void
+pkt_copy(void *_src, void *_dst, int l)
+{
+        uint64_t *src = _src;
+        uint64_t *dst = _dst;
+        if (unlikely(l >= 1024)) {
+                bcopy(src, dst, l);
+                return;
+        }
+        for (; likely(l > 0); l-=64) {
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+                *dst++ = *src++;
+        }
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+static struct nm_bridge *
+nm_find_bridge(const char *name)
+{
+	int i, l, namelen, e;
+	struct nm_bridge *b = NULL;
+
+	namelen = strlen(NM_NAME);	/* base length */
+	l = strlen(name);		/* actual length */
+	for (i = namelen + 1; i < l; i++) {
+		if (name[i] == ':') {
+			namelen = i;
+			break;
+		}
+	}
+	if (namelen >= IFNAMSIZ)
+		namelen = IFNAMSIZ;
+	ND("--- prefix is '%.*s' ---", namelen, name);
+
+	/* use the first entry for locking */
+	BDG_LOCK(nm_bridges); // XXX do better
+	for (e = -1, i = 1; i < NM_BRIDGES; i++) {
+		b = nm_bridges + i;
+		if (b->namelen == 0)
+			e = i;	/* record empty slot */
+		else if (strncmp(name, b->basename, namelen) == 0) {
+			ND("found '%.*s' at %d", namelen, name, i);
+			break;
+		}
+	}
+	if (i == NM_BRIDGES) { /* all full */
+		if (e == -1) { /* no empty slot */
+			b = NULL;
+		} else {
+			b = nm_bridges + e;
+			strncpy(b->basename, name, namelen);
+			b->namelen = namelen;
+		}
+	}
+	BDG_UNLOCK(nm_bridges);
+	return b;
+}
+#endif /* NM_BRIDGE */
+
+
+/*
+ * Fetch configuration from the device, to cope with dynamic
+ * reconfigurations after loading the module.
+ */
+static int
+netmap_update_config(struct netmap_adapter *na)
+{
+	struct ifnet *ifp = na->ifp;
+	u_int txr, txd, rxr, rxd;
+
+	txr = txd = rxr = rxd = 0;
+	if (na->nm_config) {
+		na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
+	} else {
+		/* take whatever we had at init time */
+		txr = na->num_tx_rings;
+		txd = na->num_tx_desc;
+		rxr = na->num_rx_rings;
+		rxd = na->num_rx_desc;
+	}	
+
+	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
+	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
+		return 0; /* nothing changed */
+	if (netmap_verbose || na->refcount > 0) {
+		D("stored config %s: txring %d x %d, rxring %d x %d",
+			ifp->if_xname,
+			na->num_tx_rings, na->num_tx_desc,
+			na->num_rx_rings, na->num_rx_desc);
+		D("new config %s: txring %d x %d, rxring %d x %d",
+			ifp->if_xname, txr, txd, rxr, rxd);
+	}
+	if (na->refcount == 0) {
+		D("configuration changed (but fine)");
+		na->num_tx_rings = txr;
+		na->num_tx_desc = txd;
+		na->num_rx_rings = rxr;
+		na->num_rx_desc = rxd;
+		return 0;
+	}
+	D("configuration changed while active, this is bad...");
+	return 1;
+}
 
 /*------------- memory allocator -----------------*/
 #ifdef NETMAP_MEM2
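
pkt_copy() above trades generality for speed: for short packets it copies in
unrolled 64-byte strides, so a length that is not a multiple of 64 is
effectively rounded up, exactly as its leading comment warns. Netmap's
fixed-size, padded packet buffers make that safe. A small illustration of the
rounding (buffer names invented):

    char src[1536], dst[1536];  /* stand-ins for netmap buffers, which
                                 * are padded to a 64-byte multiple */

    pkt_copy(src, dst, 60);     /* one full 64-byte stride written */
    pkt_copy(src, dst, 1500);   /* 24 strides, 1536 bytes touched */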
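
netmap_update_config() above pulls the current ring geometry through an
optional na->nm_config callback, falling back to the values recorded at attach
time. A hypothetical driver-side callback, with the signature inferred from
the call site and all foo_* names invented:

    /* hypothetical; foo_softc and its fields are for illustration only */
    static void
    foo_netmap_config(struct ifnet *ifp, u_int *txr, u_int *txd,
        u_int *rxr, u_int *rxd)
    {
        struct foo_softc *sc = ifp->if_softc;

        *txr = sc->num_tx_queues;
        *txd = sc->num_tx_descriptors;
        *rxr = sc->num_rx_queues;
        *rxd = sc->num_rx_descriptors;
    }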
@@ -124,23 +330,62 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pen
 #endif /* !NETMAP_MEM2 */
 /*------------ end of memory allocator ----------*/
 
-/* Structure associated to each thread which registered an interface. */
+
+/* Structure associated to each thread which registered an interface.
+ *
+ * The first 4 fields of this structure are written by NIOCREGIF and
+ * read by poll() and NIOC?XSYNC.
+ * There is low contention among writers (actually, a correct user program
+ * should have no contention among writers) and among writers and readers,
+ * so we use a single global lock to protect the structure initialization.
+ * Since initialization involves the allocation of memory, we reuse the memory
+ * allocator lock.
+ * Read access to the structure is lock free. Readers must check that
+ * np_nifp is not NULL before using the other fields.
+ * If np_nifp is NULL initialization has not been performed, so they should
+ * return an error to userlevel.
+ *
+ * The ref_done field is used to regulate access to the refcount in the
+ * memory allocator. The refcount must be incremented at most once for
+ * each open("/dev/netmap"). The increment is performed by the first
+ * function that calls netmap_get_memory() (currently called by
+ * mmap(), NIOCGINFO and NIOCREGIF).
+ * If the refcount is incremented, it is then decremented when the
+ * private structure is destroyed.
+ */
 struct netmap_priv_d {
-	struct netmap_if *np_nifp;	/* netmap interface descriptor. */
+	struct netmap_if * volatile np_nifp;	/* netmap interface descriptor. */
 
 	struct ifnet	*np_ifp;	/* device for which we hold a reference */
 	int		np_ringid;	/* from the ioctl */
 	u_int		np_qfirst, np_qlast;	/* range of rings to scan */
 	uint16_t	np_txpoll;
+
+	unsigned long	ref_done;	/* use with NMA_LOCK held */
 };
 
 
+static int
+netmap_get_memory(struct netmap_priv_d* p)
+{
+	int error = 0;
+	NMA_LOCK();
+	if (!p->ref_done) {
+		error = netmap_memory_finalize();
+		if (!error)
+			p->ref_done = 1;
+	}
+	NMA_UNLOCK();
+	return error;
+}
+
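
The long comment above also dictates the reader side of the protocol: load
np_nifp once and bail out if it is still NULL. As a sketch (the exact errno is
a guess):

    struct netmap_if *nifp = priv->np_nifp;  /* single read of volatile ptr */

    if (nifp == NULL)           /* NIOCREGIF has not completed */
        return (ENXIO);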
 /*
  * File descriptor's private data destructor.
  *
  * Call nm_register(ifp,0) to stop netmap mode on the interface and
  * revert to normal operation. We expect that np_ifp has not gone.
  */
+/* call with NMA_LOCK held */
 static void
 netmap_dtor_locked(void *data)
 {
@@ -153,7 +398,8 @@ netmap_dtor_locked(void *data)
 	if (na->refcount <= 0) {	/* last instance */
 		u_int i, j, lim;
 
-		D("deleting last netmap instance for %s", ifp->if_xname);
+		if (netmap_verbose)
+			D("deleting last instance for %s", ifp->if_xname);
 		/*
 		 * there is a race here with *_netmap_task() and
 		 * netmap_poll(), which don't run under NETMAP_REG_LOCK.
@@ -180,7 +426,6 @@ netmap_dtor_locked(void *data)
 		selwakeuppri(&na->tx_si, PI_NET);
 		selwakeuppri(&na->rx_si, PI_NET);
 		/* release all buffers */
-		NMA_LOCK();
 		for (i = 0; i < na->num_tx_rings + 1; i++) {
 			struct netmap_ring *ring = na->tx_rings[i].ring;
 			lim = na->tx_rings[i].nkr_num_slots;
@@ -200,30 +445,136 @@ netmap_dtor_locked(void *data)
 		/* XXX kqueue(9) needed; these will mirror knlist_init. */
 		/* knlist_destroy(&na->tx_si.si_note); */
 		/* knlist_destroy(&na->rx_si.si_note); */
-		NMA_UNLOCK();
 		netmap_free_rings(na);
 		wakeup(na);
 	}
 	netmap_if_free(nifp);
 }
 
+static void
+nm_if_rele(struct ifnet *ifp)
+{
+#ifndef NM_BRIDGE
+	if_rele(ifp);
+#else /* NM_BRIDGE */
+	int i, full;
+	struct nm_bridge *b;
+
+	if (strncmp(ifp->if_xname, NM_NAME, sizeof(NM_NAME) - 1)) {
+		if_rele(ifp);
+		return;
+	}
+	if (!DROP_BDG_REF(ifp))
+		return;
+	b = ifp->if_bridge;
+	BDG_LOCK(nm_bridges);
+	BDG_LOCK(b);
+	ND("want to disconnect %s from the bridge", ifp->if_xname);
+	full = 0;
+	for (i = 0; i < NM_BDG_MAXPORTS; i++) {
+		if (b->bdg_ports[i] == ifp) {
+			b->bdg_ports[i] = NULL;
+			bzero(ifp, sizeof(*ifp));
+			free(ifp, M_DEVBUF);
+			break;
+		}
+		else if (b->bdg_ports[i] != NULL)
+			full = 1;
+	}
+	BDG_UNLOCK(b);
+	if (full == 0) {
+		ND("freeing bridge %d", b - nm_bridges);
+		b->namelen = 0;
+	}
+	BDG_UNLOCK(nm_bridges);
+	if (i == NM_BDG_MAXPORTS)
+		D("ouch, cannot find ifp to remove");
+#endif /* NM_BRIDGE */
+}
 
 static void
 netmap_dtor(void *data)
 {
 	struct netmap_priv_d *priv = data;
 	struct ifnet *ifp = priv->np_ifp;
-	struct netmap_adapter *na = NA(ifp);
+	struct netmap_adapter *na;
 
-	na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
-	netmap_dtor_locked(data);
-	na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
+	NMA_LOCK();
+	if (ifp) {
+		na = NA(ifp);
+		na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
+		netmap_dtor_locked(data);
+		na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
 
-	if_rele(ifp);
+		nm_if_rele(ifp);
+	}
+	if (priv->ref_done) {
+		netmap_memory_deref();
+	}
+	NMA_UNLOCK();
 	bzero(priv, sizeof(*priv));	/* XXX for safety */
 	free(priv, M_DEVBUF);
 }
 
+#ifdef __FreeBSD__
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+static struct cdev_pager_ops saved_cdev_pager_ops;
+
+static int
+netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+    vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+	if (netmap_verbose)
+		D("first mmap for %p", handle);
+	return saved_cdev_pager_ops.cdev_pg_ctor(handle,
+			size, prot, foff, cred, color);
+}
+
+static void
+netmap_dev_pager_dtor(void *handle)
+{
+	saved_cdev_pager_ops.cdev_pg_dtor(handle);
+	ND("ready to release memory for %p", handle);
+}
+
+
+static struct cdev_pager_ops netmap_cdev_pager_ops = {
+        .cdev_pg_ctor = netmap_dev_pager_ctor,
+        .cdev_pg_dtor = netmap_dev_pager_dtor,
+        .cdev_pg_fault = NULL,
+};
+
+static int
+netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
+	vm_size_t objsize,  vm_object_t *objp, int prot)
+{
+	vm_object_t obj;
+
+	ND("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+	    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+	obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
+            curthread->td_ucred);
+	ND("returns obj %p", obj);
+	if (obj == NULL)
+		return EINVAL;
+	if (saved_cdev_pager_ops.cdev_pg_fault == NULL) {
+		ND("initialize cdev_pager_ops");
+		saved_cdev_pager_ops = *(obj->un_pager.devp.ops);
+		netmap_cdev_pager_ops.cdev_pg_fault =
+			saved_cdev_pager_ops.cdev_pg_fault;
+	};
+	obj->un_pager.devp.ops = &netmap_cdev_pager_ops;
+	*objp = obj;
+	return 0;
+}
+#endif /* __FreeBSD__ */
+
 
 /*
  * mmap(2) support for the "netmap" device.
@@ -235,6 +586,7 @@ netmap_dtor(void *data)
  * Return 0 on success, -1 otherwise.
  */
 
+#ifdef __FreeBSD__
 static int
 netmap_mmap(__unused struct cdev *dev,
 #if __FreeBSD_version < 900000
@@ -245,75 +597,222 @@ netmap_mmap(__unused struct cdev *dev,
 #endif
 	)
 {
+	int error = 0;
+	struct netmap_priv_d *priv;
+
 	if (nprot & PROT_EXEC)
 		return (-1);	// XXX -1 or EINVAL ?
 
+	error = devfs_get_cdevpriv((void **)&priv);
+	if (error == EBADF) {	/* called on fault, memory is initialized */
+		ND(5, "handling fault at ofs 0x%x", offset);
+		error = 0;
+	} else if (error == 0)	/* make sure memory is set */
+		error = netmap_get_memory(priv);
+	if (error)
+		return (error);
+
 	ND("request for offset 0x%x", (uint32_t)offset);
 	*paddr = netmap_ofstophys(offset);
 
-	return (0);
+	return (*paddr ? 0 : ENOMEM);
 }
 
+static int
+netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+	if (netmap_verbose)
+		D("dev %p fflag 0x%x devtype %d td %p",
+			dev, fflag, devtype, td);
+	return 0;
+}
+
+static int
+netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+	struct netmap_priv_d *priv;
+	int error;
+
+	priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+			      M_NOWAIT | M_ZERO);
+	if (priv == NULL)
+		return ENOMEM;
+
+	error = devfs_set_cdevpriv(priv, netmap_dtor);
+	if (error)
+	        return error;
+
+	return 0;
+}
+#endif /* __FreeBSD__ */
+
 
 /*
  * Handlers for synchronization of the queues from/to the host.
- *
- * netmap_sync_to_host() passes packets up. We are called from a
- * system call in user process context, and the only contention
- * can be among multiple user threads erroneously calling
- * this routine concurrently. In principle we should not even
- * need to lock.
+ * Netmap has two operating modes:
+ * - in the default mode, the rings connected to the host stack are
+ *   just another ring pair managed by userspace;
+ * - in transparent mode (XXX to be defined) incoming packets
+ *   (from the host or the NIC) are marked as NS_FORWARD upon
+ *   arrival, and the user application has a chance to reset the
+ *   flag for packets that should be dropped.
+ *   On the RXSYNC or poll(), packets in RX rings between
+ *   kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
+ *   to the other side.
+ * The transfer NIC --> host is relatively easy, just encapsulate
+ * into mbufs and we are done. The host --> NIC side is slightly
+ * harder because there might not be room in the tx ring so it
+ * might take a while before releasing the buffer.
+ */
+
+/*
+ * pass a chain of buffers to the host stack as coming from 'dst'
  */
 static void
-netmap_sync_to_host(struct netmap_adapter *na)
+netmap_send_up(struct ifnet *dst, struct mbuf *head)
 {
-	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
-	struct netmap_ring *ring = kring->ring;
-	struct mbuf *head = NULL, *tail = NULL, *m;
-	u_int k, n, lim = kring->nkr_num_slots - 1;
+	struct mbuf *m;
 
-	k = ring->cur;
-	if (k > lim) {
-		netmap_ring_reinit(kring);
-		return;
+	/* send packets up, outside the lock */
+	while ((m = head) != NULL) {
+		head = head->m_nextpkt;
+		m->m_nextpkt = NULL;
+		if (netmap_verbose & NM_VERB_HOST)
+			D("sending up pkt %p size %d", m, MBUF_LEN(m));
+		NM_SEND_UP(dst, m);
 	}
-	// na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+}
 
-	/* Take packets from hwcur to cur and pass them up.
+struct mbq {
+	struct mbuf *head;
+	struct mbuf *tail;
+	int count;
+};
+
+/*
+ * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
+ * Run from hwcur to cur - reserved
+ */
+static void
+netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
+{
+	/* Take packets from hwcur to cur-reserved and pass them up.
 	 * In case of no buffers we give up. At the end of the loop,
 	 * the queue is drained in all cases.
+	 * XXX handle reserved
 	 */
+	int k = kring->ring->cur - kring->ring->reserved;
+	u_int n, lim = kring->nkr_num_slots - 1;
+	struct mbuf *m, *tail = q->tail;
+
+	if (k < 0)
+		k = k + kring->nkr_num_slots;
 	for (n = kring->nr_hwcur; n != k;) {
-		struct netmap_slot *slot = &ring->slot[n];
+		struct netmap_slot *slot = &kring->ring->slot[n];
 
 		n = (n == lim) ? 0 : n + 1;
+		if ((slot->flags & NS_FORWARD) == 0 && !force)
+			continue;
 		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
 			D("bad pkt at %d len %d", n, slot->len);
 			continue;
 		}
-		m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
+		slot->flags &= ~NS_FORWARD; // XXX needed ?
+		m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
 
 		if (m == NULL)
 			break;
 		if (tail)
 			tail->m_nextpkt = m;
 		else
-			head = m;
+			q->head = m;
 		tail = m;
+		q->count++;
 		m->m_nextpkt = NULL;
 	}
+	q->tail = tail;
+}
+
+/*
+ * called under main lock to send packets from the host to the NIC
+ * The host ring has packets from nr_hwcur to (cur - reserved)
+ * to be sent down. We scan the tx rings, which have just been
+ * flushed so nr_hwcur == cur. Pushing packets down means
+ * increment cur and decrement avail.
+ * XXX to be verified
+ */
+static void
+netmap_sw_to_nic(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+	struct netmap_kring *k1 = &na->tx_rings[0];
+	int i, howmany, src_lim, dst_lim;
+
+	howmany = kring->nr_hwavail;	/* XXX otherwise cur - reserved - nr_hwcur */
+
+	src_lim = kring->nkr_num_slots;
+	for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
+		ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
+		dst_lim = k1->nkr_num_slots;
+		while (howmany > 0 && k1->ring->avail > 0) {
+			struct netmap_slot *src, *dst, tmp;
+			src = &kring->ring->slot[kring->nr_hwcur];
+			dst = &k1->ring->slot[k1->ring->cur];
+			tmp = *src;
+			src->buf_idx = dst->buf_idx;
+			src->flags = NS_BUF_CHANGED;
+
+			dst->buf_idx = tmp.buf_idx;
+			dst->len = tmp.len;
+			dst->flags = NS_BUF_CHANGED;
+			ND("out len %d buf %d from %d to %d",
+				dst->len, dst->buf_idx,
+				kring->nr_hwcur, k1->ring->cur);
+
+			if (++kring->nr_hwcur >= src_lim)
+				kring->nr_hwcur = 0;
+			howmany--;
+			kring->nr_hwavail--;
+			if (++k1->ring->cur >= dst_lim)
+				k1->ring->cur = 0;
+			k1->ring->avail--;
+		}
+		kring->ring->cur = kring->nr_hwcur; // XXX
+		k1++;
+	}
+}
+
+/*
+ * netmap_sync_to_host() passes packets up. We are called from a
+ * system call in user process context, and the only contention
+ * can be among multiple user threads erroneously calling
+ * this routine concurrently.
+ */
+static void
+netmap_sync_to_host(struct netmap_adapter *na)
+{
+	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
+	struct netmap_ring *ring = kring->ring;
+	u_int k, lim = kring->nkr_num_slots - 1;
+	struct mbq q = { NULL, NULL };
+
+	k = ring->cur;
+	if (k > lim) {
+		netmap_ring_reinit(kring);
+		return;
+	}
+	// na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+
+	/* Take packets from hwcur to cur and pass them up.
+	 * In case of no buffers we give up. At the end of the loop,
+	 * the queue is drained in all cases.
+	 */
+	netmap_grab_packets(kring, &q, 1);
 	kring->nr_hwcur = k;
 	kring->nr_hwavail = ring->avail = lim;
 	// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
 
-	/* send packets up, outside the lock */
-	while ((m = head) != NULL) {
-		head = head->m_nextpkt;
-		m->m_nextpkt = NULL;
-		if (netmap_verbose & NM_VERB_HOST)
-			D("sending up pkt %p size %d", m, MBUF_LEN(m));
-		NM_SEND_UP(na->ifp, m);
-	}
+	netmap_send_up(na->ifp, q.head);
 }
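
Note the division of labor in the new code above: netmap_grab_packets()
encapsulates NIC-to-host traffic into an mbuf chain, while netmap_sw_to_nic()
moves host-to-NIC traffic by swapping buffer indices (marking both slots
NS_BUF_CHANGED) rather than copying. On the user side, the transparent mode
described earlier reduces to clearing NS_FORWARD on unwanted packets before
the next sync; a sketch against the netmap API of this era (fd, nifp and
want_pkt() assumed set up elsewhere):

    struct netmap_ring *ring = NETMAP_RXRING(nifp, 0);

    while (ring->avail > 0) {
        struct netmap_slot *slot = &ring->slot[ring->cur];
        char *buf = NETMAP_BUF(ring, slot->buf_idx);

        if (!want_pkt(buf, slot->len))       /* hypothetical filter */
            slot->flags &= ~NS_FORWARD;      /* drop instead of forward */
        ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
        ring->avail--;
    }
    ioctl(fd, NIOCRXSYNC, NULL);   /* slots still flagged are passed up */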
 
 /*
@@ -323,15 +822,19 @@ netmap_sync_to_host(struct netmap_adapte
  *
  * This routine also does the selrecord if called from the poll handler
  * (we know because td != NULL).
+ *
+ * NOTE: on linux, selrecord() is defined as a macro and uses pwait
+ *     as an additional hidden argument.
  */
 static void
-netmap_sync_from_host(struct netmap_adapter *na, struct thread *td)
+netmap_sync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
 {
 	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
 	struct netmap_ring *ring = kring->ring;
 	u_int j, n, lim = kring->nkr_num_slots;
 	u_int k = ring->cur, resvd = ring->reserved;
 
+	(void)pwait;	/* disable unused warnings */
 	na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
 	if (k >= lim) {
 		netmap_ring_reinit(kring);
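
The pwait argument added above exists only for the Linux build, where
selrecord() is a macro that needs it; FreeBSD discards it with the (void)
cast. The glue presumably reads roughly like this (a guess, not the actual
bsd_glue.h):

    /* hypothetical Linux glue: forward the hidden pwait argument */
    #define selrecord(td, si)   poll_wait((struct file *)(td), si, pwait)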
@@ -370,15 +873,73 @@ netmap_sync_from_host(struct netmap_adap
 static int
 get_ifp(const char *name, struct ifnet **ifp)
 {
+#ifdef NM_BRIDGE
+	struct ifnet *iter = NULL;
+
+	do {
+		struct nm_bridge *b;
+		int i, l, cand = -1;
+
+		if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1))
+			break;
+		b = nm_find_bridge(name);
+		if (b == NULL) {
+			D("no bridges available for '%s'", name);
+			return (ENXIO);
+		}
+		/* XXX locking */
+		BDG_LOCK(b);
+		/* lookup in the local list of ports */
+		for (i = 0; i < NM_BDG_MAXPORTS; i++) {
+			iter = b->bdg_ports[i];
+			if (iter == NULL) {
+				if (cand == -1)
+					cand = i; /* potential insert point */
+				continue;
+			}
+			if (!strcmp(iter->if_xname, name)) {
+				ADD_BDG_REF(iter);
+				ND("found existing interface");
+				BDG_UNLOCK(b);
+				break;
+			}
+		}
+		if (i < NM_BDG_MAXPORTS) /* already unlocked */
+			break;
+		if (cand == -1) {
+			D("bridge full, cannot create new port");
+no_port:
+			BDG_UNLOCK(b);
+			*ifp = NULL;
+			return EINVAL;
+		}
+		ND("create new bridge port %s", name);
+		/* space for forwarding list after the ifnet */
+		l = sizeof(*iter) +
+			 sizeof(struct nm_bdg_fwd)*NM_BDG_BATCH ;
+		iter = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
+		if (!iter)
+			goto no_port;
+		strcpy(iter->if_xname, name);
+		bdg_netmap_attach(iter);
+		b->bdg_ports[cand] = iter;
+		iter->if_bridge = b;
+		ADD_BDG_REF(iter);
+		BDG_UNLOCK(b);
+		ND("attaching virtual bridge %p", b);
+	} while (0);
+	*ifp = iter;
+	if (! *ifp)
+#endif /* NM_BRIDGE */
 	*ifp = ifunit_ref(name);
 	if (*ifp == NULL)
 		return (ENXIO);
 	/* can do this if the capability exists and if_pspare[0]
 	 * points to the netmap descriptor.
 	 */
-	if ((*ifp)->if_capabilities & IFCAP_NETMAP && NA(*ifp))
+	if (NETMAP_CAPABLE(*ifp))
 		return 0;	/* valid pointer, we hold the refcount */
-	if_rele(*ifp);
+	nm_if_rele(*ifp);
 	return EINVAL;	// not NETMAP capable
 }
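
A consequence of the get_ifp() logic above is that VALE ports are created on
demand: any name with the NM_NAME ("vale") prefix is looked up in, or added
to, the in-kernel bridge, with the ':' separating the bridge name from the
port. From userspace the attach could look like this sketch (field names per
the nmreq of this era, error handling minimal):

    struct nmreq req;
    int fd = open("/dev/netmap", O_RDWR);

    bzero(&req, sizeof(req));
    req.nr_version = NETMAP_API;
    /* "vale0" selects the bridge, the full name identifies the port */
    strncpy(req.nr_name, "vale0:a", sizeof(req.nr_name));
    if (ioctl(fd, NIOCREGIF, &req) < 0)
        perror("NIOCREGIF");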
 
@@ -402,7 +963,7 @@ netmap_ring_reinit(struct netmap_kring *
 	u_int i, lim = kring->nkr_num_slots - 1;
 	int errors = 0;
 
-	D("called for %s", kring->na->ifp->if_xname);
+	RD(10, "called for %s", kring->na->ifp->if_xname);
 	if (ring->cur > lim)
 		errors++;
 	for (i = 0; i <= lim; i++) {
@@ -424,9 +985,9 @@ netmap_ring_reinit(struct netmap_kring *
 		int pos = kring - kring->na->tx_rings;
 		int n = kring->na->num_tx_rings + 1;
 
-		D("total %d errors", errors);
+		RD(10, "total %d errors", errors);
 		errors++;
-		D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
+		RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
 			kring->na->ifp->if_xname,
 			pos < n ?  "TX" : "RX", pos < n ? pos : pos - n,
 			ring->cur, kring->nr_hwcur,
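
The D() calls here become RD(10, ...), which by name and usage is a
rate-limited logger, so a buggy application that trips netmap_ring_reinit() in
a loop can no longer flood the console. A sketch of how such a macro can be
built (the real one lives in netmap_kern.h; time_second is FreeBSD's coarse
wall-clock global):

    /* sketch only, in the spirit of RD() */
    #define MY_RD(lps, fmt, ...) do {                       \
            static time_t __t0;                             \
            static int __cnt;                               \
            if (__t0 != time_second) {                      \
                    __t0 = time_second;                     \
                    __cnt = 0;                              \
            }                                               \
            if (__cnt++ < (lps))                            \
                    D(fmt, ##__VA_ARGS__);                  \
    } while (0)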
@@ -474,6 +1035,7 @@ netmap_set_ringid(struct netmap_priv_d *
 	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
 	if (need_lock)
 		na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
+    if (netmap_verbose) {
 	if (ringid & NETMAP_SW_RING)
 		D("ringid %s set to SW RING", ifp->if_xname);
 	else if (ringid & NETMAP_HW_RING)
@@ -481,6 +1043,7 @@ netmap_set_ringid(struct netmap_priv_d *
 			priv->np_qfirst);
 	else
 		D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
+    }
 	return 0;
 }
 
@@ -498,8 +1061,8 @@ netmap_set_ringid(struct netmap_priv_d *
  * Return 0 on success, errno otherwise.
  */
 static int
-netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

