svn commit: r246355 - in stable/9/sys: dev/netmap net
Luigi Rizzo
luigi at FreeBSD.org
Tue Feb 5 09:40:32 UTC 2013
Author: luigi
Date: Tue Feb 5 09:40:31 2013
New Revision: 246355
URL: http://svnweb.freebsd.org/changeset/base/246355
Log:
MFH: sync netmap with the version in HEAD
Deleted:
stable/9/sys/dev/netmap/netmap_mem1.c
Modified:
stable/9/sys/dev/netmap/if_em_netmap.h
stable/9/sys/dev/netmap/if_igb_netmap.h
stable/9/sys/dev/netmap/if_lem_netmap.h
stable/9/sys/dev/netmap/if_re_netmap.h
stable/9/sys/dev/netmap/netmap.c
stable/9/sys/dev/netmap/netmap_kern.h
stable/9/sys/dev/netmap/netmap_mem2.c
stable/9/sys/net/netmap.h
stable/9/sys/net/netmap_user.h
Modified: stable/9/sys/dev/netmap/if_em_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_em_netmap.h Tue Feb 5 05:16:02 2013 (r246354)
+++ stable/9/sys/dev/netmap/if_em_netmap.h Tue Feb 5 09:40:31 2013 (r246355)
@@ -171,7 +171,7 @@ em_netmap_txsync(struct ifnet *ifp, u_in
u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
- int report_frequency = kring->nkr_num_slots >> 1;
+ u_int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
if (k > lim)
@@ -292,6 +292,8 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
l = rxr->next_to_check;
j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
+ uint16_t slot_flags = kring->nkr_slot_flags;
+
for (n = 0; ; n++) {
struct e1000_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->status);
@@ -299,6 +301,7 @@ em_netmap_rxsync(struct ifnet *ifp, u_in
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->length);
+ ring->slot[j].flags = slot_flags;
bus_dmamap_sync(rxr->rxtag, rxr->rx_buffers[l].map,
BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
Modified: stable/9/sys/dev/netmap/if_igb_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_igb_netmap.h Tue Feb 5 05:16:02 2013 (r246354)
+++ stable/9/sys/dev/netmap/if_igb_netmap.h Tue Feb 5 09:40:31 2013 (r246355)
@@ -125,7 +125,7 @@ igb_netmap_txsync(struct ifnet *ifp, u_i
u_int j, k, l, n = 0, lim = kring->nkr_num_slots - 1;
/* generate an interrupt approximately every half ring */
- int report_frequency = kring->nkr_num_slots >> 1;
+ u_int report_frequency = kring->nkr_num_slots >> 1;
k = ring->cur;
if (k > lim)
@@ -263,6 +263,8 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
l = rxr->next_to_check;
j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
+ uint16_t slot_flags = kring->nkr_slot_flags;
+
for (n = 0; ; n++) {
union e1000_adv_rx_desc *curr = &rxr->rx_base[l];
uint32_t staterr = le32toh(curr->wb.upper.status_error);
@@ -270,6 +272,7 @@ igb_netmap_rxsync(struct ifnet *ifp, u_i
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
ring->slot[j].len = le16toh(curr->wb.upper.length);
+ ring->slot[j].flags = slot_flags;
bus_dmamap_sync(rxr->ptag,
rxr->rx_buffers[l].pmap, BUS_DMASYNC_POSTREAD);
j = (j == lim) ? 0 : j + 1;
Modified: stable/9/sys/dev/netmap/if_lem_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_lem_netmap.h Tue Feb 5 05:16:02 2013 (r246354)
+++ stable/9/sys/dev/netmap/if_lem_netmap.h Tue Feb 5 09:40:31 2013 (r246355)
@@ -253,6 +253,8 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
l = adapter->next_rx_desc_to_check;
j = netmap_idx_n2k(kring, l);
if (netmap_no_pendintr || force_update) {
+ uint16_t slot_flags = kring->nkr_slot_flags;
+
for (n = 0; ; n++) {
struct e1000_rx_desc *curr = &adapter->rx_desc_base[l];
uint32_t staterr = le32toh(curr->status);
@@ -266,6 +268,7 @@ lem_netmap_rxsync(struct ifnet *ifp, u_i
len = 0;
}
ring->slot[j].len = len;
+ ring->slot[j].flags = slot_flags;
bus_dmamap_sync(adapter->rxtag,
adapter->rx_buffer_area[l].map,
BUS_DMASYNC_POSTREAD);
Modified: stable/9/sys/dev/netmap/if_re_netmap.h
==============================================================================
--- stable/9/sys/dev/netmap/if_re_netmap.h Tue Feb 5 05:16:02 2013 (r246354)
+++ stable/9/sys/dev/netmap/if_re_netmap.h Tue Feb 5 09:40:31 2013 (r246355)
@@ -245,6 +245,8 @@ re_netmap_rxsync(struct ifnet *ifp, u_in
l = sc->rl_ldata.rl_rx_prodidx; /* next pkt to check */
j = netmap_idx_n2k(kring, l); /* the kring index */
if (netmap_no_pendintr || force_update) {
+ uint16_t slot_flags = kring->nkr_slot_flags;
+
for (n = kring->nr_hwavail; n < lim ; n++) {
struct rl_desc *cur_rx = &sc->rl_ldata.rl_rx_list[l];
uint32_t rxstat = le32toh(cur_rx->rl_cmdstat);
@@ -256,6 +258,7 @@ re_netmap_rxsync(struct ifnet *ifp, u_in
/* XXX subtract crc */
total_len = (total_len < 4) ? 0 : total_len - 4;
kring->ring->slot[j].len = total_len;
+ kring->ring->slot[j].flags = slot_flags;
/* sync was in re_newbuf() */
bus_dmamap_sync(sc->rl_ldata.rl_rx_mtag,
rxd[l].rx_dmamap, BUS_DMASYNC_POSTREAD);
Modified: stable/9/sys/dev/netmap/netmap.c
==============================================================================
--- stable/9/sys/dev/netmap/netmap.c Tue Feb 5 05:16:02 2013 (r246354)
+++ stable/9/sys/dev/netmap/netmap.c Tue Feb 5 09:40:31 2013 (r246355)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ * Copyright (C) 2011-2012 Matteo Landi, Luigi Rizzo. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,6 +23,8 @@
* SUCH DAMAGE.
*/
+#define NM_BRIDGE
+
/*
* This module supports memory mapped access to network devices,
* see netmap(4).
@@ -52,6 +54,16 @@
* transmit or receive queues (or all queues for a given interface).
*/
+#ifdef linux
+#include "bsd_glue.h"
+static netdev_tx_t linux_netmap_start(struct sk_buff *skb, struct net_device *dev);
+#endif /* linux */
+
+#ifdef __APPLE__
+#include "osx_glue.h"
+#endif /* __APPLE__ */
+
+#ifdef __FreeBSD__
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");
@@ -78,21 +90,16 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/bpf.h> /* BIOCIMMEDIATE */
#include <net/vnet.h>
-#include <net/netmap.h>
-#include <dev/netmap/netmap_kern.h>
#include <machine/bus.h> /* bus_dmamap_* */
MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");
+#endif /* __FreeBSD__ */
-/*
- * lock and unlock for the netmap memory allocator
- */
-#define NMA_LOCK() mtx_lock(&nm_mem->nm_mtx);
-#define NMA_UNLOCK() mtx_unlock(&nm_mem->nm_mtx);
-struct netmap_mem_d;
-static struct netmap_mem_d *nm_mem; /* Our memory allocator. */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
u_int netmap_total_buffers;
+u_int netmap_buf_size;
char *netmap_buffer_base; /* address of an invalid buffer */
/* user-controlled variables */
@@ -105,16 +112,215 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, verbos
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
-int netmap_buf_size = 2048;
-TUNABLE_INT("hw.netmap.buf_size", &netmap_buf_size);
-SYSCTL_INT(_dev_netmap, OID_AUTO, buf_size,
- CTLFLAG_RD, &netmap_buf_size, 0, "Size of packet buffers");
int netmap_mitigate = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
+int netmap_drop = 0; /* debugging */
+int netmap_flags = 0; /* debug flags */
+int netmap_fwd = 0; /* force transparent mode */
+int netmap_copy = 0; /* debugging, copy content */
+
+SYSCTL_INT(_dev_netmap, OID_AUTO, drop, CTLFLAG_RW, &netmap_drop, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0 , "");
+SYSCTL_INT(_dev_netmap, OID_AUTO, copy, CTLFLAG_RW, &netmap_copy, 0 , "");
+
+#ifdef NM_BRIDGE /* support for netmap bridge */
+
+/*
+ * system parameters.
+ *
+ * All switched ports have prefix NM_NAME.
+ * The switch has a max of NM_BDG_MAXPORTS ports (often stored in a bitmap,
+ * so a practical upper bound is 64).
+ * Each tx ring is read-write, whereas rx rings are readonly (XXX not done yet).
+ * The virtual interfaces use per-queue lock instead of core lock.
+ * In the tx loop, we aggregate traffic in batches to make all operations
+ * faster. The batch size is NM_BDG_BATCH
+ */
+#define NM_NAME "vale" /* prefix for the interface */
+#define NM_BDG_MAXPORTS 16 /* up to 64 ? */
+#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
+#define NM_BDG_HASH 1024 /* forwarding table entries */
+#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
+#define NM_BRIDGES 4 /* number of bridges */
+int netmap_bridge = NM_BDG_BATCH; /* bridge batch size */
+SYSCTL_INT(_dev_netmap, OID_AUTO, bridge, CTLFLAG_RW, &netmap_bridge, 0 , "");
+
+#ifdef linux
+#define ADD_BDG_REF(ifp) (NA(ifp)->if_refcount++)
+#define DROP_BDG_REF(ifp) (NA(ifp)->if_refcount-- <= 1)
+#else /* !linux */
+#define ADD_BDG_REF(ifp) (ifp)->if_refcount++
+#define DROP_BDG_REF(ifp) refcount_release(&(ifp)->if_refcount)
+#ifdef __FreeBSD__
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#endif /* __FreeBSD__ */
+#define prefetch(x) __builtin_prefetch(x)
+#endif /* !linux */
+
+static void bdg_netmap_attach(struct ifnet *ifp);
+static int bdg_netmap_reg(struct ifnet *ifp, int onoff);
+/* per-tx-queue entry */
+struct nm_bdg_fwd { /* forwarding entry for a bridge */
+ void *buf;
+ uint64_t dst; /* dst mask */
+ uint32_t src; /* src index ? */
+ uint16_t len; /* src len */
+};
+
+struct nm_hash_ent {
+ uint64_t mac; /* the top 2 bytes are the epoch */
+ uint64_t ports;
+};
+
+/*
+ * Interfaces for a bridge are all in bdg_ports[].
+ * The array has fixed size, an empty entry does not terminate
+ * the search.
+ */
+struct nm_bridge {
+ struct ifnet *bdg_ports[NM_BDG_MAXPORTS];
+ int n_ports;
+ uint64_t act_ports;
+ int freelist; /* first buffer index */
+ NM_SELINFO_T si; /* poll/select wait queue */
+ NM_LOCK_T bdg_lock; /* protect the selinfo ? */
+
+ /* the forwarding table, MAC+ports */
+ struct nm_hash_ent ht[NM_BDG_HASH];
+
+ int namelen; /* 0 means free */
+ char basename[IFNAMSIZ];
+};
+
+struct nm_bridge nm_bridges[NM_BRIDGES];
+
+#define BDG_LOCK(b) mtx_lock(&(b)->bdg_lock)
+#define BDG_UNLOCK(b) mtx_unlock(&(b)->bdg_lock)
+
+/*
+ * NA(ifp)->bdg_port port index
+ */
+
+// XXX only for multiples of 64 bytes, non overlapped.
+static inline void
+pkt_copy(void *_src, void *_dst, int l)
+{
+ uint64_t *src = _src;
+ uint64_t *dst = _dst;
+ if (unlikely(l >= 1024)) {
+ bcopy(src, dst, l);
+ return;
+ }
+ for (; likely(l > 0); l-=64) {
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ }
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+static struct nm_bridge *
+nm_find_bridge(const char *name)
+{
+ int i, l, namelen, e;
+ struct nm_bridge *b = NULL;
+
+ namelen = strlen(NM_NAME); /* base length */
+ l = strlen(name); /* actual length */
+ for (i = namelen + 1; i < l; i++) {
+ if (name[i] == ':') {
+ namelen = i;
+ break;
+ }
+ }
+ if (namelen >= IFNAMSIZ)
+ namelen = IFNAMSIZ;
+ ND("--- prefix is '%.*s' ---", namelen, name);
+
+ /* use the first entry for locking */
+ BDG_LOCK(nm_bridges); // XXX do better
+ for (e = -1, i = 1; i < NM_BRIDGES; i++) {
+ b = nm_bridges + i;
+ if (b->namelen == 0)
+ e = i; /* record empty slot */
+ else if (strncmp(name, b->basename, namelen) == 0) {
+ ND("found '%.*s' at %d", namelen, name, i);
+ break;
+ }
+ }
+ if (i == NM_BRIDGES) { /* all full */
+ if (e == -1) { /* no empty slot */
+ b = NULL;
+ } else {
+ b = nm_bridges + e;
+ strncpy(b->basename, name, namelen);
+ b->namelen = namelen;
+ }
+ }
+ BDG_UNLOCK(nm_bridges);
+ return b;
+}
+#endif /* NM_BRIDGE */
+
+
+/*
+ * Fetch configuration from the device, to cope with dynamic
+ * reconfigurations after loading the module.
+ */
+static int
+netmap_update_config(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+ u_int txr, txd, rxr, rxd;
+
+ txr = txd = rxr = rxd = 0;
+ if (na->nm_config) {
+ na->nm_config(ifp, &txr, &txd, &rxr, &rxd);
+ } else {
+ /* take whatever we had at init time */
+ txr = na->num_tx_rings;
+ txd = na->num_tx_desc;
+ rxr = na->num_rx_rings;
+ rxd = na->num_rx_desc;
+ }
+
+ if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
+ na->num_rx_rings == rxr && na->num_rx_desc == rxd)
+ return 0; /* nothing changed */
+ if (netmap_verbose || na->refcount > 0) {
+ D("stored config %s: txring %d x %d, rxring %d x %d",
+ ifp->if_xname,
+ na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc);
+ D("new config %s: txring %d x %d, rxring %d x %d",
+ ifp->if_xname, txr, txd, rxr, rxd);
+ }
+ if (na->refcount == 0) {
+ D("configuration changed (but fine)");
+ na->num_tx_rings = txr;
+ na->num_tx_desc = txd;
+ na->num_rx_rings = rxr;
+ na->num_rx_desc = rxd;
+ return 0;
+ }
+ D("configuration changed while active, this is bad...");
+ return 1;
+}
/*------------- memory allocator -----------------*/
#ifdef NETMAP_MEM2
@@ -124,23 +330,62 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, no_pen
#endif /* !NETMAP_MEM2 */
/*------------ end of memory allocator ----------*/
-/* Structure associated to each thread which registered an interface. */
+
+/* Structure associated to each thread which registered an interface.
+ *
+ * The first 4 fields of this structure are written by NIOCREGIF and
+ * read by poll() and NIOC?XSYNC.
+ * There is low contention among writers (actually, a correct user program
+ * should have no contention among writers) and among writers and readers,
+ * so we use a single global lock to protect the structure initialization.
+ * Since initialization involves the allocation of memory, we reuse the memory
+ * allocator lock.
+ * Read access to the structure is lock free. Readers must check that
+ * np_nifp is not NULL before using the other fields.
+ * If np_nifp is NULL initialization has not been performed, so they should
+ * return an error to userlevel.
+ *
+ * The ref_done field is used to regulate access to the refcount in the
+ * memory allocator. The refcount must be incremented at most once for
+ * each open("/dev/netmap"). The increment is performed by the first
+ * function that calls netmap_get_memory() (currently called by
+ * mmap(), NIOCGINFO and NIOCREGIF).
+ * If the refcount is incremented, it is then decremented when the
+ * private structure is destroyed.
+ */
struct netmap_priv_d {
- struct netmap_if *np_nifp; /* netmap interface descriptor. */
+ struct netmap_if * volatile np_nifp; /* netmap interface descriptor. */
struct ifnet *np_ifp; /* device for which we hold a reference */
int np_ringid; /* from the ioctl */
u_int np_qfirst, np_qlast; /* range of rings to scan */
uint16_t np_txpoll;
+
+ unsigned long ref_done; /* use with NMA_LOCK held */
};
+static int
+netmap_get_memory(struct netmap_priv_d* p)
+{
+ int error = 0;
+ NMA_LOCK();
+ if (!p->ref_done) {
+ error = netmap_memory_finalize();
+ if (!error)
+ p->ref_done = 1;
+ }
+ NMA_UNLOCK();
+ return error;
+}
+
/*
* File descriptor's private data destructor.
*
* Call nm_register(ifp,0) to stop netmap mode on the interface and
* revert to normal operation. We expect that np_ifp has not gone.
*/
+/* call with NMA_LOCK held */
static void
netmap_dtor_locked(void *data)
{
@@ -153,7 +398,8 @@ netmap_dtor_locked(void *data)
if (na->refcount <= 0) { /* last instance */
u_int i, j, lim;
- D("deleting last netmap instance for %s", ifp->if_xname);
+ if (netmap_verbose)
+ D("deleting last instance for %s", ifp->if_xname);
/*
* there is a race here with *_netmap_task() and
* netmap_poll(), which don't run under NETMAP_REG_LOCK.
@@ -180,7 +426,6 @@ netmap_dtor_locked(void *data)
selwakeuppri(&na->tx_si, PI_NET);
selwakeuppri(&na->rx_si, PI_NET);
/* release all buffers */
- NMA_LOCK();
for (i = 0; i < na->num_tx_rings + 1; i++) {
struct netmap_ring *ring = na->tx_rings[i].ring;
lim = na->tx_rings[i].nkr_num_slots;
@@ -200,30 +445,136 @@ netmap_dtor_locked(void *data)
/* XXX kqueue(9) needed; these will mirror knlist_init. */
/* knlist_destroy(&na->tx_si.si_note); */
/* knlist_destroy(&na->rx_si.si_note); */
- NMA_UNLOCK();
netmap_free_rings(na);
wakeup(na);
}
netmap_if_free(nifp);
}
+static void
+nm_if_rele(struct ifnet *ifp)
+{
+#ifndef NM_BRIDGE
+ if_rele(ifp);
+#else /* NM_BRIDGE */
+ int i, full;
+ struct nm_bridge *b;
+
+ if (strncmp(ifp->if_xname, NM_NAME, sizeof(NM_NAME) - 1)) {
+ if_rele(ifp);
+ return;
+ }
+ if (!DROP_BDG_REF(ifp))
+ return;
+ b = ifp->if_bridge;
+ BDG_LOCK(nm_bridges);
+ BDG_LOCK(b);
+ ND("want to disconnect %s from the bridge", ifp->if_xname);
+ full = 0;
+ for (i = 0; i < NM_BDG_MAXPORTS; i++) {
+ if (b->bdg_ports[i] == ifp) {
+ b->bdg_ports[i] = NULL;
+ bzero(ifp, sizeof(*ifp));
+ free(ifp, M_DEVBUF);
+ break;
+ }
+ else if (b->bdg_ports[i] != NULL)
+ full = 1;
+ }
+ BDG_UNLOCK(b);
+ if (full == 0) {
+ ND("freeing bridge %d", b - nm_bridges);
+ b->namelen = 0;
+ }
+ BDG_UNLOCK(nm_bridges);
+ if (i == NM_BDG_MAXPORTS)
+ D("ouch, cannot find ifp to remove");
+#endif /* NM_BRIDGE */
+}
static void
netmap_dtor(void *data)
{
struct netmap_priv_d *priv = data;
struct ifnet *ifp = priv->np_ifp;
- struct netmap_adapter *na = NA(ifp);
+ struct netmap_adapter *na;
- na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
- netmap_dtor_locked(data);
- na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
+ NMA_LOCK();
+ if (ifp) {
+ na = NA(ifp);
+ na->nm_lock(ifp, NETMAP_REG_LOCK, 0);
+ netmap_dtor_locked(data);
+ na->nm_lock(ifp, NETMAP_REG_UNLOCK, 0);
- if_rele(ifp);
+ nm_if_rele(ifp);
+ }
+ if (priv->ref_done) {
+ netmap_memory_deref();
+ }
+ NMA_UNLOCK();
bzero(priv, sizeof(*priv)); /* XXX for safety */
free(priv, M_DEVBUF);
}
+#ifdef __FreeBSD__
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+static struct cdev_pager_ops saved_cdev_pager_ops;
+
+static int
+netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+ vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+ if (netmap_verbose)
+ D("first mmap for %p", handle);
+ return saved_cdev_pager_ops.cdev_pg_ctor(handle,
+ size, prot, foff, cred, color);
+}
+
+static void
+netmap_dev_pager_dtor(void *handle)
+{
+ saved_cdev_pager_ops.cdev_pg_dtor(handle);
+ ND("ready to release memory for %p", handle);
+}
+
+
+static struct cdev_pager_ops netmap_cdev_pager_ops = {
+ .cdev_pg_ctor = netmap_dev_pager_ctor,
+ .cdev_pg_dtor = netmap_dev_pager_dtor,
+ .cdev_pg_fault = NULL,
+};
+
+static int
+netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
+ vm_size_t objsize, vm_object_t *objp, int prot)
+{
+ vm_object_t obj;
+
+ ND("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+ (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+ obj = vm_pager_allocate(OBJT_DEVICE, cdev, objsize, prot, *foff,
+ curthread->td_ucred);
+ ND("returns obj %p", obj);
+ if (obj == NULL)
+ return EINVAL;
+ if (saved_cdev_pager_ops.cdev_pg_fault == NULL) {
+ ND("initialize cdev_pager_ops");
+ saved_cdev_pager_ops = *(obj->un_pager.devp.ops);
+ netmap_cdev_pager_ops.cdev_pg_fault =
+ saved_cdev_pager_ops.cdev_pg_fault;
+ };
+ obj->un_pager.devp.ops = &netmap_cdev_pager_ops;
+ *objp = obj;
+ return 0;
+}
+#endif /* __FreeBSD__ */
+
/*
* mmap(2) support for the "netmap" device.
@@ -235,6 +586,7 @@ netmap_dtor(void *data)
* Return 0 on success, -1 otherwise.
*/
+#ifdef __FreeBSD__
static int
netmap_mmap(__unused struct cdev *dev,
#if __FreeBSD_version < 900000
@@ -245,75 +597,222 @@ netmap_mmap(__unused struct cdev *dev,
#endif
)
{
+ int error = 0;
+ struct netmap_priv_d *priv;
+
if (nprot & PROT_EXEC)
return (-1); // XXX -1 or EINVAL ?
+ error = devfs_get_cdevpriv((void **)&priv);
+ if (error == EBADF) { /* called on fault, memory is initialized */
+ ND(5, "handling fault at ofs 0x%x", offset);
+ error = 0;
+ } else if (error == 0) /* make sure memory is set */
+ error = netmap_get_memory(priv);
+ if (error)
+ return (error);
+
ND("request for offset 0x%x", (uint32_t)offset);
*paddr = netmap_ofstophys(offset);
- return (0);
+ return (*paddr ? 0 : ENOMEM);
}
+static int
+netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+ if (netmap_verbose)
+ D("dev %p fflag 0x%x devtype %d td %p",
+ dev, fflag, devtype, td);
+ return 0;
+}
+
+static int
+netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ struct netmap_priv_d *priv;
+ int error;
+
+ priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (priv == NULL)
+ return ENOMEM;
+
+ error = devfs_set_cdevpriv(priv, netmap_dtor);
+ if (error)
+ return error;
+
+ return 0;
+}
+#endif /* __FreeBSD__ */
+
/*
* Handlers for synchronization of the queues from/to the host.
- *
- * netmap_sync_to_host() passes packets up. We are called from a
- * system call in user process context, and the only contention
- * can be among multiple user threads erroneously calling
- * this routine concurrently. In principle we should not even
- * need to lock.
+ * Netmap has two operating modes:
+ * - in the default mode, the rings connected to the host stack are
+ * just another ring pair managed by userspace;
+ * - in transparent mode (XXX to be defined) incoming packets
+ * (from the host or the NIC) are marked as NS_FORWARD upon
+ * arrival, and the user application has a chance to reset the
+ * flag for packets that should be dropped.
+ * On the RXSYNC or poll(), packets in RX rings between
+ * kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
+ * to the other side.
+ * The transfer NIC --> host is relatively easy, just encapsulate
+ * into mbufs and we are done. The host --> NIC side is slightly
+ * harder because there might not be room in the tx ring so it
+ * might take a while before releasing the buffer.
+ */
+
+/*
+ * pass a chain of buffers to the host stack as coming from 'dst'
*/
static void
-netmap_sync_to_host(struct netmap_adapter *na)
+netmap_send_up(struct ifnet *dst, struct mbuf *head)
{
- struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
- struct netmap_ring *ring = kring->ring;
- struct mbuf *head = NULL, *tail = NULL, *m;
- u_int k, n, lim = kring->nkr_num_slots - 1;
+ struct mbuf *m;
- k = ring->cur;
- if (k > lim) {
- netmap_ring_reinit(kring);
- return;
+ /* send packets up, outside the lock */
+ while ((m = head) != NULL) {
+ head = head->m_nextpkt;
+ m->m_nextpkt = NULL;
+ if (netmap_verbose & NM_VERB_HOST)
+ D("sending up pkt %p size %d", m, MBUF_LEN(m));
+ NM_SEND_UP(dst, m);
}
- // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+}
- /* Take packets from hwcur to cur and pass them up.
+struct mbq {
+ struct mbuf *head;
+ struct mbuf *tail;
+ int count;
+};
+
+/*
+ * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
+ * Run from hwcur to cur - reserved
+ */
+static void
+netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
+{
+ /* Take packets from hwcur to cur-reserved and pass them up.
* In case of no buffers we give up. At the end of the loop,
* the queue is drained in all cases.
+ * XXX handle reserved
*/
+ int k = kring->ring->cur - kring->ring->reserved;
+ u_int n, lim = kring->nkr_num_slots - 1;
+ struct mbuf *m, *tail = q->tail;
+
+ if (k < 0)
+ k = k + kring->nkr_num_slots;
for (n = kring->nr_hwcur; n != k;) {
- struct netmap_slot *slot = &ring->slot[n];
+ struct netmap_slot *slot = &kring->ring->slot[n];
n = (n == lim) ? 0 : n + 1;
+ if ((slot->flags & NS_FORWARD) == 0 && !force)
+ continue;
if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE) {
D("bad pkt at %d len %d", n, slot->len);
continue;
}
- m = m_devget(NMB(slot), slot->len, 0, na->ifp, NULL);
+ slot->flags &= ~NS_FORWARD; // XXX needed ?
+ m = m_devget(NMB(slot), slot->len, 0, kring->na->ifp, NULL);
if (m == NULL)
break;
if (tail)
tail->m_nextpkt = m;
else
- head = m;
+ q->head = m;
tail = m;
+ q->count++;
m->m_nextpkt = NULL;
}
+ q->tail = tail;
+}
+
+/*
+ * called under main lock to send packets from the host to the NIC
+ * The host ring has packets from nr_hwcur to (cur - reserved)
+ * to be sent down. We scan the tx rings, which have just been
+ * flushed so nr_hwcur == cur. Pushing packets down means
+ * incrementing cur and decrementing avail.
+ * XXX to be verified
+ */
+static void
+netmap_sw_to_nic(struct netmap_adapter *na)
+{
+ struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
+ struct netmap_kring *k1 = &na->tx_rings[0];
+ int i, howmany, src_lim, dst_lim;
+
+ howmany = kring->nr_hwavail; /* XXX otherwise cur - reserved - nr_hwcur */
+
+ src_lim = kring->nkr_num_slots;
+ for (i = 0; howmany > 0 && i < na->num_tx_rings; i++, k1++) {
+ ND("%d packets left to ring %d (space %d)", howmany, i, k1->nr_hwavail);
+ dst_lim = k1->nkr_num_slots;
+ while (howmany > 0 && k1->ring->avail > 0) {
+ struct netmap_slot *src, *dst, tmp;
+ src = &kring->ring->slot[kring->nr_hwcur];
+ dst = &k1->ring->slot[k1->ring->cur];
+ tmp = *src;
+ src->buf_idx = dst->buf_idx;
+ src->flags = NS_BUF_CHANGED;
+
+ dst->buf_idx = tmp.buf_idx;
+ dst->len = tmp.len;
+ dst->flags = NS_BUF_CHANGED;
+ ND("out len %d buf %d from %d to %d",
+ dst->len, dst->buf_idx,
+ kring->nr_hwcur, k1->ring->cur);
+
+ if (++kring->nr_hwcur >= src_lim)
+ kring->nr_hwcur = 0;
+ howmany--;
+ kring->nr_hwavail--;
+ if (++k1->ring->cur >= dst_lim)
+ k1->ring->cur = 0;
+ k1->ring->avail--;
+ }
+ kring->ring->cur = kring->nr_hwcur; // XXX
+ k1++;
+ }
+}
+
+/*
+ * netmap_sync_to_host() passes packets up. We are called from a
+ * system call in user process context, and the only contention
+ * can be among multiple user threads erroneously calling
+ * this routine concurrently.
+ */
+static void
+netmap_sync_to_host(struct netmap_adapter *na)
+{
+ struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
+ struct netmap_ring *ring = kring->ring;
+ u_int k, lim = kring->nkr_num_slots - 1;
+ struct mbq q = { NULL, NULL };
+
+ k = ring->cur;
+ if (k > lim) {
+ netmap_ring_reinit(kring);
+ return;
+ }
+ // na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
+
+ /* Take packets from hwcur to cur and pass them up.
+ * In case of no buffers we give up. At the end of the loop,
+ * the queue is drained in all cases.
+ */
+ netmap_grab_packets(kring, &q, 1);
kring->nr_hwcur = k;
kring->nr_hwavail = ring->avail = lim;
// na->nm_lock(na->ifp, NETMAP_CORE_UNLOCK, 0);
- /* send packets up, outside the lock */
- while ((m = head) != NULL) {
- head = head->m_nextpkt;
- m->m_nextpkt = NULL;
- if (netmap_verbose & NM_VERB_HOST)
- D("sending up pkt %p size %d", m, MBUF_LEN(m));
- NM_SEND_UP(na->ifp, m);
- }
+ netmap_send_up(na->ifp, q.head);
}
/*
@@ -323,15 +822,19 @@ netmap_sync_to_host(struct netmap_adapte
*
* This routine also does the selrecord if called from the poll handler
* (we know because td != NULL).
+ *
+ * NOTE: on linux, selrecord() is defined as a macro and uses pwait
+ * as an additional hidden argument.
*/
static void
-netmap_sync_from_host(struct netmap_adapter *na, struct thread *td)
+netmap_sync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
{
struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
struct netmap_ring *ring = kring->ring;
u_int j, n, lim = kring->nkr_num_slots;
u_int k = ring->cur, resvd = ring->reserved;
+ (void)pwait; /* disable unused warnings */
na->nm_lock(na->ifp, NETMAP_CORE_LOCK, 0);
if (k >= lim) {
netmap_ring_reinit(kring);
@@ -370,15 +873,73 @@ netmap_sync_from_host(struct netmap_adap
static int
get_ifp(const char *name, struct ifnet **ifp)
{
+#ifdef NM_BRIDGE
+ struct ifnet *iter = NULL;
+
+ do {
+ struct nm_bridge *b;
+ int i, l, cand = -1;
+
+ if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1))
+ break;
+ b = nm_find_bridge(name);
+ if (b == NULL) {
+ D("no bridges available for '%s'", name);
+ return (ENXIO);
+ }
+ /* XXX locking */
+ BDG_LOCK(b);
+ /* lookup in the local list of ports */
+ for (i = 0; i < NM_BDG_MAXPORTS; i++) {
+ iter = b->bdg_ports[i];
+ if (iter == NULL) {
+ if (cand == -1)
+ cand = i; /* potential insert point */
+ continue;
+ }
+ if (!strcmp(iter->if_xname, name)) {
+ ADD_BDG_REF(iter);
+ ND("found existing interface");
+ BDG_UNLOCK(b);
+ break;
+ }
+ }
+ if (i < NM_BDG_MAXPORTS) /* already unlocked */
+ break;
+ if (cand == -1) {
+ D("bridge full, cannot create new port");
+no_port:
+ BDG_UNLOCK(b);
+ *ifp = NULL;
+ return EINVAL;
+ }
+ ND("create new bridge port %s", name);
+ /* space for forwarding list after the ifnet */
+ l = sizeof(*iter) +
+ sizeof(struct nm_bdg_fwd)*NM_BDG_BATCH ;
+ iter = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!iter)
+ goto no_port;
+ strcpy(iter->if_xname, name);
+ bdg_netmap_attach(iter);
+ b->bdg_ports[cand] = iter;
+ iter->if_bridge = b;
+ ADD_BDG_REF(iter);
+ BDG_UNLOCK(b);
+ ND("attaching virtual bridge %p", b);
+ } while (0);
+ *ifp = iter;
+ if (! *ifp)
+#endif /* NM_BRIDGE */
*ifp = ifunit_ref(name);
if (*ifp == NULL)
return (ENXIO);
/* can do this if the capability exists and if_pspare[0]
* points to the netmap descriptor.
*/
- if ((*ifp)->if_capabilities & IFCAP_NETMAP && NA(*ifp))
+ if (NETMAP_CAPABLE(*ifp))
return 0; /* valid pointer, we hold the refcount */
- if_rele(*ifp);
+ nm_if_rele(*ifp);
return EINVAL; // not NETMAP capable
}
@@ -402,7 +963,7 @@ netmap_ring_reinit(struct netmap_kring *
u_int i, lim = kring->nkr_num_slots - 1;
int errors = 0;
- D("called for %s", kring->na->ifp->if_xname);
+ RD(10, "called for %s", kring->na->ifp->if_xname);
if (ring->cur > lim)
errors++;
for (i = 0; i <= lim; i++) {
@@ -424,9 +985,9 @@ netmap_ring_reinit(struct netmap_kring *
int pos = kring - kring->na->tx_rings;
int n = kring->na->num_tx_rings + 1;
- D("total %d errors", errors);
+ RD(10, "total %d errors", errors);
errors++;
- D("%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
+ RD(10, "%s %s[%d] reinit, cur %d -> %d avail %d -> %d",
kring->na->ifp->if_xname,
pos < n ? "TX" : "RX", pos < n ? pos : pos - n,
ring->cur, kring->nr_hwcur,
@@ -474,6 +1035,7 @@ netmap_set_ringid(struct netmap_priv_d *
priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
if (need_lock)
na->nm_lock(ifp, NETMAP_CORE_UNLOCK, 0);
+ if (netmap_verbose) {
if (ringid & NETMAP_SW_RING)
D("ringid %s set to SW RING", ifp->if_xname);
else if (ringid & NETMAP_HW_RING)
@@ -481,6 +1043,7 @@ netmap_set_ringid(struct netmap_priv_d *
priv->np_qfirst);
else
D("ringid %s set to all %d HW RINGS", ifp->if_xname, lim);
+ }
return 0;
}
@@ -498,8 +1061,8 @@ netmap_set_ringid(struct netmap_priv_d *
* Return 0 on success, errno otherwise.
*/
static int
-netmap_ioctl(__unused struct cdev *dev, u_long cmd, caddr_t data,
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-9
mailing list