svn commit: r262152 - stable/10/sys/dev/netmap
Luigi Rizzo
luigi at FreeBSD.org
Tue Feb 18 05:46:20 UTC 2014
Author: luigi
Date: Tue Feb 18 05:46:19 2014
New Revision: 262152
URL: http://svnweb.freebsd.org/changeset/base/262152
Log:
missing files from previous commit...
Added:
stable/10/sys/dev/netmap/netmap_freebsd.c (contents, props changed)
stable/10/sys/dev/netmap/netmap_generic.c (contents, props changed)
stable/10/sys/dev/netmap/netmap_mbq.c (contents, props changed)
stable/10/sys/dev/netmap/netmap_mbq.h (contents, props changed)
stable/10/sys/dev/netmap/netmap_mem2.h (contents, props changed)
stable/10/sys/dev/netmap/netmap_offloadings.c (contents, props changed)
stable/10/sys/dev/netmap/netmap_pipe.c (contents, props changed)
stable/10/sys/dev/netmap/netmap_vale.c (contents, props changed)
Added: stable/10/sys/dev/netmap/netmap_freebsd.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/10/sys/dev/netmap/netmap_freebsd.c Tue Feb 18 05:46:19 2014 (r262152)
@@ -0,0 +1,655 @@
+/*
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/* $FreeBSD$ */
+
+#include <sys/types.h>
+#include <sys/module.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/poll.h> /* POLLIN, POLLOUT */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* DEV_MODULE */
+#include <sys/endian.h>
+
+#include <sys/rwlock.h>
+
+#include <vm/vm.h> /* vtophys */
+#include <vm/pmap.h> /* vtophys */
+#include <vm/vm_param.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+
+#include <sys/malloc.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <netinet/in.h> /* in6_cksum_pseudo() */
+#include <machine/in_cksum.h> /* in_pseudo(), in_cksum_hdr() */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+
+/* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
+
+rawsum_t nm_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
+{
+ /* TODO XXX please use the FreeBSD implementation for this. */
+ uint16_t *words = (uint16_t *)data;
+ int nw = len / 2;
+ int i;
+
+ for (i = 0; i < nw; i++)
+ cur_sum += be16toh(words[i]);
+
+ if (len & 1)
+ cur_sum += (data[len-1] << 8);
+
+ return cur_sum;
+}
+
+/* Fold a raw checksum: 'cur_sum' is in host byte order, while the
+ * return value is in network byte order.
+ */
+uint16_t nm_csum_fold(rawsum_t cur_sum)
+{
+ /* TODO XXX please use the FreeBSD implementation for this. */
+ while (cur_sum >> 16)
+ cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16);
+
+ return htobe16((~cur_sum) & 0xFFFF);
+}
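As a worked example of the fold: with cur_sum = 0x2d1c6, the first pass
computes 0xd1c6 + 0x2 = 0xd1c8; the loop then exits and the function
returns htobe16(~0xd1c8 & 0xffff) = htobe16(0x2e37).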
+
+uint16_t nm_csum_ipv4(struct nm_iphdr *iph)
+{
+#if 0
+ return in_cksum_hdr((void *)iph);
+#else
+ return nm_csum_fold(nm_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
+#endif
+}
+
+void nm_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
+ size_t datalen, uint16_t *check)
+{
+ uint16_t pseudolen = datalen + iph->protocol;
+
+	/* Compute and insert the pseudo-header checksum. */
+ *check = in_pseudo(iph->saddr, iph->daddr,
+ htobe16(pseudolen));
+	/* Compute the checksum on TCP/UDP header + payload:
+	 * the pseudo-header sum just stored in *check sits inside
+	 * the data being summed, so it is folded in as well.
+	 */
+ *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+}
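The trick above is that the pseudo-header sum is parked in the checksum
field itself, so the raw sum over the segment picks it up. A hedged
usage sketch (the buffer layout and variable names are illustrative,
not part of this file):

    struct nm_iphdr *iph = (struct nm_iphdr *)buf;
    char *udp = buf + sizeof(*iph);          /* start of UDP header */
    uint16_t *csum = (uint16_t *)(udp + 6);  /* UDP checksum field */

    nm_csum_tcpudp_ipv4(iph, udp, udp_len, csum);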
+
+void nm_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
+ size_t datalen, uint16_t *check)
+{
+#ifdef INET6
+ *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
+ *check = nm_csum_fold(nm_csum_raw(data, datalen, 0));
+#else
+ static int notsupported = 0;
+ if (!notsupported) {
+ notsupported = 1;
+ D("inet6 segmentation not supported");
+ }
+#endif
+}
+
+
+/*
+ * Intercept the rx routine in the standard device driver.
+ * Second argument is non-zero to intercept, 0 to restore
+ */
+int
+netmap_catch_rx(struct netmap_adapter *na, int intercept)
+{
+ struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+ struct ifnet *ifp = na->ifp;
+
+ if (intercept) {
+ if (gna->save_if_input) {
+ D("cannot intercept again");
+ return EINVAL; /* already set */
+ }
+ gna->save_if_input = ifp->if_input;
+ ifp->if_input = generic_rx_handler;
+ } else {
+ if (!gna->save_if_input){
+ D("cannot restore");
+ return EINVAL; /* not saved */
+ }
+ ifp->if_input = gna->save_if_input;
+ gna->save_if_input = NULL;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Intercept the packet steering routine in the tx path,
+ * so that we can decide which queue is used for an mbuf.
+ * Second argument is non-zero to intercept, 0 to restore.
+ * On FreeBSD we just intercept if_transmit.
+ */
+void
+netmap_catch_tx(struct netmap_generic_adapter *gna, int enable)
+{
+ struct netmap_adapter *na = &gna->up.up;
+ struct ifnet *ifp = na->ifp;
+
+ if (enable) {
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_transmit;
+ } else {
+ ifp->if_transmit = na->if_transmit;
+ }
+}
+
+
+/*
+ * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
+ * and non-zero on error (which may be packet drops or other errors).
+ * addr and len identify the netmap buffer, m is the (preallocated)
+ * mbuf to use for transmissions.
+ *
+ * We add a reference to the mbuf so that the m_freem() called at the
+ * end of the transmission does not actually release the buffer.
+ *
+ * On FreeBSD, and on multiqueue cards, we can force the queue using
+ * if ((m->m_flags & M_FLOWID) != 0)
+ * i = m->m_pkthdr.flowid % adapter->num_queues;
+ * else
+ * i = curcpu % adapter->num_queues;
+ *
+ */
+int
+generic_xmit_frame(struct ifnet *ifp, struct mbuf *m,
+ void *addr, u_int len, u_int ring_nr)
+{
+ int ret;
+
+ m->m_len = m->m_pkthdr.len = 0;
+
+ // copy data to the mbuf
+ m_copyback(m, 0, len, addr);
+	// increment the refcount so the mbuf survives the m_freem()
+	// in the driver; we are the only holder, so the update could
+	// even be non-atomic.
+ atomic_fetchadd_int(m->m_ext.ref_cnt, 1);
+ m->m_flags |= M_FLOWID;
+ m->m_pkthdr.flowid = ring_nr;
+ m->m_pkthdr.rcvif = ifp; /* used for tx notification */
+ ret = NA(ifp)->if_transmit(ifp, m);
+ return ret;
+}
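For context, a sketch of the kind of loop in generic_netmap_txsync()
(in netmap_generic.c below, truncated in this mail) that drives the
routine above; the ring accessors and the error handling here are
assumptions, not a copy of the real code:

    u_int nm_i = kring->nr_hwcur;
    while (nm_i != head) {
        struct netmap_slot *slot = &ring->slot[nm_i];
        void *addr = NMB(slot);                 /* netmap buffer */
        struct mbuf *m = kring->tx_pool[nm_i];  /* preallocated mbuf */

        if (generic_xmit_frame(ifp, m, addr, slot->len, ring_nr))
            break;  /* queue full, retry on the next txsync */
        nm_i = nm_next(nm_i, lim);
    }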
+
+
+/*
+ * The following two functions are placeholders until we have a
+ * generic way to extract the info from the ifp.
+ */
+int
+generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
+{
+ D("called");
+ return 0;
+}
+
+
+void
+generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
+{
+ D("called");
+ *txq = netmap_generic_rings;
+ *rxq = netmap_generic_rings;
+}
+
+
+void netmap_mitigation_init(struct nm_generic_mit *mit, struct netmap_adapter *na)
+{
+ ND("called");
+ mit->mit_pending = 0;
+ mit->mit_na = na;
+}
+
+
+void netmap_mitigation_start(struct nm_generic_mit *mit)
+{
+ ND("called");
+}
+
+
+void netmap_mitigation_restart(struct nm_generic_mit *mit)
+{
+ ND("called");
+}
+
+
+int netmap_mitigation_active(struct nm_generic_mit *mit)
+{
+ ND("called");
+ return 0;
+}
+
+
+void netmap_mitigation_cleanup(struct nm_generic_mit *mit)
+{
+ ND("called");
+}
+
+
+/*
+ * In order to track whether pages are still mapped, we hook into
+ * the standard cdev_pager and intercept the constructor and
+ * destructor.
+ */
+
+struct netmap_vm_handle_t {
+ struct cdev *dev;
+ struct netmap_priv_d *priv;
+};
+
+
+static int
+netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
+ vm_ooffset_t foff, struct ucred *cred, u_short *color)
+{
+ struct netmap_vm_handle_t *vmh = handle;
+
+ if (netmap_verbose)
+ D("handle %p size %jd prot %d foff %jd",
+ handle, (intmax_t)size, prot, (intmax_t)foff);
+ dev_ref(vmh->dev);
+ return 0;
+}
+
+
+static void
+netmap_dev_pager_dtor(void *handle)
+{
+ struct netmap_vm_handle_t *vmh = handle;
+ struct cdev *dev = vmh->dev;
+ struct netmap_priv_d *priv = vmh->priv;
+
+ if (netmap_verbose)
+ D("handle %p", handle);
+ netmap_dtor(priv);
+ free(vmh, M_DEVBUF);
+ dev_rel(dev);
+}
+
+
+static int
+netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
+ int prot, vm_page_t *mres)
+{
+ struct netmap_vm_handle_t *vmh = object->handle;
+ struct netmap_priv_d *priv = vmh->priv;
+ vm_paddr_t paddr;
+ vm_page_t page;
+ vm_memattr_t memattr;
+ vm_pindex_t pidx;
+
+ ND("object %p offset %jd prot %d mres %p",
+ object, (intmax_t)offset, prot, mres);
+ memattr = object->memattr;
+ pidx = OFF_TO_IDX(offset);
+ paddr = netmap_mem_ofstophys(priv->np_mref, offset);
+ if (paddr == 0)
+ return VM_PAGER_FAIL;
+
+ if (((*mres)->flags & PG_FICTITIOUS) != 0) {
+		/*
+		 * If the passed-in result page is a fake page, update it
+		 * with the new physical address.
+		 */
+ page = *mres;
+ vm_page_updatefake(page, paddr, memattr);
+ } else {
+		/*
+		 * Replace the passed-in page with our own fake page and
+		 * free the original one.
+		 */
+#ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */
+#define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
+#define VM_OBJECT_WLOCK VM_OBJECT_LOCK
+#endif /* VM_OBJECT_WUNLOCK */
+
+ VM_OBJECT_WUNLOCK(object);
+ page = vm_page_getfake(paddr, memattr);
+ VM_OBJECT_WLOCK(object);
+ vm_page_lock(*mres);
+ vm_page_free(*mres);
+ vm_page_unlock(*mres);
+ *mres = page;
+ vm_page_insert(page, object, pidx);
+ }
+ page->valid = VM_PAGE_BITS_ALL;
+ return (VM_PAGER_OK);
+}
+
+
+static struct cdev_pager_ops netmap_cdev_pager_ops = {
+ .cdev_pg_ctor = netmap_dev_pager_ctor,
+ .cdev_pg_dtor = netmap_dev_pager_dtor,
+ .cdev_pg_fault = netmap_dev_pager_fault,
+};
+
+
+static int
+netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
+ vm_size_t objsize, vm_object_t *objp, int prot)
+{
+ int error;
+ struct netmap_vm_handle_t *vmh;
+ struct netmap_priv_d *priv;
+ vm_object_t obj;
+
+ if (netmap_verbose)
+ D("cdev %p foff %jd size %jd objp %p prot %d", cdev,
+ (intmax_t )*foff, (intmax_t )objsize, objp, prot);
+
+ vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (vmh == NULL)
+ return ENOMEM;
+ vmh->dev = cdev;
+
+ NMG_LOCK();
+ error = devfs_get_cdevpriv((void**)&priv);
+ if (error)
+ goto err_unlock;
+ vmh->priv = priv;
+ priv->np_refcount++;
+ NMG_UNLOCK();
+
+ error = netmap_get_memory(priv);
+ if (error)
+ goto err_deref;
+
+ obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
+ &netmap_cdev_pager_ops, objsize, prot,
+ *foff, NULL);
+ if (obj == NULL) {
+ D("cdev_pager_allocate failed");
+ error = EINVAL;
+ goto err_deref;
+ }
+
+ *objp = obj;
+ return 0;
+
+err_deref:
+ NMG_LOCK();
+ priv->np_refcount--;
+err_unlock:
+ NMG_UNLOCK();
+// err:
+ free(vmh, M_DEVBUF);
+ return error;
+}
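The mmap_single handler above is reached through the usual netmap(4)
setup sequence. A minimal userspace sketch (error handling omitted;
"em0" is just an example interface name):

    #include <sys/ioctl.h>
    #include <sys/mman.h>
    #include <fcntl.h>
    #include <string.h>
    #include <net/netmap.h>
    #include <net/netmap_user.h>

    int fd = open("/dev/netmap", O_RDWR);
    struct nmreq req;

    memset(&req, 0, sizeof(req));
    req.nr_version = NETMAP_API;
    strncpy(req.nr_name, "em0", sizeof(req.nr_name));
    ioctl(fd, NIOCREGIF, &req);          /* handled by netmap_ioctl() */
    /* this mmap() lands in netmap_mmap_single() above */
    void *mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
        MAP_SHARED, fd, 0);
    struct netmap_if *nifp = NETMAP_IF(mem, req.nr_offset);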
+
+
+// XXX can we remove this ?
+static int
+netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+ if (netmap_verbose)
+ D("dev %p fflag 0x%x devtype %d td %p",
+ dev, fflag, devtype, td);
+ return 0;
+}
+
+
+static int
+netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+ struct netmap_priv_d *priv;
+ int error;
+
+ (void)dev;
+ (void)oflags;
+ (void)devtype;
+ (void)td;
+
+ // XXX wait or nowait ?
+ priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
+ M_NOWAIT | M_ZERO);
+ if (priv == NULL)
+ return ENOMEM;
+
+ error = devfs_set_cdevpriv(priv, netmap_dtor);
+ if (error)
+ return error;
+
+ priv->np_refcount = 1;
+
+ return 0;
+}
+
+/******************** kqueue support ****************/
+
+/*
+ * The OS_selwakeup also needs to issue a KNOTE_UNLOCKED.
+ * We use a non-zero argument to distinguish the call from the one
+ * in kqueue_scan(), which instead also needs to run netmap_poll().
+ * The knote uses a global mutex for the time being. We might
+ * try to reuse the one in the si, but it is not allocated
+ * permanently so it might be a bit tricky.
+ *
+ * The *kqfilter function registers one f_event or the other,
+ * depending on whether the filter is requested for read or write mode.
+ * In the call to f_event() td_fpop is NULL, so any child function
+ * calling devfs_get_cdevpriv() would fail - and we need it in
+ * netmap_poll(). As a workaround we store priv into kn->kn_hook
+ * and pass it as the first argument to netmap_poll(), which then
+ * uses the failure to tell that it was called from f_event()
+ * and does not need the selrecord().
+ */
+
+void freebsd_selwakeup(struct selinfo *si, int pri);
+
+void
+freebsd_selwakeup(struct selinfo *si, int pri)
+{
+ if (netmap_verbose)
+ D("on knote %p", &si->si_note);
+ selwakeuppri(si, pri);
+	/* use a non-zero hint to distinguish this notification from the
+	 * call done in kqueue_scan(), which uses 0
+	 */
+ KNOTE_UNLOCKED(&si->si_note, 0x100 /* notification */);
+}
+
+static void
+netmap_knrdetach(struct knote *kn)
+{
+ struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
+ struct selinfo *si = priv->np_rxsi;
+
+ D("remove selinfo %p", si);
+ knlist_remove(&si->si_note, kn, 0);
+}
+
+static void
+netmap_knwdetach(struct knote *kn)
+{
+ struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
+ struct selinfo *si = priv->np_txsi;
+
+ D("remove selinfo %p", si);
+ knlist_remove(&si->si_note, kn, 0);
+}
+
+/*
+ * Callback invoked both for notifications (generated externally)
+ * and for our own calls to kevent(). For the former we just return
+ * 1 (ready), since we do not know better; for the latter we call
+ * netmap_poll() and return 0/1 accordingly.
+ */
+static int
+netmap_knrw(struct knote *kn, long hint, int events)
+{
+ struct netmap_priv_d *priv;
+ int revents;
+
+ if (hint != 0) {
+ ND(5, "call from notify");
+ return 1; /* assume we are ready */
+ }
+ priv = kn->kn_hook;
+	/* The notification may come from an external thread,
+	 * in which case we do not want to run netmap_poll().
+	 * This should be filtered above, but check just in case.
+	 */
+ if (curthread != priv->np_td) { /* should not happen */
+ RD(5, "curthread changed %p %p", curthread, priv->np_td);
+ return 1;
+ } else {
+ revents = netmap_poll((void *)priv, events, curthread);
+ return (events & revents) ? 1 : 0;
+ }
+}
+
+static int
+netmap_knread(struct knote *kn, long hint)
+{
+ return netmap_knrw(kn, hint, POLLIN);
+}
+
+static int
+netmap_knwrite(struct knote *kn, long hint)
+{
+ return netmap_knrw(kn, hint, POLLOUT);
+}
+
+static struct filterops netmap_rfiltops = {
+ .f_isfd = 1,
+ .f_detach = netmap_knrdetach,
+ .f_event = netmap_knread,
+};
+
+static struct filterops netmap_wfiltops = {
+ .f_isfd = 1,
+ .f_detach = netmap_knwdetach,
+ .f_event = netmap_knwrite,
+};
+
+
+/*
+ * This is called when a thread invokes kevent() to record
+ * a change in the configuration of the kqueue.
+ * The 'priv' should be the same as in the netmap device.
+ */
+static int
+netmap_kqfilter(struct cdev *dev, struct knote *kn)
+{
+ struct netmap_priv_d *priv;
+ int error;
+ struct netmap_adapter *na;
+ struct selinfo *si;
+ int ev = kn->kn_filter;
+
+ if (ev != EVFILT_READ && ev != EVFILT_WRITE) {
+ D("bad filter request %d", ev);
+ return 1;
+ }
+ error = devfs_get_cdevpriv((void**)&priv);
+ if (error) {
+ D("device not yet setup");
+ return 1;
+ }
+ na = priv->np_na;
+ if (na == NULL) {
+ D("no netmap adapter for this file descriptor");
+ return 1;
+ }
+ /* the si is indicated in the priv */
+ si = (ev == EVFILT_WRITE) ? priv->np_txsi : priv->np_rxsi;
+ // XXX lock(priv) ?
+ kn->kn_fop = (ev == EVFILT_WRITE) ?
+ &netmap_wfiltops : &netmap_rfiltops;
+ kn->kn_hook = priv;
+ knlist_add(&si->si_note, kn, 1);
+ // XXX unlock(priv)
+ ND("register %p %s td %p priv %p kn %p np_nifp %p kn_fp/fpop %s",
+ na, na->ifp->if_xname, curthread, priv, kn,
+ priv->np_nifp,
+ kn->kn_fp == curthread->td_fpop ? "match" : "MISMATCH");
+ return 0;
+}
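On the userspace side, this kqfilter hook is reached through the
standard kqueue(2) API; a small sketch, reusing the netmap fd from the
mmap example earlier:

    #include <sys/event.h>

    int kq = kqueue();
    struct kevent ev, out;

    /* wait for the rings bound to fd to become readable */
    EV_SET(&ev, fd, EVFILT_READ, EV_ADD, 0, 0, NULL);
    kevent(kq, &ev, 1, NULL, 0, NULL);   /* registers via netmap_kqfilter() */
    kevent(kq, NULL, 0, &out, 1, NULL);  /* wakes up on freebsd_selwakeup() */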
+
+struct cdevsw netmap_cdevsw = {
+ .d_version = D_VERSION,
+ .d_name = "netmap",
+ .d_open = netmap_open,
+ .d_mmap_single = netmap_mmap_single,
+ .d_ioctl = netmap_ioctl,
+ .d_poll = netmap_poll,
+ .d_kqfilter = netmap_kqfilter,
+ .d_close = netmap_close,
+};
+/*--- end of kqueue support ----*/
+
+/*
+ * Kernel entry point.
+ *
+ * Initialize/finalize the module and return.
+ *
+ * Return 0 on success, errno on failure.
+ */
+static int
+netmap_loader(__unused struct module *module, int event, __unused void *arg)
+{
+ int error = 0;
+
+ switch (event) {
+ case MOD_LOAD:
+ error = netmap_init();
+ break;
+
+ case MOD_UNLOAD:
+ netmap_fini();
+ break;
+
+ default:
+ error = EOPNOTSUPP;
+ break;
+ }
+
+ return (error);
+}
+
+
+DEV_MODULE(netmap, netmap_loader, NULL);
Added: stable/10/sys/dev/netmap/netmap_generic.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/10/sys/dev/netmap/netmap_generic.c Tue Feb 18 05:46:19 2014 (r262152)
@@ -0,0 +1,806 @@
+/*
+ * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * This module implements netmap support on top of standard,
+ * unmodified device drivers.
+ *
+ * A NIOCREGIF request is handled here if the device does not
+ * have native support. TX and RX rings are emulated as follows:
+ *
+ * NIOCREGIF
+ * We preallocate a block of TX mbufs (roughly as many as
+ * tx descriptors; the number is not critical) to speed up
+ * operation during transmissions. The refcount on most of
+ * these buffers is artificially bumped up so we can recycle
+ * them more easily. Also, the destructor is intercepted
+ * so we use it as an interrupt notification to wake up
+ * processes blocked on a poll().
+ *
+ * For each receive ring we allocate one "struct mbq"
+ * (an mbuf tailq plus a spinlock). We intercept packets
+ * (through if_input)
+ * on the receive path and put them in the mbq from which
+ * netmap receive routines can grab them.
+ *
+ * TX:
+ * in the generic_txsync() routine, netmap buffers are copied
+ * (or linked, in the future) to the preallocated mbufs
+ * and pushed to the transmit queue. Some of these mbufs
+ * (those with NS_REPORT, or otherwise every half ring)
+ * have a refcount of 1, the others a refcount of 2.
+ * When the destructor is invoked, we take that as
+ * a notification that all mbufs up to that one in
+ * the specific ring have been completed, and generate
+ * the equivalent of a transmit interrupt.
+ *
+ * RX:
+ * packets intercepted by the rx handler are queued on the
+ * per-ring mbq; the rxsync routine then drains the mbq,
+ * copies the payloads into the netmap ring and frees the mbufs.
+ */
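A conceptual sketch of the "struct mbq" mentioned above; the real
definition is in netmap_mbq.h (added by this commit but not included
in this mail), so the field names here are illustrative only:

    struct mbq {
        struct mbuf *head;  /* queue of intercepted rx mbufs */
        struct mbuf *tail;
        int count;
        struct mtx lock;    /* taken by the *_safe_* variants */
    };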
+
+#ifdef __FreeBSD__
+
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/lock.h> /* PROT_EXEC */
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
+
+// XXX temporary - D() defined here
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#define rtnl_lock() D("rtnl_lock called");
+#define rtnl_unlock() D("rtnl_unlock called");
+#define MBUF_TXQ(m) ((m)->m_pkthdr.flowid)
+#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
+#define smp_mb()
+
+/*
+ * mbuf wrappers
+ */
+
+/*
+ * we allocate an EXT_PACKET
+ */
+#define netmap_get_mbuf(len) m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR|M_NOFREE)
+
+/* When setting the mbuf destructor we also need to change the
+ * type to EXT_EXTREF and add an M_NOFREE flag; the destructor
+ * should then clear the flag and chain into uma_zfree(zone_pack, mf)
+ * (or reinstall the buffer ?)
+ */
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (void *)fn; \
+ (m)->m_ext.ext_type = EXT_EXTREF; \
+} while (0)
+
+
+#define GET_MBUF_REFCNT(m) ((m)->m_ext.ref_cnt ? *(m)->m_ext.ref_cnt : -1)
+
+
+
+#else /* linux */
+
+#include "bsd_glue.h"
+
+#include <linux/rtnetlink.h> /* rtnl_[un]lock() */
+#include <linux/ethtool.h> /* struct ethtool_ops, get_ringparam */
+#include <linux/hrtimer.h>
+
+//#define RATE /* Enables communication statistics. */
+
+//#define REG_RESET
+
+#endif /* linux */
+
+
+/* Common headers. */
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+
+
+/* ======================== usage stats =========================== */
+
+#ifdef RATE
+#define IFRATE(x) x
+struct rate_stats {
+ unsigned long txpkt;
+ unsigned long txsync;
+ unsigned long txirq;
+ unsigned long rxpkt;
+ unsigned long rxirq;
+ unsigned long rxsync;
+};
+
+struct rate_context {
+ unsigned refcount;
+ struct timer_list timer;
+ struct rate_stats new;
+ struct rate_stats old;
+};
+
+#define RATE_PRINTK(_NAME_) \
+ printk( #_NAME_ " = %lu Hz\n", (cur._NAME_ - ctx->old._NAME_)/RATE_PERIOD);
+#define RATE_PERIOD 2
+static void rate_callback(unsigned long arg)
+{
+ struct rate_context * ctx = (struct rate_context *)arg;
+ struct rate_stats cur = ctx->new;
+ int r;
+
+ RATE_PRINTK(txpkt);
+ RATE_PRINTK(txsync);
+ RATE_PRINTK(txirq);
+ RATE_PRINTK(rxpkt);
+ RATE_PRINTK(rxsync);
+ RATE_PRINTK(rxirq);
+ printk("\n");
+
+ ctx->old = cur;
+ r = mod_timer(&ctx->timer, jiffies +
+ msecs_to_jiffies(RATE_PERIOD * 1000));
+ if (unlikely(r))
+ D("[v1000] Error: mod_timer()");
+}
+
+static struct rate_context rate_ctx;
+
+#else /* !RATE */
+#define IFRATE(x)
+#endif /* !RATE */
+
+
+/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
+#define GENERIC_BUF_SIZE netmap_buf_size /* Size of the mbufs in the Tx pool. */
+
+/*
+ * Wrapper used by the generic adapter layer to notify
+ * the poller threads. Unlike netmap_rx_irq(), we check
+ * only IFCAP_NETMAP (instead of NAF_NATIVE_ON) to enable the irq.
+ */
+static void
+netmap_generic_irq(struct ifnet *ifp, u_int q, u_int *work_done)
+{
+ if (unlikely(!(ifp->if_capenable & IFCAP_NETMAP)))
+ return;
+
+ netmap_common_irq(ifp, q, work_done);
+}
+
+
+/* Enable/disable netmap mode for a generic network interface. */
+static int
+generic_netmap_register(struct netmap_adapter *na, int enable)
+{
+	struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)na;
+	struct ifnet *ifp;
+	struct mbuf *m;
+	int error;
+	int i, r;
+
+	if (!na)
+		return EINVAL;
+	ifp = na->ifp;	/* do not dereference na before the NULL check */
+
+#ifdef REG_RESET
+ error = ifp->netdev_ops->ndo_stop(ifp);
+ if (error) {
+ return error;
+ }
+#endif /* REG_RESET */
+
+ if (enable) { /* Enable netmap mode. */
+ /* Init the mitigation support. */
+ gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!gna->mit) {
+ D("mitigation allocation failed");
+ error = ENOMEM;
+ goto out;
+ }
+ for (r=0; r<na->num_rx_rings; r++)
+ netmap_mitigation_init(&gna->mit[r], na);
+
+ /* Initialize the rx queue, as generic_rx_handler() can
+ * be called as soon as netmap_catch_rx() returns.
+ */
+ for (r=0; r<na->num_rx_rings; r++) {
+ mbq_safe_init(&na->rx_rings[r].rx_queue);
+ }
+
+ /*
+ * Preallocate packet buffers for the tx rings.
+ */
+ for (r=0; r<na->num_tx_rings; r++)
+ na->tx_rings[r].tx_pool = NULL;
+ for (r=0; r<na->num_tx_rings; r++) {
+ na->tx_rings[r].tx_pool = malloc(na->num_tx_desc * sizeof(struct mbuf *),
+ M_DEVBUF, M_NOWAIT | M_ZERO);
+ if (!na->tx_rings[r].tx_pool) {
+ D("tx_pool allocation failed");
+ error = ENOMEM;
+ goto free_tx_pools;
+ }
+ for (i=0; i<na->num_tx_desc; i++)
+ na->tx_rings[r].tx_pool[i] = NULL;
+ for (i=0; i<na->num_tx_desc; i++) {
+ m = netmap_get_mbuf(GENERIC_BUF_SIZE);
+ if (!m) {
+ D("tx_pool[%d] allocation failed", i);
+ error = ENOMEM;
+ goto free_tx_pools;
+ }
+ na->tx_rings[r].tx_pool[i] = m;
+ }
+ }
+ rtnl_lock();
+ /* Prepare to intercept incoming traffic. */
+ error = netmap_catch_rx(na, 1);
+ if (error) {
+ D("netdev_rx_handler_register() failed (%d)", error);
+ goto register_handler;
+ }
+ ifp->if_capenable |= IFCAP_NETMAP;
+
+ /* Make netmap control the packet steering. */
+ netmap_catch_tx(gna, 1);
+
+ rtnl_unlock();
+
+#ifdef RATE
+ if (rate_ctx.refcount == 0) {
+ D("setup_timer()");
+ memset(&rate_ctx, 0, sizeof(rate_ctx));
+ setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
+ if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
+ D("Error: mod_timer()");
+ }
+ }
+ rate_ctx.refcount++;
+#endif /* RATE */
+
+ } else if (na->tx_rings[0].tx_pool) {
+		/* Disable netmap mode. We enter here only if the previous
+		   generic_netmap_register(na, 1) was successful.
+		   If it was not, na->tx_rings[0].tx_pool was set to NULL by the
+		   error handling code below. */
+ rtnl_lock();
+
+ ifp->if_capenable &= ~IFCAP_NETMAP;
+
+ /* Release packet steering control. */
+ netmap_catch_tx(gna, 0);
+
+ /* Do not intercept packets on the rx path. */
+ netmap_catch_rx(na, 0);
+
+ rtnl_unlock();
+
+ /* Free the mbufs going to the netmap rings */
+ for (r=0; r<na->num_rx_rings; r++) {
+ mbq_safe_purge(&na->rx_rings[r].rx_queue);
+ mbq_safe_destroy(&na->rx_rings[r].rx_queue);
+ }
+
+ for (r=0; r<na->num_rx_rings; r++)
+ netmap_mitigation_cleanup(&gna->mit[r]);
+ free(gna->mit, M_DEVBUF);
+
+ for (r=0; r<na->num_tx_rings; r++) {
+ for (i=0; i<na->num_tx_desc; i++) {
+ m_freem(na->tx_rings[r].tx_pool[i]);
+ }
+ free(na->tx_rings[r].tx_pool, M_DEVBUF);
+ }
+
+#ifdef RATE
+ if (--rate_ctx.refcount == 0) {
+ D("del_timer()");
+ del_timer(&rate_ctx.timer);
+ }
+#endif
+ }
+
+#ifdef REG_RESET
+ error = ifp->netdev_ops->ndo_open(ifp);
+ if (error) {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***