svn commit: r339906 - in stable/12/sys: conf dev/netmap net
Vincenzo Maffione
vmaffione at FreeBSD.org
Tue Oct 30 08:36:38 UTC 2018
Author: vmaffione
Date: Tue Oct 30 08:36:36 2018
New Revision: 339906
URL: https://svnweb.freebsd.org/changeset/base/339906
Log:
MFC r339639:
netmap: align codebase to the current upstream (sha 8374e1a7e6941)
Changelist:
- Move large parts of VALE code to a new file and header netmap_bdg.[ch].
This makes it easier to reuse the code in upcoming projects.
- Improvements and bug fixes to pipes and monitors.
- Introduce nm_os_onattach(), nm_os_onenter() and nm_os_onexit() to
handle differences between FreeBSD and Linux.
- Introduce some new helper functions to handle more host rings and fake
rings (netmap_all_rings(), netmap_real_rings(), ...)
- Add a new sysctl to enable/disable hardware checksumming in emulated netmap mode.
- nm_inject: add support for NS_MOREFRAG
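As an aside (not part of this commit log): NS_MOREFRAG lets a frame
larger than one netmap buffer span several TX slots, which is
presumably what nm_inject() now does internally for oversized frames.
A minimal user-space sketch that does the same thing by hand against
the public ring API (tx_multifrag() is a made-up name, error handling
kept minimal):

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#include <string.h>

/* Send one frame split across several TX slots, flagging every
 * fragment except the last one with NS_MOREFRAG. */
static int
tx_multifrag(struct nm_desc *d, const char *frame, size_t len)
{
	struct netmap_ring *ring = NETMAP_TXRING(d->nifp, d->first_tx_ring);
	unsigned int cur = ring->cur;
	unsigned int avail = nm_ring_space(ring);

	while (len > 0) {
		struct netmap_slot *slot;
		size_t chunk = len < ring->nr_buf_size ? len : ring->nr_buf_size;

		if (avail-- == 0)
			return -1;	/* not enough free slots */
		slot = &ring->slot[cur];
		memcpy(NETMAP_BUF(ring, slot->buf_idx), frame, chunk);
		slot->len = chunk;
		frame += chunk;
		len -= chunk;
		/* all fragments except the last one carry NS_MOREFRAG */
		slot->flags = (len > 0) ? NS_MOREFRAG : 0;
		cur = nm_ring_next(ring, cur);
	}
	ring->head = ring->cur = cur;	/* expose the new slots to the kernel */
	return 0;
}

A subsequent NIOCTXSYNC ioctl (or poll()) on the descriptor then pushes
the frame out.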
Approved by: re (gjb)
Added:
stable/12/sys/dev/netmap/netmap_bdg.c
- copied unchanged from r339639, head/sys/dev/netmap/netmap_bdg.c
stable/12/sys/dev/netmap/netmap_bdg.h
- copied unchanged from r339639, head/sys/dev/netmap/netmap_bdg.h
Modified:
stable/12/sys/conf/files
stable/12/sys/dev/netmap/netmap.c
stable/12/sys/dev/netmap/netmap_freebsd.c
stable/12/sys/dev/netmap/netmap_generic.c
stable/12/sys/dev/netmap/netmap_kern.h
stable/12/sys/dev/netmap/netmap_mem2.c
stable/12/sys/dev/netmap/netmap_monitor.c
stable/12/sys/dev/netmap/netmap_pipe.c
stable/12/sys/dev/netmap/netmap_vale.c
stable/12/sys/net/netmap.h
stable/12/sys/net/netmap_user.h
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/conf/files
==============================================================================
--- stable/12/sys/conf/files Tue Oct 30 05:04:18 2018 (r339905)
+++ stable/12/sys/conf/files Tue Oct 30 08:36:36 2018 (r339906)
@@ -2544,6 +2544,7 @@ dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_pt.c optional netmap
dev/netmap/netmap_vale.c optional netmap
dev/netmap/netmap_legacy.c optional netmap
+dev/netmap/netmap_bdg.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nfsmb/nfsmb.c optional nfsmb pci
dev/nge/if_nge.c optional nge
Modified: stable/12/sys/dev/netmap/netmap.c
==============================================================================
--- stable/12/sys/dev/netmap/netmap.c Tue Oct 30 05:04:18 2018 (r339905)
+++ stable/12/sys/dev/netmap/netmap.c Tue Oct 30 08:36:36 2018 (r339906)
@@ -521,6 +521,9 @@ int netmap_generic_txqdisc = 1;
int netmap_generic_ringsize = 1024;
int netmap_generic_rings = 1;
+/* Non-zero to enable checksum offloading in NIC drivers */
+int netmap_generic_hwcsum = 0;
+
/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
@@ -549,6 +552,9 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &ne
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
"Adapter mode. 0 selects the best option available,"
"1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
+ 0, "Hardware checksums. 0 to disable checksum generation by the NIC (default),"
+ "1 to enable checksum generation by the NIC");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
0, "RX notification interval in nanoseconds");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
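As a usage note (not part of the diff): the knob above is exposed as
dev.netmap.generic_hwcsum. A minimal sketch of toggling it from a C
program with sysctlbyname(3), equivalent to running
"sysctl dev.netmap.generic_hwcsum=1":

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int enable = 1;	/* let the NIC compute checksums in emulated mode */

	if (sysctlbyname("dev.netmap.generic_hwcsum", NULL, NULL,
	    &enable, sizeof(enable)) != 0) {
		perror("sysctlbyname");
		return 1;
	}
	return 0;
}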
@@ -827,8 +833,8 @@ netmap_krings_create(struct netmap_adapter *na, u_int
}
/* account for the (possibly fake) host rings */
- n[NR_TX] = na->num_tx_rings + 1;
- n[NR_RX] = na->num_rx_rings + 1;
+ n[NR_TX] = netmap_all_rings(na, NR_TX);
+ n[NR_RX] = netmap_all_rings(na, NR_RX);
len = (n[NR_TX] + n[NR_RX]) *
(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
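netmap_all_rings() and netmap_real_rings() are added to netmap_kern.h,
whose hunks are not shown in this (truncated) diff. A sketch of their
assumed semantics, inferred from how they are used here; the helper
nma_get_host_nrings() is likewise assumed to return
na->num_host_tx_rings / na->num_host_rx_rings:

/* Assumed sketch, not the literal netmap_kern.h definitions. */
static inline u_int
netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
	/* rings actually backed by kring state; host rings counted
	 * only when the adapter really has them */
	return nma_get_nrings(na, t) +
	    !!(na->na_flags & NAF_HOST_RINGS) * nma_get_host_nrings(na, t);
}

static inline u_int
netmap_all_rings(struct netmap_adapter *na, enum txrx t)
{
	/* all rings exposed to userspace, counting at least one
	 * (possibly fake) host ring as the old "+ 1" rule did */
	u_int legacy = nma_get_nrings(na, t) + 1;
	u_int real = netmap_real_rings(na, t);

	return legacy > real ? legacy : real;
}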
@@ -930,11 +936,14 @@ netmap_krings_delete(struct netmap_adapter *na)
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
- struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
+ u_int lim = netmap_real_rings(na, NR_RX), i;
- ND("destroy sw mbq with len %d", mbq_len(q));
- mbq_purge(q);
- mbq_safe_fini(q);
+ for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
+ struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
+ ND("destroy sw mbq with len %d", mbq_len(q));
+ mbq_purge(q);
+ mbq_safe_fini(q);
+ }
netmap_krings_delete(na);
}
@@ -1535,7 +1544,7 @@ netmap_get_na(struct nmreq_header *hdr,
goto out;
/* try to see if this is a bridge port */
- error = netmap_get_bdg_na(hdr, na, nmd, create);
+ error = netmap_get_vale_na(hdr, na, nmd, create);
if (error)
goto out;
@@ -1827,7 +1836,7 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint3
}
priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
nma_get_nrings(na, t) : 0);
- priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+ priv->np_qlast[t] = netmap_all_rings(na, t);
ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
@@ -2543,7 +2552,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, c
NMG_LOCK();
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
- error = netmap_get_bdg_na(hdr, &na, NULL, 0);
+ error = netmap_get_vale_na(hdr, &na, NULL, 0);
hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
hdr->nr_body = (uintptr_t)req;
if (na && !error) {
@@ -3336,6 +3345,12 @@ netmap_attach_common(struct netmap_adapter *na)
}
na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
+ if (na->na_flags & NAF_HOST_RINGS) {
+ if (na->num_host_rx_rings == 0)
+ na->num_host_rx_rings = 1;
+ if (na->num_host_tx_rings == 0)
+ na->num_host_tx_rings = 1;
+ }
if (na->nm_krings_create == NULL) {
/* we assume that we have been called by a driver,
* since other port types all provide their own
@@ -3357,7 +3372,7 @@ netmap_attach_common(struct netmap_adapter *na)
/* no special nm_bdg_attach callback. On VALE
* attach, we need to interpose a bwrap
*/
- na->nm_bdg_attach = netmap_bwrap_attach;
+ na->nm_bdg_attach = netmap_default_bdg_attach;
#endif
return 0;
@@ -3399,10 +3414,10 @@ out:
static void
netmap_hw_dtor(struct netmap_adapter *na)
{
- if (nm_iszombie(na) || na->ifp == NULL)
+ if (na->ifp == NULL)
return;
- WNA(na->ifp) = NULL;
+ NM_DETACH_NA(na->ifp);
}
@@ -3426,10 +3441,10 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t s
}
if (arg == NULL || arg->ifp == NULL)
- goto fail;
+ return EINVAL;
ifp = arg->ifp;
- if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
@@ -3456,28 +3471,8 @@ netmap_attach_ext(struct netmap_adapter *arg, size_t s
NM_ATTACH_NA(ifp, &hwna->up);
-#ifdef linux
- if (ifp->netdev_ops) {
- /* prepare a clone of the netdev ops */
-#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
- hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
-#else
- hwna->nm_ndo = *ifp->netdev_ops;
-#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
- }
- hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
- hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
- if (ifp->ethtool_ops) {
- hwna->nm_eto = *ifp->ethtool_ops;
- }
- hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
-#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
- hwna->nm_eto.set_channels = linux_netmap_set_channels;
-#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
- if (arg->nm_config == NULL) {
- hwna->up.nm_config = netmap_linux_config;
- }
-#endif /* linux */
+ nm_os_onattach(ifp);
+
if (arg->nm_dtor == NULL) {
hwna->up.nm_dtor = netmap_hw_dtor;
}
@@ -3545,7 +3540,10 @@ netmap_hw_krings_create(struct netmap_adapter *na)
int ret = netmap_krings_create(na, 0);
if (ret == 0) {
/* initialize the mbq for the sw rx ring */
- mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
+ u_int lim = netmap_real_rings(na, NR_RX), i;
+ for (i = na->num_rx_rings; i < lim; i++) {
+ mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
+ }
ND("initialized sw rx queue %d", na->num_rx_rings);
}
return ret;
@@ -3608,8 +3606,14 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
unsigned int txr;
struct mbq *q;
int busy;
+ u_int i;
- kring = na->rx_rings[na->num_rx_rings];
+ i = MBUF_TXQ(m);
+ if (i >= na->num_host_rx_rings) {
+ i = i % na->num_host_rx_rings;
+ }
+ kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];
+
// XXX [Linux] we do not need this lock
// if we follow the down/configure/up protocol -gl
// mtx_lock(&na->core_lock);
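(Note: with the change above, host-stack traffic is spread over the
configured host RX rings instead of always landing on a single one:
the mbuf's transmit queue index is taken modulo na->num_host_rx_rings
and offset past the hardware RX rings, so with 2 host RX rings an mbuf
from TX queue 5 ends up on kring index nma_get_nrings(na, NR_RX) + 1.)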
@@ -3639,8 +3643,15 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
goto done;
}
- if (nm_os_mbuf_has_offld(m)) {
- RD(1, "%s drop mbuf that needs offloadings", na->name);
+ if (!netmap_generic_hwcsum) {
+ if (nm_os_mbuf_has_csum_offld(m)) {
+ RD(1, "%s drop mbuf that needs checksum offload", na->name);
+ goto done;
+ }
+ }
+
+ if (nm_os_mbuf_has_seg_offld(m)) {
+ RD(1, "%s drop mbuf that needs generic segmentation offload", na->name);
goto done;
}
@@ -3843,6 +3854,40 @@ netmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_
}
return netmap_common_irq(na, q, work_done);
+}
+
+/* set/clear native flags and if_transmit/netdev_ops */
+void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ /* We do the setup for intercepting packets only if we are the
+ * first user of this adapter. */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ na->na_flags |= NAF_NETMAP_ON;
+ nm_os_onenter(ifp);
+ nm_update_hostrings_mode(na);
+}
+
+void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ /* We undo the setup for intercepting packets only if we are the
+ * last user of this adapter. */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ nm_update_hostrings_mode(na);
+ nm_os_onexit(ifp);
+
+ na->na_flags &= ~NAF_NETMAP_ON;
}
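The two functions above now delegate the OS-specific interception setup
to nm_os_onenter()/nm_os_onexit() (and netmap_attach_ext() to
nm_os_onattach()), implemented in netmap_freebsd.c and in the Linux
glue. A rough sketch only, assuming the FreeBSD hooks keep doing what
the previous inline code did (redirect if_transmit while netmap mode is
on); the real implementations are in hunks not shown in this message:

/* Assumed sketch of the FreeBSD hooks, not the committed code. */
void
nm_os_onenter(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	na->if_transmit = ifp->if_transmit;	/* save the stack path */
	ifp->if_transmit = netmap_transmit;	/* steal host TX packets */
	ifp->if_capenable |= IFCAP_NETMAP;
}

void
nm_os_onexit(struct ifnet *ifp)
{
	struct netmap_adapter *na = NA(ifp);

	ifp->if_transmit = na->if_transmit;	/* restore the stack path */
	ifp->if_capenable &= ~IFCAP_NETMAP;
}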
Copied: stable/12/sys/dev/netmap/netmap_bdg.c (from r339639, head/sys/dev/netmap/netmap_bdg.c)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/12/sys/dev/netmap/netmap_bdg.c Tue Oct 30 08:36:36 2018 (r339906, copy of r339639, head/sys/dev/netmap/netmap_bdg.c)
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+ struct nm_bridge *b = vp->na_bdg;
+ if (b == NULL)
+ return NULL;
+ return b->bdg_basename;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+static int
+nm_is_id_char(const char c)
+{
+ return (c >= 'a' && c <= 'z') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= '0' && c <= '9') ||
+ (c == '_');
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character. */
+static int
+nm_vale_name_validate(const char *name)
+{
+ int colon_pos = -1;
+ int i;
+
+ if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ return -1;
+ }
+
+ for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
+ if (name[i] == ':') {
+ colon_pos = i;
+ break;
+ } else if (!nm_is_id_char(name[i])) {
+ return -1;
+ }
+ }
+
+ if (strlen(name) - colon_pos > IFNAMSIZ) {
+ /* interface name too long */
+ return -1;
+ }
+
+ return colon_pos;
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+ int i, namelen;
+ struct nm_bridge *b = NULL, *bridges;
+ u_int num_bridges;
+
+ NMG_LOCK_ASSERT();
+
+ netmap_bns_getbridges(&bridges, &num_bridges);
+
+ namelen = nm_vale_name_validate(name);
+ if (namelen < 0) {
+ D("invalid bridge name %s", name ? name : NULL);
+ return NULL;
+ }
+
+ /* lookup the name, remember empty slot if there is one */
+ for (i = 0; i < num_bridges; i++) {
+ struct nm_bridge *x = bridges + i;
+
+ if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+ if (create && b == NULL)
+ b = x; /* record empty slot */
+ } else if (x->bdg_namelen != namelen) {
+ continue;
+ } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+ ND("found '%.*s' at %d", namelen, name, i);
+ b = x;
+ break;
+ }
+ }
+ if (i == num_bridges && b) { /* name not found, can create entry */
+ /* initialize the bridge */
+ ND("create new bridge %s with ports %d", b->bdg_basename,
+ b->bdg_active_ports);
+ b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ if (b->ht == NULL) {
+ D("failed to allocate hash table");
+ return NULL;
+ }
+ strncpy(b->bdg_basename, name, namelen);
+ b->bdg_namelen = namelen;
+ b->bdg_active_ports = 0;
+ for (i = 0; i < NM_BDG_MAXPORTS; i++)
+ b->bdg_port_index[i] = i;
+ /* set the default function */
+ b->bdg_ops = ops;
+ b->private_data = b->ht;
+ b->bdg_flags = 0;
+ NM_BNS_GET(b);
+ }
+ return b;
+}
+
+
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+ if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
+ return EBUSY;
+ }
+
+ ND("marking bridge %s as free", b->bdg_basename);
+ nm_os_free(b->ht);
+ b->bdg_ops = NULL;
+ b->bdg_flags = 0;
+ NM_BNS_PUT(b);
+ return 0;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+ int s_hw = hw, s_sw = sw;
+ int i, lim = b->bdg_active_ports;
+ uint32_t *tmp = b->tmp_bdg_port_index;
+
+ /*
+ New algorithm:
+ make a copy of bdg_port_index;
+ lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+ in the array of bdg_port_index, replacing them with
+ entries from the bottom of the array;
+ decrement bdg_active_ports;
+ acquire BDG_WLOCK() and copy back the array.
+ */
+
+ if (netmap_verbose)
+ D("detach %d and %d (lim %d)", hw, sw, lim);
+ /* make a copy of the list of active ports, update it,
+ * and then copy back within BDG_WLOCK().
+ */
+ memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
+ for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+ if (hw >= 0 && tmp[i] == hw) {
+ ND("detach hw %d at %d", hw, i);
+ lim--; /* point to last active port */
+ tmp[i] = tmp[lim]; /* swap with i */
+ tmp[lim] = hw; /* now this is inactive */
+ hw = -1;
+ } else if (sw >= 0 && tmp[i] == sw) {
+ ND("detach sw %d at %d", sw, i);
+ lim--;
+ tmp[i] = tmp[lim];
+ tmp[lim] = sw;
+ sw = -1;
+ } else {
+ i++;
+ }
+ }
+ if (hw >= 0 || sw >= 0) {
+ D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+ }
+
+ BDG_WLOCK(b);
+ if (b->bdg_ops->dtor)
+ b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+ b->bdg_ports[s_hw] = NULL;
+ if (s_sw >= 0) {
+ b->bdg_ports[s_sw] = NULL;
+ }
+ memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
+ b->bdg_active_ports = lim;
+ BDG_WUNLOCK(b);
+
+ ND("now %d active ports", lim);
+ netmap_bdg_free(b);
+}
+
+
+/* nm_bdg_ctl callback for VALE ports */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+ struct nm_bridge *b = vpna->na_bdg;
+
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ return 0; /* nothing to do */
+ }
+ if (b) {
+ netmap_set_all_rings(na, 0 /* disable */);
+ netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+ vpna->na_bdg = NULL;
+ netmap_set_all_rings(na, 1 /* enable */);
+ }
+ /* we took a reference just for the attach */
+ netmap_adapter_put(na);
+ return 0;
+}
+
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *b)
+{
+ return NM_NEED_BWRAP;
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+ char *nr_name = hdr->nr_name;
+ const char *ifname;
+ struct ifnet *ifp = NULL;
+ int error = 0;
+ struct netmap_vp_adapter *vpna, *hostna = NULL;
+ struct nm_bridge *b;
+ uint32_t i, j;
+ uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+ int needed;
+
+ *na = NULL; /* default return value */
+
+ /* first try to see if this is a bridge port. */
+ NMG_LOCK_ASSERT();
+ if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+ return 0; /* no error, but no VALE prefix */
+ }
+
+ b = nm_find_bridge(nr_name, create, ops);
+ if (b == NULL) {
+ ND("no bridges available for '%s'", nr_name);
+ return (create ? ENOMEM : ENXIO);
+ }
+ if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+ panic("x");
+
+ /* Now we are sure that name starts with the bridge's name,
+ * lookup the port in the bridge. We need to scan the entire
+ * list. It is not important to hold a WLOCK on the bridge
+ * during the search because NMG_LOCK already guarantees
+ * that there are no other possible writers.
+ */
+
+ /* lookup in the local list of ports */
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ ND("checking %s", vpna->up.name);
+ if (!strcmp(vpna->up.name, nr_name)) {
+ netmap_adapter_get(&vpna->up);
+ ND("found existing if %s refs %d", nr_name)
+ *na = &vpna->up;
+ return 0;
+ }
+ }
+ /* not found, should we create it? */
+ if (!create)
+ return ENXIO;
+ /* yes we should, see if we have space to attach entries */
+ needed = 2; /* in some cases we only need 1 */
+ if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+ D("bridge full %d, cannot create new port", b->bdg_active_ports);
+ return ENOMEM;
+ }
+ /* record the next two ports available, but do not allocate yet */
+ cand = b->bdg_port_index[b->bdg_active_ports];
+ cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+ ND("+++ bridge %s port %s used %d avail %d %d",
+ b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+
+ /*
+ * try to see if there is a matching NIC with this name
+ * (after the bridge's name)
+ */
+ ifname = nr_name + b->bdg_namelen + 1;
+ ifp = ifunit_ref(ifname);
+ if (!ifp) {
+ /* Create an ephemeral virtual port.
+ * This block contains all the ephemeral-specific logic.
+ */
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* bdg_netmap_attach creates a struct netmap_adapter */
+ error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+ if (error) {
+ D("error %d", error);
+ goto out;
+ }
+ /* shortcut - we can skip get_hw_na(),
+ * ownership check and nm_bdg_attach()
+ */
+
+ } else {
+ struct netmap_adapter *hw;
+
+ /* the vale:nic syntax is only valid for some commands */
+ switch (hdr->nr_reqtype) {
+ case NETMAP_REQ_VALE_ATTACH:
+ case NETMAP_REQ_VALE_DETACH:
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE:
+ break; /* ok */
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+ error = netmap_get_hw_na(ifp, nmd, &hw);
+ if (error || hw == NULL)
+ goto out;
+
+ /* host adapter might not be created */
+ error = hw->nm_bdg_attach(nr_name, hw, b);
+ if (error == NM_NEED_BWRAP) {
+ error = b->bdg_ops->bwrap_attach(nr_name, hw);
+ }
+ if (error)
+ goto out;
+ vpna = hw->na_vp;
+ hostna = hw->na_hostvp;
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ /* Check if we need to skip the host rings. */
+ struct nmreq_vale_attach *areq =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+ hostna = NULL;
+ }
+ }
+ }
+
+ BDG_WLOCK(b);
+ vpna->bdg_port = cand;
+ ND("NIC %p to bridge port %d", vpna, cand);
+ /* bind the port to the bridge (virtual ports are not active) */
+ b->bdg_ports[cand] = vpna;
+ vpna->na_bdg = b;
+ b->bdg_active_ports++;
+ if (hostna != NULL) {
+ /* also bind the host stack to the bridge */
+ b->bdg_ports[cand2] = hostna;
+ hostna->bdg_port = cand2;
+ hostna->na_bdg = b;
+ b->bdg_active_ports++;
+ ND("host %p to bridge port %d", hostna, cand2);
+ }
+ ND("if %s refs %d", ifname, vpna->up.na_refcount);
+ BDG_WUNLOCK(b);
+ *na = &vpna->up;
+ netmap_adapter_get(*na);
+
+out:
+ if (ifp)
+ if_rele(ifp);
+
+ return error;
+}
+
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter * vpna;
+ struct netmap_adapter *na = NULL;
+ struct netmap_mem_d *nmd = NULL;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ if (req->reg.nr_mem_id) {
+ nmd = netmap_mem_find(req->reg.nr_mem_id);
+ if (nmd == NULL) {
+ error = EINVAL;
+ goto unlock_exit;
+ }
+ }
+
+ /* check for existing one */
+ error = netmap_get_vale_na(hdr, &na, nmd, 0);
+ if (na) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+ error = netmap_get_vale_na(hdr, &na,
+ nmd, 1 /* create if not exists */);
+ if (error) { /* no device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ }
+
+ if (NETMAP_OWNED_BY_ANY(na)) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ if (na->nm_bdg_ctl) {
+ /* nop for VALE ports. The bwrap needs to put the hwna
+ * in netmap mode (see netmap_bwrap_bdg_ctl)
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ if (error)
+ goto unref_exit;
+ ND("registered %s to netmap-mode", na->name);
+ }
+ vpna = (struct netmap_vp_adapter *)na;
+ req->port_index = vpna->bdg_port;
+ NMG_UNLOCK();
+ return 0;
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+}
+
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+ return na->nm_register == netmap_bwrap_reg;
+}
+
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter *vpna;
+ struct netmap_adapter *na;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+ if (error) { /* no device, or another bridge or user owns the device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ } else if (nm_is_bwrap(na) &&
+ ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+ /* Don't detach a NIC with polling */
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ vpna = (struct netmap_vp_adapter *)na;
+ if (na->na_vp != vpna) {
+ /* trying to detach first attach of VALE persistent port attached
+ * to 2 bridges
+ */
+ error = EBUSY;
+ goto unref_exit;
+ }
+ nmreq_det->port_index = vpna->bdg_port;
+
+ if (na->nm_bdg_ctl) {
+ /* remove the port from bridge. The bwrap
+ * also needs to put the hwna in normal mode
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ }
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+
+}
+
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+ struct nm_kctx *nmk;
+ u_int qfirst;
+ u_int qlast;
+ struct nm_bdg_polling_state *bps;
+};
+
+struct nm_bdg_polling_state {
+ bool configured;
+ bool stopped;
+ struct netmap_bwrap_adapter *bna;
+ uint32_t mode;
+ u_int qfirst;
+ u_int qlast;
+ u_int cpu_from;
+ u_int ncpus;
+ struct nm_bdg_kthread *kthreads;
+};
+
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+ struct nm_bdg_kthread *nbk = data;
+ struct netmap_bwrap_adapter *bna;
+ u_int qfirst, qlast, i;
+ struct netmap_kring **kring0, *kring;
+
+ if (!nbk)
+ return;
+ qfirst = nbk->qfirst;
+ qlast = nbk->qlast;
+ bna = nbk->bps->bna;
+ kring0 = NMR(bna->hwna, NR_RX);
+
+ for (i = qfirst; i < qlast; i++) {
+ kring = kring0[i];
+ kring->nm_notify(kring, 0);
+ }
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+ struct nm_kctx_cfg kcfg;
+ int i, j;
+
+ bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
+ if (bps->kthreads == NULL)
+ return ENOMEM;
+
+ bzero(&kcfg, sizeof(kcfg));
+ kcfg.worker_fn = netmap_bwrap_polling;
+ kcfg.use_kthread = 1;
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ int all = (bps->ncpus == 1 &&
+ bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+ int affinity = bps->cpu_from + i;
+
+ t->bps = bps;
+ t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+ t->qlast = all ? bps->qlast : t->qfirst + 1;
+ D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+ t->qlast);
+
+ kcfg.type = i;
+ kcfg.worker_private = t;
+ t->nmk = nm_os_kctx_create(&kcfg, NULL);
+ if (t->nmk == NULL) {
+ goto cleanup;
+ }
+ nm_os_kctx_worker_setaff(t->nmk, affinity);
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) {
+ struct nm_bdg_kthread *t = bps->kthreads + j; /* only the contexts created so far */
+ nm_os_kctx_destroy(t->nmk);
+ }
+ nm_os_free(bps->kthreads);
+ return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+ int error, i, j;
+
+ if (!bps) {
+ D("polling is not configured");
+ return EFAULT;
+ }
+ bps->stopped = false;
+
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ error = nm_os_kctx_worker_start(t->nmk);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***