svn commit: r231697 - in stable/9: share/man/man4 sys/conf
sys/dev/xen/blkback sys/dev/xen/netback sys/i386/include/xen
sys/kern sys/xen/interface/io
Kenneth D. Merry
ken at FreeBSD.org
Tue Feb 14 18:00:37 UTC 2012
Author: ken
Date: Tue Feb 14 18:00:37 2012
New Revision: 231697
URL: http://svn.freebsd.org/changeset/base/231697
Log:
MFC 230587, 230916
Xen netback driver rewrite.
r230587 | ken | 2012-01-26 09:35:09 -0700 (Thu, 26 Jan 2012) | 38 lines
Xen netback driver rewrite.
share/man/man4/Makefile,
share/man/man4/xnb.4,
sys/dev/xen/netback/netback.c,
sys/dev/xen/netback/netback_unit_tests.c:
Rewrote the netback driver for xen to attach properly via newbus
and work properly in both HVM and PVM mode (only HVM is tested).
Works with the in-tree FreeBSD netfront driver or the Windows
netfront driver from SuSE. Has not been extensively tested with
a Linux netfront driver. Does not implement LRO, TSO, or
polling. Includes unit tests that may be run through sysctl
after compiling with XNB_DEBUG defined.
sys/dev/xen/blkback/blkback.c,
sys/xen/interface/io/netif.h:
Comment elaboration.
sys/kern/uipc_mbuf.c:
Fix page fault in kernel mode when calling m_print() on a
null mbuf. Since m_print() is only used for debugging, there
are no performance concerns for extra error checking code.
sys/kern/subr_scanf.c:
Add the "hh" and "ll" width specifiers from C99 to scanf().
A few callers were already using "ll" even though scanf()
was handling it as "l".
Submitted by: Alan Somers <alans at spectralogic.com>
Submitted by: John Suykerbuyk <johns at spectralogic.com>
Sponsored by: Spectra Logic
Reviewed by: ken
r230916 | ken | 2012-02-02 10:54:35 -0700 (Thu, 02 Feb 2012) | 13 lines
Fix the netback driver build for i386.
netback.c: Add missing VM includes.
xen/xenvar.h,
xen/xenpmap.h: Move some XENHVM macros from <machine/xen/xenpmap.h> to
<machine/xen/xenvar.h> on i386 to match the amd64 headers.
conf/files: Add netback to the build.
Submitted by: jhb
Added:
stable/9/share/man/man4/xnb.4
- copied unchanged from r230587, head/share/man/man4/xnb.4
stable/9/sys/dev/xen/netback/netback_unit_tests.c
- copied unchanged from r230587, head/sys/dev/xen/netback/netback_unit_tests.c
Modified:
stable/9/share/man/man4/Makefile
stable/9/sys/conf/files
stable/9/sys/dev/xen/blkback/blkback.c
stable/9/sys/dev/xen/netback/netback.c
stable/9/sys/i386/include/xen/xenpmap.h
stable/9/sys/i386/include/xen/xenvar.h
stable/9/sys/kern/subr_scanf.c
stable/9/sys/kern/uipc_mbuf.c
stable/9/sys/xen/interface/io/netif.h
Directory Properties:
stable/9/ (props changed)
stable/9/share/ (props changed)
stable/9/share/man/ (props changed)
stable/9/share/man/man4/ (props changed)
stable/9/sys/ (props changed)
stable/9/sys/conf/ (props changed)
Modified: stable/9/share/man/man4/Makefile
==============================================================================
--- stable/9/share/man/man4/Makefile Tue Feb 14 17:35:44 2012 (r231696)
+++ stable/9/share/man/man4/Makefile Tue Feb 14 18:00:37 2012 (r231697)
@@ -528,6 +528,7 @@ MAN= aac.4 \
${_xen.4} \
xhci.4 \
xl.4 \
+ ${_xnb.4} \
xpt.4 \
zero.4 \
zyd.4
@@ -722,6 +723,7 @@ _urtw.4= urtw.4
_viawd.4= viawd.4
_wpi.4= wpi.4
_xen.4= xen.4
+_xnb.4= xnb.4
MLINKS+=lindev.4 full.4
.endif
Copied: stable/9/share/man/man4/xnb.4 (from r230587, head/share/man/man4/xnb.4)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/9/share/man/man4/xnb.4 Tue Feb 14 18:00:37 2012 (r231697, copy of r230587, head/share/man/man4/xnb.4)
@@ -0,0 +1,134 @@
+.\" Copyright (c) 2012 Spectra Logic Corporation
+.\" All rights reserved.
+.\"
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions, and the following disclaimer,
+.\" without modification.
+.\" 2. Redistributions in binary form must reproduce at minimum a disclaimer
+.\" substantially similar to the "NO WARRANTY" disclaimer below
+.\" ("Disclaimer") and any redistribution must be conditioned upon
+.\" including a substantially similar Disclaimer requirement for further
+.\" binary redistribution.
+.\"
+.\" NO WARRANTY
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+.\" HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+.\" IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+.\" POSSIBILITY OF SUCH DAMAGES.
+.\"
+.\" Authors: Alan Somers (Spectra Logic Corporation)
+.\"
+.\" $FreeBSD$
+.\"
+
+.Dd January 6, 2012
+.Dt XNB 4
+.Os
+.Sh NAME
+.Nm xnb
+.Nd "Xen Paravirtualized Backend Ethernet Driver"
+.Sh SYNOPSIS
+To compile this driver into the kernel, place the following lines in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "options XENHVM"
+.Cd "device xenpci"
+.Ed
+.Sh DESCRIPTION
+The
+.Nm
+driver provides the back half of a paravirtualized
+.Xr xen 4
+network connection. The netback and netfront drivers appear to their
+respective operating systems as Ethernet devices linked by a crossover cable.
+Typically,
+.Nm
+will run on Domain 0 and the netfront driver will run on a guest domain.
+However, it is also possible to run
+.Nm
+on a guest domain. It may be bridged or routed to provide the netfront's
+domain access to other guest domains or to a physical network.
+.Pp
+In most respects, the
+.Nm
+device appears to the OS as any other Ethernet device. It can be configured at
+runtime entirely with
+.Xr ifconfig 8
+\&. In particular, it supports MAC changing, arbitrary MTU sizes, checksum
+offload for IP, UDP, and TCP for both receive and transmit, and TSO. However,
+see
+.Sx CAVEATS
+before enabling txcsum, rxcsum, or tso.
+.Sh SYSCTL VARIABLES
+The following read-only variables are available via
+.Xr sysctl 8 :
+.Bl -tag -width indent
+.It Va dev.xnb.%d.dump_rings
+Displays information about the ring buffers used to pass requests between the
+netfront and netback. Mostly useful for debugging, but can also be used to
+get traffic statistics.
+.It Va dev.xnb.%d.unit_test_results
+Runs a builtin suite of unit tests and displays the results. Does not affect
+the operation of the driver in any way. Note that the test suite simulates
+error conditions; this will result in error messages being printed to the
+system log.
+.Sh CAVEATS
+Packets sent through Xennet pass over shared memory, so the protocol includes
+no form of link-layer checksum or CRC. Furthermore, Xennet drivers always
+report to their hosts that they support receive and transmit checksum
+offloading. They "offload" the checksum calculation by simply skipping it.
+That works fine for packets that are exchanged between two domains on the same
+machine. However, when a Xennet interface is bridged to a physical interface,
+a correct checksum must be attached to any packets bound for that physical
+interface. Currently, FreeBSD lacks any mechanism for an ethernet device to
+inform the OS that newly received packets are valid even though their checksums
+are not. So if the netfront driver is configured to offload checksum
+calculations, it will pass non-checksummed packets to
+.Nm
+, which must then calculate the checksum in software before passing the packet
+to the OS.
+.Pp
+For this reason, it is recommended that if
+.Nm
+is bridged to a physical interface, then transmit checksum offloading should be
+disabled on the netfront. The Xennet protocol does not have any mechanism for
+the netback to request the netfront to do this; the operator must do it
+manually.
+.Sh SEE ALSO
+.Xr arp 4 ,
+.Xr netintro 4 ,
+.Xr ng_ether 4 ,
+.Xr ifconfig 8 ,
+.Xr xen 4
+.Sh HISTORY
+The
+.Nm
+device driver first appeared in
+.Fx 10.0
+.
+.Sh AUTHORS
+The
+.Nm
+driver was written by
+.An Alan Somers
+.Aq alans at spectralogic.com
+and
+.An John Suykerbuyk
+.Aq johns at spectralogic.com
+.Sh BUGS
+The
+.Nm
+driver does not properly checksum UDP datagrams that span more than one
+Ethernet frame. Nor does it correctly checksum IPv6 packets. To work around
+that bug, disable transmit checksum offloading on the netfront driver.
Modified: stable/9/sys/conf/files
==============================================================================
--- stable/9/sys/conf/files Tue Feb 14 17:35:44 2012 (r231696)
+++ stable/9/sys/conf/files Tue Feb 14 18:00:37 2012 (r231697)
@@ -3493,6 +3493,7 @@ dev/xen/blkback/blkback.c optional xen |
dev/xen/console/console.c optional xen
dev/xen/console/xencons_ring.c optional xen
dev/xen/control/control.c optional xen | xenhvm
+dev/xen/netback/netback.c optional xen | xenhvm
dev/xen/netfront/netfront.c optional xen | xenhvm
dev/xen/xenpci/xenpci.c optional xenpci
dev/xen/xenpci/evtchn.c optional xenpci
Modified: stable/9/sys/dev/xen/blkback/blkback.c
==============================================================================
--- stable/9/sys/dev/xen/blkback/blkback.c Tue Feb 14 17:35:44 2012 (r231696)
+++ stable/9/sys/dev/xen/blkback/blkback.c Tue Feb 14 18:00:37 2012 (r231697)
@@ -3434,6 +3434,10 @@ xbb_shutdown(struct xbb_softc *xbb)
DPRINTF("\n");
+ /*
+ * Before unlocking mutex, set this flag to prevent other threads from
+ * getting into this function
+ */
xbb->flags |= XBBF_IN_SHUTDOWN;
mtx_unlock(&xbb->lock);
Modified: stable/9/sys/dev/xen/netback/netback.c
==============================================================================
--- stable/9/sys/dev/xen/netback/netback.c Tue Feb 14 17:35:44 2012 (r231696)
+++ stable/9/sys/dev/xen/netback/netback.c Tue Feb 14 18:00:37 2012 (r231697)
@@ -1,1596 +1,2537 @@
-/*
- * Copyright (c) 2006, Cisco Systems, Inc.
+/*-
+ * Copyright (c) 2009-2011 Spectra Logic Corporation
* All rights reserved.
*
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
* are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. Redistributions in binary form must reproduce at minimum a disclaimer
+ * substantially similar to the "NO WARRANTY" disclaimer below
+ * ("Disclaimer") and any redistribution must be conditioned upon
+ * including a substantially similar Disclaimer requirement for further
+ * binary redistribution.
*
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
+ * NO WARRANTY
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGES.
+ *
+ * Authors: Justin T. Gibbs (Spectra Logic Corporation)
+ * Alan Somers (Spectra Logic Corporation)
+ * John Suykerbuyk (Spectra Logic Corporation)
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+
+/**
+ * \file netback.c
+ *
+ * \brief Device driver supporting the vending of network access
+ * from this FreeBSD domain to other domains.
+ */
+#include "opt_inet.h"
+#include "opt_global.h"
+
#include "opt_sctp.h"
#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/sockio.h>
-#include <sys/mbuf.h>
-#include <sys/malloc.h>
#include <sys/kernel.h>
-#include <sys/socket.h>
-#include <sys/queue.h>
-#include <sys/taskqueue.h>
-#include <sys/module.h>
#include <sys/bus.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_arp.h>
-#include <net/if_types.h>
#include <net/ethernet.h>
-#include <net/if_bridgevar.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/if_types.h>
-#include <netinet/in_systm.h>
#include <netinet/in.h>
-#include <netinet/in_var.h>
#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#if __FreeBSD_version >= 700000
#include <netinet/tcp.h>
-#include <netinet/udp.h>
-#ifdef SCTP
-#include <netinet/sctp.h>
-#include <netinet/sctp_crc32.h>
#endif
+#include <netinet/ip_icmp.h>
+#include <netinet/udp.h>
+#include <machine/in_cksum.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
-#include <machine/in_cksum.h>
-#include <machine/xen-os.h>
-#include <machine/hypervisor.h>
-#include <machine/hypervisor-ifs.h>
-#include <machine/xen_intr.h>
-#include <machine/evtchn.h>
-#include <machine/xenbus.h>
-#include <machine/gnttab.h>
-#include <machine/xen-public/memory.h>
-#include <dev/xen/xenbus/xenbus_comms.h>
+#include <machine/_inttypes.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+
+#include <xen/evtchn.h>
+#include <xen/xen_intr.h>
+#include <xen/interface/io/netif.h>
+#include <xen/xenbus/xenbusvar.h>
+/*--------------------------- Compile-time Tunables --------------------------*/
+
+/*---------------------------------- Macros ----------------------------------*/
+/**
+ * Custom malloc type for all driver allocations.
+ */
+static MALLOC_DEFINE(M_XENNETBACK, "xnb", "Xen Net Back Driver Data");
-#ifdef XEN_NETBACK_DEBUG
-#define DPRINTF(fmt, args...) \
- printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define XNB_SG 1 /* netback driver supports feature-sg */
+#define XNB_GSO_TCPV4 1 /* netback driver supports feature-gso-tcpv4 */
+#define XNB_RX_COPY 1 /* netback driver supports feature-rx-copy */
+#define XNB_RX_FLIP 0 /* netback driver does not support feature-rx-flip */
+
+#undef XNB_DEBUG
+#define XNB_DEBUG /* hardcode on during development */
+
+#ifdef XNB_DEBUG
+#define DPRINTF(fmt, args...) \
+ printf("xnb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
-#define DPRINTF(fmt, args...) ((void)0)
+#define DPRINTF(fmt, args...) do {} while (0)
#endif
-#ifdef XEN_NETBACK_DEBUG_LOTS
-#define DDPRINTF(fmt, args...) \
- printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
-#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
-#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
-#else
-#define DDPRINTF(fmt, args...) ((void)0)
-#define DPRINTF_MBUF(_m) ((void)0)
-#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+/* Default length for stack-allocated grant tables */
+#define GNTTAB_LEN (64)
+
+/* Features supported by all backends. TSO and LRO can be negotiated */
+#define XNB_CSUM_FEATURES (CSUM_TCP | CSUM_UDP)
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+/**
+ * Two argument version of the standard macro. Second argument is a tentative
+ * value of req_cons
+ */
+#define RING_HAS_UNCONSUMED_REQUESTS_2(_r, cons) ({ \
+ unsigned int req = (_r)->sring->req_prod - cons; \
+ unsigned int rsp = RING_SIZE(_r) - \
+ (cons - (_r)->rsp_prod_pvt); \
+ req < rsp ? req : rsp; \
+})
+
+#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+#define virt_to_offset(x) ((x) & (PAGE_SIZE - 1))
+
+/**
+ * Predefined array type of grant table copy descriptors. Used to pass around
+ * statically allocated memory structures.
+ */
+typedef struct gnttab_copy gnttab_copy_table[GNTTAB_LEN];
+
+/*--------------------------- Forward Declarations ---------------------------*/
+struct xnb_softc;
+struct xnb_pkt;
+
+static void xnb_attach_failed(struct xnb_softc *xnb,
+ int err, const char *fmt, ...)
+ __printflike(3,4);
+static int xnb_shutdown(struct xnb_softc *xnb);
+static int create_netdev(device_t dev);
+static int xnb_detach(device_t dev);
+static int xen_net_read_mac(device_t dev, uint8_t mac[]);
+static int xnb_ifmedia_upd(struct ifnet *ifp);
+static void xnb_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
+static void xnb_intr(void *arg);
+static int xnb_send(netif_rx_back_ring_t *rxb, domid_t otherend,
+ const struct mbuf *mbufc, gnttab_copy_table gnttab);
+static int xnb_recv(netif_tx_back_ring_t *txb, domid_t otherend,
+ struct mbuf **mbufc, struct ifnet *ifnet,
+ gnttab_copy_table gnttab);
+static int xnb_ring2pkt(struct xnb_pkt *pkt,
+ const netif_tx_back_ring_t *tx_ring,
+ RING_IDX start);
+static void xnb_txpkt2rsp(const struct xnb_pkt *pkt,
+ netif_tx_back_ring_t *ring, int error);
+static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp);
+static int xnb_txpkt2gnttab(const struct xnb_pkt *pkt,
+ const struct mbuf *mbufc,
+ gnttab_copy_table gnttab,
+ const netif_tx_back_ring_t *txb,
+ domid_t otherend_id);
+static void xnb_update_mbufc(struct mbuf *mbufc,
+ const gnttab_copy_table gnttab, int n_entries);
+static int xnb_mbufc2pkt(const struct mbuf *mbufc,
+ struct xnb_pkt *pkt,
+ RING_IDX start, int space);
+static int xnb_rxpkt2gnttab(const struct xnb_pkt *pkt,
+ const struct mbuf *mbufc,
+ gnttab_copy_table gnttab,
+ const netif_rx_back_ring_t *rxb,
+ domid_t otherend_id);
+static int xnb_rxpkt2rsp(const struct xnb_pkt *pkt,
+ const gnttab_copy_table gnttab, int n_entries,
+ netif_rx_back_ring_t *ring);
+static void xnb_add_mbuf_cksum(struct mbuf *mbufc);
+static void xnb_stop(struct xnb_softc*);
+static int xnb_ioctl(struct ifnet*, u_long, caddr_t);
+static void xnb_start_locked(struct ifnet*);
+static void xnb_start(struct ifnet*);
+static void xnb_ifinit_locked(struct xnb_softc*);
+static void xnb_ifinit(void*);
+#ifdef XNB_DEBUG
+static int xnb_unit_test_main(SYSCTL_HANDLER_ARGS);
+static int xnb_dump_rings(SYSCTL_HANDLER_ARGS);
#endif
+/*------------------------------ Data Structures -----------------------------*/
-#define WPRINTF(fmt, args...) \
- printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
-#define BUG_ON PANIC_IF
+/**
+ * Representation of a xennet packet. Simplified version of a packet as
+ * stored in the Xen tx ring. Applicable to both RX and TX packets
+ */
+struct xnb_pkt{
+ /**
+ * Array index of the first data-bearing (eg, not extra info) entry
+ * for this packet
+ */
+ RING_IDX car;
+
+ /**
+ * Array index of the second data-bearing entry for this packet.
+ * Invalid if the packet has only one data-bearing entry. If the
+ * packet has more than two data-bearing entries, then the second
+ * through the last will be sequential modulo the ring size
+ */
+ RING_IDX cdr;
+
+ /**
+ * Optional extra info. Only valid if flags contains
+ * NETTXF_extra_info. Note that extra.type will always be
+ * XEN_NETIF_EXTRA_TYPE_GSO. Currently, no known netfront or netback
+ * driver will ever set XEN_NETIF_EXTRA_TYPE_MCAST_*
+ */
+ netif_extra_info_t extra;
+
+ /** Size of entire packet in bytes. */
+ uint16_t size;
-#define IFNAME(_np) (_np)->ifp->if_xname
+ /** The size of the first entry's data in bytes */
+ uint16_t car_size;
-#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
-#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+ /**
+ * Either NETTXF_ or NETRXF_ flags. Note that the flag values are
+ * not the same for TX and RX packets
+ */
+ uint16_t flags;
+
+ /**
+ * The number of valid data-bearing entries (either netif_tx_request's
+ * or netif_rx_response's) in the packet. If this is 0, it means the
+ * entire packet is invalid.
+ */
+ uint16_t list_len;
-struct ring_ref {
- vm_offset_t va;
- grant_handle_t handle;
- uint64_t bus_addr;
+ /** There was an error processing the packet */
+ uint8_t error;
};
-typedef struct netback_info {
+/** xnb_pkt method: initialize it */
+static inline void
+xnb_pkt_initialize(struct xnb_pkt *pxnb)
+{
+ bzero(pxnb, sizeof(*pxnb));
+}
- /* Schedule lists */
- STAILQ_ENTRY(netback_info) next_tx;
- STAILQ_ENTRY(netback_info) next_rx;
- int on_tx_sched_list;
- int on_rx_sched_list;
-
- struct xenbus_device *xdev;
- XenbusState frontend_state;
-
- domid_t domid;
- int handle;
- char *bridge;
-
- int rings_connected;
- struct ring_ref tx_ring_ref;
- struct ring_ref rx_ring_ref;
- netif_tx_back_ring_t tx;
- netif_rx_back_ring_t rx;
- evtchn_port_t evtchn;
- int irq;
- void *irq_cookie;
+/** xnb_pkt method: mark the packet as valid */
+static inline void
+xnb_pkt_validate(struct xnb_pkt *pxnb)
+{
+ pxnb->error = 0;
+};
- struct ifnet *ifp;
- int ref_cnt;
+/** xnb_pkt method: mark the packet as invalid */
+static inline void
+xnb_pkt_invalidate(struct xnb_pkt *pxnb)
+{
+ pxnb->error = 1;
+};
- device_t ndev;
- int attached;
-} netif_t;
-
-
-#define MAX_PENDING_REQS 256
-#define PKT_PROT_LEN 64
-
-static struct {
- netif_tx_request_t req;
- netif_t *netif;
-} pending_tx_info[MAX_PENDING_REQS];
-static uint16_t pending_ring[MAX_PENDING_REQS];
-typedef unsigned int PEND_RING_IDX;
-#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
-static PEND_RING_IDX pending_prod, pending_cons;
-#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
-
-static unsigned long mmap_vstart;
-#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
-
-/* Freed TX mbufs get batched on this ring before return to pending_ring. */
-static uint16_t dealloc_ring[MAX_PENDING_REQS];
-static PEND_RING_IDX dealloc_prod, dealloc_cons;
-
-static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
-static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
-static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
-
-static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
-static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
-static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
-
-static struct task net_tx_task, net_rx_task;
-static struct callout rx_task_callout;
-
-static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
- STAILQ_HEAD_INITIALIZER(tx_sched_list);
-static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
- STAILQ_HEAD_INITIALIZER(rx_sched_list);
-static struct mtx tx_sched_list_lock;
-static struct mtx rx_sched_list_lock;
-
-static int vif_unit_maker = 0;
-
-/* Protos */
-static void netback_start(struct ifnet *ifp);
-static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int vif_add_dev(struct xenbus_device *xdev);
-static void disconnect_rings(netif_t *netif);
+/** xnb_pkt method: Check whether the packet is valid */
+static inline int
+xnb_pkt_is_valid(const struct xnb_pkt *pxnb)
+{
+ return (! pxnb->error);
+}
+
+#ifdef XNB_DEBUG
+/** xnb_pkt method: print the packet's contents in human-readable format*/
+static void __unused
+xnb_dump_pkt(const struct xnb_pkt *pkt) {
+ if (pkt == NULL) {
+ DPRINTF("Was passed a null pointer.\n");
+ return;
+ }
+ DPRINTF("pkt address= %p\n", pkt);
+ DPRINTF("pkt->size=%d\n", pkt->size);
+ DPRINTF("pkt->car_size=%d\n", pkt->car_size);
+ DPRINTF("pkt->flags=0x%04x\n", pkt->flags);
+ DPRINTF("pkt->list_len=%d\n", pkt->list_len);
+ /* DPRINTF("pkt->extra"); TODO */
+ DPRINTF("pkt->car=%d\n", pkt->car);
+ DPRINTF("pkt->cdr=%d\n", pkt->cdr);
+ DPRINTF("pkt->error=%d\n", pkt->error);
+}
+#endif /* XNB_DEBUG */
-#ifdef XEN_NETBACK_DEBUG_LOTS
-/* Debug code to display the contents of an mbuf */
static void
-print_mbuf(struct mbuf *m, int max)
+xnb_dump_txreq(RING_IDX idx, const struct netif_tx_request *txreq)
{
- int i, j=0;
- printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
- for (; m; m = m->m_next) {
- unsigned char *d = m->m_data;
- for (i=0; i < m->m_len; i++) {
- if (max && j == max)
- break;
- if ((j++ % 16) == 0)
- printf("\n%04x:", j);
- printf(" %02x", d[i]);
- }
+ if (txreq != NULL) {
+ DPRINTF("netif_tx_request index =%u\n", idx);
+ DPRINTF("netif_tx_request.gref =%u\n", txreq->gref);
+ DPRINTF("netif_tx_request.offset=%hu\n", txreq->offset);
+ DPRINTF("netif_tx_request.flags =%hu\n", txreq->flags);
+ DPRINTF("netif_tx_request.id =%hu\n", txreq->id);
+ DPRINTF("netif_tx_request.size =%hu\n", txreq->size);
}
- printf("\n");
}
-#endif
-#define MAX_MFN_ALLOC 64
-static unsigned long mfn_list[MAX_MFN_ALLOC];
-static unsigned int alloc_index = 0;
+/**
+ * \brief Configuration data for a shared memory request ring
+ * used to communicate with the front-end client of this
+ * driver.
+ */
+struct xnb_ring_config {
+ /**
+ * Runtime structures for ring access. Unfortunately, TX and RX rings
+ * use different data structures, and that cannot be changed since it
+ * is part of the interdomain protocol.
+ */
+ union{
+ netif_rx_back_ring_t rx_ring;
+ netif_tx_back_ring_t tx_ring;
+ } back_ring;
+
+ /**
+ * The device bus address returned by the hypervisor when
+ * mapping the ring and required to unmap it when a connection
+ * is torn down.
+ */
+ uint64_t bus_addr;
+
+ /** The pseudo-physical address where ring memory is mapped.*/
+ uint64_t gnt_addr;
-static unsigned long
-alloc_mfn(void)
-{
- unsigned long mfn = 0;
- struct xen_memory_reservation reservation = {
- .extent_start = mfn_list,
- .nr_extents = MAX_MFN_ALLOC,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
- if ( unlikely(alloc_index == 0) )
- alloc_index = HYPERVISOR_memory_op(
- XENMEM_increase_reservation, &reservation);
- if ( alloc_index != 0 )
- mfn = mfn_list[--alloc_index];
- return mfn;
-}
+ /** KVA address where ring memory is mapped. */
+ vm_offset_t va;
-static unsigned long
-alloc_empty_page_range(unsigned long nr_pages)
+ /**
+ * Grant table handles, one per-ring page, returned by the
+ * hypervisor upon mapping of the ring and required to
+ * unmap it when a connection is torn down.
+ */
+ grant_handle_t handle;
+
+ /** The number of ring pages mapped for the current connection. */
+ unsigned ring_pages;
+
+ /**
+ * The grant references, one per-ring page, supplied by the
+ * front-end, allowing us to reference the ring pages in the
+ * front-end's domain and to map these pages into our own domain.
+ */
+ grant_ref_t ring_ref;
+};
+
+/**
+ * Per-instance connection state flags.
+ */
+typedef enum
{
- void *pages;
- int i = 0, j = 0;
- multicall_entry_t mcl[17];
- unsigned long mfn_list[16];
- struct xen_memory_reservation reservation = {
- .extent_start = mfn_list,
- .nr_extents = 0,
- .address_bits = 0,
- .extent_order = 0,
- .domid = DOMID_SELF
- };
+ /** Communication with the front-end has been established. */
+ XNBF_RING_CONNECTED = 0x01,
- pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
- if (pages == NULL)
- return 0;
+ /**
+ * Front-end requests exist in the ring and are waiting for
+ * xnb_xen_req objects to free up.
+ */
+ XNBF_RESOURCE_SHORTAGE = 0x02,
+
+ /** Connection teardown has started. */
+ XNBF_SHUTDOWN = 0x04,
- memset(mcl, 0, sizeof(mcl));
+ /** A thread is already performing shutdown processing. */
+ XNBF_IN_SHUTDOWN = 0x08
+} xnb_flag_t;
- while (i < nr_pages) {
- unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+/**
+ * Types of rings. Used for array indices and to identify a ring's control
+ * data structure type
+ */
+typedef enum{
+ XNB_RING_TYPE_TX = 0, /* ID of TX rings, used for array indices */
+ XNB_RING_TYPE_RX = 1, /* ID of RX rings, used for array indices */
+ XNB_NUM_RING_TYPES
+} xnb_ring_type_t;
- mcl[j].op = __HYPERVISOR_update_va_mapping;
- mcl[j].args[0] = va;
+/**
+ * Per-instance configuration data.
+ */
+struct xnb_softc {
+ /** NewBus device corresponding to this instance. */
+ device_t dev;
+
+ /* Media related fields */
+
+ /** Generic network media state */
+ struct ifmedia sc_media;
+
+ /** Media carrier info */
+ struct ifnet *xnb_ifp;
+
+ /** Our own private carrier state */
+ unsigned carrier;
+
+ /** Device MAC Address */
+ uint8_t mac[ETHER_ADDR_LEN];
+
+ /* Xen related fields */
+
+ /**
+ * \brief The netif protocol abi in effect.
+ *
+ * There are situations where the back and front ends can
+ * have a different, native abi (e.g. intel x86_64 and
+ * 32bit x86 domains on the same machine). The back-end
+ * always accommodates the front-end's native abi. That
+ * value is pulled from the XenStore and recorded here.
+ */
+ int abi;
- mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+ /**
+ * Name of the bridge to which this VIF is connected, if any
+ * This field is dynamically allocated by xenbus and must be free()ed
+ * when no longer needed
+ */
+ char *bridge;
- xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+ /** The interrupt driven event channel used to signal ring events. */
+ evtchn_port_t evtchn;
- if (j == 16 || i == nr_pages) {
- mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+ /** Xen device handle.*/
+ long handle;
- reservation.nr_extents = j;
+ /** IRQ mapping for the communication ring event channel. */
+ int irq;
+
+ /**
+ * \brief Cached value of the front-end's domain id.
+ *
+ * This value is used at once for each mapped page in
+ * a transaction. We cache it to avoid incurring the
+ * cost of an ivar access every time this is needed.
+ */
+ domid_t otherend_id;
- mcl[j].op = __HYPERVISOR_memory_op;
- mcl[j].args[0] = XENMEM_decrease_reservation;
- mcl[j].args[1] = (unsigned long)&reservation;
-
- (void)HYPERVISOR_multicall(mcl, j+1);
+ /**
+ * Undocumented frontend feature. Has something to do with
+ * scatter/gather IO
+ */
+ uint8_t can_sg;
+ /** Undocumented frontend feature */
+ uint8_t gso;
+ /** Undocumented frontend feature */
+ uint8_t gso_prefix;
+ /** Can checksum TCP/UDP over IPv4 */
+ uint8_t ip_csum;
+
+ /* Implementation related fields */
+ /**
+ * Preallocated grant table copy descriptor for RX operations.
+ * Access must be protected by rx_lock
+ */
+ gnttab_copy_table rx_gnttab;
- mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
- j = 0;
- }
- }
+ /**
+ * Preallocated grant table copy descriptor for TX operations.
+ * Access must be protected by tx_lock
+ */
+ gnttab_copy_table tx_gnttab;
- return (unsigned long)pages;
-}
+#ifdef XENHVM
+ /**
+ * Resource representing allocated physical address space
+ * associated with our per-instance kva region.
+ */
+ struct resource *pseudo_phys_res;
-#ifdef XEN_NETBACK_FIXUP_CSUM
-static void
-fixup_checksum(struct mbuf *m)
-{
- struct ether_header *eh = mtod(m, struct ether_header *);
- struct ip *ip = (struct ip *)(eh + 1);
- int iphlen = ip->ip_hl << 2;
- int iplen = ntohs(ip->ip_len);
-
- if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
- struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
- th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(IPPROTO_TCP + (iplen - iphlen)));
- th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
- m->m_pkthdr.csum_flags &= ~CSUM_TCP;
-#ifdef SCTP
- } else if (sw_csum & CSUM_SCTP) {
- sctp_delayed_cksum(m, iphlen);
- sw_csum &= ~CSUM_SCTP;
-#endif
- } else {
- u_short csum;
- struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
- uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
- htons(IPPROTO_UDP + (iplen - iphlen)));
- if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
- csum = 0xffff;
- uh->uh_sum = csum;
- m->m_pkthdr.csum_flags &= ~CSUM_UDP;
- }
-}
+ /** Resource id for allocated physical address space. */
+ int pseudo_phys_res_id;
#endif
-/* Add the interface to the specified bridge */
-static int
-add_to_bridge(struct ifnet *ifp, char *bridge)
-{
- struct ifdrv ifd;
- struct ifbreq ifb;
- struct ifnet *ifp_bridge = ifunit(bridge);
+ /** Ring mapping and interrupt configuration data. */
+ struct xnb_ring_config ring_configs[XNB_NUM_RING_TYPES];
- if (!ifp_bridge)
- return ENOENT;
+ /**
+ * Global pool of kva used for mapping remote domain ring
+ * and I/O transaction data.
+ */
+ vm_offset_t kva;
- bzero(&ifd, sizeof(ifd));
- bzero(&ifb, sizeof(ifb));
+ /** Pseudo-physical address corresponding to kva. */
+ uint64_t gnt_base_addr;
- strcpy(ifb.ifbr_ifsname, ifp->if_xname);
- strcpy(ifd.ifd_name, ifp->if_xname);
- ifd.ifd_cmd = BRDGADD;
- ifd.ifd_len = sizeof(ifb);
- ifd.ifd_data = &ifb;
+ /** Various configuration and state bit flags. */
+ xnb_flag_t flags;
- return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
-
-}
+ /** Mutex protecting per-instance data in the receive path. */
+ struct mtx rx_lock;
-static int
-netif_create(int handle, struct xenbus_device *xdev, char *bridge)
-{
- netif_t *netif;
- struct ifnet *ifp;
+ /** Mutex protecting per-instance data in the softc structure. */
+ struct mtx sc_lock;
- netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
- if (!netif)
- return ENOMEM;
+ /** Mutex protecting per-instance data in the transmit path. */
+ struct mtx tx_lock;
- netif->ref_cnt = 1;
- netif->handle = handle;
- netif->domid = xdev->otherend_id;
- netif->xdev = xdev;
- netif->bridge = bridge;
- xdev->data = netif;
-
- /* Set up ifnet structure */
- ifp = netif->ifp = if_alloc(IFT_ETHER);
- if (!ifp) {
- if (bridge)
- free(bridge, M_DEVBUF);
- free(netif, M_DEVBUF);
- return ENOMEM;
- }
+ /** The size of the global kva pool. */
+ int kva_size;
+};
- ifp->if_softc = netif;
- if_initname(ifp, "vif",
- atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
- ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
- ifp->if_output = ether_output;
- ifp->if_start = netback_start;
- ifp->if_ioctl = netback_ioctl;
- ifp->if_mtu = ETHERMTU;
- ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
-
- DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+/*---------------------------- Debugging functions ---------------------------*/
+#ifdef XNB_DEBUG
+static void __unused
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-9
mailing list