svn commit: r322489 - stable/11/sys/dev/hyperv/netvsc
Sepherosa Ziehau
sephe at FreeBSD.org
Mon Aug 14 06:29:26 UTC 2017
Author: sephe
Date: Mon Aug 14 06:29:24 2017
New Revision: 322489
URL: https://svnweb.freebsd.org/changeset/base/322489
Log:
MFC 322299
hyperv/hn: Implement transparent mode network VF.
How network VF works with hn(4) on Hyper-V in transparent mode:
- Each network VF has a corresponding hn(4).
- The network VF and its corresponding hn(4) have the same hardware
address.
- Once the network VF is attached, the corresponding hn(4) waits several
seconds to make sure that the network VF attach routine completes, then:
o Set the intersection of the network VF's if_capabilities and the
corresponding hn(4)'s if_capabilities to the corresponding hn(4)'s
if_capabilities. And adjust the corresponding hn(4) if_capenable and
if_hwassist accordingly. (*)
o Make sure that the corresponding hn(4)'s TSO parameters meet the
constraints posed by both the network VF and the corresponding hn(4).
(*)
o The network VF's if_input is overridden. The overriding if_input
changes the input packet's rcvif to the corresponding hn(4). The
network layers are tricked into thinking that all packets are
received by the corresponding hn(4).
o If the corresponding hn(4) was brought up, bring up the network VF.
The transmissions dispatched to the corresponding hn(4) are
redispatched to the network VF.
o Bringing down the corresponding hn(4) also brings down the network
VF.
o All IOCTLs issued to the corresponding hn(4) are pass-through'ed to
the network VF; the corresponding hn(4) changes its internal state
if necessary.
o The media status of the corresponding hn(4) solely relies on the
network VF.
o If there are multicast filters on the corresponding hn(4), allmulti
will be enabled on the network VF. (**)
- Once the network VF is detached, undo all the changes made to the
corresponding hn(4) in the items above.
NOTE:
No operation should be issued directly to the network VF, if the
network VF transparent mode is enabled. The network VF transparent mode
can be enabled by setting tunable hw.hn.vf_transparent to 1. The network
VF transparent mode is _not_ enabled by default, as of this commit.
The benefit of the network VF transparent mode is that the network VF
attachment and detachment are transparent to all network layers, e.g. live
migration, which detaches and reattaches the network VF.
The major drawbacks of the network VF transparent mode:
- The netmap(4) support is lost, even if the VF supports it.
- ALTQ does not work, since if_start method cannot be properly supported.
(*)
These decisions were made so that things will not be messed up too much
during the transition period.
(**)
This does _not_ need to go through the fancy multicast filter management
stuffs like what vlan(4) has, at least currently:
- As of this writing, multicast does not work in Azure.
- As of this writing, multicast packets go through the corresponding hn(4).
Sponsored by: Microsoft
Differential Revision: https://reviews.freebsd.org/D11803
Modified:
stable/11/sys/dev/hyperv/netvsc/if_hn.c
stable/11/sys/dev/hyperv/netvsc/if_hnreg.h
stable/11/sys/dev/hyperv/netvsc/if_hnvar.h
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/dev/hyperv/netvsc/if_hn.c
==============================================================================
--- stable/11/sys/dev/hyperv/netvsc/if_hn.c Mon Aug 14 06:00:50 2017 (r322488)
+++ stable/11/sys/dev/hyperv/netvsc/if_hn.c Mon Aug 14 06:29:24 2017 (r322489)
@@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$");
#define HN_VFMAP_SIZE_DEF 8
+#define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */
+
/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT 512
@@ -263,6 +265,7 @@ static void hn_ifnet_event(void *, struct ifnet *, i
static void hn_ifaddr_event(void *, struct ifnet *);
static void hn_ifnet_attevent(void *, struct ifnet *);
static void hn_ifnet_detevent(void *, struct ifnet *);
+static void hn_ifnet_lnkevent(void *, struct ifnet *, int);
static bool hn_ismyvf(const struct hn_softc *,
const struct ifnet *);
@@ -270,6 +273,15 @@ static void hn_rxvf_change(struct hn_softc *,
struct ifnet *, bool);
static void hn_rxvf_set(struct hn_softc *, struct ifnet *);
static void hn_rxvf_set_task(void *, int);
+static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *);
+static int hn_xpnt_vf_iocsetflags(struct hn_softc *);
+static int hn_xpnt_vf_iocsetcaps(struct hn_softc *,
+ struct ifreq *);
+static void hn_xpnt_vf_saveifflags(struct hn_softc *);
+static bool hn_xpnt_vf_isready(struct hn_softc *);
+static void hn_xpnt_vf_setready(struct hn_softc *);
+static void hn_xpnt_vf_init_taskfunc(void *, int);
+static void hn_xpnt_vf_init(struct hn_softc *);
static int hn_rndis_rxinfo(const void *, int,
struct hn_rxinfo *);
@@ -322,6 +334,8 @@ static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS);
static void hn_stop(struct hn_softc *, bool);
static void hn_init_locked(struct hn_softc *);
@@ -352,6 +366,7 @@ static void hn_disable_rx(struct hn_softc *);
static void hn_drain_rxtx(struct hn_softc *, int);
static void hn_polling(struct hn_softc *, u_int);
static void hn_chan_polling(struct vmbus_channel *, u_int);
+static void hn_mtu_change_fixup(struct hn_softc *);
static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
@@ -529,6 +544,22 @@ SYSCTL_PROC(_hw_hn, OID_AUTO, vflist, CTLFLAG_RD | CTL
SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING,
0, 0, hn_vfmap_sysctl, "A", "VF mapping");
+/* Transparent VF */
+static int hn_xpnt_vf = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN,
+ &hn_xpnt_vf, 0, "Transparent VF mod");
+
+/* Accurate BPF support for Transparent VF */
+static int hn_xpnt_vf_accbpf = 0;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN,
+ &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF");
+
+/* Extra wait for transparent VF attach routing; unit seconds. */
+static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN;
+SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN,
+ &hn_xpnt_vf_attwait, 0,
+ "Extra wait for transparent VF attach routing; unit: seconds");
+
static u_int hn_cpu_index; /* next CPU for channel */
static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */
@@ -807,8 +838,12 @@ hn_rxfilter_config(struct hn_softc *sc)
HN_LOCK_ASSERT(sc);
- if ((ifp->if_flags & IFF_PROMISC) ||
- (sc->hn_flags & HN_FLAG_RXVF)) {
+ /*
+ * If the non-transparent mode VF is activated, we don't know how
+ * its RX filter is configured, so stick the synthetic device in
+ * the promiscous mode.
+ */
+ if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) {
filter = NDIS_PACKET_TYPE_PROMISCUOUS;
} else {
filter = NDIS_PACKET_TYPE_DIRECTED;
@@ -1086,7 +1121,7 @@ hn_rxvf_change(struct hn_softc *sc, struct ifnet *ifp,
}
hn_nvs_set_datapath(sc,
- rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);
+ rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH);
hn_rxvf_set(sc, rxvf ? ifp : NULL);
@@ -1126,7 +1161,323 @@ hn_ifaddr_event(void *arg, struct ifnet *ifp)
hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP);
}
+static int
+hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr)
+{
+ struct ifnet *ifp, *vf_ifp;
+ uint64_t tmp;
+ int error;
+
+ HN_LOCK_ASSERT(sc);
+ ifp = sc->hn_ifp;
+ vf_ifp = sc->hn_vf_ifp;
+
+ /*
+ * Fix up requested capabilities w/ supported capabilities,
+ * since the supported capabilities could have been changed.
+ */
+ ifr->ifr_reqcap &= ifp->if_capabilities;
+ /* Pass SIOCSIFCAP to VF. */
+ error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr);
+
+ /*
+ * NOTE:
+ * The error will be propagated to the callers, however, it
+ * is _not_ useful here.
+ */
+
+ /*
+ * Merge VF's enabled capabilities.
+ */
+ ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities;
+
+ tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc);
+ if (ifp->if_capenable & IFCAP_TXCSUM)
+ ifp->if_hwassist |= tmp;
+ else
+ ifp->if_hwassist &= ~tmp;
+
+ tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc);
+ if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
+ ifp->if_hwassist |= tmp;
+ else
+ ifp->if_hwassist &= ~tmp;
+
+ tmp = vf_ifp->if_hwassist & CSUM_IP_TSO;
+ if (ifp->if_capenable & IFCAP_TSO4)
+ ifp->if_hwassist |= tmp;
+ else
+ ifp->if_hwassist &= ~tmp;
+
+ tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO;
+ if (ifp->if_capenable & IFCAP_TSO6)
+ ifp->if_hwassist |= tmp;
+ else
+ ifp->if_hwassist &= ~tmp;
+
+ return (error);
+}
+
+static int
+hn_xpnt_vf_iocsetflags(struct hn_softc *sc)
+{
+ struct ifnet *vf_ifp;
+ struct ifreq ifr;
+
+ HN_LOCK_ASSERT(sc);
+ vf_ifp = sc->hn_vf_ifp;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+ ifr.ifr_flags = vf_ifp->if_flags & 0xffff;
+ ifr.ifr_flagshigh = vf_ifp->if_flags >> 16;
+ return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr));
+}
+
static void
+hn_xpnt_vf_saveifflags(struct hn_softc *sc)
+{
+ struct ifnet *ifp = sc->hn_ifp;
+ int allmulti = 0;
+
+ HN_LOCK_ASSERT(sc);
+
+ /* XXX vlan(4) style mcast addr maintenance */
+ if (!TAILQ_EMPTY(&ifp->if_multiaddrs))
+ allmulti = IFF_ALLMULTI;
+
+ /* Always set the VF's if_flags */
+ sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti;
+}
+
+static void
+hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m)
+{
+ struct rm_priotracker pt;
+ struct ifnet *hn_ifp = NULL;
+ struct mbuf *mn;
+
+ /*
+ * XXX racy, if hn(4) ever detached.
+ */
+ rm_rlock(&hn_vfmap_lock, &pt);
+ if (vf_ifp->if_index < hn_vfmap_size)
+ hn_ifp = hn_vfmap[vf_ifp->if_index];
+ rm_runlock(&hn_vfmap_lock, &pt);
+
+ if (hn_ifp != NULL) {
+ /*
+ * Fix up rcvif and go through hn(4)'s if_input and
+ * increase ipackets.
+ */
+ for (mn = m; mn != NULL; mn = mn->m_nextpkt) {
+ /* Allow tapping on the VF. */
+ ETHER_BPF_MTAP(vf_ifp, mn);
+ mn->m_pkthdr.rcvif = hn_ifp;
+ if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1);
+ }
+ hn_ifp->if_input(hn_ifp, m);
+ } else {
+ /*
+ * In the middle of the transition; free this
+ * mbuf chain.
+ */
+ while (m != NULL) {
+ mn = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ m_freem(m);
+ m = mn;
+ }
+ }
+}
+
+static void
+hn_mtu_change_fixup(struct hn_softc *sc)
+{
+ struct ifnet *ifp;
+
+ HN_LOCK_ASSERT(sc);
+ ifp = sc->hn_ifp;
+
+ hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
+#if __FreeBSD_version >= 1100099
+ if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp))
+ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
+#endif
+}
+
+static void
+hn_xpnt_vf_setready(struct hn_softc *sc)
+{
+ struct ifnet *ifp, *vf_ifp;
+ struct ifreq ifr;
+
+ HN_LOCK_ASSERT(sc);
+ ifp = sc->hn_ifp;
+ vf_ifp = sc->hn_vf_ifp;
+
+ /*
+ * Mark the VF ready.
+ */
+ sc->hn_vf_rdytick = 0;
+
+ /*
+ * Save information for restoration.
+ */
+ sc->hn_saved_caps = ifp->if_capabilities;
+ sc->hn_saved_tsomax = ifp->if_hw_tsomax;
+ sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount;
+ sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize;
+
+ /*
+ * Intersect supported/enabled capabilities.
+ *
+ * NOTE:
+ * if_hwassist is not changed here.
+ */
+ ifp->if_capabilities &= vf_ifp->if_capabilities;
+ ifp->if_capenable &= ifp->if_capabilities;
+
+ /*
+ * Fix TSO settings.
+ */
+ if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax)
+ ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax;
+ if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount)
+ ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount;
+ if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize)
+ ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize;
+
+ /*
+ * Change VF's enabled capabilities.
+ */
+ memset(&ifr, 0, sizeof(ifr));
+ strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+ ifr.ifr_reqcap = ifp->if_capenable;
+ hn_xpnt_vf_iocsetcaps(sc, &ifr);
+
+ if (ifp->if_mtu != ETHERMTU) {
+ int error;
+
+ /*
+ * Change VF's MTU.
+ */
+ memset(&ifr, 0, sizeof(ifr));
+ strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name));
+ ifr.ifr_mtu = ifp->if_mtu;
+ error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr);
+ if (error) {
+ if_printf(ifp, "%s SIOCSIFMTU %u failed\n",
+ vf_ifp->if_xname, ifp->if_mtu);
+ if (ifp->if_mtu > ETHERMTU) {
+ if_printf(ifp, "change MTU to %d\n", ETHERMTU);
+
+ /*
+ * XXX
+ * No need to adjust the synthetic parts' MTU;
+ * failure of the adjustment will cause us
+ * infinite headache.
+ */
+ ifp->if_mtu = ETHERMTU;
+ hn_mtu_change_fixup(sc);
+ }
+ }
+ }
+}
+
+static bool
+hn_xpnt_vf_isready(struct hn_softc *sc)
+{
+
+ HN_LOCK_ASSERT(sc);
+
+ if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL)
+ return (false);
+
+ if (sc->hn_vf_rdytick == 0)
+ return (true);
+
+ if (sc->hn_vf_rdytick > ticks)
+ return (false);
+
+ /* Mark VF as ready. */
+ hn_xpnt_vf_setready(sc);
+ return (true);
+}
+
+static void
+hn_xpnt_vf_init(struct hn_softc *sc)
+{
+ int error;
+
+ HN_LOCK_ASSERT(sc);
+
+ KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
+ ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
+
+ if (bootverbose) {
+ if_printf(sc->hn_ifp, "try bringing up %s\n",
+ sc->hn_vf_ifp->if_xname);
+ }
+
+ /*
+ * Bring the VF up.
+ */
+ hn_xpnt_vf_saveifflags(sc);
+ sc->hn_vf_ifp->if_flags |= IFF_UP;
+ error = hn_xpnt_vf_iocsetflags(sc);
+ if (error) {
+ if_printf(sc->hn_ifp, "bringing up %s failed: %d\n",
+ sc->hn_vf_ifp->if_xname, error);
+ return;
+ }
+
+ /*
+ * NOTE:
+ * Datapath setting must happen _after_ bringing the VF up.
+ */
+ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
+
+ /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+ rm_wlock(&sc->hn_vf_lock);
+ sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED;
+ rm_wunlock(&sc->hn_vf_lock);
+}
+
+static void
+hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused)
+{
+ struct hn_softc *sc = xsc;
+
+ HN_LOCK(sc);
+
+ if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
+ goto done;
+ if (sc->hn_vf_ifp == NULL)
+ goto done;
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+ goto done;
+
+ if (sc->hn_vf_rdytick != 0) {
+ /* Mark VF as ready. */
+ hn_xpnt_vf_setready(sc);
+ }
+
+ if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) {
+ /*
+ * Delayed VF initialization.
+ */
+ if (bootverbose) {
+ if_printf(sc->hn_ifp, "delayed initialize %s\n",
+ sc->hn_vf_ifp->if_xname);
+ }
+ hn_xpnt_vf_init(sc);
+ }
+done:
+ HN_UNLOCK(sc);
+}
+
+static void
hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
{
struct hn_softc *sc = xsc;
@@ -1145,6 +1496,16 @@ hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
goto done;
}
+ if (hn_xpnt_vf && ifp->if_start != NULL) {
+ /*
+ * ifnet.if_start is _not_ supported by transparent
+ * mode VF; mainly due to the IFF_DRV_OACTIVE flag.
+ */
+ if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported "
+ "in transparent VF mode.\n", ifp->if_xname);
+ goto done;
+ }
+
rm_wlock(&hn_vfmap_lock);
if (ifp->if_index >= hn_vfmap_size) {
@@ -1168,7 +1529,37 @@ hn_ifnet_attevent(void *xsc, struct ifnet *ifp)
rm_wunlock(&hn_vfmap_lock);
+ /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+ rm_wlock(&sc->hn_vf_lock);
+ KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0,
+ ("%s: transparent VF was enabled", sc->hn_ifp->if_xname));
sc->hn_vf_ifp = ifp;
+ rm_wunlock(&sc->hn_vf_lock);
+
+ if (hn_xpnt_vf) {
+ int wait_ticks;
+
+ /*
+ * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp.
+ * Save vf_ifp's current if_input for later restoration.
+ */
+ sc->hn_vf_input = ifp->if_input;
+ ifp->if_input = hn_xpnt_vf_input;
+
+ /*
+ * Stop link status management; use the VF's.
+ */
+ hn_suspend_mgmt(sc);
+
+ /*
+ * Give VF sometime to complete its attach routing.
+ */
+ wait_ticks = hn_xpnt_vf_attwait * hz;
+ sc->hn_vf_rdytick = ticks + wait_ticks;
+
+ taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init,
+ wait_ticks);
+ }
done:
HN_UNLOCK(sc);
}
@@ -1186,7 +1577,61 @@ hn_ifnet_detevent(void *xsc, struct ifnet *ifp)
if (!hn_ismyvf(sc, ifp))
goto done;
+ if (hn_xpnt_vf) {
+ /*
+ * Make sure that the delayed initialization is not running.
+ *
+ * NOTE:
+ * - This lock _must_ be released, since the hn_vf_init task
+ * will try holding this lock.
+ * - It is safe to release this lock here, since the
+ * hn_ifnet_attevent() is interlocked by the hn_vf_ifp.
+ *
+ * XXX racy, if hn(4) ever detached.
+ */
+ HN_UNLOCK(sc);
+ taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init);
+ HN_LOCK(sc);
+
+ KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved",
+ sc->hn_ifp->if_xname));
+ ifp->if_input = sc->hn_vf_input;
+ sc->hn_vf_input = NULL;
+
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
+
+ if (sc->hn_vf_rdytick == 0) {
+ /*
+ * The VF was ready; restore some settings.
+ */
+ sc->hn_ifp->if_capabilities = sc->hn_saved_caps;
+ /*
+ * NOTE:
+ * There is _no_ need to fixup if_capenable and
+ * if_hwassist, since the if_capabilities before
+ * restoration was an intersection of the VF's
+ * if_capabilites and the synthetic device's
+ * if_capabilites.
+ */
+ sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax;
+ sc->hn_ifp->if_hw_tsomaxsegcount =
+ sc->hn_saved_tsosegcnt;
+ sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz;
+ }
+
+ /*
+ * Resume link status management, which was suspended
+ * by hn_ifnet_attevent().
+ */
+ hn_resume_mgmt(sc);
+ }
+
+ /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */
+ rm_wlock(&sc->hn_vf_lock);
+ sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
sc->hn_vf_ifp = NULL;
+ rm_wunlock(&sc->hn_vf_lock);
rm_wlock(&hn_vfmap_lock);
@@ -1205,6 +1650,15 @@ done:
HN_UNLOCK(sc);
}
+static void
+hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state)
+{
+ struct hn_softc *sc = xsc;
+
+ if (sc->hn_vf_ifp == ifp)
+ if_link_state_change(sc->hn_ifp, link_state);
+}
+
/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
@@ -1236,6 +1690,9 @@ hn_attach(device_t dev)
sc->hn_dev = dev;
sc->hn_prichan = vmbus_get_channel(dev);
HN_LOCK_INIT(sc);
+ rm_init(&sc->hn_vf_lock, "hnvf");
+ if (hn_xpnt_vf && hn_xpnt_vf_accbpf)
+ sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
/*
* Initialize these tunables once.
@@ -1275,6 +1732,18 @@ hn_attach(device_t dev)
TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
hn_netchg_status_taskfunc, sc);
+ if (hn_xpnt_vf) {
+ /*
+ * Setup taskqueue for VF tasks, e.g. delayed VF bringing up.
+ */
+ sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK,
+ taskqueue_thread_enqueue, &sc->hn_vf_taskq);
+ taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf",
+ device_get_nameunit(dev));
+ TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0,
+ hn_xpnt_vf_init_taskfunc, sc);
+ }
+
/*
* Allocate ifnet and setup its name earlier, so that if_printf
* can be used by functions, which will be called after
@@ -1401,6 +1870,14 @@ hn_attach(device_t dev)
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_hwassist_sysctl, "A", "hwassist");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max",
+ CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt",
+ CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0,
+ "max # of TSO segments");
+ SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz",
+ CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0,
+ "max size of TSO segment");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_rxfilter_sysctl, "A", "rxfilter");
@@ -1445,9 +1922,20 @@ hn_attach(device_t dev)
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_vf_sysctl, "A", "Virtual Function's name");
- SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
- CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
- hn_rxvf_sysctl, "A", "activated Virtual Function's name");
+ if (!hn_xpnt_vf) {
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf",
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+ hn_rxvf_sysctl, "A", "activated Virtual Function's name");
+ } else {
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled",
+ CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
+ hn_xpnt_vf_enabled_sysctl, "I",
+ "Transparent VF enabled");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf",
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
+ hn_xpnt_vf_accbpf_sysctl, "I",
+ "Accurate BPF for transparent VF");
+ }
/*
* Setup the ifmedia, which has been initialized earlier.
@@ -1480,7 +1968,7 @@ hn_attach(device_t dev)
ifp->if_qflush = hn_xmit_qflush;
}
- ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
+ ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE;
#ifdef foo
/* We can't diff IPv6 packets from IPv4 packets on RX path. */
ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
@@ -1515,7 +2003,13 @@ hn_attach(device_t dev)
ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);
if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
+ /*
+ * Lock hn_set_tso_maxsize() to simplify its
+ * internal logic.
+ */
+ HN_LOCK(sc);
hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
+ HN_UNLOCK(sc);
ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
}
@@ -1536,10 +2030,15 @@ hn_attach(device_t dev)
sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
hn_update_link_status(sc);
- sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
- hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
- sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
- hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
+ if (!hn_xpnt_vf) {
+ sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
+ hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
+ sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
+ hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
+ } else {
+ sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event,
+ hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY);
+ }
/*
* NOTE:
@@ -1566,6 +2065,14 @@ hn_detach(device_t dev)
struct hn_softc *sc = device_get_softc(dev);
struct ifnet *ifp = sc->hn_ifp, *vf_ifp;
+ if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
+ /*
+ * In case that the vmbus missed the orphan handler
+ * installation.
+ */
+ vmbus_xact_ctx_orphan(sc->hn_xact);
+ }
+
if (sc->hn_ifaddr_evthand != NULL)
EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
if (sc->hn_ifnet_evthand != NULL)
@@ -1578,20 +2085,14 @@ hn_detach(device_t dev)
EVENTHANDLER_DEREGISTER(ifnet_departure_event,
sc->hn_ifnet_dethand);
}
+ if (sc->hn_ifnet_lnkhand != NULL)
+ EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand);
vf_ifp = sc->hn_vf_ifp;
__compiler_membar();
if (vf_ifp != NULL)
hn_ifnet_detevent(sc, vf_ifp);
- if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
- /*
- * In case that the vmbus missed the orphan handler
- * installation.
- */
- vmbus_xact_ctx_orphan(sc->hn_xact);
- }
-
if (device_is_attached(dev)) {
HN_LOCK(sc);
if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
@@ -1621,6 +2122,8 @@ hn_detach(device_t dev)
free(sc->hn_tx_taskqs, M_DEVBUF);
}
taskqueue_free(sc->hn_mgmt_taskq0);
+ if (sc->hn_vf_taskq != NULL)
+ taskqueue_free(sc->hn_vf_taskq);
if (sc->hn_xact != NULL) {
/*
@@ -1634,6 +2137,7 @@ hn_detach(device_t dev)
if_free(ifp);
HN_LOCK_DESTROY(sc);
+ rm_destroy(&sc->hn_vf_lock);
return (0);
}
@@ -2699,7 +3203,8 @@ static int
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
struct hn_softc *sc = ifp->if_softc;
- struct ifreq *ifr = (struct ifreq *)data;
+ struct ifreq *ifr = (struct ifreq *)data, ifr_vf;
+ struct ifnet *vf_ifp;
int mask, error = 0;
switch (cmd) {
@@ -2728,6 +3233,21 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
+ if (hn_xpnt_vf_isready(sc)) {
+ vf_ifp = sc->hn_vf_ifp;
+ ifr_vf = *ifr;
+ strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname,
+ sizeof(ifr_vf.ifr_name));
+ error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU,
+ (caddr_t)&ifr_vf);
+ if (error) {
+ HN_UNLOCK(sc);
+ if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n",
+ vf_ifp->if_xname, ifr->ifr_mtu, error);
+ break;
+ }
+ }
+
/*
* Suspend this interface before the synthetic parts
* are ripped.
@@ -2756,23 +3276,32 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
ifp->if_mtu = ifr->ifr_mtu;
/*
- * Make sure that various parameters based on MTU are
- * still valid, after the MTU change.
+ * Synthetic parts' reattach may change the chimney
+ * sending size; update it.
*/
if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
hn_set_chim_size(sc, sc->hn_chim_szmax);
- hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
-#if __FreeBSD_version >= 1100099
- if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
- HN_LRO_LENLIM_MIN(ifp))
- hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
-#endif
/*
+ * Make sure that various parameters based on MTU are
+ * still valid, after the MTU change.
+ */
+ hn_mtu_change_fixup(sc);
+
+ /*
* All done! Resume the interface now.
*/
hn_resume(sc);
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+ /*
+ * Since we have reattached the NVS part,
+ * change the datapath to VF again; in case
+ * that it is lost, after the NVS was detached.
+ */
+ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF);
+ }
+
HN_UNLOCK(sc);
break;
@@ -2784,6 +3313,9 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}
+ if (hn_xpnt_vf_isready(sc))
+ hn_xpnt_vf_saveifflags(sc);
+
if (ifp->if_flags & IFF_UP) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
/*
@@ -2794,6 +3326,9 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
HN_NO_SLEEPING(sc);
hn_rxfilter_config(sc);
HN_SLEEPING_OK(sc);
+
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+ error = hn_xpnt_vf_iocsetflags(sc);
} else {
hn_init_locked(sc);
}
@@ -2808,8 +3343,23 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
case SIOCSIFCAP:
HN_LOCK(sc);
- mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+ if (hn_xpnt_vf_isready(sc)) {
+ ifr_vf = *ifr;
+ strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname,
+ sizeof(ifr_vf.ifr_name));
+ error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf);
+ HN_UNLOCK(sc);
+ break;
+ }
+
+ /*
+ * Fix up requested capabilities w/ supported capabilities,
+ * since the supported capabilities could have been changed.
+ */
+ mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^
+ ifp->if_capenable;
+
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
if (ifp->if_capenable & IFCAP_TXCSUM)
@@ -2873,11 +3423,42 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
HN_SLEEPING_OK(sc);
}
+ /* XXX vlan(4) style mcast addr maintenance */
+ if (hn_xpnt_vf_isready(sc)) {
+ int old_if_flags;
+
+ old_if_flags = sc->hn_vf_ifp->if_flags;
+ hn_xpnt_vf_saveifflags(sc);
+
+ if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) &&
+ ((old_if_flags ^ sc->hn_vf_ifp->if_flags) &
+ IFF_ALLMULTI))
+ error = hn_xpnt_vf_iocsetflags(sc);
+ }
+
HN_UNLOCK(sc);
break;
case SIOCSIFMEDIA:
case SIOCGIFMEDIA:
+ HN_LOCK(sc);
+ if (hn_xpnt_vf_isready(sc)) {
+ /*
+ * SIOCGIFMEDIA expects ifmediareq, so don't
+ * create and pass ifr_vf to the VF here; just
+ * replace the ifr_name.
+ */
+ vf_ifp = sc->hn_vf_ifp;
+ strlcpy(ifr->ifr_name, vf_ifp->if_xname,
+ sizeof(ifr->ifr_name));
+ error = vf_ifp->if_ioctl(vf_ifp, cmd, data);
+ /* Restore the ifr_name. */
+ strlcpy(ifr->ifr_name, ifp->if_xname,
+ sizeof(ifr->ifr_name));
+ HN_UNLOCK(sc);
+ break;
+ }
+ HN_UNLOCK(sc);
error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
break;
@@ -2899,11 +3480,37 @@ hn_stop(struct hn_softc *sc, bool detaching)
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("synthetic parts were not attached"));
+ /* Clear RUNNING bit ASAP. */
+ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+
/* Disable polling. */
hn_polling(sc, 0);
- /* Clear RUNNING bit _before_ hn_suspend_data() */
- atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) {
+ KASSERT(sc->hn_vf_ifp != NULL,
+ ("%s: VF is not attached", ifp->if_xname));
+
+ /* NOTE: hn_vf_lock for hn_transmit() */
+ rm_wlock(&sc->hn_vf_lock);
+ sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED;
+ rm_wunlock(&sc->hn_vf_lock);
+
+ /*
+ * NOTE:
+ * Datapath setting must happen _before_ bringing
+ * the VF down.
+ */
+ hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH);
+
+ /*
+ * Bring the VF down.
+ */
+ hn_xpnt_vf_saveifflags(sc);
+ sc->hn_vf_ifp->if_flags &= ~IFF_UP;
+ hn_xpnt_vf_iocsetflags(sc);
+ }
+
+ /* Suspend data transfers. */
hn_suspend_data(sc);
/* Clear OACTIVE bit. */
@@ -2912,8 +3519,8 @@ hn_stop(struct hn_softc *sc, bool detaching)
sc->hn_tx_ring[i].hn_oactive = 0;
/*
- * If the VF is active, make sure the filter is not 0, even if
- * the synthetic NIC is down.
+ * If the non-transparent mode VF is active, make sure
+ * that the RX filter still allows packet reception.
*/
if (!detaching && (sc->hn_flags & HN_FLAG_RXVF))
hn_rxfilter_config(sc);
@@ -2944,6 +3551,11 @@ hn_init_locked(struct hn_softc *sc)
/* Clear TX 'suspended' bit. */
hn_resume_tx(sc, sc->hn_tx_ring_inuse);
+ if (hn_xpnt_vf_isready(sc)) {
+ /* Initialize transparent VF. */
+ hn_xpnt_vf_init(sc);
+ }
+
/* Everything is ready; unleash! */
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
@@ -3567,6 +4179,42 @@ hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS)
}
static int
+hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int error, onoff = 0;
+
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF)
+ onoff = 1;
+ error = sysctl_handle_int(oidp, &onoff, 0, req);
+ if (error || req->newptr == NULL)
+ return (error);
+
+ HN_LOCK(sc);
+ /* NOTE: hn_vf_lock for hn_transmit() */
+ rm_wlock(&sc->hn_vf_lock);
+ if (onoff)
+ sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF;
+ else
+ sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF;
+ rm_wunlock(&sc->hn_vf_lock);
+ HN_UNLOCK(sc);
+
+ return (0);
+}
+
+static int
+hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int enabled = 0;
+
+ if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)
+ enabled = 1;
+ return (sysctl_handle_int(oidp, &enabled, 0, req));
+}
+
+static int
hn_check_iplen(const struct mbuf *m, int hoff)
{
const struct ip *ip;
@@ -4282,8 +4930,11 @@ static void
hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
{
struct ifnet *ifp = sc->hn_ifp;
+ u_int hw_tsomax;
int tso_minlen;
+ HN_LOCK_ASSERT(sc);
+
if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
return;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list