svn commit: r317482 - stable/10/sys/dev/hyperv/netvsc

Sepherosa Ziehau sephe at FreeBSD.org
Thu Apr 27 02:17:46 UTC 2017


Author: sephe
Date: Thu Apr 27 02:17:45 2017
New Revision: 317482
URL: https://svnweb.freebsd.org/changeset/base/317482

Log:
  MFC 317353
  
      hyperv/hn: Use channel0, i.e. TX ring0, for TCP SYN/SYN|ACK.
  
      Hyper-V hot channel effect:
      Operation latency on hot channel is only _half_ of the operation
      latency on cold channels.
  
      This commit takes advantage of the above Hyper-V hot channel
      effect, and can reduce latency by more than 75% and latency
      stdev by more than 50%, i.e. lower and more stable/predictable
      latency, for various types of web server workloads.
  
      Sponsored by:   Microsoft

Modified:
  stable/10/sys/dev/hyperv/netvsc/if_hn.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/hyperv/netvsc/if_hn.c
==============================================================================
--- stable/10/sys/dev/hyperv/netvsc/if_hn.c	Thu Apr 27 01:58:20 2017	(r317481)
+++ stable/10/sys/dev/hyperv/netvsc/if_hn.c	Thu Apr 27 02:17:45 2017	(r317482)
@@ -611,6 +611,16 @@ hn_chim_free(struct hn_softc *sc, uint32
 }
 
 #if defined(INET6) || defined(INET)
+/* If m's first mbuf holds < len bytes, m_pullup() it; on failure the ENCLOSING function returns NULL. */
+#define PULLUP_HDR(m, len)				\
+do {							\
+	if (__predict_false((m)->m_len < (len))) {	\
+		(m) = m_pullup((m), (len));		\
+		if ((m) == NULL)			\
+			return (NULL);			\
+	}						\
+} while (0)
+
 /*
  * NOTE: If this function failed, the m_head would be freed.
  */
@@ -623,15 +633,6 @@ hn_tso_fixup(struct mbuf *m_head)
 
 	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));
 
-#define PULLUP_HDR(m, len)				\
-do {							\
-	if (__predict_false((m)->m_len < (len))) {	\
-		(m) = m_pullup((m), (len));		\
-		if ((m) == NULL)			\
-			return (NULL);			\
-	}						\
-} while (0)
-
 	PULLUP_HDR(m_head, sizeof(*evl));
 	evl = mtod(m_head, struct ether_vlan_header *);
 	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
@@ -680,8 +681,65 @@ do {							\
 #endif
 	return (m_head);
 
-#undef PULLUP_HDR
 }
+
+/* Report via *tcpsyn (1 or 0) whether the TCP header in m_head has TH_SYN set.
+ * Returns the mbuf (possibly replaced by m_pullup()), or NULL if a pullup fails.
+ * NOTE: If this function fails, m_head has already been freed. */
+static __inline struct mbuf *
+hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
+{
+	const struct ether_vlan_header *evl;
+	const struct tcphdr *th;
+	int ehlen;
+
+	*tcpsyn = 0;	/* default: not a SYN; also the result on every early return */
+
+	PULLUP_HDR(m_head, sizeof(*evl));
+	evl = mtod(m_head, const struct ether_vlan_header *);
+	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
+		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;	/* VLAN-tagged frame */
+	else
+		ehlen = ETHER_HDR_LEN;
+
+#ifdef INET
+	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TCP) {	/* parsed as IPv4 */
+		const struct ip *ip;
+		int iphlen;
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
+		ip = mtodo(m_head, ehlen);
+		iphlen = ip->ip_hl << 2;	/* ip_hl is in 32-bit words; convert to bytes */
+
+		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
+		th = mtodo(m_head, ehlen + iphlen);
+		if (th->th_flags & TH_SYN)	/* bit test: matches SYN and SYN|ACK */
+			*tcpsyn = 1;
+	}
+#endif
+#if defined(INET6) && defined(INET)
+	else
+#endif
+#ifdef INET6
+	{	/* parsed as IPv6 */
+		const struct ip6_hdr *ip6;
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
+		ip6 = mtodo(m_head, ehlen);
+		if (ip6->ip6_nxt != IPPROTO_TCP)
+			return (m_head);	/* TCP does not directly follow the fixed IPv6 header */
+
+		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
+		th = mtodo(m_head, ehlen + sizeof(*ip6));
+		if (th->th_flags & TH_SYN)	/* bit test: matches SYN and SYN|ACK */
+			*tcpsyn = 1;
+	}
+#endif
+	return (m_head);
+}
+
+#undef PULLUP_HDR
+
 #endif	/* INET6 || INET */
 
 static int
@@ -4343,8 +4401,28 @@ hn_transmit(struct ifnet *ifp, struct mb
 	/*
 	 * Select the TX ring based on flowid
 	 */
-	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
-		idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
+	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
+#if defined(INET6) || defined(INET)
+		int tcpsyn = 0;
+
+		if (m->m_pkthdr.len < 128 &&
+		    (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) &&
+		    (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
+			m = hn_check_tcpsyn(m, &tcpsyn);
+			if (__predict_false(m == NULL)) {
+				if_inc_counter(ifp,
+				    IFCOUNTER_OERRORS, 1);
+				return (EIO);
+			}
+		}
+#else
+		const int tcpsyn = 0;
+#endif
+		if (tcpsyn)
+			idx = 0;
+		else
+			idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
+	}
 	txr = &sc->hn_tx_ring[idx];
 
 	error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);


More information about the svn-src-stable-10 mailing list