svn commit: r358808 - in head/sys: kern net netinet

Andrew Gallatin gallatin at FreeBSD.org
Mon Mar 9 13:44:54 UTC 2020


Author: gallatin
Date: Mon Mar  9 13:44:51 2020
New Revision: 358808
URL: https://svnweb.freebsd.org/changeset/base/358808

Log:
  make lacp's use_numa hashing aware of send tags
  
  When I did the use_numa support, I missed the fact that there is
  a separate hash function for send tag nic selection. So when
  use_numa is enabled, ktls offload does not work properly, as it
  does not reliably allocate a send tag on the proper egress nic
  since different egress nics are selected for send-tag allocation
  and packet transmit. To fix this, this change:
  
  - refactors lacp_select_tx_port_by_hash() and
       lacp_select_tx_port() so that lacp_select_tx_port_by_hash()
       is always called by lacp_select_tx_port()
  
  -   pre-shifts flowids to convert them to hashes when calling lacp_select_tx_port_by_hash()
  
  -   adds a numa_domain field to if_snd_tag_alloc_params
  
  -   plumbs the numa domain into places where we allocate send tags
  
  In testing with NIC TLS setup on a NUMA machine, I see thousands
  of output errors before the change when enabling
  kern.ipc.tls.ifnet.permitted=1. After the change, I see no
  errors, and I see the NIC sysctl counters showing active TLS
  offload sessions.
  
  Reviewed by:	rrs, hselasky, jhb
  Sponsored by:	Netflix

Modified:
  head/sys/kern/uipc_ktls.c
  head/sys/net/ieee8023ad_lacp.c
  head/sys/net/ieee8023ad_lacp.h
  head/sys/net/if_lagg.c
  head/sys/net/if_var.h
  head/sys/netinet/in_pcb.c
  head/sys/netinet/tcp_ratelimit.c

Modified: head/sys/kern/uipc_ktls.c
==============================================================================
--- head/sys/kern/uipc_ktls.c	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/kern/uipc_ktls.c	Mon Mar  9 13:44:51 2020	(r358808)
@@ -800,6 +800,7 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_sess
 	params.hdr.type = IF_SND_TAG_TYPE_TLS;
 	params.hdr.flowid = inp->inp_flowid;
 	params.hdr.flowtype = inp->inp_flowtype;
+	params.hdr.numa_domain = inp->inp_numa_domain;
 	params.tls.inp = inp;
 	params.tls.tls = tls;
 	INP_RUNLOCK(inp);

Modified: head/sys/net/ieee8023ad_lacp.c
==============================================================================
--- head/sys/net/ieee8023ad_lacp.c	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/net/ieee8023ad_lacp.c	Mon Mar  9 13:44:51 2020	(r358808)
@@ -832,13 +832,12 @@ lacp_stop(struct lagg_softc *sc)
 }
 
 struct lagg_port *
-lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
+lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_domain)
 {
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
 	struct lacp_portmap *pm;
 	struct lacp_port *lp;
 	struct lacp_port **map;
-	uint32_t hash;
 	int count;
 
 	if (__predict_false(lsc->lsc_suppress_distributing)) {
@@ -854,10 +853,10 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf
 
 #ifdef NUMA
 	if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
-	    pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
-		count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
+	    pm->pm_num_dom > 1 && numa_domain < MAXMEMDOM) {
+		count = pm->pm_numa[numa_domain].count;
 		if (count > 0) {
-			map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
+			map = pm->pm_numa[numa_domain].map;
 		} else {
 			/* No ports on this domain; use global hash. */
 			map = pm->pm_map;
@@ -869,11 +868,6 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf
 		map = pm->pm_map;
 		count = pm->pm_count;
 	}
-	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
-	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
-		hash = m->m_pkthdr.flowid >> sc->flowid_shift;
-	else
-		hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
 
 	hash %= count;
 	lp = map[hash];
@@ -884,33 +878,22 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf
 	return (lp->lp_lagg);
 }
 
-#if defined(RATELIMIT) || defined(KERN_TLS)
 struct lagg_port *
-lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
+lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
 {
 	struct lacp_softc *lsc = LACP_SOFTC(sc);
-	struct lacp_portmap *pm;
-	struct lacp_port *lp;
 	uint32_t hash;
+	uint8_t numa_domain;
 
-	if (__predict_false(lsc->lsc_suppress_distributing)) {
-		LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
-		return (NULL);
-	}
+	if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
+	    M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
+		hash = m->m_pkthdr.flowid >> sc->flowid_shift;
+	else
+		hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
 
-	pm = &lsc->lsc_pmap[lsc->lsc_activemap];
-	if (pm->pm_count == 0) {
-		LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
-		return (NULL);
-	}
-
-	hash = flowid >> sc->flowid_shift;
-	hash %= pm->pm_count;
-	lp = pm->pm_map[hash];
-
-	return (lp->lp_lagg);
+	numa_domain = m->m_pkthdr.numa_domain;
+	return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
 }
-#endif
 
 /*
  * lacp_suppress_distributing: drop transmit packets for a while

Modified: head/sys/net/ieee8023ad_lacp.h
==============================================================================
--- head/sys/net/ieee8023ad_lacp.h	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/net/ieee8023ad_lacp.h	Mon Mar  9 13:44:51 2020	(r358808)
@@ -293,9 +293,7 @@ struct lacp_softc {
 
 struct mbuf	*lacp_input(struct lagg_port *, struct mbuf *);
 struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
-#if defined(RATELIMIT) || defined(KERN_TLS)
-struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
-#endif
+struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t, uint8_t);
 void		lacp_attach(struct lagg_softc *);
 void		lacp_detach(void *);
 void		lacp_init(struct lagg_softc *);

Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/net/if_lagg.c	Mon Mar  9 13:44:51 2020	(r358808)
@@ -1609,12 +1609,13 @@ mst_to_lst(struct m_snd_tag *mst)
  * contents.
  */
 static struct lagg_port *
-lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
+lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
+    uint8_t numa_domain)
 {
 	struct lagg_softc *sc;
 	struct lagg_port *lp;
 	struct lagg_lb *lb;
-	uint32_t p;
+	uint32_t hash, p;
 
 	sc = ifp->if_softc;
 
@@ -1634,7 +1635,8 @@ lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid
 		if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
 		    flowtype == M_HASHTYPE_NONE)
 			return (NULL);
-		return (lacp_select_tx_port_by_hash(sc, flowid));
+		hash = flowid >> sc->flowid_shift;
+		return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
 	default:
 		return (NULL);
 	}
@@ -1654,7 +1656,8 @@ lagg_snd_tag_alloc(struct ifnet *ifp,
 	sc = ifp->if_softc;
 
 	LAGG_RLOCK();
-	lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
+	lp = lookup_snd_tag_port(ifp, params->hdr.flowid,
+	    params->hdr.flowtype, params->hdr.numa_domain);
 	if (lp == NULL) {
 		LAGG_RUNLOCK();
 		return (EOPNOTSUPP);

Modified: head/sys/net/if_var.h
==============================================================================
--- head/sys/net/if_var.h	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/net/if_var.h	Mon Mar  9 13:44:51 2020	(r358808)
@@ -198,6 +198,7 @@ struct if_snd_tag_alloc_header {
 	uint32_t type;		/* send tag type, see IF_SND_TAG_XXX */
 	uint32_t flowid;	/* mbuf hash value */
 	uint32_t flowtype;	/* mbuf hash type */
+	uint8_t numa_domain;	/* numa domain of associated inp */
 };
 
 struct if_snd_tag_alloc_rate_limit {

Modified: head/sys/netinet/in_pcb.c
==============================================================================
--- head/sys/netinet/in_pcb.c	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/netinet/in_pcb.c	Mon Mar  9 13:44:51 2020	(r358808)
@@ -3258,6 +3258,7 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *
 		    IF_SND_TAG_TYPE_UNLIMITED : IF_SND_TAG_TYPE_RATE_LIMIT,
 		.rate_limit.hdr.flowid = flowid,
 		.rate_limit.hdr.flowtype = flowtype,
+		.rate_limit.hdr.numa_domain = inp->inp_numa_domain,
 		.rate_limit.max_rate = max_pacing_rate,
 		.rate_limit.flags = M_NOWAIT,
 	};

Modified: head/sys/netinet/tcp_ratelimit.c
==============================================================================
--- head/sys/netinet/tcp_ratelimit.c	Mon Mar  9 13:36:45 2020	(r358807)
+++ head/sys/netinet/tcp_ratelimit.c	Mon Mar  9 13:44:51 2020	(r358808)
@@ -1006,6 +1006,7 @@ rt_find_real_interface(struct ifnet *ifp, struct inpcb
 	union if_snd_tag_alloc_params params = {
 		.rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT,
 		.rate_limit.hdr.flowid = 1,
+		.rate_limit.hdr.numa_domain = inp->inp_numa_domain,
 		.rate_limit.max_rate = COMMON_RATE,
 		.rate_limit.flags = M_NOWAIT,
 	};


More information about the svn-src-head mailing list