git: 2707135d0959 - stable/14 - hpts/lro: make tcp_lro_flush_tcphpts() and tcp_run_hpts() pointers

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Tue, 16 Jan 2024 19:05:18 UTC
The branch stable/14 has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=2707135d0959fe0ccabd872b23f3082a5a156a35

commit 2707135d0959fe0ccabd872b23f3082a5a156a35
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2023-12-04 18:19:46 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2024-01-16 18:47:49 +0000

    hpts/lro: make tcp_lro_flush_tcphpts() and tcp_run_hpts() pointers
    
    Rename tcp_run_hpts() to tcp_hpts_softclock() to better describe its
    function.  This makes the loadable hpts.ko work correctly with LRO.
    
    Reviewed by:            tuexen, rrs
    Differential Revision:  https://reviews.freebsd.org/D42858
    
    (cherry picked from commit 2c6fc36a0ddd4d741e2c206855d2dff9b008005a)
---
 sys/netinet/tcp_hpts.c     | 104 +++++++++++++++++++++------------------------
 sys/netinet/tcp_hpts.h     |   3 +-
 sys/netinet/tcp_lro.c      |  24 ++++-------
 sys/netinet/tcp_lro.h      |   2 +-
 sys/netinet/tcp_lro_hpts.c |  13 +++++-
 5 files changed, 71 insertions(+), 75 deletions(-)

diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
index a85eca08b815..f1b729c249c6 100644
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -1497,11 +1497,56 @@ __tcp_set_hpts(struct tcpcb *tp, int32_t line)
 	mtx_unlock(&hpts->p_mtx);
 }
 
+static struct tcp_hpts_entry *
+tcp_choose_hpts_to_run(void)
+{
+	int i, oldest_idx, start, end;
+	uint32_t cts, time_since_ran, calc;
+
+	cts = tcp_get_usecs(NULL);
+	time_since_ran = 0;
+	/* Default is all one group */
+	start = 0;
+	end = tcp_pace.rp_num_hptss;
+	/*
+	 * If we have more than one L3 group figure out which one
+	 * this CPU is in.
+	 */
+	if (tcp_pace.grp_cnt > 1) {
+		for (i = 0; i < tcp_pace.grp_cnt; i++) {
+			if (CPU_ISSET(curcpu, &tcp_pace.grps[i]->cg_mask)) {
+				start = tcp_pace.grps[i]->cg_first;
+				end = (tcp_pace.grps[i]->cg_last + 1);
+				break;
+			}
+		}
+	}
+	oldest_idx = -1;
+	for (i = start; i < end; i++) {
+		if (TSTMP_GT(cts, cts_last_ran[i]))
+			calc = cts - cts_last_ran[i];
+		else
+			calc = 0;
+		if (calc > time_since_ran) {
+			oldest_idx = i;
+			time_since_ran = calc;
+		}
+	}
+	if (oldest_idx >= 0)
+		return(tcp_pace.rp_ent[oldest_idx]);
+	else
+		return(tcp_pace.rp_ent[(curcpu % tcp_pace.rp_num_hptss)]);
+}
+
 static void
-__tcp_run_hpts(struct tcp_hpts_entry *hpts)
+__tcp_run_hpts(void)
 {
+	struct epoch_tracker et;
+	struct tcp_hpts_entry *hpts;
 	int ticks_ran;
 
+	hpts = tcp_choose_hpts_to_run();
+
 	if (hpts->p_hpts_active) {
 		/* Already active */
 		return;
@@ -1510,6 +1555,7 @@ __tcp_run_hpts(struct tcp_hpts_entry *hpts)
 		/* Someone else got the lock */
 		return;
 	}
+	NET_EPOCH_ENTER(et);
 	if (hpts->p_hpts_active)
 		goto out_with_mtx;
 	hpts->syscall_cnt++;
@@ -1562,63 +1608,9 @@ __tcp_run_hpts(struct tcp_hpts_entry *hpts)
 out_with_mtx:
 	HPTS_MTX_ASSERT(hpts);
 	mtx_unlock(&hpts->p_mtx);
-}
-
-static struct tcp_hpts_entry *
-tcp_choose_hpts_to_run(void)
-{
-	int i, oldest_idx, start, end;
-	uint32_t cts, time_since_ran, calc;
-
-	cts = tcp_get_usecs(NULL);
-	time_since_ran = 0;
-	/* Default is all one group */
-	start = 0;
-	end = tcp_pace.rp_num_hptss;
-	/*
-	 * If we have more than one L3 group figure out which one
-	 * this CPU is in.
-	 */
-	if (tcp_pace.grp_cnt > 1) {
-		for (i = 0; i < tcp_pace.grp_cnt; i++) {
-			if (CPU_ISSET(curcpu, &tcp_pace.grps[i]->cg_mask)) {
-				start = tcp_pace.grps[i]->cg_first;
-				end = (tcp_pace.grps[i]->cg_last + 1);
-				break;
-			}
-		}
-	}
-	oldest_idx = -1;
-	for (i = start; i < end; i++) {
-		if (TSTMP_GT(cts, cts_last_ran[i]))
-			calc = cts - cts_last_ran[i];
-		else
-			calc = 0;
-		if (calc > time_since_ran) {
-			oldest_idx = i;
-			time_since_ran = calc;
-		}
-	}
-	if (oldest_idx >= 0)
-		return(tcp_pace.rp_ent[oldest_idx]);
-	else
-		return(tcp_pace.rp_ent[(curcpu % tcp_pace.rp_num_hptss)]);
-}
-
-
-void
-tcp_run_hpts(void)
-{
-	struct tcp_hpts_entry *hpts;
-	struct epoch_tracker et;
-
-	NET_EPOCH_ENTER(et);
-	hpts = tcp_choose_hpts_to_run();
-	__tcp_run_hpts(hpts);
 	NET_EPOCH_EXIT(et);
 }
 
-
 static void
 tcp_hpts_thread(void *ctx)
 {
@@ -2009,6 +2001,8 @@ tcp_init_hptsi(void *st)
 			break;
 		}
 	}
+	tcp_hpts_softclock = __tcp_run_hpts;
+	tcp_lro_hpts_init();
 	printf("TCP Hpts created %d swi interrupt threads and bound %d to %s\n",
 	    created, bound,
 	    tcp_bind_threads == 2 ? "NUMA domains" : "cpus");
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
index 514ab84227b5..8ca21daf60de 100644
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -152,7 +152,8 @@ void __tcp_set_hpts(struct tcpcb *tp, int32_t line);
 
 void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
 
-void tcp_run_hpts(void);
+extern void (*tcp_hpts_softclock)(void);
+void tcp_lro_hpts_init(void);
 
 extern int32_t tcp_min_hptsi_time;
 
diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c
index 6cf0411b5f65..255e543ae21d 100644
--- a/sys/netinet/tcp_lro.c
+++ b/sys/netinet/tcp_lro.c
@@ -88,6 +88,9 @@ SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro,  CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
     "TCP LRO");
 
 long tcplro_stacks_wanting_mbufq;
+int	(*tcp_lro_flush_tcphpts)(struct lro_ctrl *lc, struct lro_entry *le);
+void	(*tcp_hpts_softclock)(void);
+
 counter_u64_t tcp_inp_lro_direct_queue;
 counter_u64_t tcp_inp_lro_wokeup_queue;
 counter_u64_t tcp_inp_lro_compressed;
@@ -1109,23 +1112,14 @@ again:
 void
 tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *le)
 {
-	/* Only optimise if there are multiple packets waiting. */
-#ifdef TCPHPTS
-	int error;
-#endif
 
+	/* Only optimise if there are multiple packets waiting. */
 	NET_EPOCH_ASSERT();
-#ifdef TCPHPTS
-	CURVNET_SET(lc->ifp->if_vnet);
-	error = tcp_lro_flush_tcphpts(lc, le);
-	CURVNET_RESTORE();
-	if (error != 0) {
-#endif
+	if (tcp_lro_flush_tcphpts == NULL ||
+	    tcp_lro_flush_tcphpts(lc, le) != 0) {
 		tcp_lro_condense(lc, le);
 		tcp_flush_out_entry(lc, le);
-#ifdef TCPHPTS
 	}
-#endif
 	lc->lro_flushed++;
 	bzero(le, sizeof(*le));
 	LIST_INSERT_HEAD(&lc->lro_free, le, next);
@@ -1268,10 +1262,8 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
 done:
 	/* flush active streams */
 	tcp_lro_rx_done(lc);
-
-#ifdef TCPHPTS
-	tcp_run_hpts();
-#endif
+	if (tcp_hpts_softclock != NULL)
+		tcp_hpts_softclock();
 	lc->lro_mbuf_count = 0;
 }
 
diff --git a/sys/netinet/tcp_lro.h b/sys/netinet/tcp_lro.h
index d981c940e7eb..b4b5e3f811e4 100644
--- a/sys/netinet/tcp_lro.h
+++ b/sys/netinet/tcp_lro.h
@@ -218,7 +218,7 @@ void tcp_lro_free(struct lro_ctrl *);
 void tcp_lro_flush_inactive(struct lro_ctrl *, const struct timeval *);
 void tcp_lro_flush(struct lro_ctrl *, struct lro_entry *);
 void tcp_lro_flush_all(struct lro_ctrl *);
-int tcp_lro_flush_tcphpts(struct lro_ctrl *, struct lro_entry *);
+extern int (*tcp_lro_flush_tcphpts)(struct lro_ctrl *, struct lro_entry *);
 int tcp_lro_rx(struct lro_ctrl *, struct mbuf *, uint32_t);
 void tcp_lro_queue_mbuf(struct lro_ctrl *, struct mbuf *);
 void tcp_lro_reg_mbufq(void);
diff --git a/sys/netinet/tcp_lro_hpts.c b/sys/netinet/tcp_lro_hpts.c
index 497da9cba40e..769c82a32391 100644
--- a/sys/netinet/tcp_lro_hpts.c
+++ b/sys/netinet/tcp_lro_hpts.c
@@ -423,6 +423,7 @@ tcp_lro_lookup(struct ifnet *ifp, struct lro_parser *pa)
 {
 	struct inpcb *inp;
 
+	CURVNET_SET(ifp->if_vnet);
 	switch (pa->data.lro_type) {
 #ifdef INET6
 	case LRO_TYPE_IPV6_TCP:
@@ -447,14 +448,16 @@ tcp_lro_lookup(struct ifnet *ifp, struct lro_parser *pa)
 		break;
 #endif
 	default:
+		CURVNET_RESTORE();
 		return (NULL);
 	}
+	CURVNET_RESTORE();
 
 	return (intotcpcb(inp));
 }
 
-int
-tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
+static int
+_tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
 {
 	struct tcpcb *tp;
 	struct mbuf **pp, *cmp, *mv_to;
@@ -575,3 +578,9 @@ tcp_lro_flush_tcphpts(struct lro_ctrl *lc, struct lro_entry *le)
 
 	return (0);	/* Success. */
 }
+
+void
+tcp_lro_hpts_init(void)
+{
+	tcp_lro_flush_tcphpts = _tcp_lro_flush_tcphpts;
+}