git: bffebc336f4e - main - tcp: use CALLOUT_TRYLOCK for the TCP callout

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Thu, 24 Oct 2024 17:14:39 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=bffebc336f4ece4d18774c1ab8f555802cebf961

commit bffebc336f4ece4d18774c1ab8f555802cebf961
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2024-10-24 16:58:37 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2024-10-24 17:14:03 +0000

    tcp: use CALLOUT_TRYLOCK for the TCP callout
    
    This allows to remove the drop of the lock tcp_timer_enter(), which closes
    a sophisticated but possible race that involves three threads.  In case we
    got a callout executing and two threads trying to close the connection,
    e.g. and interrupt and a syscall, then lock yielding in tcp_timer_enter()
    may transfer lock from one closing thread to the other closing thread,
    instead of the callout.
    
    Reviewed by:            jtl
    Differential Revision:  https://reviews.freebsd.org/D45747
---
 sys/netinet/tcp_subr.c  |  3 ++-
 sys/netinet/tcp_timer.c | 39 +++++++++++++++------------------------
 2 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 9b5f2651fb35..668d218b34a8 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2309,7 +2309,8 @@ tcp_newtcpcb(struct inpcb *inp, struct tcpcb *listening_tcb)
 	tp->t_hpts_cpu = HPTS_CPU_NONE;
 	tp->t_lro_cpu = HPTS_CPU_NONE;
 
-	callout_init_rw(&tp->t_callout, &inp->inp_lock, CALLOUT_RETURNUNLOCKED);
+	callout_init_rw(&tp->t_callout, &inp->inp_lock,
+	    CALLOUT_TRYLOCK | CALLOUT_RETURNUNLOCKED);
 	for (int i = 0; i < TT_N; i++)
 		tp->t_timers[i] = SBT_MAX;
 
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index ae6b97c09cdf..ae4753b2523f 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -949,35 +949,26 @@ tcp_timer_active(struct tcpcb *tp, tt_which which)
 
 /*
  * Stop all timers associated with tcpcb.
- *
  * Called when tcpcb moves to TCPS_CLOSED.
- *
- * XXXGL: unfortunately our callout(9) is not able to fully stop a locked
- * callout even when only two threads are involved: the callout itself and the
- * thread that does callout_stop().  See where softclock_call_cc() swaps the
- * callwheel lock to callout lock and then checks cc_exec_cancel().  This is
- * the race window.  If it happens, the tcp_timer_enter() won't be executed,
- * however pcb lock will be locked and released, hence we can't free memory.
- * Until callout(9) is improved, just keep retrying.  In my profiling I've seen
- * such event happening less than 1 time per hour with 20-30 Gbit/s of traffic.
  */
 void
 tcp_timer_stop(struct tcpcb *tp)
 {
-	struct inpcb *inp = tptoinpcb(tp);
 
-	INP_WLOCK_ASSERT(inp);
-
-	if (curthread->td_pflags & TDP_INTCPCALLOUT) {
-		int stopped __diagused;
+	INP_WLOCK_ASSERT(tptoinpcb(tp));
 
-		stopped = callout_stop(&tp->t_callout);
-		MPASS(stopped == 0);
-		for (tt_which i = 0; i < TT_N; i++)
-			tp->t_timers[i] = SBT_MAX;
-	} else while(__predict_false(callout_stop(&tp->t_callout) == 0)) {
-		INP_WUNLOCK(inp);
-		kern_yield(PRI_UNCHANGED);
-		INP_WLOCK(inp);
-	}
+	/*
+	 * We don't check return value from callout_stop().  There are two
+	 * reasons why it can return 0.  First, a legitimate one: we could have
+	 * been called from the callout itself.  Second, callout(9) has a bug.
+	 * It can race internally in softclock_call_cc(), when callout has
+	 * already completed, but cc_exec_curr still points at the callout.
+	 */
+	(void )callout_stop(&tp->t_callout);
+	/*
+	 * In case of being called from callout itself, we must make sure that
+	 * we don't reschedule.
+	 */
+	for (tt_which i = 0; i < TT_N; i++)
+		tp->t_timers[i] = SBT_MAX;
 }