git: 30409ecdb648 - main - tcp: do not purge SACK scoreboard on first RTO

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Sat, 06 Jan 2024 19:44:06 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=30409ecdb648901223f4e02e4a575d79c447acab

commit 30409ecdb648901223f4e02e4a575d79c447acab
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2024-01-06 19:25:17 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2024-01-06 19:25:38 +0000

    tcp: do not purge SACK scoreboard on first RTO
    
    Keeping the SACK scoreboard intact after the first RTO
    and retransmitting all data anew only on subsequent RTOs
    allows a more timely and efficient loss recovery under
    many adverse circumstances.
    
    Reviewed By:           tuexen, #transport
    MFC after:             10 weeks
    Sponsored by:          NetApp, Inc.
    Differential Revision: https://reviews.freebsd.org/D42906
---
 sys/netinet/tcp_input.c |  5 ++++-
 sys/netinet/tcp_sack.c  | 19 +++++++++++++++++++
 sys/netinet/tcp_timer.c |  6 ++++--
 sys/netinet/tcp_var.h   |  1 +
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 1d6430f6ef20..4a6100fc969b 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -439,7 +439,10 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type)
 	case CC_RTO:
 		tp->t_dupacks = 0;
 		tp->t_bytes_acked = 0;
-		EXIT_RECOVERY(tp->t_flags);
+		if ((tp->t_rxtshift > 1) ||
+		    !((tp->t_flags & TF_SACK_PERMIT) &&
+		      (!TAILQ_EMPTY(&tp->snd_holes))))
+			EXIT_RECOVERY(tp->t_flags);
 		if (tp->t_flags2 & TF2_ECN_PERMIT)
 			tp->t_flags2 |= TF2_ECN_SND_CWR;
 		break;
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index e8ed3c52fd67..48efe855f689 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -908,6 +908,25 @@ tcp_free_sackholes(struct tcpcb *tp)
 		("tp->sackhint.nexthole == NULL"));
 }
 
+/*
+ * Rewind every hole on the SACK scoreboard (rxmit = start), point the
+ * nexthole hint at the first hole and zero sack_bytes_rexmit; no hole is
+ * freed.  This follows the Errata to RFC 2018, which allow SACK data to
+ * be used past an RTO.  Asserts the inpcb lock via INP_WLOCK_ASSERT.
+ */
+void
+tcp_resend_sackholes(struct tcpcb *tp)
+{
+	struct sackhole *p;
+
+	INP_WLOCK_ASSERT(tptoinpcb(tp));
+	TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
+		p->rxmit = p->start;
+	}
+	tp->sackhint.nexthole = TAILQ_FIRST(&tp->snd_holes);
+	tp->sackhint.sack_bytes_rexmit = 0;
+}
+
 /*
  * Partial ack handling within a sack recovery episode.  Keeping this very
  * simple for now.  When a partial ack is received, force snd_cwnd to a value
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 952fa53e0275..125e28134c01 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -559,7 +559,6 @@ tcp_timer_rexmt(struct tcpcb *tp)
 
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	CURVNET_SET(inp->inp_vnet);
-	tcp_free_sackholes(tp);
 	if (tp->t_fb->tfb_tcp_rexmit_tmr) {
 		/* The stack has a timer action too. */
 		(*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
@@ -619,8 +618,11 @@ tcp_timer_rexmt(struct tcpcb *tp)
 		 * the retransmitted packet's to_tsval to by tcp_output
 		 */
 		tp->t_flags |= TF_PREVVALID;
-	} else
+		tcp_resend_sackholes(tp);
+	} else {
 		tp->t_flags &= ~TF_PREVVALID;
+		tcp_free_sackholes(tp);
+	}
 	TCPSTAT_INC(tcps_rexmttimeo);
 	if ((tp->t_state == TCPS_SYN_SENT) ||
 	    (tp->t_state == TCPS_SYN_RECEIVED))
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index c2b15526c15b..5f064ead7f64 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1499,6 +1499,7 @@ struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
 void	 tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, sackstatus_t);
 void	 tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
+void	 tcp_resend_sackholes(struct tcpcb *tp);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 void	 tcp_sack_lost_retransmission(struct tcpcb *, struct tcphdr *);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);