git: 22dcc8129385 - main - tcp: Use segment size excluding tcp options for all cwnd calculations

From: Richard Scheffenegger <rscheff_at_FreeBSD.org>
Date: Thu, 14 Nov 2024 18:38:01 UTC
The branch main has been updated by rscheff:

URL: https://cgit.FreeBSD.org/src/commit/?id=22dcc81293854c4d39df639a329fecded175b2b0

commit 22dcc81293854c4d39df639a329fecded175b2b0
Author:     Richard Scheffenegger <rscheff@FreeBSD.org>
AuthorDate: 2024-11-14 09:03:40 +0000
Commit:     Richard Scheffenegger <rscheff@FreeBSD.org>
CommitDate: 2024-11-14 09:16:57 +0000

    tcp: Use segment size excluding tcp options for all cwnd calculations
    
    Avoid sending small segments by making sure that cwnd is usually
    calculated in full (data) segment sizes, especially during loss
    recovery and retransmission scenarios.
    
    Reviewed By: tuexen, #transport
    Sponsored by: NetApp, Inc.
    Differential Revision: https://reviews.freebsd.org/D47474
---
 sys/netinet/cc/cc.c         | 13 +++++++------
 sys/netinet/cc/cc_cdg.c     |  9 +++++----
 sys/netinet/cc/cc_chd.c     | 20 +++++++++++---------
 sys/netinet/cc/cc_cubic.c   | 24 ++++++++++++------------
 sys/netinet/cc/cc_dctcp.c   | 13 +++++++------
 sys/netinet/cc/cc_htcp.c    | 15 ++++++++-------
 sys/netinet/cc/cc_newreno.c |  8 ++++----
 sys/netinet/cc/cc_vegas.c   | 11 ++++++-----
 sys/netinet/tcp_output.c    |  2 +-
 9 files changed, 61 insertions(+), 54 deletions(-)

diff --git a/sys/netinet/cc/cc.c b/sys/netinet/cc/cc.c
index c628be250abf..eae5b7bca4d9 100644
--- a/sys/netinet/cc/cc.c
+++ b/sys/netinet/cc/cc.c
@@ -392,6 +392,7 @@ void
 newreno_cc_post_recovery(struct cc_var *ccv)
 {
 	int pipe;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
 		/*
@@ -412,8 +413,7 @@ newreno_cc_post_recovery(struct cc_var *ccv)
 			 * Ensure that cwnd does not collapse to 1 MSS under
 			 * adverse conditions. Implements RFC6582
 			 */
-			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
-			    CCV(ccv, t_maxseg);
+			CCV(ccv, snd_cwnd) = max(pipe, mss) + mss;
 		else
 			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
 	}
@@ -521,7 +521,7 @@ u_int
 newreno_cc_cwnd_in_cong_avoid(struct cc_var *ccv)
 {
 	u_int cw = CCV(ccv, snd_cwnd);
-	u_int incr = CCV(ccv, t_maxseg);
+	u_int incr = tcp_fixed_maxseg(ccv->tp);
 
 	KASSERT(cw > CCV(ccv, snd_ssthresh),
 		("congestion control state not in congestion avoidance\n"));
@@ -561,7 +561,8 @@ u_int
 newreno_cc_cwnd_in_slow_start(struct cc_var *ccv)
 {
 	u_int cw = CCV(ccv, snd_cwnd);
-	u_int incr = CCV(ccv, t_maxseg);
+	u_int mss = tcp_fixed_maxseg(ccv->tp);
+	u_int incr = mss;
 
 	KASSERT(cw <= CCV(ccv, snd_ssthresh),
 		("congestion control state not in slow start\n"));
@@ -599,9 +600,9 @@ newreno_cc_cwnd_in_slow_start(struct cc_var *ccv)
 			abc_val = V_tcp_abc_l_var;
 		if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
 			incr = min(ccv->bytes_this_ack,
-			           ccv->nsegs * abc_val * CCV(ccv, t_maxseg));
+			           ccv->nsegs * abc_val * mss);
 		else
-			incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
+			incr = min(ccv->bytes_this_ack, mss);
 	}
 	/* ABC is on by default, so incr equals 0 frequently. */
 	if (incr > 0)
diff --git a/sys/netinet/cc/cc_cdg.c b/sys/netinet/cc/cc_cdg.c
index 997d9435870f..5b1df76e71a2 100644
--- a/sys/netinet/cc/cc_cdg.c
+++ b/sys/netinet/cc/cc_cdg.c
@@ -416,27 +416,28 @@ cdg_window_increase(struct cc_var *ccv, int new_measurement)
 {
 	struct cdg *cdg_data;
 	int incr, s_w_incr;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	cdg_data = ccv->cc_data;
 	incr = s_w_incr = 0;
 
 	if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
 		/* Slow start. */
-		incr = CCV(ccv, t_maxseg);
+		incr = mss;
 		s_w_incr = incr;
 		cdg_data->window_incr = cdg_data->rtt_count = 0;
 	} else {
 		/* Congestion avoidance. */
 		if (new_measurement) {
-			s_w_incr = CCV(ccv, t_maxseg);
+			s_w_incr = mss;
 			if (V_cdg_alpha_inc == 0) {
-				incr = CCV(ccv, t_maxseg);
+				incr = mss;
 			} else {
 				if (++cdg_data->rtt_count >= V_cdg_alpha_inc) {
 					cdg_data->window_incr++;
 					cdg_data->rtt_count = 0;
 				}
-				incr = CCV(ccv, t_maxseg) *
+				incr = mss *
 				    cdg_data->window_incr;
 			}
 		}
diff --git a/sys/netinet/cc/cc_chd.c b/sys/netinet/cc/cc_chd.c
index f48d1f0066e2..1d440f43578f 100644
--- a/sys/netinet/cc/cc_chd.c
+++ b/sys/netinet/cc/cc_chd.c
@@ -147,10 +147,11 @@ static __inline void
 chd_window_decrease(struct cc_var *ccv)
 {
 	unsigned long win;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
-	win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / CCV(ccv, t_maxseg);
+	win = min(CCV(ccv, snd_wnd), CCV(ccv, snd_cwnd)) / mss;
 	win -= max((win / 2), 1);
-	CCV(ccv, snd_ssthresh) = max(win, 2) * CCV(ccv, t_maxseg);
+	CCV(ccv, snd_ssthresh) = max(win, 2) * mss;
 }
 
 /*
@@ -190,6 +191,7 @@ chd_window_increase(struct cc_var *ccv, int new_measurement)
 {
 	struct chd *chd_data;
 	int incr;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	chd_data = ccv->cc_data;
 	incr = 0;
@@ -201,23 +203,22 @@ chd_window_increase(struct cc_var *ccv, int new_measurement)
 			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) {
 				/* Not due to RTO. */
 				incr = min(ccv->bytes_this_ack,
-				    V_tcp_abc_l_var * CCV(ccv, t_maxseg));
+				    V_tcp_abc_l_var * mss);
 			} else {
 				/* Due to RTO. */
-				incr = min(ccv->bytes_this_ack,
-				    CCV(ccv, t_maxseg));
+				incr = min(ccv->bytes_this_ack, mss);
 			}
 		} else
-			incr = CCV(ccv, t_maxseg);
+			incr = mss;
 
 	} else { /* Congestion avoidance. */
 		if (V_tcp_do_rfc3465) {
 			if (ccv->flags & CCF_ABC_SENTAWND) {
 				ccv->flags &= ~CCF_ABC_SENTAWND;
-				incr = CCV(ccv, t_maxseg);
+				incr = mss;
 			}
 		} else if (new_measurement)
-			incr = CCV(ccv, t_maxseg);
+			incr = mss;
 	}
 
 	if (chd_data->shadow_w > 0) {
@@ -380,8 +381,9 @@ chd_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
 		}
 
 		if (chd_data->shadow_w > 0) {
+			uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 			chd_data->shadow_w = max(chd_data->shadow_w /
-			    CCV(ccv, t_maxseg) / 2, 2) * CCV(ccv, t_maxseg);
+			    mss / 2, 2) * mss;
 		}
 		ENTER_FASTRECOVERY(CCV(ccv, t_flags));
 		break;
diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c
index c4b44d5c3660..3b134082a59b 100644
--- a/sys/netinet/cc/cc_cubic.c
+++ b/sys/netinet/cc/cc_cubic.c
@@ -168,7 +168,8 @@ cubic_does_slow_start(struct cc_var *ccv, struct cubic *cubicd)
 	 * doesn't rely on tcpcb vars.
 	 */
 	u_int cw = CCV(ccv, snd_cwnd);
-	u_int incr = CCV(ccv, t_maxseg);
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
+	u_int incr = mss;
 	uint16_t abc_val;
 
 	cubicd->flags |= CUBICFLAG_IN_SLOWSTART;
@@ -216,10 +217,9 @@ cubic_does_slow_start(struct cc_var *ccv, struct cubic *cubicd)
 	}
 	if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
 		incr = min(ccv->bytes_this_ack,
-			   ccv->nsegs * abc_val *
-			   CCV(ccv, t_maxseg));
+			   ccv->nsegs * abc_val * mss);
 	else
-		incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
+		incr = min(ccv->bytes_this_ack, mss);
 
 	/* Only if Hystart is enabled will the flag get set */
 	if (cubicd->flags & CUBICFLAG_HYSTART_IN_CSS) {
@@ -238,6 +238,7 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type)
 	struct cubic *cubic_data;
 	unsigned long W_est, W_cubic;
 	int usecs_since_epoch;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	cubic_data = ccv->cc_data;
 	cubic_record_rtt(ccv);
@@ -277,8 +278,7 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type)
 				cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
 						       CUBICFLAG_IN_APPLIMIT);
 				cubic_data->t_epoch = ticks;
-				cubic_data->K = cubic_k(cubic_data->W_max /
-							CCV(ccv, t_maxseg));
+				cubic_data->K = cubic_k(cubic_data->W_max / mss);
 			}
 			usecs_since_epoch = (ticks - cubic_data->t_epoch) * tick;
 			if (usecs_since_epoch < 0) {
@@ -298,7 +298,7 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type)
 			W_cubic = cubic_cwnd(usecs_since_epoch +
 					     cubic_data->mean_rtt_usecs,
 					     cubic_data->W_max,
-					     CCV(ccv, t_maxseg),
+					     tcp_fixed_maxseg(ccv->tp),
 					     cubic_data->K);
 
 			if (W_cubic < W_est) {
@@ -329,7 +329,7 @@ cubic_ack_received(struct cc_var *ccv, ccsignal_t type)
 			    cubic_data->W_max < CCV(ccv, snd_cwnd)) {
 				cubic_data->W_max = CCV(ccv, snd_cwnd);
 				cubic_data->K = cubic_k(cubic_data->W_max /
-				    CCV(ccv, t_maxseg));
+				    tcp_fixed_maxseg(ccv->tp));
 			}
 		}
 	} else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
@@ -351,7 +351,7 @@ cubic_after_idle(struct cc_var *ccv)
 	cubic_data = ccv->cc_data;
 
 	cubic_data->W_max = ulmax(cubic_data->W_max, CCV(ccv, snd_cwnd));
-	cubic_data->K = cubic_k(cubic_data->W_max / CCV(ccv, t_maxseg));
+	cubic_data->K = cubic_k(cubic_data->W_max / tcp_fixed_maxseg(ccv->tp));
 	if ((cubic_data->flags & CUBICFLAG_HYSTART_ENABLED) == 0) {
 		/*
 		 * Re-enable hystart if we have been idle.
@@ -532,6 +532,7 @@ cubic_post_recovery(struct cc_var *ccv)
 {
 	struct cubic *cubic_data;
 	int pipe;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	cubic_data = ccv->cc_data;
 	pipe = 0;
@@ -554,13 +555,12 @@ cubic_post_recovery(struct cc_var *ccv)
 			 * Ensure that cwnd does not collapse to 1 MSS under
 			 * adverse conditions. Implements RFC6582
 			 */
-			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
-			    CCV(ccv, t_maxseg);
+			CCV(ccv, snd_cwnd) = max(pipe, mss) + mss;
 		else
 			/* Update cwnd based on beta and adjusted W_max. */
 			CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->W_max *
 			    CUBIC_BETA) >> CUBIC_SHIFT,
-			    2 * CCV(ccv, t_maxseg));
+			    2 * mss);
 	}
 
 	/* Calculate the average RTT between congestion epochs. */
diff --git a/sys/netinet/cc/cc_dctcp.c b/sys/netinet/cc/cc_dctcp.c
index 06b2de11fd46..f43efe0e27c7 100644
--- a/sys/netinet/cc/cc_dctcp.c
+++ b/sys/netinet/cc/cc_dctcp.c
@@ -108,6 +108,7 @@ dctcp_ack_received(struct cc_var *ccv, ccsignal_t type)
 {
 	struct dctcp *dctcp_data;
 	int bytes_acked = 0;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	dctcp_data = ccv->cc_data;
 
@@ -125,7 +126,7 @@ dctcp_ack_received(struct cc_var *ccv, ccsignal_t type)
 			newreno_cc_ack_received(ccv, type);
 
 		if (type == CC_DUPACK)
-			bytes_acked = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
+			bytes_acked = min(ccv->bytes_this_ack, mss);
 
 		if (type == CC_ACK)
 			bytes_acked = ccv->bytes_this_ack;
@@ -138,16 +139,16 @@ dctcp_ack_received(struct cc_var *ccv, ccsignal_t type)
 			//XXRMS: For fluid-model DCTCP, update
 			//cwnd here during for RTT fairness
 			if (!dctcp_data->ece_prev
-			    && bytes_acked > CCV(ccv, t_maxseg)) {
+			    && bytes_acked > mss) {
 				dctcp_data->bytes_ecn +=
-				    (bytes_acked - CCV(ccv, t_maxseg));
+				    (bytes_acked - mss);
 			} else
 				dctcp_data->bytes_ecn += bytes_acked;
 			dctcp_data->ece_prev = 1;
 		} else {
 			if (dctcp_data->ece_prev
-			    && bytes_acked > CCV(ccv, t_maxseg))
-				dctcp_data->bytes_ecn += CCV(ccv, t_maxseg);
+			    && bytes_acked > mss)
+				dctcp_data->bytes_ecn += mss;
 			dctcp_data->ece_prev = 0;
 		}
 		dctcp_data->ece_curr = 0;
@@ -305,7 +306,7 @@ dctcp_cong_signal(struct cc_var *ccv, ccsignal_t type)
 			}
 			CCV(ccv, snd_cwnd) = mss;
 			dctcp_update_alpha(ccv);
-			dctcp_data->save_sndnxt += CCV(ccv, t_maxseg);
+			dctcp_data->save_sndnxt += mss;
 			dctcp_data->num_cong_events++;
 			break;
 		default:
diff --git a/sys/netinet/cc/cc_htcp.c b/sys/netinet/cc/cc_htcp.c
index 49bf4d6142f1..13441bb49190 100644
--- a/sys/netinet/cc/cc_htcp.c
+++ b/sys/netinet/cc/cc_htcp.c
@@ -193,6 +193,7 @@ static void
 htcp_ack_received(struct cc_var *ccv, ccsignal_t type)
 {
 	struct htcp *htcp_data;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	htcp_data = ccv->cc_data;
 	htcp_record_rtt(ccv);
@@ -220,7 +221,7 @@ htcp_ack_received(struct cc_var *ccv, ccsignal_t type)
 			if (V_tcp_do_rfc3465) {
 				/* Increment cwnd by alpha segments. */
 				CCV(ccv, snd_cwnd) += htcp_data->alpha *
-				    CCV(ccv, t_maxseg);
+				    mss;
 				ccv->flags &= ~CCF_ABC_SENTAWND;
 			} else
 				/*
@@ -230,8 +231,8 @@ htcp_ack_received(struct cc_var *ccv, ccsignal_t type)
 				 */
 				CCV(ccv, snd_cwnd) += (((htcp_data->alpha <<
 				    HTCP_SHIFT) / (max(1,
-				    CCV(ccv, snd_cwnd) / CCV(ccv, t_maxseg)))) *
-				    CCV(ccv, t_maxseg))  >> HTCP_SHIFT;
+				    CCV(ccv, snd_cwnd) / mss))) *
+				    mss)  >> HTCP_SHIFT;
 		}
 	}
 }
@@ -370,6 +371,7 @@ htcp_post_recovery(struct cc_var *ccv)
 {
 	int pipe;
 	struct htcp *htcp_data;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	pipe = 0;
 	htcp_data = ccv->cc_data;
@@ -392,12 +394,11 @@ htcp_post_recovery(struct cc_var *ccv)
 			 * Ensure that cwnd down not collape to 1 MSS under
 			 * adverse conditions. Implements RFC6582
 			 */
-			CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
-			    CCV(ccv, t_maxseg);
+			CCV(ccv, snd_cwnd) = max(pipe, mss) + mss;
 		else
 			CCV(ccv, snd_cwnd) = max(1, ((htcp_data->beta *
-			    htcp_data->prev_cwnd / CCV(ccv, t_maxseg))
-			    >> HTCP_SHIFT)) * CCV(ccv, t_maxseg);
+			    htcp_data->prev_cwnd / mss)
+			    >> HTCP_SHIFT)) * mss;
 	}
 }
 
diff --git a/sys/netinet/cc/cc_newreno.c b/sys/netinet/cc/cc_newreno.c
index 4515ac133c19..d7172fa78bc4 100644
--- a/sys/netinet/cc/cc_newreno.c
+++ b/sys/netinet/cc/cc_newreno.c
@@ -215,12 +215,13 @@ static void
 newreno_ack_received(struct cc_var *ccv, ccsignal_t type)
 {
 	struct newreno *nreno;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	nreno = ccv->cc_data;
 	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
 	    (ccv->flags & CCF_CWND_LIMITED)) {
 		u_int cw = CCV(ccv, snd_cwnd);
-		u_int incr = CCV(ccv, t_maxseg);
+		u_int incr = mss;
 
 		/*
 		 * Regular in-order ACK, open the congestion window.
@@ -324,10 +325,9 @@ newreno_ack_received(struct cc_var *ccv, ccsignal_t type)
 			}
 			if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
 				incr = min(ccv->bytes_this_ack,
-				    ccv->nsegs * abc_val *
-				    CCV(ccv, t_maxseg));
+				    ccv->nsegs * abc_val * mss);
 			else
-				incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
+				incr = min(ccv->bytes_this_ack, mss);
 
 			/* Only if Hystart is enabled will the flag get set */
 			if (nreno->newreno_flags & CC_NEWRENO_HYSTART_IN_CSS) {
diff --git a/sys/netinet/cc/cc_vegas.c b/sys/netinet/cc/cc_vegas.c
index 45d6b646bcba..2e24a717f869 100644
--- a/sys/netinet/cc/cc_vegas.c
+++ b/sys/netinet/cc/cc_vegas.c
@@ -129,6 +129,7 @@ vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
 	struct ertt *e_t;
 	struct vegas *vegas_data;
 	long actual_tx_rate, expected_tx_rate, ndiff;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	e_t = khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
 	vegas_data = ccv->cc_data;
@@ -139,7 +140,7 @@ vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
 			actual_tx_rate = e_t->bytes_tx_in_marked_rtt /
 			    e_t->markedpkt_rtt;
 			ndiff = (expected_tx_rate - actual_tx_rate) *
-			    e_t->minrtt / CCV(ccv, t_maxseg);
+			    e_t->minrtt / mss;
 
 			if (ndiff < V_vegas_alpha) {
 				if (CCV(ccv, snd_cwnd) <=
@@ -150,8 +151,7 @@ vegas_ack_received(struct cc_var *ccv, ccsignal_t ack_type)
 				} else {
 					vegas_data->slow_start_toggle = 0;
 					CCV(ccv, snd_cwnd) =
-					    min(CCV(ccv, snd_cwnd) +
-					    CCV(ccv, t_maxseg),
+					    min(CCV(ccv, snd_cwnd) + mss,
 					    TCP_MAXWIN << CCV(ccv, snd_scale));
 				}
 			} else if (ndiff > V_vegas_beta) {
@@ -207,6 +207,7 @@ vegas_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
 {
 	struct vegas *vegas_data;
 	int presignalrecov;
+	uint32_t mss = tcp_fixed_maxseg(ccv->tp);
 
 	vegas_data = ccv->cc_data;
 
@@ -218,8 +219,8 @@ vegas_cong_signal(struct cc_var *ccv, ccsignal_t signal_type)
 	switch((int)signal_type) {
 	case CC_VEGAS_RATE:
 		if (!IN_RECOVERY(CCV(ccv, t_flags))) {
-			CCV(ccv, snd_cwnd) = max(2 * CCV(ccv, t_maxseg),
-			    CCV(ccv, snd_cwnd) - CCV(ccv, t_maxseg));
+			CCV(ccv, snd_cwnd) = max(2 * mss,
+			    CCV(ccv, snd_cwnd) - mss);
 			if (CCV(ccv, snd_cwnd) < CCV(ccv, snd_ssthresh))
 				/* Exit slow start. */
 				CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index 854cce2a0cc1..be98d2e41f11 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1696,7 +1696,7 @@ timer:
 			return (error);
 		case ENOBUFS:
 			TCP_XMIT_TIMER_ASSERT(tp, len, flags);
-			tp->snd_cwnd = tp->t_maxseg;
+			tp->snd_cwnd = tcp_maxseg(tp);
 			return (0);
 		case EMSGSIZE:
 			/*