git: 646c28ea80cb - main - tcp: improve SEG.ACK validation

From: Michael Tuexen <tuexen_at_FreeBSD.org>
Date: Sun, 21 Jul 2024 18:43:52 UTC
The branch main has been updated by tuexen:

URL: https://cgit.FreeBSD.org/src/commit/?id=646c28ea80cb0f9258386626297495b5a0e56db5

commit 646c28ea80cb0f9258386626297495b5a0e56db5
Author:     Michael Tuexen <tuexen@FreeBSD.org>
AuthorDate: 2024-07-21 09:37:35 +0000
Commit:     Michael Tuexen <tuexen@FreeBSD.org>
CommitDate: 2024-07-21 09:37:35 +0000

    tcp: improve SEG.ACK validation
    
    Implement the improved SEG.ACK validation described in RFC 5961.
    In addition to that, also detect ghost ACKs, which are ACKs for data
    that has never been sent.
    The additional checks are enabled by default, but can be disabled
    by setting the sysctl-variable net.inet.tcp.insecure_ack to a
    non-zero value.
    
    PR:                     250357
    Reviewed by:            Peter Lei, rscheff (older version)
    MFC after:              1 week
    Sponsored by:           Netflix, Inc.
    Differential Revision:  https://reviews.freebsd.org/D45894
---
 share/man/man4/tcp.4          |  5 ++++-
 sys/netinet/in_kdtrace.c      |  2 ++
 sys/netinet/in_kdtrace.h      |  3 +++
 sys/netinet/tcp_input.c       | 44 +++++++++++++++++++++++++++++++++++++++++++
 sys/netinet/tcp_stacks/bbr.c  | 37 ++++++++++++++++++++++++++++++++++++
 sys/netinet/tcp_stacks/rack.c | 39 ++++++++++++++++++++++++++++++++++++++
 sys/netinet/tcp_var.h         |  9 ++++++++-
 usr.bin/netstat/inet.c        |  8 ++++++--
 8 files changed, 143 insertions(+), 4 deletions(-)

diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
index 39db12752937..3f5d87da0ffb 100644
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -31,7 +31,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd November 30, 2023
+.Dd July 21, 2024
 .Dt TCP 4
 .Os
 .Sh NAME
@@ -699,6 +699,9 @@ Default is false.
 .It Va insecure_syn
 Use criteria defined in RFC793 instead of RFC5961 for accepting SYN segments.
 Default is false.
+.It Va insecure_ack
+Use criteria defined in RFC793 for validating SEG.ACK.
+Default is false.
 .It Va isn_reseed_interval
 The interval (in seconds) specifying how often the secret data used in
 RFC 1948 initial sequence number calculations should be reseeded.
diff --git a/sys/netinet/in_kdtrace.c b/sys/netinet/in_kdtrace.c
index 8491a4d49d91..219bc6482910 100644
--- a/sys/netinet/in_kdtrace.c
+++ b/sys/netinet/in_kdtrace.c
@@ -339,6 +339,8 @@ MIB_PROBE_TCP(tcps_ecn_sndect1);
 MIB_PROBE_TCP(tcps_tlpresends);
 MIB_PROBE_TCP(tcps_tlpresend_bytes);
 
+MIB_PROBE_TCP(tcps_rcvghostack);
+MIB_PROBE_TCP(tcps_rcvacktooold);
 #endif
 
 SDT_PROBE_DEFINE6_XLATE(ip, , , receive,
diff --git a/sys/netinet/in_kdtrace.h b/sys/netinet/in_kdtrace.h
index 9896af96eb84..22f181974f93 100644
--- a/sys/netinet/in_kdtrace.h
+++ b/sys/netinet/in_kdtrace.h
@@ -330,6 +330,9 @@ SDT_PROBE_DECLARE(mib, tcp, count, tcps_ecn_sndect1);
 
 SDT_PROBE_DECLARE(mib, tcp, count, tcps_tlpresends);
 SDT_PROBE_DECLARE(mib, tcp, count, tcps_tlpresend_bytes);
+
+SDT_PROBE_DECLARE(mib, tcp, count, tcps_rcvghostack);
+SDT_PROBE_DECLARE(mib, tcp, count, tcps_rcvacktooold);
 #endif
 
 SDT_PROBE_DECLARE(ip, , , receive);
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 833a1e501780..ef24f20f784a 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -202,6 +202,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_insecure_rst), 0,
     "Follow RFC793 instead of RFC5961 criteria for accepting RST packets");
 
+VNET_DEFINE(int, tcp_insecure_ack) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_ack, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_insecure_ack), 0,
+    "Follow RFC793 criteria for validating SEG.ACK");
+
 VNET_DEFINE(int, tcp_recvspace) = 1024*64;
 #define	V_tcp_recvspace	VNET(tcp_recvspace)
 SYSCTL_INT(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_VNET | CTLFLAG_RW,
@@ -2438,6 +2443,45 @@ tcp_do_segment(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 	/*
 	 * Ack processing.
 	 */
+	if (SEQ_GEQ(tp->snd_una, tp->iss + (65535 << tp->snd_scale))) {
+		/* Checking SEG.ACK against ISS is definitely redundant. */
+		tp->t_flags2 |= TF2_NO_ISS_CHECK;
+	}
+	if (!V_tcp_insecure_ack) {
+		tcp_seq seq_min;
+		bool ghost_ack_check;
+
+		if (tp->t_flags2 & TF2_NO_ISS_CHECK) {
+			/* Check for too old ACKs (RFC 5961, Section 5.2). */
+			seq_min = tp->snd_una - tp->max_sndwnd;
+			ghost_ack_check = false;
+		} else {
+			if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) {
+				/* Checking for ghost ACKs is stricter. */
+				seq_min = tp->iss + 1;
+				ghost_ack_check = true;
+			} else {
+				/*
+				 * Checking for too old ACKs (RFC 5961,
+				 * Section 5.2) is stricter.
+				 */
+				seq_min = tp->snd_una - tp->max_sndwnd;
+				ghost_ack_check = false;
+			}
+		}
+		if (SEQ_LT(th->th_ack, seq_min)) {
+			if (ghost_ack_check)
+				TCPSTAT_INC(tcps_rcvghostack);
+			else
+				TCPSTAT_INC(tcps_rcvacktooold);
+			/* Send a challenge ACK. */
+			tcp_respond(tp, mtod(m, void *), th, m,
+			    tp->rcv_nxt, tp->snd_nxt, TH_ACK);
+			tp->last_ack_sent = tp->rcv_nxt;
+			m = NULL;
+			goto drop;
+		}
+	}
 	switch (tp->t_state) {
 	/*
 	 * In SYN_RECEIVED state, the ack ACKs our SYN, so enter
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index cb87932fa8db..445ba064b316 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -7711,6 +7711,43 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	bbr = (struct tcp_bbr *)tp->t_fb_ptr;
 	lost = bbr->r_ctl.rc_lost;
 	nsegs = max(1, m->m_pkthdr.lro_nsegs);
+	if (SEQ_GEQ(tp->snd_una, tp->iss + (65535 << tp->snd_scale))) {
+		/* Checking SEG.ACK against ISS is definitely redundant. */
+		tp->t_flags2 |= TF2_NO_ISS_CHECK;
+	}
+	if (!V_tcp_insecure_ack) {
+		tcp_seq seq_min;
+		bool ghost_ack_check;
+
+		if (tp->t_flags2 & TF2_NO_ISS_CHECK) {
+			/* Check for too old ACKs (RFC 5961, Section 5.2). */
+			seq_min = tp->snd_una - tp->max_sndwnd;
+			ghost_ack_check = false;
+		} else {
+			if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) {
+				/* Checking for ghost ACKs is stricter. */
+				seq_min = tp->iss + 1;
+				ghost_ack_check = true;
+			} else {
+				/*
+				 * Checking for too old ACKs (RFC 5961,
+				 * Section 5.2) is stricter.
+				 */
+				seq_min = tp->snd_una - tp->max_sndwnd;
+				ghost_ack_check = false;
+			}
+		}
+		if (SEQ_LT(th->th_ack, seq_min)) {
+			if (ghost_ack_check)
+				TCPSTAT_INC(tcps_rcvghostack);
+			else
+				TCPSTAT_INC(tcps_rcvacktooold);
+			/* Send challenge ACK. */
+			ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val);
+			bbr->r_wanted_output = 1;
+			return (1);
+		}
+	}
 	if (SEQ_GT(th->th_ack, tp->snd_max)) {
 		ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val);
 		bbr->r_wanted_output = 1;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index b7d9d383fc61..bd7583d3843a 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -12472,6 +12472,45 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, struct socket *so,
 	INP_WLOCK_ASSERT(tptoinpcb(tp));
 
 	rack = (struct tcp_rack *)tp->t_fb_ptr;
+	if (SEQ_GEQ(tp->snd_una, tp->iss + (65535 << tp->snd_scale))) {
+		/* Checking SEG.ACK against ISS is definitely redundant. */
+		tp->t_flags2 |= TF2_NO_ISS_CHECK;
+	}
+	if (!V_tcp_insecure_ack) {
+		tcp_seq seq_min;
+		bool ghost_ack_check;
+
+		if (tp->t_flags2 & TF2_NO_ISS_CHECK) {
+			/* Check for too old ACKs (RFC 5961, Section 5.2). */
+			seq_min = tp->snd_una - tp->max_sndwnd;
+			ghost_ack_check = false;
+		} else {
+			if (SEQ_GT(tp->iss + 1, tp->snd_una - tp->max_sndwnd)) {
+				/* Checking for ghost ACKs is stricter. */
+				seq_min = tp->iss + 1;
+				ghost_ack_check = true;
+			} else {
+				/*
+				 * Checking for too old ACKs (RFC 5961,
+				 * Section 5.2) is stricter.
+				 */
+				seq_min = tp->snd_una - tp->max_sndwnd;
+				ghost_ack_check = false;
+			}
+		}
+		if (SEQ_LT(th->th_ack, seq_min)) {
+			if (ghost_ack_check)
+				TCPSTAT_INC(tcps_rcvghostack);
+			else
+				TCPSTAT_INC(tcps_rcvacktooold);
+			/* Send challenge ACK. */
+			__ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val,
+					      &rack->r_ctl.challenge_ack_ts,
+					      &rack->r_ctl.challenge_ack_cnt);
+			rack->r_wanted_output = 1;
+			return (1);
+		}
+	}
 	if (SEQ_GT(th->th_ack, tp->snd_max)) {
 		__ctf_do_dropafterack(m, tp, th, thflags, tlen, ret_val,
 				      &rack->r_ctl.challenge_ack_ts,
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 1f48297c2b0a..f691579acf09 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -848,6 +848,7 @@ tcp_packets_this_ack(struct tcpcb *tp, tcp_seq ack)
 #define	TF2_CANNOT_DO_ECN	0x00080000 /* The stack does not do ECN */
 #define TF2_PROC_SACK_PROHIBIT	0x00100000 /* Due to small MSS size do not process sack's */
 #define	TF2_IPSEC_TSO		0x00200000	/* IPSEC + TSO supported */
+#define	TF2_NO_ISS_CHECK	0x00200000 /* Don't check SEG.ACK against ISS */
 
 /*
  * Structure to hold TCP options that are only used during segment
@@ -1089,8 +1090,12 @@ struct	tcpstat {
 	uint64_t tcps_tlpresends;	/* number of tlp resends */
 	uint64_t tcps_tlpresend_bytes;	/* number of bytes resent by tlp */
 
+	/* SEG.ACK validation failures */
+	uint64_t tcps_rcvghostack;	/* received ACK for data never sent */
+	uint64_t tcps_rcvacktooold;	/* received ACK for data too long ago */
 
-	uint64_t _pad[3];		/* 3 TBD placeholder for STABLE */
+
+	uint64_t _pad[1];		/* 1 TBD placeholder for STABLE */
 };
 
 #define	tcps_rcvmemdrop	tcps_rcvreassfull	/* compat */
@@ -1280,6 +1285,7 @@ VNET_DECLARE(int, tcp_ecn_maxretries);
 VNET_DECLARE(int, tcp_initcwnd_segments);
 VNET_DECLARE(int, tcp_insecure_rst);
 VNET_DECLARE(int, tcp_insecure_syn);
+VNET_DECLARE(int, tcp_insecure_ack);
 VNET_DECLARE(uint32_t, tcp_map_entries_limit);
 VNET_DECLARE(uint32_t, tcp_map_split_limit);
 VNET_DECLARE(int, tcp_minmss);
@@ -1327,6 +1333,7 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo);
 #define	V_tcp_initcwnd_segments		VNET(tcp_initcwnd_segments)
 #define	V_tcp_insecure_rst		VNET(tcp_insecure_rst)
 #define	V_tcp_insecure_syn		VNET(tcp_insecure_syn)
+#define	V_tcp_insecure_ack		VNET(tcp_insecure_ack)
 #define	V_tcp_map_entries_limit		VNET(tcp_map_entries_limit)
 #define	V_tcp_map_split_limit		VNET(tcp_map_split_limit)
 #define	V_tcp_minmss			VNET(tcp_minmss)
diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c
index 5d7fd0e46cf1..9ff7c687353f 100644
--- a/usr.bin/netstat/inet.c
+++ b/usr.bin/netstat/inet.c
@@ -642,8 +642,12 @@ tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 	    "{N:/UDP tunneled pkt%s}\n");
 	p(tcps_tunneled_errs, "\t\t{:received-bad-udp-tunneled-pkts/%ju} "
 	    "{N:/UDP tunneled pkt cnt with error%s}\n");
-	p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} "
-	    "{N:/ack%s for unsent data}\n");
+	p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-data-not-yet-sent/%ju} "
+	    "{N:/ack%s for data not yet sent}\n");
+	p(tcps_rcvghostack, "\t\t{:received-acks-for-data-never-been-sent/%ju} "
+	    "{N:/ack%s for data never been sent (ghost acks)}\n");
+	p(tcps_rcvacktooold, "\t\t{:received-acks-for-data-being-too-old/%ju} "
+	    "{N:/ack%s for data being too old}\n");
 	p2(tcps_rcvpack, tcps_rcvbyte, "\t\t"
 	    "{:received-in-sequence-packets/%ju} {N:/packet%s} "
 	    "({:received-in-sequence-bytes/%ju} {N:/byte%s}) "