From nobody Fri Oct 29 21:39:17 2021 X-Original-To: dev-commits-src-all@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 174F6182DF75; Fri, 29 Oct 2021 21:39:18 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4Hgwmj75cLz4tTd; Fri, 29 Oct 2021 21:39:17 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id CFFB91E79C; Fri, 29 Oct 2021 21:39:17 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 19TLdHlX017759; Fri, 29 Oct 2021 21:39:17 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 19TLdH30017758; Fri, 29 Oct 2021 21:39:17 GMT (envelope-from git) Date: Fri, 29 Oct 2021 21:39:17 GMT Message-Id: <202110292139.19TLdH30017758@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Randall Stewart Subject: git: 141a53cd58cd - main - tcp: Rack might retransmit forever. List-Id: Commit messages for all branches of the src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-all List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-all@freebsd.org X-BeenThere: dev-commits-src-all@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: rrs X-Git-Repository: src X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: 141a53cd58cdf0681f800bb98ed5718f32ea7909 Auto-Submitted: auto-generated X-ThisMailContainsUnwantedMimeParts: N The branch main has been updated by rrs: URL: https://cgit.FreeBSD.org/src/commit/?id=141a53cd58cdf0681f800bb98ed5718f32ea7909 commit 141a53cd58cdf0681f800bb98ed5718f32ea7909 Author: Randall Stewart AuthorDate: 2021-10-29 21:37:49 +0000 Commit: Randall Stewart CommitDate: 2021-10-29 21:37:49 +0000 tcp: Rack might retransmit forever. If we get a Sacked peer with an MTU change we can retransmit forever if the last bytes are sacked and the client goes away (think power off). Then we never see the end condition and continually retransmit. Reviewed by: Michael Tuexen Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D32671 --- sys/netinet/tcp_stacks/rack.c | 66 ++++++++++++++++++++++++++++++--------- sys/netinet/tcp_stacks/tcp_rack.h | 38 +++++++++++----------- 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 04252511ad18..616e079df60c 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -2422,7 +2422,8 @@ rack_log_rtt_upd(struct tcpcb *tp, struct tcp_rack *rack, uint32_t t, uint32_t l log.u_bbr.pkt_epoch = rsm->r_start; log.u_bbr.lost = rsm->r_end; log.u_bbr.cwnd_gain = rsm->r_rtr_cnt; - log.u_bbr.pacing_gain = rsm->r_flags; + /* We loose any upper of the 24 bits */ + log.u_bbr.pacing_gain = (uint16_t)rsm->r_flags; } else { /* Its a SYN */ log.u_bbr.pkt_epoch = rack->rc_tp->iss; @@ -6670,6 +6671,7 @@ rack_remxt_tmr(struct tcpcb *tp) if (rsm->r_flags & RACK_ACKED) rsm->r_flags |= RACK_WAS_ACKED; rsm->r_flags &= ~(RACK_ACKED | RACK_SACK_PASSED | RACK_WAS_SACKPASS); + rsm->r_flags |= RACK_MUST_RXT; } /* Clear the count (we just un-acked them) */ rack->r_ctl.rc_last_timeout_snduna = tp->snd_una; @@ -7257,7 +7259,6 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendmap *rsm, uint64_t ts, uint16_t add_flag) { int32_t idx; - uint16_t stripped_flags; rsm->r_rtr_cnt++; rack_log_retran_reason(rack, rsm, __LINE__, 0, 2); @@ -7278,7 +7279,6 @@ rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack, */ rsm->r_fas = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked); - stripped_flags = rsm->r_flags & ~(RACK_SENT_SP|RACK_SENT_FP); if (rsm->r_flags & RACK_ACKED) { /* Problably MTU discovery messing with us */ rsm->r_flags &= ~RACK_ACKED; @@ -16112,12 +16112,25 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma rack_start_hpts_timer(rack, tp, cts, slot, len, 0); if (rack->r_must_retran) { rack->r_ctl.rc_out_at_rto -= (rsm->r_end - rsm->r_start); - if (SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) { + if ((SEQ_GEQ(rsm->r_end, rack->r_ctl.rc_snd_max_at_rto)) || + ((rsm->r_flags & RACK_MUST_RXT) == 0)) { /* - * We have retransmitted all we need. + * We have retransmitted all we need. If + * RACK_MUST_RXT is not set then we need to + * not retransmit this guy. */ rack->r_must_retran = 0; rack->r_ctl.rc_out_at_rto = 0; + if ((rsm->r_flags & RACK_MUST_RXT) == 0) { + /* Not one we should rxt */ + goto failed; + } else { + /* Clear the flag */ + rsm->r_flags &= ~RACK_MUST_RXT; + } + } else { + /* Remove the flag */ + rsm->r_flags &= ~RACK_MUST_RXT; } } #ifdef TCP_ACCOUNTING @@ -17004,8 +17017,8 @@ again: if (rack->r_must_retran && (rsm == NULL)) { /* - * Non-Sack and we had a RTO or MTU change, we - * need to retransmit until we reach + * Non-Sack and we had a RTO or Sack/non-Sack and a + * MTU change, we need to retransmit until we reach * the former snd_max (rack->r_ctl.rc_snd_max_at_rto). */ if (SEQ_GT(tp->snd_max, tp->snd_una)) { @@ -17028,12 +17041,24 @@ again: sb = &so->so_snd; goto just_return_nolock; } - sack_rxmit = 1; - len = rsm->r_end - rsm->r_start; - sendalot = 0; - sb_offset = rsm->r_start - tp->snd_una; - if (len >= segsiz) - len = segsiz; + if ((rsm->r_flags & RACK_MUST_RXT) == 0) { + /* It does not have the flag, we are done */ + rack->r_must_retran = 0; + rack->r_ctl.rc_out_at_rto = 0; + } else { + sack_rxmit = 1; + len = rsm->r_end - rsm->r_start; + sendalot = 0; + sb_offset = rsm->r_start - tp->snd_una; + if (len >= segsiz) + len = segsiz; + /* + * Delay removing the flag RACK_MUST_RXT so + * that the fastpath for retransmit will + * work with this rsm. + */ + + } } else { /* We must be done if there is nothing outstanding */ rack->r_must_retran = 0; @@ -17080,6 +17105,15 @@ again: if (ret == 0) return (0); } + if (rsm && (rsm->r_flags & RACK_MUST_RXT)) { + /* + * Clear the flag in prep for the send + * note that if we can't get an mbuf + * and fail, we won't retransmit this + * rsm but that should be ok (its rare). + */ + rsm->r_flags &= ~RACK_MUST_RXT; + } so = inp->inp_socket; sb = &so->so_snd; if (do_a_prefetch == 0) { @@ -19313,6 +19347,7 @@ rack_mtu_change(struct tcpcb *tp) * The MSS may have changed */ struct tcp_rack *rack; + struct rack_sendmap *rsm; rack = (struct tcp_rack *)tp->t_fb_ptr; if (rack->r_ctl.rc_pace_min_segs != ctf_fixed_maxseg(tp)) { @@ -19329,7 +19364,10 @@ rack_mtu_change(struct tcpcb *tp) rack->r_ctl.rc_sacked); rack->r_ctl.rc_snd_max_at_rto = tp->snd_max; rack->r_must_retran = 1; - + /* Mark all inflight to needing to be rxt'd */ + TAILQ_FOREACH(rsm, &rack->r_ctl.rc_tmap, r_tnext) { + rsm->r_flags |= RACK_MUST_RXT; + } } sack_filter_clear(&rack->r_ctl.rack_sf, tp->snd_una); /* We don't use snd_nxt to retransmit */ diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index 0ada2116dc6f..0893237e92f9 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -28,22 +28,23 @@ #ifndef _NETINET_TCP_RACK_H_ #define _NETINET_TCP_RACK_H_ -#define RACK_ACKED 0x0001/* The remote endpoint acked this */ -#define RACK_TO_REXT 0x0002/* A timeout occured on this sendmap entry */ -#define RACK_DEFERRED 0x0004/* We can't use this for RTT calc - not used */ -#define RACK_OVERMAX 0x0008/* We have more retran's then we can fit */ -#define RACK_SACK_PASSED 0x0010/* A sack was done above this block */ -#define RACK_WAS_SACKPASS 0x0020/* We retransmitted due to SACK pass */ -#define RACK_HAS_FIN 0x0040/* segment is sent with fin */ -#define RACK_TLP 0x0080/* segment sent as tail-loss-probe */ -#define RACK_RWND_COLLAPSED 0x0100/* The peer collapsed the rwnd on the segment */ -#define RACK_APP_LIMITED 0x0200/* We went app limited after this send */ -#define RACK_WAS_ACKED 0x0400/* a RTO undid the ack, but it already had a rtt calc done */ -#define RACK_HAS_SYN 0x0800/* SYN is on this guy */ -#define RACK_SENT_W_DSACK 0x1000/* Sent with a dsack */ -#define RACK_SENT_SP 0x2000/* sent in slow path */ -#define RACK_SENT_FP 0x4000/* sent in fast path */ -#define RACK_HAD_PUSH 0x8000/* Push was sent on original send */ +#define RACK_ACKED 0x000001/* The remote endpoint acked this */ +#define RACK_TO_REXT 0x000002/* A timeout occured on this sendmap entry */ +#define RACK_DEFERRED 0x000004/* We can't use this for RTT calc - not used */ +#define RACK_OVERMAX 0x000008/* We have more retran's then we can fit */ +#define RACK_SACK_PASSED 0x000010/* A sack was done above this block */ +#define RACK_WAS_SACKPASS 0x000020/* We retransmitted due to SACK pass */ +#define RACK_HAS_FIN 0x000040/* segment is sent with fin */ +#define RACK_TLP 0x000080/* segment sent as tail-loss-probe */ +#define RACK_RWND_COLLAPSED 0x000100/* The peer collapsed the rwnd on the segment */ +#define RACK_APP_LIMITED 0x000200/* We went app limited after this send */ +#define RACK_WAS_ACKED 0x000400/* a RTO undid the ack, but it already had a rtt calc done */ +#define RACK_HAS_SYN 0x000800/* SYN is on this guy */ +#define RACK_SENT_W_DSACK 0x001000/* Sent with a dsack */ +#define RACK_SENT_SP 0x002000/* sent in slow path */ +#define RACK_SENT_FP 0x004000/* sent in fast path */ +#define RACK_HAD_PUSH 0x008000/* Push was sent on original send */ +#define RACK_MUST_RXT 0x010000/* We must retransmit this rsm (non-sack/mtu chg)*/ #define RACK_NUM_OF_RETRANS 3 #define RACK_INITIAL_RTO 1000000 /* 1 second in microseconds */ @@ -55,9 +56,8 @@ struct rack_sendmap { uint32_t r_start; /* Sequence number of the segment */ uint32_t r_end; /* End seq, this is 1 beyond actually */ uint32_t r_rtr_bytes; /* How many bytes have been retransmitted */ - uint16_t r_rtr_cnt; /* Retran count, index this -1 to get time - * sent */ - uint16_t r_flags; /* Flags as defined above */ + uint32_t r_flags : 24, /* Flags as defined above */ + r_rtr_cnt : 8; /* Retran count, index this -1 to get time */ struct mbuf *m; uint32_t soff; uint32_t orig_m_len;