svn commit: r352022 - in stable/12/sys/netinet: . tcp_stacks
Michael Tuexen
tuexen at FreeBSD.org
Sat Sep 7 19:58:07 UTC 2019
Author: tuexen
Date: Sat Sep 7 19:58:06 2019
New Revision: 352022
URL: https://svnweb.freebsd.org/changeset/base/352022
Log:
Improve DSACK support.
MFC r349987 from rrs@:
This is the second in a number of patches needed to
get BBRv1 into the tree. This fixes the DSACK bug but
is also needed by BBR. We still have two more to go:
one will be for the pacing code (tcp_ratelimit.c) and
the second will be for the new updated LRO code
(tcp_lro.c) that allows a transport to know the
arrival times of packets.
to get BBRv1 into head.
This required manual tweaking to address merge conflicts.
MFC r351725:
This patch improves the DSACK handling to conform with RFC 2883.
The lowest SACK block is used when multiple blocks would be eligible as
DSACK blocks. SACK blocks get reordered so the DSACK block comes first,
while the relative ordering of the SACK blocks not relevant in the DSACK
context is maintained.
This required manual tweaking to address merge conflicts.
MFC r351801:
Fix the SACK block generation in the base TCP stack by bringing it in
sync with the RACK stack.
Modified:
stable/12/sys/netinet/tcp_input.c
stable/12/sys/netinet/tcp_output.c
stable/12/sys/netinet/tcp_sack.c
stable/12/sys/netinet/tcp_stacks/rack.c
stable/12/sys/netinet/tcp_var.h
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/netinet/tcp_input.c
==============================================================================
--- stable/12/sys/netinet/tcp_input.c Sat Sep 7 19:25:45 2019 (r352021)
+++ stable/12/sys/netinet/tcp_input.c Sat Sep 7 19:58:06 2019 (r352022)
@@ -1506,7 +1506,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
} else {
tp->rfbuf_cnt += tlen; /* add up */
}
-
return (newsize);
}
@@ -2285,7 +2284,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
* DSACK - add SACK block for dropped range
*/
if (tp->t_flags & TF_SACK_PERMIT) {
- tcp_update_sack_list(tp, th->th_seq, th->th_seq+tlen);
+ tcp_update_sack_list(tp, th->th_seq,
+ th->th_seq + todrop);
/*
* ACK now, as the next in-sequence segment
* will clear the DSACK block again
@@ -3072,21 +3072,29 @@ dodata: /* XXX */
* DSACK actually handled in the fastpath
* above.
*/
- tcp_update_sack_list(tp, save_start, save_start + save_tlen);
- } else
- if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
+ tcp_update_sack_list(tp, save_start,
+ save_start + save_tlen);
+ } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
/*
* Cleaning sackblks by using zero length
* update.
*/
- tcp_update_sack_list(tp, save_start, save_start);
- } else
- if ((tlen > 0) && (tlen >= save_tlen)) {
+ if ((tp->rcv_numsacks >= 1) &&
+ (tp->sackblks[0].end == save_start)) {
+ /* partial overlap, recorded at todrop above */
+ tcp_update_sack_list(tp, tp->sackblks[0].start,
+ tp->sackblks[0].end);
+ } else {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ }
+ } else if ((tlen > 0) && (tlen >= save_tlen)) {
/* Update of sackblks. */
- tcp_update_sack_list(tp, save_start, save_start + save_tlen);
- } else
- if (tlen > 0) {
- tcp_update_sack_list(tp, save_start, save_start+tlen);
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ } else if (tlen > 0) {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + tlen);
}
}
#if 0
Modified: stable/12/sys/netinet/tcp_output.c
==============================================================================
--- stable/12/sys/netinet/tcp_output.c Sat Sep 7 19:25:45 2019 (r352021)
+++ stable/12/sys/netinet/tcp_output.c Sat Sep 7 19:58:06 2019 (r352022)
@@ -1507,7 +1507,13 @@ timer:
if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
tp->snd_max = tp->snd_nxt + xlen;
}
-
+ if ((error == 0) &&
+ (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0)) {
+ /* Clean up any DSACK's sent */
+ tcp_clean_dsack_blocks(tp);
+ }
if (error) {
/* Record the error. */
TCP_LOG_EVENT(tp, NULL, &so->so_rcv, &so->so_snd, TCP_LOG_OUT,
Modified: stable/12/sys/netinet/tcp_sack.c
==============================================================================
--- stable/12/sys/netinet/tcp_sack.c Sat Sep 7 19:25:45 2019 (r352021)
+++ stable/12/sys/netinet/tcp_sack.c Sat Sep 7 19:58:06 2019 (r352022)
@@ -149,7 +149,109 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes,
&VNET_NAME(tcp_sack_globalholes), 0,
"Global number of TCP SACK holes currently allocated");
+
/*
+ * This function will find overlaps with the currently stored sackblocks
+ * and add any overlap as a dsack block upfront
+ */
+void
+tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+ struct sackblk head_blk,mid_blk,saved_blks[MAX_SACK_BLKS];
+ int i, j, n, identical;
+ tcp_seq start, end;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+ if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+ log(LOG_DEBUG, "\nDSACK update: %d..%d, rcv_nxt: %u\n",
+ rcv_start, rcv_end, tp->rcv_nxt);
+ }
+
+ if (SEQ_LT(rcv_end, tp->rcv_nxt) ||
+ ((rcv_end == tp->rcv_nxt) &&
+ (tp->rcv_numsacks > 0 ) &&
+ (tp->sackblks[0].end == tp->rcv_nxt))) {
+ saved_blks[0].start = rcv_start;
+ saved_blks[0].end = rcv_end;
+ } else {
+ saved_blks[0].start = saved_blks[0].end = 0;
+ }
+
+ head_blk.start = head_blk.end = 0;
+ mid_blk.start = rcv_start;
+ mid_blk.end = rcv_end;
+ identical = 0;
+
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ start = tp->sackblks[i].start;
+ end = tp->sackblks[i].end;
+ if (SEQ_LT(rcv_end, start)) {
+ /* pkt left to sack blk */
+ continue;
+ }
+ if (SEQ_GT(rcv_start, end)) {
+ /* pkt right to sack blk */
+ continue;
+ }
+ if (SEQ_GT(tp->rcv_nxt, end)) {
+ if ((SEQ_MAX(rcv_start, start) != SEQ_MIN(rcv_end, end)) &&
+ (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) ||
+ (head_blk.start == head_blk.end))) {
+ head_blk.start = SEQ_MAX(rcv_start, start);
+ head_blk.end = SEQ_MIN(rcv_end, end);
+ }
+ continue;
+ }
+ if (((head_blk.start == head_blk.end) ||
+ SEQ_LT(start, head_blk.start)) &&
+ (SEQ_GT(end, rcv_start) &&
+ SEQ_LEQ(start, rcv_end))) {
+ head_blk.start = start;
+ head_blk.end = end;
+ }
+ mid_blk.start = SEQ_MIN(mid_blk.start, start);
+ mid_blk.end = SEQ_MAX(mid_blk.end, end);
+ if ((mid_blk.start == start) &&
+ (mid_blk.end == end))
+ identical = 1;
+ }
+ if (SEQ_LT(head_blk.start, head_blk.end)) {
+ /* store overlapping range */
+ saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start);
+ saved_blks[0].end = SEQ_MIN(rcv_end, head_blk.end);
+ }
+ n = 1;
+ /*
+ * Second, if not ACKed, store the SACK block that
+ * overlaps with the DSACK block unless it is identical
+ */
+ if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) &&
+ !((mid_blk.start == saved_blks[0].start) &&
+ (mid_blk.end == saved_blks[0].end))) ||
+ identical == 1) {
+ saved_blks[n].start = mid_blk.start;
+ saved_blks[n++].end = mid_blk.end;
+ }
+ for (j = 0; (j < tp->rcv_numsacks) && (j < MAX_SACK_BLKS-1); j++) {
+ if (((SEQ_LT(tp->sackblks[j].end, mid_blk.start) ||
+ SEQ_GT(tp->sackblks[j].start, mid_blk.end)) &&
+ (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt))))
+ saved_blks[n++] = tp->sackblks[j];
+ }
+ j = 0;
+ for (i = 0; i < n; i++) {
+ /* we can end up with a stale inital entry */
+ if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) {
+ tp->sackblks[j++] = saved_blks[i];
+ }
+ }
+ tp->rcv_numsacks = j;
+}
+
+/*
* This function is called upon receipt of new valid data (while not in
* header prediction mode), and it updates the ordered list of sacks.
*/
@@ -170,9 +272,16 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
/* Check arguments. */
KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
- /* SACK block for the received segment. */
- head_blk.start = rcv_start;
- head_blk.end = rcv_end;
+ if ((rcv_start == rcv_end) &&
+ (tp->rcv_numsacks >= 1) &&
+ (rcv_end == tp->sackblks[0].end)) {
+ /* retaining DSACK block below rcv_nxt (todrop) */
+ head_blk = tp->sackblks[0];
+ } else {
+ /* SACK block for the received segment. */
+ head_blk.start = rcv_start;
+ head_blk.end = rcv_end;
+ }
/*
* Merge updated SACK blocks into head_blk, and save unchanged SACK
@@ -267,6 +376,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
if (num_saved >= MAX_SACK_BLKS)
num_saved--;
}
+ if ((rcv_start == rcv_end) &&
+ (rcv_start == tp->sackblks[0].end)) {
+ num_head = 1;
+ }
if (num_saved > 0) {
/*
* Copy the saved SACK blocks back.
@@ -277,6 +390,45 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
/* Save the number of SACK blocks. */
tp->rcv_numsacks = num_head + num_saved;
+}
+
+void
+tcp_clean_dsack_blocks(struct tcpcb *tp)
+{
+ struct sackblk saved_blks[MAX_SACK_BLKS];
+ int num_saved, i;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+ /*
+ * Clean up any DSACK blocks that
+ * are in our queue of sack blocks.
+ *
+ */
+ num_saved = 0;
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ tcp_seq start = tp->sackblks[i].start;
+ tcp_seq end = tp->sackblks[i].end;
+ if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
+ /*
+ * Discard this D-SACK block.
+ */
+ continue;
+ }
+ /*
+ * Save this SACK block.
+ */
+ saved_blks[num_saved].start = start;
+ saved_blks[num_saved].end = end;
+ num_saved++;
+ }
+ if (num_saved > 0) {
+ /*
+ * Copy the saved SACK blocks back.
+ */
+ bcopy(saved_blks, &tp->sackblks[0],
+ sizeof(struct sackblk) * num_saved);
+ }
+ tp->rcv_numsacks = num_saved;
}
/*
Modified: stable/12/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- stable/12/sys/netinet/tcp_stacks/rack.c Sat Sep 7 19:25:45 2019 (r352021)
+++ stable/12/sys/netinet/tcp_stacks/rack.c Sat Sep 7 19:58:06 2019 (r352022)
@@ -1794,7 +1794,8 @@ rack_drop_checks(struct tcpopt *to, struct mbuf *m, st
* DSACK - add SACK block for dropped range
*/
if (tp->t_flags & TF_SACK_PERMIT) {
- tcp_update_sack_list(tp, th->th_seq, th->th_seq + tlen);
+ tcp_update_sack_list(tp, th->th_seq,
+ th->th_seq + todrop);
/*
* ACK now, as the next in-sequence segment
* will clear the DSACK block again
@@ -4853,7 +4854,8 @@ dodata: /* XXX */
(TCPS_HAVEESTABLISHED(tp->t_state) ||
tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn) {
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tp->t_flags |= TF_DELACK;
} else {
rack->r_wanted_output++;
@@ -4887,18 +4889,29 @@ dodata: /* XXX */
* DSACK actually handled in the fastpath
* above.
*/
- tcp_update_sack_list(tp, save_start, save_start + save_tlen);
+ tcp_update_sack_list(tp, save_start,
+ save_start + save_tlen);
} else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
/*
* Cleaning sackblks by using zero length
* update.
*/
- tcp_update_sack_list(tp, save_start, save_start);
+ if ((tp->rcv_numsacks >= 1) &&
+ (tp->sackblks[0].end == save_start)) {
+ /* partial overlap, recorded at todrop above */
+ tcp_update_sack_list(tp, tp->sackblks[0].start,
+ tp->sackblks[0].end);
+ } else {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ }
} else if ((tlen > 0) && (tlen >= save_tlen)) {
/* Update of sackblks. */
- tcp_update_sack_list(tp, save_start, save_start + save_tlen);
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
} else if (tlen > 0) {
- tcp_update_sack_list(tp, save_start, save_start+tlen);
+ tcp_update_dsack_list(tp, save_start,
+ save_start + tlen);
}
} else {
m_freem(m);
@@ -4920,7 +4933,8 @@ dodata: /* XXX */
* now.
*/
if (tp->t_flags & TF_NEEDSYN) {
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
@@ -4937,7 +4951,8 @@ dodata: /* XXX */
tp->t_starttime = ticks;
/* FALLTHROUGH */
case TCPS_ESTABLISHED:
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tcp_state_change(tp, TCPS_CLOSE_WAIT);
break;
@@ -4946,7 +4961,8 @@ dodata: /* XXX */
* acked so enter the CLOSING state.
*/
case TCPS_FIN_WAIT_1:
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tcp_state_change(tp, TCPS_CLOSING);
break;
@@ -4956,7 +4972,8 @@ dodata: /* XXX */
* other standard timers.
*/
case TCPS_FIN_WAIT_2:
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
return (1);
@@ -4965,7 +4982,8 @@ dodata: /* XXX */
/*
* Return any desired output.
*/
- if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
+ if ((tp->t_flags & TF_ACKNOW) ||
+ (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
rack->r_wanted_output++;
}
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -5038,6 +5056,7 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th,
/* Clean receiver SACK report if present */
if (tp->rcv_numsacks)
tcp_clean_sackreport(tp);
+
TCPSTAT_INC(tcps_preddat);
tp->rcv_nxt += tlen;
/*
@@ -8571,6 +8590,10 @@ out:
* retransmit. In persist state, just set snd_max.
*/
if (error == 0) {
+ if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (tp->t_flags & TF_SACK_PERMIT) &&
+ tp->rcv_numsacks > 0)
+ tcp_clean_dsack_blocks(tp);
if (len == 0)
counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1);
else if (len == 1) {
Modified: stable/12/sys/netinet/tcp_var.h
==============================================================================
--- stable/12/sys/netinet/tcp_var.h Sat Sep 7 19:25:45 2019 (r352021)
+++ stable/12/sys/netinet/tcp_var.h Sat Sep 7 19:58:06 2019 (r352022)
@@ -935,7 +935,9 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq tcp_new_isn(struct in_conninfo *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
+void tcp_clean_dsack_blocks(struct tcpcb *tp);
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
More information about the svn-src-stable
mailing list