svn commit: r351725 - in head/sys/netinet: . tcp_stacks
Michael Tuexen
tuexen at FreeBSD.org
Mon Sep 2 19:04:04 UTC 2019
Author: tuexen
Date: Mon Sep 2 19:04:02 2019
New Revision: 351725
URL: https://svnweb.freebsd.org/changeset/base/351725
Log:
This patch improves the DSACK handling to conform with RFC 2883.
The lowest SACK block is used when multiple Blocks would be elegible as
DSACK blocks ACK blocks get reordered - while maintaining the ordering of
SACK blocks not relevant in the DSACK context is maintained.
Reviewed by: rrs@, tuexen@
Obtained from: Richard Scheffenegger
MFC after: 1 week
Differential Revision: https://reviews.freebsd.org/D21038
Modified:
head/sys/netinet/tcp_input.c
head/sys/netinet/tcp_sack.c
head/sys/netinet/tcp_stacks/rack.c
head/sys/netinet/tcp_stacks/rack_bbr_common.c
head/sys/netinet/tcp_var.h
Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c Mon Sep 2 18:32:08 2019 (r351724)
+++ head/sys/netinet/tcp_input.c Mon Sep 2 19:04:02 2019 (r351725)
@@ -1486,7 +1486,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
} else {
tp->rfbuf_cnt += tlen; /* add up */
}
-
return (newsize);
}
Modified: head/sys/netinet/tcp_sack.c
==============================================================================
--- head/sys/netinet/tcp_sack.c Mon Sep 2 18:32:08 2019 (r351724)
+++ head/sys/netinet/tcp_sack.c Mon Sep 2 19:04:02 2019 (r351725)
@@ -149,7 +149,109 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes,
&VNET_NAME(tcp_sack_globalholes), 0,
"Global number of TCP SACK holes currently allocated");
+
/*
+ * This function will find overlaps with the currently stored sackblocks
+ * and add any overlap as a dsack block upfront
+ */
+void
+tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+ struct sackblk head_blk,mid_blk,saved_blks[MAX_SACK_BLKS];
+ int i, j, n, identical;
+ tcp_seq start, end;
+
+ INP_WLOCK_ASSERT(tp->t_inpcb);
+
+ KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+ if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+ log(LOG_DEBUG, "\nDSACK update: %d..%d, rcv_nxt: %u\n",
+ rcv_start, rcv_end, tp->rcv_nxt);
+ }
+
+ if (SEQ_LT(rcv_end, tp->rcv_nxt) ||
+ ((rcv_end == tp->rcv_nxt) &&
+ (tp->rcv_numsacks > 0 ) &&
+ (tp->sackblks[0].end == tp->rcv_nxt))) {
+ saved_blks[0].start = rcv_start;
+ saved_blks[0].end = rcv_end;
+ } else {
+ saved_blks[0].start = saved_blks[0].end = 0;
+ }
+
+ head_blk.start = head_blk.end = 0;
+ mid_blk.start = rcv_start;
+ mid_blk.end = rcv_end;
+ identical = 0;
+
+ for (i = 0; i < tp->rcv_numsacks; i++) {
+ start = tp->sackblks[i].start;
+ end = tp->sackblks[i].end;
+ if (SEQ_LT(rcv_end, start)) {
+ /* pkt left to sack blk */
+ continue;
+ }
+ if (SEQ_GT(rcv_start, end)) {
+ /* pkt right to sack blk */
+ continue;
+ }
+ if (SEQ_GT(tp->rcv_nxt, end)) {
+ if ((SEQ_MAX(rcv_start, start) != SEQ_MIN(rcv_end, end)) &&
+ (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) ||
+ (head_blk.start == head_blk.end))) {
+ head_blk.start = SEQ_MAX(rcv_start, start);
+ head_blk.end = SEQ_MIN(rcv_end, end);
+ }
+ continue;
+ }
+ if (((head_blk.start == head_blk.end) ||
+ SEQ_LT(start, head_blk.start)) &&
+ (SEQ_GT(end, rcv_start) &&
+ SEQ_LEQ(start, rcv_end))) {
+ head_blk.start = start;
+ head_blk.end = end;
+ }
+ mid_blk.start = SEQ_MIN(mid_blk.start, start);
+ mid_blk.end = SEQ_MAX(mid_blk.end, end);
+ if ((mid_blk.start == start) &&
+ (mid_blk.end == end))
+ identical = 1;
+ }
+ if (SEQ_LT(head_blk.start, head_blk.end)) {
+ /* store overlapping range */
+ saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start);
+ saved_blks[0].end = SEQ_MIN(rcv_end, head_blk.end);
+ }
+ n = 1;
+ /*
+ * Second, if not ACKed, store the SACK block that
+ * overlaps with the DSACK block unless it is identical
+ */
+ if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) &&
+ !((mid_blk.start == saved_blks[0].start) &&
+ (mid_blk.end == saved_blks[0].end))) ||
+ identical == 1) {
+ saved_blks[n].start = mid_blk.start;
+ saved_blks[n++].end = mid_blk.end;
+ }
+ for (j = 0; (j < tp->rcv_numsacks) && (j < MAX_SACK_BLKS-1); j++) {
+ if (((SEQ_LT(tp->sackblks[j].end, mid_blk.start) ||
+ SEQ_GT(tp->sackblks[j].start, mid_blk.end)) &&
+ (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt))))
+ saved_blks[n++] = tp->sackblks[j];
+ }
+ j = 0;
+ for (i = 0; i < n; i++) {
+ /* we can end up with a stale inital entry */
+ if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) {
+ tp->sackblks[j++] = saved_blks[i];
+ }
+ }
+ tp->rcv_numsacks = j;
+}
+
+/*
* This function is called upon receipt of new valid data (while not in
* header prediction mode), and it updates the ordered list of sacks.
*/
@@ -170,9 +272,16 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
/* Check arguments. */
KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
- /* SACK block for the received segment. */
- head_blk.start = rcv_start;
- head_blk.end = rcv_end;
+ if ((rcv_start == rcv_end) &&
+ (tp->rcv_numsacks >= 1) &&
+ (rcv_end == tp->sackblks[0].end)) {
+ /* retaining DSACK block below rcv_nxt (todrop) */
+ head_blk = tp->sackblks[0];
+ } else {
+ /* SACK block for the received segment. */
+ head_blk.start = rcv_start;
+ head_blk.end = rcv_end;
+ }
/*
* Merge updated SACK blocks into head_blk, and save unchanged SACK
@@ -266,6 +375,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
*/
if (num_saved >= MAX_SACK_BLKS)
num_saved--;
+ }
+ if ((rcv_start == rcv_end) &&
+ (rcv_start == tp->sackblks[0].end)) {
+ num_head = 1;
}
if (num_saved > 0) {
/*
Modified: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack.c Mon Sep 2 18:32:08 2019 (r351724)
+++ head/sys/netinet/tcp_stacks/rack.c Mon Sep 2 19:04:02 2019 (r351725)
@@ -1783,6 +1783,14 @@ rack_drop_checks(struct tcpopt *to, struct mbuf *m, st
TCPSTAT_INC(tcps_rcvpartduppack);
TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
}
+ if (tp->t_flags & TF_SACK_PERMIT) {
+ /*
+ * record the left, to-be-dropped edge of data
+ * here, for use as dsack block further down
+ */
+ tcp_update_sack_list(tp, th->th_seq,
+ th->th_seq + todrop);
+ }
*drop_hdrlen += todrop; /* drop from the top afterwards */
th->th_seq += todrop;
tlen -= todrop;
@@ -4900,7 +4908,8 @@ dodata: /* XXX */
(TCPS_HAVEESTABLISHED(tp->t_state) ||
tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn) {
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tp->t_flags |= TF_DELACK;
} else {
rack->r_wanted_output++;
@@ -4934,18 +4943,29 @@ dodata: /* XXX */
* DSACK actually handled in the fastpath
* above.
*/
- tcp_update_sack_list(tp, save_start, save_start + save_tlen);
+ tcp_update_sack_list(tp, save_start,
+ save_start + save_tlen);
} else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
/*
* Cleaning sackblks by using zero length
* update.
*/
- tcp_update_sack_list(tp, save_start, save_start);
+ if ((tp->rcv_numsacks >= 1) &&
+ (tp->sackblks[0].end == save_start)) {
+ /* partial overlap, recorded at todrop above */
+ tcp_update_sack_list(tp, tp->sackblks[0].start,
+ tp->sackblks[0].end);
+ } else {
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
+ }
} else if ((tlen > 0) && (tlen >= save_tlen)) {
/* Update of sackblks. */
- tcp_update_sack_list(tp, save_start, save_start + save_tlen);
+ tcp_update_dsack_list(tp, save_start,
+ save_start + save_tlen);
} else if (tlen > 0) {
- tcp_update_sack_list(tp, save_start, save_start+tlen);
+ tcp_update_dsack_list(tp, save_start,
+ save_start + tlen);
}
} else {
m_freem(m);
@@ -4967,7 +4987,8 @@ dodata: /* XXX */
* now.
*/
if (tp->t_flags & TF_NEEDSYN) {
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tp->t_flags |= TF_DELACK;
} else {
tp->t_flags |= TF_ACKNOW;
@@ -4984,7 +5005,8 @@ dodata: /* XXX */
tp->t_starttime = ticks;
/* FALLTHROUGH */
case TCPS_ESTABLISHED:
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tcp_state_change(tp, TCPS_CLOSE_WAIT);
break;
@@ -4993,7 +5015,8 @@ dodata: /* XXX */
* acked so enter the CLOSING state.
*/
case TCPS_FIN_WAIT_1:
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
tcp_state_change(tp, TCPS_CLOSING);
break;
@@ -5003,7 +5026,8 @@ dodata: /* XXX */
* other standard timers.
*/
case TCPS_FIN_WAIT_2:
- rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
+ rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
tcp_twstart(tp);
return (1);
@@ -5012,7 +5036,8 @@ dodata: /* XXX */
/*
* Return any desired output.
*/
- if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
+ if ((tp->t_flags & TF_ACKNOW) ||
+ (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
rack->r_wanted_output++;
}
INP_WLOCK_ASSERT(tp->t_inpcb);
Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c Mon Sep 2 18:32:08 2019 (r351724)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c Mon Sep 2 19:04:02 2019 (r351725)
@@ -495,7 +495,8 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, str
* DSACK - add SACK block for dropped range
*/
if (tp->t_flags & TF_SACK_PERMIT) {
- tcp_update_sack_list(tp, th->th_seq, th->th_seq + tlen);
+ tcp_update_sack_list(tp, th->th_seq,
+ th->th_seq + todrop);
/*
* ACK now, as the next in-sequence segment
* will clear the DSACK block again
Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h Mon Sep 2 18:32:08 2019 (r351724)
+++ head/sys/netinet/tcp_var.h Mon Sep 2 19:04:02 2019 (r351725)
@@ -939,6 +939,7 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq tcp_new_isn(struct in_conninfo *);
int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
void tcp_clean_dsack_blocks(struct tcpcb *tp);
void tcp_clean_sackreport(struct tcpcb *tp);
More information about the svn-src-all
mailing list