svn commit: r292003 - head/sys/netinet
Yongmin Cho
yongmincho82 at gmail.com
Thu Dec 10 01:05:44 UTC 2015
I've been wating for this issue is solved.
I will try to test this implementation.
thanks.
On Tue, Dec 08, 2015 at 09:21:48PM +0000, Hiren Panchasara wrote:
> Author: hiren
> Date: Tue Dec 8 21:21:48 2015
> New Revision: 292003
> URL: https://svnweb.freebsd.org/changeset/base/292003
>
> Log:
> One of the ways to detect loss is to count duplicate acks coming back from the
> other end till it reaches predetermined threshold which is 3 for us right now.
> Once that happens, we trigger fast-retransmit to do loss recovery.
>
> Main problem with the current implementation is that we don't honor SACK
> information well to detect whether an incoming ack is a dupack or not. RFC6675
> has latest recommendations for that. According to it, dupack is a segment that
> arrives carrying a SACK block that identifies previously unknown information
> between snd_una and snd_max even if it carries new data, changes the advertised
> window, or moves the cumulative acknowledgment point.
>
> With the prevalence of Selective ACK (SACK) these days, improper handling can
> lead to delayed loss recovery.
>
> With the fix, new behavior looks like following:
>
> 0) th_ack < snd_una --> ignore
> Old acks are ignored.
> 1) th_ack == snd_una, !sack_changed --> ignore
> Acks with SACK enabled but without any new SACK info in them are ignored.
> 2) th_ack == snd_una, window == old_window --> increment
> Increment on a good dupack.
> 3) th_ack == snd_una, window != old_window, sack_changed --> increment
> When SACK enabled, it's okay to have advertized window changed if the ack has
> new SACK info.
> 4) th_ack > snd_una --> reset to 0
> Reset to 0 when left edge moves.
> 5) th_ack > snd_una, sack_changed --> increment
> Increment if left edge moves but there is new SACK info.
>
> Here, sack_changed is the indicator that incoming ack has previously unknown
> SACK info in it.
>
> Note: This fix is not fully compliant to RFC6675. That may require a few
> changes to current implementation in order to keep per-sackhole dupack counter
> and change to the way we mark/handle sack holes.
>
> PR: 203663
> Reviewed by: jtl
> MFC after: 3 weeks
> Sponsored by: Limelight Networks
> Differential Revision: https://reviews.freebsd.org/D4225
>
> Modified:
> head/sys/netinet/tcp_input.c
> head/sys/netinet/tcp_sack.c
> head/sys/netinet/tcp_var.h
>
> Modified: head/sys/netinet/tcp_input.c
> ==============================================================================
> --- head/sys/netinet/tcp_input.c Tue Dec 8 20:20:40 2015 (r292002)
> +++ head/sys/netinet/tcp_input.c Tue Dec 8 21:21:48 2015 (r292003)
> @@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tc
> struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
> int ti_locked)
> {
> - int thflags, acked, ourfinisacked, needoutput = 0;
> + int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
> int rstreason, todrop, win;
> u_long tiwin;
> char *s;
> @@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tc
> thflags = th->th_flags;
> inc = &tp->t_inpcb->inp_inc;
> tp->sackhint.last_sack_ack = 0;
> + sack_changed = 0;
>
> /*
> * If this is either a state-changing packet or current state isn't
> @@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tc
> if ((tp->t_flags & TF_SACK_PERMIT) &&
> ((to.to_flags & TOF_SACK) ||
> !TAILQ_EMPTY(&tp->snd_holes)))
> - tcp_sack_doack(tp, &to, th->th_ack);
> + sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
> else
> /*
> * Reset the value so that previous (valid) value
> @@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tc
> hhook_run_tcp_est_in(tp, th, &to);
>
> if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
> - if (tlen == 0 && tiwin == tp->snd_wnd) {
> + if (tlen == 0 &&
> + (tiwin == tp->snd_wnd ||
> + (tp->t_flags & TF_SACK_PERMIT))) {
> /*
> * If this is the first time we've seen a
> * FIN from the remote, this is not a
> @@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tc
> * When using TCP ECN, notify the peer that
> * we reduced the cwnd.
> */
> - if (!tcp_timer_active(tp, TT_REXMT) ||
> - th->th_ack != tp->snd_una)
> + /*
> + * Following 2 kinds of acks should not affect
> + * dupack counting:
> + * 1) Old acks
> + * 2) Acks with SACK but without any new SACK
> + * information in them. These could result from
> + * any anomaly in the network like a switch
> + * duplicating packets or a possible DoS attack.
> + */
> + if (th->th_ack != tp->snd_una ||
> + ((tp->t_flags & TF_SACK_PERMIT) &&
> + !sack_changed))
> + break;
> + else if (!tcp_timer_active(tp, TT_REXMT))
> tp->t_dupacks = 0;
> else if (++tp->t_dupacks > tcprexmtthresh ||
> IN_FASTRECOVERY(tp->t_flags)) {
> @@ -2608,9 +2623,20 @@ tcp_do_segment(struct mbuf *m, struct tc
> tp->snd_cwnd = oldcwnd;
> goto drop;
> }
> - } else
> - tp->t_dupacks = 0;
> + }
> break;
> + } else {
> + /*
> + * This ack is advancing the left edge, reset the
> + * counter.
> + */
> + tp->t_dupacks = 0;
> + /*
> + * If this ack also has new SACK info, increment the
> + * counter as per rfc6675.
> + */
> + if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
> + tp->t_dupacks++;
> }
>
> KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
> @@ -2629,7 +2655,6 @@ tcp_do_segment(struct mbuf *m, struct tc
> } else
> cc_post_recovery(tp, th);
> }
> - tp->t_dupacks = 0;
> /*
> * If we reach this point, ACK is not a duplicate,
> * i.e., it ACKs something we sent.
>
> Modified: head/sys/netinet/tcp_sack.c
> ==============================================================================
> --- head/sys/netinet/tcp_sack.c Tue Dec 8 20:20:40 2015 (r292002)
> +++ head/sys/netinet/tcp_sack.c Tue Dec 8 21:21:48 2015 (r292003)
> @@ -345,17 +345,22 @@ tcp_sackhole_remove(struct tcpcb *tp, st
> * Process cumulative ACK and the TCP SACK option to update the scoreboard.
> * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
> * the sequence space).
> + * Returns 1 if incoming ACK has previously unknown SACK information,
> + * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
> + * to that (i.e. left edge moving) would also be considered a change in SACK
> + * information which is slightly different than rfc6675.
> */
> -void
> +int
> tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
> {
> struct sackhole *cur, *temp;
> struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
> - int i, j, num_sack_blks;
> + int i, j, num_sack_blks, sack_changed;
>
> INP_WLOCK_ASSERT(tp->t_inpcb);
>
> num_sack_blks = 0;
> + sack_changed = 0;
> /*
> * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
> * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
> @@ -392,7 +397,7 @@ tcp_sack_doack(struct tcpcb *tp, struct
> * received.
> */
> if (num_sack_blks == 0)
> - return;
> + return (sack_changed);
>
> /*
> * Sort the SACK blocks so we can update the scoreboard with just one
> @@ -443,6 +448,7 @@ tcp_sack_doack(struct tcpcb *tp, struct
> tp->snd_fack = sblkp->end;
> /* Go to the previous sack block. */
> sblkp--;
> + sack_changed = 1;
> } else {
> /*
> * We failed to add a new hole based on the current
> @@ -459,9 +465,11 @@ tcp_sack_doack(struct tcpcb *tp, struct
> SEQ_LT(tp->snd_fack, sblkp->end))
> tp->snd_fack = sblkp->end;
> }
> - } else if (SEQ_LT(tp->snd_fack, sblkp->end))
> + } else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
> /* fack is advanced. */
> tp->snd_fack = sblkp->end;
> + sack_changed = 1;
> + }
> /* We must have at least one SACK hole in scoreboard. */
> KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
> ("SACK scoreboard must not be empty"));
> @@ -490,6 +498,7 @@ tcp_sack_doack(struct tcpcb *tp, struct
> tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
> KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
> ("sackhint bytes rtx >= 0"));
> + sack_changed = 1;
> if (SEQ_LEQ(sblkp->start, cur->start)) {
> /* Data acks at least the beginning of hole. */
> if (SEQ_GEQ(sblkp->end, cur->end)) {
> @@ -545,6 +554,7 @@ tcp_sack_doack(struct tcpcb *tp, struct
> else
> sblkp--;
> }
> + return (sack_changed);
> }
>
> /*
>
> Modified: head/sys/netinet/tcp_var.h
> ==============================================================================
> --- head/sys/netinet/tcp_var.h Tue Dec 8 20:20:40 2015 (r292002)
> +++ head/sys/netinet/tcp_var.h Tue Dec 8 21:21:48 2015 (r292003)
> @@ -741,7 +741,7 @@ void tcp_hc_update(struct in_conninfo *
> extern struct pr_usrreqs tcp_usrreqs;
> tcp_seq tcp_new_isn(struct tcpcb *);
>
> -void tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
> +int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
> void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
> void tcp_clean_sackreport(struct tcpcb *tp);
> void tcp_sack_adjust(struct tcpcb *tp);
> _______________________________________________
> svn-src-head at freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscribe at freebsd.org"
More information about the svn-src-head
mailing list