git: 7994ef3c394d - main - Revert "tcp: move ECN handling code to a common file"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 05 Feb 2022 02:28:49 UTC
The branch main has been updated by rscheff: URL: https://cgit.FreeBSD.org/src/commit/?id=7994ef3c394d16e37af7a4848e58d01c28b81fbc commit 7994ef3c394d16e37af7a4848e58d01c28b81fbc Author: Richard Scheffenegger <rscheff@FreeBSD.org> AuthorDate: 2022-02-05 00:07:51 +0000 Commit: Richard Scheffenegger <rscheff@FreeBSD.org> CommitDate: 2022-02-05 00:07:51 +0000 Revert "tcp: move ECN handling code to a common file" This reverts commit 0c424c90eaa6602e07bca7836b1d178b91f2a88a. --- sys/conf/files | 1 - sys/netinet/tcp_ecn.c | 296 ------------------------------------------ sys/netinet/tcp_ecn.h | 55 -------- sys/netinet/tcp_input.c | 46 ++++++- sys/netinet/tcp_output.c | 63 ++++++--- sys/netinet/tcp_stacks/rack.c | 192 ++++++++++++++++++--------- sys/netinet/tcp_syncache.c | 15 ++- 7 files changed, 224 insertions(+), 444 deletions(-) diff --git a/sys/conf/files b/sys/conf/files index 148bd9f4f7b4..78921d2c9fa0 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4364,7 +4364,6 @@ netinet/sctp_usrreq.c optional inet sctp | inet6 sctp netinet/sctputil.c optional inet sctp | inet6 sctp netinet/siftr.c optional inet siftr alq | inet6 siftr alq netinet/tcp_debug.c optional tcpdebug -netinet/tcp_ecn.c optional inet | inet6 netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413 netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c deleted file mode 100644 index cf29431ea5d2..000000000000 --- a/sys/netinet/tcp_ecn.c +++ /dev/null @@ -1,296 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * Copyright (c) 2007-2008,2010 - * Swinburne University of Technology, Melbourne, Australia. - * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> - * Copyright (c) 2010 The FreeBSD Foundation - * Copyright (c) 2010-2011 Juniper Networks, Inc. - * Copyright (c) 2019 Richard Scheffenegger <srichard@netapp.com> - * All rights reserved. - * - * Portions of this software were developed at the Centre for Advanced Internet - * Architectures, Swinburne University of Technology, by Lawrence Stewart, - * James Healy and David Hayes, made possible in part by a grant from the Cisco - * University Research Program Fund at Community Foundation Silicon Valley. - * - * Portions of this software were developed at the Centre for Advanced - * Internet Architectures, Swinburne University of Technology, Melbourne, - * Australia by David Hayes under sponsorship from the FreeBSD Foundation. - * - * Portions of this software were developed by Robert N. M. Watson under - * contract to Juniper Networks, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95 - */ - -/* - * Utility functions to deal with Explicit Congestion Notification in TCP - * implementing the essential parts of the Accurate ECN extension - * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09 - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "opt_inet.h" -#include "opt_inet6.h" -#include "opt_tcpdebug.h" - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/kernel.h> -#include <sys/sysctl.h> -#include <sys/malloc.h> -#include <sys/mbuf.h> -#include <sys/socket.h> -#include <sys/socketvar.h> - -#include <machine/cpu.h> - -#include <vm/uma.h> - -#include <net/if.h> -#include <net/if_var.h> -#include <net/route.h> -#include <net/vnet.h> - -#include <netinet/in.h> -#include <netinet/in_systm.h> -#include <netinet/ip.h> -#include <netinet/in_var.h> -#include <netinet/in_pcb.h> -#include <netinet/ip_var.h> -#include <netinet/ip6.h> -#include <netinet/icmp6.h> -#include <netinet6/nd6.h> -#include <netinet6/ip6_var.h> -#include <netinet6/in6_pcb.h> -#include <netinet/tcp.h> -#include <netinet/tcp_fsm.h> -#include <netinet/tcp_seq.h> -#include <netinet/tcp_timer.h> -#include <netinet/tcp_var.h> -#include <netinet6/tcp6_var.h> -#include <netinet/tcpip.h> -#include <netinet/tcp_ecn.h> - - -/* - * Process incoming SYN,ACK packet - */ -void -tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos) -{ - thflags &= (TH_CWR|TH_ECE); - - if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && - V_tcp_do_ecn) { - tp->t_flags2 |= TF2_ECN_PERMIT; - KMOD_TCPSTAT_INC(tcps_ecn_shs); - } -} - -/* - * Handle parallel SYN for ECN - */ -void -tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos) -{ - if (thflags & TH_ACK) - return; - if (V_tcp_do_ecn == 0) - return; - if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) { - /* RFC3168 ECN handling */ - if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) { - tp->t_flags2 |= TF2_ECN_PERMIT; - tp->t_flags2 |= TF2_ECN_SND_ECE; - KMOD_TCPSTAT_INC(tcps_ecn_shs); - } - } -} - -/* - * TCP ECN processing. - */ -int -tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos) -{ - int delta_ace = 0; - - if (tp->t_flags2 & TF2_ECN_PERMIT) { - switch (iptos & IPTOS_ECN_MASK) { - case IPTOS_ECN_CE: - KMOD_TCPSTAT_INC(tcps_ecn_ce); - break; - case IPTOS_ECN_ECT0: - KMOD_TCPSTAT_INC(tcps_ecn_ect0); - break; - case IPTOS_ECN_ECT1: - KMOD_TCPSTAT_INC(tcps_ecn_ect1); - break; - } - - /* RFC3168 ECN handling */ - if (thflags & TH_ECE) - delta_ace = 1; - if (thflags & TH_CWR) { - tp->t_flags2 &= ~TF2_ECN_SND_ECE; - tp->t_flags |= TF_ACKNOW; - } - if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) - tp->t_flags2 |= TF2_ECN_SND_ECE; - - /* Process a packet differently from RFC3168. */ - cc_ecnpkt_handler_flags(tp, thflags, iptos); - } - - return delta_ace; -} - -/* - * Send ECN setup <SYN> packet header flags - */ -uint16_t -tcp_ecn_output_syn_sent(struct tcpcb *tp) -{ - uint16_t thflags = 0; - - if (V_tcp_do_ecn == 1) { - /* Send a RFC3168 ECN setup <SYN> packet */ - if (tp->t_rxtshift >= 1) { - if (tp->t_rxtshift <= V_tcp_ecn_maxretries) - thflags = TH_ECE|TH_CWR; - } else - thflags = TH_ECE|TH_CWR; - } - - return thflags; -} - -/* - * output processing of ECN feature - * returning IP ECN header codepoint - */ -int -tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len) -{ - int ipecn = IPTOS_ECN_NOTECT; - bool newdata; - - /* - * If the peer has ECN, mark data packets with - * ECN capable transmission (ECT). - * Ignore pure control packets, retransmissions - * and window probes. - */ - newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - !((tp->t_flags & TF_FORCEDATA) && len == 1)); - if (newdata) { - ipecn = IPTOS_ECN_ECT0; - KMOD_TCPSTAT_INC(tcps_ecn_ect0); - } - /* - * Reply with proper ECN notifications. - */ - if (newdata && - (tp->t_flags2 & TF2_ECN_SND_CWR)) { - *thflags |= TH_CWR; - tp->t_flags2 &= ~TF2_ECN_SND_CWR; - } - if (tp->t_flags2 & TF2_ECN_SND_ECE) - *thflags |= TH_ECE; - - return ipecn; -} - -/* - * Set up the ECN related tcpcb fields from - * a syncache entry - */ -void -tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc) -{ - if (sc->sc_flags & SCF_ECN) { - switch (sc->sc_flags & SCF_ECN) { - case SCF_ECN: - tp->t_flags2 |= TF2_ECN_PERMIT; - break; - /* undefined SCF codepoint */ - default: - break; - } - } -} - -/* - * Process a <SYN> packets ECN information, and provide the - * syncache with the relevant information. - */ -int -tcp_ecn_syncache_add(uint16_t thflags, int iptos) -{ - int scflags = 0; - - switch (thflags & (TH_CWR|TH_ECE)) { - /* no ECN */ - case (0|0): - break; - /* legacy ECN */ - case (TH_CWR|TH_ECE): - scflags = SCF_ECN; - break; - default: - break; - } - return scflags; -} - -/* - * Set up the ECN information for the <SYN,ACK> from - * syncache information. - */ -uint16_t -tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc) -{ - if ((thflags & TH_SYN) && - (sc->sc_flags & SCF_ECN)) { - switch (sc->sc_flags & SCF_ECN) { - case SCF_ECN: - thflags |= (0 | TH_ECE); - KMOD_TCPSTAT_INC(tcps_ecn_shs); - break; - /* undefined SCF codepoint */ - default: - break; - } - } - return thflags; -} diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h deleted file mode 100644 index 5ee49ce53a7a..000000000000 --- a/sys/netinet/tcp_ecn.h +++ /dev/null @@ -1,55 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright (c) 1982, 1986, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)tcp_ecn.h 8.4 (Berkeley) 5/24/95 - * $FreeBSD$ - */ - -#ifndef _NETINET_TCP_ECN_H_ -#define _NETINET_TCP_ECN_H_ - -#include <netinet/tcp.h> -#include <netinet/tcp_var.h> -#include <netinet/tcp_syncache.h> - -#ifdef _KERNEL - -void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int); -void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int); -int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int); -uint16_t tcp_ecn_output_syn_sent(struct tcpcb *); -int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int); -void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *); -int tcp_ecn_syncache_add(uint16_t, int); -uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *); - -#endif /* _KERNEL */ - -#endif /* _NETINET_TCP_ECN_H_ */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index d0b323723e6b..9a1f3ace2541 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -104,7 +104,6 @@ __FBSDID("$FreeBSD$"); #include <netinet6/ip6_var.h> #include <netinet6/nd6.h> #include <netinet/tcp.h> -#include <netinet/tcp_ecn.h> #include <netinet/tcp_fsm.h> #include <netinet/tcp_log_buf.h> #include <netinet/tcp_seq.h> @@ -1518,8 +1517,7 @@ void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { - uint16_t thflags; - int acked, ourfinisacked, needoutput = 0, sack_changed; + int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; int rstreason, todrop, win, incforsyn = 0; uint32_t tiwin; uint16_t nsegs; @@ -1599,8 +1597,32 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, /* * TCP ECN processing. */ - if (tcp_ecn_input_segment(tp, thflags, iptos)) - cc_cong_signal(tp, th, CC_ECN); + if (tp->t_flags2 & TF2_ECN_PERMIT) { + if (thflags & TH_CWR) { + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + tp->t_flags |= TF_ACKNOW; + } + switch (iptos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + tp->t_flags2 |= TF2_ECN_SND_ECE; + TCPSTAT_INC(tcps_ecn_ce); + break; + case IPTOS_ECN_ECT0: + TCPSTAT_INC(tcps_ecn_ect0); + break; + case IPTOS_ECN_ECT1: + TCPSTAT_INC(tcps_ecn_ect1); + break; + } + + /* Process a packet differently from RFC3168. */ + cc_ecnpkt_handler(tp, th, iptos); + + /* Congestion experienced. */ + if (thflags & TH_ECE) { + cc_cong_signal(tp, th, CC_ECN); + } + } /* * Parse options on any incoming segment. @@ -1641,7 +1663,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { /* Handle parallel SYN for ECN */ - tcp_ecn_input_parallel_syn(tp, thflags, iptos); + if (!(thflags & TH_ACK) && + ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) && + ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) { + tp->t_flags2 |= TF2_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_SND_ECE; + TCPSTAT_INC(tcps_ecn_shs); + } if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE) && !(tp->t_flags & TF_NOOPT)) { @@ -2047,7 +2075,11 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, else tp->t_flags |= TF_ACKNOW; - tcp_ecn_input_syn_sent(tp, thflags, iptos); + if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && + (V_tcp_do_ecn == 1)) { + tp->t_flags2 |= TF2_ECN_PERMIT; + TCPSTAT_INC(tcps_ecn_shs); + } /* * Received <SYN,ACK> in SYN_SENT[*] state. diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index df9ce167b7d5..ce6d9b86e73f 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$"); #ifdef TCP_OFFLOAD #include <netinet/tcp_offload.h> #endif -#include <netinet/tcp_ecn.h> #include <netipsec/ipsec_support.h> @@ -200,8 +199,7 @@ tcp_default_output(struct tcpcb *tp) struct socket *so = tp->t_inpcb->inp_socket; int32_t len; uint32_t recwin, sendwin; - uint16_t flags; - int off, error = 0; /* Keep compiler happy */ + int off, flags, error = 0; /* Keep compiler happy */ u_int if_hw_tsomaxsegcount = 0; u_int if_hw_tsomaxsegsize = 0; struct mbuf *m; @@ -1199,27 +1197,54 @@ send: * resend those bits a number of times as per * RFC 3168. */ - if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) { - flags |= tcp_ecn_output_syn_sent(tp); + if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { + if (tp->t_rxtshift >= 1) { + if (tp->t_rxtshift <= V_tcp_ecn_maxretries) + flags |= TH_ECE|TH_CWR; + } else + flags |= TH_ECE|TH_CWR; } - /* Also handle parallel SYN for ECN */ - if ((TCPS_HAVERCVDSYN(tp->t_state)) && - (tp->t_flags2 & TF2_ECN_PERMIT)) { - int ect = tcp_ecn_output_established(tp, &flags, len); - if ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags2 & TF2_ECN_SND_ECE)) + /* Handle parallel SYN for ECN */ + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) { + flags |= TH_ECE; tp->t_flags2 &= ~TF2_ECN_SND_ECE; + } + + if (TCPS_HAVEESTABLISHED(tp->t_state) && + (tp->t_flags2 & TF2_ECN_PERMIT)) { + /* + * If the peer has ECN, mark data packets with + * ECN capable transmission (ECT). + * Ignore pure ack packets, retransmissions and window probes. + */ + if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + (sack_rxmit == 0) && + !((tp->t_flags & TF_FORCEDATA) && len == 1 && + SEQ_LT(tp->snd_una, tp->snd_max))) { #ifdef INET6 - if (isipv6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(ect << 20); - } - else + if (isipv6) { + ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); + } + else #endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= ect; + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= IPTOS_ECN_ECT0; + } + TCPSTAT_INC(tcps_ecn_ect0); + /* + * Reply with proper ECN notifications. + * Only set CWR on new data segments. + */ + if (tp->t_flags2 & TF2_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; + } } + if (tp->t_flags2 & TF2_ECN_SND_ECE) + flags |= TH_ECE; } /* diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 7bc37a9552a7..6d5b3f2133a6 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -113,7 +113,6 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include <netinet6/tcp6_var.h> #endif -#include <netinet/tcp_ecn.h> #include <netipsec/ipsec_support.h> @@ -11407,9 +11406,11 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, struct socket *so, tp->t_flags |= TF_ACKNOW; rack->rc_dack_toggle = 0; } - - tcp_ecn_input_syn_sent(tp, thflags, iptos); - + if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && + (V_tcp_do_ecn == 1)) { + tp->t_flags2 |= TF2_ECN_PERMIT; + KMOD_TCPSTAT_INC(tcps_ecn_shs); + } if (SEQ_GT(th->th_ack, tp->snd_una)) { /* * We advance snd_una for the @@ -13682,8 +13683,31 @@ rack_do_compressed_ack_processing(struct tcpcb *tp, struct socket *so, struct mb } tp->t_rcvtime = ticks; /* Now what about ECN? */ - if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint)) - rack_cong_signal(tp, CC_ECN, ae->ack); + if (tp->t_flags2 & TF2_ECN_PERMIT) { + if (ae->flags & TH_CWR) { + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + tp->t_flags |= TF_ACKNOW; + } + switch (ae->codepoint & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + tp->t_flags2 |= TF2_ECN_SND_ECE; + KMOD_TCPSTAT_INC(tcps_ecn_ce); + break; + case IPTOS_ECN_ECT0: + KMOD_TCPSTAT_INC(tcps_ecn_ect0); + break; + case IPTOS_ECN_ECT1: + KMOD_TCPSTAT_INC(tcps_ecn_ect1); + break; + } + + /* Process a packet differently from RFC3168. */ + cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint); + /* Congestion experienced. */ + if (ae->flags & TH_ECE) { + rack_cong_signal(tp, CC_ECN, ae->ack); + } + } #ifdef TCP_ACCOUNTING /* Count for the specific type of ack in */ counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1); @@ -14433,8 +14457,32 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so, * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move * this to occur after we've validated the segment. */ - if (tcp_ecn_input_segment(tp, thflags, iptos)) - rack_cong_signal(tp, CC_ECN, th->th_ack); + if (tp->t_flags2 & TF2_ECN_PERMIT) { + if (thflags & TH_CWR) { + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + tp->t_flags |= TF_ACKNOW; + } + switch (iptos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + tp->t_flags2 |= TF2_ECN_SND_ECE; + KMOD_TCPSTAT_INC(tcps_ecn_ce); + break; + case IPTOS_ECN_ECT0: + KMOD_TCPSTAT_INC(tcps_ecn_ect0); + break; + case IPTOS_ECN_ECT1: + KMOD_TCPSTAT_INC(tcps_ecn_ect1); + break; + } + + /* Process a packet differently from RFC3168. */ + cc_ecnpkt_handler(tp, th, iptos); + + /* Congestion experienced. */ + if (thflags & TH_ECE) { + rack_cong_signal(tp, CC_ECN, th->th_ack); + } + } /* * If echoed timestamp is later than the current time, fall back to @@ -14468,7 +14516,13 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { /* Handle parallel SYN for ECN */ - tcp_ecn_input_parallel_syn(tp, thflags, iptos); + if (!(thflags & TH_ACK) && + ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) && + ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) { + tp->t_flags2 |= TF2_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_SND_ECE; + TCPSTAT_INC(tcps_ecn_shs); + } if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; @@ -16002,24 +16056,6 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma udp->uh_ulen = htons(ulen); } m->m_pkthdr.rcvif = (struct ifnet *)0; - if (TCPS_HAVERCVDSYN(tp->t_state) && - (tp->t_flags2 & TF2_ECN_PERMIT)) { - int ect = tcp_ecn_output_established(tp, &flags, len); - if ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags2 & TF2_ECN_SND_ECE)) - tp->t_flags2 &= ~TF2_ECN_SND_ECE; -#ifdef INET6 - if (rack->r_is_v6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(ect << 20); - } - else -#endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= ect; - } - } m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ #ifdef INET6 if (rack->r_is_v6) { @@ -16343,8 +16379,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack *rack, uint64_t ts_val, u_char opt[TCP_MAXOLEN]; uint32_t hdrlen, optlen; int cnt_thru = 1; - int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0; - uint16_t flags; + int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0; uint32_t s_soff; uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegsize; @@ -16493,23 +16528,37 @@ again: udp->uh_ulen = htons(ulen); } m->m_pkthdr.rcvif = (struct ifnet *)0; - if (TCPS_HAVERCVDSYN(tp->t_state) && + if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags2 & TF2_ECN_PERMIT)) { - int ect = tcp_ecn_output_established(tp, &flags, len); - if ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags2 & TF2_ECN_SND_ECE)) - tp->t_flags2 &= ~TF2_ECN_SND_ECE; + /* + * If the peer has ECN, mark data packets with ECN capable + * transmission (ECT). Ignore pure ack packets, + * retransmissions. + */ + if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) { #ifdef INET6 - if (rack->r_is_v6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(ect << 20); - } - else + if (rack->r_is_v6) { + ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); + } + else #endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= ect; + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= IPTOS_ECN_ECT0; + } + KMOD_TCPSTAT_INC(tcps_ecn_ect0); + /* + * Reply with proper ECN notifications. + * Only set CWR on new data segments. + */ + if (tp->t_flags2 & TF2_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; + } } + if (tp->t_flags2 & TF2_ECN_SND_ECE) + flags |= TH_ECE; } m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ #ifdef INET6 @@ -16737,8 +16786,7 @@ rack_output(struct tcpcb *tp) struct socket *so; uint32_t recwin; uint32_t sb_offset, s_moff = 0; - int32_t len, error = 0; - uint16_t flags; + int32_t len, flags, error = 0; struct mbuf *m, *s_mb = NULL; struct mbuf *mb; uint32_t if_hw_tsomaxsegcount = 0; @@ -18548,27 +18596,51 @@ send: * are on a retransmit, we may resend those bits a number of times * as per RFC 3168. */ - if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) { - flags |= tcp_ecn_output_syn_sent(tp); + if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { + if (tp->t_rxtshift >= 1) { + if (tp->t_rxtshift <= V_tcp_ecn_maxretries) + flags |= TH_ECE | TH_CWR; + } else + flags |= TH_ECE | TH_CWR; } - /* Also handle parallel SYN for ECN */ - if (TCPS_HAVERCVDSYN(tp->t_state) && + /* Handle parallel SYN for ECN */ + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) { + flags |= TH_ECE; + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + } + if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags2 & TF2_ECN_PERMIT)) { - int ect = tcp_ecn_output_established(tp, &flags, len); - if ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags2 & TF2_ECN_SND_ECE)) - tp->t_flags2 &= ~TF2_ECN_SND_ECE; + /* + * If the peer has ECN, mark data packets with ECN capable + * transmission (ECT). Ignore pure ack packets, + * retransmissions. + */ + if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + (sack_rxmit == 0)) { #ifdef INET6 - if (isipv6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(ect << 20); - } - else + if (isipv6) { + ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); + } + else #endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= ect; + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= IPTOS_ECN_ECT0; + } + KMOD_TCPSTAT_INC(tcps_ecn_ect0); + /* + * Reply with proper ECN notifications. + * Only set CWR on new data segments. + */ + if (tp->t_flags2 & TF2_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; + } } + if (tp->t_flags2 & TF2_ECN_SND_ECE) + flags |= TH_ECE; } /* * If we are doing retransmissions, then snd_nxt will not reflect diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index ed4adda59c22..5fcafa44cc97 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -89,7 +89,6 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp_timer.h> #include <netinet/tcp_var.h> #include <netinet/tcp_syncache.h> -#include <netinet/tcp_ecn.h> #ifdef INET6 #include <netinet6/tcp6_var.h> #endif @@ -1028,7 +1027,8 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tp->t_flags |= TF_SACK_PERMIT; } - tcp_ecn_syncache_socket(tp, sc); + if (sc->sc_flags & SCF_ECN) + tp->t_flags2 |= TF2_ECN_PERMIT; /* * Set up MSS and get cached values from tcp_hostcache. @@ -1743,9 +1743,9 @@ skip_alloc: sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ if (ltflags & TF_NOOPT) sc->sc_flags |= SCF_NOOPT; - /* ECN Handshake */ - if (V_tcp_do_ecn) - sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos); + if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) && + V_tcp_do_ecn) + sc->sc_flags |= SCF_ECN; if (V_tcp_syncookies) sc->sc_iss = syncookie_generate(sch, sc); @@ -1938,7 +1938,10 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) th->th_win = htons(sc->sc_wnd); th->th_urp = 0; - flags = tcp_ecn_syncache_respond(flags, sc); + if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) { + flags |= TH_ECE; + TCPSTAT_INC(tcps_ecn_shs); + } tcp_set_flags(th, flags); /* Tack on the TCP options. */