git: 5d8fd932e418 - main - This brings into sync FreeBSD with the netflix versions of rack and bbr. This fixes several breakages (panics) since the tcp_lro code was committed that have been reported. Quite a few new features are now in rack (prefetching of DGP -- Dynamic Goodput Pacing among the largest). There is also support for ack-war prevention. Documents coming soon on rack..
Randall Stewart
rrs at FreeBSD.org
Thu May 6 15:26:29 UTC 2021
The branch main has been updated by rrs:
URL: https://cgit.FreeBSD.org/src/commit/?id=5d8fd932e418f03e98b3469c4088a36f0ef34ffe
commit 5d8fd932e418f03e98b3469c4088a36f0ef34ffe
Author: Randall Stewart <rrs at FreeBSD.org>
AuthorDate: 2021-05-06 15:22:26 +0000
Commit: Randall Stewart <rrs at FreeBSD.org>
CommitDate: 2021-05-06 15:22:26 +0000
This brings into sync FreeBSD with the netflix versions of rack and bbr.
This fixes several breakages (panics) since the tcp_lro code was
committed that have been reported. Quite a few new features are
now in rack (prefetching of DGP -- Dynamic Goodput Pacing among the
largest). There is also support for ack-war prevention. Documents
coming soon on rack..
Sponsored by: Netflix
Reviewed by: rscheff, mtuexen
Differential Revision: https://reviews.freebsd.org/D30036
---
sys/netinet/cc/cc.h | 7 +-
sys/netinet/cc/cc_newreno.c | 34 +-
sys/netinet/cc/cc_newreno.h | 13 +-
sys/netinet/tcp.h | 24 +-
sys/netinet/tcp_accounting.h | 39 +
sys/netinet/tcp_input.c | 10 +-
sys/netinet/tcp_log_buf.h | 8 +-
sys/netinet/tcp_ratelimit.c | 29 +-
sys/netinet/tcp_ratelimit.h | 4 +-
sys/netinet/tcp_sack.c | 11 +
sys/netinet/tcp_stacks/bbr.c | 92 +-
sys/netinet/tcp_stacks/rack.c | 9876 ++++++++++++++++++++++--------
sys/netinet/tcp_stacks/rack_bbr_common.c | 473 +-
sys/netinet/tcp_stacks/rack_bbr_common.h | 21 +-
sys/netinet/tcp_stacks/tcp_bbr.h | 6 +-
sys/netinet/tcp_stacks/tcp_rack.h | 201 +-
sys/netinet/tcp_subr.c | 105 +
sys/netinet/tcp_var.h | 12 +
18 files changed, 8181 insertions(+), 2784 deletions(-)
diff --git a/sys/netinet/cc/cc.h b/sys/netinet/cc/cc.h
index a52cfca9579b..be84a6841d15 100644
--- a/sys/netinet/cc/cc.h
+++ b/sys/netinet/cc/cc.h
@@ -91,15 +91,20 @@ struct cc_var {
struct sctp_nets *sctp;
} ccvc;
uint16_t nsegs; /* # segments coalesced into current chain. */
+ uint8_t labc; /* Dont use system abc use passed in */
};
/* cc_var flags. */
#define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */
#define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */
-#define CCF_UNUSED1 0x0004 /* unused */
+#define CCF_USE_LOCAL_ABC 0x0004 /* Dont use the system l_abc val */
#define CCF_ACKNOW 0x0008 /* Will this ack be sent now? */
#define CCF_IPHDR_CE 0x0010 /* Does this packet set CE bit? */
#define CCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? */
+#define CCF_MAX_CWND 0x0040 /* Have we reached maximum cwnd? */
+#define CCF_CHG_MAX_CWND 0x0080 /* Cubic max_cwnd changed, for K */
+#define CCF_USR_IWND 0x0100 /* User specified initial window */
+#define CCF_USR_IWND_INIT_NSEG 0x0200 /* Convert segs to bytes on conn init */
/* ACK types passed to the ack_received() hook. */
#define CC_ACK 0x0001 /* Regular in sequence ACK. */
diff --git a/sys/netinet/cc/cc_newreno.c b/sys/netinet/cc/cc_newreno.c
index daf4e3351ffa..5f91ad8d1e7b 100644
--- a/sys/netinet/cc/cc_newreno.c
+++ b/sys/netinet/cc/cc_newreno.c
@@ -86,8 +86,8 @@ static void newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void newreno_post_recovery(struct cc_var *ccv);
static int newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf);
-VNET_DEFINE_STATIC(uint32_t, newreno_beta) = 50;
-VNET_DEFINE_STATIC(uint32_t, newreno_beta_ecn) = 80;
+VNET_DEFINE(uint32_t, newreno_beta) = 50;
+VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
#define V_newreno_beta VNET(newreno_beta)
#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
@@ -101,11 +101,6 @@ struct cc_algo newreno_cc_algo = {
.ctl_output = newreno_ctl_output,
};
-struct newreno {
- uint32_t beta;
- uint32_t beta_ecn;
-};
-
static inline struct newreno *
newreno_malloc(struct cc_var *ccv)
{
@@ -182,9 +177,15 @@ newreno_ack_received(struct cc_var *ccv, uint16_t type)
* XXXLAS: Find a way to signal SS after RTO that
* doesn't rely on tcpcb vars.
*/
+ uint16_t abc_val;
+
+ if (ccv->flags & CCF_USE_LOCAL_ABC)
+ abc_val = ccv->labc;
+ else
+ abc_val = V_tcp_abc_l_var;
if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
incr = min(ccv->bytes_this_ack,
- ccv->nsegs * V_tcp_abc_l_var *
+ ccv->nsegs * abc_val *
CCV(ccv, t_maxseg));
else
incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
@@ -237,11 +238,19 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
u_int mss;
cwin = CCV(ccv, snd_cwnd);
- mss = tcp_maxseg(ccv->ccvc.tcp);
+ mss = tcp_fixed_maxseg(ccv->ccvc.tcp);
nreno = ccv->cc_data;
beta = (nreno == NULL) ? V_newreno_beta : nreno->beta;
beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn;
- if (V_cc_do_abe && type == CC_ECN)
+
+ /*
+ * Note that we only change the backoff for ECN if the
+ * global sysctl V_cc_do_abe is set <or> the stack itself
+ * has set a flag in our newreno_flags (due to pacing) telling
+ * us to use the lower valued back-off.
+ */
+ if (V_cc_do_abe ||
+ (nreno && (nreno->newreno_flags & CC_NEWRENO_BETA_ECN) && (type == CC_ECN)))
factor = beta_ecn;
else
factor = beta;
@@ -260,8 +269,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type)
V_cc_do_abe && V_cc_abe_frlossreduce)) {
CCV(ccv, snd_ssthresh) =
((uint64_t)CCV(ccv, snd_ssthresh) *
- (uint64_t)beta) /
- (100ULL * (uint64_t)beta_ecn);
+ (uint64_t)beta) / (uint64_t)beta_ecn;
}
if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
CCV(ccv, snd_ssthresh) = cwin;
@@ -344,7 +352,7 @@ newreno_ctl_output(struct cc_var *ccv, struct sockopt *sopt, void *buf)
nreno->beta = opt->val;
break;
case CC_NEWRENO_BETA_ECN:
- if (!V_cc_do_abe)
+ if ((!V_cc_do_abe) && ((nreno->newreno_flags & CC_NEWRENO_BETA_ECN) == 0))
return (EACCES);
nreno->beta_ecn = opt->val;
break;
diff --git a/sys/netinet/cc/cc_newreno.h b/sys/netinet/cc/cc_newreno.h
index 9e2a3cff5fe9..16cf1757e830 100644
--- a/sys/netinet/cc/cc_newreno.h
+++ b/sys/netinet/cc/cc_newreno.h
@@ -31,12 +31,17 @@
#define CCALGONAME_NEWRENO "newreno"
+struct newreno {
+ uint32_t beta;
+ uint32_t beta_ecn;
+ uint32_t newreno_flags;
+};
+
struct cc_newreno_opts {
- int name;
+ int name;
uint32_t val;
};
-#define CC_NEWRENO_BETA 1
-#define CC_NEWRENO_BETA_ECN 2
-
+#define CC_NEWRENO_BETA 1 /* Beta for normal DUP-ACK/Sack recovery */
+#define CC_NEWRENO_BETA_ECN 2 /* ECN Beta for Abe */
#endif /* _CC_NEWRENO_H */
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index d2bf1f8431fd..50f0811a6517 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -181,13 +181,24 @@ struct tcphdr {
#define TCP_TXTLS_MODE 40 /* Transmit TLS mode */
#define TCP_RXTLS_ENABLE 41 /* TLS framing and encryption for receive */
#define TCP_RXTLS_MODE 42 /* Receive TLS mode */
+#define TCP_IWND_NB 43 /* Override initial window (units: bytes) */
+#define TCP_IWND_NSEG 44 /* Override initial window (units: MSS segs) */
+#define TCP_LOGID_CNT 46 /* get number of connections with the same ID */
+#define TCP_LOG_TAG 47 /* configure tag for grouping logs */
+#define TCP_USER_LOG 48 /* userspace log event */
#define TCP_CONGESTION 64 /* get/set congestion control algorithm */
#define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */
+#define TCP_MAXUNACKTIME 68 /* maximum time without making progress (sec) */
+#define TCP_MAXPEAKRATE 69 /* maximum peak rate allowed (kbps) */
+#define TCP_IDLE_REDUCE 70 /* Reduce cwnd on idle input */
#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP tunneling via the specified port */
#define TCP_DELACK 72 /* socket option for delayed ack */
#define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */
#define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */
#define TCP_SHARED_CWND_ALLOWED 75 /* Use of a shared cwnd is allowed */
+#define TCP_PROC_ACCOUNTING 76 /* Do accounting on tcp cpu usage and counts */
+#define TCP_USE_CMP_ACKS 77 /* The transport can handle the Compressed mbuf acks */
+#define TCP_PERF_INFO 78 /* retrieve accounting counters */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
@@ -201,7 +212,7 @@ struct tcphdr {
#define TCP_RACK_MBUF_QUEUE 1050 /* Do we allow mbuf queuing if supported */
#define TCP_RACK_PROP 1051 /* RACK proportional rate reduction (bool) */
#define TCP_RACK_TLP_REDUCE 1052 /* RACK TLP cwnd reduction (bool) */
-#define TCP_RACK_PACE_REDUCE 1053 /* RACK Pacing reduction factor (divisor) */
+#define TCP_RACK_PACE_REDUCE 1053 /* RACK Pacingv reduction factor (divisor) */
#define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send */
#define TCP_RACK_PACE_ALWAYS 1055 /* Use the always pace method */
#define TCP_RACK_PROP_RATE 1056 /* The proportional reduction rate */
@@ -284,6 +295,16 @@ struct tcphdr {
#define TCP_RACK_PACE_TO_FILL 1127 /* If we are not in recovery, always pace to fill the cwnd in 1 RTT */
#define TCP_SHARED_CWND_TIME_LIMIT 1128 /* we should limit to low time values the scwnd life */
#define TCP_RACK_PROFILE 1129 /* Select a profile that sets multiple options */
+#define TCP_HDWR_RATE_CAP 1130 /* Allow hardware rates to cap pacing rate */
+#define TCP_PACING_RATE_CAP 1131 /* Highest rate allowed in pacing in bytes per second (uint64_t) */
+#define TCP_HDWR_UP_ONLY 1132 /* Allow the pacing rate to climb but not descend (with the exception of fill-cw */
+#define TCP_RACK_ABC_VAL 1133 /* Set a local ABC value different then the system default */
+#define TCP_REC_ABC_VAL 1134 /* Do we use the ABC value for recovery or the override one from sysctl */
+#define TCP_RACK_MEASURE_CNT 1135 /* How many measurements are required in GP pacing */
+#define TCP_DEFER_OPTIONS 1136 /* Defer options until the proper number of measurements occur, does not defer TCP_RACK_MEASURE_CNT */
+#define TCP_FAST_RSM_HACK 1137 /* Do we do the broken thing where we don't twiddle the TLP bits properly in fast_rsm_output? */
+#define TCP_RACK_PACING_BETA 1138 /* Changing the beta for pacing */
+#define TCP_RACK_PACING_BETA_ECN 1139 /* Changing the beta for ecn with pacing */
/* Start of reserved space for third-party user-settable options. */
#define TCP_VENDOR SO_VENDOR
@@ -295,6 +316,7 @@ struct tcphdr {
#define TCPI_OPT_WSCALE 0x04
#define TCPI_OPT_ECN 0x08
#define TCPI_OPT_TOE 0x10
+#define TCPI_OPT_TFO 0x20
/* Maximum length of log ID. */
#define TCP_LOG_ID_LEN 64
diff --git a/sys/netinet/tcp_accounting.h b/sys/netinet/tcp_accounting.h
new file mode 100644
index 000000000000..a2e2d4edd46d
--- /dev/null
+++ b/sys/netinet/tcp_accounting.h
@@ -0,0 +1,39 @@
+#ifndef __tcp_accounting_h__
+#define __tcp_accounting_h__
+/*
+ * Return values from tcp_do_ack_accounting
+ * and indexs to the into the tcp_proc_time[]
+ * array.
+ */
+#define ACK_BEHIND 0
+#define ACK_SACK 1
+#define ACK_CUMACK 2
+#define ACK_CUMACK_SACK 3
+#define ACK_DUPACK 4
+#define ACK_RWND 5
+/* Added values for tracking output too */
+#define SND_BLOCKED 6
+#define SND_LIMITED 7
+#define SND_OUT_DATA 8
+#define SND_OUT_ACK 9
+#define SND_OUT_FAIL 10
+/* We also count in the counts array two added (MSS sent and ACKS In) */
+#define CNT_OF_MSS_OUT 11
+#define CNT_OF_ACKS_IN 12
+
+/* for the tcpcb we add two more cycle counters */
+#define CYC_HANDLE_MAP 11
+#define CYC_HANDLE_ACK 12
+
+/* Should the tp->xxx array's be alloc'ed? */
+/* #define TCP_NUM_PROC_COUNTERS 11 defined in tcp_var.h */
+/* #define TCP_NUM_CNT_COUNTERS 13 defined in tcp_var.h */
+
+#ifdef _KERNEL
+#ifdef TCP_ACCOUNTING
+extern counter_u64_t tcp_cnt_counters[TCP_NUM_CNT_COUNTERS];
+extern counter_u64_t tcp_proc_time[TCP_NUM_PROC_COUNTERS];
+#endif
+#endif
+
+#endif
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index bdbf28556149..bfa95feb7eee 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -526,7 +526,7 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
(V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
void inline
-cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+cc_ecnpkt_handler_flags(struct tcpcb *tp, uint16_t flags, uint8_t iptos)
{
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -544,7 +544,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
break;
}
- if (th->th_flags & TH_CWR)
+ if (flags & TH_CWR)
tp->ccv->flags |= CCF_TCPHDR_CWR;
else
tp->ccv->flags &= ~CCF_TCPHDR_CWR;
@@ -558,6 +558,12 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
}
}
+void inline
+cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
+{
+ cc_ecnpkt_handler_flags(tp, th->th_flags, iptos);
+}
+
/*
* TCP input handling is split into multiple parts:
* tcp6_input is a thin wrapper around tcp_input for the extended
diff --git a/sys/netinet/tcp_log_buf.h b/sys/netinet/tcp_log_buf.h
index bdd56c94587e..47993660cb1f 100644
--- a/sys/netinet/tcp_log_buf.h
+++ b/sys/netinet/tcp_log_buf.h
@@ -174,7 +174,7 @@ enum tcp_log_events {
TCP_LOG_IN = 1, /* Incoming packet 1 */
TCP_LOG_OUT, /* Transmit (without other event) 2 */
TCP_LOG_RTO, /* Retransmit timeout 3 */
- TCP_LOG_TF_ACK, /* Transmit due to TF_ACK 4 */
+ TCP_LOG_SB_WAKE, /* Awaken socket buffer 4 */
TCP_LOG_BAD_RETRAN, /* Detected bad retransmission 5 */
TCP_LOG_PRR, /* Doing PRR 6 */
TCP_LOG_REORDER, /* Detected reorder 7 */
@@ -200,7 +200,7 @@ enum tcp_log_events {
BBR_LOG_DOSEG_DONE, /* hpts do_segment completes 27 */
BBR_LOG_EXIT_GAIN, /* hpts do_segment completes 28 */
BBR_LOG_THRESH_CALC, /* Doing threshold calculation 29 */
- BBR_LOG_EXTRACWNDGAIN, /* Removed 30 */
+ TCP_LOG_MAPCHG, /* Map Changes to the sendmap 30 */
TCP_LOG_USERSEND, /* User level sends data 31 */
BBR_RSM_CLEARED, /* RSM cleared of ACK flags 32 */
BBR_LOG_STATE_TARGET, /* Log of target at state 33 */
@@ -232,7 +232,9 @@ enum tcp_log_events {
TCP_LOG_USER_EVENT, /* User space event data 59 */
TCP_LOG_SENDFILE, /* sendfile() logging for TCP connections 60 */
TCP_LOG_HTTP_T, /* logging of http request tracking 61 */
- TCP_LOG_END /* End (keep at end) 62 */
+ TCP_LOG_ACCOUNTING, /* Log of TCP Accounting data 62 */
+ TCP_LOG_FSB, /* FSB information 63 */
+ TCP_LOG_END /* End (keep at end) 64 */
};
enum tcp_log_states {
diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c
index 8f2cf3d8d061..c33b2872e91f 100644
--- a/sys/netinet/tcp_ratelimit.c
+++ b/sys/netinet/tcp_ratelimit.c
@@ -367,11 +367,22 @@ rl_add_syctl_entries(struct sysctl_oid *rl_sysctl_root, struct tcp_rate_set *rs)
OID_AUTO, "pacetime", CTLFLAG_RD,
&rs->rs_rlt[i].time_between, 0,
"Time hardware inserts between 1500 byte sends");
- SYSCTL_ADD_U64(&rs->sysctl_ctx,
+ SYSCTL_ADD_LONG(&rs->sysctl_ctx,
SYSCTL_CHILDREN(rl_rate_num),
OID_AUTO, "rate", CTLFLAG_RD,
- &rs->rs_rlt[i].rate, 0,
+ &rs->rs_rlt[i].rate,
"Rate in bytes per second");
+ SYSCTL_ADD_LONG(&rs->sysctl_ctx,
+ SYSCTL_CHILDREN(rl_rate_num),
+ OID_AUTO, "using", CTLFLAG_RD,
+ &rs->rs_rlt[i].using,
+ "Number of flows using");
+ SYSCTL_ADD_LONG(&rs->sysctl_ctx,
+ SYSCTL_CHILDREN(rl_rate_num),
+ OID_AUTO, "enobufs", CTLFLAG_RD,
+ &rs->rs_rlt[i].rs_num_enobufs,
+ "Number of enobufs logged on this rate");
+
}
}
#endif
@@ -667,6 +678,8 @@ bail:
*/
rs->rs_rlt[i].ptbl = rs;
rs->rs_rlt[i].tag = NULL;
+ rs->rs_rlt[i].using = 0;
+ rs->rs_rlt[i].rs_num_enobufs = 0;
/*
* Calculate the time between.
*/
@@ -1063,16 +1076,28 @@ rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error)
static void
rl_increment_using(const struct tcp_hwrate_limit_table *rte)
{
+ struct tcp_hwrate_limit_table *decon_rte;
+
+ decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
+ atomic_add_long(&decon_rte->using, 1);
}
static void
rl_decrement_using(const struct tcp_hwrate_limit_table *rte)
{
+ struct tcp_hwrate_limit_table *decon_rte;
+
+ decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
+ atomic_subtract_long(&decon_rte->using, 1);
}
void
tcp_rl_log_enobuf(const struct tcp_hwrate_limit_table *rte)
{
+ struct tcp_hwrate_limit_table *decon_rte;
+
+ decon_rte = __DECONST(struct tcp_hwrate_limit_table *, rte);
+ atomic_add_long(&decon_rte->rs_num_enobufs, 1);
}
/*
diff --git a/sys/netinet/tcp_ratelimit.h b/sys/netinet/tcp_ratelimit.h
index b69f0e634b60..8747708e8b5e 100644
--- a/sys/netinet/tcp_ratelimit.h
+++ b/sys/netinet/tcp_ratelimit.h
@@ -43,7 +43,9 @@ struct m_snd_tag;
struct tcp_hwrate_limit_table {
const struct tcp_rate_set *ptbl; /* Pointer to parent table */
struct m_snd_tag *tag; /* Send tag if needed (chelsio) */
- uint64_t rate; /* Rate we get in Bytes per second (Bps) */
+ long rate; /* Rate we get in Bytes per second (Bps) */
+ long using; /* How many flows are using this hdwr rate. */
+ long rs_num_enobufs;
uint32_t time_between; /* Time-Gap between packets at this rate */
uint32_t flags;
};
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index 0eaeb39af71a..96056b5efd4d 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -156,6 +156,17 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, CTLFLAG_VNET | CTLFLAG_RD,
&VNET_NAME(tcp_sack_globalholes), 0,
"Global number of TCP SACK holes currently allocated");
+int
+tcp_dsack_block_exists(struct tcpcb *tp)
+{
+ /* Return true if a DSACK block exists */
+ if (tp->rcv_numsacks == 0)
+ return (0);
+ if (SEQ_LEQ(tp->sackblks[0].end, tp->rcv_nxt))
+ return(1);
+ return (0);
+}
+
/*
* This function will find overlaps with the currently stored sackblocks
* and add any overlap as a dsack block upfront
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
index febac7ad424c..af6c9462c8e0 100644
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -3930,6 +3930,9 @@ bbr_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type, struct bbr_s
struct tcp_bbr *bbr;
INP_WLOCK_ASSERT(tp->t_inpcb);
+#ifdef STATS
+ stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_CSIG, type);
+#endif
bbr = (struct tcp_bbr *)tp->t_fb_ptr;
switch (type) {
case CC_NDUPACK:
@@ -4403,6 +4406,7 @@ bbr_clone_rsm(struct tcp_bbr *bbr, struct bbr_sendmap *nrsm, struct bbr_sendmap
nrsm->r_start = start;
nrsm->r_end = rsm->r_end;
nrsm->r_rtr_cnt = rsm->r_rtr_cnt;
+ nrsm-> r_rtt_not_allowed = rsm->r_rtt_not_allowed;
nrsm->r_flags = rsm->r_flags;
/* We don't transfer forward the SYN flag */
nrsm->r_flags &= ~BBR_HAS_SYN;
@@ -6429,65 +6433,6 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct tcpcb *tp, uint32_t cts)
bbr->r_ctl.bbr_smallest_srtt_this_state = rtt;
}
-static void
-bbr_earlier_retran(struct tcpcb *tp, struct tcp_bbr *bbr, struct bbr_sendmap *rsm,
- uint32_t t, uint32_t cts, int ack_type)
-{
- /*
- * For this RSM, we acknowledged the data from a previous
- * transmission, not the last one we made. This means we did a false
- * retransmit.
- */
- if (rsm->r_flags & BBR_HAS_FIN) {
- /*
- * The sending of the FIN often is multiple sent when we
- * have everything outstanding ack'd. We ignore this case
- * since its over now.
- */
- return;
- }
- if (rsm->r_flags & BBR_TLP) {
- /*
- * We expect TLP's to have this occur often
- */
- bbr->rc_tlp_rtx_out = 0;
- return;
- }
- if (ack_type != BBR_CUM_ACKED) {
- /*
- * If it was not a cum-ack we
- * don't really know for sure since
- * the timestamp could be from some
- * other transmission.
- */
- return;
- }
-
- if (rsm->r_flags & BBR_WAS_SACKPASS) {
- /*
- * We retransmitted based on a sack and the earlier
- * retransmission ack'd it - re-ordering is occuring.
- */
- BBR_STAT_INC(bbr_reorder_seen);
- bbr->r_ctl.rc_reorder_ts = cts;
- }
- /* Back down the loss count */
- if (rsm->r_flags & BBR_MARKED_LOST) {
- bbr->r_ctl.rc_lost -= rsm->r_end - rsm->r_start;
- bbr->r_ctl.rc_lost_bytes -= rsm->r_end - rsm->r_start;
- rsm->r_flags &= ~BBR_MARKED_LOST;
- if (SEQ_GT(bbr->r_ctl.rc_lt_lost, bbr->r_ctl.rc_lost))
- /* LT sampling also needs adjustment */
- bbr->r_ctl.rc_lt_lost = bbr->r_ctl.rc_lost;
- }
- /***** RRS HERE ************************/
- /* Do we need to do this??? */
- /* bbr_reset_lt_bw_sampling(bbr, cts); */
- /***** RRS HERE ************************/
- BBR_STAT_INC(bbr_badfr);
- BBR_STAT_ADD(bbr_badfr_bytes, (rsm->r_end - rsm->r_start));
-}
-
static void
bbr_set_reduced_rtt(struct tcp_bbr *bbr, uint32_t cts, uint32_t line)
{
@@ -6869,6 +6814,10 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
/* Already done */
return (0);
}
+ if (rsm->r_rtt_not_allowed) {
+ /* Not allowed */
+ return (0);
+ }
if (rsm->r_rtr_cnt == 1) {
/*
* Only one transmit. Hopefully the normal case.
@@ -6926,7 +6875,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
rsm->r_tim_lastsent[i], ack_type, to);
if ((i + 1) < rsm->r_rtr_cnt) {
/* Likely */
- bbr_earlier_retran(tp, bbr, rsm, t, cts, ack_type);
+ return (0);
} else if (rsm->r_flags & BBR_TLP) {
bbr->rc_tlp_rtx_out = 0;
}
@@ -6974,7 +6923,7 @@ bbr_update_rtt(struct tcpcb *tp, struct tcp_bbr *bbr,
t = 1;
bbr_update_bbr_info(bbr, rsm, t, cts, to->to_tsecr, uts, BBR_RTT_BY_EARLIER_RET,
rsm->r_tim_lastsent[i], ack_type, to);
- bbr_earlier_retran(tp, bbr, rsm, t, cts, ack_type);
+ return (0);
} else {
/*
* Too many prior transmissions, just
@@ -10207,7 +10156,7 @@ bbr_init(struct tcpcb *tp)
tp->t_fb_ptr = NULL;
return (ENOMEM);
}
- rsm->r_flags = BBR_OVERMAX;
+ rsm->r_rtt_not_allowed = 1;
rsm->r_tim_lastsent[0] = cts;
rsm->r_rtr_cnt = 1;
rsm->r_rtr_bytes = 0;
@@ -10320,6 +10269,10 @@ bbr_fini(struct tcpcb *tp, int32_t tcb_is_purged)
counter_u64_add(bbr_flows_whdwr_pacing, -1);
else
counter_u64_add(bbr_flows_nohdwr_pacing, -1);
+ if (bbr->r_ctl.crte != NULL) {
+ tcp_rel_pacing_rate(bbr->r_ctl.crte, tp);
+ bbr->r_ctl.crte = NULL;
+ }
rsm = TAILQ_FIRST(&bbr->r_ctl.rc_map);
while (rsm) {
TAILQ_REMOVE(&bbr->r_ctl.rc_map, rsm, r_next);
@@ -13463,15 +13416,6 @@ send:
th->th_seq = htonl(tp->snd_max);
bbr_seq = tp->snd_max;
}
- } else if (flags & TH_RST) {
- /*
- * For a Reset send the last cum ack in sequence
- * (this like any other choice may still generate a
- * challenge ack, if a ack-update packet is in
- * flight).
- */
- th->th_seq = htonl(tp->snd_una);
- bbr_seq = tp->snd_una;
} else {
/*
* len == 0 and not persist we use snd_max, sending
@@ -14536,9 +14480,9 @@ bbr_set_sockopt(struct socket *so, struct sockopt *sopt,
} else {
bbr->bbr_hdw_pace_ena = 0;
#ifdef RATELIMIT
- if (bbr->bbr_hdrw_pacing) {
- bbr->bbr_hdrw_pacing = 0;
- in_pcbdetach_txrtlmt(bbr->rc_inp);
+ if (bbr->r_ctl.crte != NULL) {
+ tcp_rel_pacing_rate(bbr->r_ctl.crte, tp);
+ bbr->r_ctl.crte = NULL;
}
#endif
}
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index d2093e1afab7..64180125658f 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -63,7 +63,10 @@ __FBSDID("$FreeBSD$");
#include <sys/kthread.h>
#include <sys/kern_prefetch.h>
#include <sys/protosw.h>
-
+#ifdef TCP_ACCOUNTING
+#include <sys/sched.h>
+#include <machine/cpu.h>
+#endif
#include <vm/uma.h>
#include <net/route.h>
@@ -91,8 +94,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_var.h>
#include <netinet/tcp_hpts.h>
#include <netinet/tcp_ratelimit.h>
+#include <netinet/tcp_accounting.h>
#include <netinet/tcpip.h>
#include <netinet/cc/cc.h>
+#include <netinet/cc/cc_newreno.h>
#include <netinet/tcp_fastopen.h>
#include <netinet/tcp_lro.h>
#ifdef NETFLIX_SHARED_CWND
@@ -133,6 +138,15 @@ uma_zone_t rack_pcb_zone;
#define TICKS2SBT(__t) (tick_sbt * ((sbintime_t)(__t)))
#endif
+VNET_DECLARE(uint32_t, newreno_beta);
+VNET_DECLARE(uint32_t, newreno_beta_ecn);
+#define V_newreno_beta VNET(newreno_beta)
+#define V_newreno_beta_ecn VNET(newreno_beta_ecn)
+
+
+MALLOC_DEFINE(M_TCPFSB, "tcp_fsb", "TCP fast send block");
+MALLOC_DEFINE(M_TCPDO, "tcp_do", "TCP deferred options");
+
struct sysctl_ctx_list rack_sysctl_ctx;
struct sysctl_oid *rack_sysctl_root;
@@ -175,30 +189,51 @@ static int32_t rack_tlp_thresh = 1;
static int32_t rack_tlp_limit = 2; /* No more than 2 TLPs w-out new data */
static int32_t rack_tlp_use_greater = 1;
static int32_t rack_reorder_thresh = 2;
-static int32_t rack_reorder_fade = 60000; /* 0 - never fade, def 60,000
+static int32_t rack_reorder_fade = 60000000; /* 0 - never fade, def 60,000,000
* - 60 seconds */
+static uint8_t rack_req_measurements = 1;
/* Attack threshold detections */
static uint32_t rack_highest_sack_thresh_seen = 0;
static uint32_t rack_highest_move_thresh_seen = 0;
-
-static int32_t rack_pkt_delay = 1;
-static int32_t rack_early_recovery = 1;
+static int32_t rack_enable_hw_pacing = 0; /* Due to CCSP keep it off by default */
+static int32_t rack_hw_pace_extra_slots = 2; /* 2 extra MSS time betweens */
+static int32_t rack_hw_rate_caps = 1; /* 1; */
+static int32_t rack_hw_rate_min = 0; /* 1500000;*/
+static int32_t rack_hw_rate_to_low = 0; /* 1200000; */
+static int32_t rack_hw_up_only = 1;
+static int32_t rack_stats_gets_ms_rtt = 1;
+static int32_t rack_prr_addbackmax = 2;
+
+static int32_t rack_pkt_delay = 1000;
static int32_t rack_send_a_lot_in_prr = 1;
-static int32_t rack_min_to = 1; /* Number of ms minimum timeout */
+static int32_t rack_min_to = 1000; /* Number of microsecond min timeout */
static int32_t rack_verbose_logging = 0;
static int32_t rack_ignore_data_after_close = 1;
-static int32_t rack_enable_shared_cwnd = 0;
+static int32_t rack_enable_shared_cwnd = 1;
+static int32_t rack_use_cmp_acks = 1;
+static int32_t rack_use_fsb = 1;
+static int32_t rack_use_rfo = 1;
+static int32_t rack_use_rsm_rfo = 1;
+static int32_t rack_max_abc_post_recovery = 2;
+static int32_t rack_client_low_buf = 0;
+#ifdef TCP_ACCOUNTING
+static int32_t rack_tcp_accounting = 0;
+#endif
static int32_t rack_limits_scwnd = 1;
static int32_t rack_enable_mqueue_for_nonpaced = 0;
static int32_t rack_disable_prr = 0;
static int32_t use_rack_rr = 1;
static int32_t rack_non_rxt_use_cr = 0; /* does a non-rxt in recovery use the configured rate (ss/ca)? */
-static int32_t rack_persist_min = 250; /* 250ms */
-static int32_t rack_persist_max = 2000; /* 2 Second */
-static int32_t rack_sack_not_required = 0; /* set to one to allow non-sack to use rack */
-static int32_t rack_default_init_window = 0; /* Use system default */
+static int32_t rack_persist_min = 250000; /* 250usec */
+static int32_t rack_persist_max = 2000000; /* 2 Second in usec's */
+static int32_t rack_sack_not_required = 1; /* set to one to allow non-sack to use rack */
+static int32_t rack_default_init_window = 0; /* Use system default */
static int32_t rack_limit_time_with_srtt = 0;
-static int32_t rack_hw_pace_adjust = 0;
+static int32_t rack_autosndbuf_inc = 20; /* In percentage form */
+static int32_t rack_enobuf_hw_boost_mult = 2; /* How many times the hw rate we boost slot using time_between */
+static int32_t rack_enobuf_hw_max = 12000; /* 12 ms in usecs */
+static int32_t rack_enobuf_hw_min = 10000; /* 10 ms in usecs */
+static int32_t rack_hw_rwnd_factor = 2; /* How many max_segs the rwnd must be before we hold off sending */
/*
* Currently regular tcp has a rto_min of 30ms
* the backoff goes 12 times so that ends up
@@ -209,23 +244,21 @@ static uint32_t rack_def_data_window = 20;
static uint32_t rack_goal_bdp = 2;
static uint32_t rack_min_srtts = 1;
static uint32_t rack_min_measure_usec = 0;
-static int32_t rack_tlp_min = 10;
-static int32_t rack_rto_min = 30; /* 30ms same as main freebsd */
-static int32_t rack_rto_max = 4000; /* 4 seconds */
+static int32_t rack_tlp_min = 10000; /* 10ms */
+static int32_t rack_rto_min = 30000; /* 30,000 usec same as main freebsd */
+static int32_t rack_rto_max = 4000000; /* 4 seconds in usec's */
static const int32_t rack_free_cache = 2;
static int32_t rack_hptsi_segments = 40;
static int32_t rack_rate_sample_method = USE_RTT_LOW;
static int32_t rack_pace_every_seg = 0;
-static int32_t rack_delayed_ack_time = 200; /* 200ms */
+static int32_t rack_delayed_ack_time = 40000; /* 40ms in usecs */
static int32_t rack_slot_reduction = 4;
static int32_t rack_wma_divisor = 8; /* For WMA calculation */
static int32_t rack_cwnd_block_ends_measure = 0;
static int32_t rack_rwnd_block_ends_measure = 0;
+static int32_t rack_def_profile = 0;
static int32_t rack_lower_cwnd_at_tlp = 0;
-static int32_t rack_use_proportional_reduce = 0;
-static int32_t rack_proportional_rate = 10;
-static int32_t rack_tlp_max_resend = 2;
static int32_t rack_limited_retran = 0;
static int32_t rack_always_send_oldest = 0;
static int32_t rack_tlp_threshold_use = TLP_USE_TWO_ONE;
@@ -247,13 +280,13 @@ static uint32_t rack_probertt_use_min_rtt_entry = 1; /* Use the min to calculate
static uint32_t rack_probertt_use_min_rtt_exit = 0;
static uint32_t rack_probe_rtt_sets_cwnd = 0;
static uint32_t rack_probe_rtt_safety_val = 2000000; /* No more than 2 sec in probe-rtt */
-static uint32_t rack_time_between_probertt = 9600000; /* 9.6 sec in us */
+static uint32_t rack_time_between_probertt = 9600000; /* 9.6 sec in usecs */
static uint32_t rack_probertt_gpsrtt_cnt_mul = 0; /* How many srtt periods does probe-rtt last top fraction */
-static uint32_t rack_probertt_gpsrtt_cnt_div = 0; /* How many srtt periods does probe-rtt last bottom fraction */
-static uint32_t rack_min_probertt_hold = 200000; /* Equal to delayed ack time */
+static uint32_t rack_probertt_gpsrtt_cnt_div = 0; /* How many srtt periods does probe-rtt last bottom fraction */
+static uint32_t rack_min_probertt_hold = 40000; /* Equal to delayed ack time */
static uint32_t rack_probertt_filter_life = 10000000;
static uint32_t rack_probertt_lower_within = 10;
-static uint32_t rack_min_rtt_movement = 250; /* Must move at least 250 useconds to count as a lowering */
+static uint32_t rack_min_rtt_movement = 250000; /* Must move at least 250ms (in microseconds) to count as a lowering */
static int32_t rack_pace_one_seg = 0; /* Shall we pace for less than 1.4Meg 1MSS at a time */
static int32_t rack_probertt_clear_is = 1;
static int32_t rack_max_drain_hbp = 1; /* Extra drain times gpsrtt for highly buffered paths */
@@ -264,7 +297,7 @@ static int32_t rack_max_per_above = 30; /* When we go to increment stop if abov
/* Timely information */
/* Combine these two gives the range of 'no change' to bw */
-/* ie the up/down provide the upper and lower bound */
+/* ie the up/down provide the upper and lower bound */
static int32_t rack_gp_per_bw_mul_up = 2; /* 2% */
static int32_t rack_gp_per_bw_mul_down = 4; /* 4% */
static int32_t rack_gp_rtt_maxmul = 3; /* 3 x maxmin */
@@ -286,6 +319,7 @@ static int32_t rack_timely_int_timely_only = 0; /* do interim timely's only use
static int32_t rack_timely_no_stopping = 0;
static int32_t rack_down_raise_thresh = 100;
static int32_t rack_req_segs = 1;
+static uint64_t rack_bw_rate_cap = 0;
/* Weird delayed ack mode */
static int32_t rack_use_imac_dack = 0;
@@ -301,9 +335,14 @@ counter_u64_t rack_unpaced_segments;
counter_u64_t rack_calc_zero;
counter_u64_t rack_calc_nonzero;
counter_u64_t rack_saw_enobuf;
+counter_u64_t rack_saw_enobuf_hw;
counter_u64_t rack_saw_enetunreach;
counter_u64_t rack_per_timer_hole;
-
+counter_u64_t rack_large_ackcmp;
+counter_u64_t rack_small_ackcmp;
+#ifdef INVARIANTS
+counter_u64_t rack_adjust_map_bw;
+#endif
/* Tail loss probe counters */
counter_u64_t rack_tlp_tot;
counter_u64_t rack_tlp_newdata;
@@ -313,6 +352,7 @@ counter_u64_t rack_tlp_retran_fail;
counter_u64_t rack_to_tot;
counter_u64_t rack_to_arm_rack;
counter_u64_t rack_to_arm_tlp;
+counter_u64_t rack_hot_alloc;
counter_u64_t rack_to_alloc;
counter_u64_t rack_to_alloc_hard;
counter_u64_t rack_to_alloc_emerg;
@@ -320,6 +360,17 @@ counter_u64_t rack_to_alloc_limited;
counter_u64_t rack_alloc_limited_conns;
counter_u64_t rack_split_limited;
+#define MAX_NUM_OF_CNTS 13
+counter_u64_t rack_proc_comp_ack[MAX_NUM_OF_CNTS];
+counter_u64_t rack_multi_single_eq;
+counter_u64_t rack_proc_non_comp_ack;
+
+counter_u64_t rack_fto_send;
+counter_u64_t rack_fto_rsm_send;
+counter_u64_t rack_nfto_resend;
+counter_u64_t rack_non_fto_send;
+counter_u64_t rack_extended_rfo;
+
counter_u64_t rack_sack_proc_all;
counter_u64_t rack_sack_proc_short;
counter_u64_t rack_sack_proc_restart;
@@ -342,6 +393,10 @@ counter_u64_t rack_input_idle_reduces;
counter_u64_t rack_collapsed_win;
counter_u64_t rack_tlp_does_nada;
counter_u64_t rack_try_scwnd;
+counter_u64_t rack_hw_pace_init_fail;
+counter_u64_t rack_hw_pace_lost;
+counter_u64_t rack_sbsndptr_right;
+counter_u64_t rack_sbsndptr_wrong;
/* Temp CPU counters */
counter_u64_t rack_find_high;
@@ -350,6 +405,17 @@ counter_u64_t rack_progress_drops;
counter_u64_t rack_out_size[TCP_MSS_ACCT_SIZE];
counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
+
+#define RACK_REXMTVAL(tp) max(rack_rto_min, ((tp)->t_srtt + ((tp)->t_rttvar << 2)))
+
+#define RACK_TCPT_RANGESET(tv, value, tvmin, tvmax) do { \
+ (tv) = (value) + TICKS_2_USEC(tcp_rexmit_slop); \
+ if ((u_long)(tv) < (u_long)(tvmin)) \
+ (tv) = (tvmin); \
+ if ((u_long)(tv) > (u_long)(tvmax)) \
+ (tv) = (tvmax); \
+} while (0)
+
static void
rack_log_progress_event(struct tcp_rack *rack, struct tcpcb *tp, uint32_t tick, int event, int line);
@@ -363,7 +429,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th,
uint32_t tiwin, int32_t thflags, int32_t nxt_pkt);
static void
rack_ack_received(struct tcpcb *tp, struct tcp_rack *rack,
- struct tcphdr *th, uint16_t nsegs, uint16_t type, int32_t recovery);
+ uint32_t th_ack, uint16_t nsegs, uint16_t type, int32_t recovery);
static struct rack_sendmap *rack_alloc(struct tcp_rack *rack);
static struct rack_sendmap *rack_alloc_limit(struct tcp_rack *rack,
uint8_t limit_type);
@@ -371,24 +437,21 @@ static struct rack_sendmap *
rack_check_recovery_mode(struct tcpcb *tp,
uint32_t tsused);
static void
-rack_cong_signal(struct tcpcb *tp, struct tcphdr *th,
- uint32_t type);
+rack_cong_signal(struct tcpcb *tp,
+ uint32_t type, uint32_t ack);
static void rack_counter_destroy(void);
static int
rack_ctloutput(struct socket *so, struct sockopt *sopt,
struct inpcb *inp, struct tcpcb *tp);
static int32_t rack_ctor(void *mem, int32_t size, void *arg, int32_t how);
static void
-rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line);
+rack_set_pace_segments(struct tcpcb *tp, struct tcp_rack *rack, uint32_t line, uint64_t *fill_override);
static void
rack_do_segment(struct mbuf *m, struct tcphdr *th,
struct socket *so, struct tcpcb *tp, int32_t drop_hdrlen, int32_t tlen,
uint8_t iptos);
static void rack_dtor(void *mem, int32_t size, void *arg);
static void
-rack_earlier_retran(struct tcpcb *tp, struct rack_sendmap *rsm,
- uint32_t t, uint32_t cts);
-static void
rack_log_alt_to_to_cancel(struct tcp_rack *rack,
uint32_t flex1, uint32_t flex2,
uint32_t flex3, uint32_t flex4,
@@ -416,11 +479,12 @@ static int32_t rack_init(struct tcpcb *tp);
static void rack_init_sysctls(void);
static void
rack_log_ack(struct tcpcb *tp, struct tcpopt *to,
- struct tcphdr *th);
+ struct tcphdr *th, int entered_rec, int dup_ack_struck);
static void
rack_log_output(struct tcpcb *tp, struct tcpopt *to, int32_t len,
- uint32_t seq_out, uint8_t th_flags, int32_t err, uint32_t ts,
- uint8_t pass, struct rack_sendmap *hintrsm, uint32_t us_cts);
+ uint32_t seq_out, uint8_t th_flags, int32_t err, uint64_t ts,
+ struct rack_sendmap *hintrsm, uint16_t add_flags, struct mbuf *s_mb, uint32_t s_moff);
+
static void
rack_log_sack_passed(struct tcpcb *tp, struct tcp_rack *rack,
struct rack_sendmap *rsm);
@@ -431,7 +495,7 @@ static uint32_t
rack_proc_sack_blk(struct tcpcb *tp, struct tcp_rack *rack,
struct sackblk *sack, struct tcpopt *to, struct rack_sendmap **prsm,
uint32_t cts, int *moved_two);
-static void rack_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+static void rack_post_recovery(struct tcpcb *tp, uint32_t th_seq);
static void rack_remxt_tmr(struct tcpcb *tp);
static int
rack_set_sockopt(struct socket *so, struct sockopt *sopt,
@@ -446,10 +510,10 @@ static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t
static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type);
static uint32_t
rack_update_entry(struct tcpcb *tp, struct tcp_rack *rack,
- struct rack_sendmap *rsm, uint32_t ts, int32_t * lenp);
+ struct rack_sendmap *rsm, uint64_t ts, int32_t * lenp, uint16_t add_flag);
static void
rack_update_rsm(struct tcpcb *tp, struct tcp_rack *rack,
- struct rack_sendmap *rsm, uint32_t ts);
+ struct rack_sendmap *rsm, uint64_t ts, uint16_t add_flag);
static int
rack_update_rtt(struct tcpcb *tp, struct tcp_rack *rack,
struct rack_sendmap *rsm, struct tcpopt *to, uint32_t cts, int32_t ack_type, tcp_seq th_ack);
@@ -496,15 +560,182 @@ tcp_rack_output(struct tcpcb *tp, struct tcp_rack *rack,
static void tcp_rack_xmit_timer(struct tcp_rack *rack, int32_t rtt,
uint32_t len, uint32_t us_tim, int confidence, struct rack_sendmap *rsm, uint16_t rtrcnt);
static void
- tcp_rack_partialack(struct tcpcb *tp, struct tcphdr *th);
+ tcp_rack_partialack(struct tcpcb *tp);
+static int
+rack_set_profile(struct tcp_rack *rack, int prof);
+static void
+rack_apply_deferred_options(struct tcp_rack *rack);
int32_t rack_clear_counter=0;
+static void
+rack_set_cc_pacing(struct tcp_rack *rack)
+{
+ struct sockopt sopt;
+ struct cc_newreno_opts opt;
+ struct newreno old, *ptr;
+ struct tcpcb *tp;
+ int error;
+
+ if (rack->rc_pacing_cc_set)
+ return;
+
+ tp = rack->rc_tp;
+ if (tp->cc_algo == NULL) {
+ /* Tcb is leaving */
+ printf("No cc algorithm?\n");
+ return;
+ }
+ rack->rc_pacing_cc_set = 1;
+ if (strcmp(tp->cc_algo->name, CCALGONAME_NEWRENO) != 0) {
+ /* Not new-reno we can't play games with beta! */
+ printf("cc_algo:%s is not NEWRENO:%s\n",
+ tp->cc_algo->name, CCALGONAME_NEWRENO);
+ goto out;
+ }
+ ptr = ((struct newreno *)tp->ccv->cc_data);
+ if (CC_ALGO(tp)->ctl_output == NULL) {
+ /* Huh, why does new_reno no longer have a set function? */
+ printf("no ctl_output for algo:%s\n", tp->cc_algo->name);
+ goto out;
+ }
+ if (ptr == NULL) {
+ /* Just the default values */
+ old.beta = V_newreno_beta_ecn;
*** 14144 LINES SKIPPED ***
More information about the dev-commits-src-main
mailing list