git: 6edfc10ca5fb - main - tcp: adding a functionality to define "trace points" so that BB logging can be enabled at specific events.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 14 Apr 2022 20:08:59 UTC
The branch main has been updated by rrs: URL: https://cgit.FreeBSD.org/src/commit/?id=6edfc10ca5fbefa5ca6a3d72821ba15006c2d148 commit 6edfc10ca5fbefa5ca6a3d72821ba15006c2d148 Author: Randall Stewart <rrs@FreeBSD.org> AuthorDate: 2022-04-14 20:07:34 +0000 Commit: Randall Stewart <rrs@FreeBSD.org> CommitDate: 2022-04-14 20:07:34 +0000 tcp: adding a functionality to define "trace points" so that BB logging can be enabled at specific events. This commit will add a new concept to rack, tracepoints. A tracepoint is a defined point inserted into the code (3 are included in this initial patch) that allows a developer to insert a point that might be of interest. The developer numbers the point in the tcp_rack.h file and then can use sysctl to enable that (or all) trace points. A limit is also given to how many BB logged connections will turn on so that a box is not overrun by BB logging. Reviewed by: tuexen Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D34898 --- sys/netinet/tcp_stacks/rack.c | 56 +++++++++++++++++++++++++++++++++++++++ sys/netinet/tcp_stacks/tcp_rack.h | 30 +++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 2de40c902162..30a23a578dd4 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -240,6 +240,7 @@ static int32_t rack_enobuf_hw_boost_mult = 2; /* How many times the hw rate we b static int32_t rack_enobuf_hw_max = 12000; /* 12 ms in usecs */ static int32_t rack_enobuf_hw_min = 10000; /* 10 ms in usecs */ static int32_t rack_hw_rwnd_factor = 2; /* How many max_segs the rwnd must be before we hold off sending */ + /* * Currently regular tcp has a rto_min of 30ms * the backoff goes 12 times so that ends up @@ -326,6 +327,10 @@ static int32_t rack_timely_no_stopping = 0; static int32_t rack_down_raise_thresh = 100; static int32_t rack_req_segs = 1; static uint64_t rack_bw_rate_cap = 0; +static uint32_t 
rack_trace_point_config = 0; +static uint32_t rack_trace_point_bb_mode = 4; +static int32_t rack_trace_point_count = 0; + /* Weird delayed ack mode */ static int32_t rack_use_imac_dack = 0; @@ -547,6 +552,25 @@ rack_apply_deferred_options(struct tcp_rack *rack); int32_t rack_clear_counter=0; +static inline void +rack_trace_point(struct tcp_rack *rack, int num) +{ + if (((rack_trace_point_config == num) || + (rack_trace_point_config == 0xffffffff)) && + (rack_trace_point_bb_mode != 0) && + (rack_trace_point_count > 0) && + (rack->rc_tp->t_logstate == 0)) { + int res; + res = atomic_fetchadd_int(&rack_trace_point_count, -1); + if (res > 0) { + rack->rc_tp->t_logstate = rack_trace_point_bb_mode; + } else { + /* Lost a race, make sure the count is zero now */ + rack_trace_point_count = 0; + } + } +} + static void rack_set_cc_pacing(struct tcp_rack *rack) { @@ -785,6 +809,7 @@ rack_init_sysctls(void) struct sysctl_oid *rack_measure; struct sysctl_oid *rack_probertt; struct sysctl_oid *rack_hw_pacing; + struct sysctl_oid *rack_tracepoint; rack_attack = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -915,6 +940,28 @@ rack_init_sysctls(void) OID_AUTO, "hbp_threshold", CTLFLAG_RW, &rack_hbp_thresh, 3, "We are highly buffered if min_rtt_seen / max_rtt_seen > this-threshold"); + + rack_tracepoint = SYSCTL_ADD_NODE(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_sysctl_root), + OID_AUTO, + "tp", + CTLFLAG_RW | CTLFLAG_MPSAFE, 0, + "Rack tracepoint facility"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_tracepoint), + OID_AUTO, "number", CTLFLAG_RW, + &rack_trace_point_config, 0, + "What is the trace point number to activate (0=none, 0xffffffff = all)?"); + SYSCTL_ADD_U32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_tracepoint), + OID_AUTO, "bbmode", CTLFLAG_RW, + &rack_trace_point_bb_mode, 4, + "What is BB logging mode that is activated?"); + SYSCTL_ADD_S32(&rack_sysctl_ctx, + SYSCTL_CHILDREN(rack_tracepoint), + OID_AUTO, "count", CTLFLAG_RW, +
&rack_trace_point_count, 0, + "How many connections will have BB logging turned on that hit the tracepoint?"); /* Pacing related sysctls */ rack_pacing = SYSCTL_ADD_NODE(&rack_sysctl_ctx, SYSCTL_CHILDREN(rack_sysctl_root), @@ -10286,6 +10333,7 @@ rack_collapsed_window(struct tcp_rack *rack) #endif tcp_seq max_seq; + rack_trace_point(rack, RACK_TP_COLLAPSED_WND); max_seq = rack->rc_tp->snd_una + rack->rc_tp->snd_wnd; memset(&fe, 0, sizeof(fe)); fe.r_start = max_seq; @@ -15983,6 +16031,10 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack *rack, struct rack_sendma } counter_u64_add(rack_fto_rsm_send, 1); if (error && (error == ENOBUFS)) { + if (rack->r_ctl.crte != NULL) { + rack_trace_point(rack, RACK_TP_HWENOBUF); + } else + rack_trace_point(rack, RACK_TP_ENOBUF); slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); if (rack->rc_enobuf < 0x7f) rack->rc_enobuf++; @@ -18839,6 +18891,10 @@ nomore: * Pace us right away to retry in a some * time */ + if (rack->r_ctl.crte != NULL) { + rack_trace_point(rack, RACK_TP_HWENOBUF); + } else + rack_trace_point(rack, RACK_TP_ENOBUF); slot = ((1 + rack->rc_enobuf) * HPTS_USEC_IN_MSEC); if (rack->rc_enobuf < 0x7f) rack->rc_enobuf++; diff --git a/sys/netinet/tcp_stacks/tcp_rack.h b/sys/netinet/tcp_stacks/tcp_rack.h index ad3c4d6883fb..91aefea0a81d 100644 --- a/sys/netinet/tcp_stacks/tcp_rack.h +++ b/sys/netinet/tcp_stacks/tcp_rack.h @@ -261,6 +261,36 @@ struct rack_opts_stats { #define RACK_QUALITY_PROBERTT 4 /* A measurement where we went into or exited probe RTT */ #define RACK_QUALITY_ALLACKED 5 /* All data is now acknowledged */ +/*********************/ +/* Rack Trace points */ +/*********************/ +/* + * Rack trace points are interesting points within + * the rack code that the author/debugger may want + * to have BB logging enabled if we hit that point. + * In order to enable a trace point you set the + * sysctl var net.inet.tcp.<stack>.tp.number to + * one of the numbers listed below. 
You also + * must make sure net.inet.tcp.<stack>.tp.bbmode is + * non-zero, the default is 4 for continuous tracing. + * You also set the number of connections you want + * to get BB logs in net.inet.tcp.<stack>.tp.count. + * + * Count will decrement every time BB logging is assigned + * to a connection that hit your tracepoint. + * + * You can enable all trace points by setting the number + * to 0xffffffff. You can disable all trace points by + * setting number to zero (or count to 0). + * + * Below is the enumerated list of tracepoints that + * have currently been defined in the code. Add more + * as you add a call to rack_trace_point(rack, <name>); + * where <name> is defined below. + */ +#define RACK_TP_HWENOBUF 0x00000001 /* When we are doing hardware pacing and hit enobufs */ +#define RACK_TP_ENOBUF 0x00000002 /* When we hit enobufs with software pacing */ +#define RACK_TP_COLLAPSED_WND 0x00000003 /* When a peer collapses its rwnd on us */ #define MIN_GP_WIN 6 /* We need at least 6 MSS in a GP measurement */ #ifdef _KERNEL