svn commit: r198504 - projects/tcp_ffcaia2008_head/sys/netinet
Lawrence Stewart
lstewart at FreeBSD.org
Mon Oct 26 23:39:08 UTC 2009
Author: lstewart
Date: Mon Oct 26 23:39:07 2009
New Revision: 198504
URL: http://svn.freebsd.org/changeset/base/198504
Log:
WIP checkpoint commit for reassembly queue autotuning and related cleanup. More
cleanup and testing required.
Sponsored by: FreeBSD Foundation
Modified:
projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c
projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c
projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c
projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c
projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h
Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c Mon Oct 26 23:24:59 2009 (r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c Mon Oct 26 23:39:07 2009 (r198504)
@@ -1461,10 +1461,23 @@ tcp_do_segment(struct mbuf *m, struct tc
* Set new socket buffer size.
* Give up when limit is reached.
*/
- if (newsize)
+ if (newsize) {
if (!sbreserve_locked(&so->so_rcv,
newsize, so, NULL))
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
+ else {
+ /*
+ * Scale reassembly queue to 8/7
+ * the socket buffer size to
+ * allow a little wiggle room.
+ */
+ tp->t_segq.tsegq_maxbytes =
+ (newsize << 3) / 7;
+ tp->t_segq.tsegq_maxmbufs =
+ tp->t_segq.tsegq_maxbytes /
+ tp->t_maxseg;
+ }
+ }
m_adj(m, drop_hdrlen); /* delayed header drop */
sbappendstream_locked(&so->so_rcv, m);
}
Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c Mon Oct 26 23:24:59 2009 (r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c Mon Oct 26 23:39:07 2009 (r198504)
@@ -74,41 +74,49 @@ __FBSDID("$FreeBSD$");
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
-static VNET_DEFINE(int, tcp_reass_maxseg);
-VNET_DEFINE(int, tcp_reass_qsize);
-static VNET_DEFINE(int, tcp_reass_maxqlen);
+#include <machine/atomic.h>
+
+static VNET_DEFINE(int, tcp_reass_maxmbufs);
+static VNET_DEFINE(int, tcp_reass_maxbytes);
+VNET_DEFINE(int, tcp_reass_curmbufs);
+VNET_DEFINE(int, tcp_reass_curbytes);
static VNET_DEFINE(int, tcp_reass_overflows);
-#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg)
-#define V_tcp_reass_maxqlen VNET(tcp_reass_maxqlen)
+#define V_tcp_reass_maxmbufs VNET(tcp_reass_maxmbufs)
+#define V_tcp_reass_maxbytes VNET(tcp_reass_maxbytes)
#define V_tcp_reass_overflows VNET(tcp_reass_overflows)
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
- &VNET_NAME(tcp_reass_maxseg), 0,
- "Global maximum number of TCP Segments in Reassembly Queue");
-
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
- &VNET_NAME(tcp_reass_qsize), 0,
- "Global number of TCP Segments currently in Reassembly Queue");
-
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
- &VNET_NAME(tcp_reass_maxqlen), 0,
- "Maximum number of TCP Segments per individual Reassembly Queue");
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxmbufs, CTLFLAG_RD,
+ &VNET_NAME(tcp_reass_maxmbufs), 0,
+ "Global maximum number of mbufs permitted across TCP reassembly queues");
+
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxbytes, CTLFLAG_RD,
+ &VNET_NAME(tcp_reass_maxbytes), 0,
+ "Global maximum number of bytes permitted across TCP reassembly queues");
+
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, curmbufs, CTLFLAG_RD,
+ &VNET_NAME(tcp_reass_curmbufs), 0,
+ "Global number of mbufs currently held in TCP reassembly queues");
+
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, curbytes, CTLFLAG_RD,
+ &VNET_NAME(tcp_reass_curbytes), 0,
+ "Global number of bytes currently held in TCP reassembly queues");
SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
&VNET_NAME(tcp_reass_overflows), 0,
- "Global number of TCP Segment Reassembly Queue Overflows");
+ "Global number of overflows across TCP reassembly queues");
/* Initialize TCP reassembly queue */
static void
tcp_reass_zone_change(void *tag)
{
- V_tcp_reass_maxseg = nmbclusters / 16;
- uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ V_tcp_reass_maxmbufs = nmbclusters / 16;
+ V_tcp_reass_maxbytes = V_tcp_reass_maxmbufs * 1448;
+ uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxmbufs);
}
VNET_DEFINE(uma_zone_t, tcp_reass_zone);
@@ -117,19 +125,24 @@ void
tcp_reass_init(void)
{
- V_tcp_reass_maxseg = 0;
- V_tcp_reass_qsize = 0;
- V_tcp_reass_maxqlen = 48;
+ V_tcp_reass_maxmbufs = 0;
+ V_tcp_reass_maxbytes = 0;
+ V_tcp_reass_curmbufs = 0;
+ V_tcp_reass_curbytes = 0;
V_tcp_reass_overflows = 0;
- V_tcp_reass_maxseg = nmbclusters / 16;
- TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
- &V_tcp_reass_maxseg);
+ /**/
+ V_tcp_reass_maxmbufs = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxmbufs",
+ &V_tcp_reass_maxmbufs);
+ /* 1448 bytes is the most common segment size for bulk transfer */
+ V_tcp_reass_maxbytes = V_tcp_reass_maxmbufs * 1448;
V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+ uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxmbufs);
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+ /**/
}
int
@@ -141,6 +154,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
struct tseg_qent *te = NULL;
struct socket *so = tp->t_inpcb->inp_socket;
int flags;
+ struct tsegq *t_segq = &tp->t_segq;
INP_WLOCK_ASSERT(tp->t_inpcb);
@@ -164,9 +178,11 @@ tcp_reass(struct tcpcb *tp, struct tcphd
* process the missing segment.
*/
if (th->th_seq != tp->rcv_nxt &&
- (V_tcp_reass_qsize + 1 >= V_tcp_reass_maxseg ||
- tp->t_segqlen >= V_tcp_reass_maxqlen)) {
- V_tcp_reass_overflows++;
+ (V_tcp_reass_curmbufs + 1 > V_tcp_reass_maxmbufs ||
+ V_tcp_reass_curbytes + *tlenp > V_tcp_reass_maxbytes ||
+ t_segq->tsegq_bytes + *tlenp >= t_segq->tsegq_maxbytes ||
+ t_segq->tsegq_mbufs + 1 > t_segq->tsegq_maxmbufs)) {
+ atomic_add_int(&V_tcp_reass_overflows, 1);
TCPSTAT_INC(tcps_rcvmemdrop);
m_freem(m);
*tlenp = 0;
@@ -184,8 +200,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
*tlenp = 0;
return (0);
}
- tp->t_segqlen++;
- V_tcp_reass_qsize++;
+ t_segq->tsegq_bytes += *tlenp;
+ t_segq->tsegq_mbufs++;
+ atomic_add_int(&V_tcp_reass_curmbufs, 1);
+ atomic_add_int(&V_tcp_reass_curbytes, *tlenp);
/*
* Find a segment which begins after this one does.
@@ -211,8 +229,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
m_freem(m);
uma_zfree(V_tcp_reass_zone, te);
- tp->t_segqlen--;
- V_tcp_reass_qsize--;
+ t_segq->tsegq_bytes -= *tlenp;
+ t_segq->tsegq_mbufs--;
+ atomic_subtract_int(&V_tcp_reass_curmbufs, 1);
+ atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp);
/*
* Try to present any queued data
* at the left window edge to the user.
@@ -248,8 +268,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
LIST_REMOVE(q, tqe_q);
m_freem(q->tqe_m);
uma_zfree(V_tcp_reass_zone, q);
- tp->t_segqlen--;
- V_tcp_reass_qsize--;
+ t_segq->tsegq_bytes -= *tlenp;
+ t_segq->tsegq_mbufs--;
+ atomic_subtract_int(&V_tcp_reass_curmbufs, 1);
+ atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp);
q = nq;
}
@@ -285,8 +307,10 @@ present:
else
sbappendstream_locked(&so->so_rcv, q->tqe_m);
uma_zfree(V_tcp_reass_zone, q);
- tp->t_segqlen--;
- V_tcp_reass_qsize--;
+ t_segq->tsegq_bytes -= *tlenp;
+ t_segq->tsegq_mbufs--;
+ atomic_subtract_int(&V_tcp_reass_curmbufs, 1);
+ atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp);
q = nq;
} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
ND6_HINT(tp);
Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c Mon Oct 26 23:24:59 2009 (r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c Mon Oct 26 23:39:07 2009 (r198504)
@@ -106,6 +106,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syslog.h>
#endif /*IPSEC*/
+#include <machine/atomic.h>
#include <machine/in_cksum.h>
#include <sys/md5.h>
@@ -825,7 +826,6 @@ tcp_drop(struct tcpcb *tp, int errno)
void
tcp_discardcb(struct tcpcb *tp)
{
- struct tseg_qent *q;
struct inpcb *inp = tp->t_inpcb;
struct socket *so = inp->inp_socket;
#ifdef INET6
@@ -903,13 +903,8 @@ tcp_discardcb(struct tcpcb *tp)
}
/* free the reassembly queue, if any */
- while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
- LIST_REMOVE(q, tqe_q);
- m_freem(q->tqe_m);
- uma_zfree(V_tcp_reass_zone, q);
- tp->t_segqlen--;
- V_tcp_reass_qsize--;
- }
+ TCP_REASS_FLUSH(&tp->t_segq);
+
/* Disconnect offload device, if any. */
tcp_offload_detach(tp);
@@ -967,7 +962,6 @@ tcp_drain(void)
CURVNET_SET(vnet_iter);
struct inpcb *inpb;
struct tcpcb *tcpb;
- struct tseg_qent *te;
/*
* Walk the tcpbs, if existing, and flush the reassembly queue,
@@ -983,14 +977,7 @@ tcp_drain(void)
continue;
INP_WLOCK(inpb);
if ((tcpb = intotcpcb(inpb)) != NULL) {
- while ((te = LIST_FIRST(&tcpb->t_segq))
- != NULL) {
- LIST_REMOVE(te, tqe_q);
- m_freem(te->tqe_m);
- uma_zfree(V_tcp_reass_zone, te);
- tcpb->t_segqlen--;
- V_tcp_reass_qsize--;
- }
+ TCP_REASS_FLUSH(&tcpb->t_segq);
tcp_clean_sackreport(tcpb);
}
INP_WUNLOCK(inpb);
Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c Mon Oct 26 23:24:59 2009 (r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c Mon Oct 26 23:39:07 2009 (r198504)
@@ -1452,6 +1452,9 @@ tcp_attach(struct socket *so)
INP_INFO_WUNLOCK(&V_tcbinfo);
return (ENOBUFS);
}
+
+ tp->t_segq.tsegq_maxbytes = (so->so_rcv.sb_hiwat << 3) / 7;
+ tp->t_segq.tsegq_maxmbufs = tp->t_segq.tsegq_maxbytes / tp->t_maxseg;
tp->t_state = TCPS_CLOSED;
INP_WUNLOCK(inp);
INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1749,8 +1752,8 @@ db_print_tcpcb(struct tcpcb *tp, const c
indent += 2;
db_print_indent(indent);
- db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
- LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+ db_printf("t_segq first: %p t_dupacks: %d\n",
+ LIST_FIRST(&tp->t_segq), tp->t_dupacks);
db_print_indent(indent);
db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h Mon Oct 26 23:24:59 2009 (r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h Mon Oct 26 23:39:07 2009 (r198504)
@@ -42,10 +42,12 @@
* Kernel variables for tcp.
*/
VNET_DECLARE(int, tcp_do_rfc1323);
-VNET_DECLARE(int, tcp_reass_qsize);
VNET_DECLARE(struct uma_zone *, tcp_reass_zone);
+VNET_DECLARE(int, tcp_reass_curmbufs);
+VNET_DECLARE(int, tcp_reass_curbytes);
#define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323)
-#define V_tcp_reass_qsize VNET(tcp_reass_qsize)
+#define V_tcp_reass_curmbufs VNET(tcp_reass_curmbufs)
+#define V_tcp_reass_curbytes VNET(tcp_reass_curbytes)
#define V_tcp_reass_zone VNET(tcp_reass_zone)
#endif /* _KERNEL */
@@ -57,7 +59,14 @@ struct tseg_qent {
struct tcphdr *tqe_th; /* a pointer to tcp header */
struct mbuf *tqe_m; /* mbuf contains packet */
};
-LIST_HEAD(tsegqe_head, tseg_qent);
+
+struct tsegq {
+ int tsegq_mbufs;
+ int tsegq_bytes;
+ int tsegq_maxbytes;
+ int tsegq_maxmbufs;
+ struct tseg_qent *lh_first;
+};
struct sackblk {
tcp_seq start; /* start seq no. of sack block */
@@ -95,9 +104,8 @@ do { \
* Organized for 16 byte cacheline efficiency.
*/
struct tcpcb {
- struct tsegqe_head t_segq; /* segment reassembly queue */
+ struct tsegq t_segq; /* segment reassembly queue */
void *t_pspare[2]; /* new reassembly queue */
- int t_segqlen; /* segment reassembly queue length */
int t_dupacks; /* consecutive dup acks recd */
struct tcp_timer *t_timers; /* All the TCP timers in one struct */
@@ -356,6 +364,19 @@ struct tcptw {
max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \
+ (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
+#define TCP_REASS_FLUSH(segq) do { \
+ struct tseg_qent *qe; \
+ while ((qe = LIST_FIRST((segq))) != NULL) { \
+ LIST_REMOVE(qe, tqe_q); \
+ (segq)->tsegq_mbufs--; \
+ (segq)->tsegq_bytes -= qe->tqe_len; \
+ atomic_subtract_int(&V_tcp_reass_curmbufs, 1); \
+ atomic_subtract_int(&V_tcp_reass_curbytes, qe->tqe_len); \
+ m_freem(qe->tqe_m); \
+ uma_zfree(V_tcp_reass_zone, qe); \
+ } \
+} while (0)
+
/*
* TCP statistics.
* Many of these should be kept per connection,
More information about the svn-src-projects
mailing list