PERFORCE change 133432 for review
Andre Oppermann
andre at FreeBSD.org
Wed Jan 16 13:45:39 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=133432
Change 133432 by andre at andre_flirtbox on 2008/01/16 21:45:36
Handle all edge cases.
Add SEQ_DELTA() function to ease segment trimming.
Affected files ...
.. //depot/projects/tcp_reass/netinet/tcp_reass.c#7 edit
.. //depot/projects/tcp_reass/netinet/tcp_seq.h#2 edit
Differences ...
==== //depot/projects/tcp_reass/netinet/tcp_reass.c#7 (text+ko) ====
@@ -100,23 +100,25 @@
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
"TCP Segment Reassembly Queue");
-static int tcp_reass_maxseg = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
- &tcp_reass_maxseg, 0,
+static int tcp_reass_maxblocks = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxblocks, CTLFLAG_RDTUN,
+ &tcp_reass_maxblocks, 0,
"Global maximum number of TCP Segment Blocks in Reassembly Queue");
int tcp_reass_qsize = 0;
-SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, curblocks, CTLFLAG_RD,
&tcp_reass_qsize, 0,
"Global number of TCP Segment Blocks currently in Reassembly Queue");
+static void tcp_reass_merge(struct tcpcb *, int *, struct trq *, struct trq *);
+
/* Initialize TCP reassembly queue */
static void
tcp_reass_zone_change(void *tag)
{
- tcp_reass_maxseg = nmbclusters / 16;
- uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+ tcp_reass_maxblocks = nmbclusters / 16;
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxblocks);
}
uma_zone_t tcp_reass_zone;
@@ -126,12 +128,12 @@
{
/* XXX: nmbclusters may be zero. */
- tcp_reass_maxseg = nmbclusters / 16;
- TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
- &tcp_reass_maxseg);
+ tcp_reass_maxblocks = nmbclusters / 16;
+ TUNABLE_INT_FETCH("net.inet.tcp.reass.maxblocks",
+ &tcp_reass_maxblocks);
tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct trq),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
- uma_zone_set_max(tcp_reass_zone, tcp_reass_maxseg);
+ uma_zone_set_max(tcp_reass_zone, tcp_reass_maxblocks);
EVENTHANDLER_REGISTER(nmbclusters_change,
tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
}
@@ -150,11 +152,21 @@
* Call with th==NULL after becoming established to
* force pre-ESTABLISHED data up to user socket.
*/
- if (th == NULL)
+ if (th == NULL) {
+ if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
+ TAILQ_EMPTY(&tp->t_trq) ||
+ ((tqe = TAILQ_FIRST(&tp->t_trq)) &&
+ tqe->trq_seq != tp->rcv_nxt))
+ return (0);
goto present;
+ }
- KASSERT(SEQ_LEQ(tp->rcv_nxt, th->th_seq),
+ KASSERT(*tlenp > 0,
+ ("%s: segment doesn't contain any data", __func__));
+ KASSERT(SEQ_GT(tp->rcv_nxt, th->th_seq),
("%s: sequence number below rcv_nxt", __func__));
+ KASSERT(!(tp->rcv_nxt == th->th_seq) || !(TAILQ_EMPTY(&tp->t_trq)),
+ ("%s: got missing segment but queue is empty", __func__));
/*
* Limit the number of segments in the reassembly queue to prevent
@@ -183,13 +195,22 @@
return (0);
}
+ /* Get rid of packet header and mtags. */
+ m_demote(m, 1);
+
+#if 0
+ /* Trim empty mbufs from head of chain. */
+ while (m->m_len == 0) {
+ n = m;
+ m = m->m_next;
+ m_free(n);
+ }
+#endif
+
/* NB: m_adj(m, -i) may free mbufs at the tail of a chain. */
for (mcnt = 0, n = m; n; n = n->m_next)
mcnt += (n->m_flags & M_EXT) ? n->m_ext.ext_size : MSIZE;
- /* Get rid of packet header and mtags. */
- m_demote(m, 1);
-
/* Check if this segment attaches to the end. */
tqe = TAILQ_LAST(&tp->t_trq, trq_head);
if (tqe && tqe->trq_seq + tqe->trq_len == th->th_seq) {
@@ -204,27 +225,33 @@
return (0);
}
+ /* Check if beyond last block. */
+ if (tqe && SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
+ goto insert;
+
+ /* Check if this is the first segment. */
+ if (TAILQ_EMPTY(&tp->t_trq))
+ goto insert;
+
/* Check if this is the missing segment. */
if (tp->rcv_nxt == th->th_seq) {
tqe = TAILQ_FIRST(&tp->t_trq);
- KASSERT(tqe != NULL,
- ("%s: missing segment but nothing in queue", __func__));
- KASSERT(SEQ_GT(tqe->trq_seq, th->th_seq),
+ KASSERT(SEQ_LEQ(tqe->trq_seq, th->th_seq),
("%s: first block starts below missing segment", __func__));
- if (SEQ_LT(tqe->trq_seq, th->th_seq + *tlenp)) {
+ /* Check if segment prepends first block. */
+ if (SEQ_LEQ(tqe->trq_seq, th->th_seq + *tlenp)) {
/* Trim tail of segment. */
- if ((i = tqe->trq_seq - (th->th_seq + *tlenp))) {
- m_adj(m, i);
- *tlenp += i; /* NB: i is negative */
+ if ((i = SEQ_DELTA(tqe->trq_seq, th->th_seq + *tlenp))) {
+ m_adj(m, -i);
+ *tlenp -= i;
/* TCP statistics. */
tcpstat.tcps_rcvpartduppack++;
- tcpstat.tcps_rcvpartdupbyte -= i;
+ tcpstat.tcps_rcvpartdupbyte += i;
/* Update accounting. */
for (mcnt = 0, n = m; n; n = n->m_next)
mcnt += (n->m_flags & M_EXT) ?
n->m_ext.ext_size : MSIZE;
}
- /* Segment prepends first block. */
tqe->trq_len += *tlenp;
tqe->trq_mcnt += mcnt;
tp->t_trqmcnt += mcnt;
@@ -234,7 +261,7 @@
tqe->trq_m = m;
goto present;
}
- goto insert;
+ goto insert; /* No statistics, this segment is in line. */
}
/* TCP statistics. */
@@ -242,13 +269,19 @@
tcpstat.tcps_rcvoobyte += *tlenp;
/* See where it fits. */
- TAILQ_FOREACH(tqe, &tp->t_trq, trq_q) {
+ TAILQ_FOREACH_SAFE(tqe, &tp->t_trq, trq_q, tqen) {
+#if 1
/* Segment is after this blocks coverage. */
if (SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
continue;
+#endif
/* Segment is after the previous one but before this one. */
if (SEQ_GT(tqe->trq_seq, th->th_seq + *tlenp))
break; /* Insert as new block. */
+
+ KASSERT(SEQ_GT(tqe->trq_seq, th->th_seq + *tlenp),
+ ("%s: iterated past insert point", __func__));
+
/* Segment is already fully covered. */
if (SEQ_LEQ(tqe->trq_seq, th->th_seq) &&
SEQ_GEQ(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
@@ -258,54 +291,36 @@
*tlenp = 0;
return (0);
}
- /* Segment appends to this block. */
- if (SEQ_LEQ(tqe->trq_seq + tqe->trq_len, th->th_seq)) {
- /* Trim head of segment. */
- if ((i = tqe->trq_seq + tqe->trq_len - th->th_seq)) {
- m_adj(m, i);
- *tlenp -= i;
- /* TCP Statistics. */
- tcpstat.tcps_rcvpartduppack++;
- tcpstat.tcps_rcvpartdupbyte += i;
- /* XXX dupes */
- }
- tqe->trq_len += *tlenp;
- tqe->trq_mcnt += mcnt;
+
+ /* Segment covers and extends on both ends. */
+ if (SEQ_GEQ(tqe->trq_seq, th->th_seq) &&
+ SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
+ /* Replace block content. */
+ tp->t_trqmcnt -= tqe->trq_mcnt;
+ m_freem(tqe->trq_m);
+ tqe->trq_len = *tlenp;
+ tqe->trq_mcnt = mcnt;
tp->t_trqmcnt += mcnt;
- tqe->trq_ml->m_next = m;
+ tqe->trq_seq = th->th_seq;
+ tqe->trq_m = m;
tqe->trq_ml = m_last(m);
- /* Check if segment bridges two blocks to merge. */
- if ((tqen = TAILQ_NEXT(tqe, trq_q)) &&
- SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq)) {
- /* Trim head of next block. */
- /* XXXAO: Should trim tail of segment instead. */
- if ((i = tqe->trq_seq + tqe->trq_len -
- tqen->trq_seq)) {
- m_adj(tqen->trq_m, i);
- tqen->trq_len -= i;
- /* TCP statistics. */
- tcpstat.tcps_rcvpartduppack++;
- tcpstat.tcps_rcvpartdupbyte += i;
- }
- tqe->trq_len += tqen->trq_len;
- tqe->trq_mcnt += tqen->trq_mcnt;
- tqe->trq_ml->m_next = tqen->trq_m;
- tqe->trq_ml = tqen->trq_ml;
- TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
- uma_zfree(tcp_reass_zone, tqen);
- tcp_reass_qsize--;
- }
+ /* Check if segment bridges next block to merge. */
+ if (tqen != NULL &&
+ SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq))
+ tcp_reass_merge(tp, tlenp, tqe, tqen);
return (0);
}
- /* Segment prepends. */
- if (SEQ_GT(tqe->trq_seq, th->th_seq)) {
+
+ /* Segment prepends to this block. */
+ if (SEQ_GT(tqe->trq_seq, th->th_seq) &&
+ SEQ_GEQ(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
/* Trim tail of segment. */
- if ((i = tqe->trq_seq - (th->th_seq + *tlenp))) {
- m_adj(m, i);
- *tlenp += i; /* NB: i is negative */
+ if ((i = SEQ_DELTA(tqe->trq_seq, th->th_seq + *tlenp))) {
+ m_adj(m, -i);
+ *tlenp -= i;
/* TCP statistics. */
tcpstat.tcps_rcvpartduppack++;
- tcpstat.tcps_rcvpartdupbyte -= i;
+ tcpstat.tcps_rcvpartdupbyte += i;
/* Update accounting. */
for (mcnt = 0, n = m; n; n = n->m_next)
mcnt += (n->m_flags & M_EXT) ?
@@ -320,6 +335,29 @@
tqe->trq_m = m;
return (0);
}
+
+ /* Segment appends to this block. */
+ if (SEQ_LEQ(tqe->trq_seq + tqe->trq_len, th->th_seq) &&
+ SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq + *tlenp)) {
+ /* Trim head of segment. */
+ if ((i = SEQ_DELTA(tqe->trq_seq + tqe->trq_len, th->th_seq))) {
+ m_adj(m, i);
+ *tlenp -= i;
+ /* TCP Statistics. */
+ tcpstat.tcps_rcvpartduppack++;
+ tcpstat.tcps_rcvpartdupbyte += i;
+ }
+ tqe->trq_len += *tlenp;
+ tqe->trq_mcnt += mcnt;
+ tp->t_trqmcnt += mcnt;
+ tqe->trq_ml->m_next = m;
+ tqe->trq_ml = m_last(m);
+ /* Check if segment bridges two blocks to merge. */
+ if (tqen != NULL &&
+ SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq))
+ tcp_reass_merge(tp, tlenp, tqe, tqen);
+ return (0);
+ }
}
insert:
@@ -340,10 +378,15 @@
tqen->trq_ml = m_last(m);
/* Where to insert. */
- if (tqe)
+ if (tqe != NULL && SEQ_LT(tqe->trq_seq + tqe->trq_len, th->th_seq))
+ TAILQ_INSERT_TAIL(&tp->t_trq, tqen, trq_q);
+ else if (tqe != NULL)
TAILQ_INSERT_BEFORE(tqe, tqen, trq_q);
- else
+ else {
+ KASSERT(TAILQ_EMPTY(&tp->t_trq),
+ ("%s: queue not empty", __func__));
TAILQ_INSERT_HEAD(&tp->t_trq, tqen, trq_q);
+ }
/* Missing segment? */
if (tp->rcv_nxt != th->th_seq)
@@ -353,25 +396,21 @@
* Present data to user, advancing rcv_nxt through
* completed sequence space.
*/
- if (!TCPS_HAVEESTABLISHED(tp->t_state))
- return (0);
- tqe = TAILQ_FIRST(&tp->t_trq);
- if (tqe == NULL || tqe->trq_seq != tp->rcv_nxt)
- return (0);
SOCKBUF_LOCK(&so->so_rcv);
TAILQ_FOREACH_SAFE(tqe, &tp->t_trq, trq_q, tqen) {
- /* We can never go more than one round. */
+ KASSERT(SEQ_GEQ(tqe->trq_seq, tp->rcv_nxt),
+ ("%s: trq_seq < rcv_nxt", __func__));
if (tqe->trq_seq != tp->rcv_nxt)
break;
#if 1
/* XXX: This is bogus if we had a FIN. */
- flags = tqe->trq_flags & TH_FIN;
+ flags = tqe->trq_flags;
#endif
- tp->rcv_nxt += tqe->trq_len;
if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
m_freem(tqe->trq_m);
else
sbappendstream_locked(&so->so_rcv, tqe->trq_m);
+ tp->rcv_nxt += tqe->trq_len;
tp->t_trqmcnt -= tqe->trq_mcnt;
TAILQ_REMOVE(&tp->t_trq, tqe, trq_q);
uma_zfree(tcp_reass_zone, tqe);
@@ -387,11 +426,64 @@
#endif
}
+static void
+tcp_reass_merge(struct tcpcb *tp, int *tlenp, struct trq *tqe, struct trq *tqen)
+{
+#if 0
+ struct mbuf *m;
+#endif
+ int i;
+
+ KASSERT(SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq),
+ ("%s: blocks do not overlap, nothing to merge", __func__));
+
+ /* Appended block may reach beyond next block. */
+ while (SEQ_GEQ(tqe->trq_seq + tqe->trq_len, tqen->trq_seq + tqen->trq_len)) {
+ tcpstat.tcps_rcvpartdupbyte += tqen->trq_len; /* Statistics */
+ tp->t_trqmcnt -= tqe->trq_mcnt;
+ m_freem(tqen->trq_m);
+ TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
+ uma_zfree(tcp_reass_zone, tqen);
+ tcp_reass_qsize--;
+ /* And the one after that. */
+ if ((tqen = TAILQ_NEXT(tqe, trq_q)) == NULL)
+ return;
+ }
+
+ /* Trim head of next block. */
+ if ((i = SEQ_DELTA(tqe->trq_seq + tqe->trq_len, tqen->trq_seq))) {
+ m_adj(tqen->trq_m, i);
+ tqen->trq_len -= i;
+ tcpstat.tcps_rcvpartdupbyte += i; /* Statistics */
+#if 0
+ /* Trim ... */
+ while (tqen->trq_m->m_len == 0) {
+ m = tqen->trq_m;
+ tqen->trq_m = tqen->trq_m->m_next;
+ /* mcnt */
+ m_free(m);
+ }
+#endif
+ KASSERT(tqen->trq_m != NULL,
+ ("%s: no remaining mbufs in block", __func__));
+ }
+
+ /* Merge blocks together. */
+ tqe->trq_len += tqen->trq_len;
+ tqe->trq_mcnt += tqen->trq_mcnt;
+ tqe->trq_ml->m_next = tqen->trq_m;
+ tqe->trq_ml = tqen->trq_ml;
+ TAILQ_REMOVE(&tp->t_trq, tqen, trq_q);
+ uma_zfree(tcp_reass_zone, tqen);
+ tcp_reass_qsize--;
+}
+
/*
* Free the reassembly queue on tcpcb free and on general memory shortage.
*/
void
-tcp_reass_qfree(struct tcpcb *tp) {
+tcp_reass_qfree(struct tcpcb *tp)
+{
struct trq *tqe, *tqen;
INP_LOCK_ASSERT(tp->t_inpcb);
==== //depot/projects/tcp_reass/netinet/tcp_seq.h#2 (text+ko) ====
@@ -44,6 +44,7 @@
#define SEQ_MIN(a, b) ((SEQ_LT(a, b)) ? (a) : (b))
#define SEQ_MAX(a, b) ((SEQ_GT(a, b)) ? (a) : (b))
+#define SEQ_DELTA(a,b) ((int)SEQ_MAX(a, b) - SEQ_MIN(a, b))
/* for modulo comparisons of timestamps */
#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
More information about the p4-projects
mailing list