PERFORCE change 134584 for review
Kip Macy
kmacy at FreeBSD.org
Thu Jan 31 22:46:01 PST 2008
http://perforce.freebsd.org/chv.cgi?CH=134584
Change 134584 by kmacy at kmacy:storage:toehead on 2008/02/01 06:45:34
- various fixes for ddp socket buffer accounting
- verbose logging in ddp path
Affected files ...
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#12 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#13 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#5 edit
.. //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#5 edit
Differences ...
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#12 (text+ko) ====
@@ -578,7 +578,7 @@
* to the HW for the amount of data processed.
*/
void
-t3_cleanup_rbuf(struct tcpcb *tp)
+t3_cleanup_rbuf(struct tcpcb *tp, int copied)
{
struct toepcb *toep = tp->t_toe;
struct socket *so;
@@ -593,10 +593,14 @@
so = tp->t_inpcb->inp_socket;
SOCKBUF_LOCK(&so->so_rcv);
- read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
- toep->tp_copied_seq += read;
- toep->tp_enqueued_bytes -= read;
+ if (copied)
+ toep->tp_copied_seq += copied;
+ else {
+ read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
+ toep->tp_copied_seq += read;
+ }
credits = toep->tp_copied_seq - toep->tp_rcv_wup;
+ toep->tp_enqueued_bytes = so->so_rcv.sb_cc;
SOCKBUF_UNLOCK(&so->so_rcv);
if (credits > so->so_rcv.sb_mbmax)
@@ -686,7 +690,7 @@
cxgb_toe_rcvd(struct tcpcb *tp)
{
INP_LOCK_ASSERT(tp->t_inpcb);
- t3_cleanup_rbuf(tp);
+ t3_cleanup_rbuf(tp, 0);
return (0);
}
@@ -1742,6 +1746,7 @@
tp = toep->tp_tp;
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_seq = tp->rcv_nxt;
tp->rcv_nxt += m->m_pkthdr.len;
tp->t_rcvtime = ticks;
@@ -1793,7 +1798,10 @@
TRACE_ENTER;
q = &toep->tp_ddp_state;
bsp = &q->buf_state[q->cur_buf];
- m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+ m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
+
+ printf("rcv_nxt=0x%x tp->rcv_next=0x%x len=%d\n",
+ rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);
#ifdef T3_TRACE
if ((int)m->m_pkthdr.len < 0) {
@@ -1802,11 +1810,14 @@
#endif
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_cur_offset = bsp->cur_offset;
m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
if (bsp->flags & DDP_BF_NOCOPY)
bsp->flags &= ~DDP_BF_NOCOPY;
+ printf("ddp flags=0x%x\n", m->m_ddp_flags);
+
m->m_seq = tp->rcv_nxt;
tp->rcv_nxt = rcv_nxt;
bsp->cur_offset += m->m_pkthdr.len;
@@ -1836,14 +1847,14 @@
if (__predict_false(so_no_receive(so))) {
handle_excess_rx(toep, m);
INP_UNLOCK(tp->t_inpcb);
+ TRACE_EXIT;
return;
}
- if (toep->tp_ulp_mode == ULP_MODE_TCPDDP && toep->tp_ddp_state.kbuf[0])
+ if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
handle_ddp_data(toep, m);
-
+
m->m_seq = ntohl(hdr->seq);
- m->m_ddp_flags = 0;
m->m_ulp_mode = 0; /* for iSCSI */
#if VALIDATE_SEQ
@@ -1974,7 +1985,7 @@
/*
* Overload to store old RCV_NXT
*/
- m->m_pkthdr.csum_data = tp->rcv_nxt;
+ m->m_seq = tp->rcv_nxt;
tp->rcv_nxt = rcv_nxt;
/*
@@ -1982,7 +1993,7 @@
* m->m_len here, we need to be very careful that nothing from now on
* interprets ->len of this packet the usual way.
*/
- m->m_len = tp->rcv_nxt - m->m_pkthdr.csum_data;
+ m->m_len = m->m_pkthdr.len = tp->rcv_nxt - m->m_seq;
/*
* Figure out where the new data was placed in the buffer and store it
@@ -1992,8 +2003,9 @@
end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
m->m_cur_offset = end_offset - m->m_pkthdr.len;
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
bsp->cur_offset = end_offset;
-
+ toep->tp_enqueued_bytes += m->m_pkthdr.len;
/*
* Bit 0 of flags stores whether the DDP buffer is completed.
* Note that other parts of the code depend on this being in bit 0.
@@ -2001,25 +2013,30 @@
if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
panic("spurious ddp completion");
} else {
- m->m_pkthdr.csum_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
- if (m->m_pkthdr.csum_flags && !(bsp->flags & DDP_BF_NOFLIP))
+ m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
+ if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
q->cur_buf ^= 1; /* flip buffers */
}
if (bsp->flags & DDP_BF_NOCOPY) {
- m->m_pkthdr.csum_flags |= (bsp->flags & DDP_BF_NOCOPY);
+ m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
bsp->flags &= ~DDP_BF_NOCOPY;
}
if (ddp_report & F_DDP_PSH)
- m->m_pkthdr.csum_flags |= DDP_BF_PSH;
+ m->m_ddp_flags |= DDP_BF_PSH;
tp->t_rcvtime = ticks;
+
+ printf("ddp set and ddp_flags=0x%x len=%d m_seq=0x%x rcv_nxt=0x%x\n", m->m_ddp_flags, m->m_len, m->m_seq, rcv_nxt);
+
+ SOCKBUF_LOCK(&so->so_rcv);
sbappendstream_locked(&so->so_rcv, m);
if ((so->so_state & SS_NOFDREF) == 0)
sorwakeup_locked(so);
-
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
TRACE_EXIT;
}
@@ -2077,7 +2094,7 @@
bsp = &q->buf_state[buf_idx];
when = bsp->cur_offset;
- m->m_len = G_DDP_OFFSET(ddp_report) - when;
+ m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
#ifdef T3_TRACE
T3_TRACE5(TIDTB(sk),
@@ -2100,6 +2117,7 @@
G_DDP_OFFSET(ddp_report));
#endif
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_pkthdr.csum_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
if (bsp->flags & DDP_BF_NOCOPY)
bsp->flags &= ~DDP_BF_NOCOPY;
@@ -2193,6 +2211,7 @@
bsp = &q->buf_state[q->cur_buf];
m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
m->m_ddp_gl = (unsigned char *)bsp->gl;
+ m->m_flags |= M_DDP;
m->m_cur_offset = bsp->cur_offset;
m->m_ddp_flags =
DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
@@ -2925,7 +2944,8 @@
th.th_seq = req->rcv_isn;
th.th_flags = TH_SYN;
- toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn;
+ toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1;
+
inc.inc_isipv6 = 0;
inc.inc_len = 0;
@@ -3075,9 +3095,6 @@
newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && /* !sock_flag(sk, NO_DDP) && */
tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
- printf("ddp=%d rcv_wnd=%ld min_win=%d\n",
- TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN);
-
if (newtoep->tp_ulp_mode) {
ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
@@ -3085,6 +3102,9 @@
newtoep->tp_ulp_mode = 0;
}
+ printf("ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d\n",
+ TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode);
+
#endif
set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
@@ -3369,7 +3389,8 @@
tp->t_toe = toep;
reset_wr_list(toep);
tp->rcv_wnd = select_rcv_wnd(tdev, so);
- DPRINTF("rcv_wnd=%ld\n", tp->rcv_wnd);
+ tp->rcv_nxt = toep->tp_copied_seq;
+ printf("rcv_wnd=%ld rcv_nxt=0x%x\n", tp->rcv_wnd, tp->rcv_nxt);
install_offload_ops(so);
toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_cpl_socket.c#13 (text+ko) ====
@@ -303,7 +303,7 @@
}
err = uiomove(buf, min(len, curlen), uio);
if (err) {
- printf("uiomove_frombuf returned %d\n", err);
+ printf("uiomove returned %d\n", err);
return (err);
}
@@ -559,7 +559,7 @@
restart:
len = uio->uio_resid;
m = so->so_rcv.sb_mb;
- target = (flags & MSG_WAITALL) ? min(len, so->so_rcv.sb_hiwat) : so->so_rcv.sb_lowat;
+ target = (flags & MSG_WAITALL) ? len : so->so_rcv.sb_lowat;
p = &toep->tp_ddp_state;
user_ddp_ok = p->ubuf_ddp_ready;
p->cancel_ubuf = 0;
@@ -597,22 +597,27 @@
if (so->so_rcv.sb_mb && !user_ddp_pending) {
SOCKBUF_UNLOCK(&so->so_rcv);
INP_LOCK(inp);
- t3_cleanup_rbuf(tp);
+ t3_cleanup_rbuf(tp, copied_unacked);
INP_UNLOCK(inp);
SOCKBUF_LOCK(&so->so_rcv);
copied_unacked = 0;
goto restart;
}
- if (p->ubuf && user_ddp_ok && !user_ddp_pending &&
+ if (p->kbuf[0] && user_ddp_ok && !user_ddp_pending &&
uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
p->ubuf_ddp_ready) {
user_ddp_pending =
- !t3_overlay_ubuf(so, uio, IS_NONBLOCKING(so), flags);
+ !t3_overlay_ubuf(so, uio, 1, 1);
if (user_ddp_pending) {
p->kbuf_posted++;
user_ddp_ok = 0;
}
+ printf("user_ddp_pending=%d\n", user_ddp_pending);
}
+ if (p->kbuf[0] && (p->kbuf_posted == 0)) {
+ t3_post_kbuf(so, 1);
+ p->kbuf_posted++;
+ }
if (user_ddp_pending) {
/* One shot at DDP if we already have enough data */
if (copied >= target)
@@ -626,16 +631,15 @@
else {
SOCKBUF_UNLOCK(&so->so_rcv);
INP_LOCK(inp);
- t3_cleanup_rbuf(tp);
+ t3_cleanup_rbuf(tp, copied_unacked);
INP_UNLOCK(inp);
SOCKBUF_LOCK(&so->so_rcv);
copied_unacked = 0;
printf("sbwaiting 2\n");
-
if ((err = sbwait(&so->so_rcv)) != 0)
goto done;
}
- goto restart;
+ goto restart;
got_mbuf:
if (m->m_pkthdr.len == 0) {
if ((m->m_ddp_flags & DDP_BF_NOCOPY) == 0)
@@ -645,9 +649,9 @@
m = so->so_rcv.sb_mb = m_free(m);
goto done;
}
- offset = toep->tp_copied_seq + copied_unacked - m->m_seq + 1 /* OFF by one somewhere :-{ */;
- DPRINTF("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d\n",
- m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset);
+ offset = toep->tp_copied_seq + copied_unacked - m->m_seq;
+ printf("m=%p copied_seq=0x%x copied_unacked=%d m_seq=0x%x offset=%d pktlen=%d is_ddp(m)=%d\n",
+ m, toep->tp_copied_seq, copied_unacked, m->m_seq, offset, m->m_pkthdr.len, is_ddp(m));
if (offset >= m->m_pkthdr.len)
panic("t3_soreceive: OFFSET >= LEN offset %d copied_seq 0x%x seq 0x%x "
"pktlen %d ddp flags 0x%x", offset, toep->tp_copied_seq + copied_unacked, m->m_seq,
@@ -690,19 +694,20 @@
}
if (user_ddp_ok && !user_ddp_pending &&
- /*
- * XXX
- */
uio->uio_iov->iov_len > p->kbuf[0]->dgl_length &&
p->ubuf_ddp_ready) {
user_ddp_pending =
- !t3_overlay_ubuf(so, uio, (so->so_state & SS_NBIO), flags);
+ !t3_overlay_ubuf(so, uio, 1, 1);
if (user_ddp_pending) {
p->kbuf_posted++;
user_ddp_ok = 0;
}
- }
-
+ printf("user_ddp_pending=%d\n", user_ddp_pending);
+ } else
+ printf("user_ddp_ok=%d user_ddp_pending=%d iov_len=%ld dgl_length=%d ubuf_ddp_ready=%d ulp_mode=%d is_ddp(m)=%d flags=0x%x ubuf=%p kbuf_posted=%d\n",
+ user_ddp_ok, user_ddp_pending, uio->uio_iov->iov_len, p->kbuf[0] ? p->kbuf[0]->dgl_length : 0,
+ p->ubuf_ddp_ready, toep->tp_ulp_mode, !!is_ddp(m), m->m_ddp_flags, p->ubuf, p->kbuf_posted);
+
/*
* If MSG_TRUNC is specified the data is discarded.
* XXX need to check pr_atomic
@@ -739,7 +744,7 @@
unsigned int fl = m->m_ddp_flags;
int got_psh = 0;
- if (p->ubuf != NULL && is_ddp(m) && (fl & 1)) {
+ if (p->kbuf[0] != NULL && is_ddp(m) && (fl & 1)) {
if (is_ddp_psh(m) && user_ddp_pending)
got_psh = 1;
@@ -748,6 +753,7 @@
else {
p->kbuf_posted--;
p->ubuf_ddp_ready = 1;
+ printf("ubuf ddp ready\n");
}
}
@@ -756,6 +762,7 @@
}
if (len > 0)
goto restart;
+
done:
/*
@@ -780,6 +787,7 @@
"chelsio_recvmsg: about to exit, repost kbuf");
#endif
+ printf("posting kbuf\n");
t3_post_kbuf(so, 1);
p->kbuf_posted++;
} else if (so_should_ddp(toep, copied)
@@ -791,9 +799,7 @@
t3_enter_ddp(so, TOM_TUNABLE(TOE_DEV(so),
ddp_copy_limit), 0);
p->kbuf_posted = 1;
- } else
- printf("user_ddp_pending=%d kbuf[0]=%p kbuf_posted=%d so_should_ddp=%d\n",
- user_ddp_pending, p->kbuf[0], p->kbuf_posted, so_should_ddp(toep, copied));
+ }
}
#ifdef T3_TRACE
T3_TRACE5(TIDTB(so),
@@ -806,7 +812,7 @@
done_unlocked:
if (copied) {
INP_LOCK(inp);
- t3_cleanup_rbuf(tp);
+ t3_cleanup_rbuf(tp, copied_unacked);
INP_UNLOCK(inp);
}
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_ddp.c#5 (text+ko) ====
@@ -260,7 +260,8 @@
{
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
-
+ TRACE_ENTER;
+
p->buf_state[bufidx].cur_offset = p->kbuf[bufidx]->dgl_offset;
p->buf_state[bufidx].flags = p->kbuf_noinval ? DDP_BF_NOINVAL : 0;
p->buf_state[bufidx].gl = p->kbuf[bufidx];
@@ -282,6 +283,7 @@
V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1),
modulate);
+ TRACE_EXIT;
}
/*
@@ -423,13 +425,19 @@
struct ddp_state *p = &toep->tp_ddp_state;
struct ddp_buf_state *dbs;
- if (p->ubuf == NULL)
+
+ TRACE_ENTER;
+ if (p->kbuf[0] == NULL) {
+ TRACE_EXIT;
return (EINVAL);
-
+ }
+
err = setup_uio_ppods(so, uio, 0, &len);
- if (err)
+ if (err) {
+ TRACE_EXIT;
return (err);
-
+ }
+
ubuf_idx = p->kbuf_idx;
p->buf_state[ubuf_idx].flags = DDP_BF_NOFLIP;
/* Use existing offset */
@@ -467,6 +475,7 @@
" kbuf_idx %d",
p->ubuf_tag, flags, OVERLAY_MASK, ubuf_idx, p->kbuf_idx);
#endif
+ TRACE_EXIT;
return (0);
}
@@ -528,10 +537,11 @@
struct toepcb *toep = sototcpcb(so)->t_toe;
struct ddp_state *p = &toep->tp_ddp_state;
+ TRACE_ENTER;
t3_set_ddp_tag(so, p->cur_buf, p->kbuf_tag[p->cur_buf] << 6);
t3_set_ddp_buf(so, p->cur_buf, 0, p->kbuf[p->cur_buf]->dgl_length);
- t3_repost_kbuf(so, p->cur_buf, modulate, 1);
-
+ t3_repost_kbuf(so, p->cur_buf, modulate, (so->so_state & SS_NBIO));
+ TRACE_EXIT;
#ifdef T3_TRACE
T3_TRACE1(TIDTB(so),
"t3_post_kbuf: cur_buf = kbuf_idx = %u ", p->cur_buf);
@@ -545,7 +555,7 @@
int
t3_enter_ddp(struct socket *so, unsigned int kbuf_size, unsigned int waitall)
{
- int i, err = ENOMEM;
+ int i, nonblock, err = ENOMEM;
static vm_pindex_t color;
unsigned int nppods, kbuf_pages, idx = 0;
struct toepcb *toep = sototcpcb(so)->t_toe;
@@ -555,11 +565,12 @@
if (kbuf_size > M_TCB_RX_DDP_BUF0_LEN)
return (EINVAL);
+ nonblock = (so->so_state & SS_NBIO);
+
kbuf_pages = (kbuf_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
nppods = pages2ppods(kbuf_pages);
p->kbuf_noinval = !!waitall;
-
p->kbuf_tag[NUM_DDP_KBUF - 1] = -1;
for (idx = 0; idx < NUM_DDP_KBUF; idx++) {
p->kbuf[idx] =
@@ -603,10 +614,11 @@
}
t3_set_ddp_tag(so, 0, p->kbuf_tag[0] << 6);
t3_set_ddp_buf(so, 0, 0, p->kbuf[0]->dgl_length);
- t3_repost_kbuf(so, 0, 0, 1);
+ t3_repost_kbuf(so, 0, 0, nonblock);
t3_set_rcv_coalesce_enable(so,
TOM_TUNABLE(TOE_DEV(so), ddp_rcvcoalesce));
+ printf("ddp entered\n");
#ifdef T3_TRACE
T3_TRACE4(TIDTB(so),
"t3_enter_ddp: kbuf_size %u waitall %u tag0 %d tag1 %d",
@@ -624,20 +636,27 @@
int
t3_ddp_copy(const struct mbuf *m, int offset, struct uio *uio, int len)
{
- int page_off;
+ int page_off, resid_init, err;
struct ddp_gather_list *gl = (struct ddp_gather_list *)m->m_ddp_gl;
TRACE_ENTER;
+ resid_init = uio->uio_resid;
+
if (!gl->dgl_pages)
panic("pages not set\n");
offset += gl->dgl_offset + m->m_cur_offset;
page_off = offset & ~PAGE_MASK;
+ err = uiomove_fromphys(gl->dgl_pages, page_off, len, uio);
+ printf("err=%d resid_init=%d uio_resid=%d offset=%d len=%d\n",
+ err, resid_init, uio->uio_resid, offset, len);
+
TRACE_EXIT;
- return uiomove_fromphys(gl->dgl_pages, page_off, len, uio);
+ return (err);
}
+
/*
* Allocate n page pods. Returns -1 on failure or the page pod tag.
*/
==== //depot/projects/toehead/sys/dev/cxgb/ulp/tom/cxgb_defs.h#5 (text+ko) ====
@@ -61,7 +61,7 @@
void t3_init_wr_tab(unsigned int wr_len);
uint32_t t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail);
void t3_send_rx_modulate(struct toepcb *toep);
-void t3_cleanup_rbuf(struct tcpcb *tp);
+void t3_cleanup_rbuf(struct tcpcb *tp, int copied);
void t3_init_socket_ops(void);
void t3_install_socket_ops(struct socket *so);
More information about the p4-projects mailing list