PERFORCE change 129530 for review
Kip Macy
kmacy at FreeBSD.org
Sun Nov 25 16:03:30 PST 2007
http://perforce.freebsd.org/chv.cgi?CH=129530
Change 129530 by kmacy at kmacy:storage:toestack on 2007/11/26 00:03:19
- add a number of wakeups that were previously missing
- mark the socket as unable to receive more data when it receives a FIN,
  so that a blocking read will return 0
- add inpcb locking for serializing toepcb access
- add some debugging printfs for current edge cases
Affected files ...
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#25 edit
Differences ...
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#25 (text+ko) ====
@@ -368,18 +368,28 @@
struct mbuf *m;
struct cpl_close_con_req *req;
struct tom_data *d;
- struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- unsigned int tid = toep->tp_tid;
+ struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp;
+ struct toepcb *toep;
+ unsigned int tid;
+
+
+ INP_LOCK(inp);
+ tp = sototcpcb(so);
+ toep = tp->t_toe;
- d = TOM_DATA(toep->tp_toedev);
-
if (tp->t_state != TCPS_SYN_SENT)
t3_push_frames(so, 1);
- if (toep->tp_flags & TP_FIN_SENT)
+ if (toep->tp_flags & TP_FIN_SENT) {
+ INP_UNLOCK(inp);
return;
-
+ }
+
+ tid = toep->tp_tid;
+
+ d = TOM_DATA(toep->tp_toedev);
+
m = m_gethdr_nofail(sizeof(*req));
toep->tp_flags |= TP_FIN_SENT;
@@ -389,7 +399,7 @@
req->wr.wr_lo = htonl(V_WR_TID(tid));
OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
req->rsvd = htonl(toep->tp_write_seq);
-
+ INP_UNLOCK(inp);
/*
* XXX - need to defer shutdown while there is still data in the queue
*
@@ -459,15 +469,15 @@
if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) ||
(tp->t_state == TCPS_FIN_WAIT_2)))
return;
-
so = tp->t_inpcb->inp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
read = toep->tp_enqueued_bytes - so->so_rcv.sb_cc;
toep->tp_copied_seq += read;
toep->tp_enqueued_bytes -= read;
credits = toep->tp_copied_seq - toep->tp_rcv_wup;
DPRINTF("copied_seq=%u rcv_wup=%u credits=%u\n",
toep->tp_copied_seq, toep->tp_rcv_wup, credits);
-
+ SOCKBUF_UNLOCK(&so->so_rcv);
/*
* XXX this won't accurately reflect credit return - we need
* to look at the difference between the amount that has been
@@ -512,7 +522,7 @@
struct socket *so;
printf("cxgb_toe_disconnect\n");
-
+
so = tp->t_inpcb->inp_socket;
close_conn(so);
return (0);
@@ -544,6 +554,7 @@
static int
cxgb_toe_rcvd(struct tcpcb *tp)
{
+ INP_LOCK_ASSERT(tp->t_inpcb);
t3_cleanup_rbuf(tp);
return (0);
@@ -564,7 +575,7 @@
struct cpl_set_tcb_field *req;
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep = tp->t_toe;
-
+
req = mtod(m, struct cpl_set_tcb_field *);
m->m_pkthdr.len = m->m_len = sizeof(*req);
req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
@@ -588,7 +599,7 @@
if (toep == NULL)
return;
-
+
if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN))
return;
@@ -728,6 +739,7 @@
if (!m)
return (ENOMEM);
+ INP_LOCK_ASSERT(tp->t_inpcb);
m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, so));
req = mtod(m, struct cpl_get_tcb *);
m->m_pkthdr.len = m->m_len = sizeof(*req);
@@ -855,10 +867,6 @@
cxgb_remove_tid(cdev, (void *)so, tid);
toepcb_release(toep);
}
-#ifdef notyet
- t3_set_ca_ops(so, &tcp_init_congestion_ops);
-#endif
- TOE_DEV(so) = NULL;
#if 0
log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state);
#endif
@@ -936,16 +944,11 @@
struct toepcb *toep;
struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
- toep = malloc(sizeof(struct toepcb), M_DEVBUF, M_NOWAIT);
-
+ toep = toepcb_alloc();
if (toep == NULL)
return (ENOMEM);
printf("initializing offload socket\n");
-
- toepcb_init(toep);
- toepcb_hold(toep);
-
tp->t_toe = toep;
toep->tp_toedev = dev;
@@ -1309,7 +1312,7 @@
tp = sototcpcb(so);
if ((err = t3_set_cong_control(so, name)) == 0)
- tp->t_cong_control = strdup(name, M_DEVBUF);
+ tp->t_cong_control = strdup(name, M_CXGB);
else
return (err);
} else {
@@ -1370,6 +1373,8 @@
struct socket *so = toeptoso(toep);
int len = be16toh(hdr->len);
+ INP_LOCK(tp->t_inpcb);
+
#ifdef notyet
if (__predict_false(sk_no_receive(sk))) {
handle_excess_rx(so, skb);
@@ -1424,11 +1429,18 @@
"new_rx_data: seq 0x%x len %u",
TCP_SKB_CB(skb)->seq, skb->len);
#endif
+ SOCKBUF_LOCK(&so->so_rcv);
+ if (sb_notify(&so->so_rcv))
+ printf("rx_data so=%p flags=0x%x len=%d\n", so, so->so_rcv.sb_flags, m->m_pkthdr.len);
+
+
+ sbappendstream_locked(&so->so_rcv, m);
+ INP_UNLOCK(tp->t_inpcb);
- sbappend(&so->so_rcv, m);
-
if (__predict_true((so->so_state & SS_NOFDREF) == 0))
- sorwakeup(so);
+ sorwakeup_locked(so);
+ else
+ SOCKBUF_UNLOCK(&so->so_rcv);
}
/*
@@ -1686,13 +1698,18 @@
struct toepcb *toep = tp->t_toe;
int keep = 0, dead = (so->so_state & SS_NOFDREF);
+ printf("do_peer_fin state=%d dead=%d\n", tp->t_state, !!dead);
+
#ifdef T3_TRACE
T3_TRACE0(TIDTB(sk),"do_peer_fin:");
#endif
- if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING))
+ if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
+ printf("abort_pending set\n");
+
goto out;
-
+ }
+
#ifdef notyet
if (ULP_MODE(tp) == ULP_MODE_TCPDDP) {
keep = handle_peer_close_data(so, skb);
@@ -1702,8 +1719,12 @@
sk->sk_shutdown |= RCV_SHUTDOWN;
sock_set_flag(so, SOCK_DONE);
#endif
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0)
+ socantrcvmore(so);
switch (tp->t_state) {
case TCPS_SYN_RECEIVED:
+ tp->t_starttime = ticks;
+ /* FALLTHROUGH */
case TCPS_ESTABLISHED:
tp->t_state = TCPS_CLOSE_WAIT;
break;
@@ -1737,6 +1758,11 @@
}
if (!dead) {
+ printf("waking up waiters on %p rcv_notify=%d flags=0x%x\n", so, sb_notify(&so->so_rcv), so->so_rcv.sb_flags);
+
+ sorwakeup(so);
+ sowwakeup(so);
+ wakeup(&so->so_timeo);
#ifdef notyet
sk->sk_state_change(sk);
@@ -1778,6 +1804,8 @@
tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
+ printf("process_close_con_rpl(%p) state=%d dead=%d\n", so, tp->t_state,
+ !!(so->so_state & SS_NOFDREF));
if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING))
goto out;
@@ -1787,9 +1815,10 @@
if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(inp);
- tcp_close(tp);
+ tp = tcp_close(tp);
INP_INFO_WUNLOCK(&tcbinfo);
- INP_UNLOCK(inp);
+ if (tp)
+ INP_UNLOCK(inp);
} else
enter_timewait(so);
break;
@@ -1802,9 +1831,10 @@
t3_release_offload_resources(so);
INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(inp);
- tcp_close(tp);
+ tp = tcp_close(tp);
INP_INFO_WUNLOCK(&tcbinfo);
- INP_UNLOCK(inp);
+ if (tp)
+ INP_UNLOCK(inp);
break;
case TCPS_FIN_WAIT_1:
@@ -1817,7 +1847,7 @@
*/
sowwakeup(so);
sorwakeup(so);
-
+ wakeup(&so->so_timeo);
} else
printf("FIN_WAIT1 shutdown handling incomplete\n");
@@ -1881,9 +1911,9 @@
!is_t3a(TOE_DEV(so))) {
if (toep->tp_flags & TP_ABORT_REQ_RCVD)
panic("TP_ABORT_REQ_RCVD set");
- t3_release_offload_resources(so);
INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(tp->t_inpcb);
+ t3_release_offload_resources(so);
tcp_close(tp);
INP_INFO_WUNLOCK(&tcbinfo);
INP_UNLOCK(tp->t_inpcb);
@@ -2057,9 +2087,9 @@
*/
if (__predict_false(parenttp->t_state == TCPS_LISTEN)) {
cleanup_syn_rcv_conn(child, parent);
- t3_release_offload_resources(child);
INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(inp);
+ t3_release_offload_resources(child);
tcp_close(childtp);
INP_INFO_WUNLOCK(&tcbinfo);
INP_UNLOCK(inp);
@@ -2136,14 +2166,14 @@
#endif
/*
* SYN_RECV needs special processing. If abort_syn_rcv()
- * returns 0 is has taken care of the abort.2
+ * returns 0 is has taken care of the abort.
*/
if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
return;
- t3_release_offload_resources(so);
INP_INFO_WLOCK(&tcbinfo);
INP_LOCK(tp->t_inpcb);
+ t3_release_offload_resources(so);
tcp_close(tp);
INP_INFO_WUNLOCK(&tcbinfo);
INP_UNLOCK(tp->t_inpcb);
@@ -2385,6 +2415,7 @@
* entry already exists - free toepcb
* and l2t
*/
+ printf("syncache entry present\n");
toepcb_release(toep);
break;
case SC_DROP:
@@ -2393,6 +2424,7 @@
* either it timed out, or it was evicted
* we need to explicitly release the tid
*/
+ printf("syncache entry dropped\n");
toepcb_release(toep);
break;
default:
@@ -2990,6 +3022,8 @@
struct tom_data *d = TOM_DATA(tdev);
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep = tp->t_toe;
+
+ INP_LOCK(tp->t_inpcb);
/*
* It's OK if the TID is currently in use, the owning socket may have
@@ -3002,6 +3036,7 @@
toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
socket_act_establish(so, m);
+ INP_UNLOCK(tp->t_inpcb);
return 0;
}
@@ -3020,6 +3055,8 @@
int bytes = 0;
DPRINTF("wr_ack: snd_una=%u credits=%d\n", snd_una, credits);
+
+ INP_LOCK(tp->t_inpcb);
toep->tp_wr_avail += credits;
if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail)
@@ -3090,14 +3127,16 @@
}
if (bytes) {
DPRINTF("sbdrop(%d)\n", bytes);
-
- sbdrop(&so->so_snd, bytes);
+ SOCKBUF_LOCK(&so->so_snd);
+ sbdrop_locked(&so->so_snd, bytes);
+ sowwakeup_locked(so);
}
if (so->so_snd.sb_sndptroff < so->so_snd.sb_cc)
t3_push_frames(so, 0);
out_free:
+ INP_UNLOCK(tp->t_inpcb);
m_free(m);
}
More information about the p4-projects
mailing list