PERFORCE change 128924 for review
Kip Macy
kmacy at FreeBSD.org
Sat Nov 10 21:47:32 PST 2007
http://perforce.freebsd.org/chv.cgi?CH=128924
Change 128924 by kmacy at kmacy:storage:toestack on 2007/11/11 05:47:24
fix ctloutput NULL pointer dereference
fix qset / qset_idx / mtu_idx misuse
remove toe_mbuf definition and all references to it
add initial connection accept
Affected files ...
.. //depot/projects/toestack/sys/dev/cxgb/cxgb_l2t.h#8 edit
.. //depot/projects/toestack/sys/dev/cxgb/cxgb_offload.c#17 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/toecore/toedev.h#7 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#20 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#11 edit
Differences ...
==== //depot/projects/toestack/sys/dev/cxgb/cxgb_l2t.h#8 (text+ko) ====
@@ -108,9 +108,7 @@
static __inline void set_arp_failure_handler(struct mbuf *m,
arp_failure_handler_func hnd)
{
- struct toe_mbuf *tm = (struct toe_mbuf *)m;
-
- tm->m_toe.mt_arp_fail = (opaque_arp_failure_handler_func)hnd;
+ m->m_pkthdr.header = (opaque_arp_failure_handler_func)hnd;
}
==== //depot/projects/toestack/sys/dev/cxgb/cxgb_offload.c#17 (text+ko) ====
@@ -732,7 +732,7 @@
unsigned int hwtid;
struct toe_tid_entry *toe_tid;
- printf("do_hwtid_rpl m=%p\n", m);
+ printf("do_hwtid_rpl opcode=0x%x\n", p->opcode);
hwtid = G_TID(ntohl(p->opcode_tid));
toe_tid = lookup_tid(&(T3C_DATA (dev))->tid_maps, hwtid);
==== //depot/projects/toestack/sys/dev/cxgb/ulp/toecore/toedev.h#7 (text+ko) ====
@@ -163,32 +163,4 @@
}
#endif /* CONFIG_TCP_OFFLOAD */
-struct toepcb;
-
-struct m_toe_ {
- void (*mt_arp_fail)(void *, struct mbuf *);
- void (*mt_backlog_rcv)(struct toepcb *, struct mbuf *);
- int priority;
- struct toepcb *mt_toepcb;
-};
-
-#define TMLEN (MLEN - sizeof(struct m_toe_))
-
-struct toe_mbuf {
- struct m_hdr m_hdr;
- union {
- struct {
- struct pkthdr MH_pkthdr; /* M_PKTHDR set */
- union {
- struct m_ext_ MH_ext; /* M_EXT set */
- char MH_databuf[MHLEN];
- } MH_dat;
- struct m_toe_ MH_toe; /* M_TOE set */
- } MH;
- char M_databuf[TMLEN]; /* !M_PKTHDR, !M_EXT */
- } M_dat;
-};
-
-#define m_toe M_dat.MH.MH_toe
-
#endif /* _OFFLOAD_DEV_H_ */
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#20 (text+ko) ====
@@ -209,7 +209,7 @@
if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
- V_TX_CPU_IDX(toep->tp_qset_idx));
+ V_TX_CPU_IDX(toep->tp_qset));
/* Sendbuffer is in units of 32KB.
*/
@@ -547,6 +547,9 @@
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep = tp->t_toe;
+ if (toep == NULL)
+ return;
+
if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN))
return;
@@ -727,28 +730,23 @@
}
static unsigned int
-select_mss(struct socket *so, unsigned int pmtu)
+select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu)
{
unsigned int idx;
- struct tcpcb *tp = sototcpcb(so);
- struct tom_data *d;
- const struct t3c_data *td;
- struct toedev *toed;
-
- toed = TOE_DEV(so);
- d = TOM_DATA(toed);
- if (d == NULL)
- panic("tom_data not set");
- td = T3C_DATA(d->cdev);
#ifdef notyet
struct rtentry *dst = sotoinpcb(so)->inp_route.ro_rt;
#endif
- tp->t_maxseg = pmtu - 40;
- if (tp->t_maxseg < td->mtus[0] - 40)
- tp->t_maxseg = td->mtus[0] - 40;
- idx = find_best_mtu(td, tp->t_maxseg + 40);
- tp->t_maxseg = td->mtus[idx] - 40;
+ if (tp) {
+ tp->t_maxseg = pmtu - 40;
+ if (tp->t_maxseg < td->mtus[0] - 40)
+ tp->t_maxseg = td->mtus[0] - 40;
+ idx = find_best_mtu(td, tp->t_maxseg + 40);
+
+ tp->t_maxseg = td->mtus[idx] - 40;
+ } else
+ idx = find_best_mtu(td, pmtu);
+
return (idx);
}
@@ -894,7 +892,8 @@
{
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep;
-
+ struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev);
+
toep = malloc(sizeof(struct toepcb), M_DEVBUF, M_NOWAIT);
if (toep == NULL)
@@ -915,7 +914,7 @@
toep->tp_wr_unacked = 0;
toep->tp_delack_mode = 0;
- toep->tp_mtu_idx = select_mss(so, dst->rt_ifp->if_mtu);
+ toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu);
tp->rcv_wnd = select_rcv_wnd(so);
toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) &&
@@ -931,21 +930,14 @@
* The next two functions calculate the option 0 value for a socket.
*/
static inline unsigned int
-calc_opt0h(struct socket *so)
+calc_opt0h(struct socket *so, int mtu_idx)
{
struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
int wscale = select_rcv_wscale(tp->rcv_wnd);
- int qset_idx;
- if (toep)
- qset_idx = toep->tp_qset_idx;
- else
- qset_idx = 0;
-
return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
- V_WND_SCALE(wscale) | V_MSS_IDX(qset_idx);
+ V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx);
}
static inline unsigned int
@@ -972,7 +964,7 @@
#endif
static void
-mk_act_open_req(struct socket *so, struct toe_mbuf *m,
+mk_act_open_req(struct socket *so, struct mbuf *m,
unsigned int atid, const struct l2t_entry *e)
{
struct cpl_act_open_req *req;
@@ -993,7 +985,7 @@
memcpy(&req->local_ip, &inp->inp_laddr, 4);
memcpy(&req->peer_ip, &inp->inp_faddr, 4);
- req->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) |
+ req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) |
V_TX_CHANNEL(e->smt_idx));
req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
req->params = 0;
@@ -1128,7 +1120,7 @@
t3_connect(struct toedev *tdev, struct socket *so,
struct ifnet *egress_ifp)
{
- struct toe_mbuf *m;
+ struct mbuf *m;
struct l2t_entry *e;
struct tom_data *d = TOM_DATA(tdev);
struct inpcb *inp = sotoinpcb(so);
@@ -1145,7 +1137,7 @@
if (!e)
goto free_tid;
- m = (struct toe_mbuf *)m_gethdr(MT_DATA, M_WAITOK);
+ m = m_gethdr(MT_DATA, M_WAITOK);
m_set_toep(m, tp->t_toe);
#if 0
@@ -1307,10 +1299,8 @@
static int
t3_ctloutput(struct socket *so, struct sockopt *sopt)
{
- struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
int err;
-
+
if (sopt->sopt_level != IPPROTO_TCP)
err = t3_ip_ctloutput(so, sopt);
else
@@ -1319,9 +1309,7 @@
if (err != EOPNOTSUPP)
return (err);
- printf("calling %p\n", toep->tp_ctloutput);
-
- return toep->tp_ctloutput(so, sopt);
+ return tcp_ctloutput(so, sopt);
}
/*
@@ -1404,6 +1392,8 @@
{
struct toepcb *toep = (struct toepcb *)ctx;
+ printf("rx_data len=%d\n", m->m_pkthdr.len);
+
new_rx_data(toep, m);
return (0);
@@ -1529,10 +1519,8 @@
tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */
-#if 0
if (!is_t3a(TOE_DEV(so)) && (toep->tp_flags & TP_ABORT_RPL_PENDING))
goto out;
-#endif
switch (tp->t_state) {
case TCPS_CLOSING: /* see FIN_WAIT2 case in do_peer_fin */
@@ -1577,9 +1565,7 @@
TOE_DEV(so)->name, toep->tp_tid,
tp->t_state);
}
-#if 0
out:
-#endif
m_free(m);
}
@@ -2149,17 +2135,21 @@
/*
* Fill out information for entering us into the syncache
*/
- th.th_sport = req->peer_port;
- th.th_dport = req->local_port;
- th.th_seq = req->rcv_isn;
+ inc.inc_fport = th.th_sport = req->peer_port;
+ inc.inc_lport = th.th_dport = req->local_port;
+ toep->tp_iss = th.th_seq = req->rcv_isn;
th.th_flags = TH_SYN;
inc.inc_isipv6 = 0;
inc.inc_len = 0;
- memcpy(&inc.inc_faddr, &req->peer_ip, 4);
- memcpy(&inc.inc_laddr, &req->local_ip, 4);
+ inc.inc_faddr.s_addr = req->peer_ip;
+ inc.inc_laddr.s_addr = req->local_ip;
inc.inc_ext = toep;
inc.inc_eh = handle_syncache_event;
+
+ printf("syncache add of %d:%d %d:%d\n",
+ ntohl(req->local_ip), ntohs(req->local_port),
+ ntohl(req->peer_ip), ntohs(req->peer_port));
mss = req->tcp_options.mss;
wsf = req->tcp_options.wsf;
@@ -2197,6 +2187,7 @@
struct toepcb *newtoep;
struct rtentry *dst;
struct sockaddr_in nam;
+ struct t3c_data *td = T3C_DATA(cdev);
reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
if (__predict_false(reply_mbuf == NULL)) {
@@ -2279,7 +2270,8 @@
newtoep->tp_tp = tp;
newtoep->tp_flags = TP_SYN_RCVD;
newtoep->tp_tid = tid;
-
+ newtoep->tp_toedev = tdev;
+
printf("inserting tid=%d\n", tid);
cxgb_insert_tid(cdev, d->client, newtoep, tid);
@@ -2303,17 +2295,14 @@
rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
- printf("e=%p idxs:\n", e);
- printf("e->idx=%d e->smt_idx=%d\n", e->idx, e->smt_idx);
- rpl->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) |
+ rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | V_L2T_IDX(e->idx) |
V_TX_CHANNEL(e->smt_idx));
rpl->opt0l_status = htonl(calc_opt0l(so, lctx->ulp_mode) |
CPL_PASS_OPEN_ACCEPT);
rpl->opt2 = htonl(calc_opt2(so, tdev));
rpl->rsvd = rpl->opt2; /* workaround for HW bug */
m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, so));
- printf("sending off reply\n");
l2t_send(cdev, reply_mbuf, e);
m_free(m);
@@ -2391,44 +2380,6 @@
}
/*
- * Add a passively open socket to its parent's accept queue. Note that the
- * child may be in any state by now, including TCP_CLOSE. We can guarantee
- * though that it has not been orphaned yet.
- */
-static void
-add_pass_open_to_parent(struct socket *child, struct socket *lso,
- struct toedev *dev)
-{
- struct tcpcb *tp = sototcpcb(lso);
- /*
- * If the server is closed it has already killed its embryonic
- * children. There is nothing further to do about child.
- */
- if (tp->t_state != TCPS_LISTEN)
- return;
-
- printf("need to move connection from syncache to so_comp for accept XXX\n");
- UNIMPLEMENTED();
-
-#ifdef notyet
- oreq = child->sk_user_data;
- child->sk_user_data = NULL;
-
- inet_csk_reqsk_queue_removed(lsk, oreq);
- synq_remove(tcp_sk(child));
-
- if (sk_acceptq_is_full(lsk) && !TOM_TUNABLE(dev, soft_backlog_limit)) {
- NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
- NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
- __reqsk_free(oreq);
- add_to_reap_list(child);
- } else {
- inet_csk_reqsk_queue_add(lsk, oreq, child);
- lsk->sk_data_ready(lsk, 0);
- }
-#endif
-}
-/*
* Called when a connection is established to translate the TCP options
* reported by HW to Linux's native format.
*/
@@ -2461,7 +2412,6 @@
toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn;
assign_rxopt(so, opt);
- toep->tp_ctloutput = so->so_proto->pr_ctloutput;
so->so_proto->pr_ctloutput = t3_ctloutput;
#if 0
@@ -2488,6 +2438,59 @@
tp->t_state = TCPS_ESTABLISHED;
}
+static int
+syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep)
+{
+
+ struct in_conninfo inc;
+ struct tcpopt to;
+ struct tcphdr th;
+ int mss, wsf, sack, ts;
+ struct mbuf *m = NULL;
+ const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev);
+ unsigned int opt;
+
+#ifdef MAC
+#error "no MAC support"
+#endif
+
+ opt = ntohs(req->tcp_opt);
+
+ bzero(&to, sizeof(struct tcpopt));
+
+ /*
+ * Fill out information for entering us into the syncache
+ */
+ inc.inc_fport = th.th_sport = req->peer_port;
+ inc.inc_lport = th.th_dport = req->local_port;
+ th.th_seq = req->rcv_isn;
+ th.th_flags = TH_ACK;
+
+ inc.inc_isipv6 = 0;
+ inc.inc_len = 0;
+ inc.inc_faddr.s_addr = req->peer_ip;
+ inc.inc_laddr.s_addr = req->local_ip;
+
+ inc.inc_ext = toep;
+ inc.inc_eh = handle_syncache_event;
+
+ mss = td->mtus[G_TCPOPT_MSS(opt)] - 40;
+ wsf = G_TCPOPT_WSCALE_OK(opt);
+ ts = G_TCPOPT_TSTAMP(opt);
+ sack = G_TCPOPT_SACK(opt);
+
+ to.to_mss = mss;
+ to.to_wscale = G_TCPOPT_SND_WSCALE(opt);
+ to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
+
+ printf("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n",
+ ntohl(req->local_ip), ntohs(req->local_port),
+ ntohl(req->peer_ip), ntohs(req->peer_port),
+ mss, wsf, ts, sack);
+ return syncache_expand(&inc, &to, &th, so, m);
+}
+
+
/*
* Process a CPL_PASS_ESTABLISH message. XXX a lot of the locking doesn't work
* if we are in TCP_SYN_RECV due to crossed SYNs
@@ -2497,23 +2500,50 @@
{
struct cpl_pass_establish *req = cplhdr(m);
struct toepcb *toep = (struct toepcb *)ctx;
+ struct tcpcb *tp;
struct socket *so, *lso;
+ struct t3c_data *td = T3C_DATA(cdev);
// Complete socket initialization now that we have the SND_ISN
struct toedev *tdev;
- struct toe_tid_entry *t3c_stid;
- struct tid_info *t;
- unsigned int stid;
- lso = toeptoso(toep);
- tdev = TOE_DEV(lso);
+ so = lso = toeptoso(toep);
+ tdev = toep->tp_toedev;
- SOCK_LOCK(lso);
+ INP_INFO_WLOCK(&tcbinfo);
+ if (!syncache_expand_establish_req(req, &so, toep)) {
+ /*
+ * No entry
+ */
+ UNIMPLEMENTED();
+ }
+ if (so == NULL) {
+ /*
+ * Couldn't create the socket
+ */
+ UNIMPLEMENTED();
+ }
+
+ tp = sototcpcb(so);
+ toep->tp_tp = tp;
+ tp->t_toe = toep;
+ reset_wr_list(tp);
+ tp->rcv_wnd = select_rcv_wnd(so);
+ install_offload_ops(so);
+
toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
toep->tp_wr_unacked = 0;
toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
+ toep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so->so_options & SO_NO_DDP) &&
+ tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0;
+ toep->tp_qset_idx = 0;
+ toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu);
+
make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt));
+ INP_INFO_WUNLOCK(&tcbinfo);
+ soisconnected(so);
+
#ifdef notyet
/*
* XXX not sure how these checks map to us
@@ -2539,22 +2569,9 @@
goto unlock;
}
#endif
+ m_free(m);
- stid = G_PASS_OPEN_TID(ntohl(req->tos_tid));
- t = &(T3C_DATA(cdev))->tid_maps;
- t3c_stid = lookup_stid(t, stid);
- lso = ((struct listen_ctx *)t3c_stid->ctx)->lso;
-
- SOCK_LOCK(lso);
- m_free(m);
- add_pass_open_to_parent(so, lso, tdev);
- SOCK_UNLOCK(lso);
-#if 0
-unlock:
-#endif
- SOCK_UNLOCK(lso);
-
- return 0;
+ return (0);
}
/*
@@ -2876,6 +2893,7 @@
tcphdr_skb->h.raw = tcphdr_skb->data;
memset(tcphdr_skb->data, 0, tcphdr_skb->len);
#endif
+
t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish);
t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl);
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#11 (text+ko) ====
@@ -153,6 +153,7 @@
int tp_qset;
int tp_flags;
int tp_enqueued_bytes;
+ tcp_seq tp_iss;
tcp_seq tp_delack_seq;
tcp_seq tp_rcv_wup;
tcp_seq tp_copied_seq;
More information about the p4-projects mailing list