PERFORCE change 128854 for review
Kip Macy
kmacy at FreeBSD.org
Thu Nov 8 21:39:15 PST 2007
http://perforce.freebsd.org/chv.cgi?CH=128854
Change 128854 by kmacy at kmacy:storage:toestack on 2007/11/09 05:38:46
Add infrastructure to send the SYN-ACK that establishes a connection
and to tie into the syncache.
Affected files ...
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#19 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_defs.h#10 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.c#10 edit
.. //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#10 edit
Differences ...
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c#19 (text+ko) ====
@@ -39,6 +39,7 @@
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/socket.h>
+#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
@@ -57,11 +58,11 @@
#include <dev/cxgb/sys/mbufq.h>
#include <netinet/ip.h>
-#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_ofld.h>
#include <netinet/tcp_seq.h>
+#include <netinet/tcp_syncache.h>
#include <net/route.h>
@@ -703,7 +704,8 @@
{
struct toepcb *toep = sototoep(so);
toepcb_hold(toep);
- cxgb_insert_tid(d->cdev, d->client, so, tid);
+
+ cxgb_insert_tid(d->cdev, d->client, toep, tid);
}
/**
@@ -934,26 +936,30 @@
struct tcpcb *tp = sototcpcb(so);
struct toepcb *toep = tp->t_toe;
int wscale = select_rcv_wscale(tp->rcv_wnd);
+ int qset_idx;
+
+ if (toep)
+ qset_idx = toep->tp_qset_idx;
+ else
+ qset_idx = 0;
return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) |
V_KEEP_ALIVE((so->so_options & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS |
- V_WND_SCALE(wscale) | V_MSS_IDX(toep->tp_qset_idx);
+ V_WND_SCALE(wscale) | V_MSS_IDX(qset_idx);
}
static inline unsigned int
-calc_opt0l(struct socket *so)
+calc_opt0l(struct socket *so, int ulp_mode)
{
struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
- return V_TOS(SO_TOS(so)) | V_ULP_MODE(toep->tp_ulp_mode) |
+ return V_TOS(SO_TOS(so)) | V_ULP_MODE(ulp_mode) |
V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ));
}
static inline unsigned int
-calc_opt2(const struct socket *so)
+calc_opt2(const struct socket *so, struct toedev *dev)
{
- struct toedev *dev = TOE_DEV(so);
int flv_valid;
flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1);
@@ -971,6 +977,9 @@
{
struct cpl_act_open_req *req;
struct inpcb *inp = sotoinpcb(so);
+ struct tcpcb *tp = intotcpcb(inp);
+ struct toepcb *toep = tp->t_toe;
+ struct toedev *tdev = TOE_DEV(so);
m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, so));
@@ -986,9 +995,9 @@
req->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) |
V_TX_CHANNEL(e->smt_idx));
- req->opt0l = htonl(calc_opt0l(so));
+ req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode));
req->params = 0;
- req->opt2 = htonl(calc_opt2(so));
+ req->opt2 = htonl(calc_opt2(so, tdev));
}
@@ -1319,11 +1328,11 @@
* Process new data received for a connection.
*/
static void
-new_rx_data(struct socket *so, struct mbuf *m)
+new_rx_data(struct toepcb *toep, struct mbuf *m)
{
struct cpl_rx_data *hdr = cplhdr(m);
- struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
+ struct tcpcb *tp = toep->tp_tp;
+ struct socket *so = toeptoso(toep);
int len = be16toh(hdr->len);
#ifdef notyet
@@ -1393,11 +1402,9 @@
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
- struct socket *so = (struct socket *)ctx;
+ struct toepcb *toep = (struct toepcb *)ctx;
- VALIDATE_SOCK(so);
-
- new_rx_data(so, m);
+ new_rx_data(toep, m);
return (0);
}
@@ -1885,15 +1892,21 @@
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
const struct cpl_abort_req_rss *req = cplhdr(m);
- struct socket *so = (struct socket *)ctx;
- struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct socket *so;
if (is_neg_adv_abort(req->status)) {
m_free(m);
return (0);
}
-
+ printf("aborting tid=%d\n", toep->tp_tid);
+
+ if (toep->tp_flags & TP_SYN_RCVD) {
+ printf("abort for unestablished connection :-(\n");
+ return (0);
+ }
+
+ so = toeptoso(toep);
VALIDATE_SOCK(so);
toepcb_hold(toep);
process_abort_req(so, m, TOE_DEV(so));
@@ -1975,6 +1988,7 @@
* Create a new socket as a child of the listening socket 'lsk' and initialize
* with the information in the supplied PASS_ACCEPT_REQ message.
*/
+#ifdef notyet
static struct socket *
mk_pass_sock(struct socket *lso, struct toedev *dev, int tid,
struct cpl_pass_accept_req *req)
@@ -2057,6 +2071,7 @@
#endif
return NULL;
}
+#endif
/*
* Populate a reject CPL_PASS_ACCEPT_RPL WR.
@@ -2092,6 +2107,74 @@
m_free(m);
}
+static void
+handle_syncache_event(int event, void *arg)
+{
+ struct toepcb *toep = arg;
+
+ switch (event) {
+ case SC_ENTRY_PRESENT:
+ /*
+ * entry already exists - free toepcb
+ * and l2t
+ */
+ toepcb_release(toep);
+ break;
+ case SC_DROP:
+ /*
+ * The syncache has given up on this entry:
+ * either it timed out or it was evicted, so
+ * we need to explicitly release the tid.
+ */
+ toepcb_release(toep);
+ break;
+ default:
+ log(LOG_ERR, "unknown syncache event %d\n", event);
+ break;
+ }
+}
+
+static void
+syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep)
+{
+ struct in_conninfo inc;
+ struct tcpopt to;
+ struct tcphdr th;
+ struct inpcb *inp;
+ int mss, wsf, sack, ts;
+
+ bzero(&to, sizeof(struct tcpopt));
+ inp = sotoinpcb(lso);
+
+ /*
+ * Fill out information for entering us into the syncache
+ */
+ th.th_sport = req->peer_port;
+ th.th_dport = req->local_port;
+ th.th_seq = req->rcv_isn;
+ th.th_flags = TH_SYN;
+
+ inc.inc_isipv6 = 0;
+ inc.inc_len = 0;
+ memcpy(&inc.inc_faddr, &req->peer_ip, 4);
+ memcpy(&inc.inc_laddr, &req->local_ip, 4);
+ inc.inc_ext = toep;
+ inc.inc_eh = handle_syncache_event;
+
+ mss = req->tcp_options.mss;
+ wsf = req->tcp_options.wsf;
+ ts = req->tcp_options.tstamp;
+ sack = req->tcp_options.sack;
+ to.to_mss = mss;
+ to.to_wscale = wsf;
+ to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0);
+
+ INP_INFO_WLOCK(&tcbinfo);
+ INP_LOCK(inp);
+ syncache_add(&inc, &to, &th, inp, &lso, NULL);
+}
+
+
/*
* Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket
* lock held. Note that the sock here is a listening socket that is not owned
@@ -2102,7 +2185,6 @@
struct listen_ctx *lctx)
{
int rt_flags;
- struct socket *newso;
struct l2t_entry *e;
struct iff_mac tim;
struct mbuf *reply_mbuf, *ddp_mbuf = NULL;
@@ -2111,26 +2193,37 @@
unsigned int tid = GET_TID(req);
struct tom_data *d = TOM_DATA(tdev);
struct t3cdev *cdev = d->cdev;
- struct tcpcb *newtp, *tp = sototcpcb(so);
- struct toepcb *toep, *newtoep;
-
+ struct tcpcb *tp = sototcpcb(so);
+ struct toepcb *newtoep;
+ struct rtentry *dst;
+ struct sockaddr_in nam;
+
reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
- if (__predict_false(!reply_mbuf)) {
+ if (__predict_false(reply_mbuf == NULL)) {
if (tdev->ttid == TOE_ID_CHELSIO_T3)
t3_defer_reply(m, tdev, reject_pass_request);
else {
cxgb_queue_tid_release(cdev, tid);
m_free(m);
}
+ printf("failed to get reply_mbuf\n");
+
goto out;
}
- if (tp->t_state != TCPS_LISTEN)
+ if (tp->t_state != TCPS_LISTEN) {
+ printf("socket not in listen state\n");
+
goto reject;
+ }
+
tim.mac_addr = req->dst_mac;
tim.vlan_tag = ntohs(req->vlan_tag);
- if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev)
+ if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) {
+ printf("rejecting from failed GET_IFF_FROM_MAC\n");
goto reject;
+ }
+
#ifdef notyet
/*
* XXX do route lookup to confirm that we're still listening on this
@@ -2154,52 +2247,89 @@
if ((rt_flags & RTF_LOCAL) == 0)
goto reject;
+ /*
+ * Calculate values and add to syncache
+ */
+
+ newtoep = toepcb_alloc();
+ if (newtoep == NULL)
+ goto reject;
+
+ bzero(&nam, sizeof(struct sockaddr_in));
- newso = sonewconn(so, SS_ISCONNECTED);
- newtp = sototcpcb(so);
+ nam.sin_len = sizeof(struct sockaddr_in);
+ nam.sin_family = AF_INET;
+ memcpy(&nam.sin_addr, &req->peer_ip, 4);
+ dst = rtalloc2((struct sockaddr *)&nam, 1, 0);
+
+ if (dst == NULL) {
+
+ printf("failed to find route\n");
+
+ }
+ e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev);
+ if (e == NULL) {
+
+ printf("failed to get l2t\n");
+
+ }
/*
- * XXX need to inherit ULP mode
+ * Point to our listen socket until accept
*/
- newtoep = toepcb_alloc(newtp);
+ newtoep->tp_tp = tp;
+ newtoep->tp_flags = TP_SYN_RCVD;
+ newtoep->tp_tid = tid;
- /* Don't get a reference, newsk starts out with ref count 2 */
- cxgb_insert_tid(cdev, d->client, newso, tid);
+ printf("inserting tid=%d\n", tid);
+ cxgb_insert_tid(cdev, d->client, newtoep, tid);
- if (newtoep->tp_ulp_mode) {
+ if (lctx->ulp_mode) {
ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA);
if (!ddp_mbuf)
newtoep->tp_ulp_mode = 0;
+ else
+ newtoep->tp_ulp_mode = lctx->ulp_mode;
}
- m_set_socket(reply_mbuf, newso);
set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure);
- e = newtoep->tp_l2t;
+
+ printf("adding request to syn cache\n");
+
+ syncache_add_accept_req(req, so, newtoep);
rpl = cplhdr(reply_mbuf);
+ reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl);
rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid));
rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten
- rpl->opt0h = htonl(calc_opt0h(newso) | V_L2T_IDX(e->idx) |
+ printf("e=%p idxs:\n", e);
+ printf("e->idx=%d e->smt_idx=%d\n", e->idx, e->smt_idx);
+
+ rpl->opt0h = htonl(calc_opt0h(so) | V_L2T_IDX(e->idx) |
V_TX_CHANNEL(e->smt_idx));
- rpl->opt0l_status = htonl(calc_opt0l(newso) |
+ rpl->opt0l_status = htonl(calc_opt0l(so, lctx->ulp_mode) |
CPL_PASS_OPEN_ACCEPT);
- rpl->opt2 = htonl(calc_opt2(newso));
-
+ rpl->opt2 = htonl(calc_opt2(so, tdev));
rpl->rsvd = rpl->opt2; /* workaround for HW bug */
- m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newso));
+ m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, so));
+ printf("sending off reply\n");
+
l2t_send(cdev, reply_mbuf, e);
m_free(m);
- if (toep->tp_ulp_mode) {
+#ifdef notyet
+ /*
+ * XXX this call path has to be converted to not depend on sockets
+ */
+ if (newtoep->tp_ulp_mode)
__set_tcb_field(newso, ddp_mbuf, W_TCB_RX_DDP_FLAGS,
V_TF_DDP_OFF(1) |
TP_DDP_TIMER_WORKAROUND_MASK,
V_TF_DDP_OFF(1) |
TP_DDP_TIMER_WORKAROUND_VAL, 1);
- return;
- }
-
+#endif
+ return;
reject:
if (tdev->ttid == TOE_ID_CHELSIO_T3)
mk_pass_accept_rpl(reply_mbuf, m);
@@ -2366,19 +2496,20 @@
do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
struct cpl_pass_establish *req = cplhdr(m);
- struct socket *lso, *so = (struct socket *)ctx;
- struct toedev *tdev = TOE_DEV(so);
+ struct toepcb *toep = (struct toepcb *)ctx;
+ struct socket *so, *lso;
// Complete socket initialization now that we have the SND_ISN
- struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
+
+ struct toedev *tdev;
struct toe_tid_entry *t3c_stid;
struct tid_info *t;
unsigned int stid;
+
+ lso = toeptoso(toep);
+ tdev = TOE_DEV(lso);
- VALIDATE_SOCK(so);
+ SOCK_LOCK(lso);
- SOCK_LOCK(so);
-
toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs);
toep->tp_wr_unacked = 0;
toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
@@ -2421,7 +2552,7 @@
#if 0
unlock:
#endif
- SOCK_UNLOCK(so);
+ SOCK_UNLOCK(lso);
return 0;
}
@@ -2537,6 +2668,7 @@
* backlogged its last CPL message(s). Just take it away.
*/
toep->tp_tid = tid;
+ toep->tp_tp = tp;
so_insert_tid(d, so, tid);
free_atid(cdev, atid);
toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data));
@@ -2550,11 +2682,11 @@
* next batch of work requests from the write queue.
*/
static void
-wr_ack(struct socket *so, struct mbuf *m)
+wr_ack(struct toepcb *toep, struct mbuf *m)
{
- struct tcpcb *tp = sototcpcb(so);
- struct toepcb *toep = tp->t_toe;
+ struct tcpcb *tp = toep->tp_tp;
struct cpl_wr_ack *hdr = cplhdr(m);
+ struct socket *so = toeptoso(toep);
unsigned int credits = ntohs(hdr->credits);
u32 snd_una = ntohl(hdr->snd_una);
int bytes = 0;
@@ -2647,13 +2779,13 @@
static int
do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx)
{
- struct socket *so = (struct socket *)ctx;
+ struct toepcb *toep = (struct toepcb *)ctx;
printf("do_wr_ack\n");
VALIDATE_SOCK(so);
- wr_ack(so, m);
+ wr_ack(toep, m);
return 0;
}
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_defs.h#10 (text+ko) ====
@@ -58,7 +58,7 @@
void t3_reset_synq(struct socket *listen_so);
void t3_defer_reply(struct mbuf *m, struct toedev *dev, defer_handler_t handler);
-struct toepcb *toepcb_alloc(struct tcpcb *);
+struct toepcb *toepcb_alloc(void);
void toepcb_hold(struct toepcb *);
void toepcb_release(struct toepcb *);
void toepcb_init(struct toepcb *);
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_t3_ddp.h#3 (text+ko) ====
@@ -30,6 +30,7 @@
$FreeBSD$
***************************************************************************/
+
#ifndef T3_DDP_H
#define T3_DDP_H
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.c#10 (text+ko) ====
@@ -133,7 +133,7 @@
}
struct toepcb *
-toepcb_alloc(struct tcpcb *tp)
+toepcb_alloc(void)
{
struct toepcb *toep;
@@ -142,11 +142,10 @@
if (toep == NULL)
return (NULL);
- toep->tp_tp = tp;
- tp->t_toe = toep;
-
toepcb_init(toep);
toepcb_hold(toep);
+
+ return (toep);
}
void
@@ -168,6 +167,7 @@
/*
* XXX clear our reference on the inpcb
*/
+ cxgb_remove_tid(TOM_DATA(toep->tp_toedev)->cdev, NULL, toep->tp_tid);
free(toep, M_DEVBUF);
return;
}
==== //depot/projects/toestack/sys/dev/cxgb/ulp/tom/cxgb_tom.h#10 (text+ko) ====
@@ -120,6 +120,7 @@
struct listen_ctx {
struct socket *lso;
struct tom_data *tom_data;
+ int ulp_mode;
};
#define TOM_DATA(dev) (*(struct tom_data **)&(dev)->l4opt)
@@ -134,7 +135,7 @@
#define TP_ABORT_RPL_RCVD (1 << 5)
#define TP_ABORT_REQ_RCVD (1 << 6)
#define TP_CLOSE_CON_REQUESTED (1 << 7)
-
+#define TP_SYN_RCVD (1 << 8)
struct toepcb {
struct toedev *tp_toedev;
More information about the p4-projects
mailing list