svn commit: r346970 - stable/11/sys/dev/cxgbe/tom
Navdeep Parhar
np at FreeBSD.org
Tue Apr 30 18:03:19 UTC 2019
Author: np
Date: Tue Apr 30 18:03:17 2019
New Revision: 346970
URL: https://svnweb.freebsd.org/changeset/base/346970
Log:
MFC r342208:
cxgbe/t4_tom: fixes for issues on the passive open side.
- Fix PR 227760 by getting the TOE to respond to the SYN after the call
to toe_syncache_add, not during it. The kernel syncache code calls
syncache_respond just before syncache_insert. If the ACK to the
syncache_respond is processed in another thread it may run before the
syncache_insert and won't find the entry. Note that this affects only
t4_tom because it's the only driver trying to insert and expand
syncache entries from different threads.
- Do not leak resources if an embryonic connection terminates at
SYN_RCVD because of L2 lookup failures.
- Retire lctx->synq and associated code because there is never a need to
walk the list of embryonic connections associated with a listener.
The per-tid state is still called a synq entry in the driver even
though the synq itself is now gone.
PR: 227760
Sponsored by: Chelsio Communications
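
[Editorial sketch, not part of the commit.] The core of the PR 227760 fix is visible in the t4_listen.c hunks below: t4_syncache_respond no longer transmits the SYN-ACK itself, it only bumps synqe->ok_to_respond, and do_pass_accept_req sends the SYN-ACK (via the new send_synack) after toe_syncache_add has returned, i.e. after the entry is actually in the syncache. The following userspace model shows that ordering with C11 atomics; the names and the printf placeholders are illustrative and are not the driver's code.

/*
 * Userspace model of the ordering fix, with hypothetical names; it is
 * not the driver code. syncache_add() calls the TOE respond hook just
 * before inserting the entry, so a SYN-ACK sent from inside the hook
 * can be ACKed (on another queue/thread) before the entry exists. The
 * fix has the hook merely record the request in an atomic counter; the
 * caller sends the SYN-ACK only after syncache_add() has returned.
 */
#include <stdatomic.h>
#include <stdio.h>

struct synqe_model {
	atomic_int ok_to_respond;	/* mirrors synqe->ok_to_respond */
};

/* What the respond hook now does: defer, don't send. */
static int
respond_hook(struct synqe_model *s)
{

	if (atomic_fetch_add(&s->ok_to_respond, 1) == 0)
		printf("respond requested; SYN-ACK deferred\n");
	return (0);
}

/* What the caller does once syncache_add() has returned. */
static void
after_syncache_add(struct synqe_model *s)
{

	if (atomic_load(&s->ok_to_respond) > 0)
		printf("entry is in the syncache; send SYN-ACK now\n");
}

int
main(void)
{
	struct synqe_model s = { .ok_to_respond = 0 };

	respond_hook(&s);	/* triggered from inside syncache_add() */
	after_syncache_add(&s);	/* runs after syncache_add() returns */
	return (0);
}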
Modified:
stable/11/sys/dev/cxgbe/tom/t4_connect.c
stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
stable/11/sys/dev/cxgbe/tom/t4_listen.c
stable/11/sys/dev/cxgbe/tom/t4_tom.c
stable/11/sys/dev/cxgbe/tom/t4_tom.h
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_connect.c Tue Apr 30 17:45:22 2019 (r346969)
+++ stable/11/sys/dev/cxgbe/tom/t4_connect.c Tue Apr 30 18:03:17 2019 (r346970)
@@ -97,7 +97,8 @@ do_act_establish(struct sge_iq *iq, const struct rss_h
goto done;
}
- make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
+ make_established(toep, be32toh(cpl->snd_isn) - 1,
+ be32toh(cpl->rcv_isn) - 1, cpl->tcp_opt);
if (toep->ulp_mode == ULP_MODE_TLS)
tls_establish(toep);
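
[Editorial sketch, not part of the commit.] For context on the hunk above: the CPL carries the sequence numbers in big-endian and already advanced past the SYN (true ISN + 1), and with this change the conversion happens in the callers rather than inside make_established. A minimal userspace sketch of that convention, using ntohl as a stand-in for the kernel's be32toh and a made-up value:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

/* Wire field -> host-order "true" ISN, as the callers now compute it. */
static uint32_t
true_isn(uint32_t wire_isn_be)
{

	return (ntohl(wire_isn_be) - 1);
}

int
main(void)
{
	uint32_t snd_isn = htonl(1001);	/* pretend CPL field: true ISS 1000, plus 1 */

	printf("true ISS = %u\n", true_isn(snd_isn));
	return (0);
}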
Modified: stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c Tue Apr 30 17:45:22 2019 (r346969)
+++ stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c Tue Apr 30 18:03:17 2019 (r346970)
@@ -383,18 +383,15 @@ assign_rxopt(struct tcpcb *tp, unsigned int opt)
* Completes some final bits of initialization for just established connections
* and changes their state to TCPS_ESTABLISHED.
*
- * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1.
+ * The ISNs are from the exchange of SYNs.
*/
void
-make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn,
- uint16_t opt)
+make_established(struct toepcb *toep, uint32_t iss, uint32_t irs, uint16_t opt)
{
struct inpcb *inp = toep->inp;
struct socket *so = inp->inp_socket;
struct tcpcb *tp = intotcpcb(inp);
long bufsize;
- uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */
- uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */
uint16_t tcpopt = be16toh(opt);
struct flowc_tx_params ftxp;
@@ -1254,22 +1251,12 @@ do_peer_close(struct sge_iq *iq, const struct rss_head
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
if (__predict_false(toep->flags & TPF_SYNQE)) {
-#ifdef INVARIANTS
- struct synq_entry *synqe = (void *)toep;
-
- INP_WLOCK(synqe->lctx->inp);
- if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
- KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
- ("%s: listen socket closed but tid %u not aborted.",
- __func__, tid));
- } else {
- /*
- * do_pass_accept_req is still running and will
- * eventually take care of this tid.
- */
- }
- INP_WUNLOCK(synqe->lctx->inp);
-#endif
+ /*
+ * do_pass_establish must have run before do_peer_close and if
+ * this is still a synqe instead of a toepcb then the connection
+ * must be getting aborted.
+ */
+ MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
toep, toep->flags);
return (0);
@@ -1574,22 +1561,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header
uint32_t ddp_placed = 0;
if (__predict_false(toep->flags & TPF_SYNQE)) {
-#ifdef INVARIANTS
- struct synq_entry *synqe = (void *)toep;
-
- INP_WLOCK(synqe->lctx->inp);
- if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
- KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
- ("%s: listen socket closed but tid %u not aborted.",
- __func__, tid));
- } else {
- /*
- * do_pass_accept_req is still running and will
- * eventually take care of this tid.
- */
- }
- INP_WUNLOCK(synqe->lctx->inp);
-#endif
+ /*
+ * do_pass_establish must have run before do_rx_data and if this
+ * is still a synqe instead of a toepcb then the connection must
+ * be getting aborted.
+ */
+ MPASS(toep->flags & TPF_ABORT_SHUTDOWN);
CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
toep, toep->flags);
m_freem(m);
Modified: stable/11/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_listen.c Tue Apr 30 17:45:22 2019 (r346969)
+++ stable/11/sys/dev/cxgbe/tom/t4_listen.c Tue Apr 30 18:03:17 2019 (r346970)
@@ -85,9 +85,6 @@ static struct listen_ctx *listen_hash_find(struct adap
static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
-static inline void save_qids_in_mbuf(struct mbuf *, struct vi_info *,
- struct offload_settings *);
-static inline void get_qids_from_mbuf(struct mbuf *m, int *, int *);
static void send_reset_synqe(struct toedev *, struct synq_entry *);
static int
@@ -221,7 +218,6 @@ alloc_lctx(struct adapter *sc, struct inpcb *inp, stru
lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
refcount_init(&lctx->refcount, 1);
- TAILQ_INIT(&lctx->synq);
lctx->inp = inp;
lctx->vnet = inp->inp_socket->so_vnet;
@@ -239,8 +235,6 @@ free_lctx(struct adapter *sc, struct listen_ctx *lctx)
INP_WLOCK_ASSERT(inp);
KASSERT(lctx->refcount == 0,
("%s: refcount %d", __func__, lctx->refcount));
- KASSERT(TAILQ_EMPTY(&lctx->synq),
- ("%s: synq not empty.", __func__));
KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
@@ -356,7 +350,7 @@ send_reset_synqe(struct toedev *tod, struct synq_entry
struct wrqe *wr;
struct fw_flowc_wr *flowc;
struct cpl_abort_req *req;
- int txqid, rxqid, flowclen;
+ int flowclen;
struct sge_wrq *ofld_txq;
struct sge_ofld_rxq *ofld_rxq;
const int nparams = 6;
@@ -372,9 +366,8 @@ send_reset_synqe(struct toedev *tod, struct synq_entry
return; /* abort already in progress */
synqe->flags |= TPF_ABORT_SHUTDOWN;
- get_qids_from_mbuf(m, &txqid, &rxqid);
- ofld_txq = &sc->sge.ofld_txq[txqid];
- ofld_rxq = &sc->sge.ofld_rxq[rxqid];
+ ofld_txq = &sc->sge.ofld_txq[synqe->txqid];
+ ofld_rxq = &sc->sge.ofld_rxq[synqe->rxqid];
/* The wrqe will have two WRs - a flowc followed by an abort_req */
flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
@@ -604,7 +597,6 @@ t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
struct listen_ctx *lctx;
struct adapter *sc = tod->tod_softc;
struct inpcb *inp = tp->t_inpcb;
- struct synq_entry *synqe;
INP_WLOCK_ASSERT(inp);
@@ -620,25 +612,33 @@ t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
* arrive and clean up when it does.
*/
if (lctx->flags & LCTX_RPL_PENDING) {
- KASSERT(TAILQ_EMPTY(&lctx->synq),
- ("%s: synq not empty.", __func__));
return (EINPROGRESS);
}
- /*
- * The host stack will abort all the connections on the listening
- * socket's so_comp. It doesn't know about the connections on the synq
- * so we need to take care of those.
- */
- TAILQ_FOREACH(synqe, &lctx->synq, link) {
- if (synqe->flags & TPF_SYNQE_HAS_L2TE)
- send_reset_synqe(tod, synqe);
- }
-
destroy_server(sc, lctx);
return (0);
}
+static inline struct synq_entry *
+alloc_synqe(struct adapter *sc __unused, struct listen_ctx *lctx, int flags)
+{
+ struct synq_entry *synqe;
+
+ INP_WLOCK_ASSERT(lctx->inp);
+ MPASS(flags == M_WAITOK || flags == M_NOWAIT);
+
+ synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
+ if (__predict_true(synqe != NULL)) {
+ synqe->flags = TPF_SYNQE;
+ refcount_init(&synqe->refcnt, 1);
+ synqe->lctx = lctx;
+ hold_lctx(lctx); /* Every synqe has a ref on its lctx. */
+ synqe->syn = NULL;
+ }
+
+ return (synqe);
+}
+
static inline void
hold_synqe(struct synq_entry *synqe)
{
@@ -646,17 +646,25 @@ hold_synqe(struct synq_entry *synqe)
refcount_acquire(&synqe->refcnt);
}
-static inline void
-release_synqe(struct synq_entry *synqe)
+static inline struct inpcb *
+release_synqe(struct adapter *sc, struct synq_entry *synqe)
{
+ struct inpcb *inp;
- if (refcount_release(&synqe->refcnt)) {
- int needfree = synqe->flags & TPF_SYNQE_NEEDFREE;
+ MPASS(synqe->flags & TPF_SYNQE);
+ MPASS(synqe->lctx != NULL);
+ inp = synqe->lctx->inp;
+ MPASS(inp != NULL);
+ INP_WLOCK_ASSERT(inp);
+
+ if (refcount_release(&synqe->refcnt)) {
+ inp = release_lctx(sc, synqe->lctx);
m_freem(synqe->syn);
- if (needfree)
- free(synqe, M_CXGBE);
+ free(synqe, M_CXGBE);
}
+
+ return (inp);
}
void
@@ -668,51 +676,45 @@ t4_syncache_added(struct toedev *tod __unused, void *a
}
void
-t4_syncache_removed(struct toedev *tod __unused, void *arg)
+t4_syncache_removed(struct toedev *tod, void *arg)
{
+ struct adapter *sc = tod->tod_softc;
struct synq_entry *synqe = arg;
+ struct inpcb *inp = synqe->lctx->inp;
- release_synqe(synqe);
+ /*
+ * XXX: this is a LOR but harmless when running from the softclock.
+ */
+ INP_WLOCK(inp);
+ inp = release_synqe(sc, synqe);
+ if (inp != NULL)
+ INP_WUNLOCK(inp);
}
int
t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
{
- struct adapter *sc = tod->tod_softc;
struct synq_entry *synqe = arg;
- struct wrqe *wr;
- struct l2t_entry *e;
- struct tcpopt to;
- struct ip *ip = mtod(m, struct ip *);
- struct tcphdr *th;
- wr = (struct wrqe *)atomic_readandclear_ptr(&synqe->wr);
- if (wr == NULL) {
- m_freem(m);
- return (EALREADY);
- }
+ if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) == 0) {
+ struct tcpopt to;
+ struct ip *ip = mtod(m, struct ip *);
+ struct tcphdr *th;
- if (ip->ip_v == IPVERSION)
- th = (void *)(ip + 1);
- else
- th = (void *)((struct ip6_hdr *)ip + 1);
- bzero(&to, sizeof(to));
- tcp_dooptions(&to, (void *)(th + 1), (th->th_off << 2) - sizeof(*th),
- TO_SYN);
+ if (ip->ip_v == IPVERSION)
+ th = (void *)(ip + 1);
+ else
+ th = (void *)((struct ip6_hdr *)ip + 1);
+ bzero(&to, sizeof(to));
+ tcp_dooptions(&to, (void *)(th + 1),
+ (th->th_off << 2) - sizeof(*th), TO_SYN);
- /* save these for later */
- synqe->iss = be32toh(th->th_seq);
- synqe->ts = to.to_tsval;
-
- if (chip_id(sc) >= CHELSIO_T5) {
- struct cpl_t5_pass_accept_rpl *rpl5 = wrtod(wr);
-
- rpl5->iss = th->th_seq;
+ /* save these for later */
+ synqe->iss = be32toh(th->th_seq);
+ synqe->irs = be32toh(th->th_ack) - 1;
+ synqe->ts = to.to_tsval;
}
- e = &sc->l2t->l2tab[synqe->l2e_idx];
- t4_l2t_send(sc, wr, e);
-
m_freem(m); /* don't need this any more */
return (0);
}
@@ -832,23 +834,29 @@ done_with_synqe(struct adapter *sc, struct synq_entry
{
struct listen_ctx *lctx = synqe->lctx;
struct inpcb *inp = lctx->inp;
- struct vi_info *vi = synqe->syn->m_pkthdr.rcvif->if_softc;
struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
int ntids;
INP_WLOCK_ASSERT(inp);
ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
- TAILQ_REMOVE(&lctx->synq, synqe, link);
- inp = release_lctx(sc, lctx);
- if (inp)
- INP_WUNLOCK(inp);
remove_tid(sc, synqe->tid, ntids);
- release_tid(sc, synqe->tid, &sc->sge.ctrlq[vi->pi->port_id]);
+ release_tid(sc, synqe->tid, lctx->ctrlq);
t4_l2t_release(e);
- release_synqe(synqe); /* removed from synq list */
+ inp = release_synqe(sc, synqe);
+ if (inp)
+ INP_WUNLOCK(inp);
}
+void
+synack_failure_cleanup(struct adapter *sc, int tid)
+{
+ struct synq_entry *synqe = lookup_tid(sc, tid);
+
+ INP_WLOCK(synqe->lctx->inp);
+ done_with_synqe(sc, synqe);
+}
+
int
do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
struct mbuf *m)
@@ -859,7 +867,6 @@ do_abort_req_synqe(struct sge_iq *iq, const struct rss
struct synq_entry *synqe = lookup_tid(sc, tid);
struct listen_ctx *lctx = synqe->lctx;
struct inpcb *inp = lctx->inp;
- int txqid;
struct sge_wrq *ofld_txq;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
@@ -878,8 +885,7 @@ do_abort_req_synqe(struct sge_iq *iq, const struct rss
INP_WLOCK(inp);
- get_qids_from_mbuf(synqe->syn, &txqid, NULL);
- ofld_txq = &sc->sge.ofld_txq[txqid];
+ ofld_txq = &sc->sge.ofld_txq[synqe->txqid];
/*
* If we'd initiated an abort earlier the reply to it is responsible for
@@ -939,23 +945,23 @@ t4_offload_socket(struct toedev *tod, void *arg, struc
#ifdef INVARIANTS
struct inpcb *inp = sotoinpcb(so);
#endif
- struct cpl_pass_establish *cpl = mtod(synqe->syn, void *);
- struct toepcb *toep = *(struct toepcb **)(cpl + 1);
+ struct toepcb *toep = synqe->toep;
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); /* prevents bad race with accept() */
INP_WLOCK_ASSERT(inp);
KASSERT(synqe->flags & TPF_SYNQE,
("%s: %p not a synq_entry?", __func__, arg));
+ MPASS(toep->tid == synqe->tid);
offload_socket(so, toep);
- make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
+ make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
toep->flags |= TPF_CPL_PENDING;
update_tid(sc, synqe->tid, toep);
synqe->flags |= TPF_SYNQE_EXPANDED;
}
static inline void
-save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi,
+save_qids_in_synqe(struct synq_entry *synqe, struct vi_info *vi,
struct offload_settings *s)
{
uint32_t txqid, rxqid;
@@ -972,43 +978,10 @@ save_qids_in_mbuf(struct mbuf *m, struct vi_info *vi,
rxqid = arc4random() % vi->nofldrxq;
rxqid += vi->first_ofld_rxq;
- m->m_pkthdr.flowid = (txqid << 16) | (rxqid & 0xffff);
+ synqe->txqid = txqid;
+ synqe->rxqid = rxqid;
}
-static inline void
-get_qids_from_mbuf(struct mbuf *m, int *txqid, int *rxqid)
-{
-
- if (txqid)
- *txqid = m->m_pkthdr.flowid >> 16;
- if (rxqid)
- *rxqid = m->m_pkthdr.flowid & 0xffff;
-}
-
-/*
- * Use the trailing space in the mbuf in which the PASS_ACCEPT_REQ arrived to
- * store some state temporarily.
- */
-static struct synq_entry *
-mbuf_to_synqe(struct mbuf *m)
-{
- int len = roundup2(sizeof (struct synq_entry), 8);
- int tspace = M_TRAILINGSPACE(m);
- struct synq_entry *synqe = NULL;
-
- if (tspace < len) {
- synqe = malloc(sizeof(*synqe), M_CXGBE, M_NOWAIT);
- if (synqe == NULL)
- return (NULL);
- synqe->flags = TPF_SYNQE | TPF_SYNQE_NEEDFREE;
- } else {
- synqe = (void *)(m->m_data + m->m_len + tspace - len);
- synqe->flags = TPF_SYNQE;
- }
-
- return (synqe);
-}
-
static void
t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
{
@@ -1208,7 +1181,39 @@ get_l2te_for_nexthop(struct port_info *pi, struct ifne
return (e);
}
-#define REJECT_PASS_ACCEPT() do { \
+static int
+send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
+ uint32_t opt2, int tid)
+{
+ struct wrqe *wr;
+ struct cpl_pass_accept_rpl *rpl;
+ struct l2t_entry *e = &sc->l2t->l2tab[synqe->l2e_idx];
+
+ wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
+ sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[0]);
+ if (wr == NULL)
+ return (ENOMEM);
+ rpl = wrtod(wr);
+
+ if (is_t4(sc))
+ INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
+ else {
+ struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
+
+ INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
+ rpl5->iss = htobe32(synqe->iss);
+ }
+ rpl->opt0 = opt0;
+ rpl->opt2 = opt2;
+
+ return (t4_l2t_send(sc, wr, e));
+}
+
+#define REJECT_PASS_ACCEPT_REQ(tunnel) do { \
+ if (!tunnel) { \
+ m_freem(m); \
+ m = NULL; \
+ } \
reject_reason = __LINE__; \
goto reject; \
} while (0)
@@ -1232,8 +1237,6 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss
struct adapter *sc = iq->adapter;
struct toedev *tod;
const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
- struct cpl_pass_accept_rpl *rpl;
- struct wrqe *wr;
unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
unsigned int tid = GET_TID(cpl);
struct listen_ctx *lctx = lookup_stid(sc, stid);
@@ -1246,10 +1249,9 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss
struct vi_info *vi;
struct ifnet *hw_ifp, *ifp;
struct l2t_entry *e = NULL;
- int rscale, mtu_idx, rx_credits, rxqid, ulp_mode;
struct synq_entry *synqe = NULL;
int reject_reason, v, ntids;
- uint16_t vid;
+ uint16_t vid, l2info;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -1262,36 +1264,35 @@ do_pass_accept_req(struct sge_iq *iq, const struct rss
CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
lctx);
- pass_accept_req_to_protohdrs(sc, m, &inc, &th);
- t4opt_to_tcpopt(&cpl->tcpopt, &to);
+ CURVNET_SET(lctx->vnet); /* before any potential REJECT */
- pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
-
- CURVNET_SET(lctx->vnet);
-
/*
- * Use the MAC index to lookup the associated VI. If this SYN
- * didn't match a perfect MAC filter, punt.
+ * Use the MAC index to lookup the associated VI. If this SYN didn't
+ * match a perfect MAC filter, punt.
*/
- if (!(be16toh(cpl->l2info) & F_SYN_XACT_MATCH)) {
- m_freem(m);
- m = NULL;
- REJECT_PASS_ACCEPT();
+ l2info = be16toh(cpl->l2info);
+ pi = sc->port[G_SYN_INTF(l2info)];
+ if (!(l2info & F_SYN_XACT_MATCH)) {
+ REJECT_PASS_ACCEPT_REQ(false);
}
for_each_vi(pi, v, vi) {
- if (vi->xact_addr_filt == G_SYN_MAC_IDX(be16toh(cpl->l2info)))
+ if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
goto found;
}
- m_freem(m);
- m = NULL;
- REJECT_PASS_ACCEPT();
-
+ REJECT_PASS_ACCEPT_REQ(false);
found:
- hw_ifp = vi->ifp; /* the (v)cxgbeX ifnet */
+ hw_ifp = vi->ifp; /* the cxgbe ifnet */
m->m_pkthdr.rcvif = hw_ifp;
tod = TOEDEV(hw_ifp);
/*
+ * Don't offload if the peer requested a TCP option that's not known to
+ * the silicon. Send the SYN to the kernel instead.
+ */
+ if (__predict_false(cpl->tcpopt.unknown))
+ REJECT_PASS_ACCEPT_REQ(true);
+
+ /*
* Figure out if there is a pseudo interface (vlan, lagg, etc.)
* involved. Don't offload if the SYN had a VLAN tag and the vid
* doesn't match anything on this interface.
@@ -1302,75 +1303,57 @@ found:
if (vid != 0xfff) {
ifp = VLAN_DEVAT(hw_ifp, vid);
if (ifp == NULL)
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true);
} else
ifp = hw_ifp;
/*
- * Don't offload if the peer requested a TCP option that's not known to
- * the silicon.
+ * Don't offload if the ifnet that the SYN came in on is not in the same
+ * vnet as the listening socket.
*/
- if (cpl->tcpopt.unknown)
- REJECT_PASS_ACCEPT();
+ if (lctx->vnet != ifp->if_vnet)
+ REJECT_PASS_ACCEPT_REQ(true);
+ pass_accept_req_to_protohdrs(sc, m, &inc, &th);
if (inc.inc_flags & INC_ISIPV6) {
/* Don't offload if the ifcap isn't enabled */
if ((ifp->if_capenable & IFCAP_TOE6) == 0)
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true);
/*
* SYN must be directed to an IP6 address on this ifnet. This
* is more restrictive than in6_localip.
*/
if (!in6_ifhasaddr(ifp, &inc.inc6_laddr))
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true);
ntids = 2;
} else {
/* Don't offload if the ifcap isn't enabled */
if ((ifp->if_capenable & IFCAP_TOE4) == 0)
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true);
/*
* SYN must be directed to an IP address on this ifnet. This
* is more restrictive than in_localip.
*/
if (!in_ifhasaddr(ifp, inc.inc_laddr))
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true);
ntids = 1;
}
- /*
- * Don't offload if the ifnet that the SYN came in on is not in the same
- * vnet as the listening socket.
- */
- if (lctx->vnet != ifp->if_vnet)
- REJECT_PASS_ACCEPT();
-
e = get_l2te_for_nexthop(pi, ifp, &inc);
if (e == NULL)
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true);
- synqe = mbuf_to_synqe(m);
- if (synqe == NULL)
- REJECT_PASS_ACCEPT();
-
- wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
- sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[pi->port_id]);
- if (wr == NULL)
- REJECT_PASS_ACCEPT();
- rpl = wrtod(wr);
-
- INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */
-
/* Don't offload if the 4-tuple is already in use */
+ INP_INFO_RLOCK(&V_tcbinfo); /* for 4-tuple check */
if (toe_4tuple_check(&inc, &th, ifp) != 0) {
INP_INFO_RUNLOCK(&V_tcbinfo);
- free(wr, M_CXGBE);
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(false);
}
INP_INFO_RUNLOCK(&V_tcbinfo);
@@ -1379,14 +1362,8 @@ found:
/* Don't offload if the listening socket has closed */
if (__predict_false(inp->inp_flags & INP_DROPPED)) {
- /*
- * The listening socket has closed. The reply from the TOE to
- * our CPL_CLOSE_LISTSRV_REQ will ultimately release all
- * resources tied to this listen context.
- */
INP_WUNLOCK(inp);
- free(wr, M_CXGBE);
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(false);
}
so = inp->inp_socket;
rw_rlock(&sc->policy_lock);
@@ -1394,119 +1371,62 @@ found:
rw_runlock(&sc->policy_lock);
if (!settings.offload) {
INP_WUNLOCK(inp);
- free(wr, M_CXGBE);
- REJECT_PASS_ACCEPT();
+ REJECT_PASS_ACCEPT_REQ(true); /* Rejected by COP. */
}
- mtu_idx = find_best_mtu_idx(sc, &inc, &settings);
- rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
- SOCKBUF_LOCK(&so->so_rcv);
- /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
- rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
- SOCKBUF_UNLOCK(&so->so_rcv);
-
- save_qids_in_mbuf(m, vi, &settings);
- get_qids_from_mbuf(m, NULL, &rxqid);
-
- if (is_t4(sc))
- INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
- else {
- struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
-
- INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
+ synqe = alloc_synqe(sc, lctx, M_NOWAIT);
+ if (synqe == NULL) {
+ INP_WUNLOCK(inp);
+ REJECT_PASS_ACCEPT_REQ(true);
}
- ulp_mode = select_ulp_mode(so, sc, &settings);
- switch (ulp_mode) {
- case ULP_MODE_TCPDDP:
- synqe->flags |= TPF_SYNQE_TCPDDP;
- break;
- case ULP_MODE_TLS:
- synqe->flags |= TPF_SYNQE_TLS;
- break;
- }
- rpl->opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits, ulp_mode,
- &settings);
- rpl->opt2 = calc_opt2p(sc, pi, rxqid, &cpl->tcpopt, &th, ulp_mode,
- CC_ALGO(intotcpcb(inp)), &settings);
+ atomic_store_int(&synqe->ok_to_respond, 0);
- synqe->tid = tid;
- synqe->lctx = lctx;
- synqe->syn = m;
- m = NULL;
- refcount_init(&synqe->refcnt, 1); /* 1 means extra hold */
- synqe->l2e_idx = e->idx;
- synqe->rcv_bufsize = rx_credits;
- atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
-
- insert_tid(sc, tid, synqe, ntids);
- TAILQ_INSERT_TAIL(&lctx->synq, synqe, link);
- hold_synqe(synqe); /* hold for the duration it's in the synq */
- hold_lctx(lctx); /* A synqe on the list has a ref on its lctx */
-
/*
* If all goes well t4_syncache_respond will get called during
* syncache_add. Note that syncache_add releases the pcb lock.
*/
+ t4opt_to_tcpopt(&cpl->tcpopt, &to);
toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
- INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */
- /*
- * If we replied during syncache_add (synqe->wr has been consumed),
- * good. Otherwise, set it to 0 so that further syncache_respond
- * attempts by the kernel will be ignored.
- */
- if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
+ if (atomic_load_int(&synqe->ok_to_respond) > 0) {
+ uint64_t opt0;
+ uint32_t opt2;
+ int rscale, mtu_idx, rx_credits;
- /*
- * syncache may or may not have a hold on the synqe, which may
- * or may not be stashed in the original SYN mbuf passed to us.
- * Just copy it over instead of dealing with all possibilities.
- */
- m = m_dup(synqe->syn, M_NOWAIT);
- if (m)
- m->m_pkthdr.rcvif = hw_ifp;
+ mtu_idx = find_best_mtu_idx(sc, &inc, &settings);
+ rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
+ /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
+ rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
- remove_tid(sc, synqe->tid, ntids);
- free(wr, M_CXGBE);
+ save_qids_in_synqe(synqe, vi, &settings);
+ synqe->ulp_mode = select_ulp_mode(so, sc, &settings);
- /* Yank the synqe out of the lctx synq. */
- INP_WLOCK(inp);
- TAILQ_REMOVE(&lctx->synq, synqe, link);
- release_synqe(synqe); /* removed from synq list */
- inp = release_lctx(sc, lctx);
- if (inp)
- INP_WUNLOCK(inp);
+ opt0 = calc_opt0(so, vi, e, mtu_idx, rscale, rx_credits,
+ synqe->ulp_mode, &settings);
+ opt2 = calc_opt2p(sc, pi, synqe->rxqid, &cpl->tcpopt, &th,
+ synqe->ulp_mode, CC_ALGO(intotcpcb(inp)), &settings);
- release_synqe(synqe); /* extra hold */
- REJECT_PASS_ACCEPT();
- }
+ insert_tid(sc, tid, synqe, ntids);
+ synqe->tid = tid;
+ synqe->l2e_idx = e->idx;
+ synqe->rcv_bufsize = rx_credits;
+ synqe->syn = m;
+ m = NULL;
- CTR6(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK mode %d",
- __func__, stid, tid, lctx, synqe, ulp_mode);
+ if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
+ remove_tid(sc, tid, ntids);
+ m = synqe->syn;
+ synqe->syn = NULL;
+ REJECT_PASS_ACCEPT_REQ(true);
+ }
- INP_WLOCK(inp);
- synqe->flags |= TPF_SYNQE_HAS_L2TE;
- if (__predict_false(inp->inp_flags & INP_DROPPED)) {
- /*
- * Listening socket closed but tod_listen_stop did not abort
- * this tid because there was no L2T entry for the tid at that
- * time. Abort it now. The reply to the abort will clean up.
- */
CTR6(KTR_CXGBE,
- "%s: stid %u, tid %u, lctx %p, synqe %p (0x%x), ABORT",
- __func__, stid, tid, lctx, synqe, synqe->flags);
- if (!(synqe->flags & TPF_SYNQE_EXPANDED))
- send_reset_synqe(tod, synqe);
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
+ "%s: stid %u, tid %u, lctx %p, synqe %p, mode %d, SYNACK",
+ __func__, stid, tid, lctx, synqe, synqe->ulp_mode);
+ } else
+ REJECT_PASS_ACCEPT_REQ(false);
- release_synqe(synqe); /* extra hold */
- return (__LINE__);
- }
- INP_WUNLOCK(inp);
CURVNET_RESTORE();
-
- release_synqe(synqe); /* extra hold */
return (0);
reject:
CURVNET_RESTORE();
@@ -1516,8 +1436,19 @@ reject:
if (e)
t4_l2t_release(e);
release_tid(sc, tid, lctx->ctrlq);
+ if (synqe) {
+ inp = synqe->lctx->inp;
+ INP_WLOCK(inp);
+ inp = release_synqe(sc, synqe);
+ if (inp)
+ INP_WUNLOCK(inp);
+ }
- if (__predict_true(m != NULL)) {
+ if (m) {
+ /*
+ * The connection request hit a TOE listener but is being passed
+ * on to the kernel sw stack instead of getting offloaded.
+ */
m_adj(m, sizeof(*cpl));
m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
@@ -1569,7 +1500,6 @@ do_pass_establish(struct sge_iq *iq, const struct rss_
struct tcpopt to;
struct in_conninfo inc;
struct toepcb *toep;
- u_int txqid, rxqid;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
@@ -1589,73 +1519,46 @@ do_pass_establish(struct sge_iq *iq, const struct rss_
"%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
__func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
- if (__predict_false(inp->inp_flags & INP_DROPPED)) {
-
- if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
- KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
- ("%s: listen socket closed but tid %u not aborted.",
- __func__, tid));
- }
-
- INP_WUNLOCK(inp);
- INP_INFO_RUNLOCK(&V_tcbinfo);
- CURVNET_RESTORE();
- return (0);
- }
-
ifp = synqe->syn->m_pkthdr.rcvif;
vi = ifp->if_softc;
KASSERT(vi->pi->adapter == sc,
("%s: vi %p, sc %p mismatch", __func__, vi, sc));
- get_qids_from_mbuf(synqe->syn, &txqid, &rxqid);
- KASSERT(rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
- ("%s: CPL arrived on unexpected rxq. %d %d", __func__, rxqid,
- (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
-
- toep = alloc_toepcb(vi, txqid, rxqid, M_NOWAIT);
- if (toep == NULL) {
+ if (__predict_false(inp->inp_flags & INP_DROPPED)) {
reset:
- /*
- * The reply to this abort will perform final cleanup. There is
- * no need to check for HAS_L2TE here. We can be here only if
- * we responded to the PASS_ACCEPT_REQ, and our response had the
- * L2T idx.
- */
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
return (0);
}
+
+ KASSERT(synqe->rxqid == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
+ ("%s: CPL arrived on unexpected rxq. %d %d", __func__,
+ synqe->rxqid, (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
+
+ toep = alloc_toepcb(vi, synqe->txqid, synqe->rxqid, M_NOWAIT);
+ if (toep == NULL)
+ goto reset;
toep->tid = tid;
toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
- if (synqe->flags & TPF_SYNQE_TCPDDP)
- set_ulp_mode(toep, ULP_MODE_TCPDDP);
- else if (synqe->flags & TPF_SYNQE_TLS)
- set_ulp_mode(toep, ULP_MODE_TLS);
- else
- set_ulp_mode(toep, ULP_MODE_NONE);
+ toep->vnet = lctx->vnet;
+ set_ulp_mode(toep, synqe->ulp_mode);
/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
toep->rx_credits = synqe->rcv_bufsize;
- so = inp->inp_socket;
- KASSERT(so != NULL, ("%s: socket is NULL", __func__));
+ MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
+ MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
+ synqe->tcp_opt = cpl->tcp_opt;
+ synqe->toep = toep;
/* Come up with something that syncache_expand should be ok with. */
synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
+ if (inc.inc_flags & INC_ISIPV6)
+ toep->ce = t4_hold_lip(sc, &inc.inc6_laddr, lctx->ce);
+ so = inp->inp_socket;
+ KASSERT(so != NULL, ("%s: socket is NULL", __func__));
- /*
- * No more need for anything in the mbuf that carried the
- * CPL_PASS_ACCEPT_REQ. Drop the CPL_PASS_ESTABLISH and toep pointer
- * there. XXX: bad form but I don't want to increase the size of synqe.
- */
- m = synqe->syn;
- KASSERT(sizeof(*cpl) + sizeof(toep) <= m->m_len,
- ("%s: no room in mbuf %p (m_len %d)", __func__, m, m->m_len));
- bcopy(cpl, mtod(m, void *), sizeof(*cpl));
- *(struct toepcb **)(mtod(m, struct cpl_pass_establish *) + 1) = toep;
-
if (!toe_syncache_expand(&inc, &to, &th, &so) || so == NULL) {
free_toepcb(toep);
goto reset;
@@ -1665,14 +1568,9 @@ reset:
new_inp = sotoinpcb(so);
INP_WLOCK_ASSERT(new_inp);
MPASS(so->so_vnet == lctx->vnet);
- toep->vnet = lctx->vnet;
- if (inc.inc_flags & INC_ISIPV6)
- toep->ce = t4_hold_lip(sc, &inc.inc6_laddr, lctx->ce);
/*
- * This is for the unlikely case where the syncache entry that we added
- * has been evicted from the syncache, but the syncache_expand above
- * works because of syncookies.
+ * This is for expansion from syncookies.
*
* XXX: we've held the tcbinfo lock throughout so there's no risk of
* anyone accept'ing a connection before we've installed our hooks, but
@@ -1686,13 +1584,11 @@ reset:
INP_WUNLOCK(new_inp);
/* Done with the synqe */
- TAILQ_REMOVE(&lctx->synq, synqe, link);
- inp = release_lctx(sc, lctx);
+ inp = release_synqe(sc, synqe);
if (inp != NULL)
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
CURVNET_RESTORE();
- release_synqe(synqe);
return (0);
}
Modified: stable/11/sys/dev/cxgbe/tom/t4_tom.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_tom.c Tue Apr 30 17:45:22 2019 (r346969)
+++ stable/11/sys/dev/cxgbe/tom/t4_tom.c Tue Apr 30 18:03:17 2019 (r346970)
@@ -1011,9 +1011,9 @@ reclaim_wr_resources(void *arg, int count)
struct tom_data *td = arg;
STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
struct cpl_act_open_req *cpl;
- u_int opcode, atid;
+ u_int opcode, atid, tid;
struct wrqe *wr;
- struct adapter *sc;
+ struct adapter *sc = td_adapter(td);
mtx_lock(&td->unsent_wr_lock);
STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
@@ -1029,10 +1029,14 @@ reclaim_wr_resources(void *arg, int count)
case CPL_ACT_OPEN_REQ:
case CPL_ACT_OPEN_REQ6:
atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
- sc = td_adapter(td);
-
CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
+ free(wr, M_CXGBE);
+ break;
+ case CPL_PASS_ACCEPT_RPL:
+ tid = GET_TID(cpl);
+ CTR2(KTR_CXGBE, "%s: tid %u ", __func__, tid);
+ synack_failure_cleanup(sc, tid);
free(wr, M_CXGBE);
break;
default:
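
[Editorial sketch, not part of the commit.] The t4_tom.c hunk above is the other half of the SYN_RCVD leak fix: a SYN-ACK work request that could not be sent because of an L2 lookup failure ends up on the unsent-WR list, and the reclaim task now recognizes CPL_PASS_ACCEPT_RPL and calls the new synack_failure_cleanup (see the t4_listen.c diff) so the tid, L2T reference, and synq entry are released instead of leaked. A userspace model of that dispatch, with placeholder opcodes and printfs standing in for the real cleanup routines:

#include <stdio.h>

/* Placeholder opcodes; the real values come from the shared CPL headers. */
enum { OP_ACT_OPEN_REQ, OP_PASS_ACCEPT_RPL };

struct unsent_wr_model {
	int opcode;
	unsigned int tid;
};

static void
reclaim_one(const struct unsent_wr_model *wr)
{

	switch (wr->opcode) {
	case OP_ACT_OPEN_REQ:
		printf("active open: fail atid %u with EHOSTUNREACH\n", wr->tid);
		break;
	case OP_PASS_ACCEPT_RPL:	/* the case this change adds */
		printf("passive open: clean up synqe for tid %u\n", wr->tid);
		break;
	default:
		printf("unexpected WR, tid %u\n", wr->tid);
	}
}

int
main(void)
{
	struct unsent_wr_model wr = { OP_PASS_ACCEPT_RPL, 7 };

	reclaim_one(&wr);
	return (0);
}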
Modified: stable/11/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_tom.h Tue Apr 30 17:45:22 2019 (r346969)
+++ stable/11/sys/dev/cxgbe/tom/t4_tom.h Tue Apr 30 18:03:17 2019 (r346970)
@@ -66,12 +66,8 @@ enum {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***