svn commit: r311949 - head/sys/dev/cxgbe/tom
Navdeep Parhar
np at FreeBSD.org
Wed Jan 11 23:48:19 UTC 2017
Author: np
Date: Wed Jan 11 23:48:17 2017
New Revision: 311949
URL: https://svnweb.freebsd.org/changeset/base/311949
Log:
cxgbe/tom: Add VIMAGE support to the TOE driver.
Active Open:
- Save the socket's vnet at the time of the active open (t4_connect) and
switch to it when processing the reply (do_act_open_rpl or
do_act_establish).
Passive Open:
- Save the listening socket's vnet in the driver's listen_ctx and switch
to it when processing incoming SYNs for the socket.
- Reject SYNs that arrive on an ifnet that's not in the same vnet as the
listening socket.
CLIP (Compressed Local IPv6) table:
- Add only those IPv6 addresses to the CLIP that are in a vnet
associated with one of the card's ifnets.
Misc:
- Set vnet from the toepcb when processing TCP state transitions.
- The kernel sets the vnet when calling the driver's output routine
so t4_push_frames runs in proper vnet context already. One exception
is when incoming credits trigger tx within the driver's ithread. Set
the vnet explicitly in do_fw4_ack for that case.
MFC after: 3 days
Sponsored by: Chelsio Communications
Modified:
head/sys/dev/cxgbe/tom/t4_connect.c
head/sys/dev/cxgbe/tom/t4_cpl_io.c
head/sys/dev/cxgbe/tom/t4_ddp.c
head/sys/dev/cxgbe/tom/t4_listen.c
head/sys/dev/cxgbe/tom/t4_tom.c
head/sys/dev/cxgbe/tom/t4_tom.h
Modified: head/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_connect.c Wed Jan 11 23:32:40 2017 (r311948)
+++ head/sys/dev/cxgbe/tom/t4_connect.c Wed Jan 11 23:48:17 2017 (r311949)
@@ -126,6 +126,7 @@ do_act_establish(struct sge_iq *iq, cons
CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid);
free_atid(sc, atid);
+ CURVNET_SET(toep->vnet);
INP_WLOCK(inp);
toep->tid = tid;
insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1);
@@ -141,6 +142,7 @@ do_act_establish(struct sge_iq *iq, cons
make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
done:
INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
return (0);
}
@@ -178,6 +180,7 @@ act_open_failure_cleanup(struct adapter
free_atid(sc, atid);
toep->tid = -1;
+ CURVNET_SET(toep->vnet);
if (status != EAGAIN)
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
@@ -185,6 +188,7 @@ act_open_failure_cleanup(struct adapter
final_cpl_received(toep); /* unlocks inp */
if (status != EAGAIN)
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
}
/*
@@ -360,6 +364,7 @@ t4_connect(struct toedev *tod, struct so
if (wr == NULL)
DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM);
+ toep->vnet = so->so_vnet;
if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
set_tcpddp_ulp_mode(toep);
else
Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jan 11 23:32:40 2017 (r311948)
+++ head/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jan 11 23:48:17 2017 (r311949)
@@ -306,7 +306,6 @@ make_established(struct toepcb *toep, ui
uint16_t tcpopt = be16toh(opt);
struct flowc_tx_params ftxp;
- CURVNET_SET(so->so_vnet);
INP_WLOCK_ASSERT(inp);
KASSERT(tp->t_state == TCPS_SYN_SENT ||
tp->t_state == TCPS_SYN_RECEIVED,
@@ -357,7 +356,6 @@ make_established(struct toepcb *toep, ui
send_flowc_wr(toep, &ftxp);
soisconnected(so);
- CURVNET_RESTORE();
}
static int
@@ -1146,6 +1144,7 @@ do_peer_close(struct sge_iq *iq, const s
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
+ CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1191,6 +1190,7 @@ do_peer_close(struct sge_iq *iq, const s
tcp_twstart(tp);
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
INP_WLOCK(inp);
final_cpl_received(toep);
@@ -1203,6 +1203,7 @@ do_peer_close(struct sge_iq *iq, const s
done:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
return (0);
}
@@ -1229,6 +1230,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
+ CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = intotcpcb(inp);
@@ -1248,6 +1250,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
release:
INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
INP_WLOCK(inp);
final_cpl_received(toep); /* no more CPLs expected */
@@ -1272,6 +1275,7 @@ release:
done:
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
return (0);
}
@@ -1345,6 +1349,7 @@ do_abort_req(struct sge_iq *iq, const st
}
inp = toep->inp;
+ CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */
INP_WLOCK(inp);
@@ -1380,6 +1385,7 @@ do_abort_req(struct sge_iq *iq, const st
final_cpl_received(toep);
done:
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
return (0);
}
@@ -1501,18 +1507,21 @@ do_rx_data(struct sge_iq *iq, const stru
DDP_UNLOCK(toep);
INP_WUNLOCK(inp);
+ CURVNET_SET(toep->vnet);
INP_INFO_RLOCK(&V_tcbinfo);
INP_WLOCK(inp);
tp = tcp_drop(tp, ECONNRESET);
if (tp)
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
return (0);
}
/* receive buffer autosize */
- CURVNET_SET(so->so_vnet);
+ MPASS(toep->vnet == so->so_vnet);
+ CURVNET_SET(toep->vnet);
if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&
sb->sb_hiwat < V_tcp_autorcvbuf_max &&
@@ -1713,10 +1722,12 @@ do_fw4_ack(struct sge_iq *iq, const stru
tid);
#endif
toep->flags &= ~TPF_TX_SUSPENDED;
+ CURVNET_SET(toep->vnet);
if (toep->ulp_mode == ULP_MODE_ISCSI)
t4_push_pdus(sc, toep, plen);
else
t4_push_frames(sc, toep, plen);
+ CURVNET_RESTORE();
} else if (plen > 0) {
struct sockbuf *sb = &so->so_snd;
int sbu;
@@ -2143,7 +2154,7 @@ t4_aiotx_task(void *context, int pending
struct socket *so = inp->inp_socket;
struct kaiocb *job;
- CURVNET_SET(so->so_vnet);
+ CURVNET_SET(toep->vnet);
SOCKBUF_LOCK(&so->so_snd);
while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
job = TAILQ_FIRST(&toep->aiotx_jobq);
Modified: head/sys/dev/cxgbe/tom/t4_ddp.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_ddp.c Wed Jan 11 23:32:40 2017 (r311948)
+++ head/sys/dev/cxgbe/tom/t4_ddp.c Wed Jan 11 23:48:17 2017 (r311949)
@@ -546,7 +546,8 @@ handle_ddp_data(struct toepcb *toep, __b
#endif
/* receive buffer autosize */
- CURVNET_SET(so->so_vnet);
+ MPASS(toep->vnet == so->so_vnet);
+ CURVNET_SET(toep->vnet);
SOCKBUF_LOCK(sb);
if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&
Modified: head/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_listen.c Wed Jan 11 23:32:40 2017 (r311948)
+++ head/sys/dev/cxgbe/tom/t4_listen.c Wed Jan 11 23:48:17 2017 (r311949)
@@ -222,6 +222,7 @@ alloc_lctx(struct adapter *sc, struct in
TAILQ_INIT(&lctx->synq);
lctx->inp = inp;
+ lctx->vnet = inp->inp_socket->so_vnet;
in_pcbref(inp);
return (lctx);
@@ -1200,6 +1201,8 @@ do_pass_accept_req(struct sge_iq *iq, co
pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))];
+ CURVNET_SET(lctx->vnet);
+
/*
* Use the MAC index to lookup the associated VI. If this SYN
* didn't match a perfect MAC filter, punt.
@@ -1274,6 +1277,13 @@ found:
ntids = 1;
}
+ /*
+ * Don't offload if the ifnet that the SYN came in on is not in the same
+ * vnet as the listening socket.
+ */
+ if (lctx->vnet != ifp->if_vnet)
+ REJECT_PASS_ACCEPT();
+
e = get_l2te_for_nexthop(pi, ifp, &inc);
if (e == NULL)
REJECT_PASS_ACCEPT();
@@ -1313,7 +1323,6 @@ found:
REJECT_PASS_ACCEPT();
}
so = inp->inp_socket;
- CURVNET_SET(so->so_vnet);
mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss));
rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
@@ -1360,7 +1369,6 @@ found:
*/
toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */
- CURVNET_RESTORE();
/*
* If we replied during syncache_add (synqe->wr has been consumed),
@@ -1415,10 +1423,12 @@ found:
return (__LINE__);
}
INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
release_synqe(synqe); /* extra hold */
return (0);
reject:
+ CURVNET_RESTORE();
CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
reject_reason);
@@ -1490,6 +1500,7 @@ do_pass_establish(struct sge_iq *iq, con
KASSERT(synqe->flags & TPF_SYNQE,
("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
+ CURVNET_SET(lctx->vnet);
INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */
INP_WLOCK(inp);
@@ -1507,6 +1518,7 @@ do_pass_establish(struct sge_iq *iq, con
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
return (0);
}
@@ -1532,6 +1544,7 @@ reset:
send_reset_synqe(TOEDEV(ifp), synqe);
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
return (0);
}
toep->tid = tid;
@@ -1568,6 +1581,8 @@ reset:
/* New connection inpcb is already locked by syncache_expand(). */
new_inp = sotoinpcb(so);
INP_WLOCK_ASSERT(new_inp);
+ MPASS(so->so_vnet == lctx->vnet);
+ toep->vnet = lctx->vnet;
/*
* This is for the unlikely case where the syncache entry that we added
@@ -1591,6 +1606,7 @@ reset:
if (inp != NULL)
INP_WUNLOCK(inp);
INP_INFO_RUNLOCK(&V_tcbinfo);
+ CURVNET_RESTORE();
release_synqe(synqe);
return (0);
Modified: head/sys/dev/cxgbe/tom/t4_tom.c
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.c Wed Jan 11 23:32:40 2017 (r311948)
+++ head/sys/dev/cxgbe/tom/t4_tom.c Wed Jan 11 23:48:17 2017 (r311949)
@@ -799,74 +799,96 @@ update_clip_table(struct adapter *sc, st
struct in6_addr *lip, tlip;
struct clip_head stale;
struct clip_entry *ce, *ce_temp;
- int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen);
+ struct vi_info *vi;
+ int rc, gen, i, j;
+ uintptr_t last_vnet;
ASSERT_SYNCHRONIZED_OP(sc);
IN6_IFADDR_RLOCK(&in6_ifa_tracker);
mtx_lock(&td->clip_table_lock);
+ gen = atomic_load_acq_int(&in6_ifaddr_gen);
if (gen == td->clip_gen)
goto done;
TAILQ_INIT(&stale);
TAILQ_CONCAT(&stale, &td->clip_table, link);
- TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
- lip = &ia->ia_addr.sin6_addr;
+ /*
+ * last_vnet optimizes the common cases where all if_vnet = NULL (no
+ * VIMAGE) or all if_vnet = vnet0.
+ */
+ last_vnet = (uintptr_t)(-1);
+ for_each_port(sc, i)
+ for_each_vi(sc->port[i], j, vi) {
+ if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
+ continue;
- KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
- ("%s: mcast address in in6_ifaddr list", __func__));
+ /* XXX: races with if_vmove */
+ CURVNET_SET(vi->ifp->if_vnet);
+ TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
+ lip = &ia->ia_addr.sin6_addr;
+
+ KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
+ ("%s: mcast address in in6_ifaddr list", __func__));
+
+ if (IN6_IS_ADDR_LOOPBACK(lip))
+ continue;
+ if (IN6_IS_SCOPE_EMBED(lip)) {
+ /* Remove the embedded scope */
+ tlip = *lip;
+ lip = &tlip;
+ in6_clearscope(lip);
+ }
+ /*
+ * XXX: how to weed out the link local address for the
+ * loopback interface? It's fe80::1 usually (always?).
+ */
+
+ /*
+ * If it's in the main list then we already know it's
+ * not stale.
+ */
+ TAILQ_FOREACH(ce, &td->clip_table, link) {
+ if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
+ goto next;
+ }
- if (IN6_IS_ADDR_LOOPBACK(lip))
- continue;
- if (IN6_IS_SCOPE_EMBED(lip)) {
- /* Remove the embedded scope */
- tlip = *lip;
- lip = &tlip;
- in6_clearscope(lip);
- }
- /*
- * XXX: how to weed out the link local address for the loopback
- * interface? It's fe80::1 usually (always?).
- */
-
- /*
- * If it's in the main list then we already know it's not stale.
- */
- TAILQ_FOREACH(ce, &td->clip_table, link) {
- if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
- goto next;
- }
+ /*
+ * If it's in the stale list we should move it to the
+ * main list.
+ */
+ TAILQ_FOREACH(ce, &stale, link) {
+ if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
+ TAILQ_REMOVE(&stale, ce, link);
+ TAILQ_INSERT_TAIL(&td->clip_table, ce,
+ link);
+ goto next;
+ }
+ }
- /*
- * If it's in the stale list we should move it to the main list.
- */
- TAILQ_FOREACH(ce, &stale, link) {
- if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
- TAILQ_REMOVE(&stale, ce, link);
+ /* A new IP6 address; add it to the CLIP table */
+ ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
+ memcpy(&ce->lip, lip, sizeof(ce->lip));
+ ce->refcount = 0;
+ rc = add_lip(sc, lip);
+ if (rc == 0)
TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
- goto next;
- }
- }
+ else {
+ char ip[INET6_ADDRSTRLEN];
- /* A new IP6 address; add it to the CLIP table */
- ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
- memcpy(&ce->lip, lip, sizeof(ce->lip));
- ce->refcount = 0;
- rc = add_lip(sc, lip);
- if (rc == 0)
- TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
- else {
- char ip[INET6_ADDRSTRLEN];
-
- inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip));
- log(LOG_ERR, "%s: could not add %s (%d)\n",
- __func__, ip, rc);
- free(ce, M_CXGBE);
- }
+ inet_ntop(AF_INET6, &ce->lip, &ip[0],
+ sizeof(ip));
+ log(LOG_ERR, "%s: could not add %s (%d)\n",
+ __func__, ip, rc);
+ free(ce, M_CXGBE);
+ }
next:
- continue;
+ continue;
+ }
+ CURVNET_RESTORE();
+ last_vnet = (uintptr_t)vi->ifp->if_vnet;
}
/*
Modified: head/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- head/sys/dev/cxgbe/tom/t4_tom.h Wed Jan 11 23:32:40 2017 (r311948)
+++ head/sys/dev/cxgbe/tom/t4_tom.h Wed Jan 11 23:48:17 2017 (r311949)
@@ -141,6 +141,7 @@ struct toepcb {
int refcount;
struct tom_data *td;
struct inpcb *inp; /* backpointer to host stack's PCB */
+ struct vnet *vnet;
struct vi_info *vi; /* virtual interface */
struct sge_wrq *ofld_txq;
struct sge_ofld_rxq *ofld_rxq;
@@ -232,6 +233,7 @@ struct listen_ctx {
struct stid_region stid_region;
int flags;
struct inpcb *inp; /* listening socket's inp */
+ struct vnet *vnet;
struct sge_wrq *ctrlq;
struct sge_ofld_rxq *ofld_rxq;
struct clip_entry *ce;
More information about the svn-src-all
mailing list