git: 67360f7bb0bb - main - cxgbei: Support iSCSI offload on T6.
John Baldwin
jhb at FreeBSD.org
Sat May 29 00:02:24 UTC 2021
The branch main has been updated by jhb:
URL: https://cgit.FreeBSD.org/src/commit/?id=67360f7bb0bb575d823c21420abaf165ecf62066
commit 67360f7bb0bb575d823c21420abaf165ecf62066
Author: John Baldwin <jhb at FreeBSD.org>
AuthorDate: 2021-05-28 23:45:29 +0000
Commit: John Baldwin <jhb at FreeBSD.org>
CommitDate: 2021-05-28 23:45:29 +0000
cxgbei: Support iSCSI offload on T6.
T6 makes several changes relative to T5 for receive of iSCSI PDUs.
First, earlier adapters issue either 2 or 3 messages to the host for
each PDU received: CPL_ISCSI_HDR contains the BHS of the PDU,
CPL_ISCSI_DATA (when DDP is not used for zero-copy receive) contains
the PDU data as buffers on the freelist, and CPL_RX_ISCSI_DDP contains
the status of the PDU, such as the results of CRC checks. In T6, a new
CPL_RX_ISCSI_CMP combines CPL_ISCSI_HDR and CPL_RX_ISCSI_DDP. Data
PDUs which are directly placed via DDP only report a single
CPL_RX_ISCSI_CMP message. Data PDUs received on the freelist are
reported as CPL_ISCSI_DATA followed by CPL_RX_ISCSI_CMP. Control PDUs
such as R2T are still reported via CPL_ISCSI_HDR and CPL_RX_ISCSI_DDP.
Supporting this requires changing the CPL_ISCSI_DATA handler to
allocate a PDU structure if it is not preceded by a CPL_ISCSI_HDR, as
well as adding a handler for the new CPL_RX_ISCSI_CMP.
Second, when using DDP for zero-copy receive, T6 will only issue a
CPL_RX_ISCSI_CMP after a burst of PDUs has been received (the end of a
burst is indicated by the F flag in the BHS). In this case, the CPL_RX_ISCSI_CMP can
reflect the completion of multiple PDUs and the BHS and TCP sequence
number included in the message are from the last PDU received in the
burst. Notably, the message does not include any information about
earlier PDUs received as part of the burst. Instead, the driver must
track the amount of data already received for a given transfer and use
this to compute the amount of data received in a burst. In addition,
the iSCSI layer currently has no way to permit receiving a logical PDU
which spans multiple PDUs. Instead, the driver presents each burst as
a single, "large" PDU to the iSCSI target and initiator. This is
done by rewriting the buffer offset and data length fields in the BHS
of the final PDU as well as rewriting the DataSN so that the received
PDUs appear to be in order.
To track all this, cxgbei maintains a hash table of 'cxgbei_cmp'
structures indexed by transfer tags for each offloaded iSCSI
connection. When a SCSI_DATA_IN message is received, the ITT from the
received BHS is used to find the necessary state in the hash table,
whereas SCSI_DATA_OUT replies use the TTT as the key. The structure
tracks the expected starting offset of the next burst, the rewritten
DataSN to assign to the next burst, and the on-the-wire DataSN of the
last PDU received.
Discussed with: np
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D30458
---
sys/dev/cxgbe/cxgbei/cxgbei.c | 280 ++++++++++++++++++++++++++++++++++++--
sys/dev/cxgbe/cxgbei/cxgbei.h | 16 +++
sys/dev/cxgbe/cxgbei/icl_cxgbei.c | 154 +++++++++++++++------
3 files changed, 395 insertions(+), 55 deletions(-)
diff --git a/sys/dev/cxgbe/cxgbei/cxgbei.c b/sys/dev/cxgbe/cxgbei/cxgbei.c
index f95c9f60163f..c70bda7e0436 100644
--- a/sys/dev/cxgbe/cxgbei/cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/cxgbei.c
@@ -222,27 +222,47 @@ do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m
u_int tid = GET_TID(cpl);
struct toepcb *toep = lookup_tid(sc, tid);
struct icl_cxgbei_pdu *icp = toep->ulpcb2;
+ struct icl_pdu *ip;
M_ASSERTPKTHDR(m);
MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));
- /* Must already have received the header (but not the data). */
- MPASS(icp != NULL);
- MPASS(icp->icp_flags == ICPF_RX_HDR);
- MPASS(icp->ip.ip_data_mbuf == NULL);
-
+ if (icp == NULL) {
+ /*
+ * T6 completion enabled, start of a new pdu. Header
+ * will come in completion CPL.
+ */
+ ip = icl_cxgbei_new_pdu(M_NOWAIT);
+ if (ip == NULL)
+ CXGBE_UNIMPLEMENTED("PDU allocation failure");
+ icp = ip_to_icp(ip);
+ } else {
+ /* T5 mode, header is already received. */
+ MPASS(icp->icp_flags == ICPF_RX_HDR);
+ MPASS(icp->ip.ip_data_mbuf == NULL);
+ MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
+ }
+ /* Trim the cpl header from mbuf. */
m_adj(m, sizeof(*cpl));
- MPASS(icp->ip.ip_data_len == m->m_pkthdr.len);
icp->icp_flags |= ICPF_RX_FLBUF;
icp->ip.ip_data_mbuf = m;
toep->ofld_rxq->rx_iscsi_fl_pdus++;
toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;
+ /*
+ * For T6, save the icp for further processing in the
+ * completion handler.
+ */
+ if (icp->icp_flags == ICPF_RX_FLBUF) {
+ MPASS(toep->ulpcb2 == NULL);
+ toep->ulpcb2 = icp;
+ }
+
#if 0
- CTR3(KTR_CXGBE, "%s: tid %u, cpl->len %u", __func__, tid,
- be16toh(cpl->len));
+ CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
+ be16toh(cpl->len), icp);
#endif
return (0);
@@ -304,15 +324,17 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
__func__, tid, pdu_len, inp->inp_flags);
INP_WUNLOCK(inp);
icl_cxgbei_conn_pdu_free(NULL, ip);
-#ifdef INVARIANTS
toep->ulpcb2 = NULL;
-#endif
return (0);
}
+ /*
+ * T6+ does not report data PDUs received via DDP without F
+ * set. This can result in gaps in the TCP sequence space.
+ */
tp = intotcpcb(inp);
- MPASS(icp->icp_seq == tp->rcv_nxt);
- tp->rcv_nxt += pdu_len;
+ MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
+ tp->rcv_nxt = icp->icp_seq + pdu_len;
tp->t_rcvtime = ticks;
/*
@@ -342,9 +364,7 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
CURVNET_RESTORE();
icl_cxgbei_conn_pdu_free(NULL, ip);
-#ifdef INVARIANTS
toep->ulpcb2 = NULL;
-#endif
return (0);
}
MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
@@ -399,10 +419,238 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
SOCKBUF_UNLOCK(sb);
INP_WUNLOCK(inp);
-#ifdef INVARIANTS
toep->ulpcb2 = NULL;
+
+ return (0);
+}
+
+static int
+do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+{
+ struct epoch_tracker et;
+ struct adapter *sc = iq->adapter;
+ struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
+ u_int tid = GET_TID(cpl);
+ struct toepcb *toep = lookup_tid(sc, tid);
+ struct icl_cxgbei_pdu *icp = toep->ulpcb2;
+ struct icl_pdu *ip;
+ struct cxgbei_cmp *cmp;
+ struct inpcb *inp = toep->inp;
+#ifdef INVARIANTS
+ uint16_t len = be16toh(cpl->len);
+#endif
+ struct socket *so;
+ struct sockbuf *sb;
+ struct tcpcb *tp;
+ struct icl_cxgbei_conn *icc;
+ struct icl_conn *ic;
+ struct iscsi_bhs_data_out *bhsdo;
+ u_int val = be32toh(cpl->ddpvld);
+ u_int npdus, pdu_len, data_digest_len, hdr_digest_len;
+ uint32_t prev_seg_len;
+
+ M_ASSERTPKTHDR(m);
+ MPASS(m->m_pkthdr.len == len + sizeof(*cpl));
+
+ if ((val & F_DDP_PDU) == 0) {
+ MPASS(icp != NULL);
+ MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
+ ip = &icp->ip;
+ }
+
+ if (icp == NULL) {
+ /* T6 completion enabled, start of a new PDU. */
+ ip = icl_cxgbei_new_pdu(M_NOWAIT);
+ if (ip == NULL)
+ CXGBE_UNIMPLEMENTED("PDU allocation failure");
+ icp = ip_to_icp(ip);
+ }
+ pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));
+
+#if 0
+ CTR5(KTR_CXGBE,
+ "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
+ __func__, tid, pdu_len, val, icp);
#endif
+ /* Copy header */
+ m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
+ bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
+ ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
+ bhsdo->bhsdo_data_segment_len[1] << 8 |
+ bhsdo->bhsdo_data_segment_len[2];
+ icp->icp_seq = ntohl(cpl->seq);
+ icp->icp_flags |= ICPF_RX_HDR;
+ icp->icp_flags |= ICPF_RX_STATUS;
+
+ if (val & F_DDP_PADDING_ERR)
+ icp->icp_flags |= ICPF_PAD_ERR;
+ if (val & F_DDP_HDRCRC_ERR)
+ icp->icp_flags |= ICPF_HCRC_ERR;
+ if (val & F_DDP_DATACRC_ERR)
+ icp->icp_flags |= ICPF_DCRC_ERR;
+
+ INP_WLOCK(inp);
+ if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
+ CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
+ __func__, tid, pdu_len, inp->inp_flags);
+ INP_WUNLOCK(inp);
+ icl_cxgbei_conn_pdu_free(NULL, ip);
+ toep->ulpcb2 = NULL;
+ m_freem(m);
+ return (0);
+ }
+
+ tp = intotcpcb(inp);
+
+ /*
+ * If icc is NULL, the connection is being closed in
+ * icl_cxgbei_conn_close(), just drop this data.
+ */
+ icc = toep->ulpcb;
+ if (__predict_false(icc == NULL)) {
+ CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
+ __func__, tid, pdu_len, icc);
+
+ /*
+ * Update rcv_nxt so the sequence number of the FIN
+ * doesn't appear wrong.
+ */
+ tp->rcv_nxt = icp->icp_seq + pdu_len;
+ tp->t_rcvtime = ticks;
+ INP_WUNLOCK(inp);
+
+ icl_cxgbei_conn_pdu_free(NULL, ip);
+ toep->ulpcb2 = NULL;
+ m_freem(m);
+ return (0);
+ }
+
+ data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
+ ISCSI_DATA_DIGEST_SIZE : 0;
+ hdr_digest_len = (icc->ulp_submode & ULP_CRC_HEADER) ?
+ ISCSI_HEADER_DIGEST_SIZE : 0;
+ MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
+
+ if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
+ MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
+ MPASS(ip->ip_data_len > 0);
+ icp->icp_flags |= ICPF_RX_DDP;
+ bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
+
+ switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
+ case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
+ cmp = cxgbei_find_cmp(icc,
+ be32toh(bhsdo->bhsdo_initiator_task_tag));
+ break;
+ case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
+ cmp = cxgbei_find_cmp(icc,
+ be32toh(bhsdo->bhsdo_target_transfer_tag));
+ break;
+ default:
+ __assert_unreachable();
+ }
+ MPASS(cmp != NULL);
+
+ /* Must be the final PDU. */
+ MPASS(bhsdo->bhsdo_flags & BHSDO_FLAGS_F);
+
+ /*
+ * The difference between the end of the last burst
+ * and the offset of the last PDU in this burst is
+ * the additional data received via DDP.
+ */
+ prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
+ cmp->next_buffer_offset;
+
+ if (prev_seg_len != 0) {
+ /*
+ * Since cfiscsi doesn't know about previous
+ * headers, pretend that the entire r2t data
+ * length was received in this single segment.
+ */
+ ip->ip_data_len += prev_seg_len;
+ bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
+ bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
+ bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
+ bhsdo->bhsdo_buffer_offset =
+ htobe32(cmp->next_buffer_offset);
+
+ npdus = htobe32(bhsdo->bhsdo_datasn) - cmp->last_datasn;
+ } else {
+ MPASS(htobe32(bhsdo->bhsdo_datasn) ==
+ cmp->last_datasn + 1);
+ npdus = 1;
+ }
+
+ cmp->next_buffer_offset += ip->ip_data_len;
+ cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
+ bhsdo->bhsdo_datasn = htobe32(cmp->next_datasn);
+ cmp->next_datasn++;
+ toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
+ toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
+ } else {
+ MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
+ MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
+ MPASS(icp->icp_seq == tp->rcv_nxt);
+ }
+
+ tp->rcv_nxt = icp->icp_seq + pdu_len;
+ tp->t_rcvtime = ticks;
+
+ /*
+ * Don't update the window size or return credits since RX
+ * flow control is disabled.
+ */
+
+ so = inp->inp_socket;
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
+ if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
+ CTR5(KTR_CXGBE,
+ "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
+ __func__, tid, pdu_len, icc, sb->sb_state);
+ SOCKBUF_UNLOCK(sb);
+ INP_WUNLOCK(inp);
+
+ CURVNET_SET(so->so_vnet);
+ NET_EPOCH_ENTER(et);
+ INP_WLOCK(inp);
+ tp = tcp_drop(tp, ECONNRESET);
+ if (tp != NULL)
+ INP_WUNLOCK(inp);
+ NET_EPOCH_EXIT(et);
+ CURVNET_RESTORE();
+
+ icl_cxgbei_conn_pdu_free(NULL, ip);
+ toep->ulpcb2 = NULL;
+ m_freem(m);
+ return (0);
+ }
+ MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
+ ic = &icc->ic;
+ icl_cxgbei_new_pdu_set_conn(ip, ic);
+
+ /* Enqueue the PDU to the received pdus queue. */
+ STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
+ if ((icc->rx_flags & RXF_ACTIVE) == 0) {
+ struct cxgbei_worker_thread_softc *cwt = &cwt_softc[icc->cwt];
+
+ mtx_lock(&cwt->cwt_lock);
+ icc->rx_flags |= RXF_ACTIVE;
+ TAILQ_INSERT_TAIL(&cwt->rx_head, icc, rx_link);
+ if (cwt->cwt_state == CWT_SLEEPING) {
+ cwt->cwt_state = CWT_RUNNING;
+ cv_signal(&cwt->cwt_cv);
+ }
+ mtx_unlock(&cwt->cwt_lock);
+ }
+ SOCKBUF_UNLOCK(sb);
+ INP_WUNLOCK(inp);
+
+ toep->ulpcb2 = NULL;
+ m_freem(m);
+
return (0);
}
@@ -669,6 +917,7 @@ cxgbei_mod_load(void)
t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
+ t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);
rc = start_worker_threads();
if (rc != 0)
@@ -699,6 +948,7 @@ cxgbei_mod_unload(void)
t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
+ t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);
return (0);
}
diff --git a/sys/dev/cxgbe/cxgbei/cxgbei.h b/sys/dev/cxgbe/cxgbei/cxgbei.h
index 9941e817b9cb..45d3398d545c 100644
--- a/sys/dev/cxgbe/cxgbei/cxgbei.h
+++ b/sys/dev/cxgbe/cxgbei/cxgbei.h
@@ -53,6 +53,17 @@ enum {
RXF_ACTIVE = 1 << 0, /* In the worker thread's queue */
};
+struct cxgbei_cmp {
+ LIST_ENTRY(cxgbei_cmp) link;
+
+ uint32_t tt; /* Transfer tag. */
+
+ uint32_t next_datasn;
+ uint32_t next_buffer_offset;
+ uint32_t last_datasn;
+};
+LIST_HEAD(cxgbei_cmp_head, cxgbei_cmp);
+
struct icl_cxgbei_conn {
struct icl_conn ic;
@@ -67,6 +78,10 @@ struct icl_cxgbei_conn {
u_int cwt;
STAILQ_HEAD(, icl_pdu) rcvd_pdus; /* protected by so_rcv lock */
TAILQ_ENTRY(icl_cxgbei_conn) rx_link; /* protected by cwt lock */
+
+ struct cxgbei_cmp_head *cmp_table; /* protected by cmp_lock */
+ struct mtx cmp_lock;
+ unsigned long cmp_hash_mask;
};
static inline struct icl_cxgbei_conn *
@@ -128,5 +143,6 @@ int icl_cxgbei_mod_unload(void);
struct icl_pdu *icl_cxgbei_new_pdu(int);
void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *);
void icl_cxgbei_conn_pdu_free(struct icl_conn *, struct icl_pdu *);
+struct cxgbei_cmp *cxgbei_find_cmp(struct icl_cxgbei_conn *, uint32_t);
#endif
diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
index 17d5685f1c1a..b9f7c6355b6f 100644
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -60,7 +60,6 @@ __FBSDID("$FreeBSD$");
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/bus.h>
-#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <netinet/in.h>
@@ -100,6 +99,16 @@ __FBSDID("$FreeBSD$");
#include "tom/t4_tom.h"
#include "cxgbei.h"
+/*
+ * Use the page pod tag for the TT hash.
+ */
+#define TT_HASH(icc, tt) (G_PPOD_TAG(tt) & (icc)->cmp_hash_mask)
+
+struct cxgbei_ddp_state {
+ struct ppod_reservation prsv;
+ struct cxgbei_cmp cmp;
+};
+
static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)");
SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
@@ -117,7 +126,6 @@ static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
&recvspace, 0, "Default receive socket buffer size");
-static uma_zone_t prsv_zone;
static volatile u_int icl_cxgbei_ncons;
#define ICL_CONN_LOCK(X) mtx_lock(X->ic_lock)
@@ -555,6 +563,9 @@ icl_cxgbei_new_conn(const char *name, struct mtx *lock)
icc->icc_signature = CXGBEI_CONN_SIGNATURE;
STAILQ_INIT(&icc->rcvd_pdus);
+ icc->cmp_table = hashinit(64, M_CXGBEI, &icc->cmp_hash_mask);
+ mtx_init(&icc->cmp_lock, "cxgbei_cmp", NULL, MTX_DEF);
+
ic = &icc->ic;
ic->ic_lock = lock;
@@ -586,6 +597,8 @@ icl_cxgbei_conn_free(struct icl_conn *ic)
cv_destroy(&ic->ic_send_cv);
cv_destroy(&ic->ic_receive_cv);
+ mtx_destroy(&icc->cmp_lock);
+ hashdestroy(icc->cmp_table, M_CXGBEI, icc->cmp_hash_mask);
kobj_delete((struct kobj *)icc, M_CXGBE);
refcount_release(&icl_cxgbei_ncons);
}
@@ -904,6 +917,61 @@ icl_cxgbei_conn_close(struct icl_conn *ic)
soclose(so);
}
+static void
+cxgbei_insert_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp,
+ uint32_t tt)
+{
+#ifdef INVARIANTS
+ struct cxgbei_cmp *cmp2;
+#endif
+
+ cmp->tt = tt;
+
+ mtx_lock(&icc->cmp_lock);
+#ifdef INVARIANTS
+ LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, tt)], link) {
+ KASSERT(cmp2->tt != tt, ("%s: duplicate cmp", __func__));
+ }
+#endif
+ LIST_INSERT_HEAD(&icc->cmp_table[TT_HASH(icc, tt)], cmp, link);
+ mtx_unlock(&icc->cmp_lock);
+}
+
+struct cxgbei_cmp *
+cxgbei_find_cmp(struct icl_cxgbei_conn *icc, uint32_t tt)
+{
+ struct cxgbei_cmp *cmp;
+
+ mtx_lock(&icc->cmp_lock);
+ LIST_FOREACH(cmp, &icc->cmp_table[TT_HASH(icc, tt)], link) {
+ if (cmp->tt == tt)
+ break;
+ }
+ mtx_unlock(&icc->cmp_lock);
+ return (cmp);
+}
+
+static void
+cxgbei_rm_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp)
+{
+#ifdef INVARIANTS
+ struct cxgbei_cmp *cmp2;
+#endif
+
+ mtx_lock(&icc->cmp_lock);
+
+#ifdef INVARIANTS
+ LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, cmp->tt)], link) {
+ if (cmp2 == cmp)
+ goto found;
+ }
+ panic("%s: could not find cmp", __func__);
+found:
+#endif
+ LIST_REMOVE(cmp, link);
+ mtx_unlock(&icc->cmp_lock);
+}
+
int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
struct ccb_scsiio *csio, uint32_t *ittp, void **arg)
@@ -913,6 +981,7 @@ icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
struct adapter *sc = icc->sc;
struct cxgbei_data *ci = sc->iscsi_ulp_softc;
struct ppod_region *pr = &ci->pr;
+ struct cxgbei_ddp_state *ddp;
struct ppod_reservation *prsv;
uint32_t itt;
int rc = 0;
@@ -943,30 +1012,32 @@ no_ddp:
* Reserve resources for DDP, update the itt that should be used in the
* PDU, and save DDP specific state for this I/O in *arg.
*/
-
- prsv = uma_zalloc(prsv_zone, M_NOWAIT);
- if (prsv == NULL) {
+ ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
+ if (ddp == NULL) {
rc = ENOMEM;
goto no_ddp;
}
+ prsv = &ddp->prsv;
/* XXX add support for all CAM_DATA_ types */
MPASS((csio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR);
rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
csio->dxfer_len, prsv);
if (rc != 0) {
- uma_zfree(prsv_zone, prsv);
+ free(ddp, M_CXGBEI);
goto no_ddp;
}
rc = t4_write_page_pods_for_buf(sc, toep, prsv,
(vm_offset_t)csio->data_ptr, csio->dxfer_len);
- if (rc != 0) {
+ if (__predict_false(rc != 0)) {
t4_free_page_pods(prsv);
- uma_zfree(prsv_zone, prsv);
+ free(ddp, M_CXGBEI);
goto no_ddp;
}
+ ddp->cmp.last_datasn = -1;
+ cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
*ittp = htobe32(prsv->prsv_tag);
*arg = prsv;
counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
@@ -978,10 +1049,11 @@ icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
{
if (arg != NULL) {
- struct ppod_reservation *prsv = arg;
+ struct cxgbei_ddp_state *ddp = arg;
- t4_free_page_pods(prsv);
- uma_zfree(prsv_zone, prsv);
+ cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
+ t4_free_page_pods(&ddp->prsv);
+ free(ddp, M_CXGBEI);
}
}
@@ -1009,7 +1081,7 @@ ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
/* XXXNP: PDU should be passed in as parameter, like on the initiator. */
#define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
-#define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
+#define io_to_ddp_state(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
@@ -1021,6 +1093,7 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
struct adapter *sc = icc->sc;
struct cxgbei_data *ci = sc->iscsi_ulp_softc;
struct ppod_region *pr = &ci->pr;
+ struct cxgbei_ddp_state *ddp;
struct ppod_reservation *prsv;
struct ctl_sg_entry *sgl, sg_entry;
int sg_entries = ctsio->kern_sg_entries;
@@ -1064,7 +1137,7 @@ no_ddp:
ttt = *tttp & M_PPOD_TAG;
ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit;
*tttp = htobe32(ttt);
- MPASS(io_to_ppod_reservation(io) == NULL);
+ MPASS(io_to_ddp_state(io) == NULL);
if (rc != 0)
counter_u64_add(
toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
@@ -1086,17 +1159,17 @@ no_ddp:
* Reserve resources for DDP, update the ttt that should be used
* in the PDU, and save DDP specific state for this I/O.
*/
-
- MPASS(io_to_ppod_reservation(io) == NULL);
- prsv = uma_zalloc(prsv_zone, M_NOWAIT);
- if (prsv == NULL) {
+ MPASS(io_to_ddp_state(io) == NULL);
+ ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
+ if (ddp == NULL) {
rc = ENOMEM;
goto no_ddp;
}
+ prsv = &ddp->prsv;
rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
if (rc != 0) {
- uma_zfree(prsv_zone, prsv);
+ free(ddp, M_CXGBEI);
goto no_ddp;
}
@@ -1104,12 +1177,16 @@ no_ddp:
xferlen);
if (__predict_false(rc != 0)) {
t4_free_page_pods(prsv);
- uma_zfree(prsv_zone, prsv);
+ free(ddp, M_CXGBEI);
goto no_ddp;
}
+ ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset +
+ first_burst;
+ ddp->cmp.last_datasn = -1;
+ cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
*tttp = htobe32(prsv->prsv_tag);
- io_to_ppod_reservation(io) = prsv;
+ io_to_ddp_state(io) = ddp;
*arg = ctsio;
counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
return (0);
@@ -1119,16 +1196,19 @@ no_ddp:
* In the middle of an I/O. A non-NULL page pod reservation indicates
* that a DDP buffer is being used for the I/O.
*/
-
- prsv = io_to_ppod_reservation(ctsio);
- if (prsv == NULL)
+ ddp = io_to_ddp_state(ctsio);
+ if (ddp == NULL)
goto no_ddp;
+ prsv = &ddp->prsv;
alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift;
alias++;
prsv->prsv_tag &= ~pr->pr_alias_mask;
prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask;
+ ddp->cmp.next_datasn = 0;
+ ddp->cmp.last_datasn = -1;
+ cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
*tttp = htobe32(prsv->prsv_tag);
*arg = ctsio;
@@ -1140,16 +1220,19 @@ icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg)
{
struct ctl_scsiio *ctsio = arg;
- if (ctsio != NULL && (ctsio->kern_data_len == ctsio->ext_data_filled ||
- ic->ic_disconnecting)) {
- struct ppod_reservation *prsv;
+ if (ctsio != NULL) {
+ struct cxgbei_ddp_state *ddp;
- prsv = io_to_ppod_reservation(ctsio);
- MPASS(prsv != NULL);
+ ddp = io_to_ddp_state(ctsio);
+ MPASS(ddp != NULL);
- t4_free_page_pods(prsv);
- uma_zfree(prsv_zone, prsv);
- io_to_ppod_reservation(ctsio) = NULL;
+ cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
+ if (ctsio->kern_data_len == ctsio->ext_data_filled ||
+ ic->ic_disconnecting) {
+ t4_free_page_pods(&ddp->prsv);
+ free(ddp, M_CXGBEI);
+ io_to_ddp_state(ctsio) = NULL;
+ }
}
}
@@ -1208,13 +1291,6 @@ icl_cxgbei_mod_load(void)
{
int rc;
- /*
- * Space to track pagepod reservations.
- */
- prsv_zone = uma_zcreate("Pagepod reservations",
- sizeof(struct ppod_reservation), NULL, NULL, NULL, NULL,
- UMA_ALIGN_CACHE, 0);
-
refcount_init(&icl_cxgbei_ncons, 0);
rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits,
@@ -1232,8 +1308,6 @@ icl_cxgbei_mod_unload(void)
icl_unregister("cxgbei", false);
- uma_zdestroy(prsv_zone);
-
return (0);
}
#endif
More information about the dev-commits-src-main
mailing list