git: 46bee8043ee2 - main - cxgbei: Support DDP for target I/O S/G lists with more than one entry.
John Baldwin
jhb at FreeBSD.org
Fri May 14 19:21:55 UTC 2021
The branch main has been updated by jhb:
URL: https://cgit.FreeBSD.org/src/commit/?id=46bee8043ee2bd352d420cd573e0364ca45f813e
commit 46bee8043ee2bd352d420cd573e0364ca45f813e
Author: John Baldwin <jhb at FreeBSD.org>
AuthorDate: 2021-05-14 19:17:06 +0000
Commit: John Baldwin <jhb at FreeBSD.org>
CommitDate: 2021-05-14 19:17:06 +0000
cxgbei: Support DDP for target I/O S/G lists with more than one entry.
A CAM target layer I/O CCB can use a S/G list of virtual address ranges
to describe its data buffer. This change adds zero-copy receive support
for such requests.
Sponsored by: Chelsio Communications
Differential Revision: https://reviews.freebsd.org/D29908
---
sys/dev/cxgbe/cxgbei/icl_cxgbei.c | 50 +++++++---
sys/dev/cxgbe/tom/t4_ddp.c | 191 ++++++++++++++++++++++++++++++++++++++
sys/dev/cxgbe/tom/t4_tom.h | 5 +
3 files changed, 232 insertions(+), 14 deletions(-)
diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
index 655cc1de1478..5770599eeeef 100644
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -873,6 +873,28 @@ icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
}
}
+static inline bool
+ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
+{
+ int total_len = 0;
+ /* Vet an S/G list for DDP; the caller skips DDP when this returns false. */
+ MPASS(entries > 0);
+ if (((vm_offset_t)sg[--entries].addr & 3U) != 0) /* last entry: 4-byte aligned start */
+ return (false);
+ /* 'entries' now indexes the last entry; its length is not checked for alignment. */
+ total_len += sg[entries].len;
+ /* Every earlier entry must start on a page boundary and cover whole pages. */
+ while (--entries >= 0) {
+ if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
+ (sg[entries].len % PAGE_SIZE) != 0)
+ return (false);
+ total_len += sg[entries].len;
+ }
+ /* Sanity check: the entry lengths are expected to add up to xferlen. */
+ MPASS(total_len == xferlen);
+ return (true);
+}
+
/* XXXNP: PDU should be passed in as parameter, like on the initiator. */
#define io_to_request_pdu(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
#define io_to_ppod_reservation(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)
@@ -888,6 +910,8 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
struct cxgbei_data *ci = sc->iscsi_ulp_softc;
struct ppod_region *pr = &ci->pr;
struct ppod_reservation *prsv;
+ struct ctl_sg_entry *sgl, sg_entry;
+ int sg_entries = ctsio->kern_sg_entries;
uint32_t ttt;
int xferlen, rc = 0, alias;
@@ -898,7 +922,6 @@ icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
if (ctsio->ext_data_filled == 0) {
int first_burst;
struct icl_pdu *ip = io_to_request_pdu(io);
- vm_offset_t buf;
#ifdef INVARIANTS
struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
@@ -931,18 +954,16 @@ no_ddp:
return (0);
}
- if (ctsio->kern_sg_entries == 0)
- buf = (vm_offset_t)ctsio->kern_data_ptr;
- else if (ctsio->kern_sg_entries == 1) {
- struct ctl_sg_entry *sgl = (void *)ctsio->kern_data_ptr;
+ if (sg_entries == 0) {
+ sgl = &sg_entry;
+ sgl->len = xferlen;
+ sgl->addr = (void *)ctsio->kern_data_ptr;
+ sg_entries = 1;
+ } else
+ sgl = (void *)ctsio->kern_data_ptr;
- MPASS(sgl->len == xferlen);
- buf = (vm_offset_t)sgl->addr;
- } else {
- rc = EAGAIN; /* XXX implement */
+ if (!ddp_sgl_check(sgl, sg_entries, xferlen))
goto no_ddp;
- }
-
/*
* Reserve resources for DDP, update the ttt that should be used
@@ -956,14 +977,15 @@ no_ddp:
goto no_ddp;
}
- rc = t4_alloc_page_pods_for_buf(pr, buf, xferlen, prsv);
+ rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
if (rc != 0) {
uma_zfree(prsv_zone, prsv);
goto no_ddp;
}
- rc = t4_write_page_pods_for_buf(sc, toep, prsv, buf, xferlen);
- if (rc != 0) {
+ rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
+ xferlen);
+ if (__predict_false(rc != 0)) {
t4_free_page_pods(prsv);
uma_zfree(prsv_zone, prsv);
goto no_ddp;
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index e87d013a0453..c266a2e39fa9 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -62,6 +62,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_object.h>
+#include <cam/scsi/scsi_all.h>
+#include <cam/ctl/ctl_io.h>
+
#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
@@ -981,6 +984,76 @@ have_pgsz:
return (0);
}
+int
+t4_alloc_page_pods_for_sgl(struct ppod_region *pr, struct ctl_sg_entry *sgl,
+ int entries, struct ppod_reservation *prsv)
+{
+ int hcf, seglen, idx = 0, npages, nppods, i, len;
+ uintptr_t start_pva, end_pva, pva, p1 ;
+ vm_offset_t buf;
+ struct ctl_sg_entry *sge;
+ /* Pick a DDP page-size index for the S/G list, then call alloc_page_pods() for it. */
+ MPASS(entries > 0);
+ MPASS(sgl);
+ /* Returns 0 on success, or ENOMEM when alloc_page_pods() fails. */
+ /*
+ * The DDP page size is unrelated to the VM page size. We combine
+ * contiguous physical pages into larger segments to get the best DDP
+ * page size possible. This is the largest of the four sizes in
+ * A_ULP_RX_ISCSI_PSZ that evenly divides the HCF of the segment sizes
+ * in the page list.
+ */
+ hcf = 0;
+ for (i = entries - 1; i >= 0; i--) { /* entries are visited last to first */
+ sge = sgl + i;
+ buf = (vm_offset_t)sge->addr;
+ len = sge->len;
+ start_pva = trunc_page(buf);
+ end_pva = trunc_page(buf + len - 1);
+ pva = start_pva;
+ while (pva <= end_pva) { /* each pass measures one physically contiguous run */
+ seglen = PAGE_SIZE;
+ p1 = pmap_kextract(pva);
+ pva += PAGE_SIZE;
+ while (pva <= end_pva && p1 + seglen ==
+ pmap_kextract(pva)) {
+ seglen += PAGE_SIZE;
+ pva += PAGE_SIZE;
+ }
+ /* Fold this run's length into the HCF accumulated over all runs so far. */
+ hcf = calculate_hcf(hcf, seglen);
+ if (hcf < (1 << pr->pr_page_shift[1])) { /* below size 1: use index 0 */
+ idx = 0;
+ goto have_pgsz; /* give up, short circuit */
+ }
+ }
+ }
+#define PR_PAGE_MASK(x) ((1 << pr->pr_page_shift[(x)]) - 1)
+ MPASS((hcf & PR_PAGE_MASK(0)) == 0); /* PAGE_SIZE is >= 4K everywhere */
+ for (idx = nitems(pr->pr_page_shift) - 1; idx > 0; idx--) { /* highest index whose size divides hcf, else 0 */
+ if ((hcf & PR_PAGE_MASK(idx)) == 0)
+ break;
+ }
+#undef PR_PAGE_MASK
+ /* Reached with idx already chosen, by the loop above or by the short circuit. */
+have_pgsz:
+ MPASS(idx <= M_PPOD_PGSZ);
+ /* Each entry adds 1 + ((end_pva - start_pva) >> pr_page_shift[idx]) to npages. */
+ npages = 0;
+ while (entries--) {
+ npages++;
+ start_pva = trunc_page(sgl->addr); /* NOTE(review): no (vm_offset_t) cast here, unlike the next line */
+ end_pva = trunc_page((vm_offset_t)sgl->addr + sgl->len - 1);
+ npages += (end_pva - start_pva) >> pr->pr_page_shift[idx];
+ sgl = sgl + 1;
+ }
+ nppods = howmany(npages, PPOD_PAGES);
+ if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
+ return (ENOMEM);
+ MPASS(prsv->prsv_nppods > 0);
+ return (0);
+}
+
void
t4_free_page_pods(struct ppod_reservation *prsv)
{
@@ -1197,6 +1270,124 @@ t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
return (0);
}
+int
+t4_write_page_pods_for_sgl(struct adapter *sc, struct toepcb *toep,
+ struct ppod_reservation *prsv, struct ctl_sg_entry *sgl, int entries,
+ int xferlen)
+{
+ struct inpcb *inp = toep->inp;
+ struct ulp_mem_io *ulpmc;
+ struct ulptx_idata *ulpsc;
+ struct pagepod *ppod;
+ int i, j, k, n, chunk, len, ddp_pgsz;
+ u_int ppod_addr, offset, sg_offset = 0;
+ uint32_t cmd;
+ struct ppod_region *pr = prsv->prsv_pr;
+ uintptr_t pva, pa;
+ struct mbuf *m;
+ struct mbufq wrq;
+ /* Build ULP_TX_MEM_WRITE requests filling prsv's page pods from the S/G list; 0 or ENOMEM. */
+ MPASS(sgl != NULL);
+ MPASS(entries > 0);
+ cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
+ if (is_t4(sc))
+ cmd |= htobe32(F_ULP_MEMIO_ORDER);
+ else
+ cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
+ ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
+ offset = (vm_offset_t)sgl->addr & PAGE_MASK; /* taken from the first entry only */
+ ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
+ pva = trunc_page(sgl->addr); /* NOTE(review): no (vm_offset_t) cast here, unlike the later trunc_page() call */
+ mbufq_init(&wrq, INT_MAX);
+ for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
+ /* i is advanced by the inner pod loop; ppod_addr advances by chunk per request. */
+ /* How many page pods are we writing in this cycle */
+ n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
+ MPASS(n > 0);
+ chunk = PPOD_SZ(n);
+ len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
+ /* On allocation failure, drain the requests built so far and return ENOMEM. */
+ m = alloc_raw_wr_mbuf(len);
+ if (m == NULL) {
+ mbufq_drain(&wrq);
+ return (ENOMEM);
+ }
+ ulpmc = mtod(m, struct ulp_mem_io *);
+ /* Fill in the ulp_mem_io header at the start of the mbuf data. */
+ INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
+ ulpmc->cmd = cmd;
+ ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+ ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
+ ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
+ /* The ulptx_idata sub-command sits directly after the ulp_mem_io header. */
+ ulpsc = (struct ulptx_idata *)(ulpmc + 1);
+ ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
+ ulpsc->len = htobe32(chunk);
+ /* The n page pods sit directly after the sub-command header. */
+ ppod = (struct pagepod *)(ulpsc + 1);
+ for (j = 0; j < n; i++, j++, ppod++) {
+ ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
+ V_PPOD_TID(toep->tid) |
+ (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
+ ppod->len_offset = htobe64(V_PPOD_LEN(xferlen) |
+ V_PPOD_OFST(offset));
+ ppod->rsvd = 0;
+ /* Address slots are zeroed once the S/G list is exhausted (entries == 0). */
+ for (k = 0; k < nitems(ppod->addr); k++) {
+ if (entries != 0) {
+ pa = pmap_kextract(pva + sg_offset);
+ ppod->addr[k] = htobe64(pa);
+ } else
+ ppod->addr[k] = 0;
+
+#if 0
+ CTR5(KTR_CXGBE,
+ "%s: tid %d ppod[%d]->addr[%d] = %p",
+ __func__, toep->tid, i, k,
+ htobe64(ppod->addr[k]));
+#endif
+
+ /*
+ * If this is the last entry in a pod,
+ * reuse the same entry for first address
+ * in the next pod.
+ */
+ if (k + 1 == nitems(ppod->addr))
+ break;
+
+ /*
+ * Don't move to the next DDP page if the
+ * sgl is already finished.
+ */
+ if (entries == 0)
+ continue;
+
+ sg_offset += ddp_pgsz;
+ if (sg_offset == sgl->len) { /* NOTE(review): exact match only; confirm a final entry whose len is not a ddp_pgsz multiple */
+ /*
+ * This sgl entry is done. Go
+ * to the next.
+ */
+ entries--;
+ sgl++;
+ sg_offset = 0;
+ if (entries != 0)
+ pva = trunc_page(
+ (vm_offset_t)sgl->addr);
+ }
+ }
+ }
+ /* Collect the request on the local queue; nothing reaches toep until all are built. */
+ mbufq_enqueue(&wrq, m);
+ }
+ /* Append the whole batch to toep->ulp_pduq between INP_WLOCK and INP_WUNLOCK. */
+ INP_WLOCK(inp);
+ mbufq_concat(&toep->ulp_pduq, &wrq);
+ INP_WUNLOCK(inp);
+
+ return (0);
+}
+
/*
* Prepare a pageset for DDP. This sets up page pods.
*/
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index f1129b47cbcf..c7984f838735 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -88,6 +88,7 @@ enum {
DDP_DEAD = (1 << 6), /* toepcb is shutting down */
};
+struct ctl_sg_entry;
struct sockopt;
struct offload_settings;
@@ -437,10 +438,14 @@ void t4_free_ppod_region(struct ppod_region *);
int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *);
int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int,
struct ppod_reservation *);
+int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int,
+ struct ppod_reservation *);
int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int,
struct pageset *);
int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *,
struct ppod_reservation *, vm_offset_t, int);
+int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *,
+ struct ppod_reservation *, struct ctl_sg_entry *, int, int);
void t4_free_page_pods(struct ppod_reservation *);
int t4_soreceive_ddp(struct socket *, struct sockaddr **, struct uio *,
struct mbuf **, struct mbuf **, int *);
More information about the dev-commits-src-main
mailing list