git: 2beaefe88443 - main - cxgbei: Support unmapped I/O requests.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Thu, 10 Mar 2022 23:51:29 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=2beaefe88443f0644d65b3ae0ac2f1ccff51e748

commit 2beaefe88443f0644d65b3ae0ac2f1ccff51e748
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2022-03-10 23:50:52 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-03-10 23:50:52 +0000

    cxgbei: Support unmapped I/O requests.
    
    - Add icl_pdu_append_bio and icl_pdu_get_bio methods.
    
    - Add new page pod routines for allocating and writing page pods for
      unmapped bio requests.  Use these new routines for setting up DDP
      for iSCSI tasks with a SCSI I/O CCB which uses CAM_DATA_BIO.
    
    - When ICL_NOCOPY is used to append data from an unmapped I/O request
      to a PDU, construct unmapped mbufs from the relevant pages backing
      the struct bio.  This also requires changes in the t4_push_pdus path
      to support unmapped mbufs.
    
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D34383
---
 sys/dev/cxgbe/cxgbei/icl_cxgbei.c | 344 +++++++++++++++++++++++++++++++-------
 sys/dev/cxgbe/tom/t4_cpl_io.c     |  17 +-
 sys/dev/cxgbe/tom/t4_ddp.c        | 126 ++++++++++++--
 sys/dev/cxgbe/tom/t4_tom.h        |   5 +
 4 files changed, 416 insertions(+), 76 deletions(-)

diff --git a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
index 296d4f2d270a..a3dbf4cb88a2 100644
--- a/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
+++ b/sys/dev/cxgbe/cxgbei/icl_cxgbei.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 
 #ifdef TCP_OFFLOAD
 #include <sys/param.h>
+#include <sys/bio.h>
 #include <sys/capsicum.h>
 #include <sys/condvar.h>
 #include <sys/conf.h>
@@ -61,6 +62,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/uio.h>
 #include <machine/bus.h>
 #include <vm/vm.h>
+#include <vm/vm_page.h>
 #include <vm/pmap.h>
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
@@ -132,7 +134,9 @@ static volatile u_int icl_cxgbei_ncons;
 static icl_conn_new_pdu_t	icl_cxgbei_conn_new_pdu;
 static icl_conn_pdu_data_segment_length_t
 				    icl_cxgbei_conn_pdu_data_segment_length;
+static icl_conn_pdu_append_bio_t	icl_cxgbei_conn_pdu_append_bio;
 static icl_conn_pdu_append_data_t	icl_cxgbei_conn_pdu_append_data;
+static icl_conn_pdu_get_bio_t	icl_cxgbei_conn_pdu_get_bio;
 static icl_conn_pdu_get_data_t	icl_cxgbei_conn_pdu_get_data;
 static icl_conn_pdu_queue_t	icl_cxgbei_conn_pdu_queue;
 static icl_conn_pdu_queue_cb_t	icl_cxgbei_conn_pdu_queue_cb;
@@ -149,7 +153,9 @@ static kobj_method_t icl_cxgbei_methods[] = {
 	KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free),
 	KOBJMETHOD(icl_conn_pdu_data_segment_length,
 	    icl_cxgbei_conn_pdu_data_segment_length),
+	KOBJMETHOD(icl_conn_pdu_append_bio, icl_cxgbei_conn_pdu_append_bio),
 	KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data),
+	KOBJMETHOD(icl_conn_pdu_get_bio, icl_cxgbei_conn_pdu_get_bio),
 	KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data),
 	KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue),
 	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_cxgbei_conn_pdu_queue_cb),
@@ -552,6 +558,189 @@ out:
 	kthread_exit();
 }
 
+static void
+cxgbei_free_mext_pg(struct mbuf *m)
+{
+	struct icl_cxgbei_pdu *icp;
+
+	M_ASSERTEXTPG(m);
+
+	/*
+	 * Nothing to do for the pages; they are owned by the PDU /
+	 * I/O request.
+	 */
+
+	/* Drop reference on the PDU. */
+	icp = m->m_ext.ext_arg1;
+	if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
+		icl_cxgbei_pdu_call_cb(&icp->ip);
+}
+
+static struct mbuf *
+cxgbei_getm(size_t len, int flags)
+{
+	struct mbuf *m, *m0, *m_tail;
+
+	m_tail = m0 = NULL;
+
+	/* Allocate as jumbo mbufs of size MJUM16BYTES. */
+	while (len >= MJUM16BYTES) {
+		m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES);
+		if (__predict_false(m == NULL)) {
+			if ((flags & M_WAITOK) != 0) {
+				/* Fall back to non-jumbo mbufs. */
+				break;
+			}
+			return (NULL);
+		}
+		if (m0 == NULL) {
+			m0 = m_tail = m;
+		} else {
+			m_tail->m_next = m;
+			m_tail = m;
+		}
+		len -= MJUM16BYTES;
+	}
+
+	/* Allocate mbuf chain for the remaining data. */
+	if (len != 0) {
+		m = m_getm2(NULL, len, flags, MT_DATA, 0);
+		if (__predict_false(m == NULL)) {
+			m_freem(m0);
+			return (NULL);
+		}
+		if (m0 == NULL)
+			m0 = m;
+		else
+			m_tail->m_next = m;
+	}
+
+	return (m0);
+}
+
+int
+icl_cxgbei_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *ip,
+    struct bio *bp, size_t offset, size_t len, int flags)
+{
+	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
+	struct mbuf *m, *m_tail;
+	vm_offset_t vaddr;
+	size_t page_offset, todo, mtodo;
+	boolean_t mapped;
+	int i;
+
+	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
+	MPASS(ic == ip->ip_conn);
+	KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len));
+
+	m_tail = ip->ip_data_mbuf;
+	if (m_tail != NULL)
+		for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
+			;
+
+	MPASS(bp->bio_flags & BIO_UNMAPPED);
+	if (offset < PAGE_SIZE - bp->bio_ma_offset) {
+		page_offset = bp->bio_ma_offset + offset;
+		i = 0;
+	} else {
+		offset -= PAGE_SIZE - bp->bio_ma_offset;
+		for (i = 1; offset >= PAGE_SIZE; i++)
+			offset -= PAGE_SIZE;
+		page_offset = offset;
+	}
+
+	if (flags & ICL_NOCOPY) {
+		m = NULL;
+		while (len > 0) {
+			if (m == NULL) {
+				m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY,
+				    cxgbei_free_mext_pg);
+				if (__predict_false(m == NULL))
+					return (ENOMEM);
+				atomic_add_int(&icp->ref_cnt, 1);
+				m->m_ext.ext_arg1 = icp;
+				m->m_epg_1st_off = page_offset;
+			}
+
+			todo = MIN(len, PAGE_SIZE - page_offset);
+
+			m->m_epg_pa[m->m_epg_npgs] =
+			    VM_PAGE_TO_PHYS(bp->bio_ma[i]);
+			m->m_epg_npgs++;
+			m->m_epg_last_len = todo;
+			m->m_len += todo;
+			m->m_ext.ext_size += PAGE_SIZE;
+			MBUF_EXT_PGS_ASSERT_SANITY(m);
+
+			if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) {
+				if (m_tail != NULL)
+					m_tail->m_next = m;
+				else
+					ip->ip_data_mbuf = m;
+				m_tail = m;
+				ip->ip_data_len += m->m_len;
+				m = NULL;
+			}
+
+			page_offset = 0;
+			len -= todo;
+			i++;
+		}
+
+		if (m != NULL) {
+			if (m_tail != NULL)
+				m_tail->m_next = m;
+			else
+				ip->ip_data_mbuf = m;
+			ip->ip_data_len += m->m_len;
+		}
+		return (0);
+	}
+
+	m = cxgbei_getm(len, flags);
+	if (__predict_false(m == NULL))
+		return (ENOMEM);
+
+	if (ip->ip_data_mbuf == NULL) {
+		ip->ip_data_mbuf = m;
+		ip->ip_data_len = len;
+	} else {
+		m_tail->m_next = m;
+		ip->ip_data_len += len;
+	}
+
+	while (len > 0) {
+		todo = MIN(len, PAGE_SIZE - page_offset);
+
+		mapped = pmap_map_io_transient(bp->bio_ma + i, &vaddr, 1,
+		    FALSE);
+
+		do {
+			mtodo = min(todo, M_SIZE(m) - m->m_len);
+			memcpy(mtod(m, char *) + m->m_len, (char *)vaddr +
+			    page_offset, mtodo);
+			m->m_len += mtodo;
+			if (m->m_len == M_SIZE(m))
+				m = m->m_next;
+			page_offset += mtodo;
+			todo -= mtodo;
+		} while (todo > 0);
+
+		if (__predict_false(mapped))
+			pmap_unmap_io_transient(bp->bio_ma + 1, &vaddr, 1,
+			    FALSE);
+
+		page_offset = 0;
+		len -= todo;
+		i++;
+	}
+
+	MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
+	    ic->ic_hw_isomax));
+
+	return (0);
+}
+
 int
 icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
     const void *addr, size_t len, int flags)
@@ -592,58 +781,74 @@ icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
 		return (0);
 	}
 
-	src = (const char *)addr;
+	m = cxgbei_getm(len, flags);
+	if (__predict_false(m == NULL))
+		return (ENOMEM);
 
-	/* Allocate as jumbo mbufs of size MJUM16BYTES. */
-	while (len >= MJUM16BYTES) {
-		m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES);
-		if (__predict_false(m == NULL)) {
-			if ((flags & M_WAITOK) != 0) {
-				/* Fall back to non-jumbo mbufs. */
-				break;
-			}
-			return (ENOMEM);
-		}
-		memcpy(mtod(m, void *), src, MJUM16BYTES);
-		m->m_len = MJUM16BYTES;
-		if (ip->ip_data_mbuf == NULL) {
-			ip->ip_data_mbuf = m_tail = m;
-			ip->ip_data_len = MJUM16BYTES;
-		} else {
-			m_tail->m_next = m;
-			m_tail = m_tail->m_next;
-			ip->ip_data_len += MJUM16BYTES;
-		}
-		src += MJUM16BYTES;
-		len -= MJUM16BYTES;
+	if (ip->ip_data_mbuf == NULL) {
+		ip->ip_data_mbuf = m;
+		ip->ip_data_len = len;
+	} else {
+		m_tail->m_next = m;
+		ip->ip_data_len += len;
 	}
-
-	/* Allocate mbuf chain for the remaining data. */
-	if (len != 0) {
-		m = m_getm2(NULL, len, flags, MT_DATA, 0);
-		if (__predict_false(m == NULL))
-			return (ENOMEM);
-		if (ip->ip_data_mbuf == NULL) {
-			ip->ip_data_mbuf = m;
-			ip->ip_data_len = len;
-		} else {
-			m_tail->m_next = m;
-			ip->ip_data_len += len;
-		}
-		for (; m != NULL; m = m->m_next) {
-			m->m_len = min(len, M_SIZE(m));
-			memcpy(mtod(m, void *), src, m->m_len);
-			src += m->m_len;
-			len -= m->m_len;
-		}
-		MPASS(len == 0);
+	src = (const char *)addr;
+	for (; m != NULL; m = m->m_next) {
+		m->m_len = min(len, M_SIZE(m));
+		memcpy(mtod(m, void *), src, m->m_len);
+		src += m->m_len;
+		len -= m->m_len;
 	}
+	MPASS(len == 0);
+
 	MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
 	    ic->ic_hw_isomax));
 
 	return (0);
 }
 
+void
+icl_cxgbei_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
+    size_t pdu_off, struct bio *bp, size_t bio_off, size_t len)
+{
+	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
+	vm_offset_t vaddr;
+	size_t page_offset, todo;
+	boolean_t mapped;
+	int i;
+
+	if (icp->icp_flags & ICPF_RX_DDP)
+		return; /* data is DDP'ed, no need to copy */
+
+	MPASS(bp->bio_flags & BIO_UNMAPPED);
+	if (bio_off < PAGE_SIZE - bp->bio_ma_offset) {
+		page_offset = bp->bio_ma_offset + bio_off;
+		i = 0;
+	} else {
+		bio_off -= PAGE_SIZE - bp->bio_ma_offset;
+		for (i = 1; bio_off >= PAGE_SIZE; i++)
+			bio_off -= PAGE_SIZE;
+		page_offset = bio_off;
+	}
+
+	while (len > 0) {
+		todo = MIN(len, PAGE_SIZE - page_offset);
+
+		mapped = pmap_map_io_transient(bp->bio_ma + i, &vaddr, 1,
+		    FALSE);
+		m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr +
+		    page_offset);
+		if (__predict_false(mapped))
+			pmap_unmap_io_transient(bp->bio_ma + 1, &vaddr, 1,
+			    FALSE);
+
+		page_offset = 0;
+		pdu_off += todo;
+		len -= todo;
+		i++;
+	}
+}
+
 void
 icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
     size_t off, void *addr, size_t len)
@@ -716,7 +921,7 @@ icl_cxgbei_new_conn(const char *name, struct mtx *lock)
 #endif
 	ic->ic_name = name;
 	ic->ic_offload = "cxgbei";
-	ic->ic_unmapped = false;
+	ic->ic_unmapped = true;
 
 	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);
 
@@ -1251,22 +1456,45 @@ no_ddp:
 	}
 	prsv = &ddp->prsv;
 
-	/* XXX add support for all CAM_DATA_ types */
-	MPASS((csio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR);
-	rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
-	    csio->dxfer_len, prsv);
-	if (rc != 0) {
-		free(ddp, M_CXGBEI);
-		goto no_ddp;
-	}
-
 	mbufq_init(&mq, INT_MAX);
-	rc = t4_write_page_pods_for_buf(sc, toep, prsv,
-	    (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq);
-	if (__predict_false(rc != 0)) {
-		mbufq_drain(&mq);
-		t4_free_page_pods(prsv);
+	switch (csio->ccb_h.flags & CAM_DATA_MASK) {
+	case CAM_DATA_BIO:
+		rc = t4_alloc_page_pods_for_bio(pr,
+		    (struct bio *)csio->data_ptr, prsv);
+		if (rc != 0) {
+			free(ddp, M_CXGBEI);
+			goto no_ddp;
+		}
+
+		rc = t4_write_page_pods_for_bio(sc, toep, prsv,
+		    (struct bio *)csio->data_ptr, &mq);
+		if (__predict_false(rc != 0)) {
+			mbufq_drain(&mq);
+			t4_free_page_pods(prsv);
+			free(ddp, M_CXGBEI);
+			goto no_ddp;
+		}
+		break;
+	case CAM_DATA_VADDR:
+		rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
+		    csio->dxfer_len, prsv);
+		if (rc != 0) {
+			free(ddp, M_CXGBEI);
+			goto no_ddp;
+		}
+
+		rc = t4_write_page_pods_for_buf(sc, toep, prsv,
+		    (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq);
+		if (__predict_false(rc != 0)) {
+			mbufq_drain(&mq);
+			t4_free_page_pods(prsv);
+			free(ddp, M_CXGBEI);
+			goto no_ddp;
+		}
+		break;
+	default:
 		free(ddp, M_CXGBEI);
+		rc = EINVAL;
 		goto no_ddp;
 	}
 
diff --git a/sys/dev/cxgbe/tom/t4_cpl_io.c b/sys/dev/cxgbe/tom/t4_cpl_io.c
index 56bb7910b5c1..4c8b1fa27579 100644
--- a/sys/dev/cxgbe/tom/t4_cpl_io.c
+++ b/sys/dev/cxgbe/tom/t4_cpl_io.c
@@ -1002,7 +1002,7 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 	int tx_credits, shove, npdu, wr_len;
 	uint16_t iso_mss;
 	static const u_int ulp_extra_len[] = {0, 4, 4, 8};
-	bool iso;
+	bool iso, nomap_mbuf_seen;
 
 	M_ASSERTPKTHDR(sndptr);
 
@@ -1030,8 +1030,15 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 	plen = 0;
 	nsegs = 0;
 	max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */
+	nomap_mbuf_seen = false;
 	for (m = sndptr; m != NULL; m = m->m_next) {
-		int n = sglist_count(mtod(m, void *), m->m_len);
+		int n;
+
+		if (m->m_flags & M_EXTPG)
+			n = sglist_count_mbuf_epg(m, mtod(m, vm_offset_t),
+			    m->m_len);
+		else
+			n = sglist_count(mtod(m, void *), m->m_len);
 
 		nsegs += n;
 		plen += m->m_len;
@@ -1040,9 +1047,11 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 		 * This mbuf would send us _over_ the nsegs limit.
 		 * Suspend tx because the PDU can't be sent out.
 		 */
-		if (plen > max_imm && nsegs > max_nsegs)
+		if ((nomap_mbuf_seen || plen > max_imm) && nsegs > max_nsegs)
 			return (NULL);
 
+		if (m->m_flags & M_EXTPG)
+			nomap_mbuf_seen = true;
 		if (max_nsegs_1mbuf < n)
 			max_nsegs_1mbuf = n;
 	}
@@ -1075,7 +1084,7 @@ write_iscsi_mbuf_wr(struct toepcb *toep, struct mbuf *sndptr)
 	wr_len = sizeof(*txwr);
 	if (iso)
 		wr_len += sizeof(struct cpl_tx_data_iso);
-	if (plen <= max_imm) {
+	if (plen <= max_imm && !nomap_mbuf_seen) {
 		/* Immediate data tx */
 		imm_data = plen;
 		wr_len += plen;
diff --git a/sys/dev/cxgbe/tom/t4_ddp.c b/sys/dev/cxgbe/tom/t4_ddp.c
index 18c464b5c034..05bb903a28aa 100644
--- a/sys/dev/cxgbe/tom/t4_ddp.c
+++ b/sys/dev/cxgbe/tom/t4_ddp.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/aio.h>
+#include <sys/bio.h>
 #include <sys/file.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -887,14 +888,11 @@ alloc_page_pods(struct ppod_region *pr, u_int nppods, u_int pgsz_idx,
 	return (0);
 }
 
-int
-t4_alloc_page_pods_for_ps(struct ppod_region *pr, struct pageset *ps)
+static int
+t4_alloc_page_pods_for_vmpages(struct ppod_region *pr, vm_page_t *pages,
+    int npages, struct ppod_reservation *prsv)
 {
 	int i, hcf, seglen, idx, nppods;
-	struct ppod_reservation *prsv = &ps->prsv;
-
-	KASSERT(prsv->prsv_nppods == 0,
-	    ("%s: page pods already allocated", __func__));
 
 	/*
 	 * The DDP page size is unrelated to the VM page size.  We combine
@@ -904,11 +902,11 @@ t4_alloc_page_pods_for_ps(struct ppod_region *pr, struct pageset *ps)
 	 * the page list.
 	 */
 	hcf = 0;
-	for (i = 0; i < ps->npages; i++) {
+	for (i = 0; i < npages; i++) {
 		seglen = PAGE_SIZE;
-		while (i < ps->npages - 1 &&
-		    VM_PAGE_TO_PHYS(ps->pages[i]) + PAGE_SIZE ==
-		    VM_PAGE_TO_PHYS(ps->pages[i + 1])) {
+		while (i < npages - 1 &&
+		    VM_PAGE_TO_PHYS(pages[i]) + PAGE_SIZE ==
+		    VM_PAGE_TO_PHYS(pages[i + 1])) {
 			seglen += PAGE_SIZE;
 			i++;
 		}
@@ -931,12 +929,35 @@ t4_alloc_page_pods_for_ps(struct ppod_region *pr, struct pageset *ps)
 have_pgsz:
 	MPASS(idx <= M_PPOD_PGSZ);
 
-	nppods = pages_to_nppods(ps->npages, pr->pr_page_shift[idx]);
+	nppods = pages_to_nppods(npages, pr->pr_page_shift[idx]);
 	if (alloc_page_pods(pr, nppods, idx, prsv) != 0)
-		return (0);
+		return (ENOMEM);
 	MPASS(prsv->prsv_nppods > 0);
 
-	return (1);
+	return (0);
+}
+
+int
+t4_alloc_page_pods_for_ps(struct ppod_region *pr, struct pageset *ps)
+{
+	struct ppod_reservation *prsv = &ps->prsv;
+
+	KASSERT(prsv->prsv_nppods == 0,
+	    ("%s: page pods already allocated", __func__));
+
+	return (t4_alloc_page_pods_for_vmpages(pr, ps->pages, ps->npages,
+	    prsv));
+}
+
+int
+t4_alloc_page_pods_for_bio(struct ppod_region *pr, struct bio *bp,
+    struct ppod_reservation *prsv)
+{
+
+	MPASS(bp->bio_flags & BIO_UNMAPPED);
+
+	return (t4_alloc_page_pods_for_vmpages(pr, bp->bio_ma, bp->bio_ma_n,
+	    prsv));
 }
 
 int
@@ -1189,6 +1210,83 @@ alloc_raw_wr_mbuf(int len)
 	return (m);
 }
 
+int
+t4_write_page_pods_for_bio(struct adapter *sc, struct toepcb *toep,
+    struct ppod_reservation *prsv, struct bio *bp, struct mbufq *wrq)
+{
+	struct ulp_mem_io *ulpmc;
+	struct ulptx_idata *ulpsc;
+	struct pagepod *ppod;
+	int i, j, k, n, chunk, len, ddp_pgsz, idx;
+	u_int ppod_addr;
+	uint32_t cmd;
+	struct ppod_region *pr = prsv->prsv_pr;
+	vm_paddr_t pa;
+	struct mbuf *m;
+
+	MPASS(bp->bio_flags & BIO_UNMAPPED);
+
+	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
+	if (is_t4(sc))
+		cmd |= htobe32(F_ULP_MEMIO_ORDER);
+	else
+		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
+	ddp_pgsz = 1 << pr->pr_page_shift[G_PPOD_PGSZ(prsv->prsv_tag)];
+	ppod_addr = pr->pr_start + (prsv->prsv_tag & pr->pr_tag_mask);
+	for (i = 0; i < prsv->prsv_nppods; ppod_addr += chunk) {
+
+		/* How many page pods are we writing in this cycle */
+		n = min(prsv->prsv_nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
+		MPASS(n > 0);
+		chunk = PPOD_SZ(n);
+		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);
+
+		m = alloc_raw_wr_mbuf(len);
+		if (m == NULL)
+			return (ENOMEM);
+
+		ulpmc = mtod(m, struct ulp_mem_io *);
+		INIT_ULPTX_WR(ulpmc, len, 0, toep->tid);
+		ulpmc->cmd = cmd;
+		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
+		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
+		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));
+
+		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
+		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
+		ulpsc->len = htobe32(chunk);
+
+		ppod = (struct pagepod *)(ulpsc + 1);
+		for (j = 0; j < n; i++, j++, ppod++) {
+			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
+			    V_PPOD_TID(toep->tid) |
+			    (prsv->prsv_tag & ~V_PPOD_PGSZ(M_PPOD_PGSZ)));
+			ppod->len_offset = htobe64(V_PPOD_LEN(bp->bio_bcount) |
+			    V_PPOD_OFST(bp->bio_ma_offset));
+			ppod->rsvd = 0;
+			idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE);
+			for (k = 0; k < nitems(ppod->addr); k++) {
+				if (idx < bp->bio_ma_n) {
+					pa = VM_PAGE_TO_PHYS(bp->bio_ma[idx]);
+					ppod->addr[k] = htobe64(pa);
+					idx += ddp_pgsz / PAGE_SIZE;
+				} else
+					ppod->addr[k] = 0;
+#if 0
+				CTR5(KTR_CXGBE,
+				    "%s: tid %d ppod[%d]->addr[%d] = %p",
+				    __func__, toep->tid, i, k,
+				    be64toh(ppod->addr[k]));
+#endif
+			}
+		}
+
+		mbufq_enqueue(wrq, m);
+	}
+
+	return (0);
+}
+
 int
 t4_write_page_pods_for_buf(struct adapter *sc, struct toepcb *toep,
     struct ppod_reservation *prsv, vm_offset_t buf, int buflen,
@@ -1398,7 +1496,7 @@ prep_pageset(struct adapter *sc, struct toepcb *toep, struct pageset *ps)
 	struct tom_data *td = sc->tom_softc;
 
 	if (ps->prsv.prsv_nppods == 0 &&
-	    !t4_alloc_page_pods_for_ps(&td->pr, ps)) {
+	    t4_alloc_page_pods_for_ps(&td->pr, ps) != 0) {
 		return (0);
 	}
 	if (!(ps->flags & PS_PPODS_WRITTEN) &&
diff --git a/sys/dev/cxgbe/tom/t4_tom.h b/sys/dev/cxgbe/tom/t4_tom.h
index dc462d4b4a66..4040b556f60e 100644
--- a/sys/dev/cxgbe/tom/t4_tom.h
+++ b/sys/dev/cxgbe/tom/t4_tom.h
@@ -89,6 +89,7 @@ enum {
 	DDP_DEAD	= (1 << 6),	/* toepcb is shutting down */
 };
 
+struct bio;
 struct ctl_sg_entry;
 struct sockopt;
 struct offload_settings;
@@ -488,12 +489,16 @@ int t4_init_ppod_region(struct ppod_region *, struct t4_range *, u_int,
     const char *);
 void t4_free_ppod_region(struct ppod_region *);
 int t4_alloc_page_pods_for_ps(struct ppod_region *, struct pageset *);
+int t4_alloc_page_pods_for_bio(struct ppod_region *, struct bio *,
+    struct ppod_reservation *);
 int t4_alloc_page_pods_for_buf(struct ppod_region *, vm_offset_t, int,
     struct ppod_reservation *);
 int t4_alloc_page_pods_for_sgl(struct ppod_region *, struct ctl_sg_entry *, int,
     struct ppod_reservation *);
 int t4_write_page_pods_for_ps(struct adapter *, struct sge_wrq *, int,
     struct pageset *);
+int t4_write_page_pods_for_bio(struct adapter *, struct toepcb *,
+    struct ppod_reservation *, struct bio *, struct mbufq *);
 int t4_write_page_pods_for_buf(struct adapter *, struct toepcb *,
     struct ppod_reservation *, vm_offset_t, int, struct mbufq *);
 int t4_write_page_pods_for_sgl(struct adapter *, struct toepcb *,