git: b82d7897595d - main - vm_object: add getpages utility

From: Doug Moore <dougm@FreeBSD.org>
Date: Sat, 15 Mar 2025 02:45:27 UTC
The branch main has been updated by dougm:

URL: https://cgit.FreeBSD.org/src/commit/?id=b82d7897595d9b28a2b9861c2e09a943500c28ed

commit b82d7897595d9b28a2b9861c2e09a943500c28ed
Author:     Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2025-03-15 02:43:44 +0000
Commit:     Doug Moore <dougm@FreeBSD.org>
CommitDate: 2025-03-15 02:43:44 +0000

    vm_object: add getpages utility
    
    vnode_pager_generic_getpages() and swap_pager_getpages_locked() each
    include code to read a few pages behind, and a few pages ahead of, a
    specified page range.  The same code can serve both functions, and
    that code is now encapsulated in a new vm_object function,
    vm_object_prepare_buf_pages().  For the swap case, this also
    eliminates some needless linked-list traversal.
    
    Reviewed by:    alc, kib
    Differential Revision:  https://reviews.freebsd.org/D49224
---
 sys/vm/swap_pager.c  | 140 +++++++++++++++++----------------------------------
 sys/vm/vm_object.c   |  58 +++++++++++++++++++++
 sys/vm/vm_object.h   |   2 +
 sys/vm/vnode_pager.c |  80 +++++------------------------
 4 files changed, 119 insertions(+), 161 deletions(-)

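The heart of the change is the new helper, vm_object_prepare_buf_pages(),
which both pagers now use to populate a buf's page array.  The following
caller-side sketch (example_fill_buf() is a hypothetical function, not part
of the commit; the field assignments follow the swap pager's usage) shows
the contract the helper establishes:

	/*
	 * "ma" holds "count" busied pages.  On return, bp->b_pages[]
	 * holds *rbehind newly allocated readbehind pages, the count
	 * requested pages, and *rahead readahead pages, all in ascending
	 * pindex order.  The helper may shrink *rbehind and *rahead to
	 * the number of pages it could actually allocate; the caller
	 * must have clipped them beforehand so the total fits in the
	 * buf's page array.
	 */
	static void
	example_fill_buf(vm_object_t object, struct buf *bp, vm_page_t *ma,
	    int count, int *rbehind, int *rahead)
	{
		VM_OBJECT_WLOCK(object);
		vm_object_prepare_buf_pages(object, bp->b_pages, count,
		    rbehind, rahead, ma);
		VM_OBJECT_WUNLOCK(object);

		bp->b_npages = *rbehind + count + *rahead;
		bp->b_pgbefore = *rbehind;
		bp->b_pgafter = *rahead;
		bp->b_bufsize = bp->b_bcount = ptoa(bp->b_npages);
	}
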
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 0dee95fb2c5e..7b83ae4bfb7b 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1344,39 +1344,36 @@ swap_pager_unswapped(vm_page_t m)
 }
 
 /*
- * swap_pager_getpages() - bring pages in from swap
+ * swap_pager_getpages_locked() - bring pages in from swap
  *
  *	Attempt to page in the pages in array "ma" of length "count".  The
  *	caller may optionally specify that additional pages preceding and
  *	succeeding the specified range be paged in.  The number of such pages
- *	is returned in the "rbehind" and "rahead" parameters, and they will
+ *	is returned in the "a_rbehind" and "a_rahead" parameters, and they will
  *	be in the inactive queue upon return.
  *
  *	The pages in "ma" must be busied and will remain busied upon return.
  */
 static int
 swap_pager_getpages_locked(struct pctrie_iter *blks, vm_object_t object,
-    vm_page_t *ma, int count, int *rbehind, int *rahead)
+    vm_page_t *ma, int count, int *a_rbehind, int *a_rahead, struct buf *bp)
 {
-	struct buf *bp;
-	vm_page_t bm, mpred, msucc, p;
 	vm_pindex_t pindex;
-	daddr_t blk;
-	int i, maxahead, maxbehind, reqcount;
+	int rahead, rbehind;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
-	reqcount = count;
 
 	KASSERT((object->flags & OBJ_SWAP) != 0,
 	    ("%s: object not swappable", __func__));
-	if (!swp_pager_haspage_iter(blks, ma[0]->pindex, &maxbehind,
-	    &maxahead)) {
+	pindex = ma[0]->pindex;
+	if (!swp_pager_haspage_iter(blks, pindex, &rbehind, &rahead)) {
 		VM_OBJECT_WUNLOCK(object);
+		uma_zfree(swrbuf_zone, bp);
 		return (VM_PAGER_FAIL);
 	}
 
-	KASSERT(reqcount - 1 <= maxahead,
-	    ("page count %d extends beyond swap block", reqcount));
+	KASSERT(count - 1 <= rahead,
+	    ("page count %d extends beyond swap block", count));
 
 	/*
 	 * Do not transfer any pages other than those that are xbusied
@@ -1385,95 +1382,43 @@ swap_pager_getpages_locked(struct pctrie_iter *blks, vm_object_t object,
 	 * moved into another object.
 	 */
 	if ((object->flags & (OBJ_SPLIT | OBJ_DEAD)) != 0) {
-		maxahead = reqcount - 1;
-		maxbehind = 0;
-	}
-
-	/*
-	 * Clip the readahead and readbehind ranges to exclude resident pages.
-	 */
-	if (rahead != NULL) {
-		*rahead = imin(*rahead, maxahead - (reqcount - 1));
-		pindex = ma[reqcount - 1]->pindex;
-		msucc = TAILQ_NEXT(ma[reqcount - 1], listq);
-		if (msucc != NULL && msucc->pindex - pindex - 1 < *rahead)
-			*rahead = msucc->pindex - pindex - 1;
-	}
-	if (rbehind != NULL) {
-		*rbehind = imin(*rbehind, maxbehind);
-		pindex = ma[0]->pindex;
-		mpred = TAILQ_PREV(ma[0], pglist, listq);
-		if (mpred != NULL && pindex - mpred->pindex - 1 < *rbehind)
-			*rbehind = pindex - mpred->pindex - 1;
-	}
-
-	bm = ma[0];
-	for (i = 0; i < count; i++)
-		ma[i]->oflags |= VPO_SWAPINPROG;
-
-	/*
-	 * Allocate readahead and readbehind pages.
-	 */
-	if (rbehind != NULL) {
-		pindex = ma[0]->pindex;
-		/* Stepping backward from pindex, mpred doesn't change. */
-		for (i = 1; i <= *rbehind; i++) {
-			p = vm_page_alloc_after(object, pindex - i,
-			    VM_ALLOC_NORMAL, mpred);
-			if (p == NULL)
-				break;
-			p->oflags |= VPO_SWAPINPROG;
-			bm = p;
-		}
-		*rbehind = i - 1;
-	}
-	if (rahead != NULL) {
-		p = ma[reqcount - 1];
-		pindex = p->pindex;
-		for (i = 0; i < *rahead; i++) {
-			p = vm_page_alloc_after(object, pindex + i + 1,
-			    VM_ALLOC_NORMAL, p);
-			if (p == NULL)
-				break;
-			p->oflags |= VPO_SWAPINPROG;
-		}
-		*rahead = i;
-	}
-	if (rbehind != NULL)
-		count += *rbehind;
-	if (rahead != NULL)
-		count += *rahead;
-
-	vm_object_pip_add(object, count);
-
-	pindex = bm->pindex;
-	blk = swp_pager_meta_lookup(blks, pindex);
-	KASSERT(blk != SWAPBLK_NONE,
+		rahead = count - 1;
+		rbehind = 0;
+	}
+	/* Clip readbehind/ahead ranges to exclude already resident pages. */
+	rbehind = a_rbehind != NULL ? imin(*a_rbehind, rbehind) : 0;
+	rahead = a_rahead != NULL ? imin(*a_rahead, rahead - count + 1) : 0;
+	/* Allocate pages. */
+	vm_object_prepare_buf_pages(object, bp->b_pages, count, &rbehind,
+	    &rahead, ma);
+	bp->b_npages = rbehind + count + rahead;
+	for (int i = 0; i < bp->b_npages; i++)
+		bp->b_pages[i]->oflags |= VPO_SWAPINPROG;
+	bp->b_blkno = swp_pager_meta_lookup(blks, pindex - rbehind);
+	KASSERT(bp->b_blkno != SWAPBLK_NONE,
 	    ("no swap blocking containing %p(%jx)", object, (uintmax_t)pindex));
 
+	vm_object_pip_add(object, bp->b_npages);
 	VM_OBJECT_WUNLOCK(object);
-	bp = uma_zalloc(swrbuf_zone, M_WAITOK);
 	MPASS((bp->b_flags & B_MAXPHYS) != 0);
-	/* Pages cannot leave the object while busy. */
-	for (i = 0, p = bm; i < count; i++, p = TAILQ_NEXT(p, listq)) {
-		MPASS(p->pindex == bm->pindex + i);
-		bp->b_pages[i] = p;
-	}
+
+	/* Report back actual behind/ahead read. */
+	if (a_rbehind != NULL)
+		*a_rbehind = rbehind;
+	if (a_rahead != NULL)
+		*a_rahead = rahead;
 
 	bp->b_flags |= B_PAGING;
 	bp->b_iocmd = BIO_READ;
 	bp->b_iodone = swp_pager_async_iodone;
 	bp->b_rcred = crhold(thread0.td_ucred);
 	bp->b_wcred = crhold(thread0.td_ucred);
-	bp->b_blkno = blk;
-	bp->b_bcount = PAGE_SIZE * count;
-	bp->b_bufsize = PAGE_SIZE * count;
-	bp->b_npages = count;
-	bp->b_pgbefore = rbehind != NULL ? *rbehind : 0;
-	bp->b_pgafter = rahead != NULL ? *rahead : 0;
+	bp->b_bufsize = bp->b_bcount = ptoa(bp->b_npages);
+	bp->b_pgbefore = rbehind;
+	bp->b_pgafter = rahead;
 
 	VM_CNT_INC(v_swapin);
-	VM_CNT_ADD(v_swappgsin, count);
+	VM_CNT_ADD(v_swappgsin, bp->b_npages);
 
 	/*
 	 * perform the I/O.  NOTE!!!  bp cannot be considered valid after
@@ -1511,7 +1456,7 @@ swap_pager_getpages_locked(struct pctrie_iter *blks, vm_object_t object,
 	/*
 	 * If we had an unrecoverable read error pages will not be valid.
 	 */
-	for (i = 0; i < reqcount; i++)
+	for (int i = 0; i < count; i++)
 		if (ma[i]->valid != VM_PAGE_BITS_ALL)
 			return (VM_PAGER_ERROR);
 
@@ -1529,12 +1474,14 @@ static int
 swap_pager_getpages(vm_object_t object, vm_page_t *ma, int count,
     int *rbehind, int *rahead)
 {
+	struct buf *bp;
 	struct pctrie_iter blks;
 
+	bp = uma_zalloc(swrbuf_zone, M_WAITOK);
 	VM_OBJECT_WLOCK(object);
 	swblk_iter_init_only(&blks, object);
 	return (swap_pager_getpages_locked(&blks, object, ma, count, rbehind,
-	    rahead));
+	    rahead, bp));
 }
 
 /*
@@ -1929,7 +1876,8 @@ swap_pager_swapped_pages(vm_object_t object)
  *	to a swap device.
  */
 static void
-swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object)
+swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object,
+    struct buf **bp)
 {
 	struct pctrie_iter blks, pages;
 	struct page_range range;
@@ -1999,10 +1947,11 @@ swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object)
 			vm_object_pip_add(object, 1);
 			rahead = SWAP_META_PAGES;
 			rv = swap_pager_getpages_locked(&blks, object, &m, 1,
-			    NULL, &rahead);
+			    NULL, &rahead, *bp);
 			if (rv != VM_PAGER_OK)
 				panic("%s: read from swap failed: %d",
 				    __func__, rv);
+			*bp = uma_zalloc(swrbuf_zone, M_WAITOK);
 			VM_OBJECT_WLOCK(object);
 			vm_object_pip_wakeupn(object, 1);
 			KASSERT(vm_page_all_valid(m),
@@ -2064,12 +2013,14 @@ static void
 swap_pager_swapoff(struct swdevt *sp)
 {
 	vm_object_t object;
+	struct buf *bp;
 	int retries;
 
 	sx_assert(&swdev_syscall_lock, SA_XLOCKED);
 
 	retries = 0;
 full_rescan:
+	bp = uma_zalloc(swrbuf_zone, M_WAITOK);
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_FOREACH(object, &vm_object_list, object_list) {
 		if ((object->flags & OBJ_SWAP) == 0)
@@ -2094,12 +2045,13 @@ full_rescan:
 		if ((object->flags & OBJ_SWAP) == 0)
 			goto next_obj;
 
-		swap_pager_swapoff_object(sp, object);
+		swap_pager_swapoff_object(sp, object, &bp);
 next_obj:
 		VM_OBJECT_WUNLOCK(object);
 		mtx_lock(&vm_object_list_mtx);
 	}
 	mtx_unlock(&vm_object_list_mtx);
+	uma_zfree(swrbuf_zone, bp);
 
 	if (sp->sw_used) {
 		/*
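
A note on the buf handoff visible above: swap_pager_getpages_locked() now
consumes a caller-supplied buf rather than allocating its own, so the
sleepable uma_zalloc(..., M_WAITOK) can happen before the object lock is
taken, and swap_pager_swapoff_object() can keep a spare buf ready across
iterations.  A minimal hypothetical caller (example_swap_read_one() is not
part of the commit) illustrating the allocate-before-lock pattern:

	static int
	example_swap_read_one(vm_object_t object, vm_page_t m)
	{
		struct pctrie_iter blks;
		struct buf *bp;

		/* Sleepable allocation, done while no locks are held. */
		bp = uma_zalloc(swrbuf_zone, M_WAITOK);
		VM_OBJECT_WLOCK(object);
		swblk_iter_init_only(&blks, object);
		/*
		 * "m" must be busied and the object swappable (OBJ_SWAP).
		 * The call consumes bp on every path; on VM_PAGER_FAIL it
		 * frees the buf itself.  It returns with the object lock
		 * dropped.
		 */
		return (swap_pager_getpages_locked(&blks, object, &m, 1,
		    NULL, NULL, bp));
	}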
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 79a28c4d77b0..592f48296c22 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2258,6 +2258,64 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset,
 	return (TRUE);
 }
 
+/*
+ * Fill in the m_dst array with up to *rbehind optional pages before m_src[0]
+ * and up to *rahead optional pages after m_src[count - 1].  In both cases, stop
+ * the filling-in short on encountering a cached page, an object boundary limit,
+ * or an allocation error.  Update *rbehind and *rahead to indicate the number
+ * of pages allocated.  Copy elements of m_src into array elements from
+ * m_dst[*rbehind] to m_dst[*rbehind + count - 1].
+ */
+void
+vm_object_prepare_buf_pages(vm_object_t object, vm_page_t *ma_dst, int count,
+    int *rbehind, int *rahead, vm_page_t *ma_src)
+{
+	vm_pindex_t pindex;
+	vm_page_t m, mpred, msucc;
+
+	VM_OBJECT_ASSERT_LOCKED(object);
+	if (*rbehind != 0) {
+		m = ma_src[0];
+		pindex = m->pindex;
+		mpred = TAILQ_PREV(m, pglist, listq);
+		*rbehind = MIN(*rbehind,
+		    pindex - (mpred != NULL ? mpred->pindex + 1 : 0));
+		/* Stepping backward from pindex, mpred doesn't change. */
+		for (int i = 0; i < *rbehind; i++) {
+			m = vm_page_alloc_after(object, pindex - i - 1,
+			    VM_ALLOC_NORMAL, mpred);
+			if (m == NULL) {
+				/* Shift the array. */
+				for (int j = 0; j < i; j++)
+					ma_dst[j] = ma_dst[j + *rbehind - i];
+				*rbehind = i;
+				*rahead = 0;
+				break;
+			}
+			ma_dst[*rbehind - i - 1] = m;
+		}
+	}	
+	for (int i = 0; i < count; i++)
+		ma_dst[*rbehind + i] = ma_src[i];
+	if (*rahead != 0) {
+		m = ma_src[count - 1];
+		pindex = m->pindex + 1;
+		msucc = TAILQ_NEXT(m, listq);
+		*rahead = MIN(*rahead,
+		    (msucc != NULL ? msucc->pindex : object->size) - pindex);
+		mpred = m;
+		for (int i = 0; i < *rahead; i++) {
+			m = vm_page_alloc_after(object, pindex + i,
+			    VM_ALLOC_NORMAL, mpred);
+			if (m == NULL) {
+				*rahead = i;
+				break;
+			}
+			ma_dst[*rbehind + count + i] = mpred = m;
+		}
+	}
+}
+
 void
 vm_object_set_writeable_dirty_(vm_object_t object)
 {
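
To make the index arithmetic in vm_object_prepare_buf_pages() concrete,
here is a worked layout (illustrative values, not from the commit) for
count = 2, *rbehind = 3 and *rahead = 2, with every allocation succeeding
and the requested pages starting at pindex:

	ma_dst[0..2]	readbehind pages, allocated back to front
			(pindex - 3, pindex - 2, pindex - 1)
	ma_dst[3..4]	ma_src[0] and ma_src[1], the busied requested pages
	ma_dst[5..6]	readahead pages, allocated front to back

If a readbehind allocation fails after i successes, the i pages already
allocated sit in ma_dst[*rbehind - i .. *rbehind - 1]; they are shifted
down to ma_dst[0 .. i - 1], *rbehind becomes i, and *rahead is forced to
0, so the resulting array is always dense.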
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index fc39041d02d6..fabd21809036 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -377,6 +377,8 @@ void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start,
 void vm_object_page_remove(vm_object_t object, vm_pindex_t start,
     vm_pindex_t end, int options);
 boolean_t vm_object_populate(vm_object_t, vm_pindex_t, vm_pindex_t);
+void vm_object_prepare_buf_pages(vm_object_t object, vm_page_t *ma_dst,
+    int count, int *rbehind, int *rahead, vm_page_t *ma_src);
 void vm_object_print(long addr, boolean_t have_addr, long count, char *modif);
 void vm_object_reference (vm_object_t);
 void vm_object_reference_locked(vm_object_t);
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 69509fe8948a..24e3bef07f7c 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -1007,9 +1007,8 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
 	rbehind = a_rbehind ? *a_rbehind : 0;
 	rahead = a_rahead ? *a_rahead : 0;
 	rbehind = min(rbehind, before);
-	rbehind = min(rbehind, m[0]->pindex);
 	rahead = min(rahead, after);
-	rahead = min(rahead, object->size - m[count - 1]->pindex);
+
 	/*
 	 * Check that total amount of pages fit into buf.  Trim rbehind and
 	 * rahead evenly if not.
@@ -1039,72 +1038,19 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
 	 * for read ahead pages, but there is no need to shift the array
 	 * in case of encountering a cached page.
 	 */
-	i = bp->b_npages = 0;
-	if (rbehind) {
-		vm_pindex_t startpindex, tpindex;
-		vm_page_t mpred, p;
-
+	if (rbehind != 0 || rahead != 0) {
 		VM_OBJECT_WLOCK(object);
-		tpindex = m[0]->pindex;
-		startpindex = MAX(tpindex, rbehind) - rbehind;
-		if ((mpred = TAILQ_PREV(m[0], pglist, listq)) != NULL)
-			startpindex = MAX(startpindex, mpred->pindex + 1);
-
-		/* Stepping backward from pindex, mpred doesn't change. */
-		for (; tpindex-- > startpindex; i++) {
-			p = vm_page_alloc_after(object, tpindex,
-			    VM_ALLOC_NORMAL, mpred);
-			if (p == NULL) {
-				/* Shift the array. */
-				for (int j = 0; j < i; j++)
-					bp->b_pages[j] = bp->b_pages[j + 
-					    tpindex + 1 - startpindex]; 
-				break;
-			}
-			bp->b_pages[tpindex - startpindex] = p;
-		}
-
-		bp->b_pgbefore = i;
-		bp->b_npages += i;
-		bp->b_blkno -= IDX_TO_OFF(i) / DEV_BSIZE;
-	} else
-		bp->b_pgbefore = 0;
-
-	/* Requested pages. */
-	for (int j = 0; j < count; j++, i++)
-		bp->b_pages[i] = m[j];
-	bp->b_npages += count;
-
-	if (rahead) {
-		vm_pindex_t endpindex, tpindex;
-		vm_page_t p;
-
-		if (!VM_OBJECT_WOWNED(object))
-			VM_OBJECT_WLOCK(object);
-		endpindex = m[count - 1]->pindex + rahead + 1;
-		if ((p = TAILQ_NEXT(m[count - 1], listq)) != NULL &&
-		    p->pindex < endpindex)
-			endpindex = p->pindex;
-		if (endpindex > object->size)
-			endpindex = object->size;
-
-		p = m[count - 1];
-		for (tpindex = p->pindex + 1;
-		    tpindex < endpindex; i++, tpindex++) {
-			p = vm_page_alloc_after(object, tpindex,
-			    VM_ALLOC_NORMAL, p);
-			if (p == NULL)
-				break;
-			bp->b_pages[i] = p;
-		}
-
-		bp->b_pgafter = i - bp->b_npages;
-		bp->b_npages = i;
-	} else
-		bp->b_pgafter = 0;
-
-	if (VM_OBJECT_WOWNED(object))
+		vm_object_prepare_buf_pages(object, bp->b_pages, count,
+		    &rbehind, &rahead, m);
 		VM_OBJECT_WUNLOCK(object);
+	} else {
+		for (int j = 0; j < count; j++)
+			bp->b_pages[j] = m[j];
+	}
+	bp->b_blkno -= IDX_TO_OFF(rbehind) / DEV_BSIZE;
+	bp->b_pgbefore = rbehind;
+	bp->b_pgafter = rahead;
+	bp->b_npages = rbehind + count + rahead;
 
 	/* Report back actual behind/ahead read. */
 	if (a_rbehind)
@@ -1131,7 +1077,7 @@ vnode_pager_generic_getpages(struct vnode *vp, vm_page_t *m, int count,
 	 * for real devices.
 	 */
 	foff = IDX_TO_OFF(bp->b_pages[0]->pindex);
-	bytecount = bp->b_npages << PAGE_SHIFT;
+	bytecount = ptoa(bp->b_npages);
 	if ((foff + bytecount) > object->un_pager.vnp.vnp_size)
 		bytecount = object->un_pager.vnp.vnp_size - foff;
 	secmask = bo->bo_bsize - 1;
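
The ptoa() conversion above replaces an equivalent open-coded shift.  As a
worked example with illustrative numbers: for 4 KB pages and bp->b_npages =
4, bytecount starts at ptoa(4) = 16384; if the vnode is only vnp_size =
14000 bytes long and foff = 0, the read is trimmed to 14000 bytes so the
pager never asks the device to read past end of file, and the secmask
computed on the last line is then used, past the end of this excerpt, to
round the trimmed request back up to a multiple of the device block size.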