git: 9cc73397534e - main - vm_page: use iterators in page allocation

From: Doug Moore <dougm_at_FreeBSD.org>
Date: Mon, 14 Apr 2025 07:10:36 UTC
The branch main has been updated by dougm:

URL: https://cgit.FreeBSD.org/src/commit/?id=9cc73397534ee5373593f5946abe0b00c1d2b657

commit 9cc73397534ee5373593f5946abe0b00c1d2b657
Author:     Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2025-04-14 07:05:21 +0000
Commit:     Doug Moore <dougm@FreeBSD.org>
CommitDate: 2025-04-14 07:05:21 +0000

    vm_page: use iterators in page allocation
    
    Change vm_page_alloc_after() and vm_page_alloc_domain_after() to take
    a page iterator argument, allowing faster insertion into the radix
    trie of pages.  Where a caller of one of those functions doesn't
    already have a page iterator to use, define one.  Where vm_page_grab()
    is invoked in a loop, invoke a new version, vm_page_grab_iter(), that
    takes an iterator argument, and pass the same iterator in each call.
    Make sure that
    vm_page_alloc_after() and vm_page_alloc_domain_after() return with the
    iterator in a valid state, even when the object lock was briefly lost
    and regained.
    
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D49688
---
 sys/vm/phys_pager.c |   6 +-
 sys/vm/swap_pager.c |   5 +-
 sys/vm/vm_fault.c   |  14 +++--
 sys/vm/vm_glue.c    |  43 +++++++-------
 sys/vm/vm_kern.c    |   6 +-
 sys/vm/vm_object.c  |  10 ++--
 sys/vm/vm_page.c    | 165 ++++++++++++++++++++++++++--------------------------
 sys/vm/vm_page.h    |   9 ++-
 8 files changed, 136 insertions(+), 122 deletions(-)
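
[Editorial note: the following is a minimal sketch of the new calling
convention, not code from the commit.  It is modeled on the
kmem_back_domain() and swap_pager_swapoff_object() hunks below;
"start" and "end" are placeholder pindex bounds, and "object" is
assumed to be a vm_object_t the caller may write-lock.]

	struct pctrie_iter pages;
	vm_page_t m, mpred;
	vm_pindex_t pindex;

	/* One iterator per object, initialized once and reused. */
	vm_page_iter_init(&pages, object);
	VM_OBJECT_WLOCK(object);
	mpred = vm_radix_iter_lookup_lt(&pages, start);
	for (pindex = start; pindex < end; pindex++, mpred = m) {
		/*
		 * Each insertion starts from the iterator's current
		 * position in the radix trie instead of walking down
		 * from the root again.
		 */
		m = vm_page_alloc_after(object, &pages, pindex,
		    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL, mpred);
		if (m == NULL)
			break;	/* Iterator is still valid, per the commit. */
	}
	VM_OBJECT_WUNLOCK(object);

vm_page_grab_iter() follows the same pattern: initialize the iterator once
and pass it to every grab in the loop, as the default_phys_pager_populate()
hunk shows.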

diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 6ee5076f9a91..ee39221402fa 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -33,6 +33,7 @@
 #include <sys/proc.h>
 #include <sys/mutex.h>
 #include <sys/mman.h>
+#include <sys/pctrie.h>
 #include <sys/rwlock.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
@@ -230,10 +231,12 @@ default_phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
     int fault_type __unused, vm_prot_t max_prot __unused, vm_pindex_t *first,
     vm_pindex_t *last)
 {
+	struct pctrie_iter pages;
 	vm_page_t m;
 	vm_pindex_t base, end, i;
 	int ahead;
 
+	VM_OBJECT_ASSERT_WLOCKED(object);
 	base = rounddown(pidx, phys_pager_cluster);
 	end = base + phys_pager_cluster - 1;
 	if (end >= object->size)
@@ -244,10 +247,11 @@ default_phys_pager_populate(vm_object_t object, vm_pindex_t pidx,
 		end = *last;
 	*first = base;
 	*last = end;
+	vm_page_iter_init(&pages, object);
 
 	for (i = base; i <= end; i++) {
 		ahead = MIN(end - i, PHYSALLOC);
-		m = vm_page_grab(object, i,
+		m = vm_page_grab_iter(object, &pages, i,
 		    VM_ALLOC_NORMAL | VM_ALLOC_COUNT(ahead));
 		if (!vm_page_all_valid(m))
 			vm_page_zero_invalid(m, TRUE);
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 7b83ae4bfb7b..38203e4cd0af 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1935,9 +1935,10 @@ swap_pager_swapoff_object(struct swdevt *sp, vm_object_t object,
 				if (!vm_page_busy_acquire(m, VM_ALLOC_WAITFAIL))
 					break;
 			} else {
-				m = vm_radix_iter_lookup_le(&pages,
+				m = vm_radix_iter_lookup_lt(&pages,
 				    blks.index + i);
-				m = vm_page_alloc_after(object, blks.index + i,
+				m = vm_page_alloc_after(
+				    object, &pages, blks.index + i,
 				    VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL, m);
 				if (m == NULL)
 					break;
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 0bd3a8207c4a..e1db7ac8b524 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -1292,7 +1292,7 @@ vm_fault_allocate(struct faultstate *fs, struct pctrie_iter *pages)
 			vm_fault_unlock_and_deallocate(fs);
 			return (FAULT_FAILURE);
 		}
-		fs->m = vm_page_alloc_after(fs->object, fs->pindex,
+		fs->m = vm_page_alloc_after(fs->object, pages, fs->pindex,
 		    P_KILLED(curproc) ? VM_ALLOC_SYSTEM : 0,
 		    vm_radix_iter_lookup_lt(pages, fs->pindex));
 	}
@@ -2100,6 +2100,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map __unused,
     vm_map_entry_t dst_entry, vm_map_entry_t src_entry,
     vm_ooffset_t *fork_charge)
 {
+	struct pctrie_iter pages;
 	vm_object_t backing_object, dst_object, object, src_object;
 	vm_pindex_t dst_pindex, pindex, src_pindex;
 	vm_prot_t access, prot;
@@ -2176,6 +2177,7 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map __unused,
 	 * with the source object, all of its pages must be dirtied,
 	 * regardless of whether they can be written.
 	 */
+	vm_page_iter_init(&pages, dst_object);
 	mpred = (src_object == dst_object) ?
 	   vm_page_mpred(src_object, src_pindex) : NULL;
 	for (vaddr = dst_entry->start, dst_pindex = 0;
@@ -2220,14 +2222,15 @@ again:
 			 */
 			pindex = (src_object == dst_object ? src_pindex : 0) +
 			    dst_pindex;
-			dst_m = vm_page_alloc_after(dst_object, pindex,
+			dst_m = vm_page_alloc_after(dst_object, &pages, pindex,
 			    VM_ALLOC_NORMAL, mpred);
 			if (dst_m == NULL) {
 				VM_OBJECT_WUNLOCK(dst_object);
 				VM_OBJECT_RUNLOCK(object);
 				vm_wait(dst_object);
 				VM_OBJECT_WLOCK(dst_object);
-				mpred = vm_page_mpred(dst_object, pindex);
+				pctrie_iter_reset(&pages);
+				mpred = vm_radix_iter_lookup_lt(&pages, pindex);
 				goto again;
 			}
 
@@ -2249,8 +2252,11 @@ again:
 			VM_OBJECT_RUNLOCK(object);
 		} else {
 			dst_m = src_m;
-			if (vm_page_busy_acquire(dst_m, VM_ALLOC_WAITFAIL) == 0)
+			if (vm_page_busy_acquire(
+			    dst_m, VM_ALLOC_WAITFAIL) == 0) {
+				pctrie_iter_reset(&pages);
 				goto again;
+			}
 			if (dst_m->pindex >= dst_object->size) {
 				/*
 				 * We are upgrading.  Index can occur
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index 0090904785ab..c441b8b3155f 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -98,6 +98,7 @@
 #include <vm/vm_pagequeue.h>
 #include <vm/vm_object.h>
 #include <vm/vm_kern.h>
+#include <vm/vm_radix.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_phys.h>
@@ -611,40 +612,36 @@ static int
 vm_thread_stack_back(vm_offset_t ks, vm_page_t ma[], int npages, int req_class,
     int domain)
 {
+	struct pctrie_iter pages;
 	vm_object_t obj = vm_thread_kstack_size_to_obj(npages);
 	vm_pindex_t pindex;
-	vm_page_t m;
+	vm_page_t m, mpred;
 	int n;
 
 	pindex = vm_kstack_pindex(ks, npages);
 
+	vm_page_iter_init(&pages, obj);
 	VM_OBJECT_WLOCK(obj);
-	for (n = 0; n < npages;) {
-		m = vm_page_grab(obj, pindex + n,
+	for (n = 0; n < npages; ma[n++] = m) {
+		m = vm_page_grab_iter(obj, &pages, pindex + n,
 		    VM_ALLOC_NOCREAT | VM_ALLOC_WIRED);
-		if (m == NULL) {
-			m = n > 0 ? ma[n - 1] : vm_page_mpred(obj, pindex);
-			m = vm_page_alloc_domain_after(obj, pindex + n, domain,
-			    req_class | VM_ALLOC_WIRED, m);
+		if (m != NULL)
+			continue;
+		mpred = (n > 0) ? ma[n - 1] :
+		    vm_radix_iter_lookup_lt(&pages, pindex);
+		m = vm_page_alloc_domain_after(obj, &pages, pindex + n,
+		    domain, req_class | VM_ALLOC_WIRED, mpred);
+		if (m != NULL)
+			continue;
+		for (int i = 0; i < n; i++) {
+			m = ma[i];
+			(void)vm_page_unwire_noq(m);
+			vm_page_free(m);
 		}
-		if (m == NULL)
-			break;
-		ma[n++] = m;
+		break;
 	}
-	if (n < npages)
-		goto cleanup;
 	VM_OBJECT_WUNLOCK(obj);
-
-	return (0);
-cleanup:
-	for (int i = 0; i < n; i++) {
-		m = ma[i];
-		(void)vm_page_unwire_noq(m);
-		vm_page_free(m);
-	}
-	VM_OBJECT_WUNLOCK(obj);
-
-	return (ENOMEM);
+	return (n < npages ? ENOMEM : 0);
 }
 
 static vm_object_t
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 8945061aabdc..2aa5588a7588 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -530,6 +530,7 @@ int
 kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
     vm_size_t size, int flags)
 {
+	struct pctrie_iter pages;
 	vm_offset_t offset, i;
 	vm_page_t m, mpred;
 	vm_prot_t prot;
@@ -546,11 +547,12 @@ kmem_back_domain(int domain, vm_object_t object, vm_offset_t addr,
 	prot = (flags & M_EXEC) != 0 ? VM_PROT_ALL : VM_PROT_RW;
 
 	i = 0;
+	vm_page_iter_init(&pages, object);
 	VM_OBJECT_WLOCK(object);
 retry:
-	mpred = vm_radix_lookup_le(&object->rtree, atop(offset + i));
+	mpred = vm_radix_iter_lookup_lt(&pages, atop(offset + i));
 	for (; i < size; i += PAGE_SIZE, mpred = m) {
-		m = vm_page_alloc_domain_after(object, atop(offset + i),
+		m = vm_page_alloc_domain_after(object, &pages, atop(offset + i),
 		    domain, pflags, mpred);
 
 		/*
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 689ad7499f5c..19e585745a80 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -2275,19 +2275,21 @@ void
 vm_object_prepare_buf_pages(vm_object_t object, vm_page_t *ma_dst, int count,
     int *rbehind, int *rahead, vm_page_t *ma_src)
 {
+	struct pctrie_iter pages;
 	vm_pindex_t pindex;
 	vm_page_t m, mpred, msucc;
 
+	vm_page_iter_init(&pages, object);
 	VM_OBJECT_ASSERT_LOCKED(object);
 	if (*rbehind != 0) {
 		m = ma_src[0];
 		pindex = m->pindex;
-		mpred = TAILQ_PREV(m, pglist, listq);
+		mpred = vm_radix_iter_lookup_lt(&pages, pindex);
 		*rbehind = MIN(*rbehind,
 		    pindex - (mpred != NULL ? mpred->pindex + 1 : 0));
 		/* Stepping backward from pindex, mpred doesn't change. */
 		for (int i = 0; i < *rbehind; i++) {
-			m = vm_page_alloc_after(object, pindex - i - 1,
+			m = vm_page_alloc_after(object, &pages, pindex - i - 1,
 			    VM_ALLOC_NORMAL, mpred);
 			if (m == NULL) {
 				/* Shift the array. */
@@ -2305,12 +2307,12 @@ vm_object_prepare_buf_pages(vm_object_t object, vm_page_t *ma_dst, int count,
 	if (*rahead != 0) {
 		m = ma_src[count - 1];
 		pindex = m->pindex + 1;
-		msucc = TAILQ_NEXT(m, listq);
+		msucc = vm_radix_iter_lookup_ge(&pages, pindex);
 		*rahead = MIN(*rahead,
 		    (msucc != NULL ? msucc->pindex : object->size) - pindex);
 		mpred = m;
 		for (int i = 0; i < *rahead; i++) {
-			m = vm_page_alloc_after(object, pindex + i,
+			m = vm_page_alloc_after(object, &pages, pindex + i,
 			    VM_ALLOC_NORMAL, mpred);
 			if (m == NULL) {
 				*rahead = i;
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index f1a891ff6955..65d278caf3fd 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -171,8 +171,6 @@ static void vm_page_enqueue(vm_page_t m, uint8_t queue);
 static bool vm_page_free_prep(vm_page_t m);
 static void vm_page_free_toq(vm_page_t m);
 static void vm_page_init(void *dummy);
-static int vm_page_insert_after(vm_page_t m, vm_object_t object,
-    vm_pindex_t pindex, vm_page_t mpred);
 static void vm_page_insert_radixdone(vm_page_t m, vm_object_t object,
     vm_page_t mpred);
 static void vm_page_mvqueue(vm_page_t m, const uint8_t queue,
@@ -1473,18 +1471,17 @@ vm_page_dirty_KBI(vm_page_t m)
 
 /*
  * Insert the given page into the given object at the given pindex.  mpred is
- * used for memq linkage.  From vm_page_insert, lookup is true, mpred is
- * initially NULL, and this procedure looks it up.  From vm_page_insert_after
- * and vm_page_iter_insert, lookup is false and mpred is known to the caller
- * to be valid, and may be NULL if this will be the page with the lowest
- * pindex.
+ * used for memq linkage.  From vm_page_insert, iter is false, mpred is
+ * initially NULL, and this procedure looks it up.  From vm_page_iter_insert,
+ * iter is true and mpred is known to the caller to be valid, and may be NULL if
+ * this will be the page with the lowest pindex.
  *
  * The procedure is marked __always_inline to suggest to the compiler to
  * eliminate the lookup parameter and the associated alternate branch.
  */
 static __always_inline int
 vm_page_insert_lookup(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
-    struct pctrie_iter *pages, bool iter, vm_page_t mpred, bool lookup)
+    struct pctrie_iter *pages, bool iter, vm_page_t mpred)
 {
 	int error;
 
@@ -1503,13 +1500,10 @@ vm_page_insert_lookup(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
 	 * Add this page to the object's radix tree, and look up mpred if
 	 * needed.
 	 */
-	if (iter) {
-		KASSERT(!lookup, ("%s: cannot lookup mpred", __func__));
+	if (iter)
 		error = vm_radix_iter_insert(pages, m);
-	} else if (lookup)
-		error = vm_radix_insert_lookup_lt(&object->rtree, m, &mpred);
 	else
-		error = vm_radix_insert(&object->rtree, m);
+		error = vm_radix_insert_lookup_lt(&object->rtree, m, &mpred);
 	if (__predict_false(error != 0)) {
 		m->object = NULL;
 		m->pindex = 0;
@@ -1535,26 +1529,7 @@ vm_page_insert_lookup(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
 int
 vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
 {
-	return (vm_page_insert_lookup(m, object, pindex, NULL, false, NULL,
-	    true));
-}
-
-/*
- *	vm_page_insert_after:
- *
- *	Inserts the page "m" into the specified object at offset "pindex".
- *
- *	The page "mpred" must immediately precede the offset "pindex" within
- *	the specified object.
- *
- *	The object must be locked.
- */
-static int
-vm_page_insert_after(vm_page_t m, vm_object_t object, vm_pindex_t pindex,
-    vm_page_t mpred)
-{
-	return (vm_page_insert_lookup(m, object, pindex, NULL, false, mpred,
-	    false));
+	return (vm_page_insert_lookup(m, object, pindex, NULL, false, NULL));
 }
 
 /*
@@ -1573,8 +1548,7 @@ static int
 vm_page_iter_insert(struct pctrie_iter *pages, vm_page_t m, vm_object_t object,
     vm_pindex_t pindex, vm_page_t mpred)
 {
-	return (vm_page_insert_lookup(m, object, pindex, pages, true, mpred,
-	    false));
+	return (vm_page_insert_lookup(m, object, pindex, pages, true, mpred));
 }
 
 /*
@@ -2125,8 +2099,10 @@ vm_page_mpred(vm_object_t object, vm_pindex_t pindex)
 vm_page_t
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
+	struct pctrie_iter pages;
 
-	return (vm_page_alloc_after(object, pindex, req,
+	vm_page_iter_init(&pages, object);
+	return (vm_page_alloc_after(object, &pages, pindex, req,
 	    vm_page_mpred(object, pindex)));
 }
 
@@ -2137,8 +2113,8 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
  * page index, or NULL if no such page exists.
  */
 vm_page_t
-vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
-    int req, vm_page_t mpred)
+vm_page_alloc_after(vm_object_t object, struct pctrie_iter *pages,
+    vm_pindex_t pindex, int req, vm_page_t mpred)
 {
 	struct vm_domainset_iter di;
 	vm_page_t m;
@@ -2146,8 +2122,8 @@ vm_page_alloc_after(vm_object_t object, vm_pindex_t pindex,
 
 	vm_domainset_iter_page_init(&di, object, pindex, &domain, &req);
 	do {
-		m = vm_page_alloc_domain_after(object, pindex, domain, req,
-		    mpred);
+		m = vm_page_alloc_domain_after(object, pages, pindex, domain,
+		    req, mpred);
 		if (m != NULL)
 			break;
 	} while (vm_domainset_iter_page(&di, object, &domain) == 0);
@@ -2209,8 +2185,8 @@ vm_domain_allocate(struct vm_domain *vmd, int req, int npages)
 }
 
 vm_page_t
-vm_page_alloc_domain_after(vm_object_t object, vm_pindex_t pindex, int domain,
-    int req, vm_page_t mpred)
+vm_page_alloc_domain_after(vm_object_t object, struct pctrie_iter *pages,
+    vm_pindex_t pindex, int domain, int req, vm_page_t mpred)
 {
 	struct vm_domain *vmd;
 	vm_page_t m;
@@ -2279,6 +2255,7 @@ again:
 		/*
 		 * Not allocatable, give up.
 		 */
+		pctrie_iter_reset(pages);
 		if (vm_domain_alloc_fail(vmd, object, req))
 			goto again;
 		return (NULL);
@@ -2315,7 +2292,7 @@ found:
 	}
 	m->a.act_count = 0;
 
-	if (vm_page_insert_after(m, object, pindex, mpred)) {
+	if (vm_page_insert_lookup(m, object, pindex, pages, true, mpred)) {
 		if (req & VM_ALLOC_WIRED) {
 			vm_wire_sub(1);
 			m->ref_count = 0;
@@ -2328,6 +2305,7 @@ found:
 		if (req & VM_ALLOC_WAITFAIL) {
 			VM_OBJECT_WUNLOCK(object);
 			vm_radix_wait();
+			pctrie_iter_reset(pages);
 			VM_OBJECT_WLOCK(object);
 		}
 		return (NULL);
@@ -2471,7 +2449,7 @@ vm_page_alloc_contig_domain(vm_object_t object, vm_pindex_t pindex, int domain,
 	KASSERT(npages > 0, ("vm_page_alloc_contig: npages is zero"));
 
 	vm_page_iter_init(&pages, object);
-	mpred = vm_radix_iter_lookup_le(&pages, pindex);
+	mpred = vm_radix_iter_lookup_lt(&pages, pindex);
 	KASSERT(mpred == NULL || mpred->pindex != pindex,
 	    ("vm_page_alloc_contig: pindex already allocated"));
 	for (;;) {
@@ -4875,32 +4853,32 @@ vm_page_grab_lookup(struct pctrie_iter *pages, vm_object_t object,
 }
 
 /*
- * Grab a page.  Keep on waiting, as long as the page exists in the object.  If
- * the page doesn't exist, first allocate it and then conditionally zero it.
+ * Grab a page, using the given iterator.  Keep on waiting, as long as the
+ * page exists in the object.  If the page doesn't exist, first allocate it
+ * and then conditionally zero it.
  *
  * The object must be locked on entry.  This routine may sleep.  The lock will,
  * however, be released and reacquired if the routine sleeps.
  */
 vm_page_t
-vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
+vm_page_grab_iter(vm_object_t object, struct pctrie_iter *pages,
+    vm_pindex_t pindex, int allocflags)
 {
-	struct pctrie_iter pages;
 	vm_page_t m, mpred;
 	bool found;
 
 	VM_OBJECT_ASSERT_WLOCKED(object);
 	vm_page_grab_check(allocflags);
 
-	vm_page_iter_init(&pages, object);
 	while ((m = vm_page_grab_lookup(
-	    &pages, object, pindex, allocflags, &found)) == NULL) {
+	    pages, object, pindex, allocflags, &found)) == NULL) {
 		if ((allocflags & VM_ALLOC_NOCREAT) != 0)
 			return (NULL);
 		if (found &&
 		    (allocflags & (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
 			return (NULL);
-		mpred = vm_radix_iter_lookup_le(&pages, pindex);
-		m = vm_page_alloc_after(object, pindex,
+		mpred = vm_radix_iter_lookup_lt(pages, pindex);
+		m = vm_page_alloc_after(object, pages, pindex,
 		    vm_page_grab_pflags(allocflags), mpred);
 		if (m != NULL) {
 			if ((allocflags & VM_ALLOC_ZERO) != 0 &&
@@ -4911,13 +4889,29 @@ vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
 		if ((allocflags &
 		    (VM_ALLOC_NOWAIT | VM_ALLOC_WAITFAIL)) != 0)
 			return (NULL);
-		pctrie_iter_reset(&pages);
 	}
 	vm_page_grab_release(m, allocflags);
 
 	return (m);
 }
 
+/*
+ * Grab a page.  Keep on waiting, as long as the page exists in the object.  If
+ * the page doesn't exist, first allocate it and then conditionally zero it.
+ *
+ * The object must be locked on entry.  This routine may sleep.  The lock will,
+ * however, be released and reacquired if the routine sleeps.
+ */
+vm_page_t
+vm_page_grab(vm_object_t object, vm_pindex_t pindex, int allocflags)
+{
+	struct pctrie_iter pages;
+
+	VM_OBJECT_ASSERT_WLOCKED(object);
+	vm_page_iter_init(&pages, object);
+	return (vm_page_grab_iter(object, &pages, pindex, allocflags));
+}
+
 /*
  * Attempt to validate a page, locklessly acquiring it if necessary, given a
  * (object, pindex) tuple and either an invalided page or NULL.  The resulting
@@ -4998,9 +4992,11 @@ vm_page_grab_unlocked(vm_object_t object, vm_pindex_t pindex, int allocflags)
  * will neither be wired nor busy regardless of allocflags.
  */
 int
-vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int allocflags)
+vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex,
+    int allocflags)
 {
-	vm_page_t m;
+	struct pctrie_iter pages;
+	vm_page_t m, mpred;
 	vm_page_t ma[VM_INITIAL_PAGEIN];
 	int after, i, pflags, rv;
 
@@ -5014,9 +5010,10 @@ vm_page_grab_valid(vm_page_t *mp, vm_object_t object, vm_pindex_t pindex, int al
 	pflags = allocflags & ~(VM_ALLOC_NOBUSY | VM_ALLOC_SBUSY |
 	    VM_ALLOC_WIRED | VM_ALLOC_IGN_SBUSY);
 	pflags |= VM_ALLOC_WAITFAIL;
+	vm_page_iter_init(&pages, object);
 
 retrylookup:
-	if ((m = vm_page_lookup(object, pindex)) != NULL) {
+	if ((m = vm_radix_iter_lookup(&pages, pindex)) != NULL) {
 		/*
 		 * If the page is fully valid it can only become invalid
 		 * with the object lock held.  If it is not valid it can
@@ -5030,6 +5027,7 @@ retrylookup:
 		    vm_page_all_valid(m) ? allocflags : 0)) {
 			(void)vm_page_grab_sleep(object, m, pindex, "pgrbwt",
 			    allocflags, true);
+			pctrie_iter_reset(&pages);
 			goto retrylookup;
 		}
 		if (vm_page_all_valid(m))
@@ -5042,12 +5040,16 @@ retrylookup:
 	} else if ((allocflags & VM_ALLOC_NOCREAT) != 0) {
 		*mp = NULL;
 		return (VM_PAGER_FAIL);
-	} else if ((m = vm_page_alloc(object, pindex, pflags)) == NULL) {
-		if (!vm_pager_can_alloc_page(object, pindex)) {
-			*mp = NULL;
-			return (VM_PAGER_AGAIN);
+	} else {
+		mpred = vm_radix_iter_lookup_lt(&pages, pindex);
+		m = vm_page_alloc_after(object, &pages, pindex, pflags, mpred);
+		if (m == NULL) {
+			if (!vm_pager_can_alloc_page(object, pindex)) {
+				*mp = NULL;
+				return (VM_PAGER_AGAIN);
+			}
+			goto retrylookup;
 		}
-		goto retrylookup;
 	}
 
 	vm_page_assert_xbusied(m);
@@ -5055,18 +5057,17 @@ retrylookup:
 		after = MIN(after, VM_INITIAL_PAGEIN);
 		after = MIN(after, allocflags >> VM_ALLOC_COUNT_SHIFT);
 		after = MAX(after, 1);
-		ma[0] = m;
+		ma[0] = mpred = m;
 		for (i = 1; i < after; i++) {
-			if ((ma[i] = vm_page_next(ma[i - 1])) != NULL) {
-				if (vm_page_any_valid(ma[i]) ||
-				    !vm_page_tryxbusy(ma[i]))
-					break;
-			} else {
-				ma[i] = vm_page_alloc_after(object,
-				    m->pindex + i, VM_ALLOC_NORMAL, ma[i - 1]);
-				if (ma[i] == NULL)
+			m = vm_radix_iter_lookup(&pages, pindex + i);
+			if (m == NULL) {
+				m = vm_page_alloc_after(object, &pages,
+				    pindex + i, VM_ALLOC_NORMAL, mpred);
+				if (m == NULL)
 					break;
-			}
+			} else if (vm_page_any_valid(m) || !vm_page_tryxbusy(m))
+				break;
+			mpred = ma[i] = m;
 		}
 		after = i;
 		vm_object_pip_add(object, after);
@@ -5131,8 +5132,8 @@ vm_page_grab_zero_partial(vm_object_t object, vm_pindex_t pindex, int base,
 	    &pages, object, pindex, allocflags, &found)) == NULL) {
 		if (!vm_pager_has_page(object, pindex, NULL, NULL))
 			return (0);
-		mpred = vm_radix_iter_lookup_le(&pages, pindex);
-		m = vm_page_alloc_after(object, pindex,
+		mpred = vm_radix_iter_lookup_lt(&pages, pindex);
+		m = vm_page_alloc_after(object, &pages, pindex,
 		    vm_page_grab_pflags(allocflags), mpred);
 		if (m != NULL) {
 			vm_object_pip_add(object, 1);
@@ -5154,7 +5155,6 @@ vm_page_grab_zero_partial(vm_object_t object, vm_pindex_t pindex, int base,
 			vm_page_launder(m);
 			break;
 		}
-		pctrie_iter_reset(&pages);
 	}
 
 	pmap_zero_page_area(m, base, end - base);
@@ -5244,6 +5244,7 @@ int
 vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count)
 {
+	struct pctrie_iter pages;
 	vm_page_t m, mpred;
 	int pflags;
 	int i;
@@ -5257,25 +5258,24 @@ vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
 
 	pflags = vm_page_grab_pflags(allocflags);
 	i = 0;
+	vm_page_iter_init(&pages, object);
 retrylookup:
-	m = vm_page_mpred(object, pindex + i);
-	if (m == NULL || m->pindex != pindex + i) {
-		mpred = m;
-		m = NULL;
-	} else
-		mpred = TAILQ_PREV(m, pglist, listq);
+	mpred = vm_radix_iter_lookup_lt(&pages, pindex + i);
 	for (; i < count; i++) {
+		m = vm_radix_iter_lookup(&pages, pindex + i);
 		if (m != NULL) {
 			if (!vm_page_tryacquire(m, allocflags)) {
 				if (vm_page_grab_sleep(object, m, pindex + i,
-				    "grbmaw", allocflags, true))
+				    "grbmaw", allocflags, true)) {
+					pctrie_iter_reset(&pages);
 					goto retrylookup;
+				}
 				break;
 			}
 		} else {
 			if ((allocflags & VM_ALLOC_NOCREAT) != 0)
 				break;
-			m = vm_page_alloc_after(object, pindex + i,
+			m = vm_page_alloc_after(object, &pages, pindex + i,
 			    pflags | VM_ALLOC_COUNT(count - i), mpred);
 			if (m == NULL) {
 				if ((allocflags & (VM_ALLOC_NOWAIT |
@@ -5292,7 +5292,6 @@ retrylookup:
 		}
 		vm_page_grab_release(m, allocflags);
 		ma[i] = mpred = m;
-		m = vm_page_next(m);
 	}
 	return (i);
 }
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 1cac80c6a8ef..0a40b42ffb62 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -608,9 +608,10 @@ void vm_page_activate (vm_page_t);
 void vm_page_advise(vm_page_t m, int advice);
 vm_page_t vm_page_mpred(vm_object_t, vm_pindex_t);
 vm_page_t vm_page_alloc(vm_object_t, vm_pindex_t, int);
-vm_page_t vm_page_alloc_after(vm_object_t, vm_pindex_t, int, vm_page_t);
-vm_page_t vm_page_alloc_domain_after(vm_object_t, vm_pindex_t, int, int,
-    vm_page_t);
+vm_page_t vm_page_alloc_after(vm_object_t, struct pctrie_iter *, vm_pindex_t,
+    int, vm_page_t);
+vm_page_t vm_page_alloc_domain_after(vm_object_t, struct pctrie_iter *,
+    vm_pindex_t, int, int, vm_page_t);
 vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req,
     u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment,
     vm_paddr_t boundary, vm_memattr_t memattr);
@@ -631,6 +632,8 @@ bool vm_page_blacklist_add(vm_paddr_t pa, bool verbose);
 int vm_page_grab_zero_partial(vm_object_t object, vm_pindex_t pindex, int base,
     int end);
 vm_page_t vm_page_grab(vm_object_t, vm_pindex_t, int);
+vm_page_t vm_page_grab_iter(vm_object_t object, struct pctrie_iter *pages,
+    vm_pindex_t pindex, int allocflags);
 vm_page_t vm_page_grab_unlocked(vm_object_t, vm_pindex_t, int);
 int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags,
     vm_page_t *ma, int count);