git: ae10431c9833 - main - vm_page: Allow PG_NOFREE pages to be freed

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Thu, 10 Apr 2025 12:47:20 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=ae10431c9833bd6b176afe4d8021d233fd985107

commit ae10431c9833bd6b176afe4d8021d233fd985107
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-04-10 12:43:12 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-04-10 12:47:05 +0000

    vm_page: Allow PG_NOFREE pages to be freed
    
    There is at least one case where we need to support it: kmem_malloc()
    might need to allocate multiple pages to satisfy a NOFREE allocation,
    which it implements by calling vm_page_alloc() in a loop.  If it fails
    part-way through, it needs to free already-allocated pages, but this was
    illegal.
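    
    As a minimal sketch of that pattern (illustrative only: alloc_nofree_run()
    is a hypothetical name, and kmem_malloc() itself allocates through
    kmem_back_domain() rather than this exact loop):
    
        /*
         * Allocate a run of NOFREE pages, backing out on partial failure.
         * The back-out path is what previously tripped the PG_NOFREE
         * assertion in vm_page_free_prep().
         */
        static int
        alloc_nofree_run(vm_page_t *ma, int npages, int req)
        {
                int i;
    
                for (i = 0; i < npages; i++) {
                        ma[i] = vm_page_alloc_noobj(req | VM_ALLOC_NOFREE);
                        if (ma[i] == NULL) {
                                while (--i >= 0)
                                        vm_page_free(ma[i]);
                                return (ENOMEM);
                        }
                }
                return (0);
        }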
    
    Convert the bump allocator to a linked list; (ab)use the pindex field of
    each page in the list to store the number of contiguous pages in the
    block.  (Originally I added a new plinks member for this purpose, but
    it's not safe to use that until after vm_page_dequeue() is called due to
    lazy page queue removal.) Then, modify vm_page_free() to support freeing
    pages to this list.
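    
    To picture the encoding (condensed from the diff below): each list entry
    is the first page of a physically contiguous run, and its pindex records
    how many pages remain in that run, so a queue drawn as
    
        vmd_nofreeq: [m0, pindex=4] -> [m1, pindex=2]
    
    caches six pages.  Allocation peels one page off the head run; freeing
    prepends a run of length one:
    
        /* Allocate: shrink the head run by one page. */
        m = TAILQ_FIRST(&vmd->vmd_nofreeq);
        TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
        if (m->pindex > 1) {
                m[1].pindex = m->pindex - 1;
                TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, &m[1], listq);
        }
    
        /* Free: the page becomes a new run of length one. */
        m->pindex = 1;
        TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);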
    
    While here, add a __noinline qualifier to vm_page_alloc_nofree_domain()
    to ensure that it doesn't get inlined into a hot path.
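    
    The pattern in general (a generic sketch, not code from this commit;
    refill_slow(), refill(), cache_empty(), and struct cache are hypothetical
    names): __noinline keeps the cold body out of the caller, so the common
    case pays only for an untaken branch.
    
        static int __noinline
        refill_slow(struct cache *c)
        {
                /* Rarely executed; kept out of the caller's hot path. */
                return (refill(c));
        }
    
        if (__predict_false(cache_empty(c)))
                error = refill_slow(c);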
    
    Reported by:    syzbot+93bc9edd2d0f22ae426a@syzkaller.appspotmail.com
    Reviewed by:    bnovkov, kib
    Fixes:          a8693e89e3e4 ("vm: Introduce vm_page_alloc_nofree_domain")
    Differential Revision:  https://reviews.freebsd.org/D49480
---
 sys/vm/vm_page.c      | 56 ++++++++++++++++++++++++++++++++++++++-------------
 sys/vm/vm_pagequeue.h |  5 +----
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 741c45490d96..5214b3c956ba 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -2648,40 +2648,66 @@ found:
  * the routine will try to fetch a new one from the freelists
  * and discard the old one.
  */
-static vm_page_t
+static vm_page_t __noinline
 vm_page_alloc_nofree_domain(int domain, int req)
 {
 	vm_page_t m;
 	struct vm_domain *vmd;
-	struct vm_nofreeq *nqp;
 
 	KASSERT((req & VM_ALLOC_NOFREE) != 0, ("invalid request %#x", req));
 
 	vmd = VM_DOMAIN(domain);
-	nqp = &vmd->vmd_nofreeq;
 	vm_domain_free_lock(vmd);
-	if (nqp->offs >= (1 << VM_NOFREE_IMPORT_ORDER) || nqp->ma == NULL) {
-		if (!vm_domain_allocate(vmd, req,
-		    1 << VM_NOFREE_IMPORT_ORDER)) {
+	if (TAILQ_EMPTY(&vmd->vmd_nofreeq)) {
+		int count;
+
+		count = 1 << VM_NOFREE_IMPORT_ORDER;
+		if (!vm_domain_allocate(vmd, req, count)) {
 			vm_domain_free_unlock(vmd);
 			return (NULL);
 		}
-		nqp->ma = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
+		m = vm_phys_alloc_pages(domain, VM_FREEPOOL_DEFAULT,
 		    VM_NOFREE_IMPORT_ORDER);
-		if (nqp->ma == NULL) {
-			vm_domain_freecnt_inc(vmd, 1 << VM_NOFREE_IMPORT_ORDER);
+		if (m == NULL) {
+			vm_domain_freecnt_inc(vmd, count);
 			vm_domain_free_unlock(vmd);
 			return (NULL);
 		}
-		nqp->offs = 0;
+		m->pindex = count;
+		TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+		VM_CNT_ADD(v_nofree_count, count);
+	}
+	m = TAILQ_FIRST(&vmd->vmd_nofreeq);
+	TAILQ_REMOVE(&vmd->vmd_nofreeq, m, listq);
+	if (m->pindex > 1) {
+		vm_page_t m_next;
+
+		m_next = &m[1];
+		m_next->pindex = m->pindex - 1;
+		TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m_next, listq);
 	}
-	m = &nqp->ma[nqp->offs++];
 	vm_domain_free_unlock(vmd);
-	VM_CNT_ADD(v_nofree_count, 1);
+	VM_CNT_ADD(v_nofree_count, -1);
 
 	return (m);
 }
 
+/*
+ * Though NOFREE pages by definition should not be freed, we support putting
+ * them aside for future NOFREE allocations.  This accommodates code which
+ * allocates NOFREE pages for some purpose but then encounters an error and
+ * must release them.
+ */
+static void __noinline
+vm_page_free_nofree(struct vm_domain *vmd, vm_page_t m)
+{
+	vm_domain_free_lock(vmd);
+	m->pindex = 1;
+	TAILQ_INSERT_HEAD(&vmd->vmd_nofreeq, m, listq);
+	vm_domain_free_unlock(vmd);
+	VM_CNT_ADD(v_nofree_count, 1);
+}
+
 vm_page_t
 vm_page_alloc_noobj(int req)
 {
@@ -4145,8 +4171,6 @@ vm_page_free_prep(vm_page_t m)
 			    m, i, (uintmax_t)*p));
 	}
 #endif
-	KASSERT((m->flags & PG_NOFREE) == 0,
-	    ("%s: attempting to free a PG_NOFREE page", __func__));
 	if ((m->oflags & VPO_UNMANAGED) == 0) {
 		KASSERT(!pmap_page_is_mapped(m),
 		    ("vm_page_free_prep: freeing mapped page %p", m));
@@ -4230,6 +4254,10 @@ vm_page_free_toq(vm_page_t m)
 		return;
 
 	vmd = vm_pagequeue_domain(m);
+	if (__predict_false((m->flags & PG_NOFREE) != 0)) {
+		vm_page_free_nofree(vmd, m);
+		return;
+	}
 	zone = vmd->vmd_pgcache[m->pool].zone;
 	if ((m->flags & PG_PCPU_CACHE) != 0 && zone != NULL) {
 		uma_zfree(zone, m);
diff --git a/sys/vm/vm_pagequeue.h b/sys/vm/vm_pagequeue.h
index 72fd1bb47318..cbbd27389662 100644
--- a/sys/vm/vm_pagequeue.h
+++ b/sys/vm/vm_pagequeue.h
@@ -247,10 +247,7 @@ struct vm_domain {
 	u_int vmd_domain;		/* (c) Domain number. */
 	u_int vmd_page_count;		/* (c) Total page count. */
 	long vmd_segs;			/* (c) bitmask of the segments */
-	struct vm_nofreeq {
-		vm_page_t ma;
-		int offs;
-	} vmd_nofreeq;			/* (f) NOFREE page bump allocator. */
+	struct pglist vmd_nofreeq;	/* (f) NOFREE page list. */
 	u_int __aligned(CACHE_LINE_SIZE) vmd_free_count; /* (a,f) free page count */
 	u_int vmd_pageout_deficit;	/* (a) Estimated number of pages deficit */
 	uint8_t vmd_pad[CACHE_LINE_SIZE - (sizeof(u_int) * 2)];
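
For reference, struct pglist is the stock page-list type declared in
sys/vm/vm_page.h, so the replacement field is an ordinary TAILQ head whose
entries are linked through each page's listq field:

	TAILQ_HEAD(pglist, vm_page);	/* declared in sys/vm/vm_page.h */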