git: aa3bcaad5107 - main - amd64: Add a leaf PTP when pmap_enter(psind=1) creates a wired mapping

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Sun, 17 Sep 2023 10:37:23 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=aa3bcaad51076ceb346fa0c64ab69a35e5241b20

commit aa3bcaad51076ceb346fa0c64ab69a35e5241b20
Author:     Bojan Novković <bojan.novkovic@fer.hr>
AuthorDate: 2023-09-15 10:41:10 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-09-17 10:27:22 +0000

    amd64: Add a leaf PTP when pmap_enter(psind=1) creates a wired mapping
    
    This patch reverts the changes made in D19670 and fixes the original
    issue by allocating and prepopulating a leaf page table page for wired
    userspace 2M pages.
    
    The original issue is an edge case that creates an unmapped, wired
    region in userspace. Subsequent faults on this region can trigger wired
    superpage creation, which leads to a panic in pmap_demote_pde_locked()
    because the pmap does not create a leaf page table page for the wired
    superpage. D19670 fixed this by disallowing preemptive creation of
    wired superpage mappings, but that fix currently interferes with an
    ongoing effort to speed up vm_map_wire() for large, contiguous entries
    (e.g. bhyve wiring guest memory).
    
    Reviewed by:    alc, markj
    Sponsored by:   Google, Inc. (GSoC 2023)
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D41132
---
 sys/amd64/amd64/pmap.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index b10997fd657b..ff83d8749313 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -7493,9 +7493,8 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
 	pd_entry_t oldpde, *pde;
 	pt_entry_t PG_G, PG_RW, PG_V;
 	vm_page_t mt, pdpg;
+	vm_page_t uwptpg;
 
-	KASSERT(pmap == kernel_pmap || (newpde & PG_W) == 0,
-	    ("pmap_enter_pde: cannot create wired user mapping"));
 	PG_G = pmap_global_bit(pmap);
 	PG_RW = pmap_rw_bit(pmap);
 	KASSERT((newpde & (pmap_modified_bit(pmap) | PG_RW)) != PG_RW,
@@ -7592,6 +7591,22 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
 		}
 	}
 
+	/*
+	 * Allocate leaf ptpage for wired userspace pages.
+	 */
+	uwptpg = NULL;
+	if ((newpde & PG_W) != 0 && pmap != kernel_pmap) {
+		uwptpg = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va),
+		    VM_ALLOC_WIRED);
+		if (uwptpg == NULL)
+			return (KERN_RESOURCE_SHORTAGE);
+		if (pmap_insert_pt_page(pmap, uwptpg, true, false)) {
+			pmap_free_pt_page(pmap, uwptpg, false);
+			return (KERN_RESOURCE_SHORTAGE);
+		}
+
+		uwptpg->ref_count = NPTEPG;
+	}
 	if ((newpde & PG_MANAGED) != 0) {
 		/*
 		 * Abort this mapping if its PV entry could not be created.
@@ -7599,6 +7614,14 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags,
 		if (!pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) {
 			if (pdpg != NULL)
 				pmap_abort_ptp(pmap, va, pdpg);
+			if (uwptpg != NULL) {
+				mt = pmap_remove_pt_page(pmap, va);
+				KASSERT(mt == uwptpg,
+				    ("removed pt page %p, expected %p", mt,
+				    uwptpg));
+				uwptpg->ref_count = 1;
+				pmap_free_pt_page(pmap, uwptpg, false);
+			}
 			CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
 			    " in pmap %p", va, pmap);
 			return (KERN_RESOURCE_SHORTAGE);