svn commit: r196780 - in stable/8/sys: . amd64/amd64 amd64/include/xen cddl/contrib/opensolaris contrib/dev/acpica contrib/pf dev/xen/xenpci i386/i386 i386/include

John Baldwin jhb at FreeBSD.org
Thu Sep 3 13:54:58 UTC 2009


Author: jhb
Date: Thu Sep  3 13:54:58 2009
New Revision: 196780
URL: http://svn.freebsd.org/changeset/base/196780

Log:
  MFC 196705 and 196707:
  - Improve pmap_change_attr() on i386 so that it is able to demote a large
    (2/4MB) page into 4KB pages as needed.  This should be fairly rare in
    practice.
  - Simplify pmap_change_attr() a bit:
    - Always calculate the cache bits instead of doing it on-demand.
  - Always set changed to TRUE rather than only doing so when it is FALSE.
  
  Approved by:	re (kib)
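
  For context, pmap_change_attr() rewrites the caching mode (PAT type) of an
  already-mapped kernel virtual address range.  A minimal, hypothetical caller
  is sketched below; the helper name and the choice of PAT_UNCACHEABLE are
  illustrative assumptions, not part of this commit.  The point of the merge
  is that on i386 such a call no longer fails with EINVAL when the range only
  partially covers a 2/4MB kernel mapping; the large page is demoted to 4KB
  mappings first.

  #include <sys/param.h>
  #include <sys/systm.h>
  #include <vm/vm.h>
  #include <vm/pmap.h>
  #include <machine/specialreg.h>

  /*
   * Hypothetical driver helper (illustration only): mark a mapped
   * device buffer uncacheable.  Returns 0 on success, EINVAL if part
   * of the range is unmapped, or ENOMEM if a page table page cannot
   * be allocated to demote a covering 2/4MB mapping.
   */
  static int
  example_set_uncacheable(vm_offset_t va, vm_size_t size)
  {

  	return (pmap_change_attr(va, size, PAT_UNCACHEABLE));
  }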

Modified:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/amd64/pmap.c
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/i386/i386/pmap.c
  stable/8/sys/i386/include/pmap.h

Modified: stable/8/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/8/sys/amd64/amd64/pmap.c	Thu Sep  3 13:40:41 2009	(r196779)
+++ stable/8/sys/amd64/amd64/pmap.c	Thu Sep  3 13:54:58 2009	(r196780)
@@ -4476,7 +4476,8 @@ pmap_change_attr_locked(vm_offset_t va, 
 	if (base < DMAP_MIN_ADDRESS)
 		return (EINVAL);
 
-	cache_bits_pde = cache_bits_pte = -1;
+	cache_bits_pde = pmap_cache_bits(mode, 1);
+	cache_bits_pte = pmap_cache_bits(mode, 0);
 	changed = FALSE;
 
 	/*
@@ -4493,8 +4494,6 @@ pmap_change_attr_locked(vm_offset_t va, 
 			 * memory type, then we need not demote this page. Just
 			 * increment tmpva to the next 1GB page frame.
 			 */
-			if (cache_bits_pde < 0)
-				cache_bits_pde = pmap_cache_bits(mode, 1);
 			if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) {
 				tmpva = trunc_1gpage(tmpva) + NBPDP;
 				continue;
@@ -4522,8 +4521,6 @@ pmap_change_attr_locked(vm_offset_t va, 
 			 * memory type, then we need not demote this page. Just
 			 * increment tmpva to the next 2MB page frame.
 			 */
-			if (cache_bits_pde < 0)
-				cache_bits_pde = pmap_cache_bits(mode, 1);
 			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
 				tmpva = trunc_2mpage(tmpva) + NBPDR;
 				continue;
@@ -4557,12 +4554,9 @@ pmap_change_attr_locked(vm_offset_t va, 
 	for (tmpva = base; tmpva < base + size; ) {
 		pdpe = pmap_pdpe(kernel_pmap, tmpva);
 		if (*pdpe & PG_PS) {
-			if (cache_bits_pde < 0)
-				cache_bits_pde = pmap_cache_bits(mode, 1);
 			if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pdpe, cache_bits_pde);
-				if (!changed)
-					changed = TRUE;
+				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
@@ -4588,12 +4582,9 @@ pmap_change_attr_locked(vm_offset_t va, 
 		}
 		pde = pmap_pdpe_to_pde(pdpe, tmpva);
 		if (*pde & PG_PS) {
-			if (cache_bits_pde < 0)
-				cache_bits_pde = pmap_cache_bits(mode, 1);
 			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
 				pmap_pde_attr(pde, cache_bits_pde);
-				if (!changed)
-					changed = TRUE;
+				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
@@ -4616,13 +4607,10 @@ pmap_change_attr_locked(vm_offset_t va, 
 			}
 			tmpva = trunc_2mpage(tmpva) + NBPDR;
 		} else {
-			if (cache_bits_pte < 0)
-				cache_bits_pte = pmap_cache_bits(mode, 0);
 			pte = pmap_pde_to_pte(pde, tmpva);
 			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
 				pmap_pte_attr(pte, cache_bits_pte);
-				if (!changed)
-					changed = TRUE;
+				changed = TRUE;
 			}
 			if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
 				if (pa_start == pa_end) {
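
The first loop above decides, chunk by chunk, whether an existing 1GB or 2MB
mapping can be stepped over whole or must first be demoted.  The standalone
userland model below (written for this note; it is not code from the commit
and omits the cache-bits-already-match shortcut) shows the decision for 2MB
pages: a superpage is kept intact only when the remaining range fully covers
it.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define NBPDR		(1UL << 21)	/* bytes mapped by a 2MB superpage */
#define PDRMASK		(NBPDR - 1)

int
main(void)
{
	/* A range that straddles a superpage boundary on both ends. */
	uintptr_t base = NBPDR - 2 * PAGE_SIZE;
	size_t size = 4 * NBPDR;
	uintptr_t tmpva;
	unsigned steps2m = 0, steps4k = 0;

	for (tmpva = base; tmpva < base + size; ) {
		if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) {
			steps2m++;		/* fully covered: keep whole */
			tmpva += NBPDR;
		} else {
			steps4k++;		/* partial: demote, walk 4KB */
			tmpva += PAGE_SIZE;
		}
	}
	printf("2MB steps: %u, 4KB steps: %u\n", steps2m, steps4k);
	return (0);
}

For this range the model reports 3 whole-superpage steps and 512 4KB steps;
in the kernel, a partially covered large mapping is demoted before its 4KB
pages are visited.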

Modified: stable/8/sys/i386/i386/pmap.c
==============================================================================
--- stable/8/sys/i386/i386/pmap.c	Thu Sep  3 13:40:41 2009	(r196779)
+++ stable/8/sys/i386/i386/pmap.c	Thu Sep  3 13:54:58 2009	(r196780)
@@ -288,12 +288,15 @@ static boolean_t pmap_enter_pde(pmap_t p
 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
 static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
 static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
 static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
 static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
     vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
 static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
     vm_page_t *free);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@@ -2289,32 +2292,62 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
 }
 
 /*
- * Tries to demote a 2- or 4MB page mapping.
+ * Fills a page table page with mappings to consecutive physical pages.
+ */
+static void
+pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
+{
+	pt_entry_t *pte;
+
+	for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+		*pte = newpte;	
+		newpte += PAGE_SIZE;
+	}
+}
+
+/*
+ * Tries to demote a 2- or 4MB page mapping.  If demotion fails, the
+ * 2- or 4MB page mapping is invalidated.
  */
 static boolean_t
 pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
 {
 	pd_entry_t newpde, oldpde;
 	pmap_t allpmaps_entry;
-	pt_entry_t *firstpte, newpte, *pte;
+	pt_entry_t *firstpte, newpte;
 	vm_paddr_t mptepa;
 	vm_page_t free, mpte;
 
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	oldpde = *pde;
+	KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
+	    ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
 	mpte = pmap_lookup_pt_page(pmap, va);
 	if (mpte != NULL)
 		pmap_remove_pt_page(pmap, mpte);
 	else {
-		KASSERT((*pde & PG_W) == 0,
+		KASSERT((oldpde & PG_W) == 0,
 		    ("pmap_demote_pde: page table page for a wired mapping"
 		    " is missing"));
-		free = NULL;
-		pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
-		pmap_invalidate_page(pmap, trunc_4mpage(va));
-		pmap_free_zero_pages(free);
-		CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
-		    " in pmap %p", va, pmap);
-		return (FALSE);
+
+		/*
+		 * Invalidate the 2- or 4MB page mapping and return
+		 * "failure" if the mapping was never accessed or the
+		 * allocation of the new page table page fails.
+		 */
+		if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
+		    va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
+		    VM_ALLOC_WIRED)) == NULL) {
+			free = NULL;
+			pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
+			pmap_invalidate_page(pmap, trunc_4mpage(va));
+			pmap_free_zero_pages(free);
+			CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
+			    " in pmap %p", va, pmap);
+			return (FALSE);
+		}
+		if (va < VM_MAXUSER_ADDRESS)
+			pmap->pm_stats.resident_count++;
 	}
 	mptepa = VM_PAGE_TO_PHYS(mpte);
 
@@ -2348,30 +2381,32 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t 
 		}
 		firstpte = PADDR2;
 	}
-	oldpde = *pde;
 	newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
-	KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
-	    ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
+	KASSERT((oldpde & PG_A) != 0,
+	    ("pmap_demote_pde: oldpde is missing PG_A"));
 	KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
 	    ("pmap_demote_pde: oldpde is missing PG_M"));
-	KASSERT((oldpde & PG_PS) != 0,
-	    ("pmap_demote_pde: oldpde is missing PG_PS"));
 	newpte = oldpde & ~PG_PS;
 	if ((newpte & PG_PDE_PAT) != 0)
 		newpte ^= PG_PDE_PAT | PG_PTE_PAT;
 
 	/*
-	 * If the mapping has changed attributes, update the page table
-	 * entries.
-	 */ 
+	 * If the page table page is new, initialize it.
+	 */
+	if (mpte->wire_count == 1) {
+		mpte->wire_count = NPTEPG;
+		pmap_fill_ptp(firstpte, newpte);
+	}
 	KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
 	    ("pmap_demote_pde: firstpte and newpte map different physical"
 	    " addresses"));
+
+	/*
+	 * If the mapping has changed attributes, update the page table
+	 * entries.
+	 */ 
 	if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
-		for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
-			*pte = newpte;	
-			newpte += PAGE_SIZE;
-		}
+		pmap_fill_ptp(firstpte, newpte);
 	
 	/*
 	 * Demote the mapping.  This pmap is locked.  The old PDE has
@@ -4426,6 +4461,40 @@ pmap_clear_reference(vm_page_t m)
  * Miscellaneous support routines follow
  */
 
+/* Adjust the cache mode for a 4KB page mapped via a PTE. */
+static __inline void
+pmap_pte_attr(pt_entry_t *pte, int cache_bits)
+{
+	u_int opte, npte;
+
+	/*
+	 * The cache mode bits are all in the low 32-bits of the
+	 * PTE, so we can just spin on updating the low 32-bits.
+	 */
+	do {
+		opte = *(u_int *)pte;
+		npte = opte & ~PG_PTE_CACHE;
+		npte |= cache_bits;
+	} while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
+}
+
+/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
+static __inline void
+pmap_pde_attr(pd_entry_t *pde, int cache_bits)
+{
+	u_int opde, npde;
+
+	/*
+	 * The cache mode bits are all in the low 32-bits of the
+	 * PDE, so we can just spin on updating the low 32-bits.
+	 */
+	do {
+		opde = *(u_int *)pde;
+		npde = opde & ~PG_PDE_CACHE;
+		npde |= cache_bits;
+	} while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
+}
+
 /*
  * Map a set of physical memory pages into the kernel virtual
  * address space. Return a pointer to where it is mapped. This
@@ -4537,13 +4606,23 @@ pmap_page_set_memattr(vm_page_t m, vm_me
 	}
 }
 
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within the kernel map.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.
+ */
 int
 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
 {
 	vm_offset_t base, offset, tmpva;
-	pt_entry_t *pte;
-	u_int opte, npte;
 	pd_entry_t *pde;
+	pt_entry_t *pte;
+	int cache_bits_pte, cache_bits_pde;
 	boolean_t changed;
 
 	base = trunc_page(va);
@@ -4556,47 +4635,84 @@ pmap_change_attr(vm_offset_t va, vm_size
 	if (base < VM_MIN_KERNEL_ADDRESS)
 		return (EINVAL);
 
-	/* 4MB pages and pages that aren't mapped aren't supported. */
-	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+	cache_bits_pde = pmap_cache_bits(mode, 1);
+	cache_bits_pte = pmap_cache_bits(mode, 0);
+	changed = FALSE;
+
+	/*
+	 * Pages that aren't mapped aren't supported.  Also break down
+	 * 2/4MB pages into 4KB pages if required.
+	 */
+	PMAP_LOCK(kernel_pmap);
+	for (tmpva = base; tmpva < base + size; ) {
 		pde = pmap_pde(kernel_pmap, tmpva);
-		if (*pde & PG_PS)
-			return (EINVAL);
-		if (*pde == 0)
+		if (*pde == 0) {
+			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
+		}
+		if (*pde & PG_PS) {
+			/*
+			 * If the current 2/4MB page already has
+			 * the required memory type, then we need not
+			 * demote this page.  Just increment tmpva to
+			 * the next 2/4MB page frame.
+			 */
+			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
+				tmpva = trunc_4mpage(tmpva) + NBPDR;
+				continue;
+			}
+
+			/*
+			 * If the current offset aligns with a 2/4MB
+			 * page frame and there is at least 2/4MB left
+			 * within the range, then we need not break
+			 * down this page into 4KB pages.
+			 */
+			if ((tmpva & PDRMASK) == 0 &&
+			    tmpva + PDRMASK < base + size) {
+				tmpva += NBPDR;
+				continue;
+			}
+			if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
+				PMAP_UNLOCK(kernel_pmap);
+				return (ENOMEM);
+			}
+		}
 		pte = vtopte(tmpva);
-		if (*pte == 0)
+		if (*pte == 0) {
+			PMAP_UNLOCK(kernel_pmap);
 			return (EINVAL);
+		}
+		tmpva += PAGE_SIZE;
 	}
-
-	changed = FALSE;
+	PMAP_UNLOCK(kernel_pmap);
 
 	/*
-	 * Ok, all the pages exist and are 4k, so run through them updating
-	 * their cache mode.
+	 * Ok, all the pages exist, so run through them updating their
+	 * cache mode if required.
 	 */
-	for (tmpva = base; size > 0; ) {
-		pte = vtopte(tmpva);
-
-		/*
-		 * The cache mode bits are all in the low 32-bits of the
-		 * PTE, so we can just spin on updating the low 32-bits.
-		 */
-		do {
-			opte = *(u_int *)pte;
-			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
-			npte |= pmap_cache_bits(mode, 0);
-		} while (npte != opte &&
-		    !atomic_cmpset_int((u_int *)pte, opte, npte));
-		if (npte != opte)
-			changed = TRUE;
-		tmpva += PAGE_SIZE;
-		size -= PAGE_SIZE;
+	for (tmpva = base; tmpva < base + size; ) {
+		pde = pmap_pde(kernel_pmap, tmpva);
+		if (*pde & PG_PS) {
+			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
+				pmap_pde_attr(pde, cache_bits_pde);
+				changed = TRUE;
+			}
+			tmpva = trunc_4mpage(tmpva) + NBPDR;
+		} else {
+			pte = vtopte(tmpva);
+			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
+				pmap_pte_attr(pte, cache_bits_pte);
+				changed = TRUE;
+			}
+			tmpva += PAGE_SIZE;
+		}
 	}
 
 	/*
-	 * Flush CPU caches to make sure any data isn't cached that shouldn't
-	 * be, etc.
-	 */    
+	 * Flush CPU caches to make sure any data isn't cached that
+	 * shouldn't be, etc.
+	 */
 	if (changed) {
 		pmap_invalidate_range(kernel_pmap, base, tmpva);
 		pmap_invalidate_cache_range(base, tmpva);
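
The new pmap_pte_attr() and pmap_pde_attr() helpers rely on all three
cache-control bits (PAT, PCD, PWT) living in the low 32 bits of an entry, so
a 32-bit compare-and-swap is enough even for 64-bit PAE entries.  Below is a
self-contained C11 rendering of the same pattern (an illustration; the kernel
uses atomic_cmpset_int() on the raw entry):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PG_NC_PWT	0x008u	/* write-through */
#define PG_NC_PCD	0x010u	/* cache disable */
#define PG_PTE_PAT	0x080u	/* PAT selector, PTE position */
#define PG_PTE_CACHE	(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT)

static void
pte_set_cache_bits(_Atomic uint32_t *pte, uint32_t cache_bits)
{
	uint32_t opte, npte;

	/*
	 * Retry if another CPU changed the entry between the load and
	 * the compare-and-swap; stop early if the entry already holds
	 * the requested bits (npte == opte).
	 */
	do {
		opte = atomic_load(pte);
		npte = (opte & ~PG_PTE_CACHE) | cache_bits;
	} while (npte != opte &&
	    !atomic_compare_exchange_weak(pte, &opte, npte));
}

int
main(void)
{
	_Atomic uint32_t pte = 0x12345003u | PG_NC_PCD;

	pte_set_cache_bits(&pte, PG_NC_PWT);
	printf("pte = %#x\n", (unsigned)atomic_load(&pte));
	return (0);
}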

Modified: stable/8/sys/i386/include/pmap.h
==============================================================================
--- stable/8/sys/i386/include/pmap.h	Thu Sep  3 13:40:41 2009	(r196779)
+++ stable/8/sys/i386/include/pmap.h	Thu Sep  3 13:54:58 2009	(r196780)
@@ -81,6 +81,10 @@
 #define	PG_PROT		(PG_RW|PG_U)	/* all protection bits . */
 #define PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */
 
+/* Page level cache control fields used to determine the PAT type */
+#define PG_PDE_CACHE	(PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
+#define PG_PTE_CACHE	(PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
+
 /*
  * Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding
  * 4KB (PTE) page mappings have identical settings for the following fields:
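
The two masks added here differ only in where the PAT selector sits: bit 12
in a 2/4MB PDE versus bit 7 in a 4KB PTE (bit 7 of a PDE is PG_PS).  That
asymmetry is why pmap_demote_pde() above flips both bits at once with
"newpte ^= PG_PDE_PAT | PG_PTE_PAT".  A standalone sketch of that translation
follows; the constants mirror the i386 headers, while the helper name is made
up for illustration.

#include <stdint.h>
#include <stdio.h>

#define PG_NC_PWT	0x0008u		/* bit 3: write-through */
#define PG_NC_PCD	0x0010u		/* bit 4: cache disable */
#define PG_PS		0x0080u		/* bit 7 in a PDE: page size */
#define PG_PTE_PAT	0x0080u		/* bit 7 in a PTE: PAT selector */
#define PG_PDE_PAT	0x1000u		/* bit 12 in a PDE: PAT selector */

/* Derive the template 4KB PTE bits from a 4MB PDE being demoted. */
static uint32_t
pde_cache_to_pte(uint32_t oldpde)
{
	uint32_t newpte;

	newpte = oldpde & ~PG_PS;
	if ((newpte & PG_PDE_PAT) != 0)
		newpte ^= PG_PDE_PAT | PG_PTE_PAT;	/* bit 12 -> bit 7 */
	return (newpte);
}

int
main(void)
{
	/* A 4MB mapping whose PAT type uses the PDE PAT bit plus PWT. */
	uint32_t oldpde = 0x00400000u | PG_PS | PG_PDE_PAT | PG_NC_PWT;
	uint32_t newpte = pde_cache_to_pte(oldpde);

	printf("PDE %#x -> PTE %#x (cache bits %#x)\n", oldpde, newpte,
	    newpte & (PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT));
	return (0);
}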

