svn commit: r196780 - in stable/8/sys: . amd64/amd64 amd64/include/xen cddl/contrib/opensolaris contrib/dev/acpica contrib/pf dev/xen/xenpci i386/i386 i386/include

John Baldwin <jhb@FreeBSD.org>
Thu Sep 3 13:54:58 UTC 2009
Author: jhb
Date: Thu Sep 3 13:54:58 2009
New Revision: 196780
URL: http://svn.freebsd.org/changeset/base/196780
Log:
  MFC 196705 and 196707:
  - Improve pmap_change_attr() on i386 so that it is able to demote a large
    (2/4MB) page into 4KB pages as needed.  This should be fairly rare in
    practice.
  - Simplify pmap_change_attr() a bit:
    - Always calculate the cache bits instead of doing it on-demand.
    - Always set changed to TRUE rather than only doing it if it is false.

  Approved by:    re (kib)
Modified:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/amd64/pmap.c
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)
  stable/8/sys/i386/i386/pmap.c
  stable/8/sys/i386/include/pmap.h
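For reference, the simplification described in the log boils down to the pattern
sketched below: the cache-control bits for the requested mode are computed once
before the range is walked, and the "changed" flag is assigned unconditionally
whenever an entry is rewritten.  This is a minimal userland sketch, not the
committed kernel code; the PG_* values match the i386 definitions, but
cache_bits_for() is a made-up stand-in for pmap_cache_bits().

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PG_NC_PWT       0x008           /* write-through */
#define PG_NC_PCD       0x010           /* cache-disable */
#define PG_PTE_PAT      0x080           /* PAT bit of a 4KB PTE */
#define PG_PTE_CACHE    (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)

/* Stand-in for pmap_cache_bits(): map a caching mode to PTE cache bits. */
static uint32_t
cache_bits_for(int mode)
{

        return (mode != 0 ? (PG_NC_PCD | PG_NC_PWT) : 0);
}

int
main(void)
{
        uint32_t pte[4] = { 0x1000, 0x2008, 0x3018, 0x4000 };
        uint32_t cache_bits_pte;
        bool changed;
        int i, mode = 1;                /* pretend: make the range uncacheable */

        cache_bits_pte = cache_bits_for(mode);  /* computed once, up front */
        changed = false;

        for (i = 0; i < 4; i++) {
                if ((pte[i] & PG_PTE_CACHE) != cache_bits_pte) {
                        pte[i] = (pte[i] & ~PG_PTE_CACHE) | cache_bits_pte;
                        changed = true;         /* plain assignment, no "if (!changed)" */
                }
        }
        printf("changed = %d\n", changed);      /* prints "changed = 1" */
        return (0);
}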
Modified: stable/8/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/8/sys/amd64/amd64/pmap.c Thu Sep 3 13:40:41 2009 (r196779)
+++ stable/8/sys/amd64/amd64/pmap.c Thu Sep 3 13:54:58 2009 (r196780)
@@ -4476,7 +4476,8 @@ pmap_change_attr_locked(vm_offset_t va,
if (base < DMAP_MIN_ADDRESS)
return (EINVAL);
- cache_bits_pde = cache_bits_pte = -1;
+ cache_bits_pde = pmap_cache_bits(mode, 1);
+ cache_bits_pte = pmap_cache_bits(mode, 0);
changed = FALSE;
/*
@@ -4493,8 +4494,6 @@ pmap_change_attr_locked(vm_offset_t va,
* memory type, then we need not demote this page. Just
* increment tmpva to the next 1GB page frame.
*/
- if (cache_bits_pde < 0)
- cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pdpe & PG_PDE_CACHE) == cache_bits_pde) {
tmpva = trunc_1gpage(tmpva) + NBPDP;
continue;
@@ -4522,8 +4521,6 @@ pmap_change_attr_locked(vm_offset_t va,
* memory type, then we need not demote this page. Just
* increment tmpva to the next 2MB page frame.
*/
- if (cache_bits_pde < 0)
- cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
tmpva = trunc_2mpage(tmpva) + NBPDR;
continue;
@@ -4557,12 +4554,9 @@ pmap_change_attr_locked(vm_offset_t va,
for (tmpva = base; tmpva < base + size; ) {
pdpe = pmap_pdpe(kernel_pmap, tmpva);
if (*pdpe & PG_PS) {
- if (cache_bits_pde < 0)
- cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pdpe & PG_PDE_CACHE) != cache_bits_pde) {
pmap_pde_attr(pdpe, cache_bits_pde);
- if (!changed)
- changed = TRUE;
+ changed = TRUE;
}
if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
if (pa_start == pa_end) {
@@ -4588,12 +4582,9 @@ pmap_change_attr_locked(vm_offset_t va,
}
pde = pmap_pdpe_to_pde(pdpe, tmpva);
if (*pde & PG_PS) {
- if (cache_bits_pde < 0)
- cache_bits_pde = pmap_cache_bits(mode, 1);
if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
pmap_pde_attr(pde, cache_bits_pde);
- if (!changed)
- changed = TRUE;
+ changed = TRUE;
}
if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
if (pa_start == pa_end) {
@@ -4616,13 +4607,10 @@ pmap_change_attr_locked(vm_offset_t va,
}
tmpva = trunc_2mpage(tmpva) + NBPDR;
} else {
- if (cache_bits_pte < 0)
- cache_bits_pte = pmap_cache_bits(mode, 0);
pte = pmap_pde_to_pte(pde, tmpva);
if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
pmap_pte_attr(pte, cache_bits_pte);
- if (!changed)
- changed = TRUE;
+ changed = TRUE;
}
if (tmpva >= VM_MIN_KERNEL_ADDRESS) {
if (pa_start == pa_end) {
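The first pass over the range above reduces, for each large mapping it
encounters, to a three-way decision: skip frames that already carry the
requested memory type, keep the large mapping when the request spans the whole
frame (its PDE is rewritten in the second pass), and demote only on a partial
overlap.  A sketch of that decision using i386-style 4MB constants; the enum
and helper name are illustrative stand-ins, not kernel interfaces.

#include <stdint.h>
#include <stdio.h>

#define NBPDR           (1u << 22)      /* 4MB superpage (non-PAE i386) */
#define PDRMASK         (NBPDR - 1)

enum pde_action { PDE_SKIP, PDE_KEEP_LARGE, PDE_DEMOTE };

static enum pde_action
classify_pde(uint32_t pde_cache, uint32_t want_cache, uint32_t tmpva,
    uint32_t base, uint32_t size)
{

        /* Already has the requested memory type: nothing to change here. */
        if (pde_cache == want_cache)
                return (PDE_SKIP);
        /*
         * The request covers this entire frame, so the PDE itself can be
         * rewritten later without breaking the mapping into 4KB pages.
         */
        if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size)
                return (PDE_KEEP_LARGE);
        /* Partial overlap: the superpage must be demoted first. */
        return (PDE_DEMOTE);
}

int
main(void)
{

        printf("%d\n", classify_pde(0x18, 0x18, 0, 0, NBPDR)); /* 0: skip */
        printf("%d\n", classify_pde(0x00, 0x18, 0, 0, NBPDR)); /* 1: keep large */
        printf("%d\n", classify_pde(0x00, 0x18, 0, 0, 4096));  /* 2: demote */
        return (0);
}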
Modified: stable/8/sys/i386/i386/pmap.c
==============================================================================
--- stable/8/sys/i386/i386/pmap.c Thu Sep 3 13:40:41 2009 (r196779)
+++ stable/8/sys/i386/i386/pmap.c Thu Sep 3 13:54:58 2009 (r196780)
@@ -288,12 +288,15 @@ static boolean_t pmap_enter_pde(pmap_t p
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
vm_page_t *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@@ -2289,32 +2292,62 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse
}
/*
- * Tries to demote a 2- or 4MB page mapping.
+ * Fills a page table page with mappings to consecutive physical pages.
+ */
+static void
+pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte)
+{
+ pt_entry_t *pte;
+
+ for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
+ *pte = newpte;
+ newpte += PAGE_SIZE;
+ }
+}
+
+/*
+ * Tries to demote a 2- or 4MB page mapping. If demotion fails, the
+ * 2- or 4MB page mapping is invalidated.
*/
static boolean_t
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va)
{
pd_entry_t newpde, oldpde;
pmap_t allpmaps_entry;
- pt_entry_t *firstpte, newpte, *pte;
+ pt_entry_t *firstpte, newpte;
vm_paddr_t mptepa;
vm_page_t free, mpte;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ oldpde = *pde;
+ KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V),
+ ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V"));
mpte = pmap_lookup_pt_page(pmap, va);
if (mpte != NULL)
pmap_remove_pt_page(pmap, mpte);
else {
- KASSERT((*pde & PG_W) == 0,
+ KASSERT((oldpde & PG_W) == 0,
("pmap_demote_pde: page table page for a wired mapping"
" is missing"));
- free = NULL;
- pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
- pmap_invalidate_page(pmap, trunc_4mpage(va));
- pmap_free_zero_pages(free);
- CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
- " in pmap %p", va, pmap);
- return (FALSE);
+
+ /*
+ * Invalidate the 2- or 4MB page mapping and return
+ * "failure" if the mapping was never accessed or the
+ * allocation of the new page table page fails.
+ */
+ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL,
+ va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL |
+ VM_ALLOC_WIRED)) == NULL) {
+ free = NULL;
+ pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free);
+ pmap_invalidate_page(pmap, trunc_4mpage(va));
+ pmap_free_zero_pages(free);
+ CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x"
+ " in pmap %p", va, pmap);
+ return (FALSE);
+ }
+ if (va < VM_MAXUSER_ADDRESS)
+ pmap->pm_stats.resident_count++;
}
mptepa = VM_PAGE_TO_PHYS(mpte);
@@ -2348,30 +2381,32 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t
}
firstpte = PADDR2;
}
- oldpde = *pde;
newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V;
- KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V),
- ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V"));
+ KASSERT((oldpde & PG_A) != 0,
+ ("pmap_demote_pde: oldpde is missing PG_A"));
KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW,
("pmap_demote_pde: oldpde is missing PG_M"));
- KASSERT((oldpde & PG_PS) != 0,
- ("pmap_demote_pde: oldpde is missing PG_PS"));
newpte = oldpde & ~PG_PS;
if ((newpte & PG_PDE_PAT) != 0)
newpte ^= PG_PDE_PAT | PG_PTE_PAT;
/*
- * If the mapping has changed attributes, update the page table
- * entries.
- */
+ * If the page table page is new, initialize it.
+ */
+ if (mpte->wire_count == 1) {
+ mpte->wire_count = NPTEPG;
+ pmap_fill_ptp(firstpte, newpte);
+ }
KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME),
("pmap_demote_pde: firstpte and newpte map different physical"
" addresses"));
+
+ /*
+ * If the mapping has changed attributes, update the page table
+ * entries.
+ */
if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE))
- for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
- *pte = newpte;
- newpte += PAGE_SIZE;
- }
+ pmap_fill_ptp(firstpte, newpte);
/*
* Demote the mapping. This pmap is locked. The old PDE has
@@ -4426,6 +4461,40 @@ pmap_clear_reference(vm_page_t m)
* Miscellaneous support routines follow
*/
+/* Adjust the cache mode for a 4KB page mapped via a PTE. */
+static __inline void
+pmap_pte_attr(pt_entry_t *pte, int cache_bits)
+{
+ u_int opte, npte;
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PTE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opte = *(u_int *)pte;
+ npte = opte & ~PG_PTE_CACHE;
+ npte |= cache_bits;
+ } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
+}
+
+/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
+static __inline void
+pmap_pde_attr(pd_entry_t *pde, int cache_bits)
+{
+ u_int opde, npde;
+
+ /*
+ * The cache mode bits are all in the low 32-bits of the
+ * PDE, so we can just spin on updating the low 32-bits.
+ */
+ do {
+ opde = *(u_int *)pde;
+ npde = opde & ~PG_PDE_CACHE;
+ npde |= cache_bits;
+ } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
+}
+
/*
* Map a set of physical memory pages into the kernel virtual
* address space. Return a pointer to where it is mapped. This
@@ -4537,13 +4606,23 @@ pmap_page_set_memattr(vm_page_t m, vm_me
}
}
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode". The specified virtual address range must be
+ * completely contained within either the kernel map.
+ *
+ * Returns zero if the change completed successfully, and either EINVAL or
+ * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
+ * of the virtual address range was not mapped, and ENOMEM is returned if
+ * there was insufficient memory available to complete the change.
+ */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
vm_offset_t base, offset, tmpva;
- pt_entry_t *pte;
- u_int opte, npte;
pd_entry_t *pde;
+ pt_entry_t *pte;
+ int cache_bits_pte, cache_bits_pde;
boolean_t changed;
base = trunc_page(va);
@@ -4556,47 +4635,84 @@ pmap_change_attr(vm_offset_t va, vm_size
if (base < VM_MIN_KERNEL_ADDRESS)
return (EINVAL);
- /* 4MB pages and pages that aren't mapped aren't supported. */
- for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+ cache_bits_pde = pmap_cache_bits(mode, 1);
+ cache_bits_pte = pmap_cache_bits(mode, 0);
+ changed = FALSE;
+
+ /*
+ * Pages that aren't mapped aren't supported. Also break down
+ * 2/4MB pages into 4KB pages if required.
+ */
+ PMAP_LOCK(kernel_pmap);
+ for (tmpva = base; tmpva < base + size; ) {
pde = pmap_pde(kernel_pmap, tmpva);
- if (*pde & PG_PS)
- return (EINVAL);
- if (*pde == 0)
+ if (*pde == 0) {
+ PMAP_UNLOCK(kernel_pmap);
return (EINVAL);
+ }
+ if (*pde & PG_PS) {
+ /*
+ * If the current 2/4MB page already has
+ * the required memory type, then we need not
+ * demote this page. Just increment tmpva to
+ * the next 2/4MB page frame.
+ */
+ if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
+ tmpva = trunc_4mpage(tmpva) + NBPDR;
+ continue;
+ }
+
+ /*
+ * If the current offset aligns with a 2/4MB
+ * page frame and there is at least 2/4MB left
+ * within the range, then we need not break
+ * down this page into 4KB pages.
+ */
+ if ((tmpva & PDRMASK) == 0 &&
+ tmpva + PDRMASK < base + size) {
+ tmpva += NBPDR;
+ continue;
+ }
+ if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
+ PMAP_UNLOCK(kernel_pmap);
+ return (ENOMEM);
+ }
+ }
pte = vtopte(tmpva);
- if (*pte == 0)
+ if (*pte == 0) {
+ PMAP_UNLOCK(kernel_pmap);
return (EINVAL);
+ }
+ tmpva += PAGE_SIZE;
}
-
- changed = FALSE;
+ PMAP_UNLOCK(kernel_pmap);
/*
- * Ok, all the pages exist and are 4k, so run through them updating
- * their cache mode.
+ * Ok, all the pages exist, so run through them updating their
+ * cache mode if required.
*/
- for (tmpva = base; size > 0; ) {
- pte = vtopte(tmpva);
-
- /*
- * The cache mode bits are all in the low 32-bits of the
- * PTE, so we can just spin on updating the low 32-bits.
- */
- do {
- opte = *(u_int *)pte;
- npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
- npte |= pmap_cache_bits(mode, 0);
- } while (npte != opte &&
- !atomic_cmpset_int((u_int *)pte, opte, npte));
- if (npte != opte)
- changed = TRUE;
- tmpva += PAGE_SIZE;
- size -= PAGE_SIZE;
+ for (tmpva = base; tmpva < base + size; ) {
+ pde = pmap_pde(kernel_pmap, tmpva);
+ if (*pde & PG_PS) {
+ if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
+ pmap_pde_attr(pde, cache_bits_pde);
+ changed = TRUE;
+ }
+ tmpva = trunc_4mpage(tmpva) + NBPDR;
+ } else {
+ pte = vtopte(tmpva);
+ if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
+ pmap_pte_attr(pte, cache_bits_pte);
+ changed = TRUE;
+ }
+ tmpva += PAGE_SIZE;
+ }
}
/*
- * Flush CPU caches to make sure any data isn't cached that shouldn't
- * be, etc.
- */
+ * Flush CPU caches to make sure any data isn't cached that
+ * shouldn't be, etc.
+ */
if (changed) {
pmap_invalidate_range(kernel_pmap, base, tmpva);
pmap_invalidate_cache_range(base, tmpva);
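The new pmap_pte_attr() and pmap_pde_attr() helpers above exploit the fact that
all of the cache-control bits live in the low 32 bits of an entry, so a 32-bit
compare-and-swap loop is enough even when the entries themselves are wider
(PAE).  A userland sketch of that loop, with C11 atomics standing in for
atomic_cmpset_int() and the same bit values assumed:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PG_NC_PWT       0x008
#define PG_NC_PCD       0x010
#define PG_PTE_PAT      0x080
#define PG_PTE_CACHE    (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)

/* Rewrite the cache-control bits of a 32-bit entry without locking. */
static void
set_cache_bits(_Atomic uint32_t *pte, uint32_t cache_bits)
{
        uint32_t opte, npte;

        do {
                opte = atomic_load(pte);
                npte = (opte & ~PG_PTE_CACHE) | cache_bits;
                /* Stop early if the entry already has the wanted bits. */
        } while (npte != opte &&
            !atomic_compare_exchange_weak(pte, &opte, npte));
}

int
main(void)
{
        _Atomic uint32_t pte = 0x12345008;      /* PG_NC_PWT currently set */

        set_cache_bits(&pte, PG_NC_PCD | PG_NC_PWT);
        printf("pte = %#x\n", (unsigned)atomic_load(&pte));
        return (0);
}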
Modified: stable/8/sys/i386/include/pmap.h
==============================================================================
--- stable/8/sys/i386/include/pmap.h Thu Sep 3 13:40:41 2009 (r196779)
+++ stable/8/sys/i386/include/pmap.h Thu Sep 3 13:54:58 2009 (r196780)
@@ -81,6 +81,10 @@
#define PG_PROT (PG_RW|PG_U) /* all protection bits . */
#define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */
+/* Page level cache control fields used to determine the PAT type */
+#define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD)
+#define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD)
+
/*
* Promotion to a 2 or 4MB (PDE) page mapping requires that the corresponding
* 4KB (PTE) page mappings have identical settings for the following fields:
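A detail hiding behind these two masks: the PAT bit sits at bit 12 of a 2/4MB
PDE (PG_PDE_PAT) but at bit 7 of a 4KB PTE (PG_PTE_PAT), which is why
pmap_demote_pde() above XORs the PTE template with both masks when the PDE's
PAT bit is set.  A small stand-alone illustration of that bit relocation
(sketch only, not kernel code):

#include <stdint.h>
#include <stdio.h>

#define PG_PS           0x080           /* superpage bit in a PDE */
#define PG_PTE_PAT      0x080           /* PAT bit in a 4KB PTE */
#define PG_PDE_PAT      0x1000          /* PAT bit in a 2/4MB PDE */

int
main(void)
{
        uint32_t oldpde = 0x00400000 | PG_PS | PG_PDE_PAT;      /* PAT set */
        uint32_t newpte = oldpde & ~(uint32_t)PG_PS;

        if ((newpte & PG_PDE_PAT) != 0)
                newpte ^= PG_PDE_PAT | PG_PTE_PAT;      /* move bit 12 -> bit 7 */

        /* oldpde = 0x401080, newpte template = 0x400080 */
        printf("oldpde = %#x, newpte = %#x\n", (unsigned)oldpde, (unsigned)newpte);
        return (0);
}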