svn commit: r230435 - in stable/9/sys: amd64/amd64 i386/i386
i386/xen
Alan Cox
alc at FreeBSD.org
Sat Jan 21 19:21:42 UTC 2012
Author: alc
Date: Sat Jan 21 19:21:42 2012
New Revision: 230435
URL: http://svn.freebsd.org/changeset/base/230435
Log:
MFC r228923, r228935, and r229007
Eliminate many of the unnecessary differences between the native and
paravirtualized pmap implementations for i386.
Fix a bug in the Xen pmap's implementation of
pmap_extract_and_hold(): If the page lock acquisition is retried,
then the underlying thread is not unpinned.
Wrap nearby lines that exceed 80 columns.
Merge r216333 and r216555 from the native pmap
When r207410 eliminated the acquisition and release of the page
queues lock from pmap_extract_and_hold(), it didn't take into
account that pmap_pte_quick() sometimes requires the page queues
lock to be held. This change reimplements pmap_extract_and_hold()
such that it no longer uses pmap_pte_quick(), and thus never
requires the page queues lock.
Merge r177525 from the native pmap
Prevent an overflow in the calculation of the next page
directory. Such an overflow causes a wraparound, with consequent
corruption of (almost) the whole address space mapping.
Strictly speaking, r177525 is not required by the Xen pmap because
the hypervisor steals the uppermost region of the normal kernel
address space. I am nonetheless merging it in order to reduce the
number of unnecessary differences between the native and Xen pmap
implementations.
Modified:
stable/9/sys/amd64/amd64/pmap.c
stable/9/sys/i386/i386/pmap.c
stable/9/sys/i386/xen/pmap.c
Directory Properties:
stable/9/sys/ (props changed)
stable/9/sys/amd64/include/xen/ (props changed)
stable/9/sys/boot/ (props changed)
stable/9/sys/boot/i386/efi/ (props changed)
stable/9/sys/boot/ia64/efi/ (props changed)
stable/9/sys/boot/ia64/ski/ (props changed)
stable/9/sys/boot/powerpc/boot1.chrp/ (props changed)
stable/9/sys/boot/powerpc/ofw/ (props changed)
stable/9/sys/cddl/contrib/opensolaris/ (props changed)
stable/9/sys/conf/ (props changed)
stable/9/sys/contrib/dev/acpica/ (props changed)
stable/9/sys/contrib/octeon-sdk/ (props changed)
stable/9/sys/contrib/pf/ (props changed)
stable/9/sys/contrib/x86emu/ (props changed)
Modified: stable/9/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/9/sys/amd64/amd64/pmap.c Sat Jan 21 18:54:19 2012 (r230434)
+++ stable/9/sys/amd64/amd64/pmap.c Sat Jan 21 19:21:42 2012 (r230435)
@@ -1255,8 +1255,8 @@ retry:
if (pdep != NULL && (pde = *pdep)) {
if (pde & PG_PS) {
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
- (va & PDRMASK), &pa))
+ if (vm_page_pa_tryrelock(pmap, (pde &
+ PG_PS_FRAME) | (va & PDRMASK), &pa))
goto retry;
m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
(va & PDRMASK));
@@ -1266,7 +1266,8 @@ retry:
pte = *pmap_pde_to_pte(pdep, va);
if ((pte & PG_V) &&
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+ if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+ &pa))
goto retry;
m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
vm_page_hold(m);
Modified: stable/9/sys/i386/i386/pmap.c
==============================================================================
--- stable/9/sys/i386/i386/pmap.c Sat Jan 21 18:54:19 2012 (r230434)
+++ stable/9/sys/i386/i386/pmap.c Sat Jan 21 19:21:42 2012 (r230435)
@@ -330,7 +330,7 @@ static void pmap_update_pde_invalidate(v
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
@@ -340,6 +340,8 @@ static void *pmap_pdpt_allocf(uma_zone_t
#endif
static void pmap_set_pg(void);
+static __inline void pagezero(void *page);
+
CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
@@ -1216,7 +1218,7 @@ pmap_is_current(pmap_t pmap)
{
return (pmap == kernel_pmap ||
- (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
+ (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
(pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}
@@ -1366,8 +1368,8 @@ retry:
if (pde != 0) {
if (pde & PG_PS) {
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
- (va & PDRMASK), &pa))
+ if (vm_page_pa_tryrelock(pmap, (pde &
+ PG_PS_FRAME) | (va & PDRMASK), &pa))
goto retry;
m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
(va & PDRMASK));
@@ -1379,7 +1381,8 @@ retry:
pmap_pte_release(ptep);
if (pte != 0 &&
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+ if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+ &pa))
goto retry;
m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
vm_page_hold(m);
@@ -1732,7 +1735,6 @@ pmap_pinit(pmap_t pmap)
if (pmap->pm_pdir == NULL) {
pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
NBPTD);
-
if (pmap->pm_pdir == NULL) {
PMAP_LOCK_DESTROY(pmap);
return (0);
@@ -1766,10 +1768,9 @@ pmap_pinit(pmap_t pmap)
pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
- for (i = 0; i < NPGPTD; i++) {
+ for (i = 0; i < NPGPTD; i++)
if ((ptdpg[i]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
- }
+ pagezero(pmap->pm_pdir + (i * NPDEPG));
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
@@ -1798,7 +1799,7 @@ pmap_pinit(pmap_t pmap)
* mapped correctly.
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
{
vm_paddr_t ptepa;
vm_page_t m;
@@ -1846,7 +1847,7 @@ _pmap_allocpte(pmap_t pmap, unsigned pte
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
- unsigned ptepindex;
+ u_int ptepindex;
pd_entry_t ptepa;
vm_page_t m;
@@ -1994,7 +1995,7 @@ pmap_lazyfix(pmap_t pmap)
cr3 = vtophys(pmap->pm_pdir);
if (cr3 == rcr3()) {
load_cr3(PCPU_GET(curpcb)->pcb_cr3);
- CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
+ CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
}
}
#endif /* SMP */
@@ -2825,7 +2826,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
}
for (; sva < eva; sva = pdnxt) {
- unsigned pdirindex;
+ u_int pdirindex;
/*
* Calculate index for next page table.
@@ -3046,7 +3047,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
PMAP_LOCK(pmap);
for (; sva < eva; sva = pdnxt) {
pt_entry_t obits, pbits;
- unsigned pdirindex;
+ u_int pdirindex;
pdnxt = (sva + NBPDR) & ~PDRMASK;
if (pdnxt < sva)
@@ -3572,7 +3573,7 @@ pmap_enter_object(pmap_t pmap, vm_offset
m = TAILQ_NEXT(m, listq);
}
vm_page_unlock_queues();
- PMAP_UNLOCK(pmap);
+ PMAP_UNLOCK(pmap);
}
/*
@@ -3614,7 +3615,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
- unsigned ptepindex;
+ u_int ptepindex;
pd_entry_t ptepa;
/*
@@ -3880,7 +3881,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr;
- unsigned ptepindex;
+ u_int ptepindex;
KASSERT(addr < UPT_MIN_ADDRESS,
("pmap_copy: invalid to pmap_copy page tables"));
@@ -5220,7 +5221,7 @@ pmap_pid_dump(int pid)
#if defined(DEBUG)
static void pads(pmap_t pm);
-void pmap_pvdump(vm_offset_t pa);
+void pmap_pvdump(vm_paddr_t pa);
/* print address space of pmap*/
static void
Modified: stable/9/sys/i386/xen/pmap.c
==============================================================================
--- stable/9/sys/i386/xen/pmap.c Sat Jan 21 18:54:19 2012 (r230434)
+++ stable/9/sys/i386/xen/pmap.c Sat Jan 21 19:21:42 2012 (r230435)
@@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
#endif
#include <vm/vm.h>
@@ -221,6 +223,8 @@ extern u_int32_t KERNend;
pt_entry_t pg_nx;
#endif
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
static int pat_works; /* Is page attribute table sane? */
/*
@@ -273,19 +277,6 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchan
"Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;
-SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
- "Max number of PV entries");
-SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
- "Page share factor per proc");
-SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
- "2/4MB page mapping counters");
-
-static u_long pmap_pde_mappings;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
- &pmap_pde_mappings, 0, "2/4MB page mappings");
-
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
@@ -294,6 +285,8 @@ static pv_entry_t pmap_pvh_remove(struct
static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_flush_page(vm_page_t m);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
vm_page_t *free);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
@@ -305,14 +298,12 @@ static boolean_t pmap_try_insert_pv_entr
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
-static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static __inline void pagezero(void *page);
@@ -326,8 +317,6 @@ CTASSERT(1 << PTESHIFT == sizeof(pt_entr
*/
CTASSERT(KERNBASE % (1 << 24) == 0);
-
-
void
pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
{
@@ -359,24 +348,6 @@ pd_set(struct pmap *pmap, int ptepindex,
}
/*
- * Move the kernel virtual free pointer to the next
- * 4MB. This is used to help improve performance
- * by using a large (4MB) page for much of the kernel
- * (.text, .data, .bss)
- */
-static vm_offset_t
-pmap_kmem_choose(vm_offset_t addr)
-{
- vm_offset_t newaddr = addr;
-
-#ifndef DISABLE_PSE
- if (cpu_feature & CPUID_PSE)
- newaddr = (addr + PDRMASK) & ~PDRMASK;
-#endif
- return newaddr;
-}
-
-/*
* Bootstrap the system enough to run with virtual memory.
*
* On the i386 this is called after mapping has already been enabled
@@ -395,15 +366,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
int i;
/*
- * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
- * large. It should instead be correctly calculated in locore.s and
- * not based on 'first' (which is a physical address, not a virtual
- * address, for the start of unused physical memory). The kernel
- * page tables are NOT double mapped and thus should not be included
- * in this calculation.
+ * Initialize the first available kernel virtual address. However,
+ * using "firstaddr" may waste a few pages of the kernel virtual
+ * address space, because locore may not have mapped every physical
+ * page that it allocated. Preferably, locore would provide a first
+ * unused virtual address in addition to "firstaddr".
*/
virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
- virtual_avail = pmap_kmem_choose(virtual_avail);
virtual_end = VM_MAX_KERNEL_ADDRESS;
@@ -468,8 +437,8 @@ pmap_bootstrap(vm_paddr_t firstaddr)
/*
* ptemap is used for pmap_pte_quick
*/
- SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
- SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
+ SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
+ SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
@@ -650,6 +619,18 @@ pmap_init(void)
}
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
+ "Max number of PV entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
+ "Page share factor per proc");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
+ "2/4MB page mapping counters");
+
+static u_long pmap_pde_mappings;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
+ &pmap_pde_mappings, 0, "2/4MB page mappings");
+
/***************************************************
* Low level helper routines.....
***************************************************/
@@ -896,6 +877,8 @@ pmap_invalidate_cache(void)
}
#endif /* !SMP */
+#define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024)
+
void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{
@@ -907,7 +890,8 @@ pmap_invalidate_cache_range(vm_offset_t
if (cpu_feature & CPUID_SS)
; /* If "Self Snoop" is supported, do nothing. */
- else if (cpu_feature & CPUID_CLFSH) {
+ else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+ eva - sva < PMAP_CLFLUSH_THRESHOLD) {
/*
* Otherwise, do per-cache line flush. Use the mfence
@@ -924,12 +908,27 @@ pmap_invalidate_cache_range(vm_offset_t
/*
* No targeted cache flush methods are supported by CPU,
- * globally invalidate cache as a last resort.
+ * or the supplied range is bigger than 2MB.
+ * Globally invalidate cache.
*/
pmap_invalidate_cache();
}
}
+void
+pmap_invalidate_cache_pages(vm_page_t *pages, int count)
+{
+ int i;
+
+ if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
+ (cpu_feature & CPUID_CLFSH) == 0) {
+ pmap_invalidate_cache();
+ } else {
+ for (i = 0; i < count; i++)
+ pmap_flush_page(pages[i]);
+ }
+}
+
/*
* Are we current address space or kernel? N.B. We return FALSE when
* a pmap's page table is in use because a kernel thread is borrowing
@@ -942,7 +941,7 @@ pmap_is_current(pmap_t pmap)
return (pmap == kernel_pmap ||
(pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
- (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
+ (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}
/*
@@ -971,10 +970,9 @@ pmap_pte(pmap_t pmap, vm_offset_t va)
CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
pmap, va, (*PMAP2 & 0xffffffff));
}
-
return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
}
- return (0);
+ return (NULL);
}
/*
@@ -1065,7 +1063,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va
pt_entry_t *pte;
pd_entry_t pde;
pt_entry_t pteval;
-
+
rtval = 0;
PMAP_LOCK(pmap);
pde = pmap->pm_pdir[va >> PDRSHIFT];
@@ -1124,7 +1122,7 @@ vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pd_entry_t pde;
- pt_entry_t pte;
+ pt_entry_t pte, *ptep;
vm_page_t m;
vm_paddr_t pa;
@@ -1136,26 +1134,25 @@ retry:
if (pde != 0) {
if (pde & PG_PS) {
if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
- if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
- (va & PDRMASK), &pa))
+ if (vm_page_pa_tryrelock(pmap, (pde &
+ PG_PS_FRAME) | (va & PDRMASK), &pa))
goto retry;
m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
(va & PDRMASK));
vm_page_hold(m);
}
} else {
- sched_pin();
- pte = PT_GET(pmap_pte_quick(pmap, va));
- if (*PMAP1)
- PT_SET_MA(PADDR1, 0);
- if ((pte & PG_V) &&
+ ptep = pmap_pte(pmap, va);
+ pte = PT_GET(ptep);
+ pmap_pte_release(ptep);
+ if (pte != 0 &&
((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
- if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+ if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+ &pa))
goto retry;
m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
vm_page_hold(m);
}
- sched_unpin();
}
}
PA_UNLOCK_COND(pa);
@@ -1170,10 +1167,13 @@ retry:
/*
* Add a wired page to the kva.
* Note: not SMP coherent.
+ *
+ * This function may be used before pmap_bootstrap() is called.
*/
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
+
PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
}
@@ -1186,16 +1186,18 @@ pmap_kenter_ma(vm_offset_t va, vm_paddr_
pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
}
-
-static __inline void
+static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{
+
PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}
/*
* Remove a page from the kernel pagetables.
* Note: not SMP coherent.
+ *
+ * This function may be used before pmap_bootstrap() is called.
*/
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
@@ -1292,7 +1294,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
#endif
}
-
/*
* This routine tears out page mappings from the
* kernel -- it is meant only for temporary mappings.
@@ -1342,9 +1343,9 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_pag
--m->wire_count;
if (m->wire_count == 0)
- return _pmap_unwire_pte_hold(pmap, m, free);
+ return (_pmap_unwire_pte_hold(pmap, m, free));
else
- return 0;
+ return (0);
}
static int
@@ -1385,7 +1386,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_pa
m->right = *free;
*free = m;
- return 1;
+ return (1);
}
/*
@@ -1399,17 +1400,25 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t v
vm_page_t mpte;
if (va >= VM_MAXUSER_ADDRESS)
- return 0;
+ return (0);
ptepde = PT_GET(pmap_pde(pmap, va));
mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
- return pmap_unwire_pte_hold(pmap, mpte, free);
+ return (pmap_unwire_pte_hold(pmap, mpte, free));
}
+/*
+ * Initialize the pmap for the swapper process.
+ */
void
pmap_pinit0(pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
+ /*
+ * Since the page table directory is shared with the kernel pmap,
+ * which is already included in the list "allpmaps", this pmap does
+ * not need to be inserted into that list.
+ */
pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
@@ -1418,9 +1427,6 @@ pmap_pinit0(pmap_t pmap)
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
- mtx_lock_spin(&allpmaps_lock);
- LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
- mtx_unlock_spin(&allpmaps_lock);
}
/*
@@ -1473,18 +1479,19 @@ pmap_pinit(pmap_t pmap)
ptdpg[i++] = m;
}
}
+
pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
- for (i = 0; i < NPGPTD; i++) {
+
+ for (i = 0; i < NPGPTD; i++)
if ((ptdpg[i]->flags & PG_ZERO) == 0)
- pagezero(&pmap->pm_pdir[i*NPTEPG]);
- }
+ pagezero(pmap->pm_pdir + (i * NPDEPG));
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+ /* Copy the kernel page table directory entries. */
+ bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
mtx_unlock_spin(&allpmaps_lock);
- /* Wire in kernel global address entries. */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
#ifdef PAE
pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
@@ -1536,7 +1543,7 @@ pmap_pinit(pmap_t pmap)
* mapped correctly.
*/
static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
{
vm_paddr_t ptema;
vm_page_t m;
@@ -1571,6 +1578,7 @@ _pmap_allocpte(pmap_t pmap, unsigned int
* Map the pagetable page into the process address space, if
* it isn't already there.
*/
+
pmap->pm_stats.resident_count++;
ptema = VM_PAGE_TO_MACH(m);
@@ -1586,7 +1594,7 @@ _pmap_allocpte(pmap_t pmap, unsigned int
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
- unsigned ptepindex;
+ u_int ptepindex;
pd_entry_t ptema;
vm_page_t m;
@@ -1764,6 +1772,7 @@ pmap_release(pmap_t pmap)
#else
int npgptd = NPGPTD;
#endif
+
KASSERT(pmap->pm_stats.resident_count == 0,
("pmap_release: pmap resident count %ld != 0",
pmap->pm_stats.resident_count));
@@ -1819,7 +1828,7 @@ kvm_size(SYSCTL_HANDLER_ARGS)
{
unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
- return sysctl_handle_long(oidp, &ksize, 0, req);
+ return (sysctl_handle_long(oidp, &ksize, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
0, 0, kvm_size, "IU", "Size of KVM");
@@ -1829,7 +1838,7 @@ kvm_free(SYSCTL_HANDLER_ARGS)
{
unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
- return sysctl_handle_long(oidp, &kfree, 0, req);
+ return (sysctl_handle_long(oidp, &kfree, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
0, 0, kvm_free, "IU", "Amount of KVM free");
@@ -1858,12 +1867,12 @@ pmap_growkernel(vm_offset_t addr)
}
}
}
- addr = roundup2(addr, PAGE_SIZE * NPTEPG);
+ addr = roundup2(addr, NBPDR);
if (addr - 1 >= kernel_map->max_offset)
addr = kernel_map->max_offset;
while (kernel_vm_end < addr) {
if (pdir_pde(PTD, kernel_vm_end)) {
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+ kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
kernel_vm_end = kernel_map->max_offset;
break;
@@ -1871,17 +1880,16 @@ pmap_growkernel(vm_offset_t addr)
continue;
}
- /*
- * This index is bogus, but out of the way
- */
- nkpg = vm_page_alloc(NULL, nkpt,
- VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
- if (!nkpg)
+ nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
+ VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+ VM_ALLOC_ZERO);
+ if (nkpg == NULL)
panic("pmap_growkernel: no memory to grow kernel");
nkpt++;
- pmap_zero_page(nkpg);
+ if ((nkpg->flags & PG_ZERO) == 0)
+ pmap_zero_page(nkpg);
ptppaddr = VM_PAGE_TO_PHYS(nkpg);
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
vm_page_lock_queues();
@@ -1893,7 +1901,7 @@ pmap_growkernel(vm_offset_t addr)
mtx_unlock_spin(&allpmaps_lock);
vm_page_unlock_queues();
- kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+ kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
if (kernel_vm_end - 1 >= kernel_map->max_offset) {
kernel_vm_end = kernel_map->max_offset;
break;
@@ -1913,7 +1921,7 @@ static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{
- return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+ return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}
#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
@@ -2035,15 +2043,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
pc->pc_map[field] |= 1ul << bit;
/* move to head of list */
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
for (idx = 0; idx < _NPCM; idx++)
- if (pc->pc_map[idx] != pc_freemask[idx])
+ if (pc->pc_map[idx] != pc_freemask[idx]) {
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
return;
+ }
PV_STAT(pv_entry_spare -= _NPCPV);
PV_STAT(pc_chunk_count--);
PV_STAT(pc_chunk_frees++);
/* entire chunk is free, return it */
- TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
pmap_qremove((vm_offset_t)pc, 1);
vm_page_unwire(m, 0);
@@ -2274,10 +2282,10 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
pt_entry_t *pte;
vm_page_t free = NULL;
int anyvalid;
-
+
CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
pmap, sva, eva);
-
+
/*
* Perform an unsynchronized read. This is, however, safe.
*/
@@ -2302,12 +2310,14 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
}
for (; sva < eva; sva = pdnxt) {
- unsigned pdirindex;
+ u_int pdirindex;
/*
* Calculate index for next page table.
*/
pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
if (pmap->pm_stats.resident_count == 0)
break;
@@ -2397,7 +2407,6 @@ pmap_remove_all(vm_page_t m)
PMAP_LOCK(pmap);
pmap->pm_stats.resident_count--;
pte = pmap_pte_quick(pmap, pv->pv_va);
-
tpte = *pte;
PT_SET_VA_MA(pte, 0, TRUE);
if (tpte & PG_W)
@@ -2461,9 +2470,11 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
PMAP_LOCK(pmap);
for (; sva < eva; sva = pdnxt) {
pt_entry_t obits, pbits;
- unsigned pdirindex;
+ u_int pdirindex;
pdnxt = (sva + NBPDR) & ~PDRMASK;
+ if (pdnxt < sva)
+ pdnxt = eva;
pdirindex = sva >> PDRSHIFT;
ptpaddr = pmap->pm_pdir[pdirindex];
@@ -2573,7 +2584,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
va));
- KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
+ KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
+ VM_OBJECT_LOCKED(m->object),
("pmap_enter: page %p is not busy", m));
mpte = NULL;
@@ -2776,10 +2788,9 @@ pmap_enter_object(pmap_t pmap, vm_offset
multicall_entry_t mcl[16];
multicall_entry_t *mclp = mcl;
int error, count = 0;
-
+
VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
psize = atop(end - start);
-
mpte = NULL;
m = m_start;
vm_page_lock_queues();
@@ -2818,7 +2829,7 @@ pmap_enter_quick(pmap_t pmap, vm_offset_
multicall_entry_t mcl, *mclp;
int count = 0;
mclp = &mcl;
-
+
CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
pmap, va, m, prot);
@@ -2869,7 +2880,7 @@ pmap_enter_quick_locked(multicall_entry_
vm_paddr_t pa;
vm_page_t free;
multicall_entry_t *mcl = *mclpp;
-
+
KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
(m->oflags & VPO_UNMANAGED) != 0,
("pmap_enter_quick_locked: managed mapping within the clean submap"));
@@ -2881,7 +2892,7 @@ pmap_enter_quick_locked(multicall_entry_
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
- unsigned ptepindex;
+ u_int ptepindex;
pd_entry_t ptema;
/*
@@ -2985,7 +2996,7 @@ pmap_enter_quick_locked(multicall_entry_
*mclpp = mcl + 1;
*count = *count + 1;
#endif
- return mpte;
+ return (mpte);
}
/*
@@ -3010,9 +3021,8 @@ pmap_kenter_temporary(vm_paddr_t pa, int
* are taken, but the code works.
*/
void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
- vm_object_t object, vm_pindex_t pindex,
- vm_size_t size)
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
+ vm_pindex_t pindex, vm_size_t size)
{
pd_entry_t *pde;
vm_paddr_t pa, ptepa;
@@ -3030,6 +3040,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
KASSERT(p->valid == VM_PAGE_BITS_ALL,
("pmap_object_init_pt: invalid page %p", p));
pat_mode = p->md.pat_mode;
+
/*
* Abort the mapping if the first page is not physically
* aligned to a 2/4MB page boundary.
@@ -3037,6 +3048,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
ptepa = VM_PAGE_TO_PHYS(p);
if (ptepa & (NBPDR - 1))
return;
+
/*
* Skip the first page. Abort the mapping if the rest of
* the pages are not physically contiguous or have differing
@@ -3052,7 +3064,12 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
return;
p = TAILQ_NEXT(p, listq);
}
- /* Map using 2/4MB pages. */
+
+ /*
+ * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and
+ * "size" is a multiple of 2/4M, adding the PAT setting to
+ * "pa" will not affect the termination of this loop.
+ */
PMAP_LOCK(pmap);
for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
size; pa += NBPDR) {
@@ -3116,7 +3133,7 @@ pmap_change_wiring(pmap_t pmap, vm_offse
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
- vm_offset_t src_addr)
+ vm_offset_t src_addr)
{
vm_page_t free;
vm_offset_t addr;
@@ -3153,12 +3170,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr;
- unsigned ptepindex;
+ u_int ptepindex;
KASSERT(addr < UPT_MIN_ADDRESS,
("pmap_copy: invalid to pmap_copy page tables"));
pdnxt = (addr + NBPDR) & ~PDRMASK;
+ if (pdnxt < addr)
+ pdnxt = end_addr;
ptepindex = addr >> PDRSHIFT;
srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
@@ -3192,7 +3211,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
dstmpte = pmap_allocpte(dst_pmap, addr,
M_NOWAIT);
if (dstmpte == NULL)
- break;
+ goto out;
dst_pte = pmap_pte_quick(dst_pmap, addr);
if (*dst_pte == 0 &&
pmap_try_insert_pv_entry(dst_pmap, addr,
@@ -3216,6 +3235,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
addr);
pmap_free_zero_pages(free);
}
+ goto out;
}
if (dstmpte->wire_count >= srcmpte->wire_count)
break;
@@ -3224,6 +3244,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
src_pte++;
}
}
+out:
PT_UPDATES_FLUSH();
sched_unpin();
vm_page_unlock_queues();
@@ -3286,7 +3307,7 @@ pmap_zero_page_area(vm_page_t m, int off
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
mtx_lock(&sysmaps->lock);
if (*sysmaps->CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
+ panic("pmap_zero_page_area: CMAP2 busy");
sched_pin();
PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
@@ -3310,7 +3331,7 @@ pmap_zero_page_idle(vm_page_t m)
{
if (*CMAP3)
- panic("pmap_zero_page: CMAP3 busy");
+ panic("pmap_zero_page_idle: CMAP3 busy");
sched_pin();
PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
pagezero(CADDR3);
@@ -3774,7 +3795,6 @@ pmap_ts_referenced(vm_page_t m)
PT_UPDATES_FLUSH();
if (*PMAP1)
PT_SET_MA(PADDR1, 0);
-
sched_unpin();
vm_page_unlock_queues();
return (rtval);
@@ -3809,7 +3829,7 @@ pmap_clear_modify(vm_page_t m)
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pte = pmap_pte_quick(pmap, pv->pv_va);
- if ((*pte & PG_M) != 0) {
+ if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
/*
* Regardless of whether a pte is 32 or 64 bits
* in size, PG_M is among the least significant
@@ -3931,8 +3951,6 @@ pmap_unmapdev(vm_offset_t va, vm_size_t
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{
- struct sysmaps *sysmaps;
- vm_offset_t sva, eva;
m->md.pat_mode = ma;
if ((m->flags & PG_FICTITIOUS) != 0)
@@ -3955,11 +3973,21 @@ pmap_page_set_memattr(vm_page_t m, vm_me
* invalidation. In the worst case, whole cache is flushed by
* pmap_invalidate_cache_range().
*/
- if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
+ if ((cpu_feature & CPUID_SS) == 0)
+ pmap_flush_page(m);
+}
+
+static void
+pmap_flush_page(vm_page_t m)
+{
+ struct sysmaps *sysmaps;
+ vm_offset_t sva, eva;
+
+ if ((cpu_feature & CPUID_CLFSH) != 0) {
sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
mtx_lock(&sysmaps->lock);
if (*sysmaps->CMAP2)
- panic("pmap_page_set_memattr: CMAP2 busy");
+ panic("pmap_flush_page: CMAP2 busy");
sched_pin();
PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
VM_PAGE_TO_MACH(m) | PG_A | PG_M |
@@ -3967,21 +3995,35 @@ pmap_page_set_memattr(vm_page_t m, vm_me
invlcaddr(sysmaps->CADDR2);
sva = (vm_offset_t)sysmaps->CADDR2;
eva = sva + PAGE_SIZE;
- } else
- sva = eva = 0; /* gcc */
- pmap_invalidate_cache_range(sva, eva);
- if (sva != 0) {
+
+ /*
+ * Use mfence despite the ordering implied by
+ * mtx_{un,}lock() because clflush is not guaranteed
+ * to be ordered by any other instruction.
+ */
+ mfence();
+ for (; sva < eva; sva += cpu_clflush_line_size)
+ clflush(sva);
+ mfence();
PT_SET_MA(sysmaps->CADDR2, 0);
sched_unpin();
mtx_unlock(&sysmaps->lock);
- }
+ } else
+ pmap_invalidate_cache();
}
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode". The specified virtual address range must be
+ * completely contained within either the kernel map.
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-9
mailing list