svn commit: r230435 - in stable/9/sys: amd64/amd64 i386/i386 i386/xen

Alan Cox alc at FreeBSD.org
Sat Jan 21 19:21:42 UTC 2012


Author: alc
Date: Sat Jan 21 19:21:42 2012
New Revision: 230435
URL: http://svn.freebsd.org/changeset/base/230435

Log:
  MFC r228923, r228935, and r229007
    Eliminate many of the unnecessary differences between the native and
    paravirtualized pmap implementations for i386.
  
    Fix a bug in the Xen pmap's implementation of
    pmap_extract_and_hold(): If the page lock acquisition is retried,
    then the underlying thread is not unpinned.
  
    Wrap nearby lines that exceed 80 columns.
  
    Merge r216333 and r216555 from the native pmap
      When r207410 eliminated the acquisition and release of the page
      queues lock from pmap_extract_and_hold(), it didn't take into
      account that pmap_pte_quick() sometimes requires the page queues
      lock to be held.  This change reimplements pmap_extract_and_hold()
      such that it no longer uses pmap_pte_quick(), and thus never
      requires the page queues lock.
  
    Merge r177525 from the native pmap
      Prevent overflow in the calculation of the next page-directory
      boundary.  The overflow causes a wraparound, with consequent
      corruption of (almost) the whole address space mapping.
  
    Strictly speaking, r177525 is not required by the Xen pmap because
    the hypervisor steals the uppermost region of the normal kernel
    address space.  I am nonetheless merging it in order to reduce the
    number of unnecessary differences between the native and Xen pmap
    implementations.

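As a rough standalone illustration of the unpin bug fixed by r228923 (ordinary
userland C, not the kernel code; pin(), unpin() and tryrelock() below are
hypothetical stand-ins for sched_pin(), sched_unpin() and
vm_page_pa_tryrelock()): the old Xen pmap_extract_and_hold() placed
sched_pin() below the "retry" label, so a retried page lock acquisition pinned
the thread a second time while unpinning it only once.  The merged change
avoids pinning altogether by using pmap_pte()/pmap_pte_release() instead of
pmap_pte_quick(); see the i386/xen/pmap.c hunk below.

	#include <stdio.h>

	static int pin_count;				/* models td_pinned */

	static void pin(void)   { pin_count++; }	/* sched_pin() stand-in */
	static void unpin(void) { pin_count--; }	/* sched_unpin() stand-in */

	/* vm_page_pa_tryrelock() stand-in: pretend the first attempt must retry. */
	static int
	tryrelock(int attempt)
	{
		return (attempt == 0);			/* nonzero means "retry" */
	}

	int
	main(void)
	{
		int attempt = 0;

	retry:
		pin();				/* pinned inside the retried region */
		if (tryrelock(attempt++))
			goto retry;		/* bug: retries without unpin() */
		unpin();			/* balances only the last pin() */

		printf("pins leaked: %d\n", pin_count);	/* prints 1 */
		return (0);
	}
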
Modified:
  stable/9/sys/amd64/amd64/pmap.c
  stable/9/sys/i386/i386/pmap.c
  stable/9/sys/i386/xen/pmap.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)

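Before the diffs, a quick standalone illustration (again ordinary userland C,
not kernel code) of the overflow that r177525 guards against, assuming
non-PAE i386 constants (PDRSHIFT = 22, so NBPDR is 4MB): rounding a virtual
address near the top of the 32-bit address space up to the next
page-directory boundary wraps around to a small value, which would make the
traversal restart at the bottom of the address space and touch (almost) the
entire mapping; the merged guard clamps the result to the end of the range
instead, matching the "if (pdnxt < sva) pdnxt = eva;" additions in the
i386/xen/pmap.c hunks below.

	#include <stdint.h>
	#include <stdio.h>

	#define PDRSHIFT	22				/* non-PAE i386 assumption */
	#define NBPDR		((uint32_t)1 << PDRSHIFT)	/* 4MB */
	#define PDRMASK		(NBPDR - 1)

	int
	main(void)
	{
		uint32_t sva = 0xffe01000;	/* hypothetical va near the top of KVA */
		uint32_t eva = 0xfffff000;	/* hypothetical end of the range */
		uint32_t pdnxt;

		pdnxt = (sva + NBPDR) & ~PDRMASK;
		printf("unguarded pdnxt = 0x%08x\n", (unsigned)pdnxt);	/* 0x00000000 */
		if (pdnxt < sva)		/* the r177525 guard */
			pdnxt = eva;
		printf("guarded   pdnxt = 0x%08x\n", (unsigned)pdnxt);	/* 0xfffff000 */
		return (0);
	}
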
Modified: stable/9/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/9/sys/amd64/amd64/pmap.c	Sat Jan 21 18:54:19 2012	(r230434)
+++ stable/9/sys/amd64/amd64/pmap.c	Sat Jan 21 19:21:42 2012	(r230435)
@@ -1255,8 +1255,8 @@ retry:
 	if (pdep != NULL && (pde = *pdep)) {
 		if (pde & PG_PS) {
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
-				if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
-				       (va & PDRMASK), &pa))
+				if (vm_page_pa_tryrelock(pmap, (pde &
+				    PG_PS_FRAME) | (va & PDRMASK), &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 				    (va & PDRMASK));
@@ -1266,7 +1266,8 @@ retry:
 			pte = *pmap_pde_to_pte(pdep, va);
 			if ((pte & PG_V) &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
-				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+				    &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 				vm_page_hold(m);

Modified: stable/9/sys/i386/i386/pmap.c
==============================================================================
--- stable/9/sys/i386/i386/pmap.c	Sat Jan 21 18:54:19 2012	(r230434)
+++ stable/9/sys/i386/i386/pmap.c	Sat Jan 21 19:21:42 2012	(r230435)
@@ -330,7 +330,7 @@ static void pmap_update_pde_invalidate(v
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
@@ -340,6 +340,8 @@ static void *pmap_pdpt_allocf(uma_zone_t
 #endif
 static void pmap_set_pg(void);
 
+static __inline void pagezero(void *page);
+
 CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
 CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
 
@@ -1216,7 +1218,7 @@ pmap_is_current(pmap_t pmap)
 {
 
 	return (pmap == kernel_pmap ||
-		(pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
+	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
 	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
 }
 
@@ -1366,8 +1368,8 @@ retry:
 	if (pde != 0) {
 		if (pde & PG_PS) {
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
-				if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
-				       (va & PDRMASK), &pa))
+				if (vm_page_pa_tryrelock(pmap, (pde &
+				    PG_PS_FRAME) | (va & PDRMASK), &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 				    (va & PDRMASK));
@@ -1379,7 +1381,8 @@ retry:
 			pmap_pte_release(ptep);
 			if (pte != 0 &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
-				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+				    &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 				vm_page_hold(m);
@@ -1732,7 +1735,6 @@ pmap_pinit(pmap_t pmap)
 	if (pmap->pm_pdir == NULL) {
 		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
 		    NBPTD);
-
 		if (pmap->pm_pdir == NULL) {
 			PMAP_LOCK_DESTROY(pmap);
 			return (0);
@@ -1766,10 +1768,9 @@ pmap_pinit(pmap_t pmap)
 
 	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
 
-	for (i = 0; i < NPGPTD; i++) {
+	for (i = 0; i < NPGPTD; i++)
 		if ((ptdpg[i]->flags & PG_ZERO) == 0)
-			bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
-	}
+			pagezero(pmap->pm_pdir + (i * NPDEPG));
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
@@ -1798,7 +1799,7 @@ pmap_pinit(pmap_t pmap)
  * mapped correctly.
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
 {
 	vm_paddr_t ptepa;
 	vm_page_t m;
@@ -1846,7 +1847,7 @@ _pmap_allocpte(pmap_t pmap, unsigned pte
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
-	unsigned ptepindex;
+	u_int ptepindex;
 	pd_entry_t ptepa;
 	vm_page_t m;
 
@@ -1994,7 +1995,7 @@ pmap_lazyfix(pmap_t pmap)
 	cr3 = vtophys(pmap->pm_pdir);
 	if (cr3 == rcr3()) {
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); 
+		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
 	}
 }
 #endif	/* SMP */
@@ -2825,7 +2826,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 	}
 
 	for (; sva < eva; sva = pdnxt) {
-		unsigned pdirindex;
+		u_int pdirindex;
 
 		/*
 		 * Calculate index for next page table.
@@ -3046,7 +3047,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = pdnxt) {
 		pt_entry_t obits, pbits;
-		unsigned pdirindex;
+		u_int pdirindex;
 
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
 		if (pdnxt < sva)
@@ -3572,7 +3573,7 @@ pmap_enter_object(pmap_t pmap, vm_offset
 		m = TAILQ_NEXT(m, listq);
 	}
 	vm_page_unlock_queues();
- 	PMAP_UNLOCK(pmap);
+	PMAP_UNLOCK(pmap);
 }
 
 /*
@@ -3614,7 +3615,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
-		unsigned ptepindex;
+		u_int ptepindex;
 		pd_entry_t ptepa;
 
 		/*
@@ -3880,7 +3881,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 		pt_entry_t *src_pte, *dst_pte;
 		vm_page_t dstmpte, srcmpte;
 		pd_entry_t srcptepaddr;
-		unsigned ptepindex;
+		u_int ptepindex;
 
 		KASSERT(addr < UPT_MIN_ADDRESS,
 		    ("pmap_copy: invalid to pmap_copy page tables"));
@@ -5220,7 +5221,7 @@ pmap_pid_dump(int pid)
 #if defined(DEBUG)
 
 static void	pads(pmap_t pm);
-void		pmap_pvdump(vm_offset_t pa);
+void		pmap_pvdump(vm_paddr_t pa);
 
 /* print address space of pmap*/
 static void

Modified: stable/9/sys/i386/xen/pmap.c
==============================================================================
--- stable/9/sys/i386/xen/pmap.c	Sat Jan 21 18:54:19 2012	(r230434)
+++ stable/9/sys/i386/xen/pmap.c	Sat Jan 21 19:21:42 2012	(r230435)
@@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
 #endif
 
 #include <vm/vm.h>
@@ -221,6 +223,8 @@ extern u_int32_t KERNend;
 pt_entry_t pg_nx;
 #endif
 
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
 static int pat_works;			/* Is page attribute table sane? */
 
 /*
@@ -273,19 +277,6 @@ SYSCTL_INT(_debug, OID_AUTO, PMAP1unchan
 	   "Number of times pmap_pte_quick didn't change PMAP1");
 static struct mtx PMAP2mutex;
 
-SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
-	"Max number of PV entries");
-SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
-	"Page share factor per proc");
-SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
-    "2/4MB page mapping counters");
-
-static u_long pmap_pde_mappings;
-SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
-    &pmap_pde_mappings, 0, "2/4MB page mappings");
-
 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
 static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);
 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
@@ -294,6 +285,8 @@ static pv_entry_t pmap_pvh_remove(struct
 
 static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count, pmap_t pmap, vm_offset_t va,
     vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static void pmap_flush_page(vm_page_t m);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
     vm_page_t *free);
 static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
@@ -305,14 +298,12 @@ static boolean_t pmap_try_insert_pv_entr
 
 static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
 
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
 static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
 static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
 static void pmap_pte_release(pt_entry_t *pte);
 static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
-static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
 static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
-static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
 
 static __inline void pagezero(void *page);
 
@@ -326,8 +317,6 @@ CTASSERT(1 << PTESHIFT == sizeof(pt_entr
  */
 CTASSERT(KERNBASE % (1 << 24) == 0);
 
-
-
 void 
 pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
 {
@@ -359,24 +348,6 @@ pd_set(struct pmap *pmap, int ptepindex,
 }
 
 /*
- * Move the kernel virtual free pointer to the next
- * 4MB.  This is used to help improve performance
- * by using a large (4MB) page for much of the kernel
- * (.text, .data, .bss)
- */
-static vm_offset_t
-pmap_kmem_choose(vm_offset_t addr)
-{
-	vm_offset_t newaddr = addr;
-
-#ifndef DISABLE_PSE
-	if (cpu_feature & CPUID_PSE)
-		newaddr = (addr + PDRMASK) & ~PDRMASK;
-#endif
-	return newaddr;
-}
-
-/*
  *	Bootstrap the system enough to run with virtual memory.
  *
  *	On the i386 this is called after mapping has already been enabled
@@ -395,15 +366,13 @@ pmap_bootstrap(vm_paddr_t firstaddr)
 	int i;
 
 	/*
-	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
-	 * large. It should instead be correctly calculated in locore.s and
-	 * not based on 'first' (which is a physical address, not a virtual
-	 * address, for the start of unused physical memory). The kernel
-	 * page tables are NOT double mapped and thus should not be included
-	 * in this calculation.
+	 * Initialize the first available kernel virtual address.  However,
+	 * using "firstaddr" may waste a few pages of the kernel virtual
+	 * address space, because locore may not have mapped every physical
+	 * page that it allocated.  Preferably, locore would provide a first
+	 * unused virtual address in addition to "firstaddr".
 	 */
 	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
-	virtual_avail = pmap_kmem_choose(virtual_avail);
 
 	virtual_end = VM_MAX_KERNEL_ADDRESS;
 
@@ -468,8 +437,8 @@ pmap_bootstrap(vm_paddr_t firstaddr)
 	/*
 	 * ptemap is used for pmap_pte_quick
 	 */
-	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
-	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
+	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
+	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)
 
 	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
 
@@ -650,6 +619,18 @@ pmap_init(void)
 }
 
 
+SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
+	"Max number of PV entries");
+SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
+	"Page share factor per proc");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
+    "2/4MB page mapping counters");
+
+static u_long pmap_pde_mappings;
+SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
+    &pmap_pde_mappings, 0, "2/4MB page mappings");
+
 /***************************************************
  * Low level helper routines.....
  ***************************************************/
@@ -896,6 +877,8 @@ pmap_invalidate_cache(void)
 }
 #endif /* !SMP */
 
+#define	PMAP_CLFLUSH_THRESHOLD	(2 * 1024 * 1024)
+
 void
 pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
 {
@@ -907,7 +890,8 @@ pmap_invalidate_cache_range(vm_offset_t 
 
 	if (cpu_feature & CPUID_SS)
 		; /* If "Self Snoop" is supported, do nothing. */
-	else if (cpu_feature & CPUID_CLFSH) {
+	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
+	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {
 
 		/*
 		 * Otherwise, do per-cache line flush.  Use the mfence
@@ -924,12 +908,27 @@ pmap_invalidate_cache_range(vm_offset_t 
 
 		/*
 		 * No targeted cache flush methods are supported by CPU,
-		 * globally invalidate cache as a last resort.
+		 * or the supplied range is bigger than 2MB.
+		 * Globally invalidate cache.
 		 */
 		pmap_invalidate_cache();
 	}
 }
 
+void
+pmap_invalidate_cache_pages(vm_page_t *pages, int count)
+{
+	int i;
+
+	if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
+	    (cpu_feature & CPUID_CLFSH) == 0) {
+		pmap_invalidate_cache();
+	} else {
+		for (i = 0; i < count; i++)
+			pmap_flush_page(pages[i]);
+	}
+}
+
 /*
  * Are we current address space or kernel?  N.B. We return FALSE when
  * a pmap's page table is in use because a kernel thread is borrowing
@@ -942,7 +941,7 @@ pmap_is_current(pmap_t pmap)
 
 	return (pmap == kernel_pmap ||
 	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
-		(pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
+	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
 }
 
 /*
@@ -971,10 +970,9 @@ pmap_pte(pmap_t pmap, vm_offset_t va)
 			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
 			    pmap, va, (*PMAP2 & 0xffffffff));
 		}
-		
 		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
 	}
-	return (0);
+	return (NULL);
 }
 
 /*
@@ -1065,7 +1063,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va
 	pt_entry_t *pte;
 	pd_entry_t pde;
 	pt_entry_t pteval;
-	
+
 	rtval = 0;
 	PMAP_LOCK(pmap);
 	pde = pmap->pm_pdir[va >> PDRSHIFT];
@@ -1124,7 +1122,7 @@ vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
 	pd_entry_t pde;
-	pt_entry_t pte;
+	pt_entry_t pte, *ptep;
 	vm_page_t m;
 	vm_paddr_t pa;
 
@@ -1136,26 +1134,25 @@ retry:
 	if (pde != 0) {
 		if (pde & PG_PS) {
 			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
-				if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) |
-				       (va & PDRMASK), &pa))
+				if (vm_page_pa_tryrelock(pmap, (pde &
+				    PG_PS_FRAME) | (va & PDRMASK), &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
 				    (va & PDRMASK));
 				vm_page_hold(m);
 			}
 		} else {
-			sched_pin();
-			pte = PT_GET(pmap_pte_quick(pmap, va));
-			if (*PMAP1)
-				PT_SET_MA(PADDR1, 0);
-			if ((pte & PG_V) &&
+			ptep = pmap_pte(pmap, va);
+			pte = PT_GET(ptep);
+			pmap_pte_release(ptep);
+			if (pte != 0 &&
 			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
-				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa))
+				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
+				    &pa))
 					goto retry;
 				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
 				vm_page_hold(m);
 			}
-			sched_unpin();
 		}
 	}
 	PA_UNLOCK_COND(pa);
@@ -1170,10 +1167,13 @@ retry:
 /*
  * Add a wired page to the kva.
  * Note: not SMP coherent.
+ *
+ * This function may be used before pmap_bootstrap() is called.
  */
 void 
 pmap_kenter(vm_offset_t va, vm_paddr_t pa)
 {
+
 	PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
 }
 
@@ -1186,16 +1186,18 @@ pmap_kenter_ma(vm_offset_t va, vm_paddr_
 	pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
 }
 
-
-static __inline void 
+static __inline void
 pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
 {
+
 	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
 }
 
 /*
  * Remove a page from the kernel pagetables.
  * Note: not SMP coherent.
+ *
+ * This function may be used before pmap_bootstrap() is called.
  */
 PMAP_INLINE void
 pmap_kremove(vm_offset_t va)
@@ -1292,7 +1294,6 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
 #endif	
 }
 
-
 /*
  * This routine tears out page mappings from the
  * kernel -- it is meant only for temporary mappings.
@@ -1342,9 +1343,9 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_pag
 
 	--m->wire_count;
 	if (m->wire_count == 0)
-		return _pmap_unwire_pte_hold(pmap, m, free);
+		return (_pmap_unwire_pte_hold(pmap, m, free));
 	else
-		return 0;
+		return (0);
 }
 
 static int 
@@ -1385,7 +1386,7 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_pa
 	m->right = *free;
 	*free = m;
 
-	return 1;
+	return (1);
 }
 
 /*
@@ -1399,17 +1400,25 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t v
 	vm_page_t mpte;
 
 	if (va >= VM_MAXUSER_ADDRESS)
-		return 0;
+		return (0);
 	ptepde = PT_GET(pmap_pde(pmap, va));
 	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
-	return pmap_unwire_pte_hold(pmap, mpte, free);
+	return (pmap_unwire_pte_hold(pmap, mpte, free));
 }
 
+/*
+ * Initialize the pmap for the swapper process.
+ */
 void
 pmap_pinit0(pmap_t pmap)
 {
 
 	PMAP_LOCK_INIT(pmap);
+	/*
+	 * Since the page table directory is shared with the kernel pmap,
+	 * which is already included in the list "allpmaps", this pmap does
+	 * not need to be inserted into that list.
+	 */
 	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
 #ifdef PAE
 	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
@@ -1418,9 +1427,6 @@ pmap_pinit0(pmap_t pmap)
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
-	mtx_lock_spin(&allpmaps_lock);
-	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
-	mtx_unlock_spin(&allpmaps_lock);
 }
 
 /*
@@ -1473,18 +1479,19 @@ pmap_pinit(pmap_t pmap)
 			ptdpg[i++] = m;
 		}
 	}
+
 	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
-	for (i = 0; i < NPGPTD; i++) {
+
+	for (i = 0; i < NPGPTD; i++)
 		if ((ptdpg[i]->flags & PG_ZERO) == 0)
-			pagezero(&pmap->pm_pdir[i*NPTEPG]);
-	}
+			pagezero(pmap->pm_pdir + (i * NPDEPG));
 
 	mtx_lock_spin(&allpmaps_lock);
 	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+	/* Copy the kernel page table directory entries. */
+	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 	mtx_unlock_spin(&allpmaps_lock);
-	/* Wire in kernel global address entries. */
 
-	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
 #ifdef PAE
 	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
 	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
@@ -1536,7 +1543,7 @@ pmap_pinit(pmap_t pmap)
  * mapped correctly.
  */
 static vm_page_t
-_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
+_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags)
 {
 	vm_paddr_t ptema;
 	vm_page_t m;
@@ -1571,6 +1578,7 @@ _pmap_allocpte(pmap_t pmap, unsigned int
 	 * Map the pagetable page into the process address space, if
 	 * it isn't already there.
 	 */
+
 	pmap->pm_stats.resident_count++;
 
 	ptema = VM_PAGE_TO_MACH(m);
@@ -1586,7 +1594,7 @@ _pmap_allocpte(pmap_t pmap, unsigned int
 static vm_page_t
 pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
 {
-	unsigned ptepindex;
+	u_int ptepindex;
 	pd_entry_t ptema;
 	vm_page_t m;
 
@@ -1764,6 +1772,7 @@ pmap_release(pmap_t pmap)
 #else
 	int npgptd = NPGPTD;
 #endif
+
 	KASSERT(pmap->pm_stats.resident_count == 0,
 	    ("pmap_release: pmap resident count %ld != 0",
 	    pmap->pm_stats.resident_count));
@@ -1819,7 +1828,7 @@ kvm_size(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;
 
-	return sysctl_handle_long(oidp, &ksize, 0, req);
+	return (sysctl_handle_long(oidp, &ksize, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_size, "IU", "Size of KVM");
@@ -1829,7 +1838,7 @@ kvm_free(SYSCTL_HANDLER_ARGS)
 {
 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
 
-	return sysctl_handle_long(oidp, &kfree, 0, req);
+	return (sysctl_handle_long(oidp, &kfree, 0, req));
 }
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 
     0, 0, kvm_free, "IU", "Amount of KVM free");
@@ -1858,12 +1867,12 @@ pmap_growkernel(vm_offset_t addr)
 			}
 		}
 	}
-	addr = roundup2(addr, PAGE_SIZE * NPTEPG);
+	addr = roundup2(addr, NBPDR);
 	if (addr - 1 >= kernel_map->max_offset)
 		addr = kernel_map->max_offset;
 	while (kernel_vm_end < addr) {
 		if (pdir_pde(PTD, kernel_vm_end)) {
-			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 				kernel_vm_end = kernel_map->max_offset;
 				break;
@@ -1871,17 +1880,16 @@ pmap_growkernel(vm_offset_t addr)
 			continue;
 		}
 
-		/*
-		 * This index is bogus, but out of the way
-		 */
-		nkpg = vm_page_alloc(NULL, nkpt,
-		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
-		if (!nkpg)
+		nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
+		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+		    VM_ALLOC_ZERO);
+		if (nkpg == NULL)
 			panic("pmap_growkernel: no memory to grow kernel");
 
 		nkpt++;
 
-		pmap_zero_page(nkpg);
+		if ((nkpg->flags & PG_ZERO) == 0)
+			pmap_zero_page(nkpg);
 		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
 		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
 		vm_page_lock_queues();
@@ -1893,7 +1901,7 @@ pmap_growkernel(vm_offset_t addr)
 		mtx_unlock_spin(&allpmaps_lock);
 		vm_page_unlock_queues();
 
-		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
 			kernel_vm_end = kernel_map->max_offset;
 			break;
@@ -1913,7 +1921,7 @@ static __inline struct pv_chunk *
 pv_to_chunk(pv_entry_t pv)
 {
 
-	return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
+	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
 }
 
 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
@@ -2035,15 +2043,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
 	pc->pc_map[field] |= 1ul << bit;
 	/* move to head of list */
 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
-	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 	for (idx = 0; idx < _NPCM; idx++)
-		if (pc->pc_map[idx] != pc_freemask[idx])
+		if (pc->pc_map[idx] != pc_freemask[idx]) {
+			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
 			return;
+		}
 	PV_STAT(pv_entry_spare -= _NPCPV);
 	PV_STAT(pc_chunk_count--);
 	PV_STAT(pc_chunk_frees++);
 	/* entire chunk is free, return it */
-	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
 	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
 	pmap_qremove((vm_offset_t)pc, 1);
 	vm_page_unwire(m, 0);
@@ -2274,10 +2282,10 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 	pt_entry_t *pte;
 	vm_page_t free = NULL;
 	int anyvalid;
-	
+
 	CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
 	    pmap, sva, eva);
-	
+
 	/*
 	 * Perform an unsynchronized read.  This is, however, safe.
 	 */
@@ -2302,12 +2310,14 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
 	}
 
 	for (; sva < eva; sva = pdnxt) {
-		unsigned pdirindex;
+		u_int pdirindex;
 
 		/*
 		 * Calculate index for next page table.
 		 */
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
+		if (pdnxt < sva)
+			pdnxt = eva;
 		if (pmap->pm_stats.resident_count == 0)
 			break;
 
@@ -2397,7 +2407,6 @@ pmap_remove_all(vm_page_t m)
 		PMAP_LOCK(pmap);
 		pmap->pm_stats.resident_count--;
 		pte = pmap_pte_quick(pmap, pv->pv_va);
-
 		tpte = *pte;
 		PT_SET_VA_MA(pte, 0, TRUE);
 		if (tpte & PG_W)
@@ -2461,9 +2470,11 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = pdnxt) {
 		pt_entry_t obits, pbits;
-		unsigned pdirindex;
+		u_int pdirindex;
 
 		pdnxt = (sva + NBPDR) & ~PDRMASK;
+		if (pdnxt < sva)
+			pdnxt = eva;
 
 		pdirindex = sva >> PDRSHIFT;
 		ptpaddr = pmap->pm_pdir[pdirindex];
@@ -2573,7 +2584,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, 
 	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
 	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
 	    va));
-	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0,
+	KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 ||
+	    VM_OBJECT_LOCKED(m->object),
 	    ("pmap_enter: page %p is not busy", m));
 
 	mpte = NULL;
@@ -2776,10 +2788,9 @@ pmap_enter_object(pmap_t pmap, vm_offset
 	multicall_entry_t mcl[16];
 	multicall_entry_t *mclp = mcl;
 	int error, count = 0;
-	
+
 	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
 	psize = atop(end - start);
-	    
 	mpte = NULL;
 	m = m_start;
 	vm_page_lock_queues();
@@ -2818,7 +2829,7 @@ pmap_enter_quick(pmap_t pmap, vm_offset_
 	multicall_entry_t mcl, *mclp;
 	int count = 0;
 	mclp = &mcl;
-	
+
 	CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x",
 	    pmap, va, m, prot);
 	
@@ -2869,7 +2880,7 @@ pmap_enter_quick_locked(multicall_entry_
 	vm_paddr_t pa;
 	vm_page_t free;
 	multicall_entry_t *mcl = *mclpp;
-	
+
 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
 	    (m->oflags & VPO_UNMANAGED) != 0,
 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
@@ -2881,7 +2892,7 @@ pmap_enter_quick_locked(multicall_entry_
 	 * resident, we are creating it here.
 	 */
 	if (va < VM_MAXUSER_ADDRESS) {
-		unsigned ptepindex;
+		u_int ptepindex;
 		pd_entry_t ptema;
 
 		/*
@@ -2985,7 +2996,7 @@ pmap_enter_quick_locked(multicall_entry_
 	*mclpp = mcl + 1;
 	*count = *count + 1;
 #endif	
-	return mpte;
+	return (mpte);
 }
 
 /*
@@ -3010,9 +3021,8 @@ pmap_kenter_temporary(vm_paddr_t pa, int
  * are taken, but the code works.
  */
 void
-pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
-		    vm_object_t object, vm_pindex_t pindex,
-		    vm_size_t size)
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
+    vm_pindex_t pindex, vm_size_t size)
 {
 	pd_entry_t *pde;
 	vm_paddr_t pa, ptepa;
@@ -3030,6 +3040,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
 		KASSERT(p->valid == VM_PAGE_BITS_ALL,
 		    ("pmap_object_init_pt: invalid page %p", p));
 		pat_mode = p->md.pat_mode;
+
 		/*
 		 * Abort the mapping if the first page is not physically
 		 * aligned to a 2/4MB page boundary.
@@ -3037,6 +3048,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
 		ptepa = VM_PAGE_TO_PHYS(p);
 		if (ptepa & (NBPDR - 1))
 			return;
+
 		/*
 		 * Skip the first page.  Abort the mapping if the rest of
 		 * the pages are not physically contiguous or have differing
@@ -3052,7 +3064,12 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
 				return;
 			p = TAILQ_NEXT(p, listq);
 		}
-		/* Map using 2/4MB pages. */
+
+		/*
+		 * Map using 2/4MB pages.  Since "ptepa" is 2/4M aligned and
+		 * "size" is a multiple of 2/4M, adding the PAT setting to
+		 * "pa" will not affect the termination of this loop.
+		 */
 		PMAP_LOCK(pmap);
 		for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
 		    size; pa += NBPDR) {
@@ -3116,7 +3133,7 @@ pmap_change_wiring(pmap_t pmap, vm_offse
 
 void
 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
-	  vm_offset_t src_addr)
+    vm_offset_t src_addr)
 {
 	vm_page_t   free;
 	vm_offset_t addr;
@@ -3153,12 +3170,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 		pt_entry_t *src_pte, *dst_pte;
 		vm_page_t dstmpte, srcmpte;
 		pd_entry_t srcptepaddr;
-		unsigned ptepindex;
+		u_int ptepindex;
 
 		KASSERT(addr < UPT_MIN_ADDRESS,
 		    ("pmap_copy: invalid to pmap_copy page tables"));
 
 		pdnxt = (addr + NBPDR) & ~PDRMASK;
+		if (pdnxt < addr)
+			pdnxt = end_addr;
 		ptepindex = addr >> PDRSHIFT;
 
 		srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
@@ -3192,7 +3211,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 				dstmpte = pmap_allocpte(dst_pmap, addr,
 				    M_NOWAIT);
 				if (dstmpte == NULL)
-					break;
+					goto out;
 				dst_pte = pmap_pte_quick(dst_pmap, addr);
 				if (*dst_pte == 0 &&
 				    pmap_try_insert_pv_entry(dst_pmap, addr,
@@ -3216,6 +3235,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 						    addr);
 						pmap_free_zero_pages(free);
 					}
+					goto out;
 				}
 				if (dstmpte->wire_count >= srcmpte->wire_count)
 					break;
@@ -3224,6 +3244,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
 			src_pte++;
 		}
 	}
+out:
 	PT_UPDATES_FLUSH();
 	sched_unpin();
 	vm_page_unlock_queues();
@@ -3286,7 +3307,7 @@ pmap_zero_page_area(vm_page_t m, int off
 	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 	mtx_lock(&sysmaps->lock);
 	if (*sysmaps->CMAP2)
-		panic("pmap_zero_page: CMAP2 busy");
+		panic("pmap_zero_page_area: CMAP2 busy");
 	sched_pin();
 	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
 
@@ -3310,7 +3331,7 @@ pmap_zero_page_idle(vm_page_t m)
 {
 
 	if (*CMAP3)
-		panic("pmap_zero_page: CMAP3 busy");
+		panic("pmap_zero_page_idle: CMAP3 busy");
 	sched_pin();
 	PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
 	pagezero(CADDR3);
@@ -3774,7 +3795,6 @@ pmap_ts_referenced(vm_page_t m)
 	PT_UPDATES_FLUSH();
 	if (*PMAP1)
 		PT_SET_MA(PADDR1, 0);
-
 	sched_unpin();
 	vm_page_unlock_queues();
 	return (rtval);
@@ -3809,7 +3829,7 @@ pmap_clear_modify(vm_page_t m)
 		pmap = PV_PMAP(pv);
 		PMAP_LOCK(pmap);
 		pte = pmap_pte_quick(pmap, pv->pv_va);
-		if ((*pte & PG_M) != 0) {
+		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
 			/*
 			 * Regardless of whether a pte is 32 or 64 bits
 			 * in size, PG_M is among the least significant
@@ -3931,8 +3951,6 @@ pmap_unmapdev(vm_offset_t va, vm_size_t 
 void
 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
 {
-	struct sysmaps *sysmaps;
-	vm_offset_t sva, eva;
 
 	m->md.pat_mode = ma;
 	if ((m->flags & PG_FICTITIOUS) != 0)
@@ -3955,11 +3973,21 @@ pmap_page_set_memattr(vm_page_t m, vm_me
 	 * invalidation. In the worst case, whole cache is flushed by
 	 * pmap_invalidate_cache_range().
 	 */
-	if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) {
+	if ((cpu_feature & CPUID_SS) == 0)
+		pmap_flush_page(m);
+}
+
+static void
+pmap_flush_page(vm_page_t m)
+{
+	struct sysmaps *sysmaps;
+	vm_offset_t sva, eva;
+
+	if ((cpu_feature & CPUID_CLFSH) != 0) {
 		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 		mtx_lock(&sysmaps->lock);
 		if (*sysmaps->CMAP2)
-			panic("pmap_page_set_memattr: CMAP2 busy");
+			panic("pmap_flush_page: CMAP2 busy");
 		sched_pin();
 		PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
 		    VM_PAGE_TO_MACH(m) | PG_A | PG_M |
@@ -3967,21 +3995,35 @@ pmap_page_set_memattr(vm_page_t m, vm_me
 		invlcaddr(sysmaps->CADDR2);
 		sva = (vm_offset_t)sysmaps->CADDR2;
 		eva = sva + PAGE_SIZE;
-	} else
-		sva = eva = 0; /* gcc */
-	pmap_invalidate_cache_range(sva, eva);
-	if (sva != 0) {
+
+		/*
+		 * Use mfence despite the ordering implied by
+		 * mtx_{un,}lock() because clflush is not guaranteed
+		 * to be ordered by any other instruction.
+		 */
+		mfence();
+		for (; sva < eva; sva += cpu_clflush_line_size)
+			clflush(sva);
+		mfence();
 		PT_SET_MA(sysmaps->CADDR2, 0);
 		sched_unpin();
 		mtx_unlock(&sysmaps->lock);
-	}
+	} else
+		pmap_invalidate_cache();
 }
 
+/*
+ * Changes the specified virtual address range's memory type to that given by
+ * the parameter "mode".  The specified virtual address range must be
+ * completely contained within either the kernel map.

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

