svn commit: r205337 - in projects/ppc64/sys/powerpc: aim include powerpc

Nathan Whitehorn nwhitehorn at FreeBSD.org
Fri Mar 19 16:15:11 UTC 2010


Author: nwhitehorn
Date: Fri Mar 19 16:15:11 2010
New Revision: 205337
URL: http://svn.freebsd.org/changeset/base/205337

Log:
  Provide a long list of segment mappings for each process, and change the
  kernel mappings to be (a) calculated instead of looked up, and (b)
  cached per-CPU. This gives SMP a fighting chance at working, and should
  allow the use of more than 16 GB of RAM. It also allows the use of more
  than 16 GB of VA space per process, and allows some minor speed and
  correctness improvements in a few places.
  
  32-bit kernels are probably very broken on this branch at the moment. This
  will be fixed later. More breakage should not be forthcoming -- this is
  the last major change to the memory management code.
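
  For reference, the heart of the "calculated instead of looked up" change
  can be sketched as follows, using the constants from the slb.c diff below.
  The helper name is illustrative only and not part of this commit; it
  assumes the struct slb, SLBE_*, SLBV_* and KERNEL_VSID_BIT definitions
  from <machine/slb.h>:

  	static void
  	kernel_slb_entry_sketch(vm_offset_t va, struct slb *slb)
  	{
  		/* The segment (ESID) is the top bits of the VA. */
  		uint64_t esid = (uintptr_t)va >> ADDR_SR_SHFT;

  		/* Kernel VSIDs are computed, not stored: VSID = ESID | KERNEL_VSID_BIT. */
  		slb->slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
  		slb->slbv = (esid | KERNEL_VSID_BIT) << SLBV_VSID_SHIFT;
  	}

  The resulting kernel SLB entries are cached per-CPU in pcpu->pc_slb rather
  than in kernel_pmap, while user pmaps now keep their segment mappings in a
  splay tree (pm_slbtree) plus a 64-entry SLB cache (pm_slb), which is what
  lifts the old 16 GB limits.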

Modified:
  projects/ppc64/sys/powerpc/aim/machdep.c
  projects/ppc64/sys/powerpc/aim/mmu_oea64.c
  projects/ppc64/sys/powerpc/aim/slb.c
  projects/ppc64/sys/powerpc/aim/trap.c
  projects/ppc64/sys/powerpc/aim/trap_subr64.S
  projects/ppc64/sys/powerpc/include/pcpu.h
  projects/ppc64/sys/powerpc/include/pmap.h
  projects/ppc64/sys/powerpc/include/slb.h
  projects/ppc64/sys/powerpc/powerpc/genassym.c

Modified: projects/ppc64/sys/powerpc/aim/machdep.c
==============================================================================
--- projects/ppc64/sys/powerpc/aim/machdep.c	Fri Mar 19 16:09:57 2010	(r205336)
+++ projects/ppc64/sys/powerpc/aim/machdep.c	Fri Mar 19 16:15:11 2010	(r205337)
@@ -738,7 +738,10 @@ kdb_cpu_set_singlestep(void)
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz)
 {
-
+#ifdef __powerpc64__
+/* Copy the SLB contents from the current CPU */
+memcpy(pcpu->pc_slb, PCPU_GET(slb), sizeof(pcpu->pc_slb));
+#endif
 }
 
 void

Modified: projects/ppc64/sys/powerpc/aim/mmu_oea64.c
==============================================================================
--- projects/ppc64/sys/powerpc/aim/mmu_oea64.c	Fri Mar 19 16:09:57 2010	(r205336)
+++ projects/ppc64/sys/powerpc/aim/mmu_oea64.c	Fri Mar 19 16:15:11 2010	(r205337)
@@ -160,6 +160,7 @@ __FBSDID("$FreeBSD$");
 #define	MOEA_DEBUG
 
 #define TODO	panic("%s: not implemented", __func__);
+void moea64_release_vsid(uint64_t vsid);
 uintptr_t moea64_get_unique_vsid(void); 
 
 static __inline register_t
@@ -183,21 +184,14 @@ cntlzd(volatile register_t a) {
 struct mtx	tlbie_mutex;
 
 static __inline void
-TLBIE(pmap_t pmap, vm_offset_t va) {
+TLBIE(uint64_t vpn) {
 #ifndef __powerpc64__
 	register_t vpn_hi, vpn_lo;
 	register_t msr;
 	register_t scratch;
 #endif
-	uint64_t vpn;
 
-	/*
-	 * Compute the virtual page number we wish to invalidate.
-	 */
-
-	vpn = (uint64_t)(va & ADDR_PIDX);
-	if (pmap != NULL)
-		vpn |= (va_to_vsid(pmap,va) << 28);
+	vpn <<= ADDR_PIDX_SHFT;
 	vpn &= ~(0xffffULL << 48);
 
 	mtx_lock_spin(&tlbie_mutex);
@@ -257,6 +251,7 @@ TLBIE(pmap_t pmap, vm_offset_t va) {
 	((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK)))
 #define	PVO_PTEGIDX_SET(pvo, i)	\
 	((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID))
+#define	PVO_VSID(pvo)		((pvo)->pvo_vpn >> 16)
 
 #define	MOEA_PVO_CHECK(pvo)
 
@@ -347,6 +342,7 @@ SYSCTL_INT(_machdep, OID_AUTO, moea64_pv
     &moea64_pvo_remove_calls, 0, "");
 
 vm_offset_t	moea64_scratchpage_va[2];
+uint64_t	moea64_scratchpage_vpn[2];
 struct	lpte 	*moea64_scratchpage_pte[2];
 struct	mtx	moea64_scratchpage_mtx;
 
@@ -572,7 +568,7 @@ moea64_pte_synch(struct lpte *pt, struct
 }
 
 static __inline void
-moea64_pte_clear(struct lpte *pt, pmap_t pmap, vm_offset_t va, u_int64_t ptebit)
+moea64_pte_clear(struct lpte *pt, uint64_t vpn, u_int64_t ptebit)
 {
 	ASSERT_TABLE_LOCK();
 
@@ -580,7 +576,7 @@ moea64_pte_clear(struct lpte *pt, pmap_t
 	 * As shown in Section 7.6.3.2.3
 	 */
 	pt->pte_lo &= ~ptebit;
-	TLBIE(pmap,va);
+	TLBIE(vpn);
 }
 
 static __inline void
@@ -603,7 +599,7 @@ moea64_pte_set(struct lpte *pt, struct l
 }
 
 static __inline void
-moea64_pte_unset(struct lpte *pt, struct lpte *pvo_pt, pmap_t pmap, vm_offset_t va)
+moea64_pte_unset(struct lpte *pt, struct lpte *pvo_pt, uint64_t vpn)
 {
 	ASSERT_TABLE_LOCK();
 	pvo_pt->pte_hi &= ~LPTE_VALID;
@@ -617,7 +613,7 @@ moea64_pte_unset(struct lpte *pt, struct
 	 * Invalidate the pte.
 	 */
 	pt->pte_hi &= ~LPTE_VALID;
-	TLBIE(pmap,va);
+	TLBIE(vpn);
 
 	/*
 	 * Save the reg & chg bits.
@@ -627,16 +623,14 @@ moea64_pte_unset(struct lpte *pt, struct
 }
 
 static __inline void
-moea64_pte_change(struct lpte *pt, struct lpte *pvo_pt, pmap_t pmap, vm_offset_t va)
+moea64_pte_change(struct lpte *pt, struct lpte *pvo_pt, uint64_t vpn)
 {
 
 	/*
 	 * Invalidate the PTE
 	 */
-	moea64_pte_unset(pt, pvo_pt, pmap, va);
+	moea64_pte_unset(pt, pvo_pt, vpn);
 	moea64_pte_set(pt, pvo_pt);
-	if (pmap == kernel_pmap)
-		isync();
 }
 
 static __inline uint64_t
@@ -708,6 +702,9 @@ static void
 moea64_cpu_bootstrap(mmu_t mmup, int ap)
 {
 	int i = 0;
+	#ifdef __powerpc64__
+	struct slb *slb = PCPU_GET(slb);
+	#endif
 
 	/*
 	 * Initialize segment registers and MMU
@@ -723,12 +720,11 @@ moea64_cpu_bootstrap(mmu_t mmup, int ap)
 		slbia();
 
 		for (i = 0; i < 64; i++) {
-			if (!(kernel_pmap->pm_slb[i].slbe & SLBE_VALID))
+			if (!(slb[i].slbe & SLBE_VALID))
 				continue;
 
 			__asm __volatile ("slbmte %0, %1" :: 
-			    "r"(kernel_pmap->pm_slb[i].slbv),
-			    "r"(kernel_pmap->pm_slb[i].slbe)); 
+			    "r"(slb[i].slbv), "r"(slb[i].slbe)); 
 		}
 	#else
 		for (i = 0; i < 16; i++)
@@ -796,6 +792,7 @@ moea64_add_ofw_mappings(mmu_t mmup, phan
 	}
 }
 
+#ifdef __powerpc64__
 static void
 moea64_probe_large_page(void)
 {
@@ -822,6 +819,32 @@ moea64_probe_large_page(void)
 }
 
 static void
+moea64_bootstrap_slb_prefault(vm_offset_t va, int large)
+{
+	struct slb *cache;
+	struct slb entry;
+	uint64_t esid, slbe;
+	uint64_t i;
+
+	cache = PCPU_GET(slb);
+	esid = va >> ADDR_SR_SHFT;
+	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
+
+	for (i = 0; i < 64; i++) {
+		if (cache[i].slbe == (slbe | i))
+			return;
+	}
+
+	entry.slbe = slbe;
+	entry.slbv = (esid | KERNEL_VSID_BIT) << SLBV_VSID_SHIFT;
+	if (large)
+		entry.slbv |= SLBV_L;
+
+	slb_insert(kernel_pmap, cache, &entry);
+}
+#endif
+
+static void
 moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
     vm_offset_t kernelend)
 {
@@ -836,6 +859,7 @@ moea64_setup_direct_map(mmu_t mmup, vm_o
 
 	DISABLE_TRANS(msr);
 	if (hw_direct_map) {
+		#ifdef __powerpc64__
 		PMAP_LOCK(kernel_pmap);
 		for (i = 0; i < pregions_sz; i++) {
 		  for (pa = pregions[i].mr_start; pa < pregions[i].mr_start +
@@ -854,14 +878,6 @@ moea64_setup_direct_map(mmu_t mmup, vm_o
 			    pregions[i].mr_start + pregions[i].mr_size)
 				pte_lo |= LPTE_G;
 
-			/*
-			 * Allocate a new SLB entry to make sure it is
-			 * for large pages.
-			 */
-			if (va_to_slb_entry(kernel_pmap, pa) == NULL)
-				allocate_vsid(kernel_pmap, pa >> ADDR_SR_SHFT,
-				    1 /* large */);
-	
 			moea64_pvo_enter(kernel_pmap, moea64_upvo_zone,
 				    &moea64_pvo_kunmanaged, pa, pa,
 				    pte_lo, PVO_WIRED | PVO_LARGE |
@@ -869,6 +885,7 @@ moea64_setup_direct_map(mmu_t mmup, vm_o
 		  }
 		}
 		PMAP_UNLOCK(kernel_pmap);
+		#endif
 	} else {
 		size = moea64_pteg_count * sizeof(struct lpteg);
 		off = (vm_offset_t)(moea64_pteg_table);
@@ -1076,8 +1093,8 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t
 	 */
 	#ifdef __powerpc64__
 	for (i = 0; i < 64; i++) {
-		kernel_pmap->pm_slb[i].slbv = 0;
-		kernel_pmap->pm_slb[i].slbe = 0;
+		pcpup->pc_slb[i].slbv = 0;
+		pcpup->pc_slb[i].slbe = 0;
 	}
 	#else
 	for (i = 0; i < 16; i++) 
@@ -1101,9 +1118,9 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t
 	 */
 
 	if (!ofw_real_mode) {
+	    #ifndef __powerpc64__
 	    moea64_pinit(mmup, &ofw_pmap);
 
-	    #ifndef __powerpc64__
 	    for (i = 0; i < 16; i++)
 		ofw_pmap.pm_sr[i] = kernel_pmap->pm_sr[i];
 	    #endif
@@ -1148,6 +1165,14 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t
 	virtual_end = VM_MAX_SAFE_KERNEL_ADDRESS; 
 
 	/*
+	 * Map the entire KVA range into the SLB. We must not fault there.
+	 */
+	#ifdef __powerpc64__
+	for (va = virtual_avail; va < virtual_end; va += SEGMENT_LENGTH)
+		moea64_bootstrap_slb_prefault(va, 0);
+	#endif
+
+	/*
 	 * Figure out how far we can extend virtual_end into segment 16
 	 * without running into existing mappings. Segment 16 is guaranteed
 	 * to contain neither RAM nor devices (at least on Apple hardware),
@@ -1189,6 +1214,9 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t
 			    LPTE_NOEXEC, 0);
 			pt.pte_hi |= LPTE_LOCKED;
 
+			moea64_scratchpage_vpn[i] = (vsid << 16) |
+			    ((moea64_scratchpage_va[i] & ADDR_PIDX) >>
+			    ADDR_PIDX_SHFT);
 			ptegidx = va_to_pteg(vsid, moea64_scratchpage_va[i], 0);
 			pteidx = moea64_pte_insert(ptegidx, &pt);
 			if (pt.pte_hi & LPTE_HID)
@@ -1245,23 +1273,22 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t
 }
 
 /*
- * Activate a user pmap.  The pmap must be activated before it's address
+ * Activate a user pmap.  The pmap must be activated before its address
  * space can be accessed in any way.
  */
 void
 moea64_activate(mmu_t mmu, struct thread *td)
 {
-	pmap_t	pm, pmr;
+	pmap_t	pm;
 
-	/*
-	 * Load all the data we need up front to encourage the compiler to
-	 * not issue any loads while we have interrupts disabled below.
-	 */
 	pm = &td->td_proc->p_vmspace->vm_pmap;
-	pmr = pm->pmap_phys;
-
 	pm->pm_active |= PCPU_GET(cpumask);
-	PCPU_SET(curpmap, pmr);
+
+	#ifdef __powerpc64__
+	PCPU_SET(userslb, pm->pm_slb);
+	#else
+	PCPU_SET(curpmap, pm->pmap_phys);
+	#endif
 }
 
 void
@@ -1271,7 +1298,11 @@ moea64_deactivate(mmu_t mmu, struct thre
 
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 	pm->pm_active &= ~(PCPU_GET(cpumask));
+	#ifdef __powerpc64__
+	PCPU_SET(userslb, NULL);
+	#else
 	PCPU_SET(curpmap, NULL);
+	#endif
 }
 
 void
@@ -1310,7 +1341,7 @@ void moea64_set_scratchpage_pa(int which
 	mtx_assert(&moea64_scratchpage_mtx, MA_OWNED);
 
 	moea64_scratchpage_pte[which]->pte_hi &= ~LPTE_VALID;
-	TLBIE(kernel_pmap, moea64_scratchpage_va[which]);
+	TLBIE(moea64_scratchpage_vpn[which]);
 	
 	moea64_scratchpage_pte[which]->pte_lo &= 
 	    ~(LPTE_WIMG | LPTE_RPGN);
@@ -1742,7 +1773,9 @@ moea64_remove_write(mmu_t mmu, vm_page_t
 				lo |= pvo->pvo_pte.lpte.pte_lo;
 				pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG;
 				moea64_pte_change(pt, &pvo->pvo_pte.lpte,
-				    pvo->pvo_pmap, PVO_VADDR(pvo));
+				    pvo->pvo_vpn);
+				if (pvo->pvo_pmap == kernel_pmap)
+					isync();
 			}
 		}
 		UNLOCK_TABLE();
@@ -1971,13 +2004,20 @@ moea64_get_unique_vsid(void) {
 	panic("%s: out of segments",__func__);
 }
 
+#ifdef __powerpc64__
 void
 moea64_pinit(mmu_t mmu, pmap_t pmap)
 {
+	PMAP_LOCK_INIT(pmap);
+
+	SPLAY_INIT(&pmap->pm_slbtree);
+	pmap->pm_slb = slb_alloc_user_cache();
+}
+#else
+void
+moea64_pinit(mmu_t mmu, pmap_t pmap)
+{
 	int	i;
-	#ifndef __powerpc64__
 	register_t hash;
-	#endif
 
 	PMAP_LOCK_INIT(pmap);
 
@@ -1987,18 +2027,6 @@ moea64_pinit(mmu_t mmu, pmap_t pmap)
 	else
 		pmap->pmap_phys = pmap;
 
-	#ifdef __powerpc64__
-	/*
-	 * 64-bit PowerPC uses lazy segment allocation, so NULL
-	 * all the segment entries for now.
-	 */
-	for (i = 0; i < sizeof(pmap->pm_slb)/sizeof(pmap->pm_slb[0]); i++) {
-		pmap->pm_slb[i].slbv = 0;
-		pmap->pm_slb[i].slbe = 0;
-	}
-
-	#else
-
 	/*
 	 * Allocate some segment registers for this pmap.
 	 */
@@ -2006,9 +2034,8 @@ moea64_pinit(mmu_t mmu, pmap_t pmap)
 
 	for (i = 0; i < 16; i++) 
 		pmap->pm_sr[i] = VSID_MAKE(i, hash);
-
-	#endif
 }
+#endif
 
 /*
  * Initialize the pmap associated with process 0.
@@ -2070,8 +2097,7 @@ moea64_protect(mmu_t mmu, pmap_t pm, vm_
 		 * If the PVO is in the page table, update that pte as well.
 		 */
 		if (pt != NULL) {
-			moea64_pte_change(pt, &pvo->pvo_pte.lpte, 
-			    pvo->pvo_pmap, PVO_VADDR(pvo));
+			moea64_pte_change(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
 			if ((pvo->pvo_pte.lpte.pte_lo & 
 			    (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) {
 				moea64_syncicache(pm, sva,
@@ -2113,7 +2139,7 @@ moea64_qremove(mmu_t mmu, vm_offset_t va
 	}
 }
 
-static __inline void
+void
 moea64_release_vsid(uint64_t vsid)
 {
         int idx, mask;
@@ -2133,9 +2159,8 @@ moea64_release(mmu_t mmu, pmap_t pmap)
 	 * Free segment registers' VSIDs
 	 */
     #ifdef __powerpc64__
-	int i;
-	for (i = 0; i < sizeof(pmap->pm_slb)/sizeof(pmap->pm_slb[0]); i++)
-		moea64_release_vsid(pmap->pm_slb[i].slbv);
+	free_vsids(pmap);
+	slb_free_user_cache(pmap->pm_slb);
     #else
         if (pmap->pm_sr[0] == 0)
                 panic("moea64_release");
@@ -2368,6 +2393,8 @@ moea64_pvo_enter(pmap_t pm, uma_zone_t z
 
 	moea64_pvo_entries++;
 	pvo->pvo_vaddr = va;
+	pvo->pvo_vpn = (uint64_t)((va & ADDR_PIDX) >> ADDR_PIDX_SHFT)
+	    | (vsid << 16);
 	pvo->pvo_pmap = pm;
 	LIST_INSERT_HEAD(&moea64_pvo_table[ptegidx], pvo, pvo_olink);
 	pvo->pvo_vaddr &= ~ADDR_POFF;
@@ -2416,6 +2443,15 @@ moea64_pvo_enter(pmap_t pm, uma_zone_t z
 
 	UNLOCK_TABLE();
 
+#ifdef __powerpc64__
+	/*
+	 * Make sure all our bootstrap mappings are in the SLB as soon
+	 * as virtual memory is switched on.
+	 */
+	if (!pmap_bootstrapped)
+		moea64_bootstrap_slb_prefault(va, flags & PVO_LARGE);
+#endif
+
 	return (first ? ENOENT : 0);
 }
 
@@ -2431,8 +2467,7 @@ moea64_pvo_remove(struct pvo_entry *pvo,
 	LOCK_TABLE();
 	pt = moea64_pvo_to_pte(pvo, pteidx);
 	if (pt != NULL) {
-		moea64_pte_unset(pt, &pvo->pvo_pte.lpte, pvo->pvo_pmap,
-		    PVO_VADDR(pvo));
+		moea64_pte_unset(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn);
 		PVO_PTEGIDX_CLR(pvo);
 	} else {
 		moea64_pte_overflow--;
@@ -2500,18 +2535,18 @@ moea64_pvo_find_va(pmap_t pm, vm_offset_
 	int		ptegidx;
 	uint64_t	vsid;
 	#ifdef __powerpc64__
-	struct slb	*slb;
+	struct slb	slb;
 
-	slb = va_to_slb_entry(pm, va);
-	if (slb == NULL) /* The page is not mapped if the segment isn't */
+	/* The page is not mapped if the segment isn't */
+	if (va_to_slb_entry(pm, va, &slb) != 0)
 		return NULL;
 
-	vsid = (slb->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
-	if (slb->slbv & SLBV_L)
+	vsid = (slb.slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
+	if (slb.slbv & SLBV_L)
 		va &= ~moea64_large_page_mask;
 	else
 		va &= ~ADDR_POFF;
-	ptegidx = va_to_pteg(vsid, va, slb->slbv & SLBV_L);
+	ptegidx = va_to_pteg(vsid, va, slb.slbv & SLBV_L);
 	#else
 	va &= ~ADDR_POFF;
 	vsid = va_to_vsid(pm, va);
@@ -2543,7 +2578,7 @@ moea64_pvo_to_pte(const struct pvo_entry
 		int		ptegidx;
 		uint64_t	vsid;
 
-		vsid = va_to_vsid(pvo->pvo_pmap, PVO_VADDR(pvo));
+		vsid = PVO_VSID(pvo);
 		ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo),
 		    pvo->pvo_vaddr & PVO_LARGE);
 		pteidx = moea64_pvo_pte_index(pvo, ptegidx);
@@ -2724,7 +2759,7 @@ moea64_clear_bit(vm_page_t m, u_int64_t 
 			moea64_pte_synch(pt, &pvo->pvo_pte.lpte);
 			if (pvo->pvo_pte.lpte.pte_lo & ptebit) {
 				count++;
-				moea64_pte_clear(pt, pvo->pvo_pmap, PVO_VADDR(pvo), ptebit);
+				moea64_pte_clear(pt, pvo->pvo_vpn, ptebit);
 			}
 		}
 		rv |= pvo->pvo_pte.lpte.pte_lo;

Modified: projects/ppc64/sys/powerpc/aim/slb.c
==============================================================================
--- projects/ppc64/sys/powerpc/aim/slb.c	Fri Mar 19 16:09:57 2010	(r205336)
+++ projects/ppc64/sys/powerpc/aim/slb.c	Fri Mar 19 16:15:11 2010	(r205337)
@@ -25,95 +25,160 @@
  */
 
 #include <sys/param.h>
-#include <sys/kdb.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/systm.h>
+#include <sys/tree.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
+#include <vm/uma.h>
+#include <vm/vm_map.h>
 
+#include <machine/md_var.h>
+#include <machine/pmap.h>
 #include <machine/vmparam.h>
 
 uintptr_t moea64_get_unique_vsid(void);
+void moea64_release_vsid(uint64_t vsid);
 
-struct slb *
-va_to_slb_entry(pmap_t pm, vm_offset_t va)
+struct slbcontainer {
+	struct slb slb;
+	SPLAY_ENTRY(slbcontainer) slb_node;
+};
+
+static int slb_compare(struct slbcontainer *a, struct slbcontainer *b);
+static void slb_zone_init(void *);
+
+SPLAY_PROTOTYPE(slb_tree, slbcontainer, slb_node, slb_compare);
+SPLAY_GENERATE(slb_tree, slbcontainer, slb_node, slb_compare);
+
+uma_zone_t slb_zone;
+uma_zone_t slb_cache_zone;
+
+SYSINIT(slb_zone_init, SI_SUB_KMEM, SI_ORDER_ANY, slb_zone_init, NULL);
+
+int
+va_to_slb_entry(pmap_t pm, vm_offset_t va, struct slb *slb)
 {
-	uint64_t slbe, i;
+	struct slbcontainer cont, *found;
+	uint64_t esid;
 
-	slbe = (uintptr_t)va >> ADDR_SR_SHFT;
-	slbe = (slbe << SLBE_ESID_SHIFT) | SLBE_VALID;
+	esid = (uintptr_t)va >> ADDR_SR_SHFT;
+	slb->slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
 
-	for (i = 0; i < sizeof(pm->pm_slb)/sizeof(pm->pm_slb[0]); i++) {
-		if (pm->pm_slb[i].slbe == (slbe | i))
-			return &pm->pm_slb[i];
+	if (pm == kernel_pmap) {
+		/* Set kernel VSID to ESID | KERNEL_VSID_BIT */
+		slb->slbv = (esid | KERNEL_VSID_BIT) << SLBV_VSID_SHIFT;
+
+		/* Figure out if this is a large-page mapping */
+		if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
+			/*
+			 * XXX: If we have set up a direct map, assumes
+			 * all physical memory is mapped with large pages.
+			 */
+			if (mem_valid(va, 0) == 0)
+				slb->slbv |= SLBV_L;
+		}
+			
+		return (0);
 	}
 
-	/* XXX: Have a long list for processes mapping more than 16 GB */
+	PMAP_LOCK_ASSERT(pm, MA_OWNED);
 
-	return (NULL);
+	cont.slb.slbe = slb->slbe;
+	found = SPLAY_FIND(slb_tree, &pm->pm_slbtree, &cont);
+
+	if (found == NULL)
+		return (-1);
+
+	slb->slbv = found->slb.slbv;
+	return (0);
 }
 
 uint64_t
 va_to_vsid(pmap_t pm, vm_offset_t va)
 {
-	struct slb *entry;
+	struct slb entry;
 
-	entry = va_to_slb_entry(pm, va);
+	/* Shortcut kernel case: VSID = ESID | KERNEL_VSID_BIT */
 
+	if (pm == kernel_pmap) 
+		return (((uintptr_t)va >> ADDR_SR_SHFT) | KERNEL_VSID_BIT);
 	/*
 	 * If there is no vsid for this VA, we need to add a new entry
 	 * to the PMAP's segment table.
-	 * 
-	 * XXX We assume (for now) that we are not mapping large pages.
 	 */
 
-	if (entry == NULL)
+	if (va_to_slb_entry(pm, va, &entry) != 0)
 		return (allocate_vsid(pm, (uintptr_t)va >> ADDR_SR_SHFT, 0));
 
-	return ((entry->slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
+	return ((entry.slbv & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT);
 }
 
 uint64_t
 allocate_vsid(pmap_t pm, uint64_t esid, int large)
 {
 	uint64_t vsid;
-	struct slb slb_entry;
+	struct slbcontainer *slb_entry, kern_entry;
+	struct slb *prespill;
 
-	vsid = moea64_get_unique_vsid();
+	prespill = NULL;
 
-	slb_entry.slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
-	slb_entry.slbv = vsid << SLBV_VSID_SHIFT;
+	if (pm == kernel_pmap) {
+		vsid = esid | KERNEL_VSID_BIT;
+		slb_entry = &kern_entry;
+		prespill = PCPU_GET(slb);
+	} else {
+		vsid = moea64_get_unique_vsid();
+		slb_entry = uma_zalloc(slb_zone, M_NOWAIT);
+
+		if (slb_entry == NULL)
+			panic("Could not allocate SLB mapping!");
+
+		prespill = pm->pm_slb;
+	}
+
+	slb_entry->slb.slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID;
+	slb_entry->slb.slbv = vsid << SLBV_VSID_SHIFT;
 
 	if (large)
-		slb_entry.slbv |= SLBV_L;
+		slb_entry->slb.slbv |= SLBV_L;
+
+	if (pm != kernel_pmap) {
+		PMAP_LOCK_ASSERT(pm, MA_OWNED);
+		SPLAY_INSERT(slb_tree, &pm->pm_slbtree, slb_entry);
+	}
 
 	/*
 	 * Someone probably wants this soon, and it may be a wired
 	 * SLB mapping, so pre-spill this entry.
 	 */
-	slb_insert(pm, &slb_entry, 1);
+	if (prespill != NULL)
+		slb_insert(pm, prespill, &slb_entry->slb);
 
 	return (vsid);
 }
 
-#ifdef NOTYET /* We don't have a back-up list. Spills are a bad idea. */
 /* Lock entries mapping kernel text and stacks */
 
 #define SLB_SPILLABLE(slbe) \
 	(((slbe & SLBE_ESID_MASK) < VM_MIN_KERNEL_ADDRESS && \
-	    (slbe & SLBE_ESID_MASK) > SEGMENT_LENGTH) || \
+	    (slbe & SLBE_ESID_MASK) > 16*SEGMENT_LENGTH) || \
 	    (slbe & SLBE_ESID_MASK) > VM_MAX_KERNEL_ADDRESS)
-#else
-#define SLB_SPILLABLE(slbe) 0
-#endif
-
 void
-slb_insert(pmap_t pm, struct slb *slb_entry, int prefer_empty)
+slb_insert(pmap_t pm, struct slb *slbcache, struct slb *slb_entry)
 {
 	uint64_t slbe, slbv;
 	int i, j, to_spill;
 
+	/*
+	 * Note: no locking is necessary in this function because all slbcaches
+	 * are either for the current thread or per-CPU.
+	 */
+
 	to_spill = -1;
 	slbv = slb_entry->slbv;
 	slbe = slb_entry->slbe;
@@ -124,31 +189,104 @@ slb_insert(pmap_t pm, struct slb *slb_en
 		if (pm == kernel_pmap && i == USER_SR)
 				continue;
 
-		if (!(pm->pm_slb[i].slbe & SLBE_VALID)) {
+		if (!(slbcache[i].slbe & SLBE_VALID)) {
 			to_spill = i;
 			break;
 		}
 
 		if (to_spill < 0 && (pm != kernel_pmap ||
-		    SLB_SPILLABLE(pm->pm_slb[i].slbe))) {
+		    SLB_SPILLABLE(slbcache[i].slbe)))
 			to_spill = i;
-			if (!prefer_empty)
-				break;
-		}
 	}
 
 	if (to_spill < 0)
 		panic("SLB spill on ESID %#lx, but no available candidates!\n",
 		   (slbe & SLBE_ESID_MASK) >> SLBE_ESID_SHIFT);
 
-	pm->pm_slb[to_spill].slbv = slbv;
-	pm->pm_slb[to_spill].slbe = slbe | (uint64_t)to_spill;
+	slbcache[to_spill].slbv = slbv;
+	slbcache[to_spill].slbe = slbe | (uint64_t)to_spill;
 
+	/* If it is for this CPU, put it in the SLB right away */
 	if (pm == kernel_pmap && pmap_bootstrapped) {
 		/* slbie not required */
 		__asm __volatile ("slbmte %0, %1" :: 
-		    "r"(kernel_pmap->pm_slb[to_spill].slbv),
-		    "r"(kernel_pmap->pm_slb[to_spill].slbe)); 
+		    "r"(slbcache[to_spill].slbv),
+		    "r"(slbcache[to_spill].slbe)); 
+	}
+}
+
+int
+vsid_to_esid(pmap_t pm, uint64_t vsid, uint64_t *esid)
+{
+	uint64_t slbv;
+	struct slbcontainer *entry;
+
+#ifdef INVARIANTS
+	if (pm == kernel_pmap)
+		panic("vsid_to_esid only works on user pmaps");
+
+	PMAP_LOCK_ASSERT(pm, MA_OWNED);
+#endif
+
+	slbv = vsid << SLBV_VSID_SHIFT;
+
+	SPLAY_FOREACH(entry, slb_tree, &pm->pm_slbtree) {
+		if (slbv == entry->slb.slbv) {
+			*esid = entry->slb.slbe >> SLBE_ESID_SHIFT;
+			return (0);
+		}
+	}
+
+	return (-1);
+}
+
+void
+free_vsids(pmap_t pm)
+{
+	struct slbcontainer *entry;
+
+	while (!SPLAY_EMPTY(&pm->pm_slbtree)) {
+		entry = SPLAY_MIN(slb_tree, &pm->pm_slbtree);
+
+		SPLAY_REMOVE(slb_tree, &pm->pm_slbtree, entry);
+
+		moea64_release_vsid(entry->slb.slbv >> SLBV_VSID_SHIFT);
+		uma_zfree(slb_zone, entry);
 	}
 }
 
+static int
+slb_compare(struct slbcontainer *a, struct slbcontainer *b)
+{
+	if (a->slb.slbe == b->slb.slbe)
+		return (0);
+	else if (a->slb.slbe < b->slb.slbe)
+		return (-1);
+	else
+		return (1);
+}
+
+static void
+slb_zone_init(void *dummy)
+{
+
+	slb_zone = uma_zcreate("SLB segment", sizeof(struct slbcontainer),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
+	slb_cache_zone = uma_zcreate("SLB cache", 64*sizeof(struct slb),
+	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
+}
+
+struct slb *
+slb_alloc_user_cache(void)
+{
+	struct slb *tmp;
+	tmp = uma_zalloc(slb_cache_zone, M_NOWAIT | M_ZERO);
+	bzero(tmp,64*sizeof(struct slb));
+	return (tmp);
+}
+
+void
+slb_free_user_cache(struct slb *slb)
+{
+	uma_zfree(slb_cache_zone, slb);
+}

Modified: projects/ppc64/sys/powerpc/aim/trap.c
==============================================================================
--- projects/ppc64/sys/powerpc/aim/trap.c	Fri Mar 19 16:09:57 2010	(r205336)
+++ projects/ppc64/sys/powerpc/aim/trap.c	Fri Mar 19 16:15:11 2010	(r205337)
@@ -88,8 +88,7 @@ static int	handle_onfault(struct trapfra
 static void	syscall(struct trapframe *frame);
 
 #ifdef __powerpc64__
-static void	handle_slb_spill(pmap_t pm, vm_offset_t addr);
-static uint64_t	slb_esid_lookup(pmap_t pm, uint64_t vsid);
+static int	handle_slb_spill(pmap_t pm, vm_offset_t addr);
 #endif
 
 int	setfault(faultbuf);		/* defined in locore.S */
@@ -187,9 +186,10 @@ trap(struct trapframe *frame)
 #ifdef __powerpc64__
 		case EXC_ISE:
 		case EXC_DSE:
-			handle_slb_spill(&p->p_vmspace->vm_pmap,
+			if (handle_slb_spill(&p->p_vmspace->vm_pmap,
 			    (type == EXC_ISE) ? frame->srr0 :
-			    frame->cpu.aim.dar);
+			    frame->cpu.aim.dar) != 0)
+				sig = SIGSEGV;
 			break;
 #endif
 		case EXC_DSI:
@@ -251,9 +251,10 @@ trap(struct trapframe *frame)
 #ifdef __powerpc64__
 		case EXC_ISE:
 		case EXC_DSE:
-			handle_slb_spill(kernel_pmap,
+			if (handle_slb_spill(kernel_pmap,
 			    (type == EXC_ISE) ? frame->srr0 :
-			    frame->cpu.aim.dar);
+			    frame->cpu.aim.dar) != 0)
+				panic("Fault handling kernel SLB miss");
 			return;
 #endif
 		case EXC_MCHK:
@@ -503,37 +504,40 @@ syscall(struct trapframe *frame)
 }
 
 #ifdef __powerpc64__
-static uint64_t
-slb_esid_lookup(pmap_t pm, uint64_t vsid)
+static int 
+handle_slb_spill(pmap_t pm, vm_offset_t addr)
 {
-	uint64_t esid;
-	int i;
+	struct slb slb_entry;
+	int error, i;
 
-	vsid <<= SLBV_VSID_SHIFT;
+	if (pm == kernel_pmap) {
+		error = va_to_slb_entry(pm, addr, &slb_entry);
+		if (error)
+			return (error);
 
-	for (i = 0; i < sizeof(pm->pm_slb)/sizeof(pm->pm_slb[0]); i++) {
-		if ((pm->pm_slb[i].slbv & SLBV_VSID_MASK) == vsid) {
-			esid = pm->pm_slb[i].slbe & SLBE_ESID_MASK;
-			esid >>= SLBE_ESID_SHIFT;
-			return (esid);
-		}
+		slb_insert(pm, PCPU_GET(slb), &slb_entry);
+		return (0);
 	}
 
-	return (0);
-}
-
-static void
-handle_slb_spill(pmap_t pm, vm_offset_t addr)
-{
-	struct slb *slb_entry;
-
 	PMAP_LOCK(pm);
-	slb_entry = va_to_slb_entry(pm, addr);
-	if (slb_entry == NULL)
-		(void)va_to_vsid(pm, addr);
-	else
-		slb_insert(pm, slb_entry, 0 /* Don't prefer empty */);
+	error = va_to_slb_entry(pm, addr, &slb_entry);
+	if (error != 0)
+		(void)allocate_vsid(pm, (uintptr_t)addr >> ADDR_SR_SHFT, 0);
+	else {
+		/*
+		 * Check that another CPU has not already mapped this.
+		 * XXX: Per-thread SLB caches would be better.
+		 */
+		for (i = 0; i < 64; i++)
+			if (pm->pm_slb[i].slbe == (slb_entry.slbe | i))
+				break;
+
+		if (i == 64)
+			slb_insert(pm, pm->pm_slb, &slb_entry);
+	}
 	PMAP_UNLOCK(pm);
+
+	return (0);
 }
 #endif
 
@@ -568,15 +572,22 @@ trap_pfault(struct trapframe *frame, int
 			if (p->p_vmspace == NULL)
 				return (SIGSEGV);
 
+			map = &p->p_vmspace->vm_map;
+
 			#ifdef __powerpc64__
 			user_sr = 0;
 			__asm ("slbmfev %0, %1"
 			    : "=r"(user_sr)
 			    : "r"(USER_SR));
 
-			user_sr = (user_sr & SLBV_VSID_MASK) >> SLBV_VSID_SHIFT;
-			user_sr = slb_esid_lookup(&p->p_vmspace->vm_pmap, user_sr);
+			PMAP_LOCK(&p->p_vmspace->vm_pmap);
+			user_sr >>= SLBV_VSID_SHIFT;
+			rv = vsid_to_esid(&p->p_vmspace->vm_pmap, user_sr,
+			    &user_sr);
+			PMAP_UNLOCK(&p->p_vmspace->vm_pmap);
 
+			if (rv != 0) 
+				return (SIGSEGV);
 			#else
 			__asm ("mfsr %0, %1"
 			    : "=r"(user_sr)
@@ -584,7 +595,6 @@ trap_pfault(struct trapframe *frame, int
 			#endif
 			eva &= ADDR_PIDX | ADDR_POFF;
 			eva |= user_sr << ADDR_SR_SHFT;
-			map = &p->p_vmspace->vm_map;
 		} else {
 			map = kernel_map;
 		}

Modified: projects/ppc64/sys/powerpc/aim/trap_subr64.S
==============================================================================
--- projects/ppc64/sys/powerpc/aim/trap_subr64.S	Fri Mar 19 16:09:57 2010	(r205336)
+++ projects/ppc64/sys/powerpc/aim/trap_subr64.S	Fri Mar 19 16:15:11 2010	(r205337)
@@ -46,7 +46,7 @@
 /*
  * Restore SRs for a pmap
  *
- * Requires that r28-r31 be scratch, with r28 initialized to the pmap
+ * Requires that r28-r31 be scratch, with r28 initialized to the SLB cache
  */
 
 restoresrs: 
@@ -54,14 +54,15 @@ restoresrs: 
 
 	slbia
 	slbmfee	%r31,%r29		
+	clrrdi	%r31,%r31,28
 	slbie	%r31
 instslb:
-	ld	%r31, PM_SLB+8(%r28);	/* Load SLBE */
+	ld	%r31, 8(%r28);		/* Load SLBE */
 
 	cmpli	0, %r31, 0;		/* If SLBE is not valid, get the next */
 	beq	nslb;
 
-	ld	%r30, PM_SLB(%r28)	/* Load SLBV */
+	ld	%r30, 0(%r28)		/* Load SLBV */
 	slbmte	%r30, %r31;		/* Install SLB entry */
 
 nslb:
@@ -77,15 +78,15 @@ nslb:
  */
 #define RESTORE_USER_SRS() \
 	GET_CPUINFO(%r28); \
-	ld	%r28,PC_CURPMAP(%r28); \
+	ld	%r28,PC_USERSLB(%r28); \
 	bl	restoresrs;
 
 /*
  * Kernel SRs are loaded directly from kernel_pmap_
  */
 #define RESTORE_KERN_SRS() \
-	lis	%r28,CNAME(kernel_pmap_store)@ha; \
-	addi	%r28,%r28,CNAME(kernel_pmap_store)@l; \
+	GET_CPUINFO(%r28); \
+	addi	%r28,%r28,PC_KERNSLB; \
 	bl	restoresrs;
 
 /*

Modified: projects/ppc64/sys/powerpc/include/pcpu.h
==============================================================================
--- projects/ppc64/sys/powerpc/include/pcpu.h	Fri Mar 19 16:09:57 2010	(r205336)
+++ projects/ppc64/sys/powerpc/include/pcpu.h	Fri Mar 19 16:15:11 2010	(r205337)
@@ -31,6 +31,7 @@
 #define	_MACHINE_PCPU_H_
 
 #include <machine/cpufunc.h>
+#include <machine/slb.h>
 #include <machine/tlb.h>
 
 struct pmap;
@@ -50,7 +51,17 @@ struct pmap;
 	register_t	pc_disisave[CPUSAVE_LEN];			\
 	register_t	pc_dbsave[CPUSAVE_LEN];
 
-#define PCPU_MD_AIM_FIELDS
+#define PCPU_MD_AIM32_FIELDS
+
+#define PCPU_MD_AIM64_FIELDS						\
+	struct slb	pc_slb[64];					\
+	struct slb	*pc_userslb;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***

