PERFORCE change 95975 for review
Kip Macy
kmacy at FreeBSD.org
Mon Apr 24 02:07:21 UTC 2006
http://perforce.freebsd.org/chv.cgi?CH=95975
Change 95975 by kmacy at kmacy_storage:sun4v_rwbuf on 2006/04/24 02:07:00
massive fixes for various issues, including OFW not registering
allocated pages, IPI acks, etc.
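
For context on the "OFW not registering allocated pages" part: the new
pmap_bootstrap() code carves each permanently mapped 4MB nucleus page out
of the phys_avail ranges, so memory the firmware already claimed is never
handed back to the VM system. A simplified, standalone sketch of that
interval-splitting step (function and variable names here are hypothetical
and the edge handling is reduced; the committed loop is in the diff below):

#include <stdint.h>
#include <string.h>

#define SZ_4M	(4UL * 1024 * 1024)

/*
 * Remove [pa, pa + SZ_4M) from an array of {start, end} pairs
 * terminated by a zero entry, staging the result in tmp.
 */
static void
carve_region(uint64_t *avail, uint64_t *tmp, uint64_t pa)
{
	int j, k;

	for (j = 0, k = 0; avail[j + 1] != 0; j += 2, k += 2) {
		if (pa == avail[j]) {
			/* Carved page at the front: trim it off. */
			tmp[k] = avail[j] + SZ_4M;
			tmp[k + 1] = avail[j + 1];
		} else if (avail[j] < pa && pa + SZ_4M == avail[j + 1]) {
			/* At the back: shorten the range. */
			tmp[k] = avail[j];
			tmp[k + 1] = pa;
		} else if (avail[j] < pa && pa + SZ_4M < avail[j + 1]) {
			/* In the middle: split the range in two. */
			tmp[k] = avail[j];
			tmp[k + 1] = pa;
			tmp[k + 2] = pa + SZ_4M;
			tmp[k + 3] = avail[j + 1];
			k += 2;
		} else {
			/* Untouched range: copy through. */
			tmp[k] = avail[j];
			tmp[k + 1] = avail[j + 1];
		}
	}
	tmp[k] = tmp[k + 1] = 0;
	memcpy(avail, tmp, (size_t)(k + 2) * sizeof(*avail));
}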
Affected files ...
.. //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#36 edit
Differences ...
==== //depot/projects/kmacy_sun4v/src/sys/sun4v/sun4v/pmap.c#36 (text+ko) ====
@@ -31,6 +31,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/kdb.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/msgbuf.h>
@@ -68,6 +69,7 @@
#include <machine/tte.h>
#include <machine/tte_hash.h>
#include <machine/pcb.h>
+#include <machine/pstate.h>
#include <machine/tsb.h>
#include <machine/hypervisor_api.h>
@@ -94,6 +96,7 @@
 * Map of physical memory regions.
*/
vm_paddr_t phys_avail[128];
+vm_paddr_t phys_avail_tmp[128];
static struct ofw_mem_region mra[128];
static struct ofw_map translations[128];
static int translations_size;
@@ -355,15 +358,17 @@
pmap_t pmap, oldpmap;
DPRINTF("activating pmap\n");
- critical_enter();
+ spinlock_enter();
pmap = vmspace_pmap(td->td_proc->p_vmspace);
oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
- pmap->pm_active = PCPU_GET(cpumask);
+ pmap->pm_tlbactive = pmap->pm_active = PCPU_GET(cpumask);
+
#else
oldpmap->pm_active &= ~1;
pmap->pm_active |= 1;
+ pmap->pm_tlbactive |= 1;
#endif
#if 0
tsb_clear(&pmap->pm_tsb);
@@ -377,7 +382,7 @@
hv_set_ctxnon0(1, pmap->pm_tsb_ra);
stxa(MMU_CID_S, ASI_MMU_CONTEXTID, pmap->pm_context);
membar(Sync);
- critical_exit();
+ spinlock_exit();
}
vm_offset_t
@@ -399,7 +404,7 @@
ihandle_t pmem, vmem;
int i, sz, j;
uint64_t tsb_8k_size, tsb_4m_size, error;
-
+
/*
* Find out what physical memory is available from the prom and
* initialize the phys_avail array. This must be done before
@@ -426,7 +431,9 @@
physmem = ctob(physmem);
for (i = 0, j = 0; i < sz; i++, j += 2) {
- CTR2(KTR_PMAP, "start=%#lx size=%#lx", mra[i].mr_start,
+ CTR2(KTR_PMAP, "start=%#lx size=%#lx\n", mra[i].mr_start,
+ mra[i].mr_size);
+ DPRINTF("start=%#lx size=%#lx\n", mra[i].mr_start,
mra[i].mr_size);
if (physmem != 0 && btoc(physsz + mra[i].mr_size) >= physmem) {
if (btoc(physsz) < physmem) {
@@ -443,6 +450,71 @@
}
physmem = btoc(physsz);
+ if ((vmem = OF_finddevice("/virtual-memory")) == -1)
+ panic("pmap_bootstrap: finddevice /virtual-memory");
+ if ((sz = OF_getproplen(vmem, "translations")) == -1)
+ panic("pmap_bootstrap: getproplen translations");
+ if (sizeof(translations) < sz)
+ panic("pmap_bootstrap: translations too small");
+ bzero(translations, sz);
+ if (OF_getprop(vmem, "translations", translations, sz) == -1)
+ panic("pmap_bootstrap: getprop /virtual-memory/translations");
+ sz /= sizeof(*translations);
+ translations_size = sz;
+ CTR0(KTR_PMAP, "pmap_bootstrap: translations");
+ qsort(translations, sz, sizeof (*translations), om_cmp);
+ for (i = 0; i < sz; i++) {
+ int j, k;
+ DPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
+ translations[i].om_size, translations[i].om_start,
+ translations[i].om_tte);
+ if (translations[i].om_size == PAGE_SIZE_4M &&
+ (translations[i].om_start == KERNBASE ||
+ translations[i].om_start == KERNBASE + PAGE_SIZE_4M)) {
+ DPRINTF("mapping permanent translation\n");
+ pa = TTE_GET_PA(translations[i].om_tte);
+ error = hv_mmu_map_perm_addr((char *)translations[i].om_start,
+ KCONTEXT, pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
+ if (error != H_EOK)
+ panic("map_perm_addr returned error=%ld", error);
+
+ nucleus_mappings[permanent_mappings++] = pa;
+ nucleus_memory += PAGE_SIZE_4M;
+#ifdef SMP
+ mp_add_nucleus_mapping(translations[i].om_start,
+ pa | TTE_KERNEL | VTD_4M);
+#endif
+ for (j = 0, k = 0; phys_avail[j + 2] != 0; j += 2, k += 2) {
+
+ if (pa == phys_avail[j]) {
+ phys_avail_tmp[k] = phys_avail[j] + PAGE_SIZE_4M;
+ phys_avail_tmp[k + 1] = phys_avail[j + 1];
+ break;
+ } else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M == phys_avail[j + 1]) {
+ phys_avail_tmp[k] = phys_avail[j];
+ phys_avail_tmp[k + 1] = pa;
+ } else if (phys_avail[j] < pa && pa + PAGE_SIZE_4M < phys_avail[j + 1]) {
+ phys_avail_tmp[k] = phys_avail[j];
+ phys_avail_tmp[k + 1] = pa;
+ phys_avail_tmp[k + 2] = pa + PAGE_SIZE_4M;
+ phys_avail_tmp[k + 3] = phys_avail[j + 1];
+ k += 2;
+ } else {
+ phys_avail_tmp[k] = phys_avail[j];
+ phys_avail_tmp[k + 1] = phys_avail[j + 1];
+ }
+ }
+
+ for (j = 0; phys_avail_tmp[j + 2] != 0; phys_avail[j] = phys_avail_tmp[j],
+ phys_avail[j + 1] = phys_avail_tmp[j + 1], j += 2)
+ ;
+#if 0
+ for (i = 0; phys_avail_tmp[i + 1] != 0; i++)
+ phys_avail[i] = phys_avail_tmp[i];
+#endif
+ }
+ }
+
/*
* Calculate the size of kernel virtual memory, and the size and mask
* for the kernel tsb.
@@ -466,7 +538,7 @@
if (kernel_hash_pa & PAGE_MASK_4M)
panic("pmap_bootstrap: hashtable pa unaligned\n");
pmap_scrub_pages(kernel_hash_pa, PAGE_SIZE_4M);
-
+ printf("allocated hash\n");
/*
* Set up TSB descriptors for the hypervisor
*
@@ -556,26 +628,14 @@
/*
* Calculate the last available physical address.
*/
- for (i = 0; phys_avail[i + 2] != 0; i += 2)
- ;
+ for (i = 0; phys_avail[i + 2] != 0; i += 2)
+ DPRINTF("phys_avail[%d]=0x%lx phys_avail[%d]=0x%lx\n",
+ i, phys_avail[i], i+1, phys_avail[i+1]);
Maxmem = sparc64_btop(phys_avail[i + 1]);
/*
* Add the prom mappings to the kernel tsb.
*/
- if ((vmem = OF_finddevice("/virtual-memory")) == -1)
- panic("pmap_bootstrap: finddevice /virtual-memory");
- if ((sz = OF_getproplen(vmem, "translations")) == -1)
- panic("pmap_bootstrap: getproplen translations");
- if (sizeof(translations) < sz)
- panic("pmap_bootstrap: translations too small");
- bzero(translations, sz);
- if (OF_getprop(vmem, "translations", translations, sz) == -1)
- panic("pmap_bootstrap: getprop /virtual-memory/translations");
- sz /= sizeof(*translations);
- translations_size = sz;
- CTR0(KTR_PMAP, "pmap_bootstrap: translations");
- qsort(translations, sz, sizeof (*translations), om_cmp);
for (i = 0; i < sz; i++) {
CTR3(KTR_PMAP,
"translation: start=%#lx size=%#lx tte=%#lx",
@@ -584,37 +644,18 @@
DPRINTF("om_size=%ld om_start=%lx om_tte=%lx\n",
translations[i].om_size, translations[i].om_start,
translations[i].om_tte);
-
- if (translations[i].om_size == PAGE_SIZE_4M &&
- (translations[i].om_start == KERNBASE ||
- translations[i].om_start == KERNBASE + PAGE_SIZE_4M)) {
- DPRINTF("mapping permanent translation\n");
- pa = TTE_GET_PA(translations[i].om_tte);
- error = hv_mmu_map_perm_addr((char *)translations[i].om_start,
- KCONTEXT, pa | TTE_KERNEL | VTD_4M, MAP_ITLB | MAP_DTLB);
- nucleus_mappings[permanent_mappings++] = pa;
- nucleus_memory += PAGE_SIZE_4M;
-
-#ifdef SMP
- mp_add_nucleus_mapping(translations[i].om_start,
- pa | TTE_KERNEL | VTD_4M);
-#endif
-
- if (error != H_EOK)
- panic("map_perm_addr returned error=%ld", error);
+
+ if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
+ translations[i].om_start > VM_MAX_PROM_ADDRESS)
continue;
- } else if (translations[i].om_start < VM_MIN_PROM_ADDRESS ||
- translations[i].om_start > VM_MAX_PROM_ADDRESS) {
- continue;
- } else {
- for (off = 0; off < translations[i].om_size;
- off += PAGE_SIZE) {
- va = translations[i].om_start + off;
- pa = TTE_GET_PA(translations[i].om_tte) + off;
- tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
- tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa |
- TTE_KERNEL | VTD_8K, 0);
- }
+
+ for (off = 0; off < translations[i].om_size;
+ off += PAGE_SIZE) {
+ va = translations[i].om_start + off;
+ pa = TTE_GET_PA(translations[i].om_tte) + off;
+ tsb_assert_invalid(&kernel_td[TSB8K_INDEX], va);
+ tsb_set_tte(&kernel_td[TSB8K_INDEX], va, pa |
+ TTE_KERNEL | VTD_8K, 0);
}
}
@@ -924,6 +965,7 @@
pmap_invalidate_page(pmap, va);
} else {
tte_hash_insert(pmap->pm_hash, va, tte_data);
+ membar(Sync);
}
}
@@ -974,7 +1016,6 @@
tte_data |= VTD_MANAGED;
tte_hash_insert(pmap->pm_hash, va, tte_data | TTE_MINFLAGS);
-
PMAP_UNLOCK(pmap);
return (0);
@@ -1012,14 +1053,14 @@
m = NULL;
vm_page_lock_queues();
PMAP_LOCK(pmap);
-
+ sched_pin();
tte_data = tte_hash_lookup(pmap->pm_hash, va);
if (tte_data != 0 &&
((*tte_data & VTD_SW_W) || (prot & VM_PROT_WRITE) == 0)) {
m = PHYS_TO_VM_PAGE(TTE_GET_PA(*tte_data));
vm_page_hold(m);
}
-
+ sched_unpin();
vm_page_unlock_queues();
PMAP_UNLOCK(pmap);
@@ -1081,13 +1122,21 @@
pmap_ipi(pmap_t pmap, char *func, uint64_t arg1, uint64_t arg2)
{
- int active;
+ int i, active, cpu_count;
+ u_int cpus;
cpumask_t cpumask;
+ uint16_t *cpulist;
+ uint32_t ackmask, ackexpect;
+ int inext;
if (!smp_started)
return;
-
+
cpumask = PCPU_GET(cpumask);
+ cpulist = PCPU_GET(cpulist);
+
+ if (rdpr(pil) != 14)
+ panic("pil %ld != 14", rdpr(pil));
#ifndef CPUMASK_NOT_BEING_ERRONEOUSLY_CHANGED
/* by definition cpumask should have curcpu's bit set */
@@ -1096,6 +1145,14 @@
cpumask, (1 << curcpu));
/* tlbactive should be set if we're using a pmap */
+ if ((cpumask & pmap->pm_active) == 0 && curthread->td_proc && curthread->td_proc->p_pid != 1) {
+ if (curthread->td_proc)
+ printf("%s not active\n",
+ curthread->td_proc->p_comm);
+ printf("td->pcb->pcb_kstack=0x%lx\n", curthread->td_pcb->pcb_kstack);
+ panic("cpumask(0x%x) & active (0x%x) == 0 pid == %d\n",
+ cpumask, pmap->pm_active, curthread->td_proc->p_pid);
+ }
if ((cpumask & pmap->pm_tlbactive) == 0)
panic("cpumask(0x%x) & tlbactive (0x%x) == 0\n",
cpumask, pmap->pm_tlbactive);
@@ -1103,73 +1160,137 @@
if (cpumask == pmap->pm_tlbactive)
return;
-
+#if 0
if (pmap != kernel_pmap)
- active = pmap->pm_tlbactive & ~cpumask;
+ active = (pmap->pm_tlbactive & ~cpumask);
else
+#endif
active = PCPU_GET(other_cpus);
+
+#if 1
- cpu_ipi_selected(active, (uint64_t)func, (uint64_t)arg1, (uint64_t)arg2);
+ for (cpu_count = 0, i = 0, ackexpect = 0, cpus = active; i < 32 && cpus;) {
+
+ if (!(cpus & 0x1) /*|| (i & ~0x3) == (curcpu & ~0x3) */ )
+ {
+ cpus = cpus >> 1;
+ i++;
+ continue;
+ }
+
+ cpulist[cpu_count] = (uint16_t)i;
+ cpu_count++;
+ ackexpect |= (1 << i);
+#if 0
+ inext = ((i & ~0x3) + 4);
+#else
+ inext = i + 1;
+#endif
+ cpus = (cpus >> (inext - i));
+ i = inext;
+ }
+#else
+ inext = i = cpus = 0;
+ cpulist[0] = curcpu ? 0 : 1;
+ cpu_count = 1;
+ ackexpect = curcpu ? 1 : 2;
+#endif
+
+ if (cpu_count == 0)
+ return;
+
+ ackmask = 0;
+ cpu_ipi_selected(cpu_count, cpulist, (uint64_t)func, (uint64_t)arg1,
+ (uint64_t)arg2, (uint64_t *)&ackmask);
+
+ while (ackmask != ackexpect) {
+ DELAY(1);
+ i++;
+ if (i > 1000000)
+ panic(" ackmask=0x%x active=0x%x\n", ackmask, ackexpect);
+ }
}
#endif
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
+ spinlock_enter();
tsb_clear_tte(&pmap->pm_tsb, va);
DPRINTF("pmap_invalidate_page(va=0x%lx)\n", va);
invlpg(va, pmap->pm_context);
#ifdef SMP
+
pmap_ipi(pmap, (void *)tl_invlpg, (uint64_t)va, (uint64_t)pmap->pm_context);
#endif
+ spinlock_exit();
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t tva;
+#ifdef SMP
char *func;
+#endif
+ spinlock_enter();
+
if (pmap != kernel_pmap)
DPRINTF("pmap_invalidate_range(sva=%lx, eva=%lx)\n", sva, eva);
if ((((eva - sva) >> PAGE_SHIFT) < MAX_TSB_CLEARS) ||
(pmap->pm_context == 0)) {
tsb_clear_range(&pmap->pm_tsb, sva, eva);
- func = tl_invltlb;
} else {
tsb_clear(&pmap->pm_tsb);
- func = tl_invlctx;
}
/* XXX */
invltlb();
+
if ((((eva - sva) >> PAGE_SHIFT) < MAX_INVALIDATES)) {
for (tva = sva; tva < eva; tva += PAGE_SIZE_8K)
invlpg(tva, pmap->pm_context);
- } else if (pmap->pm_context) {
+ } else if (pmap->pm_context)
invlctx(pmap->pm_context);
- } else
+ else
invltlb();
+
#ifdef SMP
- pmap_ipi(pmap, (void *)func, 0, 0);
- pmap->pm_tlbactive = pmap->pm_active;
+ if (pmap == kernel_pmap)
+ func = tl_invltlb;
+ else
+ func = tl_invlctx;
+
+ pmap_ipi(pmap, (void *)func, pmap->pm_context, 0);
+ if (pmap != kernel_pmap)
+ pmap->pm_tlbactive = pmap->pm_active;
#endif
-
+ spinlock_exit();
}
void
pmap_invalidate_all(pmap_t pmap)
{
char *func;
+
+ spinlock_enter();
+
+ if (pmap == kernel_pmap)
+ panic("invalidate_all called on kernel_pmap");
+
tsb_clear(&pmap->pm_tsb);
-
if (pmap->pm_context) {
invlctx(pmap->pm_context);
func = tl_invlctx;
@@ -1180,9 +1301,11 @@
#ifdef SMP
pmap_ipi(pmap, func, pmap->pm_context, 0);
- pmap->pm_tlbactive = pmap->pm_active;
+ if (pmap != kernel_pmap)
+ pmap->pm_tlbactive = pmap->pm_active;
#endif
+ spinlock_exit();
}
boolean_t
@@ -1203,7 +1326,7 @@
/*
* Map a wired page into kernel virtual address space.
*/
-void
+static void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
tte_hash_insert(kernel_pmap->pm_hash, va, pa | TTE_KERNEL | VTD_8K);
@@ -1235,7 +1358,7 @@
return pa;
}
-void
+static void
pmap_kremove(vm_offset_t va)
{
tte_hash_delete(kernel_pmap->pm_hash, va);
@@ -1352,7 +1475,8 @@
pmap_pinit0(pmap_t pmap)
{
PMAP_LOCK_INIT(pmap);
- pmap->pm_active = 0;
+ pmap->pm_active = ~0;
+ pmap->pm_tlbactive = ~0;
pmap->pm_context = 0;
PCPU_SET(curpmap, pmap);
TAILQ_INIT(&pmap->pm_pvlist);
@@ -1375,6 +1499,7 @@
pmap->pm_hash = tte_hash_create(pmap->pm_context, &pmap->pm_hashscratch);
pmap->pm_tsb_ra = tsb_init(&pmap->pm_tsb, &pmap->pm_tsbscratch);
pmap->pm_active = 0;
+ pmap->pm_tlbactive = 0;
TAILQ_INIT(&pmap->pm_pvlist);
PMAP_LOCK_INIT(pmap);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1520,7 +1645,6 @@
sched_pin();
PMAP_LOCK(pmap);
for (tva = start; tva < end; tva += PAGE_SIZE) {
-
if ((tte = tte_hash_lookup(pmap->pm_hash, tva)) == NULL)
continue;
pmap_remove_tte(pmap, tte, tva);
@@ -1530,8 +1654,12 @@
}
sched_unpin();
vm_page_unlock_queues();
- if (invlva)
- pmap_invalidate_all(pmap);
+ if (invlva) {
+ if (pmap == kernel_pmap)
+ pmap_invalidate_range(pmap, start, end);
+ else
+ pmap_invalidate_all(pmap);
+ }
PMAP_UNLOCK(pmap);
}
@@ -1621,7 +1749,7 @@
void
-pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+pmap_remove_pages(pmap_t pmap)
{
vm_page_t m;
@@ -1632,12 +1760,6 @@
PMAP_LOCK(pmap);
sched_pin();
for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
-#if 0
- if (pv->pv_va >= eva || pv->pv_va < sva) {
- npv = TAILQ_NEXT(pv, pv_plist);
- continue;
- }
-#endif
tte = tte_hash_lookup(pmap->pm_hash, pv->pv_va);
tte_data = tte ? *tte : 0;
@@ -1658,7 +1780,6 @@
m = PHYS_TO_VM_PAGE(TTE_GET_PA(tte_data));
pmap->pm_stats.resident_count--;
- tte_hash_delete(pmap->pm_hash, pv->pv_va);
if (tte_data & VTD_W) {
vm_page_dirty(m);
@@ -1675,6 +1796,8 @@
free_pv_entry(pv);
}
+ tte_hash_delete_all(pmap->pm_hash);
+
sched_unpin();
pmap_invalidate_all(pmap);
PMAP_UNLOCK(pmap);
@@ -1744,12 +1867,7 @@
void
pmap_zero_page(vm_page_t m)
{
- uint64_t bytes_zeroed, error;
-
- error = hv_mem_scrub(VM_PAGE_TO_PHYS(m), PAGE_SIZE, &bytes_zeroed);
- if (error || bytes_zeroed != PAGE_SIZE)
- panic("hv_mem_scrub failed error: %ld - bytes_zeroed: %ld\n",
- error, bytes_zeroed);
+ bzero((char *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
}
void
@@ -1768,12 +1886,21 @@
void
pmap_zero_page_idle(vm_page_t m)
{
- uint64_t bytes_zeroed, error;
- error = hv_mem_scrub(VM_PAGE_TO_PHYS(m), PAGE_SIZE, &bytes_zeroed);
- if (error || bytes_zeroed != PAGE_SIZE)
- panic("hv_mem_scrub failed error: %ld - bytes_zeroed: %ld\n",
- error, bytes_zeroed);
+ int i;
+ for (i = 0; i < (nucleus_memory >> PAGE_SHIFT_4M); i++) {
+ if (VM_PAGE_TO_PHYS(m) >= nucleus_mappings[i] &&
+ VM_PAGE_TO_PHYS(m) < nucleus_mappings[i] + PAGE_SIZE_4M)
+ panic("zeroing nucleus");
+ if (pmap_kextract(TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m))) >=
+ nucleus_mappings[i] &&
+ pmap_kextract(TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m))) <
+ nucleus_mappings[i] + PAGE_SIZE_4M)
+ panic("zeroing nucleus in direct area");
+ }
+
+
+ bzero((char *)TLB_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)), PAGE_SIZE);
}