svn commit: r258920 - projects/amd64_xen_pv/sys/amd64/xen
Cherry G. Mathew
cherry at FreeBSD.org
Wed Dec 4 12:28:57 UTC 2013
Author: cherry
Date: Wed Dec 4 12:28:56 2013
New Revision: 258920
URL: http://svnweb.freebsd.org/changeset/base/258920
Log:
This commit enables userland to load and start executing, as it
did before the recent vm/ shakeups. fork(2) is still a work in progress.
Changes:
pmap.c:
i) Initial split-out of pmap_enter() variants (see the sketch following this log message):
- pmap_enter_locked()
- pmap_enter_quick()
- pmap_enter_object()
ii) Improve locking behaviour, making it more consistent with the native pmap.
iii) Fix two API-change-related bugs (kva_alloc() and friends).
iv) Add miscellaneous further pmap functions needed to make boot progress.
pmap_pv.[hc]:
i) Modify the API to export control over locking behaviour.
ii) Fine-tune pv chunk garbage collection.
iii) Remove stray debug/instrumentation code.
"- if (pmap == kernel_pmap) return true;"
Approved by: gibbs (implicit)
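
For context on pmap.c change i), the split follows the common pmap convention of a thin locking wrapper around a _locked worker, so that batch paths such as pmap_enter_object() and pmap_enter_quick() can take the pmap lock once themselves and call the worker directly. The following is a minimal sketch of that pattern only, not the committed code: signatures are simplified (the real pmap_enter() also takes access, prot and wired arguments, and the real object loop is bounded by the object's page range), and it assumes the usual pmap.c kernel includes for pmap_t, vm_page_t and the PMAP_LOCK macros.

        static void
        pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m)
        {
                /* Worker: the caller must already hold the pmap lock. */
                PMAP_LOCK_ASSERT(pmap, MA_OWNED);
                /* ... PTE insertion and pv tracking happen here ... */
        }

        void
        pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m)
        {
                /* Single-page path: take and drop the lock around the worker. */
                PMAP_LOCK(pmap);
                pmap_enter_locked(pmap, va, m);
                PMAP_UNLOCK(pmap);
        }

        void
        pmap_enter_object(pmap_t pmap, vm_offset_t va, vm_page_t m_start)
        {
                vm_page_t m;

                /* Batch path: lock once, then invoke the worker per page. */
                PMAP_LOCK(pmap);
                for (m = m_start; m != NULL; m = TAILQ_NEXT(m, listq)) {
                        pmap_enter_locked(pmap, va, m);
                        va += PAGE_SIZE;
                }
                PMAP_UNLOCK(pmap);
        }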
Modified:
projects/amd64_xen_pv/sys/amd64/xen/pmap.c
projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c
projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.h
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap.c Wed Dec 4 12:07:46 2013 (r258919)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap.c Wed Dec 4 12:28:56 2013 (r258920)
@@ -646,6 +646,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_root.rt_root = 0;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
pmap_pv_init();
+ pmap_pv_pmap_init(kernel_pmap);
tsz = mmu_map_t_size();
@@ -756,6 +757,7 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_cr3 = pmap_kextract_ma((vm_offset_t) KPML4phys);
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
+ CPU_ZERO(&pmap->pm_save);
PCPU_SET(curpmap, pmap);
pmap_pv_pmap_init(pmap);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1003,27 +1005,16 @@ pmap_qremove(vm_offset_t sva, int count)
// XXX: TODO: pmap_invalidate_range(kernel_pmap, sva, va);
}
-/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte can not be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
+#include <ddb/ddb.h>
-void
-pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
+static void
+pmap_enter_locked(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
vm_prot_t prot, boolean_t wired)
{
pt_entry_t *pte;
pt_entry_t newpte, origpte;
vm_paddr_t opa, pa;
- vm_page_t mpte, om;
+ vm_page_t om;
va = trunc_page(va);
@@ -1031,12 +1022,12 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
va));
- if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
- VM_OBJECT_ASSERT_WLOCKED(m->object);
KASSERT(VM_PAGE_TO_PHYS(m) != 0,
("VM_PAGE_TO_PHYS(m) == 0x%lx\n", VM_PAGE_TO_PHYS(m)));
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
pa = VM_PAGE_TO_PHYS(m);
newpte = (pt_entry_t)(xpmap_ptom(pa) | PG_A | PG_V | PG_U);
if ((access & VM_PROT_WRITE) != 0)
@@ -1052,10 +1043,6 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
/* newpte |= pmap_cache_bits(m->md.pat_mode, 0); XXX */
- mpte = NULL;
-
- PMAP_LOCK(pmap);
-
/*
* In the case that a page table page is not
* resident, we are creating it here.
@@ -1114,13 +1101,16 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
pmap_resident_count_inc(pmap, 1);
}
-
/*
* Enter on the PV list if part of our managed memory.
*/
if ((m->oflags & VPO_UNMANAGED) == 0) {
+ bool pvunmanaged = false;
newpte |= PG_MANAGED;
- pmap_put_pv_entry(pmap, va, m);
+ pvunmanaged = pmap_put_pv_entry(pmap, va, m);
+
+ KASSERT(pvunmanaged == true,
+ ("VPO_UNMANAGED flag set on existing pv entry for m == %p\n", m));
if ((newpte & PG_RW) != 0)
vm_page_aflag_set(m, PGA_WRITEABLE);
@@ -1195,6 +1185,44 @@ validate:
unchanged:
pmap_vtopte_release(pmap, va, &tptr);
+
+}
+
+/*
+ * Insert the given physical page (p) at
+ * the specified virtual address (v) in the
+ * target physical map with the protection requested.
+ *
+ * If specified, the page will be wired down, meaning
+ * that the related pte can not be reclaimed.
+ *
+ * NB: This is the only routine which MAY NOT lazy-evaluate
+ * or lose information. That is, this routine must actually
+ * insert this page into the given map NOW.
+ */
+
+void
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
+ vm_prot_t prot, boolean_t wired)
+{
+ va = trunc_page(va);
+
+ KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
+ KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
+ ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
+ va));
+
+ if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
+ VM_OBJECT_ASSERT_WLOCKED(m->object);
+
+ KASSERT(VM_PAGE_TO_PHYS(m) != 0,
+ ("VM_PAGE_TO_PHYS(m) == 0x%lx\n", VM_PAGE_TO_PHYS(m)));
+
+
+ PMAP_LOCK(pmap);
+
+ pmap_enter_locked(pmap, va, access, m, prot, wired);
+
PMAP_UNLOCK(pmap);
}
@@ -1219,16 +1247,20 @@ pmap_enter_object(pmap_t pmap, vm_offset
vm_pindex_t diff, psize;
vm_page_t m;
- VM_OBJECT_ASSERT_WLOCKED(m_start->object);
+ VM_OBJECT_ASSERT_LOCKED(m_start->object);
psize = atop(end - start);
m = m_start;
+ PMAP_LOCK(pmap);
+
while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
va = start + ptoa(diff);
- pmap_enter(pmap, va, prot, m, prot, false);
+ pmap_enter_locked(pmap, va, prot, m, prot, false);
m = TAILQ_NEXT(m, listq);
}
+
+ PMAP_UNLOCK(pmap);
}
void
@@ -1237,8 +1269,22 @@ pmap_enter_quick(pmap_t pmap, vm_offset_
/* RO and unwired */
prot = (prot & ~VM_PROT_WRITE) | VM_PROT_READ;
+ va = trunc_page(va);
+
+ KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
+ KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
+ ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)",
+ va));
+
+ KASSERT(VM_PAGE_TO_PHYS(m) != 0,
+ ("VM_PAGE_TO_PHYS(m) == 0x%lx\n", VM_PAGE_TO_PHYS(m)));
+
+ PMAP_LOCK(pmap);
+
/* XXX: do we care about "speed" ? */
- pmap_enter(pmap, va, prot, m, prot, false);
+ pmap_enter_locked(pmap, va, prot, m, prot, false);
+
+ PMAP_UNLOCK(pmap);
}
void *
@@ -1493,7 +1539,7 @@ pmap_remove_all(vm_page_t m)
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
- pmap_pv_iterate(m, pv_remove);
+ pmap_pv_iterate(m, pv_remove, PV_RW_ITERATE);
/* free pv entry from all pmaps */
pmap_pv_page_unmap(m);
@@ -1894,6 +1940,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pm
if (dst_addr != src_addr)
return;
+ if (dst_pmap->pm_type != src_pmap->pm_type)
+ return;
+
if (dst_pmap < src_pmap) {
PMAP_LOCK(dst_pmap);
PMAP_LOCK(src_pmap);
@@ -2000,7 +2049,6 @@ out:
mmu_map_t_fini(dtptr);
mmu_map_t_fini(stptr);
-
PMAP_UNLOCK(src_pmap);
PMAP_UNLOCK(dst_pmap);
}
@@ -2014,7 +2062,7 @@ pmap_copy_page(vm_page_t msrc, vm_page_t
KASSERT(msrc != NULL && mdst != NULL,
("Invalid source or destination page!"));
- va_src = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_ZERO);
+ va_src = kva_alloc(PAGE_SIZE * 2);
va_dst = va_src + PAGE_SIZE;
KASSERT(va_src != 0,
@@ -2031,7 +2079,7 @@ pmap_copy_page(vm_page_t msrc, vm_page_t
pmap_kremove(va_src);
pmap_kremove(va_dst);
- kmem_free(kernel_arena, va_src, PAGE_SIZE * 2);
+ kva_free(va_src, PAGE_SIZE * 2);
}
int unmapped_buf_allowed = 1;
@@ -2072,7 +2120,7 @@ pmap_copy_pages(vm_page_t ma[], vm_offse
pmap_kremove(a_pg);
pmap_kremove(b_pg);
- kmem_free(kernel_arena, a_pg, PAGE_SIZE * 2);
+ kva_free(a_pg, PAGE_SIZE * 2);
}
void
@@ -2211,6 +2259,8 @@ pv_map_remove(pmap_t pmap, vm_offset_t v
void
pmap_remove_pages(pmap_t pmap)
{
+ KASSERT(pmap != kernel_pmap,
+ ("Trying to destroy kernel_pmap pv mappings!"));
if (pmap != PCPU_GET(curpmap)) {
printf("warning: pmap_remove_pages called with non-current pmap\n");
return;
@@ -2228,7 +2278,7 @@ pmap_page_set_memattr(vm_page_t m, vm_me
{
KASSERT(0, ("XXX: %s: TODO\n", __func__));
}
-#include <ddb/ddb.h>
+
static bool
pv_dummy(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
@@ -2238,10 +2288,11 @@ pv_dummy(pmap_t pmap, vm_offset_t va, vm
boolean_t
pmap_page_is_mapped(vm_page_t m)
{
+
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- return pmap_pv_iterate(m, pv_dummy);
+ return pmap_pv_iterate(m, pv_dummy, PV_RO_ITERATE);
}
boolean_t
@@ -2251,11 +2302,48 @@ pmap_page_exists_quick(pmap_t pmap, vm_p
return 0;
}
+static bool
+pv_page_is_wired(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+ pt_entry_t *pte, tpte;
+
+ char tbuf[tsz]; /* Safe to do this on the stack since tsz is
+ * effectively const.
+ */
+
+ mmu_map_t tptr = tbuf;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ pte = pmap_vtopte_inspect(pmap, va, &tptr);
+
+ KASSERT(pte != NULL, ("pte has no backing page tables!"));
+
+ tpte = *pte;
+
+ if ((tpte & PG_V) == 0) {
+ panic("bad pte va %lx pte %lx", va, tpte);
+ }
+
+ /*
+ * We cannot remove wired pages from a process' mapping at this time
+ */
+ if (tpte & PG_W) {
+ return false; /* Continue iteration */
+ }
+
+ pmap_vtopte_release(pmap, va, &tptr);
+
+ return true; /* stop iteration */
+}
+
int
pmap_page_wired_mappings(vm_page_t m)
{
- KASSERT(0, ("XXX: %s: TODO\n", __func__));
- return -1;
+ if ((m->oflags & VPO_UNMANAGED) != 0)
+ return (0);
+
+ return pmap_pv_iterate(m, pv_page_is_wired, PV_RW_ITERATE);
}
boolean_t
@@ -2477,15 +2565,32 @@ pmap_remove_write(vm_page_t m)
if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
return;
- pmap_pv_iterate(m, pv_remove_write);
+ pmap_pv_iterate(m, pv_remove_write, PV_RW_ITERATE);
vm_page_aflag_clear(m, PGA_WRITEABLE);
}
+/*
+ * pmap_ts_referenced:
+ *
+ * Return a count of reference bits for a page, clearing those bits.
+ * It is not necessary for every reference bit to be cleared, but it
+ * is necessary that 0 only be returned when there are truly no
+ * reference bits set.
+ *
+ */
+
int
pmap_ts_referenced(vm_page_t m)
{
- KASSERT(0, ("XXX: %s: TODO\n", __func__));
- return -1;
+ /*
+ * XXX: we don't clear refs yet. We just return non-zero if at
+ * least one reference exists.
+ * This obeys the required semantics - but only just.
+ */
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_ts_referenced: page %p is not managed", m));
+
+ return pmap_pv_iterate(m, pv_dummy, PV_RO_ITERATE);
}
void
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c Wed Dec 4 12:07:46 2013 (r258919)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c Wed Dec 4 12:28:56 2013 (r258920)
@@ -136,7 +136,6 @@ static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
vm_map_t pv_map; /* Kernel submap for pc chunk alloc */
-extern vm_offset_t pv_minva, pv_maxva; /* VA range for submap */
/***************************************************
* page management routines.
@@ -177,6 +176,10 @@ free_pv_chunk(struct pv_chunk *pc)
{
vm_page_t m;
mtx_lock(&pv_chunks_mutex);
+
+ KASSERT(pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+ pc->pc_map[2] == PC_FREE2, ("Tried to free chunk in use"));
+
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
@@ -224,7 +227,15 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
return;
}
TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
- free_pv_chunk(pc);
+
+ /*
+ * We don't reclaim the pc backing memory here, in case it's
+ * still being scanned. This is the responsibility of
+ * pmap_free_pv_entry().
+ * XXX: This is quite fragile. pc management needs to be
+ * formalised a bit better.
+ */
+
}
pv_entry_t
@@ -235,8 +246,6 @@ pmap_get_pv_entry(pmap_t pmap)
struct pv_chunk *pc;
vm_page_t m;
- KASSERT(pmap != kernel_pmap,
- ("Trying to track kernel va"));
rw_assert(&pvh_global_lock, RA_LOCKED);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
@@ -265,7 +274,7 @@ pmap_get_pv_entry(pmap_t pmap)
}
/* No free items, allocate another chunk */
- m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM | VM_ALLOC_NOOBJ |
VM_ALLOC_WIRED);
if (m == NULL) {
panic("XXX: TODO: memory pressure reclaim\n");
@@ -317,12 +326,13 @@ pmap_put_pv_entry(pmap_t pmap, vm_offset
}
rw_rlock(&pvh_global_lock);
- if (pmap != kernel_pmap) {
- pv = pmap_get_pv_entry(pmap);
- pv->pv_va = va;
- TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
- }
+
+ pv = pmap_get_pv_entry(pmap);
+ pv->pv_va = va;
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+
rw_runlock(&pvh_global_lock);
+
return true;
}
@@ -332,9 +342,9 @@ pmap_free_pv_entry(pmap_t pmap, vm_offse
{
bool found = false;
pv_entry_t pv;
+ struct pv_chunk *pc;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if (pmap == kernel_pmap) return true;
rw_rlock(&pvh_global_lock);
@@ -342,6 +352,11 @@ pmap_free_pv_entry(pmap_t pmap, vm_offse
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
free_pv_entry(pmap, pv);
+ pc = pv_to_chunk(pv);
+ if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+ pc->pc_map[2] == PC_FREE2) {
+ free_pv_chunk(pc);
+ }
found = true;
break;
}
@@ -358,9 +373,8 @@ pmap_find_pv_entry(pmap_t pmap, vm_offse
pv_entry_t pv = NULL;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- if (pmap == kernel_pmap) return NULL;
- rw_rlock(&pvh_global_lock);
+ rw_rlock(&pvh_global_lock);
TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
break;
@@ -441,7 +455,6 @@ pmap_pv_vm_page_to_v(pmap_t pmap, vm_pag
bool
pmap_pv_vm_page_mapped(pmap_t pmap, vm_page_t m)
{
- if (pmap == kernel_pmap) return true;
return (pmap_pv_vm_page_to_v(pmap, m) ==
(VM_MAX_KERNEL_ADDRESS + 1)) ? false : true;
@@ -454,18 +467,37 @@ pmap_pv_vm_page_mapped(pmap_t pmap, vm_p
*/
int
-pmap_pv_iterate(vm_page_t m, pv_cb_t cb)
+pmap_pv_iterate(vm_page_t m, pv_cb_t cb, iterate_flags iflag)
{
int iter = 0;
pv_entry_t next_pv, pv;
- rw_wlock(&pvh_global_lock);
+ switch(iflag) {
+ case PV_RO_ITERATE:
+ rw_rlock(&pvh_global_lock);
+ break;
+ case PV_RW_ITERATE:
+ rw_wlock(&pvh_global_lock);
+ break;
+ default:
+ panic("%s: unknown iterate flag, %d, requested\n", __func__, iflag);
+ }
TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_next, next_pv) {
iter++;
if (cb(PV_PMAP(pv), pv->pv_va, m)) break;
}
- rw_wunlock(&pvh_global_lock);
+
+ switch(iflag) {
+ case PV_RO_ITERATE:
+ rw_runlock(&pvh_global_lock);
+ break;
+ case PV_RW_ITERATE:
+ rw_wunlock(&pvh_global_lock);
+ break;
+ default:
+ panic("%s: unknown iterate flag, %d, requested\n", __func__, iflag);
+ }
return iter;
}
@@ -493,20 +525,39 @@ pmap_pv_iterate_map(pmap_t pmap, pv_cb_t
for (field = 0; field < _NPCM; field++) {
inuse = ~pc->pc_map[field] & pc_freemask[field];
while (inuse != 0) {
+ bool cbresult = false;
bit = bsfq(inuse);
bitmask = 1UL << bit;
idx = field * 64 + bit;
pv = &pc->pc_pventry[idx];
inuse &= ~bitmask;
- if (cb(PV_PMAP(pv), pv->pv_va, NULL)) break;
- }
- if (TAILQ_EMPTY(&pmap->pm_pvchunk)) {
- /* Chunks were all freed! Bail. */
- break;
+ cbresult = cb(PV_PMAP(pv), pv->pv_va, NULL);
+
+ /*
+ * Check whether the chunk was freed by the
+ * callback; if it was, move on to the
+ * next chunk.
+ */
+
+ if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+ pc->pc_map[2] == PC_FREE2) {
+ goto nextpc;
+ }
+
+ if (TAILQ_EMPTY(&pmap->pm_pvchunk)) {
+ /* Chunks were all freed in the callback! Bail. */
+ goto done_iterating;
+ }
+
+ /* Try the next va */
+ if (cbresult == false) break;
}
}
+ nextpc:
+ continue;
}
+done_iterating:
return iter;
}
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.h
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.h Wed Dec 4 12:07:46 2013 (r258919)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.h Wed Dec 4 12:28:56 2013 (r258920)
@@ -36,6 +36,12 @@
#ifndef _MACHINE_PMAP_PV_H_
#define _MACHINE_PMAP_PV_H_
+/* requested pvh_global_lock state during iteration */
+typedef enum {
+ PV_RO_ITERATE,
+ PV_RW_ITERATE
+} iterate_flags;
+
/*
* Used as a callback when iterating through multiple pmaps
* If the callback returns 'true', iteration is stopped.
@@ -51,7 +57,7 @@ pv_entry_t pmap_get_pv_entry(pmap_t pmap
bool pmap_put_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
bool pmap_free_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
pv_entry_t pmap_find_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
-int pmap_pv_iterate(vm_page_t m, pv_cb_t cb);
+int pmap_pv_iterate(vm_page_t m, pv_cb_t cb, iterate_flags iflag);
int pmap_pv_iterate_map(pmap_t pmap, pv_cb_t cb);
void pmap_pv_page_unmap(vm_page_t m);
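
As a usage note on the pmap_pv_iterate() signature change: the new iterate_flags argument lets callers choose how pvh_global_lock is taken, shared for read-only scans and exclusive when the callback may modify pv state. A hedged sketch of a caller follows; example_cb and example_scans are hypothetical names invented for illustration, not part of the commit, while the flag and callback semantics are those defined in pmap_pv.h above.

        /* A pv_cb_t callback; returning false continues the scan,
         * returning true stops it early. */
        static bool
        example_cb(pmap_t pmap, vm_offset_t va, vm_page_t m)
        {
                return (false);
        }

        static int
        example_scans(vm_page_t m)
        {
                int n;

                /* Read-only scan: pvh_global_lock is taken shared. */
                n = pmap_pv_iterate(m, example_cb, PV_RO_ITERATE);

                /*
                 * A callback that modifies pv state (e.g. pv_remove_write
                 * in the pmap.c diff above) must request the exclusive lock.
                 */
                (void)pmap_pv_iterate(m, example_cb, PV_RW_ITERATE);

                return (n);     /* number of pv entries visited */
        }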