svn commit: r249465 - in projects/amd64_xen_pv/sys/amd64: conf include xen
Cherry G. Mathew
cherry at FreeBSD.org
Sun Apr 14 09:05:41 UTC 2013
Author: cherry
Date: Sun Apr 14 09:05:40 2013
New Revision: 249465
URL: http://svnweb.freebsd.org/changeset/base/249465
Log:
pmap.c:
- Bump copyright year.
- Fill in further pmap stubs, mainly pmap_enter()/pmap_remove()
- Refine the use of managed and unmanaged pages.
pmap_pv.c:
- Improve support for freeing pv entries
- Move responsibility for locking the pmap into pmap.c
cpufunc.h:
- invltlb() should work on xen too.
machdep.c:
- Copy exec_setregs() implementation from native, sans debug register
support.
mmu_map.c:
- Flush the mmu update queue after the pml4 entry update.
conf/XEN:
- Revert VERBOSE_SYSINIT. This was re-enabled by mistake in r248719,
contrary to the status message, which claimed to disable it.
Modified:
projects/amd64_xen_pv/sys/amd64/conf/XEN
projects/amd64_xen_pv/sys/amd64/include/cpufunc.h
projects/amd64_xen_pv/sys/amd64/xen/machdep.c
projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c
projects/amd64_xen_pv/sys/amd64/xen/pmap.c
projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c
Modified: projects/amd64_xen_pv/sys/amd64/conf/XEN
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/conf/XEN Sun Apr 14 08:49:35 2013 (r249464)
+++ projects/amd64_xen_pv/sys/amd64/conf/XEN Sun Apr 14 09:05:40 2013 (r249465)
@@ -55,7 +55,7 @@ options INVARIANTS # Enable calls of e
options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS
nooptions WITNESS # Enable checks to detect deadlocks and cycles
nooptions WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed
-options VERBOSE_SYSINIT
+nooptions VERBOSE_SYSINIT
nooptions NATIVE
option XEN
Modified: projects/amd64_xen_pv/sys/amd64/include/cpufunc.h
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/include/cpufunc.h Sun Apr 14 08:49:35 2013 (r249464)
+++ projects/amd64_xen_pv/sys/amd64/include/cpufunc.h Sun Apr 14 09:05:40 2013 (r249465)
@@ -493,8 +493,11 @@ load_xcr(u_int reg, u_long val)
static __inline void
invltlb(void)
{
-
+#ifdef XEN
+ xen_tlb_flush();
+#else
load_cr3(rcr3());
+#endif
}
/*
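
For context: a PV guest may not reload %cr3 itself, so xen_tlb_flush() is
expected to boil down to a single MMUEXT hypercall. Below is a minimal sketch
of that call under the standard Xen public interface; the function name
sketch_tlb_flush() and the header paths are illustrative, and the real
implementation lives elsewhere in the tree.

    /*
     * Sketch: flush the local TLB by asking the hypervisor instead of
     * reloading %cr3, which a PV guest cannot do directly.
     */
    #include <sys/param.h>
    #include <sys/systm.h>

    #include <xen/hypervisor.h>
    #include <xen/interface/xen.h>

    static void
    sketch_tlb_flush(void)
    {
        struct mmuext_op op;

        op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
        /* One op, no success count wanted, act on the current domain. */
        if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0)
            panic("HYPERVISOR_mmuext_op failed");
    }
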
Modified: projects/amd64_xen_pv/sys/amd64/xen/machdep.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/machdep.c Sun Apr 14 08:49:35 2013 (r249464)
+++ projects/amd64_xen_pv/sys/amd64/xen/machdep.c Sun Apr 14 09:05:40 2013 (r249465)
@@ -746,7 +746,43 @@ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST,
void
exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
- KASSERT(0, ("TODO"));
+ struct trapframe *regs = td->td_frame;
+ struct pcb *pcb = td->td_pcb;
+
+ mtx_lock(&dt_lock);
+ if (td->td_proc->p_md.md_ldt != NULL)
+ user_ldt_free(td);
+ else
+ mtx_unlock(&dt_lock);
+
+ pcb->pcb_fsbase = 0;
+ pcb->pcb_gsbase = 0;
+ clear_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT);
+ pcb->pcb_initial_fpucw = __INITIAL_FPUCW__;
+ set_pcb_flags(pcb, PCB_FULL_IRET);
+
+ bzero((char *)regs, sizeof(struct trapframe));
+ regs->tf_rip = imgp->entry_addr;
+ regs->tf_rsp = ((stack - 8) & ~0xFul) + 8;
+ regs->tf_rdi = stack; /* argv */
+ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
+ regs->tf_ss = _udatasel;
+ regs->tf_cs = _ucodesel;
+ regs->tf_ds = _udatasel;
+ regs->tf_es = _udatasel;
+ regs->tf_fs = _ufssel;
+ regs->tf_gs = _ugssel;
+ regs->tf_flags = TF_HASSEGS;
+ td->td_retval[1] = 0;
+
+ /* XXX: we don't do PCB_DBREGS */
+
+ /*
+ * Drop the FP state if we hold it, so that the process gets a
+ * clean FP state if it uses the FPU again.
+ */
+ fpstate_drop(td);
+
}
void
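
The one line in exec_setregs() that is easy to misread is the initial %rsp
computation. The userland snippet below just demonstrates what
((stack - 8) & ~0xFul) + 8 evaluates to: the highest address at or below the
original stack top that is congruent to 8 modulo 16. The sample value is
arbitrary.

    #include <assert.h>
    #include <stdio.h>

    int
    main(void)
    {
        unsigned long stack = 0x7fffffffe9c3ul;    /* arbitrary example */
        unsigned long rsp = ((stack - 8) & ~0xFul) + 8;

        printf("stack %#lx -> rsp %#lx\n", stack, rsp);
        assert(rsp % 16 == 8);                     /* congruent to 8 mod 16 */
        assert(rsp <= stack && stack - rsp < 16);  /* never above, never far below */
        return (0);
    }
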
Modified: projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c Sun Apr 14 08:49:35 2013 (r249464)
+++ projects/amd64_xen_pv/sys/amd64/xen/mmu_map.c Sun Apr 14 09:05:40 2013 (r249465)
@@ -337,7 +337,7 @@ mmu_map_hold_va(struct pmap *pm, void *a
pml4tep_ma = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pml4tep));
pml4te = xpmap_ptom(pti->ptmb.vtop((uintptr_t)pti->pdpt)) | PG_RW | PG_V | PG_U; /* XXX: revisit flags */
xen_queue_pt_update(pml4tep_ma, pml4te);
-
+ xen_flush_queue();
} else {
pti->pdpt = (pdp_entry_t *) pti->ptmb.ptov(pt);
}
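
For context: on Xen PV, page-table writes go through a request queue rather
than direct stores, and the change above flushes that queue right after the
pml4 entry is installed so the new pdpt is visible before it is walked. The
sketch below shows the batching pattern behind xen_queue_pt_update() /
xen_flush_queue() using the standard mmu_update interface; the queue here
(sketch_queue, sketch_qidx, SKETCH_QUEUE_DEPTH) is a simplification, not the
tree's actual implementation.

    #include <sys/param.h>
    #include <sys/systm.h>

    #include <xen/hypervisor.h>
    #include <xen/interface/xen.h>

    #define SKETCH_QUEUE_DEPTH    16

    static struct mmu_update sketch_queue[SKETCH_QUEUE_DEPTH];
    static unsigned int sketch_qidx;

    static void
    sketch_flush_queue(void)
    {
        if (sketch_qidx == 0)
            return;
        /* One hypercall applies every queued page-table write. */
        if (HYPERVISOR_mmu_update(sketch_queue, sketch_qidx, NULL,
            DOMID_SELF) < 0)
            panic("HYPERVISOR_mmu_update failed");
        sketch_qidx = 0;
    }

    static void
    sketch_queue_pt_update(uint64_t ptr_ma, uint64_t val)
    {
        if (sketch_qidx == SKETCH_QUEUE_DEPTH)
            sketch_flush_queue();
        /* The low bits of ptr encode the request type. */
        sketch_queue[sketch_qidx].ptr = ptr_ma | MMU_NORMAL_PT_UPDATE;
        sketch_queue[sketch_qidx].val = val;
        sketch_qidx++;
    }
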
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap.c Sun Apr 14 08:49:35 2013 (r249464)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap.c Sun Apr 14 09:05:40 2013 (r249465)
@@ -8,10 +8,10 @@
* All rights reserved.
* Copyright (c) 2005 Alan L. Cox <alc at cs.rice.edu>
* All rights reserved.
- * Copyright (c) 2012 Spectra Logic Corporation
- * All rights reserved.
* Copyright (c) 2012 Citrix Systems
* All rights reserved.
+ * Copyright (c) 2012, 2013 Spectra Logic Corporation
+ * All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* the Systems Programming Group of the University of Utah Computer
@@ -147,6 +147,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <machine/md_var.h>
+#include <machine/pcb.h>
#include <xen/hypervisor.h>
#include <machine/xen/xenvar.h>
@@ -183,7 +184,7 @@ static vm_paddr_t dmaplimit;
#endif
uintptr_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
-pt_entry_t pg_nx; /* XXX: do we need this ? */
+pt_entry_t pg_nx = 0; /* XXX: probe for this ? */
struct msgbuf *msgbufp = 0;
@@ -218,6 +219,15 @@ extern char *console_page; /* The shared
extern struct xenstore_domain_interface *xen_store; /* xenstore page */
extern vm_map_t pv_map;
+vm_offset_t pv_minva, pv_maxva;
+
+/* Index offset into a pagetable, for a given va */
+static int
+pt_index(uintptr_t va)
+{
+ return ((va & PDRMASK) >> PAGE_SHIFT);
+}
+
/* return kernel virtual address of 'n' claimed physical pages at boot. */
static uintptr_t
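
A quick worked example of the new pt_index() helper: the low 21 bits of the VA
(PDRMASK) locate a byte within one 2 MB page-table span, and shifting out the
4 KB page offset leaves the 9-bit PTE index, 0 through 511. Constants are
restated so the snippet stands alone; ex_pt_index() mirrors pt_index().

    #include <assert.h>

    #define EX_PAGE_SHIFT    12                   /* 4 KB pages */
    #define EX_PDRMASK       ((1ul << 21) - 1)    /* offset within a 2 MB span */

    static int
    ex_pt_index(unsigned long va)
    {
        return ((va & EX_PDRMASK) >> EX_PAGE_SHIFT);
    }

    int
    main(void)
    {
        assert(ex_pt_index(0xffffffff80201000ul) == 1);
        assert(ex_pt_index(0xffffffff803ff000ul) == 511);
        return (0);
    }
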
@@ -634,7 +644,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_root.rt_root = 0;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
pmap_pv_init();
- pmap_pv_pmap_init(kernel_pmap);
tsz = mmu_map_t_size();
@@ -712,20 +721,6 @@ pmap_init(void)
/* XXX: review the use of gdtset for the purpose below */
- /*
- * At this point we initialise the pv mappings of all PAs that
- * have been mapped into the kernel VA by pmap_bootstrap()
- */
-
- vm_paddr_t pa;
-
- for (pa = phys_avail[0]; pa < VTOP(virtual_avail); pa += PAGE_SIZE) {
- vm_page_t m;
- m = PHYS_TO_VM_PAGE(pa);
- if (m == NULL) continue;
- pmap_put_pv_entry(kernel_pmap, PTOV(pa), m);
- }
-
/* Get a va for console and map the console mfn into it */
vm_paddr_t ma = xen_start_info->console.domU.mfn << PAGE_SHIFT;
@@ -745,7 +740,6 @@ pmap_init(void)
xen_store = (void *)va;
/* Reserve pv VA space by allocating a submap */
- vm_offset_t pv_minva, pv_maxva;
KASSERT(kernel_map != 0, ("Initialising kernel submap before kernel_map!"));
pv_map = kmem_suballoc(kernel_map, &pv_minva, &pv_maxva,
sizeof(struct pv_chunk) * 100 /* XXX: Totally arbitrary */, 0);
@@ -772,7 +766,7 @@ pmap_pinit(pmap_t pmap)
{
KASSERT(pmap != kernel_pmap,
- ("kernel map re-initialised!"));
+ ("kernel map re-initialised!", __func__));
PMAP_LOCK_INIT(pmap);
@@ -781,7 +775,7 @@ pmap_pinit(pmap_t pmap)
*/
pmap->pm_pml4 = (void *) kmem_alloc(kernel_map, PAGE_SIZE);
bzero(pmap->pm_pml4, PAGE_SIZE);
- printf("%s: pmap->pm_pml4 == %p\n", __func__, pmap->pm_pml4);
+
/*
* We do not wire in kernel space, or the self-referencial
* entry in userspace pmaps becase both kernel and userland
@@ -795,7 +789,7 @@ pmap_pinit(pmap_t pmap)
pmap_xen_setpages_ro((uintptr_t)pmap->pm_pml4, 1);
- xen_pgdir_pin(phystomach(ptmb_vtop((uintptr_t)pmap->pm_pml4)));
+ xen_pgdir_pin(pmap_kextract_ma((uintptr_t)pmap->pm_pml4));
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
@@ -810,8 +804,6 @@ void pmap_xen_userload(pmap_t pmap)
KASSERT(pmap != kernel_pmap,
("Kernel pmap requested on user load.\n"));
- printf("%s: pmap->pm_pml4 == %p\n", __func__, pmap->pm_pml4);
- printf("%s: curthread %s\n", __func__, curthread->td_name);
int i;
for (i = 0; i < NUPML4E; i++) {
pml4_entry_t pml4e;
@@ -827,11 +819,20 @@ void pmap_xen_userload(pmap_t pmap)
void
pmap_release(pmap_t pmap)
{
- KASSERT(0, ("XXX: %s: TODO\n", __func__));
+ KASSERT(pmap != kernel_pmap,
+ ("%s: kernel pmap released", __func__));
+
+ xen_pgdir_unpin(pmap_kextract_ma((uintptr_t)pmap->pm_pml4));
+ pmap_xen_setpages_rw((uintptr_t)pmap->pm_pml4, 1);
+
+ bzero(pmap->pm_pml4, PAGE_SIZE);
+ kmem_free(kernel_map, (vm_offset_t)pmap->pm_pml4, PAGE_SIZE);
+
+ PMAP_LOCK_DESTROY(pmap);
}
static pt_entry_t *
-pmap_vtopte_hold(pmap_t pmap, uintptr_t va, void *addr)
+pmap_vtopte_inspect(pmap_t pmap, uintptr_t va, void *addr)
{
KASSERT(addr != NULL, ("addr == NULL"));
@@ -848,14 +849,51 @@ pmap_vtopte_hold(pmap_t pmap, uintptr_t
mmu_map_t_init(tptr, &mb);
- if (!mmu_map_inspect_va(kernel_pmap, tptr, va)) {
+ if (!mmu_map_inspect_va(pmap, tptr, va)) {
return NULL; /* XXX: fix api, return some kind of #define */
}
pte = mmu_map_pt(tptr); /* Read out PT from mmu state */
/* add VA offset */
- pte += (va & PDRMASK) >> PAGE_SHIFT;
+ pte += pt_index(va);
+
+ return pte;
+}
+
+static pt_entry_t *
+vtopte_inspect(uintptr_t va, void *addr)
+{
+ return pmap_vtopte_inspect(kernel_pmap, va, addr);
+}
+
+static pt_entry_t *
+pmap_vtopte_hold(pmap_t pmap, uintptr_t va, void *addr)
+{
+ KASSERT(addr != NULL, ("addr == NULL"));
+
+ mmu_map_t tptr = *(mmu_map_t *)addr;
+
+ pd_entry_t *pte; /* PTE address to return */
+
+ struct mmu_map_mbackend mb = {
+ ptmb_mappedalloc,
+ ptmb_mappedfree,
+ ptmb_ptov,
+ ptmb_vtop
+ };
+
+ mmu_map_t_init(tptr, &mb);
+
+ if (!mmu_map_inspect_va(pmap, tptr, va)) {
+ mmu_map_hold_va(pmap, tptr, va); /* PT hierarchy */
+ xen_flush_queue();
+ }
+
+ pte = mmu_map_pt(tptr); /* Read out PT from mmu state */
+
+ /* add VA offset */
+ pte += pt_index(va);
return pte;
}
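
The rename/split above gives callers two flavours: pmap_vtopte_inspect() only
looks, returning NULL when the page-table hierarchy for va is absent, while
pmap_vtopte_hold() allocates any missing levels (and flushes the update queue)
before handing back the PTE pointer. The caller-side pattern, as pv_remove()
and pmap_enter() use it later in this diff, looks roughly like this
(pmap_vtopte_release() is the matching teardown):

        char tbuf[tsz];        /* tsz is effectively constant at run time */
        mmu_map_t tptr = tbuf;
        pt_entry_t *pte;

        PMAP_LOCK(pmap);
        pte = pmap_vtopte_inspect(pmap, va, &tptr);    /* never allocates */
        if (pte != NULL) {
                /* ... examine or update *pte ... */
        }
        pmap_vtopte_release(pmap, va, &tptr);
        PMAP_UNLOCK(pmap);
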
@@ -892,6 +930,22 @@ pmap_lazyfix_action(void)
}
#endif /* SMP */
+static __inline void
+pmap_resident_count_inc(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pmap->pm_stats.resident_count += count;
+}
+
+static __inline void
+pmap_resident_count_dec(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pmap->pm_stats.resident_count -= count;
+}
+
/*
* Add a list of wired pages to the kva
* this routine is only used for temporary
@@ -964,6 +1018,11 @@ void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
vm_prot_t prot, boolean_t wired)
{
+ pt_entry_t *pte;
+ pt_entry_t newpte, origpte;
+ vm_paddr_t opa, pa;
+ vm_page_t mpte, om;
+
va = trunc_page(va);
KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
@@ -973,21 +1032,201 @@ pmap_enter(pmap_t pmap, vm_offset_t va,
if ((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == 0)
VM_OBJECT_ASSERT_WLOCKED(m->object);
- KASSERT(pmap == kernel_pmap, ("XXX: TODO: Userland pmap\n"));
KASSERT(VM_PAGE_TO_PHYS(m) != 0,
("VM_PAGE_TO_PHYS(m) == 0x%lx\n", VM_PAGE_TO_PHYS(m)));
- pmap_kenter(va, VM_PAGE_TO_PHYS(m)); /* Shim to keep bootup
- * happy for now */
+ pa = VM_PAGE_TO_PHYS(m);
+ newpte = (pt_entry_t)(xpmap_ptom(pa) | PG_A | PG_V | PG_U);
+ if ((access & VM_PROT_WRITE) != 0)
+ newpte |= PG_M;
+ if ((prot & VM_PROT_WRITE) != 0)
+ newpte |= PG_RW;
+ KASSERT((newpte & (PG_M | PG_RW)) != PG_M,
+ ("pmap_enter: access includes VM_PROT_WRITE but prot doesn't"));
+ if ((prot & VM_PROT_EXECUTE) == 0)
+ newpte |= pg_nx;
+ if (wired)
+ newpte |= PG_W;
+
+ /* newpte |= pmap_cache_bits(m->md.pat_mode, 0); XXX */
+
+ mpte = NULL;
+
+ PMAP_LOCK(pmap);
- /* XXX: TODO: */
+ /*
+ * In the case that a page table page is not
+ * resident, we are creating it here.
+ */
+
+ KASSERT(tsz != 0, ("tsz != 0"));
+
+ char tbuf[tsz]; /* Safe to do this on the stack since tsz is
+ * effectively const.
+ */
+
+ mmu_map_t tptr = tbuf;
+
+ pte = pmap_vtopte_hold(pmap, va, &tptr);
+
+ origpte = pte_load(pte);
+
+ /*
+ * Is the specified virtual address already mapped?
+ */
+ if ((origpte & PG_V) != 0) {
+ /*
+ * Wiring change, just update stats. We don't worry about
+ * wiring PT pages as they remain resident as long as there
+ * are valid mappings in them. Hence, if a user page is wired,
+ * the PT page will be also.
+ */
+ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0)
+ pmap->pm_stats.wired_count++;
+ else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0)
+ pmap->pm_stats.wired_count--;
+
+ /*
+ * Has the physical page changed?
+ */
+ opa = xpmap_mtop(origpte & PG_FRAME);
+ if (opa == pa) {
+ /*
+ * No, might be a protection or wiring change.
+ */
+ if ((origpte & PG_MANAGED) != 0) {
+ newpte |= PG_MANAGED;
+ if ((newpte & PG_RW) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ }
+ if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0)
+ goto unchanged;
+ goto validate;
+ }
+ } else {
+ /*
+ * Increment the counters.
+ */
+ if ((newpte & PG_W) != 0)
+ pmap->pm_stats.wired_count++;
+ pmap_resident_count_inc(pmap, 1);
+ }
+
+
+ /*
+ * Enter on the PV list if part of our managed memory.
+ */
+ if ((m->oflags & VPO_UNMANAGED) == 0) {
+ newpte |= PG_MANAGED;
+ pmap_put_pv_entry(pmap, va, m);
+
+ if ((newpte & PG_RW) != 0)
+ vm_page_aflag_set(m, PGA_WRITEABLE);
+ }
+
+ /*
+ * Update the PTE.
+ */
+ if ((origpte & PG_V) != 0) {
+validate:
+ {
+ /* XXX: This is not atomic */
+ origpte = pte_load(pte);
+ PT_SET_VA_MA(pte, newpte, true);
+ /* Sync the kernel's view of the pmap */
+ if (pmap != kernel_pmap && PCPU_GET(curpmap) == pmap) {
+ /* XXX: this can be optimised to a single entry update */
+ pmap_xen_userload(pmap);
+ }
+
+ }
+ opa = xpmap_mtop(origpte & PG_FRAME);
+ if (opa != pa) {
+ if ((origpte & PG_MANAGED) != 0) {
+ om = PHYS_TO_VM_PAGE(opa);
+ if ((origpte & (PG_M | PG_RW)) == (PG_M |
+ PG_RW))
+ vm_page_dirty(om);
+ if ((origpte & PG_A) != 0)
+ vm_page_aflag_set(om, PGA_REFERENCED);
+ if (!pmap_free_pv_entry(pmap, va, om)) {
+ panic("Unable to free pv entry!");
+ }
+
+ if ((om->aflags & PGA_WRITEABLE) != 0 &&
+ !pmap_page_is_mapped(om) &&
+ (om->flags & PG_FICTITIOUS) != 0)
+ vm_page_aflag_clear(om, PGA_WRITEABLE);
+ }
+ } else if ((newpte & PG_M) == 0 && (origpte & (PG_M |
+ PG_RW)) == (PG_M | PG_RW)) {
+ if ((origpte & PG_MANAGED) != 0)
+ vm_page_dirty(m);
+ /*
+ * Although the PTE may still have PG_RW set, TLB
+ * invalidation may nonetheless be required because
+ * the PTE no longer has PG_M set.
+ */
+ } else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) {
+ /*
+ * This PTE change does not require TLB invalidation.
+ */
+ goto unchanged;
+ }
+
+ if ((origpte & PG_A) != 0)
+ pmap_invalidate_page(pmap, va);
+
+ } else {
+ PT_SET_VA_MA(pte, newpte, true);
+
+ /* Sync the kernel's view of the pmap */
+ if (pmap != kernel_pmap && PCPU_GET(curpmap) == pmap) {
+ /* XXX: this can be optimised to a single entry update */
+ pmap_xen_userload(pmap);
+ }
+ }
+
+
+ if (pmap != kernel_pmap) pmap_xen_userload(pmap);
+
+
+unchanged:
+ pmap_vtopte_release(pmap, va, &tptr);
+ PMAP_UNLOCK(pmap);
}
+/*
+ * Maps a sequence of resident pages belonging to the same object.
+ * The sequence begins with the given page m_start. This page is
+ * mapped at the given virtual address start. Each subsequent page is
+ * mapped at a virtual address that is offset from start by the same
+ * amount as the page is offset from m_start within the object. The
+ * last page in the sequence is the page with the largest offset from
+ * m_start that can be mapped at a virtual address less than the given
+ * virtual address end. Not every virtual page between start and end
+ * is mapped; only those for which a resident page exists with the
+ * corresponding offset from m_start are mapped.
+ */
+
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
vm_page_t m_start, vm_prot_t prot)
{
- KASSERT(0, ("XXX: %s: TODO\n", __func__));
+ vm_offset_t va;
+ vm_pindex_t diff, psize;
+ vm_page_t m;
+
+ VM_OBJECT_ASSERT_WLOCKED(m_start->object);
+
+ psize = atop(end - start);
+ m = m_start;
+
+ while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
+ va = start + ptoa(diff);
+ pmap_enter(pmap, va, prot, m, prot, false);
+ m = TAILQ_NEXT(m, listq);
+ }
}
void
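
pmap_enter() above works in machine address space: xpmap_ptom() converts the
pseudo-physical frame handed in by the VM layer into a machine frame before
the PTE is written, and xpmap_mtop() reverses the translation when an existing
PTE is read back. The sketch below only shows the shape of that translation;
the array names ex_p2m/ex_m2p and the helpers ex_ptom()/ex_mtop() are
placeholders for the real tables and for xpmap_ptom()/xpmap_mtop().

    #include <stdint.h>

    #define EX_PAGE_SHIFT    12
    #define EX_PAGE_MASK     ((1ul << EX_PAGE_SHIFT) - 1)

    extern uint64_t *ex_p2m;    /* guest-maintained, indexed by pseudo-physical frame */
    extern uint64_t *ex_m2p;    /* hypervisor-provided, indexed by machine frame */

    static uint64_t
    ex_ptom(uint64_t pa)        /* pseudo-physical -> machine */
    {
        return ((ex_p2m[pa >> EX_PAGE_SHIFT] << EX_PAGE_SHIFT) |
            (pa & EX_PAGE_MASK));
    }

    static uint64_t
    ex_mtop(uint64_t ma)        /* machine -> pseudo-physical */
    {
        return ((ex_m2p[ma >> EX_PAGE_SHIFT] << EX_PAGE_SHIFT) |
            (ma & EX_PAGE_MASK));
    }
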
@@ -1011,13 +1250,189 @@ pmap_object_init_pt(pmap_t pmap, vm_offs
KASSERT(0, ("XXX: %s: TODO\n", __func__));
}
+/*
+ * pmap_remove_pte: do the things to unmap a page in a process
+ */
+static int
+pmap_remove_pte(pmap_t pmap, vm_offset_t va, pt_entry_t *ptq)
+{
+ pt_entry_t oldpte;
+ vm_page_t m;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ { /* XXX: there's no way to make this atomic ? */
+ oldpte = pte_load(ptq);
+ if (oldpte & PG_FRAME) { /* Optimise */
+ PT_CLEAR_VA(ptq, TRUE);
+ }
+ }
+
+ if (oldpte & PG_W)
+ pmap->pm_stats.wired_count -= 1;
+ pmap_resident_count_dec(pmap, 1);
+
+ if (oldpte & PG_MANAGED) {
+ m = MACH_TO_VM_PAGE(oldpte & PG_FRAME);
+
+ if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+ vm_page_dirty(m);
+
+ if (oldpte & PG_A)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+
+ if (!pmap_free_pv_entry(pmap, va, m)) {
+ panic("%s: pv 0x%lx: 0x%lx, unknown on managed page!",
+ __func__, VM_PAGE_TO_PHYS(m), va);
+ }
+
+ if (!pmap_page_is_mapped(m) &&
+ (m->flags & PG_FICTITIOUS) == 0) {
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ /*
+ * We never remove the backing pages - that's the job of
+ * mmu_map.[ch]
+ */
+ return false;
+}
+
+/*
+ * Remove a single page from a process address space
+ */
+static void
+pmap_remove_page(pmap_t pmap, vm_offset_t va, pt_entry_t *pte)
+{
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if ((pte_load(pte) & PG_V) == 0)
+ return;
+
+ pmap_remove_pte(pmap, va, pte);
+
+ pmap_invalidate_page(pmap, va);
+}
+
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
- KASSERT(pmap == kernel_pmap, ("XXX: TODO: Userland pmap\n"));
-
KASSERT(eva >= sva, ("End VA is lower than Start VA"));
- pmap_qremove(sva, atop(eva - sva));
+ vm_offset_t va, va_next;
+ pt_entry_t *pte;
+ int anyvalid;
+
+ /*
+ * Perform an unsynchronized read. This is, however, safe.
+ */
+ if (pmap->pm_stats.resident_count == 0)
+ return;
+
+ anyvalid = 0;
+
+ KASSERT(tsz != 0, ("tsz != 0"));
+
+ char tbuf[tsz]; /* Safe to do this on the stack since tsz is
+ * effectively const.
+ */
+
+ mmu_map_t tptr = tbuf;
+
+ struct mmu_map_mbackend mb = {
+ ptmb_mappedalloc,
+ ptmb_mappedfree,
+ ptmb_ptov,
+ ptmb_vtop
+ };
+
+ mmu_map_t_init(tptr, &mb);
+
+
+ PMAP_LOCK(pmap);
+
+ /*
+ * special handling of removing one page. a very
+ * common operation and easy to short circuit some
+ * code.
+ */
+
+ if (sva + PAGE_SIZE == eva) {
+ if (!mmu_map_inspect_va(pmap, tptr, sva)) {
+ goto out;
+ }
+
+ pte = mmu_map_pt(tptr) + pt_index(sva);
+
+ pmap_remove_page(pmap, sva, pte);
+ goto out;
+ }
+
+ for (; sva < eva; sva = va_next) {
+ if (pmap->pm_stats.resident_count == 0)
+ break;
+
+ if (!mmu_map_inspect_va(pmap, tptr, sva)) {
+ if (mmu_map_pdpt(tptr) == NULL) {
+ va_next = (sva + NBPML4) & ~PML4MASK;
+ if (va_next < sva) /* Overflow */
+ va_next = eva;
+ continue;
+ }
+
+ if (mmu_map_pdt(tptr) == NULL) {
+ va_next = (sva + NBPDP) & ~PDPMASK;
+ if (va_next < sva) /* Overflow */
+ va_next = eva;
+ continue;
+ }
+
+ if (mmu_map_pt(tptr) == NULL) {
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva) /* Overflow */
+ va_next = eva;
+ continue;
+ }
+
+ panic("%s: All backing tables non-NULL,"
+ "yet hierarchy can't be inspected at va = 0x%lx\n",
+ __func__, sva);
+ }
+
+ va_next = (sva + NBPDR) & ~PDRMASK;
+ if (va_next < sva)
+ va_next = eva;
+
+ va = va_next;
+
+ for (pte = (mmu_map_pt(tptr) + pt_index(sva));
+ sva != va_next;pte++, sva += PAGE_SIZE) {
+ if (pte_load(pte) == 0) {
+ if (va != va_next) {
+ pmap_invalidate_range(pmap, sva, va);
+ va = va_next;
+ }
+ continue;
+ }
+ /*
+ * XXX: PG_G is set on *user* entries unlike
+ * native, where it is set on kernel entries
+ */
+ if ((pte_load(pte) & PG_G) != 0)
+ anyvalid = 1;
+ else if (va == va_next)
+ va = sva;
+
+ pmap_remove_pte(pmap, sva, pte);
+ }
+ if (va != va_next) {
+ pmap_invalidate_range(pmap, sva, va);
+ }
+ }
+out:
+ if (anyvalid)
+ pmap_invalidate_all(pmap);
+
+ PMAP_UNLOCK(pmap);
+ mmu_map_t_fini(tptr);
}
static bool
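
The multi-page path in pmap_remove() above walks the range one 2 MB
page-table span at a time. The standalone snippet below only illustrates the
va_next stepping and its overflow clamp; constants are restated and the sample
addresses are arbitrary.

    #include <assert.h>

    #define EX_NBPDR      (1ul << 21)       /* bytes mapped by one page table */
    #define EX_PDRMASK    (EX_NBPDR - 1)

    int
    main(void)
    {
        unsigned long sva = 0x00007fffffdff123ul;    /* arbitrary */
        unsigned long eva = 0x00007fffffe01000ul;
        unsigned long va_next;

        /* Round up to the next 2 MB boundary ... */
        va_next = (sva + EX_NBPDR) & ~EX_PDRMASK;
        /* ... clamping if the addition wrapped around the VA space. */
        if (va_next < sva)
            va_next = eva;

        assert(va_next == 0x00007fffffe00000ul);
        assert((va_next & EX_PDRMASK) == 0 && va_next > sva);
        return (0);
    }
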
@@ -1032,8 +1447,11 @@ pv_remove(pmap_t pmap, vm_offset_t va, v
mmu_map_t tptr = tbuf;
PMAP_LOCK(pmap);
- pte = pmap_vtopte_hold(pmap, va, &tptr);
- tpte = *pte;
+ pte = pmap_vtopte_inspect(pmap, va, &tptr);
+
+ KASSERT(pte != NULL, ("pte has no backing page tables!"));
+
+ tpte = pte_load(pte);
PT_CLEAR_VA(pte, TRUE);
if (tpte & PG_A)
vm_page_aflag_set(m, PGA_REFERENCED);
@@ -1102,6 +1520,11 @@ pmap_kextract(vm_offset_t va)
vm_paddr_t
pmap_kextract_ma(vm_offset_t va)
{
+
+ if (ISDMAPVA(va)) {
+ return xpmap_ptom(DMAP_TO_PHYS(va));
+ }
+
vm_paddr_t ma;
/* Walk the PT hierarchy to get the ma */
@@ -1124,7 +1547,7 @@ pmap_kextract_ma(vm_offset_t va)
goto nomapping;
}
- ma = mmu_map_pt(tptr)[(PDRMASK & va) >> PAGE_SHIFT];
+ ma = mmu_map_pt(tptr)[pt_index(va)];
mmu_map_t_fini(tptr);
@@ -1146,18 +1569,6 @@ nomapping:
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
-
- vm_page_t m;
-
- m = PHYS_TO_VM_PAGE(pa);
-
- if (gdtset == 1 && m != NULL) {
- /*
- * Enter on the PV list if part of our managed memory.
- */
-
- pmap_put_pv_entry(kernel_pmap, va, m);
- }
pmap_kenter_ma(va, xpmap_ptom(pa));
}
@@ -1211,40 +1622,18 @@ pmap_kremove(vm_offset_t va)
mmu_map_t tptr = tbuf;
-#define nobackingfree /* Shim to make boot progress. XXX: MUST go away */
-#ifdef nobackingfree
- (void) pte;
- struct mmu_map_mbackend mb = {
- ptmb_mappedalloc,
- ptmb_mappedfree,
- ptmb_ptov,
- ptmb_vtop
- };
- mmu_map_t_init(tptr, &mb);
-
- if (!mmu_map_inspect_va(kernel_pmap, tptr, va)) {
- mmu_map_hold_va(kernel_pmap, tptr, va); /* PT hierarchy */
- xen_flush_queue(); /* XXX: cleanup */
- }
-
- /* Backing page tables are in place, let xen do the maths */
- PT_SET_MA(va, 0);
- PT_UPDATES_FLUSH();
-
- mmu_map_t_fini(tptr);
-
-#else
- pte = vtopte_hold(va, &tptr);
+ pte = vtopte_inspect(va, &tptr);
if (pte == NULL) { /* Mapping doesn't exist */
- goto unmappte;
+ goto notmapped;
}
PT_CLEAR_VA(pte, TRUE);
PT_UPDATES_FLUSH();
-unmappte:
- vtopte_release(va, &tptr);
-#endif
+ pmap_invalidate_page(kernel_pmap, va);
+notmapped:
+// XXX: vtopte_release(va, &tptr);
+ mmu_map_t_fini(tptr);
}
/*
@@ -1292,6 +1681,14 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
}
void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ invltlb();
+}
+
+void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
@@ -1365,7 +1762,32 @@ pmap_zero_page_idle(vm_page_t m)
void
pmap_activate(struct thread *td)
{
- KASSERT(0, ("XXX: %s: TODO\n", __func__));
+ pmap_t pmap, oldpmap;
+ u_int cpuid;
+ u_int64_t cr3;
+
+ critical_enter();
+ pmap = vmspace_pmap(td->td_proc->p_vmspace);
+ oldpmap = PCPU_GET(curpmap);
+ cpuid = PCPU_GET(cpuid);
+#ifdef SMP
+ CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
+ CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
+#else
+ CPU_CLR(cpuid, &oldpmap->pm_active);
+ CPU_SET(cpuid, &pmap->pm_active);
+#endif
+ cr3 = pmap_kextract((vm_offset_t)pmap->pm_pml4);
+ td->td_pcb->pcb_cr3 = cr3;
+ if (__predict_false(pmap == kernel_pmap)) {
+ xen_load_cr3(cr3);
+ }
+ else {
+ pmap_xen_userload(pmap);
+ }
+
+ PCPU_SET(curpmap, pmap);
+ critical_exit();
}
void
@@ -1379,12 +1801,10 @@ pmap_page_set_memattr(vm_page_t m, vm_me
{
KASSERT(0, ("XXX: %s: TODO\n", __func__));
}
-
+#include <ddb/ddb.h>
static bool
pv_dummy(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
- printf("%s: va == 0x%lx, pa == 0x%lx\n",
- __func__, va, VM_PAGE_TO_PHYS(m));
return true; /* stop at the first iteration */
}
@@ -1393,7 +1813,7 @@ pmap_page_is_mapped(vm_page_t m)
{
if ((m->oflags & VPO_UNMANAGED) != 0)
return (FALSE);
- printf("pmap_pv_iterate(m, pv_dummy) == %d\n", pmap_pv_iterate(m, pv_dummy));
+
return pmap_pv_iterate(m, pv_dummy);
}
@@ -1425,11 +1845,47 @@ pmap_is_referenced(vm_page_t m)
return 0;
}
+/*
+ * pmap_is_prefaultable:
+ *
+ * Return whether or not the specified virtual address is elgible
+ * for prefault.
+ */
+
+/*
+ * XXX: I've just duplicated what native does here. I *think*, with
+ * mmu_map.[ch] (which native doesn't have), addr is always
+ * prefaultable. Research this.
+ */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
- KASSERT(0, ("XXX: %s: TODO\n", __func__));
- return 0;
+ boolean_t prefaultable = false;
+
+ KASSERT(tsz != 0, ("tsz != 0"));
+
+ char tbuf[tsz]; /* Safe to do this on the stack since tsz is
+ * effectively const.
+ */
+
+ mmu_map_t tptr = tbuf;
+
+ struct mmu_map_mbackend mb = {
+ ptmb_mappedalloc,
+ ptmb_mappedfree,
+ ptmb_ptov,
+ ptmb_vtop
+ };
+
+ mmu_map_t_init(tptr, &mb);
+
+ PMAP_LOCK(pmap);
+ prefaultable = mmu_map_inspect_va(pmap, tptr, addr);
+ PMAP_UNLOCK(pmap);
+
+ mmu_map_t_fini(tptr);
+
+ return prefaultable;
}
void
@@ -1448,7 +1904,6 @@ pmap_clear_reference(vm_page_t m)
static bool
pv_remove_write(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
-
pt_entry_t oldpte, *pte;
char tbuf[tsz]; /* Safe to do this on the stack since tsz is
* effectively const.
@@ -1457,9 +1912,11 @@ pv_remove_write(pmap_t pmap, vm_offset_t
mmu_map_t tptr = tbuf;
PMAP_LOCK(pmap);
- pte = pmap_vtopte_hold(pmap, va, &tptr);
+ pte = pmap_vtopte_inspect(pmap, va, &tptr);
- oldpte = *pte;
+ KASSERT(pte != NULL, ("pte has no backing page tables!"));
+
+ oldpte = pte_load(pte);
if (oldpte & PG_RW) {
PT_SET_MA(va, oldpte & ~(PG_RW | PG_M));
if ((oldpte & PG_M) != 0)
@@ -1592,6 +2049,12 @@ xen_vm_vtop(uintptr_t va)
return DMAP_TO_PHYS(va);
}
+ if (ISKERNELVA(va)) {
+ return pmap_kextract(va);
+ }
+
+ panic("Unknown VA 0x%lxpassed to %s\n", va, __func__);
+
return 0;
}
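
The final pmap.c hunk teaches xen_vm_vtop() to translate direct-map addresses
with constant-offset arithmetic, to fall back to a page-table walk
(pmap_kextract()) for other kernel addresses, and to panic on anything else.
The sketch below mirrors that classification in shape only; the range
constants and ex_kextract() are stand-ins, not the real
ISDMAPVA()/ISKERNELVA()/pmap_kextract().

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define EX_DMAP_BASE    0xfffff80000000000ul    /* illustrative */
    #define EX_DMAP_SIZE    (1ul << 39)             /* illustrative */
    #define EX_KVA_BASE     0xffffffff80000000ul    /* illustrative */

    static uint64_t
    ex_kextract(uint64_t va)
    {
        /* Stand-in for the real page-table walk. */
        return (va - EX_KVA_BASE);
    }

    static uint64_t
    ex_vtop(uint64_t va)
    {
        if (va >= EX_DMAP_BASE && va < EX_DMAP_BASE + EX_DMAP_SIZE)
            return (va - EX_DMAP_BASE);    /* direct map: just subtract */
        if (va >= EX_KVA_BASE)
            return (ex_kextract(va));      /* walk the page tables */
        fprintf(stderr, "unknown VA %#jx\n", (uintmax_t)va);
        abort();
    }

    int
    main(void)
    {
        printf("%#jx\n", (uintmax_t)ex_vtop(EX_DMAP_BASE + 0x1000));
        return (0);
    }
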
Modified: projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c Sun Apr 14 08:49:35 2013 (r249464)
+++ projects/amd64_xen_pv/sys/amd64/xen/pmap_pv.c Sun Apr 14 09:05:40 2013 (r249465)
@@ -136,6 +136,7 @@ static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
vm_map_t pv_map; /* Kernel submap for pc chunk alloc */
+extern vm_offset_t pv_minva, pv_maxva; /* VA range for submap */
/***************************************************
* page management routines.
@@ -169,21 +170,31 @@ pv_to_chunk(pv_entry_t pv)
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
-
static void
free_pv_chunk(struct pv_chunk *pc)
{
vm_page_t m;
-
mtx_lock(&pv_chunks_mutex);
TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
mtx_unlock(&pv_chunks_mutex);
PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***