svn commit: r256080 - in projects/random_number_generator: lib/libvmmapi sys/amd64/amd64 sys/amd64/include sys/amd64/vmm sys/amd64/vmm/amd sys/amd64/vmm/intel sys/amd64/vmm/io sys/cam/ctl sys/conf ...
Mark Murray
markm at FreeBSD.org
Sun Oct 6 09:37:59 UTC 2013
Author: markm
Date: Sun Oct 6 09:37:57 2013
New Revision: 256080
URL: http://svnweb.freebsd.org/changeset/base/256080
Log:
MFC - tracking commit: merge from head to keep this project branch in sync.
Deleted:
projects/random_number_generator/sys/kern/uipc_cow.c
Modified:
projects/random_number_generator/lib/libvmmapi/vmmapi.c
projects/random_number_generator/lib/libvmmapi/vmmapi.h
projects/random_number_generator/sys/amd64/amd64/machdep.c
projects/random_number_generator/sys/amd64/amd64/pmap.c
projects/random_number_generator/sys/amd64/amd64/trap.c
projects/random_number_generator/sys/amd64/include/pcpu.h
projects/random_number_generator/sys/amd64/include/pmap.h
projects/random_number_generator/sys/amd64/include/vmm.h (contents, props changed)
projects/random_number_generator/sys/amd64/include/vmm_dev.h (contents, props changed)
projects/random_number_generator/sys/amd64/include/vmm_instruction_emul.h (contents, props changed)
projects/random_number_generator/sys/amd64/vmm/amd/amdv.c
projects/random_number_generator/sys/amd64/vmm/intel/ept.c
projects/random_number_generator/sys/amd64/vmm/intel/ept.h
projects/random_number_generator/sys/amd64/vmm/intel/vmcs.c
projects/random_number_generator/sys/amd64/vmm/intel/vmcs.h
projects/random_number_generator/sys/amd64/vmm/intel/vmx.c
projects/random_number_generator/sys/amd64/vmm/intel/vmx.h
projects/random_number_generator/sys/amd64/vmm/intel/vmx_genassym.c
projects/random_number_generator/sys/amd64/vmm/intel/vmx_support.S
projects/random_number_generator/sys/amd64/vmm/io/ppt.c
projects/random_number_generator/sys/amd64/vmm/io/ppt.h
projects/random_number_generator/sys/amd64/vmm/vmm.c
projects/random_number_generator/sys/amd64/vmm/vmm_dev.c
projects/random_number_generator/sys/amd64/vmm/vmm_instruction_emul.c
projects/random_number_generator/sys/amd64/vmm/vmm_mem.c
projects/random_number_generator/sys/amd64/vmm/vmm_mem.h
projects/random_number_generator/sys/cam/ctl/ctl_frontend_iscsi.c
projects/random_number_generator/sys/conf/files
projects/random_number_generator/sys/dev/e1000/if_igb.c
projects/random_number_generator/sys/dev/hyperv/vmbus/hv_hv.c
projects/random_number_generator/sys/dev/ixgbe/ixgbe.c
projects/random_number_generator/sys/dev/virtio/network/if_vtnet.c
projects/random_number_generator/sys/dev/xen/timer/timer.c
projects/random_number_generator/sys/i386/include/pcpu.h
projects/random_number_generator/sys/i386/xen/mp_machdep.c
projects/random_number_generator/sys/i386/xen/mptable.c
projects/random_number_generator/sys/kern/kern_malloc.c
projects/random_number_generator/sys/x86/acpica/madt.c
projects/random_number_generator/sys/x86/xen/hvm.c
projects/random_number_generator/sys/x86/xen/xen_intr.c
projects/random_number_generator/usr.sbin/bhyve/bhyverun.c
projects/random_number_generator/usr.sbin/bhyve/pci_emul.c
projects/random_number_generator/usr.sbin/bhyve/rtc.c
projects/random_number_generator/usr.sbin/bhyvectl/bhyvectl.c
projects/random_number_generator/usr.sbin/bhyveload/bhyveload.c
Directory Properties:
projects/random_number_generator/ (props changed)
projects/random_number_generator/lib/libvmmapi/ (props changed)
projects/random_number_generator/sys/ (props changed)
projects/random_number_generator/sys/amd64/vmm/ (props changed)
projects/random_number_generator/sys/conf/ (props changed)
projects/random_number_generator/sys/dev/hyperv/ (props changed)
projects/random_number_generator/usr.sbin/bhyve/ (props changed)
projects/random_number_generator/usr.sbin/bhyvectl/ (props changed)
projects/random_number_generator/usr.sbin/bhyveload/ (props changed)
Modified: projects/random_number_generator/lib/libvmmapi/vmmapi.c
==============================================================================
--- projects/random_number_generator/lib/libvmmapi/vmmapi.c Sun Oct 6 06:57:28 2013 (r256079)
+++ projects/random_number_generator/lib/libvmmapi/vmmapi.c Sun Oct 6 09:37:57 2013 (r256080)
@@ -124,7 +124,8 @@ vm_destroy(struct vmctx *vm)
}
int
-vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len)
+vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
+ int *wired)
{
int error;
struct vm_memory_segment seg;
@@ -133,6 +134,8 @@ vm_get_memory_seg(struct vmctx *ctx, vm_
seg.gpa = gpa;
error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
*ret_len = seg.len;
+ if (wired != NULL)
+ *wired = seg.wired;
return (error);
}
@@ -741,3 +744,23 @@ vcpu_reset(struct vmctx *vmctx, int vcpu
done:
return (error);
}
+
+int
+vm_get_gpa_pmap(struct vmctx *ctx, uint64_t gpa, uint64_t *pte, int *num)
+{
+ int error, i;
+ struct vm_gpa_pte gpapte;
+
+ bzero(&gpapte, sizeof(gpapte));
+ gpapte.gpa = gpa;
+
+ error = ioctl(ctx->fd, VM_GET_GPA_PMAP, &gpapte);
+
+ if (error == 0) {
+ *num = gpapte.ptenum;
+ for (i = 0; i < gpapte.ptenum; i++)
+ pte[i] = gpapte.pte[i];
+ }
+
+ return (error);
+}
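
For reference, a caller exercises the new vm_get_gpa_pmap() roughly as in the hedged sketch below. The vmctx handle is assumed to come from vm_open(), and the four-entry pte[] bound is an assumption about the depth of the page-table walk reported via ptenum, not something this diff guarantees.

#include <stdio.h>
#include <stdint.h>

#include <vmmapi.h>

/* Sketch only: dump the PTEs along the walk to a guest physical address. */
static void
print_gpa_ptes(struct vmctx *ctx, uint64_t gpa)
{
	uint64_t pte[4];	/* assumed: one entry per paging level */
	int i, num;

	if (vm_get_gpa_pmap(ctx, gpa, pte, &num) == 0) {
		for (i = 0; i < num; i++)
			printf("pte[%d] = %#lx\n", i, (unsigned long)pte[i]);
	}
}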
Modified: projects/random_number_generator/lib/libvmmapi/vmmapi.h
==============================================================================
--- projects/random_number_generator/lib/libvmmapi/vmmapi.h Sun Oct 6 06:57:28 2013 (r256079)
+++ projects/random_number_generator/lib/libvmmapi/vmmapi.h Sun Oct 6 09:37:57 2013 (r256080)
@@ -45,9 +45,11 @@ enum vm_mmap_style {
int vm_create(const char *name);
struct vmctx *vm_open(const char *name);
void vm_destroy(struct vmctx *ctx);
-int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len);
+int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
+ int *wired);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
+int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
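
Callers that do not care about wiring may pass NULL for the new 'wired' argument, per the NULL check added in vmmapi.c above. A minimal hedged sketch (the helper name is illustrative, not part of the API):

/* Sketch only: report whether the segment containing 'gpa' is wired. */
static int
segment_is_wired(struct vmctx *ctx, vm_paddr_t gpa)
{
	size_t len;
	int error, wired;

	error = vm_get_memory_seg(ctx, gpa, &len, &wired);
	return (error == 0 ? wired : -1);	/* -1: lookup failed */
}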
Modified: projects/random_number_generator/sys/amd64/amd64/machdep.c
==============================================================================
--- projects/random_number_generator/sys/amd64/amd64/machdep.c Sun Oct 6 06:57:28 2013 (r256079)
+++ projects/random_number_generator/sys/amd64/amd64/machdep.c Sun Oct 6 09:37:57 2013 (r256080)
@@ -1574,7 +1574,7 @@ getmemsize(caddr_t kmdp, u_int64_t first
/*
* map page into kernel: valid, read/write,non-cacheable
*/
- *pte = pa | PG_V | PG_RW | PG_N;
+ *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD;
invltlb();
tmp = *(int *)ptr;
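
The change above is behavior-preserving: the old PG_N convenience macro was simply the union of the two explicit no-cache bits, and spelling them out removes a dependency on the alias now that the bare PG_* names are being reworked in pmap.h. A sketch of the equivalence (the authoritative definitions live in amd64/include/pmap.h):

/* PTE cacheability bits on x86; PG_N was an alias for both together. */
#define	PG_NC_PWT	0x008	/* page write-through */
#define	PG_NC_PCD	0x010	/* page cache disable */
#define	PG_N		(PG_NC_PWT | PG_NC_PCD)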
Modified: projects/random_number_generator/sys/amd64/amd64/pmap.c
==============================================================================
--- projects/random_number_generator/sys/amd64/amd64/pmap.c Sun Oct 6 06:57:28 2013 (r256079)
+++ projects/random_number_generator/sys/amd64/amd64/pmap.c Sun Oct 6 09:37:57 2013 (r256080)
@@ -76,6 +76,8 @@
* SUCH DAMAGE.
*/
+#define AMD64_NPT_AWARE
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
@@ -143,6 +145,120 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#endif
+static __inline boolean_t
+pmap_emulate_ad_bits(pmap_t pmap)
+{
+
+ return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0);
+}
+
+static __inline pt_entry_t
+pmap_valid_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_V;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_EMUL_V;
+ else
+ mask = EPT_PG_READ;
+ break;
+ default:
+ panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_rw_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_RW;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_EMUL_RW;
+ else
+ mask = EPT_PG_WRITE;
+ break;
+ default:
+ panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_global_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_G;
+ break;
+ case PT_EPT:
+ mask = 0;
+ break;
+ default:
+ panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_accessed_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_A;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_READ;
+ else
+ mask = EPT_PG_A;
+ break;
+ default:
+ panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_modified_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_M;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_WRITE;
+ else
+ mask = EPT_PG_M;
+ break;
+ default:
+ panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
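
The helpers above replace the old compile-time PG_* constants with per-pmap lookups: a PT_X86 pmap keeps the hardware bit layout, while a PT_EPT pmap uses the EPT encodings, and, when the accessed/dirty bits are emulated, "accessed" and "modified" are tracked through the EPT read and write permission bits instead. Throughout the rest of this diff, page-table walkers fetch the masks once and shadow the old names as locals; a hedged illustration of that calling pattern (not a function from the diff):

/* Sketch only: test the pmap-specific "modified" bit in a PTE. */
static boolean_t
pte_is_modified(pmap_t pmap, pt_entry_t pte)
{
	pt_entry_t PG_M;

	PG_M = pmap_modified_bit(pmap);
	return ((pte & PG_M) != 0);
}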
@@ -247,6 +363,8 @@ static struct md_page *pv_table;
pt_entry_t *CMAP1 = 0;
caddr_t CADDR1 = 0;
+static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */
+
static struct unrhdr pcid_unr;
static struct mtx pcid_mtx;
int pmap_pcid_enabled = 1;
@@ -306,12 +424,12 @@ static void pmap_fill_ptp(pt_entry_t *fi
static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
-static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
struct rwlock **lockp);
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
vm_prot_t prot);
-static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask);
static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
struct spglist *free, struct rwlock **lockp);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
@@ -323,7 +441,7 @@ static boolean_t pmap_try_insert_pv_entr
vm_page_t m, struct rwlock **lockp);
static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
pd_entry_t newpde);
-static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);
+static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex,
struct rwlock **lockp);
@@ -416,7 +534,9 @@ static __inline pdp_entry_t *
pmap_pdpe(pmap_t pmap, vm_offset_t va)
{
pml4_entry_t *pml4e;
+ pt_entry_t PG_V;
+ PG_V = pmap_valid_bit(pmap);
pml4e = pmap_pml4e(pmap, va);
if ((*pml4e & PG_V) == 0)
return (NULL);
@@ -438,7 +558,9 @@ static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{
pdp_entry_t *pdpe;
+ pt_entry_t PG_V;
+ PG_V = pmap_valid_bit(pmap);
pdpe = pmap_pdpe(pmap, va);
if (pdpe == NULL || (*pdpe & PG_V) == 0)
return (NULL);
@@ -460,7 +582,9 @@ static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *pde;
+ pt_entry_t PG_V;
+ PG_V = pmap_valid_bit(pmap);
pde = pmap_pde(pmap, va);
if (pde == NULL || (*pde & PG_V) == 0)
return (NULL);
@@ -490,6 +614,8 @@ vtopte(vm_offset_t va)
{
u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+ KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va));
+
return (PTmap + ((va >> PAGE_SHIFT) & mask));
}
@@ -498,6 +624,8 @@ vtopde(vm_offset_t va)
{
u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+ KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va));
+
return (PDmap + ((va >> PDRSHIFT) & mask));
}
@@ -601,22 +729,24 @@ create_pagetables(vm_paddr_t *firstaddr)
/* XXX not fully used, underneath 2M pages */
pt_p = (pt_entry_t *)KPTphys;
for (i = 0; ptoa(i) < *firstaddr; i++)
- pt_p[i] = ptoa(i) | PG_RW | PG_V | PG_G;
+ pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G;
/* Now map the page tables at their location within PTmap */
pd_p = (pd_entry_t *)KPDphys;
for (i = 0; i < nkpt; i++)
- pd_p[i] = (KPTphys + ptoa(i)) | PG_RW | PG_V;
+ pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V;
/* Map from zero to end of allocations under 2M pages */
/* This replaces some of the KPTphys entries above */
for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
- pd_p[i] = (i << PDRSHIFT) | PG_RW | PG_V | PG_PS | PG_G;
+ pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
+ X86_PG_G;
/* And connect up the PD to the PDP (leaving room for L4 pages) */
pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
for (i = 0; i < nkpdpe; i++)
- pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | PG_RW | PG_V | PG_U;
+ pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V |
+ PG_U;
/*
* Now, set up the direct map region using 2MB and/or 1GB pages. If
@@ -630,36 +760,36 @@ create_pagetables(vm_paddr_t *firstaddr)
for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
pd_p[j] = (vm_paddr_t)i << PDRSHIFT;
/* Preset PG_M and PG_A because demotion expects it. */
- pd_p[j] |= PG_RW | PG_V | PG_PS | PG_G |
- PG_M | PG_A;
+ pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
+ X86_PG_M | X86_PG_A;
}
pdp_p = (pdp_entry_t *)DMPDPphys;
for (i = 0; i < ndm1g; i++) {
pdp_p[i] = (vm_paddr_t)i << PDPSHIFT;
/* Preset PG_M and PG_A because demotion expects it. */
- pdp_p[i] |= PG_RW | PG_V | PG_PS | PG_G |
- PG_M | PG_A;
+ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
+ X86_PG_M | X86_PG_A;
}
for (j = 0; i < ndmpdp; i++, j++) {
pdp_p[i] = DMPDphys + ptoa(j);
- pdp_p[i] |= PG_RW | PG_V | PG_U;
+ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U;
}
/* And recursively map PML4 to itself in order to get PTmap */
p4_p = (pml4_entry_t *)KPML4phys;
p4_p[PML4PML4I] = KPML4phys;
- p4_p[PML4PML4I] |= PG_RW | PG_V | PG_U;
+ p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | PG_U;
/* Connect the Direct Map slot(s) up to the PML4. */
for (i = 0; i < ndmpdpphys; i++) {
p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
- p4_p[DMPML4I + i] |= PG_RW | PG_V | PG_U;
+ p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | PG_U;
}
/* Connect the KVA slots up to the PML4 */
for (i = 0; i < NKPML4E; i++) {
p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
- p4_p[KPML4BASE + i] |= PG_RW | PG_V | PG_U;
+ p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V | PG_U;
}
}
@@ -705,6 +835,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
CPU_ZERO(&kernel_pmap->pm_save);
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
+ kernel_pmap->pm_flags = pmap_flags;
/*
* Initialize the global pv list lock.
@@ -948,35 +1079,131 @@ SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, de
* Low level helper routines.....
***************************************************/
+static pt_entry_t
+pmap_swap_pat(pmap_t pmap, pt_entry_t entry)
+{
+ int x86_pat_bits = X86_PG_PTE_PAT | X86_PG_PDE_PAT;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ /* Verify that both PAT bits are not set at the same time */
+ KASSERT((entry & x86_pat_bits) != x86_pat_bits,
+ ("Invalid PAT bits in entry %#lx", entry));
+
+ /* Swap the PAT bits if one of them is set */
+ if ((entry & x86_pat_bits) != 0)
+ entry ^= x86_pat_bits;
+ break;
+ case PT_EPT:
+ /*
+ * Nothing to do - the memory attributes are represented
+ * the same way for regular pages and superpages.
+ */
+ break;
+ default:
+ panic("pmap_switch_pat_bits: bad pm_type %d", pmap->pm_type);
+ }
+
+ return (entry);
+}
+
/*
* Determine the appropriate bits to set in a PTE or PDE for a specified
* caching mode.
*/
static int
-pmap_cache_bits(int mode, boolean_t is_pde)
+pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde)
{
int cache_bits, pat_flag, pat_idx;
if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
panic("Unknown caching mode %d\n", mode);
- /* The PAT bit is different for PTE's and PDE's. */
- pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
+ switch (pmap->pm_type) {
+ case PT_X86:
+ /* The PAT bit is different for PTE's and PDE's. */
+ pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT;
+
+ /* Map the caching mode to a PAT index. */
+ pat_idx = pat_index[mode];
+
+ /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
+ cache_bits = 0;
+ if (pat_idx & 0x4)
+ cache_bits |= pat_flag;
+ if (pat_idx & 0x2)
+ cache_bits |= PG_NC_PCD;
+ if (pat_idx & 0x1)
+ cache_bits |= PG_NC_PWT;
+ break;
+
+ case PT_EPT:
+ cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode);
+ break;
- /* Map the caching mode to a PAT index. */
- pat_idx = pat_index[mode];
+ default:
+ panic("unsupported pmap type %d", pmap->pm_type);
+ }
- /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
- cache_bits = 0;
- if (pat_idx & 0x4)
- cache_bits |= pat_flag;
- if (pat_idx & 0x2)
- cache_bits |= PG_NC_PCD;
- if (pat_idx & 0x1)
- cache_bits |= PG_NC_PWT;
return (cache_bits);
}
+static int
+pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
+{
+ int mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE;
+ break;
+ case PT_EPT:
+ mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7);
+ break;
+ default:
+ panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline boolean_t
+pmap_ps_enabled(pmap_t pmap)
+{
+
+ return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0);
+}
+
+static void
+pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde)
+{
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ break;
+ case PT_EPT:
+ /*
+ * XXX
+ * This is a little bogus since the generation number is
+ * supposed to be bumped up when a region of the address
+ * space is invalidated in the page tables.
+ *
+ * In this case the old PDE entry is valid but yet we want
+ * to make sure that any mappings using the old entry are
+ * invalidated in the TLB.
+ *
+ * The reason this works as expected is because we rendezvous
+ * "all" host cpus and force any vcpu context to exit as a
+ * side-effect.
+ */
+ atomic_add_acq_long(&pmap->pm_eptgen, 1);
+ break;
+ default:
+ panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type);
+ }
+ pde_store(pde, newpde);
+}
+
/*
* After changing the page size for the specified virtual address in the page
* table, flush the corresponding entries from the processor's TLB. Only the
@@ -985,8 +1212,17 @@ pmap_cache_bits(int mode, boolean_t is_p
* The calling thread must be pinned to a processor.
*/
static void
-pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
+pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
{
+ pt_entry_t PG_G;
+
+ if (pmap->pm_type == PT_EPT)
+ return;
+
+ KASSERT(pmap->pm_type == PT_X86,
+ ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type));
+
+ PG_G = pmap_global_bit(pmap);
if ((newpde & PG_PS) == 0)
/* Demotion: flush a specific 2MB page mapping. */
@@ -1048,12 +1284,61 @@ pmap_invalidate_page_pcid(pmap_t pmap, v
* immutable. The kernel page table is always active on every
* processor.
*/
+
+/*
+ * Interrupt the cpus that are executing in the guest context.
+ * This will force the vcpu to exit and the cached EPT mappings
+ * will be invalidated by the host before the next vmresume.
+ */
+static __inline void
+pmap_invalidate_ept(pmap_t pmap)
+{
+
+ sched_pin();
+ KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
+ ("pmap_invalidate_ept: absurd pm_active"));
+
+ /*
+ * The TLB mappings associated with a vcpu context are not
+ * flushed each time a different vcpu is chosen to execute.
+ *
+ * This is in contrast with a process's vtop mappings that
+ * are flushed from the TLB on each context switch.
+ *
+ * Therefore we need to do more than just a TLB shootdown on
+ * the active cpus in 'pmap->pm_active'. To do this we keep
+ * track of the number of invalidations performed on this pmap.
+ *
+ * Each vcpu keeps a cache of this counter and compares it
+ * just before a vmresume. If the counter is out-of-date an
+ * invept will be done to flush stale mappings from the TLB.
+ */
+ atomic_add_acq_long(&pmap->pm_eptgen, 1);
+
+ /*
+ * Force the vcpu to exit and trap back into the hypervisor.
+ *
+ * XXX this is not optimal because IPI_AST builds a trapframe
+ * whereas all we need is an 'eoi' followed by 'iret'.
+ */
+ ipi_selected(pmap->pm_active, IPI_AST);
+ sched_unpin();
+}
+
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
cpuset_t other_cpus;
u_int cpuid;
+ if (pmap->pm_type == PT_EPT) {
+ pmap_invalidate_ept(pmap);
+ return;
+ }
+
+ KASSERT(pmap->pm_type == PT_X86,
+ ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
+
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
if (!pmap_pcid_enabled) {
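
The pm_eptgen counter bumped by pmap_invalidate_ept() is consumed on the vmentry path, which lies outside the truncated portion of this diff. A hedged sketch of that consumer, with 'invept_context' standing in for the real invept wrapper:

/*
 * Sketch only: each vcpu caches pm_eptgen and, just before vmresume,
 * flushes stale guest mappings if the pmap has been invalidated since.
 */
static void
vcpu_flush_stale_ept(pmap_t pmap, long *eptgen_cached)
{
	long gen;

	gen = atomic_load_acq_long(&pmap->pm_eptgen);
	if (*eptgen_cached != gen) {
		invept_context(pmap);	/* hypothetical invept wrapper */
		*eptgen_cached = gen;
	}
}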
@@ -1124,6 +1409,14 @@ pmap_invalidate_range(pmap_t pmap, vm_of
vm_offset_t addr;
u_int cpuid;
+ if (pmap->pm_type == PT_EPT) {
+ pmap_invalidate_ept(pmap);
+ return;
+ }
+
+ KASSERT(pmap->pm_type == PT_X86,
+ ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
+
sched_pin();
if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
if (!pmap_pcid_enabled) {
@@ -1175,6 +1468,14 @@ pmap_invalidate_all(pmap_t pmap)
uint64_t cr3;
u_int cpuid;
+ if (pmap->pm_type == PT_EPT) {
+ pmap_invalidate_ept(pmap);
+ return;
+ }
+
+ KASSERT(pmap->pm_type == PT_X86,
+ ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
+
sched_pin();
cpuid = PCPU_GET(cpuid);
if (pmap == kernel_pmap ||
@@ -1243,6 +1544,7 @@ pmap_invalidate_cache(void)
struct pde_action {
cpuset_t invalidate; /* processors that invalidate their TLB */
+ pmap_t pmap;
vm_offset_t va;
pd_entry_t *pde;
pd_entry_t newpde;
@@ -1255,7 +1557,7 @@ pmap_update_pde_action(void *arg)
struct pde_action *act = arg;
if (act->store == PCPU_GET(cpuid))
- pde_store(act->pde, act->newpde);
+ pmap_update_pde_store(act->pmap, act->pde, act->newpde);
}
static void
@@ -1264,7 +1566,7 @@ pmap_update_pde_teardown(void *arg)
struct pde_action *act = arg;
if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate))
- pmap_update_pde_invalidate(act->va, act->newpde);
+ pmap_update_pde_invalidate(act->pmap, act->va, act->newpde);
}
/*
@@ -1286,7 +1588,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
cpuid = PCPU_GET(cpuid);
other_cpus = all_cpus;
CPU_CLR(cpuid, &other_cpus);
- if (pmap == kernel_pmap)
+ if (pmap == kernel_pmap || pmap->pm_type == PT_EPT)
active = all_cpus;
else {
active = pmap->pm_active;
@@ -1296,6 +1598,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
act.store = cpuid;
act.invalidate = active;
act.va = va;
+ act.pmap = pmap;
act.pde = pde;
act.newpde = newpde;
CPU_SET(cpuid, &active);
@@ -1303,9 +1606,9 @@ pmap_update_pde(pmap_t pmap, vm_offset_t
smp_no_rendevous_barrier, pmap_update_pde_action,
pmap_update_pde_teardown, &act);
} else {
- pde_store(pde, newpde);
+ pmap_update_pde_store(pmap, pde, newpde);
if (CPU_ISSET(cpuid, &active))
- pmap_update_pde_invalidate(va, newpde);
+ pmap_update_pde_invalidate(pmap, va, newpde);
}
sched_unpin();
}
@@ -1318,8 +1621,17 @@ PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invlpg(va);
+ switch (pmap->pm_type) {
+ case PT_X86:
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ invlpg(va);
+ break;
+ case PT_EPT:
+ pmap->pm_eptgen++;
+ break;
+ default:
+ panic("pmap_invalidate_page: unknown type: %d", pmap->pm_type);
+ }
}
PMAP_INLINE void
@@ -1327,17 +1639,35 @@ pmap_invalidate_range(pmap_t pmap, vm_of
{
vm_offset_t addr;
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- for (addr = sva; addr < eva; addr += PAGE_SIZE)
- invlpg(addr);
+ switch (pmap->pm_type) {
+ case PT_X86:
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ for (addr = sva; addr < eva; addr += PAGE_SIZE)
+ invlpg(addr);
+ break;
+ case PT_EPT:
+ pmap->pm_eptgen++;
+ break;
+ default:
+ panic("pmap_invalidate_range: unknown type: %d", pmap->pm_type);
+ }
}
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
- if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- invltlb();
+ switch (pmap->pm_type) {
+ case PT_X86:
+ if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
+ invltlb();
+ break;
+ case PT_EPT:
+ pmap->pm_eptgen++;
+ break;
+ default:
+ panic("pmap_invalidate_all: unknown type %d", pmap->pm_type);
+ }
}
PMAP_INLINE void
@@ -1351,9 +1681,9 @@ static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{
- pde_store(pde, newpde);
+ pmap_update_pde_store(pmap, pde, newpde);
if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
- pmap_update_pde_invalidate(va, newpde);
+ pmap_update_pde_invalidate(pmap, va, newpde);
else
CPU_ZERO(&pmap->pm_save);
}
@@ -1455,10 +1785,11 @@ pmap_extract(pmap_t pmap, vm_offset_t va
{
pdp_entry_t *pdpe;
pd_entry_t *pde;
- pt_entry_t *pte;
+ pt_entry_t *pte, PG_V;
vm_paddr_t pa;
pa = 0;
+ PG_V = pmap_valid_bit(pmap);
PMAP_LOCK(pmap);
pdpe = pmap_pdpe(pmap, va);
if (pdpe != NULL && (*pdpe & PG_V) != 0) {
@@ -1493,12 +1824,14 @@ vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
pd_entry_t pde, *pdep;
- pt_entry_t pte;
+ pt_entry_t pte, PG_RW, PG_V;
vm_paddr_t pa;
vm_page_t m;
pa = 0;
m = NULL;
+ PG_RW = pmap_rw_bit(pmap);
+ PG_V = pmap_valid_bit(pmap);
PMAP_LOCK(pmap);
retry:
pdep = pmap_pde(pmap, va);
@@ -1571,16 +1904,18 @@ pmap_kenter(vm_offset_t va, vm_paddr_t p
pt_entry_t *pte;
pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | PG_G);
+ pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G);
}
static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{
pt_entry_t *pte;
+ int cache_bits;
pte = vtopte(va);
- pte_store(pte, pa | PG_RW | PG_V | PG_G | pmap_cache_bits(mode, 0));
+ cache_bits = pmap_cache_bits(kernel_pmap, mode, 0);
+ pte_store(pte, pa | X86_PG_RW | X86_PG_V | X86_PG_G | cache_bits);
}
/*
@@ -1629,20 +1964,22 @@ pmap_qenter(vm_offset_t sva, vm_page_t *
{
pt_entry_t *endpte, oldpte, pa, *pte;
vm_page_t m;
+ int cache_bits;
oldpte = 0;
pte = vtopte(sva);
endpte = pte + count;
while (pte < endpte) {
m = *ma++;
- pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
- if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
+ cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0);
+ pa = VM_PAGE_TO_PHYS(m) | cache_bits;
+ if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) {
oldpte |= *pte;
- pte_store(pte, pa | PG_G | PG_RW | PG_V);
+ pte_store(pte, pa | X86_PG_G | X86_PG_RW | X86_PG_V);
}
pte++;
}
- if (__predict_false((oldpte & PG_V) != 0))
+ if (__predict_false((oldpte & X86_PG_V) != 0))
pmap_invalidate_range(kernel_pmap, sva, sva + count *
PAGE_SIZE);
}
@@ -1841,6 +2178,7 @@ pmap_pinit0(pmap_t pmap)
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
pmap->pm_pcid = pmap_pcid_enabled ? 0 : -1;
+ pmap->pm_flags = pmap_flags;
}
/*
@@ -1848,9 +2186,10 @@ pmap_pinit0(pmap_t pmap)
* such as one in a vmspace structure.
*/
int
-pmap_pinit(pmap_t pmap)
+pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags)
{
vm_page_t pml4pg;
+ vm_paddr_t pml4phys;
int i;
/*
@@ -1860,41 +2199,61 @@ pmap_pinit(pmap_t pmap)
VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
VM_WAIT;
- pmap->pm_cr3 = VM_PAGE_TO_PHYS(pml4pg);
- pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pmap->pm_cr3);
+ pml4phys = VM_PAGE_TO_PHYS(pml4pg);
+ pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys);
+ pmap->pm_pcid = -1;
+ pmap->pm_cr3 = ~0; /* initialize to an invalid value */
if ((pml4pg->flags & PG_ZERO) == 0)
pagezero(pmap->pm_pml4);
- /* Wire in kernel global address entries. */
- for (i = 0; i < NKPML4E; i++) {
- pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + (i << PAGE_SHIFT)) |
- PG_RW | PG_V | PG_U;
- }
- for (i = 0; i < ndmpdpphys; i++) {
- pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + (i << PAGE_SHIFT)) |
- PG_RW | PG_V | PG_U;
- }
+ /*
+ * Do not install the host kernel mappings in the nested page
+ * tables. These mappings are meaningless in the guest physical
+ * address space.
+ */
+ if ((pmap->pm_type = pm_type) == PT_X86) {
+ pmap->pm_cr3 = pml4phys;
+
+ /* Wire in kernel global address entries. */
+ for (i = 0; i < NKPML4E; i++) {
+ pmap->pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) |
+ X86_PG_RW | X86_PG_V | PG_U;
+ }
+ for (i = 0; i < ndmpdpphys; i++) {
+ pmap->pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) |
+ X86_PG_RW | X86_PG_V | PG_U;
+ }
+
+ /* install self-referential address mapping entry(s) */
+ pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) |
+ X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M;
- /* install self-referential address mapping entry(s) */
- pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
+ if (pmap_pcid_enabled) {
+ pmap->pm_pcid = alloc_unr(&pcid_unr);
+ if (pmap->pm_pcid != -1)
+ pmap->pm_cr3 |= pmap->pm_pcid;
+ }
+ }
pmap->pm_root.rt_root = 0;
CPU_ZERO(&pmap->pm_active);
TAILQ_INIT(&pmap->pm_pvchunk);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
- if (pmap_pcid_enabled) {
- pmap->pm_pcid = alloc_unr(&pcid_unr);
- if (pmap->pm_pcid != -1)
- pmap->pm_cr3 |= pmap->pm_pcid;
- } else {
- pmap->pm_pcid = -1;
- }
+ pmap->pm_flags = flags;
+ pmap->pm_eptgen = 0;
CPU_ZERO(&pmap->pm_save);
return (1);
}
+int
+pmap_pinit(pmap_t pmap)
+{
+
+ return (pmap_pinit_type(pmap, PT_X86, pmap_flags));
+}
+
/*
* This routine is called if the desired page table page does not exist.
*
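
pmap_pinit() keeps its old contract for ordinary host pmaps; the vmm EPT code is expected to create nested pmaps through pmap_pinit_type() directly. A hedged sketch of such a caller (the real ept.c wiring is not part of the truncated diff):

/*
 * Sketch only: create a nested (EPT) pmap, optionally requesting
 * software emulation of the accessed/dirty bits.
 */
static int
ept_pmap_pinit(pmap_t pmap, boolean_t emulate_ad)
{
	int flags;

	flags = PMAP_PDE_SUPERPAGE;	/* assumed: allow 2MB mappings */
	if (emulate_ad)
		flags |= PMAP_EMULATE_AD_BITS;
	return (pmap_pinit_type(pmap, PT_EPT, flags));
}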
@@ -1910,9 +2269,15 @@ static vm_page_t
_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
vm_page_t m, pdppg, pdpg;
+ pt_entry_t PG_A, PG_M, PG_RW, PG_V;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ PG_A = pmap_accessed_bit(pmap);
+ PG_M = pmap_modified_bit(pmap);
+ PG_V = pmap_valid_bit(pmap);
+ PG_RW = pmap_rw_bit(pmap);
+
/*
* Allocate a page table page.
*/
@@ -2040,9 +2405,11 @@ static vm_page_t
pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t pdpindex, ptepindex;
- pdp_entry_t *pdpe;
+ pdp_entry_t *pdpe, PG_V;
vm_page_t pdpg;
+ PG_V = pmap_valid_bit(pmap);
+
retry:
pdpe = pmap_pdpe(pmap, va);
if (pdpe != NULL && (*pdpe & PG_V) != 0) {
@@ -2064,9 +2431,11 @@ static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t ptepindex;
- pd_entry_t *pd;
+ pd_entry_t *pd, PG_V;
vm_page_t m;
+ PG_V = pmap_valid_bit(pmap);
+
/*
* Calculate pagetable page index
*/
@@ -2140,7 +2509,7 @@ pmap_release(pmap_t pmap)
pmap_invalidate_all(pmap);
}
- m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME);
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4));
for (i = 0; i < NKPML4E; i++) /* KVA */
pmap->pm_pml4[KPML4BASE + i] = 0;
@@ -2211,7 +2580,7 @@ pmap_growkernel(vm_offset_t addr)
addr = kernel_map->max_offset;
while (kernel_vm_end < addr) {
pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end);
- if ((*pdpe & PG_V) == 0) {
+ if ((*pdpe & X86_PG_V) == 0) {
/* We need a new PDP entry */
nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT,
VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
@@ -2221,12 +2590,12 @@ pmap_growkernel(vm_offset_t addr)
if ((nkpg->flags & PG_ZERO) == 0)
pmap_zero_page(nkpg);
paddr = VM_PAGE_TO_PHYS(nkpg);
- *pdpe = (pdp_entry_t)
- (paddr | PG_V | PG_RW | PG_A | PG_M);
+ *pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW |
+ X86_PG_A | X86_PG_M);
continue; /* try again */
}
pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***