svn commit: r270784 - in projects/arm64/sys/arm64: arm64 include
From: Andrew Turner <andrew@FreeBSD.org>
Date: Fri Aug 29 08:11:06 UTC 2014
Author: andrew
Date: Fri Aug 29 08:11:05 2014
New Revision: 270784
URL: http://svnweb.freebsd.org/changeset/base/270784
Log:
Change the pmap implementation to be based on the amd64 version. With
this I am able to start executing userland tasks.
Modified:
projects/arm64/sys/arm64/arm64/pmap.c
projects/arm64/sys/arm64/include/pmap.h
projects/arm64/sys/arm64/include/pte.h
projects/arm64/sys/arm64/include/vm.h
Modified: projects/arm64/sys/arm64/arm64/pmap.c
==============================================================================
--- projects/arm64/sys/arm64/arm64/pmap.c Fri Aug 29 08:02:35 2014 (r270783)
+++ projects/arm64/sys/arm64/arm64/pmap.c Fri Aug 29 08:11:05 2014 (r270784)
@@ -1,7 +1,61 @@
/*-
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ * Copyright (c) 2003 Peter Wemm
+ * All rights reserved.
+ * Copyright (c) 2005-2010 Alan L. Cox <alc at cs.rice.edu>
+ * All rights reserved.
* Copyright (c) 2014 Andrew Turner
* All rights reserved.
*
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91
+ */
+/*-
+ * Copyright (c) 2003 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jake Burkholder,
+ * Safeport Network Services, and Network Associates Laboratories, the
+ * Security Research Division of Network Associates, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
+ * CHATS research program.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -22,34 +76,201 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
*/
+#define AMD64_NPT_AWARE
+
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+/*
+ * Manages physical address maps.
+ *
+ * Since the information managed by this module is
+ * also stored by the logical address mapping module,
+ * this module may throw away valid virtual-to-physical
+ * mappings at almost any time. However, invalidations
+ * of virtual-to-physical mappings must be done as
+ * requested.
+ *
+ * In order to cope with hardware architectures which
+ * make virtual-to-physical map invalidates expensive,
+ * this module may delay invalidate or reduced protection
+ * operations until such time as they are actually
+ * necessary. This module is given full information as
+ * to which processors are currently using which maps,
+ * and to when physical maps must be made correct.
+ */
+
+//#include "opt_pmap.h"
+//#include "opt_vm.h"
+
#include <sys/param.h>
+#include <sys/bus.h>
#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
#include <sys/lock.h>
-#include <sys/msgbuf.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
+#include <sys/sx.h>
+#include <sys/vmmeter.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/_unrhdr.h>
+#include <sys/smp.h>
#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <vm/vm_radix.h>
+#include <vm/vm_reserv.h>
+#include <vm/uma.h>
-#include <machine/devmap.h>
#include <machine/machdep.h>
#include <machine/pcb.h>
-#include <machine/vmparam.h>
-/*
-#define PMAP_DEBUG
-*/
+#if 0
+#include <machine/intr_machdep.h>
+#include <x86/apicvar.h>
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+#endif
+
+#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NUPDE (NPDEPG * NPDEPG)
+#define NUSERPGTBLS (NUPDE + NPDEPG)
+
+#if 0
+static __inline boolean_t
+pmap_emulate_ad_bits(pmap_t pmap)
+{
+
+ return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0);
+}
+
+static __inline pt_entry_t
+pmap_valid_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_V;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_EMUL_V;
+ else
+ mask = EPT_PG_READ;
+ break;
+ default:
+ panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_rw_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_RW;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_EMUL_RW;
+ else
+ mask = EPT_PG_WRITE;
+ break;
+ default:
+ panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_global_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_G;
+ break;
+ case PT_EPT:
+ mask = 0;
+ break;
+ default:
+ panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_accessed_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_A;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_READ;
+ else
+ mask = EPT_PG_A;
+ break;
+ default:
+ panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+static __inline pt_entry_t
+pmap_modified_bit(pmap_t pmap)
+{
+ pt_entry_t mask;
+
+ switch (pmap->pm_type) {
+ case PT_X86:
+ mask = X86_PG_M;
+ break;
+ case PT_EPT:
+ if (pmap_emulate_ad_bits(pmap))
+ mask = EPT_PG_WRITE;
+ else
+ mask = EPT_PG_M;
+ break;
+ default:
+ panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type);
+ }
+
+ return (mask);
+}
+
+#endif /* 0 */
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
@@ -68,29 +289,272 @@ __FBSDID("$FreeBSD$");
#define UNCACHED_MEMORY 1
#define CACHED_MEMORY 2
+
+#ifdef PV_STATS
+#define PV_STAT(x) do { x ; } while (0)
+#else
+#define PV_STAT(x) do { } while (0)
+#endif
+
+#define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
+
+#if 0
+#define pa_index(pa) ((pa) >> PDRSHIFT)
+#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
+#endif /* 0 */
+
+#define NPV_LIST_LOCKS MAXCPU
+
+#define PHYS_TO_PV_LIST_LOCK(pa) \
+ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+
+#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
+ struct rwlock **_lockp = (lockp); \
+ struct rwlock *_new_lock; \
+ \
+ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \
+ if (_new_lock != *_lockp) { \
+ if (*_lockp != NULL) \
+ rw_wunlock(*_lockp); \
+ *_lockp = _new_lock; \
+ rw_wlock(*_lockp); \
+ } \
+} while (0)
+
+#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
+
+#define RELEASE_PV_LIST_LOCK(lockp) do { \
+ struct rwlock **_lockp = (lockp); \
+ \
+ if (*_lockp != NULL) { \
+ rw_wunlock(*_lockp); \
+ *_lockp = NULL; \
+ } \
+} while (0)
+
+#define VM_PAGE_TO_PV_LIST_LOCK(m) \
+ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
+
+struct pmap kernel_pmap_store;
+
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;
-vm_offset_t vm_max_kernel_address;
-int unmapped_buf_allowed = 0;
+struct msgbuf *msgbufp = NULL;
-struct pmap kernel_pmap_store;
+#if 0
+int nkpt;
+SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0,
+ "Number of kernel page table pages allocated on bootup");
+
+static int ndmpdp;
+vm_paddr_t dmaplimit;
+vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS;
+pt_entry_t pg_nx;
+
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
+
+static int pat_works = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
+ "Is page attribute table fully functional?");
+
+static int pg_ps_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &pg_ps_enabled, 0, "Are large page mappings enabled?");
+
+#define PAT_INDEX_SIZE 8
+static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */
+
+static u_int64_t KPTphys; /* phys addr of kernel level 1 */
+static u_int64_t KPDphys; /* phys addr of kernel level 2 */
+u_int64_t KPDPphys; /* phys addr of kernel level 3 */
+u_int64_t KPML4phys; /* phys addr of kernel level 4 */
+
+static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
+static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
+static int ndmpdpphys; /* number of DMPDPphys pages */
+#endif
-struct msgbuf *msgbufp = NULL;
+static struct rwlock_padalign pvh_global_lock;
+
+/*
+ * Data for the pv entry allocation mechanism
+ */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
+static struct mtx pv_chunks_mutex;
+static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+#if 0
+static struct md_page *pv_table;
+
+/*
+ * All those kernel PT submaps that BSD is so fond of
+ */
+pt_entry_t *CMAP1 = 0;
+caddr_t CADDR1 = 0;
+
+static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */
+
+static struct unrhdr pcid_unr;
+static struct mtx pcid_mtx;
+int pmap_pcid_enabled = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?");
+int invpcid_works = 0;
+SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0,
+ "Is the invpcid instruction available ?");
+
+static int
+pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
+{
+ int i;
+ uint64_t res;
+
+ res = 0;
+ CPU_FOREACH(i) {
+ res += cpuid_to_pcpu[i]->pc_pm_save_cnt;
+ }
+ return (sysctl_handle_64(oidp, &res, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW |
+ CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU",
+ "Count of saved TLB context on switch");
+
+/* pmap_copy_pages() over non-DMAP */
+static struct mtx cpage_lock;
+static vm_offset_t cpage_a;
+static vm_offset_t cpage_b;
+
+/*
+ * Crashdump maps.
+ */
+static caddr_t crashdumpmap;
+#endif /* 0 */
+
+static void free_pv_chunk(struct pv_chunk *pc);
+static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
+static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
+#if 0
+static int popcnt_pc_map_elem(uint64_t elem);
+#endif
+static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
+#if 0
+static void reserve_pv_entries(pmap_t pmap, int needed,
+ struct rwlock **lockp);
+static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp);
+#endif /* 0 */
+static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
+static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
+ vm_offset_t va);
+#if 0
+
+static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
+static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
+static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde,
+ vm_offset_t va, struct rwlock **lockp);
+static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe,
+ vm_offset_t va);
+static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ vm_prot_t prot, struct rwlock **lockp);
+#endif /* 0 */
+static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
+#if 0
+static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
+static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
+static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
+static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask);
+static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va,
+ struct rwlock **lockp);
+static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
+ vm_prot_t prot);
+static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask);
+static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
+ struct spglist *free, struct rwlock **lockp);
+#endif /* 0 */
+static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
+ pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
+#if 0
+static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
+static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ struct spglist *free);
+#endif /* 0 */
+static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
+ vm_page_t m, struct rwlock **lockp);
+#if 0
+static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
+ pd_entry_t newpde);
+static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde);
+#endif /* 0 */
+
+static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
+ struct rwlock **lockp);
+#if 0
+static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va,
+ struct rwlock **lockp);
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va,
+ struct rwlock **lockp);
+#endif /* 0 */
+
+static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
+ struct spglist *free);
+static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
+#if 0
+static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
+
+/*
+ * Move the kernel virtual free pointer to the next
+ * 2MB. This is used to help improve performance
+ * by using a large (2MB) page for much of the kernel
+ * (.text, .data, .bss)
+ */
+static vm_offset_t
+pmap_kmem_choose(vm_offset_t addr)
+{
+ vm_offset_t newaddr = addr;
+
+ newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
+ return (newaddr);
+}
+#endif /* 0 */
+
+/********************/
+/* Inline functions */
+/********************/
+
+static __inline void
+pagecopy(void *s, void *d)
+{
+
+ memcpy(d, s, PAGE_SIZE);
+}
+
+static __inline void
+pagezero(void *p)
+{
+
+ bzero(p, PAGE_SIZE);
+}
#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK)
-static pd_entry_t *
+static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
return (&pmap->pm_l1[pmap_l1_index(va)]);
}
-static pd_entry_t *
+static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
pd_entry_t *l2;
@@ -99,7 +563,7 @@ pmap_l1_to_l2(pd_entry_t *l1, vm_offset_
return (&l2[pmap_l2_index(va)]);
}
-static pd_entry_t *
+static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l1;
@@ -111,7 +575,7 @@ pmap_l2(pmap_t pmap, vm_offset_t va)
return (pmap_l1_to_l2(l1, va));
}
-static pt_entry_t *
+static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
pt_entry_t *l3;
@@ -120,7 +584,7 @@ pmap_l2_to_l3(pd_entry_t *l2, vm_offset_
return (&l3[pmap_l3_index(va)]);
}
-static pt_entry_t *
+static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
pd_entry_t *l2;
@@ -132,6 +596,199 @@ pmap_l3(pmap_t pmap, vm_offset_t va)
return (pmap_l2_to_l3(l2, va));
}
+static __inline void
+pmap_resident_count_inc(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ pmap->pm_stats.resident_count += count;
+}
+
+static __inline void
+pmap_resident_count_dec(pmap_t pmap, int count)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(pmap->pm_stats.resident_count >= count,
+ ("pmap %p resident count underflow %ld %d", pmap,
+ pmap->pm_stats.resident_count, count));
+ pmap->pm_stats.resident_count -= count;
+}
+
+#if 0
+PMAP_INLINE pt_entry_t *
+vtopte(vm_offset_t va)
+{
+ u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+ KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va));
+
+ return (PTmap + ((va >> PAGE_SHIFT) & mask));
+}
+
+static __inline pd_entry_t *
+vtopde(vm_offset_t va)
+{
+ u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+ KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va));
+
+ return (PDmap + ((va >> PDRSHIFT) & mask));
+}
+
+CTASSERT(powerof2(NDMPML4E));
+
+/* number of kernel PDP slots */
+#define NKPDPE(ptpgs) howmany((ptpgs), NPDEPG)
+
+static void
+nkpt_init(vm_paddr_t addr)
+{
+ int pt_pages;
+
+#ifdef NKPT
+ pt_pages = NKPT;
+#else
+ pt_pages = howmany(addr, 1 << PDRSHIFT);
+ pt_pages += NKPDPE(pt_pages);
+
+ /*
+ * Add some slop beyond the bare minimum required for bootstrapping
+ * the kernel.
+ *
+ * This is quite important when allocating KVA for kernel modules.
+ * The modules are required to be linked in the negative 2GB of
+ * the address space. If we run out of KVA in this region then
+ * pmap_growkernel() will need to allocate page table pages to map
+ * the entire 512GB of KVA space which is an unnecessary tax on
+ * physical memory.
+ */
+ pt_pages += 8; /* 16MB additional slop for kernel modules */
+#endif
+ nkpt = pt_pages;
+}
+
+static void
+create_pagetables(vm_paddr_t *firstaddr)
+{
+ int i, j, ndm1g, nkpdpe;
+ pt_entry_t *pt_p;
+ pd_entry_t *pd_p;
+ pdp_entry_t *pdp_p;
+ pml4_entry_t *p4_p;
+
+ /* Allocate page table pages for the direct map */
+ ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
+ if (ndmpdp < 4) /* Minimum 4GB of dirmap */
+ ndmpdp = 4;
+ ndmpdpphys = howmany(ndmpdp, NPDPEPG);
+ if (ndmpdpphys > NDMPML4E) {
+ /*
+ * Each NDMPML4E allows 512 GB, so limit to that,
+ * and then readjust ndmpdp and ndmpdpphys.
+ */
+ printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512);
+ Maxmem = atop(NDMPML4E * NBPML4);
+ ndmpdpphys = NDMPML4E;
+ ndmpdp = NDMPML4E * NPDEPG;
+ }
+ DMPDPphys = allocpages(firstaddr, ndmpdpphys);
+ ndm1g = 0;
+ if ((amd_feature & AMDID_PAGE1GB) != 0)
+ ndm1g = ptoa(Maxmem) >> PDPSHIFT;
+ if (ndm1g < ndmpdp)
+ DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g);
+ dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;
+
+ /* Allocate pages */
+ KPML4phys = allocpages(firstaddr, 1);
+ KPDPphys = allocpages(firstaddr, NKPML4E);
+
+ /*
+ * Allocate the initial number of kernel page table pages required to
+ * bootstrap. We defer this until after all memory-size dependent
+ * allocations are done (e.g. direct map), so that we don't have to
+ * build in too much slop in our estimate.
+ *
+ * Note that when NKPML4E > 1, we have an empty page underneath
+ * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed)
+ * pages. (pmap_enter requires a PD page to exist for each KPML4E.)
+ */
+ nkpt_init(*firstaddr);
+ nkpdpe = NKPDPE(nkpt);
+
+ KPTphys = allocpages(firstaddr, nkpt);
+ KPDphys = allocpages(firstaddr, nkpdpe);
+
+ /* Fill in the underlying page table pages */
+ /* Nominally read-only (but really R/W) from zero to physfree */
+ /* XXX not fully used, underneath 2M pages */
+ pt_p = (pt_entry_t *)KPTphys;
+ for (i = 0; ptoa(i) < *firstaddr; i++)
+ pt_p[i] = ptoa(i) | X86_PG_RW | X86_PG_V | X86_PG_G;
+
+ /* Now map the page tables at their location within PTmap */
+ pd_p = (pd_entry_t *)KPDphys;
+ for (i = 0; i < nkpt; i++)
+ pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V;
+
+ /* Map from zero to end of allocations under 2M pages */
+ /* This replaces some of the KPTphys entries above */
+ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++)
+ pd_p[i] = (i << PDRSHIFT) | X86_PG_RW | X86_PG_V | PG_PS |
+ X86_PG_G;
+
+ /* And connect up the PD to the PDP (leaving room for L4 pages) */
+ pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE));
+ for (i = 0; i < nkpdpe; i++)
+ pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V |
+ PG_U;
+
+ /*
+ * Now, set up the direct map region using 2MB and/or 1GB pages. If
+ * the end of physical memory is not aligned to a 1GB page boundary,
+ * then the residual physical memory is mapped with 2MB pages. Later,
+ * if pmap_mapdev{_attr}() uses the direct map for non-write-back
+ * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings
+ * that are partially used.
+ */
+ pd_p = (pd_entry_t *)DMPDphys;
+ for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) {
+ pd_p[j] = (vm_paddr_t)i << PDRSHIFT;
+ /* Preset PG_M and PG_A because demotion expects it. */
+ pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
+ X86_PG_M | X86_PG_A;
+ }
+ pdp_p = (pdp_entry_t *)DMPDPphys;
+ for (i = 0; i < ndm1g; i++) {
+ pdp_p[i] = (vm_paddr_t)i << PDPSHIFT;
+ /* Preset PG_M and PG_A because demotion expects it. */
+ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | X86_PG_G |
+ X86_PG_M | X86_PG_A;
+ }
+ for (j = 0; i < ndmpdp; i++, j++) {
+ pdp_p[i] = DMPDphys + ptoa(j);
+ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_U;
+ }
+
+ /* And recursively map PML4 to itself in order to get PTmap */
+ p4_p = (pml4_entry_t *)KPML4phys;
+ p4_p[PML4PML4I] = KPML4phys;
+ p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | PG_U;
+
+ /* Connect the Direct Map slot(s) up to the PML4. */
+ for (i = 0; i < ndmpdpphys; i++) {
+ p4_p[DMPML4I + i] = DMPDPphys + ptoa(i);
+ p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V | PG_U;
+ }
+
+ /* Connect the KVA slots up to the PML4 */
+ for (i = 0; i < NKPML4E; i++) {
+ p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
+ p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V | PG_U;
+ }
+}
+#endif /* 0 */
static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
@@ -207,7 +864,7 @@ pmap_bootstrap_l2(vm_offset_t l1pt, vm_o
KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
pa = pmap_early_vtophys(l1pt, l2pt);
- l1[l1_slot] = (pa & ~Ln_TABLE_MASK) | ATTR_AF | L1_TABLE;
+ l1[l1_slot] = (pa & ~Ln_TABLE_MASK) | L1_TABLE;
l2pt += PAGE_SIZE;
}
@@ -237,7 +894,7 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_o
KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
pa = pmap_early_vtophys(l1pt, l3pt);
- l2[l2_slot] = (pa & ~Ln_TABLE_MASK) | ATTR_AF | L2_TABLE;
+ l2[l2_slot] = (pa & ~Ln_TABLE_MASK) | L2_TABLE;
l3pt += PAGE_SIZE;
}
@@ -247,6 +904,9 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_o
return l3pt;
}
+/*
+ * Bootstrap the system enough to run with virtual memory.
+ */
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
@@ -268,6 +928,11 @@ pmap_bootstrap(vm_offset_t l1pt, vm_padd
kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
PMAP_LOCK_INIT(kernel_pmap);
+ /*
+ * Initialize the global pv list lock.
+ */
+ rw_init(&pvh_global_lock, "pmap pv global");
+
/* Create a direct map region early so we can use it for pa -> va */
pmap_bootstrap_dmap(l1pt);
@@ -377,7 +1042,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_padd
/* TODO: Move this to a function */
__asm __volatile(
"dsb sy \n"
- "tlbi vmalle1is \n"
+ "tlbi vmalle1 \n"
"dsb sy \n"
"isb \n");
@@ -433,12 +1098,99 @@ pmap_bootstrap(vm_offset_t l1pt, vm_padd
"isb \n");
}
+#if 0
+/*
+ * Setup the PAT MSR.
+ */
+void
+pmap_init_pat(void)
+{
+ int pat_table[PAT_INDEX_SIZE];
+ uint64_t pat_msr;
+ u_long cr0, cr4;
+ int i;
+
+ /* Bail if this CPU doesn't implement PAT. */
+ if ((cpu_feature & CPUID_PAT) == 0)
+ panic("no PAT??");
+
+ /* Set default PAT index table. */
+ for (i = 0; i < PAT_INDEX_SIZE; i++)
+ pat_table[i] = -1;
+ pat_table[PAT_WRITE_BACK] = 0;
+ pat_table[PAT_WRITE_THROUGH] = 1;
+ pat_table[PAT_UNCACHEABLE] = 3;
+ pat_table[PAT_WRITE_COMBINING] = 3;
+ pat_table[PAT_WRITE_PROTECTED] = 3;
+ pat_table[PAT_UNCACHED] = 3;
+
+ /* Initialize default PAT entries. */
+ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
+ PAT_VALUE(1, PAT_WRITE_THROUGH) |
+ PAT_VALUE(2, PAT_UNCACHED) |
+ PAT_VALUE(3, PAT_UNCACHEABLE) |
+ PAT_VALUE(4, PAT_WRITE_BACK) |
+ PAT_VALUE(5, PAT_WRITE_THROUGH) |
+ PAT_VALUE(6, PAT_UNCACHED) |
+ PAT_VALUE(7, PAT_UNCACHEABLE);
+
+ if (pat_works) {
+ /*
+ * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
+ * Program 5 and 6 as WP and WC.
+ * Leave 4 and 7 as WB and UC.
+ */
+ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
+ pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
+ PAT_VALUE(6, PAT_WRITE_COMBINING);
+ pat_table[PAT_UNCACHED] = 2;
+ pat_table[PAT_WRITE_PROTECTED] = 5;
+ pat_table[PAT_WRITE_COMBINING] = 6;
+ } else {
+ /*
+ * Just replace PAT Index 2 with WC instead of UC-.
+ */
+ pat_msr &= ~PAT_MASK(2);
+ pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
+ pat_table[PAT_WRITE_COMBINING] = 2;
+ }
+
+ /* Disable PGE. */
+ cr4 = rcr4();
+ load_cr4(cr4 & ~CR4_PGE);
+
+ /* Disable caches (CD = 1, NW = 0). */
+ cr0 = rcr0();
+ load_cr0((cr0 & ~CR0_NW) | CR0_CD);
+
+ /* Flushes caches and TLBs. */
+ wbinvd();
+ invltlb();
+
+ /* Update PAT and index table. */
+ wrmsr(MSR_PAT, pat_msr);
+ for (i = 0; i < PAT_INDEX_SIZE; i++)
+ pat_index[i] = pat_table[i];
+
+ /* Flush caches and TLBs again. */
+ wbinvd();
+ invltlb();
+
+ /* Restore caches and PGE. */
+ load_cr0(cr0);
+ load_cr4(cr4);
+}
+#endif /* 0 */
+
/*
- * Initialize a vm_page's machine-dependent fields.
+ * Initialize a vm_page's machine-dependent fields.
*/
void
pmap_page_init(vm_page_t m)
{
+
+ TAILQ_INIT(&m->md.pv_list);
+ m->md.pv_memattr = VM_MEMATTR_UNCACHEABLE;
}
/*
@@ -449,57 +1201,463 @@ pmap_page_init(vm_page_t m)
void
pmap_init(void)
{
+ //vm_page_t mpte;
+ //vm_size_t s;
+ int i;//, pv_npg;
+
+#if 0
+ /*
+ * Initialize the vm page array entries for the kernel pmap's
+ * page table pages.
+ */
+ for (i = 0; i < nkpt; i++) {
+ mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
+ KASSERT(mpte >= vm_page_array &&
+ mpte < &vm_page_array[vm_page_array_size],
+ ("pmap_init: page table page is out of range"));
+ mpte->pindex = pmap_pde_pindex(KERNBASE) + i;
+ mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
+ }
+
+ /*
+ * If the kernel is running on a virtual machine, then it must assume
+ * that MCA is enabled by the hypervisor. Moreover, the kernel must
+ * be prepared for the hypervisor changing the vendor and family that
+ * are reported by CPUID. Consequently, the workaround for AMD Family
+ * 10h Erratum 383 is enabled if the processor's feature set does not
+ * include at least one feature that is only supported by older Intel
+ * or newer AMD processors.
+ */
+ if (vm_guest == VM_GUEST_VM && (cpu_feature & CPUID_SS) == 0 &&
+ (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI |
+ CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP |
+ AMDID2_FMA4)) == 0)
+ workaround_erratum383 = 1;
+
+ /*
+ * Are large page mappings enabled?
+ */
+ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
+ if (pg_ps_enabled) {
+ KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
+ ("pmap_init: can't assign to pagesizes[1]"));
+ pagesizes[1] = NBPDR;
+ }
+#endif
+
+ /*
+ * Initialize the pv chunk list mutex.
+ */
+ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
+
+ /*
+ * Initialize the pool of pv list locks.
+ */
+ for (i = 0; i < NPV_LIST_LOCKS; i++)
+ rw_init(&pv_list_locks[i], "pmap pv list");
+
+#if 0
+ /*
+ * Calculate the size of the pv head table for superpages.
+ */
+ for (i = 0; phys_avail[i + 1]; i += 2);
+ pv_npg = round_2mpage(phys_avail[(i - 2) + 1]) / NBPDR;
+
+ /*
+ * Allocate memory for the pv head table for superpages.
+ */
+ s = (vm_size_t)(pv_npg * sizeof(struct md_page));
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
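
[Editorial note, not part of the committed patch.] The pmap_l1_index/pmap_l2_index/pmap_l3_index macros retained in the diff above compute the page-table slot a virtual address selects at each translation level, mirroring the per-level indexing the amd64 pmap performs with its PML4/PDP/PD/PT levels. The sketch below is a standalone, illustrative reproduction of that index arithmetic only; the shift and table-size values are assumptions based on the conventional ARMv8 4 KiB translation granule (L1_SHIFT 30, L2_SHIFT 21, L3_SHIFT 12, 512 entries per level) rather than being taken from the project's pte.h.

/*
 * Illustrative sketch of the pmap_lN_index() arithmetic from the diff above.
 * The constants below are assumptions matching the standard ARMv8 4 KiB
 * granule, not the project's headers.
 */
#include <stdio.h>
#include <stdint.h>

#define	L1_SHIFT	30			/* each L1 entry maps 1 GiB */
#define	L2_SHIFT	21			/* each L2 entry maps 2 MiB */
#define	L3_SHIFT	12			/* each L3 entry maps 4 KiB */
#define	Ln_ENTRIES	512			/* 9 index bits per level */
#define	Ln_ADDR_MASK	(Ln_ENTRIES - 1)

#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

int
main(void)
{
	/* An arbitrary virtual address, purely for demonstration. */
	uint64_t va = 0x0000000012345000ULL;

	/* Print which slot the address selects at each table level. */
	printf("va 0x%016jx -> L1 %ju, L2 %ju, L3 %ju\n",
	    (uintmax_t)va,
	    (uintmax_t)pmap_l1_index(va),
	    (uintmax_t)pmap_l2_index(va),
	    (uintmax_t)pmap_l3_index(va));
	return (0);
}

Compiled on its own, this prints the slot chosen at each level; the kernel's pmap_l1/pmap_l2/pmap_l3 helpers in the diff chain these indices together, converting each table's physical address through the direct map to reach the next level's entries.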