svn commit: r348644 - in stable/12/sys: riscv/include riscv/riscv vm
Mark Johnston
markj at FreeBSD.org
Tue Jun 4 17:31:07 UTC 2019
Author: markj
Date: Tue Jun 4 17:31:05 2019
New Revision: 348644
URL: https://svnweb.freebsd.org/changeset/base/348644
Log:
MFC r344106:
Implement transparent 2MB superpage promotion for RISC-V.
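Besides the pmap changes themselves, the pmap.c hunk below adds a vm.pmap.superpages_enabled loader tunable and vm.pmap.l2.{mappings,promotions,p_failures,demotions} event counters. A minimal userland sketch for watching promotion activity after this MFC; the OID names are taken from the SYSCTL_ULONG() declarations in the diff, while the program itself is only illustrative and is not part of r348644:

/*
 * Illustrative only: dump the 2MB page counters added by this change.
 * OID names mirror the SYSCTL_ULONG() declarations in the pmap.c hunk.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
        static const char *oids[] = {
                "vm.pmap.l2.mappings",
                "vm.pmap.l2.promotions",
                "vm.pmap.l2.p_failures",
                "vm.pmap.l2.demotions",
        };
        u_long val;
        size_t i, len;

        for (i = 0; i < sizeof(oids) / sizeof(oids[0]); i++) {
                len = sizeof(val);
                if (sysctlbyname(oids[i], &val, &len, NULL, 0) == 0)
                        printf("%-24s %lu\n", oids[i], val);
        }
        return (0);
}

Promotion can also be disabled at boot by setting vm.pmap.superpages_enabled=0 from the loader, since the sysctl is declared CTLFLAG_RDTUN.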
Modified:
stable/12/sys/riscv/include/param.h
stable/12/sys/riscv/include/pmap.h
stable/12/sys/riscv/include/pte.h
stable/12/sys/riscv/include/vmparam.h
stable/12/sys/riscv/riscv/pmap.c
stable/12/sys/vm/vm_fault.c
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/riscv/include/param.h
==============================================================================
--- stable/12/sys/riscv/include/param.h Tue Jun 4 17:30:22 2019 (r348643)
+++ stable/12/sys/riscv/include/param.h Tue Jun 4 17:31:05 2019 (r348644)
@@ -82,7 +82,7 @@
#define PAGE_SIZE (1 << PAGE_SHIFT) /* Page size */
#define PAGE_MASK (PAGE_SIZE - 1)
-#define MAXPAGESIZES 1 /* maximum number of supported page sizes */
+#define MAXPAGESIZES 3 /* maximum number of supported page sizes */
#ifndef KSTACK_PAGES
#define KSTACK_PAGES 4 /* pages of kernel stack (with pcb) */
Modified: stable/12/sys/riscv/include/pmap.h
==============================================================================
--- stable/12/sys/riscv/include/pmap.h Tue Jun 4 17:30:22 2019 (r348643)
+++ stable/12/sys/riscv/include/pmap.h Tue Jun 4 17:31:05 2019 (r348644)
@@ -44,6 +44,8 @@
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <vm/_vm_radix.h>
+
#ifdef _KERNEL
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
@@ -80,6 +82,7 @@ struct pmap {
pd_entry_t *pm_l1;
TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
+ struct vm_radix pm_root;
};
typedef struct pv_entry {
@@ -139,6 +142,7 @@ void pmap_kenter_device(vm_offset_t, vm_size_t, vm_pad
vm_paddr_t pmap_kextract(vm_offset_t va);
void pmap_kremove(vm_offset_t);
void pmap_kremove_device(vm_offset_t, vm_size_t);
+bool pmap_ps_enabled(pmap_t);
void *pmap_mapdev(vm_offset_t, vm_size_t);
void *pmap_mapbios(vm_paddr_t, vm_size_t);
Modified: stable/12/sys/riscv/include/pte.h
==============================================================================
--- stable/12/sys/riscv/include/pte.h Tue Jun 4 17:30:22 2019 (r348643)
+++ stable/12/sys/riscv/include/pte.h Tue Jun 4 17:31:05 2019 (r348644)
@@ -62,7 +62,8 @@ typedef uint64_t pn_t; /* page number */
#define L3_SIZE (1 << L3_SHIFT)
#define L3_OFFSET (L3_SIZE - 1)
-#define Ln_ENTRIES (1 << 9)
+#define Ln_ENTRIES_SHIFT 9
+#define Ln_ENTRIES (1 << Ln_ENTRIES_SHIFT)
#define Ln_ADDR_MASK (Ln_ENTRIES - 1)
/* Bits 9:8 are reserved for software */
@@ -79,6 +80,8 @@ typedef uint64_t pn_t; /* page number */
#define PTE_RWX (PTE_R | PTE_W | PTE_X)
#define PTE_RX (PTE_R | PTE_X)
#define PTE_KERN (PTE_V | PTE_R | PTE_W | PTE_A | PTE_D)
+#define PTE_PROMOTE (PTE_V | PTE_RWX | PTE_D | PTE_A | PTE_G | PTE_U | \
+ PTE_SW_MANAGED | PTE_SW_WIRED)
#define PTE_PPN0_S 10
#define PTE_PPN1_S 19
Modified: stable/12/sys/riscv/include/vmparam.h
==============================================================================
--- stable/12/sys/riscv/include/vmparam.h Tue Jun 4 17:30:22 2019 (r348643)
+++ stable/12/sys/riscv/include/vmparam.h Tue Jun 4 17:31:05 2019 (r348644)
@@ -99,10 +99,10 @@
#define VM_NFREEORDER 12
/*
- * Disable superpage reservations.
+ * Enable superpage reservations: 1 level.
*/
#ifndef VM_NRESERVLEVEL
-#define VM_NRESERVLEVEL 0
+#define VM_NRESERVLEVEL 1
#endif
/*
Modified: stable/12/sys/riscv/riscv/pmap.c
==============================================================================
--- stable/12/sys/riscv/riscv/pmap.c Tue Jun 4 17:30:22 2019 (r348643)
+++ stable/12/sys/riscv/riscv/pmap.c Tue Jun 4 17:31:05 2019 (r348644)
@@ -118,6 +118,7 @@ __FBSDID("$FreeBSD$");
*/
#include <sys/param.h>
+#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -145,6 +146,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>
@@ -154,9 +156,8 @@ __FBSDID("$FreeBSD$");
#include <machine/pcb.h>
#include <machine/sbi.h>
-#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define NUPDE (NPDEPG * NPDEPG)
-#define NUSERPGTBLS (NUPDE + NPDEPG)
+#define NUL1E (Ln_ENTRIES * Ln_ENTRIES)
+#define NUL2E (Ln_ENTRIES * NUL1E)
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
@@ -175,11 +176,12 @@ __FBSDID("$FreeBSD$");
#endif
#define pmap_l2_pindex(v) ((v) >> L2_SHIFT)
+#define pa_to_pvh(pa) (&pv_table[pa_index(pa)])
#define NPV_LIST_LOCKS MAXCPU
#define PHYS_TO_PV_LIST_LOCK(pa) \
- (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
+ (&pv_list_locks[pmap_l2_pindex(pa) % NPV_LIST_LOCKS])
#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \
struct rwlock **_lockp = (lockp); \
@@ -230,13 +232,52 @@ CTASSERT((DMAP_MAX_ADDRESS & ~L1_OFFSET) == DMAP_MAX_
static struct rwlock_padalign pvh_global_lock;
static struct mtx_padalign allpmaps_lock;
+static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0,
+ "VM/pmap parameters");
+
+static int superpages_enabled = 1;
+SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
+ CTLFLAG_RDTUN, &superpages_enabled, 0,
+ "Enable support for transparent superpages");
+
+static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
+ "2MB page mapping counters");
+
+static u_long pmap_l2_demotions;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
+ &pmap_l2_demotions, 0,
+ "2MB page demotions");
+
+static u_long pmap_l2_mappings;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD,
+ &pmap_l2_mappings, 0,
+ "2MB page mappings");
+
+static u_long pmap_l2_p_failures;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
+ &pmap_l2_p_failures, 0,
+ "2MB page promotion failures");
+
+static u_long pmap_l2_promotions;
+SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
+ &pmap_l2_promotions, 0,
+ "2MB page promotions");
+
/*
* Data for the pv entry allocation mechanism
*/
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
+static struct md_page *pv_table;
+static struct md_page pv_dummy;
+/*
+ * Internal flags for pmap_enter()'s helper functions.
+ */
+#define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */
+#define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */
+
static void free_pv_chunk(struct pv_chunk *pc);
static void free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
@@ -244,6 +285,11 @@ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap,
static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
vm_offset_t va);
+static bool pmap_demote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va);
+static bool pmap_demote_l2_locked(pmap_t pmap, pd_entry_t *l2,
+ vm_offset_t va, struct rwlock **lockp);
+static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2,
+ u_int flags, vm_page_t m, struct rwlock **lockp);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
@@ -254,9 +300,9 @@ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap,
static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
struct rwlock **lockp);
-static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
+static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m,
struct spglist *free);
-static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
#define pmap_clear(pte) pmap_store(pte, 0)
#define pmap_clear_bits(pte, bits) atomic_clear_64(pte, bits)
@@ -636,7 +682,8 @@ pmap_page_init(vm_page_t m)
void
pmap_init(void)
{
- int i;
+ vm_size_t s;
+ int i, pv_npg;
/*
* Initialize the pv chunk and pmap list mutexes.
@@ -649,6 +696,24 @@ pmap_init(void)
*/
for (i = 0; i < NPV_LIST_LOCKS; i++)
rw_init(&pv_list_locks[i], "pmap pv list");
+
+ /*
+ * Calculate the size of the pv head table for superpages.
+ */
+ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
+
+ /*
+ * Allocate memory for the pv head table for superpages.
+ */
+ s = (vm_size_t)(pv_npg * sizeof(struct md_page));
+ s = round_page(s);
+ pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO);
+ for (i = 0; i < pv_npg; i++)
+ TAILQ_INIT(&pv_table[i].pv_list);
+ TAILQ_INIT(&pv_dummy.pv_list);
+
+ if (superpages_enabled)
+ pagesizes[1] = L2_SIZE;
}
#ifdef SMP
@@ -999,6 +1064,13 @@ pmap_qremove(vm_offset_t sva, int count)
pmap_invalidate_range(kernel_pmap, sva, va);
}
+bool
+pmap_ps_enabled(pmap_t pmap __unused)
+{
+
+ return (superpages_enabled);
+}
+
/***************************************************
* Page table page management routines.....
***************************************************/
@@ -1018,6 +1090,34 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist
m->flags &= ~PG_ZERO;
SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}
+
+/*
+ * Inserts the specified page table page into the specified pmap's collection
+ * of idle page table pages. Each of a pmap's page table pages is responsible
+ * for mapping a distinct range of virtual addresses. The pmap's collection is
+ * ordered by this virtual address range.
+ */
+static __inline int
+pmap_insert_pt_page(pmap_t pmap, vm_page_t ml3)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ return (vm_radix_insert(&pmap->pm_root, ml3));
+}
+
+/*
+ * Removes the page table page mapping the specified virtual address from the
+ * specified pmap's collection of idle page table pages, and returns it.
+ * Otherwise, returns NULL if there is no page table page corresponding to the
+ * specified virtual address.
+ */
+static __inline vm_page_t
+pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
+}
/*
* Decrements a page table page's wire count, which is used to record the
@@ -1026,12 +1126,12 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist
* page table page was unmapped and FALSE otherwise.
*/
static inline boolean_t
-pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
+pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
--m->wire_count;
if (m->wire_count == 0) {
- _pmap_unwire_l3(pmap, va, m, free);
+ _pmap_unwire_ptp(pmap, va, m, free);
return (TRUE);
} else {
return (FALSE);
@@ -1039,36 +1139,30 @@ pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t
}
static void
-_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
+_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{
vm_paddr_t phys;
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
- /*
- * unmap the page table page
- */
- if (m->pindex >= NUPDE) {
- /* PD page */
+ if (m->pindex >= NUL1E) {
pd_entry_t *l1;
l1 = pmap_l1(pmap, va);
pmap_clear(l1);
pmap_distribute_l1(pmap, pmap_l1_index(va), 0);
} else {
- /* PTE page */
pd_entry_t *l2;
l2 = pmap_l2(pmap, va);
pmap_clear(l2);
}
pmap_resident_count_dec(pmap, 1);
- if (m->pindex < NUPDE) {
+ if (m->pindex < NUL1E) {
pd_entry_t *l1;
- /* We just released a PT, unhold the matching PD */
vm_page_t pdpg;
l1 = pmap_l1(pmap, va);
phys = PTE_TO_PHYS(pmap_load(l1));
pdpg = PHYS_TO_VM_PAGE(phys);
- pmap_unwire_l3(pmap, va, pdpg, free);
+ pmap_unwire_ptp(pmap, va, pdpg, free);
}
pmap_invalidate_page(pmap, va);
@@ -1082,24 +1176,20 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t
}
/*
- * After removing an l3 entry, this routine is used to
+ * After removing a page table entry, this routine is used to
* conditionally free the page, and manage the hold/wire counts.
*/
static int
-pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
struct spglist *free)
{
- vm_paddr_t phys;
vm_page_t mpte;
if (va >= VM_MAXUSER_ADDRESS)
return (0);
KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
-
- phys = PTE_TO_PHYS(ptepde);
-
- mpte = PHYS_TO_VM_PAGE(phys);
- return (pmap_unwire_l3(pmap, va, mpte, free));
+ mpte = PHYS_TO_VM_PAGE(PTE_TO_PHYS(ptepde));
+ return (pmap_unwire_ptp(pmap, va, mpte, free));
}
void
@@ -1140,6 +1230,8 @@ pmap_pinit(pmap_t pmap)
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
mtx_unlock(&allpmaps_lock);
+ vm_radix_init(&pmap->pm_root);
+
return (1);
}
@@ -1193,11 +1285,11 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
* it isn't already there.
*/
- if (ptepindex >= NUPDE) {
+ if (ptepindex >= NUL1E) {
pd_entry_t *l1;
vm_pindex_t l1index;
- l1index = ptepindex - NUPDE;
+ l1index = ptepindex - NUL1E;
l1 = &pmap->pm_l1[l1index];
pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE);
@@ -1213,7 +1305,7 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
l1 = &pmap->pm_l1[l1index];
if (pmap_load(l1) == 0) {
/* recurse for allocating page dir */
- if (_pmap_alloc_l3(pmap, NUPDE + l1index,
+ if (_pmap_alloc_l3(pmap, NUL1E + l1index,
lockp) == NULL) {
vm_page_unwire_noq(m);
vm_page_free_zero(m);
@@ -1241,6 +1333,29 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, str
}
static vm_page_t
+pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
+{
+ pd_entry_t *l1;
+ vm_page_t l2pg;
+ vm_pindex_t l2pindex;
+
+retry:
+ l1 = pmap_l1(pmap, va);
+ if (l1 != NULL && (pmap_load(l1) & PTE_RWX) == 0) {
+ /* Add a reference to the L2 page. */
+ l2pg = PHYS_TO_VM_PAGE(PTE_TO_PHYS(pmap_load(l1)));
+ l2pg->wire_count++;
+ } else {
+ /* Allocate a L2 page. */
+ l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT;
+ l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp);
+ if (l2pg == NULL && lockp != NULL)
+ goto retry;
+ }
+ return (l2pg);
+}
+
+static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
vm_pindex_t ptepindex;
@@ -1599,6 +1714,79 @@ retry:
}
/*
+ * Ensure that the number of spare PV entries in the specified pmap meets or
+ * exceeds the given count, "needed".
+ *
+ * The given PV list lock may be released.
+ */
+static void
+reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
+{
+ struct pch new_tail;
+ struct pv_chunk *pc;
+ vm_page_t m;
+ int avail, free;
+ bool reclaimed;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));
+
+ /*
+ * Newly allocated PV chunks must be stored in a private list until
+ * the required number of PV chunks have been allocated. Otherwise,
+ * reclaim_pv_chunk() could recycle one of these chunks. In
+ * contrast, these chunks must be added to the pmap upon allocation.
+ */
+ TAILQ_INIT(&new_tail);
+retry:
+ avail = 0;
+ TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
+ bit_count((bitstr_t *)pc->pc_map, 0,
+ sizeof(pc->pc_map) * NBBY, &free);
+ if (free == 0)
+ break;
+ avail += free;
+ if (avail >= needed)
+ break;
+ }
+ for (reclaimed = false; avail < needed; avail += _NPCPV) {
+ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
+ VM_ALLOC_WIRED);
+ if (m == NULL) {
+ m = reclaim_pv_chunk(pmap, lockp);
+ if (m == NULL)
+ goto retry;
+ reclaimed = true;
+ }
+ /* XXX PV STATS */
+#if 0
+ dump_add_page(m->phys_addr);
+#endif
+ pc = (void *)PHYS_TO_DMAP(m->phys_addr);
+ pc->pc_pmap = pmap;
+ pc->pc_map[0] = PC_FREE0;
+ pc->pc_map[1] = PC_FREE1;
+ pc->pc_map[2] = PC_FREE2;
+ TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
+
+ /*
+ * The reclaim might have freed a chunk from the current pmap.
+ * If that chunk contained available entries, we need to
+ * re-count the number of available entries.
+ */
+ if (reclaimed)
+ goto retry;
+ }
+ if (!TAILQ_EMPTY(&new_tail)) {
+ mtx_lock(&pv_chunks_mutex);
+ TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
+ mtx_unlock(&pv_chunks_mutex);
+ }
+}
+
+/*
* First find and then remove the pv entry for the specified pmap and virtual
* address from the specified pv list. Returns the pv entry if found and NULL
* otherwise. This operation can be performed on pv lists for either 4KB or
@@ -1632,7 +1820,7 @@ pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_off
pv = pmap_pvh_remove(pvh, pmap, va);
- KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
+ KASSERT(pv != NULL, ("pmap_pvh_free: pv not found for %#lx", va));
free_pv_entry(pmap, pv);
}
@@ -1660,6 +1848,222 @@ pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
}
/*
+ * After demotion from a 2MB page mapping to 512 4KB page mappings,
+ * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
+ * entries for each of the 4KB page mappings.
+ */
+static void __unused
+pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ struct pv_chunk *pc;
+ pv_entry_t pv;
+ vm_page_t m;
+ vm_offset_t va_last;
+ int bit, field;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+
+ /*
+ * Transfer the 2mpage's pv entry for this mapping to the first
+ * page's pv list. Once this transfer begins, the pv list lock
+ * must not be released until the last pv entry is reinstantiated.
+ */
+ pvh = pa_to_pvh(pa);
+ va &= ~L2_OFFSET;
+ pv = pmap_pvh_remove(pvh, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
+ m = PHYS_TO_VM_PAGE(pa);
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ /* Instantiate the remaining 511 pv entries. */
+ va_last = va + L2_SIZE - PAGE_SIZE;
+ for (;;) {
+ pc = TAILQ_FIRST(&pmap->pm_pvchunk);
+ KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
+ pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
+ for (field = 0; field < _NPCM; field++) {
+ while (pc->pc_map[field] != 0) {
+ bit = ffsl(pc->pc_map[field]) - 1;
+ pc->pc_map[field] &= ~(1ul << bit);
+ pv = &pc->pc_pventry[field * 64 + bit];
+ va += PAGE_SIZE;
+ pv->pv_va = va;
+ m++;
+ KASSERT((m->oflags & VPO_UNMANAGED) == 0,
+ ("pmap_pv_demote_l2: page %p is not managed", m));
+ TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
+ m->md.pv_gen++;
+ if (va == va_last)
+ goto out;
+ }
+ }
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+out:
+ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
+ TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+ TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
+ }
+ /* XXX PV stats */
+}
+
+#if VM_NRESERVLEVEL > 0
+static void
+pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_page_t m;
+ vm_offset_t va_last;
+
+ rw_assert(&pvh_global_lock, RA_LOCKED);
+ KASSERT((va & L2_OFFSET) == 0,
+ ("pmap_pv_promote_l2: misaligned va %#lx", va));
+
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+
+ m = PHYS_TO_VM_PAGE(pa);
+ pv = pmap_pvh_remove(&m->md, pmap, va);
+ KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv for %#lx not found", va));
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ pvh->pv_gen++;
+
+ va_last = va + L2_SIZE - PAGE_SIZE;
+ do {
+ m++;
+ va += PAGE_SIZE;
+ pmap_pvh_free(&m->md, pmap, va);
+ } while (va < va_last);
+}
+#endif /* VM_NRESERVLEVEL > 0 */
+
+/*
+ * Create the PV entry for a 2MB page mapping. Always returns true unless the
+ * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns
+ * false if the PV entry cannot be allocated without resorting to reclamation.
+ */
+static bool
+pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags,
+ struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pv_entry_t pv;
+ vm_paddr_t pa;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ /* Pass NULL instead of the lock pointer to disable reclamation. */
+ if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ?
+ NULL : lockp)) == NULL)
+ return (false);
+ pv->pv_va = va;
+ pa = PTE_TO_PHYS(l2e);
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);
+ pvh = pa_to_pvh(pa);
+ TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
+ pvh->pv_gen++;
+ return (true);
+}
+
+static void
+pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
+{
+ pt_entry_t newl2, oldl2;
+ vm_page_t ml3;
+ vm_paddr_t ml3pa;
+
+ KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va));
+ KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap));
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+ ml3 = pmap_remove_pt_page(pmap, va);
+ if (ml3 == NULL)
+ panic("pmap_remove_kernel_l2: Missing pt page");
+
+ ml3pa = VM_PAGE_TO_PHYS(ml3);
+ newl2 = ml3pa | PTE_V;
+
+ /*
+ * Initialize the page table page.
+ */
+ pagezero((void *)PHYS_TO_DMAP(ml3pa));
+
+ /*
+ * Demote the mapping.
+ */
+ oldl2 = pmap_load_store(l2, newl2);
+ KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx",
+ __func__, l2, oldl2));
+}
+
+/*
+ * pmap_remove_l2: Do the things to unmap a level 2 superpage.
+ */
+static int
+pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
+ pd_entry_t l1e, struct spglist *free, struct rwlock **lockp)
+{
+ struct md_page *pvh;
+ pt_entry_t oldl2;
+ vm_offset_t eva, va;
+ vm_page_t m, ml3;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned"));
+ oldl2 = pmap_load_clear(l2);
+ KASSERT((oldl2 & PTE_RWX) != 0,
+ ("pmap_remove_l2: L2e %lx is not a superpage mapping", oldl2));
+
+ /*
+ * The sfence.vma documentation states that it is sufficient to specify
+ * a single address within a superpage mapping. However, since we do
+ * not perform any invalidation upon promotion, TLBs may still be
+ * caching 4KB mappings within the superpage, so we must invalidate the
+ * entire range.
+ */
+ pmap_invalidate_range(pmap, sva, sva + L2_SIZE);
+ if ((oldl2 & PTE_SW_WIRED) != 0)
+ pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE;
+ pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE);
+ if ((oldl2 & PTE_SW_MANAGED) != 0) {
+ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, PTE_TO_PHYS(oldl2));
+ pvh = pa_to_pvh(PTE_TO_PHYS(oldl2));
+ pmap_pvh_free(pvh, pmap, sva);
+ eva = sva + L2_SIZE;
+ for (va = sva, m = PHYS_TO_VM_PAGE(PTE_TO_PHYS(oldl2));
+ va < eva; va += PAGE_SIZE, m++) {
+ if ((oldl2 & PTE_D) != 0)
+ vm_page_dirty(m);
+ if ((oldl2 & PTE_A) != 0)
+ vm_page_aflag_set(m, PGA_REFERENCED);
+ if (TAILQ_EMPTY(&m->md.pv_list) &&
+ TAILQ_EMPTY(&pvh->pv_list))
+ vm_page_aflag_clear(m, PGA_WRITEABLE);
+ }
+ }
+ if (pmap == kernel_pmap) {
+ pmap_remove_kernel_l2(pmap, l2, sva);
+ } else {
+ ml3 = pmap_remove_pt_page(pmap, sva);
+ if (ml3 != NULL) {
+ pmap_resident_count_dec(pmap, 1);
+ KASSERT(ml3->wire_count == Ln_ENTRIES,
+ ("pmap_remove_l2: l3 page wire count error"));
+ ml3->wire_count = 1;
+ vm_page_unwire_noq(ml3);
+ pmap_add_delayed_free_list(ml3, free, FALSE);
+ }
+ }
+ return (pmap_unuse_pt(pmap, sva, l1e, free));
+}
+
+/*
* pmap_remove_l3: do the things to unmap a page in a process
*/
static int
@@ -1687,7 +2091,7 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_
pmap_pvh_free(&m->md, pmap, va);
}
- return (pmap_unuse_l3(pmap, va, l2e, free));
+ return (pmap_unuse_pt(pmap, va, l2e, free));
}
/*
@@ -1699,11 +2103,11 @@ pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
+ struct spglist free;
struct rwlock *lock;
vm_offset_t va, va_next;
- pd_entry_t *l1, *l2;
- pt_entry_t l3_pte, *l3;
- struct spglist free;
+ pd_entry_t *l1, *l2, l2e;
+ pt_entry_t *l3;
/*
* Perform an unsynchronized read. This is, however, safe.
@@ -1739,16 +2143,22 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
l2 = pmap_l1_to_l2(l1, sva);
if (l2 == NULL)
continue;
-
- l3_pte = pmap_load(l2);
-
- /*
- * Weed out invalid mappings.
- */
- if (l3_pte == 0)
+ if ((l2e = pmap_load(l2)) == 0)
continue;
- if ((pmap_load(l2) & PTE_RX) != 0)
- continue;
+ if ((l2e & PTE_RWX) != 0) {
+ if (sva + L2_SIZE == va_next && eva >= va_next) {
+ (void)pmap_remove_l2(pmap, l2, sva,
+ pmap_load(l1), &free, &lock);
+ continue;
+ } else if (!pmap_demote_l2_locked(pmap, l2, sva,
+ &lock)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
+ }
+ l2e = pmap_load(l2);
+ }
/*
* Limit our scan to either the end of the va represented
@@ -1761,8 +2171,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
va = va_next;
for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
sva += L3_SIZE) {
- if (l3 == NULL)
- panic("l3 == NULL");
if (pmap_load(l3) == 0) {
if (va != va_next) {
pmap_invalidate_range(pmap, va, sva);
@@ -1772,8 +2180,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
}
if (va == va_next)
va = sva;
- if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free,
- &lock)) {
+ if (pmap_remove_l3(pmap, l3, sva, l2e, &free, &lock)) {
sva += L3_SIZE;
break;
}
@@ -1783,7 +2190,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
}
if (lock != NULL)
rw_wunlock(lock);
- rw_runlock(&pvh_global_lock);
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, false);
}
@@ -1804,42 +2211,54 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
void
pmap_remove_all(vm_page_t m)
{
- pv_entry_t pv;
- pmap_t pmap;
- pt_entry_t *l3, tl3;
- pd_entry_t *l2, tl2;
struct spglist free;
+ struct md_page *pvh;
+ pmap_t pmap;
+ pt_entry_t *l3, l3e;
+ pd_entry_t *l2, l2e;
+ pv_entry_t pv;
+ vm_offset_t va;
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("pmap_remove_all: page %p is not managed", m));
SLIST_INIT(&free);
+ pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
+ pa_to_pvh(VM_PAGE_TO_PHYS(m));
+
rw_wlock(&pvh_global_lock);
+ while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
+ pmap = PV_PMAP(pv);
+ PMAP_LOCK(pmap);
+ va = pv->pv_va;
+ l2 = pmap_l2(pmap, va);
+ (void)pmap_demote_l2(pmap, l2, va);
+ PMAP_UNLOCK(pmap);
+ }
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pmap = PV_PMAP(pv);
PMAP_LOCK(pmap);
pmap_resident_count_dec(pmap, 1);
l2 = pmap_l2(pmap, pv->pv_va);
KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
- tl2 = pmap_load(l2);
+ l2e = pmap_load(l2);
- KASSERT((tl2 & PTE_RX) == 0,
- ("pmap_remove_all: found a table when expecting "
- "a block in %p's pv list", m));
+ KASSERT((l2e & PTE_RX) == 0,
+ ("pmap_remove_all: found a superpage in %p's pv list", m));
l3 = pmap_l2_to_l3(l2, pv->pv_va);
- tl3 = pmap_load_clear(l3);
+ l3e = pmap_load_clear(l3);
pmap_invalidate_page(pmap, pv->pv_va);
- if (tl3 & PTE_SW_WIRED)
+ if (l3e & PTE_SW_WIRED)
pmap->pm_stats.wired_count--;
- if ((tl3 & PTE_A) != 0)
+ if ((l3e & PTE_A) != 0)
vm_page_aflag_set(m, PGA_REFERENCED);
/*
* Update the vm_page_t clean and reference bits.
*/
- if ((tl3 & PTE_D) != 0)
+ if ((l3e & PTE_D) != 0)
vm_page_dirty(m);
- pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free);
+ pmap_unuse_pt(pmap, pv->pv_va, pmap_load(l2), &free);
TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
m->md.pv_gen++;
free_pv_entry(pmap, pv);
@@ -1857,10 +2276,12 @@ pmap_remove_all(vm_page_t m)
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
- pd_entry_t *l1, *l2;
+ pd_entry_t *l1, *l2, l2e;
pt_entry_t *l3, l3e, mask;
vm_page_t m;
- vm_offset_t va_next;
+ vm_paddr_t pa;
+ vm_offset_t va, va_next;
+ bool anychanged, pv_lists_locked;
if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
pmap_remove(pmap, sva, eva);
@@ -1871,12 +2292,14 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
(VM_PROT_WRITE | VM_PROT_EXECUTE))
return;
+ anychanged = false;
+ pv_lists_locked = false;
mask = 0;
if ((prot & VM_PROT_WRITE) == 0)
mask |= PTE_W | PTE_D;
if ((prot & VM_PROT_EXECUTE) == 0)
mask |= PTE_X;
-
+resume:
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
l1 = pmap_l1(pmap, sva);
@@ -1892,10 +2315,41 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
va_next = eva;
l2 = pmap_l1_to_l2(l1, sva);
- if (l2 == NULL || pmap_load(l2) == 0)
+ if (l2 == NULL || (l2e = pmap_load(l2)) == 0)
continue;
- if ((pmap_load(l2) & PTE_RX) != 0)
- continue;
+ if ((l2e & PTE_RWX) != 0) {
+ if (sva + L2_SIZE == va_next && eva >= va_next) {
+retryl2:
+ if ((l2e & (PTE_SW_MANAGED | PTE_D)) ==
+ (PTE_SW_MANAGED | PTE_D)) {
+ pa = PTE_TO_PHYS(l2e);
+ for (va = sva, m = PHYS_TO_VM_PAGE(pa);
+ va < va_next; m++, va += PAGE_SIZE)
+ vm_page_dirty(m);
+ }
+ if (!atomic_fcmpset_long(l2, &l2e, l2e & ~mask))
+ goto retryl2;
+ anychanged = true;
+ } else {
+ if (!pv_lists_locked) {
+ pv_lists_locked = true;
+ if (!rw_try_rlock(&pvh_global_lock)) {
+ if (anychanged)
+ pmap_invalidate_all(
+ pmap);
+ PMAP_UNLOCK(pmap);
+ rw_rlock(&pvh_global_lock);
+ goto resume;
+ }
+ }
+ if (!pmap_demote_l2(pmap, l2, sva)) {
+ /*
+ * The large page mapping was destroyed.
+ */
+ continue;
+ }
+ }
+ }
if (va_next > eva)
va_next = eva;
@@ -1903,7 +2357,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
sva += L3_SIZE) {
l3e = pmap_load(l3);
-retry:
+retryl3:
if ((l3e & PTE_V) == 0)
continue;
if ((prot & VM_PROT_WRITE) == 0 &&
@@ -1913,60 +2367,236 @@ retry:
vm_page_dirty(m);
}
if (!atomic_fcmpset_long(l3, &l3e, l3e & ~mask))
- goto retry;
- /* XXX: Use pmap_invalidate_range */
- pmap_invalidate_page(pmap, sva);
+ goto retryl3;
+ anychanged = true;
}
}
+ if (anychanged)
+ pmap_invalidate_all(pmap);
+ if (pv_lists_locked)
+ rw_runlock(&pvh_global_lock);
PMAP_UNLOCK(pmap);
}
int
pmap_fault_fixup(pmap_t pmap, vm_offset_t va, vm_prot_t ftype)
{
- pt_entry_t orig_l3;
- pt_entry_t new_l3;
- pt_entry_t *l3;
+ pd_entry_t *l2, l2e;
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
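The constants touched above (Ln_ENTRIES_SHIFT in the pte.h hunk, NUL1E and NUL2E in the pmap.c hunk) encode the Sv39 geometry behind the 2MB figure in the log: 512 entries per table level, so a single L2 entry spans 512 base pages. A throwaway restatement of that arithmetic, purely illustrative and assuming the usual riscv PAGE_SHIFT of 12 (the param.h hunk does not show its value):

/*
 * Illustrative only: restates the superpage geometry that the pte.h and
 * pmap.c hunks above rely on.  PAGE_SHIFT is assumed to be 12.
 */
#include <stdio.h>

#define PAGE_SHIFT      12                      /* assumed riscv value */
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define Ln_ENTRIES_SHIFT 9                      /* pte.h hunk above */
#define Ln_ENTRIES      (1UL << Ln_ENTRIES_SHIFT)
#define L2_SIZE         (Ln_ENTRIES * PAGE_SIZE)  /* span of one L2 entry */
#define NUL1E           (Ln_ENTRIES * Ln_ENTRIES) /* as in the pmap.c hunk */
#define NUL2E           (Ln_ENTRIES * NUL1E)      /* as in the pmap.c hunk */

int
main(void)
{
        printf("L2 superpage: %lu bytes = %lu x %lu-byte base pages\n",
            L2_SIZE, Ln_ENTRIES, PAGE_SIZE);
        printf("NUL1E = %lu, NUL2E = %lu\n", NUL1E, NUL2E);
        return (0);
}

Compiling and running this prints an L2 superpage size of 2097152 bytes, i.e. the 2MB promotion size named in the log message.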