svn commit: r350365 - in stable/12/sys: amd64/amd64 amd64/include sys
Konstantin Belousov
kib at FreeBSD.org
Fri Jul 26 19:35:35 UTC 2019
Author: kib
Date: Fri Jul 26 19:35:33 2019
New Revision: 350365
URL: https://svnweb.freebsd.org/changeset/base/350365
Log:
MFC r347695, r347696, r347697, r347957, r349326:
Lockless delayed invalidation for amd64 pmap.
Modified:
stable/12/sys/amd64/amd64/machdep.c
stable/12/sys/amd64/amd64/pmap.c
stable/12/sys/amd64/amd64/trap.c
stable/12/sys/amd64/amd64/vm_machdep.c
stable/12/sys/amd64/include/pmap.h
stable/12/sys/amd64/include/proc.h
stable/12/sys/sys/proc.h
Directory Properties:
stable/12/ (props changed)
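
For context before the diff: the lockless path replaces the single global pmap_invl_gen counter with a singly-linked list of per-thread {generation, next} records maintained with a 16-byte compare-and-swap. The sketch below is not part of the commit (the type and function names are illustrative); it mirrors pmap_di_store_invl() in the pmap.c hunk and shows the primitive the lockless code depends on. It requires a CPU advertising CMPXCHG16B (CX16); the patch keeps the original mutex/turnstile implementation when CX16 is absent or when the vm.pmap.di_locked tunable is set.

    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative mirror of the CAS-visible part of struct pmap_invl_gen. */
    struct di_node {
            uint64_t        gen;    /* low quadword: DI generation number */
            struct di_node  *next;  /* high quadword: list link; bit 0 marks the
                                       entry as not yet valid for traversal */
    } __attribute__((aligned(16))); /* cmpxchg16b needs a 16-byte-aligned operand */

    /*
     * Atomically replace *ptr with *newv if it still equals *oldv.
     * The whole {gen, next} pair is swapped as one 128-bit unit, which is
     * what lets list insertion and generation propagation proceed without
     * taking the invl_gen_mtx mutex.
     */
    static bool
    di_cas(struct di_node *ptr, struct di_node *oldv, struct di_node *newv)
    {
            uint64_t new_lo = newv->gen, new_hi = (uintptr_t)newv->next;
            uint64_t old_lo = oldv->gen, old_hi = (uintptr_t)oldv->next;
            char res;

            __asm volatile("lock; cmpxchg16b %1; sete %0"
                : "=r" (res), "+m" (*ptr), "+a" (old_lo), "+d" (old_hi)
                : "b" (new_lo), "c" (new_hi)
                : "memory", "cc");
            return (res != 0);
    }

The locked/lockless choice is made once at boot through the DEFINE_IFUNC resolvers added in pmap.c, and the selected mode is exported read-only as the vm.pmap.di_locked sysctl.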
Modified: stable/12/sys/amd64/amd64/machdep.c
==============================================================================
--- stable/12/sys/amd64/amd64/machdep.c Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/amd64/amd64/machdep.c Fri Jul 26 19:35:33 2019 (r350365)
@@ -1618,6 +1618,13 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
physfree += kstack0_sz;
/*
+ * Initialize enough of thread0 for delayed invalidation to
+ * work very early. Rely on thread0.td_base_pri
+ * zero-initialization, it is reset to PVM at proc0_init().
+ */
+ pmap_thread_init_invl_gen(&thread0);
+
+ /*
* make gdt memory segments
*/
for (x = 0; x < NGDT; x++) {
Modified: stable/12/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/12/sys/amd64/amd64/pmap.c Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/amd64/amd64/pmap.c Fri Jul 26 19:35:33 2019 (r350365)
@@ -107,6 +107,7 @@ __FBSDID("$FreeBSD$");
* and to when physical maps must be made correct.
*/
+#include "opt_ddb.h"
#include "opt_pmap.h"
#include "opt_vm.h"
@@ -130,6 +131,10 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/smp.h>
+#ifdef DDB
+#include <sys/kdb.h>
+#include <ddb/ddb.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -475,22 +480,100 @@ SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE
static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker =
LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker);
static struct mtx invl_gen_mtx;
-static u_long pmap_invl_gen = 0;
/* Fake lock object to satisfy turnstiles interface. */
static struct lock_object invl_gen_ts = {
.lo_name = "invlts",
};
+static struct pmap_invl_gen pmap_invl_gen_head = {
+ .gen = 1,
+ .next = NULL,
+};
+static u_long pmap_invl_gen = 1;
+static int pmap_invl_waiters;
+static struct callout pmap_invl_callout;
+static bool pmap_invl_callout_inited;
+#define PMAP_ASSERT_NOT_IN_DI() \
+ KASSERT(pmap_not_in_di(), ("DI already started"))
+
static bool
-pmap_not_in_di(void)
+pmap_di_locked(void)
{
+ int tun;
- return (curthread->td_md.md_invl_gen.gen == 0);
+ if ((cpu_feature2 & CPUID2_CX16) == 0)
+ return (true);
+ tun = 0;
+ TUNABLE_INT_FETCH("vm.pmap.di_locked", &tun);
+ return (tun != 0);
}
-#define PMAP_ASSERT_NOT_IN_DI() \
- KASSERT(pmap_not_in_di(), ("DI already started"))
+static int
+sysctl_pmap_di_locked(SYSCTL_HANDLER_ARGS)
+{
+ int locked;
+ locked = pmap_di_locked();
+ return (sysctl_handle_int(oidp, &locked, 0, req));
+}
+SYSCTL_PROC(_vm_pmap, OID_AUTO, di_locked, CTLTYPE_INT | CTLFLAG_RDTUN |
+ CTLFLAG_MPSAFE, 0, 0, sysctl_pmap_di_locked, "",
+ "Locked delayed invalidation");
+
+static bool pmap_not_in_di_l(void);
+static bool pmap_not_in_di_u(void);
+DEFINE_IFUNC(, bool, pmap_not_in_di, (void), static)
+{
+
+ return (pmap_di_locked() ? pmap_not_in_di_l : pmap_not_in_di_u);
+}
+
+static bool
+pmap_not_in_di_l(void)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ return (invl_gen->gen == 0);
+}
+
+static void
+pmap_thread_init_invl_gen_l(struct thread *td)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &td->td_md.md_invl_gen;
+ invl_gen->gen = 0;
+}
+
+static void
+pmap_delayed_invl_wait_block(u_long *m_gen, u_long *invl_gen)
+{
+ struct turnstile *ts;
+
+ ts = turnstile_trywait(&invl_gen_ts);
+ if (*m_gen > atomic_load_long(invl_gen))
+ turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
+ else
+ turnstile_cancel(ts);
+}
+
+static void
+pmap_delayed_invl_finish_unblock(u_long new_gen)
+{
+ struct turnstile *ts;
+
+ turnstile_chain_lock(&invl_gen_ts);
+ ts = turnstile_lookup(&invl_gen_ts);
+ if (new_gen != 0)
+ pmap_invl_gen = new_gen;
+ if (ts != NULL) {
+ turnstile_broadcast(ts, TS_SHARED_QUEUE);
+ turnstile_unpend(ts);
+ }
+ turnstile_chain_unlock(&invl_gen_ts);
+}
+
/*
* Start a new Delayed Invalidation (DI) block of code, executed by
* the current thread. Within a DI block, the current thread may
@@ -500,7 +583,7 @@ pmap_not_in_di(void)
* pmap active.
*/
static void
-pmap_delayed_invl_started(void)
+pmap_delayed_invl_start_l(void)
{
struct pmap_invl_gen *invl_gen;
u_long currgen;
@@ -532,36 +615,311 @@ pmap_delayed_invl_started(void)
* current thread's DI.
*/
static void
-pmap_delayed_invl_finished(void)
+pmap_delayed_invl_finish_l(void)
{
struct pmap_invl_gen *invl_gen, *next;
- struct turnstile *ts;
invl_gen = &curthread->td_md.md_invl_gen;
- KASSERT(invl_gen->gen != 0, ("missed invl_started"));
+ KASSERT(invl_gen->gen != 0, ("missed invl_start"));
mtx_lock(&invl_gen_mtx);
next = LIST_NEXT(invl_gen, link);
- if (next == NULL) {
- turnstile_chain_lock(&invl_gen_ts);
- ts = turnstile_lookup(&invl_gen_ts);
- pmap_invl_gen = invl_gen->gen;
- if (ts != NULL) {
- turnstile_broadcast(ts, TS_SHARED_QUEUE);
- turnstile_unpend(ts);
- }
- turnstile_chain_unlock(&invl_gen_ts);
- } else {
+ if (next == NULL)
+ pmap_delayed_invl_finish_unblock(invl_gen->gen);
+ else
next->gen = invl_gen->gen;
- }
LIST_REMOVE(invl_gen, link);
mtx_unlock(&invl_gen_mtx);
invl_gen->gen = 0;
}
+static bool
+pmap_not_in_di_u(void)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &curthread->td_md.md_invl_gen;
+ return (((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) != 0);
+}
+
+static void
+pmap_thread_init_invl_gen_u(struct thread *td)
+{
+ struct pmap_invl_gen *invl_gen;
+
+ invl_gen = &td->td_md.md_invl_gen;
+ invl_gen->gen = 0;
+ invl_gen->next = (void *)PMAP_INVL_GEN_NEXT_INVALID;
+}
+
+static bool
+pmap_di_load_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *out)
+{
+ uint64_t new_high, new_low, old_high, old_low;
+ char res;
+
+ old_low = new_low = 0;
+ old_high = new_high = (uintptr_t)0;
+
+ __asm volatile("lock;cmpxchg16b\t%1;sete\t%0"
+ : "=r" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high)
+ : "b"(new_low), "c" (new_high)
+ : "memory", "cc");
+ if (res == 0) {
+ if ((old_high & PMAP_INVL_GEN_NEXT_INVALID) != 0)
+ return (false);
+ out->gen = old_low;
+ out->next = (void *)old_high;
+ } else {
+ out->gen = new_low;
+ out->next = (void *)new_high;
+ }
+ return (true);
+}
+
+static bool
+pmap_di_store_invl(struct pmap_invl_gen *ptr, struct pmap_invl_gen *old_val,
+ struct pmap_invl_gen *new_val)
+{
+ uint64_t new_high, new_low, old_high, old_low;
+ char res;
+
+ new_low = new_val->gen;
+ new_high = (uintptr_t)new_val->next;
+ old_low = old_val->gen;
+ old_high = (uintptr_t)old_val->next;
+
+ __asm volatile("lock;cmpxchg16b\t%1;sete\t%0"
+ : "=r" (res), "+m" (*ptr), "+a" (old_low), "+d" (old_high)
+ : "b"(new_low), "c" (new_high)
+ : "memory", "cc");
+ return (res);
+}
+
#ifdef PV_STATS
+static long invl_start_restart;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_start_restart, CTLFLAG_RD,
+ &invl_start_restart, 0,
+ "");
+static long invl_finish_restart;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_finish_restart, CTLFLAG_RD,
+ &invl_finish_restart, 0,
+ "");
+static int invl_max_qlen;
+SYSCTL_INT(_vm_pmap, OID_AUTO, invl_max_qlen, CTLFLAG_RD,
+ &invl_max_qlen, 0,
+ "");
+#endif
+
+static struct lock_delay_config __read_frequently di_delay;
+LOCK_DELAY_SYSINIT_DEFAULT(di_delay);
+
+static void
+pmap_delayed_invl_start_u(void)
+{
+ struct pmap_invl_gen *invl_gen, *p, prev, new_prev;
+ struct thread *td;
+ struct lock_delay_arg lda;
+ uintptr_t prevl;
+ u_char pri;
+#ifdef PV_STATS
+ int i, ii;
+#endif
+
+ td = curthread;
+ invl_gen = &td->td_md.md_invl_gen;
+ PMAP_ASSERT_NOT_IN_DI();
+ lock_delay_arg_init(&lda, &di_delay);
+ invl_gen->saved_pri = 0;
+ pri = td->td_base_pri;
+ if (pri > PVM) {
+ thread_lock(td);
+ pri = td->td_base_pri;
+ if (pri > PVM) {
+ invl_gen->saved_pri = pri;
+ sched_prio(td, PVM);
+ }
+ thread_unlock(td);
+ }
+again:
+ PV_STAT(i = 0);
+ for (p = &pmap_invl_gen_head;; p = prev.next) {
+ PV_STAT(i++);
+ prevl = atomic_load_ptr(&p->next);
+ if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) {
+ PV_STAT(atomic_add_long(&invl_start_restart, 1));
+ lock_delay(&lda);
+ goto again;
+ }
+ if (prevl == 0)
+ break;
+ prev.next = (void *)prevl;
+ }
+#ifdef PV_STATS
+ if ((ii = invl_max_qlen) < i)
+ atomic_cmpset_int(&invl_max_qlen, ii, i);
+#endif
+
+ if (!pmap_di_load_invl(p, &prev) || prev.next != NULL) {
+ PV_STAT(atomic_add_long(&invl_start_restart, 1));
+ lock_delay(&lda);
+ goto again;
+ }
+
+ new_prev.gen = prev.gen;
+ new_prev.next = invl_gen;
+ invl_gen->gen = prev.gen + 1;
+
+ /* Formal fence between store to invl->gen and updating *p. */
+ atomic_thread_fence_rel();
+
+ /*
+ * After inserting an invl_gen element with invalid bit set,
+ * this thread blocks any other thread trying to enter the
+ * delayed invalidation block. Do not allow to remove us from
+ * the CPU, because it causes starvation for other threads.
+ */
+ critical_enter();
+
+ /*
+ * ABA for *p is not possible there, since p->gen can only
+ * increase. So if the *p thread finished its di, then
+ * started a new one and got inserted into the list at the
+ * same place, its gen will appear greater than the previously
+ * read gen.
+ */
+ if (!pmap_di_store_invl(p, &prev, &new_prev)) {
+ critical_exit();
+ PV_STAT(atomic_add_long(&invl_start_restart, 1));
+ lock_delay(&lda);
+ goto again;
+ }
+
+ /*
+ * There we clear PMAP_INVL_GEN_NEXT_INVALID in
+ * invl_gen->next, allowing other threads to iterate past us.
+ * pmap_di_store_invl() provides fence between the generation
+ * write and the update of next.
+ */
+ invl_gen->next = NULL;
+ critical_exit();
+}
+
+static bool
+pmap_delayed_invl_finish_u_crit(struct pmap_invl_gen *invl_gen,
+ struct pmap_invl_gen *p)
+{
+ struct pmap_invl_gen prev, new_prev;
+ u_long mygen;
+
+ /*
+ * Load invl_gen->gen after setting invl_gen->next
+ * PMAP_INVL_GEN_NEXT_INVALID. This prevents larger
+ * generations to propagate to our invl_gen->gen. Lock prefix
+ * in atomic_set_ptr() worked as seq_cst fence.
+ */
+ mygen = atomic_load_long(&invl_gen->gen);
+
+ if (!pmap_di_load_invl(p, &prev) || prev.next != invl_gen)
+ return (false);
+
+ KASSERT(prev.gen < mygen,
+ ("invalid di gen sequence %lu %lu", prev.gen, mygen));
+ new_prev.gen = mygen;
+ new_prev.next = (void *)((uintptr_t)invl_gen->next &
+ ~PMAP_INVL_GEN_NEXT_INVALID);
+
+ /* Formal fence between load of prev and storing update to it. */
+ atomic_thread_fence_rel();
+
+ return (pmap_di_store_invl(p, &prev, &new_prev));
+}
+
+static void
+pmap_delayed_invl_finish_u(void)
+{
+ struct pmap_invl_gen *invl_gen, *p;
+ struct thread *td;
+ struct lock_delay_arg lda;
+ uintptr_t prevl;
+
+ td = curthread;
+ invl_gen = &td->td_md.md_invl_gen;
+ KASSERT(invl_gen->gen != 0, ("missed invl_start: gen 0"));
+ KASSERT(((uintptr_t)invl_gen->next & PMAP_INVL_GEN_NEXT_INVALID) == 0,
+ ("missed invl_start: INVALID"));
+ lock_delay_arg_init(&lda, &di_delay);
+
+again:
+ for (p = &pmap_invl_gen_head; p != NULL; p = (void *)prevl) {
+ prevl = atomic_load_ptr(&p->next);
+ if ((prevl & PMAP_INVL_GEN_NEXT_INVALID) != 0) {
+ PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+ lock_delay(&lda);
+ goto again;
+ }
+ if ((void *)prevl == invl_gen)
+ break;
+ }
+
+ /*
+ * It is legitimate to not find ourself on the list if a
+ * thread before us finished its DI and started it again.
+ */
+ if (__predict_false(p == NULL)) {
+ PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+ lock_delay(&lda);
+ goto again;
+ }
+
+ critical_enter();
+ atomic_set_ptr((uintptr_t *)&invl_gen->next,
+ PMAP_INVL_GEN_NEXT_INVALID);
+ if (!pmap_delayed_invl_finish_u_crit(invl_gen, p)) {
+ atomic_clear_ptr((uintptr_t *)&invl_gen->next,
+ PMAP_INVL_GEN_NEXT_INVALID);
+ critical_exit();
+ PV_STAT(atomic_add_long(&invl_finish_restart, 1));
+ lock_delay(&lda);
+ goto again;
+ }
+ critical_exit();
+ if (atomic_load_int(&pmap_invl_waiters) > 0)
+ pmap_delayed_invl_finish_unblock(0);
+ if (invl_gen->saved_pri != 0) {
+ thread_lock(td);
+ sched_prio(td, invl_gen->saved_pri);
+ thread_unlock(td);
+ }
+}
+
+#ifdef DDB
+DB_SHOW_COMMAND(di_queue, pmap_di_queue)
+{
+ struct pmap_invl_gen *p, *pn;
+ struct thread *td;
+ uintptr_t nextl;
+ bool first;
+
+ for (p = &pmap_invl_gen_head, first = true; p != NULL; p = pn,
+ first = false) {
+ nextl = atomic_load_ptr(&p->next);
+ pn = (void *)(nextl & ~PMAP_INVL_GEN_NEXT_INVALID);
+ td = first ? NULL : __containerof(p, struct thread,
+ td_md.md_invl_gen);
+ db_printf("gen %lu inv %d td %p tid %d\n", p->gen,
+ (nextl & PMAP_INVL_GEN_NEXT_INVALID) != 0, td,
+ td != NULL ? td->td_tid : -1);
+ }
+}
+#endif
+
+#ifdef PV_STATS
static long invl_wait;
SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0,
"Number of times DI invalidation blocked pmap_remove_all/write");
+static long invl_wait_slow;
+SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait_slow, CTLFLAG_RD, &invl_wait_slow, 0,
+ "Number of slow invalidation waits for lockless DI");
#endif
static u_long *
@@ -571,6 +929,27 @@ pmap_delayed_invl_genp(vm_page_t m)
return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]);
}
+static void
+pmap_delayed_invl_callout_func(void *arg __unused)
+{
+
+ if (atomic_load_int(&pmap_invl_waiters) == 0)
+ return;
+ pmap_delayed_invl_finish_unblock(0);
+}
+
+static void
+pmap_delayed_invl_callout_init(void *arg __unused)
+{
+
+ if (pmap_di_locked())
+ return;
+ callout_init(&pmap_invl_callout, 1);
+ pmap_invl_callout_inited = true;
+}
+SYSINIT(pmap_di_callout, SI_SUB_CPU + 1, SI_ORDER_ANY,
+ pmap_delayed_invl_callout_init, NULL);
+
/*
* Ensure that all currently executing DI blocks, that need to flush
* TLB for the given page m, actually flushed the TLB at the time the
@@ -586,9 +965,8 @@ pmap_delayed_invl_genp(vm_page_t m)
* processor.
*/
static void
-pmap_delayed_invl_wait(vm_page_t m)
+pmap_delayed_invl_wait_l(vm_page_t m)
{
- struct turnstile *ts;
u_long *m_gen;
#ifdef PV_STATS
bool accounted = false;
@@ -602,14 +980,92 @@ pmap_delayed_invl_wait(vm_page_t m)
accounted = true;
}
#endif
- ts = turnstile_trywait(&invl_gen_ts);
- if (*m_gen > pmap_invl_gen)
- turnstile_wait(ts, NULL, TS_SHARED_QUEUE);
- else
- turnstile_cancel(ts);
+ pmap_delayed_invl_wait_block(m_gen, &pmap_invl_gen);
}
}
+static void
+pmap_delayed_invl_wait_u(vm_page_t m)
+{
+ u_long *m_gen;
+ struct lock_delay_arg lda;
+ bool fast;
+
+ fast = true;
+ m_gen = pmap_delayed_invl_genp(m);
+ lock_delay_arg_init(&lda, &di_delay);
+ while (*m_gen > atomic_load_long(&pmap_invl_gen_head.gen)) {
+ if (fast || !pmap_invl_callout_inited) {
+ PV_STAT(atomic_add_long(&invl_wait, 1));
+ lock_delay(&lda);
+ fast = false;
+ } else {
+ /*
+ * The page's invalidation generation number
+ * is still below the current thread's number.
+ * Prepare to block so that we do not waste
+ * CPU cycles or worse, suffer livelock.
+ *
+ * Since it is impossible to block without
+ * racing with pmap_delayed_invl_finish_u(),
+ * prepare for the race by incrementing
+ * pmap_invl_waiters and arming a 1-tick
+ * callout which will unblock us if we lose
+ * the race.
+ */
+ atomic_add_int(&pmap_invl_waiters, 1);
+
+ /*
+ * Re-check the current thread's invalidation
+ * generation after incrementing
+ * pmap_invl_waiters, so that there is no race
+ * with pmap_delayed_invl_finish_u() setting
+ * the page generation and checking
+ * pmap_invl_waiters. The only race allowed
+ * is for a missed unblock, which is handled
+ * by the callout.
+ */
+ if (*m_gen >
+ atomic_load_long(&pmap_invl_gen_head.gen)) {
+ callout_reset(&pmap_invl_callout, 1,
+ pmap_delayed_invl_callout_func, NULL);
+ PV_STAT(atomic_add_long(&invl_wait_slow, 1));
+ pmap_delayed_invl_wait_block(m_gen,
+ &pmap_invl_gen_head.gen);
+ }
+ atomic_add_int(&pmap_invl_waiters, -1);
+ }
+ }
+}
+
+DEFINE_IFUNC(, void, pmap_thread_init_invl_gen, (struct thread *), static)
+{
+
+ return (pmap_di_locked() ? pmap_thread_init_invl_gen_l :
+ pmap_thread_init_invl_gen_u);
+}
+
+DEFINE_IFUNC(static, void, pmap_delayed_invl_start, (void), static)
+{
+
+ return (pmap_di_locked() ? pmap_delayed_invl_start_l :
+ pmap_delayed_invl_start_u);
+}
+
+DEFINE_IFUNC(static, void, pmap_delayed_invl_finish, (void), static)
+{
+
+ return (pmap_di_locked() ? pmap_delayed_invl_finish_l :
+ pmap_delayed_invl_finish_u);
+}
+
+DEFINE_IFUNC(static, void, pmap_delayed_invl_wait, (vm_page_t), static)
+{
+
+ return (pmap_di_locked() ? pmap_delayed_invl_wait_l :
+ pmap_delayed_invl_wait_u);
+}
+
/*
* Mark the page m's PV list as participating in the current thread's
* DI block. Any threads concurrently using m's PV list to remove or
@@ -619,7 +1075,7 @@ pmap_delayed_invl_wait(vm_page_t m)
* The function works by setting the DI generation number for m's PV
* list to at least the DI generation number of the current thread.
* This forces a caller of pmap_delayed_invl_wait() to block until
- * current thread calls pmap_delayed_invl_finished().
+ * current thread calls pmap_delayed_invl_finish().
*/
static void
pmap_delayed_invl_page(vm_page_t m)
@@ -2869,6 +3325,7 @@ void
pmap_pinit0(pmap_t pmap)
{
struct proc *p;
+ struct thread *td;
int i;
PMAP_LOCK_INIT(pmap);
@@ -2887,12 +3344,14 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pcids[i].pm_gen = 1;
}
pmap_activate_boot(pmap);
+ td = curthread;
if (pti) {
- p = curproc;
+ p = td->td_proc;
PROC_LOCK(p);
p->p_amd64_md_flags |= P_MD_KPTI;
PROC_UNLOCK(p);
}
+ pmap_thread_init_invl_gen(td);
if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
@@ -3450,7 +3909,7 @@ reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked
if (pmap != locked_pmap)
PMAP_UNLOCK(pmap);
if (start_di)
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finish();
}
/*
@@ -3533,13 +3992,13 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
RELEASE_PV_LIST_LOCK(lockp);
PMAP_LOCK(pmap);
if (start_di)
- pmap_delayed_invl_started();
+ pmap_delayed_invl_start();
mtx_lock(&pv_chunks_mutex);
continue;
} else if (pmap != locked_pmap) {
if (PMAP_TRYLOCK(pmap)) {
if (start_di)
- pmap_delayed_invl_started();
+ pmap_delayed_invl_start();
mtx_lock(&pv_chunks_mutex);
continue;
} else {
@@ -3552,7 +4011,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **l
goto next_chunk;
}
} else if (start_di)
- pmap_delayed_invl_started();
+ pmap_delayed_invl_start();
PG_G = pmap_global_bit(pmap);
PG_A = pmap_accessed_bit(pmap);
PG_M = pmap_modified_bit(pmap);
@@ -4531,7 +4990,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
anyvalid = 0;
SLIST_INIT(&free);
- pmap_delayed_invl_started();
+ pmap_delayed_invl_start();
PMAP_LOCK(pmap);
pmap_pkru_on_remove(pmap, sva, eva);
@@ -4628,7 +5087,7 @@ out:
if (anyvalid)
pmap_invalidate_all(pmap);
PMAP_UNLOCK(pmap);
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finish();
vm_page_free_pages_toq(&free, true);
}
@@ -4812,8 +5271,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
/*
* Although this function delays and batches the invalidation
* of stale TLB entries, it does not need to call
- * pmap_delayed_invl_started() and
- * pmap_delayed_invl_finished(), because it does not
+ * pmap_delayed_invl_start() and
+ * pmap_delayed_invl_finish(), because it does not
* ordinarily destroy mappings. Stale TLB entries from
* protection-only changes need only be invalidated before the
* pmap lock is released, because protection-only changes do
@@ -5440,11 +5899,11 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t
if ((oldpde & PG_G) == 0)
pmap_invalidate_pde_page(pmap, va, oldpde);
} else {
- pmap_delayed_invl_started();
+ pmap_delayed_invl_start();
if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free,
lockp))
pmap_invalidate_all(pmap);
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finish();
}
vm_page_free_pages_toq(&free, true);
if (va >= VM_MAXUSER_ADDRESS) {
@@ -5800,7 +6259,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_
* The wired attribute of the page table entry is not a hardware
* feature, so there is no need to invalidate any TLB entries.
* Since pmap_demote_pde() for the wired entry must never fail,
- * pmap_delayed_invl_started()/finished() calls around the
+ * pmap_delayed_invl_start()/finish() calls around the
* function are not needed.
*/
void
@@ -6299,8 +6758,8 @@ pmap_page_is_mapped(vm_page_t m)
*
* Although this function destroys all of the pmap's managed,
* non-wired mappings, it can delay and batch the invalidation of TLB
- * entries without calling pmap_delayed_invl_started() and
- * pmap_delayed_invl_finished(). Because the pmap is not active on
+ * entries without calling pmap_delayed_invl_start() and
+ * pmap_delayed_invl_finish(). Because the pmap is not active on
* any other processor, none of these TLB entries will ever be used
* before their eventual invalidation. Consequently, there is no need
* for either pmap_remove_all() or pmap_remove_write() to wait for
@@ -7005,7 +7464,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
anychanged = FALSE;
- pmap_delayed_invl_started();
+ pmap_delayed_invl_start();
PMAP_LOCK(pmap);
for (; sva < eva; sva = va_next) {
pml4e = pmap_pml4e(pmap, sva);
@@ -7102,7 +7561,7 @@ maybe_invlrng:
if (anychanged)
pmap_invalidate_all(pmap);
PMAP_UNLOCK(pmap);
- pmap_delayed_invl_finished();
+ pmap_delayed_invl_finish();
}
/*
@@ -9433,11 +9892,7 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offse
return (error);
}
-#include "opt_ddb.h"
#ifdef DDB
-#include <sys/kdb.h>
-#include <ddb/ddb.h>
-
DB_SHOW_COMMAND(pte, pmap_print_pte)
{
pmap_t pmap;
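
To summarize how the renamed primitives above fit together, here is a condensed, hypothetical caller; it is not part of the commit, remove_mappings() and wait_for_page() are placeholder names, and it only follows the call pattern visible in the pmap_remove() and pmap_delayed_invl_wait_*() changes in this file.

    /*
     * Destroyer side: enter a DI block, tear down mappings, then publish
     * the generation so waiters know the affected TLB entries are gone.
     */
    static void
    remove_mappings(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
    {
            pmap_delayed_invl_start();      /* join the DI generation list */
            PMAP_LOCK(pmap);
            /*
             * ... remove PTEs; for each managed page touched,
             * pmap_delayed_invl_page(m) records the current thread's
             * generation for the page's PV list ...
             */
            PMAP_UNLOCK(pmap);
            pmap_delayed_invl_finish();     /* propagate gen, wake waiters */
    }

    /*
     * Waiter side (pmap_remove_all()/pmap_remove_write()): block until every
     * DI block that might still leave stale TLB entries for m has finished.
     */
    static void
    wait_for_page(vm_page_t m)
    {
            pmap_delayed_invl_wait(m);
    }

In the lockless mode the waiter first spins with lock_delay() and only falls back to the turnstile, guarded against a lost wakeup by the 1-tick callout, when the page generation still lags; that is what pmap_delayed_invl_wait_u() above implements.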
Modified: stable/12/sys/amd64/amd64/trap.c
==============================================================================
--- stable/12/sys/amd64/amd64/trap.c Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/amd64/amd64/trap.c Fri Jul 26 19:35:33 2019 (r350365)
@@ -1203,7 +1203,7 @@ amd64_syscall(struct thread *td, int traced)
KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
("System call %s returning with mangled pcb_save",
syscallname(td->td_proc, td->td_sa.code)));
- KASSERT(td->td_md.md_invl_gen.gen == 0,
+ KASSERT(pmap_not_in_di(),
("System call %s returning with leaked invl_gen %lu",
syscallname(td->td_proc, td->td_sa.code),
td->td_md.md_invl_gen.gen));
Modified: stable/12/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- stable/12/sys/amd64/amd64/vm_machdep.c Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/amd64/amd64/vm_machdep.c Fri Jul 26 19:35:33 2019 (r350365)
@@ -229,7 +229,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct t
/* Setup to release spin count in fork_exit(). */
td2->td_md.md_spinlock_count = 1;
td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
- td2->td_md.md_invl_gen.gen = 0;
+ pmap_thread_init_invl_gen(td2);
/* As an i386, do not copy io permission bitmap. */
pcb2->pcb_tssp = NULL;
@@ -545,6 +545,7 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
/* Setup to release spin count in fork_exit(). */
td->td_md.md_spinlock_count = 1;
td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+ pmap_thread_init_invl_gen(td);
}
/*
Modified: stable/12/sys/amd64/include/pmap.h
==============================================================================
--- stable/12/sys/amd64/include/pmap.h Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/amd64/include/pmap.h Fri Jul 26 19:35:33 2019 (r350365)
@@ -442,6 +442,7 @@ void *pmap_mapbios(vm_paddr_t, vm_size_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
void *pmap_mapdev_attr(vm_paddr_t, vm_size_t, int);
void *pmap_mapdev_pciecfg(vm_paddr_t pa, vm_size_t size);
+bool pmap_not_in_di(void);
boolean_t pmap_page_is_mapped(vm_page_t m);
void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
void pmap_pinit_pml4(vm_page_t);
@@ -466,6 +467,7 @@ void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr
int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
u_int keyidx, int flags);
+void pmap_thread_init_invl_gen(struct thread *td);
int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
#endif /* _KERNEL */
Modified: stable/12/sys/amd64/include/proc.h
==============================================================================
--- stable/12/sys/amd64/include/proc.h Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/amd64/include/proc.h Fri Jul 26 19:35:33 2019 (r350365)
@@ -50,10 +50,17 @@ struct proc_ldt {
int ldt_refcnt;
};
+#define PMAP_INVL_GEN_NEXT_INVALID 0x1ULL
struct pmap_invl_gen {
u_long gen; /* (k) */
- LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
-};
+ union {
+ LIST_ENTRY(pmap_invl_gen) link; /* (pp) */
+ struct {
+ struct pmap_invl_gen *next;
+ u_char saved_pri;
+ };
+ };
+} __aligned(16);
/*
* Machine-dependent part of the proc structure for AMD64.
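
The __aligned(16) added above is not cosmetic: gen and next (the first member of the anonymous struct) form the 16-byte operand of cmpxchg16b, and that instruction faults on a misaligned operand. A stand-alone sketch of the layout invariants, using mirrored types rather than the kernel header:

    #include <stdalign.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Mirror of the CAS-visible prefix of struct pmap_invl_gen. */
    struct di_prefix {
            uint64_t                gen;    /* low quadword of the CAS operand */
            struct di_prefix        *next;  /* high quadword; bit 0 is
                                               PMAP_INVL_GEN_NEXT_INVALID */
    } __attribute__((aligned(16)));

    _Static_assert(offsetof(struct di_prefix, next) == 8,
        "gen and next must be adjacent quadwords");
    _Static_assert(alignof(struct di_prefix) == 16,
        "cmpxchg16b faults on a misaligned 16-byte operand");

The union lets the locked implementation keep using the LIST_ENTRY link, while the lockless one overlays next and saved_pri (the base priority restored by pmap_delayed_invl_finish_u()) in the same storage.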
Modified: stable/12/sys/sys/proc.h
==============================================================================
--- stable/12/sys/sys/proc.h Fri Jul 26 19:16:02 2019 (r350364)
+++ stable/12/sys/sys/proc.h Fri Jul 26 19:35:33 2019 (r350365)
@@ -343,7 +343,11 @@ struct thread {
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */
int td_kstack_pages; /* (a) Size of the kstack. */
volatile u_int td_critnest; /* (k*) Critical section nest level. */
+#ifdef __amd64__
+ uint32_t td_md_pad0[16];
+#else
struct mdthread td_md; /* (k) Any machine-dependent fields. */
+#endif
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
struct lpohead td_lprof[2]; /* (a) lock profiling objects. */
struct kdtrace_thread *td_dtrace; /* (*) DTrace-specific data. */
@@ -361,6 +365,9 @@ struct thread {
int td_oncpu; /* (t) Which cpu we are on. */
void *td_lkpi_task; /* LinuxKPI task struct pointer */
int td_pmcpend;
+#ifdef __amd64__
+ struct mdthread td_md; /* (k) Any machine-dependent fields. */
+#endif
};
struct thread0_storage {