svn commit: r344848 - in stable/12/sys: amd64/amd64 amd64/include arm/include arm64/include i386/include mips/include powerpc/include riscv/include sparc64/include vm x86/include
Konstantin Belousov
kib at FreeBSD.org
Wed Mar 6 17:33:09 UTC 2019
Author: kib
Date: Wed Mar 6 17:33:05 2019
New Revision: 344848
URL: https://svnweb.freebsd.org/changeset/base/344848
Log:
MFC r344353:
Add kernel support for the Intel userspace protection keys (PKU)
feature on Skylake Xeons.
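The sysarch(2) operations added below (AMD64_SET_PKRU and
AMD64_CLEAR_PKRU, with I386_* counterparts for 32-bit processes) tag a
range of user address space with one of 16 protection keys; userspace
then toggles per-key access rights by writing the PKRU register with
the WRPKRU instruction. The following minimal sketch is not part of
the commit: the PKRU_AD() helper and wrpkru() wrapper are illustrative
names, error handling is abbreviated, and it assumes a PKU-capable CPU
and an assembler that understands the wrpkru mnemonic.

#include <sys/types.h>
#include <sys/mman.h>
#include <machine/sysarch.h>

#include <err.h>
#include <unistd.h>

/* Access-disable bit for key k; PKRU holds two bits per key. */
#define	PKRU_AD(k)	(1u << (2 * (k)))

static void
wrpkru(unsigned int pkru)
{

	/* WRPKRU loads EAX into PKRU; ECX and EDX must be zero. */
	__asm __volatile("wrpkru" : : "a" (pkru), "c" (0), "d" (0));
}

int
main(void)
{
	struct amd64_set_pkru spk;
	long pgsz;
	char *p;

	pgsz = sysconf(_SC_PAGESIZE);
	p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* Tag the page with protection key 1. */
	spk.addr = p;
	spk.len = pgsz;
	spk.keyidx = 1;
	spk.flags = 0;
	if (sysarch(AMD64_SET_PKRU, &spk) != 0)
		err(1, "sysarch(AMD64_SET_PKRU)");

	wrpkru(PKRU_AD(1));	/* revoke all access for key 1 */
	/* p[0] = 1; here would fault with PGEX_PK set */
	wrpkru(0);		/* restore access */
	p[0] = 1;		/* succeeds again */
	return (0);
}

Per the new pmap_pkru_assign() below, AMD64_PKRU_EXCL makes the
assignment fail with EBUSY if any page in the range already carries a
key, and AMD64_PKRU_PERSIST keeps the key assignment in place when the
range is later unmapped.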
Modified:
stable/12/sys/amd64/amd64/initcpu.c
stable/12/sys/amd64/amd64/pmap.c
stable/12/sys/amd64/amd64/sys_machdep.c
stable/12/sys/amd64/amd64/trap.c
stable/12/sys/amd64/include/pmap.h
stable/12/sys/arm/include/pmap.h
stable/12/sys/arm64/include/pmap.h
stable/12/sys/i386/include/pmap.h
stable/12/sys/mips/include/pmap.h
stable/12/sys/powerpc/include/pmap.h
stable/12/sys/riscv/include/pmap.h
stable/12/sys/sparc64/include/pmap.h
stable/12/sys/vm/vm_fault.c
stable/12/sys/vm/vm_map.c
stable/12/sys/x86/include/sysarch.h
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/sys/amd64/amd64/initcpu.c
==============================================================================
--- stable/12/sys/amd64/amd64/initcpu.c Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/amd64/amd64/initcpu.c Wed Mar 6 17:33:05 2019 (r344848)
@@ -233,6 +233,9 @@ initializecpu(void)
if (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE)
cr4 |= CR4_FSGSBASE;
+ if (cpu_stdext_feature2 & CPUID_STDEXT2_PKU)
+ cr4 |= CR4_PKE;
+
/*
* Postpone enabling the SMEP on the boot CPU until the page
* tables are switched from the boot loader identity mapping
Modified: stable/12/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/12/sys/amd64/amd64/pmap.c Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/amd64/amd64/pmap.c Wed Mar 6 17:33:05 2019 (r344848)
@@ -48,7 +48,7 @@
*/
/*-
* Copyright (c) 2003 Networks Associates Technology, Inc.
- * Copyright (c) 2014-2018 The FreeBSD Foundation
+ * Copyright (c) 2014-2019 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Jake Burkholder,
@@ -121,6 +121,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
+#include <sys/rangeset.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/turnstile.h>
@@ -155,6 +156,7 @@ __FBSDID("$FreeBSD$");
#ifdef SMP
#include <machine/smp.h>
#endif
+#include <machine/sysarch.h>
#include <machine/tss.h>
static __inline boolean_t
@@ -285,6 +287,13 @@ pmap_modified_bit(pmap_t pmap)
return (mask);
}
+static __inline pt_entry_t
+pmap_pku_mask_bit(pmap_t pmap)
+{
+
+ return (pmap->pm_type == PT_X86 ? X86_PG_PKU_MASK : 0);
+}
+
#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE __attribute__((__gnu_inline__)) inline
@@ -429,6 +438,22 @@ static pml4_entry_t *pti_pml4;
static vm_pindex_t pti_pg_idx;
static bool pti_finalized;
+struct pmap_pkru_range {
+ struct rs_el pkru_rs_el;
+ u_int pkru_keyidx;
+ int pkru_flags;
+};
+
+static uma_zone_t pmap_pkru_ranges_zone;
+static bool pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static pt_entry_t pmap_pkru_get(pmap_t pmap, vm_offset_t va);
+static void pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static void *pkru_dup_range(void *ctx, void *data);
+static void pkru_free_range(void *ctx, void *node);
+static int pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap);
+static int pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static void pmap_pkru_deassign_all(pmap_t pmap);
+
static int
pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS)
{
@@ -2851,6 +2876,12 @@ pmap_pinit0(pmap_t pmap)
pmap->pm_pcids[i].pm_gen = 1;
}
pmap_activate_boot(pmap);
+
+ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+ pmap_pkru_ranges_zone = uma_zcreate("pkru ranges",
+ sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_PTR, 0);
+ }
}
void
@@ -2939,6 +2970,10 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, i
pmap_pinit_pml4_pti(pml4pgu);
pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu);
}
+ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+ rangeset_init(&pmap->pm_pkru, pkru_dup_range,
+ pkru_free_range, pmap, M_NOWAIT);
+ }
}
pmap->pm_root.rt_root = 0;
@@ -3235,6 +3270,9 @@ pmap_release(pmap_t pmap)
vm_page_unwire_noq(m);
vm_page_free(m);
}
+ if (pmap->pm_type == PT_X86 &&
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
+ rangeset_fini(&pmap->pm_pkru);
}
static int
@@ -4065,7 +4103,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, v
{
pd_entry_t newpde, oldpde;
pt_entry_t *firstpte, newpte;
- pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V;
+ pt_entry_t PG_A, PG_G, PG_M, PG_PKU_MASK, PG_RW, PG_V;
vm_paddr_t mptepa;
vm_page_t mpte;
struct spglist free;
@@ -4078,6 +4116,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, v
PG_RW = pmap_rw_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
+ PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
oldpde = *pde;
@@ -4510,6 +4549,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
out:
if (anyvalid)
pmap_invalidate_all(pmap);
+ pmap_pkru_on_remove(pmap, sva, eva);
PMAP_UNLOCK(pmap);
pmap_delayed_invl_finished();
vm_page_free_pages_toq(&free, true);
@@ -4821,7 +4861,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offs
{
pd_entry_t newpde;
pt_entry_t *firstpte, oldpte, pa, *pte;
- pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V;
+ pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V, PG_PKU_MASK;
vm_page_t mpte;
int PG_PTE_CACHE;
@@ -4830,6 +4870,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offs
PG_M = pmap_modified_bit(pmap);
PG_V = pmap_valid_bit(pmap);
PG_RW = pmap_rw_bit(pmap);
+ PG_PKU_MASK = pmap_pku_mask_bit(pmap);
PG_PTE_CACHE = pmap_cache_mask(pmap, 0);
PMAP_LOCK_ASSERT(pmap, MA_OWNED);
@@ -5057,6 +5098,8 @@ retry:
origpte = *pte;
pv = NULL;
+ if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86)
+ newpte |= pmap_pkru_get(pmap, va);
/*
* Is the specified virtual address already mapped?
@@ -5276,6 +5319,25 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t
" in pmap %p", va, pmap);
return (KERN_RESOURCE_SHORTAGE);
}
+
+ /*
+ * If the PKRU key is not the same for the whole pde range, return
+ * failure and let vm_fault() cope.  Check after pde allocation, since
+ * it could sleep.
+ */
+ if (!pmap_pkru_same(pmap, va, va + NBPDR)) {
+ SLIST_INIT(&free);
+ if (pmap_unwire_ptp(pmap, va, pdpg, &free)) {
+ pmap_invalidate_page(pmap, va);
+ vm_page_free_pages_toq(&free, true);
+ }
+ return (KERN_FAILURE);
+ }
+ if (va < VM_MAXUSER_ADDRESS && pmap->pm_type == PT_X86) {
+ newpde &= ~X86_PG_PKU_MASK;
+ newpde |= pmap_pkru_get(pmap, va);
+ }
+
pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg));
pde = &pde[pmap_pde_index(va)];
oldpde = *pde;
@@ -5535,7 +5597,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, v
if ((prot & VM_PROT_EXECUTE) == 0)
newpte |= pg_nx;
if (va < VM_MAXUSER_ADDRESS)
- newpte |= PG_U;
+ newpte |= PG_U | pmap_pkru_get(pmap, va);
pte_store(pte, newpte);
return (mpte);
}
@@ -5911,6 +5973,36 @@ out:
PMAP_UNLOCK(dst_pmap);
}
+int
+pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
+{
+ int error;
+
+ if (dst_pmap->pm_type != src_pmap->pm_type ||
+ dst_pmap->pm_type != PT_X86 ||
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
+ return (0);
+ for (;;) {
+ if (dst_pmap < src_pmap) {
+ PMAP_LOCK(dst_pmap);
+ PMAP_LOCK(src_pmap);
+ } else {
+ PMAP_LOCK(src_pmap);
+ PMAP_LOCK(dst_pmap);
+ }
+ error = pmap_pkru_copy(dst_pmap, src_pmap);
+ /* Clean up partial copy on failure due to no memory. */
+ if (error == ENOMEM)
+ pmap_pkru_deassign_all(dst_pmap);
+ PMAP_UNLOCK(src_pmap);
+ PMAP_UNLOCK(dst_pmap);
+ if (error != ENOMEM)
+ break;
+ vm_wait(NULL);
+ }
+ return (error);
+}
+
/*
* Zero the specified hardware page.
*/
@@ -6310,6 +6402,7 @@ pmap_remove_pages(pmap_t pmap)
if (lock != NULL)
rw_wunlock(lock);
pmap_invalidate_all(pmap);
+ pmap_pkru_deassign_all(pmap);
PMAP_UNLOCK(pmap);
vm_page_free_pages_toq(&free, true);
}
@@ -8944,6 +9037,287 @@ pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva)
}
pmap_invalidate_range(kernel_pmap, sva, eva);
VM_OBJECT_WUNLOCK(pti_obj);
+}
+
+static void *
+pkru_dup_range(void *ctx __unused, void *data)
+{
+ struct pmap_pkru_range *node, *new_node;
+
+ new_node = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
+ if (new_node == NULL)
+ return (NULL);
+ node = data;
+ memcpy(new_node, node, sizeof(*node));
+ return (new_node);
+}
+
+static void
+pkru_free_range(void *ctx __unused, void *node)
+{
+
+ uma_zfree(pmap_pkru_ranges_zone, node);
+}
+
+static int
+pmap_pkru_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
+ int flags)
+{
+ struct pmap_pkru_range *ppr;
+ int error;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ MPASS(pmap->pm_type == PT_X86);
+ MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
+ if ((flags & AMD64_PKRU_EXCL) != 0 &&
+ !rangeset_check_empty(&pmap->pm_pkru, sva, eva))
+ return (EBUSY);
+ ppr = uma_zalloc(pmap_pkru_ranges_zone, M_NOWAIT);
+ if (ppr == NULL)
+ return (ENOMEM);
+ ppr->pkru_keyidx = keyidx;
+ ppr->pkru_flags = flags & AMD64_PKRU_PERSIST;
+ error = rangeset_insert(&pmap->pm_pkru, sva, eva, ppr);
+ if (error != 0)
+ uma_zfree(pmap_pkru_ranges_zone, ppr);
+ return (error);
+}
+
+static int
+pmap_pkru_deassign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ MPASS(pmap->pm_type == PT_X86);
+ MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
+ return (rangeset_remove(&pmap->pm_pkru, sva, eva));
+}
+
+static void
+pmap_pkru_deassign_all(pmap_t pmap)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type == PT_X86 &&
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0)
+ rangeset_remove_all(&pmap->pm_pkru);
+}
+
+static bool
+pmap_pkru_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ struct pmap_pkru_range *ppr, *prev_ppr;
+ vm_offset_t va;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type != PT_X86 ||
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
+ sva >= VM_MAXUSER_ADDRESS)
+ return (true);
+ MPASS(eva <= VM_MAXUSER_ADDRESS);
+ for (va = sva; va < eva; prev_ppr = ppr) {
+ ppr = rangeset_lookup(&pmap->pm_pkru, va);
+ if (va == sva)
+ prev_ppr = ppr;
+ if ((ppr == NULL) ^ (prev_ppr == NULL))
+ return (false);
+ if (ppr == NULL) {
+ va += PAGE_SIZE;
+ continue;
+ }
+ if (prev_ppr->pkru_keyidx != ppr->pkru_keyidx)
+ return (false);
+ va = ppr->pkru_rs_el.re_end;
+ }
+ return (true);
+}
+
+static pt_entry_t
+pmap_pkru_get(pmap_t pmap, vm_offset_t va)
+{
+ struct pmap_pkru_range *ppr;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type != PT_X86 ||
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0 ||
+ va >= VM_MAXUSER_ADDRESS)
+ return (0);
+ ppr = rangeset_lookup(&pmap->pm_pkru, va);
+ if (ppr != NULL)
+ return (X86_PG_PKU(ppr->pkru_keyidx));
+ return (0);
+}
+
+static bool
+pred_pkru_on_remove(void *ctx __unused, void *r)
+{
+ struct pmap_pkru_range *ppr;
+
+ ppr = r;
+ return ((ppr->pkru_flags & AMD64_PKRU_PERSIST) == 0);
+}
+
+static void
+pmap_pkru_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ if (pmap->pm_type == PT_X86 &&
+ (cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) {
+ rangeset_remove_pred(&pmap->pm_pkru, sva, eva,
+ pred_pkru_on_remove);
+ }
+}
+
+static int
+pmap_pkru_copy(pmap_t dst_pmap, pmap_t src_pmap)
+{
+
+ PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
+ PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
+ MPASS(dst_pmap->pm_type == PT_X86);
+ MPASS(src_pmap->pm_type == PT_X86);
+ MPASS((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0);
+ if (src_pmap->pm_pkru.rs_data_ctx == NULL)
+ return (0);
+ return (rangeset_copy(&dst_pmap->pm_pkru, &src_pmap->pm_pkru));
+}
+
+static void
+pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ u_int keyidx)
+{
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+ pd_entry_t newpde, ptpaddr, *pde;
+ pt_entry_t newpte, *ptep, pte;
+ vm_offset_t va, va_next;
+ bool changed;
+
+ PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+ MPASS(pmap->pm_type == PT_X86);
+ MPASS(keyidx <= PMAP_MAX_PKRU_IDX);
+
+ for (changed = false, va = sva; va < eva; va = va_next) {
+ pml4e = pmap_pml4e(pmap, va);
+ if ((*pml4e & X86_PG_V) == 0) {
+ va_next = (va + NBPML4) & ~PML4MASK;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+
+ pdpe = pmap_pml4e_to_pdpe(pml4e, va);
+ if ((*pdpe & X86_PG_V) == 0) {
+ va_next = (va + NBPDP) & ~PDPMASK;
+ if (va_next < va)
+ va_next = eva;
+ continue;
+ }
+
+ va_next = (va + NBPDR) & ~PDRMASK;
+ if (va_next < va)
+ va_next = eva;
+
+ pde = pmap_pdpe_to_pde(pdpe, va);
+ ptpaddr = *pde;
+ if (ptpaddr == 0)
+ continue;
+
+ MPASS((ptpaddr & X86_PG_V) != 0);
+ if ((ptpaddr & PG_PS) != 0) {
+ if (va + NBPDR == va_next && eva >= va_next) {
+ newpde = (ptpaddr & ~X86_PG_PKU_MASK) |
+ X86_PG_PKU(keyidx);
+ if (newpde != ptpaddr) {
+ *pde = newpde;
+ changed = true;
+ }
+ continue;
+ } else if (!pmap_demote_pde(pmap, pde, va)) {
+ continue;
+ }
+ }
+
+ if (va_next > eva)
+ va_next = eva;
+
+ for (ptep = pmap_pde_to_pte(pde, va); va != va_next;
+ ptep++, va += PAGE_SIZE) {
+ pte = *ptep;
+ if ((pte & X86_PG_V) == 0)
+ continue;
+ newpte = (pte & ~X86_PG_PKU_MASK) | X86_PG_PKU(keyidx);
+ if (newpte != pte) {
+ *ptep = newpte;
+ changed = true;
+ }
+ }
+ }
+ if (changed)
+ pmap_invalidate_range(pmap, sva, eva);
+}
+
+static int
+pmap_pkru_check_uargs(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ u_int keyidx, int flags)
+{
+
+ if (pmap->pm_type != PT_X86 || keyidx > PMAP_MAX_PKRU_IDX ||
+ (flags & ~(AMD64_PKRU_PERSIST | AMD64_PKRU_EXCL)) != 0)
+ return (EINVAL);
+ if (eva <= sva || eva > VM_MAXUSER_ADDRESS)
+ return (EFAULT);
+ if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) == 0)
+ return (ENOTSUP);
+ return (0);
+}
+
+int
+pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, u_int keyidx,
+ int flags)
+{
+ int error;
+
+ sva = trunc_page(sva);
+ eva = round_page(eva);
+ error = pmap_pkru_check_uargs(pmap, sva, eva, keyidx, flags);
+ if (error != 0)
+ return (error);
+ for (;;) {
+ PMAP_LOCK(pmap);
+ error = pmap_pkru_assign(pmap, sva, eva, keyidx, flags);
+ if (error == 0)
+ pmap_pkru_update_range(pmap, sva, eva, keyidx);
+ PMAP_UNLOCK(pmap);
+ if (error != ENOMEM)
+ break;
+ vm_wait(NULL);
+ }
+ return (error);
+}
+
+int
+pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+ int error;
+
+ sva = trunc_page(sva);
+ eva = round_page(eva);
+ error = pmap_pkru_check_uargs(pmap, sva, eva, 0, 0);
+ if (error != 0)
+ return (error);
+ for (;;) {
+ PMAP_LOCK(pmap);
+ error = pmap_pkru_deassign(pmap, sva, eva);
+ if (error == 0)
+ pmap_pkru_update_range(pmap, sva, eva, 0);
+ PMAP_UNLOCK(pmap);
+ if (error != ENOMEM)
+ break;
+ vm_wait(NULL);
+ }
+ return (error);
}
#include "opt_ddb.h"
Modified: stable/12/sys/amd64/amd64/sys_machdep.c
==============================================================================
--- stable/12/sys/amd64/amd64/sys_machdep.c Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/amd64/amd64/sys_machdep.c Wed Mar 6 17:33:05 2019 (r344848)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/pcpu.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/smp.h>
@@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h> /* for kernel_map */
+#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <machine/frame.h>
@@ -170,13 +172,16 @@ update_gdt_fsbase(struct thread *td, uint32_t base)
int
sysarch(struct thread *td, struct sysarch_args *uap)
{
- int error = 0;
- struct pcb *pcb = curthread->td_pcb;
+ struct pcb *pcb;
+ struct vm_map *map;
uint32_t i386base;
uint64_t a64base;
struct i386_ioperm_args iargs;
struct i386_get_xfpustate i386xfpu;
+ struct i386_set_pkru i386pkru;
struct amd64_get_xfpustate a64xfpu;
+ struct amd64_set_pkru a64pkru;
+ int error;
#ifdef CAPABILITY_MODE
/*
@@ -194,11 +199,15 @@ sysarch(struct thread *td, struct sysarch_args *uap)
case I386_GET_GSBASE:
case I386_SET_GSBASE:
case I386_GET_XFPUSTATE:
+ case I386_SET_PKRU:
+ case I386_CLEAR_PKRU:
case AMD64_GET_FSBASE:
case AMD64_SET_FSBASE:
case AMD64_GET_GSBASE:
case AMD64_SET_GSBASE:
case AMD64_GET_XFPUSTATE:
+ case AMD64_SET_PKRU:
+ case AMD64_CLEAR_PKRU:
break;
case I386_SET_IOPERM:
@@ -214,6 +223,10 @@ sysarch(struct thread *td, struct sysarch_args *uap)
if (uap->op == I386_GET_LDT || uap->op == I386_SET_LDT)
return (sysarch_ldt(td, uap, UIO_USERSPACE));
+
+ error = 0;
+ pcb = td->td_pcb;
+
/*
* XXXKIB check that the BSM generation code knows to encode
* the op argument.
@@ -233,11 +246,27 @@ sysarch(struct thread *td, struct sysarch_args *uap)
a64xfpu.addr = (void *)(uintptr_t)i386xfpu.addr;
a64xfpu.len = i386xfpu.len;
break;
+ case I386_SET_PKRU:
+ case I386_CLEAR_PKRU:
+ if ((error = copyin(uap->parms, &i386pkru,
+ sizeof(struct i386_set_pkru))) != 0)
+ return (error);
+ a64pkru.addr = (void *)(uintptr_t)i386pkru.addr;
+ a64pkru.len = i386pkru.len;
+ a64pkru.keyidx = i386pkru.keyidx;
+ a64pkru.flags = i386pkru.flags;
+ break;
case AMD64_GET_XFPUSTATE:
if ((error = copyin(uap->parms, &a64xfpu,
sizeof(struct amd64_get_xfpustate))) != 0)
return (error);
break;
+ case AMD64_SET_PKRU:
+ case AMD64_CLEAR_PKRU:
+ if ((error = copyin(uap->parms, &a64pkru,
+ sizeof(struct amd64_set_pkru))) != 0)
+ return (error);
+ break;
default:
break;
}
@@ -324,6 +353,34 @@ sysarch(struct thread *td, struct sysarch_args *uap)
fpugetregs(td);
error = copyout((char *)(get_pcb_user_save_td(td) + 1),
a64xfpu.addr, a64xfpu.len);
+ break;
+
+ case I386_SET_PKRU:
+ case AMD64_SET_PKRU:
+ /*
+ * Read-lock the map to synchronize with parallel
+ * pmap_vmspace_copy() on fork.
+ */
+ map = &td->td_proc->p_vmspace->vm_map;
+ vm_map_lock_read(map);
+ error = pmap_pkru_set(PCPU_GET(curpmap),
+ (vm_offset_t)a64pkru.addr, (vm_offset_t)a64pkru.addr +
+ a64pkru.len, a64pkru.keyidx, a64pkru.flags);
+ vm_map_unlock_read(map);
+ break;
+
+ case I386_CLEAR_PKRU:
+ case AMD64_CLEAR_PKRU:
+ if (a64pkru.flags != 0 || a64pkru.keyidx != 0) {
+ error = EINVAL;
+ break;
+ }
+ map = &td->td_proc->p_vmspace->vm_map;
+ vm_map_lock_read(map);
+ error = pmap_pkru_clear(PCPU_GET(curpmap),
+ (vm_offset_t)a64pkru.addr,
+ (vm_offset_t)a64pkru.addr + a64pkru.len);
+ vm_map_unlock_read(map);
break;
default:
Modified: stable/12/sys/amd64/amd64/trap.c
==============================================================================
--- stable/12/sys/amd64/amd64/trap.c Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/amd64/amd64/trap.c Wed Mar 6 17:33:05 2019 (r344848)
@@ -808,6 +808,20 @@ trap_pfault(struct trapframe *frame, int usermode)
}
/*
+ * User-mode protection key violation (PKU).  May happen
+ * either from usermode or from the kernel if copyin(9)
+ * accessed a key-protected mapping.
+ */
+ if ((frame->tf_err & PGEX_PK) != 0) {
+ if (eva > VM_MAXUSER_ADDRESS) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ rv = KERN_PROTECTION_FAILURE;
+ goto after_vmfault;
+ }
+
+ /*
* If nx protection of the usermode portion of kernel page
* tables caused trap, panic.
*/
@@ -842,6 +856,7 @@ trap_pfault(struct trapframe *frame, int usermode)
#endif
return (0);
}
+after_vmfault:
if (!usermode) {
if (td->td_intr_nesting_level == 0 &&
curpcb->pcb_onfault != NULL) {
Modified: stable/12/sys/amd64/include/pmap.h
==============================================================================
--- stable/12/sys/amd64/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/amd64/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -66,6 +66,7 @@
#define X86_PG_AVAIL2 0x400 /* < programmers use */
#define X86_PG_AVAIL3 0x800 /* \ */
#define X86_PG_PDE_PAT 0x1000 /* PAT PAT index */
+#define X86_PG_PKU(idx) ((pt_entry_t)idx << 59)
#define X86_PG_NX (1ul<<63) /* No-execute */
#define X86_PG_AVAIL(x) (1ul << (x))
@@ -73,6 +74,10 @@
#define X86_PG_PDE_CACHE (X86_PG_PDE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
#define X86_PG_PTE_CACHE (X86_PG_PTE_PAT | X86_PG_NC_PWT | X86_PG_NC_PCD)
+/* Protection key indexes */
+#define PMAP_MAX_PKRU_IDX 0xf
+#define X86_PG_PKU_MASK X86_PG_PKU(PMAP_MAX_PKRU_IDX)
+
/*
* Intel extended page table (EPT) bit definitions.
*/
@@ -120,7 +125,7 @@
* (PTE) page mappings have identical settings for the following fields:
*/
#define PG_PTE_PROMOTE (PG_NX | PG_MANAGED | PG_W | PG_G | PG_PTE_CACHE | \
- PG_M | PG_A | PG_U | PG_RW | PG_V)
+ PG_M | PG_A | PG_U | PG_RW | PG_V | PG_PKU_MASK)
/*
* Page Protection Exception bits
@@ -242,6 +247,8 @@
#include <sys/_cpuset.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_pctrie.h>
+#include <sys/_rangeset.h>
#include <vm/_vm_radix.h>
@@ -336,6 +343,7 @@ struct pmap {
long pm_eptgen; /* EPT pmap generation id */
int pm_flags;
struct pmap_pcids pm_pcids[MAXCPU];
+ struct rangeset pm_pkru;
};
/* flags */
@@ -454,6 +462,10 @@ void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t
void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va);
void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva,
vm_offset_t eva);
+int pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
+ u_int keyidx, int flags);
+int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */
Modified: stable/12/sys/arm/include/pmap.h
==============================================================================
--- stable/12/sys/arm/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/arm/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -71,5 +71,12 @@ void pmap_kremove_device(vm_offset_t, vm_size_t);
vm_paddr_t pmap_kextract(vm_offset_t);
#define vtophys(va) pmap_kextract((vm_offset_t)(va))
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
#endif /* _KERNEL */
#endif /* !_MACHINE_PMAP_H_ */
Modified: stable/12/sys/arm64/include/pmap.h
==============================================================================
--- stable/12/sys/arm64/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/arm64/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -171,6 +171,13 @@ struct pcb *pmap_switch(struct thread *, struct thread
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
#endif /* _KERNEL */
#endif /* !LOCORE */
Modified: stable/12/sys/i386/include/pmap.h
==============================================================================
--- stable/12/sys/i386/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/i386/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -372,6 +372,13 @@ extern vm_offset_t virtual_end;
#define pmap_page_is_write_mapped(m) (((m)->aflags & PGA_WRITEABLE) != 0)
#define pmap_unmapbios(va, sz) pmap_unmapdev((va), (sz))
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
/*
* Only the following functions or macros may be used before pmap_bootstrap()
* is called: pmap_kenter(), pmap_kextract(), pmap_kremove(), vtophys(), and
Modified: stable/12/sys/mips/include/pmap.h
==============================================================================
--- stable/12/sys/mips/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/mips/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -185,6 +185,13 @@ int pmap_emulate_modified(pmap_t pmap, vm_offset_t va)
void pmap_page_set_memattr(vm_page_t, vm_memattr_t);
int pmap_change_attr(vm_offset_t, vm_size_t, vm_memattr_t);
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
#endif /* _KERNEL */
#endif /* !LOCORE */
Modified: stable/12/sys/powerpc/include/pmap.h
==============================================================================
--- stable/12/sys/powerpc/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/powerpc/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -288,6 +288,13 @@ vm_offset_t pmap_early_io_map(vm_paddr_t pa, vm_size_t
void pmap_early_io_unmap(vm_offset_t va, vm_size_t size);
void pmap_track_page(pmap_t pmap, vm_offset_t va);
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
#endif
#endif /* !_MACHINE_PMAP_H_ */
Modified: stable/12/sys/riscv/include/pmap.h
==============================================================================
--- stable/12/sys/riscv/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/riscv/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -155,6 +155,13 @@ bool pmap_get_tables(pmap_t, vm_offset_t, pd_entry_t *
int pmap_fault_fixup(pmap_t, vm_offset_t, vm_prot_t);
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
#endif /* _KERNEL */
#endif /* !LOCORE */
Modified: stable/12/sys/sparc64/include/pmap.h
==============================================================================
--- stable/12/sys/sparc64/include/pmap.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/sparc64/include/pmap.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -128,4 +128,11 @@ SYSCTL_DECL(_debug_pmap_stats);
#endif
+static inline int
+pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
+{
+
+ return (0);
+}
+
#endif /* !_MACHINE_PMAP_H_ */
Modified: stable/12/sys/vm/vm_fault.c
==============================================================================
--- stable/12/sys/vm/vm_fault.c Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/vm/vm_fault.c Wed Mar 6 17:33:05 2019 (r344848)
@@ -479,8 +479,20 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
fault_flags, true);
}
VM_OBJECT_WUNLOCK(fs->first_object);
- pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | (wired ?
- PMAP_ENTER_WIRED : 0), psind);
+ rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type |
+ (wired ? PMAP_ENTER_WIRED : 0), psind);
+#if defined(__amd64__)
+ if (psind > 0 && rv == KERN_FAILURE) {
+ for (i = 0; i < npages; i++) {
+ rv = pmap_enter(fs->map->pmap, vaddr + ptoa(i),
+ &m[i], prot, fault_type |
+ (wired ? PMAP_ENTER_WIRED : 0), 0);
+ MPASS(rv == KERN_SUCCESS);
+ }
+ }
+#else
+ MPASS(rv == KERN_SUCCESS);
+#endif
VM_OBJECT_WLOCK(fs->first_object);
m_mtx = NULL;
for (i = 0; i < npages; i++) {
Modified: stable/12/sys/vm/vm_map.c
==============================================================================
--- stable/12/sys/vm/vm_map.c Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/vm/vm_map.c Wed Mar 6 17:33:05 2019 (r344848)
@@ -3424,7 +3424,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
vm_map_t new_map, old_map;
vm_map_entry_t new_entry, old_entry;
vm_object_t object;
- int locked;
+ int error, locked;
vm_inherit_t inh;
old_map = &vm1->vm_map;
@@ -3433,6 +3433,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
pmap_pinit);
if (vm2 == NULL)
return (NULL);
+
vm2->vm_taddr = vm1->vm_taddr;
vm2->vm_daddr = vm1->vm_daddr;
vm2->vm_maxsaddr = vm1->vm_maxsaddr;
@@ -3442,6 +3443,15 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
new_map = &vm2->vm_map;
locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */
KASSERT(locked, ("vmspace_fork: lock failed"));
+
+ error = pmap_vmspace_copy(new_map->pmap, old_map->pmap);
+ if (error != 0) {
+ sx_xunlock(&old_map->lock);
+ sx_xunlock(&new_map->lock);
+ vm_map_process_deferred();
+ vmspace_free(vm2);
+ return (NULL);
+ }
old_entry = old_map->header.next;
Modified: stable/12/sys/x86/include/sysarch.h
==============================================================================
--- stable/12/sys/x86/include/sysarch.h Wed Mar 6 17:26:30 2019 (r344847)
+++ stable/12/sys/x86/include/sysarch.h Wed Mar 6 17:33:05 2019 (r344848)
@@ -52,6 +52,8 @@
#define I386_GET_GSBASE 9
#define I386_SET_GSBASE 10
#define I386_GET_XFPUSTATE 11
+#define I386_SET_PKRU 12
+#define I386_CLEAR_PKRU 13
/* Leave space for 0-127 to avoid translating syscalls */
#define AMD64_GET_FSBASE 128
@@ -59,7 +61,13 @@
#define AMD64_GET_GSBASE 130
#define AMD64_SET_GSBASE 131
#define AMD64_GET_XFPUSTATE 132
+#define AMD64_SET_PKRU 133
+#define AMD64_CLEAR_PKRU 134
+/* Flags for AMD64_SET_PKRU */
+#define AMD64_PKRU_EXCL 0x0001
+#define AMD64_PKRU_PERSIST 0x0002
+
struct i386_ioperm_args {
unsigned int start;
unsigned int length;
@@ -94,11 +102,25 @@ struct i386_get_xfpustate {
int len;
};
+struct i386_set_pkru {
+ unsigned int addr;
+ unsigned int len;
+ unsigned int keyidx;
+ int flags;
+};
+
struct amd64_get_xfpustate {
void *addr;
int len;
};
#endif
+
+struct amd64_set_pkru {
+ void *addr;
+ unsigned long len;
+ unsigned int keyidx;
+ int flags;
+};
#ifndef _KERNEL
union descriptor;