git: e3917bb256de - main - Disable promotion on pcpu memory on arm64
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 16 Aug 2022 14:48:27 UTC
The branch main has been updated by andrew: URL: https://cgit.FreeBSD.org/src/commit/?id=e3917bb256dea63945b5bef7fe2b792a280fcafe commit e3917bb256dea63945b5bef7fe2b792a280fcafe Author: Andrew Turner <andrew@FreeBSD.org> AuthorDate: 2022-04-29 09:30:38 +0000 Commit: Andrew Turner <andrew@FreeBSD.org> CommitDate: 2022-08-16 14:41:24 +0000 Disable promotion on pcpu memory on arm64 We need to be careful to not promote or demote the memory containing the per-CPU structures as the exception handlers will dereference it so any time it's invalid may cause recursive exceptions. Add a new pmap function to set a flag in the pte marking memory that cannot be promoted or demoted and use it to mark pcpu memory. Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D35434 --- sys/arm64/arm64/mp_machdep.c | 15 ++++--- sys/arm64/arm64/pmap.c | 97 ++++++++++++++++++++++++++++++-------------- sys/arm64/include/pmap.h | 1 + sys/arm64/include/pte.h | 4 +- 4 files changed, 80 insertions(+), 37 deletions(-) diff --git a/sys/arm64/arm64/mp_machdep.c b/sys/arm64/arm64/mp_machdep.c index adf5592832c5..a6e2be300bae 100644 --- a/sys/arm64/arm64/mp_machdep.c +++ b/sys/arm64/arm64/mp_machdep.c @@ -495,6 +495,8 @@ static bool start_cpu(u_int cpuid, uint64_t target_cpu, int domain) { struct pcpu *pcpup; + vm_offset_t pcpu_mem; + vm_size_t size; vm_paddr_t pa; int err, naps; @@ -508,13 +510,16 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain) KASSERT(cpuid < MAXCPU, ("Too many CPUs")); - pcpup = (void *)kmem_malloc_domainset(DOMAINSET_PREF(domain), - sizeof(*pcpup), M_WAITOK | M_ZERO); + size = round_page(sizeof(*pcpup) + DPCPU_SIZE); + pcpu_mem = kmem_malloc_domainset(DOMAINSET_PREF(domain), size, + M_WAITOK | M_ZERO); + pmap_disable_promotion(pcpu_mem, size); + + pcpup = (struct pcpu *)pcpu_mem; pcpu_init(pcpup, cpuid, sizeof(struct pcpu)); pcpup->pc_mpidr = target_cpu & CPU_AFF_MASK; - dpcpu[cpuid - 1] = (void *)kmem_malloc_domainset( - DOMAINSET_PREF(domain), DPCPU_SIZE, M_WAITOK | M_ZERO); + dpcpu[cpuid - 1] = (void *)(pcpup + 1); dpcpu_init(dpcpu[cpuid - 1], cpuid); bootstacks[cpuid] = (void *)kmem_malloc_domainset( @@ -538,9 +543,9 @@ start_cpu(u_int cpuid, uint64_t target_cpu, int domain) cpuid, target_cpu, err)); pcpu_destroy(pcpup); - kmem_free((vm_offset_t)dpcpu[cpuid - 1], DPCPU_SIZE); dpcpu[cpuid - 1] = NULL; kmem_free((vm_offset_t)bootstacks[cpuid], PAGE_SIZE); + kmem_free(pcpu_mem, size); bootstacks[cpuid] = NULL; mp_ncpus--; return (false); diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index d95eccb445a5..62276c024212 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -3467,7 +3467,7 @@ retry: } /* - * pmap_protect_l2: do the things to protect a 2MB page in a pmap + * Masks and sets bits in a level 2 page table entries in the specified pmap */ static void pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask, @@ -3515,34 +3515,16 @@ pmap_protect_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, pt_entry_t mask, } /* - * Set the physical protection on the - * specified range of this map as requested. + * Masks and sets bits in last level page table entries in the specified + * pmap and range */ -void -pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +static void +pmap_mask_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pt_entry_t mask, + pt_entry_t nbits, bool invalidate) { vm_offset_t va, va_next; pd_entry_t *l0, *l1, *l2; - pt_entry_t *l3p, l3, mask, nbits; - - PMAP_ASSERT_STAGE1(pmap); - KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); - if (prot == VM_PROT_NONE) { - pmap_remove(pmap, sva, eva); - return; - } - - mask = nbits = 0; - if ((prot & VM_PROT_WRITE) == 0) { - mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM; - nbits |= ATTR_S1_AP(ATTR_S1_AP_RO); - } - if ((prot & VM_PROT_EXECUTE) == 0) { - mask |= ATTR_S1_XN; - nbits |= ATTR_S1_XN; - } - if (mask == 0) - return; + pt_entry_t *l3p, l3; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { @@ -3569,7 +3551,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) MPASS((pmap_load(l1) & ATTR_SW_MANAGED) == 0); if ((pmap_load(l1) & mask) != nbits) { pmap_store(l1, (pmap_load(l1) & ~mask) | nbits); - pmap_invalidate_page(pmap, sva, true); + if (invalidate) + pmap_invalidate_page(pmap, sva, true); } continue; } @@ -3610,8 +3593,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) */ if (!pmap_l3_valid(l3) || (l3 & mask) == nbits) { if (va != va_next) { - pmap_invalidate_range(pmap, va, sva, - true); + if (invalidate) + pmap_invalidate_range(pmap, + va, sva, true); va = va_next; } continue; @@ -3633,12 +3617,54 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (va == va_next) va = sva; } - if (va != va_next) + if (va != va_next && invalidate) pmap_invalidate_range(pmap, va, sva, true); } PMAP_UNLOCK(pmap); } +/* + * Set the physical protection on the + * specified range of this map as requested. + */ +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + pt_entry_t mask, nbits; + + PMAP_ASSERT_STAGE1(pmap); + KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); + if (prot == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + + mask = nbits = 0; + if ((prot & VM_PROT_WRITE) == 0) { + mask |= ATTR_S1_AP_RW_BIT | ATTR_SW_DBM; + nbits |= ATTR_S1_AP(ATTR_S1_AP_RO); + } + if ((prot & VM_PROT_EXECUTE) == 0) { + mask |= ATTR_S1_XN; + nbits |= ATTR_S1_XN; + } + if (mask == 0) + return; + + pmap_mask_set(pmap, sva, eva, mask, nbits, true); +} + +void +pmap_disable_promotion(vm_offset_t sva, vm_size_t size) +{ + + MPASS((sva & L3_OFFSET) == 0); + MPASS(((sva + size) & L3_OFFSET) == 0); + + pmap_mask_set(kernel_pmap, sva, sva + size, ATTR_SW_NO_PROMOTE, + ATTR_SW_NO_PROMOTE, false); +} + /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible @@ -3683,6 +3709,9 @@ pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if ((newpte & ATTR_SW_NO_PROMOTE) != 0) + panic("%s: Updating non-promote pte", __func__); + /* * Ensure we don't get switched out with the page table in an * inconsistent state. We also need to ensure no interrupts fire @@ -3775,7 +3804,8 @@ pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, firstl3 = pmap_l2_to_l3(l2, sva); newl2 = pmap_load(firstl3); - if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF) { + if (((newl2 & (~ATTR_MASK | ATTR_AF)) & L2_OFFSET) != ATTR_AF || + (newl2 & ATTR_SW_NO_PROMOTE) != 0) { atomic_add_long(&pmap_l2_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" " in pmap %p", va, pmap); @@ -6284,6 +6314,9 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, break; } } else { + /* We can't demote/promote this entry */ + MPASS((pmap_load(ptep) & ATTR_SW_NO_PROMOTE) == 0); + /* * Split the entry to an level 3 table, then * set the new attribute. @@ -6375,6 +6408,8 @@ pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) ("pmap_demote_l1: Invalid virtual address %#lx", va)); KASSERT((oldl1 & ATTR_SW_MANAGED) == 0, ("pmap_demote_l1: Level 1 table shouldn't be managed")); + KASSERT((oldl1 & ATTR_SW_NO_PROMOTE) == 0, + ("pmap_demote_l1: Demoting entry with no-demote flag set")); tmpl1 = 0; if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) { @@ -6470,6 +6505,8 @@ pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, oldl2 = pmap_load(l2); KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, ("pmap_demote_l2: Demoting a non-block entry")); + KASSERT((oldl2 & ATTR_SW_NO_PROMOTE) == 0, + ("pmap_demote_l2: Demoting entry with no-demote flag set")); va &= ~L2_OFFSET; tmpl2 = 0; diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h index 6da54a841a28..f36b081ea869 100644 --- a/sys/arm64/include/pmap.h +++ b/sys/arm64/include/pmap.h @@ -189,6 +189,7 @@ bool pmap_page_is_mapped(vm_page_t m); int pmap_pinit_stage(pmap_t, enum pmap_stage, int); bool pmap_ps_enabled(pmap_t pmap); uint64_t pmap_to_ttbr0(pmap_t pmap); +void pmap_disable_promotion(vm_offset_t sva, vm_size_t size); void *pmap_mapdev(vm_paddr_t, vm_size_t); void *pmap_mapbios(vm_paddr_t, vm_size_t); diff --git a/sys/arm64/include/pte.h b/sys/arm64/include/pte.h index eaf6745f9679..24130f26cee1 100644 --- a/sys/arm64/include/pte.h +++ b/sys/arm64/include/pte.h @@ -52,8 +52,8 @@ typedef uint64_t pt_entry_t; /* page table entry */ #define ATTR_MASK_L UINT64_C(0x0000000000000fff) #define ATTR_MASK (ATTR_MASK_H | ATTR_MASK_L) /* Bits 58:55 are reserved for software */ -#define ATTR_SW_UNUSED2 (1UL << 58) -#define ATTR_SW_UNUSED1 (1UL << 57) +#define ATTR_SW_UNUSED1 (1UL << 58) +#define ATTR_SW_NO_PROMOTE (1UL << 57) #define ATTR_SW_MANAGED (1UL << 56) #define ATTR_SW_WIRED (1UL << 55)