git: 0b29f5efcc7e - main - amd64: Make it possible to grow the KERNBASE region of KVA
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 24 Sep 2022 13:38:16 UTC
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=0b29f5efcc7ee8271ad2f6b6447898b489d618ec commit 0b29f5efcc7ee8271ad2f6b6447898b489d618ec Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2022-09-24 13:19:21 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2022-09-24 13:27:50 +0000 amd64: Make it possible to grow the KERNBASE region of KVA pmap_growkernel() may be called when mapping a region above KERNBASE, typically for a kernel module. If we have enough PTPs left over from bootstrap, pmap_growkernel() does nothing. However, it's possible to run out, and in this case pmap_growkernel() will try to grow the kernel map all the way from kernel_vm_end to somewhere past KERNBASE, which can easily run the system out of memory. This happens with large kernel modules such as the nvidia GPU driver. There is also a WIP dtrace provider which needs to map KVA in the region above KERNBASE (to provide trampolines which allow a copy of traced kernel instruction to be executed), and its allocations could potentially trigger this scenario. This change modifies pmap_growkernel() to manage the two regions separately, allowing them to grow independently. The end of the KERNBASE region is tracked by modifying "nkpt". PR: 265019 Reviewed by: alc, imp, kib MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D36673 --- sys/amd64/amd64/pmap.c | 68 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index d7aeb8dcbd98..9a33298944cc 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -4997,13 +4997,21 @@ pmap_growkernel(vm_offset_t addr) vm_page_t nkpg; pd_entry_t *pde, newpdir; pdp_entry_t *pdpe; + vm_offset_t end; mtx_assert(&kernel_map->system_mtx, MA_OWNED); /* - * Return if "addr" is within the range of kernel page table pages - * that were preallocated during pmap bootstrap. Moreover, leave - * "kernel_vm_end" and the kernel page table as they were. + * The kernel map covers two distinct regions of KVA: that used + * for dynamic kernel memory allocations, and the uppermost 2GB + * of the virtual address space. The latter is used to map the + * kernel and loadable kernel modules. This scheme enables the + * use of a special code generation model for kernel code which + * takes advantage of compact addressing modes in machine code. + * + * Both regions grow upwards; to avoid wasting memory, the gap + * in between is unmapped. If "addr" is above "KERNBASE", the + * kernel's region is grown, otherwise the kmem region is grown. * * The correctness of this action is based on the following * argument: vm_map_insert() allocates contiguous ranges of the @@ -5015,22 +5023,32 @@ pmap_growkernel(vm_offset_t addr) * any new kernel page table pages between "kernel_vm_end" and * "KERNBASE". */ - if (KERNBASE < addr && addr <= KERNBASE + nkpt * NBPDR) - return; + if (KERNBASE < addr) { + end = KERNBASE + nkpt * NBPDR; + if (end == 0) + return; + } else { + end = kernel_vm_end; + } addr = roundup2(addr, NBPDR); if (addr - 1 >= vm_map_max(kernel_map)) addr = vm_map_max(kernel_map); - if (kernel_vm_end < addr) - kasan_shadow_map(kernel_vm_end, addr - kernel_vm_end); - if (kernel_vm_end < addr) - kmsan_shadow_map(kernel_vm_end, addr - kernel_vm_end); - while (kernel_vm_end < addr) { - pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); + if (addr <= end) { + /* + * The grown region is already mapped, so there is + * nothing to do. + */ + return; + } + + kasan_shadow_map(end, addr - end); + kmsan_shadow_map(end, addr - end); + while (end < addr) { + pdpe = pmap_pdpe(kernel_pmap, end); if ((*pdpe & X86_PG_V) == 0) { - /* We need a new PDP entry */ nkpg = pmap_alloc_pt_page(kernel_pmap, - kernel_vm_end >> PDPSHIFT, VM_ALLOC_WIRED | + pmap_pdpe_pindex(end), VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); @@ -5039,31 +5057,35 @@ pmap_growkernel(vm_offset_t addr) X86_PG_A | X86_PG_M); continue; /* try again */ } - pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); + pde = pmap_pdpe_to_pde(pdpe, end); if ((*pde & X86_PG_V) != 0) { - kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { - kernel_vm_end = vm_map_max(kernel_map); + end = (end + NBPDR) & ~PDRMASK; + if (end - 1 >= vm_map_max(kernel_map)) { + end = vm_map_max(kernel_map); break; } continue; } - nkpg = pmap_alloc_pt_page(kernel_pmap, - pmap_pde_pindex(kernel_vm_end), VM_ALLOC_WIRED | - VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO); + nkpg = pmap_alloc_pt_page(kernel_pmap, pmap_pde_pindex(end), + VM_ALLOC_WIRED | VM_ALLOC_INTERRUPT | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); paddr = VM_PAGE_TO_PHYS(nkpg); newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; pde_store(pde, newpdir); - kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; - if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { - kernel_vm_end = vm_map_max(kernel_map); + end = (end + NBPDR) & ~PDRMASK; + if (end - 1 >= vm_map_max(kernel_map)) { + end = vm_map_max(kernel_map); break; } } + + if (end <= KERNBASE) + kernel_vm_end = end; + else + nkpt = howmany(end - KERNBASE, NBPDR); } /***************************************************