git: 939f0b6323e0 - main - Implement shared page address randomization

From: Kornel Dulęba <kd_at_FreeBSD.org>
Date: Mon, 18 Jul 2022 14:29:47 UTC
The branch main has been updated by kd:

URL: https://cgit.FreeBSD.org/src/commit/?id=939f0b6323e0a17349aa5e766eacfd0a138cfcc1

commit 939f0b6323e0a17349aa5e766eacfd0a138cfcc1
Author:     Kornel Dulęba <kd@FreeBSD.org>
AuthorDate: 2022-05-10 13:22:55 +0000
Commit:     Kornel Dulęba <kd@FreeBSD.org>
CommitDate: 2022-07-18 14:27:37 +0000

    Implement shared page address randomization
    
    The shared page used to be mapped at the top of the UVA.
    If randomization is enabled, an address above the .data section is
    chosen at random, and a guard page is inserted at the shared page's
    default location.
    The shared page is now mapped in exec_map_stack instead of
    exec_new_vmspace, as the latter function is called before the image
    activator has a chance to parse ASLR-related flags.
    The KERN_PROC_VM_LAYOUT sysctl was extended to report the shared
    page address.
    The feature is enabled by default for 64-bit applications on all
    architectures and can be toggled with the kern.elf64.aslr.shared_page
    sysctl.
    
    Approved by:    mw (mentor)
    Sponsored by:   Stormshield
    Obtained from:  Semihalf
    Reviewed by:    kib
    Differential Revision: https://reviews.freebsd.org/D35349
---
 sys/compat/freebsd32/freebsd32.h |  4 +-
 sys/kern/imgact_elf.c            |  8 ++++
 sys/kern/kern_exec.c             | 79 ++++++++++++++++++++++++++++++----------
 sys/kern/kern_proc.c             |  7 ++++
 sys/sys/imgact.h                 |  2 +
 sys/sys/user.h                   |  5 ++-
 tests/sys/kern/kern_copyin.c     | 58 ++++++++++++++++-------------
 7 files changed, 117 insertions(+), 46 deletions(-)
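
For context, the new kinfo_vm_layout fields can be read back from userland
along these lines (a minimal sketch mirroring what the updated kern_copyin
test below does; the field and flag names are taken from this diff):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <sys/user.h>

	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		struct kinfo_vm_layout kvm;
		size_t len = sizeof(kvm);
		int mib[4];

		mib[0] = CTL_KERN;
		mib[1] = KERN_PROC;
		mib[2] = KERN_PROC_VM_LAYOUT;
		mib[3] = getpid();
		if (sysctl(mib, 4, &kvm, &len, NULL, 0) != 0) {
			perror("sysctl");
			return (1);
		}
		/* kvm_shp_addr and kvm_shp_size are the new fields. */
		printf("shared page at %#jx, %zu bytes%s\n",
		    (uintmax_t)kvm.kvm_shp_addr, kvm.kvm_shp_size,
		    (kvm.kvm_map_flags & KMAP_FLAG_ASLR_SHARED_PAGE) != 0 ?
		    " (randomized)" : "");
		return (0);
	}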

diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h
index 96bf79d28c02..96dce0d1afa4 100644
--- a/sys/compat/freebsd32/freebsd32.h
+++ b/sys/compat/freebsd32/freebsd32.h
@@ -442,7 +442,9 @@ struct kinfo_vm_layout32 {
 	uint32_t	kvm_stack_addr;
 	uint32_t	kvm_stack_size;
 	int		kvm_map_flags;
-	uint32_t	kvm_spare[14];
+	uint32_t	kvm_shp_addr;
+	uint32_t	kvm_shp_size;
+	uint32_t	kvm_spare[12];
 };
 
 struct kld_file_stat_1_32 {
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index c71b00337027..ca1a7aaca331 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -209,6 +209,12 @@ SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, stack, CTLFLAG_RWTUN,
     __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
     ": enable stack address randomization");
 
+static int __elfN(aslr_shared_page) = __ELF_WORD_SIZE == 64;
+SYSCTL_INT(ASLR_NODE_OID, OID_AUTO, shared_page, CTLFLAG_RWTUN,
+    &__elfN(aslr_shared_page), 0,
+    __XSTRING(__CONCAT(ELF, __ELF_WORD_SIZE))
+    ": enable shared page address randomization");
+
 static int __elfN(sigfastblock) = 1;
 SYSCTL_INT(__CONCAT(_kern_elf, __ELF_WORD_SIZE), OID_AUTO, sigfastblock,
     CTLFLAG_RWTUN, &__elfN(sigfastblock), 0,
@@ -1305,6 +1311,8 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp)
 			imgp->map_flags |= MAP_ASLR_IGNSTART;
 		if (__elfN(aslr_stack))
 			imgp->map_flags |= MAP_ASLR_STACK;
+		if (__elfN(aslr_shared_page))
+			imgp->imgp_flags |= IMGP_ASLR_SHARED_PAGE;
 	}
 
 	if ((!__elfN(allow_wx) && (fctl0 & NT_FREEBSD_FCTL_WXNEEDED) == 0 &&
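
The knob sits under the per-ABI ASLR sysctl node, so it can be flipped at
runtime with e.g. "sysctl kern.elf64.aslr.shared_page=0", or programmatically
along these lines (a sketch; only the sysctl name comes from this commit, and
the setting only affects images exec'ed afterwards):

	#include <sys/types.h>
	#include <sys/sysctl.h>

	/* Enable/disable shared page randomization for new 64-bit images. */
	int
	set_shared_page_aslr(int enable)
	{
		return (sysctlbyname("kern.elf64.aslr.shared_page",
		    NULL, NULL, &enable, sizeof(enable)));
	}
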
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 2d46bc018173..0ead8d81fe59 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -1119,8 +1119,7 @@ exec_free_abi_mappings(struct proc *p)
 }
 
 /*
- * Run down the current address space and install a new one.  Map the shared
- * page.
+ * Run down the current address space and install a new one.
  */
 int
 exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
@@ -1129,7 +1128,6 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
 	struct proc *p = imgp->proc;
 	struct vmspace *vmspace = p->p_vmspace;
 	struct thread *td = curthread;
-	vm_object_t obj;
 	vm_offset_t sv_minuser;
 	vm_map_t map;
 
@@ -1177,27 +1175,12 @@ exec_new_vmspace(struct image_params *imgp, struct sysentvec *sv)
 	}
 	map->flags |= imgp->map_flags;
 
-	/* Map a shared page */
-	obj = sv->sv_shared_page_obj;
-	if (obj != NULL) {
-		vm_object_reference(obj);
-		error = vm_map_fixed(map, obj, 0,
-		    sv->sv_shared_page_base, sv->sv_shared_page_len,
-		    VM_PROT_READ | VM_PROT_EXECUTE,
-		    VM_PROT_READ | VM_PROT_EXECUTE,
-		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
-		if (error != KERN_SUCCESS) {
-			vm_object_deallocate(obj);
-			return (vm_mmap_to_errno(error));
-		}
-		vmspace->vm_shp_base = sv->sv_shared_page_base;
-	}
-
 	return (sv->sv_onexec != NULL ? sv->sv_onexec(p, imgp) : 0);
 }
 
 /*
  * Compute the stack size limit and map the main process stack.
+ * Map the shared page.
  */
 int
 exec_map_stack(struct image_params *imgp)
@@ -1208,9 +1191,11 @@ exec_map_stack(struct image_params *imgp)
 	vm_map_t map;
 	struct vmspace *vmspace;
 	vm_offset_t stack_addr, stack_top;
+	vm_offset_t sharedpage_addr;
 	u_long ssiz;
 	int error, find_space, stack_off;
 	vm_prot_t stack_prot;
+	vm_object_t obj;
 
 	p = imgp->proc;
 	sv = p->p_sysent;
@@ -1262,6 +1247,61 @@ exec_map_stack(struct image_params *imgp)
 		stack_top -= rounddown2(stack_off & PAGE_MASK, sizeof(void *));
 	}
 
+	/* Map a shared page */
+	obj = sv->sv_shared_page_obj;
+	if (obj == NULL) {
+		sharedpage_addr = 0;
+		goto out;
+	}
+
+	/*
+	 * If randomization is disabled then the shared page will
+	 * be mapped at the address specified in sysentvec.
+	 * Otherwise any address above the .data section can be selected.
+	 * The same logic is used for stack address randomization.
+	 * If address randomization is applied, map a guard page
+	 * at the top of the UVA.
+	 */
+	vm_object_reference(obj);
+	if ((imgp->imgp_flags & IMGP_ASLR_SHARED_PAGE) != 0) {
+		sharedpage_addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr +
+		    lim_max(curthread, RLIMIT_DATA));
+
+		error = vm_map_fixed(map, NULL, 0,
+		    sv->sv_maxuser - PAGE_SIZE, PAGE_SIZE,
+		    VM_PROT_NONE, VM_PROT_NONE, MAP_CREATE_GUARD);
+		if (error != KERN_SUCCESS) {
+			/*
+			 * This is not fatal, so let's just print a warning
+			 * and continue.
+			 */
+			uprintf("%s: mapping guard page at the top of UVA failed,"
+			    " mach error %d errno %d\n",
+			    __func__, error, vm_mmap_to_errno(error));
+		}
+
+		error = vm_map_find(map, obj, 0,
+		    &sharedpage_addr, sv->sv_shared_page_len,
+		    sv->sv_maxuser, VMFS_ANY_SPACE,
+		    VM_PROT_READ | VM_PROT_EXECUTE,
+		    VM_PROT_READ | VM_PROT_EXECUTE,
+		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
+	} else {
+		sharedpage_addr = sv->sv_shared_page_base;
+		error = vm_map_fixed(map, obj, 0,
+		    sharedpage_addr, sv->sv_shared_page_len,
+		    VM_PROT_READ | VM_PROT_EXECUTE,
+		    VM_PROT_READ | VM_PROT_EXECUTE,
+		    MAP_INHERIT_SHARE | MAP_ACC_NO_CHARGE);
+	}
+	if (error != KERN_SUCCESS) {
+		uprintf("%s: mapping shared page at addr: %p"
+		    " failed, mach error %d errno %d\n", __func__,
+		    (void *)sharedpage_addr, error, vm_mmap_to_errno(error));
+		vm_object_deallocate(obj);
+		return (vm_mmap_to_errno(error));
+	}
+out:
 	/*
 	 * vm_ssize and vm_maxsaddr are somewhat antiquated concepts, but they
 	 * are still used to enforce the stack rlimit on the process stack.
@@ -1269,6 +1309,7 @@ exec_map_stack(struct image_params *imgp)
 	vmspace->vm_maxsaddr = (char *)stack_addr;
 	vmspace->vm_stacktop = stack_top;
 	vmspace->vm_ssize = sgrowsiz >> PAGE_SHIFT;
+	vmspace->vm_shp_base = sharedpage_addr;
 
 	return (0);
 }
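
To make the selection window concrete: with IMGP_ASLR_SHARED_PAGE set, the
hint passed to vm_map_find() is the page-rounded end of the largest possible
data segment, so the shared page can land anywhere between that point and the
guard page now occupying the old fixed slot at the top of the UVA. A userland
sketch of the arithmetic (the addresses are made up; only the rounding
mirrors the hunk above):

	#include <stdint.h>
	#include <stdio.h>

	#define PGSZ		4096UL
	#define round_pg(x)	(((x) + PGSZ - 1) & ~(PGSZ - 1))

	int
	main(void)
	{
		/* Hypothetical 64-bit layout. */
		uint64_t vm_daddr = 0x1021000UL;	/* start of .data */
		uint64_t data_max = 0x8000000000UL;	/* lim_max(RLIMIT_DATA) */
		uint64_t maxuser = 0x800000000000UL;	/* sv_maxuser */

		/* Lower bound, as computed in exec_map_stack(). */
		uint64_t hint = round_pg(vm_daddr + data_max);
		/* The top page now holds the guard, so it is excluded. */
		printf("shared page drawn from [%#jx, %#jx)\n",
		    (uintmax_t)hint, (uintmax_t)(maxuser - PGSZ));
		return (0);
	}
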
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 56bdb6aa3837..4962e83c4ad4 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -3235,6 +3235,8 @@ sysctl_kern_proc_vm_layout(SYSCTL_HANDLER_ARGS)
 	kvm.kvm_data_size = vmspace->vm_dsize;
 	kvm.kvm_stack_addr = (uintptr_t)vmspace->vm_maxsaddr;
 	kvm.kvm_stack_size = vmspace->vm_ssize;
+	kvm.kvm_shp_addr = vmspace->vm_shp_base;
+	kvm.kvm_shp_size = p->p_sysent->sv_shared_page_len;
 	if ((vmspace->vm_map.flags & MAP_WIREFUTURE) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_WIREFUTURE;
 	if ((vmspace->vm_map.flags & MAP_ASLR) != 0)
@@ -3245,6 +3247,9 @@ sysctl_kern_proc_vm_layout(SYSCTL_HANDLER_ARGS)
 		kvm.kvm_map_flags |= KMAP_FLAG_WXORX;
 	if ((vmspace->vm_map.flags & MAP_ASLR_STACK) != 0)
 		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_STACK;
+	if (vmspace->vm_shp_base != p->p_sysent->sv_shared_page_base &&
+	    PROC_HAS_SHP(p))
+		kvm.kvm_map_flags |= KMAP_FLAG_ASLR_SHARED_PAGE;
 
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32)) {
@@ -3259,6 +3264,8 @@ sysctl_kern_proc_vm_layout(SYSCTL_HANDLER_ARGS)
 		kvm32.kvm_data_size = (uint32_t)kvm.kvm_data_size;
 		kvm32.kvm_stack_addr = (uint32_t)kvm.kvm_stack_addr;
 		kvm32.kvm_stack_size = (uint32_t)kvm.kvm_stack_size;
+		kvm32.kvm_shp_addr = (uint32_t)kvm.kvm_shp_addr;
+		kvm32.kvm_shp_size = (uint32_t)kvm.kvm_shp_size;
 		kvm32.kvm_map_flags = kvm.kvm_map_flags;
 		vmspace_free(vmspace);
 		error = SYSCTL_OUT(req, &kvm32, sizeof(kvm32));
diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h
index bc1ab77a491e..0be3e71604bf 100644
--- a/sys/sys/imgact.h
+++ b/sys/sys/imgact.h
@@ -92,6 +92,8 @@ struct image_params {
 	bool opened;			/* we have opened executable vnode */
 	bool textset;
 	u_int map_flags;
+#define IMGP_ASLR_SHARED_PAGE	0x1
+	uint32_t imgp_flags;
 };
 
 #ifdef _KERNEL
diff --git a/sys/sys/user.h b/sys/sys/user.h
index c9245dad67e7..edbe6a8655ff 100644
--- a/sys/sys/user.h
+++ b/sys/sys/user.h
@@ -633,6 +633,7 @@ struct kinfo_sigtramp {
 #define	KMAP_FLAG_ASLR_IGNSTART	0x04	/* ASLR may map into sbrk grow region */
 #define	KMAP_FLAG_WXORX		0x08	/* W^X mapping policy is enforced */
 #define	KMAP_FLAG_ASLR_STACK	0x10	/* the stack location is randomized */
+#define	KMAP_FLAG_ASLR_SHARED_PAGE 0x20	/* the shared page location is randomized */
 
 struct kinfo_vm_layout {
 	uintptr_t	kvm_min_user_addr;
@@ -644,7 +645,9 @@ struct kinfo_vm_layout {
 	uintptr_t	kvm_stack_addr;
 	size_t		kvm_stack_size;
 	int		kvm_map_flags;
-	uintptr_t	kvm_spare[14];
+	uintptr_t	kvm_shp_addr;
+	size_t		kvm_shp_size;
+	uintptr_t	kvm_spare[12];
 };
 
 #ifdef _KERNEL
diff --git a/tests/sys/kern/kern_copyin.c b/tests/sys/kern/kern_copyin.c
index de12753c8dcd..db29fb1ce3b1 100644
--- a/tests/sys/kern/kern_copyin.c
+++ b/tests/sys/kern/kern_copyin.c
@@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/exec.h>
 #include <sys/sysctl.h>
 #include <sys/user.h>
+#include <sys/mman.h>
 
 #include <errno.h>
 #include <fcntl.h>
@@ -72,26 +73,20 @@ copyin_checker2(uintptr_t uaddr)
 }
 #endif
 
-#ifdef __amd64__
-static uintptr_t
-get_maxuser_address(void)
+static int
+get_vm_layout(struct kinfo_vm_layout *kvm)
 {
-	struct kinfo_vm_layout kvm;
 	size_t len;
-	int error, mib[4];
+	int mib[4];
 
 	mib[0] = CTL_KERN;
 	mib[1] = KERN_PROC;
 	mib[2] = KERN_PROC_VM_LAYOUT;
 	mib[3] = getpid();
-	len = sizeof(kvm);
-	error = sysctl(mib, nitems(mib), &kvm, &len, NULL, 0);
-	if (error != 0)
-		return (0);
+	len = sizeof(*kvm);
 
-	return (kvm.kvm_max_user_addr);
+	return (sysctl(mib, nitems(mib), kvm, &len, NULL, 0));
 }
-#endif
 
 #define	FMAX	ULONG_MAX
 #if __SIZEOF_POINTER__ == 8
@@ -103,27 +98,37 @@ ATF_TC_WITHOUT_HEAD(kern_copyin);
 ATF_TC_BODY(kern_copyin, tc)
 {
 	char template[] = "copyin.XXXXXX";
+	struct kinfo_vm_layout kvm;
 	uintptr_t maxuser;
+	long page_size;
+	void *addr;
+	int error;
 
-#if defined(__mips__)
-	/*
-	 * MIPS has different VM layout: the UVA map on mips ends the
-	 * highest mapped entry at the VM_MAXUSER_ADDRESS - PAGE_SIZE,
-	 * while all other arches map either stack or shared page up
-	 * to the VM_MAXUSER_ADDRESS.
-	 */
-	maxuser = VM_MAXUSER_ADDRESS - PAGE_SIZE;
-#elif defined(__amd64__)
-	maxuser = get_maxuser_address();
-	ATF_REQUIRE(maxuser != 0);
-#else
-	maxuser = VM_MAXUSER_ADDRESS;
-#endif
+	addr = MAP_FAILED;
 
+	error = get_vm_layout(&kvm);
+	ATF_REQUIRE(error == 0);
+
+	page_size = sysconf(_SC_PAGESIZE);
+	ATF_REQUIRE(page_size != (long)-1);
+
+	maxuser = kvm.kvm_max_user_addr;
 	scratch_file = mkstemp(template);
 	ATF_REQUIRE(scratch_file != -1);
 	unlink(template);
 
+	/*
+	 * Since the shared page address can be randomized, we need to make
+	 * sure that something is mapped at the top of the user address space.
+	 * Otherwise reading bytes from maxuser-X would fail, rendering this
+	 * test useless.
+	 */
+	if (kvm.kvm_shp_addr + kvm.kvm_shp_size < maxuser) {
+		addr = mmap((void *)(maxuser - page_size), page_size, PROT_READ,
+		    MAP_ANON | MAP_FIXED, -1, 0);
+		ATF_REQUIRE(addr != MAP_FAILED);
+	}
+
 	ATF_CHECK(copyin_checker(0, 0) == 0);
 	ATF_CHECK(copyin_checker(maxuser - 10, 9) == 0);
 	ATF_CHECK(copyin_checker(maxuser - 10, 10) == 0);
@@ -141,6 +146,9 @@ ATF_TC_BODY(kern_copyin, tc)
 	ATF_CHECK(copyin_checker(ADDR_SIGNED, 1) == EFAULT);
 	ATF_CHECK(copyin_checker2(ADDR_SIGNED) == EFAULT);
 #endif
+
+	if (addr != MAP_FAILED)
+		munmap(addr, page_size);
 }
 
 ATF_TP_ADD_TCS(tp)