git: 03bf40c5d880 - main - arm64: Disable per-thread stack-smashing protection in data_abort()

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 07 Nov 2022 21:06:12 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=03bf40c5d880f5dae2a7b8c2282edd83f3ec7b5a

commit 03bf40c5d880f5dae2a7b8c2282edd83f3ec7b5a
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-11-07 20:53:41 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-11-07 21:05:58 +0000

    arm64: Disable per-thread stack-smashing protection in data_abort()
    
    With PERTHREAD_SSP configured, the compiler's stack-smashing protection
    uses a per-thread canary value instead of a global value.  The value is
    stored in td->td_md.md_canary; the sp_el0 register always contains a
    pointer to that value, and certain functions selected by the compiler
    will store the canary value on the stack as a part of the function
    prologue (and will verify the copy as part of the epilogue).  In
    particular, the thread structure may be accessed.
    
    This happens to occur in data_abort(), which leads to the same problem
    addressed by commit 2c10be9e06d4 ("arm64: Handle translation faults for
    thread structures").  This commit fixes that directly, by disabling SSP
    in data_abort() and a couple of related functions by using a function
    attribute.  It also moves the update of sp_el0 out of C code in case
    the compiler decides to start checking the canary in pmap_switch()
    someday.
    
    A different solution might be to move the canary value to the PCB, which
    currently lives on the kernel stack and isn't subject to the same
    problem as thread structures (if only because guard pages inhibit
    superpage promotion).  However, there isn't any particular reason the
    PCB has to live on the stack today; on amd64 it is embedded in struct
    thread, reintroducing the same problem.  Keeping the reference canary
    value at the top of the stack is also rather dubious since it could be
    clobbered by a sufficiently large stack overflow.
    
    A third solution could be to go back to the approach of commit
    5aa5420ff2e8, and modify UMA to use the direct map for thread structures
    even if KASAN is enabled.  But, transient promotions and demotions in
    the direct map are possible too.
    
    Reviewed by:    alc, kib, andrew
    MFC after:      1 month
    Sponsored by:   Juniper Networks, Inc.
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D37255
---
 sys/arm64/arm64/pmap.c    |  9 ++++-----
 sys/arm64/arm64/swtch.S   | 13 +++++++++++++
 sys/arm64/arm64/trap.c    | 13 +++++++++++--
 sys/arm64/include/param.h |  6 ++++++
 sys/sys/cdefs.h           | 10 ++++++++++
 5 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 07a079fc5c8e..8a73fb1c5180 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1647,8 +1647,11 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
  * Walks the page tables to translate a kernel virtual address to a
  * physical address. Returns true if the kva is valid and stores the
  * physical address in pa if it is not NULL.
+ *
+ * See the comment above data_abort() for the rationale for specifying
+ * NO_PERTHREAD_SSP here.
  */
-bool
+bool NO_PERTHREAD_SSP
 pmap_klookup(vm_offset_t va, vm_paddr_t *pa)
 {
 	pt_entry_t *pte, tpte;
@@ -7052,10 +7055,6 @@ pmap_switch(struct thread *new)
 
 	/* Store the new curthread */
 	PCPU_SET(curthread, new);
-#if defined(PERTHREAD_SSP)
-	/* Set the new threads SSP canary */
-	__asm("msr	sp_el0, %0" :: "r"(&new->td_md.md_canary));
-#endif
 
 	/* And the new pcb */
 	pcb = new->td_pcb;
diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S
index d7f915a91946..e7114746f03b 100644
--- a/sys/arm64/arm64/swtch.S
+++ b/sys/arm64/arm64/swtch.S
@@ -80,9 +80,17 @@ ENTRY(cpu_throw)
 
 	/* This returns the thread pointer so no need to save it */
 	bl	ptrauth_switch
+#ifdef PERTHREAD_SSP
+	mov	x19, x0
+#endif
 	/* This returns the thread pcb */
 	bl	pmap_switch
 	mov	x4, x0
+#ifdef PERTHREAD_SSP
+	/* Update the per-thread stack canary pointer. */
+	add	x19, x19, #(TD_MD_CANARY)
+	msr	sp_el0, x19
+#endif
 
 	/* If we are single stepping, enable it */
 	ldr	w5, [x4, #PCB_FLAGS]
@@ -159,6 +167,11 @@ ENTRY(cpu_switch)
 	mov	x2, x21
 	mov	x1, x20
 	mov	x0, x19
+#ifdef PERTHREAD_SSP
+	/* Update the per-thread stack canary pointer. */
+	add	x20, x20, #(TD_MD_CANARY)
+	msr	sp_el0, x20
+#endif
 
 	/*
 	 * Release the old thread.
diff --git a/sys/arm64/arm64/trap.c b/sys/arm64/arm64/trap.c
index f48e33db914e..4e54a06548cc 100644
--- a/sys/arm64/arm64/trap.c
+++ b/sys/arm64/arm64/trap.c
@@ -241,7 +241,13 @@ external_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
 	panic("Unhandled EL%d external data abort", lower ? 0: 1);
 }
 
-static void
+/*
+ * It is unsafe to access the stack canary value stored in "td" until
+ * kernel map translation faults are handled, see the pmap_klookup() call below.
+ * Thus, stack-smashing detection with per-thread canaries must be disabled in
+ * this function.
+ */
+static void NO_PERTHREAD_SSP
 data_abort(struct thread *td, struct trapframe *frame, uint64_t esr,
     uint64_t far, int lower)
 {
@@ -449,7 +455,10 @@ fpe_trap(struct thread *td, void *addr, uint32_t exception)
 }
 #endif
 
-void
+/*
+ * See the comment above data_abort().
+ */
+void NO_PERTHREAD_SSP
 do_el1h_sync(struct thread *td, struct trapframe *frame)
 {
 	uint32_t exception;
diff --git a/sys/arm64/include/param.h b/sys/arm64/include/param.h
index 8b2ac8c9bd18..d34667788938 100644
--- a/sys/arm64/include/param.h
+++ b/sys/arm64/include/param.h
@@ -105,6 +105,12 @@
 #define	KSTACK_GUARD_PAGES	1	/* pages of kstack guard; 0 disables */
 #define	PCPU_PAGES		1
 
+#ifdef PERTHREAD_SSP
+#define	NO_PERTHREAD_SSP	__nostackprotector
+#else
+#define	NO_PERTHREAD_SSP
+#endif
+
 /*
  * Mach derived conversion macros
  */
diff --git a/sys/sys/cdefs.h b/sys/sys/cdefs.h
index 1ace5fbee787..83ba7584e5b9 100644
--- a/sys/sys/cdefs.h
+++ b/sys/sys/cdefs.h
@@ -896,6 +896,16 @@
 #define __nosanitizethread
 #endif
 
+/*
+ * Make it possible to opt out of stack smashing protection.
+ */
+#if __has_attribute(no_stack_protector)
+#define	__nostackprotector	__attribute__((no_stack_protector))
+#else
+#define	__nostackprotector	\
+	__attribute__((__optimize__("-fno-stack-protector")))
+#endif
+
 /* Guard variables and structure members by lock. */
 #define	__guarded_by(x)		__lock_annotate(guarded_by(x))
 #define	__pt_guarded_by(x)	__lock_annotate(pt_guarded_by(x))