amd64: panic on -CURRENT @r330539 for certain UEFI hosts
Konstantin Belousov
kostikbel at gmail.com
Fri Mar 16 09:56:46 UTC 2018
On Thu, Mar 15, 2018 at 09:38:56PM -0500, Peter Lei wrote:
> Some recent UEFI implementations have begun to leave the CPU with page
> write protection enabled in CR0.
>
> With r330539 which enables kernel page protections, interesting things
> happen during boot (aka panic) when protection is already enabled,
> including a write protection fault from an explicit .text fixup write
> from xsave->xsaveopt by fpuinit().
>
> I see this so far booting -CURRENT under virtual environments:
>
> - QEMU with recent OVMF EDK2 builds: this is certainly due to UEFI
> enabling paging and page protections.
>
> - VMware Fusion 10.1.x on Mac: no specific insight on what's going on
> inside the implementation, but CR0_WP is definitely left enabled before
> the kernel is booted.
>
> I have patched my kernel build to explicitly clear CR0_WP (e.g. in
> initializecpu) prior to creating the page tables to get around this, but
> someone might have a cleaner/better solution...
Try this.
diff --git a/sys/amd64/amd64/db_interface.c b/sys/amd64/amd64/db_interface.c
index 9dfd44cf82c..1ecec02835c 100644
--- a/sys/amd64/amd64/db_interface.c
+++ b/sys/amd64/amd64/db_interface.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <machine/cpufunc.h>
+#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <ddb/ddb.h>
@@ -75,19 +76,19 @@ db_write_bytes(vm_offset_t addr, size_t size, char *data)
jmp_buf jb;
void *prev_jb;
char *dst;
- u_long cr0save;
+ bool old_wp;
int ret;
- cr0save = rcr0();
+ old_wp = false;
prev_jb = kdb_jmpbuf(jb);
ret = setjmp(jb);
if (ret == 0) {
- load_cr0(cr0save & ~CR0_WP);
+ old_wp = disable_wp();
dst = (char *)addr;
while (size-- > 0)
*dst++ = *data++;
}
- load_cr0(cr0save);
+ restore_wp(old_wp);
(void)kdb_jmpbuf(prev_jb);
return (ret);
}
diff --git a/sys/amd64/amd64/fpu.c b/sys/amd64/amd64/fpu.c
index 72b10396341..39367fa6ffb 100644
--- a/sys/amd64/amd64/fpu.c
+++ b/sys/amd64/amd64/fpu.c
@@ -205,6 +205,7 @@ fpuinit_bsp1(void)
{
u_int cp[4];
uint64_t xsave_mask_user;
+ bool old_wp;
if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
use_xsave = 1;
@@ -233,8 +234,14 @@ fpuinit_bsp1(void)
* Patch the XSAVE instruction in the cpu_switch code
* to XSAVEOPT. We assume that XSAVE encoding used
* REX byte, and set the bit 4 of the r/m byte.
+ *
+ * It seems that some BIOSes give control to the OS
+ * with CR0.WP already set, making the kernel text
+ * read-only before cpu_startup().
*/
+ old_wp = disable_wp();
ctx_switch_xsave[3] |= 0x10;
+ restore_wp(old_wp);
}
}
diff --git a/sys/amd64/amd64/gdb_machdep.c b/sys/amd64/amd64/gdb_machdep.c
index 68eb6002593..f7ca3c07ea3 100644
--- a/sys/amd64/amd64/gdb_machdep.c
+++ b/sys/amd64/amd64/gdb_machdep.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/frame.h>
#include <machine/gdb_machdep.h>
+#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/reg.h>
@@ -127,17 +128,14 @@ gdb_cpu_signal(int type, int code)
void *
gdb_begin_write(void)
{
- u_long cr0save;
- cr0save = rcr0();
- load_cr0(cr0save & ~CR0_WP);
- return ((void *)cr0save);
+ return (disable_wp() ? &gdb_begin_write : NULL);
}
void
gdb_end_write(void *arg)
{
- load_cr0((u_long)arg);
+ restore_wp(arg != NULL);
}
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index e340c6cd14d..fcc45eca57d 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -2597,6 +2597,31 @@ clear_pcb_flags(struct pcb *pcb, const u_int flags)
: "cc", "memory");
}
+/*
+ * Clear CR0.WP to allow writes to kernel text, then restore it.
+ * Callers must ensure that each disable_wp()/restore_wp() pair runs
+ * on the same CPU, with no rescheduling in between.
+ */
+bool
+disable_wp(void)
+{
+ u_int cr0;
+
+ cr0 = rcr0();
+ if ((cr0 & CR0_WP) == 0)
+ return (false);
+ load_cr0(cr0 & ~CR0_WP);
+ return (true);
+}
+
+void
+restore_wp(bool old_wp)
+{
+
+ if (old_wp)
+ load_cr0(rcr0() | CR0_WP);
+}
+
#ifdef KDB
/*
diff --git a/sys/amd64/include/md_var.h b/sys/amd64/include/md_var.h
index 63dabaf4047..abcc273b6c6 100644
--- a/sys/amd64/include/md_var.h
+++ b/sys/amd64/include/md_var.h
@@ -53,6 +53,8 @@ void amd64_conf_fast_syscall(void);
void amd64_db_resume_dbreg(void);
void amd64_lower_shared_page(struct sysentvec *);
void amd64_syscall(struct thread *td, int traced);
+bool disable_wp(void);
+void restore_wp(bool old_wp);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
void ld_ds(void) __asm(__STRING(ld_ds));
More information about the freebsd-current
mailing list