git: fdeb273d49bf - main - dtrace: Add some more annotations for KMSAN

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Sat, 23 Nov 2024 02:36:45 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=fdeb273d49bf2fa2544d3c98114859db10385550

commit fdeb273d49bf2fa2544d3c98114859db10385550
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-11-23 02:32:36 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-11-23 02:36:08 +0000

    dtrace: Add some more annotations for KMSAN
    
    - Don't allow FBT and kinst to instrument the KMSAN runtime.
    - When fetching data from the traced thread's stack, mark it as
      initialized.  It may well be uninitialized, but as dtrace permits
      arbitrary inspection of kernel memory, it isn't very useful to raise
      KMSAN reports.
    - Mark data copied in from userspace as initialized, as we do for
      copyin() etc. using interceptors.
    
    MFC after:      2 weeks
---
 sys/cddl/dev/dtrace/amd64/dtrace_isa.c | 55 +++++++++++++++++++++++++++-------
 sys/cddl/dev/fbt/fbt.c                 |  7 +++++
 sys/cddl/dev/kinst/kinst.c             |  7 +++++
 3 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c
index 83d34abbd270..f14e90d974bc 100644
--- a/sys/cddl/dev/dtrace/amd64/dtrace_isa.c
+++ b/sys/cddl/dev/dtrace/amd64/dtrace_isa.c
@@ -29,6 +29,7 @@
 #include <sys/systm.h>
 #include <sys/dtrace_impl.h>
 #include <sys/kernel.h>
+#include <sys/msan.h>
 #include <sys/stack.h>
 #include <sys/pcpu.h>
 
@@ -73,6 +74,8 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
 	frame = (struct amd64_frame *)rbp;
 	td = curthread;
 	while (depth < pcstack_limit) {
+		kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
+
 		if (!kstack_contains(curthread, (vm_offset_t)frame,
 		    sizeof(*frame)))
 			break;
@@ -99,6 +102,7 @@ dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes,
 	for (; depth < pcstack_limit; depth++) {
 		pcstack[depth] = 0;
 	}
+	kmsan_check(pcstack, pcstack_limit * sizeof(*pcstack), "dtrace");
 }
 
 static int
@@ -399,8 +403,10 @@ dtrace_getarg(int arg, int aframes)
 		goto load;
 	}
 
-	for (i = 1; i <= aframes; i++)
+	for (i = 1; i <= aframes; i++) {
+		kmsan_mark(fp, sizeof(*fp), KMSAN_STATE_INITED);
 		fp = fp->f_frame;
+	}
 
 	/*
 	 * We know that we did not come through a trap to get into
@@ -430,6 +436,8 @@ load:
 	val = stack[arg];
 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
 
+	kmsan_mark(&val, sizeof(val), KMSAN_STATE_INITED);
+
 	return (val);
 }
 
@@ -444,10 +452,13 @@ dtrace_getstackdepth(int aframes)
 	rbp = dtrace_getfp();
 	frame = (struct amd64_frame *)rbp;
 	depth++;
-	for(;;) {
+	for (;;) {
+		kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
+
 		if (!kstack_contains(curthread, (vm_offset_t)frame,
 		    sizeof(*frame)))
 			break;
+
 		depth++;
 		if (frame->f_frame <= frame)
 			break;
@@ -574,76 +585,100 @@ void
 dtrace_copyin(uintptr_t uaddr, uintptr_t kaddr, size_t size,
     volatile uint16_t *flags)
 {
-	if (dtrace_copycheck(uaddr, kaddr, size))
+	if (dtrace_copycheck(uaddr, kaddr, size)) {
 		dtrace_copy(uaddr, kaddr, size);
+		kmsan_mark((void *)kaddr, size, KMSAN_STATE_INITED);
+	}
 }
 
 void
 dtrace_copyout(uintptr_t kaddr, uintptr_t uaddr, size_t size,
     volatile uint16_t *flags)
 {
-	if (dtrace_copycheck(uaddr, kaddr, size))
+	if (dtrace_copycheck(uaddr, kaddr, size)) {
+		kmsan_check((void *)kaddr, size, "dtrace_copyout");
 		dtrace_copy(kaddr, uaddr, size);
+	}
 }
 
 void
 dtrace_copyinstr(uintptr_t uaddr, uintptr_t kaddr, size_t size,
     volatile uint16_t *flags)
 {
-	if (dtrace_copycheck(uaddr, kaddr, size))
+	if (dtrace_copycheck(uaddr, kaddr, size)) {
 		dtrace_copystr(uaddr, kaddr, size, flags);
+		kmsan_mark((void *)kaddr, size, KMSAN_STATE_INITED);
+	}
 }
 
 void
 dtrace_copyoutstr(uintptr_t kaddr, uintptr_t uaddr, size_t size,
     volatile uint16_t *flags)
 {
-	if (dtrace_copycheck(uaddr, kaddr, size))
+	if (dtrace_copycheck(uaddr, kaddr, size)) {
+		kmsan_check((void *)kaddr, size, "dtrace_copyoutstr");
 		dtrace_copystr(kaddr, uaddr, size, flags);
+	}
 }
 
 uint8_t
 dtrace_fuword8(void *uaddr)
 {
+	uint8_t val;
+
 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
 		return (0);
 	}
-	return (dtrace_fuword8_nocheck(uaddr));
+	val = dtrace_fuword8_nocheck(uaddr);
+	kmsan_mark(&val, sizeof(val), KMSAN_STATE_INITED);
+	return (val);
 }
 
 uint16_t
 dtrace_fuword16(void *uaddr)
 {
+	uint16_t val;
+
 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
 		return (0);
 	}
-	return (dtrace_fuword16_nocheck(uaddr));
+	val = dtrace_fuword16_nocheck(uaddr);
+	kmsan_mark(&val, sizeof(val), KMSAN_STATE_INITED);
+	return (val);
 }
 
 uint32_t
 dtrace_fuword32(void *uaddr)
 {
+	uint32_t val;
+
 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
 		return (0);
 	}
-	return (dtrace_fuword32_nocheck(uaddr));
+	val = dtrace_fuword32_nocheck(uaddr);
+	kmsan_mark(&val, sizeof(val), KMSAN_STATE_INITED);
+	return (val);
 }
 
 uint64_t
 dtrace_fuword64(void *uaddr)
 {
+	uint64_t val;
+
 	if ((uintptr_t)uaddr > VM_MAXUSER_ADDRESS) {
 		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
 		cpu_core[curcpu].cpuc_dtrace_illval = (uintptr_t)uaddr;
 		return (0);
 	}
-	return (dtrace_fuword64_nocheck(uaddr));
+	val = dtrace_fuword64_nocheck(uaddr);
+	kmsan_mark(&val, sizeof(val), KMSAN_STATE_INITED);
+	return (val);
 }
 
 /*
diff --git a/sys/cddl/dev/fbt/fbt.c b/sys/cddl/dev/fbt/fbt.c
index 481c896e9775..99a77ba65eb8 100644
--- a/sys/cddl/dev/fbt/fbt.c
+++ b/sys/cddl/dev/fbt/fbt.c
@@ -136,6 +136,13 @@ fbt_excluded(const char *name)
 	    strcmp(name, "owner_sx") == 0)
 		return (1);
 
+	/*
+	 * The KMSAN runtime can't be instrumented safely.
+	 */
+	if (strncmp(name, "__msan", 6) == 0 ||
+	    strncmp(name, "kmsan_", 6) == 0)
+		return (1);
+
 	/*
 	 * Stack unwinders may be called from probe context on some
 	 * platforms.
diff --git a/sys/cddl/dev/kinst/kinst.c b/sys/cddl/dev/kinst/kinst.c
index 60400a452b95..82b78d98987c 100644
--- a/sys/cddl/dev/kinst/kinst.c
+++ b/sys/cddl/dev/kinst/kinst.c
@@ -132,6 +132,13 @@ kinst_excluded(const char *name)
 	    strcmp(name, "owner_sx") == 0)
 		return (true);
 
+	/*
+	 * The KMSAN runtime can't be instrumented safely.
+	 */
+	if (strncmp(name, "__msan", 6) == 0 ||
+	    strncmp(name, "kmsan_", 6) == 0)
+		return (1);
+
 	/*
 	 * When DTrace is built into the kernel we need to exclude the kinst
 	 * functions from instrumentation.