git: 588ab3c77454 - main - Allow minidumps to be performed on the live system

From: Mitchell Horne <mhorne_at_FreeBSD.org>
Date: Fri, 19 Nov 2021 19:06:08 UTC
The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=588ab3c7745480778281ce2ab086eacfb487e413

commit 588ab3c7745480778281ce2ab086eacfb487e413
Author:     Mitchell Horne <mhorne@FreeBSD.org>
AuthorDate: 2021-11-17 15:35:59 +0000
Commit:     Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2021-11-19 19:05:53 +0000

    Allow minidumps to be performed on the live system
    
    Add a boolean parameter to minidumpsys(), to indicate a live dump. When
    requested, take a snapshot of important global state, and pass this to
    the machine-dependent minidump function. For now this includes the
    kernel message buffer, and the bitset of pages to be dumped. Beyond
    this, we don't take much action to protect the integrity of the dump
    from changes in the running system.
    
    A new function msgbuf_duplicate() is added for snapshotting the message
    buffer. msgbuf_copy() is insufficient for this purpose since it marks
    any new characters it finds as read.
    
    For now, nothing can actually trigger a live minidump. A future patch
    will add the mechanism for this. For simplicity and safety, live dumps
    are disallowed for mips.
    
    Reviewed by:    markj, jhb
    MFC after:      2 weeks
    Sponsored by:   Juniper Networks, Inc.
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D31993
---
 sys/kern/kern_dump.c             | 67 +++++++++++++++++++++++++++++++++++++---
 sys/kern/subr_msgbuf.c           | 15 +++++++++
 sys/mips/mips/minidump_machdep.c |  4 +++
 sys/sys/kerneldump.h             |  2 +-
 sys/sys/msgbuf.h                 |  1 +
 5 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/sys/kern/kern_dump.c b/sys/kern/kern_dump.c
index 278863e19a65..17ac4e418645 100644
--- a/sys/kern/kern_dump.c
+++ b/sys/kern/kern_dump.c
@@ -31,8 +31,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/conf.h>
 #include <sys/cons.h>
+#include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
+#include <sys/malloc.h>
 #include <sys/msgbuf.h>
 #include <sys/proc.h>
 #include <sys/watchdog.h>
@@ -295,7 +297,7 @@ dumpsys_generic(struct dumperinfo *di)
 
 #if MINIDUMP_PAGE_TRACKING == 1
 	if (do_minidump)
-		return (minidumpsys(di));
+		return (minidumpsys(di, false));
 #endif
 
 	bzero(&ehdr, sizeof(ehdr));
@@ -461,15 +463,72 @@ dumpsys_pb_progress(size_t delta)
 }
 
 int
-minidumpsys(struct dumperinfo *di)
+minidumpsys(struct dumperinfo *di, bool livedump)
 {
 	struct minidumpstate state;
+	struct msgbuf mb_copy;
+	char *msg_ptr;
 	int error;
 
-	state.msgbufp = msgbufp;
-	state.dump_bitset = vm_page_dump;
+	if (livedump) {
+		KASSERT(!dumping, ("live dump invoked from incorrect context"));
+
+		/*
+		 * Before invoking cpu_minidumpsys() on the live system, we
+		 * must snapshot some required global state: the message
+		 * buffer, and the page dump bitset. They may be modified at
+		 * any moment, so for the sake of the live dump it is best to
+		 * have an unchanging snapshot to work with. Both are included
+		 * as part of the dump and consumed by userspace tools.
+		 *
+		 * Other global state important to the minidump code is the
+		 * dump_avail array and the kernel's page tables, but snapshots
+		 * are not taken of these: dump_avail[] is expected not to
+		 * change after boot, and snapshotting the kernel page tables
+		 * would involve an additional walk, so it is avoided.
+		 *
+		 * This means live dumps are best effort, and the result may or
+		 * may not be usable; there are no guarantees about the
+		 * consistency of the dump's contents. Any of the following
+		 * (and likely more) may affect the live dump:
+		 *
+		 *  - Data may be modified, freed, or remapped during the
+		 *    course of the dump, such that the contents written out
+		 *    are partially or entirely unrecognizable. This means
+		 *    valid references may point to destroyed/mangled objects,
+		 *    and vice versa.
+		 *
+		 *  - The dumped context of any threads that ran during the
+		 *    dump process may be unreliable.
+		 *
+		 *  - The set of kernel page tables included in the dump likely
+		 *    won't correspond exactly to the copy of the dump bitset.
+		 *    This means some pages will be dumped without any way to
+		 *    locate them, and some pages may not have been dumped
+		 *    despite appearing as if they should.
+		 */
+		msg_ptr = malloc(msgbufsize, M_TEMP, M_WAITOK);
+		msgbuf_duplicate(msgbufp, &mb_copy, msg_ptr);
+		state.msgbufp = &mb_copy;
+
+		/* bitset(9) macros take the set size in bits, not bytes. */
+		state.dump_bitset = malloc(BITSET_SIZE(vm_page_dump_pages),
+		    M_TEMP, M_WAITOK);
+		BIT_COPY_STORE_REL(vm_page_dump_pages, vm_page_dump,
+		    state.dump_bitset);
+	} else {
+		KASSERT(dumping, ("minidump invoked outside of doadump()"));
+
+		/* Use the globals. */
+		state.msgbufp = msgbufp;
+		state.dump_bitset = vm_page_dump;
+	}
 
 	error = cpu_minidumpsys(di, &state);
+	if (livedump) {
+		free(msg_ptr, M_TEMP);
+		free(state.dump_bitset, M_TEMP);
+	}
 
 	return (error);
 }
diff --git a/sys/kern/subr_msgbuf.c b/sys/kern/subr_msgbuf.c
index 980d37df205b..8af013d52a2d 100644
--- a/sys/kern/subr_msgbuf.c
+++ b/sys/kern/subr_msgbuf.c
@@ -414,3 +414,18 @@ msgbuf_copy(struct msgbuf *src, struct msgbuf *dst)
 	while ((c = msgbuf_getchar(src)) >= 0)
 		msgbuf_addchar(dst, c);
 }
+
+/*
+ * Snapshot src into dst without altering src (no characters are marked read).
+ * dst_msgptr receives the text and must hold at least src->msg_size bytes.
+ */
+void
+msgbuf_duplicate(struct msgbuf *src, struct msgbuf *dst, char *dst_msgptr)
+{
+
+	mtx_lock_spin(&src->msg_lock);
+	bcopy(src, dst, sizeof(struct msgbuf));
+	dst->msg_ptr = dst_msgptr;	/* don't alias src's text */
+	bcopy(src->msg_ptr, dst->msg_ptr, src->msg_size);
+	mtx_unlock_spin(&src->msg_lock);
+}
diff --git a/sys/mips/mips/minidump_machdep.c b/sys/mips/mips/minidump_machdep.c
index abe45e999f13..cbf9a83395a6 100644
--- a/sys/mips/mips/minidump_machdep.c
+++ b/sys/mips/mips/minidump_machdep.c
@@ -120,6 +120,10 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state)
 	int i, error;
 	void *dump_va;
 
+	/* Live dumps are untested. */
+	if (!dumping)
+		return (EOPNOTSUPP);
+
 	/* Flush cache */
 	mips_dcache_wbinv_all();
 
diff --git a/sys/sys/kerneldump.h b/sys/sys/kerneldump.h
index 54662d9cff39..c293491eadc9 100644
--- a/sys/sys/kerneldump.h
+++ b/sys/sys/kerneldump.h
@@ -140,7 +140,7 @@ struct minidumpstate {
 	struct bitset	*dump_bitset;
 };
 
-int minidumpsys(struct dumperinfo *);
+int minidumpsys(struct dumperinfo *, bool);
 int dumpsys_generic(struct dumperinfo *);
 
 void dumpsys_map_chunk(vm_paddr_t, size_t, void **);
diff --git a/sys/sys/msgbuf.h b/sys/sys/msgbuf.h
index df61f130e46f..27aba1a8e0ed 100644
--- a/sys/sys/msgbuf.h
+++ b/sys/sys/msgbuf.h
@@ -78,6 +78,7 @@ void	msgbuf_init(struct msgbuf *mbp, void *ptr, int size);
 int	msgbuf_peekbytes(struct msgbuf *mbp, char *buf, int buflen,
 	    u_int *seqp);
 void	msgbuf_reinit(struct msgbuf *mbp, void *ptr, int size);
+void	msgbuf_duplicate(struct msgbuf *src, struct msgbuf *dst, char *msgptr);
 
 #ifndef MSGBUF_SIZE
 #define	MSGBUF_SIZE	(32768 * 3)