svn commit: r333596 - in head/sys: dev/hwpmc kern sys

Matt Macy mmacy at FreeBSD.org
Mon May 14 00:21:06 UTC 2018


Author: mmacy
Date: Mon May 14 00:21:04 2018
New Revision: 333596
URL: https://svnweb.freebsd.org/changeset/base/333596

Log:
  hwpmc: fix load/unload race and vm map LOR
  
  - fix load/unload race by allocating the per-domain list structure at boot
  
  - fix long extant vm map LOR by replacing pmc_sx sx_slock with global_epoch
    to protect the liveness of elements of the pmc_ss_owners list
  
  Reported by:	pho
  Approved by:	sbruno

Modified:
  head/sys/dev/hwpmc/hwpmc_logging.c
  head/sys/dev/hwpmc/hwpmc_mod.c
  head/sys/kern/kern_pmc.c
  head/sys/sys/pmc.h
  head/sys/sys/pmckern.h

Modified: head/sys/dev/hwpmc/hwpmc_logging.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c	Mon May 14 00:14:00 2018	(r333595)
+++ head/sys/dev/hwpmc/hwpmc_logging.c	Mon May 14 00:21:04 2018	(r333596)
@@ -63,20 +63,10 @@ __FBSDID("$FreeBSD$");
 
 #ifdef NUMA
 #define NDOMAINS vm_ndomains
-
-static int
-getdomain(int cpu)
-{
-	struct pcpu *pc;
-
-	pc = pcpu_find(cpu);
-	return (pc->pc_domain);
-}
 #else
 #define NDOMAINS 1
 #define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags))
 #define free_domain(addr, type) free(addr, type)
-#define getdomain(cpu) 0
 #endif
 
 /*
@@ -225,16 +215,6 @@ struct pmclog_buffer {
 	uint16_t	 plb_domain;
 } __aligned(CACHE_LINE_SIZE);
 
-struct pmc_domain_buffer_header {
-	struct mtx pdbh_mtx;
-	TAILQ_HEAD(, pmclog_buffer) pdbh_head;
-	struct pmclog_buffer *pdbh_plbs;
-	int pdbh_ncpus;
-} __aligned(CACHE_LINE_SIZE);
-
-struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
-
-
 /*
  * Prototypes
  */
@@ -280,6 +260,7 @@ pmclog_get_buffer(struct pmc_owner *po)
 	    ("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));
 
 	domain = PCPU_GET(domain);
+	MPASS(pmc_dom_hdrs[domain]);
 	mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
 	if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL)
 		TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
@@ -1165,7 +1146,7 @@ pmclog_process_userlog(struct pmc_owner *po, struct pm
 void
 pmclog_initialize()
 {
-	int domain, cpu;
+	int domain;
 	struct pmclog_buffer *plb;
 
 	if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) {
@@ -1180,7 +1161,6 @@ pmclog_initialize()
 					  "than zero.\n", pmc_nlogbuffers_pcpu);
 		pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
 	}
-
 	if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) {
 		(void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n",
 					  pmc_nlogbuffers_pcpu*pmclog_buffer_size);
@@ -1188,17 +1168,6 @@ pmclog_initialize()
 		pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
 	}
 	for (domain = 0; domain < NDOMAINS; domain++) {
-		pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
-										M_WAITOK|M_ZERO);
-		mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
-		TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
-	}
-	CPU_FOREACH(cpu) {
-		domain = getdomain(cpu);
-		KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain));
-		pmc_dom_hdrs[domain]->pdbh_ncpus++;
-	}
-	for (domain = 0; domain < NDOMAINS; domain++) {
 		int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus;
 		int total = ncpus*pmc_nlogbuffers_pcpu;
 
@@ -1231,12 +1200,10 @@ pmclog_shutdown()
 	mtx_destroy(&pmc_kthread_mtx);
 
 	for (domain = 0; domain < NDOMAINS; domain++) {
-		mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx);
 		while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) {
 			TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
 			free(plb->plb_base, M_PMC);
 		}
 		free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC);
-		free(pmc_dom_hdrs[domain], M_PMC);
 	}
 }

Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c	Mon May 14 00:14:00 2018	(r333595)
+++ head/sys/dev/hwpmc/hwpmc_mod.c	Mon May 14 00:21:04 2018	(r333596)
@@ -1564,12 +1564,14 @@ pmc_process_mmap(struct thread *td, struct pmckern_map
 	const struct pmc_process *pp;
 
 	freepath = fullpath = NULL;
+	epoch_exit(global_epoch);
 	pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
 
 	pid = td->td_proc->p_pid;
 
+	epoch_enter(global_epoch);
 	/* Inform owners of all system-wide sampling PMCs. */
-	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 			pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
 
@@ -1606,10 +1608,12 @@ pmc_process_munmap(struct thread *td, struct pmckern_m
 
 	pid = td->td_proc->p_pid;
 
-	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+	epoch_enter(global_epoch);
+	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 		pmclog_process_map_out(po, pid, pkm->pm_address,
 		    pkm->pm_address + pkm->pm_size);
+	epoch_exit(global_epoch);
 
 	if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
 		return;
@@ -1631,7 +1635,7 @@ pmc_log_kernel_mappings(struct pmc *pm)
 	struct pmc_owner *po;
 	struct pmckern_map_in *km, *kmbase;
 
-	sx_assert(&pmc_sx, SX_LOCKED);
+	MPASS(in_epoch() || sx_xlocked(&pmc_sx));
 	KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
 	    ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
 		__LINE__, (void *) pm));
@@ -1905,11 +1909,13 @@ pmc_hook_handler(struct thread *td, int function, void
 
 		pk = (struct pmckern_procexec *) arg;
 
+		epoch_enter(global_epoch);
 		/* Inform owners of SS mode PMCs of the exec event. */
-		LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+		CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 		    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 			    pmclog_process_procexec(po, PMC_ID_INVALID,
 				p->p_pid, pk->pm_entryaddr, fullpath);
+		epoch_exit(global_epoch);
 
 		PROC_LOCK(p);
 		is_using_hwpmcs = p->p_flag & P_HWPMC;
@@ -2033,12 +2039,12 @@ pmc_hook_handler(struct thread *td, int function, void
 		break;
 
 	case PMC_FN_MMAP:
-		sx_assert(&pmc_sx, SX_LOCKED);
+		MPASS(in_epoch() || sx_xlocked(&pmc_sx));
 		pmc_process_mmap(td, (struct pmckern_map_in *) arg);
 		break;
 
 	case PMC_FN_MUNMAP:
-		sx_assert(&pmc_sx, SX_LOCKED);
+		MPASS(in_epoch() || sx_xlocked(&pmc_sx));
 		pmc_process_munmap(td, (struct pmckern_map_out *) arg);
 		break;
 
@@ -2360,7 +2366,8 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 			po->po_sscount--;
 			if (po->po_sscount == 0) {
 				atomic_subtract_rel_int(&pmc_ss_count, 1);
-				LIST_REMOVE(po, po_ssnext);
+				CK_LIST_REMOVE(po, po_ssnext);
+				epoch_wait(global_epoch);
 			}
 		}
 
@@ -2727,7 +2734,7 @@ pmc_start(struct pmc *pm)
 
 	if (mode == PMC_MODE_SS) {
 		if (po->po_sscount == 0) {
-			LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
+			CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
 			atomic_add_rel_int(&pmc_ss_count, 1);
 			PMCDBG1(PMC,OPS,1, "po=%p in global list", po);
 		}
@@ -2854,7 +2861,8 @@ pmc_stop(struct pmc *pm)
 		po->po_sscount--;
 		if (po->po_sscount == 0) {
 			atomic_subtract_rel_int(&pmc_ss_count, 1);
-			LIST_REMOVE(po, po_ssnext);
+			CK_LIST_REMOVE(po, po_ssnext);
+			epoch_wait(global_epoch);
 			PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po);
 		}
 	}
@@ -2900,6 +2908,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 	c = (struct pmc_syscall_args *)syscall_args;
 	op = c->pmop_code;
 	arg = c->pmop_data;
+	/* PMC isn't set up yet */
+	if (pmc_hook == NULL)
+		return (EINVAL);
 	if (op == PMC_OP_CONFIGURELOG) {
 		/*
 		 * We cannot create the logging process inside
@@ -2916,7 +2927,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
 
 	PMC_GET_SX_XLOCK(ENOSYS);
 	is_sx_downgraded = 0;
-
 	PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op,
 	    pmc_op_to_name[op], arg);
 
@@ -4482,9 +4492,11 @@ pmc_process_exit(void *arg __unused, struct proc *p)
 	/*
 	 * Log a sysexit event to all SS PMC owners.
 	 */
-	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+	epoch_enter(global_epoch);
+	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 		    pmclog_process_sysexit(po, p->p_pid);
+	epoch_exit(global_epoch);
 
 	if (!is_using_hwpmcs)
 		return;
@@ -4659,10 +4671,11 @@ pmc_process_fork(void *arg __unused, struct proc *p1, 
 	 * If there are system-wide sampling PMCs active, we need to
 	 * log all fork events to their owner's logs.
 	 */
-
-	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+	epoch_enter(global_epoch);
+	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 	    if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 		    pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
+	epoch_exit(global_epoch);
 
 	if (!is_using_hwpmcs)
 		return;
@@ -4728,21 +4741,19 @@ pmc_kld_load(void *arg __unused, linker_file_t lf)
 {
 	struct pmc_owner *po;
 
-	sx_slock(&pmc_sx);
-
 	/*
 	 * Notify owners of system sampling PMCs about KLD operations.
 	 */
-	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+	epoch_enter(global_epoch);
+	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 		if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 			pmclog_process_map_in(po, (pid_t) -1,
 			    (uintfptr_t) lf->address, lf->filename);
+	epoch_exit(global_epoch);
 
 	/*
 	 * TODO: Notify owners of (all) process-sampling PMCs too.
 	 */
-
-	sx_sunlock(&pmc_sx);
 }
 
 static void
@@ -4751,18 +4762,16 @@ pmc_kld_unload(void *arg __unused, const char *filenam
 {
 	struct pmc_owner *po;
 
-	sx_slock(&pmc_sx);
-
-	LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+	epoch_enter(global_epoch);
+	CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
 		if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 			pmclog_process_map_out(po, (pid_t) -1,
 			    (uintfptr_t) address, (uintfptr_t) address + size);
+	epoch_exit(global_epoch);
 
 	/*
 	 * TODO: Notify owners of process-sampling PMCs.
 	 */
-
-	sx_sunlock(&pmc_sx);
 }
 
 /*
@@ -5064,6 +5073,7 @@ pmc_initialize(void)
 
 	/* set hook functions */
 	pmc_intr = md->pmd_intr;
+	wmb();
 	pmc_hook = pmc_hook_handler;
 
 	if (error == 0) {
@@ -5282,6 +5292,3 @@ load (struct module *module __unused, int cmd, void *a
 
 	return error;
 }
-
-/* memory pool */
-MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");

Modified: head/sys/kern/kern_pmc.c
==============================================================================
--- head/sys/kern/kern_pmc.c	Mon May 14 00:14:00 2018	(r333595)
+++ head/sys/kern/kern_pmc.c	Mon May 14 00:21:04 2018	(r333596)
@@ -48,6 +48,10 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
 #ifdef	HWPMC_HOOKS
 FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
 #define	PMC_KERNEL_VERSION	PMC_VERSION
@@ -58,6 +62,9 @@ FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
 MALLOC_DECLARE(M_PMCHOOKS);
 MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");
 
+/* memory pool */
+MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");
+
 const int pmc_kernel_version = PMC_KERNEL_VERSION;
 
 /* Hook variable. */
@@ -84,6 +91,7 @@ volatile int pmc_ss_count;
  * somewhat more expensive than a simple 'if' check and indirect call.
  */
 struct sx pmc_sx;
+SX_SYSINIT(pmcsx, &pmc_sx, "pmc-sx");
 
 /*
  * PMC Soft per cpu trapframe.
@@ -91,6 +99,11 @@ struct sx pmc_sx;
 struct trapframe pmc_tf[MAXCPU];
 
 /*
+ * Per domain list of buffer headers
+ */
+__read_mostly struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
+/*
  * PMC Soft use a global table to store registered events.
  */
 
@@ -100,20 +113,12 @@ static int pmc_softevents = 16;
 SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
     &pmc_softevents, 0, "maximum number of soft events");
 
-struct mtx pmc_softs_mtx;
 int pmc_softs_count;
 struct pmc_soft **pmc_softs;
 
+struct mtx pmc_softs_mtx;
 MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);
 
-static void
-pmc_init_sx(void)
-{
-	sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
-}
-
-SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
-
 /*
  * Helper functions.
  */
@@ -325,12 +330,30 @@ pmc_soft_ev_release(struct pmc_soft *ps)
 	mtx_unlock_spin(&pmc_softs_mtx);
 }
 
+#ifdef NUMA
+#define NDOMAINS vm_ndomains
+
+static int
+getdomain(int cpu)
+{
+	struct pcpu *pc;
+
+	pc = pcpu_find(cpu);
+	return (pc->pc_domain);
+}
+#else
+#define NDOMAINS 1
+#define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags))
+#define getdomain(cpu) 0
+#endif
 /*
  *  Initialise hwpmc.
  */
 static void
 init_hwpmc(void *dummy __unused)
 {
+	int domain, cpu;
+
 	if (pmc_softevents <= 0 ||
 	    pmc_softevents > PMC_EV_DYN_COUNT) {
 		(void) printf("hwpmc: tunable \"softevents\"=%d out of "
@@ -339,6 +362,19 @@ init_hwpmc(void *dummy __unused)
 	}
 	pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *), M_PMCHOOKS, M_NOWAIT|M_ZERO);
 	KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
+
+	for (domain = 0; domain < NDOMAINS; domain++) {
+		pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
+										M_WAITOK|M_ZERO);
+		mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
+		TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
+	}
+	CPU_FOREACH(cpu) {
+		domain = getdomain(cpu);
+		KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain));
+		pmc_dom_hdrs[domain]->pdbh_ncpus++;
+	}
+
 }
 
 SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);

Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h	Mon May 14 00:14:00 2018	(r333595)
+++ head/sys/sys/pmc.h	Mon May 14 00:21:04 2018	(r333596)
@@ -40,6 +40,8 @@
 #include <sys/counter.h>
 #include <machine/pmc_mdep.h>
 #include <machine/profile.h>
+#include <sys/epoch.h>
+#include <ck_queue.h>
 
 #define	PMC_MODULE_NAME		"hwpmc"
 #define	PMC_NAME_MAX		64 /* HW counter name size */
@@ -826,7 +828,7 @@ struct pmc_process {
 
 struct pmc_owner  {
 	LIST_ENTRY(pmc_owner)	po_next;	/* hash chain */
-	LIST_ENTRY(pmc_owner)	po_ssnext;	/* list of SS PMC owners */
+	CK_LIST_ENTRY(pmc_owner)	po_ssnext;	/* list of SS PMC owners */
 	LIST_HEAD(, pmc)	po_pmcs;	/* owned PMC list */
 	TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */
 	struct mtx		po_mtx;		/* spin lock for (o) */

Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h	Mon May 14 00:14:00 2018	(r333595)
+++ head/sys/sys/pmckern.h	Mon May 14 00:21:04 2018	(r333596)
@@ -157,6 +157,15 @@ struct pmc_soft {
 	struct pmc_dyn_event_descr	ps_ev;
 };
 
+struct pmclog_buffer;
+
+struct pmc_domain_buffer_header {
+	struct mtx pdbh_mtx;
+	TAILQ_HEAD(, pmclog_buffer) pdbh_head;
+	struct pmclog_buffer *pdbh_plbs;
+	int pdbh_ncpus;
+} __aligned(CACHE_LINE_SIZE);
+
 /* hook */
 extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg);
 extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
@@ -176,16 +185,19 @@ extern const int pmc_kernel_version;
 /* PMC soft per cpu trapframe */
 extern struct trapframe pmc_tf[MAXCPU];
 
+/* per domain buffer header list */
+extern struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
 /* Quick check if preparatory work is necessary */
 #define	PMC_HOOK_INSTALLED(cmd)	__predict_false(pmc_hook != NULL)
 
 /* Hook invocation; for use within the kernel */
 #define	PMC_CALL_HOOK(t, cmd, arg)		\
 do {						\
-	sx_slock(&pmc_sx);			\
+	epoch_enter(global_epoch);		\
 	if (pmc_hook != NULL)			\
 		(pmc_hook)((t), (cmd), (arg));	\
-	sx_sunlock(&pmc_sx);			\
+	epoch_exit(global_epoch);			\
 } while (0)
 
 /* Hook invocation that needs an exclusive lock */


More information about the svn-src-head mailing list