svn commit: r333596 - in head/sys: dev/hwpmc kern sys
Matt Macy
mmacy at FreeBSD.org
Mon May 14 00:21:06 UTC 2018
Author: mmacy
Date: Mon May 14 00:21:04 2018
New Revision: 333596
URL: https://svnweb.freebsd.org/changeset/base/333596
Log:
hwpmc: fix load/unload race and vm map LOR (lock-order reversal)
- fix the load/unload race by allocating the per-domain list structure at boot
- fix a long-extant vm map LOR by replacing the pmc_sx sx_slock with global_epoch
to protect the liveness of elements of the pmc_ss_owners list
Reported by: pho
Approved by: sbruno
Modified:
head/sys/dev/hwpmc/hwpmc_logging.c
head/sys/dev/hwpmc/hwpmc_mod.c
head/sys/kern/kern_pmc.c
head/sys/sys/pmc.h
head/sys/sys/pmckern.h
Modified: head/sys/dev/hwpmc/hwpmc_logging.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_logging.c Mon May 14 00:14:00 2018 (r333595)
+++ head/sys/dev/hwpmc/hwpmc_logging.c Mon May 14 00:21:04 2018 (r333596)
@@ -63,20 +63,10 @@ __FBSDID("$FreeBSD$");
#ifdef NUMA
#define NDOMAINS vm_ndomains
-
-static int
-getdomain(int cpu)
-{
- struct pcpu *pc;
-
- pc = pcpu_find(cpu);
- return (pc->pc_domain);
-}
#else
#define NDOMAINS 1
#define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags))
#define free_domain(addr, type) free(addr, type)
-#define getdomain(cpu) 0
#endif
/*
@@ -225,16 +215,6 @@ struct pmclog_buffer {
uint16_t plb_domain;
} __aligned(CACHE_LINE_SIZE);
-struct pmc_domain_buffer_header {
- struct mtx pdbh_mtx;
- TAILQ_HEAD(, pmclog_buffer) pdbh_head;
- struct pmclog_buffer *pdbh_plbs;
- int pdbh_ncpus;
-} __aligned(CACHE_LINE_SIZE);
-
-struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
-
-
/*
* Prototypes
*/
@@ -280,6 +260,7 @@ pmclog_get_buffer(struct pmc_owner *po)
("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));
domain = PCPU_GET(domain);
+ MPASS(pmc_dom_hdrs[domain]);
mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL)
TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
@@ -1165,7 +1146,7 @@ pmclog_process_userlog(struct pmc_owner *po, struct pm
void
pmclog_initialize()
{
- int domain, cpu;
+ int domain;
struct pmclog_buffer *plb;
if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) {
@@ -1180,7 +1161,6 @@ pmclog_initialize()
"than zero.\n", pmc_nlogbuffers_pcpu);
pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
}
-
if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) {
(void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n",
pmc_nlogbuffers_pcpu*pmclog_buffer_size);
@@ -1188,17 +1168,6 @@ pmclog_initialize()
pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
}
for (domain = 0; domain < NDOMAINS; domain++) {
- pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
- M_WAITOK|M_ZERO);
- mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
- TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
- }
- CPU_FOREACH(cpu) {
- domain = getdomain(cpu);
- KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain));
- pmc_dom_hdrs[domain]->pdbh_ncpus++;
- }
- for (domain = 0; domain < NDOMAINS; domain++) {
int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus;
int total = ncpus*pmc_nlogbuffers_pcpu;
@@ -1231,12 +1200,10 @@ pmclog_shutdown()
mtx_destroy(&pmc_kthread_mtx);
for (domain = 0; domain < NDOMAINS; domain++) {
- mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx);
while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) {
TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
free(plb->plb_base, M_PMC);
}
free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC);
- free(pmc_dom_hdrs[domain], M_PMC);
}
}
Modified: head/sys/dev/hwpmc/hwpmc_mod.c
==============================================================================
--- head/sys/dev/hwpmc/hwpmc_mod.c Mon May 14 00:14:00 2018 (r333595)
+++ head/sys/dev/hwpmc/hwpmc_mod.c Mon May 14 00:21:04 2018 (r333596)
@@ -1564,12 +1564,14 @@ pmc_process_mmap(struct thread *td, struct pmckern_map
const struct pmc_process *pp;
freepath = fullpath = NULL;
+ epoch_exit(global_epoch);
pmc_getfilename((struct vnode *) pkm->pm_file, &fullpath, &freepath);
pid = td->td_proc->p_pid;
+ epoch_enter(global_epoch);
/* Inform owners of all system-wide sampling PMCs. */
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
@@ -1606,10 +1608,12 @@ pmc_process_munmap(struct thread *td, struct pmckern_m
pid = td->td_proc->p_pid;
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ epoch_enter(global_epoch);
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, pid, pkm->pm_address,
pkm->pm_address + pkm->pm_size);
+ epoch_exit(global_epoch);
if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
return;
@@ -1631,7 +1635,7 @@ pmc_log_kernel_mappings(struct pmc *pm)
struct pmc_owner *po;
struct pmckern_map_in *km, *kmbase;
- sx_assert(&pmc_sx, SX_LOCKED);
+ MPASS(in_epoch() || sx_xlocked(&pmc_sx));
KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
("[pmc,%d] non-sampling PMC (%p) desires mapping information",
__LINE__, (void *) pm));
@@ -1905,11 +1909,13 @@ pmc_hook_handler(struct thread *td, int function, void
pk = (struct pmckern_procexec *) arg;
+ epoch_enter(global_epoch);
/* Inform owners of SS mode PMCs of the exec event. */
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_procexec(po, PMC_ID_INVALID,
p->p_pid, pk->pm_entryaddr, fullpath);
+ epoch_exit(global_epoch);
PROC_LOCK(p);
is_using_hwpmcs = p->p_flag & P_HWPMC;
@@ -2033,12 +2039,12 @@ pmc_hook_handler(struct thread *td, int function, void
break;
case PMC_FN_MMAP:
- sx_assert(&pmc_sx, SX_LOCKED);
+ MPASS(in_epoch() || sx_xlocked(&pmc_sx));
pmc_process_mmap(td, (struct pmckern_map_in *) arg);
break;
case PMC_FN_MUNMAP:
- sx_assert(&pmc_sx, SX_LOCKED);
+ MPASS(in_epoch() || sx_xlocked(&pmc_sx));
pmc_process_munmap(td, (struct pmckern_map_out *) arg);
break;
@@ -2360,7 +2366,8 @@ pmc_release_pmc_descriptor(struct pmc *pm)
po->po_sscount--;
if (po->po_sscount == 0) {
atomic_subtract_rel_int(&pmc_ss_count, 1);
- LIST_REMOVE(po, po_ssnext);
+ CK_LIST_REMOVE(po, po_ssnext);
+ epoch_wait(global_epoch);
}
}
@@ -2727,7 +2734,7 @@ pmc_start(struct pmc *pm)
if (mode == PMC_MODE_SS) {
if (po->po_sscount == 0) {
- LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
+ CK_LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
atomic_add_rel_int(&pmc_ss_count, 1);
PMCDBG1(PMC,OPS,1, "po=%p in global list", po);
}
@@ -2854,7 +2861,8 @@ pmc_stop(struct pmc *pm)
po->po_sscount--;
if (po->po_sscount == 0) {
atomic_subtract_rel_int(&pmc_ss_count, 1);
- LIST_REMOVE(po, po_ssnext);
+ CK_LIST_REMOVE(po, po_ssnext);
+ epoch_wait(global_epoch);
PMCDBG1(PMC,OPS,2,"po=%p removed from global list", po);
}
}
@@ -2900,6 +2908,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
c = (struct pmc_syscall_args *)syscall_args;
op = c->pmop_code;
arg = c->pmop_data;
+ /* PMC isn't set up yet */
+ if (pmc_hook == NULL)
+ return (EINVAL);
if (op == PMC_OP_CONFIGURELOG) {
/*
* We cannot create the logging process inside
@@ -2916,7 +2927,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_a
PMC_GET_SX_XLOCK(ENOSYS);
is_sx_downgraded = 0;
-
PMCDBG3(MOD,PMS,1, "syscall op=%d \"%s\" arg=%p", op,
pmc_op_to_name[op], arg);
@@ -4482,9 +4492,11 @@ pmc_process_exit(void *arg __unused, struct proc *p)
/*
* Log a sysexit event to all SS PMC owners.
*/
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ epoch_enter(global_epoch);
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_sysexit(po, p->p_pid);
+ epoch_exit(global_epoch);
if (!is_using_hwpmcs)
return;
@@ -4659,10 +4671,11 @@ pmc_process_fork(void *arg __unused, struct proc *p1,
* If there are system-wide sampling PMCs active, we need to
* log all fork events to their owner's logs.
*/
-
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ epoch_enter(global_epoch);
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
+ epoch_exit(global_epoch);
if (!is_using_hwpmcs)
return;
@@ -4728,21 +4741,19 @@ pmc_kld_load(void *arg __unused, linker_file_t lf)
{
struct pmc_owner *po;
- sx_slock(&pmc_sx);
-
/*
* Notify owners of system sampling PMCs about KLD operations.
*/
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ epoch_enter(global_epoch);
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_in(po, (pid_t) -1,
(uintfptr_t) lf->address, lf->filename);
+ epoch_exit(global_epoch);
/*
* TODO: Notify owners of (all) process-sampling PMCs too.
*/
-
- sx_sunlock(&pmc_sx);
}
static void
@@ -4751,18 +4762,16 @@ pmc_kld_unload(void *arg __unused, const char *filenam
{
struct pmc_owner *po;
- sx_slock(&pmc_sx);
-
- LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ epoch_enter(global_epoch);
+ CK_LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, (pid_t) -1,
(uintfptr_t) address, (uintfptr_t) address + size);
+ epoch_exit(global_epoch);
/*
* TODO: Notify owners of process-sampling PMCs.
*/
-
- sx_sunlock(&pmc_sx);
}
/*
@@ -5064,6 +5073,7 @@ pmc_initialize(void)
/* set hook functions */
pmc_intr = md->pmd_intr;
+ wmb();
pmc_hook = pmc_hook_handler;
if (error == 0) {
@@ -5282,6 +5292,3 @@ load (struct module *module __unused, int cmd, void *a
return error;
}
-
-/* memory pool */
-MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");
Modified: head/sys/kern/kern_pmc.c
==============================================================================
--- head/sys/kern/kern_pmc.c Mon May 14 00:14:00 2018 (r333595)
+++ head/sys/kern/kern_pmc.c Mon May 14 00:21:04 2018 (r333596)
@@ -48,6 +48,10 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/systm.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
#ifdef HWPMC_HOOKS
FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
#define PMC_KERNEL_VERSION PMC_VERSION
@@ -58,6 +62,9 @@ FEATURE(hwpmc_hooks, "Kernel support for HW PMC");
MALLOC_DECLARE(M_PMCHOOKS);
MALLOC_DEFINE(M_PMCHOOKS, "pmchooks", "Memory space for PMC hooks");
+/* memory pool */
+MALLOC_DEFINE(M_PMC, "pmc", "Memory space for the PMC module");
+
const int pmc_kernel_version = PMC_KERNEL_VERSION;
/* Hook variable. */
@@ -84,6 +91,7 @@ volatile int pmc_ss_count;
* somewhat more expensive than a simple 'if' check and indirect call.
*/
struct sx pmc_sx;
+SX_SYSINIT(pmcsx, &pmc_sx, "pmc-sx");
/*
* PMC Soft per cpu trapframe.
@@ -91,6 +99,11 @@ struct sx pmc_sx;
struct trapframe pmc_tf[MAXCPU];
/*
+ * Per domain list of buffer headers
+ */
+__read_mostly struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
+/*
* PMC Soft use a global table to store registered events.
*/
@@ -100,20 +113,12 @@ static int pmc_softevents = 16;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, softevents, CTLFLAG_RDTUN,
&pmc_softevents, 0, "maximum number of soft events");
-struct mtx pmc_softs_mtx;
int pmc_softs_count;
struct pmc_soft **pmc_softs;
+struct mtx pmc_softs_mtx;
MTX_SYSINIT(pmc_soft_mtx, &pmc_softs_mtx, "pmc-softs", MTX_SPIN);
-static void
-pmc_init_sx(void)
-{
- sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
-}
-
-SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
-
/*
* Helper functions.
*/
@@ -325,12 +330,30 @@ pmc_soft_ev_release(struct pmc_soft *ps)
mtx_unlock_spin(&pmc_softs_mtx);
}
+#ifdef NUMA
+#define NDOMAINS vm_ndomains
+
+static int
+getdomain(int cpu)
+{
+ struct pcpu *pc;
+
+ pc = pcpu_find(cpu);
+ return (pc->pc_domain);
+}
+#else
+#define NDOMAINS 1
+#define malloc_domain(size, type, domain, flags) malloc((size), (type), (flags))
+#define getdomain(cpu) 0
+#endif
/*
* Initialise hwpmc.
*/
static void
init_hwpmc(void *dummy __unused)
{
+ int domain, cpu;
+
if (pmc_softevents <= 0 ||
pmc_softevents > PMC_EV_DYN_COUNT) {
(void) printf("hwpmc: tunable \"softevents\"=%d out of "
@@ -339,6 +362,19 @@ init_hwpmc(void *dummy __unused)
}
pmc_softs = malloc(pmc_softevents * sizeof(struct pmc_soft *), M_PMCHOOKS, M_NOWAIT|M_ZERO);
KASSERT(pmc_softs != NULL, ("cannot allocate soft events table"));
+
+ for (domain = 0; domain < NDOMAINS; domain++) {
+ pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
+ M_WAITOK|M_ZERO);
+ mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
+ TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
+ }
+ CPU_FOREACH(cpu) {
+ domain = getdomain(cpu);
+ KASSERT(pmc_dom_hdrs[domain] != NULL, ("no mem allocated for domain: %d", domain));
+ pmc_dom_hdrs[domain]->pdbh_ncpus++;
+ }
+
}
SYSINIT(hwpmc, SI_SUB_KDTRACE, SI_ORDER_FIRST, init_hwpmc, NULL);
Modified: head/sys/sys/pmc.h
==============================================================================
--- head/sys/sys/pmc.h Mon May 14 00:14:00 2018 (r333595)
+++ head/sys/sys/pmc.h Mon May 14 00:21:04 2018 (r333596)
@@ -40,6 +40,8 @@
#include <sys/counter.h>
#include <machine/pmc_mdep.h>
#include <machine/profile.h>
+#include <sys/epoch.h>
+#include <ck_queue.h>
#define PMC_MODULE_NAME "hwpmc"
#define PMC_NAME_MAX 64 /* HW counter name size */
@@ -826,7 +828,7 @@ struct pmc_process {
struct pmc_owner {
LIST_ENTRY(pmc_owner) po_next; /* hash chain */
- LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */
+ CK_LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */
LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */
TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */
struct mtx po_mtx; /* spin lock for (o) */
Modified: head/sys/sys/pmckern.h
==============================================================================
--- head/sys/sys/pmckern.h Mon May 14 00:14:00 2018 (r333595)
+++ head/sys/sys/pmckern.h Mon May 14 00:21:04 2018 (r333596)
@@ -157,6 +157,15 @@ struct pmc_soft {
struct pmc_dyn_event_descr ps_ev;
};
+struct pmclog_buffer;
+
+struct pmc_domain_buffer_header {
+ struct mtx pdbh_mtx;
+ TAILQ_HEAD(, pmclog_buffer) pdbh_head;
+ struct pmclog_buffer *pdbh_plbs;
+ int pdbh_ncpus;
+} __aligned(CACHE_LINE_SIZE);
+
/* hook */
extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg);
extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
@@ -176,16 +185,19 @@ extern const int pmc_kernel_version;
/* PMC soft per cpu trapframe */
extern struct trapframe pmc_tf[MAXCPU];
+/* per domain buffer header list */
+extern struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
+
/* Quick check if preparatory work is necessary */
#define PMC_HOOK_INSTALLED(cmd) __predict_false(pmc_hook != NULL)
/* Hook invocation; for use within the kernel */
#define PMC_CALL_HOOK(t, cmd, arg) \
do { \
- sx_slock(&pmc_sx); \
+ epoch_enter(global_epoch); \
if (pmc_hook != NULL) \
(pmc_hook)((t), (cmd), (arg)); \
- sx_sunlock(&pmc_sx); \
+ epoch_exit(global_epoch); \
} while (0)
/* Hook invocation that needs an exclusive lock */
More information about the svn-src-head
mailing list