git: 5272c66a00c5 - stable/13 - hwpmc: Fix amd/arm64/armv7/uncore sampling overflow race
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 25 Jan 2022 01:40:20 UTC
The branch stable/13 has been updated by jrtc27: URL: https://cgit.FreeBSD.org/src/commit/?id=5272c66a00c510b332c6477bbeacaa0179f96ff3 commit 5272c66a00c510b332c6477bbeacaa0179f96ff3 Author: Jessica Clarke <jrtc27@FreeBSD.org> AuthorDate: 2022-01-10 14:30:05 +0000 Commit: Jessica Clarke <jrtc27@FreeBSD.org> CommitDate: 2022-01-25 00:00:01 +0000 hwpmc: Fix amd/arm64/armv7/uncore sampling overflow race If a counter more than overflows just as we read it on switch out then, if using sampling mode, we will negate this small value to give a huge reload count, and if we later switch back in that context we will validate that value against pm_reloadcount and panic an INVARIANTS kernel with: panic: [pmc,1470] pmcval outside of expected range cpu=2 ri=16 pmcval=fffff292 pm_reloadcount=10000 or similar. Presumably in a non-INVARIANTS kernel we will instead just use the provided value as the reload count, which would lead to the overflow not happening for a very long time (e.g. 78 minutes for a 48-bit counter incrementing at an average rate of 1GHz). Instead, clamp the reload count to 0 (which corresponds precisely to the value we would compute if it had just overflowed and no more), which will result in hwpmc using the full original reload count again. This is the approach used by core for Intel (for both fixed and programmable counters). As part of this, armv7 and arm64 are made conceptually simpler; rather than skipping modifying the overflow count for sampling mode counters so it's always kept as ~0, those special cases are removed so it's always applicable and the concatenation of it and the hardware counter can always be viewed as a 64-bit counter, which also makes them look more like other architectures. Whilst here, fix an instance of UB (shifting a 1 into the sign bit) for amd in its sign-extension code. 
Reviewed by: andrew, mhorne, kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D33654 (cherry picked from commit e74c7ffcb11b6ac879167249adc23a1f9ee5aab6) --- sys/dev/hwpmc/hwpmc_amd.c | 15 ++++++++++++--- sys/dev/hwpmc/hwpmc_arm64.c | 25 ++++++++++++++++--------- sys/dev/hwpmc/hwpmc_armv7.c | 26 +++++++++++++++++--------- sys/dev/hwpmc/hwpmc_uncore.c | 4 ++++ 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c index a95615926bc3..f0b202af8038 100644 --- a/sys/dev/hwpmc/hwpmc_amd.c +++ b/sys/dev/hwpmc/hwpmc_amd.c @@ -431,9 +431,18 @@ amd_read_pmc(int cpu, int ri, pmc_value_t *v) tmp = rdmsr(pd->pm_perfctr); /* RDMSR serializes */ PMCDBG2(MDP,REA,2,"amd-read (pre-munge) id=%d -> %jd", ri, tmp); if (PMC_IS_SAMPLING_MODE(mode)) { - /* Sign extend 48 bit value to 64 bits. */ - tmp = (pmc_value_t) (((int64_t) tmp << 16) >> 16); - tmp = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); + /* + * Clamp value to 0 if the counter just overflowed, + * otherwise the returned reload count would wrap to a + * huge value. + */ + if ((tmp & (1ULL << 47)) == 0) + tmp = 0; + else { + /* Sign extend 48 bit value to 64 bits. */ + tmp = (pmc_value_t) ((int64_t)(tmp << 16) >> 16); + tmp = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); + } } *v = tmp; diff --git a/sys/dev/hwpmc/hwpmc_arm64.c b/sys/dev/hwpmc/hwpmc_arm64.c index ea433ca191d2..675e93c5771d 100644 --- a/sys/dev/hwpmc/hwpmc_arm64.c +++ b/sys/dev/hwpmc/hwpmc_arm64.c @@ -219,8 +219,7 @@ arm64_read_pmc(int cpu, int ri, pmc_value_t *v) if ((READ_SPECIALREG(pmovsclr_el0) & reg) != 0) { /* Clear Overflow Flag */ WRITE_SPECIALREG(pmovsclr_el0, reg); - if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) - pm->pm_pcpu_state[cpu].pps_overflowcnt++; + pm->pm_pcpu_state[cpu].pps_overflowcnt++; /* Reread counter in case we raced. 
*/ tmp = arm64_pmcn_read(ri); @@ -229,10 +228,18 @@ arm64_read_pmc(int cpu, int ri, pmc_value_t *v) intr_restore(s); PMCDBG2(MDP, REA, 2, "arm64-read id=%d -> %jd", ri, tmp); - if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) - *v = ARMV8_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); - else - *v = tmp; + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { + /* + * Clamp value to 0 if the counter just overflowed, + * otherwise the returned reload count would wrap to a + * huge value. + */ + if ((tmp & (1ull << 63)) == 0) + tmp = 0; + else + tmp = ARMV8_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); + } + *v = tmp; return (0); } @@ -380,10 +387,10 @@ arm64_intr(struct trapframe *tf) retval = 1; /* Found an interrupting PMC. */ - if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { - pm->pm_pcpu_state[cpu].pps_overflowcnt += 1; + pm->pm_pcpu_state[cpu].pps_overflowcnt += 1; + + if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; - } if (pm->pm_state != PMC_STATE_RUNNING) continue; diff --git a/sys/dev/hwpmc/hwpmc_armv7.c b/sys/dev/hwpmc/hwpmc_armv7.c index 84a983bbc69c..eaef95932c60 100644 --- a/sys/dev/hwpmc/hwpmc_armv7.c +++ b/sys/dev/hwpmc/hwpmc_armv7.c @@ -191,8 +191,7 @@ armv7_read_pmc(int cpu, int ri, pmc_value_t *v) if ((cp15_pmovsr_get() & reg) != 0) { /* Clear Overflow Flag */ cp15_pmovsr_set(reg); - if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) - pm->pm_pcpu_state[cpu].pps_overflowcnt++; + pm->pm_pcpu_state[cpu].pps_overflowcnt++; /* Reread counter in case we raced. */ tmp = armv7_pmcn_read(ri, pm->pm_md.pm_armv7.pm_armv7_evsel); @@ -201,10 +200,18 @@ armv7_read_pmc(int cpu, int ri, pmc_value_t *v) intr_restore(s); PMCDBG2(MDP, REA, 2, "armv7-read id=%d -> %jd", ri, tmp); - if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) - *v = ARMV7_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); - else - *v = tmp; + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { + /* + * Clamp value to 0 if the counter just overflowed, + * otherwise the returned reload count would wrap to a + * huge value. 
+ */ + if ((tmp & (1ull << 63)) == 0) + tmp = 0; + else + tmp = ARMV7_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp); + } + *v = tmp; return 0; } @@ -362,10 +369,11 @@ armv7_intr(struct trapframe *tf) retval = 1; /* Found an interrupting PMC. */ - if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) { - pm->pm_pcpu_state[cpu].pps_overflowcnt += 1; + pm->pm_pcpu_state[cpu].pps_overflowcnt += 1; + + if (!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) continue; - } + if (pm->pm_state != PMC_STATE_RUNNING) continue; diff --git a/sys/dev/hwpmc/hwpmc_uncore.c b/sys/dev/hwpmc/hwpmc_uncore.c index 2c638833dcd9..a5e3d9bb2f8a 100644 --- a/sys/dev/hwpmc/hwpmc_uncore.c +++ b/sys/dev/hwpmc/hwpmc_uncore.c @@ -175,6 +175,10 @@ uncore_pcpu_fini(struct pmc_mdep *md, int cpu) static pmc_value_t ucf_perfctr_value_to_reload_count(pmc_value_t v) { + + /* If the PMC has overflowed, return a reload count of zero. */ + if ((v & (1ULL << (uncore_ucf_width - 1))) == 0) + return (0); v &= (1ULL << uncore_ucf_width) - 1; return (1ULL << uncore_ucf_width) - v; }