git: a889a65ba369 - main - eventtimer: Fix several races in the timer reload code

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 11 Jul 2022 19:59:03 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=a889a65ba36985dfb31111ac1607be35ca2b2c8c

commit a889a65ba36985dfb31111ac1607be35ca2b2c8c
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-06-30 18:27:07 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-07-11 19:58:43 +0000

    eventtimer: Fix several races in the timer reload code
    
    In handleevents(), lock the timer state before fetching the time for the
    next event.  A concurrent callout_cc_add() call might be changing the
    next event time, and the race can cause handleevents() to program an
    out-of-date time, causing the callout to run later (by an unbounded
    period, up to the idle hardclock period of 1s) than requested.
    
    In cpu_idleclock(), call getnextcpuevent() with the timer state mutex
    held, for similar reasons.  In particular, cpu_idleclock() runs with
    interrupts enabled, so an untimely timer interrupt can result in a stale
    next event time being programmed.  Further, an interrupt can cause
    cpu_idleclock() to use a stale value for "now".
    
    In cpu_activeclock(), disable interrupts before loading "now", so as to
    avoid going backwards in time when calling handleevents().  It's ok to
    leave interrupts enabled when checking "state->idle", since the race at
    worst will cause handleevents() to be called unnecessarily.  But use an
    atomic load to indicate that the test is racy.
    
    PR:             264867
    Reviewed by:    mav, jhb, kib
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D35735
---
 sys/kern/kern_clocksource.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index 9d53d1242482..89d19bca9317 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -214,8 +214,8 @@ handleevents(sbintime_t now, int fake)
 		callout_process(now);
 	}
 
-	t = getnextcpuevent(state, 0);
 	ET_HW_LOCK(state);
+	t = getnextcpuevent(state, 0);
 	if (!busy) {
 		state->idle = 0;
 		state->nextevent = t;
@@ -678,14 +678,12 @@ cpu_initclocks_bsp(void)
 void
 cpu_initclocks_ap(void)
 {
-	sbintime_t now;
 	struct pcpu_state *state;
 	struct thread *td;
 
 	state = DPCPU_PTR(timerstate);
-	now = sbinuptime();
 	ET_HW_LOCK(state);
-	state->now = now;
+	state->now = sbinuptime();
 	hardclock_sync(curcpu);
 	spinlock_enter();
 	ET_HW_UNLOCK(state);
@@ -769,6 +767,7 @@ cpu_idleclock(void)
 	    )
 		return (-1);
 	state = DPCPU_PTR(timerstate);
+	ET_HW_LOCK(state);
 	if (periodic)
 		now = state->now;
 	else
@@ -776,7 +775,6 @@ cpu_idleclock(void)
 	CTR3(KTR_SPARE2, "idle at %d:    now  %d.%08x",
 	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
 	t = getnextcpuevent(state, 1);
-	ET_HW_LOCK(state);
 	state->idle = 1;
 	state->nextevent = t;
 	if (!periodic)
@@ -796,15 +794,15 @@ cpu_activeclock(void)
 	struct thread *td;
 
 	state = DPCPU_PTR(timerstate);
-	if (state->idle == 0 || busy)
+	if (atomic_load_int(&state->idle) == 0 || busy)
 		return;
+	spinlock_enter();
 	if (periodic)
 		now = state->now;
 	else
 		now = sbinuptime();
 	CTR3(KTR_SPARE2, "active at %d:  now  %d.%08x",
 	    curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
-	spinlock_enter();
 	td = curthread;
 	td->td_intr_nesting_level++;
 	handleevents(now, 1);