svn commit: r303643 - in head/sys: kern sys
Mateusz Guzik
mjg at FreeBSD.org
Mon Aug 1 21:48:39 UTC 2016
Author: mjg
Date: Mon Aug 1 21:48:37 2016
New Revision: 303643
URL: https://svnweb.freebsd.org/changeset/base/303643
Log:
Implement trivial backoff for locking primitives.
All current spinning loops retry an atomic op the first chance they get,
which leads to performance degradation under load.
One classic solution to the problem consists of delaying the test to an
extent. This implementation has a trivial linear increment and a random
factor for each attempt.
For simplicity, this first thouch implementation only modifies spinning
loops where the lock owner is running. spin mutexes and thread lock were
not modified.
Current parameters are autotuned on boot based on mp_cpus.
Autotune factors are very conservative and are subject to change later.
Reviewed by: kib, jhb
Tested by: pho
MFC after: 1 week
Modified:
head/sys/kern/kern_mutex.c
head/sys/kern/kern_rwlock.c
head/sys/kern/kern_sx.c
head/sys/kern/subr_lock.c
head/sys/sys/lock.h
Modified: head/sys/kern/kern_mutex.c
==============================================================================
--- head/sys/kern/kern_mutex.c Mon Aug 1 21:28:34 2016 (r303642)
+++ head/sys/kern/kern_mutex.c Mon Aug 1 21:48:37 2016 (r303643)
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sbuf.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
@@ -138,6 +139,37 @@ struct lock_class lock_class_mtx_spin =
#endif
};
+#ifdef ADAPTIVE_MUTEXES
+static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging");
+
+static struct lock_delay_config mtx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max,
+ 0, "");
+
+static void
+mtx_delay_sysinit(void *dummy)
+{
+
+ mtx_delay.initial = mp_ncpus * 25;
+ mtx_delay.step = (mp_ncpus * 25) / 2;
+ mtx_delay.min = mp_ncpus * 5;
+ mtx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(mtx_delay_sysinit);
+#endif
+
/*
* System-wide mutexes
*/
@@ -408,8 +440,10 @@ __mtx_lock_sleep(volatile uintptr_t *c,
int contested = 0;
uint64_t waittime = 0;
#endif
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -418,6 +452,9 @@ __mtx_lock_sleep(volatile uintptr_t *c,
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &mtx_delay);
+#endif
m = mtxlock2mtx(c);
if (mtx_owned(m)) {
@@ -451,7 +488,7 @@ __mtx_lock_sleep(volatile uintptr_t *c,
if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef ADAPTIVE_MUTEXES
/*
@@ -471,12 +508,8 @@ __mtx_lock_sleep(volatile uintptr_t *c,
"spinning", "lockname:\"%s\"",
m->lock_object.lo_name);
while (mtx_owner(m) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname((struct thread *)tid),
"running");
@@ -570,7 +603,7 @@ __mtx_lock_sleep(volatile uintptr_t *c,
/*
* Only record the loops spinning and not sleeping.
*/
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
#endif
}
Modified: head/sys/kern/kern_rwlock.c
==============================================================================
--- head/sys/kern/kern_rwlock.c Mon Aug 1 21:28:34 2016 (r303642)
+++ head/sys/kern/kern_rwlock.c Mon Aug 1 21:48:37 2016 (r303643)
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/turnstile.h>
@@ -65,15 +66,6 @@ PMC_SOFT_DECLARE( , , lock, failed);
*/
#define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock))
-#ifdef ADAPTIVE_RWLOCKS
-static int rowner_retries = 10;
-static int rowner_loops = 10000;
-static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
- "rwlock debugging");
-SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
-SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
-#endif
-
#ifdef DDB
#include <ddb/ddb.h>
@@ -100,6 +92,42 @@ struct lock_class lock_class_rw = {
#endif
};
+#ifdef ADAPTIVE_RWLOCKS
+static int rowner_retries = 10;
+static int rowner_loops = 10000;
+static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL,
+ "rwlock debugging");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, "");
+
+static struct lock_delay_config rw_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max,
+ 0, "");
+
+static void
+rw_delay_sysinit(void *dummy)
+{
+
+ rw_delay.initial = mp_ncpus * 25;
+ rw_delay.step = (mp_ncpus * 25) / 2;
+ rw_delay.min = mp_ncpus * 5;
+ rw_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(rw_delay_sysinit);
+#endif
+
/*
* Return a pointer to the owning thread if the lock is write-locked or
* NULL if the lock is unlocked or read-locked.
@@ -355,9 +383,11 @@ __rw_rlock(volatile uintptr_t *c, const
int contested = 0;
#endif
uintptr_t v;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -366,6 +396,9 @@ __rw_rlock(volatile uintptr_t *c, const
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#endif
rw = rwlock2rw(c);
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
@@ -412,7 +445,7 @@ __rw_rlock(volatile uintptr_t *c, const
continue;
}
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -437,12 +470,8 @@ __rw_rlock(volatile uintptr_t *c, const
sched_tdname(curthread), "spinning",
"lockname:\"%s\"", rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) ==
- owner && TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ owner && TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -459,7 +488,7 @@ __rw_rlock(volatile uintptr_t *c, const
cpu_spinwait();
}
#ifdef KDTRACE_HOOKS
- spin_cnt += rowner_loops - i;
+ lda.spin_cnt += rowner_loops - i;
#endif
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
@@ -552,7 +581,7 @@ __rw_rlock(volatile uintptr_t *c, const
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
@@ -740,9 +769,11 @@ __rw_wlock_hard(volatile uintptr_t *c, u
uint64_t waittime = 0;
int contested = 0;
#endif
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -751,6 +782,9 @@ __rw_wlock_hard(volatile uintptr_t *c, u
if (SCHEDULER_STOPPED())
return;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &rw_delay);
+#endif
rw = rwlock2rw(c);
if (rw_wlocked(rw)) {
@@ -775,7 +809,7 @@ __rw_wlock_hard(volatile uintptr_t *c, u
if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -798,12 +832,8 @@ __rw_wlock_hard(volatile uintptr_t *c, u
"spinning", "lockname:\"%s\"",
rw->lock_object.lo_name);
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
continue;
@@ -828,7 +858,7 @@ __rw_wlock_hard(volatile uintptr_t *c, u
KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
"running");
#ifdef KDTRACE_HOOKS
- spin_cnt += rowner_loops - i;
+ lda.spin_cnt += rowner_loops - i;
#endif
if (i != rowner_loops)
continue;
@@ -918,7 +948,7 @@ __rw_wlock_hard(volatile uintptr_t *c, u
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
/* Record only the loops spinning and not sleeping. */
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
(state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
Modified: head/sys/kern/kern_sx.c
==============================================================================
--- head/sys/kern/kern_sx.c Mon Aug 1 21:28:34 2016 (r303642)
+++ head/sys/kern/kern_sx.c Mon Aug 1 21:48:37 2016 (r303643)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/sx.h>
+#include <sys/smp.h>
#include <sys/sysctl.h>
#if defined(SMP) && !defined(NO_ADAPTIVE_SX)
@@ -145,6 +146,33 @@ static u_int asx_loops = 10000;
static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging");
SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, "");
SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, "");
+
+static struct lock_delay_config sx_delay = {
+ .initial = 1000,
+ .step = 500,
+ .min = 100,
+ .max = 5000,
+};
+
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min,
+ 0, "");
+SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max,
+ 0, "");
+
+static void
+sx_delay_sysinit(void *dummy)
+{
+
+ sx_delay.initial = mp_ncpus * 25;
+ sx_delay.step = (mp_ncpus * 25) / 2;
+ sx_delay.min = mp_ncpus * 5;
+ sx_delay.max = mp_ncpus * 25 * 10;
+}
+LOCK_DELAY_SYSINIT(sx_delay_sysinit);
#endif
void
@@ -513,9 +541,11 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
int contested = 0;
#endif
int error = 0;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -524,6 +554,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &sx_delay);
+#endif
+
/* If we already hold an exclusive lock, then recurse. */
if (sx_xlocked(sx)) {
KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -549,7 +583,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
break;
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
#ifdef HWPMC_HOOKS
PMC_SOFT_CALL( , , lock, failed);
@@ -578,12 +612,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -605,7 +635,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
break;
cpu_spinwait();
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
}
KTR_STATE0(KTR_SCHED, "thread",
@@ -725,7 +755,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t
LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
@@ -818,9 +848,11 @@ _sx_slock_hard(struct sx *sx, int opts,
#endif
uintptr_t x;
int error = 0;
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ struct lock_delay_arg lda;
+#endif
#ifdef KDTRACE_HOOKS
uintptr_t state;
- u_int spin_cnt = 0;
u_int sleep_cnt = 0;
int64_t sleep_time = 0;
int64_t all_time = 0;
@@ -829,6 +861,9 @@ _sx_slock_hard(struct sx *sx, int opts,
if (SCHEDULER_STOPPED())
return (0);
+#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS)
+ lock_delay_arg_init(&lda, &sx_delay);
+#endif
#ifdef KDTRACE_HOOKS
state = sx->sx_lock;
all_time -= lockstat_nsecs(&sx->lock_object);
@@ -840,7 +875,7 @@ _sx_slock_hard(struct sx *sx, int opts,
*/
for (;;) {
#ifdef KDTRACE_HOOKS
- spin_cnt++;
+ lda.spin_cnt++;
#endif
x = sx->sx_lock;
@@ -888,12 +923,8 @@ _sx_slock_hard(struct sx *sx, int opts,
"lockname:\"%s\"", sx->lock_object.lo_name);
GIANT_SAVE();
while (SX_OWNER(sx->sx_lock) == x &&
- TD_IS_RUNNING(owner)) {
- cpu_spinwait();
-#ifdef KDTRACE_HOOKS
- spin_cnt++;
-#endif
- }
+ TD_IS_RUNNING(owner))
+ lock_delay(&lda);
KTR_STATE0(KTR_SCHED, "thread",
sched_tdname(curthread), "running");
continue;
@@ -989,7 +1020,7 @@ _sx_slock_hard(struct sx *sx, int opts,
LOCKSTAT_RECORD4(sx__block, sx, sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
- if (spin_cnt > sleep_cnt)
+ if (lda.spin_cnt > sleep_cnt)
LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
(state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
Modified: head/sys/kern/subr_lock.c
==============================================================================
--- head/sys/kern/subr_lock.c Mon Aug 1 21:28:34 2016 (r303642)
+++ head/sys/kern/subr_lock.c Mon Aug 1 21:48:37 2016 (r303643)
@@ -103,6 +103,34 @@ lock_destroy(struct lock_object *lock)
lock->lo_flags &= ~LO_INITIALIZED;
}
+void
+lock_delay(struct lock_delay_arg *la)
+{
+ u_int i, delay, backoff, min, max;
+ struct lock_delay_config *lc = la->config;
+
+ delay = la->delay;
+
+ if (delay == 0)
+ delay = lc->initial;
+ else {
+ delay += lc->step;
+ max = lc->max;
+ if (delay > max)
+ delay = max;
+ }
+
+ backoff = cpu_ticks() % delay;
+ min = lc->min;
+ if (backoff < min)
+ backoff = min;
+ for (i = 0; i < backoff; i++)
+ cpu_spinwait();
+
+ la->delay = delay;
+ la->spin_cnt += backoff;
+}
+
#ifdef DDB
DB_SHOW_COMMAND(lock, db_show_lock)
{
Modified: head/sys/sys/lock.h
==============================================================================
--- head/sys/sys/lock.h Mon Aug 1 21:28:34 2016 (r303642)
+++ head/sys/sys/lock.h Mon Aug 1 21:48:37 2016 (r303643)
@@ -201,9 +201,33 @@ extern struct lock_class lock_class_lock
extern struct lock_class *lock_classes[];
+struct lock_delay_config {
+ u_int initial;
+ u_int step;
+ u_int min;
+ u_int max;
+};
+
+struct lock_delay_arg {
+ struct lock_delay_config *config;
+ u_int delay;
+ u_int spin_cnt;
+};
+
+static inline void
+lock_delay_arg_init(struct lock_delay_arg *la, struct lock_delay_config *lc) {
+ la->config = lc;
+ la->delay = 0;
+ la->spin_cnt = 0;
+}
+
+#define LOCK_DELAY_SYSINIT(func) \
+ SYSINIT(func##_ld, SI_SUB_LOCK, SI_ORDER_ANY, func, NULL)
+
void lock_init(struct lock_object *, struct lock_class *,
const char *, const char *, int);
void lock_destroy(struct lock_object *);
+void lock_delay(struct lock_delay_arg *);
void spinlock_enter(void);
void spinlock_exit(void);
void witness_init(struct lock_object *, const char *);
More information about the svn-src-head
mailing list