svn commit: r367842 - head/sys/kern
Mateusz Guzik
mjg at FreeBSD.org
Thu Nov 19 10:00:49 UTC 2020
Author: mjg
Date: Thu Nov 19 10:00:48 2020
New Revision: 367842
URL: https://svnweb.freebsd.org/changeset/base/367842
Log:
thread: numa-aware zombie reaping
The current global list is a significant problem; in particular, it induces
a lot of cross-domain thread frees. When running poudriere on a 2-domain box,
about half of all frees were of that nature.
The patch below introduces per-domain thread data containing zombie lists and
domain-aware reaping. By default it reaps only from the current domain,
falling back to the other domains only when there is a shortage of free TIDs.
A dedicated callout is introduced to reap lingering threads if there happens
to be no thread creation activity (which is what normally triggers reaping).
Reviewed by: kib, markj
Differential Revision: https://reviews.freebsd.org/D27185
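
The heart of the patch is a per-domain Treiber-style LIFO: exiting threads
are pushed onto their home domain's list with a compare-and-swap loop (the
domain is derived from the physical address of the thread structure via
vm_phys_domain(vtophys(td))), and the reaper claims an entire list with a
single atomic swap. Below is a minimal userland sketch of that shape; it
uses C11 atomics rather than the kernel's atomic(9) primitives, and
zombie_push()/zombie_claim() as well as the MAXMEMDOM value are illustrative
stand-ins for thread_zombie() and the swap at the top of
thread_reap_domain(), not the kernel API.

#include <stdatomic.h>
#include <stddef.h>

#define MAXMEMDOM	8	/* placeholder; the kernel sizes this at build time */

struct thread {
	struct thread *td_zombie;		/* zombie list linkage */
};

struct thread_domain_data {
	_Atomic(struct thread *) tdd_zombies;	/* lock-free LIFO head */
	int tdd_reapticks;			/* advisory time of last reap */
};

static struct thread_domain_data thread_domain_data[MAXMEMDOM];

/* Push one zombie: retry the CAS until the new head is installed. */
static void
zombie_push(struct thread_domain_data *tdd, struct thread *td)
{
	struct thread *head;

	head = atomic_load_explicit(&tdd->tdd_zombies, memory_order_relaxed);
	do {
		td->td_zombie = head;
	} while (!atomic_compare_exchange_weak_explicit(&tdd->tdd_zombies,
	    &head, td, memory_order_release, memory_order_relaxed));
}

/* Claim the whole list at once; the caller walks it without locking. */
static struct thread *
zombie_claim(struct thread_domain_data *tdd)
{

	if (atomic_load_explicit(&tdd->tdd_zombies,
	    memory_order_relaxed) == NULL)
		return (NULL);
	return (atomic_exchange_explicit(&tdd->tdd_zombies, NULL,
	    memory_order_acquire));
}

int
main(void)
{
	static struct thread td1, td2;
	struct thread_domain_data *tdd = &thread_domain_data[0];
	struct thread *itd;
	int n = 0;

	zombie_push(tdd, &td1);
	zombie_push(tdd, &td2);
	for (itd = zombie_claim(tdd); itd != NULL; itd = itd->td_zombie)
		n++;			/* walks the claimed list: td2, td1 */
	return (n == 2 ? 0 : 1);
}

The single swap in zombie_claim() is what keeps reaping cheap under load:
no matter how many zombies accumulated, the reaper contends on exactly one
atomic operation per pass, then walks the claimed list privately.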
Modified:
head/sys/kern/kern_thread.c
Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c Thu Nov 19 09:26:51 2020 (r367841)
+++ head/sys/kern/kern_thread.c Thu Nov 19 10:00:48 2020 (r367842)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/turnstile.h>
+#include <sys/taskqueue.h>
#include <sys/ktr.h>
#include <sys/rwlock.h>
#include <sys/umtx.h>
@@ -64,9 +65,11 @@ __FBSDID("$FreeBSD$");
#include <security/audit/audit.h>
+#include <vm/pmap.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
+#include <vm/vm_phys.h>
#include <sys/eventhandler.h>
/*
@@ -128,9 +131,20 @@ SDT_PROBE_DEFINE(proc, , , lwp__exit);
*/
static uma_zone_t thread_zone;
-static __exclusive_cache_line struct thread *thread_zombies;
+struct thread_domain_data {
+ struct thread *tdd_zombies;
+ int tdd_reapticks;
+} __aligned(CACHE_LINE_SIZE);
+static struct thread_domain_data thread_domain_data[MAXMEMDOM];
+
+static struct task thread_reap_task;
+static struct callout thread_reap_callout;
+
static void thread_zombie(struct thread *);
+static void thread_reap_all(void);
+static void thread_reap_task_cb(void *, int);
+static void thread_reap_callout_cb(void *);
static int thread_unsuspend_one(struct thread *td, struct proc *p,
bool boundary);
static void thread_free_batched(struct thread *td);
@@ -159,30 +173,45 @@ EVENTHANDLER_LIST_DEFINE(thread_init);
EVENTHANDLER_LIST_DEFINE(thread_fini);
static bool
-thread_count_inc(void)
+thread_count_inc_try(void)
{
- static struct timeval lastfail;
- static int curfail;
int nthreads_new;
- thread_reap();
-
nthreads_new = atomic_fetchadd_int(&nthreads, 1) + 1;
if (nthreads_new >= maxthread - 100) {
if (priv_check_cred(curthread->td_ucred, PRIV_MAXPROC) != 0 ||
nthreads_new >= maxthread) {
atomic_subtract_int(&nthreads, 1);
- if (ppsratecheck(&lastfail, &curfail, 1)) {
- printf("maxthread limit exceeded by uid %u "
- "(pid %d); consider increasing kern.maxthread\n",
- curthread->td_ucred->cr_ruid, curproc->p_pid);
- }
return (false);
}
}
return (true);
}
+static bool
+thread_count_inc(void)
+{
+ static struct timeval lastfail;
+ static int curfail;
+
+ thread_reap();
+ if (thread_count_inc_try()) {
+ return (true);
+ }
+
+ thread_reap_all();
+ if (thread_count_inc_try()) {
+ return (true);
+ }
+
+ if (ppsratecheck(&lastfail, &curfail, 1)) {
+ printf("maxthread limit exceeded by uid %u "
+ "(pid %d); consider increasing kern.maxthread\n",
+ curthread->td_ucred->cr_ruid, curproc->p_pid);
+ }
+ return (false);
+}
+
static void
thread_count_sub(int n)
{
@@ -500,6 +529,10 @@ threadinit(void)
M_TIDHASH, M_WAITOK | M_ZERO);
for (i = 0; i < tidhashlock + 1; i++)
rw_init(&tidhashtbl_lock[i], "tidhash");
+
+ TASK_INIT(&thread_reap_task, 0, thread_reap_task_cb, NULL);
+ callout_init(&thread_reap_callout, 1);
+ callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
}
/*
@@ -508,12 +541,14 @@ threadinit(void)
void
thread_zombie(struct thread *td)
{
+ struct thread_domain_data *tdd;
struct thread *ztd;
- ztd = atomic_load_ptr(&thread_zombies);
+ tdd = &thread_domain_data[vm_phys_domain(vtophys(td))];
+ ztd = atomic_load_ptr(&tdd->tdd_zombies);
for (;;) {
td->td_zombie = ztd;
- if (atomic_fcmpset_rel_ptr((uintptr_t *)&thread_zombies,
+ if (atomic_fcmpset_rel_ptr((uintptr_t *)&tdd->tdd_zombies,
(uintptr_t *)&ztd, (uintptr_t)td))
break;
continue;
@@ -531,10 +566,10 @@ thread_stash(struct thread *td)
}
/*
- * Reap zombie threads.
+ * Reap zombies from passed domain.
*/
-void
-thread_reap(void)
+static void
+thread_reap_domain(struct thread_domain_data *tdd)
{
struct thread *itd, *ntd;
struct tidbatch tidbatch;
@@ -547,19 +582,26 @@ thread_reap(void)
* Reading upfront is pessimal if followed by concurrent atomic_swap,
* but most of the time the list is empty.
*/
- if (thread_zombies == NULL)
+ if (tdd->tdd_zombies == NULL)
return;
- itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&thread_zombies,
+ itd = (struct thread *)atomic_swap_ptr((uintptr_t *)&tdd->tdd_zombies,
(uintptr_t)NULL);
if (itd == NULL)
return;
+ /*
+ * Multiple CPUs can get here, the race is fine as ticks is only
+ * advisory.
+ */
+ tdd->tdd_reapticks = ticks;
+
tidbatch_prep(&tidbatch);
credbatch_prep(&credbatch);
tdcount = 0;
lim = NULL;
limcount = 0;
+
while (itd != NULL) {
ntd = itd->td_zombie;
EVENTHANDLER_DIRECT_INVOKE(thread_dtor, itd);
@@ -592,6 +634,68 @@ thread_reap(void)
}
MPASS(limcount != 0);
lim_freen(lim, limcount);
+}
+
+/*
+ * Reap zombies from all domains.
+ */
+static void
+thread_reap_all(void)
+{
+ struct thread_domain_data *tdd;
+ int i, domain;
+
+ domain = PCPU_GET(domain);
+ for (i = 0; i < vm_ndomains; i++) {
+ tdd = &thread_domain_data[(i + domain) % vm_ndomains];
+ thread_reap_domain(tdd);
+ }
+}
+
+/*
+ * Reap zombies from local domain.
+ */
+void
+thread_reap(void)
+{
+ struct thread_domain_data *tdd;
+ int domain;
+
+ domain = PCPU_GET(domain);
+ tdd = &thread_domain_data[domain];
+
+ thread_reap_domain(tdd);
+}
+
+static void
+thread_reap_task_cb(void *arg __unused, int pending __unused)
+{
+
+ thread_reap_all();
+}
+
+static void
+thread_reap_callout_cb(void *arg __unused)
+{
+ struct thread_domain_data *tdd;
+ int i, cticks, lticks;
+ bool wantreap;
+
+ wantreap = false;
+ cticks = atomic_load_int(&ticks);
+ for (i = 0; i < vm_ndomains; i++) {
+ tdd = &thread_domain_data[i];
+ lticks = tdd->tdd_reapticks;
+ if (tdd->tdd_zombies != NULL &&
+ (u_int)(cticks - lticks) > 5 * hz) {
+ wantreap = true;
+ break;
+ }
+ }
+
+ if (wantreap)
+ taskqueue_enqueue(taskqueue_thread, &thread_reap_task);
+ callout_reset(&thread_reap_callout, 5 * hz, thread_reap_callout_cb, NULL);
}
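
Two details of the new callout path are worth calling out. First,
thread_reap_callout_cb() does not reap directly; it enqueues
thread_reap_task on taskqueue_thread, presumably because the reap path
(credential, limit and TID batching) may sleep, which a callout handler
must not do. Second, the staleness test compares tick counts with an
unsigned subtraction so the check stays correct when the ticks counter
wraps. A standalone sketch of that idiom, with ticks_elapsed() as a
hypothetical helper (the patch casts the signed difference instead; the
resulting bits are the same on the usual two's-complement targets):

#include <limits.h>
#include <stdbool.h>

/*
 * Wraparound-safe check for "at least 'interval' ticks elapsed since
 * 'then'".  Doing the subtraction in unsigned arithmetic makes the
 * wraparound well-defined and keeps the difference meaningful across it.
 */
static bool
ticks_elapsed(int now, int then, int interval)
{

	return ((unsigned int)now - (unsigned int)then >
	    (unsigned int)interval);
}

int
main(void)
{
	/* 'now' wraps past INT_MAX; the 200-tick gap is still visible. */
	int then = INT_MAX - 100;
	int now = (int)((unsigned int)then + 200);

	return (ticks_elapsed(now, then, 150) ? 0 : 1);
}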