svn commit: r359436 - in head/sys: kern net sys
Mark Johnston
markj at FreeBSD.org
Mon Mar 30 14:31:56 UTC 2020
Author: markj
Date: Mon Mar 30 14:22:52 2020
New Revision: 359436
URL: https://svnweb.freebsd.org/changeset/base/359436
Log:
Simplify taskqgroup initialization.
taskqgroup initialization was broken into two steps:
1. allocate the taskqgroup structure, at SI_SUB_TASKQ;
2. initialize taskqueues, start taskqueue threads, enqueue "binder"
tasks to bind threads to specific CPUs, at SI_SUB_SMP.
Step 2 tries to handle the case where tasks have already been attached
to a queue by migrating them to their intended queue. In particular,
tasks can't be enqueued before step 2 has completed. This breaks NFS
mountroot on systems using an iflib-based driver when EARLY_AP_STARTUP
is not defined, since mountroot happens before SI_SUB_SMP in this case.
Simplify initialization: do all initialization except for CPU binding at
SI_SUB_TASKQ. This means that until CPU binding is completed, group
tasks may be executed on a CPU other than that to which they were bound,
but this should not be a problem for existing users of the taskqgroup
KPIs.
Reported by: sbruno
Tested by: bdragon, sbruno
MFC after: 1 month
Sponsored by: The FreeBSD Foundation
Differential Revision: https://reviews.freebsd.org/D24188
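As a sketch of the new flow (not part of the patch; the group name
"example" is invented), TASKQGROUP_DEFINE(example, mp_ncpus, 1) now
expands to roughly the following, per the macro change in
sys/sys/gtaskqueue.h below:

	static struct taskqgroup *qgroup_example;

	static void
	taskqgroup_define_example(void *arg)
	{
		/* Create the queues and their threads; fully usable now. */
		qgroup_example = taskqgroup_create("example", mp_ncpus, 1);
	}
	SYSINIT(taskqgroup_example, SI_SUB_TASKQ, SI_ORDER_FIRST,
	    taskqgroup_define_example, NULL);

	static void
	taskqgroup_bind_example(void *arg)
	{
		/* Pin each queue's thread to its CPU once all CPUs are up. */
		taskqgroup_bind(qgroup_example);
	}
	SYSINIT(taskqgroup_bind_example, SI_SUB_SMP, SI_ORDER_ANY,
	    taskqgroup_bind_example, NULL);

The queues are usable as soon as the first SYSINIT runs; the second one
merely pins the queue threads.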
Modified:
head/sys/kern/subr_gtaskqueue.c
head/sys/net/iflib.c
head/sys/sys/gtaskqueue.h
Modified: head/sys/kern/subr_gtaskqueue.c
==============================================================================
--- head/sys/kern/subr_gtaskqueue.c Mon Mar 30 14:03:35 2020 (r359435)
+++ head/sys/kern/subr_gtaskqueue.c Mon Mar 30 14:22:52 2020 (r359436)
@@ -170,7 +170,7 @@ gtaskqueue_terminate(struct thread **pp, struct gtaskq
}
}
-static void
+static void __unused
gtaskqueue_free(struct gtaskqueue *queue)
{
@@ -591,18 +591,16 @@ gtaskqueue_create_fast(const char *name, int mflags,
}
struct taskqgroup_cpu {
- LIST_HEAD(, grouptask) tgc_tasks;
- struct gtaskqueue *tgc_taskq;
- int tgc_cnt;
- int tgc_cpu;
+ LIST_HEAD(, grouptask) tgc_tasks;
+ struct gtaskqueue *tgc_taskq;
+ int tgc_cnt;
+ int tgc_cpu;
};
struct taskqgroup {
struct taskqgroup_cpu tqg_queue[MAXCPU];
struct mtx tqg_lock;
const char * tqg_name;
- int tqg_adjusting;
- int tqg_stride;
int tqg_cnt;
};
@@ -625,13 +623,6 @@ taskqgroup_cpu_create(struct taskqgroup *qgroup, int i
qcpu->tgc_cpu = cpu;
}
-static void
-taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
-{
-
- gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
-}
-
/*
* Find the taskq with least # of tasks that doesn't currently have any
* other queues from the uniq identifier.
@@ -644,22 +635,22 @@ taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
int strict;
mtx_assert(&qgroup->tqg_lock, MA_OWNED);
- if (qgroup->tqg_cnt == 0)
- return (0);
- idx = -1;
- mincnt = INT_MAX;
+ KASSERT(qgroup->tqg_cnt != 0,
+ ("qgroup %s has no queues", qgroup->tqg_name));
+
/*
- * Two passes; First scan for a queue with the least tasks that
+ * Two passes: first scan for a queue with the least tasks that
* does not already service this uniq id. If that fails simply find
- * the queue with the least total tasks;
+ * the queue with the least total tasks.
*/
- for (strict = 1; mincnt == INT_MAX; strict = 0) {
+ for (idx = -1, mincnt = INT_MAX, strict = 1; mincnt == INT_MAX;
+ strict = 0) {
for (i = 0; i < qgroup->tqg_cnt; i++) {
if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
continue;
if (strict) {
- LIST_FOREACH(n,
- &qgroup->tqg_queue[i].tgc_tasks, gt_list)
+ LIST_FOREACH(n, &qgroup->tqg_queue[i].tgc_tasks,
+ gt_list)
if (n->gt_uniq == uniq)
break;
if (n != NULL)
@@ -675,37 +666,15 @@ taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
return (idx);
}
-/*
- * smp_started is unusable since it is not set for UP kernels or even for
- * SMP kernels when there is 1 CPU. This is usually handled by adding a
- * (mp_ncpus == 1) test, but that would be broken here since we need to
- * to synchronize with the SI_SUB_SMP ordering. Even in the pure SMP case
- * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
- *
- * So maintain our own flag. It must be set after all CPUs are started
- * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
- * adjustment is properly delayed. SI_ORDER_FOURTH is clearly before
- * SI_ORDER_ANY and unclearly after the CPUs are started. It would be
- * simpler for adjustment to pass a flag indicating if it is delayed.
- */
-
-static int tqg_smp_started;
-
-static void
-tqg_record_smp_started(void *arg)
-{
- tqg_smp_started = 1;
-}
-
-SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
- tqg_record_smp_started, NULL);
-
void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
void *uniq, device_t dev, struct resource *irq, const char *name)
{
int cpu, qid, error;
+ KASSERT(qgroup->tqg_cnt > 0,
+ ("qgroup %s has no queues", qgroup->tqg_name));
+
gtask->gt_uniq = uniq;
snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
gtask->gt_dev = dev;
@@ -716,7 +685,7 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct gr
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
- if (dev != NULL && irq != NULL && tqg_smp_started) {
+ if (dev != NULL && irq != NULL) {
cpu = qgroup->tqg_queue[qid].tgc_cpu;
gtask->gt_cpu = cpu;
mtx_unlock(&qgroup->tqg_lock);
@@ -728,85 +697,19 @@ taskqgroup_attach(struct taskqgroup *qgroup, struct gr
mtx_unlock(&qgroup->tqg_lock);
}
-static void
-taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
-{
- int qid, cpu, error;
-
- mtx_lock(&qgroup->tqg_lock);
- qid = taskqgroup_find(qgroup, gtask->gt_uniq);
- cpu = qgroup->tqg_queue[qid].tgc_cpu;
- if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
- mtx_unlock(&qgroup->tqg_lock);
- error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
- mtx_lock(&qgroup->tqg_lock);
- if (error)
- printf("%s: binding interrupt failed for %s: %d\n",
- __func__, gtask->gt_name, error);
-
- }
- qgroup->tqg_queue[qid].tgc_cnt++;
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
- MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
- gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
- mtx_unlock(&qgroup->tqg_lock);
-}
-
int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
{
int i, qid, error;
- qid = -1;
gtask->gt_uniq = uniq;
snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
gtask->gt_dev = dev;
gtask->gt_irq = irq;
gtask->gt_cpu = cpu;
mtx_lock(&qgroup->tqg_lock);
- if (tqg_smp_started) {
- for (i = 0; i < qgroup->tqg_cnt; i++)
- if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
- qid = i;
- break;
- }
- if (qid == -1) {
- mtx_unlock(&qgroup->tqg_lock);
- printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
- return (EINVAL);
- }
- } else
- qid = 0;
- qgroup->tqg_queue[qid].tgc_cnt++;
- LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
- gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
- cpu = qgroup->tqg_queue[qid].tgc_cpu;
- mtx_unlock(&qgroup->tqg_lock);
-
- if (dev != NULL && irq != NULL && tqg_smp_started) {
- error = bus_bind_intr(dev, irq, cpu);
- if (error)
- printf("%s: binding interrupt failed for %s: %d\n",
- __func__, gtask->gt_name, error);
- }
- return (0);
-}
-
-static int
-taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
-{
- device_t dev;
- struct resource *irq;
- int cpu, error, i, qid;
-
- qid = -1;
- dev = gtask->gt_dev;
- irq = gtask->gt_irq;
- cpu = gtask->gt_cpu;
- MPASS(tqg_smp_started);
- mtx_lock(&qgroup->tqg_lock);
- for (i = 0; i < qgroup->tqg_cnt; i++)
+ for (i = 0, qid = -1; i < qgroup->tqg_cnt; i++)
if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
qid = i;
break;
@@ -818,8 +721,8 @@ taskqgroup_attach_cpu_deferred(struct taskqgroup *qgro
}
qgroup->tqg_queue[qid].tgc_cnt++;
LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
- MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
+ cpu = qgroup->tqg_queue[qid].tgc_cpu;
mtx_unlock(&qgroup->tqg_lock);
if (dev != NULL && irq != NULL) {
@@ -853,10 +756,11 @@ taskqgroup_detach(struct taskqgroup *qgroup, struct gr
static void
taskqgroup_binder(void *ctx)
{
- struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
+ struct taskq_bind_task *gtask;
cpuset_t mask;
int error;
+ gtask = ctx;
CPU_ZERO(&mask);
CPU_SET(gtask->bt_cpuid, &mask);
error = cpuset_setthread(curthread->td_tid, &mask);
@@ -869,7 +773,7 @@ taskqgroup_binder(void *ctx)
free(gtask, M_DEVBUF);
}
-static void
+void
taskqgroup_bind(struct taskqgroup *qgroup)
{
struct taskq_bind_task *gtask;
@@ -883,7 +787,7 @@ taskqgroup_bind(struct taskqgroup *qgroup)
return;
for (i = 0; i < qgroup->tqg_cnt; i++) {
- gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
+ gtask = malloc(sizeof(*gtask), M_DEVBUF, M_WAITOK);
GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
@@ -891,137 +795,22 @@ taskqgroup_bind(struct taskqgroup *qgroup)
&gtask->bt_task);
}
}
-static void
-taskqgroup_config_init(void *arg)
-{
- struct taskqgroup *qgroup = qgroup_config;
- LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
-
- LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
- grouptask, gt_list);
- qgroup->tqg_queue[0].tgc_cnt = 0;
- taskqgroup_cpu_create(qgroup, 0, 0);
-
- qgroup->tqg_cnt = 1;
- qgroup->tqg_stride = 1;
-}
-
-SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
- taskqgroup_config_init, NULL);
-
-static int
-_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
-{
- LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
- struct grouptask *gtask;
- int i, k, old_cnt, old_cpu, cpu;
-
- mtx_assert(&qgroup->tqg_lock, MA_OWNED);
-
- if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
- printf("%s: failed cnt: %d stride: %d "
- "mp_ncpus: %d tqg_smp_started: %d\n",
- __func__, cnt, stride, mp_ncpus, tqg_smp_started);
- return (EINVAL);
- }
- if (qgroup->tqg_adjusting) {
- printf("%s failed: adjusting\n", __func__);
- return (EBUSY);
- }
- qgroup->tqg_adjusting = 1;
- old_cnt = qgroup->tqg_cnt;
- old_cpu = 0;
- if (old_cnt < cnt)
- old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
- mtx_unlock(&qgroup->tqg_lock);
- /*
- * Set up queue for tasks added before boot.
- */
- if (old_cnt == 0) {
- LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
- grouptask, gt_list);
- qgroup->tqg_queue[0].tgc_cnt = 0;
- }
-
- /*
- * If new taskq threads have been added.
- */
- cpu = old_cpu;
- for (i = old_cnt; i < cnt; i++) {
- taskqgroup_cpu_create(qgroup, i, cpu);
-
- for (k = 0; k < stride; k++)
- cpu = CPU_NEXT(cpu);
- }
- mtx_lock(&qgroup->tqg_lock);
- qgroup->tqg_cnt = cnt;
- qgroup->tqg_stride = stride;
-
- /*
- * Adjust drivers to use new taskqs.
- */
- for (i = 0; i < old_cnt; i++) {
- while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
- LIST_REMOVE(gtask, gt_list);
- qgroup->tqg_queue[i].tgc_cnt--;
- LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
- }
- }
- mtx_unlock(&qgroup->tqg_lock);
-
- while ((gtask = LIST_FIRST(&gtask_head))) {
- LIST_REMOVE(gtask, gt_list);
- if (gtask->gt_cpu == -1)
- taskqgroup_attach_deferred(qgroup, gtask);
- else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
- taskqgroup_attach_deferred(qgroup, gtask);
- }
-
-#ifdef INVARIANTS
- mtx_lock(&qgroup->tqg_lock);
- for (i = 0; i < qgroup->tqg_cnt; i++) {
- MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
- LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
- MPASS(gtask->gt_taskqueue != NULL);
- }
- mtx_unlock(&qgroup->tqg_lock);
-#endif
- /*
- * If taskq thread count has been reduced.
- */
- for (i = cnt; i < old_cnt; i++)
- taskqgroup_cpu_remove(qgroup, i);
-
- taskqgroup_bind(qgroup);
-
- mtx_lock(&qgroup->tqg_lock);
- qgroup->tqg_adjusting = 0;
-
- return (0);
-}
-
-int
-taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
-{
- int error;
-
- mtx_lock(&qgroup->tqg_lock);
- error = _taskqgroup_adjust(qgroup, cnt, stride);
- mtx_unlock(&qgroup->tqg_lock);
-
- return (error);
-}
-
struct taskqgroup *
-taskqgroup_create(const char *name)
+taskqgroup_create(const char *name, int cnt, int stride)
{
struct taskqgroup *qgroup;
+ int cpu, i, j;
qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
qgroup->tqg_name = name;
- LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
+ qgroup->tqg_cnt = cnt;
+ for (cpu = i = 0; i < cnt; i++) {
+ taskqgroup_cpu_create(qgroup, i, cpu);
+ for (j = 0; j < stride; j++)
+ cpu = CPU_NEXT(cpu);
+ }
return (qgroup);
}
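To see how the new taskqgroup_create() strides queues across CPUs, here
is a stand-alone userland approximation of its loop (the CPU_NEXT()
below is a simplification that wraps modulo an assumed 8 CPUs; the real
kernel macro also skips absent CPU IDs):

	#include <stdio.h>

	#define	NCPUS		8		/* stand-in for mp_ncpus */
	#define	CPU_NEXT(c)	(((c) + 1) % NCPUS)

	int
	main(void)
	{
		int cnt = 4, stride = 2;	/* as if TASKQGROUP_DEFINE(name, 4, 2) */
		int cpu, i, j;

		/* Same loop shape as the new taskqgroup_create(). */
		for (cpu = i = 0; i < cnt; i++) {
			printf("queue %d -> CPU %d\n", i, cpu);
			for (j = 0; j < stride; j++)
				cpu = CPU_NEXT(cpu);
		}
		return (0);
	}

With cnt = 4 and stride = 2 this prints queues 0-3 landing on CPUs 0,
2, 4 and 6.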
Modified: head/sys/net/iflib.c
==============================================================================
--- head/sys/net/iflib.c Mon Mar 30 14:03:35 2020 (r359435)
+++ head/sys/net/iflib.c Mon Mar 30 14:22:52 2020 (r359436)
@@ -1410,29 +1410,6 @@ iflib_dma_free_multi(iflib_dma_info_t *dmalist, int co
iflib_dma_free(*dmaiter);
}
-#ifdef EARLY_AP_STARTUP
-static const int iflib_started = 1;
-#else
-/*
- * We used to abuse the smp_started flag to decide if the queues have been
- * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()).
- * That gave bad races, since the SYSINIT() runs strictly after smp_started
- * is set. Run a SYSINIT() strictly after that to just set a usable
- * completion flag.
- */
-
-static int iflib_started;
-
-static void
-iflib_record_started(void *arg)
-{
- iflib_started = 1;
-}
-
-SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST,
- iflib_record_started, NULL);
-#endif
-
static int
iflib_fast_intr(void *arg)
{
@@ -1440,9 +1417,6 @@ iflib_fast_intr(void *arg)
struct grouptask *gtask = info->ifi_task;
int result;
- if (!iflib_started)
- return (FILTER_STRAY);
-
DBG_COUNTER_INC(fast_intrs);
if (info->ifi_filter != NULL) {
result = info->ifi_filter(info->ifi_filter_arg);
@@ -1467,9 +1441,6 @@ iflib_fast_intr_rxtx(void *arg)
qidx_t txqid;
bool intr_enable, intr_legacy;
- if (!iflib_started)
- return (FILTER_STRAY);
-
DBG_COUNTER_INC(fast_intrs);
if (info->ifi_filter != NULL) {
result = info->ifi_filter(info->ifi_filter_arg);
@@ -1522,9 +1493,6 @@ iflib_fast_intr_ctx(void *arg)
struct grouptask *gtask = info->ifi_task;
int result;
- if (!iflib_started)
- return (FILTER_STRAY);
-
DBG_COUNTER_INC(fast_intrs);
if (info->ifi_filter != NULL) {
result = info->ifi_filter(info->ifi_filter_arg);
@@ -4745,18 +4713,6 @@ iflib_device_register(device_t dev, void *sc, if_share
* Now that we know how many queues there are, get the core offset.
*/
ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx);
-
- /*
- * Group taskqueues aren't properly set up until SMP is started,
- * so we disable interrupts until we can handle them post
- * SI_SUB_SMP.
- *
- * XXX: disabling interrupts doesn't actually work, at least for
- * the non-MSI case. When they occur before SI_SUB_SMP completes,
- * we do null handling and depend on this not causing too large an
- * interrupt storm.
- */
- IFDI_INTR_DISABLE(ctx);
if (msix > 1) {
/*
Modified: head/sys/sys/gtaskqueue.h
==============================================================================
--- head/sys/sys/gtaskqueue.h Mon Mar 30 14:03:35 2020 (r359435)
+++ head/sys/sys/gtaskqueue.h Mon Mar 30 14:22:52 2020 (r359436)
@@ -77,9 +77,9 @@ int taskqgroup_attach_cpu(struct taskqgroup *qgroup,
struct grouptask *grptask, void *uniq, int cpu, device_t dev,
struct resource *irq, const char *name);
void taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask);
-struct taskqgroup *taskqgroup_create(const char *name);
+struct taskqgroup *taskqgroup_create(const char *name, int cnt, int stride);
void taskqgroup_destroy(struct taskqgroup *qgroup);
-int taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride);
+void taskqgroup_bind(struct taskqgroup *qgroup);
void taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask,
gtask_fn_t *fn, const char *name);
void taskqgroup_config_gtask_deinit(struct grouptask *gtask);
@@ -107,22 +107,19 @@ struct taskqgroup *qgroup_##name; \
static void \
taskqgroup_define_##name(void *arg) \
{ \
- qgroup_##name = taskqgroup_create(#name); \
+ qgroup_##name = taskqgroup_create(#name, (cnt), (stride)); \
} \
- \
SYSINIT(taskqgroup_##name, SI_SUB_TASKQ, SI_ORDER_FIRST, \
- taskqgroup_define_##name, NULL); \
+ taskqgroup_define_##name, NULL); \
\
static void \
-taskqgroup_adjust_##name(void *arg) \
+taskqgroup_bind_##name(void *arg) \
{ \
- taskqgroup_adjust(qgroup_##name, (cnt), (stride)); \
+ taskqgroup_bind(qgroup_##name); \
} \
- \
-SYSINIT(taskqgroup_adj_##name, SI_SUB_SMP, SI_ORDER_ANY, \
- taskqgroup_adjust_##name, NULL)
+SYSINIT(taskqgroup_bind_##name, SI_SUB_SMP, SI_ORDER_ANY, \
+ taskqgroup_bind_##name, NULL)
-TASKQGROUP_DECLARE(net);
TASKQGROUP_DECLARE(softirq);
#endif /* !_SYS_GTASKQUEUE_H_ */
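And a hypothetical consumer, using the existing GROUPTASK_INIT() and
GROUPTASK_ENQUEUE() helpers from this header (all "example" names are
invented): after this change, enqueueing is safe immediately after
attach, even before SI_SUB_SMP; the task may simply run on an unbound
CPU until taskqgroup_bind() fires.

	static struct grouptask example_gt;

	static void
	example_fn(void *ctx)
	{
		/* Deferred per-queue work runs here. */
	}

	static void
	example_attach(device_t dev, struct resource *irq_res, void *sc)
	{
		GROUPTASK_INIT(&example_gt, 0, example_fn, sc);
		taskqgroup_attach(qgroup_softirq, &example_gt, sc, dev,
		    irq_res, "example");
		/* Safe even before SI_SUB_SMP: queues exist at SI_SUB_TASKQ. */
		GROUPTASK_ENQUEUE(&example_gt);
	}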