svn commit: r238907 - projects/calloutng/sys/kern
Attilio Rao
attilio at freebsd.org
Mon Jul 30 14:02:13 UTC 2012
On 7/30/12, Davide Italiano <davide at freebsd.org> wrote:
> Author: davide
> Date: Mon Jul 30 13:50:37 2012
> New Revision: 238907
> URL: http://svn.freebsd.org/changeset/base/238907
>
> Log:
> - Fix a LOR deadlock by dropping the callout lock while executing the
>   handler directly from hardware interrupt context.
>
> - Add migration support for callouts which run from hw interrupt
>   context.
>
> - Refactor a couple of comments to reflect the new world order.
>
> TODO: implement proper stats for direct callouts.
>
> Just a note: I'd like to thank flo@ for his invaluable help in testing
> and reporting issues, mav@ who helped me in tackling the problem, and
> Yahoo! which provided access to the machines on which tests were run.
>
> Reported by: avg, flo [1]
> Discussed with: mav
>
> Modified:
> projects/calloutng/sys/kern/kern_timeout.c
>
> Modified: projects/calloutng/sys/kern/kern_timeout.c
> ==============================================================================
> --- projects/calloutng/sys/kern/kern_timeout.c	Mon Jul 30 12:25:20 2012	(r238906)
> +++ projects/calloutng/sys/kern/kern_timeout.c	Mon Jul 30 13:50:37 2012	(r238907)
> @@ -91,45 +91,62 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpca
> int callwheelsize, callwheelmask;
>
> /*
> - * The callout cpu migration entity represents informations necessary for
> - * describing the migrating callout to the new callout cpu.
> + * The callout cpu exec entities represent informations necessary for
> + * describing the state of callouts currently running on the CPU and the ones
> + * necessary for migrating callouts to the new callout cpu. In particular,
> + * the first entry of the array cc_exec_entity holds informations for callout
> + * running in SWI thread context, while the second one holds informations
> + * for callout running directly from hardware interrupt context.
> * The cached informations are very important for deferring migration when
> * the migrating callout is already running.
> */
> -struct cc_mig_ent {
> +struct cc_exec {
> + struct callout *cc_next;
> + struct callout *cc_curr;
> #ifdef SMP
> void (*ce_migration_func)(void *);
> void *ce_migration_arg;
> int ce_migration_cpu;
> struct bintime ce_migration_time;
> #endif
> + int cc_cancel;
> + int cc_waiting;
> };
>
> /*
> - * There is one struct callout_cpu per cpu, holding all relevant
> + * There is one struct callou_cpu per cpu, holding all relevant
> * state for the callout processing thread on the individual CPU.
> */
> struct callout_cpu {
> - struct cc_mig_ent cc_migrating_entity;
> + struct cc_exec cc_exec_entity[2];
> struct mtx cc_lock;
> struct callout *cc_callout;
> struct callout_tailq *cc_callwheel;
> struct callout_tailq cc_expireq;
> struct callout_list cc_callfree;
> - struct callout *cc_next;
> - struct callout *cc_curr;
> struct bintime cc_firstevent;
> struct bintime cc_lastscan;
> void *cc_cookie;
> - int cc_cancel;
> - int cc_waiting;
> };
>
> +#define cc_exec_curr cc_exec_entity[0].cc_curr
> +#define cc_exec_next cc_exec_entity[0].cc_next
> +#define cc_exec_cancel cc_exec_entity[0].cc_cancel
> +#define cc_exec_waiting cc_exec_entity[0].cc_waiting
> +#define cc_exec_curr_dir cc_exec_entity[1].cc_curr
> +#define cc_exec_next_dir cc_exec_entity[1].cc_next
> +#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel
> +#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting
> +
> #ifdef SMP
> -#define cc_migration_func cc_migrating_entity.ce_migration_func
> -#define cc_migration_arg cc_migrating_entity.ce_migration_arg
> -#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu
> -#define cc_migration_time cc_migrating_entity.ce_migration_time
> +#define cc_migration_func cc_exec_entity[0].ce_migration_func
> +#define cc_migration_arg cc_exec_entity[0].ce_migration_arg
> +#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu
> +#define cc_migration_time cc_exec_entity[0].ce_migration_time
> +#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func
> +#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg
> +#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu
> +#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time
>
> struct callout_cpu cc_cpu[MAXCPU];
> #define CPUBLOCK MAXCPU
> @@ -156,6 +173,9 @@ struct callout_cpu cc_cpu;
>
> static int timeout_cpu;
> void (*callout_new_inserted)(int cpu, struct bintime bt) = NULL;
> +static struct callout *
> +softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
> + int *lockcalls, int *gcalls, int direct);
>
> static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
>
> @@ -180,15 +200,18 @@ static MALLOC_DEFINE(M_CALLOUT, "callout
> * Resets the migration entity tied to a specific callout cpu.
> */
> static void
> -cc_cme_cleanup(struct callout_cpu *cc)
> +cc_cme_cleanup(struct callout_cpu *cc, int direct)
> {
> -
> +
> + cc->cc_exec_entity[direct].cc_curr = NULL;
> + cc->cc_exec_entity[direct].cc_next = NULL;
> + cc->cc_exec_entity[direct].cc_cancel = 0;
> + cc->cc_exec_entity[direct].cc_waiting = 0;
> #ifdef SMP
> - cc->cc_migration_cpu = CPUBLOCK;
> - cc->cc_migration_time.sec = 0;
> - cc->cc_migration_time.frac = 0;
> - cc->cc_migration_func = NULL;
> - cc->cc_migration_arg = NULL;
> + cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK;
> + bintime_clear(&cc->cc_exec_entity[direct].ce_migration_time);
> + cc->cc_exec_entity[direct].ce_migration_func = NULL;
> + cc->cc_exec_entity[direct].ce_migration_arg = NULL;
> #endif
> }
>
> @@ -196,11 +219,12 @@ cc_cme_cleanup(struct callout_cpu *cc)
> * Checks if migration is requested by a specific callout cpu.
> */
> static int
> -cc_cme_migrating(struct callout_cpu *cc)
> +cc_cme_migrating(struct callout_cpu *cc, int direct)
> {
>
> #ifdef SMP
> - return (cc->cc_migration_cpu != CPUBLOCK);
> +
> + return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK);
> #else
> return (0);
> #endif
> @@ -246,7 +270,8 @@ callout_cpu_init(struct callout_cpu *cc)
> TAILQ_INIT(&cc->cc_callwheel[i]);
> }
> TAILQ_INIT(&cc->cc_expireq);
> - cc_cme_cleanup(cc);
> + for (i = 0; i < 2; i++)
> + cc_cme_cleanup(cc, i);
> if (cc->cc_callout == NULL)
> return;
> for (i = 0; i < ncallout; i++) {
> @@ -355,7 +380,8 @@ callout_process(struct bintime *now)
> struct callout *tmp;
> struct callout_cpu *cc;
> struct callout_tailq *sc;
> - int cpu, first, future, last, need_softclock;
> + int cpu, first, future, gcalls, mpcalls, last, lockcalls,
> + need_softclock;
>
> /*
> * Process callouts at a very low cpu priority, so we don't keep the
> @@ -376,7 +402,8 @@ callout_process(struct bintime *now)
> first &= callwheelmask;
> for (;;) {
> sc = &cc->cc_callwheel[first];
> - TAILQ_FOREACH(tmp, sc, c_links.tqe) {
> + tmp = TAILQ_FIRST(sc);
> + while (tmp != NULL) {
> next = tmp->c_time;
> bintime_sub(&next, &tmp->c_precision);
> if (bintime_cmp(&next, now, <=)) {
> @@ -385,17 +412,20 @@ callout_process(struct bintime *now)
> * directly from hardware interrupt context.
> */
> if (tmp->c_flags & CALLOUT_DIRECT) {
> - tmp->c_func(tmp->c_arg);
> TAILQ_REMOVE(sc, tmp, c_links.tqe);
> - tmp->c_flags &= ~CALLOUT_PENDING;
> + tmp = softclock_call_cc(tmp, cc,
> + &mpcalls, &lockcalls, &gcalls, 1);
> } else {
> TAILQ_INSERT_TAIL(&cc->cc_expireq,
> tmp, c_staiter);
> TAILQ_REMOVE(sc, tmp, c_links.tqe);
> tmp->c_flags |= CALLOUT_PROCESSED;
> need_softclock = 1;
> + tmp = TAILQ_NEXT(tmp, c_links.tqe);
> }
> }
> + else
> + tmp = TAILQ_NEXT(tmp, c_links.tqe);
> }
> if (first == last)
> break;
> @@ -561,11 +591,12 @@ callout_cc_add(struct callout *c, struct
> }
>
> static void
> -callout_cc_del(struct callout *c, struct callout_cpu *cc)
> +callout_cc_del(struct callout *c, struct callout_cpu *cc, int direct)
> {
> -
> - if (cc->cc_next == c)
> - cc->cc_next = TAILQ_NEXT(c, c_staiter);
> + if (direct && cc->cc_exec_next_dir == c)
> + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe);
> + else if (!direct && cc->cc_exec_next == c)
> + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter);
> if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
> c->c_func = NULL;
> SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
> @@ -574,13 +605,13 @@ callout_cc_del(struct callout *c, struct
>
> static struct callout *
> softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
> - int *lockcalls, int *gcalls)
> + int *lockcalls, int *gcalls, int direct)
> {
> void (*c_func)(void *);
> void *c_arg;
> struct lock_class *class;
> struct lock_object *c_lock;
> - int c_flags, sharedlock;
> + int c_flags, flags, sharedlock;
> #ifdef SMP
> struct callout_cpu *new_cc;
> void (*new_func)(void *);
> @@ -595,7 +626,14 @@ softclock_call_cc(struct callout *c, str
> static timeout_t *lastfunc;
> #endif
>
> - cc->cc_next = TAILQ_NEXT(c, c_staiter);
> + if (direct)
> + cc->cc_exec_next_dir = TAILQ_NEXT(c, c_links.tqe);
> + else {
> + if ((c->c_flags & CALLOUT_PROCESSED) == 0)
> + cc->cc_exec_next = TAILQ_NEXT(c, c_links.tqe);
> + else
> + cc->cc_exec_next = TAILQ_NEXT(c, c_staiter);
> + }
> class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
> sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
> c_lock = c->c_lock;
> @@ -606,8 +644,8 @@ softclock_call_cc(struct callout *c, str
> c->c_flags = CALLOUT_LOCAL_ALLOC;
> else
> c->c_flags &= ~CALLOUT_PENDING;
> - cc->cc_curr = c;
> - cc->cc_cancel = 0;
> + cc->cc_exec_entity[direct].cc_curr = c;
> + cc->cc_exec_entity[direct].cc_cancel = 0;
> CC_UNLOCK(cc);
> if (c_lock != NULL) {
> class->lc_lock(c_lock, sharedlock);
> @@ -615,13 +653,12 @@ softclock_call_cc(struct callout *c, str
> * The callout may have been cancelled
> * while we switched locks.
> */
> - if (cc->cc_cancel) {
> + if (cc->cc_exec_entity[direct].cc_cancel) {
> class->lc_unlock(c_lock);
> goto skip;
> }
> /* The callout cannot be stopped now. */
> - cc->cc_cancel = 1;
> -
> + cc->cc_exec_entity[direct].cc_cancel = 1;
> if (c_lock == &Giant.lock_object) {
> (*gcalls)++;
> CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
> @@ -639,11 +676,13 @@ softclock_call_cc(struct callout *c, str
> #ifdef DIAGNOSTIC
> binuptime(&bt1);
> #endif
> - THREAD_NO_SLEEPING();
> + if (!direct)
> + THREAD_NO_SLEEPING();
> SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
> c_func(c_arg);
> SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
> - THREAD_SLEEPING_OK();
> + if (!direct)
> + THREAD_SLEEPING_OK();
I didn't look into the full patch, but I have a question about this
change: was it necessary because the handler now runs in interrupt
context and can therefore hit the nested no-sleeping check? Or is
there something else?
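
For reference, THREAD_NO_SLEEPING() is roughly the following (simplified
from sys/proc.h, modulo the exact assertion message):

	#define THREAD_NO_SLEEPING() do {                               \
		KASSERT(!(curthread->td_pflags & TDP_NOSLEEPING),       \
		    ("nested no sleeping"));                            \
		curthread->td_pflags |= TDP_NOSLEEPING;                 \
	} while (0)

so if it runs while a callout is dispatched directly from hardware
interrupt context, it tags whatever thread happened to be interrupted,
and the assertion fires when that thread was already inside a
no-sleeping section.
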
Either way, I think it would be a good thing to keep
THREAD_NO_SLEEPING() in the direct dispatch case as well.
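
Roughly, what I have in mind is the following (an untested sketch
against softclock_call_cc() as patched above, with only the relevant
lines shown); it just drops the !direct conditionals so the assertion
stays armed for both dispatch paths:

-	if (!direct)
-		THREAD_NO_SLEEPING();
+	THREAD_NO_SLEEPING();
 	SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
 	c_func(c_arg);
 	SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
-	if (!direct)
-		THREAD_SLEEPING_OK();
+	THREAD_SLEEPING_OK();
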
Attilio
--
Peace can only be achieved by understanding - A. Einstein