git: c6d31b8306eb - main - AST: rework

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Tue, 02 Aug 2022 18:12:16 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=c6d31b8306eb708441c61c33bcf886ecad268a16

commit c6d31b8306eb708441c61c33bcf886ecad268a16
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2022-07-18 16:39:17 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-08-02 18:11:09 +0000

    AST: rework
    
    Make most AST handlers dynamically registered.  This allows having
    subsystem-specific handler source located in the subsystem files,
    instead of making subr_trap.c aware of it.  For instance, signal
    delivery code on return to userspace is now moved to kern_sig.c.
    
    Also, it allows some handlers to be designated as the cleanup (kclear)
    type, which are called both at AST and on thread/process exit.  For
    instance, ast(), exit1(), and the NFS server no longer need to be
    aware of UFS softdep processing.
    
    The dynamic registration also allows third-party modules to register AST
    handlers if needed.  There is one caveat with loadable modules: the
    code does not make any effort to ensure that the module is not unloaded
    before all threads have processed through the AST handler in it.  In
    fact, this is already the present behavior for hwpmc.ko and ufs.ko.  I
    do not think it is worth the effort and the runtime overhead to try to
    fix it.
    
    Reviewed by:    markj
    Tested by:      emaste (arm64), pho
    Discussed with: jhb
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D35888
---
 sys/amd64/amd64/exception.S                      |   4 +-
 sys/amd64/amd64/genassym.c                       |   4 +-
 sys/amd64/include/vmm.h                          |   9 +-
 sys/amd64/vmm/vmm.c                              |   6 +-
 sys/arm/arm/exception.S                          |   3 +-
 sys/arm/arm/genassym.c                           |   3 +-
 sys/arm64/arm64/exception.S                      |  12 +-
 sys/arm64/arm64/genassym.c                       |   4 +-
 sys/compat/linux/linux_event.c                   |   4 +-
 sys/compat/linux/linux_fork.c                    |   2 +-
 sys/compat/linuxkpi/common/include/linux/sched.h |   2 +-
 sys/dev/hwpmc/hwpmc_soft.c                       |  17 +-
 sys/fs/nfsserver/nfs_nfsdkrpc.c                  |   2 +-
 sys/geom/geom_event.c                            |  22 +-
 sys/i386/i386/exception.s                        |   2 +-
 sys/i386/i386/genassym.c                         |   4 +-
 sys/kern/kern_clock.c                            |  51 +++-
 sys/kern/kern_event.c                            |  12 +-
 sys/kern/kern_exit.c                             |  13 +-
 sys/kern/kern_fork.c                             |  14 +-
 sys/kern/kern_ktrace.c                           |  12 +-
 sys/kern/kern_proc.c                             |   3 +-
 sys/kern/kern_racct.c                            |  21 +-
 sys/kern/kern_sig.c                              | 100 ++++++-
 sys/kern/kern_synch.c                            |  20 +-
 sys/kern/kern_thr.c                              |   2 +-
 sys/kern/kern_thread.c                           |  41 ++-
 sys/kern/sched_4bsd.c                            |  17 +-
 sys/kern/sched_ule.c                             |  18 +-
 sys/kern/subr_prof.c                             |   4 +-
 sys/kern/subr_sleepqueue.c                       |   2 +-
 sys/kern/subr_smp.c                              |   2 +-
 sys/kern/subr_trap.c                             | 336 ++++++++++++-----------
 sys/kern/sys_generic.c                           |   8 +-
 sys/kern/sys_process.c                           |   4 +-
 sys/powerpc/aim/trap_subr32.S                    |   6 +-
 sys/powerpc/aim/trap_subr64.S                    |   6 +-
 sys/powerpc/booke/trap_subr.S                    |   6 +-
 sys/powerpc/powerpc/genassym.c                   |   4 +-
 sys/riscv/riscv/exception.S                      |   4 +-
 sys/riscv/riscv/genassym.c                       |   3 +-
 sys/security/mac_lomac/mac_lomac.c               |  19 +-
 sys/sys/proc.h                                   |  70 +++--
 sys/sys/racct.h                                  |   1 -
 sys/ufs/ffs/ffs_softdep.c                        |  13 +-
 45 files changed, 556 insertions(+), 356 deletions(-)

diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index 6471f9a3041c..d4741603119e 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -585,7 +585,7 @@ fast_syscall_common:
 	jnz	4f
 	/* Check for and handle AST's on return to userland. */
 	movq	PCPU(CURTHREAD),%rax
-	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
+	cmpl	$0,TD_AST(%rax)
 	jne	3f
 	call	handle_ibrs_exit
 	callq	*mds_handler
@@ -1141,7 +1141,7 @@ doreti_ast:
 	 */
 	cli
 	movq	PCPU(CURTHREAD),%rax
-	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
+	cmpl	$0,TD_AST(%rax)
 	je	doreti_exit
 	sti
 	movq	%rsp,%rdi	/* pass a pointer to the trapframe */
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index f61ce120d156..feb3c7f503ce 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -82,6 +82,7 @@ ASSYM(MD_EFIRT_TMP, offsetof(struct mdthread, md_efirt_tmp));
 
 ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
 ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_AST, offsetof(struct thread, td_ast));
 ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
 ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
@@ -90,9 +91,6 @@ ASSYM(TD_MD, offsetof(struct thread, td_md));
 ASSYM(TD_MD_PCB, offsetof(struct thread, td_md.md_pcb));
 ASSYM(TD_MD_STACK_BASE, offsetof(struct thread, td_md.md_stack_base));
 
-ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
-ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
-
 ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN);
 ASSYM(TDP_KTHREAD, TDP_KTHREAD);
 
diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
index ce61e16522aa..dcf862c34264 100644
--- a/sys/amd64/include/vmm.h
+++ b/sys/amd64/include/vmm.h
@@ -365,13 +365,10 @@ vcpu_is_running(struct vm *vm, int vcpu, int *hostcpu)
 static int __inline
 vcpu_should_yield(struct vm *vm, int vcpu)
 {
+	struct thread *td;
 
-	if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED))
-		return (1);
-	else if (curthread->td_owepreempt)
-		return (1);
-	else
-		return (0);
+	td = curthread;
+	return (td->td_ast != 0 || td->td_owepreempt != 0);
 }
 #endif
 
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 94f99fdb4f1a..c504d4f26b3a 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1331,7 +1331,7 @@ vm_handle_rendezvous(struct vm *vm, int vcpuid)
 		RENDEZVOUS_CTR0(vm, vcpuid, "Wait for rendezvous completion");
 		mtx_sleep(&vm->rendezvous_func, &vm->rendezvous_mtx, 0,
 		    "vmrndv", hz);
-		if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
+		if (td_ast_pending(td, TDA_SUSPEND)) {
 			mtx_unlock(&vm->rendezvous_mtx);
 			error = thread_check_susp(td, true);
 			if (error != 0)
@@ -1421,7 +1421,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
 		msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
 		vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
 		vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
-		if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
+		if (td_ast_pending(td, TDA_SUSPEND)) {
 			vcpu_unlock(vcpu);
 			error = thread_check_susp(td, false);
 			if (error != 0)
@@ -1593,7 +1593,7 @@ vm_handle_suspend(struct vm *vm, int vcpuid, bool *retu)
 			vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
 			msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
 			vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
-			if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
+			if (td_ast_pending(td, TDA_SUSPEND)) {
 				vcpu_unlock(vcpu);
 				error = thread_check_susp(td, false);
 				vcpu_lock(vcpu);
diff --git a/sys/arm/arm/exception.S b/sys/arm/arm/exception.S
index 0416939cb199..351de24ab37c 100644
--- a/sys/arm/arm/exception.S
+++ b/sys/arm/arm/exception.S
@@ -175,8 +175,7 @@ _C_LABEL(dtrace_invop_jump_addr):
 	bne	2f;			/* Nope, get out now */		   \
 	bic	r4, r4, #(PSR_I|PSR_F);					   \
 1:	GET_CURTHREAD_PTR(r5);						   \
-	ldr	r1, [r5, #(TD_FLAGS)];					   \
-	and	r1, r1, #(TDF_ASTPENDING|TDF_NEEDRESCHED);		   \
+	ldr	r1, [r5, #(TD_AST)];					   \
 	teq	r1, #0;							   \
 	beq	2f;			/* Nope. Just bail */		   \
 	msr	cpsr_c, r4;		/* Restore interrupts */	   \
diff --git a/sys/arm/arm/genassym.c b/sys/arm/arm/genassym.c
index e90bbff6549a..af3095135549 100644
--- a/sys/arm/arm/genassym.c
+++ b/sys/arm/arm/genassym.c
@@ -89,6 +89,7 @@ ASSYM(IP_DST, offsetof(struct ip, ip_dst));
 
 ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
 ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_AST, offsetof(struct thread, td_ast));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_MD, offsetof(struct thread, td_md));
 ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
@@ -116,8 +117,6 @@ ASSYM(PAGE_SIZE, PAGE_SIZE);
 #ifdef PMAP_INCLUDE_PTE_SYNC
 ASSYM(PMAP_INCLUDE_PTE_SYNC, 1);
 #endif
-ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
-ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
 
 ASSYM(MAXCOMLEN, MAXCOMLEN);
 ASSYM(MAXCPU, MAXCPU);
diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S
index 1373bb0e7718..e875b75e41c4 100644
--- a/sys/arm64/arm64/exception.S
+++ b/sys/arm64/arm64/exception.S
@@ -175,15 +175,13 @@ __FBSDID("$FreeBSD$");
 	 */
 	msr	daifset, #(DAIF_INTR)
 
-	/* Read the current thread flags */
+	/* Read the current thread AST mask */
 	ldr	x1, [x18, #PC_CURTHREAD]	/* Load curthread */
-	ldr	x2, [x1, #TD_FLAGS]
+	add	x1, x1, #(TD_AST)
+	ldr	x1, [x1]
 
-	/* Check if we have either bits set */
-	mov	x3, #((TDF_ASTPENDING|TDF_NEEDRESCHED) >> 8)
-	lsl	x3, x3, #8
-	and	x2, x2, x3
-	cbz	x2, 2f
+	/* Check if we have a non-zero AST mask */
+	cbz	x1, 2f
 
 	/* Restore interrupts */
 	msr	daif, x19
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
index 8e3ddc48317b..4741cf579467 100644
--- a/sys/arm64/arm64/genassym.c
+++ b/sys/arm64/arm64/genassym.c
@@ -48,9 +48,6 @@ ASSYM(BP_KERN_L0PT, offsetof(struct arm64_bootparams, kern_l0pt));
 ASSYM(BP_KERN_TTBR0, offsetof(struct arm64_bootparams, kern_ttbr0));
 ASSYM(BP_BOOT_EL, offsetof(struct arm64_bootparams, boot_el));
 
-ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
-ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
-
 ASSYM(PCPU_SIZE, sizeof(struct pcpu));
 ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
 ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
@@ -71,6 +68,7 @@ ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
 ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_AST, offsetof(struct thread, td_ast));
 ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
 ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
 ASSYM(TD_MD_CANARY, offsetof(struct thread, td_md.md_canary));
diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c
index e4279b3418c2..002987ed3c0e 100644
--- a/sys/compat/linux/linux_event.c
+++ b/sys/compat/linux/linux_event.c
@@ -458,9 +458,7 @@ linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events,
 		 * usermode and TDP_OLDMASK is cleared, restoring old
 		 * sigmask.
 		 */
-		thread_lock(td);
-		td->td_flags |= TDF_ASTPENDING;
-		thread_unlock(td);
+		ast_sched(td, TDA_SIGSUSPEND);
 	}
 
 	coargs.leventlist = events;
diff --git a/sys/compat/linux/linux_fork.c b/sys/compat/linux/linux_fork.c
index bcd5ffe3c589..31d8344ce032 100644
--- a/sys/compat/linux/linux_fork.c
+++ b/sys/compat/linux/linux_fork.c
@@ -323,7 +323,7 @@ linux_clone_thread(struct thread *td, struct l_clone_args *args)
 	sched_fork_thread(td, newtd);
 	thread_unlock(td);
 	if (P_SHOULDSTOP(p))
-		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
+		ast_sched(newtd, TDA_SUSPEND);
 
 	if (p->p_ptevents & PTRACE_LWP)
 		newtd->td_dbgflags |= TDB_BORN;
diff --git a/sys/compat/linuxkpi/common/include/linux/sched.h b/sys/compat/linuxkpi/common/include/linux/sched.h
index cb0ffde04b2b..a0bcd1260b82 100644
--- a/sys/compat/linuxkpi/common/include/linux/sched.h
+++ b/sys/compat/linuxkpi/common/include/linux/sched.h
@@ -130,7 +130,7 @@ put_task_struct(struct task_struct *task)
 #define	yield()		kern_yield(PRI_UNCHANGED)
 #define	sched_yield()	sched_relinquish(curthread)
 
-#define	need_resched() (curthread->td_flags & TDF_NEEDRESCHED)
+#define	need_resched()	td_ast_pending(curthread, TDA_SCHED)
 
 static inline int
 cond_resched_lock(spinlock_t *lock)
diff --git a/sys/dev/hwpmc/hwpmc_soft.c b/sys/dev/hwpmc/hwpmc_soft.c
index cf2401e9159e..c96a41457a66 100644
--- a/sys/dev/hwpmc/hwpmc_soft.c
+++ b/sys/dev/hwpmc/hwpmc_soft.c
@@ -430,10 +430,11 @@ pmc_soft_intr(struct pmckern_soft *ks)
 			}
 
 			if (user_mode) {
-				/* If in user mode setup AST to process
+				/*
+				 * If in user mode setup AST to process
 				 * callchain out of interrupt context.
 				 */
-				curthread->td_flags |= TDF_ASTPENDING;
+				ast_sched(curthread, TDA_HWPMC);
 			}
 		} else
 			pc->soft_values[ri]++;
@@ -446,6 +447,15 @@ pmc_soft_intr(struct pmckern_soft *ks)
 	return (processed);
 }
 
+static void
+ast_hwpmc(struct thread *td, int tda __unused)
+{
+	/* Handle Software PMC callchain capture. */
+	if (PMC_IS_PENDING_CALLCHAIN(td))
+		PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN_SOFT,
+		    (void *)td->td_frame);
+}
+
 void
 pmc_soft_initialize(struct pmc_mdep *md)
 {
@@ -477,6 +487,8 @@ pmc_soft_initialize(struct pmc_mdep *md)
 	pcd->pcd_stop_pmc     = soft_stop_pmc;
 
 	md->pmd_npmc += SOFT_NPMCS;
+
+	ast_register(TDA_HWPMC, ASTR_UNCOND, 0, ast_hwpmc);
 }
 
 void
@@ -493,6 +505,7 @@ pmc_soft_finalize(struct pmc_mdep *md)
 	KASSERT(md->pmd_classdep[PMC_CLASS_INDEX_SOFT].pcd_class ==
 	    PMC_CLASS_SOFT, ("[soft,%d] class mismatch", __LINE__));
 #endif
+	ast_deregister(TDA_HWPMC);
 	free(soft_pcpu, M_PMC);
 	soft_pcpu = NULL;
 }
diff --git a/sys/fs/nfsserver/nfs_nfsdkrpc.c b/sys/fs/nfsserver/nfs_nfsdkrpc.c
index 44f585ff0beb..d2ba7887b8c2 100644
--- a/sys/fs/nfsserver/nfs_nfsdkrpc.c
+++ b/sys/fs/nfsserver/nfs_nfsdkrpc.c
@@ -327,7 +327,7 @@ nfssvc_program(struct svc_req *rqst, SVCXPRT *xprt)
 	svc_freereq(rqst);
 
 out:
-	td_softdep_cleanup(curthread);
+	ast_kclear(curthread);
 	NFSEXITCODE(0);
 }
 
diff --git a/sys/geom/geom_event.c b/sys/geom/geom_event.c
index cecd7fe349f6..f14dfbe1cb54 100644
--- a/sys/geom/geom_event.c
+++ b/sys/geom/geom_event.c
@@ -96,6 +96,24 @@ g_waitidle(void)
 	curthread->td_pflags &= ~TDP_GEOM;
 }
 
+static void
+ast_geom(struct thread *td __unused, int tda __unused)
+{
+	/*
+	 * If this thread tickled GEOM, we need to wait for the giggling to
+	 * stop before we return to userland.
+	 */
+	g_waitidle();
+}
+
+static void
+geom_event_init(void *arg __unused)
+{
+	ast_register(TDA_GEOM, ASTR_ASTF_REQUIRED | ASTR_TDP | ASTR_KCLEAR,
+	    TDP_GEOM, ast_geom);
+}
+SYSINIT(geom_event, SI_SUB_INTRINSIC, SI_ORDER_ANY, geom_event_init, NULL);
+
 struct g_attrchanged_args {
 	struct g_provider *pp;
 	const char *attr;
@@ -353,9 +371,7 @@ g_post_event_ep_va(g_event_t *func, void *arg, int wuflag,
 	mtx_unlock(&g_eventlock);
 	wakeup(&g_wait_event);
 	curthread->td_pflags |= TDP_GEOM;
-	thread_lock(curthread);
-	curthread->td_flags |= TDF_ASTPENDING;
-	thread_unlock(curthread);
+	ast_sched(curthread, TDA_GEOM);
 }
 
 void
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index f47ab92f1987..f4135548fd81 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -470,7 +470,7 @@ doreti_ast:
 	 */
 	cli
 	movl	PCPU(CURTHREAD),%eax
-	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
+	cmpl	$0,TD_AST(%eax)
 	je	doreti_exit
 	sti
 	pushl	%esp			/* pass a pointer to the trapframe */
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index 55406cdd6b97..a0520646dc25 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -85,6 +85,7 @@ ASSYM(VM_PMAP, offsetof(struct vmspace, vm_pmap));
 ASSYM(PM_ACTIVE, offsetof(struct pmap, pm_active));
 
 ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
+ASSYM(TD_AST, offsetof(struct thread, td_ast));
 ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
 ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
 ASSYM(TD_PFLAGS, offsetof(struct thread, td_pflags));
@@ -96,9 +97,6 @@ ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN);
 ASSYM(P_MD, offsetof(struct proc, p_md));
 ASSYM(MD_LDT, offsetof(struct mdproc, md_ldt));
 
-ASSYM(TDF_ASTPENDING, TDF_ASTPENDING);
-ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
-
 ASSYM(TD0_KSTACK_PAGES, TD0_KSTACK_PAGES);
 ASSYM(PAGE_SIZE, PAGE_SIZE);
 ASSYM(PAGE_SHIFT, PAGE_SHIFT);
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 3998ffd2a607..5f2492c473b8 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -385,6 +385,38 @@ DPCPU_DEFINE_STATIC(int, pcputicks);	/* Per-CPU version of ticks. */
 static int devpoll_run = 0;
 #endif
 
+static void
+ast_oweupc(struct thread *td, int tda __unused)
+{
+	if ((td->td_proc->p_flag & P_PROFIL) == 0)
+		return;
+	addupc_task(td, td->td_profil_addr, td->td_profil_ticks);
+	td->td_profil_ticks = 0;
+	td->td_pflags &= ~TDP_OWEUPC;
+}
+
+static void
+ast_alrm(struct thread *td, int tda __unused)
+{
+	struct proc *p;
+
+	p = td->td_proc;
+	PROC_LOCK(p);
+	kern_psignal(p, SIGVTALRM);
+	PROC_UNLOCK(p);
+}
+
+static void
+ast_prof(struct thread *td, int tda __unused)
+{
+	struct proc *p;
+
+	p = td->td_proc;
+	PROC_LOCK(p);
+	kern_psignal(p, SIGPROF);
+	PROC_UNLOCK(p);
+}
+
 /*
  * Initialize clock frequencies and start both clocks running.
  */
@@ -408,6 +440,10 @@ initclocks(void *dummy __unused)
 		profhz = i;
 	psratio = profhz / i;
 
+	ast_register(TDA_OWEUPC, ASTR_ASTF_REQUIRED, 0, ast_oweupc);
+	ast_register(TDA_ALRM, ASTR_ASTF_REQUIRED, 0, ast_alrm);
+	ast_register(TDA_PROF, ASTR_ASTF_REQUIRED, 0, ast_prof);
+
 #ifdef SW_WATCHDOG
 	/* Enable hardclock watchdog now, even if a hardware watchdog exists. */
 	watchdog_attach();
@@ -423,30 +459,27 @@ static __noinline void
 hardclock_itimer(struct thread *td, struct pstats *pstats, int cnt, int usermode)
 {
 	struct proc *p;
-	int flags;
+	int ast;
 
-	flags = 0;
+	ast = 0;
 	p = td->td_proc;
 	if (usermode &&
 	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
 		PROC_ITIMLOCK(p);
 		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
 		    tick * cnt) == 0)
-			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
+			ast |= TDAI(TDA_ALRM);
 		PROC_ITIMUNLOCK(p);
 	}
 	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
 		PROC_ITIMLOCK(p);
 		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
 		    tick * cnt) == 0)
-			flags |= TDF_PROFPEND | TDF_ASTPENDING;
+			ast |= TDAI(TDA_PROF);
 		PROC_ITIMUNLOCK(p);
 	}
-	if (flags != 0) {
-		thread_lock(td);
-		td->td_flags |= flags;
-		thread_unlock(td);
-	}
+	if (ast != 0)
+		ast_sched_mask(td, ast);
 }
 
 void
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index 4b4522704ad0..5b24741028a9 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1782,8 +1782,8 @@ kqueue_release(struct kqueue *kq, int locked)
 		KQ_UNLOCK(kq);
 }
 
-void
-kqueue_drain_schedtask(void)
+static void
+ast_kqueue(struct thread *td, int tda __unused)
 {
 	taskqueue_quiesce(taskqueue_kqueue_ctx);
 }
@@ -1791,8 +1791,6 @@ kqueue_drain_schedtask(void)
 static void
 kqueue_schedtask(struct kqueue *kq)
 {
-	struct thread *td;
-
 	KQ_OWNED(kq);
 	KASSERT(((kq->kq_state & KQ_TASKDRAIN) != KQ_TASKDRAIN),
 	    ("scheduling kqueue task while draining"));
@@ -1800,10 +1798,7 @@ kqueue_schedtask(struct kqueue *kq)
 	if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) {
 		taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task);
 		kq->kq_state |= KQ_TASKSCHED;
-		td = curthread;
-		thread_lock(td);
-		td->td_flags |= TDF_ASTPENDING | TDF_KQTICKLED;
-		thread_unlock(td);
+		ast_sched(curthread, TDA_KQUEUE);
 	}
 }
 
@@ -2813,6 +2808,7 @@ knote_init(void)
 
 	knote_zone = uma_zcreate("KNOTE", sizeof(struct knote), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
+	ast_register(TDA_KQUEUE, ASTR_ASTF_REQUIRED, 0, ast_kqueue);
 }
 SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL);
 
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 0d549d8ecea8..c617bc73716d 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -252,9 +252,11 @@ exit1(struct thread *td, int rval, int signo)
 	}
 
 	/*
-	 * Deref SU mp, since the thread does not return to userspace.
+	 * Process deferred operations, designated with ASTF_KCLEAR.
+	 * For instance, we need to deref SU mp, since the thread does
+	 * not return to userspace, and wait for geom to stabilize.
 	 */
-	td_softdep_cleanup(td);
+	ast_kclear(td);
 
 	/*
 	 * MUST abort all other threads before proceeding past here.
@@ -405,13 +407,6 @@ exit1(struct thread *td, int rval, int signo)
 	pdescfree(td);
 	fdescfree(td);
 
-	/*
-	 * If this thread tickled GEOM, we need to wait for the giggling to
-	 * stop before we return to userland
-	 */
-	if (td->td_pflags & TDP_GEOM)
-		g_waitidle();
-
 	/*
 	 * Remove ourself from our leader's peer list and wake our leader.
 	 */
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 5c33d2b32101..60bf8a7093c9 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -499,7 +499,7 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	 * to avoid calling thread_lock() again.
 	 */
 	if ((fr->fr_flags & RFPPWAIT) != 0)
-		td->td_flags |= TDF_ASTPENDING;
+		ast_sched_locked(td, TDA_VFORK);
 	thread_unlock(td);
 
 	/*
@@ -814,8 +814,8 @@ do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *
 	}
 }
 
-void
-fork_rfppwait(struct thread *td)
+static void
+ast_vfork(struct thread *td, int tda __unused)
 {
 	struct proc *p, *p2;
 
@@ -1181,3 +1181,11 @@ fork_return(struct thread *td, struct trapframe *frame)
 		ktrsysret(SYS_fork, 0, 0);
 #endif
 }
+
+static void
+fork_init(void *arg __unused)
+{
+	ast_register(TDA_VFORK, ASTR_ASTF_REQUIRED | ASTR_TDP, TDP_RFPPWAIT,
+	    ast_vfork);
+}
+SYSINIT(fork, SI_SUB_INTRINSIC, SI_ORDER_ANY, fork_init, NULL);
diff --git a/sys/kern/kern_ktrace.c b/sys/kern/kern_ktrace.c
index 1e029bace1ee..49e9dff4e643 100644
--- a/sys/kern/kern_ktrace.c
+++ b/sys/kern/kern_ktrace.c
@@ -209,6 +209,12 @@ ktrace_assert(struct thread *td)
 	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
 }
 
+static void
+ast_ktrace(struct thread *td, int tda __unused)
+{
+	KTRUSERRET(td);
+}
+
 static void
 ktrace_init(void *dummy)
 {
@@ -223,6 +229,8 @@ ktrace_init(void *dummy)
 		    M_ZERO);
 		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
 	}
+	ast_register(TDA_KTRACE, ASTR_UNCOND, 0, ast_ktrace);
+
 }
 SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
 
@@ -370,9 +378,7 @@ ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
 	mtx_lock(&ktrace_mtx);
 	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
 	mtx_unlock(&ktrace_mtx);
-	thread_lock(td);
-	td->td_flags |= TDF_ASTPENDING;
-	thread_unlock(td);
+	ast_sched(td, TDA_KTRACE);
 }
 
 /*
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index 20e0c1fe3b45..3983e536e70e 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -246,8 +246,7 @@ proc_dtor(void *mem, int size, void *arg)
 #endif
 		/* Free all OSD associated to this thread. */
 		osd_thread_exit(td);
-		td_softdep_cleanup(td);
-		MPASS(td->td_su == NULL);
+		ast_kclear(td);
 
 		/* Make sure all thread destructors are executed */
 		EVENTHANDLER_DIRECT_INVOKE(thread_dtor, td);
diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
index 6087123c31ce..9fbb2b4cb57f 100644
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -1098,12 +1098,17 @@ racct_move(struct racct *dest, struct racct *src)
 	RACCT_UNLOCK();
 }
 
-void
-racct_proc_throttled(struct proc *p)
+static void
+ast_racct(struct thread *td, int tda __unused)
 {
+	struct proc *p;
 
 	ASSERT_RACCT_ENABLED();
 
+	p = td->td_proc;
+	if (p->p_throttled == 0)
+		return;
+
 	PROC_LOCK(p);
 	while (p->p_throttled != 0) {
 		msleep(p->p_racct, &p->p_mtx, 0, "racct",
@@ -1144,24 +1149,24 @@ racct_proc_throttle(struct proc *p, int timeout)
 
 	FOREACH_THREAD_IN_PROC(p, td) {
 		thread_lock(td);
-		td->td_flags |= TDF_ASTPENDING;
+		ast_sched_locked(td, TDA_RACCT);
 
 		switch (TD_GET_STATE(td)) {
 		case TDS_RUNQ:
 			/*
 			 * If the thread is on the scheduler run-queue, we can
 			 * not just remove it from there.  So we set the flag
-			 * TDF_NEEDRESCHED for the thread, so that once it is
+			 * TDA_SCHED for the thread, so that once it is
 			 * running, it is taken off the cpu as soon as possible.
 			 */
-			td->td_flags |= TDF_NEEDRESCHED;
+			ast_sched_locked(td, TDA_SCHED);
 			break;
 		case TDS_RUNNING:
 			/*
 			 * If the thread is running, we request a context
-			 * switch for it by setting the TDF_NEEDRESCHED flag.
+			 * switch for it by setting the TDA_SCHED flag.
 			 */
-			td->td_flags |= TDF_NEEDRESCHED;
+			ast_sched_locked(td, TDA_SCHED);
 #ifdef SMP
 			cpuid = td->td_oncpu;
 			if ((cpuid != NOCPU) && (td != curthread))
@@ -1355,6 +1360,8 @@ racct_init(void)
 
 	racct_zone = uma_zcreate("racct", sizeof(struct racct),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+	ast_register(TDA_RACCT, ASTR_ASTF_REQUIRED, 0, ast_racct);
+
 	/*
 	 * XXX: Move this somewhere.
 	 */
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 4512212a0847..e28b7f61800c 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -274,6 +274,79 @@ static int sigproptbl[NSIG] = {
 
 sigset_t fastblock_mask;
 
+static void
+ast_sig(struct thread *td, int tda)
+{
+	struct proc *p;
+	int sig;
+	bool resched_sigs;
+
+	p = td->td_proc;
+
+#ifdef DIAGNOSTIC
+	if (p->p_numthreads == 1 && (tda & (TDAI(TDA_SIG) |
+	    TDAI(TDA_AST))) == 0) {
+		PROC_LOCK(p);
+		thread_lock(td);
+		/*
+		 * Note that TDA_SIG should be re-read from
+		 * td_ast, since signal might have been delivered
+		 * after we cleared td_flags above.  This is one of
+		 * the reason for looping check for AST condition.
+		 * See comment in userret() about P_PPWAIT.
+		 */
+		if ((p->p_flag & P_PPWAIT) == 0 &&
+		    (td->td_pflags & TDP_SIGFASTBLOCK) == 0) {
+			if (SIGPENDING(td) && ((tda | td->td_ast) &
+			    (TDAI(TDA_SIG) | TDAI(TDA_AST))) == 0) {
+				thread_unlock(td); /* fix dumps */
+				panic(
+				    "failed2 to set signal flags for ast p %p "
+				    "td %p tda %#x td_ast %#x fl %#x",
+				    p, td, tda, td->td_ast, td->td_flags);
+			}
+		}
+		thread_unlock(td);
+		PROC_UNLOCK(p);
+	}
+#endif
+
+	/*
+	 * Check for signals. Unlocked reads of p_pendingcnt or
+	 * p_siglist might cause process-directed signal to be handled
+	 * later.
+	 */
+	if ((tda & TDA_SIG) != 0 || p->p_pendingcnt > 0 ||
+	    !SIGISEMPTY(p->p_siglist)) {
+		sigfastblock_fetch(td);
+		PROC_LOCK(p);
+		mtx_lock(&p->p_sigacts->ps_mtx);
+		while ((sig = cursig(td)) != 0) {
+			KASSERT(sig >= 0, ("sig %d", sig));
+			postsig(sig);
+		}
+		mtx_unlock(&p->p_sigacts->ps_mtx);
+		PROC_UNLOCK(p);
+		resched_sigs = true;
+	} else {
+		resched_sigs = false;
+	}
+
+	/*
+	 * Handle deferred update of the fast sigblock value, after
+	 * the postsig() loop was performed.
+	 */
+	sigfastblock_setpend(td, resched_sigs);
+}
+
+static void
+ast_sigsuspend(struct thread *td, int tda __unused)
+{
+	MPASS((td->td_pflags & TDP_OLDMASK) != 0);
+	td->td_pflags &= ~TDP_OLDMASK;
+	kern_sigprocmask(td, SIG_SETMASK, &td->td_oldsigmask, NULL, 0);
+}
+
 static void
 sigqueue_start(void)
 {
@@ -285,6 +358,9 @@ sigqueue_start(void)
 	p31b_setcfg(CTL_P1003_1B_SIGQUEUE_MAX, max_pending_per_proc);
 	SIGFILLSET(fastblock_mask);
 	SIG_CANTMASK(fastblock_mask);
+	ast_register(TDA_SIG, ASTR_UNCOND, 0, ast_sig);
+	ast_register(TDA_SIGSUSPEND, ASTR_ASTF_REQUIRED | ASTR_TDP,
+	    TDP_OLDMASK, ast_sigsuspend);
 }
 
 ksiginfo_t *
@@ -644,11 +720,8 @@ signotify(struct thread *td)
 
 	PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
 
-	if (SIGPENDING(td)) {
-		thread_lock(td);
-		td->td_flags |= TDF_NEEDSIGCHK | TDF_ASTPENDING;
-		thread_unlock(td);
-	}
+	if (SIGPENDING(td))
+		ast_sched(td, TDA_SIG);
 }
 
 /*
@@ -1544,6 +1617,7 @@ kern_sigsuspend(struct thread *td, sigset_t mask)
 	kern_sigprocmask(td, SIG_SETMASK, &mask, &td->td_oldsigmask,
 	    SIGPROCMASK_PROC_LOCKED);
 	td->td_pflags |= TDP_OLDMASK;
+	ast_sched(td, TDA_SIGSUSPEND);
 
 	/*
 	 * Process signals now. Otherwise, we can get spurious wakeup
@@ -2587,7 +2661,7 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending)
 	wakeup_swapper = 0;
 	FOREACH_THREAD_IN_PROC(p, td2) {
 		thread_lock(td2);
-		td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
+		ast_sched_locked(td2, TDA_SUSPEND);
 		if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) &&
 		    (td2->td_flags & TDF_SINTR)) {
 			if (td2->td_flags & TDF_SBDRY) {
@@ -2608,7 +2682,7 @@ sig_suspend_threads(struct thread *td, struct proc *p, int sending)
 				thread_suspend_one(td2);
 		} else if (!TD_IS_SUSPENDED(td2)) {
 			if (sending || td != td2)
-				td2->td_flags |= TDF_ASTPENDING;
+				ast_sched_locked(td2, TDA_AST);
 #ifdef SMP
 			if (TD_IS_RUNNING(td2) && td2 != td)
 				forward_signal(td2);
@@ -3268,7 +3342,7 @@ sig_ast_checksusp(struct thread *td)
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
-	if ((td->td_flags & TDF_NEEDSUSPCHK) == 0)
+	if (!td_ast_pending(td, TDA_SUSPEND))
 		return (0);
 
 	ret = thread_suspend_check(1);
@@ -3286,7 +3360,7 @@ sig_ast_needsigchk(struct thread *td)
 	p = td->td_proc;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 
-	if ((td->td_flags & TDF_NEEDSIGCHK) == 0)
+	if (!td_ast_pending(td, TDA_SIG))
 		return (0);
 
 	ps = p->p_sigacts;
@@ -3332,7 +3406,7 @@ sig_intr(void)
 	int ret;
 
 	td = curthread;
-	if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0)
+	if (!td_ast_pending(td, TDA_SIG) && !td_ast_pending(td, TDA_SUSPEND))
 		return (0);
 
 	p = td->td_proc;
@@ -3354,7 +3428,7 @@ curproc_sigkilled(void)
 	bool res;
 
 	td = curthread;
-	if ((td->td_flags & TDF_NEEDSIGCHK) == 0)
+	if (!td_ast_pending(td, TDA_SIG))
 		return (false);
 
 	p = td->td_proc;
@@ -4224,9 +4298,7 @@ sigfastblock_resched(struct thread *td, bool resched)
 		reschedule_signals(p, td->td_sigmask, 0);
 		PROC_UNLOCK(p);
 	}
-	thread_lock(td);
-	td->td_flags |= TDF_ASTPENDING | TDF_NEEDSIGCHK;
-	thread_unlock(td);
+	ast_sched(td, TDA_SIG);
 }
 
 int
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 381d6315044c..261675da6ce9 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -632,11 +632,27 @@ loadav(void *arg)
 	    loadav, NULL, C_DIRECT_EXEC | C_PREL(32));
 }
 
-/* ARGSUSED */
 static void
-synch_setup(void *dummy)
+ast_scheduler(struct thread *td, int tda __unused)
+{
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 1, __func__);
+#endif
+	thread_lock(td);
+	sched_prio(td, td->td_user_pri);
+	mi_switch(SW_INVOL | SWT_NEEDRESCHED);
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 1, __func__);
+#endif
+}
+
+static void
+synch_setup(void *dummy __unused)
 {
 	callout_init(&loadav_callout, 1);
+	ast_register(TDA_SCHED, ASTR_ASTF_REQUIRED, 0, ast_scheduler);
 
 	/* Kick off timeout driven events by calling first time. */
 	loadav(NULL);
diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c
index 18722cc6a73d..2f44c5304471 100644
--- a/sys/kern/kern_thr.c
+++ b/sys/kern/kern_thr.c
@@ -257,7 +257,7 @@ thread_create(struct thread *td, struct rtprio *rtp,
 	sched_fork_thread(td, newtd);
 	thread_unlock(td);
 	if (P_SHOULDSTOP(p))
-		newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
+		ast_sched(newtd, TDA_SUSPEND);
 	if (p->p_ptevents & PTRACE_LWP)
 		newtd->td_dbgflags |= TDB_BORN;
 
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
*** 1041 LINES SKIPPED ***