git: 140ceb5d956b - main - ptrace(2): add PT_SC_REMOTE remote syscall request

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Thu, 22 Dec 2022 22:10:55 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=140ceb5d956bb8795a77c23d3fd5ef047b0f3c68

commit 140ceb5d956bb8795a77c23d3fd5ef047b0f3c68
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2022-11-30 08:45:52 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2022-12-22 21:11:35 +0000

    ptrace(2): add PT_SC_REMOTE remote syscall request
    
    Reviewed by:    markj
    Discussed with: jhb
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D37590
---
 sys/compat/freebsd32/freebsd32.h      |   7 ++
 sys/compat/freebsd32/freebsd32_misc.c |  32 ++++++++
 sys/kern/kern_sig.c                   | 142 +++++++++++++++++++++++++++++-----
 sys/kern/sys_process.c                |  74 +++++++++++++++++-
 sys/sys/proc.h                        |   1 +
 sys/sys/ptrace.h                      |  16 ++++
 6 files changed, 250 insertions(+), 22 deletions(-)

diff --git a/sys/compat/freebsd32/freebsd32.h b/sys/compat/freebsd32/freebsd32.h
index 91d95d7852b3..3f37b3d85435 100644
--- a/sys/compat/freebsd32/freebsd32.h
+++ b/sys/compat/freebsd32/freebsd32.h
@@ -503,4 +503,11 @@ struct ptrace_coredump32 {
 	uint32_t	pc_limit1, pc_limit2;
 };
 
+struct ptrace_sc_remote32 {	/* 32-bit layout of the PT_SC_REMOTE argument */
+	struct ptrace_sc_ret32 pscr_ret;	/* out: syscall result */
+	u_int		pscr_syscall;	/* in: syscall number */
+	u_int		pscr_nargs;	/* in: number of u_int args */
+	uint32_t	pscr_args;	/* in: 32-bit user address of args */
+};
+
 #endif /* !_COMPAT_FREEBSD32_FREEBSD32_H_ */
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 7537ff3e9dee..7e96dd9296ee 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -966,6 +966,7 @@ freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
 		struct ptrace_lwpinfo pl;
 		struct ptrace_vm_entry pve;
 		struct ptrace_coredump pc;
+		struct ptrace_sc_remote sr;
 		struct dbreg32 dbreg;
 		struct fpreg32 fpreg;
 		struct reg32 reg;
@@ -979,10 +980,13 @@ freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
 		struct ptrace_lwpinfo32 pl;
 		struct ptrace_vm_entry32 pve;
 		struct ptrace_coredump32 pc;
+		struct ptrace_sc_remote32 sr;
 		uint32_t args[nitems(td->td_sa.args)];
 		struct ptrace_sc_ret32 psr;
 		struct iovec32 vec;
 	} r32;
+	syscallarg_t pscr_args[nitems(td->td_sa.args)];
+	u_int pscr_args32[nitems(td->td_sa.args)];
 	void *addr;
 	int data, error, i;
 
@@ -1081,6 +1085,28 @@ freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
 		r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit);
 		data = sizeof(r.pc);
 		break;
+	case PT_SC_REMOTE:
+		if (uap->data != sizeof(r32.sr)) {
+			error = EINVAL;
+			break;
+		}
+		error = copyin(uap->addr, &r32.sr, uap->data);
+		if (error != 0)
+			break;
+		CP(r32.sr, r.sr, pscr_syscall);
+		CP(r32.sr, r.sr, pscr_nargs);
+		if (r.sr.pscr_nargs > nitems(td->td_sa.args)) {
+			error = EINVAL;
+			break;
+		}
+		error = copyin(PTRIN(r32.sr.pscr_args), pscr_args32,
+		    sizeof(u_int) * r32.sr.pscr_nargs);
+		if (error != 0)
+			break;
+		for (i = 0; i < r32.sr.pscr_nargs; i++)
+			pscr_args[i] = pscr_args32[i];
+		r.sr.pscr_args = pscr_args;
+		break;
 	default:
 		addr = uap->addr;
 		break;
@@ -1141,6 +1167,12 @@ freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap)
 		error = copyout(&r32.psr, uap->addr, MIN(uap->data,
 		    sizeof(r32.psr)));
 		break;
+	case PT_SC_REMOTE:
+		ptrace_sc_ret_to32(&r.sr.pscr_ret, &r32.sr.pscr_ret);
+		error = copyout(&r32.sr.pscr_ret, uap->addr +
+		    offsetof(struct ptrace_sc_remote32, pscr_ret),
+		    sizeof(r32.psr));
+		break;
 	}
 
 	return (error);
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index eea624019fff..df40cdf404db 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -39,9 +39,11 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include "opt_capsicum.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
+#include <sys/capsicum.h>
 #include <sys/ctype.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
@@ -75,6 +77,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/smp.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
+#include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
@@ -82,6 +85,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysproto.h>
 #include <sys/timers.h>
 #include <sys/unistd.h>
+#include <sys/vmmeter.h>
 #include <sys/wait.h>
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
@@ -2633,37 +2637,148 @@ out:
 }
 
 static void
-ptrace_coredump(struct thread *td)
+ptrace_coredumpreq(struct thread *td, struct proc *p,
+    struct thr_coredump_req *tcq)
 {
-	struct proc *p;
-	struct thr_coredump_req *tcq;
 	void *rl_cookie;
 
-	MPASS(td == curthread);
-	p = td->td_proc;
-	PROC_LOCK_ASSERT(p, MA_OWNED);
-	if ((td->td_dbgflags & TDB_COREDUMPREQ) == 0)
-		return;
-	KASSERT((p->p_flag & P_STOPPED_TRACE) != 0, ("not stopped"));
-
-	tcq = td->td_remotereq;
-	KASSERT(tcq != NULL, ("td_remotereq is NULL"));
-
 	if (p->p_sysent->sv_coredump == NULL) {
 		tcq->tc_error = ENOSYS;
-		goto wake;
+		return;
 	}
 
-	PROC_UNLOCK(p);
 	rl_cookie = vn_rangelock_wlock(tcq->tc_vp, 0, OFF_MAX);
-
 	tcq->tc_error = p->p_sysent->sv_coredump(td, tcq->tc_vp,
 	    tcq->tc_limit, tcq->tc_flags);
-
 	vn_rangelock_unlock(tcq->tc_vp, rl_cookie);
+}
+
+/*
+ * Execute the remote syscall described by tsr in the context of td.
+ * The error code and return values are stored in tsr->ts_ret; the
+ * thread's own td_retval[] and td_errno are saved and restored around
+ * the call.
+ */
+static void
+ptrace_syscallreq(struct thread *td, struct proc *p,
+    struct thr_syscall_req *tsr)
+{
+	struct sysentvec *sv;
+	struct sysent *se;
+	register_t rv_saved[2];
+	int error, nerror;
+	int sc;
+	bool audited, sy_thr_static;
+
+	sv = p->p_sysent;
+	/* Valid sv_table[] indices are 0 .. sv_size - 1. */
+	if (sv->sv_table == NULL || sv->sv_size <= tsr->ts_sa.code) {
+		tsr->ts_ret.sr_error = ENOSYS;
+		return;
+	}
+
+	sc = tsr->ts_sa.code;
+	if (sc == SYS_syscall || sc == SYS___syscall) {
+		/*
+		 * Indirect syscall: the real number is the first argument
+		 * and the real arguments follow it.  Check both before use;
+		 * a zero ts_nargs would wrap the memmove() length below.
+		 */
+		if (tsr->ts_nargs == 0 ||
+		    (u_long)tsr->ts_sa.args[0] >= (u_long)sv->sv_size) {
+			tsr->ts_ret.sr_error = ENOSYS;
+			return;
+		}
+		sc = tsr->ts_sa.args[0];
+		memmove(&tsr->ts_sa.args[0], &tsr->ts_sa.args[1],
+		    sizeof(tsr->ts_sa.args[0]) * (tsr->ts_nargs - 1));
+	}
+
+	tsr->ts_sa.callp = se = &sv->sv_table[sc];
+
+	VM_CNT_INC(v_syscall);
+	td->td_pticks = 0;
+	if (__predict_false(td->td_cowgen != atomic_load_int(
+	    &td->td_proc->p_cowgen)))
+		thread_cow_update(td);
+
+#ifdef CAPABILITY_MODE
+	if (IN_CAPABILITY_MODE(td) && (se->sy_flags & SYF_CAPENABLED) == 0) {
+		tsr->ts_ret.sr_error = ECAPMODE;
+		return;
+	}
+#endif
+
+	sy_thr_static = (se->sy_thrcnt & SY_THR_STATIC) != 0;
+	audited = AUDIT_SYSCALL_ENTER(sc, td) != 0;
+
+	if (!sy_thr_static) {
+		error = syscall_thread_enter(td, se);
+		if (error != 0) {
+			tsr->ts_ret.sr_error = error;
+			return;
+		}
+	}
+
+	/* Save the thread's own return state; it is restored after the call. */
+	rv_saved[0] = td->td_retval[0];
+	rv_saved[1] = td->td_retval[1];
+	nerror = td->td_errno;
+	td->td_retval[0] = 0;
+	td->td_retval[1] = 0;
+
+#ifdef KDTRACE_HOOKS
+	if (se->sy_entry != 0)
+		(*systrace_probe_func)(&tsr->ts_sa, SYSTRACE_ENTRY, 0);
+#endif
+	tsr->ts_ret.sr_error = se->sy_call(td, tsr->ts_sa.args);
+#ifdef KDTRACE_HOOKS
+	if (se->sy_return != 0)
+		(*systrace_probe_func)(&tsr->ts_sa, SYSTRACE_RETURN,
+		    tsr->ts_ret.sr_error != 0 ? -1 : td->td_retval[0]);
+#endif
+
+	tsr->ts_ret.sr_retval[0] = td->td_retval[0];
+	tsr->ts_ret.sr_retval[1] = td->td_retval[1];
+	td->td_retval[0] = rv_saved[0];
+	td->td_retval[1] = rv_saved[1];
+	td->td_errno = nerror;
+
+	/* Audit the syscall's own status, not the thread-enter result. */
+	if (audited)
+		AUDIT_SYSCALL_EXIT(tsr->ts_ret.sr_error, td);
+	if (!sy_thr_static)
+		syscall_thread_exit(td, se);
+}
+
+static void
+ptrace_remotereq(struct thread *td, int flag)
+{
+	struct proc *p;
+
+	MPASS(td == curthread);
+	p = td->td_proc;
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	if ((td->td_dbgflags & flag) == 0)
+		return;		/* no such request is pending */
+	KASSERT((p->p_flag & P_STOPPED_TRACE) != 0, ("not stopped"));
+	KASSERT(td->td_remotereq != NULL, ("td_remotereq is NULL"));
+
+	PROC_UNLOCK(p);		/* handlers run with the proc lock dropped */
+	switch (flag) {
+	case TDB_COREDUMPREQ:
+		ptrace_coredumpreq(td, p, td->td_remotereq);
+		break;
+	case TDB_SCREMOTEREQ:
+		ptrace_syscallreq(td, p, td->td_remotereq);
+		break;
+	default:
+		__unreachable();	/* caller passes exactly one request flag */
+	}
 	PROC_LOCK(p);
-wake:
-	td->td_dbgflags &= ~TDB_COREDUMPREQ;
+
+	MPASS((td->td_dbgflags & flag) != 0);
+	td->td_dbgflags &= ~flag;	/* requester sleeps on p until this clears */
 	td->td_remotereq = NULL;
 	wakeup(p);
 }
@@ -2795,9 +2890,14 @@ stopme:
 			td->td_dbgflags |= TDB_SSWITCH;
 			thread_suspend_switch(td, p);
 			td->td_dbgflags &= ~TDB_SSWITCH;
-			if ((td->td_dbgflags & TDB_COREDUMPREQ) != 0) {
+			if ((td->td_dbgflags & (TDB_COREDUMPREQ |
+			    TDB_SCREMOTEREQ)) != 0) {
+				MPASS((td->td_dbgflags & (TDB_COREDUMPREQ |
+				    TDB_SCREMOTEREQ)) !=
+				    (TDB_COREDUMPREQ | TDB_SCREMOTEREQ));
 				PROC_SUNLOCK(p);
-				ptrace_coredump(td);
+				ptrace_remotereq(td, td->td_dbgflags &
+				    (TDB_COREDUMPREQ | TDB_SCREMOTEREQ));
 				PROC_SLOCK(p);
 				goto stopme;
 			}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 15b93cee0f5a..47fdf91b87d5 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -592,6 +592,7 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
 		struct ptrace_lwpinfo pl;
 		struct ptrace_vm_entry pve;
 		struct ptrace_coredump pc;
+		struct ptrace_sc_remote sr;
 		struct dbreg dbreg;
 		struct fpreg fpreg;
 		struct reg reg;
@@ -600,6 +601,7 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
 		struct ptrace_sc_ret psr;
 		int ptevents;
 	} r;
+	syscallarg_t pscr_args[nitems(td->td_sa.args)];
 	void *addr;
 	int error;
 
@@ -657,6 +659,24 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
 		else
 			error = copyin(uap->addr, &r.pc, uap->data);
 		break;
+	case PT_SC_REMOTE:
+		if (uap->data != sizeof(r.sr)) {
+			error = EINVAL;
+			break;
+		}
+		error = copyin(uap->addr, &r.sr, uap->data);
+		if (error != 0)
+			break;
+		if (r.sr.pscr_nargs > nitems(td->td_sa.args)) {
+			error = EINVAL;
+			break;
+		}
+		error = copyin(r.sr.pscr_args, pscr_args,
+		    sizeof(u_long) * r.sr.pscr_nargs);
+		if (error != 0)
+			break;
+		r.sr.pscr_args = pscr_args;
+		break;
 	default:
 		addr = uap->addr;
 		break;
@@ -703,6 +723,11 @@ sys_ptrace(struct thread *td, struct ptrace_args *uap)
 		error = copyout(&r.psr, uap->addr, MIN(uap->data,
 		    sizeof(r.psr)));
 		break;
+	case PT_SC_REMOTE:
+		error = copyout(&r.sr.pscr_ret, uap->addr +
+		    offsetof(struct ptrace_sc_remote, pscr_ret),
+		    sizeof(r.sr.pscr_ret));
+		break;
 	}
 
 	return (error);
@@ -812,9 +837,11 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 	struct ptrace_io_desc *piod = NULL;
 	struct ptrace_lwpinfo *pl;
 	struct ptrace_sc_ret *psr;
+	struct ptrace_sc_remote *pscr;
 	struct file *fp;
 	struct ptrace_coredump *pc;
 	struct thr_coredump_req *tcq;
+	struct thr_syscall_req *tsr;
 	int error, num, tmp;
 	lwpid_t tid = 0, *buf;
 #ifdef COMPAT_FREEBSD32
@@ -1559,7 +1586,8 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data)
 			error = EBUSY;
 			goto coredump_cleanup_locked;
 		}
-		KASSERT((td2->td_dbgflags & TDB_COREDUMPREQ) == 0,
+		KASSERT((td2->td_dbgflags & (TDB_COREDUMPREQ |
+		    TDB_SCREMOTEREQ)) == 0,
 		    ("proc %d tid %d req coredump", p->p_pid, td2->td_tid));
 
 		tcq->tc_vp = fp->f_vnode;
@@ -1584,6 +1612,50 @@ coredump_cleanup_nofp:
 		PROC_LOCK(p);
 		break;
 
+	case PT_SC_REMOTE:
+		pscr = addr;
+		CTR2(KTR_PTRACE, "PT_SC_REMOTE: pid %d, syscall %d",
+		    p->p_pid, pscr->pscr_syscall);
+		if ((td2->td_dbgflags & TDB_BOUNDARY) == 0) {
+			error = EBUSY;
+			break;
+		}
+		PROC_UNLOCK(p);
+		MPASS(pscr->pscr_nargs <= nitems(td->td_sa.args));
+
+		tsr = malloc(sizeof(struct thr_syscall_req), M_TEMP,
+		    M_WAITOK | M_ZERO);
+
+		tsr->ts_sa.code = pscr->pscr_syscall;
+		tsr->ts_nargs = pscr->pscr_nargs;
+		memcpy(&tsr->ts_sa.args, pscr->pscr_args,
+		    sizeof(syscallarg_t) * tsr->ts_nargs);
+
+		PROC_LOCK(p);
+		error = proc_can_ptrace(td, p);
+		if (error != 0) {
+			free(tsr, M_TEMP);
+			break;
+		}
+		if (td2->td_proc != p) {
+			free(tsr, M_TEMP);
+			error = ESRCH;
+			break;
+		}
+		KASSERT((td2->td_dbgflags & (TDB_COREDUMPREQ |
+		    TDB_SCREMOTEREQ)) == 0,
+		    ("proc %d tid %d req coredump", p->p_pid, td2->td_tid));
+
+		td2->td_remotereq = tsr;
+		td2->td_dbgflags |= TDB_SCREMOTEREQ;
+		thread_run_flash(td2);
+		while ((td2->td_dbgflags & TDB_SCREMOTEREQ) != 0)
+			msleep(p, &p->p_mtx, PPAUSE, "pscrx", 0);
+		error = 0;
+		memcpy(&pscr->pscr_ret, &tsr->ts_ret, sizeof(tsr->ts_ret));
+		free(tsr, M_TEMP);
+		break;
+
 	default:
 #ifdef __HAVE_PTRACE_MACHDEP
 		if (req >= PT_FIRSTMACH) {
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index f17207d741c1..2da5d8edee6d 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -524,6 +524,7 @@ enum {
 #define	TDB_SSWITCH	0x00004000 /* Suspended in ptracestop */
 #define	TDB_BOUNDARY	0x00008000 /* ptracestop() at boundary */
 #define	TDB_COREDUMPREQ	0x00010000 /* Coredump request */
+#define	TDB_SCREMOTEREQ	0x00020000 /* Remote syscall request */
 
 /*
  * "Private" flags kept in td_pflags:
diff --git a/sys/sys/ptrace.h b/sys/sys/ptrace.h
index 80797f290a41..94d09eac95b1 100644
--- a/sys/sys/ptrace.h
+++ b/sys/sys/ptrace.h
@@ -87,6 +87,7 @@
 #define	PT_VM_ENTRY	41	/* Get VM map (entry) */
 #define	PT_GETREGSET	42	/* Get a target register set */
 #define	PT_SETREGSET	43	/* Set a target register set */
+#define	PT_SC_REMOTE	44	/* Execute a syscall */
 
 #define PT_FIRSTMACH    64	/* for machine-specific requests */
 #include <machine/ptrace.h>	/* machine-specific requests, if any */
@@ -192,8 +193,17 @@ struct ptrace_coredump {
 #define	PC_COMPRESS	0x00000001	/* Allow compression */
 #define	PC_ALL		0x00000002	/* Include non-dumpable entries */
 
+struct ptrace_sc_remote {	/* argument of the PT_SC_REMOTE request */
+	struct ptrace_sc_ret pscr_ret;	/* out: syscall result */
+	u_int	pscr_syscall;		/* in: syscall number */
+	u_int	pscr_nargs;		/* in: number of entries at pscr_args */
+	syscallarg_t	*pscr_args;	/* in: syscall arguments */
+};
+
 #ifdef _KERNEL
 
+#include <sys/proc.h>
+
 struct thr_coredump_req {
 	struct vnode	*tc_vp;		/* vnode to write coredump to. */
 	off_t		tc_limit;	/* max coredump file size. */
@@ -201,6 +211,12 @@ struct thr_coredump_req {
 	int		tc_error;	/* request result */
 };
 
+struct thr_syscall_req {
+	struct ptrace_sc_ret ts_ret;	/* request result */
+	u_int	ts_nargs;		/* number of args copied into ts_sa. */
+	struct syscall_args ts_sa;	/* syscall code, args and callp. */
+};
+
 int	ptrace_set_pc(struct thread *_td, unsigned long _addr);
 int	ptrace_single_step(struct thread *_td);
 int	ptrace_clear_single_step(struct thread *_td);