git: 5a66ec2748f4 - stable/13 - fork: Allow ABI to specify fork return values for child.

From: Dmitry Chagin <dchagin_at_FreeBSD.org>
Date: Fri, 17 Jun 2022 19:37:57 UTC
The branch stable/13 has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=5a66ec2748f4e81a9c07fe3eaffb8b83578e12f3

commit 5a66ec2748f4e81a9c07fe3eaffb8b83578e12f3
Author:     Dmitry Chagin <dchagin@FreeBSD.org>
AuthorDate: 2022-06-17 19:33:28 +0000
Commit:     Dmitry Chagin <dchagin@FreeBSD.org>
CommitDate: 2022-06-17 19:33:28 +0000

    fork: Allow ABI to specify fork return values for child.
    
    At least the Linux x86 ABIs do not use the carry bit and expect the dx register
    to be preserved. To support this, add a new sv_set_fork_retval hook and call it
    from cpu_fork().
    
    Add a short comment about touching dx in x86_set_fork_retval(); for more details,
    see the Phabricator review comments from kib@ and imp@.
    
    Reviewed by:            kib
    Differential revision:  https://reviews.freebsd.org/D31472
    MFC after:              2 weeks
    
    (cherry picked from commit de8374df28cc23ce0d893fd96d1ad0a861414154)
---
 sys/amd64/amd64/elf_machdep.c            |  2 ++
 sys/amd64/amd64/vm_machdep.c             | 15 ++++++++++++---
 sys/amd64/cloudabi32/cloudabi32_sysvec.c |  2 ++
 sys/amd64/cloudabi64/cloudabi64_sysvec.c |  2 ++
 sys/amd64/linux/linux_sysvec.c           | 10 ++++++++++
 sys/amd64/linux32/linux32_sysvec.c       | 10 ++++++++++
 sys/compat/ia32/ia32_sysvec.c            |  1 +
 sys/i386/cloudabi32/cloudabi32_sysvec.c  |  2 ++
 sys/i386/i386/elf_machdep.c              |  1 +
 sys/i386/i386/vm_machdep.c               | 15 ++++++++++++---
 sys/i386/linux/linux_sysvec.c            | 11 +++++++++++
 sys/kern/imgact_aout.c                   |  2 ++
 sys/kern/init_main.c                     |  7 +++++++
 sys/sys/sysent.h                         |  2 ++
 sys/x86/include/x86_var.h                |  1 +
 15 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/sys/amd64/amd64/elf_machdep.c b/sys/amd64/amd64/elf_machdep.c
index 6b39fd03f471..f31ec0cca519 100644
--- a/sys/amd64/amd64/elf_machdep.c
+++ b/sys/amd64/amd64/elf_machdep.c
@@ -97,6 +97,7 @@ struct sysentvec elf64_freebsd_sysvec_la48 = {
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
+	.sv_set_fork_retval = x86_set_fork_retval,
 };
 
 struct sysentvec elf64_freebsd_sysvec_la57 = {
@@ -140,6 +141,7 @@ struct sysentvec elf64_freebsd_sysvec_la57 = {
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
+	.sv_set_fork_retval=  x86_set_fork_retval,
 };
 
 static void
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index ef4856cbdfd7..e9804c6afb94 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -243,9 +243,8 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 	td2->td_frame = (struct trapframe *)td2->td_md.md_stack_base - 1;
 	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
 
-	td2->td_frame->tf_rax = 0;		/* Child returns zero */
-	td2->td_frame->tf_rflags &= ~PSL_C;	/* success */
-	td2->td_frame->tf_rdx = 1;
+	/* Set child return values. */
+	p2->p_sysent->sv_set_fork_retval(td2);
 
 	/*
 	 * If the parent process has the trap bit set (i.e. a debugger
@@ -298,6 +297,16 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 	 */
 }
 
+void
+x86_set_fork_retval(struct thread *td)
+{
+	struct trapframe *frame = td->td_frame;
+
+	frame->tf_rax = 0;		/* Child sees fork() return zero */
+	frame->tf_rflags &= ~PSL_C;	/* Clear PSL_C to report success */
+	frame->tf_rdx = 1;		/* System V emulation */
+}
+
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
diff --git a/sys/amd64/cloudabi32/cloudabi32_sysvec.c b/sys/amd64/cloudabi32/cloudabi32_sysvec.c
index 164f87e90e91..a0b3095c9b7d 100644
--- a/sys/amd64/cloudabi32/cloudabi32_sysvec.c
+++ b/sys/amd64/cloudabi32/cloudabi32_sysvec.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/pmap.h>
 
 #include <machine/frame.h>
+#include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/vmparam.h>
 
@@ -224,6 +225,7 @@ static struct sysentvec cloudabi32_elf_sysvec = {
 	.sv_fetch_syscall_args	= cloudabi32_fetch_syscall_args,
 	.sv_syscallnames	= cloudabi32_syscallnames,
 	.sv_schedtail		= cloudabi32_schedtail,
+	.sv_set_fork_retval	= x86_set_fork_retval,
 };
 
 INIT_SYSENTVEC(elf_sysvec, &cloudabi32_elf_sysvec);
diff --git a/sys/amd64/cloudabi64/cloudabi64_sysvec.c b/sys/amd64/cloudabi64/cloudabi64_sysvec.c
index d3893902b08e..743b216aa05c 100644
--- a/sys/amd64/cloudabi64/cloudabi64_sysvec.c
+++ b/sys/amd64/cloudabi64/cloudabi64_sysvec.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/pmap.h>
 
 #include <machine/frame.h>
+#include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/vmparam.h>
 
@@ -211,6 +212,7 @@ static struct sysentvec cloudabi64_elf_sysvec = {
 	.sv_fetch_syscall_args	= cloudabi64_fetch_syscall_args,
 	.sv_syscallnames	= cloudabi64_syscallnames,
 	.sv_schedtail		= cloudabi64_schedtail,
+	.sv_set_fork_retval	= x86_set_fork_retval,
 };
 
 INIT_SYSENTVEC(elf_sysvec, &cloudabi64_elf_sysvec);
diff --git a/sys/amd64/linux/linux_sysvec.c b/sys/amd64/linux/linux_sysvec.c
index 08b9a5f1650f..f9ab19a40ef2 100644
--- a/sys/amd64/linux/linux_sysvec.c
+++ b/sys/amd64/linux/linux_sysvec.c
@@ -125,6 +125,7 @@ static void	linux_exec_setregs(struct thread *td, struct image_params *imgp,
 static void	linux_exec_sysvec_init(void *param);
 static int	linux_on_exec_vmspace(struct proc *p,
 		    struct image_params *imgp);
+static void	linux_set_fork_retval(struct thread *td);
 static int	linux_vsyscall(struct thread *td);
 
 #define LINUX_T_UNKNOWN  255
@@ -268,6 +269,14 @@ linux_set_syscall_retval(struct thread *td, int error)
 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
 }
 
+static void
+linux_set_fork_retval(struct thread *td)
+{
+	struct trapframe *frame = td->td_frame;
+
+	frame->tf_rax = 0;	/* Child returns zero; rdx and rflags untouched */
+}
+
 static int
 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base)
 {
@@ -784,6 +793,7 @@ struct sysentvec elf_linux_sysvec = {
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
+	.sv_set_fork_retval = linux_set_fork_retval,
 };
 
 static int
diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c
index 77d6c612c359..42b40ee3094a 100644
--- a/sys/amd64/linux32/linux32_sysvec.c
+++ b/sys/amd64/linux32/linux32_sysvec.c
@@ -128,6 +128,7 @@ static bool	linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(const void *param);
 static void	linux_vdso_deinstall(const void *param);
 static void	linux_vdso_reloc(char *mapping, Elf_Addr offset);
+static void	linux32_set_fork_retval(struct thread *td);
 static void	linux32_set_syscall_retval(struct thread *td, int error);
 
 #define LINUX_T_UNKNOWN  255
@@ -698,6 +699,14 @@ linux32_set_syscall_retval(struct thread *td, int error)
 	}
 }
 
+static void
+linux32_set_fork_retval(struct thread *td)
+{
+	struct trapframe *frame = td->td_frame;
+
+	frame->tf_rax = 0;	/* Child returns zero; rdx and rflags untouched */
+}
+
 /*
  * Clear registers on exec
  * XXX copied from ia32_signal.c.
@@ -947,6 +956,7 @@ struct sysentvec elf_linux_sysvec = {
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
+	.sv_set_fork_retval = linux32_set_fork_retval,
 };
 
 static int
diff --git a/sys/compat/ia32/ia32_sysvec.c b/sys/compat/ia32/ia32_sysvec.c
index 7f3ba2da352b..6619301569b1 100644
--- a/sys/compat/ia32/ia32_sysvec.c
+++ b/sys/compat/ia32/ia32_sysvec.c
@@ -144,6 +144,7 @@ struct sysentvec ia32_freebsd_sysvec = {
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
+	.sv_set_fork_retval = x86_set_fork_retval,
 };
 INIT_SYSENTVEC(elf_ia32_sysvec, &ia32_freebsd_sysvec);
 
diff --git a/sys/i386/cloudabi32/cloudabi32_sysvec.c b/sys/i386/cloudabi32/cloudabi32_sysvec.c
index 4f12d2b6cbce..f81f334ce75f 100644
--- a/sys/i386/cloudabi32/cloudabi32_sysvec.c
+++ b/sys/i386/cloudabi32/cloudabi32_sysvec.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include <vm/pmap.h>
 
 #include <machine/frame.h>
+#include <machine/md_var.h>
 #include <machine/pcb.h>
 #include <machine/vmparam.h>
 
@@ -194,6 +195,7 @@ static struct sysentvec cloudabi32_elf_sysvec = {
 	.sv_fetch_syscall_args	= cloudabi32_fetch_syscall_args,
 	.sv_syscallnames	= cloudabi32_syscallnames,
 	.sv_schedtail		= cloudabi32_schedtail,
+	.sv_set_fork_retval	= x86_set_fork_retval,
 };
 
 INIT_SYSENTVEC(elf_sysvec, &cloudabi32_elf_sysvec);
diff --git a/sys/i386/i386/elf_machdep.c b/sys/i386/i386/elf_machdep.c
index 4e9fe52d4e7e..874b4249024e 100644
--- a/sys/i386/i386/elf_machdep.c
+++ b/sys/i386/i386/elf_machdep.c
@@ -92,6 +92,7 @@ struct sysentvec elf32_freebsd_sysvec = {
 	.sv_onexit	= exit_onexit,
 	.sv_regset_begin = SET_BEGIN(__elfN(regset)),
 	.sv_regset_end  = SET_LIMIT(__elfN(regset)),
+	.sv_set_fork_retval = x86_set_fork_retval,
 };
 INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec);
 
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 5626f9594ecf..ba1bc996bda4 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -258,9 +258,8 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 	    VM86_STACK_SPACE) - 1;
 	bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
 
-	td2->td_frame->tf_eax = 0;		/* Child returns zero */
-	td2->td_frame->tf_eflags &= ~PSL_C;	/* success */
-	td2->td_frame->tf_edx = 1;
+	/* Set child return values. */
+	p2->p_sysent->sv_set_fork_retval(td2);
 
 	/*
 	 * If the parent process has the trap bit set (i.e. a debugger
@@ -302,6 +301,16 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 	 */
 }
 
+void
+x86_set_fork_retval(struct thread *td)
+{
+	struct trapframe * frame = td->td_frame;
+
+	frame->tf_eax = 0;		/* Child sees fork() return zero */
+	frame->tf_eflags &= ~PSL_C;	/* Clear PSL_C to report success */
+	frame->tf_edx = 1;		/* System V emulation */
+}
+
 /*
  * Intercept the return address from a freshly forked process that has NOT
  * been scheduled yet.
diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c
index c3974e71b8e8..6805a29204c5 100644
--- a/sys/i386/linux/linux_sysvec.c
+++ b/sys/i386/linux/linux_sysvec.c
@@ -108,6 +108,7 @@ static int	linux_on_exec_vmspace(struct proc *p,
 		    struct image_params *imgp);
 static int	linux_copyout_strings(struct image_params *imgp,
 		    uintptr_t *stack_base);
+static void	linux_set_fork_retval(struct thread *td);
 static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
 static void	linux_vdso_install(const void *param);
 static void	linux_vdso_deinstall(const void *param);
@@ -798,6 +799,14 @@ linux_set_syscall_retval(struct thread *td, int error)
 	}
 }
 
+static void
+linux_set_fork_retval(struct thread *td)
+{
+	struct trapframe *frame = td->td_frame;
+
+	frame->tf_eax = 0;	/* Child returns zero; edx and eflags untouched */
+}
+
 /*
  * exec_setregs may initialize some registers differently than Linux
  * does, thus potentially confusing Linux binaries. If necessary, we
@@ -852,6 +861,7 @@ struct sysentvec linux_sysvec = {
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
+	.sv_set_fork_retval = linux_set_fork_retval,
 };
 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
 
@@ -895,6 +905,7 @@ struct sysentvec elf_linux_sysvec = {
 	.sv_onexit	= linux_on_exit,
 	.sv_ontdexit	= linux_thread_dtor,
 	.sv_setid_allowed = &linux_setid_allowed_query,
+	.sv_set_fork_retval = linux_set_fork_retval,
 };
 
 static int
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index b7ff48dd8cdc..1818e5665caf 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -104,6 +104,7 @@ struct sysentvec aout_sysvec = {
 	.sv_trap	= NULL,
 	.sv_onexec_old = exec_onexec_old,
 	.sv_onexit =  exit_onexit,
+	.sv_set_fork_retval = x86_set_fork_retval,
 };
 
 #elif defined(__amd64__)
@@ -151,6 +152,7 @@ struct sysentvec aout_sysvec = {
 	.sv_syscallnames = freebsd32_syscallnames,
 	.sv_onexec_old	= exec_onexec_old,
 	.sv_onexit	= exit_onexit,
+	.sv_set_fork_retval = x86_set_fork_retval,
 };
 
 static void
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 8226d8ffd434..aaa4f65f0000 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -402,6 +402,12 @@ null_set_syscall_retval(struct thread *td __unused, int error __unused)
 	panic("null_set_syscall_retval");
 }
 
+static void
+null_set_fork_retval(struct thread *td __unused)
+{
+	/* Nothing to do; unlike null_set_syscall_retval() this does not panic. */
+}
+
 struct sysentvec null_sysvec = {
 	.sv_size	= 0,
 	.sv_table	= NULL,
@@ -433,6 +439,7 @@ struct sysentvec null_sysvec = {
 	.sv_trap	= NULL,
 	.sv_regset_begin = NULL,
 	.sv_regset_end  = NULL,
+	.sv_set_fork_retval = null_set_fork_retval,
 };
 
 /*
diff --git a/sys/sys/sysent.h b/sys/sys/sysent.h
index 7696879112e5..045fa382ca25 100644
--- a/sys/sys/sysent.h
+++ b/sys/sys/sysent.h
@@ -160,6 +160,8 @@ struct sysentvec {
 			    struct image_params *imgp);
 	struct regset	**sv_regset_begin;
 	struct regset	**sv_regset_end;
+	void		(*sv_set_fork_retval)(struct thread *);
+					/* Only used on x86 */
 };
 
 #define	SV_ILP32	0x000100	/* 32-bit executable. */
diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h
index 85c2eec5d005..e4c3fe797a00 100644
--- a/sys/x86/include/x86_var.h
+++ b/sys/x86/include/x86_var.h
@@ -152,6 +152,7 @@ int	user_dbreg_trap(register_t dr6);
 int	cpu_minidumpsys(struct dumperinfo *, const struct minidumpstate *);
 struct pcb *get_pcb_td(struct thread *td);
 uint64_t rdtsc_ordered(void);
+void	x86_set_fork_retval(struct thread *td);
 
 /*
  * MSR ops for x86_msr_op()