git: 28b647373119 - stable/13 - linux(4): Move uselib() to i386

From: Dmitry Chagin <dchagin_at_FreeBSD.org>
Date: Wed, 01 Mar 2023 08:43:07 UTC
The branch stable/13 has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=28b6473731192395d1da81789f1ffa7c786d2b09

commit 28b6473731192395d1da81789f1ffa7c786d2b09
Author:     Dmitry Chagin <dchagin@FreeBSD.org>
AuthorDate: 2023-02-14 14:46:31 +0000
Commit:     Dmitry Chagin <dchagin@FreeBSD.org>
CommitDate: 2023-03-01 08:42:32 +0000

    linux(4): Move uselib() to i386
    
    This obsolete system call is not supported by glibc. In ancient libc
    versions (before glibc 2.0), uselib() was used to load the shared
    libraries with names found in an array of names in the binary.
    On Linux, since 3.15, this system call is available only when
    the kernel is configured with the CONFIG_USELIB option.
    
    It doesn't look like anyone needs this syscall for other Linuxulators,
    so move it to the corresponding MD Linuxulator.
    
    MFC after:              2 weeks
    
    (cherry picked from commit 50c85a32d9c667779494850b6238b8d7ec13da75)
---
 sys/compat/linux/linux_misc.c  | 264 -----------------------------------------
 sys/i386/linux/linux_machdep.c | 260 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 260 insertions(+), 264 deletions(-)

diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index 823b79cc373d..4dba124a918f 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -35,9 +35,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/blist.h>
 #include <sys/fcntl.h>
-#if defined(__i386__)
-#include <sys/imgact_aout.h>
-#endif
 #include <sys/jail.h>
 #include <sys/imgact.h>
 #include <sys/kernel.h>
@@ -48,13 +45,11 @@ __FBSDID("$FreeBSD$");
 #include <sys/mount.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
-#include <sys/namei.h>
 #include <sys/poll.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/procctl.h>
 #include <sys/reboot.h>
-#include <sys/racct.h>
 #include <sys/random.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
@@ -77,12 +72,8 @@ __FBSDID("$FreeBSD$");
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
-#include <vm/vm.h>
 #include <vm/pmap.h>
-#include <vm/vm_kern.h>
 #include <vm/vm_map.h>
-#include <vm/vm_param.h>
-#include <vm/vm_extern.h>
 #include <vm/swap_pager.h>
 
 #ifdef COMPAT_LINUX32
@@ -251,261 +242,6 @@ linux_brk(struct thread *td, struct linux_brk_args *args)
 	return (0);
 }
 
-#if defined(__i386__)
-/* XXX: what about amd64/linux32? */
-
-int
-linux_uselib(struct thread *td, struct linux_uselib_args *args)
-{
-	struct nameidata ni;
-	struct vnode *vp;
-	struct exec *a_out;
-	vm_map_t map;
-	vm_map_entry_t entry;
-	struct vattr attr;
-	vm_offset_t vmaddr;
-	unsigned long file_offset;
-	unsigned long bss_size;
-	char *library;
-	ssize_t aresid;
-	int error;
-	bool locked, opened, textset;
-
-	a_out = NULL;
-	vp = NULL;
-	locked = false;
-	textset = false;
-	opened = false;
-
-	if (!LUSECONVPATH(td)) {
-		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
-		    UIO_USERSPACE, args->library, td);
-		error = namei(&ni);
-	} else {
-		LCONVPATHEXIST(args->library, &library);
-		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
-		    UIO_SYSSPACE, library, td);
-		error = namei(&ni);
-		LFREEPATH(library);
-	}
-	if (error)
-		goto cleanup;
-
-	vp = ni.ni_vp;
-	NDFREE(&ni, NDF_ONLY_PNBUF);
-
-	/*
-	 * From here on down, we have a locked vnode that must be unlocked.
-	 * XXX: The code below largely duplicates exec_check_permissions().
-	 */
-	locked = true;
-
-	/* Executable? */
-	error = VOP_GETATTR(vp, &attr, td->td_ucred);
-	if (error)
-		goto cleanup;
-
-	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
-	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
-		/* EACCESS is what exec(2) returns. */
-		error = ENOEXEC;
-		goto cleanup;
-	}
-
-	/* Sensible size? */
-	if (attr.va_size == 0) {
-		error = ENOEXEC;
-		goto cleanup;
-	}
-
-	/* Can we access it? */
-	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
-	if (error)
-		goto cleanup;
-
-	/*
-	 * XXX: This should use vn_open() so that it is properly authorized,
-	 * and to reduce code redundancy all over the place here.
-	 * XXX: Not really, it duplicates far more of exec_check_permissions()
-	 * than vn_open().
-	 */
-#ifdef MAC
-	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
-	if (error)
-		goto cleanup;
-#endif
-	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
-	if (error)
-		goto cleanup;
-	opened = true;
-
-	/* Pull in executable header into exec_map */
-	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
-	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
-	if (error)
-		goto cleanup;
-
-	/* Is it a Linux binary ? */
-	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
-		error = ENOEXEC;
-		goto cleanup;
-	}
-
-	/*
-	 * While we are here, we should REALLY do some more checks
-	 */
-
-	/* Set file/virtual offset based on a.out variant. */
-	switch ((int)(a_out->a_magic & 0xffff)) {
-	case 0413:			/* ZMAGIC */
-		file_offset = 1024;
-		break;
-	case 0314:			/* QMAGIC */
-		file_offset = 0;
-		break;
-	default:
-		error = ENOEXEC;
-		goto cleanup;
-	}
-
-	bss_size = round_page(a_out->a_bss);
-
-	/* Check various fields in header for validity/bounds. */
-	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
-		error = ENOEXEC;
-		goto cleanup;
-	}
-
-	/* text + data can't exceed file size */
-	if (a_out->a_data + a_out->a_text > attr.va_size) {
-		error = EFAULT;
-		goto cleanup;
-	}
-
-	/*
-	 * text/data/bss must not exceed limits
-	 * XXX - this is not complete. it should check current usage PLUS
-	 * the resources needed by this library.
-	 */
-	PROC_LOCK(td->td_proc);
-	if (a_out->a_text > maxtsiz ||
-	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
-	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
-	    bss_size) != 0) {
-		PROC_UNLOCK(td->td_proc);
-		error = ENOMEM;
-		goto cleanup;
-	}
-	PROC_UNLOCK(td->td_proc);
-
-	/*
-	 * Prevent more writers.
-	 */
-	error = VOP_SET_TEXT(vp);
-	if (error != 0)
-		goto cleanup;
-	textset = true;
-
-	/*
-	 * Lock no longer needed
-	 */
-	locked = false;
-	VOP_UNLOCK(vp);
-
-	/*
-	 * Check if file_offset page aligned. Currently we cannot handle
-	 * misalinged file offsets, and so we read in the entire image
-	 * (what a waste).
-	 */
-	if (file_offset & PAGE_MASK) {
-		/* Map text+data read/write/execute */
-
-		/* a_entry is the load address and is page aligned */
-		vmaddr = trunc_page(a_out->a_entry);
-
-		/* get anon user mapping, read+write+execute */
-		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
-		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
-		    VM_PROT_ALL, VM_PROT_ALL, 0);
-		if (error)
-			goto cleanup;
-
-		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
-		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
-		    td->td_ucred, NOCRED, &aresid, td);
-		if (error != 0)
-			goto cleanup;
-		if (aresid != 0) {
-			error = ENOEXEC;
-			goto cleanup;
-		}
-	} else {
-		/*
-		 * for QMAGIC, a_entry is 20 bytes beyond the load address
-		 * to skip the executable header
-		 */
-		vmaddr = trunc_page(a_out->a_entry);
-
-		/*
-		 * Map it all into the process's space as a single
-		 * copy-on-write "data" segment.
-		 */
-		map = &td->td_proc->p_vmspace->vm_map;
-		error = vm_mmap(map, &vmaddr,
-		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
-		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
-		if (error)
-			goto cleanup;
-		vm_map_lock(map);
-		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
-			vm_map_unlock(map);
-			error = EDOOFUS;
-			goto cleanup;
-		}
-		entry->eflags |= MAP_ENTRY_VN_EXEC;
-		vm_map_unlock(map);
-		textset = false;
-	}
-
-	if (bss_size != 0) {
-		/* Calculate BSS start address */
-		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
-		    a_out->a_data;
-
-		/* allocate some 'anon' space */
-		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
-		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
-		    VM_PROT_ALL, 0);
-		if (error)
-			goto cleanup;
-	}
-
-cleanup:
-	if (opened) {
-		if (locked)
-			VOP_UNLOCK(vp);
-		locked = false;
-		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
-	}
-	if (textset) {
-		if (!locked) {
-			locked = true;
-			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
-		}
-		VOP_UNSET_TEXT_CHECKED(vp);
-	}
-	if (locked)
-		VOP_UNLOCK(vp);
-
-	/* Release the temporary mapping. */
-	if (a_out)
-		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);
-
-	return (error);
-}
-
-#endif	/* __i386__ */
-
 #ifdef LINUX_LEGACY_SYSCALLS
 int
 linux_select(struct thread *td, struct linux_select_args *args)
diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c
index fb42c3e9df84..45aad4935613 100644
--- a/sys/i386/linux/linux_machdep.c
+++ b/sys/i386/linux/linux_machdep.c
@@ -31,15 +31,18 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
+#include <sys/imgact_aout.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mman.h>
 #include <sys/mutex.h>
+#include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
+#include <sys/racct.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
@@ -49,8 +52,12 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
+#include <sys/vnode.h>
 #include <sys/wait.h>
 
+#include <security/audit/audit.h>
+#include <security/mac/mac_framework.h>
+
 #include <machine/frame.h>
 #include <machine/psl.h>
 #include <machine/segments.h>
@@ -58,7 +65,10 @@ __FBSDID("$FreeBSD$");
 
 #include <vm/pmap.h>
 #include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
 #include <vm/vm_map.h>
+#include <vm/vm_param.h>
 
 #include <x86/reg.h>
 
@@ -701,3 +711,253 @@ bsd_to_linux_regset(const struct reg *b_reg,
 	l_regset->esp = b_reg->r_esp;
 	l_regset->ss = b_reg->r_ss;
 }
+
+int
+linux_uselib(struct thread *td, struct linux_uselib_args *args)
+{
+	struct nameidata ni;
+	struct vnode *vp;
+	struct exec *a_out;
+	vm_map_t map;
+	vm_map_entry_t entry;
+	struct vattr attr;
+	vm_offset_t vmaddr;
+	unsigned long file_offset;
+	unsigned long bss_size;
+	char *library;
+	ssize_t aresid;
+	int error;
+	bool locked, opened, textset;
+
+	a_out = NULL;
+	vp = NULL;
+	locked = false;
+	textset = false;
+	opened = false;
+
+	if (!LUSECONVPATH(td)) {
+		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
+		    UIO_USERSPACE, args->library, td);
+		error = namei(&ni);
+	} else {
+		LCONVPATHEXIST(args->library, &library);
+		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
+		    UIO_SYSSPACE, library, td);
+		error = namei(&ni);
+		LFREEPATH(library);
+	}
+	if (error)
+		goto cleanup;
+
+	vp = ni.ni_vp;
+	NDFREE(&ni, NDF_ONLY_PNBUF);
+
+	/*
+	 * From here on down, we have a locked vnode that must be unlocked.
+	 * XXX: The code below largely duplicates exec_check_permissions().
+	 */
+	locked = true;
+
+	/* Executable? */
+	error = VOP_GETATTR(vp, &attr, td->td_ucred);
+	if (error)
+		goto cleanup;
+
+	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
+	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
+		/* EACCESS is what exec(2) returns. */
+		error = ENOEXEC;
+		goto cleanup;
+	}
+
+	/* Sensible size? */
+	if (attr.va_size == 0) {
+		error = ENOEXEC;
+		goto cleanup;
+	}
+
+	/* Can we access it? */
+	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
+	if (error)
+		goto cleanup;
+
+	/*
+	 * XXX: This should use vn_open() so that it is properly authorized,
+	 * and to reduce code redundancy all over the place here.
+	 * XXX: Not really, it duplicates far more of exec_check_permissions()
+	 * than vn_open().
+	 */
+#ifdef MAC
+	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
+	if (error)
+		goto cleanup;
+#endif
+	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
+	if (error)
+		goto cleanup;
+	opened = true;
+
+	/* Pull in executable header into exec_map */
+	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
+	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
+	if (error)
+		goto cleanup;
+
+	/* Is it a Linux binary ? */
+	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
+		error = ENOEXEC;
+		goto cleanup;
+	}
+
+	/*
+	 * While we are here, we should REALLY do some more checks
+	 */
+
+	/* Set file/virtual offset based on a.out variant. */
+	switch ((int)(a_out->a_magic & 0xffff)) {
+	case 0413:			/* ZMAGIC */
+		file_offset = 1024;
+		break;
+	case 0314:			/* QMAGIC */
+		file_offset = 0;
+		break;
+	default:
+		error = ENOEXEC;
+		goto cleanup;
+	}
+
+	bss_size = round_page(a_out->a_bss);
+
+	/* Check various fields in header for validity/bounds. */
+	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
+		error = ENOEXEC;
+		goto cleanup;
+	}
+
+	/* text + data can't exceed file size */
+	if (a_out->a_data + a_out->a_text > attr.va_size) {
+		error = EFAULT;
+		goto cleanup;
+	}
+
+	/*
+	 * text/data/bss must not exceed limits
+	 * XXX - this is not complete. it should check current usage PLUS
+	 * the resources needed by this library.
+	 */
+	PROC_LOCK(td->td_proc);
+	if (a_out->a_text > maxtsiz ||
+	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
+	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
+	    bss_size) != 0) {
+		PROC_UNLOCK(td->td_proc);
+		error = ENOMEM;
+		goto cleanup;
+	}
+	PROC_UNLOCK(td->td_proc);
+
+	/*
+	 * Prevent more writers.
+	 */
+	error = VOP_SET_TEXT(vp);
+	if (error != 0)
+		goto cleanup;
+	textset = true;
+
+	/*
+	 * Lock no longer needed
+	 */
+	locked = false;
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Check if file_offset page aligned. Currently we cannot handle
+	 * misalinged file offsets, and so we read in the entire image
+	 * (what a waste).
+	 */
+	if (file_offset & PAGE_MASK) {
+		/* Map text+data read/write/execute */
+
+		/* a_entry is the load address and is page aligned */
+		vmaddr = trunc_page(a_out->a_entry);
+
+		/* get anon user mapping, read+write+execute */
+		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
+		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
+		    VM_PROT_ALL, VM_PROT_ALL, 0);
+		if (error)
+			goto cleanup;
+
+		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
+		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
+		    td->td_ucred, NOCRED, &aresid, td);
+		if (error != 0)
+			goto cleanup;
+		if (aresid != 0) {
+			error = ENOEXEC;
+			goto cleanup;
+		}
+	} else {
+		/*
+		 * for QMAGIC, a_entry is 20 bytes beyond the load address
+		 * to skip the executable header
+		 */
+		vmaddr = trunc_page(a_out->a_entry);
+
+		/*
+		 * Map it all into the process's space as a single
+		 * copy-on-write "data" segment.
+		 */
+		map = &td->td_proc->p_vmspace->vm_map;
+		error = vm_mmap(map, &vmaddr,
+		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
+		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
+		if (error)
+			goto cleanup;
+		vm_map_lock(map);
+		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
+			vm_map_unlock(map);
+			error = EDOOFUS;
+			goto cleanup;
+		}
+		entry->eflags |= MAP_ENTRY_VN_EXEC;
+		vm_map_unlock(map);
+		textset = false;
+	}
+
+	if (bss_size != 0) {
+		/* Calculate BSS start address */
+		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
+		    a_out->a_data;
+
+		/* allocate some 'anon' space */
+		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
+		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
+		    VM_PROT_ALL, 0);
+		if (error)
+			goto cleanup;
+	}
+
+cleanup:
+	if (opened) {
+		if (locked)
+			VOP_UNLOCK(vp);
+		locked = false;
+		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
+	}
+	if (textset) {
+		if (!locked) {
+			locked = true;
+			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
+		}
+		VOP_UNSET_TEXT_CHECKED(vp);
+	}
+	if (locked)
+		VOP_UNLOCK(vp);
+
+	/* Release the temporary mapping. */
+	if (a_out)
+		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);
+
+	return (error);
+}