git: 889197949405 - stable/13 - fd: add close_range(..., CLOSE_RANGE_CLOEXEC)

From: Mateusz Guzik <mjg_at_FreeBSD.org>
Date: Mon, 07 Mar 2022 12:18:07 UTC
The branch stable/13 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=88919794940543d0b6743a035a44a51b9d8ff5d8

commit 88919794940543d0b6743a035a44a51b9d8ff5d8
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2022-03-03 12:45:11 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2022-03-07 12:15:47 +0000

    fd: add close_range(..., CLOSE_RANGE_CLOEXEC)
    
    For compatibility with Linux.
    
    MFC after:      3 days
    Reviewed by:    markj
    Differential Revision:  https://reviews.freebsd.org/D34424
    
    (cherry picked from commit f3f3e3c44d3b1776653bbf19eab17ce006a815d8)
---
 lib/libc/sys/closefrom.2          | 10 ++++--
 lib/libsysdecode/flags.c          |  7 ++++
 lib/libsysdecode/mktables         |  1 +
 lib/libsysdecode/sysdecode.h      |  1 +
 lib/libsysdecode/sysdecode_mask.3 |  5 ++-
 sys/kern/kern_descrip.c           | 67 +++++++++++++++++++++++++++++----------
 sys/sys/syscallsubr.h             |  2 +-
 sys/sys/unistd.h                  |  5 +++
 tests/sys/file/closefrom_test.c   | 35 +++++++++++++++++++-
 usr.bin/kdump/kdump.c             |  8 +++++
 usr.bin/truss/syscall.h           |  1 +
 usr.bin/truss/syscalls.c          |  5 +++
 12 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/lib/libc/sys/closefrom.2 b/lib/libc/sys/closefrom.2
index db41c617dc7f..64c210f7897a 100644
--- a/lib/libc/sys/closefrom.2
+++ b/lib/libc/sys/closefrom.2
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd April 12, 2020
+.Dd March 3, 2022
 .Dt CLOSEFROM 2
 .Os
 .Sh NAME
@@ -56,8 +56,12 @@ and
 .Fa highfd
 inclusive, clamped to the range of open file descriptors.
 Any errors encountered while closing file descriptors are ignored.
-There are currently no defined
-.Fa flags .
+Supported
+.Fa flags :
+.Bl -tag -width ".Dv CLOSE_RANGE_CLOEXEC"
+.It Dv CLOSE_RANGE_CLOEXEC
+Set the close-on-exec flag on descriptors in the range instead of closing them.
+.El
 .Sh RETURN VALUES
 Upon successful completion,
 .Fn close_range
diff --git a/lib/libsysdecode/flags.c b/lib/libsysdecode/flags.c
index df461cf0dca1..35bce1ff77f9 100644
--- a/lib/libsysdecode/flags.c
+++ b/lib/libsysdecode/flags.c
@@ -400,6 +400,13 @@ sysdecode_cap_fcntlrights(FILE *fp, uint32_t rights, uint32_t *rem)
 	return (print_mask_int(fp, capfcntl, rights, rem));
 }
 
+bool
+sysdecode_close_range_flags(FILE *fp, int flags, int *rem)
+{
+	/* Look flags up in the closerangeflags table emitted by mktables. */
+	return (print_mask_int(fp, closerangeflags, flags, rem));
+}
+
 const char *
 sysdecode_extattrnamespace(int namespace)
 {
diff --git a/lib/libsysdecode/mktables b/lib/libsysdecode/mktables
index 77cfa15bd1f5..c9c6830b04aa 100644
--- a/lib/libsysdecode/mktables
+++ b/lib/libsysdecode/mktables
@@ -94,6 +94,7 @@ gen_table "accessmode"      "[A-Z]_OK[[:space:]]+0?x?[0-9A-Fa-f]+"         "sys/
 gen_table "acltype"         "ACL_TYPE_[A-Z4_]+[[:space:]]+0x[0-9]+"        "sys/acl.h"
 gen_table "atflags"         "AT_[A-Z_]+[[:space:]]+0x[0-9]+"               "sys/fcntl.h"
 gen_table "capfcntl"        "CAP_FCNTL_[A-Z]+[[:space:]]+\(1"              "sys/capsicum.h"
+gen_table "closerangeflags" "CLOSE_RANGE_[A-Z]+[[:space:]]+\([0-9]+<<[0-9]+\)"       "sys/unistd.h"
 gen_table "extattrns"       "EXTATTR_NAMESPACE_[A-Z]+[[:space:]]+0x[0-9]+" "sys/extattr.h"
 gen_table "fadvisebehav"    "POSIX_FADV_[A-Z]+[[:space:]]+[0-9]+"          "sys/fcntl.h"
 gen_table "openflags"       "O_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+"           "sys/fcntl.h"	"O_RDONLY|O_RDWR|O_WRONLY"
diff --git a/lib/libsysdecode/sysdecode.h b/lib/libsysdecode/sysdecode.h
index cdb9205f3da1..47e37aeeed98 100644
--- a/lib/libsysdecode/sysdecode.h
+++ b/lib/libsysdecode/sysdecode.h
@@ -45,6 +45,7 @@ const char *sysdecode_atfd(int _fd);
 bool	sysdecode_atflags(FILE *_fp, int _flags, int *_rem);
 bool	sysdecode_cap_fcntlrights(FILE *_fp, uint32_t _rights, uint32_t *_rem);
 void	sysdecode_cap_rights(FILE *_fp, cap_rights_t *_rightsp);
+bool	sysdecode_close_range_flags(FILE *_fp, int _flags, int *_rem);
 const char *sysdecode_cmsg_type(int _cmsg_level, int _cmsg_type);
 const char *sysdecode_extattrnamespace(int _namespace);
 const char *sysdecode_fadvice(int _advice);
diff --git a/lib/libsysdecode/sysdecode_mask.3 b/lib/libsysdecode/sysdecode_mask.3
index 54e182db31d7..45464e333eb4 100644
--- a/lib/libsysdecode/sysdecode_mask.3
+++ b/lib/libsysdecode/sysdecode_mask.3
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd January 16, 2018
+.Dd March 3, 2022
 .Dt sysdecode_mask 3
 .Os
 .Sh NAME
@@ -32,6 +32,7 @@
 .Nm sysdecode_accessmode ,
 .Nm sysdecode_atflags ,
 .Nm sysdecode_capfcntlrights ,
+.Nm sysdecode_close_range_flags ,
 .Nm sysdecode_fcntl_fileflags ,
 .Nm sysdecode_fileflags ,
 .Nm sysdecode_filemode ,
@@ -74,6 +75,8 @@
 .Ft bool
 .Fn sysdecode_cap_fcntlrights "FILE *fp" "uint32_t rights" "uint32_t *rem"
 .Ft bool
+.Fn sysdecode_close_range_flags "FILE *fp" "int flags" "int *rem"
+.Ft bool
 .Fn sysdecode_fcntl_fileflags "FILE *fp" "int flags" "int *rem"
 .Ft bool
 .Fn sysdecode_fileflags "FILE *fp" "fflags_t flags" "fflags_t *rem"
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 1cf88be09a27..03498d5a9ce9 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -1400,23 +1400,39 @@ kern_close(struct thread *td, int fd)
 	return (closefp(fdp, fd, fp, td, true, true));
 }
 
-int
-kern_close_range(struct thread *td, u_int lowfd, u_int highfd)
+static int
+close_range_cloexec(struct thread *td, u_int lowfd, u_int highfd)
 {
 	struct filedesc *fdp;
-	const struct fdescenttbl *fdt;
-	struct file *fp;
+	struct fdescenttbl *fdt;
+	struct filedescent *fde;
 	int fd;
 
-	/*
-	 * Check this prior to clamping; closefrom(3) with only fd 0, 1, and 2
-	 * open should not be a usage error.  From a close_range() perspective,
-	 * close_range(3, ~0U, 0) in the same scenario should also likely not
-	 * be a usage error as all fd above 3 are in-fact already closed.
-	 */
-	if (highfd < lowfd) {
-		return (EINVAL);
+	fdp = td->td_proc->p_fd;
+	FILEDESC_XLOCK(fdp);
+	fdt = atomic_load_ptr(&fdp->fd_files);
+	highfd = MIN(highfd, fdt->fdt_nfiles - 1);	/* clamp to table */
+	fd = lowfd;
+	if (__predict_false(fd > highfd)) {	/* empty after clamp */
+		goto out_locked;
+	}
+	for (; fd <= highfd; fd++) {
+		fde = &fdt->fdt_ofiles[fd];
+		if (fde->fde_file != NULL)	/* skip unused slots */
+			fde->fde_flags |= UF_EXCLOSE;
 	}
+out_locked:
+	FILEDESC_XUNLOCK(fdp);
+	return (0);	/* returns 0 on every path */
+}
+
+static int
+close_range_impl(struct thread *td, u_int lowfd, u_int highfd)
+{
+	struct filedesc *fdp;
+	const struct fdescenttbl *fdt;
+	struct file *fp;
+	int fd;
 
 	fdp = td->td_proc->p_fd;
 	FILEDESC_XLOCK(fdp);
@@ -1447,6 +1463,26 @@ out_unlocked:
 	return (0);
 }
 
+int
+kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd)
+{
+
+	/*
+	 * Reject an inverted range before any clamping.  closefrom(3) with
+	 * only fds 0, 1 and 2 open must not be a usage error, and likewise
+	 * close_range(3, ~0U, 0) in that state should not be one either:
+	 * every descriptor from 3 upward is in fact already closed.
+	 */
+	if (highfd < lowfd) {
+		return (EINVAL);
+	}
+
+	if ((flags & CLOSE_RANGE_CLOEXEC) != 0)	/* mark, don't close */
+		return (close_range_cloexec(td, lowfd, highfd));
+
+	return (close_range_impl(td, lowfd, highfd));
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct close_range_args {
 	u_int	lowfd;
@@ -1462,10 +1498,9 @@ sys_close_range(struct thread *td, struct close_range_args *uap)
 	AUDIT_ARG_CMD(uap->highfd);
 	AUDIT_ARG_FFLAGS(uap->flags);
 
-	/* No flags currently defined */
-	if (uap->flags != 0)
+	if ((uap->flags & ~(CLOSE_RANGE_CLOEXEC)) != 0)
 		return (EINVAL);
-	return (kern_close_range(td, uap->lowfd, uap->highfd));
+	return (kern_close_range(td, uap->flags, uap->lowfd, uap->highfd));
 }
 
 #ifdef COMPAT_FREEBSD12
@@ -1490,7 +1525,7 @@ freebsd12_closefrom(struct thread *td, struct freebsd12_closefrom_args *uap)
 	 * closefrom(0) which closes all files.
 	 */
 	lowfd = MAX(0, uap->lowfd);
-	return (kern_close_range(td, lowfd, ~0U));
+	return (kern_close_range(td, 0, lowfd, ~0U));
 }
 #endif	/* COMPAT_FREEBSD12 */
 
diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h
index 71e76f072693..bd246019bcc9 100644
--- a/sys/sys/syscallsubr.h
+++ b/sys/sys/syscallsubr.h
@@ -108,7 +108,7 @@ int	kern_clock_settime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 void	kern_thread_cputime(struct thread *targettd, struct timespec *ats);
 void	kern_process_cputime(struct proc *targetp, struct timespec *ats);
-int	kern_close_range(struct thread *td, u_int lowfd, u_int highfd);
+int	kern_close_range(struct thread *td, int flags, u_int lowfd, u_int highfd);
 int	kern_close(struct thread *td, int fd);
 int	kern_connectat(struct thread *td, int dirfd, int fd,
 	    struct sockaddr *sa);
diff --git a/sys/sys/unistd.h b/sys/sys/unistd.h
index 2d9544652f91..007abbdfbba5 100644
--- a/sys/sys/unistd.h
+++ b/sys/sys/unistd.h
@@ -199,6 +199,11 @@
 
 #define	SWAPOFF_FORCE	0x00000001
 
+/*
+ * close_range() options.
+ */
+#define	CLOSE_RANGE_CLOEXEC	(1<<2)
+
 #endif /* __BSD_VISIBLE */
 
 #endif /* !_SYS_UNISTD_H_ */
diff --git a/tests/sys/file/closefrom_test.c b/tests/sys/file/closefrom_test.c
index 8516f7f2598c..d11faa22b55d 100644
--- a/tests/sys/file/closefrom_test.c
+++ b/tests/sys/file/closefrom_test.c
@@ -144,7 +144,7 @@ main(void)
 {
 	struct shared_info *info;
 	pid_t pid;
-	int fd, i, start;
+	int fd, flags, i, start;
 
 	printf("1..20\n");
 
@@ -325,5 +325,38 @@ main(void)
 		fail(info->tag, "%s", info->message);
 	ok(info->tag);
 
+	/* CLOSE_RANGE_CLOEXEC marks only start+1..start+4 of the 8 new fds */
+	for (i = 0; i < 8; i++)
+		(void)devnull();
+	fd = highest_fd();
+	start = fd - 8;
+	if (close_range(start + 1, start + 4, CLOSE_RANGE_CLOEXEC) < 0)
+		fail_err("close_range(..., CLOSE_RANGE_CLOEXEC)");
+	flags = fcntl(start, F_GETFD);
+	if (flags < 0)
+		fail_err("fcntl(.., F_GETFD)");
+	if ((flags & FD_CLOEXEC) != 0)
+		fail("close_range", "CLOSE_RANGE_CLOEXEC set close-on-exec "
+		    "when it should not have on fd %d", start);
+	for (i = start + 1; i <= start + 4; i++) {
+		flags = fcntl(i, F_GETFD);
+		if (flags < 0)
+			fail_err("fcntl(.., F_GETFD)");
+		if ((flags & FD_CLOEXEC) == 0)
+			fail("close_range", "CLOSE_RANGE_CLOEXEC did not set "
+			    "close-on-exec on fd %d", i);
+	}
+	for (; i <= start + 8; i++) {	/* include the highest new fd */
+		flags = fcntl(i, F_GETFD);
+		if (flags < 0)
+			fail_err("fcntl(.., F_GETFD)");
+		if ((flags & FD_CLOEXEC) != 0)
+			fail("close_range", "CLOSE_RANGE_CLOEXEC set close-on-exec "
+			    "when it should not have on fd %d", i);
+	}
+	if (close_range(start, start + 8, 0) < 0)
+		fail_err("close_range");
+	ok("close_range(..., CLOSE_RANGE_CLOEXEC)");
+
 	return (0);
 }
diff --git a/usr.bin/kdump/kdump.c b/usr.bin/kdump/kdump.c
index d863fffea774..19616338fc1f 100644
--- a/usr.bin/kdump/kdump.c
+++ b/usr.bin/kdump/kdump.c
@@ -869,6 +869,14 @@ ktrsyscall(struct ktr_syscall *ktr, u_int sv_flags)
 				ip++;
 				narg--;
 				break;
+			case SYS_close_range:
+				print_number(ip, narg, c);
+				print_number(ip, narg, c);
+				putchar(',');
+				print_mask_arg(sysdecode_close_range_flags, *ip);
+				ip++;	/* print_number consumed the fds */
+				narg--;
+				break;
 			case SYS_open:
 			case SYS_openat:
 				print_number(ip, narg, c);
diff --git a/usr.bin/truss/syscall.h b/usr.bin/truss/syscall.h
index fc1630677242..f25cbd81ede5 100644
--- a/usr.bin/truss/syscall.h
+++ b/usr.bin/truss/syscall.h
@@ -91,6 +91,7 @@ enum Argtype {
 	Atfd,
 	Atflags,
 	CapFcntlRights,
+	Closerangeflags,
 	Extattrnamespace,
 	Fadvice,
 	Fcntl,
diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c
index efabe1086932..7c9172e070d7 100644
--- a/usr.bin/truss/syscalls.c
+++ b/usr.bin/truss/syscalls.c
@@ -184,6 +184,8 @@ static const struct syscall_decode decoded_syscalls[] = {
 	  .args = { { Int, 0 } } },
 	{ .name = "closefrom", .ret_type = 1, .nargs = 1,
 	  .args = { { Int, 0 } } },
+	{ .name = "close_range", .ret_type = 1, .nargs = 3,
+	  .args = { { Int, 0 }, { Int, 1 }, { Closerangeflags, 2 } } },
 	{ .name = "compat11.fstat", .ret_type = 1, .nargs = 2,
 	  .args = { { Int, 0 }, { Stat11 | OUT, 1 } } },
 	{ .name = "compat11.fstatat", .ret_type = 1, .nargs = 4,
@@ -2166,6 +2168,9 @@ print_arg(struct syscall_arg *sc, unsigned long *args, register_t *retval,
 	case Fcntl:
 		print_integer_arg(sysdecode_fcntl_cmd, fp, args[sc->offset]);
 		break;
+	case Closerangeflags:
+		print_mask_arg(sysdecode_close_range_flags, fp, args[sc->offset]);
+		break;
 	case Mprot:
 		print_mask_arg(sysdecode_mmap_prot, fp, args[sc->offset]);
 		break;