[patch] SOCK_CLOEXEC, SOCK_NONBLOCK and MSG_CMSG_CLOEXEC
Jilles Tjoelker
jilles at stack.nl
Sun Mar 17 21:23:55 UTC 2013
Here are some more modifications to allow creating file descriptors with
close-on-exec set. Like in linux/glibc, SOCK_CLOEXEC and SOCK_NONBLOCK
can be OR'ed in socket() and socketpair()'s type parameter, and
MSG_CMSG_CLOEXEC to recvmsg() makes file descriptors (SCM_RIGHTS)
atomically close-on-exec.
The numerical values for SOCK_CLOEXEC and SOCK_NONBLOCK are as in
NetBSD. MSG_CMSG_CLOEXEC is the first free bit for MSG_*.
I do not pass the SOCK_* flags to MAC because this may cause incorrect
failures and the same flags can be set later via fcntl() anyway. I expect
audit to cope with the new flags.
For MSG_CMSG_CLOEXEC, I had to change unp_externalize to take a flags
argument.
The parts that are still missing for atomic close-on-exec (a reference
of what is needed is at http://austingroupbugs.net/view.php?id=411):
* accept4(). Apart from SOCK_CLOEXEC, this does not inherit O_NONBLOCK
and O_ASYNC flags like our (but not Linux's) accept() does.
O_NONBLOCK can be set using the SOCK_NONBLOCK flag. O_ASYNC will have to
be set using fcntl() for those who still want to use it.
The function is a cancellation point like accept() and therefore
libthr needs to be modified too.
Async-signal-safe.
* pipe2(). I think it is best to write into the array from the kernel
like socketpair() and not create another assembler wrapper to store
two integers into an array. Async-signal-safe.
* dup3(). See PR kern/176233. I will add the [EINVAL] if oldd == newd as
in Linux and will not support any flags other than O_CLOEXEC.
Async-signal-safe.
* 'e' mode in popen(). Fairly easy to do using pipe2().
* Probably need to be more lenient with fopen() modes.
* mkostemp(). Like mkstemp() but with an open() flags argument.
* Allow O_CLOEXEC in posix_openpt().
* posix_spawn_file_actions_adddup2() with oldd == newd should turn off
the close-on-exec flag.
* kqueue1() as in NetBSD. This seems less important because a kqueue is
not inherited across fork but may be useful in case someone uses
exec without fork.
Index: lib/libc/sys/socket.2
===================================================================
--- lib/libc/sys/socket.2 (revision 248373)
+++ lib/libc/sys/socket.2 (working copy)
@@ -115,6 +115,15 @@
which is planned,
but not yet implemented, are not described here.
.Pp
+Additionally, the following flags are allowed in the
+.Fa type
+argument:
+.Pp
+.Bd -literal -offset indent -compact
+SOCK_CLOEXEC Set close-on-exec on the new descriptor,
+SOCK_NONBLOCK Set non-blocking mode on the new socket
+.Ed
+.Pp
The
.Fa protocol
argument
Index: lib/libc/sys/socketpair.2
===================================================================
--- lib/libc/sys/socketpair.2 (revision 248373)
+++ lib/libc/sys/socketpair.2 (working copy)
@@ -57,6 +57,14 @@
and
.Fa sv Ns [1] .
The two sockets are indistinguishable.
+.Pp
+The
+.Dv SOCK_CLOEXEC
+and
+.Dv SOCK_NONBLOCK
+flags in the
+.Fa type
+argument apply to both descriptors.
.Sh RETURN VALUES
.Rv -std socketpair
.Sh ERRORS
@@ -79,6 +87,7 @@
.Sh SEE ALSO
.Xr pipe 2 ,
.Xr read 2 ,
+.Xr socket 2 ,
.Xr write 2
.Sh HISTORY
The
Index: lib/libc/sys/recv.2
===================================================================
--- lib/libc/sys/recv.2 (revision 248373)
+++ lib/libc/sys/recv.2 (working copy)
@@ -121,11 +121,12 @@
function is formed by
.Em or Ap ing
one or more of the values:
-.Bl -column ".Dv MSG_DONTWAIT" -offset indent
+.Bl -column ".Dv MSG_CMSG_CLOEXEC" -offset indent
.It Dv MSG_OOB Ta process out-of-band data
.It Dv MSG_PEEK Ta peek at incoming message
.It Dv MSG_WAITALL Ta wait for full request or error
.It Dv MSG_DONTWAIT Ta do not block
+.It Dv MSG_CMSG_CLOEXEC Ta set received fds close-on-exec
.El
.Pp
The
@@ -227,6 +228,10 @@
.Fa cmsg_type
set to
.Dv SCM_RIGHTS .
+The close-on-exec flag on received descriptors is set according to the
+.Dv MSG_CMSG_CLOEXEC
+flag passed to
+.Fn recvmsg .
.Pp
Process credentials can also be passed as ancillary data for
.Dv AF_UNIX
Index: share/man/man4/unix.4
===================================================================
--- share/man/man4/unix.4 (revision 248373)
+++ share/man/man4/unix.4 (working copy)
@@ -153,13 +153,15 @@
.Pp
The received descriptor is a
.Em duplicate
-of the sender's descriptor, as if it were created with a call to
-.Xr dup 2 .
-Per-process descriptor flags, set with
-.Xr fcntl 2 ,
-are
-.Em not
-passed to a receiver.
+of the sender's descriptor, as if it were created via
+.Li dup(fd)
+or
+.Li fcntl(fd, F_DUPFD_CLOEXEC, 0)
+depending on whether
+.Dv MSG_CMSG_CLOEXEC
+is passed in the
+.Xr recvmsg 2
+call.
Descriptors that are awaiting delivery, or that are
purposely not received, are automatically closed by the system
when the destination socket is closed.
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h (revision 248373)
+++ sys/sys/socket.h (working copy)
@@ -95,7 +95,15 @@
#endif
#define SOCK_SEQPACKET 5 /* sequenced packet stream */
+#if __BSD_VISIBLE
/*
+ * Creation flags, OR'ed into socket() and socketpair() type argument.
+ */
+#define SOCK_CLOEXEC 0x10000000
+#define SOCK_NONBLOCK 0x20000000
+#endif
+
+/*
* Option flags per-socket.
*/
#define SO_DEBUG 0x0001 /* turn on debugging info recording */
@@ -459,6 +467,7 @@
#endif
#if __BSD_VISIBLE
#define MSG_NOSIGNAL 0x20000 /* do not generate SIGPIPE on EOF */
+#define MSG_CMSG_CLOEXEC 0x40000 /* make received fds close-on-exec */
#endif
/*
Index: sys/sys/domain.h
===================================================================
--- sys/sys/domain.h (revision 248373)
+++ sys/sys/domain.h (working copy)
@@ -51,7 +51,7 @@
void (*dom_destroy) /* cleanup structures / state */
(void);
int (*dom_externalize) /* externalize access rights */
- (struct mbuf *, struct mbuf **);
+ (struct mbuf *, struct mbuf **, int);
void (*dom_dispose) /* dispose of internalized rights */
(struct mbuf *);
struct protosw *dom_protosw, *dom_protoswNPROTOSW;
Index: sys/kern/uipc_socket.c
===================================================================
--- sys/kern/uipc_socket.c (revision 248373)
+++ sys/kern/uipc_socket.c (working copy)
@@ -1727,7 +1727,7 @@
SOCKBUF_UNLOCK(&so->so_rcv);
VNET_SO_ASSERT(so);
error = (*pr->pr_domain->dom_externalize)
- (cm, controlp);
+ (cm, controlp, flags);
SOCKBUF_LOCK(&so->so_rcv);
} else if (controlp != NULL)
*controlp = cm;
@@ -2361,7 +2361,7 @@
cm->m_next = NULL;
if (pr->pr_domain->dom_externalize != NULL) {
error = (*pr->pr_domain->dom_externalize)
- (cm, controlp);
+ (cm, controlp, flags);
} else if (controlp != NULL)
*controlp = cm;
else
Index: sys/kern/uipc_usrreq.c
===================================================================
--- sys/kern/uipc_usrreq.c (revision 248373)
+++ sys/kern/uipc_usrreq.c (working copy)
@@ -288,7 +288,7 @@
static void unp_init(void);
static int unp_internalize(struct mbuf **, struct thread *);
static void unp_internalize_fp(struct file *);
-static int unp_externalize(struct mbuf *, struct mbuf **);
+static int unp_externalize(struct mbuf *, struct mbuf **, int);
static int unp_externalize_fp(struct file *);
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
static void unp_process_defers(void * __unused, int);
@@ -1695,7 +1695,7 @@
}
static int
-unp_externalize(struct mbuf *control, struct mbuf **controlp)
+unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
{
struct thread *td = curthread; /* XXX */
struct cmsghdr *cm = mtod(control, struct cmsghdr *);
@@ -1765,6 +1765,8 @@
fde->fde_file = fdep[0]->fde_file;
filecaps_move(&fdep[0]->fde_caps,
&fde->fde_caps);
+ if ((flags & MSG_CMSG_CLOEXEC) != 0)
+ fde->fde_flags |= UF_EXCLOSE;
unp_externalize_fp(fde->fde_file);
*fdp = f;
}
Index: sys/kern/uipc_syscalls.c
===================================================================
--- sys/kern/uipc_syscalls.c (revision 248373)
+++ sys/kern/uipc_syscalls.c (working copy)
@@ -164,25 +164,40 @@
{
struct socket *so;
struct file *fp;
- int fd, error;
+ int fd, error, type, oflag, fflag;
AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol);
+
+ type = uap->type;
+ oflag = 0;
+ fflag = 0;
+ if ((type & SOCK_CLOEXEC) != 0) {
+ type &= ~SOCK_CLOEXEC;
+ oflag |= O_CLOEXEC;
+ }
+ if ((type & SOCK_NONBLOCK) != 0) {
+ type &= ~SOCK_NONBLOCK;
+ fflag |= FNONBLOCK;
+ }
+
#ifdef MAC
- error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type,
+ error = mac_socket_check_create(td->td_ucred, uap->domain, type,
uap->protocol);
if (error)
return (error);
#endif
- error = falloc(td, &fp, &fd, 0);
+ error = falloc(td, &fp, &fd, oflag);
if (error)
return (error);
/* An extra reference on `fp' has been held for us by falloc(). */
- error = socreate(uap->domain, &so, uap->type, uap->protocol,
+ error = socreate(uap->domain, &so, type, uap->protocol,
td->td_ucred, td);
if (error) {
fdclose(td->td_proc->p_fd, fp, fd, td);
} else {
- finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
+ finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
+ if ((fflag & FNONBLOCK) != 0)
+ (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
td->td_retval[0] = fd;
}
fdrop(fp, td);
@@ -648,9 +663,20 @@
struct filedesc *fdp = td->td_proc->p_fd;
struct file *fp1, *fp2;
struct socket *so1, *so2;
- int fd, error;
+ int fd, error, oflag, fflag;
AUDIT_ARG_SOCKET(domain, type, protocol);
+
+ oflag = 0;
+ fflag = 0;
+ if ((type & SOCK_CLOEXEC) != 0) {
+ type &= ~SOCK_CLOEXEC;
+ oflag |= O_CLOEXEC;
+ }
+ if ((type & SOCK_NONBLOCK) != 0) {
+ type &= ~SOCK_NONBLOCK;
+ fflag |= FNONBLOCK;
+ }
#ifdef MAC
/* We might want to have a separate check for socket pairs. */
error = mac_socket_check_create(td->td_ucred, domain, type,
@@ -665,12 +691,12 @@
if (error)
goto free1;
/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
- error = falloc(td, &fp1, &fd, 0);
+ error = falloc(td, &fp1, &fd, oflag);
if (error)
goto free2;
rsv[0] = fd;
fp1->f_data = so1; /* so1 already has ref count */
- error = falloc(td, &fp2, &fd, 0);
+ error = falloc(td, &fp2, &fd, oflag);
if (error)
goto free3;
fp2->f_data = so2; /* so2 already has ref count */
@@ -686,8 +712,14 @@
if (error)
goto free4;
}
- finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
- finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
+ finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
+ &socketops);
+ finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
+ &socketops);
+ if ((fflag & FNONBLOCK) != 0) {
+ (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
+ (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
+ }
fdrop(fp1, td);
fdrop(fp2, td);
return (0);
--
Jilles Tjoelker
More information about the freebsd-hackers
mailing list