git: bb66c5975383 - main - linux(4): Add sendfile fallback for non-socket fds
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 17 Aug 2023 19:58:39 UTC
The branch main has been updated by dchagin: URL: https://cgit.FreeBSD.org/src/commit/?id=bb66c59753836cd8abb596fe316dcdb77ea66999 commit bb66c59753836cd8abb596fe316dcdb77ea66999 Author: James McLaughlin <james_mclgh.net> AuthorDate: 2023-08-17 19:57:17 +0000 Commit: Dmitry Chagin <dchagin@FreeBSD.org> CommitDate: 2023-08-17 19:57:17 +0000 linux(4): Add sendfile fallback for non-socket fds Before Linux 2.6.33, out_fd must refer to a socket. Since Linux 2.6.33 it can be any file. The patch was originally provided by James McLaughlin and adapted by me for copy_file_range. PR: 262535 Differential revision: https://reviews.freebsd.org/D34555 MFC after: 1 month --- sys/compat/linux/linux_socket.c | 209 ++++++++++++++++++++++++++++++++++------ 1 file changed, 177 insertions(+), 32 deletions(-) diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c index 45b94cb2f994..f768392be546 100644 --- a/sys/compat/linux/linux_socket.c +++ b/sys/compat/linux/linux_socket.c @@ -36,10 +36,12 @@ #include <sys/malloc.h> #include <sys/mbuf.h> #include <sys/proc.h> +#include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> #include <sys/syscallsubr.h> #include <sys/sysproto.h> +#include <sys/vnode.h> #include <sys/un.h> #include <sys/unistd.h> @@ -2374,57 +2376,200 @@ out: return (error); } +/* + * Based on sendfile_getsock from kern_sendfile.c + * Determines whether an fd is a stream socket that can be used + * with FreeBSD sendfile. + */ +static bool +is_stream_socket(struct file *fp) +{ + struct socket *so; + + /* + * The socket must be a stream socket and connected. + */ + if (fp->f_type != DTYPE_SOCKET) + return (false); + so = fp->f_data; + if (so->so_type != SOCK_STREAM) + return (false); + /* + * SCTP one-to-one style sockets currently don't work with + * sendfile(). + */ + if (so->so_proto->pr_protocol == IPPROTO_SCTP) + return (false); + return (!SOLISTENING(so)); +} + +static bool +is_regular_file(struct file *fp) +{ + + return (fp->f_type == DTYPE_VNODE && fp->f_vnode != NULL && + fp->f_vnode->v_type == VREG); +} + static int -linux_sendfile_common(struct thread *td, l_int out, l_int in, - off_t *offset, l_size_t count) +sendfile_fallback(struct thread *td, struct file *fp, l_int out, + off_t *offset, l_size_t count, off_t *sbytes) { - off_t bytes_read; - int error; - l_loff_t current_offset; - struct file *fp; + off_t current_offset, out_offset, to_send; + l_size_t bytes_sent, n_read; + struct file *ofp; + struct iovec aiov; + struct uio auio; + bool seekable; + size_t bufsz; + void *buf; + int flags, error; - AUDIT_ARG_FD(in); - error = fget_read(td, in, &cap_pread_rights, &fp); + if (offset == NULL) { + if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0) + return (error); + current_offset = td->td_uretoff.tdu_off; + } else { + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) + return (ESPIPE); + current_offset = *offset; + } + error = fget_write(td, out, &cap_pwrite_rights, &ofp); if (error != 0) return (error); - - if (offset != NULL) { - current_offset = *offset; - } else { - error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? - fo_seek(fp, 0, SEEK_CUR, td) : ESPIPE; - if (error != 0) + seekable = (ofp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0; + if (seekable) { + if ((error = fo_seek(ofp, 0, SEEK_CUR, td)) != 0) goto drop; + out_offset = td->td_uretoff.tdu_off; + } else + out_offset = 0; + + flags = FOF_OFFSET | FOF_NOUPDATE; + bufsz = min(count, MAXPHYS); + buf = malloc(bufsz, M_LINUX, M_WAITOK); + bytes_sent = 0; + while (bytes_sent < count) { + to_send = min(count - bytes_sent, bufsz); + aiov.iov_base = buf; + aiov.iov_len = bufsz; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_td = td; + auio.uio_rw = UIO_READ; + auio.uio_offset = current_offset; + auio.uio_resid = to_send; + error = fo_read(fp, &auio, fp->f_cred, flags, td); + if (error != 0) + break; + n_read = to_send - auio.uio_resid; + if (n_read == 0) + break; + aiov.iov_base = buf; + aiov.iov_len = bufsz; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_td = td; + auio.uio_rw = UIO_WRITE; + auio.uio_offset = (seekable) ? out_offset : 0; + auio.uio_resid = n_read; + error = fo_write(ofp, &auio, ofp->f_cred, flags, td); + if (error != 0) + break; + bytes_sent += n_read; + current_offset += n_read; + out_offset += n_read; + } + free(buf, M_LINUX); + + if (error == 0) { + *sbytes = bytes_sent; + if (offset != NULL) + *offset = current_offset; + else + error = fo_seek(fp, current_offset, SEEK_SET, td); + } + if (error == 0 && seekable) + error = fo_seek(ofp, out_offset, SEEK_SET, td); + +drop: + fdrop(ofp, td); + return (error); +} + +static int +sendfile_sendfile(struct thread *td, struct file *fp, l_int out, + off_t *offset, l_size_t count, off_t *sbytes) +{ + off_t current_offset; + int error; + + if (offset == NULL) { + if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) + return (ESPIPE); + if ((error = fo_seek(fp, 0, SEEK_CUR, td)) != 0) + return (error); current_offset = td->td_uretoff.tdu_off; + } else + current_offset = *offset; + error = fo_sendfile(fp, out, NULL, NULL, current_offset, count, + sbytes, 0, td); + if (error == 0) { + current_offset += *sbytes; + if (offset != NULL) + *offset = current_offset; + else + error = fo_seek(fp, current_offset, SEEK_SET, td); } + return (error); +} - bytes_read = 0; +static int +linux_sendfile_common(struct thread *td, l_int out, l_int in, + off_t *offset, l_size_t count) +{ + struct file *fp, *ofp; + off_t sbytes; + int error; /* Linux cannot have 0 count. */ - if (count <= 0 || current_offset < 0) { + if (count <= 0 || (offset != NULL && *offset < 0)) + return (EINVAL); + + AUDIT_ARG_FD(in); + error = fget_read(td, in, &cap_pread_rights, &fp); + if (error != 0) + return (error); + if ((fp->f_type != DTYPE_SHM && fp->f_type != DTYPE_VNODE) || + (fp->f_type == DTYPE_VNODE && + (fp->f_vnode == NULL || fp->f_vnode->v_type != VREG))) { error = EINVAL; goto drop; } - - error = fo_sendfile(fp, out, NULL, NULL, current_offset, count, - &bytes_read, 0, td); + error = fget_unlocked(td, out, &cap_no_rights, &ofp); if (error != 0) goto drop; - current_offset += bytes_read; - if (offset != NULL) { - *offset = current_offset; + if (is_regular_file(fp) && is_regular_file(ofp)) { + error = kern_copy_file_range(td, in, offset, out, NULL, count, + 0); } else { - error = fo_seek(fp, current_offset, SEEK_SET, td); - if (error != 0) - goto drop; + sbytes = 0; + if (is_stream_socket(ofp)) + error = sendfile_sendfile(td, fp, out, offset, count, + &sbytes); + else + error = sendfile_fallback(td, fp, out, offset, count, + &sbytes); + if (error == 0) + td->td_retval[0] = sbytes; } + fdrop(ofp, td); - td->td_retval[0] = (ssize_t)bytes_read; drop: fdrop(fp, td); - if (error == ENOTSOCK) - error = EINVAL; return (error); } @@ -2434,10 +2579,10 @@ linux_sendfile(struct thread *td, struct linux_sendfile_args *arg) /* * Differences between FreeBSD and Linux sendfile: * - Linux doesn't send anything when count is 0 (FreeBSD uses 0 to - * mean send the whole file.) In linux_sendfile given fds are still - * checked for validity when the count is 0. + * mean send the whole file). * - Linux can send to any fd whereas FreeBSD only supports sockets. - * The same restriction follows for linux_sendfile. + * We therefore use FreeBSD sendfile where possible for performance, + * but fall back on a manual copy (sendfile_fallback). * - Linux doesn't have an equivalent for FreeBSD's flags and sf_hdtr. * - Linux takes an offset pointer and updates it to the read location. * FreeBSD takes in an offset and a 'bytes read' parameter which is