git: b2deba043c0c - stable/13 - linux(4): Rework Linux ppoll system call.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 17 Jun 2022 19:32:01 UTC
The branch stable/13 has been updated by dchagin: URL: https://cgit.FreeBSD.org/src/commit/?id=b2deba043c0ce5e89db934ff81dad753535eafa8 commit b2deba043c0ce5e89db934ff81dad753535eafa8 Author: Dmitry Chagin <dchagin@FreeBSD.org> AuthorDate: 2021-06-22 05:06:05 +0000 Commit: Dmitry Chagin <dchagin@FreeBSD.org> CommitDate: 2022-06-17 19:30:19 +0000 linux(4): Rework Linux ppoll system call. For now the Linux emulation layer uses the in-kernel ppoll(2) without conversion of the user-supplied fd 'events', and does not convert the kernel-supplied fd 'revents'. At least POLLRDHUP is handled by FreeBSD differently than by Linux. It seems that Linux silently ignores POLLRDHUP on non-socket fds, unlike FreeBSD, which checks more strictly and fails. Rework the Linux ppoll, using kern_poll and converting 'events' and 'revents' values. While here, move the poll events defines to the MI part of the code, as they are mostly identical on all arches except arm. Differential Revision: https://reviews.freebsd.org/D30716 MFC after: 2 weeks (cherry picked from commit 26795a0378b58c3e26b68577a4cc446ab527e8b5) --- sys/amd64/linux/linux.h | 21 --------- sys/amd64/linux32/linux.h | 21 --------- sys/compat/linux/linux.c | 96 +++++++++++++++++++++++++++++++++++++++++ sys/compat/linux/linux.h | 17 ++++++++ sys/compat/linux/linux_common.h | 4 ++ sys/compat/linux/linux_misc.c | 68 ++++++++++++++++++++++++++++- sys/i386/linux/linux.h | 21 --------- 7 files changed, 184 insertions(+), 64 deletions(-) diff --git a/sys/amd64/linux/linux.h b/sys/amd64/linux/linux.h index a9ed66689b64..4e736cc11c22 100644 --- a/sys/amd64/linux/linux.h +++ b/sys/amd64/linux/linux.h @@ -407,27 +407,6 @@ struct l_ifconf { #define ifc_buf ifc_ifcu.ifcu_buf #define ifc_req ifc_ifcu.ifcu_req -/* - * poll() - */ -#define LINUX_POLLIN 0x0001 -#define LINUX_POLLPRI 0x0002 -#define LINUX_POLLOUT 0x0004 -#define LINUX_POLLERR 0x0008 -#define LINUX_POLLHUP 0x0010 -#define LINUX_POLLNVAL 0x0020 -#define LINUX_POLLRDNORM 0x0040 -#define 
LINUX_POLLRDBAND 0x0080 -#define LINUX_POLLWRNORM 0x0100 -#define LINUX_POLLWRBAND 0x0200 -#define LINUX_POLLMSG 0x0400 - -struct l_pollfd { - l_int fd; - l_short events; - l_short revents; -}; - #define LINUX_ARCH_SET_GS 0x1001 #define LINUX_ARCH_SET_FS 0x1002 #define LINUX_ARCH_GET_FS 0x1003 diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h index 50a4efed1709..a95545619640 100644 --- a/sys/amd64/linux32/linux.h +++ b/sys/amd64/linux32/linux.h @@ -515,27 +515,6 @@ struct l_ifconf { #define ifc_buf ifc_ifcu.ifcu_buf #define ifc_req ifc_ifcu.ifcu_req -/* - * poll() - */ -#define LINUX_POLLIN 0x0001 -#define LINUX_POLLPRI 0x0002 -#define LINUX_POLLOUT 0x0004 -#define LINUX_POLLERR 0x0008 -#define LINUX_POLLHUP 0x0010 -#define LINUX_POLLNVAL 0x0020 -#define LINUX_POLLRDNORM 0x0040 -#define LINUX_POLLRDBAND 0x0080 -#define LINUX_POLLWRNORM 0x0100 -#define LINUX_POLLWRBAND 0x0200 -#define LINUX_POLLMSG 0x0400 - -struct l_pollfd { - l_int fd; - l_short events; - l_short revents; -}; - struct l_user_desc { l_uint entry_number; l_uint base_addr; diff --git a/sys/compat/linux/linux.c b/sys/compat/linux/linux.c index a8c5e2baddc4..350d2c1abaf9 100644 --- a/sys/compat/linux/linux.c +++ b/sys/compat/linux/linux.c @@ -33,9 +33,13 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/conf.h> #include <sys/ctype.h> +#include <sys/file.h> +#include <sys/filedesc.h> #include <sys/jail.h> #include <sys/lock.h> #include <sys/malloc.h> +#include <sys/poll.h> +#include <sys/proc.h> #include <sys/signalvar.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -50,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include <compat/linux/linux.h> #include <compat/linux/linux_common.h> +#include <compat/linux/linux_mib.h> #include <compat/linux/linux_util.h> struct futex_list futex_list; @@ -627,3 +632,94 @@ linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap, return (no_value); return (bsd_ret); } + +void +linux_to_bsd_poll_events(struct thread *td, int fd, short 
lev, + short *bev) +{ + struct proc *p = td->td_proc; + struct filedesc *fdp; + struct file *fp; + int error; + short bits = 0; + + if (lev & LINUX_POLLIN) + bits |= POLLIN; + if (lev & LINUX_POLLPRI) + bits |= POLLPRI; + if (lev & LINUX_POLLOUT) + bits |= POLLOUT; + if (lev & LINUX_POLLERR) + bits |= POLLERR; + if (lev & LINUX_POLLHUP) + bits |= POLLHUP; + if (lev & LINUX_POLLNVAL) + bits |= POLLNVAL; + if (lev & LINUX_POLLRDNORM) + bits |= POLLRDNORM; + if (lev & LINUX_POLLRDBAND) + bits |= POLLRDBAND; + if (lev & LINUX_POLLWRBAND) + bits |= POLLWRBAND; + if (lev & LINUX_POLLWRNORM) + bits |= POLLWRNORM; + + if (lev & LINUX_POLLRDHUP) { + /* + * It seems that the Linux silencly ignores POLLRDHUP + * on non-socket file descriptors unlike FreeBSD, where + * events bits is more strictly checked (POLLSTANDARD). + */ + fdp = p->p_fd; + error = fget_unlocked(fdp, fd, &cap_no_rights, &fp); + if (error == 0) { + /* + * XXX. On FreeBSD POLLRDHUP applies only to + * stream sockets. + */ + if (fp->f_type == DTYPE_SOCKET) + bits |= POLLRDHUP; + fdrop(fp, td); + } + } + + if (lev & LINUX_POLLMSG) + LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev); + if (lev & LINUX_POLLREMOVE) + LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev); + + *bev = bits; +} + +void +bsd_to_linux_poll_events(short bev, short *lev) +{ + short bits = 0; + + if (bev & POLLIN) + bits |= LINUX_POLLIN; + if (bev & POLLPRI) + bits |= LINUX_POLLPRI; + if (bev & (POLLOUT | POLLWRNORM)) + /* + * POLLWRNORM is equal to POLLOUT on FreeBSD, + * but not on Linux + */ + bits |= LINUX_POLLOUT; + if (bev & POLLERR) + bits |= LINUX_POLLERR; + if (bev & POLLHUP) + bits |= LINUX_POLLHUP; + if (bev & POLLNVAL) + bits |= LINUX_POLLNVAL; + if (bev & POLLRDNORM) + bits |= LINUX_POLLRDNORM; + if (bev & POLLRDBAND) + bits |= LINUX_POLLRDBAND; + if (bev & POLLWRBAND) + bits |= LINUX_POLLWRBAND; + if (bev & POLLRDHUP) + bits |= LINUX_POLLRDHUP; + + *lev = bits; +} diff --git 
a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h index ba7a96e1aa79..2548f7d50a97 100644 --- a/sys/compat/linux/linux.h +++ b/sys/compat/linux/linux.h @@ -31,6 +31,23 @@ #include <sys/queue.h> +/* + * poll() + */ +#define LINUX_POLLIN 0x0001 +#define LINUX_POLLPRI 0x0002 +#define LINUX_POLLOUT 0x0004 +#define LINUX_POLLERR 0x0008 +#define LINUX_POLLHUP 0x0010 +#define LINUX_POLLNVAL 0x0020 +#define LINUX_POLLRDNORM 0x0040 +#define LINUX_POLLRDBAND 0x0080 +#define LINUX_POLLWRNORM 0x0100 +#define LINUX_POLLWRBAND 0x0200 +#define LINUX_POLLMSG 0x0400 +#define LINUX_POLLREMOVE 0x1000 +#define LINUX_POLLRDHUP 0x2000 + #define LINUX_IFHWADDRLEN 6 #define LINUX_IFNAMSIZ 16 diff --git a/sys/compat/linux/linux_common.h b/sys/compat/linux/linux_common.h index a306bb1eb859..b0e3408e42df 100644 --- a/sys/compat/linux/linux_common.h +++ b/sys/compat/linux/linux_common.h @@ -41,5 +41,9 @@ int bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa, socklen_t len); int linux_to_bsd_sockaddr(const struct l_sockaddr *lsa, struct sockaddr **sap, socklen_t *len); +void linux_to_bsd_poll_events(struct thread *td, int fd, + short lev, short *bev); +void bsd_to_linux_poll_events(short bev, short *lev); + #endif /* _LINUX_COMMON_H_ */ diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index 22d44416c1b1..90a89578fc8f 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <sys/msgbuf.h> #include <sys/mutex.h> #include <sys/namei.h> +#include <sys/poll.h> #include <sys/priv.h> #include <sys/proc.h> #include <sys/procctl.h> @@ -89,6 +90,7 @@ __FBSDID("$FreeBSD$"); #include <machine/../linux/linux_proto.h> #endif +#include <compat/linux/linux_common.h> #include <compat/linux/linux_dtrace.h> #include <compat/linux/linux_file.h> #include <compat/linux/linux_mib.h> @@ -144,6 +146,10 @@ static int linux_common_pselect6(struct thread *, l_int, static int 
linux_common_ppoll(struct thread *, struct pollfd *, uint32_t, struct timespec *, l_sigset_t *, l_size_t); +static int linux_pollin(struct thread *, struct pollfd *, + struct pollfd *, u_int); +static int linux_pollout(struct thread *, struct pollfd *, + struct pollfd *, u_int); int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) @@ -2528,11 +2534,15 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) { struct timespec ts0, ts1; + struct pollfd stackfds[32]; + struct pollfd *kfds; l_sigset_t l_ss; sigset_t *ssp; sigset_t ss; int error; + if (kern_poll_maxfds(nfds)) + return (EINVAL); if (sset != NULL) { if (ssize != sizeof(l_ss)) return (EINVAL); @@ -2546,7 +2556,17 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, if (tsp != NULL) nanotime(&ts0); - error = kern_poll(td, fds, nfds, tsp, ssp); + if (nfds > nitems(stackfds)) + kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); + else + kfds = stackfds; + error = linux_pollin(td, kfds, fds, nfds); + if (error != 0) + goto out; + + error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); + if (error == 0) + error = linux_pollout(td, kfds, fds, nfds); if (error == 0 && tsp != NULL) { if (td->td_retval[0]) { @@ -2558,6 +2578,10 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, } else timespecclear(tsp); } + +out: + if (nfds > nitems(stackfds)) + free(kfds, M_TEMP); return (error); } @@ -2592,6 +2616,48 @@ linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ +static int +linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) +{ + int error; + u_int i; + + error = copyin(ufds, fds, nfd * sizeof(*fds)); + if (error != 0) + return (error); + + for (i = 0; i < nfd; i++) { + if (fds->events != 0) + linux_to_bsd_poll_events(td, fds->fd, + fds->events, &fds->events); + 
fds++; + } + return (0); +} + +static int +linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) +{ + int error = 0; + u_int i, n = 0; + + for (i = 0; i < nfd; i++) { + if (fds->revents != 0) { + bsd_to_linux_poll_events(fds->revents, + &fds->revents); + n++; + } + error = copyout(&fds->revents, &ufds->revents, + sizeof(ufds->revents)); + if (error) + return (error); + fds++; + ufds++; + } + td->td_retval[0] = n; + return (0); +} + int linux_sched_rr_get_interval(struct thread *td, struct linux_sched_rr_get_interval_args *uap) diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h index 1bb76d8e41d0..8dff1313c598 100644 --- a/sys/i386/linux/linux.h +++ b/sys/i386/linux/linux.h @@ -478,27 +478,6 @@ struct l_ifreq { #define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */ #define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */ -/* - * poll() - */ -#define LINUX_POLLIN 0x0001 -#define LINUX_POLLPRI 0x0002 -#define LINUX_POLLOUT 0x0004 -#define LINUX_POLLERR 0x0008 -#define LINUX_POLLHUP 0x0010 -#define LINUX_POLLNVAL 0x0020 -#define LINUX_POLLRDNORM 0x0040 -#define LINUX_POLLRDBAND 0x0080 -#define LINUX_POLLWRNORM 0x0100 -#define LINUX_POLLWRBAND 0x0200 -#define LINUX_POLLMSG 0x0400 - -struct l_pollfd { - l_int fd; - l_short events; - l_short revents; -}; - struct l_user_desc { l_uint entry_number; l_uint base_addr;