git: 26795a0378b5 - main - linux(4): Rework Linux ppoll system call.
Dmitry Chagin
dchagin at FreeBSD.org
Mon Jun 21 16:30:05 UTC 2021
The branch main has been updated by dchagin:
URL: https://cgit.FreeBSD.org/src/commit/?id=26795a0378b58c3e26b68577a4cc446ab527e8b5
commit 26795a0378b58c3e26b68577a4cc446ab527e8b5
Author: Dmitry Chagin <dchagin at FreeBSD.org>
AuthorDate: 2021-06-22 05:06:05 +0000
Commit: Dmitry Chagin <dchagin at FreeBSD.org>
CommitDate: 2021-06-22 05:06:05 +0000
linux(4): Rework Linux ppoll system call.
For now the Linux emulation layer uses the in-kernel ppoll(2) without
converting the user-supplied fd 'events', and does not convert the
kernel-supplied fd 'revents'.
At least POLLRDHUP is handled by FreeBSD differently than by
Linux. It seems that Linux silently ignores POLLRDHUP on non-socket fds,
unlike FreeBSD, which checks the events more strictly and fails.
Rework the Linux ppoll, using kern_poll and converting 'events'
and 'revents' values.
While here, move the poll events defines to the MI part of the code, as
they are mostly identical on all arches except arm.
Differential Revision: https://reviews.freebsd.org/D30716
MFC after: 2 weeks
---
sys/amd64/linux/linux.h | 21 ---------
sys/amd64/linux32/linux.h | 21 ---------
sys/compat/linux/linux.c | 96 +++++++++++++++++++++++++++++++++++++++++
sys/compat/linux/linux.h | 17 ++++++++
sys/compat/linux/linux_common.h | 4 ++
sys/compat/linux/linux_misc.c | 68 ++++++++++++++++++++++++++++-
sys/i386/linux/linux.h | 21 ---------
7 files changed, 184 insertions(+), 64 deletions(-)
diff --git a/sys/amd64/linux/linux.h b/sys/amd64/linux/linux.h
index a9ed66689b64..4e736cc11c22 100644
--- a/sys/amd64/linux/linux.h
+++ b/sys/amd64/linux/linux.h
@@ -407,27 +407,6 @@ struct l_ifconf {
#define ifc_buf ifc_ifcu.ifcu_buf
#define ifc_req ifc_ifcu.ifcu_req
-/*
- * poll()
- */
-#define LINUX_POLLIN 0x0001
-#define LINUX_POLLPRI 0x0002
-#define LINUX_POLLOUT 0x0004
-#define LINUX_POLLERR 0x0008
-#define LINUX_POLLHUP 0x0010
-#define LINUX_POLLNVAL 0x0020
-#define LINUX_POLLRDNORM 0x0040
-#define LINUX_POLLRDBAND 0x0080
-#define LINUX_POLLWRNORM 0x0100
-#define LINUX_POLLWRBAND 0x0200
-#define LINUX_POLLMSG 0x0400
-
-struct l_pollfd {
- l_int fd;
- l_short events;
- l_short revents;
-};
-
#define LINUX_ARCH_SET_GS 0x1001
#define LINUX_ARCH_SET_FS 0x1002
#define LINUX_ARCH_GET_FS 0x1003
diff --git a/sys/amd64/linux32/linux.h b/sys/amd64/linux32/linux.h
index 50a4efed1709..a95545619640 100644
--- a/sys/amd64/linux32/linux.h
+++ b/sys/amd64/linux32/linux.h
@@ -515,27 +515,6 @@ struct l_ifconf {
#define ifc_buf ifc_ifcu.ifcu_buf
#define ifc_req ifc_ifcu.ifcu_req
-/*
- * poll()
- */
-#define LINUX_POLLIN 0x0001
-#define LINUX_POLLPRI 0x0002
-#define LINUX_POLLOUT 0x0004
-#define LINUX_POLLERR 0x0008
-#define LINUX_POLLHUP 0x0010
-#define LINUX_POLLNVAL 0x0020
-#define LINUX_POLLRDNORM 0x0040
-#define LINUX_POLLRDBAND 0x0080
-#define LINUX_POLLWRNORM 0x0100
-#define LINUX_POLLWRBAND 0x0200
-#define LINUX_POLLMSG 0x0400
-
-struct l_pollfd {
- l_int fd;
- l_short events;
- l_short revents;
-};
-
struct l_user_desc {
l_uint entry_number;
l_uint base_addr;
diff --git a/sys/compat/linux/linux.c b/sys/compat/linux/linux.c
index a8c5e2baddc4..350d2c1abaf9 100644
--- a/sys/compat/linux/linux.c
+++ b/sys/compat/linux/linux.c
@@ -33,9 +33,13 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/ctype.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
@@ -50,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux.h>
#include <compat/linux/linux_common.h>
+#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_util.h>
struct futex_list futex_list;
@@ -627,3 +632,94 @@ linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
return (no_value);
return (bsd_ret);
}
+
+void
+linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
+ short *bev)
+{
+ struct proc *p = td->td_proc;
+ struct filedesc *fdp;
+ struct file *fp;
+ int error;
+ short bits = 0;
+
+ if (lev & LINUX_POLLIN)
+ bits |= POLLIN;
+ if (lev & LINUX_POLLPRI)
+ bits |= POLLPRI;
+ if (lev & LINUX_POLLOUT)
+ bits |= POLLOUT;
+ if (lev & LINUX_POLLERR)
+ bits |= POLLERR;
+ if (lev & LINUX_POLLHUP)
+ bits |= POLLHUP;
+ if (lev & LINUX_POLLNVAL)
+ bits |= POLLNVAL;
+ if (lev & LINUX_POLLRDNORM)
+ bits |= POLLRDNORM;
+ if (lev & LINUX_POLLRDBAND)
+ bits |= POLLRDBAND;
+ if (lev & LINUX_POLLWRBAND)
+ bits |= POLLWRBAND;
+ if (lev & LINUX_POLLWRNORM)
+ bits |= POLLWRNORM;
+
+ if (lev & LINUX_POLLRDHUP) {
+ /*
+ * It seems that the Linux silencly ignores POLLRDHUP
+ * on non-socket file descriptors unlike FreeBSD, where
+ * events bits is more strictly checked (POLLSTANDARD).
+ */
+ fdp = p->p_fd;
+ error = fget_unlocked(fdp, fd, &cap_no_rights, &fp);
+ if (error == 0) {
+ /*
+ * XXX. On FreeBSD POLLRDHUP applies only to
+ * stream sockets.
+ */
+ if (fp->f_type == DTYPE_SOCKET)
+ bits |= POLLRDHUP;
+ fdrop(fp, td);
+ }
+ }
+
+ if (lev & LINUX_POLLMSG)
+ LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
+ if (lev & LINUX_POLLREMOVE)
+ LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
+
+ *bev = bits;
+}
+
+void
+bsd_to_linux_poll_events(short bev, short *lev)
+{
+ short bits = 0;
+
+ if (bev & POLLIN)
+ bits |= LINUX_POLLIN;
+ if (bev & POLLPRI)
+ bits |= LINUX_POLLPRI;
+ if (bev & (POLLOUT | POLLWRNORM))
+ /*
+ * POLLWRNORM is equal to POLLOUT on FreeBSD,
+ * but not on Linux
+ */
+ bits |= LINUX_POLLOUT;
+ if (bev & POLLERR)
+ bits |= LINUX_POLLERR;
+ if (bev & POLLHUP)
+ bits |= LINUX_POLLHUP;
+ if (bev & POLLNVAL)
+ bits |= LINUX_POLLNVAL;
+ if (bev & POLLRDNORM)
+ bits |= LINUX_POLLRDNORM;
+ if (bev & POLLRDBAND)
+ bits |= LINUX_POLLRDBAND;
+ if (bev & POLLWRBAND)
+ bits |= LINUX_POLLWRBAND;
+ if (bev & POLLRDHUP)
+ bits |= LINUX_POLLRDHUP;
+
+ *lev = bits;
+}
diff --git a/sys/compat/linux/linux.h b/sys/compat/linux/linux.h
index 18eafa88a432..9b75e6032445 100644
--- a/sys/compat/linux/linux.h
+++ b/sys/compat/linux/linux.h
@@ -31,6 +31,23 @@
#include <sys/queue.h>
+/*
+ * poll()
+ */
+#define LINUX_POLLIN 0x0001
+#define LINUX_POLLPRI 0x0002
+#define LINUX_POLLOUT 0x0004
+#define LINUX_POLLERR 0x0008
+#define LINUX_POLLHUP 0x0010
+#define LINUX_POLLNVAL 0x0020
+#define LINUX_POLLRDNORM 0x0040
+#define LINUX_POLLRDBAND 0x0080
+#define LINUX_POLLWRNORM 0x0100
+#define LINUX_POLLWRBAND 0x0200
+#define LINUX_POLLMSG 0x0400
+#define LINUX_POLLREMOVE 0x1000
+#define LINUX_POLLRDHUP 0x2000
+
#define LINUX_IFHWADDRLEN 6
#define LINUX_IFNAMSIZ 16
diff --git a/sys/compat/linux/linux_common.h b/sys/compat/linux/linux_common.h
index a306bb1eb859..b0e3408e42df 100644
--- a/sys/compat/linux/linux_common.h
+++ b/sys/compat/linux/linux_common.h
@@ -41,5 +41,9 @@ int bsd_to_linux_sockaddr(const struct sockaddr *sa,
struct l_sockaddr **lsa, socklen_t len);
int linux_to_bsd_sockaddr(const struct l_sockaddr *lsa,
struct sockaddr **sap, socklen_t *len);
+void linux_to_bsd_poll_events(struct thread *td, int fd,
+ short lev, short *bev);
+void bsd_to_linux_poll_events(short bev, short *lev);
+
#endif /* _LINUX_COMMON_H_ */
diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c
index 53dd27dc23db..80458364017f 100644
--- a/sys/compat/linux/linux_misc.c
+++ b/sys/compat/linux/linux_misc.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
+#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
@@ -89,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <machine/../linux/linux_proto.h>
#endif
+#include <compat/linux/linux_common.h>
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
@@ -144,6 +146,10 @@ static int linux_common_pselect6(struct thread *, l_int,
static int linux_common_ppoll(struct thread *, struct pollfd *,
uint32_t, struct timespec *, l_sigset_t *,
l_size_t);
+static int linux_pollin(struct thread *, struct pollfd *,
+ struct pollfd *, u_int);
+static int linux_pollout(struct thread *, struct pollfd *,
+ struct pollfd *, u_int);
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
@@ -2519,11 +2525,15 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
struct timespec ts0, ts1;
+ struct pollfd stackfds[32];
+ struct pollfd *kfds;
l_sigset_t l_ss;
sigset_t *ssp;
sigset_t ss;
int error;
+ if (kern_poll_maxfds(nfds))
+ return (EINVAL);
if (sset != NULL) {
if (ssize != sizeof(l_ss))
return (EINVAL);
@@ -2537,7 +2547,17 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
if (tsp != NULL)
nanotime(&ts0);
- error = kern_poll(td, fds, nfds, tsp, ssp);
+ if (nfds > nitems(stackfds))
+ kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
+ else
+ kfds = stackfds;
+ error = linux_pollin(td, kfds, fds, nfds);
+ if (error != 0)
+ goto out;
+
+ error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
+ if (error == 0)
+ error = linux_pollout(td, kfds, fds, nfds);
if (error == 0 && tsp != NULL) {
if (td->td_retval[0]) {
@@ -2549,6 +2569,10 @@ linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
} else
timespecclear(tsp);
}
+
+out:
+ if (nfds > nitems(stackfds))
+ free(kfds, M_TEMP);
return (error);
}
@@ -2583,6 +2607,48 @@ linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
+static int
+linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
+{
+ int error;
+ u_int i;
+
+ error = copyin(ufds, fds, nfd * sizeof(*fds));
+ if (error != 0)
+ return (error);
+
+ for (i = 0; i < nfd; i++) {
+ if (fds->events != 0)
+ linux_to_bsd_poll_events(td, fds->fd,
+ fds->events, &fds->events);
+ fds++;
+ }
+ return (0);
+}
+
+static int
+linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
+{
+ int error = 0;
+ u_int i, n = 0;
+
+ for (i = 0; i < nfd; i++) {
+ if (fds->revents != 0) {
+ bsd_to_linux_poll_events(fds->revents,
+ &fds->revents);
+ n++;
+ }
+ error = copyout(&fds->revents, &ufds->revents,
+ sizeof(ufds->revents));
+ if (error)
+ return (error);
+ fds++;
+ ufds++;
+ }
+ td->td_retval[0] = n;
+ return (0);
+}
+
int
linux_sched_rr_get_interval(struct thread *td,
struct linux_sched_rr_get_interval_args *uap)
diff --git a/sys/i386/linux/linux.h b/sys/i386/linux/linux.h
index 1bb76d8e41d0..8dff1313c598 100644
--- a/sys/i386/linux/linux.h
+++ b/sys/i386/linux/linux.h
@@ -478,27 +478,6 @@ struct l_ifreq {
#define ifr_hwaddr ifr_ifru.ifru_hwaddr /* MAC address */
#define ifr_ifindex ifr_ifru.ifru_ivalue /* Interface index */
-/*
- * poll()
- */
-#define LINUX_POLLIN 0x0001
-#define LINUX_POLLPRI 0x0002
-#define LINUX_POLLOUT 0x0004
-#define LINUX_POLLERR 0x0008
-#define LINUX_POLLHUP 0x0010
-#define LINUX_POLLNVAL 0x0020
-#define LINUX_POLLRDNORM 0x0040
-#define LINUX_POLLRDBAND 0x0080
-#define LINUX_POLLWRNORM 0x0100
-#define LINUX_POLLWRBAND 0x0200
-#define LINUX_POLLMSG 0x0400
-
-struct l_pollfd {
- l_int fd;
- l_short events;
- l_short revents;
-};
-
struct l_user_desc {
l_uint entry_number;
l_uint base_addr;
More information about the dev-commits-src-main
mailing list