git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern
- Reply: Konstantin Belousov : "Re: git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern"
- Reply: Dmitry Chagin : "Re: git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern"
- Reply: Alexey Dokuchaev : "Re: git: af93fea71038 - main - timerfd: Move implementation from linux compat to sys/kern"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Thu, 24 Aug 2023 20:29:48 UTC
The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=af93fea710385b2b11f0cabd377e7ed6f3d97c34 commit af93fea710385b2b11f0cabd377e7ed6f3d97c34 Author: Jake Freeland <jfree@freebsd.org> AuthorDate: 2023-08-24 04:39:54 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2023-08-24 20:28:56 +0000 timerfd: Move implementation from linux compat to sys/kern Move the timerfd implementation from linux compat code to sys/kern. Use it to implement the new system calls for timerfd. Add a hook to kern_tc to allow timerfd to know when the system time has stepped. Add kqueue support to timerfd. Adjust a few names to be less Linux centric. RelNotes: YES Reviewed by: markj (on irc), imp, kib (with reservations), jhb (slack) Differential Revision: https://reviews.freebsd.org/D38459 --- lib/libc/sys/Symbol.map | 3 + sys/bsm/audit_kevents.h | 1 + sys/compat/freebsd32/freebsd32_proto.h | 14 + sys/compat/freebsd32/freebsd32_syscall.h | 5 +- sys/compat/freebsd32/freebsd32_syscalls.c | 3 + sys/compat/freebsd32/freebsd32_sysent.c | 3 + sys/compat/freebsd32/freebsd32_systrace_args.c | 86 ++++ sys/compat/linux/linux_event.c | 443 ++--------------- sys/compat/linux/linux_event.h | 11 - sys/conf/files | 1 + sys/kern/init_sysent.c | 3 + sys/kern/kern_descrip.c | 4 +- sys/kern/kern_tc.c | 2 + sys/kern/sys_timerfd.c | 632 +++++++++++++++++++++++++ sys/kern/syscalls.c | 3 + sys/kern/syscalls.master | 20 + sys/kern/systrace_args.c | 86 ++++ sys/sys/file.h | 2 +- sys/sys/syscall.h | 5 +- sys/sys/syscall.mk | 5 +- sys/sys/sysproto.h | 20 + sys/sys/timerfd.h | 66 +++ sys/sys/user.h | 6 + 23 files changed, 999 insertions(+), 425 deletions(-) diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 9a07bb457eb8..7937661e3787 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -421,6 +421,9 @@ FBSD_1.7 { kqueuex; membarrier; swapoff; + timerfd_create; + timerfd_gettime; + timerfd_settime; }; FBSDprivate_1.0 { diff --git 
a/sys/bsm/audit_kevents.h b/sys/bsm/audit_kevents.h index a6b50a67ee6a..d06381837aad 100644 --- a/sys/bsm/audit_kevents.h +++ b/sys/bsm/audit_kevents.h @@ -661,6 +661,7 @@ #define AUE_AIO_WRITEV 43267 /* FreeBSD-specific. */ #define AUE_AIO_READV 43268 /* FreeBSD-specific. */ #define AUE_FSPACECTL 43269 /* FreeBSD-specific. */ +#define AUE_TIMERFD 43270 /* FreeBSD/Linux. */ /* * Darwin BSM uses a number of AUE_O_* definitions, which are aliased to the diff --git a/sys/compat/freebsd32/freebsd32_proto.h b/sys/compat/freebsd32/freebsd32_proto.h index bb333e0321a0..50448b6dce16 100644 --- a/sys/compat/freebsd32/freebsd32_proto.h +++ b/sys/compat/freebsd32/freebsd32_proto.h @@ -684,6 +684,16 @@ struct freebsd32_aio_writev_args { struct freebsd32_aio_readv_args { char aiocbp_l_[PADL_(struct aiocb32 *)]; struct aiocb32 * aiocbp; char aiocbp_r_[PADR_(struct aiocb32 *)]; }; +struct freebsd32_timerfd_gettime_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char curr_value_l_[PADL_(struct itimerspec32 *)]; struct itimerspec32 * curr_value; char curr_value_r_[PADR_(struct itimerspec32 *)]; +}; +struct freebsd32_timerfd_settime_args { + char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; + char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; + char new_value_l_[PADL_(const struct itimerspec32 *)]; const struct itimerspec32 * new_value; char new_value_r_[PADR_(const struct itimerspec32 *)]; + char old_value_l_[PADL_(struct itimerspec32 *)]; struct itimerspec32 * old_value; char old_value_r_[PADR_(struct itimerspec32 *)]; +}; int freebsd32_wait4(struct thread *, struct freebsd32_wait4_args *); int freebsd32_ptrace(struct thread *, struct freebsd32_ptrace_args *); int freebsd32_recvmsg(struct thread *, struct freebsd32_recvmsg_args *); @@ -799,6 +809,8 @@ int freebsd32_cpuset_setdomain(struct thread *, struct freebsd32_cpuset_setdomai int freebsd32___sysctlbyname(struct thread *, struct freebsd32___sysctlbyname_args *); int 
freebsd32_aio_writev(struct thread *, struct freebsd32_aio_writev_args *); int freebsd32_aio_readv(struct thread *, struct freebsd32_aio_readv_args *); +int freebsd32_timerfd_gettime(struct thread *, struct freebsd32_timerfd_gettime_args *); +int freebsd32_timerfd_settime(struct thread *, struct freebsd32_timerfd_settime_args *); #ifdef COMPAT_43 @@ -1292,6 +1304,8 @@ int freebsd11_freebsd32_fstatat(struct thread *, struct freebsd11_freebsd32_fsta #define FREEBSD32_SYS_AUE_freebsd32___sysctlbyname AUE_SYSCTL #define FREEBSD32_SYS_AUE_freebsd32_aio_writev AUE_AIO_WRITEV #define FREEBSD32_SYS_AUE_freebsd32_aio_readv AUE_AIO_READV +#define FREEBSD32_SYS_AUE_freebsd32_timerfd_gettime AUE_TIMERFD +#define FREEBSD32_SYS_AUE_freebsd32_timerfd_settime AUE_TIMERFD #undef PAD_ #undef PADL_ diff --git a/sys/compat/freebsd32/freebsd32_syscall.h b/sys/compat/freebsd32/freebsd32_syscall.h index c3d8617abf4b..e3777730be1c 100644 --- a/sys/compat/freebsd32/freebsd32_syscall.h +++ b/sys/compat/freebsd32/freebsd32_syscall.h @@ -502,4 +502,7 @@ #define FREEBSD32_SYS_swapoff 582 #define FREEBSD32_SYS_kqueuex 583 #define FREEBSD32_SYS_membarrier 584 -#define FREEBSD32_SYS_MAXSYSCALL 585 +#define FREEBSD32_SYS_timerfd_create 585 +#define FREEBSD32_SYS_freebsd32_timerfd_gettime 586 +#define FREEBSD32_SYS_freebsd32_timerfd_settime 587 +#define FREEBSD32_SYS_MAXSYSCALL 588 diff --git a/sys/compat/freebsd32/freebsd32_syscalls.c b/sys/compat/freebsd32/freebsd32_syscalls.c index 19d454743c55..ccc910ee5ca9 100644 --- a/sys/compat/freebsd32/freebsd32_syscalls.c +++ b/sys/compat/freebsd32/freebsd32_syscalls.c @@ -590,4 +590,7 @@ const char *freebsd32_syscallnames[] = { "swapoff", /* 582 = swapoff */ "kqueuex", /* 583 = kqueuex */ "membarrier", /* 584 = membarrier */ + "timerfd_create", /* 585 = timerfd_create */ + "freebsd32_timerfd_gettime", /* 586 = freebsd32_timerfd_gettime */ + "freebsd32_timerfd_settime", /* 587 = freebsd32_timerfd_settime */ }; diff --git 
a/sys/compat/freebsd32/freebsd32_sysent.c b/sys/compat/freebsd32/freebsd32_sysent.c index 971f06a643c5..fec6f4a47bd6 100644 --- a/sys/compat/freebsd32/freebsd32_sysent.c +++ b/sys/compat/freebsd32/freebsd32_sysent.c @@ -646,4 +646,7 @@ struct sysent freebsd32_sysent[] = { { .sy_narg = AS(swapoff_args), .sy_call = (sy_call_t *)sys_swapoff, .sy_auevent = AUE_SWAPOFF, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 582 = swapoff */ { .sy_narg = AS(kqueuex_args), .sy_call = (sy_call_t *)sys_kqueuex, .sy_auevent = AUE_KQUEUE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 583 = kqueuex */ { .sy_narg = AS(membarrier_args), .sy_call = (sy_call_t *)sys_membarrier, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 584 = membarrier */ + { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t *)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 585 = timerfd_create */ + { .sy_narg = AS(freebsd32_timerfd_gettime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = freebsd32_timerfd_gettime */ + { .sy_narg = AS(freebsd32_timerfd_settime_args), .sy_call = (sy_call_t *)freebsd32_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = freebsd32_timerfd_settime */ }; diff --git a/sys/compat/freebsd32/freebsd32_systrace_args.c b/sys/compat/freebsd32/freebsd32_systrace_args.c index 5dfc82c30b7b..2c26a0ddab2f 100644 --- a/sys/compat/freebsd32/freebsd32_systrace_args.c +++ b/sys/compat/freebsd32/freebsd32_systrace_args.c @@ -3336,6 +3336,32 @@ systrace_args(int sysnum, void *params, uint64_t *uarg, int *n_args) *n_args = 3; break; } + /* timerfd_create */ + case 585: { + struct timerfd_create_args *p = params; + iarg[a++] = p->clockid; /* int */ + iarg[a++] = p->flags; /* int */ + *n_args = 2; + break; + } + /* 
freebsd32_timerfd_gettime */ + case 586: { + struct freebsd32_timerfd_gettime_args *p = params; + iarg[a++] = p->fd; /* int */ + uarg[a++] = (intptr_t)p->curr_value; /* struct itimerspec32 * */ + *n_args = 2; + break; + } + /* freebsd32_timerfd_settime */ + case 587: { + struct freebsd32_timerfd_settime_args *p = params; + iarg[a++] = p->fd; /* int */ + iarg[a++] = p->flags; /* int */ + uarg[a++] = (intptr_t)p->new_value; /* const struct itimerspec32 * */ + uarg[a++] = (intptr_t)p->old_value; /* struct itimerspec32 * */ + *n_args = 4; + break; + } default: *n_args = 0; break; @@ -9005,6 +9031,51 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; }; break; + /* timerfd_create */ + case 585: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + default: + break; + }; + break; + /* freebsd32_timerfd_gettime */ + case 586: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "userland struct itimerspec32 *"; + break; + default: + break; + }; + break; + /* freebsd32_timerfd_settime */ + case 587: + switch (ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "userland const struct itimerspec32 *"; + break; + case 3: + p = "userland struct itimerspec32 *"; + break; + default: + break; + }; + break; default: break; }; @@ -10873,6 +10944,21 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) if (ndx == 0 || ndx == 1) p = "int"; break; + /* timerfd_create */ + case 585: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* freebsd32_timerfd_gettime */ + case 586: + if (ndx == 0 || ndx == 1) + p = "int"; + break; + /* freebsd32_timerfd_settime */ + case 587: + if (ndx == 0 || ndx == 1) + p = "int"; + break; default: break; }; diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c index a7db8516e5f0..816c68a90f1d 100644 --- a/sys/compat/linux/linux_event.c +++ b/sys/compat/linux/linux_event.c @@ -44,6 +44,7 @@ #include 
<sys/specialfd.h> #include <sys/sx.h> #include <sys/syscallsubr.h> +#include <sys/timerfd.h> #include <sys/timespec.h> #include <sys/user.h> @@ -99,55 +100,6 @@ struct epoll_copyout_args { int error; }; -/* timerfd */ -typedef uint64_t timerfd_t; - -static fo_rdwr_t timerfd_read; -static fo_ioctl_t timerfd_ioctl; -static fo_poll_t timerfd_poll; -static fo_kqfilter_t timerfd_kqfilter; -static fo_stat_t timerfd_stat; -static fo_close_t timerfd_close; -static fo_fill_kinfo_t timerfd_fill_kinfo; - -static struct fileops timerfdops = { - .fo_read = timerfd_read, - .fo_write = invfo_rdwr, - .fo_truncate = invfo_truncate, - .fo_ioctl = timerfd_ioctl, - .fo_poll = timerfd_poll, - .fo_kqfilter = timerfd_kqfilter, - .fo_stat = timerfd_stat, - .fo_close = timerfd_close, - .fo_chmod = invfo_chmod, - .fo_chown = invfo_chown, - .fo_sendfile = invfo_sendfile, - .fo_fill_kinfo = timerfd_fill_kinfo, - .fo_flags = DFLAG_PASSABLE -}; - -static void filt_timerfddetach(struct knote *kn); -static int filt_timerfdread(struct knote *kn, long hint); - -static struct filterops timerfd_rfiltops = { - .f_isfd = 1, - .f_detach = filt_timerfddetach, - .f_event = filt_timerfdread -}; - -struct timerfd { - clockid_t tfd_clockid; - struct itimerspec tfd_time; - struct callout tfd_callout; - timerfd_t tfd_count; - bool tfd_canceled; - struct selinfo tfd_sel; - struct mtx tfd_lock; -}; - -static void linux_timerfd_expire(void *); -static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); - static int epoll_create_common(struct thread *td, int flags) { @@ -658,255 +610,14 @@ linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) int linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) { - struct timerfd *tfd; - struct file *fp; clockid_t clockid; - int fflags, fd, error; - - if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) - return (EINVAL); - - error = linux_to_native_clockid(&clockid, args->clockid); - if (error != 0) - return (error); - if 
(clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) - return (EINVAL); - - fflags = 0; - if ((args->flags & LINUX_TFD_CLOEXEC) != 0) - fflags |= O_CLOEXEC; - - error = falloc(td, &fp, &fd, fflags); - if (error != 0) - return (error); - - tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); - tfd->tfd_clockid = clockid; - mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); - - callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); - knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); - - fflags = FREAD; - if ((args->flags & LINUX_O_NONBLOCK) != 0) - fflags |= FNONBLOCK; - - finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); - fdrop(fp, td); - - td->td_retval[0] = fd; - return (error); -} - -static int -timerfd_close(struct file *fp, struct thread *td) -{ - struct timerfd *tfd; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (EINVAL); - - timespecclear(&tfd->tfd_time.it_value); - timespecclear(&tfd->tfd_time.it_interval); - - callout_drain(&tfd->tfd_callout); - - seldrain(&tfd->tfd_sel); - knlist_destroy(&tfd->tfd_sel.si_note); - - fp->f_ops = &badfileops; - mtx_destroy(&tfd->tfd_lock); - free(tfd, M_EPOLL); - - return (0); -} - -static int -timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - struct timerfd *tfd; - timerfd_t count; - int error; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (EINVAL); - - if (uio->uio_resid < sizeof(timerfd_t)) - return (EINVAL); - - error = 0; - mtx_lock(&tfd->tfd_lock); -retry: - if (tfd->tfd_canceled) { - tfd->tfd_count = 0; - mtx_unlock(&tfd->tfd_lock); - return (ECANCELED); - } - if (tfd->tfd_count == 0) { - if ((fp->f_flag & FNONBLOCK) != 0) { - mtx_unlock(&tfd->tfd_lock); - return (EAGAIN); - } - error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); - if (error == 0) - goto retry; - } - if (error == 0) { - count = tfd->tfd_count; - tfd->tfd_count = 0; - 
mtx_unlock(&tfd->tfd_lock); - error = uiomove(&count, sizeof(timerfd_t), uio); - } else - mtx_unlock(&tfd->tfd_lock); - - return (error); -} - -static int -timerfd_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td) -{ - struct timerfd *tfd; - int revents = 0; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (POLLERR); - - mtx_lock(&tfd->tfd_lock); - if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) - revents |= events & (POLLIN|POLLRDNORM); - if (revents == 0) - selrecord(td, &tfd->tfd_sel); - mtx_unlock(&tfd->tfd_lock); - - return (revents); -} - -static int -timerfd_kqfilter(struct file *fp, struct knote *kn) -{ - struct timerfd *tfd; - - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) - return (EINVAL); - - if (kn->kn_filter == EVFILT_READ) - kn->kn_fop = &timerfd_rfiltops; - else - return (EINVAL); - - kn->kn_hook = tfd; - knlist_add(&tfd->tfd_sel.si_note, kn, 0); - - return (0); -} - -static void -filt_timerfddetach(struct knote *kn) -{ - struct timerfd *tfd = kn->kn_hook; - - mtx_lock(&tfd->tfd_lock); - knlist_remove(&tfd->tfd_sel.si_note, kn, 1); - mtx_unlock(&tfd->tfd_lock); -} - -static int -filt_timerfdread(struct knote *kn, long hint) -{ - struct timerfd *tfd = kn->kn_hook; - - return (tfd->tfd_count > 0); -} - -static int -timerfd_ioctl(struct file *fp, u_long cmd, void *data, - struct ucred *active_cred, struct thread *td) -{ - - if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) - return (EINVAL); - - switch (cmd) { - case FIONBIO: - case FIOASYNC: - return (0); - } - - return (ENOTTY); -} - -static int -timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) -{ - - return (ENXIO); -} - -static int -timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) -{ - - kif->kf_type = KF_TYPE_UNKNOWN; - return (0); -} - -static void -linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) -{ - - if 
(tfd->tfd_clockid == CLOCK_REALTIME) - getnanotime(ts); - else /* CLOCK_MONOTONIC */ - getnanouptime(ts); -} - -static void -linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) -{ - struct timespec cts; - - linux_timerfd_clocktime(tfd, &cts); - *ots = tfd->tfd_time; - if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { - timespecsub(&ots->it_value, &cts, &ots->it_value); - if (ots->it_value.tv_sec < 0 || - (ots->it_value.tv_sec == 0 && - ots->it_value.tv_nsec == 0)) { - ots->it_value.tv_sec = 0; - ots->it_value.tv_nsec = 1; - } - } -} - -static int -linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots) -{ - struct timerfd *tfd; - struct file *fp; int error; - error = fget(td, fd, &cap_read_rights, &fp); + error = linux_to_native_clockid(&clockid, args->clockid); if (error != 0) return (error); - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { - error = EINVAL; - goto out; - } - - mtx_lock(&tfd->tfd_lock); - linux_timerfd_curval(tfd, ots); - mtx_unlock(&tfd->tfd_lock); -out: - fdrop(fp, td); - return (error); + return (kern_timerfd_create(td, clockid, args->flags)); } int @@ -916,84 +627,14 @@ linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args struct itimerspec ots; int error; - error = linux_timerfd_gettime_common(td, args->fd, &ots); + error = kern_timerfd_gettime(td, args->fd, &ots); if (error != 0) return (error); - error = native_to_linux_itimerspec(&lots, &ots); - if (error == 0) - error = copyout(&lots, args->old_value, sizeof(lots)); - return (error); -} - -#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) -int -linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args) -{ - struct l_itimerspec64 lots; - struct itimerspec ots; - int error; - error = linux_timerfd_gettime_common(td, args->fd, &ots); - if (error != 0) - return (error); - error = native_to_linux_itimerspec64(&lots, &ots); + error = 
native_to_linux_itimerspec(&lots, &ots); if (error == 0) error = copyout(&lots, args->old_value, sizeof(lots)); - return (error); -} -#endif - -static int -linux_timerfd_settime_common(struct thread *td, int fd, int flags, - struct itimerspec *nts, struct itimerspec *oval) -{ - struct timespec cts, ts; - struct timerfd *tfd; - struct timeval tv; - struct file *fp; - int error; - - if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) - return (EINVAL); - - error = fget(td, fd, &cap_write_rights, &fp); - if (error != 0) - return (error); - tfd = fp->f_data; - if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { - error = EINVAL; - goto out; - } - - mtx_lock(&tfd->tfd_lock); - if (!timespecisset(&nts->it_value)) - timespecclear(&nts->it_interval); - if (oval != NULL) - linux_timerfd_curval(tfd, oval); - - bcopy(nts, &tfd->tfd_time, sizeof(*nts)); - tfd->tfd_count = 0; - if (timespecisset(&nts->it_value)) { - linux_timerfd_clocktime(tfd, &cts); - ts = nts->it_value; - if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) { - timespecadd(&tfd->tfd_time.it_value, &cts, - &tfd->tfd_time.it_value); - } else { - timespecsub(&ts, &cts, &ts); - } - TIMESPEC_TO_TIMEVAL(&tv, &ts); - callout_reset(&tfd->tfd_callout, tvtohz(&tv), - linux_timerfd_expire, tfd); - tfd->tfd_canceled = false; - } else { - tfd->tfd_canceled = true; - callout_stop(&tfd->tfd_callout); - } - mtx_unlock(&tfd->tfd_lock); -out: - fdrop(fp, td); return (error); } @@ -1001,7 +642,7 @@ int linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) { struct l_itimerspec lots; - struct itimerspec nts, ots, *pots; + struct itimerspec nts, ots; int error; error = copyin(args->new_value, &lots, sizeof(lots)); @@ -1010,23 +651,43 @@ linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args error = linux_to_native_itimerspec(&nts, &lots); if (error != 0) return (error); - pots = (args->old_value != NULL ? 
&ots : NULL); - error = linux_timerfd_settime_common(td, args->fd, args->flags, - &nts, pots); + if (args->old_value == NULL) + error = kern_timerfd_settime(td, args->fd, args->flags, &nts, NULL); + else + error = kern_timerfd_settime(td, args->fd, args->flags, &nts, &ots); if (error == 0 && args->old_value != NULL) { error = native_to_linux_itimerspec(&lots, &ots); if (error == 0) error = copyout(&lots, args->old_value, sizeof(lots)); } + return (error); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) +int +linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args) +{ + struct l_itimerspec64 lots; + struct itimerspec ots; + int error; + + error = kern_timerfd_gettime(td, args->fd, &ots); + if (error != 0) + return (error); + + error = native_to_linux_itimerspec64(&lots, &ots); + if (error == 0) + error = copyout(&lots, args->old_value, sizeof(lots)); + + return (error); +} + int linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args) { struct l_itimerspec64 lots; - struct itimerspec nts, ots, *pots; + struct itimerspec nts, ots; int error; error = copyin(args->new_value, &lots, sizeof(lots)); @@ -1035,50 +696,16 @@ linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args * error = linux_to_native_itimerspec64(&nts, &lots); if (error != 0) return (error); - pots = (args->old_value != NULL ? 
&ots : NULL); - error = linux_timerfd_settime_common(td, args->fd, args->flags, - &nts, pots); + if (args->old_value == NULL) + error = kern_timerfd_settime(td, args->fd, args->flags, &nts, NULL); + else + error = kern_timerfd_settime(td, args->fd, args->flags, &nts, &ots); if (error == 0 && args->old_value != NULL) { error = native_to_linux_itimerspec64(&lots, &ots); if (error == 0) error = copyout(&lots, args->old_value, sizeof(lots)); } + return (error); } #endif - -static void -linux_timerfd_expire(void *arg) -{ - struct timespec cts, ts; - struct timeval tv; - struct timerfd *tfd; - - tfd = (struct timerfd *)arg; - - linux_timerfd_clocktime(tfd, &cts); - if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { - if (timespecisset(&tfd->tfd_time.it_interval)) - timespecadd(&tfd->tfd_time.it_value, - &tfd->tfd_time.it_interval, - &tfd->tfd_time.it_value); - else - /* single shot timer */ - timespecclear(&tfd->tfd_time.it_value); - if (timespecisset(&tfd->tfd_time.it_value)) { - timespecsub(&tfd->tfd_time.it_value, &cts, &ts); - TIMESPEC_TO_TIMEVAL(&tv, &ts); - callout_reset(&tfd->tfd_callout, tvtohz(&tv), - linux_timerfd_expire, tfd); - } - tfd->tfd_count++; - KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); - selwakeup(&tfd->tfd_sel); - wakeup(&tfd->tfd_count); - } else if (timespecisset(&tfd->tfd_time.it_value)) { - timespecsub(&tfd->tfd_time.it_value, &cts, &ts); - TIMESPEC_TO_TIMEVAL(&tv, &ts); - callout_reset(&tfd->tfd_callout, tvtohz(&tv), - linux_timerfd_expire, tfd); - } -} diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h index 32269b0070bc..fa63371b5170 100644 --- a/sys/compat/linux/linux_event.h +++ b/sys/compat/linux/linux_event.h @@ -54,15 +54,4 @@ #define LINUX_EFD_SEMAPHORE (1 << 0) -#define LINUX_TFD_TIMER_ABSTIME (1 << 0) -#define LINUX_TFD_TIMER_CANCEL_ON_SET (1 << 1) -#define LINUX_TFD_CLOEXEC LINUX_O_CLOEXEC -#define LINUX_TFD_NONBLOCK LINUX_O_NONBLOCK - -#define LINUX_TFD_SHARED_FCNTL_FLAGS (LINUX_TFD_CLOEXEC \ - 
|LINUX_TFD_NONBLOCK) -#define LINUX_TFD_CREATE_FLAGS LINUX_TFD_SHARED_FCNTL_FLAGS -#define LINUX_TFD_SETTIME_FLAGS (LINUX_TFD_TIMER_ABSTIME \ - |LINUX_TFD_TIMER_CANCEL_ON_SET) - #endif /* !_LINUX_EVENT_H_ */ diff --git a/sys/conf/files b/sys/conf/files index 3f79ce752c80..8d38b9cc8a2e 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3908,6 +3908,7 @@ kern/sys_pipe.c standard kern/sys_procdesc.c standard kern/sys_process.c standard kern/sys_socket.c standard +kern/sys_timerfd.c standard kern/syscalls.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg diff --git a/sys/kern/init_sysent.c b/sys/kern/init_sysent.c index 1e62c46b8be0..d44fec54fcd7 100644 --- a/sys/kern/init_sysent.c +++ b/sys/kern/init_sysent.c @@ -645,4 +645,7 @@ struct sysent sysent[] = { { .sy_narg = AS(swapoff_args), .sy_call = (sy_call_t *)sys_swapoff, .sy_auevent = AUE_SWAPOFF, .sy_flags = 0, .sy_thrcnt = SY_THR_STATIC }, /* 582 = swapoff */ { .sy_narg = AS(kqueuex_args), .sy_call = (sy_call_t *)sys_kqueuex, .sy_auevent = AUE_KQUEUE, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 583 = kqueuex */ { .sy_narg = AS(membarrier_args), .sy_call = (sy_call_t *)sys_membarrier, .sy_auevent = AUE_NULL, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 584 = membarrier */ + { .sy_narg = AS(timerfd_create_args), .sy_call = (sy_call_t *)sys_timerfd_create, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 585 = timerfd_create */ + { .sy_narg = AS(timerfd_gettime_args), .sy_call = (sy_call_t *)sys_timerfd_gettime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 586 = timerfd_gettime */ + { .sy_narg = AS(timerfd_settime_args), .sy_call = (sy_call_t *)sys_timerfd_settime, .sy_auevent = AUE_TIMERFD, .sy_flags = SYF_CAPENABLED, .sy_thrcnt = SY_THR_STATIC }, /* 587 = timerfd_settime */ }; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index c5226288afc5..35046c856d54 100644 --- 
a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -5001,8 +5001,8 @@ file_type_to_name(short type) return ("proc"); case DTYPE_EVENTFD: return ("eventfd"); - case DTYPE_LINUXTFD: - return ("ltimer"); + case DTYPE_TIMERFD: + return ("timerfd"); default: return ("unkn"); } diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 170f35830923..26f09cb60260 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -34,6 +34,7 @@ #include <sys/systm.h> #include <sys/timeffc.h> #include <sys/timepps.h> +#include <sys/timerfd.h> #include <sys/timetc.h> #include <sys/timex.h> #include <sys/vdso.h> @@ -1305,6 +1306,7 @@ tc_setclock(struct timespec *ts) /* Avoid rtc_generation == 0, since td_rtcgen == 0 is special. */ atomic_add_rel_int(&rtc_generation, 2); + timerfd_jumped(); sleepq_chains_remove_matching(sleeping_on_old_rtc); if (timestepwarnings) { nanotime(&taft); diff --git a/sys/kern/sys_timerfd.c b/sys/kern/sys_timerfd.c new file mode 100644 index 000000000000..6948fa059b8c --- /dev/null +++ b/sys/kern/sys_timerfd.c @@ -0,0 +1,632 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> + * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/callout.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/filio.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/selinfo.h> +#include <sys/stat.h> +#include <sys/sysctl.h> +#include <sys/sysent.h> +#include <sys/sysproto.h> +#include <sys/timerfd.h> +#include <sys/timespec.h> +#include <sys/uio.h> +#include <sys/user.h> + +#include <security/audit/audit.h> + +#ifdef COMPAT_FREEBSD32 +#include <compat/freebsd32/freebsd32.h> +#include <compat/freebsd32/freebsd32_proto.h> +#endif + +static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures"); +static LIST_HEAD(, timerfd) timerfd_head; +static struct unrhdr64 tfdino_unr; + +#define TFD_NOJUMP 0 /* Realtime clock has not jumped. */ +#define TFD_READ 1 /* Jumped, tfd has been read since. */ +#define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */ +#define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */ +#define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED) + +struct timerfd { + /* User specified. */ + struct itimerspec tfd_time; /* tfd timer */ + clockid_t tfd_clockid; /* timing base */ + int tfd_flags; /* creation flags */ + int tfd_timflags; /* timer flags */ + + /* Used internally. 
*/ + timerfd_t tfd_count; /* expiration count since last read */ + bool tfd_expired; /* true upon initial expiration */ + struct mtx tfd_lock; /* mtx lock */ + struct callout tfd_callout; /* expiration notification */ + struct selinfo tfd_sel; /* I/O alerts */ + struct timespec tfd_boottim; /* cached boottime */ + int tfd_jumped; /* timer jump status */ + LIST_ENTRY(timerfd) entry; /* entry in list */ + + /* For stat(2). */ + ino_t tfd_ino; /* inode number */ + struct timespec tfd_atim; /* time of last read */ + struct timespec tfd_mtim; /* time of last settime */ + struct timespec tfd_birthtim; /* creation time */ +}; + +static void +timerfd_init(void *data) +{ + new_unrhdr64(&tfdino_unr, 1); +} + +SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL); + +static inline void +timerfd_getboottime(struct timespec *ts) +{ + struct timeval tv; + getboottime(&tv); + TIMEVAL_TO_TIMESPEC(&tv, ts); +} + +/* + * Call when a discontinuous jump has occured in CLOCK_REALTIME and + * update timerfd's cached boottime. A jump can be triggered using + * functions like clock_settime(2) or settimeofday(2). + * + * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set *** 850 LINES SKIPPED ***