svn commit: r255672 - in head/sys: amd64/linux32 compat/linux conf i386/linux kern modules/linux sys
Adrian Chadd
adrian at freebsd.org
Fri May 2 18:49:31 UTC 2014
Hi,
Why not just extend the kqueue data fields to 64 bits and have the
FreeBSD API copy in only 32 bits?
-a
On 18 September 2013 10:56, Roman Divacky <rdivacky at freebsd.org> wrote:
> Author: rdivacky
> Date: Wed Sep 18 17:56:04 2013
> New Revision: 255672
> URL: http://svnweb.freebsd.org/changeset/base/255672
>
> Log:
> Implement epoll support in the Linuxulator. This is a tiny wrapper around kqueue
> that implements a subset of epoll functionality. The kqueue user data is 32-bit
> on i386, which is not enough for epoll user data, so this patch overrides the
> kqueue fileops to keep enough space in struct file.
>
> Initial patch developed by me in 2007 and then extended and finished
> by Yuri Victorovich.
>
> Approved by: re (delphij)
> Sponsored by: Google Summer of Code
> Submitted by: Yuri Victorovich <yuri at rawbw dot com>
> Tested by: Yuri Victorovich <yuri at rawbw dot com>
>
> Added:
> head/sys/compat/linux/linux_epoll.c (contents, props changed)
> head/sys/compat/linux/linux_epoll.h (contents, props changed)
> Modified:
> head/sys/amd64/linux32/linux32_dummy.c
> head/sys/amd64/linux32/syscalls.master
> head/sys/conf/files.amd64
> head/sys/conf/files.i386
> head/sys/conf/files.pc98
> head/sys/i386/linux/linux_dummy.c
> head/sys/i386/linux/syscalls.master
> head/sys/kern/kern_event.c
> head/sys/modules/linux/Makefile
> head/sys/sys/event.h
> head/sys/sys/file.h
> head/sys/sys/syscallsubr.h
>
> Modified: head/sys/amd64/linux32/linux32_dummy.c
> ==============================================================================
> --- head/sys/amd64/linux32/linux32_dummy.c Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/amd64/linux32/linux32_dummy.c Wed Sep 18 17:56:04 2013 (r255672)
> @@ -70,9 +70,6 @@ DUMMY(pivot_root);
> DUMMY(mincore);
> DUMMY(ptrace);
> DUMMY(lookup_dcookie);
> -DUMMY(epoll_create);
> -DUMMY(epoll_ctl);
> -DUMMY(epoll_wait);
> DUMMY(remap_file_pages);
> DUMMY(timer_create);
> DUMMY(timer_settime);
> @@ -129,7 +126,6 @@ DUMMY(timerfd_gettime);
> /* linux 2.6.27: */
> DUMMY(signalfd4);
> DUMMY(eventfd2);
> -DUMMY(epoll_create1);
> DUMMY(dup3);
> DUMMY(inotify_init1);
> /* linux 2.6.30: */
>
> Modified: head/sys/amd64/linux32/syscalls.master
> ==============================================================================
> --- head/sys/amd64/linux32/syscalls.master Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/amd64/linux32/syscalls.master Wed Sep 18 17:56:04 2013 (r255672)
> @@ -430,9 +430,11 @@
> 251 AUE_NULL UNIMPL
> 252 AUE_EXIT STD { int linux_exit_group(int error_code); }
> 253 AUE_NULL STD { int linux_lookup_dcookie(void); }
> -254 AUE_NULL STD { int linux_epoll_create(void); }
> -255 AUE_NULL STD { int linux_epoll_ctl(void); }
> -256 AUE_NULL STD { int linux_epoll_wait(void); }
> +254 AUE_NULL STD { int linux_epoll_create(l_int size); }
> +255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
> + struct linux_epoll_event *event); }
> +256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \
> + l_int maxevents, l_int timeout); }
> 257 AUE_NULL STD { int linux_remap_file_pages(void); }
> 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
> 259 AUE_NULL STD { int linux_timer_create(void); }
> @@ -534,7 +536,7 @@
> ; linux 2.6.27:
> 327 AUE_NULL STD { int linux_signalfd4(void); }
> 328 AUE_NULL STD { int linux_eventfd2(void); }
> -329 AUE_NULL STD { int linux_epoll_create1(void); }
> +329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
> 330 AUE_NULL STD { int linux_dup3(void); }
> 331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
> 332 AUE_NULL STD { int linux_inotify_init1(void); }
>
> Added: head/sys/compat/linux/linux_epoll.c
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sys/compat/linux/linux_epoll.c Wed Sep 18 17:56:04 2013 (r255672)
> @@ -0,0 +1,554 @@
> +/*-
> + * Copyright (c) 2007 Roman Divacky
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + */
> +
> +#include <sys/cdefs.h>
> +__FBSDID("$FreeBSD$");
> +
> +#include "opt_compat.h"
> +#include "opt_ktrace.h"
> +
> +#include <sys/limits.h>
> +#include <sys/param.h>
> +#include <sys/kernel.h>
> +#include <sys/capability.h>
> +#include <sys/types.h>
> +#include <sys/systm.h>
> +#include <sys/file.h>
> +#include <sys/filedesc.h>
> +#include <sys/errno.h>
> +#include <sys/event.h>
> +#include <sys/proc.h>
> +#include <sys/sysproto.h>
> +#include <sys/syscallsubr.h>
> +#include <sys/timespec.h>
> +#include <compat/linux/linux_epoll.h>
> +#include <compat/linux/linux_util.h>
> +#ifdef KTRACE
> +#include <sys/ktrace.h>
> +#endif
> +
> +#ifdef COMPAT_LINUX32
> +#include <machine/../linux32/linux.h>
> +#include <machine/../linux32/linux32_proto.h>
> +#else
> +#include <machine/../linux/linux.h>
> +#include <machine/../linux/linux_proto.h>
> +#endif
> +
> +#define ktrepoll_events(evt, count) \
> + ktrstruct("linux_epoll_event", (evt), count * sizeof(*evt))
> +
> +/*
> + * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
> + * on all architectures. But on 32-bit architectures BSD 'struct kevent' only
> + * has a 32-bit opaque pointer as its 'udata' field, so we can't pass epoll-supplied
> + * data verbatim. Therefore on 32-bit architectures we allocate a 64-bit memory
> + * block to hold the user-supplied data for every file descriptor.
> + */
> +typedef uint64_t epoll_udata_t;
> +#if defined(__i386__)
> +#define EPOLL_WIDE_USER_DATA 1
> +#else
> +#define EPOLL_WIDE_USER_DATA 0
> +#endif
> +
> +#if EPOLL_WIDE_USER_DATA
> +
> +/*
> + * An approach similar to epoll_user_data could also be used to
> + * keep track of event bits per file descriptor on all architectures.
> + * However, it isn't obvious that such tracking would be beneficial
> + * in practice.
> + */
> +
> +struct epoll_user_data {
> + unsigned sz;
> + epoll_udata_t data[1];
> +};
> +static MALLOC_DEFINE(M_LINUX_EPOLL, "epoll", "memory for epoll system");
> +#define EPOLL_USER_DATA_SIZE(ndata) \
> + (sizeof(struct epoll_user_data)+((ndata)-1)*sizeof(epoll_udata_t))
> +#define EPOLL_USER_DATA_MARGIN 16
> +
> +static void epoll_init_user_data(struct thread *td, struct file *epfp);
> +static void epoll_set_user_data(struct thread *td, struct file *epfp, int fd, epoll_udata_t user_data);
> +static epoll_udata_t epoll_get_user_data(struct thread *td, struct file *epfp, int fd);
> +static fo_close_t epoll_close;
> +
> +/* overload kqueue fileops */
> +static struct fileops epollops = {
> + .fo_read = kqueue_read,
> + .fo_write = kqueue_write,
> + .fo_truncate = kqueue_truncate,
> + .fo_ioctl = kqueue_ioctl,
> + .fo_poll = kqueue_poll,
> + .fo_kqfilter = kqueue_kqfilter,
> + .fo_stat = kqueue_stat,
> + .fo_close = epoll_close,
> + .fo_chmod = invfo_chmod,
> + .fo_chown = invfo_chown,
> + .fo_sendfile = invfo_sendfile,
> +};
> +#endif
> +
> +static struct file* epoll_fget(struct thread *td, int epfd);
> +
> +struct epoll_copyin_args {
> + struct kevent *changelist;
> +};
> +
> +struct epoll_copyout_args {
> + struct linux_epoll_event *leventlist;
> + int count;
> + int error;
> +#if KTRACE || EPOLL_WIDE_USER_DATA
> + struct thread *td;
> +#endif
> +#if EPOLL_WIDE_USER_DATA
> + struct file *epfp;
> +#endif
> +};
> +
> +
> +/* Create a new epoll file descriptor. */
> +
> +static int
> +linux_epoll_create_common(struct thread *td)
> +{
> + struct file *fp;
> + int error;
> +
> + error = kern_kqueue_locked(td, &fp);
> +#if EPOLL_WIDE_USER_DATA
> + if (error == 0) {
> + epoll_init_user_data(td, fp);
> + fdrop(fp, td);
> + }
> +#endif
> + return (error);
> +}
> +
> +int
> +linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
> +{
> + if (args->size <= 0)
> + return (EINVAL);
> + /* args->size is unused. Linux just tests it
> + * and then forgets it as well. */
> +
> + return (linux_epoll_create_common(td));
> +}
> +
> +int
> +linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
> +{
> + int error;
> +
> + error = linux_epoll_create_common(td);
> +
> + if (!error) {
> + if (args->flags & LINUX_EPOLL_CLOEXEC)
> + td->td_proc->p_fd->fd_ofiles[td->td_retval[0]].fde_flags |= UF_EXCLOSE;
> + if (args->flags & LINUX_EPOLL_NONBLOCK)
> + linux_msg(td, "epoll_create1 doesn't yet support EPOLL_NONBLOCK flag\n");
> + }
> +
> + return (error);
> +}
> +
> +/* Structure converting function from epoll to kevent. */
> +static int
> +linux_epoll_to_kevent(struct thread *td,
> +#if EPOLL_WIDE_USER_DATA
> + struct file *epfp,
> +#endif
> + int fd, struct linux_epoll_event *l_event, int kev_flags, struct kevent *kevent, int *nkevents)
> +{
> + /* flags related to how event is registered */
> + if (l_event->events & LINUX_EPOLLONESHOT)
> + kev_flags |= EV_ONESHOT;
> + if (l_event->events & LINUX_EPOLLET) {
> + kev_flags |= EV_CLEAR;
> + }
> +
> + /* flags related to what event is registered */
> + if (l_event->events & LINUX_EPOLLIN ||
> + l_event->events & LINUX_EPOLLRDNORM ||
> + l_event->events & LINUX_EPOLLPRI ||
> + l_event->events & LINUX_EPOLLRDHUP) {
> + EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0,
> + (void*)(EPOLL_WIDE_USER_DATA ? 0 : l_event->data));
> + ++*nkevents;
> + }
> + if (l_event->events & LINUX_EPOLLOUT ||
> + l_event->events & LINUX_EPOLLWRNORM) {
> + EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0,
> + (void*)(EPOLL_WIDE_USER_DATA ? 0 : l_event->data));
> + ++*nkevents;
> + }
> + if (l_event->events & LINUX_EPOLLRDBAND ||
> + l_event->events & LINUX_EPOLLWRBAND ||
> + l_event->events & LINUX_EPOLLHUP ||
> + l_event->events & LINUX_EPOLLMSG ||
> + l_event->events & LINUX_EPOLLWAKEUP ||
> + l_event->events & LINUX_EPOLLERR) {
> + linux_msg(td, "epoll_ctl doesn't yet support some event flags supplied: 0x%x\n",
> + l_event->events);
> + return (EINVAL);
> + }
> +
> +#if EPOLL_WIDE_USER_DATA
> + epoll_set_user_data(td, epfp, fd, l_event->data);
> +#endif
> + return (0);
> +}
> +
> +/*
> + * Structure converting function from kevent to epoll. In a case
> + * this is called on error in registration we store the error in
> + * event->data and pick it up later in linux_epoll_ctl().
> + */
> +static void
> +linux_kevent_to_epoll(
> +#if EPOLL_WIDE_USER_DATA
> + struct thread *td, struct file *epfp,
> +#endif
> + struct kevent *kevent, struct linux_epoll_event *l_event)
> +{
> + if ((kevent->flags & EV_ERROR) == 0)
> + switch (kevent->filter) {
> + case EVFILT_READ:
> + l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI;
> + break;
> + case EVFILT_WRITE:
> + l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM;
> + break;
> + }
> +#if EPOLL_WIDE_USER_DATA
> + l_event->data = epoll_get_user_data(td, epfp, kevent->ident);
> +#else
> + l_event->data = (epoll_udata_t)kevent->udata;
> +#endif
> +}
> +
> +/*
> + * Copyout callback used by kevent. This converts kevent
> + * events to epoll events and copies them back to the
> + * userspace. This is also called on error on registering
> + * of the filter.
> + */
> +static int
> +epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
> +{
> + struct epoll_copyout_args *args;
> + struct linux_epoll_event *eep;
> + int error, i;
> +
> + args = (struct epoll_copyout_args*) arg;
> + eep = malloc(sizeof(*eep) * count, M_TEMP, M_WAITOK | M_ZERO);
> +
> + for (i = 0; i < count; i++)
> + linux_kevent_to_epoll(
> +#if EPOLL_WIDE_USER_DATA
> + args->td, args->epfp,
> +#endif
> + &kevp[i], &eep[i]);
> +
> + error = copyout(eep, args->leventlist, count * sizeof(*eep));
> + if (!error) {
> + args->leventlist += count;
> + args->count += count;
> + } else if (!args->error)
> + args->error = error;
> +
> +#ifdef KTRACE
> + if (KTRPOINT(args->td, KTR_STRUCT))
> + ktrepoll_events(eep, count);
> +#endif
> +
> + free(eep, M_TEMP);
> + return (error);
> +}
> +
> +/*
> + * Copyin callback used by kevent. This copies already
> + * converted filters from kernel memory to the kevent
> + * internal kernel memory. Hence the memcpy instead of
> + * copyin.
> + */
> +static int
> +epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
> +{
> + struct epoll_copyin_args *args;
> +
> + args = (struct epoll_copyin_args*) arg;
> +
> + memcpy(kevp, args->changelist, count * sizeof(*kevp));
> + args->changelist += count;
> +
> + return (0);
> +}
> +
> +static int
> +ignore_enoent(int error) {
> + if (error == ENOENT)
> + error = 0;
> + return (error);
> +}
> +
> +static int
> +delete_event(struct thread *td, struct file *epfp, int fd, int filter)
> +{
> + struct epoll_copyin_args ciargs;
> + struct kevent kev;
> + struct kevent_copyops k_ops = { &ciargs,
> + NULL,
> + epoll_kev_copyin};
> + ciargs.changelist = &kev;
> +
> + EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0);
> + return (kern_kevent_locked(td, epfp, 1, 0, &k_ops, NULL));
> +}
> +
> +static int
> +delete_all_events(struct thread *td, struct file *epfp, int fd)
> +{
> + /* here we ignore ENOENT, because we don't keep track of events here */
> + int error1, error2;
> +
> + error1 = ignore_enoent(delete_event(td, epfp, fd, EVFILT_READ));
> + error2 = ignore_enoent(delete_event(td, epfp, fd, EVFILT_WRITE));
> +
> + /* report any errors we got */
> + if (error1)
> + return (error1);
> + if (error2)
> + return (error2);
> + return (0);
> +}
> +
> +/*
> + * Load epoll filter, convert it to kevent filter
> + * and load it into kevent subsystem.
> + */
> +int
> +linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
> +{
> + struct file *epfp;
> + struct epoll_copyin_args ciargs;
> + struct kevent kev[2];
> + struct kevent_copyops k_ops = { &ciargs,
> + NULL,
> + epoll_kev_copyin};
> + struct linux_epoll_event le;
> + int kev_flags;
> + int nchanges = 0;
> + int error;
> +
> + if (args->epfd == args->fd)
> + return (EINVAL);
> +
> + if (args->op != LINUX_EPOLL_CTL_DEL) {
> + error = copyin(args->event, &le, sizeof(le));
> + if (error)
> + return (error);
> + }
> +#ifdef DEBUG
> + if (ldebug(epoll_ctl))
> + printf(ARGS(epoll_ctl,"%i, %i, %i, %u"), args->epfd, args->op,
> + args->fd, le.events);
> +#endif
> +#ifdef KTRACE
> + if (KTRPOINT(td, KTR_STRUCT) && args->op != LINUX_EPOLL_CTL_DEL)
> + ktrepoll_events(&le, 1);
> +#endif
> + epfp = epoll_fget(td, args->epfd);
> +
> + ciargs.changelist = kev;
> +
> + switch (args->op) {
> + case LINUX_EPOLL_CTL_MOD:
> + /* we don't memorize which events were set for this FD
> + on this level, so just delete all we could have set:
> + EVFILT_READ and EVFILT_WRITE, ignoring only ENOENT
> + */
> + error = delete_all_events(td, epfp, args->fd);
> + if (error)
> + goto leave;
> + /* FALLTHROUGH */
> + case LINUX_EPOLL_CTL_ADD:
> + kev_flags = EV_ADD | EV_ENABLE;
> + break;
> + case LINUX_EPOLL_CTL_DEL:
> + /* CTL_DEL means unregister this fd with this epoll */
> + error = delete_all_events(td, epfp, args->fd);
> + goto leave;
> + default:
> + error = EINVAL;
> + goto leave;
> + }
> +
> + error = linux_epoll_to_kevent(td,
> +#if EPOLL_WIDE_USER_DATA
> + epfp,
> +#endif
> + args->fd, &le, kev_flags, kev, &nchanges);
> + if (error)
> + goto leave;
> +
> + error = kern_kevent_locked(td, epfp, nchanges, 0, &k_ops, NULL);
> +leave:
> + fdrop(epfp, td);
> + return (error);
> +}
> +
> +/*
> + * Wait for a filter to be triggered on the epoll file descriptor. */
> +int
> +linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
> +{
> + struct file *epfp;
> + struct timespec ts, *tsp;
> + struct epoll_copyout_args coargs;
> + struct kevent_copyops k_ops = { &coargs,
> + epoll_kev_copyout,
> + NULL};
> + int error;
> +
> + if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS)
> + return (EINVAL);
> +
> + epfp = epoll_fget(td, args->epfd);
> +
> + coargs.leventlist = args->events;
> + coargs.count = 0;
> + coargs.error = 0;
> +#if defined(KTRACE) || EPOLL_WIDE_USER_DATA
> + coargs.td = td;
> +#endif
> +#if EPOLL_WIDE_USER_DATA
> + coargs.epfp = epfp;
> +#endif
> +
> + if (args->timeout != -1) {
> + if (args->timeout < 0) {
> + error = EINVAL;
> + goto leave;
> + }
> + /* Convert from milliseconds to timespec. */
> + ts.tv_sec = args->timeout / 1000;
> + ts.tv_nsec = (args->timeout % 1000) * 1000000;
> + tsp = &ts;
> + } else {
> + tsp = NULL;
> + }
> +
> + error = kern_kevent_locked(td, epfp, 0, args->maxevents, &k_ops, tsp);
> + if (!error && coargs.error)
> + error = coargs.error;
> +
> + /*
> + * kern_kevent_locked might return ENOMEM, which is not expected from epoll_wait.
> + * Maybe we should translate that but I don't think it matters at all.
> + */
> +
> + if (!error)
> + td->td_retval[0] = coargs.count;
> +leave:
> + fdrop(epfp, td);
> + return (error);
> +}
> +
> +#if EPOLL_WIDE_USER_DATA
> +/*
> + * We store the user_data vector in fvn_epollpriv, a field of struct file
> + * that is otherwise unused for kqueue descriptors.
> + */
> +#define EPOLL_USER_DATA_GET(epfp) \
> + ((struct epoll_user_data*)(epfp)->f_vnun.fvn_epollpriv)
> +#define EPOLL_USER_DATA_SET(epfp, udv) \
> + (epfp)->f_vnun.fvn_epollpriv = (udv)
> +
> +static void
> +epoll_init_user_data(struct thread *td, struct file *epfp)
> +{
> + struct epoll_user_data *udv;
> +
> + /* override file ops to have our close operation */
> + atomic_store_rel_ptr((volatile uintptr_t *)&epfp->f_ops, (uintptr_t)&epollops);
> +
> + /* allocate epoll_user_data initially for up to 16 file descriptor values */
> + udv = malloc(EPOLL_USER_DATA_SIZE(EPOLL_USER_DATA_MARGIN), M_LINUX_EPOLL, M_WAITOK);
> + udv->sz = EPOLL_USER_DATA_MARGIN;
> + EPOLL_USER_DATA_SET(epfp, udv);
> +}
> +
> +static void
> +epoll_set_user_data(struct thread *td, struct file *epfp, int fd, epoll_udata_t user_data)
> +{
> + struct epoll_user_data *udv = EPOLL_USER_DATA_GET(epfp);
> +
> + if (fd >= udv->sz) {
> + udv = realloc(udv, EPOLL_USER_DATA_SIZE(fd + EPOLL_USER_DATA_MARGIN), M_LINUX_EPOLL, M_WAITOK);
> + udv->sz = fd + EPOLL_USER_DATA_MARGIN;
> + EPOLL_USER_DATA_SET(epfp, udv);
> + }
> + udv->data[fd] = user_data;
> +}
> +
> +static epoll_udata_t
> +epoll_get_user_data(struct thread *td, struct file *epfp, int fd)
> +{
> + struct epoll_user_data *udv = EPOLL_USER_DATA_GET(epfp);
> + if (fd >= udv->sz)
> + panic("epoll: user data vector is too small");
> +
> + return (udv->data[fd]);
> +}
> +
> +/*ARGSUSED*/
> +static int
> +epoll_close(struct file *epfp, struct thread *td)
> +{
> + /* free user data vector */
> + free(EPOLL_USER_DATA_GET(epfp), M_LINUX_EPOLL);
> + /* over to kqueue parent */
> + return (kqueue_close(epfp, td));
> +}
> +#endif
> +
> +static struct file*
> +epoll_fget(struct thread *td, int epfd)
> +{
> + struct file *fp;
> + cap_rights_t rights;
> +
> + if (fget(td, epfd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp) != 0)
> + panic("epoll: no file object found for kqueue descriptor");
> +
> + return (fp);
> +}
> +
>
> Added: head/sys/compat/linux/linux_epoll.h
> ==============================================================================
> --- /dev/null 00:00:00 1970 (empty, because file is newly added)
> +++ head/sys/compat/linux/linux_epoll.h Wed Sep 18 17:56:04 2013 (r255672)
> @@ -0,0 +1,68 @@
> +/*-
> + * Copyright (c) 2007 Roman Divacky
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + * notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + * notice, this list of conditions and the following disclaimer in the
> + * documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
> + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
> + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
> + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
> + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
> + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
> + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
> + * SUCH DAMAGE.
> + *
> + * $FreeBSD$
> + */
> +
> +#ifndef _LINUX_EPOLL_H_
> +#define _LINUX_EPOLL_H_
> +
> +#ifdef __amd64__
> +#define EPOLL_PACKED __packed
> +#else
> +#define EPOLL_PACKED
> +#endif
> +
> +struct linux_epoll_event {
> + uint32_t events;
> + uint64_t data;
> +} EPOLL_PACKED;
> +
> +#define LINUX_EPOLLIN 0x001
> +#define LINUX_EPOLLPRI 0x002
> +#define LINUX_EPOLLOUT 0x004
> +#define LINUX_EPOLLRDNORM 0x040
> +#define LINUX_EPOLLRDBAND 0x080
> +#define LINUX_EPOLLWRNORM 0x100
> +#define LINUX_EPOLLWRBAND 0x200
> +#define LINUX_EPOLLMSG 0x400
> +#define LINUX_EPOLLERR 0x008
> +#define LINUX_EPOLLHUP 0x010
> +#define LINUX_EPOLLRDHUP 0x2000
> +#define LINUX_EPOLLWAKEUP 1u<<29
> +#define LINUX_EPOLLONESHOT 1u<<30
> +#define LINUX_EPOLLET 1u<<31
> +
> +#define LINUX_EPOLL_CTL_ADD 1
> +#define LINUX_EPOLL_CTL_DEL 2
> +#define LINUX_EPOLL_CTL_MOD 3
> +
> +#define LINUX_EPOLL_CLOEXEC 02000000
> +#define LINUX_EPOLL_NONBLOCK 00004000
> +
> +#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct linux_epoll_event))
> +
> +#endif /* !_LINUX_EPOLL_H_ */
> +
>
> Modified: head/sys/conf/files.amd64
> ==============================================================================
> --- head/sys/conf/files.amd64 Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/conf/files.amd64 Wed Sep 18 17:56:04 2013 (r255672)
> @@ -467,6 +467,7 @@ amd64/linux32/linux32_support.s optional
> dependency "linux32_assym.h"
> amd64/linux32/linux32_sysent.c optional compat_linux32
> amd64/linux32/linux32_sysvec.c optional compat_linux32
> +compat/linux/linux_epoll.c optional compat_linux32
> compat/linux/linux_emul.c optional compat_linux32
> compat/linux/linux_file.c optional compat_linux32
> compat/linux/linux_fork.c optional compat_linux32
>
> Modified: head/sys/conf/files.i386
> ==============================================================================
> --- head/sys/conf/files.i386 Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/conf/files.i386 Wed Sep 18 17:56:04 2013 (r255672)
> @@ -80,6 +80,7 @@ hptrr_lib.o optional hptrr \
> cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
> compat/linprocfs/linprocfs.c optional linprocfs
> compat/linsysfs/linsysfs.c optional linsysfs
> +compat/linux/linux_epoll.c optional compat_linux
> compat/linux/linux_emul.c optional compat_linux
> compat/linux/linux_file.c optional compat_linux
> compat/linux/linux_fork.c optional compat_linux
>
> Modified: head/sys/conf/files.pc98
> ==============================================================================
> --- head/sys/conf/files.pc98 Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/conf/files.pc98 Wed Sep 18 17:56:04 2013 (r255672)
> @@ -41,6 +41,7 @@ ukbdmap.h optional ukbd_dflt_keymap \
> cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}"
> compat/linprocfs/linprocfs.c optional linprocfs
> compat/linsysfs/linsysfs.c optional linsysfs
> +compat/linux/linux_epoll.c optional compat_linux
> compat/linux/linux_emul.c optional compat_linux
> compat/linux/linux_file.c optional compat_linux
> compat/linux/linux_fork.c optional compat_linux
>
> Modified: head/sys/i386/linux/linux_dummy.c
> ==============================================================================
> --- head/sys/i386/linux/linux_dummy.c Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/i386/linux/linux_dummy.c Wed Sep 18 17:56:04 2013 (r255672)
> @@ -72,9 +72,6 @@ DUMMY(setfsgid);
> DUMMY(pivot_root);
> DUMMY(mincore);
> DUMMY(lookup_dcookie);
> -DUMMY(epoll_create);
> -DUMMY(epoll_ctl);
> -DUMMY(epoll_wait);
> DUMMY(remap_file_pages);
> DUMMY(fstatfs64);
> DUMMY(mbind);
> @@ -120,7 +117,6 @@ DUMMY(timerfd_gettime);
> /* linux 2.6.27: */
> DUMMY(signalfd4);
> DUMMY(eventfd2);
> -DUMMY(epoll_create1);
> DUMMY(dup3);
> DUMMY(inotify_init1);
> /* linux 2.6.30: */
>
> Modified: head/sys/i386/linux/syscalls.master
> ==============================================================================
> --- head/sys/i386/linux/syscalls.master Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/i386/linux/syscalls.master Wed Sep 18 17:56:04 2013 (r255672)
> @@ -432,9 +432,11 @@
> 251 AUE_NULL UNIMPL
> 252 AUE_EXIT STD { int linux_exit_group(int error_code); }
> 253 AUE_NULL STD { int linux_lookup_dcookie(void); }
> -254 AUE_NULL STD { int linux_epoll_create(void); }
> -255 AUE_NULL STD { int linux_epoll_ctl(void); }
> -256 AUE_NULL STD { int linux_epoll_wait(void); }
> +254 AUE_NULL STD { int linux_epoll_create(l_int size); }
> +255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \
> + struct linux_epoll_event *event); }
> +256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \
> + l_int maxevents, l_int timeout); }
> 257 AUE_NULL STD { int linux_remap_file_pages(void); }
> 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); }
> 259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \
> @@ -544,7 +546,7 @@
> ; linux 2.6.27:
> 327 AUE_NULL STD { int linux_signalfd4(void); }
> 328 AUE_NULL STD { int linux_eventfd2(void); }
> -329 AUE_NULL STD { int linux_epoll_create1(void); }
> +329 AUE_NULL STD { int linux_epoll_create1(l_int flags); }
> 330 AUE_NULL STD { int linux_dup3(void); }
> 331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); }
> 332 AUE_NULL STD { int linux_inotify_init1(void); }
>
> Modified: head/sys/kern/kern_event.c
> ==============================================================================
> --- head/sys/kern/kern_event.c Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/kern/kern_event.c Wed Sep 18 17:56:04 2013 (r255672)
> @@ -107,16 +107,7 @@ static void kqueue_wakeup(struct kqueue
> static struct filterops *kqueue_fo_find(int filt);
> static void kqueue_fo_release(int filt);
>
> -static fo_rdwr_t kqueue_read;
> -static fo_rdwr_t kqueue_write;
> -static fo_truncate_t kqueue_truncate;
> -static fo_ioctl_t kqueue_ioctl;
> -static fo_poll_t kqueue_poll;
> -static fo_kqfilter_t kqueue_kqfilter;
> -static fo_stat_t kqueue_stat;
> -static fo_close_t kqueue_close;
> -
> -static struct fileops kqueueops = {
> +struct fileops kqueueops = {
> .fo_read = kqueue_read,
> .fo_write = kqueue_write,
> .fo_truncate = kqueue_truncate,
> @@ -303,7 +294,7 @@ filt_fileattach(struct knote *kn)
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_kqfilter(struct file *fp, struct knote *kn)
> {
> struct kqueue *kq = kn->kn_fp->f_data;
> @@ -688,34 +679,7 @@ filt_usertouch(struct knote *kn, struct
> int
> sys_kqueue(struct thread *td, struct kqueue_args *uap)
> {
> - struct filedesc *fdp;
> - struct kqueue *kq;
> - struct file *fp;
> - int fd, error;
> -
> - fdp = td->td_proc->p_fd;
> - error = falloc(td, &fp, &fd, 0);
> - if (error)
> - goto done2;
> -
> - /* An extra reference on `fp' has been held for us by falloc(). */
> - kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
> - mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
> - TAILQ_INIT(&kq->kq_head);
> - kq->kq_fdp = fdp;
> - knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
> - TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
> -
> - FILEDESC_XLOCK(fdp);
> - TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
> - FILEDESC_XUNLOCK(fdp);
> -
> - finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
> - fdrop(fp, td);
> -
> - td->td_retval[0] = fd;
> -done2:
> - return (error);
> + return (kern_kqueue(td));
> }
>
> #ifndef _SYS_SYSPROTO_H_
> @@ -817,19 +781,75 @@ kevent_copyin(void *arg, struct kevent *
> }
>
> int
> +kern_kqueue(struct thread *td)
> +{
> + struct file *fp;
> + int error;
> +
> + error = kern_kqueue_locked(td, &fp);
> +
> + fdrop(fp, td);
> + return (error);
> +}
> +
> +int
> +kern_kqueue_locked(struct thread *td, struct file **fpp)
> +{
> + struct filedesc *fdp;
> + struct kqueue *kq;
> + struct file *fp;
> + int fd, error;
> +
> + fdp = td->td_proc->p_fd;
> + error = falloc(td, &fp, &fd, 0);
> + if (error)
> + return (error);
> +
> + /* An extra reference on `fp' has been held for us by falloc(). */
> + kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO);
> + mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK);
> + TAILQ_INIT(&kq->kq_head);
> + kq->kq_fdp = fdp;
> + knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock);
> + TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);
> +
> + FILEDESC_XLOCK(fdp);
> + TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
> + FILEDESC_XUNLOCK(fdp);
> +
> + finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
> +
> + td->td_retval[0] = fd;
> + *fpp = fp;
> + return (0);
> +}
> +
> +int
> kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
> struct kevent_copyops *k_ops, const struct timespec *timeout)
> {
> + struct file *fp;
> + cap_rights_t rights;
> + int error;
> +
> + if ((error = fget(td, fd, cap_rights_init(&rights, CAP_POST_EVENT), &fp)) != 0)
> + return (error);
> +
> + error = kern_kevent_locked(td, fp, nchanges, nevents, k_ops, timeout);
> +
> + fdrop(fp, td);
> + return (error);
> +}
> +
> +int
> +kern_kevent_locked(struct thread *td, struct file *fp, int nchanges, int nevents,
> + struct kevent_copyops *k_ops, const struct timespec *timeout)
> +{
> struct kevent keva[KQ_NEVENTS];
> struct kevent *kevp, *changes;
> struct kqueue *kq;
> - struct file *fp;
> - cap_rights_t rights;
> int i, n, nerrors, error;
>
> - error = fget(td, fd, cap_rights_init(&rights, CAP_POST_EVENT), &fp);
> - if (error != 0)
> - return (error);
> if ((error = kqueue_acquire(fp, &kq)) != 0)
> goto done_norel;
>
> @@ -872,7 +892,6 @@ kern_kevent(struct thread *td, int fd, i
> done:
> kqueue_release(kq, 0);
> done_norel:
> - fdrop(fp, td);
> return (error);
> }
>
> @@ -1526,7 +1545,7 @@ done_nl:
> * This could be expanded to call kqueue_scan, if desired.
> */
> /*ARGSUSED*/
> -static int
> +int
> kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
> int flags, struct thread *td)
> {
> @@ -1534,7 +1553,7 @@ kqueue_read(struct file *fp, struct uio
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
> int flags, struct thread *td)
> {
> @@ -1542,7 +1561,7 @@ kqueue_write(struct file *fp, struct uio
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
> struct thread *td)
> {
> @@ -1551,7 +1570,7 @@ kqueue_truncate(struct file *fp, off_t l
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_ioctl(struct file *fp, u_long cmd, void *data,
> struct ucred *active_cred, struct thread *td)
> {
> @@ -1599,7 +1618,7 @@ kqueue_ioctl(struct file *fp, u_long cmd
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
> struct thread *td)
> {
> @@ -1626,7 +1645,7 @@ kqueue_poll(struct file *fp, int events,
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred,
> struct thread *td)
> {
> @@ -1644,7 +1663,7 @@ kqueue_stat(struct file *fp, struct stat
> }
>
> /*ARGSUSED*/
> -static int
> +int
> kqueue_close(struct file *fp, struct thread *td)
> {
> struct kqueue *kq = fp->f_data;
>
> Modified: head/sys/modules/linux/Makefile
> ==============================================================================
> --- head/sys/modules/linux/Makefile Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/modules/linux/Makefile Wed Sep 18 17:56:04 2013 (r255672)
> @@ -9,7 +9,7 @@ CFLAGS+=-DCOMPAT_FREEBSD32 -DCOMPAT_LINU
>
> KMOD= linux
> SRCS= linux_fork.c linux${SFX}_dummy.c linux_emul.c linux_file.c \
> - linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
> + linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c linux_epoll.c \
> linux${SFX}_machdep.c linux_mib.c linux_misc.c linux_signal.c \
> linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \
> linux${SFX}_sysvec.c linux_uid16.c linux_util.c linux_time.c \
>
> Modified: head/sys/sys/event.h
> ==============================================================================
> --- head/sys/sys/event.h Wed Sep 18 17:28:19 2013 (r255671)
> +++ head/sys/sys/event.h Wed Sep 18 17:56:04 2013 (r255672)
> @@ -236,6 +236,9 @@ struct proc;
> struct knlist;
> struct mtx;
> struct rwlock;
> +struct uio;
> +struct stat;
> +struct ucred;
>
> extern void knote(struct knlist *list, long hint, int lockflags);
> extern void knote_fork(struct knlist *list, int pid);
> @@ -261,6 +264,21 @@ extern int kqfd_register(int fd, struct
> extern int kqueue_add_filteropts(int filt, struct filterops *filtops);
> extern int kqueue_del_filteropts(int filt);
>
> +int kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
> + int flags, struct thread *td);
> +int kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
> + int flags, struct thread *td);
> +int kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred,
> + struct thread *td);
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list