svn commit: r274403 - in head/sys: compat/freebsd32 kern sys
Adrian Chadd
adrian at freebsd.org
Tue Nov 11 20:43:51 UTC 2014
... wait a sec, you're removing this without even asking first?
I've been using this locally to implement / benchmark zero-copy socket
writes from userland, which is what it was initially intended for.
It's the only sane way of implementing notifications for shared memory
based sendfile. I was going to release it as part of the RSS
development suite so people could write applications without us having
to implement actual zero-copy versions of write/writev for sockets.
Please put it back into -HEAD.
Thanks,
-adrian
On 11 November 2014 12:32, Gleb Smirnoff <glebius at freebsd.org> wrote:
> Author: glebius
> Date: Tue Nov 11 20:32:46 2014
> New Revision: 274403
> URL: https://svnweb.freebsd.org/changeset/base/274403
>
> Log:
> Remove SF_KQUEUE code. This code was developed at Netflix, but was not
> ever used. It didn't go into stable/10, neither was documented.
> It might be useful, but we collectively decided to remove it, rather
> leave it abandoned and unmaintained. It is removed in one single
> commit, so restoring it should be easy, if anyone wants to reopen
> this idea.
>
> Sponsored by: Netflix
>
> Deleted:
> head/sys/sys/sf_base.h
> head/sys/sys/sf_sync.h
> Modified:
> head/sys/compat/freebsd32/freebsd32_misc.c
> head/sys/kern/kern_descrip.c
> head/sys/kern/uipc_syscalls.c
> head/sys/sys/file.h
> head/sys/sys/socket.h
>
> Modified: head/sys/compat/freebsd32/freebsd32_misc.c
> ==============================================================================
> --- head/sys/compat/freebsd32/freebsd32_misc.c Tue Nov 11 20:05:50 2014 (r274402)
> +++ head/sys/compat/freebsd32/freebsd32_misc.c Tue Nov 11 20:32:46 2014 (r274403)
> @@ -83,10 +83,6 @@ __FBSDID("$FreeBSD$");
> #include <sys/msg.h>
> #include <sys/sem.h>
> #include <sys/shm.h>
> -#include <sys/condvar.h>
> -#include <sys/sf_buf.h>
> -#include <sys/sf_sync.h>
> -#include <sys/sf_base.h>
>
> #ifdef INET
> #include <netinet/in.h>
> @@ -1567,26 +1563,16 @@ struct sf_hdtr32 {
> int trl_cnt;
> };
>
> -struct sf_hdtr_kq32 {
> - int kq_fd;
> - uint32_t kq_flags;
> - uint32_t kq_udata; /* 32-bit void ptr */
> - uint32_t kq_ident; /* 32-bit uintptr_t */
> -};
> -
> static int
> freebsd32_do_sendfile(struct thread *td,
> struct freebsd32_sendfile_args *uap, int compat)
> {
> struct sf_hdtr32 hdtr32;
> struct sf_hdtr hdtr;
> - struct sf_hdtr_kq32 hdtr_kq32;
> - struct sf_hdtr_kq hdtr_kq;
> struct uio *hdr_uio, *trl_uio;
> struct iovec32 *iov32;
> - off_t offset;
> + off_t offset, sbytes;
> int error;
> - off_t sbytes;
>
> offset = PAIR32TO64(off_t, uap->offset);
> if (offset < 0)
> @@ -1617,31 +1603,17 @@ freebsd32_do_sendfile(struct thread *td,
> if (error)
> goto out;
> }
> -
> - /*
> - * If SF_KQUEUE is set, then we need to also copy in
> - * the kqueue data after the normal hdtr set and set do_kqueue=1.
> - */
> - if (uap->flags & SF_KQUEUE) {
> - error = copyin(((char *) uap->hdtr) + sizeof(hdtr32),
> - &hdtr_kq32,
> - sizeof(hdtr_kq32));
> - if (error != 0)
> - goto out;
> -
> - /* 32->64 bit fields */
> - CP(hdtr_kq32, hdtr_kq, kq_fd);
> - CP(hdtr_kq32, hdtr_kq, kq_flags);
> - PTRIN_CP(hdtr_kq32, hdtr_kq, kq_udata);
> - CP(hdtr_kq32, hdtr_kq, kq_ident);
> - }
> }
>
> + AUDIT_ARG_FD(uap->fd);
>
> - /* Call sendfile */
> - /* XXX stack depth! */
> - error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat,
> - offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq);
> + if ((error = fget_read(td, uap->fd,
> + cap_rights_init(&rights, CAP_PREAD), &fp)) != 0)
> + goto out;
> +
> + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset,
> + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
> + fdrop(fp, td);
>
> if (uap->sbytes != NULL)
> copyout(&sbytes, uap->sbytes, sizeof(off_t));
>
> Modified: head/sys/kern/kern_descrip.c
> ==============================================================================
> --- head/sys/kern/kern_descrip.c Tue Nov 11 20:05:50 2014 (r274402)
> +++ head/sys/kern/kern_descrip.c Tue Nov 11 20:32:46 2014 (r274403)
> @@ -3684,7 +3684,7 @@ badfo_chown(struct file *fp, uid_t uid,
> static int
> badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
> struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
> - int kflags, struct sendfile_sync *sfs, struct thread *td)
> + int kflags, struct thread *td)
> {
>
> return (EBADF);
> @@ -3770,7 +3770,7 @@ invfo_chown(struct file *fp, uid_t uid,
> int
> invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
> struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
> - int kflags, struct sendfile_sync *sfs, struct thread *td)
> + int kflags, struct thread *td)
> {
>
> return (EINVAL);
>
> Modified: head/sys/kern/uipc_syscalls.c
> ==============================================================================
> --- head/sys/kern/uipc_syscalls.c Tue Nov 11 20:05:50 2014 (r274402)
> +++ head/sys/kern/uipc_syscalls.c Tue Nov 11 20:32:46 2014 (r274403)
> @@ -63,8 +63,6 @@ __FBSDID("$FreeBSD$");
> #include <sys/protosw.h>
> #include <sys/rwlock.h>
> #include <sys/sf_buf.h>
> -#include <sys/sf_sync.h>
> -#include <sys/sf_base.h>
> #include <sys/sysent.h>
> #include <sys/socket.h>
> #include <sys/socketvar.h>
> @@ -115,10 +113,6 @@ static int getpeername1(struct thread *t
>
> counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
>
> -static int filt_sfsync_attach(struct knote *kn);
> -static void filt_sfsync_detach(struct knote *kn);
> -static int filt_sfsync(struct knote *kn, long hint);
> -
> /*
> * sendfile(2)-related variables and associated sysctls
> */
> @@ -128,28 +122,6 @@ static int sfreadahead = 1;
> SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW,
> &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks");
>
> -#ifdef SFSYNC_DEBUG
> -static int sf_sync_debug = 0;
> -SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW,
> - &sf_sync_debug, 0, "Output debugging during sf_sync lifecycle");
> -#define SFSYNC_DPRINTF(s, ...) \
> - do { \
> - if (sf_sync_debug) \
> - printf((s), ##__VA_ARGS__); \
> - } while (0)
> -#else
> -#define SFSYNC_DPRINTF(c, ...)
> -#endif
> -
> -static uma_zone_t zone_sfsync;
> -
> -static struct filterops sendfile_filtops = {
> - .f_isfd = 0,
> - .f_attach = filt_sfsync_attach,
> - .f_detach = filt_sfsync_detach,
> - .f_event = filt_sfsync,
> -};
> -
> static void
> sfstat_init(const void *unused)
> {
> @@ -159,19 +131,6 @@ sfstat_init(const void *unused)
> }
> SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL);
>
> -static void
> -sf_sync_init(const void *unused)
> -{
> -
> - zone_sfsync = uma_zcreate("sendfile_sync", sizeof(struct sendfile_sync),
> - NULL, NULL,
> - NULL, NULL,
> - UMA_ALIGN_CACHE,
> - 0);
> - kqueue_add_filteropts(EVFILT_SENDFILE, &sendfile_filtops);
> -}
> -SYSINIT(sf_sync, SI_SUB_MBUF, SI_ORDER_FIRST, sf_sync_init, NULL);
> -
> static int
> sfstat_sysctl(SYSCTL_HANDLER_ARGS)
> {
> @@ -1864,116 +1823,11 @@ getsockaddr(namp, uaddr, len)
> return (error);
> }
>
> -static int
> -filt_sfsync_attach(struct knote *kn)
> -{
> - struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_sdata;
> - struct knlist *knl = &sfs->klist;
> -
> - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
> -
> - /*
> - * Validate that we actually received this via the kernel API.
> - */
> - if ((kn->kn_flags & EV_FLAG1) == 0)
> - return (EPERM);
> -
> - kn->kn_ptr.p_v = sfs;
> - kn->kn_flags &= ~EV_FLAG1;
> -
> - knl->kl_lock(knl->kl_lockarg);
> - /*
> - * If we're in the "freeing" state,
> - * don't allow the add. That way we don't
> - * end up racing with some other thread that
> - * is trying to finish some setup.
> - */
> - if (sfs->state == SF_STATE_FREEING) {
> - knl->kl_unlock(knl->kl_lockarg);
> - return (EINVAL);
> - }
> - knlist_add(&sfs->klist, kn, 1);
> - knl->kl_unlock(knl->kl_lockarg);
> -
> - return (0);
> -}
> -
> -/*
> - * Called when a knote is being detached.
> - */
> -static void
> -filt_sfsync_detach(struct knote *kn)
> -{
> - struct knlist *knl;
> - struct sendfile_sync *sfs;
> - int do_free = 0;
> -
> - sfs = kn->kn_ptr.p_v;
> - knl = &sfs->klist;
> -
> - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
> -
> - knl->kl_lock(knl->kl_lockarg);
> - if (!knlist_empty(knl))
> - knlist_remove(knl, kn, 1);
> -
> - /*
> - * If the list is empty _AND_ the refcount is 0
> - * _AND_ we've finished the setup phase and now
> - * we're in the running phase, we can free the
> - * underlying sendfile_sync.
> - *
> - * But we shouldn't do it before finishing the
> - * underlying divorce from the knote.
> - *
> - * So, we have the sfsync lock held; transition
> - * it to "freeing", then unlock, then free
> - * normally.
> - */
> - if (knlist_empty(knl)) {
> - if (sfs->state == SF_STATE_COMPLETED && sfs->count == 0) {
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, "
> - "count==0, empty list: time to free!\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
> - do_free = 1;
> - }
> - }
> - knl->kl_unlock(knl->kl_lockarg);
> -
> - /*
> - * Only call free if we're the one who has transitioned things
> - * to free. Otherwise we could race with another thread that
> - * is currently tearing things down.
> - */
> - if (do_free == 1) {
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p, %s:%d\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs,
> - __FILE__,
> - __LINE__);
> - sf_sync_free(sfs);
> - }
> -}
> -
> -static int
> -filt_sfsync(struct knote *kn, long hint)
> -{
> - struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_ptr.p_v;
> - int ret;
> -
> - SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
> -
> - /*
> - * XXX add a lock assertion here!
> - */
> - ret = (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED);
> -
> - return (ret);
> -}
> +struct sendfile_sync {
> + struct mtx mtx;
> + struct cv cv;
> + unsigned count;
> +};
>
> /*
> * Add more references to a vm_page + sf_buf + sendfile_sync.
> @@ -2022,344 +1876,13 @@ sf_ext_free(void *arg1, void *arg2)
> vm_page_free(pg);
> vm_page_unlock(pg);
>
> - if (sfs != NULL)
> - sf_sync_deref(sfs);
> -}
> -
> -/*
> - * Called to remove a reference to a sf_sync object.
> - *
> - * This is generally done during the mbuf free path to signify
> - * that one of the mbufs in the transaction has been completed.
> - *
> - * If we're doing SF_SYNC and the refcount is zero then we'll wake
> - * up any waiters.
> - *
> - * IF we're doing SF_KQUEUE and the refcount is zero then we'll
> - * fire off the knote.
> - */
> -void
> -sf_sync_deref(struct sendfile_sync *sfs)
> -{
> - int do_free = 0;
> -
> - if (sfs == NULL)
> - return;
> -
> - mtx_lock(&sfs->mtx);
> - KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0"));
> - sfs->count --;
> -
> - /*
> - * Only fire off the wakeup / kqueue notification if
> - * we are in the running state.
> - */
> - if (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED) {
> - if (sfs->flags & SF_SYNC)
> + if (sfs != NULL) {
> + mtx_lock(&sfs->mtx);
> + KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
> + if (--sfs->count == 0)
> cv_signal(&sfs->cv);
> -
> - if (sfs->flags & SF_KQUEUE) {
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: knote!\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - KNOTE_LOCKED(&sfs->klist, 1);
> - }
> -
> - /*
> - * If we're not waiting around for a sync,
> - * check if the knote list is empty.
> - * If it is, we transition to free.
> - *
> - * XXX I think it's about time I added some state
> - * or flag that says whether we're supposed to be
> - * waiting around until we've done a signal.
> - *
> - * XXX Ie, the reason that I don't free it here
> - * is because the caller will free the last reference,
> - * not us. That should be codified in some flag
> - * that indicates "self-free" rather than checking
> - * for SF_SYNC all the time.
> - */
> - if ((sfs->flags & SF_SYNC) == 0 && knlist_empty(&sfs->klist)) {
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, "
> - "count==0, empty list: time to free!\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
> - do_free = 1;
> - }
> -
> - }
> - mtx_unlock(&sfs->mtx);
> -
> - /*
> - * Attempt to do a free here.
> - *
> - * We do this outside of the lock because it may destroy the
> - * lock in question as it frees things. We can optimise this
> - * later.
> - *
> - * XXX yes, we should make it a requirement to hold the
> - * lock across sf_sync_free().
> - */
> - if (do_free == 1) {
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - sf_sync_free(sfs);
> - }
> -}
> -
> -/*
> - * Allocate a sendfile_sync state structure.
> - *
> - * For now this only knows about the "sleep" sync, but later it will
> - * grow various other personalities.
> - */
> -struct sendfile_sync *
> -sf_sync_alloc(uint32_t flags)
> -{
> - struct sendfile_sync *sfs;
> -
> - sfs = uma_zalloc(zone_sfsync, M_WAITOK | M_ZERO);
> - mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
> - cv_init(&sfs->cv, "sendfile");
> - sfs->flags = flags;
> - sfs->state = SF_STATE_SETUP;
> - knlist_init_mtx(&sfs->klist, &sfs->mtx);
> -
> - SFSYNC_DPRINTF("%s: sfs=%p, flags=0x%08x\n", __func__, sfs, sfs->flags);
> -
> - return (sfs);
> -}
> -
> -/*
> - * Take a reference to a sfsync instance.
> - *
> - * This has to map 1:1 to free calls coming in via sf_ext_free(),
> - * so typically this will be referenced once for each mbuf allocated.
> - */
> -void
> -sf_sync_ref(struct sendfile_sync *sfs)
> -{
> -
> - if (sfs == NULL)
> - return;
> -
> - mtx_lock(&sfs->mtx);
> - sfs->count++;
> - mtx_unlock(&sfs->mtx);
> -}
> -
> -void
> -sf_sync_syscall_wait(struct sendfile_sync *sfs)
> -{
> -
> - if (sfs == NULL)
> - return;
> -
> - KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!",
> - __func__,
> - sfs));
> -
> - /*
> - * If we're not requested to wait during the syscall,
> - * don't bother waiting.
> - */
> - if ((sfs->flags & SF_SYNC) == 0)
> - goto out;
> -
> - /*
> - * This is a bit suboptimal and confusing, so bear with me.
> - *
> - * Ideally sf_sync_syscall_wait() will wait until
> - * all pending mbuf transmit operations are done.
> - * This means that when sendfile becomes async, it'll
> - * run in the background and will transition from
> - * RUNNING to COMPLETED when it's finished acquiring
> - * new things to send. Then, when the mbufs finish
> - * sending, COMPLETED + sfs->count == 0 is enough to
> - * know that no further work is being done.
> - *
> - * So, we will sleep on both RUNNING and COMPLETED.
> - * It's up to the (in progress) async sendfile loop
> - * to transition the sf_sync from RUNNING to
> - * COMPLETED so the wakeup above will actually
> - * do the cv_signal() call.
> - */
> - if (sfs->state != SF_STATE_COMPLETED && sfs->state != SF_STATE_RUNNING)
> - goto out;
> -
> - if (sfs->count != 0)
> - cv_wait(&sfs->cv, &sfs->mtx);
> - KASSERT(sfs->count == 0, ("sendfile sync still busy"));
> -
> -out:
> - return;
> -}
> -
> -/*
> - * Free an sf_sync if it's appropriate to.
> - */
> -void
> -sf_sync_free(struct sendfile_sync *sfs)
> -{
> -
> - if (sfs == NULL)
> - return;
> -
> - SFSYNC_DPRINTF("%s: (%lld) sfs=%p; called; state=%d, flags=0x%08x "
> - "count=%d\n",
> - __func__,
> - (long long) curthread->td_tid,
> - sfs,
> - sfs->state,
> - sfs->flags,
> - sfs->count);
> -
> - mtx_lock(&sfs->mtx);
> -
> - /*
> - * We keep the sf_sync around if the state is active,
> - * we are doing kqueue notification and we have active
> - * knotes.
> - *
> - * If the caller wants to free us right this second it
> - * should transition this to the freeing state.
> - *
> - * So, complain loudly if they break this rule.
> - */
> - if (sfs->state != SF_STATE_FREEING) {
> - printf("%s: (%llu) sfs=%p; not freeing; let's wait!\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> mtx_unlock(&sfs->mtx);
> - return;
> }
> -
> - KASSERT(sfs->count == 0, ("sendfile sync still busy"));
> - cv_destroy(&sfs->cv);
> - /*
> - * This doesn't call knlist_detach() on each knote; it just frees
> - * the entire list.
> - */
> - knlist_delete(&sfs->klist, curthread, 1);
> - mtx_destroy(&sfs->mtx);
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; freeing\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - uma_zfree(zone_sfsync, sfs);
> -}
> -
> -/*
> - * Setup a sf_sync to post a kqueue notification when things are complete.
> - */
> -int
> -sf_sync_kqueue_setup(struct sendfile_sync *sfs, struct sf_hdtr_kq *sfkq)
> -{
> - struct kevent kev;
> - int error;
> -
> - sfs->flags |= SF_KQUEUE;
> -
> - /* Check the flags are valid */
> - if ((sfkq->kq_flags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0)
> - return (EINVAL);
> -
> - SFSYNC_DPRINTF("%s: sfs=%p: kqfd=%d, flags=0x%08x, ident=%p, udata=%p\n",
> - __func__,
> - sfs,
> - sfkq->kq_fd,
> - sfkq->kq_flags,
> - (void *) sfkq->kq_ident,
> - (void *) sfkq->kq_udata);
> -
> - /* Setup and register a knote on the given kqfd. */
> - kev.ident = (uintptr_t) sfkq->kq_ident;
> - kev.filter = EVFILT_SENDFILE;
> - kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | sfkq->kq_flags;
> - kev.data = (intptr_t) sfs;
> - kev.udata = sfkq->kq_udata;
> -
> - error = kqfd_register(sfkq->kq_fd, &kev, curthread, 1);
> - if (error != 0) {
> - SFSYNC_DPRINTF("%s: returned %d\n", __func__, error);
> - }
> - return (error);
> -}
> -
> -void
> -sf_sync_set_state(struct sendfile_sync *sfs, sendfile_sync_state_t state,
> - int islocked)
> -{
> - sendfile_sync_state_t old_state;
> -
> - if (! islocked)
> - mtx_lock(&sfs->mtx);
> -
> - /*
> - * Update our current state.
> - */
> - old_state = sfs->state;
> - sfs->state = state;
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p; going from %d to %d\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs,
> - old_state,
> - state);
> -
> - /*
> - * If we're transitioning from RUNNING to COMPLETED and the count is
> - * zero, then post the knote. The caller may have completed the
> - * send before we updated the state to COMPLETED and we need to make
> - * sure this is communicated.
> - */
> - if (old_state == SF_STATE_RUNNING
> - && state == SF_STATE_COMPLETED
> - && sfs->count == 0
> - && sfs->flags & SF_KQUEUE) {
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: triggering knote!\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - KNOTE_LOCKED(&sfs->klist, 1);
> - }
> -
> - if (! islocked)
> - mtx_unlock(&sfs->mtx);
> -}
> -
> -/*
> - * Set the retval/errno for the given transaction.
> - *
> - * This will eventually/ideally be used when the KNOTE is fired off
> - * to signify the completion of this transaction.
> - *
> - * The sfsync lock should be held before entering this function.
> - */
> -void
> -sf_sync_set_retval(struct sendfile_sync *sfs, off_t retval, int xerrno)
> -{
> -
> - KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!",
> - __func__,
> - sfs));
> -
> - SFSYNC_DPRINTF("%s: (%llu) sfs=%p: errno=%d, retval=%jd\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs,
> - xerrno,
> - (intmax_t) retval);
> -
> - sfs->retval = retval;
> - sfs->xerrno = xerrno;
> }
>
> /*
> @@ -2380,174 +1903,15 @@ sys_sendfile(struct thread *td, struct s
> return (do_sendfile(td, uap, 0));
> }
>
> -int
> -_do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags,
> - int compat, off_t offset, size_t nbytes, off_t *sbytes,
> - struct uio *hdr_uio,
> - struct uio *trl_uio, struct sf_hdtr_kq *hdtr_kq)
> -{
> - cap_rights_t rights;
> - struct sendfile_sync *sfs = NULL;
> - struct file *fp;
> - int error;
> - int do_kqueue = 0;
> - int do_free = 0;
> -
> - AUDIT_ARG_FD(src_fd);
> -
> - if (hdtr_kq != NULL)
> - do_kqueue = 1;
> -
> - /*
> - * sendfile(2) can start at any offset within a file so we require
> - * CAP_READ+CAP_SEEK = CAP_PREAD.
> - */
> - if ((error = fget_read(td, src_fd,
> - cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) {
> - goto out;
> - }
> -
> - /*
> - * IF SF_KQUEUE is set but we haven't copied in anything for
> - * kqueue data, error out.
> - */
> - if (flags & SF_KQUEUE && do_kqueue == 0) {
> - SFSYNC_DPRINTF("%s: SF_KQUEUE but no KQUEUE data!\n", __func__);
> - goto out;
> - }
> -
> - /*
> - * If we need to wait for completion, initialise the sfsync
> - * state here.
> - */
> - if (flags & (SF_SYNC | SF_KQUEUE))
> - sfs = sf_sync_alloc(flags & (SF_SYNC | SF_KQUEUE));
> -
> - if (flags & SF_KQUEUE) {
> - error = sf_sync_kqueue_setup(sfs, hdtr_kq);
> - if (error) {
> - SFSYNC_DPRINTF("%s: (%llu) error; sfs=%p\n",
> - __func__,
> - (unsigned long long) curthread->td_tid,
> - sfs);
> - sf_sync_set_state(sfs, SF_STATE_FREEING, 0);
> - sf_sync_free(sfs);
> - goto out;
> - }
> - }
> -
> - /*
> - * Do the sendfile call.
> - *
> - * If this fails, it'll free the mbuf chain which will free up the
> - * sendfile_sync references.
> - */
> - error = fo_sendfile(fp, sock_fd, hdr_uio, trl_uio, offset,
> - nbytes, sbytes, flags, compat ? SFK_COMPAT : 0, sfs, td);
> -
> - /*
> - * If the sendfile call succeeded, transition the sf_sync state
> - * to RUNNING, then COMPLETED.
> - *
> - * If the sendfile call failed, then the sendfile call may have
> - * actually sent some data first - so we check to see whether
> - * any data was sent. If some data was queued (ie, count > 0)
> - * then we can't call free; we have to wait until the partial
> - * transaction completes before we continue along.
> - *
> - * This has the side effect of firing off the knote
> - * if the refcount has hit zero by the time we get here.
> - */
> - if (sfs != NULL) {
> - mtx_lock(&sfs->mtx);
> - if (error == 0 || sfs->count > 0) {
> - /*
> - * When it's time to do async sendfile, the transition
> - * to RUNNING signifies that we're actually actively
> - * adding and completing mbufs. When the last disk
> - * buffer is read (ie, when we're not doing any
> - * further read IO and all subsequent stuff is mbuf
> - * transmissions) we'll transition to COMPLETED
> - * and when the final mbuf is freed, the completion
> - * will be signaled.
> - */
> - sf_sync_set_state(sfs, SF_STATE_RUNNING, 1);
> -
> - /*
> - * Set the retval before we signal completed.
> - * If we do it the other way around then transitioning to
> - * COMPLETED may post the knote before you set the return
> - * status!
> - *
> - * XXX for now, errno is always 0, as we don't post
> - * knotes if sendfile failed. Maybe that'll change later.
> - */
> - sf_sync_set_retval(sfs, *sbytes, error);
> -
> - /*
> - * And now transition to completed, which will kick off
> - * the knote if required.
> - */
> - sf_sync_set_state(sfs, SF_STATE_COMPLETED, 1);
> - } else {
> - /*
> - * Error isn't zero, sfs_count is zero, so we
> - * won't have some other thing to wake things up.
> - * Thus free.
> - */
> - sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
> - do_free = 1;
> - }
> -
> - /*
> - * Next - wait if appropriate.
> - */
> - sf_sync_syscall_wait(sfs);
> -
> - /*
> - * If we're not doing kqueue notifications, we can
> - * transition this immediately to the freeing state.
> - */
> - if ((sfs->flags & SF_KQUEUE) == 0) {
> - sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
> - do_free = 1;
> - }
> -
> - mtx_unlock(&sfs->mtx);
> - }
> -
> - /*
> - * If do_free is set, free here.
> - *
> - * If we're doing no-kqueue notification and it's just sleep notification,
> - * we also do free; it's the only chance we have.
> - */
> - if (sfs != NULL && do_free == 1) {
> - sf_sync_free(sfs);
> - }
> -
> - /*
> - * XXX Should we wait until the send has completed before freeing the source
> - * file handle? It's the previous behaviour, sure, but is it required?
> - * We've wired down the page references after all.
> - */
> - fdrop(fp, td);
> -
> -out:
> - /* Return error */
> - return (error);
> -}
> -
> -
> static int
> do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
> {
> struct sf_hdtr hdtr;
> - struct sf_hdtr_kq hdtr_kq;
> struct uio *hdr_uio, *trl_uio;
> - int error;
> + struct file *fp;
> + cap_rights_t rights;
> off_t sbytes;
> - int do_kqueue = 0;
> + int error;
>
> /*
> * File offset must be positive. If it goes beyond EOF
> @@ -2563,38 +1927,37 @@ do_sendfile(struct thread *td, struct se
> if (error != 0)
> goto out;
> if (hdtr.headers != NULL) {
> - error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
> + error = copyinuio(hdtr.headers, hdtr.hdr_cnt,
> + &hdr_uio);
> if (error != 0)
> goto out;
> }
> if (hdtr.trailers != NULL) {
> - error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
> + error = copyinuio(hdtr.trailers, hdtr.trl_cnt,
> + &trl_uio);
> if (error != 0)
> goto out;
> }
> + }
>
> - /*
> - * If SF_KQUEUE is set, then we need to also copy in
> - * the kqueue data after the normal hdtr set and set
> - * do_kqueue=1.
> - */
> - if (uap->flags & SF_KQUEUE) {
> - error = copyin(((char *) uap->hdtr) + sizeof(hdtr),
> - &hdtr_kq,
> - sizeof(hdtr_kq));
> - if (error != 0)
> - goto out;
> - do_kqueue = 1;
> - }
> + AUDIT_ARG_FD(src_fd);
> +
> + /*
> + * sendfile(2) can start at any offset within a file so we require
> + * CAP_READ+CAP_SEEK = CAP_PREAD.
> + */
> + if ((error = fget_read(td, uap->fd,
> + cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) {
> + goto out;
> }
>
> - /* Call sendfile */
> - error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat,
> - uap->offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq);
> + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset,
> + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td);
> + fdrop(fp, td);
>
> - if (uap->sbytes != NULL) {
> + if (uap->sbytes != NULL)
> copyout(&sbytes, uap->sbytes, sizeof(off_t));
> - }
> +
> out:
> free(hdr_uio, M_IOV);
> free(trl_uio, M_IOV);
> @@ -2819,7 +2182,7 @@ kern_sendfile_getsock(struct thread *td,
> int
> vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
> struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
> - int kflags, struct sendfile_sync *sfs, struct thread *td)
> + int kflags, struct thread *td)
> {
> struct file *sock_fp;
> struct vnode *vp;
> @@ -2829,6 +2192,7 @@ vn_sendfile(struct file *fp, int sockfd,
> struct sf_buf *sf;
> struct vm_page *pg;
> struct shmfd *shmfd;
> + struct sendfile_sync *sfs;
> struct vattr va;
> off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
> int error, bsize, nd, hdrlen, mnw;
> @@ -2837,6 +2201,7 @@ vn_sendfile(struct file *fp, int sockfd,
> obj = NULL;
> so = NULL;
> m = NULL;
> + sfs = NULL;
> fsbytes = sbytes = 0;
> hdrlen = mnw = 0;
> rem = nbytes;
> @@ -2860,6 +2225,12 @@ vn_sendfile(struct file *fp, int sockfd,
> if (flags & SF_MNOWAIT)
> mnw = 1;
>
> + if (flags & SF_SYNC) {
> + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO);
> + mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
> + cv_init(&sfs->cv, "sendfile");
> + }
> +
> #ifdef MAC
> error = mac_socket_check_send(td->td_ucred, so);
> if (error != 0)
> @@ -3106,12 +2477,11 @@ retry_space:
> loopbytes += xfsize;
> off += xfsize;
>
> - /*
> - * XXX eventually this should be a sfsync
> - * method call!
> - */
> - if (sfs != NULL)
> - sf_sync_ref(sfs);
> + if (sfs != NULL) {
> + mtx_lock(&sfs->mtx);
> + sfs->count++;
> + mtx_unlock(&sfs->mtx);
> + }
> }
>
> if (vp != NULL)
> @@ -3193,6 +2563,16 @@ out:
> if (m)
> m_freem(m);
>
> + if (sfs != NULL) {
> + mtx_lock(&sfs->mtx);
> + if (sfs->count != 0)
> + cv_wait(&sfs->cv, &sfs->mtx);
> + KASSERT(sfs->count == 0, ("sendfile sync still busy"));
> + cv_destroy(&sfs->cv);
> + mtx_destroy(&sfs->mtx);
> + free(sfs, M_TEMP);
> + }
> +
> if (error == ERESTART)
> error = EINTR;
>
>
> Modified: head/sys/sys/file.h
> ==============================================================================
> --- head/sys/sys/file.h Tue Nov 11 20:05:50 2014 (r274402)
> +++ head/sys/sys/file.h Tue Nov 11 20:32:46 2014 (r274403)
> @@ -90,9 +90,6 @@ foffset_get(struct file *fp)
> return (foffset_lock(fp, FOF_NOLOCK));
> }
>
> -/* XXX pollution? */
> -struct sendfile_sync;
> -
> typedef int fo_rdwr_t(struct file *fp, struct uio *uio,
> struct ucred *active_cred, int flags,
> struct thread *td);
> @@ -112,8 +109,7 @@ typedef int fo_chown_t(struct file *fp,
> struct ucred *active_cred, struct thread *td);
> typedef int fo_sendfile_t(struct file *fp, int sockfd, struct uio *hdr_uio,
> struct uio *trl_uio, off_t offset, size_t nbytes,
> - off_t *sent, int flags, int kflags,
> - struct sendfile_sync *sfs, struct thread *td);
> + off_t *sent, int flags, int kflags, struct thread *td);
> typedef int fo_seek_t(struct file *fp, off_t offset, int whence,
> struct thread *td);
> typedef int fo_fill_kinfo_t(struct file *fp, struct kinfo_file *kif,
> @@ -371,11 +367,11 @@ fo_chown(struct file *fp, uid_t uid, gid
> static __inline int
> fo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
> struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
> - int kflags, struct sendfile_sync *sfs, struct thread *td)
> + int kflags, struct thread *td)
> {
>
> return ((*fp->f_ops->fo_sendfile)(fp, sockfd, hdr_uio, trl_uio, offset,
> - nbytes, sent, flags, kflags, sfs, td));
> + nbytes, sent, flags, kflags, td));
> }
>
> static __inline int
>
> Modified: head/sys/sys/socket.h
> ==============================================================================
> --- head/sys/sys/socket.h Tue Nov 11 20:05:50 2014 (r274402)
> +++ head/sys/sys/socket.h Tue Nov 11 20:32:46 2014 (r274403)
> @@ -584,27 +584,11 @@ struct sf_hdtr {
> };
>
> /*
> - * sendfile(2) kqueue information
> - */
> -struct sf_hdtr_kq {
> - uintptr_t kq_ident; /* ident (from userland?) */
> - void *kq_udata; /* user data pointer */
> - uint32_t kq_flags; /* extra flags to pass in */
>
> *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
>
More information about the svn-src-all
mailing list