git: 00134a07898f - main - fusefs: require FUSE_NO_OPENDIR_SUPPORT for NFS exporting
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 04 Feb 2022 23:31:43 UTC
The branch main has been updated by asomers: URL: https://cgit.FreeBSD.org/src/commit/?id=00134a07898fa807b8a1fcb2596f0e3644143f69 commit 00134a07898fa807b8a1fcb2596f0e3644143f69 Author: Alan Somers <asomers@FreeBSD.org> AuthorDate: 2022-01-03 00:16:09 +0000 Commit: Alan Somers <asomers@FreeBSD.org> CommitDate: 2022-02-04 23:31:05 +0000 fusefs: require FUSE_NO_OPENDIR_SUPPORT for NFS exporting FUSE file systems that do not set FUSE_NO_OPENDIR_SUPPORT do not guarantee that d_off will be valid after closing and reopening a directory. That conflicts with NFS's statelessness, which results in unresolvable bugs when NFS reads large directories if: * The file system _does_ change the d_off field for the last directory entry previously returned by VOP_READDIR, or * The file system deletes the last directory entry previously seen by NFS. Rather than doing a poor job of exporting such file systems, it's better just to refuse. Even though this is technically a breaking change, 13.0-RELEASE's NFS-FUSE support was bad enough that an MFC should be allowed. MFC after: 3 weeks. 
Reviewed by: rmacklem Differential Revision: https://reviews.freebsd.org/D33726 --- sys/fs/fuse/fuse_internal.c | 84 +++++++++++++++------------------------------ sys/fs/fuse/fuse_internal.h | 8 ++--- sys/fs/fuse/fuse_vnops.c | 50 +++++++++++++++++---------- 3 files changed, 64 insertions(+), 78 deletions(-) diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index eb8f1f87d90f..8f5cbb2d86ae 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -553,7 +553,6 @@ fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, int fuse_internal_readdir(struct vnode *vp, struct uio *uio, - off_t startoff, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, @@ -562,7 +561,6 @@ fuse_internal_readdir(struct vnode *vp, int err = 0; struct fuse_dispatcher fdi; struct fuse_read_in *fri = NULL; - int fnd_start; if (uio_resid(uio) == 0) return 0; @@ -572,18 +570,6 @@ fuse_internal_readdir(struct vnode *vp, * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p * I/O). */ - - /* - * fnd_start is set non-zero once the offset in the directory gets - * to the startoff. This is done because directories must be read - * from the beginning (offset == 0) when fuse_vnop_readdir() needs - * to do an open of the directory. - * If it is not set non-zero here, it will be set non-zero in - * fuse_internal_readdir_processdata() when uio_offset == startoff. 
- */ - fnd_start = 0; - if (uio->uio_offset == startoff) - fnd_start = 1; while (uio_resid(uio) > 0) { fdi.iosize = sizeof(*fri); fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); @@ -595,9 +581,8 @@ fuse_internal_readdir(struct vnode *vp, if ((err = fdisp_wait_answ(&fdi))) break; - if ((err = fuse_internal_readdir_processdata(uio, startoff, - &fnd_start, fri->size, fdi.answ, fdi.iosize, cookediov, - ncookies, &cookies))) + if ((err = fuse_internal_readdir_processdata(uio, fri->size, + fdi.answ, fdi.iosize, cookediov, ncookies, &cookies))) break; } @@ -612,8 +597,6 @@ fuse_internal_readdir(struct vnode *vp, */ int fuse_internal_readdir_processdata(struct uio *uio, - off_t startoff, - int *fnd_start, size_t reqsize, void *buf, size_t bufsize, @@ -624,8 +607,6 @@ fuse_internal_readdir_processdata(struct uio *uio, int err = 0; int oreclen; size_t freclen; - int ents_copied = 0; - int ents_seen = 0; struct dirent *de; struct fuse_dirent *fudge; @@ -636,7 +617,7 @@ fuse_internal_readdir_processdata(struct uio *uio, return -1; for (;;) { if (bufsize < FUSE_NAME_OFFSET) { - err = (ents_seen == 0 || ents_copied > 0) ? -1 : 0; + err = -1; break; } fudge = (struct fuse_dirent *)buf; @@ -647,7 +628,7 @@ fuse_internal_readdir_processdata(struct uio *uio, * This indicates a partial directory entry at the * end of the directory data. */ - err = (ents_seen == 0 || ents_copied > 0) ? -1 : 0; + err = -1; break; } #ifdef ZERO_PAD_INCOMPLETE_BUFS @@ -669,41 +650,32 @@ fuse_internal_readdir_processdata(struct uio *uio, err = -1; break; } - ents_seen++; - /* - * Don't start to copy the directory entries out until - * the requested offset in the directory is found. 
- */ - if (*fnd_start != 0) { - fiov_adjust(cookediov, oreclen); - bzero(cookediov->base, oreclen); - - de = (struct dirent *)cookediov->base; - de->d_fileno = fudge->ino; - de->d_off = fudge->off; - de->d_reclen = oreclen; - de->d_type = fudge->type; - de->d_namlen = fudge->namelen; - memcpy((char *)cookediov->base + sizeof(struct dirent) - - MAXNAMLEN - 1, - (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); - dirent_terminate(de); - - err = uiomove(cookediov->base, cookediov->len, uio); - if (err) + fiov_adjust(cookediov, oreclen); + bzero(cookediov->base, oreclen); + + de = (struct dirent *)cookediov->base; + de->d_fileno = fudge->ino; + de->d_off = fudge->off; + de->d_reclen = oreclen; + de->d_type = fudge->type; + de->d_namlen = fudge->namelen; + memcpy((char *)cookediov->base + sizeof(struct dirent) - + MAXNAMLEN - 1, + (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); + dirent_terminate(de); + + err = uiomove(cookediov->base, cookediov->len, uio); + if (err) + break; + if (cookies != NULL) { + if (*ncookies == 0) { + err = -1; break; - if (cookies != NULL) { - if (*ncookies == 0) { - err = -1; - break; - } - *cookies = fudge->off; - cookies++; - (*ncookies)--; } - ents_copied++; - } else if (startoff == fudge->off) - *fnd_start = 1; + *cookies = fudge->off; + cookies++; + (*ncookies)--; + } buf = (char *)buf + freclen; bufsize -= freclen; uio_setoffset(uio, fudge->off); diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h index c17eff2acac3..5d852b420366 100644 --- a/sys/fs/fuse/fuse_internal.h +++ b/sys/fs/fuse/fuse_internal.h @@ -250,12 +250,12 @@ int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp, struct pseudo_dirent { uint32_t d_namlen; }; -int fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff, +int fuse_internal_readdir(struct vnode *vp, struct uio *uio, struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, uint64_t *cookies); -int fuse_internal_readdir_processdata(struct uio *uio, 
off_t startoff, - int *fnd_start, size_t reqsize, void *buf, size_t bufsize, - struct fuse_iov *cookediov, int *ncookies, uint64_t **cookiesp); +int fuse_internal_readdir_processdata(struct uio *uio, size_t reqsize, + void *buf, size_t bufsize, struct fuse_iov *cookediov, int *ncookies, + uint64_t **cookiesp); /* remove */ diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index da925b5dcbb5..10d64390123d 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -1846,11 +1846,10 @@ fuse_vnop_readdir(struct vop_readdir_args *ap) struct uio *uio = ap->a_uio; struct ucred *cred = ap->a_cred; struct fuse_filehandle *fufh = NULL; - struct fuse_data *mpdata = fuse_get_mpdata(vnode_mount(vp)); + struct mount *mp = vnode_mount(vp); struct fuse_iov cookediov; int err = 0; uint64_t *cookies; - off_t startoff; ssize_t tresid; int ncookies; bool closefufh = false; @@ -1867,25 +1866,18 @@ fuse_vnop_readdir(struct vop_readdir_args *ap) } tresid = uio->uio_resid; - startoff = uio->uio_offset; err = fuse_filehandle_get_dir(vp, &fufh, cred, pid); - if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { + if (err == EBADF && mp->mnt_flag & MNT_EXPORTED) { + struct fuse_data *data = fuse_get_mpdata(mp); + + KASSERT(data->dataflags & FSESS_NO_OPENDIR_SUPPORT, + ("FUSE file systems that don't set " + "FUSE_NO_OPENDIR_SUPPORT should not be exported")); /* * nfsd will do VOP_READDIR without first doing VOP_OPEN. We - * must implicitly open the directory here + * must implicitly open the directory here. */ err = fuse_filehandle_open(vp, FREAD, &fufh, curthread, cred); - if (err == 0 && !(mpdata->dataflags & FSESS_NO_OPEN_SUPPORT)) { - /* - * FUSE does not require a directory entry's d_off - * field to be valid outside of the lifetime of the - * directory's FUSE file handle. So we must read the - * directory from the beginning. 
However, if the file - * system sets FUSE_NO_OPENDIR_SUPPORT, then the d_off - * field will be valid for the lifetime of the dirent. - */ - uio->uio_offset = 0; - } closefufh = true; } if (err) @@ -1903,7 +1895,7 @@ fuse_vnop_readdir(struct vop_readdir_args *ap) #define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) fiov_init(&cookediov, DIRCOOKEDSIZE); - err = fuse_internal_readdir(vp, uio, startoff, fufh, &cookediov, + err = fuse_internal_readdir(vp, uio, fufh, &cookediov, &ncookies, cookies); fiov_teardown(&cookediov); @@ -3108,8 +3100,30 @@ fuse_vnop_vptofh(struct vop_vptofh_args *ap) struct vattr va; int err; - if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) + if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) { + /* NFS requires lookups for "." and ".." */ + SDT_PROBE2(fusefs, , vnops, trace, 1, + "VOP_VPTOFH without FUSE_EXPORT_SUPPORT"); return EOPNOTSUPP; + } + if ((mp->mnt_flag & MNT_EXPORTED) && + !(data->dataflags & FSESS_NO_OPENDIR_SUPPORT)) + { + /* + * NFS is stateless, so nfsd must reopen a directory on every + * call to VOP_READDIR, passing in the d_off field from the + * final dirent of the previous invocation. But without + * FUSE_NO_OPENDIR_SUPPORT, the FUSE protocol does not + * guarantee that d_off will be valid after a directory is + * closed and reopened. So prohibit exporting FUSE file + * systems that don't set that flag. + * + * But userspace NFS servers don't have this problem. + */ + SDT_PROBE2(fusefs, , vnops, trace, 1, + "VOP_VPTOFH without FUSE_NO_OPENDIR_SUPPORT"); + return EOPNOTSUPP; + } err = fuse_internal_getattr(vp, &va, curthread->td_ucred, curthread); if (err)