git: 3b6056204dd8 - main - FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 04 Feb 2023 18:32:45 UTC
The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=3b6056204dd80cc866b7998ef0776247ebc42ce4 commit 3b6056204dd80cc866b7998ef0776247ebc42ce4 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2023-02-04 01:20:19 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2023-02-04 18:32:07 +0000 FIOSEEKHOLE/FIOSEEKDATA: correct consistency for bmap-based implementation Writes on UFS through a mapped region do not allocate disk blocks in holes immediately. The blocks are allocated when the pages are paged out first time. This breaks the algorithm in vn_bmap_seekhole() and ufs_bmap_seekdata(), because VOP_BMAP() reports hole for the place which already contains a valid data. Clean the pages before doing VOP_BMAP() in the affected functions. In principle, we could clean less by only requesting clean starting from the offset, but it is probably not very important. PR: 269261 Reported by: asomers Reviewed by: asomers, markj Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D38379 --- sys/kern/vfs_vnops.c | 14 ++++++++++++-- sys/ufs/ufs/ufs_bmap.c | 18 ++++++++++++++++++ sys/ufs/ufs/ufs_vnops.c | 2 +- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index a49070b6060d..1d90f76f9cd6 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -2556,6 +2556,7 @@ int vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) { + vm_object_t obj; off_t size; daddr_t bn, bnp; uint64_t bsize; @@ -2564,7 +2565,7 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, ("%s: Wrong command %lu", __func__, cmd)); - ASSERT_VOP_LOCKED(vp, "vn_bmap_seekhole_locked"); + ASSERT_VOP_ELOCKED(vp, "vn_bmap_seekhole_locked"); if (vp->v_type != VREG) { error = ENOTTY; @@ -2578,6 +2579,15 @@ vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, error = ENXIO; goto out; } + + /* See the comment in ufs_bmap_seekdata(). */ + obj = vp->v_object; + if (obj != NULL) { + VM_OBJECT_WLOCK(obj); + vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); + VM_OBJECT_WUNLOCK(obj); + } + bsize = vp->v_mount->mnt_stat.f_iosize; for (bn = noff / bsize; noff < size; bn++, noff += bsize - noff % bsize) { @@ -2613,7 +2623,7 @@ vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred) KASSERT(cmd == FIOSEEKHOLE || cmd == FIOSEEKDATA, ("%s: Wrong command %lu", __func__, cmd)); - if (vn_lock(vp, LK_SHARED) != 0) + if (vn_lock(vp, LK_EXCLUSIVE) != 0) return (EBADF); error = vn_bmap_seekhole_locked(vp, cmd, off, cred); VOP_UNLOCK(vp); diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c index 4ac8ca149279..acdd334f6c7b 100644 --- a/sys/ufs/ufs/ufs_bmap.c +++ b/sys/ufs/ufs/ufs_bmap.c @@ -44,12 +44,16 @@ __FBSDID("$FreeBSD$"); #include <sys/bio.h> #include <sys/buf.h> #include <sys/proc.h> +#include <sys/rwlock.h> #include <sys/vnode.h> #include <sys/mount.h> #include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/stat.h> +#include <vm/vm.h> +#include <vm/vm_object.h> + #include <ufs/ufs/extattr.h> #include <ufs/ufs/quota.h> #include <ufs/ufs/inode.h> @@ -348,6 +352,7 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp) struct inode *ip; struct mount *mp; struct ufsmount *ump; + vm_object_t obj; ufs2_daddr_t bn, daddr, nextbn; uint64_t bsize; off_t numblks; @@ -364,6 +369,19 @@ ufs_bmap_seekdata(struct vnode *vp, off_t *offp) if (*offp < 0 || *offp >= ip->i_size) return (ENXIO); + /* + * We could have pages on the vnode' object queue which still + * do not have the data blocks allocated. Convert all dirty + * pages into buffer writes to ensure that we see all + * allocated data. + */ + obj = vp->v_object; + if (obj != NULL) { + VM_OBJECT_WLOCK(obj); + vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); + VM_OBJECT_WUNLOCK(obj); + } + bsize = mp->mnt_stat.f_iosize; for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize); bn < numblks; bn = nextbn) { diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 93a5b173b785..ae6d963920f3 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -2944,7 +2944,7 @@ ufs_ioctl(struct vop_ioctl_args *ap) vp = ap->a_vp; switch (ap->a_command) { case FIOSEEKDATA: - error = vn_lock(vp, LK_SHARED); + error = vn_lock(vp, LK_EXCLUSIVE); if (error == 0) { error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data); VOP_UNLOCK(vp);