git: 8fa5e0f21fd1 - main - tmpfs: Account for whiteouts during rename/rmdir

From: Jason A. Harmening <jah_at_FreeBSD.org>
Date: Mon, 09 Sep 2024 00:02:16 UTC
The branch main has been updated by jah:

URL: https://cgit.FreeBSD.org/src/commit/?id=8fa5e0f21fd14bb3f5d977ae9130dae3e197f2ba

commit 8fa5e0f21fd14bb3f5d977ae9130dae3e197f2ba
Author:     Jason A. Harmening <jah@FreeBSD.org>
AuthorDate: 2024-08-06 04:30:30 +0000
Commit:     Jason A. Harmening <jah@FreeBSD.org>
CommitDate: 2024-09-08 23:34:15 +0000

    tmpfs: Account for whiteouts during rename/rmdir
    
    The existing tmpfs implementation will return ENOTEMPTY for VOP_RMDIR,
    or for the destination directory of VOP_RENAME, for any case in which
    the directory is non-empty, even if the directory only contains
    whiteouts.
    
    Fix this by tracking total whiteout dirent allocation separately for
    each directory, and avoid returning ENOTEMPTY if IGNOREWHITEOUT has
    been specified by the caller and the total allocation of dirents is not
    greater than the total whiteout allocation.  This addresses "directory
    not empty" failures seen on some recently-added unionfs stress2 tests
    which use tmpfs as a base-layer filesystem.
    
    A separate issue for independent consideration is that unionfs' default
    behavior when deleting files or directories is to create whiteouts even
    when it does not truly need to do so.
    
    Differential Revision:  https://reviews.freebsd.org/D45987
    Reviewed by:            kib (prior version), olce
    Tested by:              pho
---
 sys/fs/tmpfs/tmpfs.h       | 12 +++++++++++
 sys/fs/tmpfs/tmpfs_subr.c  | 36 ++++++++++++++++++++++++++++++-
 sys/fs/tmpfs/tmpfs_vnops.c | 54 +++++++++++++++++++++++++++++++++++-----------
 3 files changed, 88 insertions(+), 14 deletions(-)

diff --git a/sys/fs/tmpfs/tmpfs.h b/sys/fs/tmpfs/tmpfs.h
index c28f3a02a7bf..58f8720c3f84 100644
--- a/sys/fs/tmpfs/tmpfs.h
+++ b/sys/fs/tmpfs/tmpfs.h
@@ -292,6 +292,15 @@ struct tmpfs_node {
 			 */
 			off_t			tn_readdir_lastn;
 			struct tmpfs_dirent *	tn_readdir_lastp;
+
+			/*
+			 * Total size of whiteout directory entries.  This
+			 * must be a multiple of sizeof(struct tmpfs_dirent)
+			 * and is used to determine whether a directory is
+			 * empty (excluding whiteout entries) during rename/
+			 * rmdir operations.
+			 */
+			off_t			tn_wht_size;	/* (v) */
 		} tn_dir;
 
 		/* Valid when tn_type == VLNK. */
@@ -484,6 +493,7 @@ int	tmpfs_dir_getdents(struct tmpfs_mount *, struct tmpfs_node *,
 	    struct uio *, int, uint64_t *, int *);
 int	tmpfs_dir_whiteout_add(struct vnode *, struct componentname *);
 void	tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *);
+void	tmpfs_dir_clear_whiteouts(struct vnode *);
 int	tmpfs_reg_resize(struct vnode *, off_t, boolean_t);
 int	tmpfs_reg_punch_hole(struct vnode *vp, off_t *, off_t *);
 int	tmpfs_chflags(struct vnode *, u_long, struct ucred *, struct thread *);
@@ -533,6 +543,8 @@ tmpfs_update(struct vnode *vp)
 #define TMPFS_VALIDATE_DIR(node) do { \
 	MPASS((node)->tn_type == VDIR); \
 	MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \
+	MPASS((node)->tn_dir.tn_wht_size % sizeof(struct tmpfs_dirent) == 0); \
+	MPASS((node)->tn_dir.tn_wht_size <= (node)->tn_size); \
 } while (0)
 
 /*
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index a5eb865f2996..41d1f27caf13 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -646,6 +646,7 @@ tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, __enum_uint8(vtype)
 		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
 		nnode->tn_dir.tn_readdir_lastn = 0;
 		nnode->tn_dir.tn_readdir_lastp = NULL;
+		nnode->tn_dir.tn_wht_size = 0;
 		nnode->tn_links++;
 		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
 		nnode->tn_dir.tn_parent->tn_links++;
@@ -1831,13 +1832,16 @@ int
 tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
+	struct tmpfs_node *dnode;
 	int error;
 
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		return (error);
+	dnode = VP_TO_TMPFS_DIR(dvp);
 	tmpfs_dir_attach(dvp, de);
+	dnode->tn_dir.tn_wht_size += sizeof(*de);
 	return (0);
 }
 
@@ -1845,13 +1849,43 @@ void
 tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
 {
 	struct tmpfs_dirent *de;
+	struct tmpfs_node *dnode;
 
-	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
+	dnode = VP_TO_TMPFS_DIR(dvp);
+	de = tmpfs_dir_lookup(dnode, NULL, cnp);
 	MPASS(de != NULL && de->td_node == NULL);
+	MPASS(dnode->tn_dir.tn_wht_size >= sizeof(*de));
+	dnode->tn_dir.tn_wht_size -= sizeof(*de);
 	tmpfs_dir_detach(dvp, de);
 	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
 }
 
+/*
+ * Frees any dirents still associated with the directory represented
+ * by dvp in preparation for the removal of the directory.  This is
+ * required when removing a directory which contains only whiteout
+ * entries.
+ */
+void
+tmpfs_dir_clear_whiteouts(struct vnode *dvp)
+{
+	struct tmpfs_dir_cursor dc;
+	struct tmpfs_dirent *de;
+	struct tmpfs_node *dnode;
+
+	dnode = VP_TO_TMPFS_DIR(dvp);
+
+	while ((de = tmpfs_dir_first(dnode, &dc)) != NULL) {
+		KASSERT(de->td_node == NULL, ("%s: non-whiteout dirent %p",
+		    __func__, de));
+		dnode->tn_dir.tn_wht_size -= sizeof(*de);
+		tmpfs_dir_detach(dvp, de);
+		tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
+	}
+	MPASS(dnode->tn_size == 0);
+	MPASS(dnode->tn_dir.tn_wht_size == 0);
+}
+
 /*
  * Resizes the aobj associated with the regular file pointed to by 'vp' to the
  * size 'newsize'.  'vp' must point to a vnode that represents a regular file.
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 718cfef6bfa3..b278c3153863 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -1078,7 +1078,9 @@ tmpfs_rename(struct vop_rename_args *v)
 		}
 
 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
-			if (tnode->tn_size > 0) {
+			if (tnode->tn_size != 0 &&
+			    ((tcnp->cn_flags & IGNOREWHITEOUT) == 0 ||
+			    tnode->tn_size > tnode->tn_dir.tn_wht_size)) {
 				error = ENOTEMPTY;
 				goto out_locked;
 			}
@@ -1239,6 +1241,16 @@ tmpfs_rename(struct vop_rename_args *v)
 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
 		tmpfs_dir_detach(tdvp, tde);
 
+		/*
+		 * If we are overwriting a directory, per the ENOTEMPTY check
+		 * above it must either be empty or contain only whiteout
+		 * entries.  In the latter case (which can only happen if
+		 * IGNOREWHITEOUT was passed in tcnp->cn_flags), clear the
+		 * whiteout entries to avoid leaking memory.
+		 */
+		if (tnode->tn_type == VDIR && tnode->tn_size > 0)
+			tmpfs_dir_clear_whiteouts(tvp);
+
 		/* Update node's ctime because of possible hardlinks. */
 		tnode->tn_status |= TMPFS_NODE_CHANGED;
 		tmpfs_update(tvp);
@@ -1309,6 +1321,7 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
+	struct componentname *cnp = v->a_cnp;
 
 	int error;
 	struct tmpfs_dirent *de;
@@ -1320,12 +1333,16 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_DIR(vp);
 
-	/* Directories with more than two entries ('.' and '..') cannot be
-	 * removed. */
-	 if (node->tn_size > 0) {
-		 error = ENOTEMPTY;
-		 goto out;
-	 }
+	/*
+	 * Directories with more than two non-whiteout entries ('.' and '..')
+	 * cannot be removed.
+	 */
+	if (node->tn_size != 0 &&
+	    ((cnp->cn_flags & IGNOREWHITEOUT) == 0 ||
+	    node->tn_size > node->tn_dir.tn_wht_size)) {
+		error = ENOTEMPTY;
+		goto out;
+	}
 
 	if ((dnode->tn_flags & APPEND)
 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
@@ -1334,15 +1351,15 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 	}
 
 	/* This invariant holds only if we are not trying to remove "..".
-	  * We checked for that above so this is safe now. */
+	 * We checked for that above so this is safe now. */
 	MPASS(node->tn_dir.tn_parent == dnode);
 
 	/* Get the directory entry associated with node (vp).  This was
 	 * filled by tmpfs_lookup while looking up the entry. */
-	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
+	de = tmpfs_dir_lookup(dnode, node, cnp);
 	MPASS(TMPFS_DIRENT_MATCHES(de,
-	    v->a_cnp->cn_nameptr,
-	    v->a_cnp->cn_namelen));
+	    cnp->cn_nameptr,
+	    cnp->cn_namelen));
 
 	/* Check flags to see if we are allowed to remove the directory. */
 	if ((dnode->tn_flags & APPEND) != 0 ||
@@ -1353,8 +1370,19 @@ tmpfs_rmdir(struct vop_rmdir_args *v)
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
-	if (v->a_cnp->cn_flags & DOWHITEOUT)
-		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
+
+	/*
+	 * If we are removing a directory, per the ENOTEMPTY check above it
+	 * must either be empty or contain only whiteout entries.  In the
+	 * latter case (which can only happen if IGNOREWHITEOUT was passed
+	 * in cnp->cn_flags), clear the whiteout entries to avoid leaking
+	 * memory.
+	 */
+	if (node->tn_size > 0)
+		tmpfs_dir_clear_whiteouts(vp);
+
+	if (cnp->cn_flags & DOWHITEOUT)
+		tmpfs_dir_whiteout_add(dvp, cnp);
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);