git: 05e8ab627bc6 - main - unionfs_rename: fix numerous locking issues

From: Jason A. Harmening <jah_at_FreeBSD.org>
Date: Mon, 29 Apr 2024 01:27:22 UTC
The branch main has been updated by jah:

URL: https://cgit.FreeBSD.org/src/commit/?id=05e8ab627bc6fc6e607aea94b60fd264b8a6c736

commit 05e8ab627bc6fc6e607aea94b60fd264b8a6c736
Author:     Jason A. Harmening <jah@FreeBSD.org>
AuthorDate: 2024-02-18 00:20:51 +0000
Commit:     Jason A. Harmening <jah@FreeBSD.org>
CommitDate: 2024-04-29 01:19:48 +0000

    unionfs_rename: fix numerous locking issues
    
    There are a few places in which unionfs_rename() accesses fvp's private
    data without holding the necessary lock/interlock.  Moreover, the
    implementation completely fails to handle the case in which fdvp is not
    the same as tdvp; in this case it simply fails to lock fdvp at all.
    Finally, it locks fvp while potentially already holding tvp's lock, but
    makes no attempt to deal with possible LOR there.
    
    Fix this by optimistically using the vnode interlock to protect
    the short accesses to fdvp and fvp private data, sequentially.
    If a file copy or shadow directory creation is required to prepare
    the upper FS for the rename operation, the interlock must be dropped
    and fdvp/fvp locked as necessary.
    
    Additionally, use ERELOOKUP (as suggested by kib@) to simplify the
    locking logic and eliminate unionfs_relookup() calls for file-copy/
    shadow-directory cases that require tdvp's lock to be dropped.
    
    Reviewed by:            kib (earlier version), olce
    Tested by:              pho
    Differential Revision:  https://reviews.freebsd.org/D44788
---
 sys/fs/unionfs/union_vnops.c | 152 +++++++++++++++++++++++++++----------------
 1 file changed, 96 insertions(+), 56 deletions(-)

diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
index 187d0513da25..aa2a7273825a 100644
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -1167,7 +1167,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	struct unionfs_mount *ump;
 	struct unionfs_node *unp;
 	int		error;
-	int		needrelookup;
 
 	UNIONFS_INTERNAL_DEBUG("unionfs_rename: enter\n");
 
@@ -1185,7 +1184,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	rfvp = fvp;
 	rtdvp = tdvp;
 	rtvp = tvp;
-	needrelookup = 0;
 
 	/* check for cross device rename */
 	if (fvp->v_mount != tdvp->v_mount ||
@@ -1201,58 +1199,114 @@ unionfs_rename(struct vop_rename_args *ap)
 	if (fvp == tvp)
 		goto unionfs_rename_abort;
 
-	/*
-	 * from/to vnode is unionfs node.
-	 */
-
-	KASSERT_UNIONFS_VNODE(fdvp);
-	KASSERT_UNIONFS_VNODE(fvp);
 	KASSERT_UNIONFS_VNODE(tdvp);
 	if (tvp != NULLVP)
 		KASSERT_UNIONFS_VNODE(tvp);
-
+	if (fdvp != tdvp)
+		VI_LOCK(fdvp);
 	unp = VTOUNIONFS(fdvp);
+	if (unp == NULL) {
+		if (fdvp != tdvp)
+			VI_UNLOCK(fdvp);
+		error = ENOENT;
+		goto unionfs_rename_abort;
+	}
 #ifdef UNIONFS_IDBG_RENAME
 	UNIONFS_INTERNAL_DEBUG("fdvp=%p, ufdvp=%p, lfdvp=%p\n",
 	    fdvp, unp->un_uppervp, unp->un_lowervp);
 #endif
 	if (unp->un_uppervp == NULLVP) {
 		error = ENODEV;
-		goto unionfs_rename_abort;
+	} else {
+		rfdvp = unp->un_uppervp;
+		vref(rfdvp);
 	}
-	rfdvp = unp->un_uppervp;
-	vref(rfdvp);
+	if (fdvp != tdvp)
+		VI_UNLOCK(fdvp);
+	if (error != 0)
+		goto unionfs_rename_abort;
 
+	VI_LOCK(fvp);
 	unp = VTOUNIONFS(fvp);
+	if (unp == NULL) {
+		VI_UNLOCK(fvp);
+		error = ENOENT;
+		goto unionfs_rename_abort;
+	}
+
 #ifdef UNIONFS_IDBG_RENAME
 	UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n",
 	    fvp, unp->un_uppervp, unp->un_lowervp);
 #endif
 	ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount);
+	/*
+	 * If we only have a lower vnode, copy the source file to the upper
+	 * FS so that the rename operation can be issued against the upper FS.
+	 */
 	if (unp->un_uppervp == NULLVP) {
-		switch (fvp->v_type) {
-		case VREG:
-			if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
-				goto unionfs_rename_abort;
-			error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
-			VOP_UNLOCK(fvp);
-			if (error != 0)
-				goto unionfs_rename_abort;
-			break;
-		case VDIR:
-			if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
-				goto unionfs_rename_abort;
-			error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
-			VOP_UNLOCK(fvp);
-			if (error != 0)
-				goto unionfs_rename_abort;
-			break;
-		default:
-			error = ENODEV;
-			goto unionfs_rename_abort;
+		bool unlock_fdvp = false, relock_tdvp = false;
+		VI_UNLOCK(fvp);
+		if (tvp != NULLVP)
+			VOP_UNLOCK(tvp);
+		if (fvp->v_type == VREG) {
+			/*
+			 * For regular files, unionfs_copyfile() will expect
+			 * fdvp's upper parent directory vnode to be unlocked
+			 * and will temporarily lock it.  If fdvp == tdvp, we
+			 * should unlock tdvp to avoid recursion on tdvp's
+			 * lock.  If fdvp != tdvp, we should also unlock tdvp
+			 * to avoid potential deadlock due to holding tdvp's
+			 * lock while locking unrelated vnodes associated with
+			 * fdvp/fvp.
+			 */
+			VOP_UNLOCK(tdvp);
+			relock_tdvp = true;
+		} else if (fvp->v_type == VDIR && tdvp != fdvp) {
+			/*
+			 * For directories, unionfs_mkshadowdir() will expect
+			 * fdvp's upper parent directory vnode to be locked
+			 * and will temporarily unlock it.  If fdvp == tdvp,
+			 * we can therefore leave tdvp locked.  If fdvp !=
+			 * tdvp, we should exchange the lock on tdvp for a
+			 * lock on fdvp.
+			 */
+			VOP_UNLOCK(tdvp);
+			unlock_fdvp = true;
+			relock_tdvp = true;
+			vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY);
 		}
-
-		needrelookup = 1;
+		vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY);
+		unp = VTOUNIONFS(fvp);
+		if (unp == NULL)
+			error = ENOENT;
+		else if (unp->un_uppervp == NULLVP) {
+			switch (fvp->v_type) {
+			case VREG:
+				error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td);
+				break;
+			case VDIR:
+				error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td);
+				break;
+			default:
+				error = ENODEV;
+				break;
+			}
+		}
+		VOP_UNLOCK(fvp);
+		if (unlock_fdvp)
+			VOP_UNLOCK(fdvp);
+		if (relock_tdvp)
+			vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY);
+		if (tvp != NULLVP)
+			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
+		/*
+		 * Since we've dropped tdvp's lock at some point in the copy
+		 * sequence above, force the caller to re-drive the lookup
+		 * in case the relationship between tdvp and tvp has changed.
+		 */
+		if (error == 0)
+			error = ERELOOKUP;
+		goto unionfs_rename_abort;
 	}
 
 	if (unp->un_lowervp != NULLVP)
@@ -1260,7 +1314,10 @@ unionfs_rename(struct vop_rename_args *ap)
 	rfvp = unp->un_uppervp;
 	vref(rfvp);
 
+	VI_UNLOCK(fvp);
+
 	unp = VTOUNIONFS(tdvp);
+
 #ifdef UNIONFS_IDBG_RENAME
 	UNIONFS_INTERNAL_DEBUG("tdvp=%p, utdvp=%p, ltdvp=%p\n",
 	    tdvp, unp->un_uppervp, unp->un_lowervp);
@@ -1273,11 +1330,12 @@ unionfs_rename(struct vop_rename_args *ap)
 	ltdvp = unp->un_lowervp;
 	vref(rtdvp);
 
-	if (tdvp == tvp) {
-		rtvp = rtdvp;
-		vref(rtvp);
-	} else if (tvp != NULLVP) {
+	if (tvp != NULLVP) {
 		unp = VTOUNIONFS(tvp);
+		if (unp == NULL) {
+			error = ENOENT;
+			goto unionfs_rename_abort;
+		}
 #ifdef UNIONFS_IDBG_RENAME
 		UNIONFS_INTERNAL_DEBUG("tvp=%p, utvp=%p, ltvp=%p\n",
 		    tvp, unp->un_uppervp, unp->un_lowervp);
@@ -1298,24 +1356,6 @@ unionfs_rename(struct vop_rename_args *ap)
 	if (rfvp == rtvp)
 		goto unionfs_rename_abort;
 
-	if (needrelookup != 0) {
-		if ((error = vn_lock(fdvp, LK_EXCLUSIVE)) != 0)
-			goto unionfs_rename_abort;
-		error = unionfs_relookup_for_delete(fdvp, fcnp, td);
-		VOP_UNLOCK(fdvp);
-		if (error != 0)
-			goto unionfs_rename_abort;
-
-		/* Lock of tvp is canceled in order to avoid recursive lock. */
-		if (tvp != NULLVP && tvp != tdvp)
-			VOP_UNLOCK(tvp);
-		error = unionfs_relookup_for_rename(tdvp, tcnp, td);
-		if (tvp != NULLVP && tvp != tdvp)
-			vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY);
-		if (error != 0)
-			goto unionfs_rename_abort;
-	}
-
 	error = VOP_RENAME(rfdvp, rfvp, fcnp, rtdvp, rtvp, tcnp);
 
 	if (error == 0) {