git: bb24eaea4982 - main - vn_lock_pair(): allow to request shared locking

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Fri, 07 Apr 2023 22:59:39 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=bb24eaea498268572aa140c35c02e02884cdf930

commit bb24eaea498268572aa140c35c02e02884cdf930
Author:     Konstantin Belousov <kib@FreeBSD.org>
AuthorDate: 2023-04-06 04:11:08 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2023-04-07 22:58:26 +0000

    vn_lock_pair(): allow to request shared locking
    
    If either of vnodes is shared locked, lock must not be recursed.
    
    Requested by:   rmacklem
    Reviewed by:    markj, rmacklem
    Tested by:      pho
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D39444
---
 sys/fs/unionfs/union_subr.c |  3 +-
 sys/kern/vfs_mount.c        |  2 +-
 sys/kern/vfs_vnops.c        | 86 +++++++++++++++++++++++++++++++--------------
 sys/sys/vnode.h             |  4 +--
 sys/ufs/ffs/ffs_softdep.c   | 10 +++---
 sys/ufs/ufs/ufs_vnops.c     |  2 +-
 6 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index 42437f1e3840..264d37634949 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -388,7 +388,8 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
 	KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0,
 	    ("%s: NULL dvp for non-root vp %p", __func__, vp));
 
-	vn_lock_pair(lowervp, false, uppervp, false); 
+	vn_lock_pair(lowervp, false, LK_EXCLUSIVE, uppervp, false,
+	    LK_EXCLUSIVE);
 	error = insmntque1(vp, mp);
 	if (error != 0) {
 		unionfs_nodeget_cleanup(vp, unp);
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 6233d4c3741a..bf532df335bf 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -1273,7 +1273,7 @@ vfs_domount_first(
 	 * Use vn_lock_pair to avoid establishing an ordering between vnodes
 	 * from different filesystems.
 	 */
-	vn_lock_pair(vp, false, newdp, false);
+	vn_lock_pair(vp, false, LK_EXCLUSIVE, newdp, false, LK_EXCLUSIVE);
 
 	VI_LOCK(vp);
 	vp->v_iflag &= ~VI_MOUNT;
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 1d90f76f9cd6..05cc0cfda16e 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -3753,50 +3753,74 @@ vn_lock_pair_pause(const char *wmesg)
 
 /*
  * Lock pair of vnodes vp1, vp2, avoiding lock order reversal.
- * vp1_locked indicates whether vp1 is exclusively locked; if not, vp1
- * must be unlocked.  Same for vp2 and vp2_locked.  One of the vnodes
- * can be NULL.
+ * vp1_locked indicates whether vp1 is locked; if not, vp1 must be
+ * unlocked.  Same for vp2 and vp2_locked.  One of the vnodes can be
+ * NULL.
  *
- * The function returns with both vnodes exclusively locked, and
- * guarantees that it does not create lock order reversal with other
- * threads during its execution.  Both vnodes could be unlocked
- * temporary (and reclaimed).
+ * The function returns with both vnodes exclusively or shared locked,
+ * according to corresponding lkflags, and guarantees that it does not
+ * create lock order reversal with other threads during its execution.
+ * Both vnodes could be unlocked temporary (and reclaimed).
+ *
+ * If requesting shared locking, locked vnode lock must not be recursed.
  */
 void
-vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
-    bool vp2_locked)
+vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1,
+    struct vnode *vp2, bool vp2_locked, int lkflags2)
 {
 	int error;
 
+	MPASS(lkflags1 == LK_SHARED || lkflags1 == LK_EXCLUSIVE);
+	MPASS(lkflags2 == LK_SHARED || lkflags2 == LK_EXCLUSIVE);
+
 	if (vp1 == NULL && vp2 == NULL)
 		return;
+
 	if (vp1 != NULL) {
-		if (vp1_locked)
-			ASSERT_VOP_ELOCKED(vp1, "vp1");
-		else
+		if (lkflags1 == LK_SHARED &&
+		    (vp1->v_vnlock->lock_object.lo_flags & LK_NOSHARE) != 0)
+			lkflags1 = LK_EXCLUSIVE;
+		if (vp1_locked && VOP_ISLOCKED(vp1) != LK_EXCLUSIVE) {
+			ASSERT_VOP_LOCKED(vp1, "vp1");
+			if (lkflags1 == LK_EXCLUSIVE) {
+				VOP_UNLOCK(vp1);
+				ASSERT_VOP_UNLOCKED(vp1,
+				    "vp1 shared recursed");
+				vp1_locked = false;
+			}
+		} else if (!vp1_locked)
 			ASSERT_VOP_UNLOCKED(vp1, "vp1");
 	} else {
 		vp1_locked = true;
 	}
+
 	if (vp2 != NULL) {
-		if (vp2_locked)
-			ASSERT_VOP_ELOCKED(vp2, "vp2");
-		else
+		if (lkflags2 == LK_SHARED &&
+		    (vp2->v_vnlock->lock_object.lo_flags & LK_NOSHARE) != 0)
+			lkflags2 = LK_EXCLUSIVE;
+		if (vp2_locked && VOP_ISLOCKED(vp2) != LK_EXCLUSIVE) {
+			ASSERT_VOP_LOCKED(vp2, "vp2");
+			if (lkflags2 == LK_EXCLUSIVE) {
+				VOP_UNLOCK(vp2);
+				ASSERT_VOP_UNLOCKED(vp2,
+				    "vp2 shared recursed");
+				vp2_locked = false;
+			}
+		} else if (!vp2_locked)
 			ASSERT_VOP_UNLOCKED(vp2, "vp2");
 	} else {
 		vp2_locked = true;
 	}
+
 	if (!vp1_locked && !vp2_locked) {
-		vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+		vn_lock(vp1, lkflags1 | LK_RETRY);
 		vp1_locked = true;
 	}
 
-	for (;;) {
-		if (vp1_locked && vp2_locked)
-			break;
+	while (!vp1_locked || !vp2_locked) {
 		if (vp1_locked && vp2 != NULL) {
 			if (vp1 != NULL) {
-				error = VOP_LOCK1(vp2, LK_EXCLUSIVE | LK_NOWAIT,
+				error = VOP_LOCK1(vp2, lkflags2 | LK_NOWAIT,
 				    __FILE__, __LINE__);
 				if (error == 0)
 					break;
@@ -3804,12 +3828,12 @@ vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
 				vp1_locked = false;
 				vn_lock_pair_pause("vlp1");
 			}
-			vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
+			vn_lock(vp2, lkflags2 | LK_RETRY);
 			vp2_locked = true;
 		}
 		if (vp2_locked && vp1 != NULL) {
 			if (vp2 != NULL) {
-				error = VOP_LOCK1(vp1, LK_EXCLUSIVE | LK_NOWAIT,
+				error = VOP_LOCK1(vp1, lkflags1 | LK_NOWAIT,
 				    __FILE__, __LINE__);
 				if (error == 0)
 					break;
@@ -3817,14 +3841,22 @@ vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
 				vp2_locked = false;
 				vn_lock_pair_pause("vlp2");
 			}
-			vn_lock(vp1, LK_EXCLUSIVE | LK_RETRY);
+			vn_lock(vp1, lkflags1 | LK_RETRY);
 			vp1_locked = true;
 		}
 	}
-	if (vp1 != NULL)
-		ASSERT_VOP_ELOCKED(vp1, "vp1 ret");
-	if (vp2 != NULL)
-		ASSERT_VOP_ELOCKED(vp2, "vp2 ret");
+	if (vp1 != NULL) {
+		if (lkflags1 == LK_EXCLUSIVE)
+			ASSERT_VOP_ELOCKED(vp1, "vp1 ret");
+		else
+			ASSERT_VOP_LOCKED(vp1, "vp1 ret");
+	}
+	if (vp2 != NULL) {
+		if (lkflags2 == LK_EXCLUSIVE)
+			ASSERT_VOP_ELOCKED(vp2, "vp2 ret");
+		else
+			ASSERT_VOP_LOCKED(vp2, "vp2 ret");
+	}
 }
 
 int
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 74514654713a..5e3f81de0236 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -757,8 +757,8 @@ bool	vn_isdisk_error(struct vnode *vp, int *errp);
 bool	vn_isdisk(struct vnode *vp);
 int	_vn_lock(struct vnode *vp, int flags, const char *file, int line);
 #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__)
-void	vn_lock_pair(struct vnode *vp1, bool vp1_locked, struct vnode *vp2,
-	    bool vp2_locked);
+void	vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1,
+	    struct vnode *vp2, bool vp2_locked, int lkflags2);
 int	vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp);
 int	vn_open_cred(struct nameidata *ndp, int *flagp, int cmode,
 	    u_int vn_open_flags, struct ucred *cred, struct file *fp);
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 1e245d82f8b8..9f3644477038 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -3323,7 +3323,7 @@ softdep_prelink(struct vnode *dvp,
 	if (vp != NULL) {
 		VOP_UNLOCK(dvp);
 		ffs_syncvnode(vp, MNT_NOWAIT, 0);
-		vn_lock_pair(dvp, false, vp, true);
+		vn_lock_pair(dvp, false, LK_EXCLUSIVE, vp, true, LK_EXCLUSIVE);
 		if (dvp->v_data == NULL)
 			goto out;
 	}
@@ -3335,7 +3335,8 @@ softdep_prelink(struct vnode *dvp,
 		VOP_UNLOCK(dvp);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (vp->v_data == NULL) {
-			vn_lock_pair(dvp, false, vp, true);
+			vn_lock_pair(dvp, false, LK_EXCLUSIVE, vp, true,
+			    LK_EXCLUSIVE);
 			goto out;
 		}
 		ACQUIRE_LOCK(ump);
@@ -3345,7 +3346,8 @@ softdep_prelink(struct vnode *dvp,
 		VOP_UNLOCK(vp);
 		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		if (dvp->v_data == NULL) {
-			vn_lock_pair(dvp, true, vp, false);
+			vn_lock_pair(dvp, true, LK_EXCLUSIVE, vp, false,
+			    LK_EXCLUSIVE);
 			goto out;
 		}
 	}
@@ -3360,7 +3362,7 @@ softdep_prelink(struct vnode *dvp,
 	journal_check_space(ump);
 	FREE_LOCK(ump);
 
-	vn_lock_pair(dvp, false, vp, false);
+	vn_lock_pair(dvp, false, LK_EXCLUSIVE, vp, false, LK_EXCLUSIVE);
 out:
 	ndp->ni_dvp_seqc = vn_seqc_read_any(dvp);
 	if (vp != NULL)
diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c
index 0a34eee310b4..1982e183ca04 100644
--- a/sys/ufs/ufs/ufs_vnops.c
+++ b/sys/ufs/ufs/ufs_vnops.c
@@ -235,7 +235,7 @@ ufs_sync_nlink(struct vnode *vp, struct vnode *vp1)
 	if (vp1 != NULL)
 		VOP_UNLOCK(vp1);
 	error = ufs_sync_nlink1(mp);
-	vn_lock_pair(vp, false, vp1, false);
+	vn_lock_pair(vp, false, LK_EXCLUSIVE, vp1, false, LK_EXCLUSIVE);
 	return (error);
 }