Re: git: 8ef0c11e7ce7 - main - nfsclient: upgrade vnode lock in VOP_OPEN()/VOP_CLOSE() if we need to flush buffers

From: Konstantin Belousov <kostikbel_at_gmail.com>
Date: Wed, 24 Nov 2021 11:30:25 UTC
On Wed, Nov 24, 2021 at 07:01:56PM +1100, Peter Jeremy wrote:
> On 2021-Nov-24 05:12:19 +0200, Konstantin Belousov <kostikbel@gmail.com> wrote:
> >Try this (combined two patches).
> 
> That's helped.  I can now swapon and swapoff but not actually use swap:
> KDB: stack backtrace:
> db_trace_self() at db_trace_self
> db_trace_self_wrapper() at db_trace_self_wrapper+0x30
> assert_vop_locked() at assert_vop_locked+0x58
> VOP_STRATEGY_APV() at VOP_STRATEGY_APV+0x54
> bufstrategy() at bufstrategy+0x4c
> swap_pager_putpages() at swap_pager_putpages+0x108
> vm_pageout_flush() at vm_pageout_flush+0x108
> vm_pageout_cluster() at vm_pageout_cluster+0x464
> vm_pageout_laundry_worker() at vm_pageout_laundry_worker+0x910
> fork_exit() at fork_exit+0x74
> fork_trampoline() at fork_trampoline+0x14
> vnode 0xffffa00008840700: type VREG
>     usecount 1, writecount 0, refcount 2 seqc users 0
>     hold count flags ()
>     flags (VV_VMSIZEVNLOCK)
>     v_object 0xffffa00008df3738 ref 0 pages 0 cleanbuf 0 dirtybuf 0
>     lock type nfs: UNLOCKED
>         fileid 30984 fsid 0x3a3a00ff01
> VOP_STRATEGY: 0xffffa00008840700 is not locked but should be
> KDB: enter: lock violation

Please try this, but it may require more work.  In particular, watch out
for deadlock: the swapped out pages are busied before swap vnode is locked.
By itself it is fine, but if some other io happens to the swap vnode, it
might become problematic.

diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 9bc506c9b6b8..6daedd02649d 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -2366,8 +2366,8 @@ sys_swapon(struct thread *td, struct swapon_args *uap)
 		goto done;
 	}
 
-	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | AUDITVNODE1, UIO_USERSPACE,
-	    uap->name, td);
+	NDINIT(&nd, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
+	    UIO_USERSPACE, uap->name, td);
 	error = namei(&nd);
 	if (error)
 		goto done;
@@ -2387,8 +2387,10 @@ sys_swapon(struct thread *td, struct swapon_args *uap)
 		error = swaponvp(td, vp, attr.va_size / DEV_BSIZE);
 	}
 
-	if (error)
-		vrele(vp);
+	if (error != 0)
+		vput(vp);
+	else
+		VOP_UNLOCK(vp);
 done:
 	sx_xunlock(&swdev_syscall_lock);
 	return (error);
@@ -3012,7 +3014,7 @@ swapongeom(struct vnode *vp)
 {
 	int error;
 
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	ASSERT_VOP_ELOCKED(vp, "swapongeom");
 	if (vp->v_type != VCHR || VN_IS_DOOMED(vp)) {
 		error = ENOENT;
 	} else {
@@ -3020,7 +3022,6 @@ swapongeom(struct vnode *vp)
 		error = swapongeom_locked(vp->v_rdev, vp);
 		g_topology_unlock();
 	}
-	VOP_UNLOCK(vp);
 	return (error);
 }
 
@@ -3042,24 +3043,30 @@ swapdev_strategy(struct buf *bp, struct swdevt *sp)
 	vp2 = sp->sw_id;
 	vhold(vp2);
 	if (bp->b_iocmd == BIO_WRITE) {
+		vn_lock(vp2, LK_EXCLUSIVE | LK_RETRY);
 		if (bp->b_bufobj)
 			bufobj_wdrop(bp->b_bufobj);
 		bufobj_wref(&vp2->v_bufobj);
+	} else {
+		vn_lock(vp2, LK_SHARED | LK_RETRY);
 	}
 	if (bp->b_bufobj != &vp2->v_bufobj)
 		bp->b_bufobj = &vp2->v_bufobj;
 	bp->b_vp = vp2;
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bstrategy(bp);
-	return;
+	VOP_UNLOCK(vp2);
 }
 
 static void
 swapdev_close(struct thread *td, struct swdevt *sp)
 {
+	struct vnode *vp;
 
-	VOP_CLOSE(sp->sw_vp, FREAD | FWRITE, td->td_ucred, td);
-	vrele(sp->sw_vp);
+	vp = sp->sw_vp;
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+	VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
+	vput(vp);
 }
 
 static int
@@ -3068,6 +3075,7 @@ swaponvp(struct thread *td, struct vnode *vp, u_long nblks)
 	struct swdevt *sp;
 	int error;
 
+	ASSERT_VOP_ELOCKED(vp, "swaponvp");
 	if (nblks == 0)
 		return (ENXIO);
 	mtx_lock(&sw_dev_mtx);
@@ -3079,14 +3087,12 @@ swaponvp(struct thread *td, struct vnode *vp, u_long nblks)
 	}
 	mtx_unlock(&sw_dev_mtx);
 
-	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef MAC
 	error = mac_system_check_swapon(td->td_ucred, vp);
 	if (error == 0)
 #endif
 		error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td, NULL);
-	(void) VOP_UNLOCK(vp);
-	if (error)
+	if (error != 0)
 		return (error);
 
 	swaponsomething(vp, vp, nblks, swapdev_strategy, swapdev_close,