svn commit: r235626 - in stable/9/sys: fs/coda fs/ext2fs fs/msdosfs fs/nfsclient fs/nfsserver i386/conf kern nfsclient sys ufs/ffs ufs/ufs

Kirk McKusick mckusick at FreeBSD.org
Fri May 18 19:48:39 UTC 2012


Author: mckusick
Date: Fri May 18 19:48:38 2012
New Revision: 235626
URL: http://svn.freebsd.org/changeset/base/235626

Log:
  MFC of 234386, 234400, 234441, 234443, 234482, 234483, 235052, 235241,
         235246, and 235619
  
  MFC: 234386
  
  Replace the MNT_VNODE_FOREACH interface with MNT_VNODE_FOREACH_ALL.
  The primary changes are that the user of the interface no longer
  needs to manage the mount-mutex locking and that the vnode that
  is returned has its mutex locked (thus avoiding the need to check
  to see if it is DOOMED or other possible end-of-life scenarios).
  
  To minimize compatibility issues for third-party developers, the
  old MNT_VNODE_FOREACH interface will remain available so that this
  change can be MFC'ed to 9. Following the MFC to 9, MNT_VNODE_FOREACH
  will be removed in head.
  
  The reason for this update is to prepare for the addition of the
  MNT_VNODE_FOREACH_ACTIVE interface that will loop over just the
  active vnodes associated with a mount point (typically less than
  1% of the vnodes associated with the mount point).
  
  Reviewed by: kib
  Tested by:   Peter Holm
  MFC after:   2 weeks
  
  MFC: 234400
  
  Drop export of vdestroy() function from kern/vfs_subr.c as it is
  used only as a helper function in that file. Replace sole call to
  vbusy() with inline code in vholdl(). Replace sole calls to vfree()
  and vdestroy() with inline code in vdropl().
  
  The Clang compiler already inlines these functions, so they do not
  show up in a kernel backtrace which is confusing. Also you cannot
  set their frame in kgdb which means that it is impossible to view
  their local variables. So, while the produced code is unchanged,
  the debugging should be easier.
  
  Discussed with: kib
  MFC after:      2 weeks
  
  MFC: 234441
  
  Fix a memory leak of M_VNODE_MARKER introduced in 234386.
  
  Found by:  Peter Holm
  
  MFC: 234443
  
  Delete a no longer useful VNASSERT missed during changes in 234400.
  
  Suggested by: kib
  
  MFC: 234482
  
  This change creates a new list of active vnodes associated with
  a mount point. Active vnodes are those with a non-zero use or hold
  count, e.g., those vnodes that are not on the free list. Note that
  this list is in addition to the list of all the vnodes associated
  with a mount point.
  
  To avoid adding another set of linkage pointers to the vnode
  structure, the active list uses the existing linkage pointers
  used by the free list (previously named v_freelist, now renamed
  v_actfreelist).
  
  This update adds the MNT_VNODE_FOREACH_ACTIVE interface that loops
  over just the active vnodes associated with a mount point (typically
  less than 1% of the vnodes associated with the mount point).
  
  Reviewed by: kib
  Tested by:   Peter Holm
  MFC after:   2 weeks
  
  MFC: 234483
  
  This update uses the MNT_VNODE_FOREACH_ACTIVE interface that loops
  over just the active vnodes associated with a mount point to replace
  MNT_VNODE_FOREACH_ALL in the vfs_msync, ffs_sync_lazy, and qsync
  routines.
  
  The vfs_msync routine is run every 30 seconds for every writably
  mounted filesystem. It ensures that any files mmap'ed from the
  filesystem with modified pages have those pages queued to be
  written back to the file from which they are mapped.
  
  The ffs_sync_lazy and qsync routines are run every 30 seconds for
  every writably mounted UFS/FFS filesystem. The ffs_sync_lazy routine
  ensures that any files that have been accessed in the previous
  30 seconds have had their access times queued for updating in the
  filesystem. The qsync routine ensures that any files with modified
  quotas have those quotas queued to be written back to their
  associated quota file.
  
  In a system configured with 250,000 vnodes, less than 1000 are
  typically active at any point in time. Prior to this change all
  250,000 vnodes would be locked and inspected twice every minute
  by the syncer. For UFS/FFS filesystems they would be locked and
  inspected six times every minute (twice by each of these three
  routines since each of these routines does its own pass over the
  vnodes associated with a mount point). With this change the syncer
  now locks and inspects only the tiny set of vnodes that are active.
  
  Reviewed by: kib
  Tested by:   Peter Holm
  MFC after:   2 weeks
  
  MFC: 235052 (by pluknet)
  
  Fix mount mutex handling missed in r234386.
  
  MFC: 235241 (by pluknet)
  
  Fix mount interlock oversights from the previous change in r234386.
  
  Reported by:    dougb
  Submitted by:   Mateusz Guzik <mjguzik at gmail com>
  Reviewed by:    Kirk McKusick
  Tested by:      pho
  
  MFC: 235246
  
  Fix mount mutex handling missed in r234386.
  
  MFC: 235619
  
  Update comment to document that the vnode free-list mutex needs to be
  held when updating mnt_activevnodelist and mnt_activevnodelistsize.

Modified:
  stable/9/sys/fs/coda/coda_subr.c
  stable/9/sys/fs/ext2fs/ext2_vfsops.c
  stable/9/sys/fs/msdosfs/msdosfs_vfsops.c
  stable/9/sys/fs/nfsclient/nfs_clsubs.c
  stable/9/sys/fs/nfsclient/nfs_clvfsops.c
  stable/9/sys/fs/nfsserver/nfs_nfsdport.c
  stable/9/sys/kern/vfs_default.c
  stable/9/sys/kern/vfs_mount.c
  stable/9/sys/kern/vfs_subr.c
  stable/9/sys/nfsclient/nfs_subs.c
  stable/9/sys/nfsclient/nfs_vfsops.c
  stable/9/sys/sys/mount.h
  stable/9/sys/sys/vnode.h
  stable/9/sys/ufs/ffs/ffs_snapshot.c
  stable/9/sys/ufs/ffs/ffs_softdep.c
  stable/9/sys/ufs/ffs/ffs_vfsops.c
  stable/9/sys/ufs/ufs/quota.h
  stable/9/sys/ufs/ufs/ufs_inode.c
  stable/9/sys/ufs/ufs/ufs_quota.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)
  stable/9/sys/dev/   (props changed)
  stable/9/sys/dev/e1000/   (props changed)
  stable/9/sys/dev/ixgbe/   (props changed)
  stable/9/sys/fs/   (props changed)
  stable/9/sys/fs/ntfs/   (props changed)
  stable/9/sys/i386/conf/XENHVM   (props changed)
  stable/9/sys/kern/subr_witness.c   (props changed)
  stable/9/sys/modules/   (props changed)

Modified: stable/9/sys/fs/coda/coda_subr.c
==============================================================================
--- stable/9/sys/fs/coda/coda_subr.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/fs/coda/coda_subr.c	Fri May 18 19:48:38 2012	(r235626)
@@ -365,13 +365,7 @@ coda_checkunmounting(struct mount *mp)
 	struct cnode *cp;
 	int count = 0, bad = 0;
 
-	MNT_ILOCK(mp);
-	MNT_VNODE_FOREACH(vp, mp, nvp) {
-		VI_LOCK(vp);
-		if (vp->v_iflag & VI_DOOMED) {
-			VI_UNLOCK(vp);
-			continue;
-		}
+	MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
 		cp = VTOC(vp);
 		count++;
 		if (!(cp->c_flags & C_UNMOUNTING)) {
@@ -381,7 +375,6 @@ coda_checkunmounting(struct mount *mp)
 		}
 		VI_UNLOCK(vp);
 	}
-	MNT_IUNLOCK(mp);
 }
 
 void

Modified: stable/9/sys/fs/ext2fs/ext2_vfsops.c
==============================================================================
--- stable/9/sys/fs/ext2fs/ext2_vfsops.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/fs/ext2fs/ext2_vfsops.c	Fri May 18 19:48:38 2012	(r235626)
@@ -480,19 +480,12 @@ ext2_reload(struct mount *mp, struct thr
 	}
 
 loop:
-	MNT_ILOCK(mp);
-	MNT_VNODE_FOREACH(vp, mp, mvp) {
-		VI_LOCK(vp);
-		if (vp->v_iflag & VI_DOOMED) {
-			VI_UNLOCK(vp);
-			continue;
-		}
-		MNT_IUNLOCK(mp);
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/*
 		 * Step 4: invalidate all cached file data.
 		 */
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
-			MNT_VNODE_FOREACH_ABORT(mp, mvp);
+			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		if (vinvalbuf(vp, 0, 0, 0))
@@ -507,7 +500,7 @@ loop:
 		if (error) {
 			VOP_UNLOCK(vp, 0);
 			vrele(vp);
-			MNT_VNODE_FOREACH_ABORT(mp, mvp);
+			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			return (error);
 		}
 		ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data +
@@ -515,9 +508,7 @@ loop:
 		brelse(bp);
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 	return (0);
 }
 
@@ -839,29 +830,24 @@ ext2_sync(struct mount *mp, int waitfor)
 	/*
 	 * Write back each (modified) inode.
 	 */
-	MNT_ILOCK(mp);
 loop:
-	MNT_VNODE_FOREACH(vp, mp, mvp) {
-		VI_LOCK(vp);
-		if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) {
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
+		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
-		MNT_IUNLOCK(mp);
 		ip = VTOI(vp);
 		if ((ip->i_flag &
 		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
 		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY)) {
 			VI_UNLOCK(vp);
-			MNT_ILOCK(mp);
 			continue;
 		}
 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
 		if (error) {
-			MNT_ILOCK(mp);
 			if (error == ENOENT) {
-				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
+				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto loop;
 			}
 			continue;
@@ -870,9 +856,7 @@ loop:
 			allerror = error;
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 
 	/*
 	 * Force stale file system control information to be flushed.

Modified: stable/9/sys/fs/msdosfs/msdosfs_vfsops.c
==============================================================================
--- stable/9/sys/fs/msdosfs/msdosfs_vfsops.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/fs/msdosfs/msdosfs_vfsops.c	Fri May 18 19:48:38 2012	(r235626)
@@ -834,7 +834,7 @@ msdosfs_unmount(struct mount *mp, int mn
 		vn_printf(vp,
 		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
 		printf("freef %p, freeb %p, mount %p\n",
-		    TAILQ_NEXT(vp, v_freelist), vp->v_freelist.tqe_prev,
+		    TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
 		    vp->v_mount);
 		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
 		    TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),
@@ -923,27 +923,22 @@ msdosfs_sync(struct mount *mp, int waitf
 	/*
 	 * Write back each (modified) denode.
 	 */
-	MNT_ILOCK(mp);
 loop:
-	MNT_VNODE_FOREACH(vp, mp, nvp) {
-		VI_LOCK(vp);
-		if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) {
+	MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
+		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
-		MNT_IUNLOCK(mp);
 		dep = VTODE(vp);
 		if ((dep->de_flag &
 		    (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 &&
 		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY)) {
 			VI_UNLOCK(vp);
-			MNT_ILOCK(mp);
 			continue;
 		}
 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
 		if (error) {
-			MNT_ILOCK(mp);
 			if (error == ENOENT)
 				goto loop;
 			continue;
@@ -953,9 +948,7 @@ loop:
 			allerror = error;
 		VOP_UNLOCK(vp, 0);
 		vrele(vp);
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 
 	/*
 	 * Flush filesystem control info.

Modified: stable/9/sys/fs/nfsclient/nfs_clsubs.c
==============================================================================
--- stable/9/sys/fs/nfsclient/nfs_clsubs.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/fs/nfsclient/nfs_clsubs.c	Fri May 18 19:48:38 2012	(r235626)
@@ -367,17 +367,10 @@ ncl_clearcommit(struct mount *mp)
 	struct buf *bp, *nbp;
 	struct bufobj *bo;
 
-	MNT_ILOCK(mp);
-	MNT_VNODE_FOREACH(vp, mp, nvp) {
+	MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
 		bo = &vp->v_bufobj;
-		VI_LOCK(vp);
-		if (vp->v_iflag & VI_DOOMED) {
-			VI_UNLOCK(vp);
-			continue;
-		}
 		vholdl(vp);
 		VI_UNLOCK(vp);
-		MNT_IUNLOCK(mp);
 		BO_LOCK(bo);
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
@@ -387,9 +380,7 @@ ncl_clearcommit(struct mount *mp)
 		}
 		BO_UNLOCK(bo);
 		vdrop(vp);
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 }
 
 /*

Modified: stable/9/sys/fs/nfsclient/nfs_clvfsops.c
==============================================================================
--- stable/9/sys/fs/nfsclient/nfs_clvfsops.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/fs/nfsclient/nfs_clvfsops.c	Fri May 18 19:48:38 2012	(r235626)
@@ -1510,24 +1510,21 @@ nfs_sync(struct mount *mp, int waitfor)
 		MNT_IUNLOCK(mp);
 		return (EBADF);
 	}
+	MNT_IUNLOCK(mp);
 
 	/*
 	 * Force stale buffer cache information to be flushed.
 	 */
 loop:
-	MNT_VNODE_FOREACH(vp, mp, mvp) {
-		VI_LOCK(vp);
-		MNT_IUNLOCK(mp);
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		/* XXX Racy bv_cnt check. */
 		if (NFSVOPISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
 		    waitfor == MNT_LAZY) {
 			VI_UNLOCK(vp);
-			MNT_ILOCK(mp);
 			continue;
 		}
 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) {
-			MNT_ILOCK(mp);
-			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
+			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		error = VOP_FSYNC(vp, waitfor, td);
@@ -1535,10 +1532,7 @@ loop:
 			allerror = error;
 		NFSVOPUNLOCK(vp, 0);
 		vrele(vp);
-
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 	return (allerror);
 }
 

Modified: stable/9/sys/fs/nfsserver/nfs_nfsdport.c
==============================================================================
--- stable/9/sys/fs/nfsserver/nfs_nfsdport.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/fs/nfsserver/nfs_nfsdport.c	Fri May 18 19:48:38 2012	(r235626)
@@ -2915,12 +2915,14 @@ nfsd_mntinit(void)
 	inited = 1;
 	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
+	TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 	nfsv4root_mnt.mnt_export = NULL;
 	TAILQ_INIT(&nfsv4root_opt);
 	TAILQ_INIT(&nfsv4root_newopt);
 	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 	nfsv4root_mnt.mnt_nvnodelistsize = 0;
+	nfsv4root_mnt.mnt_activevnodelistsize = 0;
 }
 
 /*

Modified: stable/9/sys/kern/vfs_default.c
==============================================================================
--- stable/9/sys/kern/vfs_default.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/kern/vfs_default.c	Fri May 18 19:48:38 2012	(r235626)
@@ -1114,18 +1114,15 @@ vfs_stdsync(mp, waitfor)
 	/*
 	 * Force stale buffer cache information to be flushed.
 	 */
-	MNT_ILOCK(mp);
 loop:
-	MNT_VNODE_FOREACH(vp, mp, mvp) {
-		/* bv_cnt is an acceptable race here. */
-		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
+		if (vp->v_bufobj.bo_dirty.bv_cnt == 0) {
+			VI_UNLOCK(vp);
 			continue;
-		VI_LOCK(vp);
-		MNT_IUNLOCK(mp);
+		}
 		if ((error = vget(vp, lockreq, td)) != 0) {
-			MNT_ILOCK(mp);
 			if (error == ENOENT) {
-				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
+				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto loop;
 			}
 			continue;
@@ -1134,9 +1131,7 @@ loop:
 		if (error)
 			allerror = error;
 		vput(vp);
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 	return (allerror);
 }
 

Modified: stable/9/sys/kern/vfs_mount.c
==============================================================================
--- stable/9/sys/kern/vfs_mount.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/kern/vfs_mount.c	Fri May 18 19:48:38 2012	(r235626)
@@ -79,7 +79,6 @@ SYSCTL_INT(_vfs, OID_AUTO, usermount, CT
     "Unprivileged users may mount and unmount file systems");
 
 MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
-MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
 static uma_zone_t mount_zone;
 
 /* List of mounted filesystems. */
@@ -460,6 +459,8 @@ vfs_mount_alloc(struct vnode *vp, struct
 	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
+	TAILQ_INIT(&mp->mnt_activevnodelist);
+	mp->mnt_activevnodelistsize = 0;
 	mp->mnt_ref = 0;
 	(void) vfs_busy(mp, MBF_NOWAIT);
 	mp->mnt_op = vfsp->vfc_vfsops;
@@ -513,6 +514,8 @@ vfs_mount_destroy(struct mount *mp)
 	}
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
+	if (mp->mnt_activevnodelistsize != 0)
+		panic("vfs_mount_destroy: nonzero activevnodelistsize");
 	if (mp->mnt_lockref != 0)
 		panic("vfs_mount_destroy: nonzero lock refcount");
 	MNT_IUNLOCK(mp);
@@ -1663,10 +1666,14 @@ vfs_copyopt(opts, name, dest, len)
 }
 
 /*
- * This is a helper function for filesystems to traverse their
- * vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h
+ * These are helper functions for filesystems to traverse all
+ * their vnodes.  See MNT_VNODE_FOREACH() in sys/mount.h.
+ *
+ * This interface has been deprecated in favor of MNT_VNODE_FOREACH_ALL.
  */
 
+MALLOC_DECLARE(M_VNODE_MARKER);
+
 struct vnode *
 __mnt_vnode_next(struct vnode **mvp, struct mount *mp)
 {
@@ -1755,7 +1762,6 @@ __mnt_vnode_markerfree(struct vnode **mv
 	MNT_REL(mp);
 }
 
-
 int
 __vfs_statfs(struct mount *mp, struct statfs *sbp)
 {

Modified: stable/9/sys/kern/vfs_subr.c
==============================================================================
--- stable/9/sys/kern/vfs_subr.c	Fri May 18 19:48:33 2012	(r235625)
+++ stable/9/sys/kern/vfs_subr.c	Fri May 18 19:48:38 2012	(r235626)
@@ -102,12 +102,10 @@ static int	flushbuflist(struct bufv *buf
 		    int slpflag, int slptimeo);
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
-static void	vbusy(struct vnode *vp);
 static void	v_incr_usecount(struct vnode *);
 static void	v_decr_usecount(struct vnode *);
 static void	v_decr_useonly(struct vnode *);
 static void	v_upgrade_usecount(struct vnode *);
-static void	vfree(struct vnode *);
 static void	vnlru_free(int);
 static void	vgonel(struct vnode *);
 static void	vfs_knllock(void *arg);
@@ -118,8 +116,7 @@ static void	destroy_vpollinfo(struct vpo
 
 /*
  * Number of vnodes in existence.  Increased whenever getnewvnode()
- * allocates a new vnode, decreased on vdestroy() called on VI_DOOMed
- * vnode.
+ * allocates a new vnode, decreased in vdropl() for VI_DOOMED vnode.
  */
 static unsigned long	numvnodes;
 
@@ -778,12 +775,16 @@ vnlru_free(int count)
 			break;
 		VNASSERT(vp->v_op != NULL, vp,
 		    ("vnlru_free: vnode already reclaimed."));
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		KASSERT((vp->v_iflag & VI_FREE) != 0,
+		    ("Removing vnode not on freelist"));
+		KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+		    ("Mangling active vnode"));
+		TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
 		/*
 		 * Don't recycle if we can't get the interlock.
 		 */
 		if (!VI_TRYLOCK(vp)) {
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
 			continue;
 		}
 		VNASSERT(VCANRECYCLE(vp), vp,
@@ -878,46 +879,6 @@ SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI
  * Routines having to do with the management of the vnode table.
  */
 
-void
-vdestroy(struct vnode *vp)
-{
-	struct bufobj *bo;
-
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	mtx_lock(&vnode_free_list_mtx);
-	numvnodes--;
-	mtx_unlock(&vnode_free_list_mtx);
-	bo = &vp->v_bufobj;
-	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-	    ("cleaned vnode still on the free list."));
-	VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
-	VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
-	VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
-	VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
-	VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
-	VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
-	VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL"));
-	VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
-	VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL"));
-	VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
-	VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
-	VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
-	VI_UNLOCK(vp);
-#ifdef MAC
-	mac_vnode_destroy(vp);
-#endif
-	if (vp->v_pollinfo != NULL)
-		destroy_vpollinfo(vp->v_pollinfo);
-#ifdef INVARIANTS
-	/* XXX Elsewhere we can detect an already freed vnode via NULL v_op. */
-	vp->v_op = NULL;
-#endif
-	lockdestroy(vp->v_vnlock);
-	mtx_destroy(&vp->v_interlock);
-	mtx_destroy(BO_MTX(bo));
-	uma_zfree(vnode_zone, vp);
-}
-
 /*
  * Try to recycle a freed vnode.  We abort if anyone picks up a reference
  * before we actually vgone().  This function must be called with the vnode
@@ -1078,12 +1039,26 @@ static void
 delmntque(struct vnode *vp)
 {
 	struct mount *mp;
+	int active;
 
 	mp = vp->v_mount;
 	if (mp == NULL)
 		return;
 	MNT_ILOCK(mp);
+	VI_LOCK(vp);
+	KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize,
+	    ("Active vnode list size %d > Vnode list size %d",
+	     mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize));
+	active = vp->v_iflag & VI_ACTIVE;
+	vp->v_iflag &= ~VI_ACTIVE;
+	if (active) {
+		mtx_lock(&vnode_free_list_mtx);
+		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
+		mp->mnt_activevnodelistsize--;
+		mtx_unlock(&vnode_free_list_mtx);
+	}
 	vp->v_mount = NULL;
+	VI_UNLOCK(vp);
 	VNASSERT(mp->mnt_nvnodelistsize > 0, vp,
 		("bad mount point vnode list size"));
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
@@ -1123,13 +1098,24 @@ insmntque1(struct vnode *vp, struct moun
 		ASSERT_VOP_ELOCKED(vp,
 		    "insmntque: mp-safe fs and non-locked vp");
 #endif
+	/*
+	 * We acquire the vnode interlock early to ensure that the
+	 * vnode cannot be recycled by another process releasing a
+	 * holdcnt on it before we get it on both the vnode list
+	 * and the active vnode list. The mount mutex protects only
+	 * manipulation of the vnode list and the vnode freelist
+	 * mutex protects only manipulation of the active vnode list.
+	 * Hence the need to hold the vnode interlock throughout.
+	 */
 	MNT_ILOCK(mp);
+	VI_LOCK(vp);
 	if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 &&
 	    ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 ||
 	     mp->mnt_nvnodelistsize == 0)) {
 		locked = VOP_ISLOCKED(vp);
 		if (!locked || (locked == LK_EXCLUSIVE &&
 		     (vp->v_vflag & VV_FORCEINSMQ) == 0)) {
+			VI_UNLOCK(vp);
 			MNT_IUNLOCK(mp);
 			if (dtr != NULL)
 				dtr(vp, dtr_arg);
@@ -1142,6 +1128,14 @@ insmntque1(struct vnode *vp, struct moun
 	VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
 		("neg mount point vnode list size"));
 	mp->mnt_nvnodelistsize++;
+	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+	    ("Activating already active vnode"));
+	vp->v_iflag |= VI_ACTIVE;
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
+	mp->mnt_activevnodelistsize++;
+	mtx_unlock(&vnode_free_list_mtx);
+	VI_UNLOCK(vp);
 	MNT_IUNLOCK(mp);
 	return (0);
 }
@@ -2346,19 +2340,41 @@ vhold(struct vnode *vp)
 	VI_UNLOCK(vp);
 }
 
+/*
+ * Increase the hold count and activate if this is the first reference.
+ */
 void
 vholdl(struct vnode *vp)
 {
+	struct mount *mp;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vp->v_holdcnt++;
-	if (VSHOULDBUSY(vp))
-		vbusy(vp);
+	if (!VSHOULDBUSY(vp))
+		return;
+	ASSERT_VI_LOCKED(vp, "vholdl");
+	VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free"));
+	VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
+	/*
+	 * Remove a vnode from the free list, mark it as in use,
+	 * and put it on the active list.
+	 */
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
+	freevnodes--;
+	vp->v_iflag &= ~(VI_FREE|VI_AGE);
+	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+	    ("Activating already active vnode"));
+	vp->v_iflag |= VI_ACTIVE;
+	mp = vp->v_mount;
+	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
+	mp->mnt_activevnodelistsize++;
+	mtx_unlock(&vnode_free_list_mtx);
 }
 
 /*
- * Note that there is one less who cares about this vnode.  vdrop() is the
- * opposite of vhold().
+ * Note that there is one less who cares about this vnode.
+ * vdrop() is the opposite of vhold().
  */
 void
 vdrop(struct vnode *vp)
@@ -2370,28 +2386,93 @@ vdrop(struct vnode *vp)
 
 /*
  * Drop the hold count of the vnode.  If this is the last reference to
- * the vnode we will free it if it has been vgone'd otherwise it is
- * placed on the free list.
+ * the vnode we place it on the free list unless it has been vgone'd
+ * (marked VI_DOOMED) in which case we will free it.
  */
 void
 vdropl(struct vnode *vp)
 {
+	struct bufobj *bo;
+	struct mount *mp;
+	int active;
 
 	ASSERT_VI_LOCKED(vp, "vdropl");
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	if (vp->v_holdcnt <= 0)
 		panic("vdrop: holdcnt %d", vp->v_holdcnt);
 	vp->v_holdcnt--;
-	if (vp->v_holdcnt == 0) {
-		if (vp->v_iflag & VI_DOOMED) {
-			CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__,
-			    vp);
-			vdestroy(vp);
-			return;
-		} else
-			vfree(vp);
+	if (vp->v_holdcnt > 0) {
+		VI_UNLOCK(vp);
+		return;
 	}
+	if ((vp->v_iflag & VI_DOOMED) == 0) {
+		/*
+		 * Mark a vnode as free: remove it from its active list
+		 * and put it up for recycling on the freelist.
+		 */
+		VNASSERT(vp->v_op != NULL, vp,
+		    ("vdropl: vnode already reclaimed."));
+		VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
+		    ("vnode already free"));
+		VNASSERT(VSHOULDFREE(vp), vp,
+		    ("vdropl: freeing when we shouldn't"));
+		active = vp->v_iflag & VI_ACTIVE;
+		vp->v_iflag &= ~VI_ACTIVE;
+		mp = vp->v_mount;
+		mtx_lock(&vnode_free_list_mtx);
+		if (active) {
+			TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
+			    v_actfreelist);
+			mp->mnt_activevnodelistsize--;
+		}
+		if (vp->v_iflag & VI_AGE) {
+			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_actfreelist);
+		} else {
+			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
+		}
+		freevnodes++;
+		vp->v_iflag &= ~VI_AGE;
+		vp->v_iflag |= VI_FREE;
+		mtx_unlock(&vnode_free_list_mtx);
+		VI_UNLOCK(vp);
+		return;
+	}
+	/*
+	 * The vnode has been marked for destruction, so free it.
+	 */
+	CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
+	mtx_lock(&vnode_free_list_mtx);
+	numvnodes--;
+	mtx_unlock(&vnode_free_list_mtx);
+	bo = &vp->v_bufobj;
+	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
+	    ("cleaned vnode still on the free list."));
+	VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
+	VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
+	VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
+	VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
+	VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
+	VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
+	VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL"));
+	VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
+	VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL"));
+	VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst"));
+	VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src"));
+	VNASSERT(vp->v_cache_dd == NULL, vp, ("vp has namecache for .."));
 	VI_UNLOCK(vp);
+#ifdef MAC
+	mac_vnode_destroy(vp);
+#endif
+	if (vp->v_pollinfo != NULL)
+		destroy_vpollinfo(vp->v_pollinfo);
+#ifdef INVARIANTS
+	/* XXX Elsewhere we detect an already freed vnode via NULL v_op. */
+	vp->v_op = NULL;
+#endif
+	lockdestroy(vp->v_vnlock);
+	mtx_destroy(&vp->v_interlock);
+	mtx_destroy(BO_MTX(bo));
+	uma_zfree(vnode_zone, vp);
 }
 
 /*
@@ -2403,6 +2484,7 @@ vdropl(struct vnode *vp)
 void
 vinactive(struct vnode *vp, struct thread *td)
 {
+	struct vm_object *obj;
 
 	ASSERT_VOP_ELOCKED(vp, "vinactive");
 	ASSERT_VI_LOCKED(vp, "vinactive");
@@ -2412,6 +2494,17 @@ vinactive(struct vnode *vp, struct threa
 	vp->v_iflag |= VI_DOINGINACT;
 	vp->v_iflag &= ~VI_OWEINACT;
 	VI_UNLOCK(vp);
+	/*
+	 * Before moving off the active list, we must be sure that any
+	 * modified pages are on the vnode's dirty list since these will
+	 * no longer be checked once the vnode is on the inactive list.
+	 */
+	obj = vp->v_object;
+	if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0) {
+		VM_OBJECT_LOCK(obj);
+		vm_object_page_clean(obj, 0, 0, OBJPC_NOSYNC);
+		VM_OBJECT_UNLOCK(obj);
+	}
 	VOP_INACTIVE(vp, td);
 	VI_LOCK(vp);
 	VNASSERT(vp->v_iflag & VI_DOINGINACT, vp,
@@ -2467,17 +2560,13 @@ vflush(struct mount *mp, int rootrefs, i
 		}
 		vput(rootvp);
 	}
-	MNT_ILOCK(mp);
 loop:
-	MNT_VNODE_FOREACH(vp, mp, mvp) {
-		VI_LOCK(vp);
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		vholdl(vp);
-		MNT_IUNLOCK(mp);
 		error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE);
 		if (error) {
 			vdrop(vp);
-			MNT_ILOCK(mp);
-			MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
+			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 			goto loop;
 		}
 		/*
@@ -2486,7 +2575,6 @@ loop:
 		if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
 			VOP_UNLOCK(vp, 0);
 			vdrop(vp);
-			MNT_ILOCK(mp);
 			continue;
 		}
 		/*
@@ -2504,7 +2592,7 @@ loop:
 			if (error != 0) {
 				VOP_UNLOCK(vp, 0);
 				vdrop(vp);
-				MNT_VNODE_FOREACH_ABORT(mp, mvp);
+				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				return (error);
 			}
 			error = VOP_GETATTR(vp, &vattr, td->td_ucred);
@@ -2515,7 +2603,6 @@ loop:
 			    (vp->v_writecount == 0 || vp->v_type != VREG)) {
 				VOP_UNLOCK(vp, 0);
 				vdropl(vp);
-				MNT_ILOCK(mp);
 				continue;
 			}
 		} else
@@ -2540,9 +2627,7 @@ loop:
 		}
 		VOP_UNLOCK(vp, 0);
 		vdropl(vp);
-		MNT_ILOCK(mp);
 	}
-	MNT_IUNLOCK(mp);
 	if (rootrefs > 0 && (flags & FORCECLOSE) == 0) {
 		/*
 		 * If just the root vnode is busy, and if its refcount
@@ -2993,6 +3078,8 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	db_printf("    mnt_ref = %d\n", mp->mnt_ref);
 	db_printf("    mnt_gen = %d\n", mp->mnt_gen);
 	db_printf("    mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
+	db_printf("    mnt_activevnodelistsize = %d\n",
+	    mp->mnt_activevnodelistsize);
 	db_printf("    mnt_writeopcount = %d\n", mp->mnt_writeopcount);
 	db_printf("    mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
 	db_printf("    mnt_iosize_max = %d\n", mp->mnt_iosize_max);
@@ -3002,15 +3089,23 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	    mp->mnt_secondary_accwrites);
 	db_printf("    mnt_gjprovider = %s\n",
 	    mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
-	db_printf("\n");
 
-	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+	db_printf("\n\nList of active vnodes\n");
+	TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
 		if (vp->v_type != VMARKER) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
+	db_printf("\n\nList of inactive vnodes\n");
+	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+		if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) {
+			vn_printf(vp, "vnode ");
+			if (db_pager_quit)
+				break;
+		}
+	}
 }
 #endif	/* DDB */
 
@@ -3279,19 +3374,15 @@ vfs_msync(struct mount *mp, int flags)
 	struct vm_object *obj;
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
-	MNT_ILOCK(mp);
-	MNT_VNODE_FOREACH(vp, mp, mvp) {
-		VI_LOCK(vp);
+	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
 		obj = vp->v_object;
 		if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 &&
 		    (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) {
-			MNT_IUNLOCK(mp);
 			if (!vget(vp,
 			    LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK,
 			    curthread)) {
 				if (vp->v_vflag & VV_NOSYNC) {	/* unlinked */
 					vput(vp);
-					MNT_ILOCK(mp);
 					continue;
 				}
 
@@ -3305,55 +3396,9 @@ vfs_msync(struct mount *mp, int flags)
 				}
 				vput(vp);
 			}
-			MNT_ILOCK(mp);
 		} else
 			VI_UNLOCK(vp);
 	}
-	MNT_IUNLOCK(mp);
-}
-
-/*
- * Mark a vnode as free, putting it up for recycling.
- */
-static void
-vfree(struct vnode *vp)
-{
-
-	ASSERT_VI_LOCKED(vp, "vfree");
-	mtx_lock(&vnode_free_list_mtx);
-	VNASSERT(vp->v_op != NULL, vp, ("vfree: vnode already reclaimed."));
-	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free"));
-	VNASSERT(VSHOULDFREE(vp), vp, ("vfree: freeing when we shouldn't"));
-	VNASSERT((vp->v_iflag & VI_DOOMED) == 0, vp,
-	    ("vfree: Freeing doomed vnode"));
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if (vp->v_iflag & VI_AGE) {
-		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
-	} else {
-		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-	}
-	freevnodes++;
-	vp->v_iflag &= ~VI_AGE;
-	vp->v_iflag |= VI_FREE;
-	mtx_unlock(&vnode_free_list_mtx);
-}
-
-/*
- * Opposite of vfree() - mark a vnode as in use.
- */
-static void
-vbusy(struct vnode *vp)
-{
-	ASSERT_VI_LOCKED(vp, "vbusy");
-	VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free"));
-	VNASSERT(vp->v_op != NULL, vp, ("vbusy: vnode already reclaimed."));
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-
-	mtx_lock(&vnode_free_list_mtx);
-	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
-	freevnodes--;
-	vp->v_iflag &= ~(VI_FREE|VI_AGE);
-	mtx_unlock(&vnode_free_list_mtx);
 }
 
 static void
@@ -4504,3 +4549,187 @@ vfs_unixify_accmode(accmode_t *accmode)
 
 	return (0);
 }
+
+/*
+ * These are helper functions for filesystems to traverse all
+ * their vnodes.  See MNT_VNODE_FOREACH_ALL() in sys/mount.h.
+ *
+ * This interface replaces MNT_VNODE_FOREACH.
+ */
+
+MALLOC_DEFINE(M_VNODE_MARKER, "vnodemarker", "vnode marker");
+
+/*
+ * Advance a traversal of mp's mnt_nvnodelist: starting from the marker
+ * *mvp, find the next entry that is neither another marker (VMARKER)
+ * nor flagged VI_DOOMED.
+ *
+ * Returns that vnode with VI_LOCK() taken on it and the mount interlock
+ * released.  Returns NULL when the end of the list is reached; in that
+ * case the marker has been handed to __mnt_vnode_markerfree_all(), which
+ * unlinks and frees it and sets *mvp to NULL.
+ */
+struct vnode *
+__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp)
+{
+	struct vnode *vp;
+
+	/* Yield first, while no locks are held by this function. */
+	if (should_yield())
+		kern_yield(PRI_UNCHANGED);
+	MNT_ILOCK(mp);
+	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+	/* Resume from the marker, skipping markers and doomed vnodes. */
+	vp = TAILQ_NEXT(*mvp, v_nmntvnodes);
+	while (vp != NULL && (vp->v_type == VMARKER ||
+	    (vp->v_iflag & VI_DOOMED) != 0))
+		vp = TAILQ_NEXT(vp, v_nmntvnodes);
+
+	/* Check if we are done */
+	if (vp == NULL) {
+		__mnt_vnode_markerfree_all(mvp, mp);
+		/* MNT_IUNLOCK(mp); -- done in above function */
+		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+		return (NULL);
+	}
+	/* Re-park the marker right behind vp so the next call resumes there. */
+	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
+	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
+	/* Take the vnode interlock before the mount interlock is dropped. */
+	VI_LOCK(vp);
+	MNT_IUNLOCK(mp);
+	return (vp);
+}
+
+/*
+ * Begin a traversal of mp's mnt_nvnodelist.  Allocates a zeroed marker
+ * vnode into *mvp, takes a reference on mp (MNT_REF) and looks for the
+ * first list entry that is neither a marker nor flagged VI_DOOMED.
+ *
+ * On success the marker is linked into the list right after the vnode
+ * found, and that vnode is returned with VI_LOCK() taken and the mount
+ * interlock released.  If no eligible vnode exists, the mount reference
+ * is dropped, the marker is freed, *mvp is set to NULL and NULL is
+ * returned.
+ */
+struct vnode *
+__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp)
+{
+	struct vnode *vp;
+
+	/* The marker is allocated before the mount interlock is taken. */
+	*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
+	MNT_ILOCK(mp);
+	MNT_REF(mp);
+	(*mvp)->v_type = VMARKER;
+
+	vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
+	while (vp != NULL && (vp->v_type == VMARKER ||
+	    (vp->v_iflag & VI_DOOMED) != 0))
+		vp = TAILQ_NEXT(vp, v_nmntvnodes);
+
+	/* Check if we are done */
+	if (vp == NULL) {
+		/* Nothing to iterate: undo the reference and the allocation. */
+		MNT_REL(mp);
+		MNT_IUNLOCK(mp);
+		free(*mvp, M_VNODE_MARKER);
+		*mvp = NULL;
+		return (NULL);
+	}
+	/* v_mount is what the later KASSERTs on the marker compare to mp. */
+	(*mvp)->v_mount = mp;
+	TAILQ_INSERT_AFTER(&mp->mnt_nvnodelist, vp, *mvp, v_nmntvnodes);
+	VI_LOCK(vp);
+	MNT_IUNLOCK(mp);
+	return (vp);
+}
+
+
+/*
+ * Tear down the marker used by a traversal of mp's mnt_nvnodelist.
+ *
+ * If *mvp is already NULL only the mount interlock is released.
+ * Otherwise the marker is unlinked from mnt_nvnodelist, the mount
+ * reference is dropped (MNT_REL), the mount interlock is released, and
+ * the marker is freed with *mvp reset to NULL.
+ *
+ * Both paths end with MNT_IUNLOCK(mp), so the function returns with the
+ * mount interlock released; the non-NULL path asserts it is held on
+ * entry.
+ */
+void
+__mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp)
+{
+
+	if (*mvp == NULL) {
+		/* NOTE(review): presumably the caller holds the interlock here too; not asserted. */
+		MNT_IUNLOCK(mp);
+		return;
+	}
+
+	mtx_assert(MNT_MTX(mp), MA_OWNED);
+
+	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+	TAILQ_REMOVE(&mp->mnt_nvnodelist, *mvp, v_nmntvnodes);
+	MNT_REL(mp);
+	MNT_IUNLOCK(mp);
+	/* The marker is freed only after the mount interlock is dropped. */
+	free(*mvp, M_VNODE_MARKER);
+	*mvp = NULL;
+}
+
+/*
+ * These are helper functions for filesystems to traverse their
+ * active vnodes.  See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h.
+ */
+/*
+ * Advance an active-vnode traversal: following the v_actfreelist links
+ * from the marker *mvp, find the next vnode whose v_mount is mp, that is
+ * not a marker (VMARKER) and is not flagged VI_DOOMED.
+ *
+ * Each candidate is examined with its interlock held (VI_LOCK); on a
+ * match the interlock is kept, so the vnode is returned locked and with
+ * the mount interlock released.  Returns NULL when no such vnode
+ * remains; the marker is then passed to __mnt_vnode_markerfree_active(),
+ * which (per the assertion below) is expected to release the mount
+ * interlock.
+ */
+struct vnode *
+__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+	struct vnode *vp, *nvp;
+
+	if (should_yield())
+		kern_yield(PRI_UNCHANGED);
+	MNT_ILOCK(mp);
+	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+	/*
+	 * NOTE(review): this walk reads the v_actfreelist links with only
+	 * the mount interlock held, while the marker relocation below
+	 * takes vnode_free_list_mtx -- confirm which lock protects the
+	 * active list.
+	 */
+	vp = TAILQ_NEXT(*mvp, v_actfreelist);
+	while (vp != NULL) {
+		VI_LOCK(vp);
+		if (vp->v_mount == mp && vp->v_type != VMARKER &&
+		    (vp->v_iflag & VI_DOOMED) == 0)
+			break;		/* match: leave with vp's interlock held */
+		/* Fetch the successor before dropping this vnode's interlock. */
+		nvp = TAILQ_NEXT(vp, v_actfreelist);
+		VI_UNLOCK(vp);
+		vp = nvp;
+	}
+
+	/* Check if we are done */
+	if (vp == NULL) {
+		__mnt_vnode_markerfree_active(mvp, mp);
+		/* MNT_IUNLOCK(mp); -- done in above function */
+		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+		return (NULL);
+	}
+	/* Move the marker to sit right after vp, under vnode_free_list_mtx. */
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
+	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
+	mtx_unlock(&vnode_free_list_mtx);
+	MNT_IUNLOCK(mp);
+	return (vp);
+}
+
+struct vnode *
+__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
+{
+	struct vnode *vp, *nvp;
+
+	*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
+	MNT_ILOCK(mp);
+	MNT_REF(mp);
+	(*mvp)->v_type = VMARKER;
+
+	vp = TAILQ_NEXT(*mvp, v_actfreelist);
+	while (vp != NULL) {
+		VI_LOCK(vp);
+		if (vp->v_mount == mp && vp->v_type != VMARKER &&
+		    (vp->v_iflag & VI_DOOMED) == 0)
+			break;
+		nvp = TAILQ_NEXT(vp, v_actfreelist);
+		VI_UNLOCK(vp);
+		vp = nvp;
+	}
+
+	/* Check if we are done */
+	if (vp == NULL) {
+		MNT_REL(mp);
+		MNT_IUNLOCK(mp);
+		free(*mvp, M_VNODE_MARKER);
+		*mvp = NULL;
+		return (NULL);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-stable-9 mailing list