svn commit: r306512 - in head/sys: kern sys
Mateusz Guzik
mjg at FreeBSD.org
Fri Sep 30 17:27:19 UTC 2016
Author: mjg
Date: Fri Sep 30 17:27:17 2016
New Revision: 306512
URL: https://svnweb.freebsd.org/changeset/base/306512
Log:
vfs: batch free vnodes in per-mnt lists
Previously, free vnodes were always returned directly to the global
LRU list. With this change, up to mnt_free_list_batch vnodes are collected
first on per-mount lists.
Syncer runs always return the batch regardless of its size.
While vnodes on per-mnt lists are not counted as free, they can be
returned in case of vnode shortage.
Reviewed by: kib
Tested by: pho
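
For illustration only, a minimal userspace sketch of the batching scheme described
in the log: freed vnodes are parked on a small per-mount list and moved to the
global free list once the batch threshold is reached (or when a flush is forced,
as a syncer run would do). The structures are simplified stand-ins for the kernel
ones, the threshold is lowered to 4, and all locking is omitted for brevity.

/* Userspace model of per-mount batching of free vnodes (not kernel code). */
#include <sys/queue.h>
#include <stdio.h>

struct vnode {
	int v_id;
	TAILQ_ENTRY(vnode) v_actfreelist;
};
TAILQ_HEAD(vnodelst, vnode);

static struct vnodelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
static unsigned long freevnodes;
static int mnt_free_list_batch = 4;	/* 128 in the committed code */

struct mount {
	struct vnodelst mnt_tmpfreevnodelist;
	int mnt_tmpfreevnodelistsize;
};

/* Move the whole per-mount batch onto the global free list. */
static void
vnlru_return_batch(struct mount *mp)
{
	TAILQ_CONCAT(&vnode_free_list, &mp->mnt_tmpfreevnodelist, v_actfreelist);
	freevnodes += mp->mnt_tmpfreevnodelistsize;
	mp->mnt_tmpfreevnodelistsize = 0;
}

/* Free a vnode: queue it on the per-mount list, flush when the batch fills. */
static void
vdrop_to_mount(struct mount *mp, struct vnode *vp)
{
	TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
	mp->mnt_tmpfreevnodelistsize++;
	if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
		vnlru_return_batch(mp);
}

int
main(void)
{
	static struct vnode vns[10];
	struct mount mp;
	int i;

	TAILQ_INIT(&mp.mnt_tmpfreevnodelist);
	mp.mnt_tmpfreevnodelistsize = 0;

	for (i = 0; i < 10; i++) {
		vns[i].v_id = i;
		vdrop_to_mount(&mp, &vns[i]);
		printf("freed %d: batch=%d global=%lu\n", i,
		    mp.mnt_tmpfreevnodelistsize, freevnodes);
	}
	/* A syncer run flushes whatever is left, regardless of size. */
	vnlru_return_batch(&mp);
	printf("after flush: batch=%d global=%lu\n",
	    mp.mnt_tmpfreevnodelistsize, freevnodes);
	return (0);
}

The effect in the committed code is that the global vnode_free_list_mtx is taken
roughly once per batch instead of on every vdrop, with the per-mount mnt_listmtx
covering the active and temporary free lists in between.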
Modified:
head/sys/kern/vfs_mount.c
head/sys/kern/vfs_subr.c
head/sys/sys/mount.h
head/sys/sys/vnode.h
Modified: head/sys/kern/vfs_mount.c
==============================================================================
--- head/sys/kern/vfs_mount.c Fri Sep 30 17:19:43 2016 (r306511)
+++ head/sys/kern/vfs_mount.c Fri Sep 30 17:27:17 2016 (r306512)
@@ -109,6 +109,7 @@ mount_init(void *mem, int size, int flag
mp = (struct mount *)mem;
mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
+ mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
return (0);
}
@@ -120,6 +121,7 @@ mount_fini(void *mem, int size)
mp = (struct mount *)mem;
lockdestroy(&mp->mnt_explock);
+ mtx_destroy(&mp->mnt_listmtx);
mtx_destroy(&mp->mnt_mtx);
}
@@ -461,6 +463,8 @@ vfs_mount_alloc(struct vnode *vp, struct
mp->mnt_nvnodelistsize = 0;
TAILQ_INIT(&mp->mnt_activevnodelist);
mp->mnt_activevnodelistsize = 0;
+ TAILQ_INIT(&mp->mnt_tmpfreevnodelist);
+ mp->mnt_tmpfreevnodelistsize = 0;
mp->mnt_ref = 0;
(void) vfs_busy(mp, MBF_NOWAIT);
atomic_add_acq_int(&vfsp->vfc_refcount, 1);
Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c Fri Sep 30 17:19:43 2016 (r306511)
+++ head/sys/kern/vfs_subr.c Fri Sep 30 17:27:17 2016 (r306512)
@@ -112,6 +112,7 @@ static void vfs_knllock(void *arg);
static void vfs_knlunlock(void *arg);
static void vfs_knl_assert_locked(void *arg);
static void vfs_knl_assert_unlocked(void *arg);
+static void vnlru_return_batches(struct vfsops *mnt_op);
static void destroy_vpollinfo(struct vpollinfo *vi);
/*
@@ -127,6 +128,10 @@ static u_long vnodes_created;
SYSCTL_ULONG(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created,
0, "Number of vnodes created by getnewvnode");
+static u_long mnt_free_list_batch = 128;
+SYSCTL_ULONG(_vfs, OID_AUTO, mnt_free_list_batch, CTLFLAG_RW,
+ &mnt_free_list_batch, 0, "Limit of vnodes held on mnt's free list");
+
/*
* Conversion tables for conversion from vnode types to inode formats
* and back.
@@ -953,7 +958,9 @@ vnlru_free_locked(int count, struct vfso
{
struct vnode *vp;
struct mount *mp;
+ bool tried_batches;
+ tried_batches = false;
mtx_assert(&vnode_free_list_mtx, MA_OWNED);
if (count > max_vnlru_free)
count = max_vnlru_free;
@@ -963,8 +970,16 @@ vnlru_free_locked(int count, struct vfso
* The list can be modified while the free_list_mtx
* has been dropped and vp could be NULL here.
*/
- if (!vp)
- break;
+ if (vp == NULL) {
+ if (tried_batches)
+ break;
+ mtx_unlock(&vnode_free_list_mtx);
+ vnlru_return_batches(mnt_op);
+ tried_batches = true;
+ mtx_lock(&vnode_free_list_mtx);
+ continue;
+ }
+
VNASSERT(vp->v_op != NULL, vp,
("vnlru_free: vnode already reclaimed."));
KASSERT((vp->v_iflag & VI_FREE) != 0,
@@ -1041,6 +1056,63 @@ vspace(void)
return (space);
}
+static void
+vnlru_return_batch_locked(struct mount *mp)
+{
+ struct vnode *vp;
+
+ mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+
+ if (mp->mnt_tmpfreevnodelistsize == 0)
+ return;
+
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_FOREACH(vp, &mp->mnt_tmpfreevnodelist, v_actfreelist) {
+ VNASSERT((vp->v_mflag & VMP_TMPMNTFREELIST) != 0, vp,
+ ("vnode without VMP_TMPMNTFREELIST on mnt_tmpfreevnodelist"));
+ vp->v_mflag &= ~VMP_TMPMNTFREELIST;
+ }
+ TAILQ_CONCAT(&vnode_free_list, &mp->mnt_tmpfreevnodelist, v_actfreelist);
+ freevnodes += mp->mnt_tmpfreevnodelistsize;
+ mp->mnt_tmpfreevnodelistsize = 0;
+ mtx_unlock(&vnode_free_list_mtx);
+}
+
+static void
+vnlru_return_batch(struct mount *mp)
+{
+
+ mtx_lock(&mp->mnt_listmtx);
+ vnlru_return_batch_locked(mp);
+ mtx_unlock(&mp->mnt_listmtx);
+}
+
+static void
+vnlru_return_batches(struct vfsops *mnt_op)
+{
+ struct mount *mp, *nmp;
+ bool need_unbusy;
+
+ mtx_lock(&mountlist_mtx);
+ for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
+ need_unbusy = false;
+ if (mnt_op != NULL && mp->mnt_op != mnt_op)
+ goto next;
+ if (mp->mnt_tmpfreevnodelistsize == 0)
+ goto next;
+ if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) == 0) {
+ vnlru_return_batch(mp);
+ need_unbusy = true;
+ mtx_lock(&mountlist_mtx);
+ }
+next:
+ nmp = TAILQ_NEXT(mp, mnt_list);
+ if (need_unbusy)
+ vfs_unbusy(mp);
+ }
+ mtx_unlock(&mountlist_mtx);
+}
+
/*
* Attempt to recycle vnodes in a context that is always safe to block.
* Calling vlrurecycle() from the bowels of filesystem code has some
@@ -1068,9 +1140,8 @@ vnlru_proc(void)
* adjusted using its sysctl, or emergency growth), first
* try to reduce it by discarding from the free list.
*/
- if (numvnodes > desiredvnodes && freevnodes > 0)
- vnlru_free_locked(ulmin(numvnodes - desiredvnodes,
- freevnodes), NULL);
+ if (numvnodes > desiredvnodes)
+ vnlru_free_locked(numvnodes - desiredvnodes, NULL);
/*
* Sleep if the vnode cache is in a good state. This is
* when it is not over-full and has space for about a 4%
@@ -1457,10 +1528,10 @@ delmntque(struct vnode *vp)
active = vp->v_iflag & VI_ACTIVE;
vp->v_iflag &= ~VI_ACTIVE;
if (active) {
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize--;
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
}
vp->v_mount = NULL;
VI_UNLOCK(vp);
@@ -1525,10 +1596,10 @@ insmntque1(struct vnode *vp, struct moun
KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
("Activating already active vnode"));
vp->v_iflag |= VI_ACTIVE;
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize++;
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
VI_UNLOCK(vp);
MNT_IUNLOCK(mp);
return (0);
@@ -2753,17 +2824,25 @@ _vhold(struct vnode *vp, bool locked)
* Remove a vnode from the free list, mark it as in use,
* and put it on the active list.
*/
- mtx_lock(&vnode_free_list_mtx);
- TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
- freevnodes--;
- vp->v_iflag &= ~VI_FREE;
+ mp = vp->v_mount;
+ mtx_lock(&mp->mnt_listmtx);
+ if ((vp->v_mflag & VMP_TMPMNTFREELIST) != 0) {
+ TAILQ_REMOVE(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
+ mp->mnt_tmpfreevnodelistsize--;
+ vp->v_mflag &= ~VMP_TMPMNTFREELIST;
+ } else {
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
+ freevnodes--;
+ mtx_unlock(&vnode_free_list_mtx);
+ }
KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
("Activating already active vnode"));
+ vp->v_iflag &= ~VI_FREE;
vp->v_iflag |= VI_ACTIVE;
- mp = vp->v_mount;
TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize++;
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
refcount_acquire(&vp->v_holdcnt);
if (!locked)
VI_UNLOCK(vp);
@@ -2819,21 +2898,25 @@ _vdrop(struct vnode *vp, bool locked)
if ((vp->v_iflag & VI_OWEINACT) == 0) {
vp->v_iflag &= ~VI_ACTIVE;
mp = vp->v_mount;
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
if (active) {
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
v_actfreelist);
mp->mnt_activevnodelistsize--;
}
- TAILQ_INSERT_TAIL(&vnode_free_list, vp,
+ TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp,
v_actfreelist);
- freevnodes++;
+ mp->mnt_tmpfreevnodelistsize++;
vp->v_iflag |= VI_FREE;
- mtx_unlock(&vnode_free_list_mtx);
+ vp->v_mflag |= VMP_TMPMNTFREELIST;
+ VI_UNLOCK(vp);
+ if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
+ vnlru_return_batch_locked(mp);
+ mtx_unlock(&mp->mnt_listmtx);
} else {
+ VI_UNLOCK(vp);
atomic_add_long(&free_owe_inact, 1);
}
- VI_UNLOCK(vp);
return;
}
/*
@@ -3926,6 +4009,9 @@ vfs_msync(struct mount *mp, int flags)
struct vm_object *obj;
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
+
+ vnlru_return_batch(mp);
+
MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
obj = vp->v_object;
if (obj != NULL && (obj->flags & OBJ_MIGHTBEDIRTY) != 0 &&
@@ -5236,7 +5322,7 @@ mnt_vnode_next_active(struct vnode **mvp
{
struct vnode *vp, *nvp;
- mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+ mtx_assert(&mp->mnt_listmtx, MA_OWNED);
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
restart:
vp = TAILQ_NEXT(*mvp, v_actfreelist);
@@ -5249,9 +5335,9 @@ restart:
if (!VI_TRYLOCK(vp)) {
if (mp_ncpus == 1 || should_yield()) {
TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
pause("vnacti", 1);
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
goto restart;
}
continue;
@@ -5268,12 +5354,12 @@ restart:
/* Check if we are done */
if (vp == NULL) {
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
mnt_vnode_markerfree_active(mvp, mp);
return (NULL);
}
TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
ASSERT_VI_LOCKED(vp, "active iter");
KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
return (vp);
@@ -5285,7 +5371,7 @@ __mnt_vnode_next_active(struct vnode **m
if (should_yield())
kern_yield(PRI_USER);
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
return (mnt_vnode_next_active(mvp, mp));
}
@@ -5301,10 +5387,10 @@ __mnt_vnode_first_active(struct vnode **
(*mvp)->v_type = VMARKER;
(*mvp)->v_mount = mp;
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
vp = TAILQ_FIRST(&mp->mnt_activevnodelist);
if (vp == NULL) {
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
mnt_vnode_markerfree_active(mvp, mp);
return (NULL);
}
@@ -5319,8 +5405,8 @@ __mnt_vnode_markerfree_active(struct vno
if (*mvp == NULL)
return;
- mtx_lock(&vnode_free_list_mtx);
+ mtx_lock(&mp->mnt_listmtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
- mtx_unlock(&vnode_free_list_mtx);
+ mtx_unlock(&mp->mnt_listmtx);
mnt_vnode_markerfree_active(mvp, mp);
}
Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h Fri Sep 30 17:19:43 2016 (r306511)
+++ head/sys/sys/mount.h Fri Sep 30 17:27:17 2016 (r306512)
@@ -147,6 +147,7 @@ struct vfsopt {
* put on a doubly linked list.
*
* Lock reference:
+ * l - mnt_listmtx
* m - mountlist_mtx
* i - interlock
* v - vnode freelist mutex
@@ -166,8 +167,6 @@ struct mount {
int mnt_ref; /* (i) Reference count */
struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */
int mnt_nvnodelistsize; /* (i) # of vnodes */
- struct vnodelst mnt_activevnodelist; /* (v) list of active vnodes */
- int mnt_activevnodelistsize;/* (v) # of active vnodes */
int mnt_writeopcount; /* (i) write syscalls pending */
int mnt_kern_flag; /* (i) kernel only flags */
uint64_t mnt_flag; /* (i) flags shared with user */
@@ -188,6 +187,11 @@ struct mount {
struct thread *mnt_susp_owner; /* (i) thread owning suspension */
#define mnt_endzero mnt_gjprovider
char *mnt_gjprovider; /* gjournal provider name */
+ struct mtx mnt_listmtx;
+ struct vnodelst mnt_activevnodelist; /* (l) list of active vnodes */
+ int mnt_activevnodelistsize;/* (l) # of active vnodes */
+ struct vnodelst mnt_tmpfreevnodelist; /* (l) list of free vnodes */
+ int mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */
struct lock mnt_explock; /* vfs_export walkers lock */
TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */
TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/
Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h Fri Sep 30 17:19:43 2016 (r306511)
+++ head/sys/sys/vnode.h Fri Sep 30 17:27:17 2016 (r306512)
@@ -75,8 +75,8 @@ struct vpollinfo {
*
* Lock reference:
* c - namecache mutex
- * f - freelist mutex
* i - interlock
+ * l - mp mnt_listmtx or freelist mutex
* I - updated with atomics, 0->1 and 1->0 transitions with interlock held
* m - mount point interlock
* p - pollinfo lock
@@ -144,7 +144,7 @@ struct vnode {
/*
* The machinery of being a vnode
*/
- TAILQ_ENTRY(vnode) v_actfreelist; /* f vnode active/free lists */
+ TAILQ_ENTRY(vnode) v_actfreelist; /* l vnode active/free lists */
struct bufobj v_bufobj; /* * Buffer cache object */
/*
@@ -167,6 +167,7 @@ struct vnode {
u_int v_usecount; /* I ref count of users */
u_int v_iflag; /* i vnode flags (see below) */
u_int v_vflag; /* v vnode flags */
+ u_int v_mflag; /* l mnt-specific vnode flags */
int v_writecount; /* v ref count of writers */
u_int v_hash;
enum vtype v_type; /* u vnode type */
@@ -256,6 +257,8 @@ struct xvnode {
#define VV_MD 0x0800 /* vnode backs the md device */
#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */
+#define VMP_TMPMNTFREELIST 0x0001 /* Vnode is on mnt's tmp free list */
+
/*
* Vnode attributes. A field value of VNOVAL represents a field whose value
* is unavailable (getattr) or which is not to be changed (setattr).