svn commit: r249524 - in releng/8.4/sys: cddl/contrib/opensolaris/uts/common/fs/zfs cddl/contrib/opensolaris/uts/common/fs/zfs/sys kern sys
Steven Hartland
smh at FreeBSD.org
Mon Apr 15 19:45:11 UTC 2013
Author: smh
Date: Mon Apr 15 19:45:09 2013
New Revision: 249524
URL: http://svnweb.freebsd.org/changeset/base/249524
Log:
MFC three change sets (details below) which fix system shutdown and
reboots hanging at "All buffers synced" when using ZFS.
MFC r241556:
Add a KPI that allows reserving some amount of space in the numvnodes
counter without actually allocating the vnodes.
For KBI stability, the td_vp_reserv field was moved to the end of struct thread.
MFC r241628:
zfs: make use of getnewvnode_reserve in zfs_mknode and zfs_zget
MFC r243520, r243521:
zfs: overhaul zfs-vfs glue for vnode life-cycle management
Reviewed by: avg
Approved by: re (jpaetzel), avg (co-mentor)
Modified:
releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
releng/8.4/sys/kern/kern_thread.c
releng/8.4/sys/kern/subr_trap.c
releng/8.4/sys/kern/vfs_subr.c
releng/8.4/sys/sys/proc.h
releng/8.4/sys/sys/vnode.h
Directory Properties:
releng/8.4/sys/ (props changed)
releng/8.4/sys/cddl/ (props changed)
releng/8.4/sys/cddl/contrib/opensolaris/ (props changed)
releng/8.4/sys/kern/ (props changed)
releng/8.4/sys/sys/ (props changed)
Modified: releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h
==============================================================================
--- releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h Mon Apr 15 19:45:09 2013 (r249524)
@@ -207,8 +207,6 @@ typedef struct znode {
list_node_t z_link_node; /* all znodes in fs link */
sa_handle_t *z_sa_hdl; /* handle to sa data */
boolean_t z_is_sa; /* are we native sa? */
- /* FreeBSD-specific field. */
- struct task z_task;
} znode_t;
Modified: releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c Mon Apr 15 19:45:09 2013 (r249524)
@@ -1844,18 +1844,6 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolea
zfsvfs->z_unmounted = B_TRUE;
rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
rw_exit(&zfsvfs->z_teardown_inactive_lock);
-
-#ifdef __FreeBSD__
- /*
- * Some znodes might not be fully reclaimed, wait for them.
- */
- mutex_enter(&zfsvfs->z_znodes_lock);
- while (list_head(&zfsvfs->z_all_znodes) != NULL) {
- msleep(zfsvfs, &zfsvfs->z_znodes_lock, 0,
- "zteardown", 0);
- }
- mutex_exit(&zfsvfs->z_znodes_lock);
-#endif
}
/*
Modified: releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Mon Apr 15 19:45:09 2013 (r249524)
@@ -4579,14 +4579,22 @@ zfs_inactive(vnode_t *vp, cred_t *cr, ca
* The fs has been unmounted, or we did a
* suspend/resume and this file no longer exists.
*/
- VI_LOCK(vp);
- ASSERT(vp->v_count <= 1);
- vp->v_count = 0;
- VI_UNLOCK(vp);
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
vrecycle(vp, curthread);
+ return;
+ }
+
+ mutex_enter(&zp->z_lock);
+ if (zp->z_unlinked) {
+ /*
+ * Fast path to recycle a vnode of a removed file.
+ */
+ mutex_exit(&zp->z_lock);
rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ vrecycle(vp, curthread);
return;
}
+ mutex_exit(&zp->z_lock);
if (zp->z_atime_dirty && zp->z_unlinked == 0) {
dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
@@ -4605,8 +4613,6 @@ zfs_inactive(vnode_t *vp, cred_t *cr, ca
dmu_tx_commit(tx);
}
}
-
- zfs_zinactive(zp);
rw_exit(&zfsvfs->z_teardown_inactive_lock);
}
@@ -6116,28 +6122,6 @@ zfs_freebsd_inactive(ap)
return (0);
}
-static void
-zfs_reclaim_complete(void *arg, int pending)
-{
- znode_t *zp = arg;
- zfsvfs_t *zfsvfs = zp->z_zfsvfs;
-
- rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
- if (zp->z_sa_hdl != NULL) {
- ZFS_OBJ_HOLD_ENTER(zfsvfs, zp->z_id);
- zfs_znode_dmu_fini(zp);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
- }
- zfs_znode_free(zp);
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
- /*
- * If the file system is being unmounted, there is a process waiting
- * for us, wake it up.
- */
- if (zfsvfs->z_unmounted)
- wakeup_one(zfsvfs);
-}
-
static int
zfs_freebsd_reclaim(ap)
struct vop_reclaim_args /* {
@@ -6148,53 +6132,25 @@ zfs_freebsd_reclaim(ap)
vnode_t *vp = ap->a_vp;
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- boolean_t rlocked;
-
- rlocked = rw_tryenter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
ASSERT(zp != NULL);
- /*
- * Destroy the vm object and flush associated pages.
- */
+ /* Destroy the vm object and flush associated pages. */
vnode_destroy_vobject(vp);
- mutex_enter(&zp->z_lock);
- zp->z_vnode = NULL;
- mutex_exit(&zp->z_lock);
-
- if (zp->z_unlinked) {
- ; /* Do nothing. */
- } else if (!rlocked) {
- TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp);
- taskqueue_enqueue(taskqueue_thread, &zp->z_task);
- } else if (zp->z_sa_hdl == NULL) {
+ /*
+ * z_teardown_inactive_lock protects from a race with
+ * zfs_znode_dmu_fini in zfsvfs_teardown during
+ * force unmount.
+ */
+ rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
+ if (zp->z_sa_hdl == NULL)
zfs_znode_free(zp);
- } else /* if (!zp->z_unlinked && zp->z_dbuf != NULL) */ {
- int locked;
+ else
+ zfs_zinactive(zp);
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
- locked = MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)) ? 2 :
- ZFS_OBJ_HOLD_TRYENTER(zfsvfs, zp->z_id);
- if (locked == 0) {
- /*
- * Lock can't be obtained due to deadlock possibility,
- * so defer znode destruction.
- */
- TASK_INIT(&zp->z_task, 0, zfs_reclaim_complete, zp);
- taskqueue_enqueue(taskqueue_thread, &zp->z_task);
- } else {
- zfs_znode_dmu_fini(zp);
- if (locked == 1)
- ZFS_OBJ_HOLD_EXIT(zfsvfs, zp->z_id);
- zfs_znode_free(zp);
- }
- }
- VI_LOCK(vp);
vp->v_data = NULL;
- ASSERT(vp->v_holdcnt >= 1);
- VI_UNLOCK(vp);
- if (rlocked)
- rw_exit(&zfsvfs->z_teardown_inactive_lock);
return (0);
}
Modified: releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
==============================================================================
--- releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c Mon Apr 15 19:45:09 2013 (r249524)
@@ -855,6 +855,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, d
}
}
+ getnewvnode_reserve(1);
ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
@@ -1041,6 +1042,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, d
KASSERT(err == 0, ("insmntque() failed: error %d", err));
}
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
+ getnewvnode_drop_reserve();
}
/*
@@ -1145,18 +1147,22 @@ zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_
dmu_object_info_t doi;
dmu_buf_t *db;
znode_t *zp;
- int err;
+ vnode_t *vp;
sa_handle_t *hdl;
- int first = 1;
-
- *zpp = NULL;
+ struct thread *td;
+ int locked;
+ int err;
+ td = curthread;
+ getnewvnode_reserve(1);
again:
+ *zpp = NULL;
ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
if (err) {
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+ getnewvnode_drop_reserve();
return (err);
}
@@ -1167,6 +1173,7 @@ again:
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+ getnewvnode_drop_reserve();
return (EINVAL);
}
@@ -1188,48 +1195,39 @@ again:
if (zp->z_unlinked) {
err = ENOENT;
} else {
- vnode_t *vp;
- int dying = 0;
-
vp = ZTOV(zp);
- if (vp == NULL)
- dying = 1;
- else {
- VN_HOLD(vp);
- if ((vp->v_iflag & VI_DOOMED) != 0) {
- dying = 1;
- /*
- * Don't VN_RELE() vnode here, because
- * it can call vn_lock() which creates
- * LOR between vnode lock and znode
- * lock. We will VN_RELE() the vnode
- * after droping znode lock.
- */
- }
- }
- if (dying) {
- if (first) {
- ZFS_LOG(1, "dying znode detected (zp=%p)", zp);
- first = 0;
- }
- /*
- * znode is dying so we can't reuse it, we must
- * wait until destruction is completed.
- */
- sa_buf_rele(db, NULL);
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
- if (vp != NULL)
- VN_RELE(vp);
- tsleep(zp, 0, "zcollide", 1);
- goto again;
- }
*zpp = zp;
err = 0;
}
sa_buf_rele(db, NULL);
+
+ /* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */
+ if (err == 0)
+ VN_HOLD(vp);
+
mutex_exit(&zp->z_lock);
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+
+ if (err == 0) {
+ locked = VOP_ISLOCKED(vp);
+ VI_LOCK(vp);
+ if ((vp->v_iflag & VI_DOOMED) != 0 &&
+ locked != LK_EXCLUSIVE) {
+ /*
+ * The vnode is doomed and this thread doesn't
+ * hold the exclusive lock on it, so the vnode
+ * must be being reclaimed by another thread.
+ * Otherwise the doomed vnode is being reclaimed
+ * by this thread and zfs_zget is called from
+ * ZIL internals.
+ */
+ VI_UNLOCK(vp);
+ VN_RELE(vp);
+ goto again;
+ }
+ VI_UNLOCK(vp);
+ }
+ getnewvnode_drop_reserve();
return (err);
}
@@ -1265,6 +1263,7 @@ again:
}
}
ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
+ getnewvnode_drop_reserve();
return (err);
}
@@ -1393,10 +1392,8 @@ zfs_znode_delete(znode_t *zp, dmu_tx_t *
void
zfs_zinactive(znode_t *zp)
{
- vnode_t *vp = ZTOV(zp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
uint64_t z_id = zp->z_id;
- int vfslocked;
ASSERT(zp->z_sa_hdl);
@@ -1406,19 +1403,6 @@ zfs_zinactive(znode_t *zp)
ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
mutex_enter(&zp->z_lock);
- VI_LOCK(vp);
- if (vp->v_count > 0) {
- /*
- * If the hold count is greater than zero, somebody has
- * obtained a new reference on this znode while we were
- * processing it here, so we are done.
- */
- VI_UNLOCK(vp);
- mutex_exit(&zp->z_lock);
- ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
- return;
- }
- VI_UNLOCK(vp);
/*
* If this was the last reference to a file with no links,
@@ -1427,16 +1411,14 @@ zfs_zinactive(znode_t *zp)
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
- ASSERT(vp->v_count == 0);
- vrecycle(vp, curthread);
- vfslocked = VFS_LOCK_GIANT(zfsvfs->z_vfs);
zfs_rmnode(zp);
- VFS_UNLOCK_GIANT(vfslocked);
return;
}
mutex_exit(&zp->z_lock);
+ zfs_znode_dmu_fini(zp);
ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
+ zfs_znode_free(zp);
}
void
@@ -1444,8 +1426,8 @@ zfs_znode_free(znode_t *zp)
{
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
- ASSERT(ZTOV(zp) == NULL);
ASSERT(zp->z_sa_hdl == NULL);
+ zp->z_vnode = NULL;
mutex_enter(&zfsvfs->z_znodes_lock);
POINTER_INVALIDATE(&zp->z_zfsvfs);
list_remove(&zfsvfs->z_all_znodes, zp);
Modified: releng/8.4/sys/kern/kern_thread.c
==============================================================================
--- releng/8.4/sys/kern/kern_thread.c Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/kern/kern_thread.c Mon Apr 15 19:45:09 2013 (r249524)
@@ -159,6 +159,7 @@ thread_init(void *mem, int size, int fla
td->td_sleepqueue = sleepq_alloc();
td->td_turnstile = turnstile_alloc();
+ td->td_vp_reserv = 0;
EVENTHANDLER_INVOKE(thread_init, td);
td->td_sched = (struct td_sched *)&td[1];
umtx_thread_init(td);
Modified: releng/8.4/sys/kern/subr_trap.c
==============================================================================
--- releng/8.4/sys/kern/subr_trap.c Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/kern/subr_trap.c Mon Apr 15 19:45:09 2013 (r249524)
@@ -132,6 +132,8 @@ userret(struct thread *td, struct trapfr
sched_userret(td);
KASSERT(td->td_locks == 0,
("userret: Returning with %d locks held.", td->td_locks));
+ KASSERT(td->td_vp_reserv == 0,
+ ("userret: Returning while holding vnode reservation"));
#ifdef VIMAGE
/* Unfortunately td_vnet_lpush needs VNET_DEBUG. */
VNET_ASSERT(curvnet == NULL,
Modified: releng/8.4/sys/kern/vfs_subr.c
==============================================================================
--- releng/8.4/sys/kern/vfs_subr.c Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/kern/vfs_subr.c Mon Apr 15 19:45:09 2013 (r249524)
@@ -977,34 +977,22 @@ vtryrecycle(struct vnode *vp)
}
/*
- * Return the next vnode from the free list.
+ * Wait for available vnodes.
*/
-int
-getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
- struct vnode **vpp)
+static int
+getnewvnode_wait(int suspended)
{
- struct vnode *vp = NULL;
- struct bufobj *bo;
- CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag);
- mtx_lock(&vnode_free_list_mtx);
- /*
- * Lend our context to reclaim vnodes if they've exceeded the max.
- */
- if (freevnodes > wantfreevnodes)
- vnlru_free(1);
- /*
- * Wait for available vnodes.
- */
+ mtx_assert(&vnode_free_list_mtx, MA_OWNED);
if (numvnodes > desiredvnodes) {
- if (mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)) {
+ if (suspended) {
/*
* File system is beeing suspended, we cannot risk a
* deadlock here, so allocate new vnode anyway.
*/
if (freevnodes > wantfreevnodes)
vnlru_free(freevnodes - wantfreevnodes);
- goto alloc;
+ return (0);
}
if (vnlruproc_sig == 0) {
vnlruproc_sig = 1; /* avoid unnecessary wakeups */
@@ -1012,16 +1000,76 @@ getnewvnode(const char *tag, struct moun
}
msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS,
"vlruwk", hz);
-#if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */
- if (numvnodes > desiredvnodes) {
- mtx_unlock(&vnode_free_list_mtx);
- return (ENFILE);
+ }
+ return (numvnodes > desiredvnodes ? ENFILE : 0);
+}
+
+void
+getnewvnode_reserve(u_int count)
+{
+ struct thread *td;
+
+ td = curthread;
+ mtx_lock(&vnode_free_list_mtx);
+ while (count > 0) {
+ if (getnewvnode_wait(0) == 0) {
+ count--;
+ td->td_vp_reserv++;
+ numvnodes++;
}
-#endif
}
-alloc:
+ mtx_unlock(&vnode_free_list_mtx);
+}
+
+void
+getnewvnode_drop_reserve(void)
+{
+ struct thread *td;
+
+ td = curthread;
+ mtx_lock(&vnode_free_list_mtx);
+ KASSERT(numvnodes >= td->td_vp_reserv, ("reserve too large"));
+ numvnodes -= td->td_vp_reserv;
+ mtx_unlock(&vnode_free_list_mtx);
+ td->td_vp_reserv = 0;
+}
+
+/*
+ * Return the next vnode from the free list.
+ */
+int
+getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
+ struct vnode **vpp)
+{
+ struct vnode *vp;
+ struct bufobj *bo;
+ struct thread *td;
+ int error;
+
+ CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag);
+ vp = NULL;
+ td = curthread;
+ if (td->td_vp_reserv > 0) {
+ td->td_vp_reserv -= 1;
+ goto alloc;
+ }
+ mtx_lock(&vnode_free_list_mtx);
+ /*
+ * Lend our context to reclaim vnodes if they've exceeded the max.
+ */
+ if (freevnodes > wantfreevnodes)
+ vnlru_free(1);
+ error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag &
+ MNTK_SUSPEND));
+#if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */
+ if (error != 0) {
+ mtx_unlock(&vnode_free_list_mtx);
+ return (error);
+ }
+#endif
numvnodes++;
mtx_unlock(&vnode_free_list_mtx);
+alloc:
vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
/*
* Setup locks.
Modified: releng/8.4/sys/sys/proc.h
==============================================================================
--- releng/8.4/sys/sys/proc.h Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/sys/proc.h Mon Apr 15 19:45:09 2013 (r249524)
@@ -308,6 +308,7 @@ struct thread {
struct rusage_ext td_rux; /* (t) Internal rusage information. */
struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */
pid_t td_dbg_forked; /* (c) Child pid for debugger. */
+ u_int td_vp_reserv; /* (k) Count of reserved vnodes. */
};
struct mtx *thread_lock_block(struct thread *);
Modified: releng/8.4/sys/sys/vnode.h
==============================================================================
--- releng/8.4/sys/sys/vnode.h Mon Apr 15 19:32:14 2013 (r249523)
+++ releng/8.4/sys/sys/vnode.h Mon Apr 15 19:45:09 2013 (r249524)
@@ -604,6 +604,8 @@ void cvtstat(struct stat *st, struct ost
void cvtnstat(struct stat *sb, struct nstat *nsb);
int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
struct vnode **vpp);
+void getnewvnode_reserve(u_int count);
+void getnewvnode_drop_reserve(void);
int insmntque1(struct vnode *vp, struct mount *mp,
void (*dtr)(struct vnode *, void *), void *dtr_arg);
int insmntque(struct vnode *vp, struct mount *mp);
More information about the svn-src-releng
mailing list