kern/164261: [patch] fix panic with NFS served from NULLFS
Kostik Belousov
kostikbel at gmail.com
Wed Jan 18 18:58:06 UTC 2012
On Wed, Jan 18, 2012 at 02:07:27PM +0400, Eygene Ryabinkin wrote:
> Konstantin, good day.
>
> Wed, Jan 18, 2012 at 06:14:39AM +0200, Kostik Belousov wrote:
> > On Wed, Jan 18, 2012 at 12:28:53AM +0400, Eygene Ryabinkin wrote:
> > > Patches
> > > http://codelabs.ru/fbsd/prs/2012-jan-nullfs-LK_SHARED/0001-NULLFS-properly-destroy-node-hash.patch
> > This one is probably fine, assuming that the hashes are properly cleared
> > on unmount. Feel free to commit.
>
> Will do, thanks!
>
> > > and
> > > http://codelabs.ru/fbsd/prs/2012-jan-nullfs-LK_SHARED/0002-NULLFS-fix-panics-when-lowervp-is-locked-with-LK_SHA.patch
> > > will fix the problem (in reality, the first patch is just some
> > > nitpicking).
> > And I do not even read this.
> >
> > The issue that the backtrace is pointing to seems to be the misuse of vrele(),
> > after the vnode lock is switched to null vnode v_lock. Since the vnode that
> > is being thrown out is exclusively locked, cleanup path shall do vput()
> > instead of vrele().
>
> The short story: at this vrele, vp had returned to its own v_lock as v_vnlock,
> so it is unlocked here.
>
>
> The long story with some thoughts and questions. If vput() path will
> end up in null_reclaim(), this seems to be unhelpful:
>
> - VOP_RECLAIM() expects exclusively locked vnode, since it was instructed
> to by vnode_if.src and thus vnode_if.c has ASSERT_VOP_ELOCKED(a->a_vp)
> in VOP_RECLAIM_APV(); for nullfs vop_islocked is vop_stdislocked() and
> it checks the lock status of v_vnlock, so anything that comes to null_reclaim
> will be exclusively locked with *v_vnlock;
>
> - null_reclaim() has the following code,
> {{{
> struct vnode *vp = ap->a_vp;
> [...]
> lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL)
> [...]]
> }}}
> And when vp->v_lock is equal to *v_vnlock, this will lead to the
> lockmgr panic, because the thread tries to exclusively lock the
> object that was already locked by itself and has no recursion rights.
>
> If anyone sees flaws in this explanations, please, point me to them.
Ok, real flaw there is the attempt to treat half-constructed vnode as
the fully-constructed one, later. It shall be decommissioned with the
same code as does the insmntque1(). The complication is the fact that
the vnode can be found on the mount list, but we only search for a vnode
by hash.
>
>
> I had recompiled the kernel with your vrele -> vput change inside
> null_reclaim and with DEBUG_VFS_LOCKS: it resulted in the lock violation
> from insmntque,
> http://codelabs.ru/fbsd/prs/2012-jan-nullfs-LK_SHARED/DEBUG_VFS_LOCKS-panic.txt
> So, it hadn't got to the point where null_reclaim() will come to the game.
>
> The problem is that insmntque1() wants the passed vnode to be exclusively
> locked, but nfsrvd_readdirplus() uses LK_SHARED.
>
> By the way, the ASSERT_VOP_ELOCKED() was introduced in r182364 by you:
> why do you insist on exclusive lock for MP-safe fs? The current interplay
> of NFS and NULLFS makes me think that either some of filesystems isn't really
> MP-safe, or the requirement for exclusive locking can be relaxed.
insmntque1() requires the exclusively locked vnode, because
the function modifies the vnode (it inserts the vnode into mount list).
nfsd is right there, nullfs is not. The filesystem shall ensure the
proper locking if the requested mode is not strong enough. See how the
UFS treats the lock flags if ffs_vgetf(): shared is only honored if the
vnode is found in hash. So this is another bug, nullfs must switch to
exclusive lock there.
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 319e404..dd4ab61 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -169,17 +169,26 @@ null_hashins(mp, xp)
}
static void
-null_insmntque_dtr(struct vnode *vp, void *xp)
+null_destroy_proto(struct vnode *vp, void *xp)
{
- vput(((struct null_node *)xp)->null_lowervp);
+ VI_LOCK(vp);
vp->v_data = NULL;
vp->v_vnlock = &vp->v_lock;
- free(xp, M_NULLFSNODE);
vp->v_op = &dead_vnodeops;
+ VI_UNLOCK(vp);
(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vgone(vp);
vput(vp);
+ free(xp, M_NULLFSNODE);
+}
+
+static void
+null_insmntque_dtr(struct vnode *vp, void *xp)
+{
+
+ vput(((struct null_node *)xp)->null_lowervp);
+ null_destroy_proto(vp, xp);
}
/*
@@ -250,9 +259,7 @@ null_nodeget(mp, lowervp, vpp)
*vpp = null_hashins(mp, xp);
if (*vpp != NULL) {
vrele(lowervp);
- vp->v_vnlock = &vp->v_lock;
- xp->null_lowervp = NULL;
- vrele(vp);
+ null_destroy_proto(vp, xp);
return (0);
}
*vpp = vp;
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
index cf3176f..d39926f 100644
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -307,6 +307,12 @@ nullfs_vget(mp, ino, flags, vpp)
struct vnode **vpp;
{
int error;
+
+ KASSERT((flags & LK_TYPE_MASK) != 0,
+ ("nullfs_vget: no lock requested"));
+ flags &= ~LK_TYPE_MASK;
+ flags |= LK_EXCLUSIVE;
+
error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, flags, vpp);
if (error)
return (error);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index e0645bd..b607666 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -697,12 +697,18 @@ null_inactive(struct vop_inactive_args *ap)
static int
null_reclaim(struct vop_reclaim_args *ap)
{
- struct vnode *vp = ap->a_vp;
- struct null_node *xp = VTONULL(vp);
- struct vnode *lowervp = xp->null_lowervp;
+ struct vnode *vp;
+ struct null_node *xp;
+ struct vnode *lowervp;
+
+ vp = ap->a_vp;
+ xp = VTONULL(vp);
+ lowervp = xp->null_lowervp;
+
+ KASSERT(lowervp != NULL && vp->v_vnlock != &vp->v_lock,
+ ("Reclaiming inclomplete null vnode %p", vp));
- if (lowervp)
- null_hashrem(xp);
+ null_hashrem(xp);
/*
* Use the interlock to protect the clearing of v_data to
* prevent faults in null_lock().
@@ -713,10 +719,7 @@ null_reclaim(struct vop_reclaim_args *ap)
vp->v_object = NULL;
vp->v_vnlock = &vp->v_lock;
VI_UNLOCK(vp);
- if (lowervp)
- vput(lowervp);
- else
- panic("null_reclaim: reclaiming a node with no lowervp");
+ vput(lowervp);
free(xp, M_NULLFSNODE);
return (0);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-fs/attachments/20120118/10e7c649/attachment.pgp
More information about the freebsd-fs
mailing list