svn commit: r318822 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Andriy Gapon
avg at FreeBSD.org
Wed May 24 21:45:54 UTC 2017
Author: avg
Date: Wed May 24 21:45:52 2017
New Revision: 318822
URL: https://svnweb.freebsd.org/changeset/base/318822
Log:
MFC r316913: 7869 panic in bpobj_space(): null pointer dereference
illumos/illumos-gate at a3905a45920de250d181b66ac0b6b71bd200d9ef
https://github.com/illumos/illumos-gate/commit/a3905a45920de250d181b66ac0b6b71bd200d9ef
https://www.illumos.org/issues/7869
The issue fixed by this patch is a race condition in the deadlist code.
A thread executing an administrative command that uses
`dsl_deadlist_space_range()` holds the lock of the whole `deadlist_t` to
protect the access of all its entries that the deadlist contains in an
avl tree.
Sync threads trying to insert a new entry in the deadlist
(through `dsl_deadlist_insert()` -> `dle_enqueue()`) do not hold the
deadlist lock at that moment. If the `dle_bpobj` is the empty bpobj (our
sentinel value), we close and reopen it. Between these two operations,
it is possible for the `dsl_deadlist_space_range()` thread to dereference
that bpobj which is `NULL` during that window.
Threads should hold the a deadlist's `dl_lock` when they manipulate its
internal data so scenarios like the one above are avoided. In addition,
threads should also hold the bpobj lock whenever they are allocating the
subobj list of a bpobj, and not just when they actually insert the subobj
to the list. This way we can avoid potential memory leaks.
Reviewed by: Matt Ahrens <mahrens at delphix.com>
Reviewed by: Dan Kimmel <dan.kimmel at delphix.com>
Reviewed by: Steve Gonczi <steve.gonczi at delphix.com>
Reviewed by: John Kennedy <john.kennedy at delphix.com>
Reviewed by: George Melikov <mail at gmelikov.ru>
Reviewed by: Brian Behlendorf <behlendorf1 at llnl.gov>
Approved by: Dan McDonald <danmcd at omniti.com>
Author: Serapheim Dimitropoulos <serapheim at delphix.com>
MFC after: 2 weeks
Modified:
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
Directory Properties:
head/sys/cddl/contrib/opensolaris/ (props changed)
Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c Wed May 24 21:43:34 2017 (r318821)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bpobj.c Wed May 24 21:45:52 2017 (r318822)
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@@ -395,6 +395,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint6
return;
}
+ mutex_enter(&bpo->bpo_lock);
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
if (bpo->bpo_phys->bpo_subobjs == 0) {
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
@@ -406,7 +407,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint6
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
- mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
sizeof (subobj), &subobj, tx);
Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c Wed May 24 21:43:34 2017 (r318821)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_deadlist.c Wed May 24 21:45:52 2017 (r318822)
@@ -72,6 +72,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *d
zap_cursor_t zc;
zap_attribute_t za;
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
+
ASSERT(!dl->dl_oldfmt);
if (dl->dl_havetree)
return;
@@ -182,6 +184,7 @@ static void
dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
const blkptr_t *bp, dmu_tx_t *tx)
{
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object ==
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
@@ -198,6 +201,7 @@ static void
dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
uint64_t obj, dmu_tx_t *tx)
{
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object !=
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
@@ -222,15 +226,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl,
return;
}
+ mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
- mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used +=
bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
- mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = bp->blk_birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -239,6 +242,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl,
else
dle = AVL_PREV(&dl->dl_tree, dle);
dle_enqueue(dl, dle, bp, tx);
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -254,16 +258,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl,
if (dl->dl_oldfmt)
return;
- dsl_deadlist_load_tree(dl);
-
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = mintxg;
+
+ mutex_enter(&dl->dl_lock);
+ dsl_deadlist_load_tree(dl);
+
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
mintxg, obj, tx));
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -278,6 +285,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *
if (dl->dl_oldfmt)
return;
+ mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
@@ -291,6 +299,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *
kmem_free(dle, sizeof (*dle));
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -334,6 +343,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, u
return (newobj);
}
+ mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
for (dle = avl_first(&dl->dl_tree); dle;
@@ -347,6 +357,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, u
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
+ mutex_exit(&dl->dl_lock);
return (newobj);
}
@@ -424,6 +435,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t
uint64_t used, comp, uncomp;
bpobj_t bpo;
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
+
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
bpobj_close(&bpo);
@@ -431,11 +444,9 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
- mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used += used;
dl->dl_phys->dl_comp += comp;
dl->dl_phys->dl_uncomp += uncomp;
- mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -475,6 +486,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, u
return;
}
+ mutex_enter(&dl->dl_lock);
for (zap_cursor_init(&zc, dl->dl_os, obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
@@ -489,6 +501,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, u
dmu_buf_will_dirty(bonus, tx);
bzero(dlp, sizeof (*dlp));
dmu_buf_rele(bonus, FTAG);
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -503,6 +516,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *
avl_index_t where;
ASSERT(!dl->dl_oldfmt);
+
+ mutex_enter(&dl->dl_lock);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
dsl_deadlist_load_tree(dl);
@@ -518,14 +533,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
- mutex_enter(&dl->dl_lock);
ASSERT3U(dl->dl_phys->dl_used, >=, used);
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
dl->dl_phys->dl_used -= used;
dl->dl_phys->dl_comp -= comp;
dl->dl_phys->dl_uncomp -= uncomp;
- mutex_exit(&dl->dl_lock);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
dle->dle_mintxg, tx));
@@ -536,4 +549,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *
kmem_free(dle, sizeof (*dle));
dle = dle_next;
}
+ mutex_exit(&dl->dl_lock);
}
More information about the svn-src-head
mailing list