svn commit: r275492 - stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
Xin LI
delphij at FreeBSD.org
Fri Dec 5 00:32:34 UTC 2014
Author: delphij
Date: Fri Dec 5 00:32:33 2014
New Revision: 275492
URL: https://svnweb.freebsd.org/changeset/base/275492
Log:
MFC r274172 (avg)
fix l2arc compression buffers leak
We have observed that arc_release() can be called concurrently with a
l2arc in-flight write.
Also, we have observed that arc_hdr_destroy() can be called from
arc_write_done() for a zio with ZIO_FLAG_IO_REWRITE flag in similar
circumstances.
Previously the l2arc headers would be freed while leaking their
associated compression buffers. Now the buffers are placed on
l2arc_free_on_write list for delayed freeing. This is similar to what
was already done to arc buffers that were supposed to be freed
concurrently with in-flight writes of those buffers.
In addition to fixing the discovered leaks this change also adds some
protective code to assert that a compression buffer associated with a
l2arc header is never leaked.
A new kstat l2_cdata_free_on_write is added. It keeps a count of
delayed compression buffer frees which previously would have been leaks.
Tested by: Vitalij Satanivskij <satan at ukr.net> et al
Requested by: many
Sponsored by: HybridCluster / ClusterHQ
This is a 10.1-RELEASE errata candidate.
Modified:
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Thu Dec 4 23:27:29 2014 (r275491)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Fri Dec 5 00:32:33 2014 (r275492)
@@ -390,6 +390,7 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_evict_lock_retry;
kstat_named_t arcstat_l2_evict_reading;
kstat_named_t arcstat_l2_free_on_write;
+ kstat_named_t arcstat_l2_cdata_free_on_write;
kstat_named_t arcstat_l2_abort_lowmem;
kstat_named_t arcstat_l2_cksum_bad;
kstat_named_t arcstat_l2_io_error;
@@ -467,6 +468,7 @@ static arc_stats_t arc_stats = {
{ "l2_evict_lock_retry", KSTAT_DATA_UINT64 },
{ "l2_evict_reading", KSTAT_DATA_UINT64 },
{ "l2_free_on_write", KSTAT_DATA_UINT64 },
+ { "l2_cdata_free_on_write", KSTAT_DATA_UINT64 },
{ "l2_abort_lowmem", KSTAT_DATA_UINT64 },
{ "l2_cksum_bad", KSTAT_DATA_UINT64 },
{ "l2_io_error", KSTAT_DATA_UINT64 },
@@ -1658,6 +1660,21 @@ arc_buf_add_ref(arc_buf_t *buf, void* ta
data, metadata, hits);
}
+static void
+arc_buf_free_on_write(void *data, size_t size,
+ void (*free_func)(void *, size_t))
+{
+ l2arc_data_free_t *df;
+
+ df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
+ df->l2df_data = data;
+ df->l2df_size = size;
+ df->l2df_func = free_func;
+ mutex_enter(&l2arc_free_on_write_mtx);
+ list_insert_head(l2arc_free_on_write, df);
+ mutex_exit(&l2arc_free_on_write_mtx);
+}
+
/*
* Free the arc data buffer. If it is an l2arc write in progress,
* the buffer is placed on l2arc_free_on_write to be freed later.
@@ -1668,14 +1685,7 @@ arc_buf_data_free(arc_buf_t *buf, void (
arc_buf_hdr_t *hdr = buf->b_hdr;
if (HDR_L2_WRITING(hdr)) {
- l2arc_data_free_t *df;
- df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
- df->l2df_data = buf->b_data;
- df->l2df_size = hdr->b_size;
- df->l2df_func = free_func;
- mutex_enter(&l2arc_free_on_write_mtx);
- list_insert_head(l2arc_free_on_write, df);
- mutex_exit(&l2arc_free_on_write_mtx);
+ arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
ARCSTAT_BUMP(arcstat_l2_free_on_write);
} else {
free_func(buf->b_data, hdr->b_size);
@@ -1687,6 +1697,23 @@ arc_buf_data_free(arc_buf_t *buf, void (
* arc_buf_t off of the the arc_buf_hdr_t's list and free it.
*/
static void
+arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
+{
+ l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
+
+ ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
+
+ if (l2hdr->b_tmp_cdata == NULL)
+ return;
+
+ ASSERT(HDR_L2_WRITING(hdr));
+ arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
+ zio_data_buf_free);
+ ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
+ l2hdr->b_tmp_cdata = NULL;
+}
+
+static void
arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
{
arc_buf_t **bufp;
@@ -1785,6 +1812,7 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
trim_map_free(l2hdr->b_dev->l2ad_vdev, l2hdr->b_daddr,
hdr->b_size, 0);
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
+ arc_buf_l2_cdata_free(hdr);
ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
vdev_space_update(l2hdr->b_dev->l2ad_vdev,
@@ -3678,6 +3706,7 @@ arc_release(arc_buf_t *buf, void *tag)
l2hdr = hdr->b_l2hdr;
if (l2hdr) {
mutex_enter(&l2arc_buflist_mtx);
+ arc_buf_l2_cdata_free(hdr);
hdr->b_l2hdr = NULL;
list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
}
@@ -4967,6 +4996,11 @@ top:
ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
bytes_evicted += abl2->b_asize;
ab->b_l2hdr = NULL;
+ /*
+ * We are destroying l2hdr, so ensure that
+ * its compressed buffer, if any, is not leaked.
+ */
+ ASSERT(abl2->b_tmp_cdata == NULL);
kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
}
@@ -5205,6 +5239,14 @@ l2arc_write_buffers(spa_t *spa, l2arc_de
buf_data = l2hdr->b_tmp_cdata;
buf_sz = l2hdr->b_asize;
+ /*
+ * If the data has not been compressed, then clear b_tmp_cdata
+ * to make sure that it points only to a temporary compression
+ * buffer.
+ */
+ if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
+ l2hdr->b_tmp_cdata = NULL;
+
/* Compression may have squashed the buffer to zero length. */
if (buf_sz != 0) {
uint64_t buf_p_sz;
@@ -5395,15 +5437,18 @@ l2arc_release_cdata_buf(arc_buf_hdr_t *a
{
l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
- if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
+ ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
+ if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
/*
* If the data was compressed, then we've allocated a
* temporary buffer for it, so now we need to release it.
*/
ASSERT(l2hdr->b_tmp_cdata != NULL);
zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
+ l2hdr->b_tmp_cdata = NULL;
+ } else {
+ ASSERT(l2hdr->b_tmp_cdata == NULL);
}
- l2hdr->b_tmp_cdata = NULL;
}
/*
More information about the svn-src-stable-10
mailing list