svn commit: r248547 - in stable/9: cddl/contrib/opensolaris/cmd/ztest sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys
Martin Matuska
mm at FreeBSD.org
Wed Mar 20 08:26:18 UTC 2013
Author: mm
Date: Wed Mar 20 08:26:16 2013
New Revision: 248547
URL: http://svnweb.freebsd.org/changeset/base/248547
Log:
MFC r242845, r247592:
MFC r242845 (delphij):
Illumos r13840:97fd5cdf328a:
3145 single-copy arc
3212 ztest: race condition between vdev_online() and spa_vdev_remove()
Illumos r13849:3468a95b27cd:
3258 ztest's use of file descriptors is unstable
MFC r247592 (delphij):
Import a fix tighten assertion on SPA versions from vendor (Illumos).
Illumos ZFS issue:
3543 Feature flags causes assertion in spa.c to miss certain cases
Approved by: delphij
Modified:
stable/9/cddl/contrib/opensolaris/cmd/ztest/ztest.c
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
Directory Properties:
stable/9/cddl/contrib/opensolaris/ (props changed)
stable/9/sys/cddl/contrib/opensolaris/ (props changed)
Modified: stable/9/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- stable/9/cddl/contrib/opensolaris/cmd/ztest/ztest.c Wed Mar 20 07:20:00 2013 (r248546)
+++ stable/9/cddl/contrib/opensolaris/cmd/ztest/ztest.c Wed Mar 20 08:26:16 2013 (r248547)
@@ -121,8 +121,8 @@
#include <sys/fs/zfs.h>
#include <libnvpair.h>
-#define ZTEST_FD_DATA 3
-#define ZTEST_FD_RAND 4
+static int ztest_fd_data = -1;
+static int ztest_fd_rand = -1;
typedef struct ztest_shared_hdr {
uint64_t zh_hdr_size;
@@ -713,14 +713,17 @@ process_options(int argc, char **argv)
UINT64_MAX >> 2);
if (strlen(altdir) > 0) {
- char cmd[MAXNAMELEN];
- char realaltdir[MAXNAMELEN];
+ char *cmd;
+ char *realaltdir;
char *bin;
char *ztest;
char *isa;
int isalen;
- (void) realpath(getexecname(), cmd);
+ cmd = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
+ realaltdir = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
+
+ VERIFY(NULL != realpath(getexecname(), cmd));
if (0 != access(altdir, F_OK)) {
ztest_dump_core = B_FALSE;
fatal(B_TRUE, "invalid alternate ztest path: %s",
@@ -751,6 +754,9 @@ process_options(int argc, char **argv)
fatal(B_TRUE, "invalid alternate lib directory %s",
zo->zo_alt_libpath);
}
+
+ umem_free(cmd, MAXPATHLEN);
+ umem_free(realaltdir, MAXPATHLEN);
}
}
@@ -767,10 +773,12 @@ ztest_random(uint64_t range)
{
uint64_t r;
+ ASSERT3S(ztest_fd_rand, >=, 0);
+
if (range == 0)
return (0);
- if (read(ZTEST_FD_RAND, &r, sizeof (r)) != sizeof (r))
+ if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
fatal(1, "short read from /dev/urandom");
return (r % range);
@@ -4836,7 +4844,18 @@ ztest_fault_inject(ztest_ds_t *zd, uint6
if (islog)
(void) rw_unlock(&ztest_name_lock);
} else {
+ /*
+ * Ideally we would like to be able to randomly
+ * call vdev_[on|off]line without holding locks
+ * to force unpredictable failures but the side
+ * effects of vdev_[on|off]line prevent us from
+ * doing so. We grab the ztest_vdev_lock here to
+ * prevent a race between injection testing and
+ * aux_vdev removal.
+ */
+ VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
(void) vdev_online(spa, guid0, 0, NULL);
+ VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
}
}
@@ -5795,29 +5814,16 @@ ztest_init(ztest_shared_t *zs)
}
static void
-setup_fds(void)
+setup_data_fd(void)
{
- int fd;
-#ifdef illumos
-
- char *tmp = tempnam(NULL, NULL);
- fd = open(tmp, O_RDWR | O_CREAT, 0700);
- ASSERT3U(fd, ==, ZTEST_FD_DATA);
- (void) unlink(tmp);
- free(tmp);
-#else
- char tmp[MAXPATHLEN];
-
- strlcpy(tmp, ztest_opts.zo_dir, MAXPATHLEN);
- strlcat(tmp, "/ztest.XXXXXX", MAXPATHLEN);
- fd = mkstemp(tmp);
- ASSERT3U(fd, ==, ZTEST_FD_DATA);
-#endif
+ static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
- fd = open("/dev/urandom", O_RDONLY);
- ASSERT3U(fd, ==, ZTEST_FD_RAND);
+ ztest_fd_data = mkstemp(ztest_name_data);
+ ASSERT3S(ztest_fd_data, >=, 0);
+ (void) unlink(ztest_name_data);
}
+
static int
shared_data_size(ztest_shared_hdr_t *hdr)
{
@@ -5838,15 +5844,11 @@ setup_hdr(void)
int size;
ztest_shared_hdr_t *hdr;
-#ifndef illumos
- pwrite(ZTEST_FD_DATA, "", 1, 0);
-#endif
-
hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
- PROT_READ | PROT_WRITE, MAP_SHARED, ZTEST_FD_DATA, 0);
+ PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
ASSERT(hdr != MAP_FAILED);
- VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, sizeof (ztest_shared_hdr_t)));
+ VERIFY3U(0, ==, ftruncate(ztest_fd_data, sizeof (ztest_shared_hdr_t)));
hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t);
hdr->zh_opts_size = sizeof (ztest_shared_opts_t);
@@ -5857,7 +5859,7 @@ setup_hdr(void)
hdr->zh_ds_count = ztest_opts.zo_datasets;
size = shared_data_size(hdr);
- VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, size));
+ VERIFY3U(0, ==, ftruncate(ztest_fd_data, size));
(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
}
@@ -5870,14 +5872,14 @@ setup_data(void)
uint8_t *buf;
hdr = (void *)mmap(0, P2ROUNDUP(sizeof (*hdr), getpagesize()),
- PROT_READ, MAP_SHARED, ZTEST_FD_DATA, 0);
+ PROT_READ, MAP_SHARED, ztest_fd_data, 0);
ASSERT(hdr != MAP_FAILED);
size = shared_data_size(hdr);
(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()),
- PROT_READ | PROT_WRITE, MAP_SHARED, ZTEST_FD_DATA, 0);
+ PROT_READ | PROT_WRITE, MAP_SHARED, ztest_fd_data, 0);
ASSERT(hdr != MAP_FAILED);
buf = (uint8_t *)hdr;
@@ -5896,12 +5898,13 @@ exec_child(char *cmd, char *libpath, boo
{
pid_t pid;
int status;
- char cmdbuf[MAXPATHLEN];
+ char *cmdbuf = NULL;
pid = fork();
if (cmd == NULL) {
- (void) strlcpy(cmdbuf, getexecname(), sizeof (cmdbuf));
+ cmdbuf = umem_alloc(MAXPATHLEN, UMEM_NOFAIL);
+ (void) strlcpy(cmdbuf, getexecname(), MAXPATHLEN);
cmd = cmdbuf;
}
@@ -5910,9 +5913,16 @@ exec_child(char *cmd, char *libpath, boo
if (pid == 0) { /* child */
char *emptyargv[2] = { cmd, NULL };
+ char fd_data_str[12];
struct rlimit rl = { 1024, 1024 };
(void) setrlimit(RLIMIT_NOFILE, &rl);
+
+ (void) close(ztest_fd_rand);
+ VERIFY3U(11, >=,
+ snprintf(fd_data_str, 12, "%d", ztest_fd_data));
+ VERIFY0(setenv("ZTEST_FD_DATA", fd_data_str, 1));
+
(void) enable_extended_FILE_stdio(-1, -1);
if (libpath != NULL)
VERIFY(0 == setenv("LD_LIBRARY_PATH", libpath, 1));
@@ -5925,6 +5935,11 @@ exec_child(char *cmd, char *libpath, boo
fatal(B_TRUE, "exec failed: %s", cmd);
}
+ if (cmdbuf != NULL) {
+ umem_free(cmdbuf, MAXPATHLEN);
+ cmd = NULL;
+ }
+
while (waitpid(pid, &status, 0) != pid)
continue;
if (statusp != NULL)
@@ -5989,39 +6004,41 @@ main(int argc, char **argv)
char timebuf[100];
char numbuf[6];
spa_t *spa;
- char cmd[MAXNAMELEN];
+ char *cmd;
boolean_t hasalt;
-
- boolean_t ischild = (0 == lseek(ZTEST_FD_DATA, 0, SEEK_CUR));
- ASSERT(ischild || errno == EBADF);
+ char *fd_data_str = getenv("ZTEST_FD_DATA");
(void) setvbuf(stdout, NULL, _IOLBF, 0);
dprintf_setup(&argc, argv);
- if (!ischild) {
+ ztest_fd_rand = open("/dev/urandom", O_RDONLY);
+ ASSERT3S(ztest_fd_rand, >=, 0);
+
+ if (!fd_data_str) {
process_options(argc, argv);
- setup_fds();
+ setup_data_fd();
setup_hdr();
setup_data();
bcopy(&ztest_opts, ztest_shared_opts,
sizeof (*ztest_shared_opts));
} else {
+ ztest_fd_data = atoi(fd_data_str);
setup_data();
bcopy(ztest_shared_opts, &ztest_opts, sizeof (ztest_opts));
}
ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count);
/* Override location of zpool.cache */
- (void) asprintf((char **)&spa_config_path, "%s/zpool.cache",
- ztest_opts.zo_dir);
+ VERIFY3U(asprintf((char **)&spa_config_path, "%s/zpool.cache",
+ ztest_opts.zo_dir), !=, -1);
ztest_ds = umem_alloc(ztest_opts.zo_datasets * sizeof (ztest_ds_t),
UMEM_NOFAIL);
zs = ztest_shared;
- if (ischild) {
+ if (fd_data_str) {
metaslab_gang_bang = ztest_opts.zo_metaslab_gang_bang;
metaslab_df_alloc_threshold =
zs->zs_metaslab_df_alloc_threshold;
@@ -6044,7 +6061,8 @@ main(int argc, char **argv)
(u_longlong_t)ztest_opts.zo_time);
}
- (void) strlcpy(cmd, getexecname(), sizeof (cmd));
+ cmd = umem_alloc(MAXNAMELEN, UMEM_NOFAIL);
+ (void) strlcpy(cmd, getexecname(), MAXNAMELEN);
zs->zs_do_init = B_TRUE;
if (strlen(ztest_opts.zo_alt_ztest) != 0) {
@@ -6185,5 +6203,7 @@ main(int argc, char **argv)
kills, iters - kills, (100.0 * kills) / MAX(1, iters));
}
+ umem_free(cmd, MAXNAMELEN);
+
return (0);
}
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Wed Mar 20 07:20:00 2013 (r248546)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c Wed Mar 20 08:26:16 2013 (r248547)
@@ -191,6 +191,7 @@ uint64_t zfs_arc_meta_limit = 0;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
+int zfs_disable_dup_eviction = 0;
TUNABLE_QUAD("vfs.zfs.arc_max", &zfs_arc_max);
TUNABLE_QUAD("vfs.zfs.arc_min", &zfs_arc_min);
@@ -321,7 +322,6 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_io_error;
kstat_named_t arcstat_l2_size;
kstat_named_t arcstat_l2_hdr_size;
- kstat_named_t arcstat_memory_throttle_count;
kstat_named_t arcstat_l2_write_trylock_fail;
kstat_named_t arcstat_l2_write_passed_headroom;
kstat_named_t arcstat_l2_write_spa_mismatch;
@@ -334,6 +334,10 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_write_buffer_bytes_scanned;
kstat_named_t arcstat_l2_write_buffer_list_iter;
kstat_named_t arcstat_l2_write_buffer_list_null_iter;
+ kstat_named_t arcstat_memory_throttle_count;
+ kstat_named_t arcstat_duplicate_buffers;
+ kstat_named_t arcstat_duplicate_buffers_size;
+ kstat_named_t arcstat_duplicate_reads;
} arc_stats_t;
static arc_stats_t arc_stats = {
@@ -391,7 +395,6 @@ static arc_stats_t arc_stats = {
{ "l2_io_error", KSTAT_DATA_UINT64 },
{ "l2_size", KSTAT_DATA_UINT64 },
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
- { "memory_throttle_count", KSTAT_DATA_UINT64 },
{ "l2_write_trylock_fail", KSTAT_DATA_UINT64 },
{ "l2_write_passed_headroom", KSTAT_DATA_UINT64 },
{ "l2_write_spa_mismatch", KSTAT_DATA_UINT64 },
@@ -403,7 +406,11 @@ static arc_stats_t arc_stats = {
{ "l2_write_pios", KSTAT_DATA_UINT64 },
{ "l2_write_buffer_bytes_scanned", KSTAT_DATA_UINT64 },
{ "l2_write_buffer_list_iter", KSTAT_DATA_UINT64 },
- { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 }
+ { "l2_write_buffer_list_null_iter", KSTAT_DATA_UINT64 },
+ { "memory_throttle_count", KSTAT_DATA_UINT64 },
+ { "duplicate_buffers", KSTAT_DATA_UINT64 },
+ { "duplicate_buffers_size", KSTAT_DATA_UINT64 },
+ { "duplicate_reads", KSTAT_DATA_UINT64 }
};
#define ARCSTAT(stat) (arc_stats.stat.value.ui64)
@@ -1516,6 +1523,17 @@ arc_buf_clone(arc_buf_t *from)
hdr->b_buf = buf;
arc_get_data_buf(buf);
bcopy(from->b_data, buf->b_data, size);
+
+ /*
+ * This buffer already exists in the arc so create a duplicate
+ * copy for the caller. If the buffer is associated with user data
+ * then track the size and number of duplicates. These stats will be
+ * updated as duplicate buffers are created and destroyed.
+ */
+ if (hdr->b_type == ARC_BUFC_DATA) {
+ ARCSTAT_BUMP(arcstat_duplicate_buffers);
+ ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
+ }
hdr->b_datacnt += 1;
return (buf);
}
@@ -1616,6 +1634,16 @@ arc_buf_destroy(arc_buf_t *buf, boolean_
ASSERT3U(state->arcs_size, >=, size);
atomic_add_64(&state->arcs_size, -size);
buf->b_data = NULL;
+
+ /*
+ * If we're destroying a duplicate buffer make sure
+ * that the appropriate statistics are updated.
+ */
+ if (buf->b_hdr->b_datacnt > 1 &&
+ buf->b_hdr->b_type == ARC_BUFC_DATA) {
+ ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
+ ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
+ }
ASSERT(buf->b_hdr->b_datacnt > 0);
buf->b_hdr->b_datacnt -= 1;
}
@@ -1800,6 +1828,48 @@ arc_buf_size(arc_buf_t *buf)
}
/*
+ * Called from the DMU to determine if the current buffer should be
+ * evicted. In order to ensure proper locking, the eviction must be initiated
+ * from the DMU. Return true if the buffer is associated with user data and
+ * duplicate buffers still exist.
+ */
+boolean_t
+arc_buf_eviction_needed(arc_buf_t *buf)
+{
+ arc_buf_hdr_t *hdr;
+ boolean_t evict_needed = B_FALSE;
+
+ if (zfs_disable_dup_eviction)
+ return (B_FALSE);
+
+ mutex_enter(&buf->b_evict_lock);
+ hdr = buf->b_hdr;
+ if (hdr == NULL) {
+ /*
+ * We are in arc_do_user_evicts(); let that function
+ * perform the eviction.
+ */
+ ASSERT(buf->b_data == NULL);
+ mutex_exit(&buf->b_evict_lock);
+ return (B_FALSE);
+ } else if (buf->b_data == NULL) {
+ /*
+ * We have already been added to the arc eviction list;
+ * recommend eviction.
+ */
+ ASSERT3P(hdr, ==, &arc_eviction_hdr);
+ mutex_exit(&buf->b_evict_lock);
+ return (B_TRUE);
+ }
+
+ if (hdr->b_datacnt > 1 && hdr->b_type == ARC_BUFC_DATA)
+ evict_needed = B_TRUE;
+
+ mutex_exit(&buf->b_evict_lock);
+ return (evict_needed);
+}
+
+/*
* Evict buffers from list until we've removed the specified number of
* bytes. Move the removed buffers to the appropriate evict state.
* If the recycle flag is set, then attempt to "recycle" a buffer:
@@ -2885,8 +2955,10 @@ arc_read_done(zio_t *zio)
abuf = buf;
for (acb = callback_list; acb; acb = acb->acb_next) {
if (acb->acb_done) {
- if (abuf == NULL)
+ if (abuf == NULL) {
+ ARCSTAT_BUMP(arcstat_duplicate_reads);
abuf = arc_buf_clone(buf);
+ }
acb->acb_buf = abuf;
abuf = NULL;
}
@@ -3405,6 +3477,16 @@ arc_release(arc_buf_t *buf, void *tag)
ASSERT3U(*size, >=, hdr->b_size);
atomic_add_64(size, -hdr->b_size);
}
+
+ /*
+ * We're releasing a duplicate user data buffer, update
+ * our statistics accordingly.
+ */
+ if (hdr->b_type == ARC_BUFC_DATA) {
+ ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
+ ARCSTAT_INCR(arcstat_duplicate_buffers_size,
+ -hdr->b_size);
+ }
hdr->b_datacnt -= 1;
arc_cksum_verify(buf);
#ifdef illumos
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Wed Mar 20 07:20:00 2013 (r248546)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Wed Mar 20 08:26:16 2013 (r248547)
@@ -2071,7 +2071,24 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db,
dbuf_evict(db);
} else {
VERIFY(arc_buf_remove_ref(db->db_buf, db) == 0);
- if (!DBUF_IS_CACHEABLE(db))
+
+ /*
+ * A dbuf will be eligible for eviction if either the
+ * 'primarycache' property is set or a duplicate
+ * copy of this buffer is already cached in the arc.
+ *
+ * In the case of the 'primarycache' a buffer
+ * is considered for eviction if it matches the
+ * criteria set in the property.
+ *
+ * To decide if our buffer is considered a
+ * duplicate, we must call into the arc to determine
+ * if multiple buffers are referencing the same
+ * block on-disk. If so, then we simply evict
+ * ourselves.
+ */
+ if (!DBUF_IS_CACHEABLE(db) ||
+ arc_buf_eviction_needed(db->db_buf))
dbuf_clear(db);
else
mutex_exit(&db->db_mtx);
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Wed Mar 20 07:20:00 2013 (r248546)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c Wed Mar 20 08:26:16 2013 (r248547)
@@ -6001,7 +6001,7 @@ spa_sync_version(void *arg1, void *arg2,
*/
ASSERT(tx->tx_txg != TXG_INITIAL);
- ASSERT(version <= SPA_VERSION);
+ ASSERT(SPA_VERSION_IS_SUPPORTED(version));
ASSERT(version >= spa_version(spa));
spa->spa_uberblock.ub_version = version;
@@ -6542,7 +6542,7 @@ spa_upgrade(spa_t *spa, uint64_t version
* future version would result in an unopenable pool, this shouldn't be
* possible.
*/
- ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
+ ASSERT(SPA_VERSION_IS_SUPPORTED(spa->spa_uberblock.ub_version));
ASSERT(version >= spa->spa_uberblock.ub_version);
spa->spa_uberblock.ub_version = version;
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Wed Mar 20 07:20:00 2013 (r248546)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/arc.h Wed Mar 20 08:26:16 2013 (r248547)
@@ -96,6 +96,7 @@ int arc_released(arc_buf_t *buf);
int arc_has_callback(arc_buf_t *buf);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
+boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
More information about the svn-src-stable-9
mailing list