svn commit: r263394 - in stable: 8/cddl/contrib/opensolaris/cmd/zdb 8/cddl/contrib/opensolaris/cmd/zpool 8/cddl/contrib/opensolaris/lib/libzpool/common 8/cddl/contrib/opensolaris/lib/libzpool/commo...
Xin LI
delphij at FreeBSD.org
Wed Mar 19 23:47:01 UTC 2014
Author: delphij
Date: Wed Mar 19 23:46:59 2014
New Revision: 263394
URL: http://svnweb.freebsd.org/changeset/base/263394
Log:
MFC r260138: MFV r242733:
3306 zdb should be able to issue reads in parallel
3321 'zpool reopen' command should be documented in the man page
and help message
illumos/illumos-gate at 31d7e8fa33fae995f558673adb22641b5aa8b6e1
FreeBSD porting notes: the kernel part of this changeset depends
on Solaris buf(9S) interfaces and is not really applicable for
our use. vdev_disk.c is patched as-is to reduce divergence from
upstream, but vdev_file.c is left intact.
Modified:
stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.8
stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.c
stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool.8
stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
Directory Properties:
stable/9/cddl/contrib/opensolaris/ (props changed)
stable/9/cddl/contrib/opensolaris/cmd/zpool/ (props changed)
stable/9/sys/ (props changed)
stable/9/sys/cddl/contrib/opensolaris/ (props changed)
Changes in other areas also in this revision:
Modified:
stable/8/cddl/contrib/opensolaris/cmd/zdb/zdb.8
stable/8/cddl/contrib/opensolaris/cmd/zdb/zdb.c
stable/8/cddl/contrib/opensolaris/cmd/zpool/zpool.8
stable/8/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
stable/8/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
stable/8/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
stable/8/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
Directory Properties:
stable/8/cddl/contrib/opensolaris/ (props changed)
stable/8/cddl/contrib/opensolaris/cmd/zpool/ (props changed)
stable/8/sys/ (props changed)
stable/8/sys/cddl/ (props changed)
stable/8/sys/cddl/contrib/opensolaris/ (props changed)
Modified: stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.8
==============================================================================
--- stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.8 Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.8 Wed Mar 19 23:46:59 2014 (r263394)
@@ -14,11 +14,12 @@
.\"
.\" Copyright 2012, Richard Lowe.
.\" Copyright (c) 2012, Marcelo Araujo <araujo at FreeBSD.org>.
+.\" Copyright (c) 2012 by Delphix. All rights reserved.
.\" All Rights Reserved.
.\"
.\" $FreeBSD$
.\"
-.Dd May 10, 2012
+.Dd December 31, 2013
.Dt ZDB 8
.Os
.Sh NAME
@@ -29,27 +30,35 @@
.Op Fl CumdibcsDvhLXFPA
.Op Fl e Op Fl p Ar path...
.Op Fl t Ar txg
+.Op Fl U Ar cache
+.Op Fl M Ar inflight I/Os
.Ar poolname
.Op Ar object ...
.Nm
.Op Fl divPA
.Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
.Ar dataset
.Op Ar object ...
.Nm
.Fl m Op Fl LXFPA
.Op Fl t Ar txg
.Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
.Ar poolname
.Nm
.Fl R Op Fl A
.Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
+.Ar poolname
.Ar poolname
.Ar vdev Ns : Ns Ar offset Ns : Ns Ar size Ns Op Ns : Ns Ar flags
.Nm
.Fl S
.Op Fl AP
.Op Fl e Op Fl p Ar path...
+.Op Fl U Ar cache
+.Ar poolname
.Ar poolname
.Nm
.Fl l
@@ -205,6 +214,11 @@ flag specifies the path under which devi
.It Fl F
Attempt to make an unreadable pool readable by trying progressively older
transactions.
+.It Fl M Ar inflight I/Os
+Limit the number of outstanding checksum I/Os to the specified value.
+The default value is 200. This option affects the performance of the
+.Fl c
+option.
.It Fl P
Print numbers in an unscaled form more amenable to parsing, eg. 1000000 rather
than 1M.
@@ -218,9 +232,7 @@ options for a means to see the available
transaction numbers.
.It Fl U Ar cachefile
Use a cache file other than
-.Pa /etc/zfs/zpool.cache .
-This option is only valid with
-.Fl C
+.Pa /boot/zfs/zpool.cache .
.It Fl v
Enable verbosity.
Specify multiple times for increased verbosity.
Modified: stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.c Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/cddl/contrib/opensolaris/cmd/zdb/zdb.c Wed Mar 19 23:46:59 2014 (r263394)
@@ -89,6 +89,7 @@ extern void dump_intent_log(zilog_t *);
uint64_t *zopt_object = NULL;
int zopt_objects = 0;
libzfs_handle_t *g_zfs;
+uint64_t max_inflight = 200;
/*
* These libumem hooks provide a reasonable set of defaults for the allocator's
@@ -110,16 +111,17 @@ static void
usage(void)
{
(void) fprintf(stderr,
- "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]]"
- "poolname [object...]\n"
- " %s [-divPA] [-e -p path...] dataset [object...]\n"
- " %s -m [-LXFPA] [-t txg] [-e [-p path...]]"
- "poolname [vdev [metaslab...]]\n"
- " %s -R [-A] [-e [-p path...]] poolname "
- "vdev:offset:size[:flags]\n"
- " %s -S [-PA] [-e [-p path...]] poolname\n"
- " %s -l [-uA] device\n"
- " %s -C [-A] [-U config]\n\n",
+ "Usage: %s [-CumdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
+ "[-U config] [-M inflight I/Os] poolname [object...]\n"
+ " %s [-divPA] [-e -p path...] [-U config] dataset "
+ "[object...]\n"
+ " %s -m [-LXFPA] [-t txg] [-e [-p path...]] [-U config] "
+ "poolname [vdev [metaslab...]]\n"
+ " %s -R [-A] [-e [-p path...]] poolname "
+ "vdev:offset:size[:flags]\n"
+ " %s -S [-PA] [-e [-p path...]] [-U config] poolname\n"
+ " %s -l [-uA] device\n"
+ " %s -C [-A] [-U config]\n\n",
cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname);
(void) fprintf(stderr, " Dataset name must include at least one "
@@ -164,6 +166,8 @@ usage(void)
(void) fprintf(stderr, " -P print numbers in parseable form\n");
(void) fprintf(stderr, " -t <txg> -- highest txg to use when "
"searching for uberblocks\n");
+ (void) fprintf(stderr, " -M <number of inflight I/Os> -- "
+ "specify the maximum number of checksumming I/Os [default is 200]");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
@@ -2154,6 +2158,47 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *
bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
}
+/* ARGSUSED */
+static void
+zdb_blkptr_done(zio_t *zio)
+{
+ spa_t *spa = zio->io_spa;
+ blkptr_t *bp = zio->io_bp;
+ int ioerr = zio->io_error;
+ zdb_cb_t *zcb = zio->io_private;
+ zbookmark_t *zb = &zio->io_bookmark;
+
+ zio_data_buf_free(zio->io_data, zio->io_size);
+
+ mutex_enter(&spa->spa_scrub_lock);
+ spa->spa_scrub_inflight--;
+ cv_broadcast(&spa->spa_scrub_io_cv);
+
+ if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
+ char blkbuf[BP_SPRINTF_LEN];
+
+ zcb->zcb_haderrors = 1;
+ zcb->zcb_errors[ioerr]++;
+
+ if (dump_opt['b'] >= 2)
+ sprintf_blkptr(blkbuf, bp);
+ else
+ blkbuf[0] = '\0';
+
+ (void) printf("zdb_blkptr_cb: "
+ "Got error %d reading "
+ "<%llu, %llu, %lld, %llx> %s -- skipping\n",
+ ioerr,
+ (u_longlong_t)zb->zb_objset,
+ (u_longlong_t)zb->zb_object,
+ (u_longlong_t)zb->zb_level,
+ (u_longlong_t)zb->zb_blkid,
+ blkbuf);
+ }
+ mutex_exit(&spa->spa_scrub_lock);
+}
+
+/* ARGSUSED */
static int
zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
@@ -2174,38 +2219,22 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
- int ioerr;
size_t size = BP_GET_PSIZE(bp);
- void *data = malloc(size);
+ void *data = zio_data_buf_alloc(size);
int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
/* If it's an intent log block, failure is expected. */
if (zb->zb_level == ZB_ZIL_LEVEL)
flags |= ZIO_FLAG_SPECULATIVE;
- ioerr = zio_wait(zio_read(NULL, spa, bp, data, size,
- NULL, NULL, ZIO_PRIORITY_ASYNC_READ, flags, zb));
-
- free(data);
- if (ioerr && !(flags & ZIO_FLAG_SPECULATIVE)) {
- zcb->zcb_haderrors = 1;
- zcb->zcb_errors[ioerr]++;
+ mutex_enter(&spa->spa_scrub_lock);
+ while (spa->spa_scrub_inflight > max_inflight)
+ cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
+ spa->spa_scrub_inflight++;
+ mutex_exit(&spa->spa_scrub_lock);
- if (dump_opt['b'] >= 2)
- sprintf_blkptr(blkbuf, bp);
- else
- blkbuf[0] = '\0';
-
- (void) printf("zdb_blkptr_cb: "
- "Got error %d reading "
- "<%llu, %llu, %lld, %llx> %s -- skipping\n",
- ioerr,
- (u_longlong_t)zb->zb_objset,
- (u_longlong_t)zb->zb_object,
- (u_longlong_t)zb->zb_level,
- (u_longlong_t)zb->zb_blkid,
- blkbuf);
- }
+ zio_nowait(zio_read(NULL, spa, bp, data, size,
+ zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
}
zcb->zcb_readfails = 0;
@@ -2433,6 +2462,18 @@ dump_block_stats(spa_t *spa)
zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
+ /*
+ * If we've traversed the data blocks then we need to wait for those
+ * I/Os to complete. We leverage "The Godfather" zio to wait on
+ * all async I/Os to complete.
+ */
+ if (dump_opt['c']) {
+ (void) zio_wait(spa->spa_async_zio_root);
+ spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
+ ZIO_FLAG_GODFATHER);
+ }
+
if (zcb.zcb_haderrors) {
(void) printf("\nError counts:\n\n");
(void) printf("\t%5s %s\n", "errno", "count");
@@ -3203,7 +3244,7 @@ main(int argc, char **argv)
dprintf_setup(&argc, argv);
- while ((c = getopt(argc, argv, "bcdhilmsuCDRSAFLXevp:t:U:P")) != -1) {
+ while ((c = getopt(argc, argv, "bcdhilmM:suCDRSAFLXevp:t:U:P")) != -1) {
switch (c) {
case 'b':
case 'c':
@@ -3232,6 +3273,15 @@ main(int argc, char **argv)
case 'v':
verbose++;
break;
+ case 'M':
+ max_inflight = strtoull(optarg, NULL, 0);
+ if (max_inflight == 0) {
+ (void) fprintf(stderr, "maximum number "
+ "of inflight I/Os must be greater "
+ "than 0\n");
+ usage();
+ }
+ break;
case 'p':
if (searchdirs == NULL) {
searchdirs = umem_alloc(sizeof (char *),
Modified: stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool.8
==============================================================================
--- stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool.8 Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool.8 Wed Mar 19 23:46:59 2014 (r263394)
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd March 14, 2013
+.Dd December 31, 2013
.Dt ZPOOL 8
.Os
.Sh NAME
@@ -141,6 +141,9 @@
.Cm remove
.Ar pool device ...
.Nm
+.Cm reopen
+.Ar pool
+.Nm
.Cm replace
.Op Fl f
.Ar pool device
@@ -1431,6 +1434,13 @@ command. Non-redundant and
devices cannot be removed from a pool.
.It Xo
.Nm
+.Cm reopen
+.Ar pool
+.Xc
+.Pp
+Reopen all the vdevs associated with the pool.
+.It Xo
+.Nm
.Cm replace
.Op Fl f
.Ar pool device
Modified: stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
==============================================================================
--- stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c Wed Mar 19 23:46:59 2014 (r263394)
@@ -247,7 +247,7 @@ get_usage(zpool_help_t idx) {
case HELP_REMOVE:
return (gettext("\tremove <pool> <device> ...\n"));
case HELP_REOPEN:
- return (""); /* Undocumented command */
+ return (gettext("\treopen <pool>\n"));
case HELP_SCRUB:
return (gettext("\tscrub [-s] <pool> ...\n"));
case HELP_STATUS:
@@ -3710,22 +3710,37 @@ zpool_do_reguid(int argc, char **argv)
* zpool reopen <pool>
*
* Reopen the pool so that the kernel can update the sizes of all vdevs.
- *
- * NOTE: This command is currently undocumented. If the command is ever
- * exposed then the appropriate usage() messages will need to be made.
*/
int
zpool_do_reopen(int argc, char **argv)
{
+ int c;
int ret = 0;
zpool_handle_t *zhp;
char *pool;
+ /* check options */
+ while ((c = getopt(argc, argv, "")) != -1) {
+ switch (c) {
+ case '?':
+ (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+ optopt);
+ usage(B_FALSE);
+ }
+ }
+
argc--;
argv++;
- if (argc != 1)
- return (2);
+ if (argc < 1) {
+ (void) fprintf(stderr, gettext("missing pool name\n"));
+ usage(B_FALSE);
+ }
+
+ if (argc > 1) {
+ (void) fprintf(stderr, gettext("too many arguments\n"));
+ usage(B_FALSE);
+ }
pool = argv[0];
if ((zhp = zpool_open_canfail(g_zfs, pool)) == NULL)
Modified: stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c
==============================================================================
--- stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/cddl/contrib/opensolaris/lib/libzpool/common/kernel.c Wed Mar 19 23:46:59 2014 (r263394)
@@ -661,7 +661,7 @@ __dprintf(const char *file, const char *
if (dprintf_find_string("pid"))
(void) printf("%d ", getpid());
if (dprintf_find_string("tid"))
- (void) printf("%u ", thr_self());
+ (void) printf("%ul ", thr_self());
#if 0
if (dprintf_find_string("cpu"))
(void) printf("%u ", getcpuid());
@@ -1125,3 +1125,50 @@ zvol_create_minors(const char *name)
return (0);
}
#endif
+
+#ifdef illumos
+void
+bioinit(buf_t *bp)
+{
+ bzero(bp, sizeof (buf_t));
+}
+
+void
+biodone(buf_t *bp)
+{
+ if (bp->b_iodone != NULL) {
+ (*(bp->b_iodone))(bp);
+ return;
+ }
+ ASSERT((bp->b_flags & B_DONE) == 0);
+ bp->b_flags |= B_DONE;
+}
+
+void
+bioerror(buf_t *bp, int error)
+{
+ ASSERT(bp != NULL);
+ ASSERT(error >= 0);
+
+ if (error != 0) {
+ bp->b_flags |= B_ERROR;
+ } else {
+ bp->b_flags &= ~B_ERROR;
+ }
+ bp->b_error = error;
+}
+
+
+int
+geterror(struct buf *bp)
+{
+ int error = 0;
+
+ if (bp->b_flags & B_ERROR) {
+ error = bp->b_error;
+ if (!error)
+ error = EIO;
+ }
+ return (error);
+}
+#endif
Modified: stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Wed Mar 19 23:46:59 2014 (r263394)
@@ -786,6 +786,38 @@ extern void cyclic_remove(cyclic_id_t);
extern int cyclic_reprogram(cyclic_id_t, hrtime_t);
#endif /* illumos */
+#ifdef illumos
+/*
+ * Buf structure
+ */
+#define B_BUSY 0x0001
+#define B_DONE 0x0002
+#define B_ERROR 0x0004
+#define B_READ 0x0040 /* read when I/O occurs */
+#define B_WRITE 0x0100 /* non-read pseudo-flag */
+
+typedef struct buf {
+ int b_flags;
+ size_t b_bcount;
+ union {
+ caddr_t b_addr;
+ } b_un;
+
+ lldaddr_t _b_blkno;
+#define b_lblkno _b_blkno._f
+ size_t b_resid;
+ size_t b_bufsize;
+ int (*b_iodone)(struct buf *);
+ int b_error;
+ void *b_private;
+} buf_t;
+
+extern void bioinit(buf_t *);
+extern void biodone(buf_t *);
+extern void bioerror(buf_t *, int);
+extern int geterror(buf_t *);
+#endif
+
#ifdef __cplusplus
}
#endif
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Wed Mar 19 23:46:59 2014 (r263394)
@@ -359,6 +359,16 @@ extern void vdev_set_min_asize(vdev_t *v
/* zdb uses this tunable, so it must be declared here to make lint happy. */
extern int zfs_vdev_cache_size;
+#ifdef illumos
+/*
+ * The vdev_buf_t is used to translate between zio_t and buf_t, and back again.
+ */
+typedef struct vdev_buf {
+ buf_t vb_buf; /* buffer that describes the io */
+ zio_t *vb_io; /* pointer back to the original zio_t */
+} vdev_buf_t;
+#endif
+
#ifdef __cplusplus
}
#endif
Modified: stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c
==============================================================================
--- stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c Wed Mar 19 23:44:03 2014 (r263393)
+++ stable/9/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_disk.c Wed Mar 19 23:46:59 2014 (r263394)
@@ -41,11 +41,6 @@
extern ldi_ident_t zfs_li;
-typedef struct vdev_disk_buf {
- buf_t vdb_buf;
- zio_t *vdb_io;
-} vdev_disk_buf_t;
-
static void
vdev_disk_hold(vdev_t *vd)
{
@@ -443,8 +438,8 @@ vdev_disk_ldi_physio(ldi_handle_t vd_lh,
static void
vdev_disk_io_intr(buf_t *bp)
{
- vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
- zio_t *zio = vdb->vdb_io;
+ vdev_buf_t *vb = (vdev_buf_t *)bp;
+ zio_t *zio = vb->vb_io;
/*
* The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
@@ -456,7 +451,7 @@ vdev_disk_io_intr(buf_t *bp)
if (zio->io_error == 0 && bp->b_resid != 0)
zio->io_error = SET_ERROR(EIO);
- kmem_free(vdb, sizeof (vdev_disk_buf_t));
+ kmem_free(vb, sizeof (vdev_buf_t));
zio_interrupt(zio);
}
@@ -487,7 +482,7 @@ vdev_disk_io_start(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
vdev_disk_t *dvd = vd->vdev_tsd;
- vdev_disk_buf_t *vdb;
+ vdev_buf_t *vb;
struct dk_callback *dkc;
buf_t *bp;
int error;
@@ -551,10 +546,10 @@ vdev_disk_io_start(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
- vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);
+ vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
- vdb->vdb_io = zio;
- bp = &vdb->vdb_buf;
+ vb->vb_io = zio;
+ bp = &vb->vb_buf;
bioinit(bp);
bp->b_flags = B_BUSY | B_NOCACHE |
More information about the svn-src-stable-9
mailing list