svn commit: r288571 - in stable/10: cddl/contrib/opensolaris/cmd/zdb cddl/contrib/opensolaris/cmd/zfs cddl/contrib/opensolaris/cmd/ztest cddl/contrib/opensolaris/lib/libzfs/common cddl/contrib/open...
Alexander Motin
mav at FreeBSD.org
Sat Oct 3 08:03:42 UTC 2015
Author: mav
Date: Sat Oct 3 08:03:36 2015
New Revision: 288571
URL: https://svnweb.freebsd.org/changeset/base/288571
Log:
MFC r286705: 5960 zfs recv should prefetch indirect blocks
5925 zfs receive -o origin=
Reviewed by: Prakash Surya <prakash.surya at delphix.com>
Reviewed by: Matthew Ahrens <mahrens at delphix.com>
Author: Paul Dagnelie <pcd at delphix.com>
While running 'zfs recv' we noticed that every 128th 8K block required a
read. We were seeing that restore_write() was calling dmu_tx_hold_write()
and the indirect block was not cached. We should prefetch upcoming indirect
blocks to avoid having to go to disk and blocking the restore_write().
Allow an incremental send stream to be received as a clone, even if the
stream does not mark it as a clone.
Added:
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c
- copied unchanged from r286705, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bqueue.h
- copied unchanged from r286705, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/bqueue.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_priority.h
- copied unchanged from r286705, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_priority.h
Modified:
stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c
stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8
stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c
stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
stable/10/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_traverse.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_zfetch.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_destroy.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_scan.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/space_map.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio.c
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/cmd/zdb/zdb.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -2429,6 +2429,9 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog
dmu_object_type_t type;
boolean_t is_metadata;
+ if (bp == NULL)
+ return (0);
+
if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
char blkbuf[BP_SPRINTF_LEN];
snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
@@ -2918,7 +2921,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilo
avl_index_t where;
zdb_ddt_entry_t *zdde, zdde_search;
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
Modified: stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8
==============================================================================
--- stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8 Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs.8 Sat Oct 3 08:03:36 2015 (r288571)
@@ -191,11 +191,13 @@
.Nm
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Nm
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
.Op Fl d | e
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem
.Nm
.Cm allow
@@ -2711,6 +2713,7 @@ feature.
.Nm
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem Ns | Ns Ar volume Ns | Ns Ar snapshot
.Xc
.It Xo
@@ -2718,6 +2721,7 @@ feature.
.Cm receive Ns | Ns Cm recv
.Op Fl vnFu
.Op Fl d | e
+.Op Fl o Sy origin Ns = Ns Ar snapshot
.Ar filesystem
.Xc
.Pp
@@ -2802,6 +2806,10 @@ receive operation.
Do not actually receive the stream. This can be useful in conjunction with the
.Fl v
option to verify the name the receive operation would use.
+.It Fl o Sy origin Ns = Ns Ar snapshot
+Forces the stream to be received as a clone of the given snapshot.
+This is only valid if the stream is an incremental stream whose source
+is the same as the provided origin.
.It Fl F
Force a rollback of the file system to the most recent snapshot before
performing the receive operation. If receiving an incremental replication
Modified: stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c
==============================================================================
--- stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/cmd/zfs/zfs_main.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -264,8 +264,9 @@ get_usage(zfs_help_t idx)
return (gettext("\tpromote <clone-filesystem>\n"));
case HELP_RECEIVE:
return (gettext("\treceive|recv [-vnFu] <filesystem|volume|"
- "snapshot>\n"
- "\treceive|recv [-vnFu] [-d | -e] <filesystem>\n"));
+ "snapshot>\n"
+ "\treceive|recv [-vnFu] [-o origin=<snapshot>] [-d | -e] "
+ "<filesystem>\n"));
case HELP_RENAME:
return (gettext("\trename [-f] <filesystem|volume|snapshot> "
"<filesystem|volume|snapshot>\n"
@@ -791,7 +792,7 @@ zfs_do_create(int argc, char **argv)
nomem();
break;
case 'o':
- if (parseprop(props, optarg))
+ if (parseprop(props, optarg) != 0)
goto error;
break;
case 's':
@@ -3663,7 +3664,7 @@ zfs_do_snapshot(int argc, char **argv)
while ((c = getopt(argc, argv, "ro:")) != -1) {
switch (c) {
case 'o':
- if (parseprop(props, optarg))
+ if (parseprop(props, optarg) != 0)
return (1);
break;
case 'r':
@@ -3922,10 +3923,19 @@ zfs_do_receive(int argc, char **argv)
{
int c, err;
recvflags_t flags = { 0 };
+ nvlist_t *props;
+ nvpair_t *nvp = NULL;
+
+ if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0)
+ nomem();
/* check options */
- while ((c = getopt(argc, argv, ":denuvF")) != -1) {
+ while ((c = getopt(argc, argv, ":o:denuvF")) != -1) {
switch (c) {
+ case 'o':
+ if (parseprop(props, optarg) != 0)
+ return (1);
+ break;
case 'd':
flags.isprefix = B_TRUE;
break;
@@ -3970,6 +3980,13 @@ zfs_do_receive(int argc, char **argv)
usage(B_FALSE);
}
+ while ((nvp = nvlist_next_nvpair(props, nvp))) {
+ if (strcmp(nvpair_name(nvp), "origin") != 0) {
+ (void) fprintf(stderr, gettext("invalid option"));
+ usage(B_FALSE);
+ }
+ }
+
if (isatty(STDIN_FILENO)) {
(void) fprintf(stderr,
gettext("Error: Backup stream can not be read "
@@ -3978,7 +3995,7 @@ zfs_do_receive(int argc, char **argv)
return (1);
}
- err = zfs_receive(g_zfs, argv[0], &flags, STDIN_FILENO, NULL);
+ err = zfs_receive(g_zfs, argv[0], props, &flags, STDIN_FILENO, NULL);
return (err != 0);
}
Modified: stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/cmd/ztest/ztest.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -3586,7 +3586,8 @@ ztest_dmu_read_write(ztest_ds_t *zd, uin
*/
n = ztest_random(regions) * stride + ztest_random(width);
s = 1 + ztest_random(2 * width - 1);
- dmu_prefetch(os, bigobj, n * chunksize, s * chunksize);
+ dmu_prefetch(os, bigobj, 0, n * chunksize, s * chunksize,
+ ZIO_PRIORITY_SYNC_READ);
/*
* Pick a random index and compute the offsets into packobj and bigobj.
@@ -5705,8 +5706,10 @@ ztest_run(ztest_shared_t *zs)
* Right before closing the pool, kick off a bunch of async I/O;
* spa_close() should wait for it to complete.
*/
- for (uint64_t object = 1; object < 50; object++)
- dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
+ for (uint64_t object = 1; object < 50; object++) {
+ dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
+ ZIO_PRIORITY_SYNC_READ);
+ }
spa_close(spa, FTAG);
Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
==============================================================================
--- stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h Sat Oct 3 08:03:36 2015 (r288571)
@@ -668,8 +668,8 @@ typedef struct recvflags {
boolean_t nomount;
} recvflags_t;
-extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *,
- int, avl_tree_t *);
+extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
+ recvflags_t *, int, avl_tree_t *);
typedef enum diff_flags {
ZFS_DIFF_PARSEABLE = 0x1,
Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
==============================================================================
--- stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -3535,7 +3535,7 @@ zpool_vdev_name(libzfs_handle_t *hdl, zp
}
static int
-zbookmark_compare(const void *a, const void *b)
+zbookmark_mem_compare(const void *a, const void *b)
{
return (memcmp(a, b, sizeof (zbookmark_phys_t)));
}
@@ -3598,7 +3598,7 @@ zpool_get_errlog(zpool_handle_t *zhp, nv
zc.zc_nvlist_dst_size;
count -= zc.zc_nvlist_dst_size;
- qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_compare);
+ qsort(zb, count, sizeof (zbookmark_phys_t), zbookmark_mem_compare);
verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -64,8 +64,9 @@ extern void zfs_setprop_error(libzfs_han
/* We need to use something for ENODATA. */
#define ENODATA EIDRM
-static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
- int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
+static int zfs_receive_impl(libzfs_handle_t *, const char *, const char *,
+ recvflags_t *, int, const char *, nvlist_t *, avl_tree_t *, char **, int,
+ uint64_t *);
static const zio_cksum_t zero_cksum = { 0 };
@@ -2498,7 +2499,7 @@ zfs_receive_package(libzfs_handle_t *hdl
* zfs_receive_one() will take care of it (ie,
* recv_skip() and return 0).
*/
- error = zfs_receive_impl(hdl, destname, flags, fd,
+ error = zfs_receive_impl(hdl, destname, NULL, flags, fd,
sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
action_handlep);
if (error == ENODATA) {
@@ -2631,9 +2632,9 @@ recv_skip(libzfs_handle_t *hdl, int fd,
*/
static int
zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
- recvflags_t *flags, dmu_replay_record_t *drr,
- dmu_replay_record_t *drr_noswap, const char *sendfs,
- nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
+ const char *originsnap, recvflags_t *flags, dmu_replay_record_t *drr,
+ dmu_replay_record_t *drr_noswap, const char *sendfs, nvlist_t *stream_nv,
+ avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
uint64_t *action_handlep)
{
zfs_cmd_t zc = { 0 };
@@ -2798,10 +2799,15 @@ zfs_receive_one(libzfs_handle_t *hdl, in
}
if (flags->verbose)
(void) printf("found clone origin %s\n", zc.zc_string);
+ } else if (originsnap) {
+ (void) strncpy(zc.zc_string, originsnap, ZFS_MAXNAMELEN);
+ if (flags->verbose)
+ (void) printf("using provided clone origin %s\n",
+ zc.zc_string);
}
stream_wantsnewfs = (drrb->drr_fromguid == 0 ||
- (drrb->drr_flags & DRR_FLAG_CLONE));
+ (drrb->drr_flags & DRR_FLAG_CLONE) || originsnap);
if (stream_wantsnewfs) {
/*
@@ -3179,9 +3185,10 @@ zfs_receive_one(libzfs_handle_t *hdl, in
}
static int
-zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
- int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
- char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
+zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
+ const char *originsnap, recvflags_t *flags, int infd, const char *sendfs,
+ nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
+ uint64_t *action_handlep)
{
int err;
dmu_replay_record_t drr, drr_noswap;
@@ -3200,6 +3207,12 @@ zfs_receive_impl(libzfs_handle_t *hdl, c
"(%s) does not exist"), tosnap);
return (zfs_error(hdl, EZFS_NOENT, errbuf));
}
+ if (originsnap &&
+ !zfs_dataset_exists(hdl, originsnap, ZFS_TYPE_DATASET)) {
+ zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified origin fs "
+ "(%s) does not exist"), originsnap);
+ return (zfs_error(hdl, EZFS_NOENT, errbuf));
+ }
/* read in the BEGIN record */
if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
@@ -3272,14 +3285,14 @@ zfs_receive_impl(libzfs_handle_t *hdl, c
*cp = '\0';
sendfs = nonpackage_sendfs;
}
- return (zfs_receive_one(hdl, infd, tosnap, flags,
- &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
- top_zfs, cleanup_fd, action_handlep));
+ return (zfs_receive_one(hdl, infd, tosnap, originsnap, flags,
+ &drr, &drr_noswap, sendfs, stream_nv, stream_avl, top_zfs,
+ cleanup_fd, action_handlep));
} else {
assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM);
- return (zfs_receive_package(hdl, infd, tosnap, flags,
- &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
+ return (zfs_receive_package(hdl, infd, tosnap, flags, &drr,
+ &zcksum, top_zfs, cleanup_fd, action_handlep));
}
}
@@ -3290,18 +3303,24 @@ zfs_receive_impl(libzfs_handle_t *hdl, c
* (-1 will override -2).
*/
int
-zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
- int infd, avl_tree_t *stream_avl)
+zfs_receive(libzfs_handle_t *hdl, const char *tosnap, nvlist_t *props,
+ recvflags_t *flags, int infd, avl_tree_t *stream_avl)
{
char *top_zfs = NULL;
int err;
int cleanup_fd;
uint64_t action_handle = 0;
+ char *originsnap = NULL;
+ if (props) {
+ err = nvlist_lookup_string(props, "origin", &originsnap);
+ if (err && err != ENOENT)
+ return (err);
+ }
cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
VERIFY(cleanup_fd >= 0);
- err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
+ err = zfs_receive_impl(hdl, tosnap, originsnap, flags, infd, NULL, NULL,
stream_avl, &top_zfs, cleanup_fd, &action_handle);
VERIFY(0 == close(cleanup_fd));
Modified: stable/10/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h
==============================================================================
--- stable/10/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/cddl/contrib/opensolaris/lib/libzpool/common/sys/zfs_context.h Sat Oct 3 08:03:36 2015 (r288571)
@@ -135,8 +135,18 @@ extern int aok;
/*
* DTrace SDT probes have different signatures in userland than they do in
- * kernel. If they're being used in kernel code, re-define them out of
+ * the kernel. If they're being used in kernel code, re-define them out of
* existence for their counterparts in libzpool.
+ *
+ * Here's an example of how to use the set-error probes in userland:
+ * zfs$target:::set-error /arg0 == EBUSY/ {stack();}
+ *
+ * Here's an example of how to use DTRACE_PROBE probes in userland:
+ * If there is a probe declared as follows:
+ * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn);
+ * Then you can use it as follows:
+ * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/
+ * {printf("%u %p\n", arg1, arg2);}
*/
#ifdef DTRACE_PROBE
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/Makefile.files
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/Makefile.files Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/Makefile.files Sat Oct 3 08:03:36 2015 (r288571)
@@ -22,7 +22,9 @@
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
-# Copyright (c) 2013 by Delphix. All rights reserved.
+# Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
+# Copyright (c) 2012 Joyent, Inc. All rights reserved.
+# Copyright (c) 2011, 2014 by Delphix. All rights reserved.
# Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
#
#
@@ -36,6 +38,7 @@ ZFS_COMMON_OBJS += \
blkptr.o \
bpobj.o \
bptree.o \
+ bqueue.o \
dbuf.o \
ddt.o \
ddt_zap.o \
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bptree.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -154,7 +154,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zil
int err;
struct bptree_args *ba = arg;
- if (BP_IS_HOLE(bp))
+ if (bp == NULL || BP_IS_HOLE(bp))
return (0);
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
Copied: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c (from r286705, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c Sat Oct 3 08:03:36 2015 (r288571, copy of r286705, head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/bqueue.c)
@@ -0,0 +1,111 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#include <sys/bqueue.h>
+#include <sys/zfs_context.h>
+
+static inline bqueue_node_t *
+obj2node(bqueue_t *q, void *data)
+{
+ return ((bqueue_node_t *)((char *)data + q->bq_node_offset));
+}
+
+/*
+ * Initialize a blocking queue The maximum capacity of the queue is set to
+ * size. Types that want to be stored in a bqueue must contain a bqueue_node_t,
+ * and offset should give its offset from the start of the struct. Return 0 on
+ * success, or -1 on failure.
+ */
+int
+bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
+{
+ list_create(&q->bq_list, node_offset + sizeof (bqueue_node_t),
+ node_offset + offsetof(bqueue_node_t, bqn_node));
+ cv_init(&q->bq_add_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&q->bq_pop_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&q->bq_lock, NULL, MUTEX_DEFAULT, NULL);
+ q->bq_node_offset = node_offset;
+ q->bq_size = 0;
+ q->bq_maxsize = size;
+ return (0);
+}
+
+/*
+ * Destroy a blocking queue. This function asserts that there are no
+ * elements in the queue, and no one is blocked on the condition
+ * variables.
+ */
+void
+bqueue_destroy(bqueue_t *q)
+{
+ ASSERT0(q->bq_size);
+ cv_destroy(&q->bq_add_cv);
+ cv_destroy(&q->bq_pop_cv);
+ mutex_destroy(&q->bq_lock);
+ list_destroy(&q->bq_list);
+}
+
+/*
+ * Add data to q, consuming size units of capacity. If there is insufficient
+ * capacity to consume size units, block until capacity exists. Asserts size is
+ * > 0.
+ */
+void
+bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+{
+ ASSERT3U(item_size, >, 0);
+ ASSERT3U(item_size, <, q->bq_maxsize);
+ mutex_enter(&q->bq_lock);
+ obj2node(q, data)->bqn_size = item_size;
+ while (q->bq_size + item_size > q->bq_maxsize) {
+ cv_wait(&q->bq_add_cv, &q->bq_lock);
+ }
+ q->bq_size += item_size;
+ list_insert_tail(&q->bq_list, data);
+ cv_signal(&q->bq_pop_cv);
+ mutex_exit(&q->bq_lock);
+}
+/*
+ * Take the first element off of q. If there are no elements on the queue, wait
+ * until one is put there. Return the removed element.
+ */
+void *
+bqueue_dequeue(bqueue_t *q)
+{
+ void *ret;
+ uint64_t item_size;
+ mutex_enter(&q->bq_lock);
+ while (q->bq_size == 0) {
+ cv_wait(&q->bq_pop_cv, &q->bq_lock);
+ }
+ ret = list_remove_head(&q->bq_list);
+ item_size = obj2node(q, ret)->bqn_size;
+ q->bq_size -= item_size;
+ mutex_exit(&q->bq_lock);
+ cv_signal(&q->bq_add_cv);
+ return (ret);
+}
+
+/*
+ * Returns true if the space used is 0.
+ */
+boolean_t
+bqueue_empty(bqueue_t *q)
+{
+ return (q->bq_size == 0);
+}
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -548,11 +548,35 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
return (abuf);
}
+/*
+ * Calculate which level n block references the data at the level 0 offset
+ * provided.
+ */
uint64_t
-dbuf_whichblock(dnode_t *dn, uint64_t offset)
+dbuf_whichblock(dnode_t *dn, int64_t level, uint64_t offset)
{
- if (dn->dn_datablkshift) {
- return (offset >> dn->dn_datablkshift);
+ if (dn->dn_datablkshift != 0 && dn->dn_indblkshift != 0) {
+ /*
+ * The level n blkid is equal to the level 0 blkid divided by
+ * the number of level 0s in a level n block.
+ *
+ * The level 0 blkid is offset >> datablkshift =
+ * offset / 2^datablkshift.
+ *
+ * The number of level 0s in a level n is the number of block
+ * pointers in an indirect block, raised to the power of level.
+ * This is 2^(indblkshift - SPA_BLKPTRSHIFT)^level =
+ * 2^(level*(indblkshift - SPA_BLKPTRSHIFT)).
+ *
+ * Thus, the level n blkid is: offset /
+ * ((2^datablkshift)*(2^(level*(indblkshift - SPA_BLKPTRSHIFT)))
+ * = offset / 2^(datablkshift + level *
+ * (indblkshift - SPA_BLKPTRSHIFT))
+ * = offset >> (datablkshift + level *
+ * (indblkshift - SPA_BLKPTRSHIFT))
+ */
+ return (offset >> (dn->dn_datablkshift + level *
+ (dn->dn_indblkshift - SPA_BLKPTRSHIFT)));
} else {
ASSERT3U(offset, <, dn->dn_datablksz);
return (0);
@@ -1715,6 +1739,12 @@ dbuf_clear(dmu_buf_impl_t *db)
dbuf_rele(parent, db);
}
+/*
+ * Note: While bpp will always be updated if the function returns success,
+ * parentp will not be updated if the dnode does not have dn_dbuf filled in;
+ * this happens when the dnode is the meta-dnode, or a userused or groupused
+ * object.
+ */
static int
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
dmu_buf_impl_t **parentp, blkptr_t **bpp)
@@ -1755,7 +1785,7 @@ dbuf_findbp(dnode_t *dn, int level, uint
} else if (level < nlevels-1) {
/* this block is referenced from an indirect block */
int err = dbuf_hold_impl(dn, level+1,
- blkid >> epbs, fail_sparse, NULL, parentp);
+ blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
if (err)
return (err);
err = dbuf_read(*parentp, NULL,
@@ -1930,11 +1960,96 @@ dbuf_destroy(dmu_buf_impl_t *db)
arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
}
+typedef struct dbuf_prefetch_arg {
+ spa_t *dpa_spa; /* The spa to issue the prefetch in. */
+ zbookmark_phys_t dpa_zb; /* The target block to prefetch. */
+ int dpa_epbs; /* Entries (blkptr_t's) Per Block Shift. */
+ int dpa_curlevel; /* The current level that we're reading */
+ zio_priority_t dpa_prio; /* The priority I/Os should be issued at. */
+ zio_t *dpa_zio; /* The parent zio_t for all prefetches. */
+ arc_flags_t dpa_aflags; /* Flags to pass to the final prefetch. */
+} dbuf_prefetch_arg_t;
+
+/*
+ * Actually issue the prefetch read for the block given.
+ */
+static void
+dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
+{
+ if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ return;
+
+ arc_flags_t aflags =
+ dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
+
+ ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
+ ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level);
+ ASSERT(dpa->dpa_zio != NULL);
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp, NULL, NULL,
+ dpa->dpa_prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &aflags, &dpa->dpa_zb);
+}
+
+/*
+ * Called when an indirect block above our prefetch target is read in. This
+ * will either read in the next indirect block down the tree or issue the actual
+ * prefetch if the next block down is our target.
+ */
+static void
+dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
+{
+ dbuf_prefetch_arg_t *dpa = private;
+
+ ASSERT3S(dpa->dpa_zb.zb_level, <, dpa->dpa_curlevel);
+ ASSERT3S(dpa->dpa_curlevel, >, 0);
+ if (zio != NULL) {
+ ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
+ ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
+ ASSERT3P(zio->io_spa, ==, dpa->dpa_spa);
+ }
+
+ dpa->dpa_curlevel--;
+
+ uint64_t nextblkid = dpa->dpa_zb.zb_blkid >>
+ (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
+ blkptr_t *bp = ((blkptr_t *)abuf->b_data) +
+ P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
+ if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) {
+ kmem_free(dpa, sizeof (*dpa));
+ } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
+ ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
+ dbuf_issue_final_prefetch(dpa, bp);
+ kmem_free(dpa, sizeof (*dpa));
+ } else {
+ arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
+ zbookmark_phys_t zb;
+
+ ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
+
+ SET_BOOKMARK(&zb, dpa->dpa_zb.zb_objset,
+ dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid);
+
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
+ bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &iter_aflags, &zb);
+ }
+ (void) arc_buf_remove_ref(abuf, private);
+}
+
+/*
+ * Issue prefetch reads for the given block on the given level. If the indirect
+ * blocks above that block are not in memory, we will read them in
+ * asynchronously. As a result, this call never blocks waiting for a read to
+ * complete.
+ */
void
-dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
+dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
+ arc_flags_t aflags)
{
- dmu_buf_impl_t *db = NULL;
- blkptr_t *bp = NULL;
+ blkptr_t bp;
+ int epbs, nlevels, curlevel;
+ uint64_t curblkid;
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
@@ -1942,35 +2057,104 @@ dbuf_prefetch(dnode_t *dn, uint64_t blki
if (dnode_block_freed(dn, blkid))
return;
- /* dbuf_find() returns with db_mtx held */
- if (db = dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid)) {
+ /*
+ * This dnode hasn't been written to disk yet, so there's nothing to
+ * prefetch.
+ */
+ nlevels = dn->dn_phys->dn_nlevels;
+ if (level >= nlevels || dn->dn_phys->dn_nblkptr == 0)
+ return;
+
+ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ if (dn->dn_phys->dn_maxblkid < blkid << (epbs * level))
+ return;
+
+ dmu_buf_impl_t *db = dbuf_find(dn->dn_objset, dn->dn_object,
+ level, blkid);
+ if (db != NULL) {
+ mutex_exit(&db->db_mtx);
/*
- * This dbuf is already in the cache. We assume that
- * it is already CACHED, or else about to be either
- * read or filled.
+ * This dbuf already exists. It is either CACHED, or
+ * (we assume) about to be read or filled.
*/
- mutex_exit(&db->db_mtx);
return;
}
- if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp) == 0) {
- if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
- dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
- arc_flags_t aflags =
- ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
- zbookmark_phys_t zb;
-
- SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
- dn->dn_object, 0, blkid);
-
- (void) arc_read(NULL, dn->dn_objset->os_spa,
- bp, NULL, NULL, prio,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
- &aflags, &zb);
+ /*
+ * Find the closest ancestor (indirect block) of the target block
+ * that is present in the cache. In this indirect block, we will
+ * find the bp that is at curlevel, curblkid.
+ */
+ curlevel = level;
+ curblkid = blkid;
+ while (curlevel < nlevels - 1) {
+ int parent_level = curlevel + 1;
+ uint64_t parent_blkid = curblkid >> epbs;
+ dmu_buf_impl_t *db;
+
+ if (dbuf_hold_impl(dn, parent_level, parent_blkid,
+ FALSE, TRUE, FTAG, &db) == 0) {
+ blkptr_t *bpp = db->db_buf->b_data;
+ bp = bpp[P2PHASE(curblkid, 1 << epbs)];
+ dbuf_rele(db, FTAG);
+ break;
}
- if (db)
- dbuf_rele(db, NULL);
+
+ curlevel = parent_level;
+ curblkid = parent_blkid;
+ }
+
+ if (curlevel == nlevels - 1) {
+ /* No cached indirect blocks found. */
+ ASSERT3U(curblkid, <, dn->dn_phys->dn_nblkptr);
+ bp = dn->dn_phys->dn_blkptr[curblkid];
}
+ if (BP_IS_HOLE(&bp))
+ return;
+
+ ASSERT3U(curlevel, ==, BP_GET_LEVEL(&bp));
+
+ zio_t *pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL,
+ ZIO_FLAG_CANFAIL);
+
+ dbuf_prefetch_arg_t *dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP);
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ SET_BOOKMARK(&dpa->dpa_zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
+ dn->dn_object, level, blkid);
+ dpa->dpa_curlevel = curlevel;
+ dpa->dpa_prio = prio;
+ dpa->dpa_aflags = aflags;
+ dpa->dpa_spa = dn->dn_objset->os_spa;
+ dpa->dpa_epbs = epbs;
+ dpa->dpa_zio = pio;
+
+ /*
+ * If we have the indirect just above us, no need to do the asynchronous
+ * prefetch chain; we'll just run the last step ourselves. If we're at
+ * a higher level, though, we want to issue the prefetches for all the
+ * indirect blocks asynchronously, so we can go on with whatever we were
+ * doing.
+ */
+ if (curlevel == level) {
+ ASSERT3U(curblkid, ==, blkid);
+ dbuf_issue_final_prefetch(dpa, &bp);
+ kmem_free(dpa, sizeof (*dpa));
+ } else {
+ arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
+ zbookmark_phys_t zb;
+
+ SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
+ dn->dn_object, curlevel, curblkid);
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
+ &bp, dbuf_prefetch_indirect_done, dpa, prio,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &iter_aflags, &zb);
+ }
+ /*
+ * We use pio here instead of dpa_zio since it's possible that
+ * dpa may have already been freed.
+ */
+ zio_nowait(pio);
}
/*
@@ -1978,7 +2162,8 @@ dbuf_prefetch(dnode_t *dn, uint64_t blki
* Note: dn_struct_rwlock must be held.
*/
int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
+dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp)
{
dmu_buf_impl_t *db, *parent = NULL;
@@ -1996,6 +2181,9 @@ top:
blkptr_t *bp = NULL;
int err;
+ if (fail_uncached)
+ return (SET_ERROR(ENOENT));
+
ASSERT3P(parent, ==, NULL);
err = dbuf_findbp(dn, level, blkid, fail_sparse, &parent, &bp);
if (fail_sparse) {
@@ -2012,6 +2200,11 @@ top:
db = dbuf_create(dn, level, blkid, parent, bp);
}
+ if (fail_uncached && db->db_state != DB_CACHED) {
+ mutex_exit(&db->db_mtx);
+ return (SET_ERROR(ENOENT));
+ }
+
if (db->db_buf && refcount_is_zero(&db->db_holds)) {
arc_buf_add_ref(db->db_buf, db);
if (db->db_buf->b_data == NULL) {
@@ -2067,16 +2260,14 @@ top:
dmu_buf_impl_t *
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
{
- dmu_buf_impl_t *db;
- int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db);
- return (err ? NULL : db);
+ return (dbuf_hold_level(dn, 0, blkid, tag));
}
dmu_buf_impl_t *
dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag)
{
dmu_buf_impl_t *db;
- int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
+ int err = dbuf_hold_impl(dn, level, blkid, FALSE, FALSE, tag, &db);
return (err ? NULL : db);
}
@@ -2429,8 +2620,8 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_i
if (parent == NULL) {
mutex_exit(&db->db_mtx);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- (void) dbuf_hold_impl(dn, db->db_level+1,
- db->db_blkid >> epbs, FALSE, db, &parent);
+ parent = dbuf_hold_level(dn, db->db_level + 1,
+ db->db_blkid >> epbs, db);
rw_exit(&dn->dn_struct_rwlock);
mutex_enter(&db->db_mtx);
db->db_parent = parent;
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -142,7 +142,7 @@ dmu_buf_hold_noread(objset_t *os, uint64
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
@@ -425,7 +425,7 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn,
dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
if (db == NULL) {
@@ -529,17 +529,16 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake,
}
/*
- * Issue prefetch i/os for the given blocks.
+ * Issue prefetch i/os for the given blocks. If level is greater than 0, the
+ * indirect blocks prefeteched will be those that point to the blocks containing
+ * the data starting at offset, and continuing to offset + len.
*
- * Note: The assumption is that we *know* these blocks will be needed
- * almost immediately. Therefore, the prefetch i/os will be issued at
- * ZIO_PRIORITY_SYNC_READ
- *
- * Note: indirect blocks and other metadata will be read synchronously,
- * causing this function to block if they are not already cached.
+ * Note that if the indirect blocks above the blocks being prefetched are not in
+ * cache, they will be asychronously read in.
*/
void
-dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
+dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
+ uint64_t len, zio_priority_t pri)
{
dnode_t *dn;
uint64_t blkid;
@@ -555,8 +554,9 @@ dmu_prefetch(objset_t *os, uint64_t obje
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
- dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ);
+ blkid = dbuf_whichblock(dn, level,
+ object * sizeof (dnode_phys_t));
+ dbuf_prefetch(dn, level, blkid, pri, 0);
rw_exit(&dn->dn_struct_rwlock);
return;
}
@@ -571,18 +571,24 @@ dmu_prefetch(objset_t *os, uint64_t obje
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- if (dn->dn_datablkshift) {
- int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset + len, 1 << blkshift) -
- P2ALIGN(offset, 1 << blkshift)) >> blkshift;
+ /*
+ * offset + len - 1 is the last byte we want to prefetch for, and offset
+ * is the first. Then dbuf_whichblk(dn, level, off + len - 1) is the
+ * last block we want to prefetch, and dbuf_whichblock(dn, level,
+ * offset) is the first. Then the number we need to prefetch is the
+ * last - first + 1.
+ */
+ if (level > 0 || dn->dn_datablkshift != 0) {
+ nblks = dbuf_whichblock(dn, level, offset + len - 1) -
+ dbuf_whichblock(dn, level, offset) + 1;
} else {
nblks = (offset < dn->dn_datablksz);
}
if (nblks != 0) {
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, level, offset);
for (int i = 0; i < nblks; i++)
- dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ);
+ dbuf_prefetch(dn, level, blkid + i, pri, 0);
}
rw_exit(&dn->dn_struct_rwlock);
@@ -1394,7 +1400,7 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uin
DB_DNODE_ENTER(dbuf);
dn = DB_DNODE(dbuf);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(dbuf);
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_diff.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -138,7 +138,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, cons
if (issig(JUSTLOOKING) && issig(FORREAL))
return (SET_ERROR(EINTR));
- if (zb->zb_object != DMU_META_DNODE_OBJECT)
+ if (bp == NULL || zb->zb_object != DMU_META_DNODE_OBJECT)
return (0);
if (BP_IS_HOLE(bp)) {
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -148,6 +148,11 @@ dmu_object_free(objset_t *os, uint64_t o
return (0);
}
+/*
+ * Return (in *objectp) the next object which is allocated (or a hole)
+ * after *object, taking into account only objects that may have been modified
+ * after the specified txg.
+ */
int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Sat Oct 3 07:59:25 2015 (r288570)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Sat Oct 3 08:03:36 2015 (r288571)
@@ -53,6 +53,7 @@
#include <sys/blkptr.h>
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
+#include <sys/bqueue.h>
#ifdef __FreeBSD__
#undef dump_write
@@ -61,10 +62,34 @@
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
+int zfs_send_queue_length = 16 * 1024 * 1024;
+int zfs_recv_queue_length = 16 * 1024 * 1024;
static char *dmu_recv_tag = "dmu_recv_tag";
static const char *recv_clone_name = "%recv";
+#define BP_SPAN(datablkszsec, indblkshift, level) \
+ (((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
+ (level) * (indblkshift - SPA_BLKPTRSHIFT)))
+
+struct send_thread_arg {
+ bqueue_t q;
+ dsl_dataset_t *ds; /* Dataset to traverse */
+ uint64_t fromtxg; /* Traverse from this txg */
+ int flags; /* flags to pass to traverse_dataset */
+ int error_code;
+ boolean_t cancel;
+};
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-stable-10
mailing list