svn commit: r288568 - in stable/10: cddl/contrib/opensolaris/lib/libzfs_core/common sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys

Alexander Motin mav at FreeBSD.org
Sat Oct 3 07:57:34 UTC 2015


Author: mav
Date: Sat Oct  3 07:57:32 2015
New Revision: 288568
URL: https://svnweb.freebsd.org/changeset/base/288568

Log:
  MFC r286683: 5765 add support for estimating send stream size with
  lzc_send_space when source is a bookmark
  
  Reviewed by: Matthew Ahrens <mahrens at delphix.com>
  Reviewed by: Christopher Siden <christopher.siden at delphix.com>
  Reviewed by: Steven Hartland <killing at multiplay.co.uk>
  Reviewed by: Bayard Bell <buffer.g.overflow at gmail.com>
  Approved by: Albert Lee <trisk at nexenta.com>
  Author: Max Grossman <max.grossman at delphix.com>
  
  illumos/illumos-gate at 643da460c8ca583e39ce053081754e24087f84c8

Modified:
  stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
  stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
==============================================================================
--- stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c	Sat Oct  3 07:55:37 2015	(r288567)
+++ stable/10/cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c	Sat Oct  3 07:57:32 2015	(r288568)
@@ -20,7 +20,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  */
 
@@ -532,18 +532,30 @@ lzc_send(const char *snapname, const cha
 }
 
 /*
- * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
+ * "from" can be NULL, a snapshot, or a bookmark.
+ *
+ * If from is NULL, a full (non-incremental) stream will be estimated.  This
+ * is calculated very efficiently.
+ *
+ * If from is a snapshot, lzc_send_space uses the deadlists attached to
+ * each snapshot to efficiently estimate the stream size.
+ *
+ * If from is a bookmark, the indirect blocks in the destination snapshot
+ * are traversed, looking for blocks with a birth time since the creation TXG of
+ * the snapshot this bookmark was created from.  This will result in
+ * significantly more I/O and be less efficient than a send space estimation on
+ * an equivalent snapshot.
  */
 int
-lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
+lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
 {
 	nvlist_t *args;
 	nvlist_t *result;
 	int err;
 
 	args = fnvlist_alloc();
-	if (fromsnap != NULL)
-		fnvlist_add_string(args, "fromsnap", fromsnap);
+	if (from != NULL)
+		fnvlist_add_string(args, "from", from);
 	err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
 	nvlist_free(args);
 	if (err == 0)

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Sat Oct  3 07:55:37 2015	(r288567)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Sat Oct  3 07:57:32 2015	(r288568)
@@ -856,6 +856,40 @@ dmu_send(const char *tosnap, const char 
 	return (err);
 }
 
+static int
+dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
+    uint64_t *sizep)
+{
+	int err;
+	/*
+	 * Assume that space (both on-disk and in-stream) is dominated by
+	 * data.  We will adjust for indirect blocks and the copies property,
+	 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
+	 */
+
+	/*
+	 * Subtract out approximate space used by indirect blocks.
+	 * Assume most space is used by data blocks (non-indirect, non-dnode).
+	 * Assume all blocks are recordsize.  Assume ditto blocks and
+	 * internal fragmentation counter out compression.
+	 *
+	 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
+	 * block, which we observe in practice.
+	 */
+	uint64_t recordsize;
+	err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
+	if (err != 0)
+		return (err);
+	size -= size / recordsize * sizeof (blkptr_t);
+
+	/* Add in the space for the record associated with each block. */
+	size += size / recordsize * sizeof (dmu_replay_record_t);
+
+	*sizep = size;
+
+	return (0);
+}
+
 int
 dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
 {
@@ -891,33 +925,61 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl
 			return (err);
 	}
 
-	/*
-	 * Assume that space (both on-disk and in-stream) is dominated by
-	 * data.  We will adjust for indirect blocks and the copies property,
-	 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
-	 */
+	err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+	return (err);
+}
+
+/*
+ * Simple callback used to traverse the blocks of a snapshot and sum their
+ * uncompressed size
+ */
+/* ARGSUSED */
+static int
+dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
+{
+	uint64_t *spaceptr = arg;
+	if (bp != NULL && !BP_IS_HOLE(bp)) {
+		*spaceptr += BP_GET_UCSIZE(bp);
+	}
+	return (0);
+}
+
+/*
+ * Given a desination snapshot and a TXG, calculate the approximate size of a
+ * send stream sent from that TXG. from_txg may be zero, indicating that the
+ * whole snapshot will be sent.
+ */
+int
+dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
+    uint64_t *sizep)
+{
+	dsl_pool_t *dp = ds->ds_dir->dd_pool;
+	int err;
+	uint64_t size = 0;
+
+	ASSERT(dsl_pool_config_held(dp));
+
+	/* tosnap must be a snapshot */
+	if (!dsl_dataset_is_snapshot(ds))
+		return (SET_ERROR(EINVAL));
+
+	/* verify that from_txg is before the provided snapshot was taken */
+	if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
+		return (SET_ERROR(EXDEV));
+	}
 
 	/*
-	 * Subtract out approximate space used by indirect blocks.
-	 * Assume most space is used by data blocks (non-indirect, non-dnode).
-	 * Assume all blocks are recordsize.  Assume ditto blocks and
-	 * internal fragmentation counter out compression.
-	 *
-	 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
-	 * block, which we observe in practice.
+	 * traverse the blocks of the snapshot with birth times after
+	 * from_txg, summing their uncompressed size
 	 */
-	uint64_t recordsize;
-	err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
-	if (err != 0)
+	err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
+	    dmu_calculate_send_traversal, &size);
+	if (err)
 		return (err);
-	size -= size / recordsize * sizeof (blkptr_t);
-
-	/* Add in the space for the record associated with each block. */
-	size += size / recordsize * sizeof (dmu_replay_record_t);
-
-	*sizep = size;
 
-	return (0);
+	err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+	return (err);
 }
 
 typedef struct dmu_recv_begin_arg {

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h	Sat Oct  3 07:55:37 2015	(r288567)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_send.h	Sat Oct  3 07:57:32 2015	(r288568)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  */
@@ -45,6 +45,8 @@ int dmu_send(const char *tosnap, const c
 #endif
 int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
     uint64_t *sizep);
+int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
+    uint64_t *sizep);
 int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
     boolean_t embedok, boolean_t large_block_ok,
 #ifdef illumos

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h	Sat Oct  3 07:55:37 2015	(r288567)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dsl_dataset.h	Sat Oct  3 07:57:32 2015	(r288568)
@@ -201,6 +201,9 @@ dsl_dataset_phys(dsl_dataset_t *ds)
  */
 #define	MAX_TAG_PREFIX_LEN	17
 
+#define	dsl_dataset_is_snapshot(ds) \
+	(dsl_dataset_phys(ds)->ds_num_children != 0)
+
 #define	DS_UNIQUE_IS_ACCURATE(ds)	\
 	((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)
 

Modified: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
==============================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	Sat Oct  3 07:55:37 2015	(r288567)
+++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c	Sat Oct  3 07:57:32 2015	(r288568)
@@ -5488,7 +5488,8 @@ zfs_ioc_send_new(const char *snapname, n
  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
  *
  * innvl: {
- *     (optional) "fromsnap" -> full snap name to send an incremental from
+ *     (optional) "from" -> full snap or bookmark name to send an incremental
+ *                          from
  * }
  *
  * outnvl: {
@@ -5499,7 +5500,6 @@ static int
 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
 {
 	dsl_pool_t *dp;
-	dsl_dataset_t *fromsnap = NULL;
 	dsl_dataset_t *tosnap;
 	int error;
 	char *fromname;
@@ -5515,27 +5515,55 @@ zfs_ioc_send_space(const char *snapname,
 		return (error);
 	}
 
-	error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
+	error = nvlist_lookup_string(innvl, "from", &fromname);
 	if (error == 0) {
-		error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
-		if (error != 0) {
-			dsl_dataset_rele(tosnap, FTAG);
-			dsl_pool_rele(dp, FTAG);
-			return (error);
+		if (strchr(fromname, '@') != NULL) {
+			/*
+			 * If from is a snapshot, hold it and use the more
+			 * efficient dmu_send_estimate to estimate send space
+			 * size using deadlists.
+			 */
+			dsl_dataset_t *fromsnap;
+			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
+			if (error != 0)
+				goto out;
+			error = dmu_send_estimate(tosnap, fromsnap, &space);
+			dsl_dataset_rele(fromsnap, FTAG);
+		} else if (strchr(fromname, '#') != NULL) {
+			/*
+			 * If from is a bookmark, fetch the creation TXG of the
+			 * snapshot it was created from and use that to find
+			 * blocks that were born after it.
+			 */
+			zfs_bookmark_phys_t frombm;
+
+			error = dsl_bookmark_lookup(dp, fromname, tosnap,
+			    &frombm);
+			if (error != 0)
+				goto out;
+			error = dmu_send_estimate_from_txg(tosnap,
+			    frombm.zbm_creation_txg, &space);
+		} else {
+			/*
+			 * from is not properly formatted as a snapshot or
+			 * bookmark
+			 */
+			error = SET_ERROR(EINVAL);
+			goto out;
 		}
+	} else {
+		// If estimating the size of a full send, use dmu_send_estimate
+		error = dmu_send_estimate(tosnap, NULL, &space);
 	}
 
-	error = dmu_send_estimate(tosnap, fromsnap, &space);
 	fnvlist_add_uint64(outnvl, "space", space);
 
-	if (fromsnap != NULL)
-		dsl_dataset_rele(fromsnap, FTAG);
+out:
 	dsl_dataset_rele(tosnap, FTAG);
 	dsl_pool_rele(dp, FTAG);
 	return (error);
 }
 
-
 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
 
 static void


More information about the svn-src-stable-10 mailing list