svn commit: r286587 - in head: cddl/contrib/opensolaris/cmd/zstreamdump cddl/contrib/opensolaris/lib/libzfs/common sys/cddl/contrib/opensolaris/uts/common/fs/zfs sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys

Alexander Motin mav at FreeBSD.org
Mon Aug 10 19:33:00 UTC 2015


Author: mav
Date: Mon Aug 10 19:32:58 2015
New Revision: 286587
URL: https://svnweb.freebsd.org/changeset/base/286587

Log:
  MFV 286586: 5746 more checksumming in zfs send
  
  Reviewed by: Christopher Siden <christopher.siden at delphix.com>
  Reviewed by: George Wilson <george.wilson at delphix.com>
  Reviewed by: Bayard Bell <buffer.g.overflow at gmail.com>
  Approved by: Albert Lee <trisk at omniti.com>
  Author: Matthew Ahrens <mahrens at delphix.com>
  
  illumos/illumos-gate at 98110f08fa182032082d98be2ddb9391fcd62bf1

Modified:
  head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
  head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio_checksum.h

Modified: head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c	Mon Aug 10 19:32:30 2015	(r286586)
+++ head/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c	Mon Aug 10 19:32:58 2015	(r286587)
@@ -25,7 +25,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 #include <ctype.h>
@@ -34,6 +34,7 @@
 #include <stdlib.h>
 #include <strings.h>
 #include <unistd.h>
+#include <stddef.h>
 
 #include <sys/dmu.h>
 #include <sys/zfs_ioctl.h>
@@ -83,7 +84,6 @@ safe_malloc(size_t size)
  *
  * Read while computing incremental checksum
  */
-
 static size_t
 ssread(void *buf, size_t len, zio_cksum_t *cksum)
 {
@@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_
 	if ((outlen = fread(buf, len, 1, send_stream)) == 0)
 		return (0);
 
-	if (do_cksum && cksum) {
+	if (do_cksum) {
 		if (do_byteswap)
 			fletcher_4_incremental_byteswap(buf, len, cksum);
 		else
@@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_
 	return (outlen);
 }
 
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
+	if (r == 0)
+		return (0);
+	zio_cksum_t saved_cksum = *cksum;
+	r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), cksum);
+	if (r == 0)
+		return (0);
+	if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
+	    !ZIO_CHECKSUM_EQUAL(saved_cksum,
+	    drr->drr_u.drr_checksum.drr_checksum)) {
+		fprintf(stderr, "invalid checksum\n");
+		(void) printf("Incorrect checksum in record header.\n");
+		(void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+		    saved_cksum.zc_word[0],
+		    saved_cksum.zc_word[1],
+		    saved_cksum.zc_word[2],
+		    saved_cksum.zc_word[3]);
+		exit(1);
+	}
+	return (sizeof (*drr));
+}
+
 /*
  * Print part of a block in ASCII characters
  */
@@ -183,8 +211,10 @@ main(int argc, char *argv[])
 	struct drr_free *drrf = &thedrr.drr_u.drr_free;
 	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
 	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+	struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
 	char c;
 	boolean_t verbose = B_FALSE;
+	boolean_t very_verbose = B_FALSE;
 	boolean_t first = B_TRUE;
 	/*
 	 * dump flag controls whether the contents of any modified data blocks
@@ -202,11 +232,14 @@ main(int argc, char *argv[])
 			do_cksum = B_FALSE;
 			break;
 		case 'v':
+			if (verbose)
+				very_verbose = B_TRUE;
 			verbose = B_TRUE;
 			break;
 		case 'd':
 			dump = B_TRUE;
 			verbose = B_TRUE;
+			very_verbose = B_TRUE;
 			break;
 		case ':':
 			(void) fprintf(stderr,
@@ -230,7 +263,7 @@ main(int argc, char *argv[])
 
 	send_stream = stdin;
 	pcksum = zc;
-	while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+	while (read_hdr(drr, &zc)) {
 
 		/*
 		 * If this is the first DMU record being processed, check for
@@ -432,7 +465,7 @@ main(int argc, char *argv[])
 			if (verbose) {
 				(void) printf("WRITE object = %llu type = %u "
 				    "checksum type = %u\n"
-				    "offset = %llu length = %llu "
+				    "    offset = %llu length = %llu "
 				    "props = %llx\n",
 				    (u_longlong_t)drrw->drr_object,
 				    drrw->drr_type,
@@ -476,9 +509,9 @@ main(int argc, char *argv[])
 			if (verbose) {
 				(void) printf("WRITE_BYREF object = %llu "
 				    "checksum type = %u props = %llx\n"
-				    "offset = %llu length = %llu\n"
+				    "    offset = %llu length = %llu\n"
 				    "toguid = %llx refguid = %llx\n"
-				    "refobject = %llu refoffset = %llu\n",
+				    "    refobject = %llu refoffset = %llu\n",
 				    (u_longlong_t)drrwbr->drr_object,
 				    drrwbr->drr_checksumtype,
 				    (u_longlong_t)drrwbr->drr_key.ddk_prop,
@@ -538,7 +571,7 @@ main(int argc, char *argv[])
 			if (verbose) {
 				(void) printf("WRITE_EMBEDDED object = %llu "
 				    "offset = %llu length = %llu\n"
-				    "toguid = %llx comp = %u etype = %u "
+				    "    toguid = %llx comp = %u etype = %u "
 				    "lsize = %u psize = %u\n",
 				    (u_longlong_t)drrwe->drr_object,
 				    (u_longlong_t)drrwe->drr_offset,
@@ -553,6 +586,13 @@ main(int argc, char *argv[])
 			    P2ROUNDUP(drrwe->drr_psize, 8), &zc);
 			break;
 		}
+		if (drr->drr_type != DRR_BEGIN && very_verbose) {
+			(void) printf("    checksum = %llx/%llx/%llx/%llx\n",
+			    (longlong_t)drrc->drr_checksum.zc_word[0],
+			    (longlong_t)drrc->drr_checksum.zc_word[1],
+			    (longlong_t)drrc->drr_checksum.zc_word[2],
+			    (longlong_t)drrc->drr_checksum.zc_word[3]);
+		}
 		pcksum = zc;
 	}
 	free(buf);

Modified: head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	Mon Aug 10 19:32:30 2015	(r286586)
+++ head/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_sendrecv.c	Mon Aug 10 19:32:58 2015	(r286587)
@@ -188,10 +188,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_t
 }
 
 static int
-cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+    zio_cksum_t *zc, int outfd)
 {
-	fletcher_4_incremental_native(buf, len, zc);
-	return (write(outfd, buf, len));
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+	if (drr->drr_type != DRR_BEGIN) {
+		ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+		    drr_checksum.drr_checksum));
+		drr->drr_u.drr_checksum.drr_checksum = *zc;
+	}
+	fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), zc);
+	if (write(outfd, drr, sizeof (*drr)) == -1)
+		return (errno);
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len, zc);
+		if (write(outfd, payload, payload_len) == -1)
+			return (errno);
+	}
+	return (0);
 }
 
 /*
@@ -218,26 +236,18 @@ cksummer(void *arg)
 	char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
 	dmu_replay_record_t thedrr;
 	dmu_replay_record_t *drr = &thedrr;
-	struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
-	struct drr_end *drre = &thedrr.drr_u.drr_end;
-	struct drr_object *drro = &thedrr.drr_u.drr_object;
-	struct drr_write *drrw = &thedrr.drr_u.drr_write;
-	struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
-	struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
 	FILE *ofp;
 	int outfd;
-	dmu_replay_record_t wbr_drr = {0};
-	struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
 	dedup_table_t ddt;
 	zio_cksum_t stream_cksum;
 	uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
 	uint64_t numbuckets;
 
 	ddt.max_ddt_size =
-	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
-	    SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+	    MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
+	    SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
 
-	numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+	numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
 
 	/*
 	 * numbuckets must be a power of 2.  Increase number to
@@ -253,32 +263,29 @@ cksummer(void *arg)
 	ddt.numhashbits = high_order_bit(numbuckets) - 1;
 	ddt.ddt_full = B_FALSE;
 
-	/* Initialize the write-by-reference block. */
-	wbr_drr.drr_type = DRR_WRITE_BYREF;
-	wbr_drr.drr_payloadlen = 0;
-
 	outfd = dda->outputfd;
 	ofp = fdopen(dda->inputfd, "r");
-	while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+	while (ssread(drr, sizeof (*drr), ofp) != 0) {
 
 		switch (drr->drr_type) {
 		case DRR_BEGIN:
 		{
-			int	fflags;
+			struct drr_begin *drrb = &drr->drr_u.drr_begin;
+			int fflags;
+			int sz = 0;
 			ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
 
+			ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
 			/* set the DEDUP feature flag for this stream */
 			fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 			fflags |= (DMU_BACKUP_FEATURE_DEDUP |
 			    DMU_BACKUP_FEATURE_DEDUPPROPS);
 			DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
 
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
 			if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 			    DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
-				int sz = drr->drr_payloadlen;
+				sz = drr->drr_payloadlen;
 
 				if (sz > SPA_MAXBLOCKSIZE) {
 					buf = zfs_realloc(dda->dedup_hdl, buf,
@@ -287,64 +294,60 @@ cksummer(void *arg)
 				(void) ssread(buf, sz, ofp);
 				if (ferror(stdin))
 					perror("fread");
-				if (cksum_and_write(buf, sz, &stream_cksum,
-				    outfd) == -1)
-					goto out;
 			}
+			if (dump_record(drr, buf, sz, &stream_cksum,
+			    outfd) != 0)
+				goto out;
 			break;
 		}
 
 		case DRR_END:
 		{
+			struct drr_end *drre = &drr->drr_u.drr_end;
 			/* use the recalculated checksum */
-			ZIO_SET_CHECKSUM(&drre->drr_checksum,
-			    stream_cksum.zc_word[0], stream_cksum.zc_word[1],
-			    stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
-			if ((write(outfd, drr,
-			    sizeof (dmu_replay_record_t))) == -1)
+			drre->drr_checksum = stream_cksum;
+			if (dump_record(drr, NULL, 0, &stream_cksum,
+			    outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_OBJECT:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
+			struct drr_object *drro = &drr->drr_u.drr_object;
 			if (drro->drr_bonuslen > 0) {
 				(void) ssread(buf,
 				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 				    ofp);
-				if (cksum_and_write(buf,
-				    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
-				    &stream_cksum, outfd) == -1)
-					goto out;
 			}
+			if (dump_record(drr, buf,
+			    P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+			    &stream_cksum, outfd) != 0)
+				goto out;
 			break;
 		}
 
 		case DRR_SPILL:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
+			struct drr_spill *drrs = &drr->drr_u.drr_spill;
 			(void) ssread(buf, drrs->drr_length, ofp);
-			if (cksum_and_write(buf, drrs->drr_length,
-			    &stream_cksum, outfd) == -1)
+			if (dump_record(drr, buf, drrs->drr_length,
+			    &stream_cksum, outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_FREEOBJECTS:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
+			if (dump_record(drr, NULL, 0, &stream_cksum,
+			    outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_WRITE:
 		{
+			struct drr_write *drrw = &drr->drr_u.drr_write;
 			dataref_t	dataref;
 
 			(void) ssread(buf, drrw->drr_length, ofp);
@@ -382,7 +385,13 @@ cksummer(void *arg)
 			if (ddt_update(dda->dedup_hdl, &ddt,
 			    &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
 			    &dataref)) {
+				dmu_replay_record_t wbr_drr = {0};
+				struct drr_write_byref *wbr_drrr =
+				    &wbr_drr.drr_u.drr_write_byref;
+
 				/* block already present in stream */
+				wbr_drr.drr_type = DRR_WRITE_BYREF;
+
 				wbr_drrr->drr_object = drrw->drr_object;
 				wbr_drrr->drr_offset = drrw->drr_offset;
 				wbr_drrr->drr_length = drrw->drr_length;
@@ -402,19 +411,13 @@ cksummer(void *arg)
 				wbr_drrr->drr_key.ddk_prop =
 				    drrw->drr_key.ddk_prop;
 
-				if (cksum_and_write(&wbr_drr,
-				    sizeof (dmu_replay_record_t), &stream_cksum,
-				    outfd) == -1)
+				if (dump_record(&wbr_drr, NULL, 0,
+				    &stream_cksum, outfd) != 0)
 					goto out;
 			} else {
 				/* block not previously seen */
-				if (cksum_and_write(drr,
-				    sizeof (dmu_replay_record_t), &stream_cksum,
-				    outfd) == -1)
-					goto out;
-				if (cksum_and_write(buf,
-				    drrw->drr_length,
-				    &stream_cksum, outfd) == -1)
+				if (dump_record(drr, buf, drrw->drr_length,
+				    &stream_cksum, outfd) != 0)
 					goto out;
 			}
 			break;
@@ -422,28 +425,27 @@ cksummer(void *arg)
 
 		case DRR_WRITE_EMBEDDED:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
-				goto out;
+			struct drr_write_embedded *drrwe =
+			    &drr->drr_u.drr_write_embedded;
 			(void) ssread(buf,
 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
-			if (cksum_and_write(buf,
+			if (dump_record(drr, buf,
 			    P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
-			    &stream_cksum, outfd) == -1)
+			    &stream_cksum, outfd) != 0)
 				goto out;
 			break;
 		}
 
 		case DRR_FREE:
 		{
-			if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
-			    &stream_cksum, outfd) == -1)
+			if (dump_record(drr, NULL, 0, &stream_cksum,
+			    outfd) != 0)
 				goto out;
 			break;
 		}
 
 		default:
-			(void) printf("INVALID record type 0x%x\n",
+			(void) fprintf(stderr, "INVALID record type 0x%x\n",
 			    drr->drr_type);
 			/* should never happen, so assert */
 			assert(B_FALSE);
@@ -1470,18 +1472,11 @@ zfs_send(zfs_handle_t *zhp, const char *
 			    sizeof (drr.drr_u.drr_begin.drr_toname),
 			    "%s@%s", zhp->zfs_name, tosnap);
 			drr.drr_payloadlen = buflen;
-			err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
 
-			/* write header nvlist */
-			if (err != -1 && packbuf != NULL) {
-				err = cksum_and_write(packbuf, buflen, &zc,
-				    outfd);
-			}
+			err = dump_record(&drr, packbuf, buflen, &zc, outfd);
 			free(packbuf);
-			if (err == -1) {
-				err = errno;
+			if (err != 0)
 				goto stderr_out;
-			}
 
 			/* write end record */
 			bzero(&drr, sizeof (drr));
@@ -1714,6 +1709,8 @@ recv_read(libzfs_handle_t *hdl, int fd, 
 	int rv;
 	int len = ilen;
 
+	assert(ilen <= SPA_MAXBLOCKSIZE);
+
 	do {
 		rv = read(fd, cp, len);
 		cp += rv;

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Mon Aug 10 19:32:30 2015	(r286586)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c	Mon Aug 10 19:32:58 2015	(r286587)
@@ -73,7 +73,6 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf
 	struct iovec aiov;
 	ASSERT0(len % 8);
 
-	fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
 	aiov.iov_base = buf;
 	aiov.iov_len = len;
 	auio.uio_iov = &aiov;
@@ -99,6 +98,38 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf
 	return (dsp->dsa_err);
 }
 
+/*
+ * For all record types except BEGIN, fill in the checksum (overlaid in
+ * drr_u.drr_checksum.drr_checksum).  The checksum verifies everything
+ * up to the start of the checksum itself.
+ */
+static int
+dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
+{
+	ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+	fletcher_4_incremental_native(dsp->dsa_drr,
+	    offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+	    &dsp->dsa_zc);
+	if (dsp->dsa_drr->drr_type != DRR_BEGIN) {
+		ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
+		    drr_checksum.drr_checksum));
+		dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
+	}
+	fletcher_4_incremental_native(&dsp->dsa_drr->
+	    drr_u.drr_checksum.drr_checksum,
+	    sizeof (zio_cksum_t), &dsp->dsa_zc);
+	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+		return (SET_ERROR(EINTR));
+	if (payload_len != 0) {
+		fletcher_4_incremental_native(payload, payload_len,
+		    &dsp->dsa_zc);
+		if (dump_bytes(dsp, payload, payload_len) != 0)
+			return (SET_ERROR(EINTR));
+	}
+	return (0);
+}
+
 static int
 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
     uint64_t length)
@@ -143,8 +174,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
 	 */
 	if (dsp->dsa_pending_op != PENDING_NONE &&
 	    dsp->dsa_pending_op != PENDING_FREE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -167,8 +197,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
 			return (0);
 		} else {
 			/* not a continuation.  Push out pending record */
-			if (dump_bytes(dsp, dsp->dsa_drr,
-			    sizeof (dmu_replay_record_t)) != 0)
+			if (dump_record(dsp, NULL, 0) != 0)
 				return (SET_ERROR(EINTR));
 			dsp->dsa_pending_op = PENDING_NONE;
 		}
@@ -181,8 +210,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t o
 	drrf->drr_length = length;
 	drrf->drr_toguid = dsp->dsa_toguid;
 	if (length == -1ULL) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 	} else {
 		dsp->dsa_pending_op = PENDING_FREE;
@@ -214,12 +242,11 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec
 	 * of different types.
 	 */
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
-	/* write a DATA record */
+	/* write a WRITE record */
 	bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
 	dsp->dsa_drr->drr_type = DRR_WRITE;
 	drrw->drr_object = object;
@@ -245,9 +272,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_objec
 		drrw->drr_key.ddk_cksum = bp->blk_cksum;
 	}
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (SET_ERROR(EINTR));
-	if (dump_bytes(dsp, data, blksz) != 0)
+	if (dump_record(dsp, data, blksz) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
@@ -261,8 +286,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, 
 	    &(dsp->dsa_drr->drr_u.drr_write_embedded);
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (EINTR);
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -282,9 +306,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, 
 
 	decode_embedded_bp_compressed(bp, buf);
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (EINTR);
-	if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
+	if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
 		return (EINTR);
 	return (0);
 }
@@ -295,8 +317,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t 
 	struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -308,9 +329,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t 
 	drrs->drr_length = blksz;
 	drrs->drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
-		return (SET_ERROR(EINTR));
-	if (dump_bytes(dsp, data, blksz))
+	if (dump_record(dsp, data, blksz) != 0)
 		return (SET_ERROR(EINTR));
 	return (0);
 }
@@ -333,8 +352,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin
 	 */
 	if (dsp->dsa_pending_op != PENDING_NONE &&
 	    dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -348,8 +366,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uin
 			return (0);
 		} else {
 			/* can't be aggregated.  Push out pending record */
-			if (dump_bytes(dsp, dsp->dsa_drr,
-			    sizeof (dmu_replay_record_t)) != 0)
+			if (dump_record(dsp, NULL, 0) != 0)
 				return (SET_ERROR(EINTR));
 			dsp->dsa_pending_op = PENDING_NONE;
 		}
@@ -376,8 +393,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t 
 		return (dump_freeobjects(dsp, object, 1));
 
 	if (dsp->dsa_pending_op != PENDING_NONE) {
-		if (dump_bytes(dsp, dsp->dsa_drr,
-		    sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			return (SET_ERROR(EINTR));
 		dsp->dsa_pending_op = PENDING_NONE;
 	}
@@ -398,11 +414,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t 
 	    drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
 		drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
 
-	if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
-		return (SET_ERROR(EINTR));
-
-	if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+	if (dump_record(dsp, DN_BONUS(dnp),
+	    P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
 		return (SET_ERROR(EINTR));
+	}
 
 	/* Free anything past the end of the file. */
 	if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
@@ -651,7 +666,6 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	dsp->dsa_os = os;
 	dsp->dsa_off = off;
 	dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid;
-	ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
 	dsp->dsa_pending_op = PENDING_NONE;
 	dsp->dsa_incremental = (fromzb != NULL);
 	dsp->dsa_featureflags = featureflags;
@@ -663,7 +677,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	dsl_dataset_long_hold(ds, FTAG);
 	dsl_pool_rele(dp, tag);
 
-	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+	if (dump_record(dsp, NULL, 0) != 0) {
 		err = dsp->dsa_err;
 		goto out;
 	}
@@ -672,7 +686,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	    backup_cb, dsp);
 
 	if (dsp->dsa_pending_op != PENDING_NONE)
-		if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
+		if (dump_record(dsp, NULL, 0) != 0)
 			err = SET_ERROR(EINTR);
 
 	if (err != 0) {
@@ -686,7 +700,7 @@ dmu_send_impl(void *tag, dsl_pool_t *dp,
 	drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
 	drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 
-	if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+	if (dump_record(dsp, NULL, 0) != 0) {
 		err = dsp->dsa_err;
 		goto out;
 	}
@@ -1251,14 +1265,20 @@ dmu_recv_begin(char *tofs, char *tosnap,
 }
 
 struct restorearg {
+	objset_t *os;
 	int err;
 	boolean_t byteswap;
 	kthread_t *td;
 	struct file *fp;
-	char *buf;
 	uint64_t voff;
 	int bufsize; /* amount of memory allocated for buf */
+
+	dmu_replay_record_t *drr;
+	dmu_replay_record_t *next_drr;
+	char *buf;
 	zio_cksum_t cksum;
+	zio_cksum_t prev_cksum;
+
 	avl_tree_t *guid_to_ds_map;
 };
 
@@ -1323,14 +1343,11 @@ restore_bytes(struct restorearg *ra, voi
 	return (error);
 }
 
-static void *
-restore_read(struct restorearg *ra, int len, char *buf)
+static int
+restore_read(struct restorearg *ra, int len, void *buf)
 {
 	int done = 0;
 
-	if (buf == NULL)
-		buf = ra->buf;
-
 	/* some things will require 8-byte alignment, so everything must */
 	ASSERT0(len % 8);
 	ASSERT3U(len, <=, ra->bufsize);
@@ -1346,24 +1363,21 @@ restore_read(struct restorearg *ra, int 
 		ra->voff += len - done - resid;
 		done = len - resid;
 		if (ra->err != 0)
-			return (NULL);
+			return (ra->err);
 	}
 
 	ASSERT3U(done, ==, len);
-	if (ra->byteswap)
-		fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
-	else
-		fletcher_4_incremental_native(buf, len, &ra->cksum);
-	return (buf);
+	return (0);
 }
 
 static void
-backup_byteswap(dmu_replay_record_t *drr)
+byteswap_record(dmu_replay_record_t *drr)
 {
 #define	DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
 #define	DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
 	drr->drr_type = BSWAP_32(drr->drr_type);
 	drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
+
 	switch (drr->drr_type) {
 	case DRR_BEGIN:
 		DO64(drr_begin.drr_magic);
@@ -1393,10 +1407,7 @@ backup_byteswap(dmu_replay_record_t *drr
 		DO64(drr_write.drr_offset);
 		DO64(drr_write.drr_length);
 		DO64(drr_write.drr_toguid);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
-		DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
 		DO64(drr_write.drr_key.ddk_prop);
 		break;
 	case DRR_WRITE_BYREF:
@@ -1407,10 +1418,8 @@ backup_byteswap(dmu_replay_record_t *drr
 		DO64(drr_write_byref.drr_refguid);
 		DO64(drr_write_byref.drr_refobject);
 		DO64(drr_write_byref.drr_refoffset);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
-		DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
+		    drr_key.ddk_cksum);
 		DO64(drr_write_byref.drr_key.ddk_prop);
 		break;
 	case DRR_WRITE_EMBEDDED:
@@ -1433,13 +1442,15 @@ backup_byteswap(dmu_replay_record_t *drr
 		DO64(drr_spill.drr_toguid);
 		break;
 	case DRR_END:
-		DO64(drr_end.drr_checksum.zc_word[0]);
-		DO64(drr_end.drr_checksum.zc_word[1]);
-		DO64(drr_end.drr_checksum.zc_word[2]);
-		DO64(drr_end.drr_checksum.zc_word[3]);
 		DO64(drr_end.drr_toguid);
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
 		break;
 	}
+
+	if (drr->drr_type != DRR_BEGIN) {
+		ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
+	}
+
 #undef DO64
 #undef DO32
 }
@@ -1456,11 +1467,10 @@ deduce_nblkptr(dmu_object_type_t bonus_t
 }
 
 static int
-restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
+restore_object(struct restorearg *ra, struct drr_object *drro, void *data)
 {
 	dmu_object_info_t doi;
 	dmu_tx_t *tx;
-	void *data = NULL;
 	uint64_t object;
 	int err;
 
@@ -1471,23 +1481,17 @@ restore_object(struct restorearg *ra, ob
 	    drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
 	    P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
 	    drro->drr_blksz < SPA_MINBLOCKSIZE ||
-	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
+	    drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(ra->os)) ||
 	    drro->drr_bonuslen > DN_MAX_BONUSLEN) {
 		return (SET_ERROR(EINVAL));
 	}
 
-	err = dmu_object_info(os, drro->drr_object, &doi);
+	err = dmu_object_info(ra->os, drro->drr_object, &doi);
 
 	if (err != 0 && err != ENOENT)
 		return (SET_ERROR(EINVAL));
 	object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
 
-	if (drro->drr_bonuslen) {
-		data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
-		if (ra->err != 0)
-			return (ra->err);
-	}
-
 	/*
 	 * If we are losing blkptrs or changing the block size this must
 	 * be a new file instance.  We must clear out the previous file
@@ -1501,14 +1505,14 @@ restore_object(struct restorearg *ra, ob
 
 		if (drro->drr_blksz != doi.doi_data_block_size ||
 		    nblkptr < doi.doi_nblkptr) {
-			err = dmu_free_long_range(os, drro->drr_object,
+			err = dmu_free_long_range(ra->os, drro->drr_object,
 			    0, DMU_OBJECT_END);
 			if (err != 0)
 				return (SET_ERROR(EINVAL));
 		}
 	}
 
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 	dmu_tx_hold_bonus(tx, object);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
@@ -1518,7 +1522,7 @@ restore_object(struct restorearg *ra, ob
 
 	if (object == DMU_NEW_OBJECT) {
 		/* currently free, want to be allocated */
-		err = dmu_object_claim(os, drro->drr_object,
+		err = dmu_object_claim(ra->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 	} else if (drro->drr_type != doi.doi_type ||
@@ -1526,7 +1530,7 @@ restore_object(struct restorearg *ra, ob
 	    drro->drr_bonustype != doi.doi_bonus_type ||
 	    drro->drr_bonuslen != doi.doi_bonus_size) {
 		/* currently allocated, but with different properties */
-		err = dmu_object_reclaim(os, drro->drr_object,
+		err = dmu_object_reclaim(ra->os, drro->drr_object,
 		    drro->drr_type, drro->drr_blksz,
 		    drro->drr_bonustype, drro->drr_bonuslen, tx);
 	}
@@ -1535,14 +1539,15 @@ restore_object(struct restorearg *ra, ob
 		return (SET_ERROR(EINVAL));
 	}
 
-	dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
-	    tx);
-	dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
+	dmu_object_set_checksum(ra->os, drro->drr_object,
+	    drro->drr_checksumtype, tx);
+	dmu_object_set_compress(ra->os, drro->drr_object,
+	    drro->drr_compress, tx);
 
 	if (data != NULL) {
 		dmu_buf_t *db;
 
-		VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
+		VERIFY0(dmu_bonus_hold(ra->os, drro->drr_object, FTAG, &db));
 		dmu_buf_will_dirty(db, tx);
 
 		ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
@@ -1561,7 +1566,7 @@ restore_object(struct restorearg *ra, ob
 
 /* ARGSUSED */
 static int
-restore_freeobjects(struct restorearg *ra, objset_t *os,
+restore_freeobjects(struct restorearg *ra,
     struct drr_freeobjects *drrfo)
 {
 	uint64_t obj;
@@ -1571,13 +1576,13 @@ restore_freeobjects(struct restorearg *r
 
 	for (obj = drrfo->drr_firstobj;
 	    obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
-	    (void) dmu_object_next(os, &obj, FALSE, 0)) {
+	    (void) dmu_object_next(ra->os, &obj, FALSE, 0)) {
 		int err;
 
-		if (dmu_object_info(os, obj, NULL) != 0)
+		if (dmu_object_info(ra->os, obj, NULL) != 0)
 			continue;
 
-		err = dmu_free_long_object(os, obj);
+		err = dmu_free_long_object(ra->os, obj);
 		if (err != 0)
 			return (err);
 	}
@@ -1585,49 +1590,37 @@ restore_freeobjects(struct restorearg *r
 }
 
 static int
-restore_write(struct restorearg *ra, objset_t *os,
-    struct drr_write *drrw)
+restore_write(struct restorearg *ra, struct drr_write *drrw, arc_buf_t *abuf)
 {
 	dmu_tx_t *tx;
-	void *data;
 	int err;
 
 	if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
 	    !DMU_OT_IS_VALID(drrw->drr_type))
 		return (SET_ERROR(EINVAL));
 
-	if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
+	if (dmu_object_info(ra->os, drrw->drr_object, NULL) != 0)
 		return (SET_ERROR(EINVAL));
 
-	dmu_buf_t *bonus;
-	if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
-		return (SET_ERROR(EINVAL));
-
-	arc_buf_t *abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
-
-	data = restore_read(ra, drrw->drr_length, abuf->b_data);
-	if (data == NULL) {
-		dmu_return_arcbuf(abuf);
-		dmu_buf_rele(bonus, FTAG);
-		return (ra->err);
-	}
-
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 
 	dmu_tx_hold_write(tx, drrw->drr_object,
 	    drrw->drr_offset, drrw->drr_length);
 	err = dmu_tx_assign(tx, TXG_WAIT);
 	if (err != 0) {
-		dmu_return_arcbuf(abuf);
-		dmu_buf_rele(bonus, FTAG);
 		dmu_tx_abort(tx);
 		return (err);
 	}
 	if (ra->byteswap) {
 		dmu_object_byteswap_t byteswap =
 		    DMU_OT_BYTESWAP(drrw->drr_type);
-		dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+		dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
+		    drrw->drr_length);
 	}
+
+	dmu_buf_t *bonus;
+	if (dmu_bonus_hold(ra->os, drrw->drr_object, FTAG, &bonus) != 0)
+		return (SET_ERROR(EINVAL));
 	dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
 	dmu_tx_commit(tx);
 	dmu_buf_rele(bonus, FTAG);
@@ -1642,8 +1635,7 @@ restore_write(struct restorearg *ra, obj
  * data from the stream to fulfill this write.
  */
 static int
-restore_write_byref(struct restorearg *ra, objset_t *os,
-    struct drr_write_byref *drrwbr)
+restore_write_byref(struct restorearg *ra, struct drr_write_byref *drrwbr)
 {
 	dmu_tx_t *tx;
 	int err;
@@ -1669,7 +1661,7 @@ restore_write_byref(struct restorearg *r
 		if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
 			return (SET_ERROR(EINVAL));
 	} else {
-		ref_os = os;
+		ref_os = ra->os;
 	}
 
 	err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
@@ -1677,7 +1669,7 @@ restore_write_byref(struct restorearg *r
 	if (err != 0)
 		return (err);
 
-	tx = dmu_tx_create(os);
+	tx = dmu_tx_create(ra->os);
 
 	dmu_tx_hold_write(tx, drrwbr->drr_object,
 	    drrwbr->drr_offset, drrwbr->drr_length);
@@ -1686,7 +1678,7 @@ restore_write_byref(struct restorearg *r
 		dmu_tx_abort(tx);
 		return (err);
 	}
-	dmu_write(os, drrwbr->drr_object,
+	dmu_write(ra->os, drrwbr->drr_object,
 	    drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
 	dmu_buf_rele(dbp, FTAG);
 	dmu_tx_commit(tx);
@@ -1694,12 +1686,11 @@ restore_write_byref(struct restorearg *r
 }
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-all mailing list