svn commit: r286586 - vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor/illumos/dist/cmd/zstreamdump vendor/illumos/dist/lib/libzfs/common vendor/illumo...
Alexander Motin
mav at FreeBSD.org
Mon Aug 10 19:32:32 UTC 2015
Author: mav
Date: Mon Aug 10 19:32:30 2015
New Revision: 286586
URL: https://svnweb.freebsd.org/changeset/base/286586
Log:
5746 more checksumming in zfs send
Reviewed by: Christopher Siden <christopher.siden at delphix.com>
Reviewed by: George Wilson <george.wilson at delphix.com>
Reviewed by: Bayard Bell <buffer.g.overflow at gmail.com>
Approved by: Albert Lee <trisk at omniti.com>
Author: Matthew Ahrens <mahrens at delphix.com>
illumos/illumos-gate at 98110f08fa182032082d98be2ddb9391fcd62bf1
Modified:
vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c
vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c
Changes in other areas also in this revision:
Modified:
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_ioctl.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zio_checksum.h
Modified: vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c Mon Aug 10 18:27:54 2015 (r286585)
+++ vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c Mon Aug 10 19:32:30 2015 (r286586)
@@ -25,7 +25,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
#include <ctype.h>
@@ -34,6 +34,7 @@
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>
+#include <stddef.h>
#include <sys/dmu.h>
#include <sys/zfs_ioctl.h>
@@ -83,7 +84,6 @@ safe_malloc(size_t size)
*
* Read while computing incremental checksum
*/
-
static size_t
ssread(void *buf, size_t len, zio_cksum_t *cksum)
{
@@ -92,7 +92,7 @@ ssread(void *buf, size_t len, zio_cksum_
if ((outlen = fread(buf, len, 1, send_stream)) == 0)
return (0);
- if (do_cksum && cksum) {
+ if (do_cksum) {
if (do_byteswap)
fletcher_4_incremental_byteswap(buf, len, cksum);
else
@@ -102,6 +102,34 @@ ssread(void *buf, size_t len, zio_cksum_
return (outlen);
}
+static size_t
+read_hdr(dmu_replay_record_t *drr, zio_cksum_t *cksum)
+{
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ size_t r = ssread(drr, sizeof (*drr) - sizeof (zio_cksum_t), cksum);
+ if (r == 0)
+ return (0);
+ zio_cksum_t saved_cksum = *cksum;
+ r = ssread(&drr->drr_u.drr_checksum.drr_checksum,
+ sizeof (zio_cksum_t), cksum);
+ if (r == 0)
+ return (0);
+ if (!ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.drr_checksum.drr_checksum) &&
+ !ZIO_CHECKSUM_EQUAL(saved_cksum,
+ drr->drr_u.drr_checksum.drr_checksum)) {
+ fprintf(stderr, "invalid checksum\n");
+ (void) printf("Incorrect checksum in record header.\n");
+ (void) printf("Expected checksum = %llx/%llx/%llx/%llx\n",
+ saved_cksum.zc_word[0],
+ saved_cksum.zc_word[1],
+ saved_cksum.zc_word[2],
+ saved_cksum.zc_word[3]);
+ exit(1);
+ }
+ return (sizeof (*drr));
+}
+
/*
* Print part of a block in ASCII characters
*/
@@ -183,8 +211,10 @@ main(int argc, char *argv[])
struct drr_free *drrf = &thedrr.drr_u.drr_free;
struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
+ struct drr_checksum *drrc = &thedrr.drr_u.drr_checksum;
char c;
boolean_t verbose = B_FALSE;
+ boolean_t very_verbose = B_FALSE;
boolean_t first = B_TRUE;
/*
* dump flag controls whether the contents of any modified data blocks
@@ -202,11 +232,14 @@ main(int argc, char *argv[])
do_cksum = B_FALSE;
break;
case 'v':
+ if (verbose)
+ very_verbose = B_TRUE;
verbose = B_TRUE;
break;
case 'd':
dump = B_TRUE;
verbose = B_TRUE;
+ very_verbose = B_TRUE;
break;
case ':':
(void) fprintf(stderr,
@@ -230,7 +263,7 @@ main(int argc, char *argv[])
send_stream = stdin;
pcksum = zc;
- while (ssread(drr, sizeof (dmu_replay_record_t), &zc)) {
+ while (read_hdr(drr, &zc)) {
/*
* If this is the first DMU record being processed, check for
@@ -432,7 +465,7 @@ main(int argc, char *argv[])
if (verbose) {
(void) printf("WRITE object = %llu type = %u "
"checksum type = %u\n"
- "offset = %llu length = %llu "
+ " offset = %llu length = %llu "
"props = %llx\n",
(u_longlong_t)drrw->drr_object,
drrw->drr_type,
@@ -476,9 +509,9 @@ main(int argc, char *argv[])
if (verbose) {
(void) printf("WRITE_BYREF object = %llu "
"checksum type = %u props = %llx\n"
- "offset = %llu length = %llu\n"
+ " offset = %llu length = %llu\n"
"toguid = %llx refguid = %llx\n"
- "refobject = %llu refoffset = %llu\n",
+ " refobject = %llu refoffset = %llu\n",
(u_longlong_t)drrwbr->drr_object,
drrwbr->drr_checksumtype,
(u_longlong_t)drrwbr->drr_key.ddk_prop,
@@ -538,7 +571,7 @@ main(int argc, char *argv[])
if (verbose) {
(void) printf("WRITE_EMBEDDED object = %llu "
"offset = %llu length = %llu\n"
- "toguid = %llx comp = %u etype = %u "
+ " toguid = %llx comp = %u etype = %u "
"lsize = %u psize = %u\n",
(u_longlong_t)drrwe->drr_object,
(u_longlong_t)drrwe->drr_offset,
@@ -553,6 +586,13 @@ main(int argc, char *argv[])
P2ROUNDUP(drrwe->drr_psize, 8), &zc);
break;
}
+ if (drr->drr_type != DRR_BEGIN && very_verbose) {
+ (void) printf(" checksum = %llx/%llx/%llx/%llx\n",
+ (longlong_t)drrc->drr_checksum.zc_word[0],
+ (longlong_t)drrc->drr_checksum.zc_word[1],
+ (longlong_t)drrc->drr_checksum.zc_word[2],
+ (longlong_t)drrc->drr_checksum.zc_word[3]);
+ }
pcksum = zc;
}
free(buf);
Modified: vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c
==============================================================================
--- vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c Mon Aug 10 18:27:54 2015 (r286585)
+++ vendor/illumos/dist/lib/libzfs/common/libzfs_sendrecv.c Mon Aug 10 19:32:30 2015 (r286586)
@@ -179,10 +179,28 @@ ddt_update(libzfs_handle_t *hdl, dedup_t
}
static int
-cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
+dump_record(dmu_replay_record_t *drr, void *payload, int payload_len,
+ zio_cksum_t *zc, int outfd)
{
- fletcher_4_incremental_native(buf, len, zc);
- return (write(outfd, buf, len));
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ fletcher_4_incremental_native(drr,
+ offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum), zc);
+ if (drr->drr_type != DRR_BEGIN) {
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(&drr->drr_u.
+ drr_checksum.drr_checksum));
+ drr->drr_u.drr_checksum.drr_checksum = *zc;
+ }
+ fletcher_4_incremental_native(&drr->drr_u.drr_checksum.drr_checksum,
+ sizeof (zio_cksum_t), zc);
+ if (write(outfd, drr, sizeof (*drr)) == -1)
+ return (errno);
+ if (payload_len != 0) {
+ fletcher_4_incremental_native(payload, payload_len, zc);
+ if (write(outfd, payload, payload_len) == -1)
+ return (errno);
+ }
+ return (0);
}
/*
@@ -209,26 +227,18 @@ cksummer(void *arg)
char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
dmu_replay_record_t thedrr;
dmu_replay_record_t *drr = &thedrr;
- struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
- struct drr_end *drre = &thedrr.drr_u.drr_end;
- struct drr_object *drro = &thedrr.drr_u.drr_object;
- struct drr_write *drrw = &thedrr.drr_u.drr_write;
- struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
- struct drr_write_embedded *drrwe = &thedrr.drr_u.drr_write_embedded;
FILE *ofp;
int outfd;
- dmu_replay_record_t wbr_drr = {0};
- struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
dedup_table_t ddt;
zio_cksum_t stream_cksum;
uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
uint64_t numbuckets;
ddt.max_ddt_size =
- MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
- SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
+ MAX((physmem * MAX_DDT_PHYSMEM_PERCENT) / 100,
+ SMALLEST_POSSIBLE_MAX_DDT_MB << 20);
- numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
+ numbuckets = ddt.max_ddt_size / (sizeof (dedup_entry_t));
/*
* numbuckets must be a power of 2. Increase number to
@@ -244,32 +254,29 @@ cksummer(void *arg)
ddt.numhashbits = high_order_bit(numbuckets) - 1;
ddt.ddt_full = B_FALSE;
- /* Initialize the write-by-reference block. */
- wbr_drr.drr_type = DRR_WRITE_BYREF;
- wbr_drr.drr_payloadlen = 0;
-
outfd = dda->outputfd;
ofp = fdopen(dda->inputfd, "r");
- while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
+ while (ssread(drr, sizeof (*drr), ofp) != 0) {
switch (drr->drr_type) {
case DRR_BEGIN:
{
- int fflags;
+ struct drr_begin *drrb = &drr->drr_u.drr_begin;
+ int fflags;
+ int sz = 0;
ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
+ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+
/* set the DEDUP feature flag for this stream */
fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
fflags |= (DMU_BACKUP_FEATURE_DEDUP |
DMU_BACKUP_FEATURE_DEDUPPROPS);
DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
- int sz = drr->drr_payloadlen;
+ sz = drr->drr_payloadlen;
if (sz > SPA_MAXBLOCKSIZE) {
buf = zfs_realloc(dda->dedup_hdl, buf,
@@ -278,64 +285,60 @@ cksummer(void *arg)
(void) ssread(buf, sz, ofp);
if (ferror(stdin))
perror("fread");
- if (cksum_and_write(buf, sz, &stream_cksum,
- outfd) == -1)
- goto out;
}
+ if (dump_record(drr, buf, sz, &stream_cksum,
+ outfd) != 0)
+ goto out;
break;
}
case DRR_END:
{
+ struct drr_end *drre = &drr->drr_u.drr_end;
/* use the recalculated checksum */
- ZIO_SET_CHECKSUM(&drre->drr_checksum,
- stream_cksum.zc_word[0], stream_cksum.zc_word[1],
- stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
- if ((write(outfd, drr,
- sizeof (dmu_replay_record_t))) == -1)
+ drre->drr_checksum = stream_cksum;
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
goto out;
break;
}
case DRR_OBJECT:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
+ struct drr_object *drro = &drr->drr_u.drr_object;
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
ofp);
- if (cksum_and_write(buf,
- P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
- &stream_cksum, outfd) == -1)
- goto out;
}
+ if (dump_record(drr, buf,
+ P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
+ &stream_cksum, outfd) != 0)
+ goto out;
break;
}
case DRR_SPILL:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
+ struct drr_spill *drrs = &drr->drr_u.drr_spill;
(void) ssread(buf, drrs->drr_length, ofp);
- if (cksum_and_write(buf, drrs->drr_length,
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, buf, drrs->drr_length,
+ &stream_cksum, outfd) != 0)
goto out;
break;
}
case DRR_FREEOBJECTS:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
goto out;
break;
}
case DRR_WRITE:
{
+ struct drr_write *drrw = &drr->drr_u.drr_write;
dataref_t dataref;
(void) ssread(buf, drrw->drr_length, ofp);
@@ -373,7 +376,13 @@ cksummer(void *arg)
if (ddt_update(dda->dedup_hdl, &ddt,
&drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
&dataref)) {
+ dmu_replay_record_t wbr_drr = {0};
+ struct drr_write_byref *wbr_drrr =
+ &wbr_drr.drr_u.drr_write_byref;
+
/* block already present in stream */
+ wbr_drr.drr_type = DRR_WRITE_BYREF;
+
wbr_drrr->drr_object = drrw->drr_object;
wbr_drrr->drr_offset = drrw->drr_offset;
wbr_drrr->drr_length = drrw->drr_length;
@@ -393,19 +402,13 @@ cksummer(void *arg)
wbr_drrr->drr_key.ddk_prop =
drrw->drr_key.ddk_prop;
- if (cksum_and_write(&wbr_drr,
- sizeof (dmu_replay_record_t), &stream_cksum,
- outfd) == -1)
+ if (dump_record(&wbr_drr, NULL, 0,
+ &stream_cksum, outfd) != 0)
goto out;
} else {
/* block not previously seen */
- if (cksum_and_write(drr,
- sizeof (dmu_replay_record_t), &stream_cksum,
- outfd) == -1)
- goto out;
- if (cksum_and_write(buf,
- drrw->drr_length,
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, buf, drrw->drr_length,
+ &stream_cksum, outfd) != 0)
goto out;
}
break;
@@ -413,28 +416,27 @@ cksummer(void *arg)
case DRR_WRITE_EMBEDDED:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
- goto out;
+ struct drr_write_embedded *drrwe =
+ &drr->drr_u.drr_write_embedded;
(void) ssread(buf,
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8), ofp);
- if (cksum_and_write(buf,
+ if (dump_record(drr, buf,
P2ROUNDUP((uint64_t)drrwe->drr_psize, 8),
- &stream_cksum, outfd) == -1)
+ &stream_cksum, outfd) != 0)
goto out;
break;
}
case DRR_FREE:
{
- if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
- &stream_cksum, outfd) == -1)
+ if (dump_record(drr, NULL, 0, &stream_cksum,
+ outfd) != 0)
goto out;
break;
}
default:
- (void) printf("INVALID record type 0x%x\n",
+ (void) fprintf(stderr, "INVALID record type 0x%x\n",
drr->drr_type);
/* should never happen, so assert */
assert(B_FALSE);
@@ -1460,18 +1462,11 @@ zfs_send(zfs_handle_t *zhp, const char *
sizeof (drr.drr_u.drr_begin.drr_toname),
"%s@%s", zhp->zfs_name, tosnap);
drr.drr_payloadlen = buflen;
- err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
- /* write header nvlist */
- if (err != -1 && packbuf != NULL) {
- err = cksum_and_write(packbuf, buflen, &zc,
- outfd);
- }
+ err = dump_record(&drr, packbuf, buflen, &zc, outfd);
free(packbuf);
- if (err == -1) {
- err = errno;
+ if (err != 0)
goto stderr_out;
- }
/* write end record */
bzero(&drr, sizeof (drr));
@@ -1702,6 +1697,8 @@ recv_read(libzfs_handle_t *hdl, int fd,
int rv;
int len = ilen;
+ assert(ilen <= SPA_MAXBLOCKSIZE);
+
do {
rv = read(fd, cp, len);
cp += rv;
Modified: vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c
==============================================================================
--- vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c Mon Aug 10 18:27:54 2015 (r286585)
+++ vendor/illumos/dist/lib/libzfs_core/common/libzfs_core.c Mon Aug 10 19:32:30 2015 (r286586)
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -485,18 +485,30 @@ lzc_send(const char *snapname, const cha
}
/*
- * If fromsnap is NULL, a full (non-incremental) stream will be estimated.
+ * "from" can be NULL, a snapshot, or a bookmark.
+ *
+ * If from is NULL, a full (non-incremental) stream will be estimated. This
+ * is calculated very efficiently.
+ *
+ * If from is a snapshot, lzc_send_space uses the deadlists attached to
+ * each snapshot to efficiently estimate the stream size.
+ *
+ * If from is a bookmark, the indirect blocks in the destination snapshot
+ * are traversed, looking for blocks with a birth time since the creation TXG of
+ * the snapshot this bookmark was created from. This will result in
+ * significantly more I/O and be less efficient than a send space estimation on
+ * an equivalent snapshot.
*/
int
-lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
+lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
int err;
args = fnvlist_alloc();
- if (fromsnap != NULL)
- fnvlist_add_string(args, "fromsnap", fromsnap);
+ if (from != NULL)
+ fnvlist_add_string(args, "from", from);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)
More information about the svn-src-all
mailing list