svn commit: r350898 - vendor-sys/illumos/dist/common/zfs vendor-sys/illumos/dist/uts/common/fs/zfs vendor-sys/illumos/dist/uts/common/fs/zfs/sys vendor-sys/illumos/dist/uts/common/sys/fs vendor/ill...
Andriy Gapon
avg at FreeBSD.org
Mon Aug 12 12:05:44 UTC 2019
Author: avg
Date: Mon Aug 12 12:05:40 2019
New Revision: 350898
URL: https://svnweb.freebsd.org/changeset/base/350898
Log:
8423 8199 7432 Implement large_dnode pool feature
8423 Implement large_dnode pool feature
8199 multi-threaded dmu_object_alloc()
7432 Large dnode pool feature
llumos/illumos-gate at 54811da5ac6b517992fdc173df5d605e4e61fdc0
https://github.com/illumos/illumos-gate/commit/54811da5ac6b517992fdc173df5d605e4e61fdc0
https://www.illumos.org/issues/8423
https://www.illumos.org/issues/8199
https://www.illumos.org/issues/7432
ZoL issues:
Improved dnode allocation #6564
Clean up large dnode code #6262
Fix dnode_hold() freeing dnode behavior #8172
Fix dnode allocation race #6414, #6439
Partial: Raw sends must be able to decrease nlevels #6821, #6864
Remove unnecessary txg syncs from receive_object() Closes #7197
Author: Toomas Soome <tsoome at me.com>
Modified:
vendor/illumos/dist/cmd/zdb/zdb.c
vendor/illumos/dist/cmd/zdb/zdb_il.c
vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
vendor/illumos/dist/cmd/ztest/ztest.c
vendor/illumos/dist/man/man5/zpool-features.5
Changes in other areas also in this revision:
Modified:
vendor-sys/illumos/dist/common/zfs/zfeature_common.c
vendor-sys/illumos/dist/common/zfs/zfeature_common.h
vendor-sys/illumos/dist/common/zfs/zfs_prop.c
vendor-sys/illumos/dist/common/zfs/zpool_prop.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dbuf.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_object.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_objset.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_send.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_traverse.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dmu_tx.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dnode.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dnode_sync.c
vendor-sys/illumos/dist/uts/common/fs/zfs/dsl_scan.c
vendor-sys/illumos/dist/uts/common/fs/zfs/sa.c
vendor-sys/illumos/dist/uts/common/fs/zfs/spa.c
vendor-sys/illumos/dist/uts/common/fs/zfs/spa_misc.c
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/arc.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dmu.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dmu_objset.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dnode.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/dsl_dataset.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/sa_impl.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/spa.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zap.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_ioctl.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zfs_znode.h
vendor-sys/illumos/dist/uts/common/fs/zfs/sys/zil.h
vendor-sys/illumos/dist/uts/common/fs/zfs/zap.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zap_micro.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_acl.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_ioctl.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_log.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_replay.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_sa.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zfs_znode.c
vendor-sys/illumos/dist/uts/common/fs/zfs/zil.c
vendor-sys/illumos/dist/uts/common/sys/fs/zfs.h
Modified: vendor/illumos/dist/cmd/zdb/zdb.c
==============================================================================
--- vendor/illumos/dist/cmd/zdb/zdb.c Mon Aug 12 11:42:16 2019 (r350897)
+++ vendor/illumos/dist/cmd/zdb/zdb.c Mon Aug 12 12:05:40 2019 (r350898)
@@ -2108,14 +2108,15 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES
};
static void
-dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
+dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
+ uint64_t *dnode_slots_used)
{
dmu_buf_t *db = NULL;
dmu_object_info_t doi;
dnode_t *dn;
void *bonus = NULL;
size_t bsize = 0;
- char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
+ char iblk[32], dblk[32], lsize[32], asize[32], fill[32], dnsize[32];
char bonus_size[32];
char aux[50];
int error;
@@ -2128,9 +2129,9 @@ dump_object(objset_t *os, uint64_t object, int verbosi
CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
if (*print_header) {
- (void) printf("\n%10s %3s %5s %5s %5s %5s %6s %s\n",
- "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
- "%full", "type");
+ (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
+ "Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
+ "lsize", "%full", "type");
*print_header = 0;
}
@@ -2147,11 +2148,15 @@ dump_object(objset_t *os, uint64_t object, int verbosi
}
dmu_object_info_from_dnode(dn, &doi);
+ if (dnode_slots_used != NULL)
+ *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
+
zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
+ zdb_nicenum(doi.doi_dnodesize, dnsize, sizeof (dnsize));
(void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
doi.doi_max_offset);
@@ -2168,13 +2173,14 @@ dump_object(objset_t *os, uint64_t object, int verbosi
ZDB_COMPRESS_NAME(doi.doi_compress));
}
- (void) printf("%10lld %3u %5s %5s %5s %5s %6s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
- asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
+ (void) printf("%10" PRIu64
+ " %3u %5s %5s %5s %5s %5s %6s %s%s\n",
+ object, doi.doi_indirection, iblk, dblk,
+ asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
- (void) printf("%10s %3s %5s %5s %5s %5s %6s %s\n",
- "", "", "", "", "", bonus_size, "bonus",
+ (void) printf("%10s %3s %5s %5s %5s %5s %5s %6s %s\n",
+ "", "", "", "", "", "", bonus_size, "bonus",
ZDB_OT_NAME(doi.doi_bonus_type));
}
@@ -2278,6 +2284,9 @@ dump_dir(objset_t *os)
int print_header = 1;
unsigned i;
int error;
+ uint64_t total_slots_used = 0;
+ uint64_t max_slot_used = 0;
+ uint64_t dnode_slots;
/* make sure nicenum has enough space */
CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
@@ -2322,7 +2331,7 @@ dump_dir(objset_t *os)
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
- &print_header);
+ &print_header, NULL);
(void) printf("\n");
return;
}
@@ -2347,24 +2356,39 @@ dump_dir(objset_t *os)
if (BP_IS_HOLE(os->os_rootbp))
return;
- dump_object(os, 0, verbosity, &print_header);
+ dump_object(os, 0, verbosity, &print_header, NULL);
object_count = 0;
if (DMU_USERUSED_DNODE(os) != NULL &&
DMU_USERUSED_DNODE(os)->dn_type != 0) {
- dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
- dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
+ dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
+ NULL);
+ dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
+ NULL);
}
object = 0;
while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
- dump_object(os, object, verbosity, &print_header);
+ dump_object(os, object, verbosity, &print_header, &dnode_slots);
object_count++;
+ total_slots_used += dnode_slots;
+ max_slot_used = object + dnode_slots - 1;
}
ASSERT3U(object_count, ==, usedobjs);
(void) printf("\n");
+ (void) printf(" Dnode slots:\n");
+ (void) printf("\tTotal used: %10llu\n",
+ (u_longlong_t)total_slots_used);
+ (void) printf("\tMax used: %10llu\n",
+ (u_longlong_t)max_slot_used);
+ (void) printf("\tPercent empty: %10lf\n",
+ (double)(max_slot_used - total_slots_used)*100 /
+ (double)max_slot_used);
+
+ (void) printf("\n");
+
if (error != ESRCH) {
(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
abort();
@@ -2553,7 +2577,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
return (dump_path_impl(os, child_obj, s + 1));
/*FALLTHROUGH*/
case DMU_OT_PLAIN_FILE_CONTENTS:
- dump_object(os, child_obj, dump_opt['v'], &header);
+ dump_object(os, child_obj, dump_opt['v'], &header, NULL);
return (0);
default:
(void) fprintf(stderr, "object %llu has non-file/directory "
Modified: vendor/illumos/dist/cmd/zdb/zdb_il.c
==============================================================================
--- vendor/illumos/dist/cmd/zdb/zdb_il.c Mon Aug 12 11:42:16 2019 (r350897)
+++ vendor/illumos/dist/cmd/zdb/zdb_il.c Mon Aug 12 12:05:40 2019 (r350898)
@@ -84,13 +84,15 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, void *a
}
(void) printf("%s%s", tab_prefix, ctime(&crtime));
- (void) printf("%sdoid %llu, foid %llu, mode %llo\n", tab_prefix,
- (u_longlong_t)lr->lr_doid, (u_longlong_t)lr->lr_foid,
- (longlong_t)lr->lr_mode);
- (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
- tab_prefix,
- (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
- (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
+ (void) printf("%sdoid %" PRIu64 ", foid %" PRIu64 ", slots %" PRIu64
+ ", mode %" PRIo64 "\n",
+ tab_prefix, lr->lr_doid,
+ LR_FOID_GET_OBJ(lr->lr_foid),
+ LR_FOID_GET_SLOTS(lr->lr_foid),
+ lr->lr_mode);
+ (void) printf("%suid %" PRIu64 ", gid %" PRIu64 ", gen %" PRIu64
+ ", rdev %#" PRIx64 "\n",
+ tab_prefix, lr->lr_uid, lr->lr_gid, lr->lr_gen, lr->lr_rdev);
}
/* ARGSUSED */
Modified: vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c Mon Aug 12 11:42:16 2019 (r350897)
+++ vendor/illumos/dist/cmd/zstreamdump/zstreamdump.c Mon Aug 12 12:05:40 2019 (r350898)
@@ -416,13 +416,15 @@ main(int argc, char *argv[])
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
}
if (verbose) {
- (void) printf("OBJECT object = %llu type = %u "
- "bonustype = %u blksz = %u bonuslen = %u\n",
- (u_longlong_t)drro->drr_object,
+ (void) printf("OBJECT object = %" PRIu64
+ " type = %u bonustype = %u blksz = %u"
+ " bonuslen = %u dn_slots = %u\n",
+ drro->drr_object,
drro->drr_type,
drro->drr_bonustype,
drro->drr_blksz,
- drro->drr_bonuslen);
+ drro->drr_bonuslen,
+ drro->drr_dn_slots);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
Modified: vendor/illumos/dist/cmd/ztest/ztest.c
==============================================================================
--- vendor/illumos/dist/cmd/ztest/ztest.c Mon Aug 12 11:42:16 2019 (r350897)
+++ vendor/illumos/dist/cmd/ztest/ztest.c Mon Aug 12 12:05:40 2019 (r350898)
@@ -194,6 +194,7 @@ extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
+extern int dmu_object_alloc_chunk_shift;
extern boolean_t zfs_force_some_double_word_sm_entries;
static ztest_shared_opts_t *ztest_shared_opts;
@@ -224,6 +225,7 @@ typedef struct ztest_block_tag {
uint64_t bt_magic;
uint64_t bt_objset;
uint64_t bt_object;
+ uint64_t bt_dnodesize;
uint64_t bt_offset;
uint64_t bt_gen;
uint64_t bt_txg;
@@ -274,6 +276,7 @@ typedef struct ztest_od {
dmu_object_type_t od_crtype;
uint64_t od_blocksize;
uint64_t od_crblocksize;
+ uint64_t od_crdnodesize;
uint64_t od_gen;
uint64_t od_crgen;
char od_name[ZFS_MAX_DATASET_NAME_LEN];
@@ -320,6 +323,7 @@ static ztest_shared_callstate_t *ztest_shared_callstat
ztest_func_t ztest_dmu_read_write;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
+ztest_func_t ztest_dmu_object_next_chunk;
ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
ztest_func_t ztest_zap_parallel;
@@ -349,6 +353,7 @@ ztest_func_t ztest_device_removal;
ztest_func_t ztest_remap_blocks;
ztest_func_t ztest_spa_checkpoint_create_discard;
ztest_func_t ztest_initialize;
+ztest_func_t ztest_verify_dnode_bt;
uint64_t zopt_always = 0ULL * NANOSEC; /* all the time */
uint64_t zopt_incessant = 1ULL * NANOSEC / 10; /* every 1/10 second */
@@ -360,6 +365,7 @@ ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
{ ztest_dmu_write_parallel, 10, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
+ { ztest_dmu_object_next_chunk, 1, &zopt_sometimes },
{ ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
@@ -392,7 +398,8 @@ ztest_info_t ztest_info[] = {
{ ztest_device_removal, 1, &zopt_sometimes },
{ ztest_remap_blocks, 1, &zopt_sometimes },
{ ztest_spa_checkpoint_create_discard, 1, &zopt_rarely },
- { ztest_initialize, 1, &zopt_sometimes }
+ { ztest_initialize, 1, &zopt_sometimes },
+ { ztest_verify_dnode_bt, 1, &zopt_sometimes }
};
#define ZTEST_FUNCS (sizeof (ztest_info) / sizeof (ztest_info_t))
@@ -438,8 +445,8 @@ static spa_t *ztest_spa = NULL;
static ztest_ds_t *ztest_ds;
static kmutex_t ztest_vdev_lock;
-static kmutex_t ztest_checkpoint_lock;
static boolean_t ztest_device_removal_active = B_FALSE;
+static kmutex_t ztest_checkpoint_lock;
/*
* The ztest_name_lock protects the pool and dataset namespace used by
@@ -1009,6 +1016,36 @@ ztest_random_blocksize(void)
}
static int
+ztest_random_dnodesize(void)
+{
+ int slots;
+ int max_slots = spa_maxdnodesize(ztest_spa) >> DNODE_SHIFT;
+
+ if (max_slots == DNODE_MIN_SLOTS)
+ return (DNODE_MIN_SIZE);
+
+ /*
+ * Weight the random distribution more heavily toward smaller
+ * dnode sizes since that is more likely to reflect real-world
+ * usage.
+ */
+ ASSERT3U(max_slots, >, 4);
+ switch (ztest_random(10)) {
+ case 0:
+ slots = 5 + ztest_random(max_slots - 4);
+ break;
+ case 1 ... 4:
+ slots = 2 + ztest_random(3);
+ break;
+ default:
+ slots = 1;
+ break;
+ }
+
+ return (slots << DNODE_SHIFT);
+}
+
+static int
ztest_random_ibshift(void)
{
return (DN_MIN_INDBLKSHIFT +
@@ -1285,11 +1322,13 @@ ztest_pattern_match(void *buf, uint64_t size, uint64_t
static void
ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
- uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+ uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
+ uint64_t crtxg)
{
bt->bt_magic = BT_MAGIC;
bt->bt_objset = dmu_objset_id(os);
bt->bt_object = object;
+ bt->bt_dnodesize = dnodesize;
bt->bt_offset = offset;
bt->bt_gen = gen;
bt->bt_txg = txg;
@@ -1298,11 +1337,13 @@ ztest_bt_generate(ztest_block_tag_t *bt, objset_t *os,
static void
ztest_bt_verify(ztest_block_tag_t *bt, objset_t *os, uint64_t object,
- uint64_t offset, uint64_t gen, uint64_t txg, uint64_t crtxg)
+ uint64_t dnodesize, uint64_t offset, uint64_t gen, uint64_t txg,
+ uint64_t crtxg)
{
ASSERT3U(bt->bt_magic, ==, BT_MAGIC);
ASSERT3U(bt->bt_objset, ==, dmu_objset_id(os));
ASSERT3U(bt->bt_object, ==, object);
+ ASSERT3U(bt->bt_dnodesize, ==, dnodesize);
ASSERT3U(bt->bt_offset, ==, offset);
ASSERT3U(bt->bt_gen, <=, gen);
ASSERT3U(bt->bt_txg, <=, txg);
@@ -1324,6 +1365,52 @@ ztest_bt_bonus(dmu_buf_t *db)
}
/*
+ * Generate a token to fill up unused bonus buffer space. Try to make
+ * it unique to the object, generation, and offset to verify that data
+ * is not getting overwritten by data from other dnodes.
+ */
+#define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
+ (((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset))
+
+/*
+ * Fill up the unused bonus buffer region before the block tag with a
+ * verifiable pattern. Filling the whole bonus area with non-zero data
+ * helps ensure that all dnode traversal code properly skips the
+ * interior regions of large dnodes.
+ */
+void
+ztest_fill_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
+ objset_t *os, uint64_t gen)
+{
+ uint64_t *bonusp;
+
+ ASSERT(IS_P2ALIGNED((char *)end - (char *)db->db_data, 8));
+
+ for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
+ uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
+ gen, bonusp - (uint64_t *)db->db_data);
+ *bonusp = token;
+ }
+}
+
+/*
+ * Verify that the unused area of a bonus buffer is filled with the
+ * expected tokens.
+ */
+void
+ztest_verify_unused_bonus(dmu_buf_t *db, void *end, uint64_t obj,
+ objset_t *os, uint64_t gen)
+{
+ uint64_t *bonusp;
+
+ for (bonusp = db->db_data; bonusp < (uint64_t *)end; bonusp++) {
+ uint64_t token = ZTEST_BONUS_FILL_TOKEN(obj, dmu_objset_id(os),
+ gen, bonusp - (uint64_t *)db->db_data);
+ VERIFY3U(*bonusp, ==, token);
+ }
+}
+
+/*
* ZIL logging ops
*/
@@ -1331,7 +1418,7 @@ ztest_bt_bonus(dmu_buf_t *db)
#define lrz_blocksize lr_uid
#define lrz_ibshift lr_gid
#define lrz_bonustype lr_rdev
-#define lrz_bonuslen lr_crtime[1]
+#define lrz_dnodesize lr_crtime[1]
static void
ztest_log_create(ztest_ds_t *zd, dmu_tx_t *tx, lr_create_t *lr)
@@ -1447,6 +1534,7 @@ ztest_replay_create(void *arg1, void *arg2, boolean_t
dmu_tx_t *tx;
uint64_t txg;
int error = 0;
+ int bonuslen;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@@ -1469,26 +1557,27 @@ ztest_replay_create(void *arg1, void *arg2, boolean_t
return (ENOSPC);
ASSERT(dmu_objset_zil(os)->zl_replay == !!lr->lr_foid);
+ bonuslen = DN_BONUS_SIZE(lr->lrz_dnodesize);
if (lr->lrz_type == DMU_OT_ZAP_OTHER) {
if (lr->lr_foid == 0) {
- lr->lr_foid = zap_create(os,
+ lr->lr_foid = zap_create_dnsize(os,
lr->lrz_type, lr->lrz_bonustype,
- lr->lrz_bonuslen, tx);
+ bonuslen, lr->lrz_dnodesize, tx);
} else {
- error = zap_create_claim(os, lr->lr_foid,
+ error = zap_create_claim_dnsize(os, lr->lr_foid,
lr->lrz_type, lr->lrz_bonustype,
- lr->lrz_bonuslen, tx);
+ bonuslen, lr->lrz_dnodesize, tx);
}
} else {
if (lr->lr_foid == 0) {
- lr->lr_foid = dmu_object_alloc(os,
+ lr->lr_foid = dmu_object_alloc_dnsize(os,
lr->lrz_type, 0, lr->lrz_bonustype,
- lr->lrz_bonuslen, tx);
+ bonuslen, lr->lrz_dnodesize, tx);
} else {
- error = dmu_object_claim(os, lr->lr_foid,
+ error = dmu_object_claim_dnsize(os, lr->lr_foid,
lr->lrz_type, 0, lr->lrz_bonustype,
- lr->lrz_bonuslen, tx);
+ bonuslen, lr->lrz_dnodesize, tx);
}
}
@@ -1508,7 +1597,9 @@ ztest_replay_create(void *arg1, void *arg2, boolean_t
VERIFY3U(0, ==, dmu_bonus_hold(os, lr->lr_foid, FTAG, &db));
bbt = ztest_bt_bonus(db);
dmu_buf_will_dirty(db, tx);
- ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_gen, txg, txg);
+ ztest_bt_generate(bbt, os, lr->lr_foid, lr->lrz_dnodesize, -1ULL,
+ lr->lr_gen, txg, txg);
+ ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, lr->lr_gen);
dmu_buf_rele(db, FTAG);
VERIFY3U(0, ==, zap_add(os, lr->lr_doid, name, sizeof (uint64_t), 1,
@@ -1658,7 +1749,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t b
VERIFY(dmu_read(os, lr->lr_foid, offset,
sizeof (rbt), &rbt, prefetch) == 0);
if (rbt.bt_magic == BT_MAGIC) {
- ztest_bt_verify(&rbt, os, lr->lr_foid,
+ ztest_bt_verify(&rbt, os, lr->lr_foid, 0,
offset, gen, txg, crtxg);
}
}
@@ -1670,7 +1761,7 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t b
* as it was when the write was generated.
*/
if (zd->zd_zilog->zl_replay) {
- ztest_bt_verify(bt, os, lr->lr_foid, offset,
+ ztest_bt_verify(bt, os, lr->lr_foid, 0, offset,
MAX(gen, bt->bt_gen), MAX(txg, lrtxg),
bt->bt_crtxg);
}
@@ -1679,7 +1770,8 @@ ztest_replay_write(void *arg1, void *arg2, boolean_t b
* Set the bt's gen/txg to the bonus buffer's gen/txg
* so that all of the usual ASSERTs will work.
*/
- ztest_bt_generate(bt, os, lr->lr_foid, offset, gen, txg, crtxg);
+ ztest_bt_generate(bt, os, lr->lr_foid, 0, offset, gen, txg,
+ crtxg);
}
if (abuf == NULL) {
@@ -1751,7 +1843,7 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t
dmu_tx_t *tx;
dmu_buf_t *db;
ztest_block_tag_t *bbt;
- uint64_t txg, lrtxg, crtxg;
+ uint64_t txg, lrtxg, crtxg, dnodesize;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
@@ -1774,6 +1866,7 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t
ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
crtxg = bbt->bt_crtxg;
lrtxg = lr->lr_common.lrc_txg;
+ dnodesize = bbt->bt_dnodesize;
if (zd->zd_zilog->zl_replay) {
ASSERT(lr->lr_size != 0);
@@ -1792,7 +1885,7 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t
/*
* Verify that the current bonus buffer is not newer than our txg.
*/
- ztest_bt_verify(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode,
+ ztest_bt_verify(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
MAX(txg, lrtxg), crtxg);
dmu_buf_will_dirty(db, tx);
@@ -1802,7 +1895,9 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t
VERIFY0(dmu_set_bonus(db, lr->lr_size, tx));
bbt = ztest_bt_bonus(db);
- ztest_bt_generate(bbt, os, lr->lr_foid, -1ULL, lr->lr_mode, txg, crtxg);
+ ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
+ txg, crtxg);
+ ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen);
dmu_buf_rele(db, FTAG);
@@ -2033,7 +2128,7 @@ ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count
lr->lrz_blocksize = od->od_crblocksize;
lr->lrz_ibshift = ztest_random_ibshift();
lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
- lr->lrz_bonuslen = dmu_bonus_max();
+ lr->lrz_dnodesize = od->od_crdnodesize;
lr->lr_gen = od->od_crgen;
lr->lr_crtime[0] = time(NULL);
@@ -2212,7 +2307,8 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t off
switch (io_type) {
case ZTEST_IO_WRITE_TAG:
- ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
+ ztest_bt_generate(&wbt, zd->zd_os, object, doi.doi_dnodesize,
+ offset, 0, 0, 0);
(void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
break;
@@ -2273,13 +2369,15 @@ ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t off
*/
static void
ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
- dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
+ dmu_object_type_t type, uint64_t blocksize, uint64_t dnodesize,
+ uint64_t gen)
{
od->od_dir = ZTEST_DIROBJ;
od->od_object = 0;
od->od_crtype = type;
od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
+ od->od_crdnodesize = dnodesize ? dnodesize : ztest_random_dnodesize();
od->od_crgen = gen;
od->od_type = DMU_OT_NONE;
@@ -3721,8 +3819,10 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t i
ztest_od_t od[4];
int batchsize = sizeof (od) / sizeof (od[0]);
- for (int b = 0; b < batchsize; b++)
- ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);
+ for (int b = 0; b < batchsize; b++) {
+ ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER,
+ 0, 0, 0);
+ }
/*
* Destroy the previous batch of objects, create a new batch,
@@ -3737,6 +3837,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t i
}
/*
+ * Rewind the global allocator to verify object allocation backfilling.
+ */
+void
+ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id)
+{
+ objset_t *os = zd->zd_os;
+ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
+ uint64_t object;
+
+ /*
+ * Rewind the global allocator randomly back to a lower object number
+ * to force backfilling and reclamation of recently freed dnodes.
+ */
+ mutex_enter(&os->os_obj_lock);
+ object = ztest_random(os->os_obj_next_chunk);
+ os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk);
+ mutex_exit(&os->os_obj_lock);
+}
+
+/*
* Verify that dmu_{read,write} work as expected.
*/
void
@@ -3782,8 +3902,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
/*
* Read the directory info. If it's the first time, set things up.
*/
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, chunksize);
- ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4052,8 +4174,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id
/*
* Read the directory info. If it's the first time, set things up.
*/
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
- ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, chunksize);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4254,7 +4378,8 @@ ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
* to verify that parallel writes to an object -- even to the
* same blocks within the object -- doesn't cause any trouble.
*/
- ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER,
+ 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4273,7 +4398,8 @@ ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
uint64_t blocksize = ztest_random_blocksize();
void *data;
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
return;
@@ -4319,7 +4445,7 @@ ztest_zap(ztest_ds_t *zd, uint64_t id)
int error;
char *hc[2] = { "s.acl.h", ".s.open.h.hyLZlg" };
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
return;
@@ -4451,7 +4577,7 @@ ztest_fzap(ztest_ds_t *zd, uint64_t id)
ztest_od_t od[1];
uint64_t object, txg;
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_ZAP_OTHER, 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
return;
@@ -4497,7 +4623,8 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
char name[20], string_value[20];
void *data;
- ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0);
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER,
+ 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4685,7 +4812,7 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id
uint64_t old_txg, txg;
int i, error;
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4799,6 +4926,41 @@ ztest_dmu_commit_callbacks(ztest_ds_t *zd, uint64_t id
dmu_tx_commit(tx);
}
+/*
+ * Visit each object in the dataset. Verify that its properties
+ * are consistent what was stored in the block tag when it was created,
+ * and that its unused bonus buffer space has not been overwritten.
+ */
+void
+ztest_verify_dnode_bt(ztest_ds_t *zd, uint64_t id)
+{
+ objset_t *os = zd->zd_os;
+ uint64_t obj;
+ int err = 0;
+
+ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) {
+ ztest_block_tag_t *bt = NULL;
+ dmu_object_info_t doi;
+ dmu_buf_t *db;
+
+ if (dmu_bonus_hold(os, obj, FTAG, &db) != 0)
+ continue;
+
+ dmu_object_info_from_db(db, &doi);
+ if (doi.doi_bonus_size >= sizeof (*bt))
+ bt = ztest_bt_bonus(db);
+
+ if (bt && bt->bt_magic == BT_MAGIC) {
+ ztest_bt_verify(bt, os, obj, doi.doi_dnodesize,
+ bt->bt_offset, bt->bt_gen, bt->bt_txg,
+ bt->bt_crtxg);
+ ztest_verify_unused_bonus(db, bt, obj, os, bt->bt_gen);
+ }
+
+ dmu_buf_rele(db, FTAG);
+ }
+}
+
/* ARGSUSED */
void
ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
@@ -5283,7 +5445,8 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
blocksize = ztest_random_blocksize();
blocksize = MIN(blocksize, 2048); /* because we write so many */
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -6155,7 +6318,7 @@ ztest_freeze(void)
numloops++ < ztest_opts.zo_maxloops &&
metaslab_class_get_alloc(spa_normal_class(spa)) < capacity) {
ztest_od_t od;
- ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
+ ztest_od_init(&od, 0, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
VERIFY0(ztest_object_init(zd, &od, sizeof (od), B_FALSE));
ztest_io(zd, od.od_object,
ztest_random(ZTEST_RANGE_LOCKS) << SPA_MAXBLOCKSHIFT);
Modified: vendor/illumos/dist/man/man5/zpool-features.5
==============================================================================
--- vendor/illumos/dist/man/man5/zpool-features.5 Mon Aug 12 11:42:16 2019 (r350897)
+++ vendor/illumos/dist/man/man5/zpool-features.5 Mon Aug 12 12:05:40 2019 (r350898)
@@ -533,6 +533,30 @@ set larger than 128KB, and will return to being \fBena
filesystems that have ever had their recordsize larger than 128KB are destroyed.
.RE
+.ne 2
+.na
+\fB\fBlarge_dnode\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID org.zfsonlinux:large_dnode
+READ\-ONLY COMPATIBLE no
+DEPENDENCIES extensible_dataset
+.TE
+
+The \fBlarge_dnode\fR feature allows the size of dnodes in a dataset to be
+set larger than 512B.
+
+This feature becomes \fBactive\fR once a dataset contains an object with a
+dnode larger than 512B, which occurs as a result of setting the \fBdnodesize\fR
+dataset property to a value other than \fBlegacy\fR. The feature will return to
+being \fBenabled\fR once all filesystems that have ever contained a dnode larger
+than 512B are destroyed. Large dnodes allow more data to be stored in the
+bonus buffer, thus potentially improving performance by avoiding the use of
+spill blocks.
+.RE
+
.sp
.ne 2
.na
More information about the svn-src-vendor
mailing list