svn commit: r354211 - in stable/12: cddl/contrib/opensolaris/cmd/zdb cddl/contrib/opensolaris/cmd/zstreamdump cddl/contrib/opensolaris/cmd/ztest sys/cddl/contrib/opensolaris/common/zfs sys/cddl/con...
Andriy Gapon
avg at FreeBSD.org
Thu Oct 31 09:14:54 UTC 2019
Author: avg
Date: Thu Oct 31 09:14:50 2019
New Revision: 354211
URL: https://svnweb.freebsd.org/changeset/base/354211
Log:
MFC r353176,r353304,r353556,r353559: large_dnode improvements and fixes
r353176: MFV r350898, r351075: 8423 8199 7432 Implement large_dnode pool feature
This updates the FreeBSD large_dnode code (that was imported from ZoL) to a
version that was committed to illumos. It has some cleanups,
improvements and fixes compared to what we have in FreeBSD now.
I think that the most significant update is 8199 multi-threaded
dmu_object_alloc().
r353304: zfs: use atomic_load_64 to read atomic variable in dmu_object_alloc_impl
r353556: MFV r353551: 10452 ZoL: merge in large dnode feature fixes
r353559: MFV r353558: 10572 10579 Fix race in dnode_check_slots_free()
Modified:
stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb.c
stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
stable/12/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
stable/12/cddl/contrib/opensolaris/cmd/ztest/ztest.c
stable/12/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode_sync.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sa.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_misc.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_impl.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu_objset.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dnode.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/sa_impl.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zap.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_context.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ioctl.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zil.h
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zap_micro.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_replay.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c
stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb.c
==============================================================================
--- stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -2131,7 +2131,8 @@ static object_viewer_t *object_viewer[DMU_OT_NUMTYPES
};
static void
-dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
+dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header,
+ uint64_t *dnode_slots_used)
{
dmu_buf_t *db = NULL;
dmu_object_info_t doi;
@@ -2151,7 +2152,7 @@ dump_object(objset_t *os, uint64_t object, int verbosi
CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
if (*print_header) {
- (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
+ (void) printf("\n%10s %3s %5s %5s %5s %6s %5s %6s %s\n",
"Object", "lvl", "iblk", "dblk", "dsize", "dnsize",
"lsize", "%full", "type");
*print_header = 0;
@@ -2170,6 +2171,9 @@ dump_object(objset_t *os, uint64_t object, int verbosi
}
dmu_object_info_from_dnode(dn, &doi);
+ if (dnode_slots_used != NULL)
+ *dnode_slots_used = doi.doi_dnodesize / DNODE_MIN_SIZE;
+
zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
@@ -2192,8 +2196,9 @@ dump_object(objset_t *os, uint64_t object, int verbosi
ZDB_COMPRESS_NAME(doi.doi_compress));
}
- (void) printf("%10lld %3u %5s %5s %5s %6s %5s %6s %s%s\n",
- (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
+ (void) printf("%10" PRIu64
+ " %3u %5s %5s %5s %5s %5s %6s %s%s\n",
+ object, doi.doi_indirection, iblk, dblk,
asize, dnsize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
@@ -2302,6 +2307,9 @@ dump_dir(objset_t *os)
int print_header = 1;
unsigned i;
int error;
+ uint64_t total_slots_used = 0;
+ uint64_t max_slot_used = 0;
+ uint64_t dnode_slots;
/* make sure nicenum has enough space */
CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
@@ -2346,7 +2354,7 @@ dump_dir(objset_t *os)
if (zopt_objects != 0) {
for (i = 0; i < zopt_objects; i++)
dump_object(os, zopt_object[i], verbosity,
- &print_header);
+ &print_header, NULL);
(void) printf("\n");
return;
}
@@ -2371,22 +2379,37 @@ dump_dir(objset_t *os)
if (BP_IS_HOLE(os->os_rootbp))
return;
- dump_object(os, 0, verbosity, &print_header);
+ dump_object(os, 0, verbosity, &print_header, NULL);
object_count = 0;
if (DMU_USERUSED_DNODE(os) != NULL &&
DMU_USERUSED_DNODE(os)->dn_type != 0) {
- dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
- dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
+ dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header,
+ NULL);
+ dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header,
+ NULL);
}
object = 0;
while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
- dump_object(os, object, verbosity, &print_header);
+ dump_object(os, object, verbosity, &print_header, &dnode_slots);
object_count++;
+ total_slots_used += dnode_slots;
+ max_slot_used = object + dnode_slots - 1;
}
(void) printf("\n");
+ (void) printf(" Dnode slots:\n");
+ (void) printf("\tTotal used: %10llu\n",
+ (u_longlong_t)total_slots_used);
+ (void) printf("\tMax used: %10llu\n",
+ (u_longlong_t)max_slot_used);
+ (void) printf("\tPercent empty: %10lf\n",
+ (double)(max_slot_used - total_slots_used)*100 /
+ (double)max_slot_used);
+
+ (void) printf("\n");
+
if (error != ESRCH) {
(void) fprintf(stderr, "dmu_object_next() = %d\n", error);
abort();
@@ -2578,7 +2601,7 @@ dump_path_impl(objset_t *os, uint64_t obj, char *name)
return (dump_path_impl(os, child_obj, s + 1));
/*FALLTHROUGH*/
case DMU_OT_PLAIN_FILE_CONTENTS:
- dump_object(os, child_obj, dump_opt['v'], &header);
+ dump_object(os, child_obj, dump_opt['v'], &header, NULL);
return (0);
default:
(void) fprintf(stderr, "object %llu has non-file/directory "
Modified: stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c
==============================================================================
--- stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/cddl/contrib/opensolaris/cmd/zdb/zdb_il.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -84,15 +84,15 @@ zil_prt_rec_create(zilog_t *zilog, int txtype, void *a
}
(void) printf("%s%s", tab_prefix, ctime(&crtime));
- (void) printf("%sdoid %llu, foid %llu, slots %llu, mode %llo\n", tab_prefix,
- (u_longlong_t)lr->lr_doid,
- (u_longlong_t)LR_FOID_GET_OBJ(lr->lr_foid),
- (u_longlong_t)LR_FOID_GET_SLOTS(lr->lr_foid),
- (longlong_t)lr->lr_mode);
- (void) printf("%suid %llu, gid %llu, gen %llu, rdev 0x%llx\n",
- tab_prefix,
- (u_longlong_t)lr->lr_uid, (u_longlong_t)lr->lr_gid,
- (u_longlong_t)lr->lr_gen, (u_longlong_t)lr->lr_rdev);
+ (void) printf("%sdoid %" PRIu64 ", foid %" PRIu64 ", slots %" PRIu64
+ ", mode %" PRIo64 "\n",
+ tab_prefix, lr->lr_doid,
+ (uint64_t)LR_FOID_GET_OBJ(lr->lr_foid),
+ (uint64_t)LR_FOID_GET_SLOTS(lr->lr_foid),
+ lr->lr_mode);
+ (void) printf("%suid %" PRIu64 ", gid %" PRIu64 ", gen %" PRIu64
+ ", rdev %#" PRIx64 "\n",
+ tab_prefix, lr->lr_uid, lr->lr_gid, lr->lr_gen, lr->lr_rdev);
}
/* ARGSUSED */
Modified: stable/12/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c
==============================================================================
--- stable/12/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/cddl/contrib/opensolaris/cmd/zstreamdump/zstreamdump.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -416,13 +416,15 @@ main(int argc, char *argv[])
drro->drr_toguid = BSWAP_64(drro->drr_toguid);
}
if (verbose) {
- (void) printf("OBJECT object = %llu type = %u "
- "bonustype = %u blksz = %u bonuslen = %u\n",
- (u_longlong_t)drro->drr_object,
+ (void) printf("OBJECT object = %" PRIu64
+ " type = %u bonustype = %u blksz = %u"
+ " bonuslen = %u dn_slots = %u\n",
+ drro->drr_object,
drro->drr_type,
drro->drr_bonustype,
drro->drr_blksz,
- drro->drr_bonuslen);
+ drro->drr_bonuslen,
+ drro->drr_dn_slots);
}
if (drro->drr_bonuslen > 0) {
(void) ssread(buf,
Modified: stable/12/cddl/contrib/opensolaris/cmd/ztest/ztest.c
==============================================================================
--- stable/12/cddl/contrib/opensolaris/cmd/ztest/ztest.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/cddl/contrib/opensolaris/cmd/ztest/ztest.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -196,6 +196,7 @@ extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;
extern boolean_t zfs_abd_scatter_enabled;
+extern int dmu_object_alloc_chunk_shift;
extern boolean_t zfs_force_some_double_word_sm_entries;
static ztest_shared_opts_t *ztest_shared_opts;
@@ -322,6 +323,7 @@ static ztest_shared_callstate_t *ztest_shared_callstat
ztest_func_t ztest_dmu_read_write;
ztest_func_t ztest_dmu_write_parallel;
ztest_func_t ztest_dmu_object_alloc_free;
+ztest_func_t ztest_dmu_object_next_chunk;
ztest_func_t ztest_dmu_commit_callbacks;
ztest_func_t ztest_zap;
ztest_func_t ztest_zap_parallel;
@@ -363,6 +365,7 @@ ztest_info_t ztest_info[] = {
{ ztest_dmu_read_write, 1, &zopt_always },
{ ztest_dmu_write_parallel, 10, &zopt_always },
{ ztest_dmu_object_alloc_free, 1, &zopt_always },
+ { ztest_dmu_object_next_chunk, 1, &zopt_sometimes },
{ ztest_dmu_commit_callbacks, 1, &zopt_always },
{ ztest_zap, 30, &zopt_always },
{ ztest_zap_parallel, 100, &zopt_always },
@@ -1366,7 +1369,7 @@ ztest_bt_bonus(dmu_buf_t *db)
* it unique to the object, generation, and offset to verify that data
* is not getting overwritten by data from other dnodes.
*/
-#define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
+#define ZTEST_BONUS_FILL_TOKEN(obj, ds, gen, offset) \
(((ds) << 48) | ((gen) << 32) | ((obj) << 8) | (offset))
/*
@@ -1895,6 +1898,7 @@ ztest_replay_setattr(void *arg1, void *arg2, boolean_t
ztest_bt_generate(bbt, os, lr->lr_foid, dnodesize, -1ULL, lr->lr_mode,
txg, crtxg);
ztest_fill_unused_bonus(db, bbt, lr->lr_foid, os, bbt->bt_gen);
+
dmu_buf_rele(db, FTAG);
(void) ztest_log_setattr(zd, tx, lr);
@@ -3815,8 +3819,10 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t i
ztest_od_t od[4];
int batchsize = sizeof (od) / sizeof (od[0]);
- for (int b = 0; b < batchsize; b++)
- ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0, 0);
+ for (int b = 0; b < batchsize; b++) {
+ ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER,
+ 0, 0, 0);
+ }
/*
* Destroy the previous batch of objects, create a new batch,
@@ -3831,6 +3837,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t i
}
/*
+ * Rewind the global allocator to verify object allocation backfilling.
+ */
+void
+ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id)
+{
+ objset_t *os = zd->zd_os;
+ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
+ uint64_t object;
+
+ /*
+ * Rewind the global allocator randomly back to a lower object number
+ * to force backfilling and reclamation of recently freed dnodes.
+ */
+ mutex_enter(&os->os_obj_lock);
+ object = ztest_random(os->os_obj_next_chunk);
+ os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk);
+ mutex_exit(&os->os_obj_lock);
+}
+
+/*
* Verify that dmu_{read,write} work as expected.
*/
void
@@ -3876,8 +3902,10 @@ ztest_dmu_read_write(ztest_ds_t *zd, uint64_t id)
/*
* Read the directory info. If it's the first time, set things up.
*/
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
- ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4146,8 +4174,10 @@ ztest_dmu_read_write_zcopy(ztest_ds_t *zd, uint64_t id
/*
* Read the directory info. If it's the first time, set things up.
*/
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
- ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0, chunksize);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
+ ztest_od_init(&od[1], id, FTAG, 1, DMU_OT_UINT64_OTHER, 0, 0,
+ chunksize);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4347,7 +4377,8 @@ ztest_dmu_write_parallel(ztest_ds_t *zd, uint64_t id)
* to verify that parallel writes to an object -- even to the
* same blocks within the object -- doesn't cause any trouble.
*/
- ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0, 0);
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, 0, DMU_OT_UINT64_OTHER,
+ 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -4366,7 +4397,8 @@ ztest_dmu_prealloc(ztest_ds_t *zd, uint64_t id)
uint64_t blocksize = ztest_random_blocksize();
void *data;
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
if (ztest_object_init(zd, od, sizeof (od), !ztest_random(2)) != 0)
return;
@@ -4590,7 +4622,8 @@ ztest_zap_parallel(ztest_ds_t *zd, uint64_t id)
char name[20], string_value[20];
void *data;
- ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER, 0, 0, 0);
+ ztest_od_init(&od[0], ID_PARALLEL, FTAG, micro, DMU_OT_ZAP_OTHER,
+ 0, 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
@@ -5411,7 +5444,8 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
blocksize = ztest_random_blocksize();
blocksize = MIN(blocksize, 2048); /* because we write so many */
- ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0, 0);
+ ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize,
+ 0, 0);
if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
return;
Modified: stable/12/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/common/zfs/zfs_prop.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -292,10 +292,11 @@ zfs_prop_init(void)
ZFS_VOLMODE_DEFAULT, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT | ZFS_TYPE_VOLUME,
"default | geom | dev | none", "VOLMODE", volmode_table);
+
zprop_register_index(ZFS_PROP_DNODESIZE, "dnodesize",
ZFS_DNSIZE_LEGACY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
"legacy | auto | 1k | 2k | 4k | 8k | 16k", "DNSIZE", dnsize_table);
-
+
/* inherit index (boolean) properties */
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM, "on | off", "ATIME", boolean_table);
Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -1812,6 +1812,9 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
FTAG);
}
}
+
+ if (tx->tx_txg > dn->dn_dirty_txg)
+ dn->dn_dirty_txg = tx->tx_txg;
mutex_exit(&dn->dn_mtx);
if (db->db_blkid == DMU_SPILL_BLKID)
@@ -3757,7 +3760,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb
if (dn->dn_type == DMU_OT_DNODE) {
i = 0;
while (i < db->db.db_size) {
- dnode_phys_t *dnp = db->db.db_data + i;
+ dnode_phys_t *dnp =
+ (void *)(((char *)db->db.db_data) + i);
i += DNODE_MIN_SIZE;
if (dnp->dn_type != DMU_OT_NONE) {
Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_object.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -32,6 +32,14 @@
#include <sys/zfeature.h>
#include <sys/dsl_dataset.h>
+/*
+ * Each of the concurrent object allocators will grab
+ * 2^dmu_object_alloc_chunk_shift dnode slots at a time. The default is to
+ * grab 128 slots, which is 4 blocks worth. This was experimentally
+ * determined to be the lowest value that eliminates the measurable effect
+ * of lock contention from this code path.
+ */
+int dmu_object_alloc_chunk_shift = 7;
static uint64_t
dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
@@ -44,6 +52,10 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t
dnode_t *dn = NULL;
int dn_slots = dnodesize >> DNODE_SHIFT;
boolean_t restarted = B_FALSE;
+ uint64_t *cpuobj = &os->os_obj_next_percpu[CPU_SEQID %
+ os->os_obj_next_percpu_len];
+ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
+ int error;
if (dn_slots == 0) {
dn_slots = DNODE_MIN_SLOTS;
@@ -51,93 +63,149 @@ dmu_object_alloc_impl(objset_t *os, dmu_object_type_t
ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
}
-
- mutex_enter(&os->os_obj_lock);
+
+ /*
+ * The "chunk" of dnodes that is assigned to a CPU-specific
+ * allocator needs to be at least one block's worth, to avoid
+ * lock contention on the dbuf. It can be at most one L1 block's
+ * worth, so that the "rescan after polishing off a L1's worth"
+ * logic below will be sure to kick in.
+ */
+ if (dnodes_per_chunk < DNODES_PER_BLOCK)
+ dnodes_per_chunk = DNODES_PER_BLOCK;
+ if (dnodes_per_chunk > L1_dnode_count)
+ dnodes_per_chunk = L1_dnode_count;
+
+#ifdef __FreeBSD__
+ object = atomic_load_64(cpuobj);
+#else
+ object = *cpuobj;
+#endif
+
for (;;) {
- object = os->os_obj_next;
/*
- * Each time we polish off a L1 bp worth of dnodes (2^12
- * objects), move to another L1 bp that's still
- * reasonably sparse (at most 1/4 full). Look from the
- * beginning at most once per txg. If we still can't
- * allocate from that L1 block, search for an empty L0
- * block, which will quickly skip to the end of the
- * metadnode if the no nearby L0 blocks are empty. This
- * fallback avoids a pathology where full dnode blocks
- * containing large dnodes appear sparse because they
- * have a low blk_fill, leading to many failed
- * allocation attempts. In the long term a better
- * mechanism to search for sparse metadnode regions,
- * such as spacemaps, could be implemented.
- *
- * os_scan_dnodes is set during txg sync if enough objects
- * have been freed since the previous rescan to justify
- * backfilling again.
- *
- * Note that dmu_traverse depends on the behavior that we use
- * multiple blocks of the dnode object before going back to
- * reuse objects. Any change to this algorithm should preserve
- * that property or find another solution to the issues
- * described in traverse_visitbp.
+ * If we finished a chunk of dnodes, get a new one from
+ * the global allocator.
*/
- if (P2PHASE(object, L1_dnode_count) == 0) {
- uint64_t offset;
- uint64_t blkfill;
- int minlvl;
- int error;
- if (os->os_rescan_dnodes) {
- offset = 0;
- os->os_rescan_dnodes = B_FALSE;
- } else {
- offset = object << DNODE_SHIFT;
+ if ((P2PHASE(object, dnodes_per_chunk) == 0) ||
+ (P2PHASE(object + dn_slots - 1, dnodes_per_chunk) <
+ dn_slots)) {
+ DNODE_STAT_BUMP(dnode_alloc_next_chunk);
+ mutex_enter(&os->os_obj_lock);
+ ASSERT0(P2PHASE(os->os_obj_next_chunk,
+ dnodes_per_chunk));
+ object = os->os_obj_next_chunk;
+
+ /*
+ * Each time we polish off a L1 bp worth of dnodes
+ * (2^12 objects), move to another L1 bp that's
+ * still reasonably sparse (at most 1/4 full). Look
+ * from the beginning at most once per txg. If we
+ * still can't allocate from that L1 block, search
+ * for an empty L0 block, which will quickly skip
+ * to the end of the metadnode if the no nearby L0
+ * blocks are empty. This fallback avoids a
+ * pathology where full dnode blocks containing
+ * large dnodes appear sparse because they have a
+ * low blk_fill, leading to many failed allocation
+ * attempts. In the long term a better mechanism to
+ * search for sparse metadnode regions, such as
+ * spacemaps, could be implemented.
+ *
+ * os_scan_dnodes is set during txg sync if enough
+ * objects have been freed since the previous
+ * rescan to justify backfilling again.
+ *
+ * Note that dmu_traverse depends on the behavior
+ * that we use multiple blocks of the dnode object
+ * before going back to reuse objects. Any change
+ * to this algorithm should preserve that property
+ * or find another solution to the issues described
+ * in traverse_visitbp.
+ */
+ if (P2PHASE(object, L1_dnode_count) == 0) {
+ uint64_t offset;
+ uint64_t blkfill;
+ int minlvl;
+ if (os->os_rescan_dnodes) {
+ offset = 0;
+ os->os_rescan_dnodes = B_FALSE;
+ } else {
+ offset = object << DNODE_SHIFT;
+ }
+ blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
+ minlvl = restarted ? 1 : 2;
+ restarted = B_TRUE;
+ error = dnode_next_offset(DMU_META_DNODE(os),
+ DNODE_FIND_HOLE, &offset, minlvl,
+ blkfill, 0);
+ if (error == 0) {
+ object = offset >> DNODE_SHIFT;
+ }
}
- blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
- minlvl = restarted ? 1 : 2;
- restarted = B_TRUE;
- error = dnode_next_offset(DMU_META_DNODE(os),
- DNODE_FIND_HOLE, &offset, minlvl, blkfill, 0);
- if (error == 0)
- object = offset >> DNODE_SHIFT;
+ /*
+ * Note: if "restarted", we may find a L0 that
+ * is not suitably aligned.
+ */
+ os->os_obj_next_chunk =
+ P2ALIGN(object, dnodes_per_chunk) +
+ dnodes_per_chunk;
+ (void) atomic_swap_64(cpuobj, object);
+ mutex_exit(&os->os_obj_lock);
}
- os->os_obj_next = object + dn_slots;
/*
+ * The value of (*cpuobj) before adding dn_slots is the object
+ * ID assigned to us. The value afterwards is the object ID
+ * assigned to whoever wants to do an allocation next.
+ */
+ object = atomic_add_64_nv(cpuobj, dn_slots) - dn_slots;
+
+ /*
* XXX We should check for an i/o error here and return
* up to our caller. Actually we should pre-read it in
* dmu_tx_assign(), but there is currently no mechanism
* to do so.
*/
- (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
- FTAG, &dn);
- if (dn)
- break;
-
- if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
- os->os_obj_next = object;
- else
+ error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
+ dn_slots, FTAG, &dn);
+ if (error == 0) {
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
/*
- * Skip to next known valid starting point for a dnode.
+ * Another thread could have allocated it; check
+ * again now that we have the struct lock.
*/
- os->os_obj_next = P2ROUNDUP(object + 1,
- DNODES_PER_BLOCK);
- }
+ if (dn->dn_type == DMU_OT_NONE) {
+ dnode_allocate(dn, ot, blocksize, 0,
+ bonustype, bonuslen, dn_slots, tx);
+ rw_exit(&dn->dn_struct_rwlock);
+ dmu_tx_add_new_object(tx, dn);
+ dnode_rele(dn, FTAG);
+ return (object);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
+ dnode_rele(dn, FTAG);
+ DNODE_STAT_BUMP(dnode_alloc_race);
+ }
- dnode_allocate(dn, ot, blocksize, indirect_blockshift,
- bonustype, bonuslen, dn_slots, tx);
- mutex_exit(&os->os_obj_lock);
-
- dmu_tx_add_new_object(tx, dn);
- dnode_rele(dn, FTAG);
-
- return (object);
+ /*
+ * Skip to next known valid starting point on error. This
+ * is the start of the next block of dnodes.
+ */
+ if (dmu_object_next(os, &object, B_TRUE, 0) != 0) {
+ object = P2ROUNDUP(object + 1, DNODES_PER_BLOCK);
+ DNODE_STAT_BUMP(dnode_alloc_next_block);
+ }
+ (void) atomic_swap_64(cpuobj, object);
+ }
}
uint64_t
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
- return dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
- bonuslen, 0, tx);
+ return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+ bonuslen, 0, tx));
}
uint64_t
@@ -145,8 +213,8 @@ dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t o
int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
dmu_tx_t *tx)
{
- return dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
- bonustype, bonuslen, 0, tx);
+ return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
+ bonustype, bonuslen, 0, tx));
}
uint64_t
@@ -178,7 +246,7 @@ dmu_object_claim_dnsize(objset_t *os, uint64_t object,
dn_slots = DNODE_MIN_SLOTS;
ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
-
+
if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
return (SET_ERROR(EBADF));
@@ -199,7 +267,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
- bonuslen, 0, tx));
+ bonuslen, DNODE_MIN_SIZE, tx));
}
int
@@ -211,6 +279,9 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t objec
int dn_slots = dnodesize >> DNODE_SHIFT;
int err;
+ if (dn_slots == 0)
+ dn_slots = DNODE_MIN_SLOTS;
+
if (object == DMU_META_DNODE_OBJECT)
return (SET_ERROR(EBADF));
@@ -260,28 +331,52 @@ int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
uint64_t offset;
- dmu_object_info_t doi;
+ uint64_t start_obj;
struct dsl_dataset *ds = os->os_dsl_dataset;
- int dnodesize;
int error;
- /*
- * Avoid expensive dnode hold if this dataset doesn't use large dnodes.
- */
- if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
- error = dmu_object_info(os, *objectp, &doi);
- if (error && !(error == EINVAL && *objectp == 0))
- return (SET_ERROR(error));
- else
- dnodesize = doi.doi_dnodesize;
+ if (*objectp == 0) {
+ start_obj = 1;
+ } else if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
+ uint64_t i = *objectp + 1;
+ uint64_t last_obj = *objectp | (DNODES_PER_BLOCK - 1);
+ dmu_object_info_t doi;
+
+ /*
+ * Scan through the remaining meta dnode block. The contents
+ * of each slot in the block are known so it can be quickly
+ * checked. If the block is exhausted without a match then
+ * hand off to dnode_next_offset() for further scanning.
+ */
+ while (i <= last_obj) {
+ error = dmu_object_info(os, i, &doi);
+ if (error == ENOENT) {
+ if (hole) {
+ *objectp = i;
+ return (0);
+ } else {
+ i++;
+ }
+ } else if (error == EEXIST) {
+ i++;
+ } else if (error == 0) {
+ if (hole) {
+ i += doi.doi_dnodesize >> DNODE_SHIFT;
+ } else {
+ *objectp = i;
+ return (0);
+ }
+ } else {
+ return (error);
+ }
+ }
+
+ start_obj = i;
} else {
- dnodesize = DNODE_MIN_SIZE;
+ start_obj = *objectp + 1;
}
- if (*objectp == 0)
- offset = 1 << DNODE_SHIFT;
- else
- offset = (*objectp << DNODE_SHIFT) + dnodesize;
+ offset = start_obj << DNODE_SHIFT;
error = dnode_next_offset(DMU_META_DNODE(os),
(hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_objset.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -566,6 +566,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, bl
mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
+ os->os_obj_next_percpu_len = boot_ncpus;
+ os->os_obj_next_percpu = kmem_zalloc(os->os_obj_next_percpu_len *
+ sizeof (os->os_obj_next_percpu[0]), KM_SLEEP);
dnode_special_open(os, &os->os_phys->os_meta_dnode,
DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
@@ -844,6 +847,9 @@ dmu_objset_evict_done(objset_t *os)
rw_enter(&os_lock, RW_READER);
rw_exit(&os_lock);
+ kmem_free(os->os_obj_next_percpu,
+ os->os_obj_next_percpu_len * sizeof (os->os_obj_next_percpu[0]));
+
mutex_destroy(&os->os_lock);
mutex_destroy(&os->os_userused_lock);
mutex_destroy(&os->os_obj_lock);
@@ -1243,10 +1249,23 @@ dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_
ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
multilist_sublist_remove(list, dn);
+ /*
+ * If we are not doing useraccounting (os_synced_dnodes == NULL)
+ * we are done with this dnode for this txg. Unset dn_dirty_txg
+ * if later txgs aren't dirtying it so that future holders do
+ * not get a stale value. Otherwise, we will do this in
+ * userquota_updates_task() when processing has completely
+ * finished for this txg.
+ */
multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
if (newlist != NULL) {
(void) dnode_add_ref(dn, newlist);
multilist_insert(newlist, dn);
+ } else {
+ mutex_enter(&dn->dn_mtx);
+ if (dn->dn_dirty_txg == tx->tx_txg)
+ dn->dn_dirty_txg = 0;
+ mutex_exit(&dn->dn_mtx);
}
dnode_sync(dn, tx);
@@ -1606,6 +1625,8 @@ userquota_updates_task(void *arg)
dn->dn_id_flags |= DN_ID_CHKED_BONUS;
}
dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
+ if (dn->dn_dirty_txg == spa_syncing_txg(os->os_spa))
+ dn->dn_dirty_txg = 0;
mutex_exit(&dn->dn_mtx);
multilist_sublist_remove(list, dn);
Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -1441,17 +1441,12 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/*
* The receiving code doesn't know how to translate large blocks
* to smaller ones, so the pool must have the LARGE_BLOCKS
- * feature enabled if the stream has LARGE_BLOCKS.
+ * feature enabled if the stream has LARGE_BLOCKS. Same with
+ * large dnodes.
*/
if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
return (SET_ERROR(ENOTSUP));
-
- /*
- * The receiving code doesn't know how to translate large dnodes
- * to smaller ones, so the pool must have the LARGE_DNODE
- * feature enabled if the stream has LARGE_DNODE.
- */
if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
return (SET_ERROR(ENOTSUP));
@@ -1659,6 +1654,9 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_t *ds;
const char *tofs = drba->drba_cookie->drc_tofs;
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+
/* already checked */
ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING);
@@ -1686,8 +1684,18 @@ dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
return (SET_ERROR(ENOTSUP));
- /* 6 extra bytes for /%recv */
- char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+ /*
+ * The receiving code doesn't know how to translate large blocks
+ * to smaller ones, so the pool must have the LARGE_BLOCKS
+ * feature enabled if the stream has LARGE_BLOCKS. Same with
+ * large dnodes.
+ */
+ if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
+ return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
+ return (SET_ERROR(ENOTSUP));
(void) snprintf(recvname, sizeof (recvname), "%s/%s",
tofs, recv_clone_name);
@@ -2149,6 +2157,8 @@ receive_object(struct receive_writer_arg *rwa, struct
dmu_tx_t *tx;
uint64_t object;
int err;
+ uint8_t dn_slots = drro->drr_dn_slots != 0 ?
+ drro->drr_dn_slots : DNODE_MIN_SLOTS;
if (drro->drr_type == DMU_OT_NONE ||
!DMU_OT_IS_VALID(drro->drr_type) ||
@@ -2159,15 +2169,16 @@ receive_object(struct receive_writer_arg *rwa, struct
drro->drr_blksz < SPA_MINBLOCKSIZE ||
drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) ||
drro->drr_bonuslen >
- DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os)))) {
+ DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os))) ||
+ dn_slots >
+ (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT)) {
return (SET_ERROR(EINVAL));
}
err = dmu_object_info(rwa->os, drro->drr_object, &doi);
- if (err != 0 && err != ENOENT)
+ if (err != 0 && err != ENOENT && err != EEXIST)
return (SET_ERROR(EINVAL));
- object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
if (drro->drr_object > rwa->max_object)
rwa->max_object = drro->drr_object;
@@ -2180,18 +2191,66 @@ receive_object(struct receive_writer_arg *rwa, struct
if (err == 0) {
int nblkptr;
+ object = drro->drr_object;
+
nblkptr = deduce_nblkptr(drro->drr_bonustype,
drro->drr_bonuslen);
if (drro->drr_blksz != doi.doi_data_block_size ||
- nblkptr < doi.doi_nblkptr) {
+ nblkptr < doi.doi_nblkptr ||
+ dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
err = dmu_free_long_range(rwa->os, drro->drr_object,
0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
}
+ } else if (err == EEXIST) {
+ /*
+ * The object requested is currently an interior slot of a
+ * multi-slot dnode. This will be resolved when the next txg
+ * is synced out, since the send stream will have told us
+ * to free this slot when we freed the associated dnode
+ * earlier in the stream.
+ */
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ object = drro->drr_object;
+ } else {
+ /* object is free and we are about to allocate a new one */
+ object = DMU_NEW_OBJECT;
}
+ /*
+ * If this is a multi-slot dnode there is a chance that this
+ * object will expand into a slot that is already used by
+ * another object from the previous snapshot. We must free
+ * these objects before we attempt to allocate the new dnode.
+ */
+ if (dn_slots > 1) {
+ boolean_t need_sync = B_FALSE;
+
+ for (uint64_t slot = drro->drr_object + 1;
+ slot < drro->drr_object + dn_slots;
+ slot++) {
+ dmu_object_info_t slot_doi;
+
+ err = dmu_object_info(rwa->os, slot, &slot_doi);
+ if (err == ENOENT || err == EEXIST)
+ continue;
+ else if (err != 0)
+ return (err);
+
+ err = dmu_free_long_object(rwa->os, slot);
+
+ if (err != 0)
+ return (err);
+
+ need_sync = B_TRUE;
+ }
+
+ if (need_sync)
+ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ }
+
tx = dmu_tx_create(rwa->os);
dmu_tx_hold_bonus(tx, object);
err = dmu_tx_assign(tx, TXG_WAIT);
@@ -2205,15 +2264,17 @@ receive_object(struct receive_writer_arg *rwa, struct
err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen,
- drro->drr_dn_slots << DNODE_SHIFT, tx);
+ dn_slots << DNODE_SHIFT, tx);
} else if (drro->drr_type != doi.doi_type ||
drro->drr_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
- drro->drr_bonuslen != doi.doi_bonus_size) {
+ drro->drr_bonuslen != doi.doi_bonus_size ||
+ drro->drr_dn_slots != (doi.doi_dnodesize >> DNODE_SHIFT)) {
/* currently allocated, but with different properties */
- err = dmu_object_reclaim(rwa->os, drro->drr_object,
+ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
- drro->drr_bonustype, drro->drr_bonuslen, tx);
+ drro->drr_bonustype, drro->drr_bonuslen,
+ drro->drr_dn_slots << DNODE_SHIFT, tx);
}
if (err != 0) {
dmu_tx_commit(tx);
@@ -2263,13 +2324,11 @@ receive_freeobjects(struct receive_writer_arg *rwa,
dmu_object_info_t doi;
int err;
- err = dmu_object_info(rwa->os, obj, &doi);
- if (err == ENOENT) {
- obj++;
- continue;
- } else if (err != 0) {
+ err = dmu_object_info(rwa->os, obj, NULL);
+ if (err == ENOENT)
+ continue;
+ else if (err != 0)
return (err);
- }
err = dmu_free_long_object(rwa->os, obj);
if (err != 0)
Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_tx.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -1252,11 +1252,13 @@ dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
void
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
- dmu_tx_hold_t *txh = dmu_tx_hold_object_impl(tx,
- tx->tx_objset, object, THT_SPILL, 0, 0);
+ dmu_tx_hold_t *txh;
- (void) refcount_add_many(&txh->txh_space_towrite,
- SPA_OLD_MAXBLOCKSIZE, FTAG);
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
+ THT_SPILL, 0, 0);
+ if (txh != NULL)
+ (void) refcount_add_many(&txh->txh_space_towrite,
+ SPA_OLD_MAXBLOCKSIZE, FTAG);
}
void
Modified: stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c
==============================================================================
--- stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c Thu Oct 31 00:35:26 2019 (r354210)
+++ stable/12/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dnode.c Thu Oct 31 09:14:50 2019 (r354211)
@@ -40,21 +40,41 @@
#include <sys/dmu_zfetch.h>
#include <sys/range_tree.h>
+dnode_stats_t dnode_stats = {
+ { "dnode_hold_dbuf_hold", KSTAT_DATA_UINT64 },
+ { "dnode_hold_dbuf_read", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_hits", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_interior", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_lock_retry", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_lock_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_type_none", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_hits", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_lock_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
+ { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
+ { "dnode_allocate", KSTAT_DATA_UINT64 },
+ { "dnode_reallocate", KSTAT_DATA_UINT64 },
+ { "dnode_buf_evict", KSTAT_DATA_UINT64 },
+ { "dnode_alloc_next_chunk", KSTAT_DATA_UINT64 },
+ { "dnode_alloc_race", KSTAT_DATA_UINT64 },
+ { "dnode_alloc_next_block", KSTAT_DATA_UINT64 },
+ { "dnode_move_invalid", KSTAT_DATA_UINT64 },
+ { "dnode_move_recheck1", KSTAT_DATA_UINT64 },
+ { "dnode_move_recheck2", KSTAT_DATA_UINT64 },
+ { "dnode_move_special", KSTAT_DATA_UINT64 },
+ { "dnode_move_handle", KSTAT_DATA_UINT64 },
+ { "dnode_move_rwlock", KSTAT_DATA_UINT64 },
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list