git: 240afd8c1fcc - main - makefs: Add ZFS support

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Fri, 05 Aug 2022 17:43:37 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=240afd8c1fcc8c5f29dbd4ff0c915795d414405d

commit 240afd8c1fcc8c5f29dbd4ff0c915795d414405d
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2022-08-05 17:07:54 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2022-08-05 17:42:29 +0000

    makefs: Add ZFS support
    
    This allows one to take a staged directory tree and create a file
    consisting of a ZFS pool with one or more datasets that contain the
    contents of the directory tree.  This is useful for creating virtual
    machine images without using the kernel to create a pool; "zpool create"
    requires root privileges and currently is not permitted in jails.
    makefs -t zfs also provides reproducible images by using a fixed seed
    for pseudo-random number generation, used for generating GUIDs and hash
    salts.  makefs -t zfs requires relatively little by way of machine
    resources.
    
    The "zpool_reguid" rc.conf setting can be used to ask a FreeBSD guest to
    generate a unique pool GUID upon first boot.
    
    A small number of pool and dataset properties are supported.  The pool
    is backed by a single disk vdev.  Data is always checksummed using
    Fletcher-4, no redundant copies are made, and no compression is used.
    The manual page documents supported pool and filesystem properties.
    
    The implementation uses a few pieces of ZFS support from within the boot
    loader, especially definitions for various on-disk structures, but is
    otherwise standalone and in particular doesn't depend on OpenZFS.
    
    This feature should be treated as experimental for now, i.e., important
    data shouldn't be trusted to a makefs-created pool, and the command-line
    interface is subject to change.
    
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D35248
---
 usr.sbin/makefs/Makefile                  |  11 +
 usr.sbin/makefs/makefs.8                  |  97 ++-
 usr.sbin/makefs/makefs.c                  |   3 +
 usr.sbin/makefs/makefs.h                  |   5 +
 usr.sbin/makefs/tests/Makefile            |   1 +
 usr.sbin/makefs/tests/makefs_zfs_tests.sh | 634 +++++++++++++++++++
 usr.sbin/makefs/zfs.c                     | 758 +++++++++++++++++++++++
 usr.sbin/makefs/zfs/Makefile.inc          |  12 +
 usr.sbin/makefs/zfs/dsl.c                 | 598 ++++++++++++++++++
 usr.sbin/makefs/zfs/fs.c                  | 981 ++++++++++++++++++++++++++++++
 usr.sbin/makefs/zfs/objset.c              | 259 ++++++++
 usr.sbin/makefs/zfs/vdev.c                | 435 +++++++++++++
 usr.sbin/makefs/zfs/zap.c                 | 551 +++++++++++++++++
 usr.sbin/makefs/zfs/zfs.h                 | 167 +++++
 14 files changed, 4509 insertions(+), 3 deletions(-)

diff --git a/usr.sbin/makefs/Makefile b/usr.sbin/makefs/Makefile
index 3fea648f9383..fe472d7e7309 100644
--- a/usr.sbin/makefs/Makefile
+++ b/usr.sbin/makefs/Makefile
@@ -19,6 +19,17 @@ MAN=	makefs.8
 NO_WCAST_ALIGN=
 CSTD=	c11
 
+# ZFS support is optional: it is compiled in unless MK_ZFS is "no".  The
+# extra include paths point at the boot loader's ZFS headers, and HAVE_ZFS
+# is the macro the C sources test before referencing the zfs backend.
+.if ${MK_ZFS} != "no"
+SRCS+=	zfs.c
+CFLAGS+=-I${SRCDIR}/zfs \
+	-I${SRCTOP}/stand/libsa \
+	-I${SRCTOP}/sys/cddl/boot
+
+CFLAGS+=	-DHAVE_ZFS
+
+.include "${SRCDIR}/zfs/Makefile.inc"
+.endif
+
 .include "${SRCDIR}/cd9660/Makefile.inc"
 .include "${SRCDIR}/ffs/Makefile.inc"
 .include "${SRCDIR}/msdos/Makefile.inc"
diff --git a/usr.sbin/makefs/makefs.8 b/usr.sbin/makefs/makefs.8
index fdf8d532b69f..464583eab3a1 100644
--- a/usr.sbin/makefs/makefs.8
+++ b/usr.sbin/makefs/makefs.8
@@ -35,7 +35,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 17, 2020
+.Dd August 5, 2022
 .Dt MAKEFS 8
 .Os
 .Sh NAME
@@ -266,6 +266,8 @@ BSD fast file system (default).
 ISO 9660 file system.
 .It Sy msdos
 FAT12, FAT16, or FAT32 file system.
+.It Sy zfs
+ZFS pool containing one or more file systems.
 .El
 .It Fl x
 Exclude file system nodes not explicitly listed in the specfile.
@@ -494,10 +496,97 @@ Volume ID.
 .It Cm volume_label
 Volume Label.
 .El
+.Ss zfs-specific options
+Note: ZFS support is currently considered experimental.
+Do not use it for anything critical.
+.Pp
+The image created by
+.Nm
+contains a ZFS pool with a single vdev of type
+.Ql disk .
+The root dataset is always created implicitly and contains the entire input
+directory tree unless additional datasets are specified using the options
+described below.
+.Pp
+The arguments consist of a keyword, an equal sign
+.Pq Ql = ,
+and a value.
+The following keywords are supported:
+.Pp
+.Bl -tag -width omit-trailing-period -offset indent -compact
+.It ashift
+The base-2 logarithm of the minimum block size.
+Typical values are 9 (512B blocks) and 12 (4KB blocks).
+The default value is 12.
+.It bootfs
+The name of the bootable dataset for the pool.
+Specifying this option causes the
+.Ql bootfs
+property to be set in the created pool.
+.It mssize
+The size of metaslabs in the created pool.
+By default,
+.Nm
+allocates large (up to 512MB) metaslabs with the expectation that
+the image will be auto-expanded upon first use.
+This option allows the default heuristic to be overridden.
+.It poolname
+The name of the ZFS pool.
+This option must be specified.
+.It rootpath
+An implicit path prefix added to dataset mountpoints.
+By default it is
+.Pa /<poolname> .
+For creating bootable pools, the
+.Va rootpath
+should be set to
+.Pa / .
+At least one dataset must have a mountpoint equal to
+.Va rootpath .
+.It fs
+Create an additional dataset.
+This option may be specified multiple times.
+The argument value must be of the form
+.Ar <dataset>[;<prop1=v1>[;<prop2=v2>[;...]]] ,
+where
+.Ar dataset
+is the name of the dataset and must belong to the pool's namespace.
+For example, with a pool name of
+.Ql test
+all dataset names must be prefixed by
+.Ql test/ .
+A dataset must exist at each level of the pool's namespace.
+For example, to create
+.Ql test/foo/bar ,
+.Ql test/foo
+must be created as well.
+.Pp
+The dataset mountpoints determine how the datasets are populated with
+files from the staged directory tree.
+Conceptually, all datasets are mounted before any are populated with files.
+The root of the staged directory tree is mapped to
+.Va rootpath .
+.Pp
+Dataset properties, as described in
+.Xr zfsprops 8 ,
+may be specified following the dataset name.
+The following properties may be set for a dataset:
+.Pp
+.Bl -tag -compact -offset indent
+.It atime
+.It canmount
+.It exec
+.It mountpoint
+.It setuid
+.El
+.El
 .Sh SEE ALSO
 .Xr mtree 5 ,
 .Xr mtree 8 ,
-.Xr newfs 8
+.Xr newfs 8 ,
+.Xr zfsconcepts 8 ,
+.Xr zfsprops 8 ,
+.Xr zpoolprops 8
 .Sh HISTORY
 The
 .Nm
@@ -518,4 +607,6 @@ and first appeared in
 .An Ram Vedam
 (cd9660 support),
 .An Christos Zoulas
-(msdos support).
+(msdos support),
+.An Mark Johnston
+(zfs support).
diff --git a/usr.sbin/makefs/makefs.c b/usr.sbin/makefs/makefs.c
index 888a2b3edea7..2a50768d3152 100644
--- a/usr.sbin/makefs/makefs.c
+++ b/usr.sbin/makefs/makefs.c
@@ -77,6 +77,9 @@ static fstype_t fstypes[] = {
 	ENTRY(cd9660),
 	ENTRY(ffs),
 	ENTRY(msdos),
+#ifdef HAVE_ZFS
+	ENTRY(zfs),
+#endif
 	{ .type = NULL	},
 };
 
diff --git a/usr.sbin/makefs/makefs.h b/usr.sbin/makefs/makefs.h
index 68dc0362dd21..e88313e8366d 100644
--- a/usr.sbin/makefs/makefs.h
+++ b/usr.sbin/makefs/makefs.h
@@ -78,12 +78,14 @@ enum fi_flags {
 	FI_SIZED =	1<<0,		/* inode sized */
 	FI_ALLOCATED =	1<<1,		/* fsinode->ino allocated */
 	FI_WRITTEN =	1<<2,		/* inode written */
+	FI_ROOT =	1<<3,		/* root of a ZFS dataset */
 };
 
 typedef struct {
 	uint32_t	 ino;		/* inode number used on target fs */
 	uint32_t	 nlink;		/* number of links to this entry */
 	enum fi_flags	 flags;		/* flags used by fs specific code */
+	void		*param;		/* for use by individual fs impls */
 	struct stat	 st;		/* stat entry */
 } fsinode;
 
@@ -186,6 +188,9 @@ void		fs ## _makefs(const char *, const char *, fsnode *, fsinfo_t *)
 DECLARE_FUN(cd9660);
 DECLARE_FUN(ffs);
 DECLARE_FUN(msdos);
+#ifdef HAVE_ZFS
+DECLARE_FUN(zfs);
+#endif
 
 extern	u_int		debug;
 extern	int		dupsok;
diff --git a/usr.sbin/makefs/tests/Makefile b/usr.sbin/makefs/tests/Makefile
index 85e4b233aea7..c2c9f6bea5b6 100644
--- a/usr.sbin/makefs/tests/Makefile
+++ b/usr.sbin/makefs/tests/Makefile
@@ -2,6 +2,7 @@
 
 ATF_TESTS_SH+=	makefs_cd9660_tests
 ATF_TESTS_SH+=	makefs_ffs_tests
+ATF_TESTS_SH+=	makefs_zfs_tests
 
 BINDIR=		${TESTSDIR}
 
diff --git a/usr.sbin/makefs/tests/makefs_zfs_tests.sh b/usr.sbin/makefs/tests/makefs_zfs_tests.sh
new file mode 100644
index 000000000000..8cd79966c49a
--- /dev/null
+++ b/usr.sbin/makefs/tests/makefs_zfs_tests.sh
@@ -0,0 +1,634 @@
+#-
+# SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+#
+# Copyright (c) 2022 The FreeBSD Foundation
+#
+# This software was developed by Mark Johnston under sponsorship from
+# the FreeBSD Foundation.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in
+#    the documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+
+MAKEFS="makefs -t zfs -o nowarn=true"
+ZFS_POOL_NAME="makefstest$$"
+TEST_ZFS_POOL_NAME="$TMPDIR/poolname"
+
+. "$(dirname "$0")/makefs_tests_common.sh"
+
+common_cleanup()
+{
+	local pool md
+
+        # Try to force a TXG, this can help catch bugs by triggering a panic.
+	sync
+
+	pool=$(cat $TEST_ZFS_POOL_NAME)
+	if zpool list "$pool" >/dev/null; then
+		zpool destroy "$pool"
+	fi
+
+	md=$(cat $TEST_MD_DEVICE_FILE)
+	if [ -c /dev/"$md" ]; then
+		mdconfig -d -u "$md"
+	fi
+}
+
+#
+# Attach $TEST_IMAGE as an md(4) device and import the pool from it with
+# $TEST_MOUNT_DIR as the alternate root.  The md unit and the pool name are
+# both written to files so that common_cleanup can tear them down again.
+#
+import_image()
+{
+	# mdconfig prints the name of the new device; save it for cleanup.
+	atf_check -e empty -o save:$TEST_MD_DEVICE_FILE -s exit:0 \
+	    mdconfig -a -f $TEST_IMAGE
+	atf_check zpool import -R $TEST_MOUNT_DIR $ZFS_POOL_NAME
+	# Recorded only after the import, i.e. when there is a pool to destroy.
+	echo "$ZFS_POOL_NAME" > $TEST_ZFS_POOL_NAME
+}
+
+#
+# Test autoexpansion of the vdev.
+#
+# The pool is initially 10GB, so we get 10GB minus one metaslab's worth of
+# usable space for data.  Then the pool is expanded to 50GB, and the amount of
+# usable space is 50GB minus one metaslab.
+#
+atf_test_case autoexpand cleanup
+autoexpand_body()
+{
+	local mssize poolsize poolsize1 newpoolsize
+
+	create_test_inputs
+
+	# Use a fixed metaslab size so that the expected pool sizes below can
+	# be computed exactly.
+	mssize=$((128 * 1024 * 1024))
+	poolsize=$((10 * 1024 * 1024 * 1024))
+	atf_check $MAKEFS -s $poolsize -o mssize=$mssize -o rootpath=/ \
+	    -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	# Grow the backing file before it is attached.  The test expects the
+	# pool to keep reporting its original size until "zpool online -e" is
+	# run further down.
+	newpoolsize=$((50 * 1024 * 1024 * 1024))
+	truncate -s $newpoolsize $TEST_IMAGE
+
+	import_image
+
+	check_image_contents
+
+	poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME)
+	atf_check [ $((poolsize1 + $mssize)) -eq $poolsize ]
+
+        atf_check zpool online -e $ZFS_POOL_NAME /dev/$(cat $TEST_MD_DEVICE_FILE)
+
+	# Expansion must not disturb the existing data.
+	check_image_contents
+
+	poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME)
+	atf_check [ $((poolsize1 + $mssize)) -eq $newpoolsize ]
+}
+autoexpand_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Test with some default layout defined by the common code: build an image
+# from the tree set up by create_test_inputs, import it and verify it with
+# check_image_contents.
+#
+atf_test_case basic cleanup
+basic_body()
+{
+	create_test_inputs
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+basic_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create a child dataset for a staged directory and make sure that it can be
+# destroyed with zfs(8) once the pool has been imported.
+#
+atf_test_case dataset_removal cleanup
+dataset_removal_body()
+{
+	create_test_dirs
+
+	cd $TEST_INPUTS_DIR
+	mkdir dir
+	cd -
+
+	atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}/dir \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+
+	atf_check zfs destroy ${ZFS_POOL_NAME}/dir
+}
+dataset_removal_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Make sure that we can create and remove an empty directory.
+#
+atf_test_case empty_dir cleanup
+empty_dir_body()
+{
+	create_test_dirs
+
+	cd $TEST_INPUTS_DIR
+	mkdir dir
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+
+	# Removing the directory modifies the imported pool, so this also
+	# checks that a makefs-created directory can be updated after import.
+	atf_check rmdir ${TEST_MOUNT_DIR}/dir
+}
+empty_dir_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Build a pool without adding anything to the staging directory set up by
+# create_test_dirs, then import and verify it.
+#
+atf_test_case empty_fs cleanup
+empty_fs_body()
+{
+	create_test_dirs
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+empty_fs_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create files whose sizes are the powers of two from 1 byte up to 512KB,
+# together with files one byte smaller and one byte larger than each, and
+# verify that they survive the round trip through makefs.
+#
+atf_test_case file_sizes cleanup
+file_sizes_body()
+{
+	local i
+
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	# For each power of two $i: <i>.1 has size i, <i>.2 has size i - 1
+	# and <i>.3 has size i + 1.
+	i=1
+	while [ $i -lt $((1 << 20)) ]; do
+		truncate -s $i ${i}.1
+		truncate -s $(($i - 1)) ${i}.2
+		truncate -s $(($i + 1)) ${i}.3
+		i=$(($i << 1))
+	done
+
+	cd -
+
+	# XXXMJ this creates sparse files, make sure makefs doesn't
+	#       preserve the sparseness.
+	# XXXMJ need to test with larger files (at least 128MB for L2 indirs)
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+file_sizes_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create two groups of hard links, each spanning the top-level directory and
+# a subdirectory, and verify after import that all names in a group report
+# the same inode number and link count and have the original contents.
+#
+atf_test_case hard_links cleanup
+hard_links_body()
+{
+	local f
+
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	# First group: 1, 2 and dir/1.
+	mkdir dir
+	echo "hello" > 1
+	ln 1 2
+	ln 1 dir/1
+
+	# Second group: dir/a, dir/b and a.
+	echo "goodbye" > dir/a
+	ln dir/a dir/b
+	ln dir/a a
+
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+
+	# Use one name of the group as the reference and compare every name
+	# (including the reference itself) against it.
+	stat -f '%i' ${TEST_MOUNT_DIR}/1 > ./ino
+	stat -f '%l' ${TEST_MOUNT_DIR}/1 > ./nlink
+	for f in 1 2 dir/1; do
+		atf_check -o file:./nlink -e empty -s exit:0 \
+		    stat -f '%l' ${TEST_MOUNT_DIR}/${f}
+		atf_check -o file:./ino -e empty -s exit:0 \
+		    stat -f '%i' ${TEST_MOUNT_DIR}/${f}
+		atf_check cmp -s ${TEST_INPUTS_DIR}/1 ${TEST_MOUNT_DIR}/${f}
+	done
+
+	stat -f '%i' ${TEST_MOUNT_DIR}/dir/a > ./ino
+	stat -f '%l' ${TEST_MOUNT_DIR}/dir/a > ./nlink
+	for f in dir/a dir/b a; do
+		atf_check -o file:./nlink -e empty -s exit:0 \
+		    stat -f '%l' ${TEST_MOUNT_DIR}/${f}
+		atf_check -o file:./ino -e empty -s exit:0 \
+		    stat -f '%i' ${TEST_MOUNT_DIR}/${f}
+		atf_check cmp -s ${TEST_INPUTS_DIR}/dir/a ${TEST_MOUNT_DIR}/${f}
+	done
+}
+hard_links_cleanup()
+{
+	common_cleanup
+}
+
+# Allocate enough dnodes from an object set that the meta dnode needs to use
+# indirect blocks.
+atf_test_case indirect_dnode_array cleanup
+indirect_dnode_array_body()
+{
+	local count i
+
+	# How many dnodes do we need to allocate?  Well, the data block size
+	# for meta dnodes is always 16KB, so with a dnode size of 512B we get
+	# 32 dnodes per direct block.  The maximum indirect block size is 128KB
+	# and that can fit 1024 block pointers, so we need at least 32 * 1024
+	# files to force the use of two levels of indirection.
+	#
+	# Unfortunately that number of files makes the test run quite slowly,
+	# so we settle for a single indirect block for now...
+	#
+	# Note that jot -r picks a random count between 32 and 1024, so the
+	# number of files differs from run to run.
+	count=$(jot -r 1 32 1024)
+
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+	for i in $(seq 1 $count); do
+		touch $i
+	done
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+indirect_dnode_array_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create some files with long names, so as to test fat ZAP handling.
+#
+atf_test_case long_file_name cleanup
+long_file_name_body()
+{
+	local dir i
+
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	# micro ZAP keys can be at most 50 bytes.
+	#
+	# "jot -s '' $i 1 1" prints the digit 1 $i times with no separator,
+	# i.e. a name that is $i characters long, so names of every length
+	# from 1 to 60 are created.
+	for i in $(seq 1 60); do
+		touch $(jot -s '' $i 1 1)
+	done
+	# Repeat the same set of names inside a directory whose own name is
+	# 61 characters long.
+	dir=$(jot -s '' 61 1 1)
+	mkdir $dir
+	for i in $(seq 1 60); do
+		touch ${dir}/$(jot -s '' $i 1 1)
+	done
+
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+
+	# Add a directory entry in the hope that OpenZFS might catch a bug
+	# in makefs' fat ZAP encoding.
+	touch ${TEST_MOUNT_DIR}/foo
+}
+long_file_name_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Exercise handling of multiple datasets.
+#
+atf_test_case multi_dataset_1 cleanup
+multi_dataset_1_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir1
+	echo a > dir1/a
+	mkdir dir2
+	echo b > dir2/b
+
+	cd -
+
+	# Neither child dataset is given an explicit mountpoint; the checks
+	# below expect them at dir1 and dir2 under the alternate root.
+	atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}/dir1 -o fs=${ZFS_POOL_NAME}/dir2 \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+
+	# Make sure that we have three datasets with the expected mount points.
+	atf_check -o inline:${ZFS_POOL_NAME}\\n -e empty -s exit:0 \
+	    zfs list -H -o name ${ZFS_POOL_NAME}
+	atf_check -o inline:${TEST_MOUNT_DIR}\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}
+
+	atf_check -o inline:${ZFS_POOL_NAME}/dir1\\n -e empty -s exit:0 \
+	    zfs list -H -o name ${ZFS_POOL_NAME}/dir1
+	atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1
+
+	atf_check -o inline:${ZFS_POOL_NAME}/dir2\\n -e empty -s exit:0 \
+	    zfs list -H -o name ${ZFS_POOL_NAME}/dir2
+	atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2
+}
+multi_dataset_1_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create a pool with two datasets, where the root dataset is mounted below
+# the child dataset.
+#
+atf_test_case multi_dataset_2 cleanup
+multi_dataset_2_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir1
+	echo a > dir1/a
+	mkdir dir2
+	echo b > dir2/b
+
+	cd -
+
+	# The semicolons separating the dataset name from its properties are
+	# escaped so that the shell does not treat them as command separators.
+	atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}/dir1\;mountpoint=/ \
+	    -o fs=${ZFS_POOL_NAME}\;mountpoint=/dir1 \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+multi_dataset_2_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create a dataset with a non-existent mount point.
+#
+atf_test_case multi_dataset_3 cleanup
+multi_dataset_3_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir1
+	echo a > dir1/a
+
+	cd -
+
+	# Only dir1 exists in the staging tree; dir2 is named solely by its
+	# -o fs= option.
+	atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}/dir1 \
+	    -o fs=${ZFS_POOL_NAME}/dir2 \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2
+
+	# Mounting dir2 should have created a directory called dir2.  Go
+	# back and create it in the staging tree before comparing.
+	atf_check mkdir ${TEST_INPUTS_DIR}/dir2
+
+	check_image_contents
+}
+multi_dataset_3_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Create an unmounted dataset.
+#
+# The dataset is created with canmount=noauto and mountpoint=none, so the
+# staged dir1 directory and its contents must end up in the root dataset.
+# Mounting the dataset over dir1 afterwards must therefore hide dir1/a.
+#
+atf_test_case multi_dataset_4 cleanup
+multi_dataset_4_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir1
+	echo a > dir1/a
+
+	cd -
+
+	atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}/dir1\;canmount=noauto\;mountpoint=none \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	atf_check -o inline:none\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1
+
+	check_image_contents
+
+	atf_check zfs set mountpoint=/dir1 ${ZFS_POOL_NAME}/dir1
+	atf_check zfs mount ${ZFS_POOL_NAME}/dir1
+	atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \
+	    zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1
+
+	# dir1/a should be part of the root dataset, not dir1.  Note the
+	# slash after the mount directory: without it the path names a
+	# sibling of the mount directory that never exists, and the check
+	# passes no matter what the image contains.
+	atf_check -s not-exit:0 -e not-empty stat ${TEST_MOUNT_DIR}/dir1/a
+}
+multi_dataset_4_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Rudimentary test to verify that two ZFS images created using the same
+# parameters and input hierarchy are byte-identical.  In particular, makefs(8)
+# does not preserve file access times.
+#
+atf_test_case reproducible cleanup
+reproducible_body()
+{
+	create_test_inputs
+
+	atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    ${TEST_IMAGE}.1 $TEST_INPUTS_DIR
+
+	atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    ${TEST_IMAGE}.2 $TEST_INPUTS_DIR
+
+	# XXX-MJ cmp(1) is really slow
+	atf_check cmp ${TEST_IMAGE}.1 ${TEST_IMAGE}.2
+}
+reproducible_cleanup()
+{
+	# Nothing to undo: the body neither attaches an md(4) device nor
+	# imports a pool.
+}
+
+#
+# Verify that we can take a snapshot of a generated dataset.
+#
+atf_test_case snapshot cleanup
+snapshot_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir
+	echo "hello" > dir/hello
+	echo "goodbye" > goodbye
+
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	# The snapshot is taken of the pool's root dataset.
+	atf_check zfs snapshot ${ZFS_POOL_NAME}@1
+}
+snapshot_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Check handling of symbolic links.
+#
+# The staging tree contains a self-referential link, a link whose target
+# goes through "..", a dangling link and a link to a regular file in a
+# subdirectory.
+#
+atf_test_case soft_links cleanup
+soft_links_body()
+{
+	create_test_dirs
+	cd $TEST_INPUTS_DIR
+
+	mkdir dir
+	# Link pointing at itself.
+	ln -s a a
+	# Target containing "..".  This needs its own name: "a" already
+	# exists, so reusing it makes ln(1) fail and no link is created.
+	ln -s dir/../a aa
+	# Dangling link: dir/b is never created.
+	ln -s dir/b b
+	# Link to a regular file.  The file has to be created inside dir;
+	# redirecting to "dir" itself fails because it is a directory.
+	echo 'c' > dir/c
+	ln -s dir/c c
+	# XXX-MJ overflows bonus buffer ln -s $(jot -s '' 320 1 1) 1
+
+	cd -
+
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+}
+soft_links_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Verify that we can set properties on the root dataset.
+#
+atf_test_case root_props cleanup
+root_props_body()
+{
+	create_test_inputs
+
+	# Naming the pool itself in -o fs= applies the properties to the
+	# root dataset.
+	atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \
+	    -o fs=${ZFS_POOL_NAME}\;atime=off\;setuid=off \
+	    $TEST_IMAGE $TEST_INPUTS_DIR
+
+	import_image
+
+	check_image_contents
+
+	# Besides the value, check that the source is "local", i.e. that the
+	# property is recorded as set on this dataset.
+	atf_check -o inline:off\\n -e empty -s exit:0 \
+	    zfs get -H -o value atime $ZFS_POOL_NAME
+	atf_check -o inline:local\\n -e empty -s exit:0 \
+	    zfs get -H -o source atime $ZFS_POOL_NAME
+	atf_check -o inline:off\\n -e empty -s exit:0 \
+	    zfs get -H -o value setuid $ZFS_POOL_NAME
+	atf_check -o inline:local\\n -e empty -s exit:0 \
+	    zfs get -H -o source setuid $ZFS_POOL_NAME
+}
+root_props_cleanup()
+{
+	common_cleanup
+}
+
+#
+# Register the test cases defined in this file with ATF.
+#
+atf_init_test_cases()
+{
+	atf_add_test_case autoexpand
+	atf_add_test_case basic
+	atf_add_test_case dataset_removal
+	atf_add_test_case empty_dir
+	atf_add_test_case empty_fs
+	atf_add_test_case file_sizes
+	atf_add_test_case hard_links
+	atf_add_test_case indirect_dnode_array
+	atf_add_test_case long_file_name
+	atf_add_test_case multi_dataset_1
+	atf_add_test_case multi_dataset_2
+	atf_add_test_case multi_dataset_3
+	atf_add_test_case multi_dataset_4
+	atf_add_test_case reproducible
+	atf_add_test_case snapshot
+	atf_add_test_case soft_links
+	atf_add_test_case root_props
+
+	# XXXMJ tests:
+	# - test with different ashifts (at least, 9 and 12), different image sizes
+	# - create datasets in imported pool
+}
diff --git a/usr.sbin/makefs/zfs.c b/usr.sbin/makefs/zfs.c
new file mode 100644
index 000000000000..08689a558870
--- /dev/null
+++ b/usr.sbin/makefs/zfs.c
@@ -0,0 +1,758 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 The FreeBSD Foundation
+ *
+ * This software was developed by Mark Johnston under sponsorship from
+ * the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/queue.h>
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <util.h>
+
+#include "makefs.h"
+#include "zfs.h"
+
+/*
+ * Sum of the start and end label sizes of a vdev.  The static assertion
+ * checks that this sum does not exceed MINDEVSIZE.
+ */
+#define	VDEV_LABEL_SPACE	\
+	((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE))
+_Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, "");
+
+/*
+ * Metaslab sizes.  MINMSSIZE and MAXMSSIZE bound the "mssize" option;
+ * DFLTMSSIZE is presumably the size used when the option is not given --
+ * TODO confirm against the code that consumes it.
+ */
+#define	MINMSSIZE		((off_t)1 << 24) /* 16MB */
+#define	DFLTMSSIZE		((off_t)1 << 29) /* 512MB */
+#define	MAXMSSIZE		((off_t)1 << 34) /* 16GB */
+
+/* Number of per-level indirect block buffers kept in struct dnode_cursor. */
+#define	INDIR_LEVELS		6
+/* Indirect blocks are always 128KB. */
+#define	BLKPTR_PER_INDIR	(MAXBLOCKSIZE / sizeof(blkptr_t))
+
+/*
+ * Cursor state associated with a dnode.  It embeds one MAXBLOCKSIZE-sized
+ * buffer for each of the INDIR_LEVELS indirection levels, so the structure
+ * is large (INDIR_LEVELS * MAXBLOCKSIZE bytes plus the remaining fields).
+ *
+ * NOTE(review): the code that uses this structure is not visible here; the
+ * per-field notes below are inferred from the field names and need to be
+ * confirmed.
+ */
+struct dnode_cursor {
+	/* One indirect block buffer per level. */
+	char		inddir[INDIR_LEVELS][MAXBLOCKSIZE];
+	/* Presumably the location and size of the indirect block area. */
+	off_t		indloc;
+	off_t		indspace;
+	/* The dnode this cursor refers to. */
+	dnode_phys_t	*dnode;
+	/* Presumably the current data offset and the data block size. */
+	off_t		dataoff;
+	off_t		datablksz;
+};
+
+void
+zfs_prep_opts(fsinfo_t *fsopts)
+{
+	zfs_opt_t *zfs = ecalloc(1, sizeof(*zfs));
+
+	const option_t zfs_options[] = {
+		{ '\0', "bootfs", &zfs->bootfs, OPT_STRPTR,
+		  0, 0, "Bootable dataset" },
+		{ '\0', "mssize", &zfs->mssize, OPT_INT64,
+		  MINMSSIZE, MAXMSSIZE, "Metaslab size" },
*** 3725 LINES SKIPPED ***