git: 365b89e8ea4a - main - nvmf: Switch several ioctls to using nvlists

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Mon, 30 Dec 2024 18:52:42 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=365b89e8ea4af34a05f68aa28e77573e89fa00b2

commit 365b89e8ea4af34a05f68aa28e77573e89fa00b2
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2024-12-30 18:52:21 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2024-12-30 18:52:21 +0000

    nvmf: Switch several ioctls to using nvlists
    
    For requests that handoff queues from userspace to the kernel as well
    as the request to fetch reconnect parameters from the kernel, switch
    from using flat structures to nvlists.  In particular, this will
    permit adding support for additional transports in the future without
    breaking the ABI of the structures.
    
    Note that this is an ABI break for the ioctls used by nvmf(4) and
    nvmft(4).  Since this is only present in main I did not bother
    implementing compatability shims.
    
    Inspired by:    imp (suggestion on a different review)
    Reviewed by:    imp
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D48230
---
 lib/libnvmf/Makefile                        |   2 +
 lib/libnvmf/internal.h                      |  10 +-
 lib/libnvmf/libnvmf.h                       |   9 +-
 lib/libnvmf/nvmf_controller.c               |  21 ++-
 lib/libnvmf/nvmf_host.c                     |  87 ++++++++----
 lib/libnvmf/nvmf_tcp.c                      |  23 ++--
 lib/libnvmf/nvmf_transport.c                |  47 +++++--
 rescue/rescue/Makefile                      |   2 +-
 sbin/nvmecontrol/reconnect.c                |  18 ++-
 share/mk/src.libnames.mk                    |   1 +
 sys/cam/ctl/ctl_ioctl.h                     |   2 +-
 sys/dev/nvmf/controller/ctl_frontend_nvmf.c |  58 +++++---
 sys/dev/nvmf/controller/nvmft_controller.c  |  19 ++-
 sys/dev/nvmf/controller/nvmft_qpair.c       |  15 +--
 sys/dev/nvmf/controller/nvmft_var.h         |  11 +-
 sys/dev/nvmf/host/nvmf.c                    | 197 ++++++++++++++++------------
 sys/dev/nvmf/host/nvmf_ctldev.c             |  13 +-
 sys/dev/nvmf/host/nvmf_qpair.c              |  21 +--
 sys/dev/nvmf/host/nvmf_var.h                |  13 +-
 sys/dev/nvmf/nvmf.h                         | 100 ++++++++------
 sys/dev/nvmf/nvmf_tcp.c                     |  40 ++++--
 sys/dev/nvmf/nvmf_transport.c               |  92 ++++++++++++-
 sys/dev/nvmf/nvmf_transport.h               |  24 +++-
 sys/dev/nvmf/nvmf_transport_internal.h      |   3 +-
 usr.sbin/nvmfd/ctl.c                        |   4 +-
 25 files changed, 557 insertions(+), 275 deletions(-)

diff --git a/lib/libnvmf/Makefile b/lib/libnvmf/Makefile
index dbba6b476510..b01f5ab82cac 100644
--- a/lib/libnvmf/Makefile
+++ b/lib/libnvmf/Makefile
@@ -14,6 +14,8 @@ SRCS=		gsb_crc32.c \
 		nvmf_transport.c \
 		nvmft_subr.c
 
+LIBADD=		nv
+
 CFLAGS+=	-I${SRCTOP}/sys/dev/nvmf/controller
 CFLAGS+=	-I${SRCTOP}/sys/dev/nvmf
 
diff --git a/lib/libnvmf/internal.h b/lib/libnvmf/internal.h
index cf45c15ba2f0..7b3d4fbb03ef 100644
--- a/lib/libnvmf/internal.h
+++ b/lib/libnvmf/internal.h
@@ -8,6 +8,7 @@
 #ifndef __LIBNVMF_INTERNAL_H__
 #define __LIBNVMF_INTERNAL_H__
 
+#include <sys/nv.h>
 #include <sys/queue.h>
 
 struct nvmf_transport_ops {
@@ -23,9 +24,8 @@ struct nvmf_transport_ops {
 	    const struct nvmf_qpair_params *params);
 	void (*free_qpair)(struct nvmf_qpair *qp);
 
-	/* Create params for kernel handoff. */
-	int (*kernel_handoff_params)(struct nvmf_qpair *qp,
-	    struct nvmf_handoff_qpair_params *qparams);
+	/* Add params for kernel handoff. */
+	void (*kernel_handoff_params)(struct nvmf_qpair *qp, nvlist_t *nvl);
 
 	/* Capsule operations. */
 	struct nvmf_capsule *(*allocate_capsule)(struct nvmf_qpair *qp);
@@ -110,7 +110,7 @@ extern struct nvmf_transport_ops tcp_ops;
 void	na_clear_error(struct nvmf_association *na);
 void	na_error(struct nvmf_association *na, const char *fmt, ...);
 
-int	nvmf_kernel_handoff_params(struct nvmf_qpair *qp,
-    struct nvmf_handoff_qpair_params *qparams);
+int	nvmf_kernel_handoff_params(struct nvmf_qpair *qp, nvlist_t **nvlp);
+int	nvmf_pack_ioc_nvlist(struct nvmf_ioc_nv *nv, nvlist_t *nvl);
 
 #endif /* !__LIBNVMF_INTERNAL_H__ */
diff --git a/lib/libnvmf/libnvmf.h b/lib/libnvmf/libnvmf.h
index f15277a02621..44f13fda5ddd 100644
--- a/lib/libnvmf/libnvmf.h
+++ b/lib/libnvmf/libnvmf.h
@@ -8,6 +8,7 @@
 #ifndef __LIBNVMF_H__
 #define	__LIBNVMF_H__
 
+#include <sys/_nv.h>
 #include <sys/uio.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -249,7 +250,8 @@ uint64_t nvmf_get_log_page_offset(const struct nvme_command *cmd);
 
 /* Prepare to handoff a controller qpair. */
 int	nvmf_handoff_controller_qpair(struct nvmf_qpair *qp,
-    struct nvmf_handoff_controller_qpair *h);
+    const struct nvmf_fabric_connect_cmd *cmd,
+    const struct nvmf_fabric_connect_data *data, struct nvmf_ioc_nv *nv);
 
 /* Host-specific APIs. */
 
@@ -348,9 +350,10 @@ int	nvmf_disconnect_all(void);
 
 /*
  * Fetch reconnect parameters from an existing kernel host to use for
- * establishing a new association.
+ * establishing a new association.  The caller must destroy the
+ * returned nvlist.
  */
-int	nvmf_reconnect_params(int fd, struct nvmf_reconnect_params *rparams);
+int	nvmf_reconnect_params(int fd, nvlist_t **nvlp);
 
 /*
  * Handoff active host association to an existing host in the kernel.
diff --git a/lib/libnvmf/nvmf_controller.c b/lib/libnvmf/nvmf_controller.c
index 0e0126040ee4..971dccbe039e 100644
--- a/lib/libnvmf/nvmf_controller.c
+++ b/lib/libnvmf/nvmf_controller.c
@@ -457,8 +457,23 @@ nvmf_get_log_page_offset(const struct nvme_command *cmd)
 
 int
 nvmf_handoff_controller_qpair(struct nvmf_qpair *qp,
-    struct nvmf_handoff_controller_qpair *h)
+    const struct nvmf_fabric_connect_cmd *cmd,
+    const struct nvmf_fabric_connect_data *data, struct nvmf_ioc_nv *nv)
 {
-	h->trtype = qp->nq_association->na_trtype;
-	return (nvmf_kernel_handoff_params(qp, &h->params));
+	nvlist_t *nvl, *nvl_qp;
+	int error;
+
+	error = nvmf_kernel_handoff_params(qp, &nvl_qp);
+	if (error)
+		return (error);
+
+	nvl = nvlist_create(0);
+	nvlist_add_number(nvl, "trtype", qp->nq_association->na_trtype);
+	nvlist_move_nvlist(nvl, "params", nvl_qp);
+	nvlist_add_binary(nvl, "cmd", cmd, sizeof(*cmd));
+	nvlist_add_binary(nvl, "data", data, sizeof(*data));
+
+	error = nvmf_pack_ioc_nvlist(nv, nvl);
+	nvlist_destroy(nvl);
+	return (error);
 }
diff --git a/lib/libnvmf/nvmf_host.c b/lib/libnvmf/nvmf_host.c
index a0d95470d8ee..c3668600c463 100644
--- a/lib/libnvmf/nvmf_host.c
+++ b/lib/libnvmf/nvmf_host.c
@@ -767,15 +767,16 @@ is_queue_pair_idle(struct nvmf_qpair *qp)
 }
 
 static int
-prepare_queues_for_handoff(struct nvmf_handoff_host *hh,
-    struct nvmf_qpair *admin_qp, u_int num_queues,
-    struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
+prepare_queues_for_handoff(struct nvmf_ioc_nv *nv, struct nvmf_qpair *admin_qp,
+    u_int num_queues, struct nvmf_qpair **io_queues,
+    const struct nvme_controller_data *cdata)
 {
-	struct nvmf_handoff_qpair_params *io;
+	nvlist_t *nvl, *nvl_qp;
 	u_int i;
 	int error;
 
-	memset(hh, 0, sizeof(*hh));
+	if (num_queues == 0)
+		return (EINVAL);
 
 	/* All queue pairs must be idle. */
 	if (!is_queue_pair_idle(admin_qp))
@@ -785,34 +786,40 @@ prepare_queues_for_handoff(struct nvmf_handoff_host *hh,
 			return (EBUSY);
 	}
 
+	nvl = nvlist_create(0);
+	nvlist_add_number(nvl, "trtype", admin_qp->nq_association->na_trtype);
+	nvlist_add_number(nvl, "kato", admin_qp->nq_kato);
+
 	/* First, the admin queue. */
-	hh->trtype = admin_qp->nq_association->na_trtype;
-	hh->kato = admin_qp->nq_kato;
-	error = nvmf_kernel_handoff_params(admin_qp, &hh->admin);
-	if (error)
+	error = nvmf_kernel_handoff_params(admin_qp, &nvl_qp);
+	if (error) {
+		nvlist_destroy(nvl);
 		return (error);
+	}
+	nvlist_move_nvlist(nvl, "admin", nvl_qp);
 
 	/* Next, the I/O queues. */
-	hh->num_io_queues = num_queues;
-	io = calloc(num_queues, sizeof(*io));
 	for (i = 0; i < num_queues; i++) {
-		error = nvmf_kernel_handoff_params(io_queues[i], &io[i]);
+		error = nvmf_kernel_handoff_params(io_queues[i], &nvl_qp);
 		if (error) {
-			free(io);
+			nvlist_destroy(nvl);
 			return (error);
 		}
+		nvlist_append_nvlist_array(nvl, "io", nvl_qp);
 	}
 
-	hh->io = io;
-	hh->cdata = cdata;
-	return (0);
+	nvlist_add_binary(nvl, "cdata", cdata, sizeof(*cdata));
+
+	error = nvmf_pack_ioc_nvlist(nv, nvl);
+	nvlist_destroy(nvl);
+	return (error);
 }
 
 int
 nvmf_handoff_host(struct nvmf_qpair *admin_qp, u_int num_queues,
     struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
 {
-	struct nvmf_handoff_host hh;
+	struct nvmf_ioc_nv nv;
 	u_int i;
 	int error, fd;
 
@@ -822,14 +829,14 @@ nvmf_handoff_host(struct nvmf_qpair *admin_qp, u_int num_queues,
 		goto out;
 	}
 
-	error = prepare_queues_for_handoff(&hh, admin_qp, num_queues, io_queues,
+	error = prepare_queues_for_handoff(&nv, admin_qp, num_queues, io_queues,
 	    cdata);
 	if (error != 0)
 		goto out;
 
-	if (ioctl(fd, NVMF_HANDOFF_HOST, &hh) == -1)
+	if (ioctl(fd, NVMF_HANDOFF_HOST, &nv) == -1)
 		error = errno;
-	free(hh.io);
+	free(nv.data);
 
 out:
 	if (fd >= 0)
@@ -882,30 +889,56 @@ out:
 	return (error);
 }
 
-int
-nvmf_reconnect_params(int fd, struct nvmf_reconnect_params *rparams)
+static int
+nvmf_read_ioc_nv(int fd, u_long com, nvlist_t **nvlp)
 {
-	if (ioctl(fd, NVMF_RECONNECT_PARAMS, rparams) == -1)
+	struct nvmf_ioc_nv nv;
+	nvlist_t *nvl;
+	int error;
+
+	memset(&nv, 0, sizeof(nv));
+	if (ioctl(fd, com, &nv) == -1)
 		return (errno);
+
+	nv.data = malloc(nv.len);
+	nv.size = nv.len;
+	if (ioctl(fd, com, &nv) == -1) {
+		error = errno;
+		free(nv.data);
+		return (error);
+	}
+
+	nvl = nvlist_unpack(nv.data, nv.len, 0);
+	free(nv.data);
+	if (nvl == NULL)
+		return (EINVAL);
+
+	*nvlp = nvl;
 	return (0);
 }
 
+int
+nvmf_reconnect_params(int fd, nvlist_t **nvlp)
+{
+	return (nvmf_read_ioc_nv(fd, NVMF_RECONNECT_PARAMS, nvlp));
+}
+
 int
 nvmf_reconnect_host(int fd, struct nvmf_qpair *admin_qp, u_int num_queues,
     struct nvmf_qpair **io_queues, const struct nvme_controller_data *cdata)
 {
-	struct nvmf_handoff_host hh;
+	struct nvmf_ioc_nv nv;
 	u_int i;
 	int error;
 
-	error = prepare_queues_for_handoff(&hh, admin_qp, num_queues, io_queues,
+	error = prepare_queues_for_handoff(&nv, admin_qp, num_queues, io_queues,
 	    cdata);
 	if (error != 0)
 		goto out;
 
-	if (ioctl(fd, NVMF_RECONNECT_HOST, &hh) == -1)
+	if (ioctl(fd, NVMF_RECONNECT_HOST, &nv) == -1)
 		error = errno;
-	free(hh.io);
+	free(nv.data);
 
 out:
 	for (i = 0; i < num_queues; i++)
diff --git a/lib/libnvmf/nvmf_tcp.c b/lib/libnvmf/nvmf_tcp.c
index 264a5bb154a0..3f794b5d9750 100644
--- a/lib/libnvmf/nvmf_tcp.c
+++ b/lib/libnvmf/nvmf_tcp.c
@@ -1129,22 +1129,19 @@ tcp_free_qpair(struct nvmf_qpair *nq)
 	free(qp);
 }
 
-static int
-tcp_kernel_handoff_params(struct nvmf_qpair *nq,
-    struct nvmf_handoff_qpair_params *qparams)
+static void
+tcp_kernel_handoff_params(struct nvmf_qpair *nq, nvlist_t *nvl)
 {
 	struct nvmf_tcp_qpair *qp = TQP(nq);
 
-	qparams->tcp.fd = qp->s;
-	qparams->tcp.rxpda = qp->rxpda;
-	qparams->tcp.txpda = qp->txpda;
-	qparams->tcp.header_digests = qp->header_digests;
-	qparams->tcp.data_digests = qp->data_digests;
-	qparams->tcp.maxr2t = qp->maxr2t;
-	qparams->tcp.maxh2cdata = qp->maxh2cdata;
-	qparams->tcp.max_icd = qp->max_icd;
-
-	return (0);
+	nvlist_add_number(nvl, "fd", qp->s);
+	nvlist_add_number(nvl, "rxpda", qp->rxpda);
+	nvlist_add_number(nvl, "txpda", qp->txpda);
+	nvlist_add_bool(nvl, "header_digests", qp->header_digests);
+	nvlist_add_bool(nvl, "data_digests", qp->data_digests);
+	nvlist_add_number(nvl, "maxr2t", qp->maxr2t);
+	nvlist_add_number(nvl, "maxh2cdata", qp->maxh2cdata);
+	nvlist_add_number(nvl, "max_icd", qp->max_icd);
 }
 
 static struct nvmf_capsule *
diff --git a/lib/libnvmf/nvmf_transport.c b/lib/libnvmf/nvmf_transport.c
index 1a8505f2a993..fa3826b8c50d 100644
--- a/lib/libnvmf/nvmf_transport.c
+++ b/lib/libnvmf/nvmf_transport.c
@@ -236,16 +236,27 @@ nvmf_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
 }
 
 int
-nvmf_kernel_handoff_params(struct nvmf_qpair *qp,
-    struct nvmf_handoff_qpair_params *qparams)
+nvmf_kernel_handoff_params(struct nvmf_qpair *qp, nvlist_t **nvlp)
 {
-	memset(qparams, 0, sizeof(*qparams));
-	qparams->admin = qp->nq_admin;
-	qparams->sq_flow_control = qp->nq_flow_control;
-	qparams->qsize = qp->nq_qsize;
-	qparams->sqhd = qp->nq_sqhd;
-	qparams->sqtail = qp->nq_sqtail;
-	return (qp->nq_association->na_ops->kernel_handoff_params(qp, qparams));
+	nvlist_t *nvl;
+	int error;
+
+	nvl = nvlist_create(0);
+	nvlist_add_bool(nvl, "admin", qp->nq_admin);
+	nvlist_add_bool(nvl, "sq_flow_control", qp->nq_flow_control);
+	nvlist_add_number(nvl, "qsize", qp->nq_qsize);
+	nvlist_add_number(nvl, "sqhd", qp->nq_sqhd);
+	if (!qp->nq_association->na_controller)
+		nvlist_add_number(nvl, "sqtail", qp->nq_sqtail);
+	qp->nq_association->na_ops->kernel_handoff_params(qp, nvl);
+	error = nvlist_error(nvl);
+	if (error != 0) {
+		nvlist_destroy(nvl);
+		return (error);
+	}
+
+	*nvlp = nvl;
+	return (0);
 }
 
 const char *
@@ -267,3 +278,21 @@ nvmf_transport_type(uint8_t trtype)
 		return (buf);
 	}
 }
+
+int
+nvmf_pack_ioc_nvlist(struct nvmf_ioc_nv *nv, nvlist_t *nvl)
+{
+	int error;
+
+	memset(nv, 0, sizeof(*nv));
+
+	error = nvlist_error(nvl);
+	if (error)
+		return (error);
+
+	nv->data = nvlist_pack(nvl, &nv->size);
+	if (nv->data == NULL)
+		return (ENOMEM);
+
+	return (0);
+}
diff --git a/rescue/rescue/Makefile b/rescue/rescue/Makefile
index 4474a0af050f..797daf3d2f14 100644
--- a/rescue/rescue/Makefile
+++ b/rescue/rescue/Makefile
@@ -143,7 +143,7 @@ CRUNCH_PROGS_usr.sbin+= zdb
 # CRUNCH_PROGS+= devd
 
 CRUNCH_LIBS+= -l80211 -lalias -lcam -lncursesw -ldevstat -lipsec -llzma
-CRUNCH_LIBS_camcontrol+= ${LIBNVMF}
+CRUNCH_LIBS_camcontrol+= ${LIBNVMF} ${LIBNV}
 .if ${MK_ZFS} != "no"
 CRUNCH_LIBS+= -lavl -lpthread -luutil -lumem -ltpool -lspl -lrt
 CRUNCH_LIBS_zfs+=	${LIBBE} \
diff --git a/sbin/nvmecontrol/reconnect.c b/sbin/nvmecontrol/reconnect.c
index b606409eea90..4c9277bd34cb 100644
--- a/sbin/nvmecontrol/reconnect.c
+++ b/sbin/nvmecontrol/reconnect.c
@@ -5,6 +5,7 @@
  * Written by: John Baldwin <jhb@FreeBSD.org>
  */
 
+#include <sys/nv.h>
 #include <sys/socket.h>
 #include <err.h>
 #include <libnvmf.h>
@@ -60,7 +61,7 @@ reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam,
 {
 	struct nvme_controller_data cdata;
 	struct nvmf_association_params aparams;
-	struct nvmf_reconnect_params rparams;
+	nvlist_t *rparams;
 	struct nvmf_qpair *admin, **io;
 	int error;
 
@@ -70,6 +71,13 @@ reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam,
 		return (EX_IOERR);
 	}
 
+	if (!nvlist_exists_number(rparams, "cntlid") ||
+	    !nvlist_exists_string(rparams, "subnqn")) {
+		nvlist_destroy(rparams);
+		warnx("Missing required reconnect parameters");
+		return (EX_IOERR);
+	}
+
 	memset(&aparams, 0, sizeof(aparams));
 	aparams.sq_flow_control = opt.flow_control;
 	switch (trtype) {
@@ -77,18 +85,22 @@ reconnect_nvm_controller(int fd, enum nvmf_trtype trtype, int adrfam,
 		tcp_association_params(&aparams);
 		break;
 	default:
+		nvlist_destroy(rparams);
 		warnx("Unsupported transport %s", nvmf_transport_type(trtype));
 		return (EX_UNAVAILABLE);
 	}
 
 	io = calloc(opt.num_io_queues, sizeof(*io));
 	error = connect_nvm_queues(&aparams, trtype, adrfam, address, port,
-	    rparams.cntlid, rparams.subnqn, opt.hostnqn, opt.kato, &admin, io,
-	    opt.num_io_queues, opt.queue_size, &cdata);
+	    nvlist_get_number(rparams, "cntlid"),
+	    nvlist_get_string(rparams, "subnqn"), opt.hostnqn, opt.kato,
+	    &admin, io, opt.num_io_queues, opt.queue_size, &cdata);
 	if (error != 0) {
 		free(io);
+		nvlist_destroy(rparams);
 		return (error);
 	}
+	nvlist_destroy(rparams);
 
 	error = nvmf_reconnect_host(fd, admin, opt.num_io_queues, io, &cdata);
 	if (error != 0) {
diff --git a/share/mk/src.libnames.mk b/share/mk/src.libnames.mk
index 588291d8ec9c..786ad9a6f9a5 100644
--- a/share/mk/src.libnames.mk
+++ b/share/mk/src.libnames.mk
@@ -346,6 +346,7 @@ _DP_mp=	crypto
 _DP_memstat=	kvm
 _DP_magic=	z
 _DP_mt=		sbuf bsdxml
+_DP_nvmf=	nv
 _DP_ldns=	ssl crypto
 _DP_lua=	m
 _DP_lutok=	lua
diff --git a/sys/cam/ctl/ctl_ioctl.h b/sys/cam/ctl/ctl_ioctl.h
index 326e4c931f93..c7070b63eb09 100644
--- a/sys/cam/ctl/ctl_ioctl.h
+++ b/sys/cam/ctl/ctl_ioctl.h
@@ -800,7 +800,7 @@ struct ctl_nvmf_terminate_params {
 };
 
 union ctl_nvmf_data {
-	struct nvmf_handoff_controller_qpair	handoff;
+	struct nvmf_ioc_nv			handoff;
 	struct ctl_nvmf_list_params		list;
 	struct ctl_nvmf_terminate_params	terminate;
 };
diff --git a/sys/dev/nvmf/controller/ctl_frontend_nvmf.c b/sys/dev/nvmf/controller/ctl_frontend_nvmf.c
index 8a4538e5056a..75b36b4834f5 100644
--- a/sys/dev/nvmf/controller/ctl_frontend_nvmf.c
+++ b/sys/dev/nvmf/controller/ctl_frontend_nvmf.c
@@ -923,29 +923,55 @@ nvmft_port_remove(struct ctl_req *req)
 static void
 nvmft_handoff(struct ctl_nvmf *cn)
 {
-	struct nvmf_fabric_connect_cmd cmd;
-	struct nvmf_handoff_controller_qpair *handoff;
-	struct nvmf_fabric_connect_data *data;
+	const struct nvmf_fabric_connect_cmd *cmd;
+	const struct nvmf_fabric_connect_data *data;
+	const nvlist_t *params;
 	struct nvmft_port *np;
+	nvlist_t *nvl;
+	size_t len;
+	enum nvmf_trtype trtype;
 	int error;
 
 	np = NULL;
-	data = NULL;
-	handoff = &cn->data.handoff;
-	error = copyin(handoff->cmd, &cmd, sizeof(cmd));
+	error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl);
 	if (error != 0) {
 		cn->status = CTL_NVMF_ERROR;
 		snprintf(cn->error_str, sizeof(cn->error_str),
-		    "Failed to copyin CONNECT SQE");
+		    "Failed to copyin and unpack handoff arguments");
 		return;
 	}
 
-	data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
-	error = copyin(handoff->data, data, sizeof(*data));
-	if (error != 0) {
+	if (!nvlist_exists_number(nvl, "trtype") ||
+	    !nvlist_exists_nvlist(nvl, "params") ||
+	    !nvlist_exists_binary(nvl, "cmd") ||
+	    !nvlist_exists_binary(nvl, "data")) {
+		cn->status = CTL_NVMF_ERROR;
+		snprintf(cn->error_str, sizeof(cn->error_str),
+		    "Handoff arguments missing required value");
+		goto out;
+	}
+
+	params = nvlist_get_nvlist(nvl, "params");
+	if (!nvmf_validate_qpair_nvlist(params, true)) {
+		cn->status = CTL_NVMF_ERROR;
+		snprintf(cn->error_str, sizeof(cn->error_str),
+		    "Invalid queue pair parameters");
+		goto out;
+	}
+
+	cmd = nvlist_get_binary(nvl, "cmd", &len);
+	if (len != sizeof(*cmd)) {
+		cn->status = CTL_NVMF_ERROR;
+		snprintf(cn->error_str, sizeof(cn->error_str),
+		    "Wrong size for CONNECT SQE");
+		goto out;
+	}
+
+	data = nvlist_get_binary(nvl, "data", &len);
+	if (len != sizeof(*data)) {
 		cn->status = CTL_NVMF_ERROR;
 		snprintf(cn->error_str, sizeof(cn->error_str),
-		    "Failed to copyin CONNECT data");
+		    "Wrong size for CONNECT data");
 		goto out;
 	}
 
@@ -976,8 +1002,10 @@ nvmft_handoff(struct ctl_nvmf *cn)
 	nvmft_port_ref(np);
 	sx_sunlock(&nvmft_ports_lock);
 
-	if (handoff->params.admin) {
-		error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
+	trtype = nvlist_get_number(nvl, "trtype");
+	if (nvlist_get_bool(params, "admin")) {
+		error = nvmft_handoff_admin_queue(np, trtype, params, cmd,
+		    data);
 		if (error != 0) {
 			cn->status = CTL_NVMF_ERROR;
 			snprintf(cn->error_str, sizeof(cn->error_str),
@@ -985,7 +1013,7 @@ nvmft_handoff(struct ctl_nvmf *cn)
 			goto out;
 		}
 	} else {
-		error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
+		error = nvmft_handoff_io_queue(np, trtype, params, cmd, data);
 		if (error != 0) {
 			cn->status = CTL_NVMF_ERROR;
 			snprintf(cn->error_str, sizeof(cn->error_str),
@@ -998,7 +1026,7 @@ nvmft_handoff(struct ctl_nvmf *cn)
 out:
 	if (np != NULL)
 		nvmft_port_rele(np);
-	free(data, M_NVMFT);
+	nvlist_destroy(nvl);
 }
 
 static void
diff --git a/sys/dev/nvmf/controller/nvmft_controller.c b/sys/dev/nvmf/controller/nvmft_controller.c
index 3c10fea75c9d..83a156d9b92a 100644
--- a/sys/dev/nvmf/controller/nvmft_controller.c
+++ b/sys/dev/nvmf/controller/nvmft_controller.c
@@ -107,9 +107,8 @@ nvmft_keep_alive_timer(void *arg)
 }
 
 int
-nvmft_handoff_admin_queue(struct nvmft_port *np,
-    const struct nvmf_handoff_controller_qpair *handoff,
-    const struct nvmf_fabric_connect_cmd *cmd,
+nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
+    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
     const struct nvmf_fabric_connect_data *data)
 {
 	struct nvmft_controller *ctrlr;
@@ -120,8 +119,7 @@ nvmft_handoff_admin_queue(struct nvmft_port *np,
 	if (cmd->qid != htole16(0))
 		return (EINVAL);
 
-	qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0,
-	    "admin queue");
+	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
 	if (qp == NULL) {
 		printf("NVMFT: Failed to setup admin queue from %.*s\n",
 		    (int)sizeof(data->hostnqn), data->hostnqn);
@@ -151,7 +149,7 @@ nvmft_handoff_admin_queue(struct nvmft_port *np,
 	nvmft_printf(ctrlr, "associated with %.*s\n",
 	    (int)sizeof(data->hostnqn), data->hostnqn);
 	ctrlr->admin = qp;
-	ctrlr->trtype = handoff->trtype;
+	ctrlr->trtype = trtype;
 
 	/*
 	 * The spec requires a non-zero KeepAlive timer, but allow a
@@ -175,9 +173,8 @@ nvmft_handoff_admin_queue(struct nvmft_port *np,
 }
 
 int
-nvmft_handoff_io_queue(struct nvmft_port *np,
-    const struct nvmf_handoff_controller_qpair *handoff,
-    const struct nvmf_fabric_connect_cmd *cmd,
+nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
+    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
     const struct nvmf_fabric_connect_data *data)
 {
 	struct nvmft_controller *ctrlr;
@@ -191,7 +188,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
 	cntlid = le16toh(data->cntlid);
 
 	snprintf(name, sizeof(name), "I/O queue %u", qid);
-	qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name);
+	qp = nvmft_qpair_init(trtype, params, qid, name);
 	if (qp == NULL) {
 		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
 		    (int)sizeof(data->hostnqn), data->hostnqn);
@@ -235,7 +232,7 @@ nvmft_handoff_io_queue(struct nvmft_port *np,
 		return (EINVAL);
 	}
 
-	/* XXX: Require handoff->trtype == ctrlr->trtype? */
+	/* XXX: Require trtype == ctrlr->trtype? */
 
 	mtx_lock(&ctrlr->lock);
 	if (ctrlr->shutdown) {
diff --git a/sys/dev/nvmf/controller/nvmft_qpair.c b/sys/dev/nvmf/controller/nvmft_qpair.c
index e66d98f38225..73c7bb280780 100644
--- a/sys/dev/nvmf/controller/nvmft_qpair.c
+++ b/sys/dev/nvmf/controller/nvmft_qpair.c
@@ -31,7 +31,6 @@ struct nvmft_qpair {
 	uint16_t qid;
 	u_int	qsize;
 	uint16_t sqhd;
-	uint16_t sqtail;
 	volatile u_int qp_refs;		/* Internal references on 'qp'. */
 
 	struct task datamove_task;
@@ -102,26 +101,24 @@ nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
 }
 
 struct nvmft_qpair *
-nvmft_qpair_init(enum nvmf_trtype trtype,
-    const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
+nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
     const char *name)
 {
 	struct nvmft_qpair *qp;
 
 	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
-	qp->admin = handoff->admin;
-	qp->sq_flow_control = handoff->sq_flow_control;
-	qp->qsize = handoff->qsize;
+	qp->admin = nvlist_get_bool(params, "admin");
+	qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
+	qp->qsize = nvlist_get_number(params, "qsize");
 	qp->qid = qid;
-	qp->sqhd = handoff->sqhd;
-	qp->sqtail = handoff->sqtail;
+	qp->sqhd = nvlist_get_number(params, "sqhd");
 	strlcpy(qp->name, name, sizeof(qp->name));
 	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
 	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
 	STAILQ_INIT(&qp->datamove_queue);
 	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);
 
-	qp->qp = nvmf_allocate_qpair(trtype, true, handoff, nvmft_qpair_error,
+	qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
 	    qp, nvmft_receive_capsule, qp);
 	if (qp->qp == NULL) {
 		mtx_destroy(&qp->lock);
diff --git a/sys/dev/nvmf/controller/nvmft_var.h b/sys/dev/nvmf/controller/nvmft_var.h
index 4fda297c8a85..7a1748d5999e 100644
--- a/sys/dev/nvmf/controller/nvmft_var.h
+++ b/sys/dev/nvmf/controller/nvmft_var.h
@@ -9,6 +9,7 @@
 #define	__NVMFT_VAR_H__
 
 #include <sys/_callout.h>
+#include <sys/_nv.h>
 #include <sys/refcount.h>
 #include <sys/taskqueue.h>
 
@@ -125,20 +126,18 @@ void	nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
 void	nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
     struct nvmf_capsule *nc);
 int	nvmft_handoff_admin_queue(struct nvmft_port *np,
-    const struct nvmf_handoff_controller_qpair *handoff,
+    enum nvmf_trtype trtype, const nvlist_t *params,
     const struct nvmf_fabric_connect_cmd *cmd,
     const struct nvmf_fabric_connect_data *data);
-int	nvmft_handoff_io_queue(struct nvmft_port *np,
-    const struct nvmf_handoff_controller_qpair *handoff,
-    const struct nvmf_fabric_connect_cmd *cmd,
+int	nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
+    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
     const struct nvmf_fabric_connect_data *data);
 int	nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
     __printflike(2, 3);
 
 /* nvmft_qpair.c */
 struct nvmft_qpair *nvmft_qpair_init(enum nvmf_trtype trtype,
-    const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
-    const char *name);
+    const nvlist_t *params, uint16_t qid, const char *name);
 void	nvmft_qpair_shutdown(struct nvmft_qpair *qp);
 void	nvmft_qpair_destroy(struct nvmft_qpair *qp);
 struct nvmft_controller *nvmft_qpair_ctrlr(struct nvmft_qpair *qp);
diff --git a/sys/dev/nvmf/host/nvmf.c b/sys/dev/nvmf/host/nvmf.c
index c726e36e1fae..09d5cecdfad6 100644
--- a/sys/dev/nvmf/host/nvmf.c
+++ b/sys/dev/nvmf/host/nvmf.c
@@ -8,6 +8,7 @@
 #include <sys/param.h>
 #include <sys/bus.h>
 #include <sys/conf.h>
+#include <sys/dnv.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
@@ -15,6 +16,7 @@
 #include <sys/memdesc.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
+#include <sys/nv.h>
 #include <sys/reboot.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
@@ -196,90 +198,100 @@ nvmf_send_keep_alive(void *arg)
 }
 
 int
-nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
+nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp)
 {
-	size_t len;
-	u_int i;
+	const nvlist_t *const *io;
+	const nvlist_t *admin;
+	nvlist_t *nvl;
+	size_t i, num_io_queues;
+	uint32_t qsize;
 	int error;
 
-	memset(ivars, 0, sizeof(*ivars));
-
-	if (!hh->admin.admin || hh->num_io_queues < 1)
-		return (EINVAL);
-
-	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
-	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
-	if (error != 0)
-		goto out;
-	nvme_controller_data_swapbytes(ivars->cdata);
-
-	len = hh->num_io_queues * sizeof(*ivars->io_params);
-	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
-	error = copyin(hh->io, ivars->io_params, len);
+	error = nvmf_unpack_ioc_nvlist(nv, &nvl);
 	if (error != 0)
-		goto out;
-	for (i = 0; i < hh->num_io_queues; i++) {
-		if (ivars->io_params[i].admin) {
-			error = EINVAL;
-			goto out;
-		}
+		return (error);
 
-		/* Require all I/O queues to be the same size. */
-		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
-			error = EINVAL;
-			goto out;
-		}
+	if (!nvlist_exists_number(nvl, "trtype") ||
+	    !nvlist_exists_nvlist(nvl, "admin") ||
+	    !nvlist_exists_nvlist_array(nvl, "io") ||
+	    !nvlist_exists_binary(nvl, "cdata"))
+		goto invalid;
+
+	admin = nvlist_get_nvlist(nvl, "admin");
+	if (!nvmf_validate_qpair_nvlist(admin, false))
+		goto invalid;
+	if (!nvlist_get_bool(admin, "admin"))
+		goto invalid;
+
+	io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
+	if (num_io_queues < 1)
+		goto invalid;
+	for (i = 0; i < num_io_queues; i++) {
+		if (!nvmf_validate_qpair_nvlist(io[i], false))
+			goto invalid;
 	}
 
-	ivars->hh = hh;
-	return (0);
+	/* Require all I/O queues to be the same size. */
+	qsize = nvlist_get_number(io[0], "qsize");
+	for (i = 1; i < num_io_queues; i++) {
+		if (nvlist_get_number(io[i], "qsize") != qsize)
+			goto invalid;
+	}
 
-out:
-	free(ivars->io_params, M_NVMF);
-	free(ivars->cdata, M_NVMF);
-	return (error);
-}
+	nvlist_get_binary(nvl, "cdata", &i);
+	if (i != sizeof(struct nvme_controller_data))
+		goto invalid;
 
-void
-nvmf_free_ivars(struct nvmf_ivars *ivars)
-{
-	free(ivars->io_params, M_NVMF);
-	free(ivars->cdata, M_NVMF);
+	*nvlp = nvl;
+	return (0);
+invalid:
+	nvlist_destroy(nvl);
+	return (EINVAL);
 }
 
 static int
 nvmf_probe(device_t dev)
 {
-	struct nvmf_ivars *ivars = device_get_ivars(dev);
+	const nvlist_t *nvl = device_get_ivars(dev);
+	const struct nvme_controller_data *cdata;
 
-	if (ivars == NULL)
+	if (nvl == NULL)
 		return (ENXIO);
 
-	device_set_descf(dev, "Fabrics: %.256s", ivars->cdata->subnqn);
+	cdata = nvlist_get_binary(nvl, "cdata", NULL);
+	device_set_descf(dev, "Fabrics: %.256s", cdata->subnqn);
 	return (BUS_PROBE_DEFAULT);
 }
 
 static int
-nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
+nvmf_establish_connection(struct nvmf_softc *sc, const nvlist_t *nvl)
 {
+	const nvlist_t *const *io;
+	const nvlist_t *admin;
+	uint64_t kato;
+	size_t num_io_queues;
+	enum nvmf_trtype trtype;
 	char name[16];
 
+	trtype = nvlist_get_number(nvl, "trtype");
+	admin = nvlist_get_nvlist(nvl, "admin");
+	io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
+	kato = dnvlist_get_number(nvl, "kato", 0);
+
 	/* Setup the admin queue. */
-	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
-	    "admin queue", 0);
+	sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
 	if (sc->admin == NULL) {
 		device_printf(sc->dev, "Failed to setup admin queue\n");
 		return (ENXIO);
 	}
 
 	/* Setup I/O queues. */
-	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
+	sc->io = malloc(num_io_queues * sizeof(*sc->io), M_NVMF,
 	    M_WAITOK | M_ZERO);
-	sc->num_io_queues = ivars->hh->num_io_queues;
+	sc->num_io_queues = num_io_queues;
 	for (u_int i = 0; i < sc->num_io_queues; i++) {
 		snprintf(name, sizeof(name), "I/O queue %u", i);
-		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
-		    &ivars->io_params[i], name, i);
+		sc->io[i] = nvmf_init_qp(sc, trtype, io[i], name, i);
 		if (sc->io[i] == NULL) {
 			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
 			    i + 1);
@@ -288,10 +300,10 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
 	}
 
 	/* Start KeepAlive timers. */
-	if (ivars->hh->kato != 0) {
+	if (kato != 0) {
 		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
 		    sc->cdata->ctratt) != 0;
-		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
+		sc->ka_rx_sbt = mstosbt(kato);
 		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
 		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
 		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
@@ -299,6 +311,9 @@ nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
 		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
 	}
 
+	memcpy(sc->cdata, nvlist_get_binary(nvl, "cdata", NULL),
+	    sizeof(*sc->cdata));
+
 	return (0);
 }
 
@@ -452,17 +467,18 @@ nvmf_attach(device_t dev)
 {
 	struct make_dev_args mda;
 	struct nvmf_softc *sc = device_get_softc(dev);
-	struct nvmf_ivars *ivars = device_get_ivars(dev);
+	const nvlist_t *nvl = device_get_ivars(dev);
+	const nvlist_t * const *io;
 	struct sysctl_oid *oid;
 	uint64_t val;
 	u_int i;
 	int error;
 
-	if (ivars == NULL)
+	if (nvl == NULL)
 		return (ENXIO);
 
 	sc->dev = dev;
*** 743 LINES SKIPPED ***