git: d5c21c68833d - main - ctl_backend_ramdisk: Add support for NVMe

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 03 May 2024 00:16:04 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=d5c21c68833d5d8e8d6b261be5f9e7988707504f

commit d5c21c68833d5d8e8d6b261be5f9e7988707504f
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2024-05-02 23:33:50 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2024-05-02 23:38:30 +0000

    ctl_backend_ramdisk: Add support for NVMe
    
    One known caveat is that the support for WRITE_UNCORRECTABLE is not
    quite correct as reads from LBAs after a WRITE_UNCORRECTABLE will
    return zeroes rather than an error.  Fixing this would likely require
    special handling for PG_ANCHOR for NVMe requests (or adding a new
    PG_UNCORRECTABLE).
    
    Reviewed by:    ken, imp
    Sponsored by:   Chelsio Communications
    Differential Revision:  https://reviews.freebsd.org/D44722
---
 sys/cam/ctl/ctl_backend_ramdisk.c | 207 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 205 insertions(+), 2 deletions(-)

diff --git a/sys/cam/ctl/ctl_backend_ramdisk.c b/sys/cam/ctl/ctl_backend_ramdisk.c
index 1fe239b47293..49ed05f54a58 100644
--- a/sys/cam/ctl/ctl_backend_ramdisk.c
+++ b/sys/cam/ctl/ctl_backend_ramdisk.c
@@ -578,7 +578,7 @@ ctl_backend_ramdisk_gls(union ctl_io *io)
 }
 
 static int
-ctl_backend_ramdisk_config_read(union ctl_io *io)
+ctl_backend_ramdisk_scsi_config_read(union ctl_io *io)
 {
 	int retval = 0;
 
@@ -606,6 +606,89 @@ ctl_backend_ramdisk_config_read(union ctl_io *io)
 	return (retval);
 }
 
+static int
+ramdisk_namespace_data(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
+	struct nvme_namespace_data *nsdata;
+
+	if (io->nvmeio.kern_data_len != sizeof(struct nvme_namespace_data) ||
+	    io->nvmeio.kern_sg_entries != 0)
+		return (CTL_RETVAL_ERROR);
+
+	nsdata = (struct nvme_namespace_data *)io->nvmeio.kern_data_ptr;
+	memset(nsdata, 0, sizeof(*nsdata));
+	nsdata->nsze = htole64(be_lun->size_blocks);
+	nsdata->ncap = htole64(be_lun->cap_bytes / cbe_lun->blocksize);
+	nsdata->nuse = htole64(be_lun->cap_used / cbe_lun->blocksize);
+	nsdata->nsfeat = NVMEM(NVME_NS_DATA_NSFEAT_THIN_PROV) |
+	    NVMEM(NVME_NS_DATA_NSFEAT_DEALLOC);
+	nsdata->nlbaf = 1 - 1;
+	nsdata->dlfeat = NVMEM(NVME_NS_DATA_DLFEAT_DWZ) |
+	    NVMEF(NVME_NS_DATA_DLFEAT_READ, NVME_NS_DATA_DLFEAT_READ_00);
+	nsdata->flbas = NVMEF(NVME_NS_DATA_FLBAS_FORMAT, 0);
+	nsdata->lbaf[0] = NVMEF(NVME_NS_DATA_LBAF_LBADS,
+	    ffs(cbe_lun->blocksize) - 1);
+
+	ctl_lun_nsdata_ids(cbe_lun, nsdata);
+	ctl_config_read_done(io);
+	return (CTL_RETVAL_COMPLETE);
+}
+
+static int
+ramdisk_nvme_ids(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+
+	if (io->nvmeio.kern_data_len != 4096 || io->nvmeio.kern_sg_entries != 0)
+		return (CTL_RETVAL_ERROR);
+
+	ctl_lun_nvme_ids(cbe_lun, io->nvmeio.kern_data_ptr);
+	ctl_config_read_done(io);
+	return (CTL_RETVAL_COMPLETE);
+}
+
+static int
+ctl_backend_ramdisk_nvme_config_read(union ctl_io *io)
+{
+	switch (io->nvmeio.cmd.opc) {
+	case NVME_OPC_IDENTIFY:
+	{
+		uint8_t cns;
+
+		cns = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
+		switch (cns) {
+		case 0:
+			return (ramdisk_namespace_data(io));
+		case 3:
+			return (ramdisk_nvme_ids(io));
+		default:
+			ctl_nvme_set_invalid_field(&io->nvmeio);
+			ctl_config_read_done(io);
+			return (CTL_RETVAL_COMPLETE);
+		}
+	}
+	default:
+		ctl_nvme_set_invalid_opcode(&io->nvmeio);
+		ctl_config_read_done(io);
+		return (CTL_RETVAL_COMPLETE);
+	}
+}
+
+static int
+ctl_backend_ramdisk_config_read(union ctl_io *io)
+{
+	switch (io->io_hdr.io_type) {
+	case CTL_IO_SCSI:
+		return (ctl_backend_ramdisk_scsi_config_read(io));
+	case CTL_IO_NVME_ADMIN:
+		return (ctl_backend_ramdisk_nvme_config_read(io));
+	default:
+		__assert_unreachable();
+	}
+}
+
 static void
 ctl_backend_ramdisk_delete(struct ctl_be_lun *cbe_lun, off_t lba, off_t len,
     int anchor)
@@ -658,6 +741,8 @@ ctl_backend_ramdisk_ws(union ctl_io *io)
 	uint64_t lba;
 	u_int lbaoff, lbas;
 
+	CTL_IO_ASSERT(io, SCSI);
+
 	if (lbalen->flags & ~(SWS_LBDATA | SWS_UNMAP | SWS_ANCHOR | SWS_NDOB)) {
 		ctl_set_invalid_field(&io->scsiio,
 				      /*sks_valid*/ 1,
@@ -706,6 +791,8 @@ ctl_backend_ramdisk_unmap(union ctl_io *io)
 	struct ctl_ptr_len_flags *ptrlen = (struct ctl_ptr_len_flags *)ARGS(io);
 	struct scsi_unmap_desc *buf, *end;
 
+	CTL_IO_ASSERT(io, SCSI);
+
 	if ((ptrlen->flags & ~SU_ANCHOR) != 0) {
 		ctl_set_invalid_field(&io->scsiio,
 				      /*sks_valid*/ 0,
@@ -730,7 +817,7 @@ ctl_backend_ramdisk_unmap(union ctl_io *io)
 }
 
 static int
-ctl_backend_ramdisk_config_write(union ctl_io *io)
+ctl_backend_ramdisk_scsi_config_write(union ctl_io *io)
 {
 	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
 	int retval = 0;
@@ -785,6 +872,122 @@ ctl_backend_ramdisk_config_write(union ctl_io *io)
 	return (retval);
 }
 
+static void
+ctl_backend_ramdisk_wu(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_lba_len_flags *lbalen = ARGS(io);
+
+	CTL_IO_ASSERT(io, NVME);
+
+	/*
+	 * XXX: Not quite right as reads will return zeroes rather
+	 * than failing.
+	 */
+	ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len, 1);
+	ctl_nvme_set_success(&io->nvmeio);
+	ctl_config_write_done(io);
+}
+
+static void
+ctl_backend_ramdisk_wz(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct ctl_be_ramdisk_lun *be_lun = (struct ctl_be_ramdisk_lun *)cbe_lun;
+	struct ctl_lba_len_flags *lbalen = ARGS(io);
+	uint8_t *page;
+	uint64_t lba;
+	u_int lbaoff, lbas;
+
+	CTL_IO_ASSERT(io, NVME);
+
+	if ((le32toh(io->nvmeio.cmd.cdw12) & (1U << 25)) != 0) {
+		ctl_backend_ramdisk_delete(cbe_lun, lbalen->lba, lbalen->len,
+		    0);
+		ctl_nvme_set_success(&io->nvmeio);
+		ctl_config_write_done(io);
+		return;
+	}
+
+	for (lba = lbalen->lba, lbas = lbalen->len; lbas > 0; lba++, lbas--) {
+		page = ctl_backend_ramdisk_getpage(be_lun,
+		    lba >> cbe_lun->pblockexp, GP_WRITE);
+		if (page == P_UNMAPPED || page == P_ANCHORED) {
+			ctl_nvme_set_space_alloc_fail(&io->nvmeio);
+			ctl_data_submit_done(io);
+			return;
+		}
+		lbaoff = lba & ~(UINT_MAX << cbe_lun->pblockexp);
+		page += lbaoff * cbe_lun->blocksize;
+		memset(page, 0, cbe_lun->blocksize);
+	}
+	ctl_nvme_set_success(&io->nvmeio);
+	ctl_config_write_done(io);
+}
+
+static void
+ctl_backend_ramdisk_dsm(union ctl_io *io)
+{
+	struct ctl_be_lun *cbe_lun = CTL_BACKEND_LUN(io);
+	struct nvme_dsm_range *r;
+	uint64_t lba;
+	uint32_t num_blocks;
+	u_int i, ranges;
+
+	CTL_IO_ASSERT(io, NVME);
+
+	ranges = le32toh(io->nvmeio.cmd.cdw10) & 0xff;
+	r = (struct nvme_dsm_range *)io->nvmeio.kern_data_ptr;
+	for (i = 0; i < ranges; i++) {
+		lba = le64toh(r[i].starting_lba);
+		num_blocks = le32toh(r[i].length);
+		if ((le32toh(r[i].attributes) & (1U << 2)) != 0)
+			ctl_backend_ramdisk_delete(cbe_lun, lba, num_blocks, 0);
+	}
+
+	ctl_nvme_set_success(&io->nvmeio);
+	ctl_config_write_done(io);
+}
+
+static int
+ctl_backend_ramdisk_nvme_config_write(union ctl_io *io)
+{
+	switch (io->nvmeio.cmd.opc) {
+	case NVME_OPC_FLUSH:
+		/* We have no cache to flush. */
+		ctl_nvme_set_success(&io->nvmeio);
+		ctl_config_write_done(io);
+		break;
+	case NVME_OPC_WRITE_UNCORRECTABLE:
+		ctl_backend_ramdisk_wu(io);
+		break;
+	case NVME_OPC_WRITE_ZEROES:
+		ctl_backend_ramdisk_wz(io);
+		break;
+	case NVME_OPC_DATASET_MANAGEMENT:
+		ctl_backend_ramdisk_dsm(io);
+		break;
+	default:
+		ctl_nvme_set_invalid_opcode(&io->nvmeio);
+		ctl_config_write_done(io);
+		break;
+	}
+	return (CTL_RETVAL_COMPLETE);
+}
+
+static int
+ctl_backend_ramdisk_config_write(union ctl_io *io)
+{
+	switch (io->io_hdr.io_type) {
+	case CTL_IO_SCSI:
+		return (ctl_backend_ramdisk_scsi_config_write(io));
+	case CTL_IO_NVME:
+		return (ctl_backend_ramdisk_nvme_config_write(io));
+	default:
+		__assert_unreachable();
+	}
+}
+
 static uint64_t
 ctl_backend_ramdisk_lun_attr(struct ctl_be_lun *cbe_lun, const char *attrname)
 {