git: f08746a7e319 - main - nvme: Pass malloc flags to request allocation functions

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Sat, 09 Nov 2024 17:35:10 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=f08746a7e3195a6e144e6f58003dc5c221d15d02

commit f08746a7e3195a6e144e6f58003dc5c221d15d02
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-11-09 17:34:12 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-11-09 17:34:12 +0000

    nvme: Pass malloc flags to request allocation functions
    
    There are some contexts where it is safe to sleep, so we should pass
    M_WAITOK to ensure that a null pointer dereference can't happen.
    
    A few places allocate with M_NOWAIT but have no way to signal an error.
    Flag those with an XXX comment.
    
    PR:             276770
    Reviewed by:    imp
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D47307
---
 sys/dev/nvme/nvme_ctrlr.c     | 22 ++++++++++++++--------
 sys/dev/nvme/nvme_ctrlr_cmd.c | 29 +++++++++++++++++++----------
 sys/dev/nvme/nvme_ns_cmd.c    | 24 +++++++++---------------
 sys/dev/nvme/nvme_private.h   | 26 +++++++++++++++-----------
 sys/dev/nvme/nvme_sim.c       | 11 ++++++-----
 5 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/sys/dev/nvme/nvme_ctrlr.c b/sys/dev/nvme/nvme_ctrlr.c
index 994994c3643f..29c165899f7f 100644
--- a/sys/dev/nvme/nvme_ctrlr.c
+++ b/sys/dev/nvme/nvme_ctrlr.c
@@ -820,7 +820,13 @@ nvme_ctrlr_construct_and_submit_aer(struct nvme_controller *ctrlr,
 	struct nvme_request *req;
 
 	aer->ctrlr = ctrlr;
-	req = nvme_allocate_request_null(nvme_ctrlr_async_event_cb, aer);
+	/*
+	 * XXX-MJ this should be M_WAITOK but we might be in a non-sleepable
+	 * callback context.  AER completions should be handled on a dedicated
+	 * thread.
+	 */
+	req = nvme_allocate_request_null(M_NOWAIT, nvme_ctrlr_async_event_cb,
+	    aer);
 	aer->req = req;
 
 	/*
@@ -1272,12 +1278,12 @@ nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
 				goto err;
 			}
 			req = nvme_allocate_request_vaddr(buf->b_data, pt->len,
-			    nvme_pt_done, pt);
+			    M_WAITOK, nvme_pt_done, pt);
 		} else
 			req = nvme_allocate_request_vaddr(pt->buf, pt->len,
-			    nvme_pt_done, pt);
+			    M_WAITOK, nvme_pt_done, pt);
 	} else
-		req = nvme_allocate_request_null(nvme_pt_done, pt);
+		req = nvme_allocate_request_null(M_WAITOK, nvme_pt_done, pt);
 
 	/* Assume user space already converted to little-endian */
 	req->cmd.opc = pt->cmd.opc;
@@ -1363,14 +1369,14 @@ nvme_ctrlr_linux_passthru_cmd(struct nvme_controller *ctrlr,
 				ret = EFAULT;
 				goto err;
 			}
-			req = nvme_allocate_request_vaddr(buf->b_data, npc->data_len,
-			    nvme_npc_done, npc);
+			req = nvme_allocate_request_vaddr(buf->b_data,
+			    npc->data_len, M_WAITOK, nvme_npc_done, npc);
 		} else
 			req = nvme_allocate_request_vaddr(
 			    (void *)(uintptr_t)npc->addr, npc->data_len,
-			    nvme_npc_done, npc);
+			    M_WAITOK, nvme_npc_done, npc);
 	} else
-		req = nvme_allocate_request_null(nvme_npc_done, npc);
+		req = nvme_allocate_request_null(M_WAITOK, nvme_npc_done, npc);
 
 	req->cmd.opc = npc->opcode;
 	req->cmd.fuse = npc->flags;
diff --git a/sys/dev/nvme/nvme_ctrlr_cmd.c b/sys/dev/nvme/nvme_ctrlr_cmd.c
index 68934b9b3947..993a7718356d 100644
--- a/sys/dev/nvme/nvme_ctrlr_cmd.c
+++ b/sys/dev/nvme/nvme_ctrlr_cmd.c
@@ -37,7 +37,7 @@ nvme_ctrlr_cmd_identify_controller(struct nvme_controller *ctrlr, void *payload,
 	struct nvme_command *cmd;
 
 	req = nvme_allocate_request_vaddr(payload,
-	    sizeof(struct nvme_controller_data), cb_fn, cb_arg);
+	    sizeof(struct nvme_controller_data), M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_IDENTIFY;
@@ -59,7 +59,7 @@ nvme_ctrlr_cmd_identify_namespace(struct nvme_controller *ctrlr, uint32_t nsid,
 	struct nvme_command *cmd;
 
 	req = nvme_allocate_request_vaddr(payload,
-	    sizeof(struct nvme_namespace_data), cb_fn, cb_arg);
+	    sizeof(struct nvme_namespace_data), M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_IDENTIFY;
@@ -79,7 +79,7 @@ nvme_ctrlr_cmd_create_io_cq(struct nvme_controller *ctrlr,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_CREATE_IO_CQ;
@@ -103,7 +103,7 @@ nvme_ctrlr_cmd_create_io_sq(struct nvme_controller *ctrlr,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_CREATE_IO_SQ;
@@ -127,7 +127,7 @@ nvme_ctrlr_cmd_delete_io_cq(struct nvme_controller *ctrlr,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_DELETE_IO_CQ;
@@ -148,7 +148,7 @@ nvme_ctrlr_cmd_delete_io_sq(struct nvme_controller *ctrlr,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_DELETE_IO_SQ;
@@ -171,7 +171,7 @@ nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr, uint8_t feature,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_SET_FEATURES;
@@ -193,7 +193,7 @@ nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr, uint8_t feature,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	req = nvme_allocate_request_null(M_WAITOK, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_GET_FEATURES;
@@ -259,7 +259,12 @@ nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr, uint8_t log_page,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_vaddr(payload, payload_size, cb_fn, cb_arg);
+	/*
+	 * XXX-MJ this should be M_WAITOK but we might be called from AER
+	 * completion processing, which is a non-sleepable context.
+	 */
+	req = nvme_allocate_request_vaddr(payload, payload_size,
+	    M_NOWAIT, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_GET_LOG_PAGE;
@@ -319,7 +324,11 @@ nvme_ctrlr_cmd_abort(struct nvme_controller *ctrlr, uint16_t cid,
 	struct nvme_request *req;
 	struct nvme_command *cmd;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
+	/*
+	 * XXX-MJ this should be M_WAITOK, we do reset from non-sleepable
+	 * context and abort commands as part of that.
+	 */
+	req = nvme_allocate_request_null(M_NOWAIT, cb_fn, cb_arg);
 
 	cmd = &req->cmd;
 	cmd->opc = NVME_OPC_ABORT;
diff --git a/sys/dev/nvme/nvme_ns_cmd.c b/sys/dev/nvme/nvme_ns_cmd.c
index 8cbeac025307..1bad9929cb09 100644
--- a/sys/dev/nvme/nvme_ns_cmd.c
+++ b/sys/dev/nvme/nvme_ns_cmd.c
@@ -36,8 +36,7 @@ nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload, uint64_t lba,
 	struct nvme_request	*req;
 
 	req = nvme_allocate_request_vaddr(payload,
-	    lba_count*nvme_ns_get_sector_size(ns), cb_fn, cb_arg);
-
+	    lba_count * nvme_ns_get_sector_size(ns), M_NOWAIT, cb_fn, cb_arg);
 	if (req == NULL)
 		return (ENOMEM);
 
@@ -56,11 +55,9 @@ nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
 	uint64_t		lba;
 	uint64_t		lba_count;
 
-	req = nvme_allocate_request_bio(bp, cb_fn, cb_arg);
-
+	req = nvme_allocate_request_bio(bp, M_NOWAIT, cb_fn, cb_arg);
 	if (req == NULL)
 		return (ENOMEM);
-
 	lba = bp->bio_offset / nvme_ns_get_sector_size(ns);
 	lba_count = bp->bio_bcount / nvme_ns_get_sector_size(ns);
 	nvme_ns_read_cmd(&req->cmd, ns->id, lba, lba_count);
@@ -77,8 +74,7 @@ nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload, uint64_t lba,
 	struct nvme_request	*req;
 
 	req = nvme_allocate_request_vaddr(payload,
-	    lba_count*nvme_ns_get_sector_size(ns), cb_fn, cb_arg);
-
+	    lba_count * nvme_ns_get_sector_size(ns), M_NOWAIT, cb_fn, cb_arg);
 	if (req == NULL)
 		return (ENOMEM);
 
@@ -97,8 +93,7 @@ nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
 	uint64_t		lba;
 	uint64_t		lba_count;
 
-	req = nvme_allocate_request_bio(bp, cb_fn, cb_arg);
-
+	req = nvme_allocate_request_bio(bp, M_NOWAIT, cb_fn, cb_arg);
 	if (req == NULL)
 		return (ENOMEM);
 	lba = bp->bio_offset / nvme_ns_get_sector_size(ns);
@@ -118,8 +113,8 @@ nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
 	struct nvme_command	*cmd;
 
 	req = nvme_allocate_request_vaddr(payload,
-	    num_ranges * sizeof(struct nvme_dsm_range), cb_fn, cb_arg);
-
+	    num_ranges * sizeof(struct nvme_dsm_range), M_NOWAIT, cb_fn,
+	    cb_arg);
 	if (req == NULL)
 		return (ENOMEM);
 
@@ -141,8 +136,7 @@ nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request	*req;
 
-	req = nvme_allocate_request_null(cb_fn, cb_arg);
-
+	req = nvme_allocate_request_null(M_NOWAIT, cb_fn, cb_arg);
 	if (req == NULL)
 		return (ENOMEM);
 
@@ -165,8 +159,8 @@ nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset, size_t len)
 	int i;
 
 	status.done = FALSE;
-	req = nvme_allocate_request_vaddr(virt, len, nvme_completion_poll_cb,
-	    &status);
+	req = nvme_allocate_request_vaddr(virt, len, M_NOWAIT,
+	    nvme_completion_poll_cb, &status);
 	if (req == NULL)
 		return (ENOMEM);
 
diff --git a/sys/dev/nvme/nvme_private.h b/sys/dev/nvme/nvme_private.h
index dd7a849b6782..949e69ec9290 100644
--- a/sys/dev/nvme/nvme_private.h
+++ b/sys/dev/nvme/nvme_private.h
@@ -486,11 +486,14 @@ nvme_single_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
 }
 
 static __inline struct nvme_request *
-_nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg)
+_nvme_allocate_request(const int how, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
-	req = malloc(sizeof(*req), M_NVME, M_NOWAIT | M_ZERO);
+	KASSERT(how == M_WAITOK || how == M_NOWAIT,
+	    ("nvme_allocate_request: invalid how %d", how));
+
+	req = malloc(sizeof(*req), M_NVME, how | M_ZERO);
 	if (req != NULL) {
 		req->cb_fn = cb_fn;
 		req->cb_arg = cb_arg;
@@ -501,11 +504,11 @@ _nvme_allocate_request(nvme_cb_fn_t cb_fn, void *cb_arg)
 
 static __inline struct nvme_request *
 nvme_allocate_request_vaddr(void *payload, uint32_t payload_size,
-    nvme_cb_fn_t cb_fn, void *cb_arg)
+    const int how, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
-	req = _nvme_allocate_request(cb_fn, cb_arg);
+	req = _nvme_allocate_request(how, cb_fn, cb_arg);
 	if (req != NULL) {
 		req->payload = memdesc_vaddr(payload, payload_size);
 		req->payload_valid = true;
@@ -514,20 +517,21 @@ nvme_allocate_request_vaddr(void *payload, uint32_t payload_size,
 }
 
 static __inline struct nvme_request *
-nvme_allocate_request_null(nvme_cb_fn_t cb_fn, void *cb_arg)
+nvme_allocate_request_null(const int how, nvme_cb_fn_t cb_fn, void *cb_arg)
 {
 	struct nvme_request *req;
 
-	req = _nvme_allocate_request(cb_fn, cb_arg);
+	req = _nvme_allocate_request(how, cb_fn, cb_arg);
 	return (req);
 }
 
 static __inline struct nvme_request *
-nvme_allocate_request_bio(struct bio *bio, nvme_cb_fn_t cb_fn, void *cb_arg)
+nvme_allocate_request_bio(struct bio *bio, const int how, nvme_cb_fn_t cb_fn,
+    void *cb_arg)
 {
 	struct nvme_request *req;
 
-	req = _nvme_allocate_request(cb_fn, cb_arg);
+	req = _nvme_allocate_request(how, cb_fn, cb_arg);
 	if (req != NULL) {
 		req->payload = memdesc_bio(bio);
 		req->payload_valid = true;
@@ -536,16 +540,16 @@ nvme_allocate_request_bio(struct bio *bio, nvme_cb_fn_t cb_fn, void *cb_arg)
 }
 
 static __inline struct nvme_request *
-nvme_allocate_request_ccb(union ccb *ccb, nvme_cb_fn_t cb_fn, void *cb_arg)
+nvme_allocate_request_ccb(union ccb *ccb, const int how, nvme_cb_fn_t cb_fn,
+    void *cb_arg)
 {
 	struct nvme_request *req;
 
-	req = _nvme_allocate_request(cb_fn, cb_arg);
+	req = _nvme_allocate_request(how, cb_fn, cb_arg);
 	if (req != NULL) {
 		req->payload = memdesc_ccb(ccb);
 		req->payload_valid = true;
 	}
-
 	return (req);
 }
 
diff --git a/sys/dev/nvme/nvme_sim.c b/sys/dev/nvme/nvme_sim.c
index 8bdeb4be49f3..4974bb718222 100644
--- a/sys/dev/nvme/nvme_sim.c
+++ b/sys/dev/nvme/nvme_sim.c
@@ -96,15 +96,16 @@ nvme_sim_nvmeio(struct cam_sim *sim, union ccb *ccb)
 	/* SG LIST ??? */
 	if ((nvmeio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_BIO)
 		req = nvme_allocate_request_bio((struct bio *)payload,
-		    nvme_sim_nvmeio_done, ccb);
+		    M_NOWAIT, nvme_sim_nvmeio_done, ccb);
 	else if ((nvmeio->ccb_h.flags & CAM_DATA_SG) == CAM_DATA_SG)
-		req = nvme_allocate_request_ccb(ccb, nvme_sim_nvmeio_done, ccb);
+		req = nvme_allocate_request_ccb(ccb, M_NOWAIT,
+		    nvme_sim_nvmeio_done, ccb);
 	else if (payload == NULL)
-		req = nvme_allocate_request_null(nvme_sim_nvmeio_done, ccb);
+		req = nvme_allocate_request_null(M_NOWAIT, nvme_sim_nvmeio_done,
+		    ccb);
 	else
-		req = nvme_allocate_request_vaddr(payload, size,
+		req = nvme_allocate_request_vaddr(payload, size, M_NOWAIT,
 		    nvme_sim_nvmeio_done, ccb);
-
 	if (req == NULL) {
 		nvmeio->ccb_h.status = CAM_RESRC_UNAVAIL;
 		xpt_done(ccb);