git: 9f1fa1a46117 - main - bhyve nvme: Add AEN support to NVMe emulation

From: Chuck Tuffli <chuck_at_FreeBSD.org>
Date: Tue, 14 Dec 2021 23:30:24 UTC
The branch main has been updated by chuck:

URL: https://cgit.FreeBSD.org/src/commit/?id=9f1fa1a46117a418e7f7fbfb1be437100fad531f

commit 9f1fa1a46117a418e7f7fbfb1be437100fad531f
Author:     Chuck Tuffli <chuck@FreeBSD.org>
AuthorDate: 2021-12-15 07:16:49 +0000
Commit:     Chuck Tuffli <chuck@FreeBSD.org>
CommitDate: 2021-12-15 07:16:49 +0000

    bhyve nvme: Add AEN support to NVMe emulation
    
    Add Asynchronous Event Notification infrastructure to the NVMe
    emulation.
    
    Reviewed by:    imp, grehan
    MFC after:      1 month
    Differential Revision:  https://reviews.freebsd.org/D32952
---
 usr.sbin/bhyve/pci_nvme.c | 273 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 266 insertions(+), 7 deletions(-)

diff --git a/usr.sbin/bhyve/pci_nvme.c b/usr.sbin/bhyve/pci_nvme.c
index 034c9409cea3..6fd1aa14b9e3 100644
--- a/usr.sbin/bhyve/pci_nvme.c
+++ b/usr.sbin/bhyve/pci_nvme.c
@@ -65,6 +65,7 @@ __FBSDID("$FreeBSD$");
 
 #include <assert.h>
 #include <pthread.h>
+#include <pthread_np.h>
 #include <semaphore.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -250,11 +251,39 @@ struct nvme_feature_obj {
 
 #define NVME_FID_MAX		(NVME_FEAT_ENDURANCE_GROUP_EVENT_CONFIGURATION + 1)
 
+typedef enum {
+	PCI_NVME_AE_TYPE_ERROR = 0,
+	PCI_NVME_AE_TYPE_SMART,
+	PCI_NVME_AE_TYPE_NOTICE,
+	PCI_NVME_AE_TYPE_IO_CMD = 6,
+	PCI_NVME_AE_TYPE_VENDOR = 7,
+	PCI_NVME_AE_TYPE_MAX		/* Must be last */
+} pci_nvme_async_type;
+
+/* Asynchronous Event Requests */
 struct pci_nvme_aer {
 	STAILQ_ENTRY(pci_nvme_aer) link;
 	uint16_t	cid;	/* Command ID of the submitted AER */
 };
 
+typedef enum {
+	PCI_NVME_AE_INFO_NS_ATTR_CHANGED = 0,
+	PCI_NVME_AE_INFO_FW_ACTIVATION,
+	PCI_NVME_AE_INFO_TELEMETRY_CHANGE,
+	PCI_NVME_AE_INFO_ANA_CHANGE,
+	PCI_NVME_AE_INFO_PREDICT_LATENCY_CHANGE,
+	PCI_NVME_AE_INFO_LBA_STATUS_ALERT,
+	PCI_NVME_AE_INFO_ENDURANCE_GROUP_CHANGE,
+	PCI_NVME_AE_INFO_MAX,
+} pci_nvme_async_info;
+
+/* Asynchronous Event Notifications */
+struct pci_nvme_aen {
+	pci_nvme_async_type atype;
+	uint32_t	event_data;
+	bool		posted;
+};
+
 struct pci_nvme_softc {
 	struct pci_devinst *nsc_pi;
 
@@ -302,10 +331,21 @@ struct pci_nvme_softc {
 	uint32_t	write_dunits_remainder;
 
 	STAILQ_HEAD(, pci_nvme_aer) aer_list;
+	pthread_mutex_t	aer_mtx;
 	uint32_t	aer_count;
+	struct pci_nvme_aen aen[PCI_NVME_AE_TYPE_MAX];
+	pthread_t	aen_tid;
+	pthread_mutex_t	aen_mtx;
+	pthread_cond_t	aen_cond;
 };
 
 
+static void pci_nvme_cq_update(struct pci_nvme_softc *sc,
+    struct nvme_completion_queue *cq,
+    uint32_t cdw0,
+    uint16_t cid,
+    uint16_t sqid,
+    uint16_t status);
 static struct pci_nvme_ioreq *pci_nvme_get_ioreq(struct pci_nvme_softc *);
 static void pci_nvme_release_ioreq(struct pci_nvme_softc *, struct pci_nvme_ioreq *);
 static void pci_nvme_io_done(struct blockif_req *, int);
@@ -360,6 +400,8 @@ static void nvme_feature_iv_config(struct pci_nvme_softc *,
     struct nvme_command *,
     struct nvme_completion *);
 
+static void *aen_thr(void *arg);
+
 static __inline void
 cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
 {
@@ -623,6 +665,8 @@ pci_nvme_init_features(struct pci_nvme_softc *sc)
 	sc->feat[NVME_FEAT_NUMBER_OF_QUEUES].set = nvme_feature_num_queues;
 	sc->feat[NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION].set =
 	    nvme_feature_iv_config;
+	/* Enable all AENs by default */
+	sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11 = 0x31f;
 	sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_CONFIG].get =
 	    nvme_feature_invalid_cb;
 	sc->feat[NVME_FEAT_PREDICTABLE_LATENCY_MODE_WINDOW].get =
@@ -630,32 +674,42 @@ pci_nvme_init_features(struct pci_nvme_softc *sc)
 }
 
 static void
-pci_nvme_aer_init(struct pci_nvme_softc *sc)
+pci_nvme_aer_reset(struct pci_nvme_softc *sc)
 {
 
 	STAILQ_INIT(&sc->aer_list);
 	sc->aer_count = 0;
 }
 
+static void
+pci_nvme_aer_init(struct pci_nvme_softc *sc)
+{
+
+	pthread_mutex_init(&sc->aer_mtx, NULL);
+	pci_nvme_aer_reset(sc);
+}
+
 static void
 pci_nvme_aer_destroy(struct pci_nvme_softc *sc)
 {
 	struct pci_nvme_aer *aer = NULL;
 
+	pthread_mutex_lock(&sc->aer_mtx);
 	while (!STAILQ_EMPTY(&sc->aer_list)) {
 		aer = STAILQ_FIRST(&sc->aer_list);
 		STAILQ_REMOVE_HEAD(&sc->aer_list, link);
 		free(aer);
 	}
+	pthread_mutex_unlock(&sc->aer_mtx);
 
-	pci_nvme_aer_init(sc);
+	pci_nvme_aer_reset(sc);
 }
 
 static bool
 pci_nvme_aer_available(struct pci_nvme_softc *sc)
 {
 
-	return (!STAILQ_EMPTY(&sc->aer_list));
+	return (sc->aer_count != 0);
 }
 
 static bool
@@ -687,11 +741,13 @@ pci_nvme_aer_add(struct pci_nvme_softc *sc, uint16_t cid)
 	if (aer == NULL)
 		return (-1);
 
-	sc->aer_count++;
-
 	/* Save the Command ID for use in the completion message */
 	aer->cid = cid;
+
+	pthread_mutex_lock(&sc->aer_mtx);
+	sc->aer_count++;
 	STAILQ_INSERT_TAIL(&sc->aer_list, aer, link);
+	pthread_mutex_unlock(&sc->aer_mtx);
 
 	return (0);
 }
@@ -707,15 +763,208 @@ pci_nvme_aer_get(struct pci_nvme_softc *sc)
 {
 	struct pci_nvme_aer *aer = NULL;
 
+	pthread_mutex_lock(&sc->aer_mtx);
 	aer = STAILQ_FIRST(&sc->aer_list);
 	if (aer != NULL) {
 		STAILQ_REMOVE_HEAD(&sc->aer_list, link);
 		sc->aer_count--;
 	}
+	pthread_mutex_unlock(&sc->aer_mtx);
 	
 	return (aer);
 }
 
+static void
+pci_nvme_aen_reset(struct pci_nvme_softc *sc)
+{
+	uint32_t	atype;
+
+	memset(sc->aen, 0, PCI_NVME_AE_TYPE_MAX * sizeof(struct pci_nvme_aen));
+
+	for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) {
+		sc->aen[atype].atype = atype;
+	}
+}
+
+static void
+pci_nvme_aen_init(struct pci_nvme_softc *sc)
+{
+	char nstr[80];
+
+	pci_nvme_aen_reset(sc);
+
+	pthread_mutex_init(&sc->aen_mtx, NULL);
+	pthread_create(&sc->aen_tid, NULL, aen_thr, sc);
+	snprintf(nstr, sizeof(nstr), "nvme-aen-%d:%d", sc->nsc_pi->pi_slot,
+	    sc->nsc_pi->pi_func);
+	pthread_set_name_np(sc->aen_tid, nstr);
+}
+
+static void
+pci_nvme_aen_destroy(struct pci_nvme_softc *sc)
+{
+
+	pci_nvme_aen_reset(sc);
+}
+
+/* Notify the AEN thread of pending work */
+static void
+pci_nvme_aen_notify(struct pci_nvme_softc *sc)
+{
+
+	pthread_cond_signal(&sc->aen_cond);
+}
+
+/*
+ * Post an Asynchronous Event Notification
+ */
+static int32_t
+pci_nvme_aen_post(struct pci_nvme_softc *sc, pci_nvme_async_type atype,
+		uint32_t event_data)
+{
+	struct pci_nvme_aen *aen;
+
+	if (atype >= PCI_NVME_AE_TYPE_MAX) {
+		return(EINVAL);
+	}
+
+	pthread_mutex_lock(&sc->aen_mtx);
+	aen = &sc->aen[atype];
+
+	/* Has the controller already posted an event of this type? */
+	if (aen->posted) {
+		pthread_mutex_unlock(&sc->aen_mtx);
+		return(EALREADY);
+	}
+
+	aen->event_data = event_data;
+	aen->posted = true;
+	pthread_mutex_unlock(&sc->aen_mtx);
+
+	pci_nvme_aen_notify(sc);
+
+	return(0);
+}
+
+static void
+pci_nvme_aen_process(struct pci_nvme_softc *sc)
+{
+	struct pci_nvme_aer *aer;
+	struct pci_nvme_aen *aen;
+	pci_nvme_async_type atype;
+	uint32_t mask;
+	uint16_t status;
+	uint8_t lid;
+
+	assert(pthread_mutex_isowned_np(&sc->aen_mtx));
+	for (atype = 0; atype < PCI_NVME_AE_TYPE_MAX; atype++) {
+		aen = &sc->aen[atype];
+		/* Previous iterations may have depleted the available AER's */
+		if (!pci_nvme_aer_available(sc)) {
+			DPRINTF("%s: no AER", __func__);
+			break;
+		}
+
+		if (!aen->posted) {
+			DPRINTF("%s: no AEN posted for atype=%#x", __func__, atype);
+			continue;
+		}
+
+		status = NVME_SC_SUCCESS;
+
+		/* Is the event masked? */
+		mask =
+		    sc->feat[NVME_FEAT_ASYNC_EVENT_CONFIGURATION].cdw11;
+
+		DPRINTF("%s: atype=%#x mask=%#x event_data=%#x", __func__, atype, mask, aen->event_data);
+		switch (atype) {
+		case PCI_NVME_AE_TYPE_ERROR:
+			lid = NVME_LOG_ERROR;
+			break;
+		case PCI_NVME_AE_TYPE_SMART:
+			mask &= 0xff;
+			if ((mask & aen->event_data) == 0)
+				continue;
+			lid = NVME_LOG_HEALTH_INFORMATION;
+			break;
+		case PCI_NVME_AE_TYPE_NOTICE:
+			if (aen->event_data >= PCI_NVME_AE_INFO_MAX) {
+				EPRINTLN("%s unknown AEN notice type %u",
+				    __func__, aen->event_data);
+				status = NVME_SC_INTERNAL_DEVICE_ERROR;
+				break;
+			}
+			mask >>= 8;
+			if (((1 << aen->event_data) & mask) == 0)
+				continue;
+			switch (aen->event_data) {
+			case PCI_NVME_AE_INFO_NS_ATTR_CHANGED:
+				lid = NVME_LOG_CHANGED_NAMESPACE;
+				break;
+			case PCI_NVME_AE_INFO_FW_ACTIVATION:
+				lid = NVME_LOG_FIRMWARE_SLOT;
+				break;
+			case PCI_NVME_AE_INFO_TELEMETRY_CHANGE:
+				lid = NVME_LOG_TELEMETRY_CONTROLLER_INITIATED;
+				break;
+			case PCI_NVME_AE_INFO_ANA_CHANGE:
+				lid = NVME_LOG_ASYMMETRIC_NAMESPAVE_ACCESS; //TODO spelling
+				break;
+			case PCI_NVME_AE_INFO_PREDICT_LATENCY_CHANGE:
+				lid = NVME_LOG_PREDICTABLE_LATENCY_EVENT_AGGREGATE;
+				break;
+			case PCI_NVME_AE_INFO_LBA_STATUS_ALERT:
+				lid = NVME_LOG_LBA_STATUS_INFORMATION;
+				break;
+			case PCI_NVME_AE_INFO_ENDURANCE_GROUP_CHANGE:
+				lid = NVME_LOG_ENDURANCE_GROUP_EVENT_AGGREGATE;
+				break;
+			default:
+				lid = 0;
+			}
+			break;
+		default:
+			/* bad type?!? */
+			EPRINTLN("%s unknown AEN type %u", __func__, atype);
+			status = NVME_SC_INTERNAL_DEVICE_ERROR;
+			break;
+		}
+
+		aer = pci_nvme_aer_get(sc);
+		assert(aer != NULL);
+
+		DPRINTF("%s: CID=%#x CDW0=%#x", __func__, aer->cid, (lid << 16) | (aen->event_data << 8) | atype);
+		pci_nvme_cq_update(sc, &sc->compl_queues[0],
+		    (lid << 16) | (aen->event_data << 8) | atype, /* cdw0 */
+		    aer->cid,
+		    0,		/* SQID */
+		    status);
+
+		aen->event_data = 0;
+		aen->posted = false;
+
+		pci_generate_msix(sc->nsc_pi, 0);
+	}
+}
+
+static void *
+aen_thr(void *arg)
+{
+	struct pci_nvme_softc *sc;
+
+	sc = arg;
+
+	pthread_mutex_lock(&sc->aen_mtx);
+	for (;;) {
+		pci_nvme_aen_process(sc);
+		pthread_cond_wait(&sc->aen_cond, &sc->aen_mtx);
+	}
+	pthread_mutex_unlock(&sc->aen_mtx);
+
+	pthread_exit(NULL);
+	return (NULL);
+}
+
 static void
 pci_nvme_reset_locked(struct pci_nvme_softc *sc)
 {
@@ -756,6 +1005,7 @@ pci_nvme_reset_locked(struct pci_nvme_softc *sc)
 	sc->num_q_is_set = false;
 
 	pci_nvme_aer_destroy(sc);
+	pci_nvme_aen_destroy(sc);
 }
 
 static void
@@ -1406,8 +1656,13 @@ nvme_opc_set_features(struct pci_nvme_softc *sc, struct nvme_command *command,
 	if (feat->set)
 		feat->set(sc, feat, command, compl);
 
-	if (compl->status == NVME_SC_SUCCESS)
+	DPRINTF("%s: status=%#x cdw11=%#x", __func__, compl->status, command->cdw11);
+	if (compl->status == NVME_SC_SUCCESS) {
 		feat->cdw11 = command->cdw11;
+		if ((fid == NVME_FEAT_ASYNC_EVENT_CONFIGURATION) &&
+		    (command->cdw11 != 0))
+			pci_nvme_aen_notify(sc);
+	}
 
 	return (0);
 }
@@ -1525,7 +1780,8 @@ static int
 nvme_opc_async_event_req(struct pci_nvme_softc* sc,
 	struct nvme_command* command, struct nvme_completion* compl)
 {
-	DPRINTF("%s async event request 0x%x", __func__, command->cdw11);
+	DPRINTF("%s async event request count=%u aerl=%u cid=%#x", __func__,
+	    sc->aer_count, sc->ctrldata.aerl, command->cid);
 
 	/* Don't exceed the Async Event Request Limit (AERL). */
 	if (pci_nvme_aer_limit_reached(sc)) {
@@ -1546,6 +1802,7 @@ nvme_opc_async_event_req(struct pci_nvme_softc* sc,
 	 * there is an event reflective of the request to get event.
 	 */
 	compl->status = NVME_NO_STATUS;
+	pci_nvme_aen_notify(sc);
 
 	return (0);
 }
@@ -2025,6 +2282,7 @@ nvme_opc_write_read(struct pci_nvme_softc *sc,
 
 	lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
 	nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
 	if (pci_nvme_out_of_range(nvstore, lba, nblocks)) {
 		WPRINTF("%s command would exceed LBA range", __func__);
 		pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
@@ -2789,6 +3047,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, nvlist_t *nvl)
 	pci_nvme_init_features(sc);
 
 	pci_nvme_aer_init(sc);
+	pci_nvme_aen_init(sc);
 
 	pci_nvme_reset(sc);