svn commit: r359831 - stable/12/usr.sbin/bhyve
Chuck Tuffli
chuck at FreeBSD.org
Sun Apr 12 19:14:28 UTC 2020
Author: chuck
Date: Sun Apr 12 19:14:27 2020
New Revision: 359831
URL: https://svnweb.freebsd.org/changeset/base/359831
Log:
MFC r359364
bhyve: implement NVMe deallocate command
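
The change translates the NVMe Dataset Management (DSM) command's Deallocate
attribute into blockif_delete() calls on the backing store, and adds a dsm=
device option (auto/enable/disable) that controls whether the emulated
controller advertises DSM support in ONCS. As background, a minimal sketch of
how the relevant command dwords are decoded, assuming the spec-defined field
layout; the struct below is an illustrative stand-in, not bhyve's
struct nvme_command:

#include <stdint.h>
#include <stdio.h>

#define NVME_DSM_ATTR_DEALLOCATE 0x4	/* cdw11 AD bit, per NVMe 1.x */

struct dsm_fields {			/* stand-in for the command dwords */
	uint32_t cdw10;			/* bits 7:0 = Number of Ranges (0's based) */
	uint32_t cdw11;			/* bit 2 = Attribute - Deallocate */
};

int
main(void)
{
	struct dsm_fields cmd = {
		.cdw10 = 0,
		.cdw11 = NVME_DSM_ATTR_DEALLOCATE,
	};
	uint32_t nr = cmd.cdw10 & 0xff;

	/* NR is zero-based: cdw10 == 0 still describes one range */
	printf("ranges=%u deallocate=%s\n", nr + 1,
	    (cmd.cdw11 & NVME_DSM_ATTR_DEALLOCATE) ? "yes" : "no");
	return (0);
}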
Modified:
stable/12/usr.sbin/bhyve/pci_nvme.c
Directory Properties:
stable/12/ (props changed)
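
The dsm= option selects one of three modes for the ONCS Dataset Management
bit. A condensed sketch of the mapping the hunks below implement; the enum
names follow the patch, while the helper function itself is illustrative:

#include <stdbool.h>

enum nvme_dsm_type {
	NVME_DATASET_MANAGEMENT_AUTO,	 /* follow the backing store */
	NVME_DATASET_MANAGEMENT_ENABLE,	 /* always advertise DSM */
	NVME_DATASET_MANAGEMENT_DISABLE, /* never advertise DSM */
};

/* Illustrative helper: would the ONCS DSM bit be set? */
bool
oncs_dsm(enum nvme_dsm_type mode, bool backing_can_delete)
{
	switch (mode) {
	case NVME_DATASET_MANAGEMENT_AUTO:
		return (backing_can_delete);
	case NVME_DATASET_MANAGEMENT_ENABLE:
		return (true);
	default:
		return (false);
	}
}

With dsm=auto, the default set in pci_nvme_parse_opts(), the bit simply
follows blockif_candelete() on the backing store; a device string such as
"-s 4,nvme,disk.img,dsm=enable" (slot and path hypothetical) would force it
on regardless.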
Modified: stable/12/usr.sbin/bhyve/pci_nvme.c
==============================================================================
--- stable/12/usr.sbin/bhyve/pci_nvme.c Sun Apr 12 19:02:34 2020 (r359830)
+++ stable/12/usr.sbin/bhyve/pci_nvme.c Sun Apr 12 19:14:27 2020 (r359831)
@@ -180,6 +180,7 @@ struct pci_nvme_blockstore {
uint32_t sectsz;
uint32_t sectsz_bits;
uint64_t eui64;
+ uint32_t deallocate:1;
};
struct pci_nvme_ioreq {
@@ -209,6 +210,15 @@ struct pci_nvme_ioreq {
struct iovec iovpadding[NVME_MAX_BLOCKIOVS-BLOCKIF_IOV_MAX];
};
+enum nvme_dsm_type {
+ /* Dataset Management bit in ONCS reflects backing storage capability */
+ NVME_DATASET_MANAGEMENT_AUTO,
+ /* Unconditionally set Dataset Management bit in ONCS */
+ NVME_DATASET_MANAGEMENT_ENABLE,
+ /* Unconditionally clear Dataset Management bit in ONCS */
+ NVME_DATASET_MANAGEMENT_DISABLE,
+};
+
struct pci_nvme_softc {
struct pci_devinst *nsc_pi;
@@ -246,6 +256,8 @@ struct pci_nvme_softc {
uint32_t intr_coales_aggr_time; /* 0x08: uS to delay intr */
uint32_t intr_coales_aggr_thresh; /* 0x08: compl-Q entries */
uint32_t async_ev_config; /* 0x0B: async event config */
+
+ enum nvme_dsm_type dataset_management;
};
@@ -285,6 +297,9 @@ static void pci_nvme_io_partial(struct blockif_req *br
((NVME_STATUS_SCT_MASK << NVME_STATUS_SCT_SHIFT) |\
(NVME_STATUS_SC_MASK << NVME_STATUS_SC_SHIFT))
+#define NVME_ONCS_DSM (NVME_CTRLR_DATA_ONCS_DSM_MASK << \
+ NVME_CTRLR_DATA_ONCS_DSM_SHIFT)
+
static __inline void
cpywithpad(char *dst, size_t dst_size, const char *src, char pad)
{
@@ -363,6 +378,19 @@ pci_nvme_init_ctrldata(struct pci_nvme_softc *sc)
(4 << NVME_CTRLR_DATA_CQES_MIN_SHIFT);
cd->nn = 1; /* number of namespaces */
+ cd->oncs = 0;
+ switch (sc->dataset_management) {
+ case NVME_DATASET_MANAGEMENT_AUTO:
+ if (sc->nvstore.deallocate)
+ cd->oncs |= NVME_ONCS_DSM;
+ break;
+ case NVME_DATASET_MANAGEMENT_ENABLE:
+ cd->oncs |= NVME_ONCS_DSM;
+ break;
+ default:
+ break;
+ }
+
cd->fna = 0x03;
cd->power_state[0].mp = 10;
@@ -429,6 +457,9 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc,
nd->ncap = nd->nsze;
nd->nuse = nd->nsze;
+ if (nvstore->type == NVME_STOR_BLOCKIF)
+ nvstore->deallocate = blockif_candelete(nvstore->ctx);
+
nd->nlbaf = 0; /* NLBAF is a 0's based value (i.e. 1 LBA Format) */
nd->flbas = 0;
@@ -1339,7 +1370,7 @@ pci_nvme_io_done(struct blockif_req *br, int err)
uint16_t code, status;
DPRINTF(("%s error %d %s", __func__, err, strerror(err)));
-
+
/* TODO return correct error */
code = err ? NVME_SC_DATA_TRANSFER_ERROR : NVME_SC_SUCCESS;
pci_nvme_status_genc(&status, code);
@@ -1358,7 +1389,128 @@ pci_nvme_io_partial(struct blockif_req *br, int err)
pthread_cond_signal(&req->cv);
}
+static void
+pci_nvme_dealloc_sm(struct blockif_req *br, int err)
+{
+ struct pci_nvme_ioreq *req = br->br_param;
+ struct pci_nvme_softc *sc = req->sc;
+ bool done = true;
+ uint16_t status;
+ if (err) {
+ pci_nvme_status_genc(&status, NVME_SC_INTERNAL_DEVICE_ERROR);
+ } else if ((req->prev_gpaddr + 1) == (req->prev_size)) {
+ pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
+ } else {
+ struct iovec *iov = req->io_req.br_iov;
+
+ req->prev_gpaddr++;
+ iov += req->prev_gpaddr;
+
+ /* The iov_* values already include the sector size */
+ req->io_req.br_offset = (off_t)iov->iov_base;
+ req->io_req.br_resid = iov->iov_len;
+ if (blockif_delete(sc->nvstore.ctx, &req->io_req)) {
+ pci_nvme_status_genc(&status,
+ NVME_SC_INTERNAL_DEVICE_ERROR);
+ } else
+ done = false;
+ }
+
+ if (done) {
+ pci_nvme_set_completion(sc, req->nvme_sq, req->sqid,
+ req->cid, 0, status, 0);
+ pci_nvme_release_ioreq(sc, req);
+ }
+}
+
+static int
+nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
+ struct nvme_command *cmd,
+ struct pci_nvme_blockstore *nvstore,
+ struct pci_nvme_ioreq *req,
+ uint16_t *status)
+{
+ int err = -1;
+
+ if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
+ pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
+ goto out;
+ }
+
+ if (cmd->cdw11 & NVME_DSM_ATTR_DEALLOCATE) {
+ struct nvme_dsm_range *range;
+ uint32_t nr, r;
+ int sectsz = sc->nvstore.sectsz;
+
+ /*
+ * DSM calls are advisory only, and compliant controllers
+ * may choose to take no actions (i.e. return Success).
+ */
+ if (!nvstore->deallocate) {
+ pci_nvme_status_genc(status, NVME_SC_SUCCESS);
+ goto out;
+ }
+
+ if (req == NULL) {
+ pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+ goto out;
+ }
+
+ /* copy locally because a range entry could straddle PRPs */
+ range = calloc(1, NVME_MAX_DSM_TRIM);
+ if (range == NULL) {
+ pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+ goto out;
+ }
+ nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
+ (uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
+
+ req->opc = cmd->opc;
+ req->cid = cmd->cid;
+ req->nsid = cmd->nsid;
+ /*
+ * If the request is for more than a single range, store
+ * the ranges in the br_iov. Optimize for the common case
+ * of a single range.
+ *
+ * Note that NVMe Number of Ranges is a zero based value
+ */
+ nr = cmd->cdw10 & 0xff;
+
+ req->io_req.br_iovcnt = 0;
+ req->io_req.br_offset = range[0].starting_lba * sectsz;
+ req->io_req.br_resid = range[0].length * sectsz;
+
+ if (nr == 0) {
+ req->io_req.br_callback = pci_nvme_io_done;
+ } else {
+ struct iovec *iov = req->io_req.br_iov;
+
+ for (r = 0; r <= nr; r++) {
+ iov[r].iov_base = (void *)(range[r].starting_lba * sectsz);
+ iov[r].iov_len = range[r].length * sectsz;
+ }
+ req->io_req.br_callback = pci_nvme_dealloc_sm;
+
+ /*
+ * Use prev_gpaddr to track the current entry and
+ * prev_size to track the number of entries
+ */
+ req->prev_gpaddr = 0;
+ req->prev_size = r;
+ }
+
+ err = blockif_delete(nvstore->ctx, &req->io_req);
+ if (err)
+ pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
+
+ free(range);
+ }
+out:
+ return (err);
+}
+
static void
pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
{
@@ -1410,16 +1562,27 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint
continue;
}
- nblocks = (cmd->cdw12 & 0xFFFF) + 1;
-
- bytes = nblocks * sc->nvstore.sectsz;
-
if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
req = pci_nvme_get_ioreq(sc);
req->nvme_sq = sq;
req->sqid = idx;
}
+ if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
+ if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
+ &status)) {
+ pci_nvme_set_completion(sc, sq, idx, cmd->cid,
+ 0, status, 1);
+ if (req)
+ pci_nvme_release_ioreq(sc, req);
+ }
+ continue;
+ }
+
+ nblocks = (cmd->cdw12 & 0xFFFF) + 1;
+
+ bytes = nblocks * sc->nvstore.sectsz;
+
/*
* If data starts mid-page and flows into the next page, then
* increase page count
@@ -1868,6 +2031,7 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *o
sc->ioslots = NVME_IOSLOTS;
sc->num_squeues = sc->max_queues;
sc->num_cqueues = sc->max_queues;
+ sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
sectsz = 0;
uopt = strdup(opts);
@@ -1912,6 +2076,13 @@ pci_nvme_parse_opts(struct pci_nvme_softc *sc, char *o
}
} else if (!strcmp("eui64", xopts)) {
sc->nvstore.eui64 = htobe64(strtoull(config, NULL, 0));
+ } else if (!strcmp("dsm", xopts)) {
+ if (!strcmp("auto", config))
+ sc->dataset_management = NVME_DATASET_MANAGEMENT_AUTO;
+ else if (!strcmp("enable", config))
+ sc->dataset_management = NVME_DATASET_MANAGEMENT_ENABLE;
+ else if (!strcmp("disable", config))
+ sc->dataset_management = NVME_DATASET_MANAGEMENT_DISABLE;
} else if (optidx == 0) {
snprintf(bident, sizeof(bident), "%d:%d",
sc->nsc_pi->pi_slot, sc->nsc_pi->pi_func);
@@ -2031,8 +2202,12 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *p
sem_init(&sc->iosemlock, 0, sc->ioslots);
pci_nvme_reset(sc);
- pci_nvme_init_ctrldata(sc);
+ /*
+ * Controller data depends on Namespace data so initialize Namespace
+ * data first.
+ */
pci_nvme_init_nsdata(sc, &sc->nsdata, 1, &sc->nvstore);
+ pci_nvme_init_ctrldata(sc);
pci_nvme_init_logpages(sc);
pci_lintr_request(pi);
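
For reference, a self-contained sketch of how DSM ranges become
byte-addressed delete requests, as nvme_opc_dataset_mgmt() above does.
struct nvme_dsm_range is assumed to mirror the sys/dev/nvme layout, and the
512-byte sector size and range values are assumptions for the example:

#include <stdint.h>
#include <stdio.h>

struct nvme_dsm_range {		/* assumed to mirror sys/dev/nvme */
	uint32_t attributes;
	uint32_t length;	/* in logical blocks */
	uint64_t starting_lba;
};

int
main(void)
{
	const uint32_t sectsz = 512;	/* assumed sector size */
	struct nvme_dsm_range range[2] = {
		{ .length = 8,  .starting_lba = 0 },
		{ .length = 16, .starting_lba = 1024 },
	};
	uint32_t nr = 1;	/* cdw10 & 0xff; zero-based, so two ranges */

	/* Each range becomes one byte-addressed delete request */
	for (uint32_t r = 0; r <= nr; r++)
		printf("delete offset=%ju resid=%ju\n",
		    (uintmax_t)(range[r].starting_lba * sectsz),
		    (uintmax_t)(range[r].length * (uint64_t)sectsz));
	return (0);
}

For multi-range commands the patch chains one blockif_delete() per range
from the pci_nvme_dealloc_sm() completion callback, reusing the request's
iov entries to hold the per-range byte offsets and lengths, with
prev_gpaddr as the cursor and prev_size as the entry count.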