svn commit: r241661 - head/sys/dev/nvme
From: Jim Harris <jimharris at FreeBSD.org>
Date: Thu Oct 18 00:40:42 UTC 2012
Author: jimharris
Date: Thu Oct 18 00:40:40 2012
New Revision: 241661
URL: http://svn.freebsd.org/changeset/base/241661
Log:
Clean up uio-related code to use struct nvme_request and
nvme_ctrlr_submit_io_request().
While here, also fix the case where a uio has more than one iovec.
NVMe's definition of SGEs (called PRPs) allows only the first SGE to
start on a non-page boundary. The simplest way to handle this is to
construct a temporary uio for each iovec and submit a separate NVMe
request for each.
Sponsored by: Intel
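
To make the per-iovec splitting described above concrete, here is a small
userland sketch. It is not driver code and is not part of this commit;
struct mock_uio and submit_one() are invented stand-ins. It only shows how a
multi-iovec transfer is decomposed into one request per iovec with a running
offset, mirroring the loop added to nvme_ns_physio() in the diff that follows.

/*
 * Illustrative sketch only: split a multi-iovec transfer into one
 * request per iovec, as the new nvme_ns_physio() loop does.
 * mock_uio and submit_one() are hypothetical names for this example.
 */
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/uio.h>		/* struct iovec */

struct mock_uio {
	struct iovec	*uio_iov;
	int		 uio_iovcnt;
	uint64_t	 uio_offset;
};

/* Stand-in for submitting a single-iovec read or write request. */
static void
submit_one(uint64_t offset, void *base, size_t len)
{
	printf("request: offset=%ju base=%p len=%zu\n",
	    (uintmax_t)offset, base, len);
}

static void
split_and_submit(struct mock_uio *uio)
{
	uint64_t offset = uio->uio_offset;
	int i;

	/*
	 * PRP lists only tolerate a non-page-aligned start address in
	 * the first entry, so each iovec is issued as its own request
	 * instead of chaining all iovecs into one SGL.
	 */
	for (i = 0; i < uio->uio_iovcnt; i++) {
		submit_one(offset, uio->uio_iov[i].iov_base,
		    uio->uio_iov[i].iov_len);
		offset += uio->uio_iov[i].iov_len;
	}
}

int
main(void)
{
	static char a[4096], b[512];
	struct iovec iov[2] = {
		{ .iov_base = a, .iov_len = sizeof(a) },
		{ .iov_base = b, .iov_len = sizeof(b) },
	};
	struct mock_uio uio = { iov, 2, 8192 };

	split_and_submit(&uio);
	return (0);
}
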
Modified:
head/sys/dev/nvme/nvme.c
head/sys/dev/nvme/nvme_ctrlr.c
head/sys/dev/nvme/nvme_private.h
head/sys/dev/nvme/nvme_qpair.c
head/sys/dev/nvme/nvme_uio.c
Modified: head/sys/dev/nvme/nvme.c
==============================================================================
--- head/sys/dev/nvme/nvme.c Thu Oct 18 00:39:29 2012 (r241660)
+++ head/sys/dev/nvme/nvme.c Thu Oct 18 00:40:40 2012 (r241661)
@@ -230,15 +230,11 @@ nvme_dump_completion(struct nvme_complet
void
nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg, int error)
{
- struct nvme_tracker *tr;
- struct nvme_qpair *qpair;
+ struct nvme_tracker *tr = arg;
uint32_t cur_nseg;
KASSERT(error == 0, ("nvme_payload_map error != 0\n"));
- tr = (struct nvme_tracker *)arg;
- qpair = tr->qpair;
-
/*
* Note that we specified PAGE_SIZE for alignment and max
* segment size when creating the bus dma tags. So here
@@ -259,7 +255,7 @@ nvme_payload_map(void *arg, bus_dma_segm
}
}
- nvme_qpair_submit_cmd(qpair, tr);
+ nvme_qpair_submit_cmd(tr->qpair, tr);
}
static int
Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c Thu Oct 18 00:39:29 2012 (r241660)
+++ head/sys/dev/nvme/nvme_ctrlr.c Thu Oct 18 00:40:40 2012 (r241661)
@@ -833,12 +833,21 @@ nvme_ctrlr_submit_io_request(struct nvme
tr->req = req;
- if (req->payload_size > 0) {
- err = bus_dmamap_load(tr->qpair->dma_tag, tr->payload_dma_map,
- req->payload, req->payload_size,
- nvme_payload_map, tr, 0);
+ if (req->uio == NULL) {
+ if (req->payload_size > 0) {
+ err = bus_dmamap_load(tr->qpair->dma_tag,
+ tr->payload_dma_map, req->payload,
+ req->payload_size,
+ nvme_payload_map, tr, 0);
+ if (err != 0)
+ panic("bus_dmamap_load returned non-zero!\n");
+ } else
+ nvme_qpair_submit_cmd(tr->qpair, tr);
+ } else {
+ err = bus_dmamap_load_uio(tr->qpair->dma_tag,
+ tr->payload_dma_map, req->uio,
+ nvme_payload_map_uio, tr, 0);
if (err != 0)
panic("bus_dmamap_load returned non-zero!\n");
- } else
- nvme_qpair_submit_cmd(tr->qpair, tr);
+ }
}
Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h Thu Oct 18 00:39:29 2012 (r241660)
+++ head/sys/dev/nvme/nvme_private.h Thu Oct 18 00:40:40 2012 (r241661)
@@ -101,6 +101,7 @@ struct nvme_request {
struct nvme_command cmd;
void *payload;
uint32_t payload_size;
+ struct uio *uio;
nvme_cb_fn_t cb_fn;
void *cb_arg;
SLIST_ENTRY(nvme_request) slist;
@@ -333,6 +334,8 @@ void nvme_ctrlr_cmd_asynchronous_event_r
void nvme_payload_map(void *arg, bus_dma_segment_t *seg, int nseg,
int error);
+void nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
+ bus_size_t mapsize, int error);
int nvme_ctrlr_construct(struct nvme_controller *ctrlr, device_t dev);
int nvme_ctrlr_reset(struct nvme_controller *ctrlr);
@@ -392,6 +395,22 @@ nvme_allocate_request(void *payload, uin
return (req);
}
+static __inline struct nvme_request *
+nvme_allocate_request_uio(struct uio *uio, nvme_cb_fn_t cb_fn, void *cb_arg)
+{
+ struct nvme_request *req;
+
+ req = uma_zalloc(nvme_request_zone, M_NOWAIT | M_ZERO);
+ if (req == NULL)
+ return (NULL);
+
+ req->uio = uio;
+ req->cb_fn = cb_fn;
+ req->cb_arg = cb_arg;
+
+ return (req);
+}
+
#define nvme_free_request(req) uma_zfree(nvme_request_zone, req)
#endif /* __NVME_PRIVATE_H__ */
Modified: head/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- head/sys/dev/nvme/nvme_qpair.c Thu Oct 18 00:39:29 2012 (r241660)
+++ head/sys/dev/nvme/nvme_qpair.c Thu Oct 18 00:40:40 2012 (r241661)
@@ -163,7 +163,7 @@ nvme_qpair_process_completions(struct nv
/* nvme_qpair_submit_cmd() will release the lock. */
nvme_qpair_submit_cmd(qpair, tr);
else {
- if (req->payload_size > 0)
+ if (req->payload_size > 0 || req->uio != NULL)
bus_dmamap_unload(qpair->dma_tag,
tr->payload_dma_map);
Modified: head/sys/dev/nvme/nvme_uio.c
==============================================================================
--- head/sys/dev/nvme/nvme_uio.c Thu Oct 18 00:39:29 2012 (r241660)
+++ head/sys/dev/nvme/nvme_uio.c Thu Oct 18 00:40:40 2012 (r241661)
@@ -38,8 +38,10 @@ static void
nvme_uio_done(void *arg, const struct nvme_completion *status)
{
struct mtx *mtx;
+ struct uio *uio = arg;
- /* TODO: update uio flags based on status */
+ if (status->sf_sc == 0 && status->sf_sct == 0)
+ uio->uio_resid = 0;
mtx = mtx_pool_find(mtxpool_sleep, arg);
mtx_lock(mtx);
@@ -47,33 +49,17 @@ nvme_uio_done(void *arg, const struct nv
mtx_unlock(mtx);
}
-static struct nvme_tracker *
-nvme_allocate_tracker_uio(struct nvme_controller *ctrlr, struct uio *uio,
- struct nvme_request *req)
-{
- struct nvme_tracker *tr;
- struct nvme_qpair *qpair;
-
- if (ctrlr->per_cpu_io_queues)
- qpair = &ctrlr->ioq[curcpu];
- else
- qpair = &ctrlr->ioq[0];
-
- tr = nvme_qpair_allocate_tracker(qpair);
-
- if (tr == NULL)
- return (NULL);
-
- tr->qpair = qpair;
- tr->req = req;
-
- return (tr);
-}
-
-static void
+void
nvme_payload_map_uio(void *arg, bus_dma_segment_t *seg, int nseg,
bus_size_t mapsize, int error)
{
+ struct nvme_tracker *tr = arg;
+
+ /*
+ * Now that we know the actual size of the uio, divide it by the
+ * sector size that we stored in cdw12.
+ */
+ tr->req->cmd.cdw12 = (mapsize / tr->req->cmd.cdw12)-1;
nvme_payload_map(arg, seg, nseg, error);
}
@@ -81,20 +67,12 @@ static int
nvme_read_uio(struct nvme_namespace *ns, struct uio *uio)
{
struct nvme_request *req;
- struct nvme_tracker *tr;
struct nvme_command *cmd;
- int err, i;
- uint64_t lba, iosize = 0;
-
- for (i = 0; i < uio->uio_iovcnt; i++) {
- iosize += uio->uio_iov[i].iov_len;
- }
-
- req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio);
+ uint64_t lba;
- tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req);
+ req = nvme_allocate_request_uio(uio, nvme_uio_done, uio);
- if (tr == NULL)
+ if (req == NULL)
return (ENOMEM);
cmd = &req->cmd;
@@ -103,13 +81,16 @@ nvme_read_uio(struct nvme_namespace *ns,
lba = uio->uio_offset / nvme_ns_get_sector_size(ns);
*(uint64_t *)&cmd->cdw10 = lba;
+ /*
+ * Store the sector size in cdw12 (where the LBA count normally goes).
+ * We'll adjust cdw12 in the map_uio callback based on the mapsize
+ * parameter. This allows us to not have to store the namespace
+ * in the request simply to get the sector size in the map_uio
+ * callback.
+ */
+ cmd->cdw12 = nvme_ns_get_sector_size(ns);
- cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1;
-
- err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio,
- nvme_payload_map_uio, tr, 0);
-
- KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n"));
+ nvme_ctrlr_submit_io_request(ns->ctrlr, req);
return (0);
}
@@ -118,20 +99,12 @@ static int
nvme_write_uio(struct nvme_namespace *ns, struct uio *uio)
{
struct nvme_request *req;
- struct nvme_tracker *tr;
struct nvme_command *cmd;
- int err, i;
- uint64_t lba, iosize = 0;
+ uint64_t lba;
- for (i = 0; i < uio->uio_iovcnt; i++) {
- iosize += uio->uio_iov[i].iov_len;
- }
-
- req = nvme_allocate_request(NULL, iosize, nvme_uio_done, uio);
-
- tr = nvme_allocate_tracker_uio(ns->ctrlr, uio, req);
+ req = nvme_allocate_request_uio(uio, nvme_uio_done, uio);
- if (tr == NULL)
+ if (req == NULL)
return (ENOMEM);
cmd = &req->cmd;
@@ -140,13 +113,16 @@ nvme_write_uio(struct nvme_namespace *ns
lba = uio->uio_offset / nvme_ns_get_sector_size(ns);
*(uint64_t *)&cmd->cdw10 = lba;
+ /*
+ * Store the sector size in cdw12 (where the LBA count normally goes).
+ * We'll adjust cdw12 in the map_uio callback based on the mapsize
+ * parameter. This allows us to not have to store the namespace
+ * in the request simply to get the sector size in the map_uio
+ * callback.
+ */
+ cmd->cdw12 = nvme_ns_get_sector_size(ns);
- cmd->cdw12 = (iosize / nvme_ns_get_sector_size(ns))-1;
-
- err = bus_dmamap_load_uio(tr->qpair->dma_tag, tr->payload_dma_map, uio,
- nvme_payload_map_uio, tr, 0);
-
- KASSERT(err == 0, ("bus_dmamap_load_uio returned non-zero!\n"));
+ nvme_ctrlr_submit_io_request(ns->ctrlr, req);
return (0);
}
@@ -154,9 +130,11 @@ nvme_write_uio(struct nvme_namespace *ns
int
nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
{
+ struct uio uio_tmp;
+ struct iovec uio_iov_tmp;
struct nvme_namespace *ns;
struct mtx *mtx;
- int err;
+ int i, nvme_err, physio_err = 0;
#if __FreeBSD_version > 900017
int ref;
#endif
@@ -164,7 +142,7 @@ nvme_ns_physio(struct cdev *dev, struct
PHOLD(curproc);
ns = dev->si_drv1;
- mtx = mtx_pool_find(mtxpool_sleep, uio);
+ mtx = mtx_pool_find(mtxpool_sleep, &uio_tmp);
#if __FreeBSD_version > 900017
dev_refthread(dev, &ref);
@@ -172,15 +150,48 @@ nvme_ns_physio(struct cdev *dev, struct
dev_refthread(dev);
#endif
- mtx_lock(mtx);
- if (uio->uio_rw == UIO_READ)
- err = nvme_read_uio(ns, uio);
- else
- err = nvme_write_uio(ns, uio);
+ /*
+ * NVM Express doesn't really support true SGLs. All SG elements
+ * must be PAGE_SIZE, except for the first and last element.
+ * Because of this, we need to break up each iovec into a separate
+ * NVMe command - otherwise we could end up with sub-PAGE_SIZE
+ * elements in the middle of an SGL which is not allowed.
+ */
+ uio_tmp.uio_iov = &uio_iov_tmp;
+ uio_tmp.uio_iovcnt = 1;
+ uio_tmp.uio_offset = uio->uio_offset;
+ uio_tmp.uio_segflg = uio->uio_segflg;
+ uio_tmp.uio_rw = uio->uio_rw;
+ uio_tmp.uio_td = uio->uio_td;
- if (err == 0)
- msleep(uio, mtx, PRIBIO, "nvme_physio", 0);
- mtx_unlock(mtx);
+ for (i = 0; i < uio->uio_iovcnt; i++) {
+
+ uio_iov_tmp.iov_base = uio->uio_iov[i].iov_base;
+ uio_iov_tmp.iov_len = uio->uio_iov[i].iov_len;
+ uio_tmp.uio_resid = uio_iov_tmp.iov_len;
+
+ mtx_lock(mtx);
+
+ if (uio->uio_rw == UIO_READ)
+ nvme_err = nvme_read_uio(ns, &uio_tmp);
+ else
+ nvme_err = nvme_write_uio(ns, &uio_tmp);
+
+ if (nvme_err == 0)
+ msleep(&uio_tmp, mtx, PRIBIO, "nvme_physio", 0);
+
+ mtx_unlock(mtx);
+
+ if (uio_tmp.uio_resid == 0) {
+ uio->uio_resid -= uio_iov_tmp.iov_len;
+ uio->uio_offset += uio_iov_tmp.iov_len;
+ } else {
+ physio_err = EFAULT;
+ break;
+ }
+
+ uio_tmp.uio_offset += uio_iov_tmp.iov_len;
+ }
#if __FreeBSD_version > 900017
dev_relthread(dev, ref);
@@ -188,9 +199,6 @@ nvme_ns_physio(struct cdev *dev, struct
dev_relthread(dev);
#endif
- if (err == 0)
- uio->uio_resid = 0;
-
PRELE(curproc);
- return (0);
+ return (physio_err);
}
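
As a quick illustration of the cdw12 handling above: the read/write paths now
stash the namespace sector size in cdw12 when building the command, and
nvme_payload_map_uio() later overwrites it with the zero-based LBA count once
bus_dmamap_load_uio() reports the mapped size. The standalone snippet below
restates that arithmetic; sector_size and mapsize are just example values, not
values taken from the commit.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t sector_size = 512;	/* example value; comes from the namespace */
	uint32_t mapsize = 8192;	/* example value; reported by busdma */
	uint32_t cdw12;

	/* Same arithmetic as nvme_payload_map_uio(): NLB is zero-based. */
	cdw12 = (mapsize / sector_size) - 1;
	printf("cdw12 = %u\n", cdw12);	/* prints 15, i.e. 16 blocks */
	return (0);
}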