git: 782105f7c898 - main - vtblk: Use busdma
Date: Tue, 18 Oct 2022 06:03:09 UTC
The branch main has been updated by cperciva:

URL: https://cgit.FreeBSD.org/src/commit/?id=782105f7c89821552304245d71e107b867a3e934

commit 782105f7c89821552304245d71e107b867a3e934
Author:     Colin Percival <cperciva@FreeBSD.org>
AuthorDate: 2022-09-21 19:34:51 +0000
Commit:     Colin Percival <cperciva@FreeBSD.org>
CommitDate: 2022-10-18 06:02:21 +0000

    vtblk: Use busdma

    We assume that bus addresses from busdma are the same thing as "physical"
    addresses in the Virtio specification; this seems to be true for the
    platforms on which Virtio is currently supported.

    For block devices which are limited to a single data segment per I/O, we
    request PAGE_SIZE alignment from busdma; this is necessary in order to
    support unaligned I/Os from userland, which may cross a boundary between
    two non-physically-contiguous pages.

    On devices which support more data segments per I/O, we retain the
    existing behaviour of limiting I/Os to (max data segs - 1) * PAGE_SIZE.

    Reviewed by:    bryanv
    Sponsored by:   https://www.patreon.com/cperciva
    Differential Revision:  https://reviews.freebsd.org/D36667
---
 sys/dev/virtio/block/virtio_blk.c | 161 ++++++++++++++++++++++++++++++++++----
 1 file changed, 144 insertions(+), 17 deletions(-)

diff --git a/sys/dev/virtio/block/virtio_blk.c b/sys/dev/virtio/block/virtio_blk.c
index 65f43c5de2d7..b3b3a6dd1aed 100644
--- a/sys/dev/virtio/block/virtio_blk.c
+++ b/sys/dev/virtio/block/virtio_blk.c
@@ -60,12 +60,14 @@ __FBSDID("$FreeBSD$");
 
 struct vtblk_request {
         struct vtblk_softc              *vbr_sc;
+        bus_dmamap_t                     vbr_mapp;
 
         /* Fields after this point are zeroed for each request. */
         struct virtio_blk_outhdr         vbr_hdr;
         struct bio                      *vbr_bp;
         uint8_t                          vbr_ack;
         uint8_t                          vbr_requeue_on_error;
+        uint8_t                          vbr_busdma_wait;
         int                              vbr_error;
         TAILQ_ENTRY(vtblk_request)       vbr_link;
 };
@@ -86,9 +88,12 @@ struct vtblk_softc {
 #define VTBLK_FLAG_SUSPEND      0x0004
 #define VTBLK_FLAG_BARRIER      0x0008
 #define VTBLK_FLAG_WCE_CONFIG   0x0010
+#define VTBLK_FLAG_BUSDMA_WAIT  0x0020
+#define VTBLK_FLAG_BUSDMA_ALIGN 0x0040
 
         struct virtqueue        *vtblk_vq;
         struct sglist           *vtblk_sglist;
+        bus_dma_tag_t            vtblk_dmat;
         struct disk             *vtblk_disk;
 
         struct bio_queue_head    vtblk_bioq;
@@ -166,7 +171,9 @@
 static struct vtblk_request * vtblk_request_next(struct vtblk_softc *);
 static struct vtblk_request * vtblk_request_bio(struct vtblk_softc *);
-static void     vtblk_request_execute(struct vtblk_request *);
+static int      vtblk_request_execute(struct vtblk_request *, int);
+static void     vtblk_request_execute_cb(void *,
+                    bus_dma_segment_t *, int, int);
 static int      vtblk_request_error(struct vtblk_request *);
 
 static void     vtblk_queue_completed(struct vtblk_softc *,
@@ -363,6 +370,33 @@ vtblk_attach(device_t dev)
                 goto fail;
         }
 
+        /*
+         * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
+         * supports a single data segment; in that case we need busdma to
+         * align to a page boundary so we can send a *contiguous* page size
+         * request to the host.
+         */
+        if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
+                sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
+        error = bus_dma_tag_create(
+            bus_get_dma_tag(dev),                       /* parent */
+            (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ?
+                PAGE_SIZE : 1,
+            0,                                          /* boundary */
+            BUS_SPACE_MAXADDR,                          /* lowaddr */
+            BUS_SPACE_MAXADDR,                          /* highaddr */
+            NULL, NULL,                                 /* filter, filterarg */
+            maxphys,                                    /* max request size */
+            sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS,   /* max # segments */
+            maxphys,                                    /* maxsegsize */
+            0,                                          /* flags */
+            busdma_lock_mutex,                          /* lockfunc */
+            &sc->vtblk_mtx,                             /* lockarg */
+            &sc->vtblk_dmat);
+        if (error) {
+                device_printf(dev, "cannot create bus dma tag\n");
+                goto fail;
+        }
+
         error = vtblk_alloc_virtqueue(sc);
         if (error) {
                 device_printf(dev, "cannot allocate virtqueue\n");
@@ -412,6 +446,11 @@ vtblk_detach(device_t dev)
                 sc->vtblk_disk = NULL;
         }
 
+        if (sc->vtblk_dmat != NULL) {
+                bus_dma_tag_destroy(sc->vtblk_dmat);
+                sc->vtblk_dmat = NULL;
+        }
+
         if (sc->vtblk_sglist != NULL) {
                 sglist_free(sc->vtblk_sglist);
                 sc->vtblk_sglist = NULL;
@@ -737,13 +776,14 @@ vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
          * which is typically greater than maxphys.  Eventually we should
          * just advertise maxphys and split buffers that are too big.
          *
-         * Note we must subtract one additional segment in case of non
-         * page aligned buffers.
+         * If we're not asking busdma to align data to page boundaries, the
+         * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
+         * unaligned I/Os.
          */
-        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
+        dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
             PAGE_SIZE;
-        if (dp->d_maxsize < PAGE_SIZE)
-                dp->d_maxsize = PAGE_SIZE; /* XXX */
+        if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
+                dp->d_maxsize -= PAGE_SIZE;
 
         if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
                 dp->d_fwsectors = blkcfg->geometry.sectors;
@@ -809,6 +849,10 @@ vtblk_request_prealloc(struct vtblk_softc *sc)
                         return (ENOMEM);
 
                 req->vbr_sc = sc;
+                if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp)) {
+                        free(req, M_DEVBUF);
+                        return (ENOMEM);
+                }
 
                 MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
                 MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
@@ -829,6 +873,7 @@ vtblk_request_free(struct vtblk_softc *sc)
 
         while ((req = vtblk_request_dequeue(sc)) != NULL) {
                 sc->vtblk_request_count--;
+                bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
                 free(req, M_DEVBUF);
         }
 
@@ -938,15 +983,43 @@ vtblk_request_bio(struct vtblk_softc *sc)
 
         return (req);
 }
 
+static int
+vtblk_request_execute(struct vtblk_request *req, int flags)
+{
+        struct vtblk_softc *sc = req->vbr_sc;
+        struct bio *bp = req->vbr_bp;
+        int error = 0;
+
+        /*
+         * Call via bus_dmamap_load_bio or directly depending on whether we
+         * have a buffer we need to map.
+         */
+        if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
+                error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
+                    req->vbr_bp, vtblk_request_execute_cb, req, flags);
+                if (error == EINPROGRESS) {
+                        req->vbr_busdma_wait = 1;
+                        sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
+                }
+        } else {
+                vtblk_request_execute_cb(req, NULL, 0, 0);
+        }
+
+        return (error ?
+            error : req->vbr_error);
+}
+
 static void
-vtblk_request_execute(struct vtblk_request *req)
+vtblk_request_execute_cb(void * callback_arg, bus_dma_segment_t * segs,
+    int nseg, int error)
 {
+        struct vtblk_request *req;
         struct vtblk_softc *sc;
         struct virtqueue *vq;
         struct sglist *sg;
         struct bio *bp;
-        int ordered, readable, writable, error;
+        int ordered, readable, writable, i;
 
+        req = (struct vtblk_request *)callback_arg;
         sc = req->vbr_sc;
         vq = sc->vtblk_vq;
         sg = sc->vtblk_sglist;
@@ -954,6 +1027,20 @@ vtblk_request_execute(struct vtblk_request *req)
         ordered = 0;
         writable = 0;
 
+        /*
+         * If we paused request queueing while we waited for busdma to call us
+         * asynchronously, unpause it now; this request made it through so we
+         * don't need to worry about others getting ahead of us.  (Note that we
+         * hold the device mutex so nothing will happen until after we return
+         * anyway.)
+         */
+        if (req->vbr_busdma_wait)
+                sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;
+
+        /* Fail on errors from busdma. */
+        if (error)
+                goto out1;
+
         /*
          * Some hosts (such as bhyve) do not implement the barrier feature,
          * so we emulate it in the driver by allowing the barrier request
@@ -979,10 +1066,18 @@ vtblk_request_execute(struct vtblk_request *req)
         sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
 
         if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
-                error = sglist_append_bio(sg, bp);
-                if (error || sg->sg_nseg == sg->sg_maxseg) {
-                        panic("%s: bio %p data buffer too big %d",
-                            __func__, bp, error);
+                /*
+                 * We cast bus_addr_t to vm_paddr_t here; this isn't valid on
+                 * all platforms, but we believe it works on all platforms
+                 * which are supported by virtio.
+                 */
+                for (i = 0; i < nseg; i++) {
+                        error = sglist_append_phys(sg,
+                            (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
+                        if (error || sg->sg_nseg == sg->sg_maxseg) {
+                                panic("%s: bio %p data buffer too big %d",
+                                    __func__, bp, error);
+                        }
                 }
 
                 /* BIO_READ means the host writes into our buffer. */
@@ -1011,11 +1106,33 @@ vtblk_request_execute(struct vtblk_request *req)
         sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
 
         readable = sg->sg_nseg - writable;
 
+        switch (bp->bio_cmd) {
+        case BIO_READ:
+                bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
+                    BUS_DMASYNC_PREREAD);
+                break;
+        case BIO_WRITE:
+                bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
+                    BUS_DMASYNC_PREWRITE);
+                break;
+        }
+
         error = virtqueue_enqueue(vq, req, sg, readable, writable);
         if (error == 0 && ordered)
                 sc->vtblk_req_ordered = req;
 
+        /*
+         * If we were called asynchronously, we need to notify the queue that
+         * we've added a new request, since the notification from startio was
+         * performed already.
+         */
+        if (error == 0 && req->vbr_busdma_wait)
+                virtqueue_notify(vq);
+
 out:
+        if (error)
+                bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
+out1:
         if (error && req->vbr_requeue_on_error)
                 vtblk_request_requeue_ready(sc, req);
         req->vbr_error = error;
@@ -1054,6 +1171,18 @@ vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
                 }
 
                 bp = req->vbr_bp;
+                switch (bp->bio_cmd) {
+                case BIO_READ:
+                        bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
+                            BUS_DMASYNC_POSTREAD);
+                        bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
+                        break;
+                case BIO_WRITE:
+                        bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
+                            BUS_DMASYNC_POSTWRITE);
+                        bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
+                        break;
+                }
                 bp->bio_error = vtblk_request_error(req);
                 TAILQ_INSERT_TAIL(queue, bp, bio_queue);
 
@@ -1135,7 +1264,7 @@ vtblk_startio(struct vtblk_softc *sc)
         vq = sc->vtblk_vq;
         enq = 0;
 
-        if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
+        if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
                 return;
 
         while (!virtqueue_full(vq)) {
@@ -1144,8 +1273,7 @@ vtblk_startio(struct vtblk_softc *sc)
                         break;
 
                 req->vbr_requeue_on_error = 1;
-                vtblk_request_execute(req);
-                if (req->vbr_error != 0)
+                if (vtblk_request_execute(req, BUS_DMA_WAITOK))
                         break;
 
                 enq++;
@@ -1280,8 +1408,7 @@ vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
         if (!virtqueue_empty(vq))
                 return (EBUSY);
 
-        vtblk_request_execute(req);
-        error = req->vbr_error;
+        error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
         if (error)
                 return (error);
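
The PAGE_SIZE-alignment requirement described in the commit message comes
down to simple arithmetic: a buffer which does not start on a page boundary
can straddle a page boundary even when it is no larger than one page, and the
two virtual pages backing it need not be physically contiguous, so a device
limited to a single data segment cannot describe the transfer unless busdma
bounces it into a page-aligned buffer. The following standalone C sketch is
illustrative only and is not part of the commit; it hardcodes a 4 KB page
size, whereas the kernel uses the platform's real PAGE_SIZE:

/*
 * Illustration only: count how many pages a transfer of "len" bytes
 * starting at virtual address "vaddr" touches.
 */
#include <stdio.h>

#define PAGE_SIZE       4096UL  /* assumed page size, for illustration */

static unsigned long
pages_touched(unsigned long vaddr, unsigned long len)
{
        unsigned long first = vaddr / PAGE_SIZE;
        unsigned long last = (vaddr + len - 1) / PAGE_SIZE;

        return (last - first + 1);
}

int
main(void)
{
        /* A page-aligned 4 KB I/O fits in one page: one segment suffices. */
        printf("aligned:   %lu page(s)\n", pages_touched(0x10000, 4096));
        /*
         * The same I/O shifted 512 bytes into the page spans two pages,
         * which may not be physically contiguous; a single-data-segment
         * device needs busdma to bounce it into an aligned buffer.
         */
        printf("unaligned: %lu page(s)\n", pages_touched(0x10200, 4096));
        return (0);
}

The same arithmetic explains the d_maxsize change in vtblk_alloc_disk(): on
devices with more than one data segment, where no alignment is requested, an
unaligned transfer can touch one more page (and hence consume one more
segment) than an aligned transfer of the same size, so the maximum I/O size
is reduced by PAGE_SIZE to leave room for that extra segment.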