svn commit: r276487 - stable/10/sys/dev/virtio/block

Bryan Venteicher bryanv at FreeBSD.org
Thu Jan 1 01:43:01 UTC 2015


Author: bryanv
Date: Thu Jan  1 01:43:00 2015
New Revision: 276487
URL: https://svnweb.freebsd.org/changeset/base/276487

Log:
  MFC r275335:
  
    Cleanup and performance improvement of the virtio_blk driver
  
      - Add support for GEOM direct completion. Depending on the benchmark,
        this tends to give a ~30% improvement in IOPS and bandwidth.
      - Remove an INVARIANTS check in the strategy routine. The condition it
        asserted is caught later on by an existing panic.
      - Rename and reorder various related functions to make more sense.

Modified:
  stable/10/sys/dev/virtio/block/virtio_blk.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/dev/virtio/block/virtio_blk.c
==============================================================================
--- stable/10/sys/dev/virtio/block/virtio_blk.c	Wed Dec 31 23:25:37 2014	(r276486)
+++ stable/10/sys/dev/virtio/block/virtio_blk.c	Thu Jan  1 01:43:00 2015	(r276487)
@@ -58,7 +58,6 @@ struct vtblk_request {
 	struct virtio_blk_outhdr	 vbr_hdr;
 	struct bio			*vbr_bp;
 	uint8_t				 vbr_ack;
-
 	TAILQ_ENTRY(vtblk_request)	 vbr_link;
 };
 
@@ -132,53 +131,60 @@ static int	vtblk_dump(void *, void *, vm
 static void	vtblk_strategy(struct bio *);
 
 static void	vtblk_negotiate_features(struct vtblk_softc *);
+static void	vtblk_setup_features(struct vtblk_softc *);
 static int	vtblk_maximum_segments(struct vtblk_softc *,
 		    struct virtio_blk_config *);
 static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
 static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
-static void	vtblk_set_write_cache(struct vtblk_softc *, int);
-static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
-		    struct virtio_blk_config *);
-static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
 static void	vtblk_alloc_disk(struct vtblk_softc *,
 		    struct virtio_blk_config *);
 static void	vtblk_create_disk(struct vtblk_softc *);
 
-static int	vtblk_quiesce(struct vtblk_softc *);
-static void	vtblk_startio(struct vtblk_softc *);
-static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
-static int	vtblk_execute_request(struct vtblk_softc *,
+static int	vtblk_request_prealloc(struct vtblk_softc *);
+static void	vtblk_request_free(struct vtblk_softc *);
+static struct vtblk_request *
+		vtblk_request_dequeue(struct vtblk_softc *);
+static void	vtblk_request_enqueue(struct vtblk_softc *,
 		    struct vtblk_request *);
+static struct vtblk_request *
+		vtblk_request_next_ready(struct vtblk_softc *);
+static void	vtblk_request_requeue_ready(struct vtblk_softc *,
+		    struct vtblk_request *);
+static struct vtblk_request *
+		vtblk_request_next(struct vtblk_softc *);
+static struct vtblk_request *
+		vtblk_request_bio(struct vtblk_softc *);
+static int	vtblk_request_execute(struct vtblk_softc *,
+		    struct vtblk_request *);
+static int	vtblk_request_error(struct vtblk_request *);
 
-static void	vtblk_vq_intr(void *);
+static void	vtblk_queue_completed(struct vtblk_softc *,
+		    struct bio_queue *);
+static void	vtblk_done_completed(struct vtblk_softc *,
+		    struct bio_queue *);
+static void	vtblk_drain_vq(struct vtblk_softc *, int);
+static void	vtblk_drain(struct vtblk_softc *);
 
-static void	vtblk_stop(struct vtblk_softc *);
+static void	vtblk_startio(struct vtblk_softc *);
+static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
 
 static void	vtblk_read_config(struct vtblk_softc *,
 		    struct virtio_blk_config *);
-static void	vtblk_get_ident(struct vtblk_softc *);
-static void	vtblk_prepare_dump(struct vtblk_softc *);
-static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
-static int	vtblk_flush_dump(struct vtblk_softc *);
+static void	vtblk_ident(struct vtblk_softc *);
 static int	vtblk_poll_request(struct vtblk_softc *,
 		    struct vtblk_request *);
+static int	vtblk_quiesce(struct vtblk_softc *);
+static void	vtblk_vq_intr(void *);
+static void	vtblk_stop(struct vtblk_softc *);
 
-static void	vtblk_finish_completed(struct vtblk_softc *);
-static void	vtblk_drain_vq(struct vtblk_softc *, int);
-static void	vtblk_drain(struct vtblk_softc *);
-
-static int	vtblk_alloc_requests(struct vtblk_softc *);
-static void	vtblk_free_requests(struct vtblk_softc *);
-static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
-static void	vtblk_enqueue_request(struct vtblk_softc *,
-		    struct vtblk_request *);
-
-static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
-static void	vtblk_enqueue_ready(struct vtblk_softc *,
-		    struct vtblk_request *);
+static void	vtblk_dump_prepare(struct vtblk_softc *);
+static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
+static int	vtblk_dump_flush(struct vtblk_softc *);
 
-static int	vtblk_request_error(struct vtblk_request *);
-static void	vtblk_finish_bio(struct bio *, int);
+static void	vtblk_set_write_cache(struct vtblk_softc *, int);
+static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
+		    struct virtio_blk_config *);
+static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
 
 static void	vtblk_setup_sysctl(struct vtblk_softc *);
 static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
@@ -288,30 +294,18 @@ vtblk_attach(device_t dev)
 	struct virtio_blk_config blkcfg;
 	int error;
 
+	virtio_set_feature_desc(dev, vtblk_feature_desc);
+
 	sc = device_get_softc(dev);
 	sc->vtblk_dev = dev;
-
 	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
-
 	bioq_init(&sc->vtblk_bioq);
 	TAILQ_INIT(&sc->vtblk_req_free);
 	TAILQ_INIT(&sc->vtblk_req_ready);
 
-	virtio_set_feature_desc(dev, vtblk_feature_desc);
-	vtblk_negotiate_features(sc);
-
-	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
-		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
-	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
-		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
-	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
-		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
-	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
-		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
-
 	vtblk_setup_sysctl(sc);
+	vtblk_setup_features(sc);
 
-	/* Get local copy of config. */
 	vtblk_read_config(sc, &blkcfg);
 
 	/*
@@ -350,7 +344,7 @@ vtblk_attach(device_t dev)
 		goto fail;
 	}
 
-	error = vtblk_alloc_requests(sc);
+	error = vtblk_request_prealloc(sc);
 	if (error) {
 		device_printf(dev, "cannot preallocate requests\n");
 		goto fail;
@@ -517,14 +511,14 @@ vtblk_dump(void *arg, void *virtual, vm_
 	VTBLK_LOCK(sc);
 
 	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
-		vtblk_prepare_dump(sc);
+		vtblk_dump_prepare(sc);
 		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
 	}
 
 	if (length > 0)
-		error = vtblk_write_dump(sc, virtual, offset, length);
+		error = vtblk_dump_write(sc, virtual, offset, length);
 	else if (virtual == NULL && offset == 0)
-		error = vtblk_flush_dump(sc);
+		error = vtblk_dump_flush(sc);
 	else {
 		error = EINVAL;
 		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
@@ -541,7 +535,7 @@ vtblk_strategy(struct bio *bp)
 	struct vtblk_softc *sc;
 
 	if ((sc = bp->bio_disk->d_drv1) == NULL) {
-		vtblk_finish_bio(bp, EINVAL);
+		vtblk_bio_done(NULL, bp, EINVAL);
 		return;
 	}
 
@@ -551,37 +545,21 @@ vtblk_strategy(struct bio *bp)
 	 */
 	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
 	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
-		vtblk_finish_bio(bp, EROFS);
+		vtblk_bio_done(sc, bp, EROFS);
 		return;
 	}
 
-#ifdef INVARIANTS
-	/*
-	 * Prevent read/write buffers spanning too many segments from
-	 * getting into the queue. This should only trip if d_maxsize
-	 * was incorrectly set.
-	 */
-	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
-		int nsegs, max_nsegs;
-
-		nsegs = sglist_count(bp->bio_data, bp->bio_bcount);
-		max_nsegs = sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS;
+	VTBLK_LOCK(sc);
 
-		KASSERT(nsegs <= max_nsegs,
-		    ("%s: bio %p spanned too many segments: %d, max: %d",
-		    __func__, bp, nsegs, max_nsegs));
+	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
+		VTBLK_UNLOCK(sc);
+		vtblk_bio_done(sc, bp, ENXIO);
+		return;
 	}
-#endif
 
-	VTBLK_LOCK(sc);
-	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
-		vtblk_finish_bio(bp, ENXIO);
-	else {
-		bioq_insert_tail(&sc->vtblk_bioq, bp);
+	bioq_insert_tail(&sc->vtblk_bioq, bp);
+	vtblk_startio(sc);
 
-		if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
-			vtblk_startio(sc);
-	}
 	VTBLK_UNLOCK(sc);
 }
 
@@ -597,6 +575,25 @@ vtblk_negotiate_features(struct vtblk_so
 	sc->vtblk_features = virtio_negotiate_features(dev, features);
 }
 
+static void
+vtblk_setup_features(struct vtblk_softc *sc)
+{
+	device_t dev;
+
+	dev = sc->vtblk_dev;
+
+	vtblk_negotiate_features(sc);
+
+	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
+		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
+		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
+		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
+	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
+		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
+}
+
 static int
 vtblk_maximum_segments(struct vtblk_softc *sc,
     struct virtio_blk_config *blkcfg)
@@ -658,59 +655,6 @@ vtblk_resize_disk(struct vtblk_softc *sc
 }
 
 static void
-vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
-{
-
-	/* Set either writeback (1) or writethrough (0) mode. */
-	virtio_write_dev_config_1(sc->vtblk_dev,
-	    offsetof(struct virtio_blk_config, writeback), wc);
-}
-
-static int
-vtblk_write_cache_enabled(struct vtblk_softc *sc,
-    struct virtio_blk_config *blkcfg)
-{
-	int wc;
-
-	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
-		wc = vtblk_tunable_int(sc, "writecache_mode",
-		    vtblk_writecache_mode);
-		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
-			vtblk_set_write_cache(sc, wc);
-		else
-			wc = blkcfg->writeback;
-	} else
-		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);
-
-	return (wc);
-}
-
-static int
-vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
-{
-	struct vtblk_softc *sc;
-	int wc, error;
-
-	sc = oidp->oid_arg1;
-	wc = sc->vtblk_write_cache;
-
-	error = sysctl_handle_int(oidp, &wc, 0, req);
-	if (error || req->newptr == NULL)
-		return (error);
-	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
-		return (EPERM);
-	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
-		return (EINVAL);
-
-	VTBLK_LOCK(sc);
-	sc->vtblk_write_cache = wc;
-	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
-	VTBLK_UNLOCK(sc);
-
-	return (0);
-}
-
-static void
 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
 {
 	device_t dev;
@@ -726,7 +670,8 @@ vtblk_alloc_disk(struct vtblk_softc *sc,
 	dp->d_name = VTBLK_DISK_NAME;
 	dp->d_unit = device_get_unit(dev);
 	dp->d_drv1 = sc;
-	dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO;
+	dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO |
+	    DISKFLAG_DIRECT_COMPLETION;
 	dp->d_hba_vendor = virtio_get_vendor(dev);
 	dp->d_hba_device = virtio_get_device(dev);
 	dp->d_hba_subvendor = virtio_get_subvendor(dev);
@@ -787,11 +732,7 @@ vtblk_create_disk(struct vtblk_softc *sc
 
 	dp = sc->vtblk_disk;
 
-	/*
-	 * Retrieving the identification string must be done after
-	 * the virtqueue interrupt is setup otherwise it will hang.
-	 */
-	vtblk_get_ident(sc);
+	vtblk_ident(sc);
 
 	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
 	    (uintmax_t) dp->d_mediasize >> 20,
@@ -802,57 +743,107 @@ vtblk_create_disk(struct vtblk_softc *sc
 }
 
 static int
-vtblk_quiesce(struct vtblk_softc *sc)
+vtblk_request_prealloc(struct vtblk_softc *sc)
 {
-	int error;
+	struct vtblk_request *req;
+	int i, nreqs;
 
-	error = 0;
+	nreqs = virtqueue_size(sc->vtblk_vq);
 
-	VTBLK_LOCK_ASSERT(sc);
+	/*
+	 * Preallocate sufficient requests to keep the virtqueue full. Each
+	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
+	 * the number allocated when indirect descriptors are not available.
+	 */
+	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
+		nreqs /= VTBLK_MIN_SEGMENTS;
 
-	while (!virtqueue_empty(sc->vtblk_vq)) {
-		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
-		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
-			error = EBUSY;
-			break;
-		}
+	for (i = 0; i < nreqs; i++) {
+		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
+		if (req == NULL)
+			return (ENOMEM);
+
+		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
+		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
+
+		sc->vtblk_request_count++;
+		vtblk_request_enqueue(sc, req);
 	}
 
-	return (error);
+	return (0);
 }
 
 static void
-vtblk_startio(struct vtblk_softc *sc)
+vtblk_request_free(struct vtblk_softc *sc)
 {
-	struct virtqueue *vq;
 	struct vtblk_request *req;
-	int enq;
 
-	vq = sc->vtblk_vq;
-	enq = 0;
+	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
 
-	VTBLK_LOCK_ASSERT(sc);
+	while ((req = vtblk_request_dequeue(sc)) != NULL) {
+		sc->vtblk_request_count--;
+		free(req, M_DEVBUF);
+	}
 
-	while (!virtqueue_full(vq)) {
-		if ((req = vtblk_dequeue_ready(sc)) == NULL)
-			req = vtblk_bio_request(sc);
-		if (req == NULL)
-			break;
+	KASSERT(sc->vtblk_request_count == 0,
+	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
+}
 
-		if (vtblk_execute_request(sc, req) != 0) {
-			vtblk_enqueue_ready(sc, req);
-			break;
-		}
+static struct vtblk_request *
+vtblk_request_dequeue(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
 
-		enq++;
+	req = TAILQ_FIRST(&sc->vtblk_req_free);
+	if (req != NULL) {
+		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
+		bzero(req, sizeof(struct vtblk_request));
 	}
 
-	if (enq > 0)
-		virtqueue_notify(vq);
+	return (req);
+}
+
+static void
+vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+
+	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
+}
+
+static struct vtblk_request *
+vtblk_request_next_ready(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+
+	req = TAILQ_FIRST(&sc->vtblk_req_ready);
+	if (req != NULL)
+		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
+
+	return (req);
+}
+
+static void
+vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
+{
+
+	/* NOTE: Currently, there will be at most one request in the queue. */
+	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
+}
+
+static struct vtblk_request *
+vtblk_request_next(struct vtblk_softc *sc)
+{
+	struct vtblk_request *req;
+
+	req = vtblk_request_next_ready(sc);
+	if (req != NULL)
+		return (req);
+
+	return (vtblk_request_bio(sc));
 }
 
 static struct vtblk_request *
-vtblk_bio_request(struct vtblk_softc *sc)
+vtblk_request_bio(struct vtblk_softc *sc)
 {
 	struct bio_queue_head *bioq;
 	struct vtblk_request *req;
@@ -863,7 +854,7 @@ vtblk_bio_request(struct vtblk_softc *sc
 	if (bioq_first(bioq) == NULL)
 		return (NULL);
 
-	req = vtblk_dequeue_request(sc);
+	req = vtblk_request_dequeue(sc);
 	if (req == NULL)
 		return (NULL);
 
@@ -888,11 +879,14 @@ vtblk_bio_request(struct vtblk_softc *sc
 		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
 	}
 
+	if (bp->bio_flags & BIO_ORDERED)
+		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
+
 	return (req);
 }
 
 static int
-vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
+vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
 {
 	struct virtqueue *vq;
 	struct sglist *sg;
@@ -905,26 +899,20 @@ vtblk_execute_request(struct vtblk_softc
 	ordered = 0;
 	writable = 0;
 
-	VTBLK_LOCK_ASSERT(sc);
-
 	/*
-	 * Wait until the ordered request completes before
-	 * executing subsequent requests.
+	 * Some hosts (such as bhyve) do not implement the barrier feature,
+	 * so we emulate it in the driver by allowing the barrier request
+	 * to be the only one in flight.
 	 */
-	if (sc->vtblk_req_ordered != NULL)
-		return (EBUSY);
-
-	if (bp->bio_flags & BIO_ORDERED) {
-		if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
-			/*
-			 * This request will be executed once all
-			 * the in-flight requests are completed.
-			 */
+	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
+		if (sc->vtblk_req_ordered != NULL)
+			return (EBUSY);
+		if (bp->bio_flags & BIO_ORDERED) {
 			if (!virtqueue_empty(vq))
 				return (EBUSY);
 			ordered = 1;
-		} else
-			req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;
+			req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER;
+		}
 	}
 
 	sglist_reset(sg);
@@ -933,7 +921,7 @@ vtblk_execute_request(struct vtblk_softc
 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
 		error = sglist_append_bio(sg, bp);
 		if (error || sg->sg_nseg == sg->sg_maxseg) {
-			panic("%s: data buffer too big bio:%p error:%d",
+			panic("%s: bio %p data buffer too big %d",
 			    __func__, bp, error);
 		}
 
@@ -953,44 +941,156 @@ vtblk_execute_request(struct vtblk_softc
 	return (error);
 }
 
-static void
-vtblk_vq_intr(void *xsc)
+static int
+vtblk_request_error(struct vtblk_request *req)
 {
-	struct vtblk_softc *sc;
-	struct virtqueue *vq;
-
-	sc = xsc;
-	vq = sc->vtblk_vq;
+	int error;
 
-again:
-	VTBLK_LOCK(sc);
-	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
-		VTBLK_UNLOCK(sc);
-		return;
+	switch (req->vbr_ack) {
+	case VIRTIO_BLK_S_OK:
+		error = 0;
+		break;
+	case VIRTIO_BLK_S_UNSUPP:
+		error = ENOTSUP;
+		break;
+	default:
+		error = EIO;
+		break;
 	}
 
-	vtblk_finish_completed(sc);
+	return (error);
+}
 
-	if ((sc->vtblk_flags & VTBLK_FLAG_SUSPEND) == 0)
-		vtblk_startio(sc);
-	else
-		wakeup(&sc->vtblk_vq);
+static void
+vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
+{
+	struct vtblk_request *req;
+	struct bio *bp;
 
-	if (virtqueue_enable_intr(vq) != 0) {
-		virtqueue_disable_intr(vq);
-		VTBLK_UNLOCK(sc);
-		goto again;
+	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
+		if (sc->vtblk_req_ordered != NULL) {
+			MPASS(sc->vtblk_req_ordered == req);
+			sc->vtblk_req_ordered = NULL;
+		}
+
+		bp = req->vbr_bp;
+		bp->bio_error = vtblk_request_error(req);
+		TAILQ_INSERT_TAIL(queue, bp, bio_queue);
+
+		vtblk_request_enqueue(sc, req);
 	}
+}
 
-	VTBLK_UNLOCK(sc);
+static void
+vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
+{
+	struct bio *bp, *tmp;
+
+	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
+		if (bp->bio_error != 0)
+			disk_err(bp, "hard error", -1, 1);
+		vtblk_bio_done(sc, bp, bp->bio_error);
+	}
 }
 
 static void
-vtblk_stop(struct vtblk_softc *sc)
+vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
 {
+	struct virtqueue *vq;
+	struct vtblk_request *req;
+	int last;
 
-	virtqueue_disable_intr(sc->vtblk_vq);
-	virtio_stop(sc->vtblk_dev);
+	vq = sc->vtblk_vq;
+	last = 0;
+
+	while ((req = virtqueue_drain(vq, &last)) != NULL) {
+		if (!skip_done)
+			vtblk_bio_done(sc, req->vbr_bp, ENXIO);
+
+		vtblk_request_enqueue(sc, req);
+	}
+
+	sc->vtblk_req_ordered = NULL;
+	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
+}
+
+static void
+vtblk_drain(struct vtblk_softc *sc)
+{
+	struct bio_queue queue;
+	struct bio_queue_head *bioq;
+	struct vtblk_request *req;
+	struct bio *bp;
+
+	bioq = &sc->vtblk_bioq;
+	TAILQ_INIT(&queue);
+
+	if (sc->vtblk_vq != NULL) {
+		vtblk_queue_completed(sc, &queue);
+		vtblk_done_completed(sc, &queue);
+
+		vtblk_drain_vq(sc, 0);
+	}
+
+	while ((req = vtblk_request_next_ready(sc)) != NULL) {
+		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
+		vtblk_request_enqueue(sc, req);
+	}
+
+	while (bioq_first(bioq) != NULL) {
+		bp = bioq_takefirst(bioq);
+		vtblk_bio_done(sc, bp, ENXIO);
+	}
+
+	vtblk_request_free(sc);
+}
+
+static void
+vtblk_startio(struct vtblk_softc *sc)
+{
+	struct virtqueue *vq;
+	struct vtblk_request *req;
+	int enq;
+
+	VTBLK_LOCK_ASSERT(sc);
+	vq = sc->vtblk_vq;
+	enq = 0;
+
+	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
+		return;
+
+	while (!virtqueue_full(vq)) {
+		req = vtblk_request_next(sc);
+		if (req == NULL)
+			break;
+
+		if (vtblk_request_execute(sc, req) != 0) {
+			vtblk_request_requeue_ready(sc, req);
+			break;
+		}
+
+		enq++;
+	}
+
+	if (enq > 0)
+		virtqueue_notify(vq);
+}
+
+static void
+vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
+{
+
+	/* Because of GEOM direct dispatch, we cannot hold any locks. */
+	if (sc != NULL)
+		VTBLK_LOCK_ASSERT_NOTOWNED(sc);
+
+	if (error) {
+		bp->bio_resid = bp->bio_bcount;
+		bp->bio_error = error;
+		bp->bio_flags |= BIO_ERROR;
+	}
+
+	biodone(bp);
 }
 
 #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
@@ -1025,7 +1125,7 @@ vtblk_read_config(struct vtblk_softc *sc
 #undef VTBLK_GET_CONFIG
 
 static void
-vtblk_get_ident(struct vtblk_softc *sc)
+vtblk_ident(struct vtblk_softc *sc)
 {
 	struct bio buf;
 	struct disk *dp;
@@ -1038,7 +1138,7 @@ vtblk_get_ident(struct vtblk_softc *sc)
 	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
 		return;
 
-	req = vtblk_dequeue_request(sc);
+	req = vtblk_request_dequeue(sc);
 	if (req == NULL)
 		return;
 
@@ -1058,7 +1158,7 @@ vtblk_get_ident(struct vtblk_softc *sc)
 	error = vtblk_poll_request(sc, req);
 	VTBLK_UNLOCK(sc);
 
-	vtblk_enqueue_request(sc, req);
+	vtblk_request_enqueue(sc, req);
 
 	if (error) {
 		device_printf(sc->vtblk_dev,
@@ -1066,77 +1166,6 @@ vtblk_get_ident(struct vtblk_softc *sc)
 	}
 }
 
-static void
-vtblk_prepare_dump(struct vtblk_softc *sc)
-{
-	device_t dev;
-	struct virtqueue *vq;
-
-	dev = sc->vtblk_dev;
-	vq = sc->vtblk_vq;
-
-	vtblk_stop(sc);
-
-	/*
-	 * Drain all requests caught in-flight in the virtqueue,
-	 * skipping biodone(). When dumping, only one request is
-	 * outstanding at a time, and we just poll the virtqueue
-	 * for the response.
-	 */
-	vtblk_drain_vq(sc, 1);
-
-	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
-		panic("%s: cannot reinit VirtIO block device during dump",
-		    device_get_nameunit(dev));
-	}
-
-	virtqueue_disable_intr(vq);
-	virtio_reinit_complete(dev);
-}
-
-static int
-vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
-    size_t length)
-{
-	struct bio buf;
-	struct vtblk_request *req;
-
-	req = &sc->vtblk_dump_request;
-	req->vbr_ack = -1;
-	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
-	req->vbr_hdr.ioprio = 1;
-	req->vbr_hdr.sector = offset / 512;
-
-	req->vbr_bp = &buf;
-	bzero(&buf, sizeof(struct bio));
-
-	buf.bio_cmd = BIO_WRITE;
-	buf.bio_data = virtual;
-	buf.bio_bcount = length;
-
-	return (vtblk_poll_request(sc, req));
-}
-
-static int
-vtblk_flush_dump(struct vtblk_softc *sc)
-{
-	struct bio buf;
-	struct vtblk_request *req;
-
-	req = &sc->vtblk_dump_request;
-	req->vbr_ack = -1;
-	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
-	req->vbr_hdr.ioprio = 1;
-	req->vbr_hdr.sector = 0;
-
-	req->vbr_bp = &buf;
-	bzero(&buf, sizeof(struct bio));
-
-	buf.bio_cmd = BIO_FLUSH;
-
-	return (vtblk_poll_request(sc, req));
-}
-
 static int
 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
 {
@@ -1148,7 +1177,7 @@ vtblk_poll_request(struct vtblk_softc *s
 	if (!virtqueue_empty(vq))
 		return (EBUSY);
 
-	error = vtblk_execute_request(sc, req);
+	error = vtblk_request_execute(sc, req);
 	if (error)
 		return (error);
 
@@ -1164,212 +1193,188 @@ vtblk_poll_request(struct vtblk_softc *s
 	return (error);
 }
 
-static void
-vtblk_finish_completed(struct vtblk_softc *sc)
+static int
+vtblk_quiesce(struct vtblk_softc *sc)
 {
-	struct vtblk_request *req;
-	struct bio *bp;
 	int error;
 
-	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
-		bp = req->vbr_bp;
+	VTBLK_LOCK_ASSERT(sc);
+	error = 0;
 
-		if (sc->vtblk_req_ordered != NULL) {
-			/* This should be the only outstanding request. */
-			MPASS(sc->vtblk_req_ordered == req);
-			sc->vtblk_req_ordered = NULL;
+	while (!virtqueue_empty(sc->vtblk_vq)) {
+		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
+		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
+			error = EBUSY;
+			break;
 		}
-
-		error = vtblk_request_error(req);
-		if (error)
-			disk_err(bp, "hard error", -1, 1);
-
-		vtblk_finish_bio(bp, error);
-		vtblk_enqueue_request(sc, req);
 	}
+
+	return (error);
 }
 
 static void
-vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
+vtblk_vq_intr(void *xsc)
 {
+	struct vtblk_softc *sc;
 	struct virtqueue *vq;
-	struct vtblk_request *req;
-	int last;
+	struct bio_queue queue;
 
+	sc = xsc;
 	vq = sc->vtblk_vq;
-	last = 0;
-
-	while ((req = virtqueue_drain(vq, &last)) != NULL) {
-		if (!skip_done)
-			vtblk_finish_bio(req->vbr_bp, ENXIO);
+	TAILQ_INIT(&queue);
 
-		vtblk_enqueue_request(sc, req);
-	}
-
-	sc->vtblk_req_ordered = NULL;
-	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
-}
-
-static void
-vtblk_drain(struct vtblk_softc *sc)
-{
-	struct bio_queue_head *bioq;
-	struct vtblk_request *req;
-	struct bio *bp;
+	VTBLK_LOCK(sc);
 
-	bioq = &sc->vtblk_bioq;
+again:
+	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
+		goto out;
 
-	if (sc->vtblk_vq != NULL) {
-		vtblk_finish_completed(sc);
-		vtblk_drain_vq(sc, 0);
-	}
+	vtblk_queue_completed(sc, &queue);
+	vtblk_startio(sc);
 
-	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
-		vtblk_finish_bio(req->vbr_bp, ENXIO);
-		vtblk_enqueue_request(sc, req);
+	if (virtqueue_enable_intr(vq) != 0) {
+		virtqueue_disable_intr(vq);
+		goto again;
 	}
 
-	while (bioq_first(bioq) != NULL) {
-		bp = bioq_takefirst(bioq);
-		vtblk_finish_bio(bp, ENXIO);
-	}
+	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
+		wakeup(&sc->vtblk_vq);
 
-	vtblk_free_requests(sc);
+out:
+	VTBLK_UNLOCK(sc);
+	vtblk_done_completed(sc, &queue);
 }
 
-#ifdef INVARIANTS
 static void
-vtblk_request_invariants(struct vtblk_request *req)
+vtblk_stop(struct vtblk_softc *sc)
 {
-	int hdr_nsegs, ack_nsegs;
-
-	hdr_nsegs = sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr));
-	ack_nsegs = sglist_count(&req->vbr_ack, sizeof(req->vbr_ack));
 
-	KASSERT(hdr_nsegs == 1, ("request header crossed page boundary"));
-	KASSERT(ack_nsegs == 1, ("request ack crossed page boundary"));
+	virtqueue_disable_intr(sc->vtblk_vq);
+	virtio_stop(sc->vtblk_dev);
 }
-#endif
 
-static int
-vtblk_alloc_requests(struct vtblk_softc *sc)
+static void
+vtblk_dump_prepare(struct vtblk_softc *sc)
 {
-	struct vtblk_request *req;
-	int i, nreqs;
+	device_t dev;
+	struct virtqueue *vq;
 
-	nreqs = virtqueue_size(sc->vtblk_vq);
+	dev = sc->vtblk_dev;
+	vq = sc->vtblk_vq;
+
+	vtblk_stop(sc);
 
 	/*
-	 * Preallocate sufficient requests to keep the virtqueue full. Each
-	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
-	 * the number allocated when indirect descriptors are not available.
+	 * Drain all requests caught in-flight in the virtqueue,
+	 * skipping biodone(). When dumping, only one request is
+	 * outstanding at a time, and we just poll the virtqueue
+	 * for the response.
 	 */
-	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
-		nreqs /= VTBLK_MIN_SEGMENTS;
-
-	for (i = 0; i < nreqs; i++) {
-		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
-		if (req == NULL)
-			return (ENOMEM);
-
-#ifdef INVARIANTS
-		vtblk_request_invariants(req);
-#endif
+	vtblk_drain_vq(sc, 1);
 
-		sc->vtblk_request_count++;
-		vtblk_enqueue_request(sc, req);
+	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
+		panic("%s: cannot reinit VirtIO block device during dump",
+		    device_get_nameunit(dev));
 	}
 
-	return (0);
+	virtqueue_disable_intr(vq);

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-stable mailing list