svn commit: r343562 - head/sys/dev/nvd
Alexander Motin
mav at FreeBSD.org
Tue Jan 29 20:35:11 UTC 2019
Author: mav
Date: Tue Jan 29 20:35:09 2019
New Revision: 343562
URL: https://svnweb.freebsd.org/changeset/base/343562
Log:
Reimplement BIO_ORDERED handling in nvd(4).
This fixes BIO_ORDERED semantics while also improving performance by:
- sleeping also before BIO_ORDERED bio, as defined, not only after;
- not queueing BIO_ORDERED bio to taskqueue if no other bios running;
- waking up sleeping taskqueue explicitly rather then rely on polling.
On Samsung SSD 970 PRO this shows sync write latency, measured with
`diskinfo -wS`, reduction from ~2ms to ~1.1ms by not sleeping without
reason till next HZ tick.
On the same device ZFS pool with 8 ZVOLs synchronously writing 4KB blocks
shows ~950 IOPS instead of ~750 IOPS before. I suspect ZFS does not need
BIO_ORDERED on BIO_FLUSH at all, but that will be next question.
MFC after: 2 weeks
Sponsored by: iXsystems, Inc.
Modified:
head/sys/dev/nvd/nvd.c
Modified: head/sys/dev/nvd/nvd.c
==============================================================================
--- head/sys/dev/nvd/nvd.c Tue Jan 29 20:10:27 2019 (r343561)
+++ head/sys/dev/nvd/nvd.c Tue Jan 29 20:35:09 2019 (r343562)
@@ -82,6 +82,7 @@ struct nvd_disk {
struct nvme_namespace *ns;
uint32_t cur_depth;
+#define NVD_ODEPTH (1 << 31)
uint32_t ordered_in_flight;
u_int unit;
@@ -181,39 +182,50 @@ nvd_unload()
mtx_destroy(&nvd_lock);
}
-static int
+static void
nvd_bio_submit(struct nvd_disk *ndisk, struct bio *bp)
{
int err;
bp->bio_driver1 = NULL;
- atomic_add_int(&ndisk->cur_depth, 1);
+ if (__predict_false(bp->bio_flags & BIO_ORDERED))
+ atomic_add_int(&ndisk->cur_depth, NVD_ODEPTH);
+ else
+ atomic_add_int(&ndisk->cur_depth, 1);
err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
if (err) {
- atomic_add_int(&ndisk->cur_depth, -1);
- if (__predict_false(bp->bio_flags & BIO_ORDERED))
+ if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+ atomic_add_int(&ndisk->cur_depth, -NVD_ODEPTH);
atomic_add_int(&ndisk->ordered_in_flight, -1);
+ wakeup(&ndisk->cur_depth);
+ } else {
+ if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == 1 &&
+ __predict_false(ndisk->ordered_in_flight != 0))
+ wakeup(&ndisk->cur_depth);
+ }
bp->bio_error = err;
bp->bio_flags |= BIO_ERROR;
bp->bio_resid = bp->bio_bcount;
biodone(bp);
- return (-1);
}
-
- return (0);
}
static void
nvd_strategy(struct bio *bp)
{
- struct nvd_disk *ndisk;
+ struct nvd_disk *ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
- ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
-
- if (__predict_false(bp->bio_flags & BIO_ORDERED))
- atomic_add_int(&ndisk->ordered_in_flight, 1);
-
- if (__predict_true(ndisk->ordered_in_flight == 0)) {
+ /*
+ * bio with BIO_ORDERED flag must be executed after all previous
+ * bios in the queue, and before any successive bios.
+ */
+ if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+ if (atomic_fetchadd_int(&ndisk->ordered_in_flight, 1) == 0 &&
+ ndisk->cur_depth == 0 && bioq_first(&ndisk->bioq) == NULL) {
+ nvd_bio_submit(ndisk, bp);
+ return;
+ }
+ } else if (__predict_true(ndisk->ordered_in_flight == 0)) {
nvd_bio_submit(ndisk, bp);
return;
}
@@ -281,28 +293,27 @@ nvd_ioctl(struct disk *ndisk, u_long cmd, void *data,
static int
nvd_dump(void *arg, void *virt, vm_offset_t phys, off_t offset, size_t len)
{
- struct nvd_disk *ndisk;
- struct disk *dp;
+ struct disk *dp = arg;
+ struct nvd_disk *ndisk = dp->d_drv1;
- dp = arg;
- ndisk = dp->d_drv1;
-
return (nvme_ns_dump(ndisk->ns, virt, offset, len));
}
static void
nvd_done(void *arg, const struct nvme_completion *cpl)
{
- struct bio *bp;
- struct nvd_disk *ndisk;
+ struct bio *bp = (struct bio *)arg;
+ struct nvd_disk *ndisk = bp->bio_disk->d_drv1;
- bp = (struct bio *)arg;
-
- ndisk = bp->bio_disk->d_drv1;
-
- atomic_add_int(&ndisk->cur_depth, -1);
- if (__predict_false(bp->bio_flags & BIO_ORDERED))
+ if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+ atomic_add_int(&ndisk->cur_depth, -NVD_ODEPTH);
atomic_add_int(&ndisk->ordered_in_flight, -1);
+ wakeup(&ndisk->cur_depth);
+ } else {
+ if (atomic_fetchadd_int(&ndisk->cur_depth, -1) == 1 &&
+ __predict_false(ndisk->ordered_in_flight != 0))
+ wakeup(&ndisk->cur_depth);
+ }
biodone(bp);
}
@@ -320,22 +331,23 @@ nvd_bioq_process(void *arg, int pending)
if (bp == NULL)
break;
- if (nvd_bio_submit(ndisk, bp) != 0) {
- continue;
+ if (__predict_false(bp->bio_flags & BIO_ORDERED)) {
+ /*
+ * bio with BIO_ORDERED flag set must be executed
+ * after all previous bios.
+ */
+ while (ndisk->cur_depth > 0)
+ tsleep(&ndisk->cur_depth, 0, "nvdorb", 1);
+ } else {
+ /*
+ * bio with BIO_ORDERED flag set must be completed
+ * before proceeding with additional bios.
+ */
+ while (ndisk->cur_depth >= NVD_ODEPTH)
+ tsleep(&ndisk->cur_depth, 0, "nvdora", 1);
}
-#ifdef BIO_ORDERED
- /*
- * BIO_ORDERED flag dictates that the bio with BIO_ORDERED
- * flag set must be completed before proceeding with
- * additional bios.
- */
- if (bp->bio_flags & BIO_ORDERED) {
- while (ndisk->cur_depth > 0) {
- pause("nvd flush", 1);
- }
- }
-#endif
+ nvd_bio_submit(ndisk, bp);
}
}
More information about the svn-src-all
mailing list