svn commit: r281668 - in user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Xin LI
delphij at FreeBSD.org
Fri Apr 17 22:16:36 UTC 2015
Author: delphij
Date: Fri Apr 17 22:16:35 2015
New Revision: 281668
URL: https://svnweb.freebsd.org/changeset/base/281668
Log:
MFV r277430:
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h:
Add two offset/LBA-based AVL trees to the vdev queue
object.
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h:
Add a second AVL node within each ZIO so that vdev_queue.c
can sort ZIOs by both type and priority.
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c:
Combine reads and writes, irrespective of their priorities,
into unified, offset-sorted trees. Selection of the
ZIO to issue is unchanged, but aggregation now uses the
unified tree of the appropriate type so that aggregation
across priority classes is possible.
Original author: Justin T. Gibbs justing at spectralogic.com
Illumos issue:
5313 Allow I/Os to be aggregated across ZIO priority classes
Modified:
user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
Directory Properties:
user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/ (props changed)
Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Fri Apr 17 21:21:11 2015 (r281667)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h Fri Apr 17 22:16:35 2015 (r281668)
@@ -113,6 +113,8 @@ struct vdev_queue {
vdev_t *vq_vdev;
vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
avl_tree_t vq_active_tree;
+ avl_tree_t vq_read_offset_tree;
+ avl_tree_t vq_write_offset_tree;
uint64_t vq_last_offset;
hrtime_t vq_io_complete_ts; /* time last i/o completed */
kmutex_t vq_lock;
Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Fri Apr 17 21:21:11 2015 (r281667)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h Fri Apr 17 22:16:35 2015 (r281668)
@@ -454,6 +454,7 @@ struct zio {
uint64_t io_offset;
hrtime_t io_timestamp;
avl_node_t io_queue_node;
+ avl_node_t io_offset_node;
/* Internal pipeline state */
enum zio_flag io_flags;
Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c Fri Apr 17 21:21:11 2015 (r281667)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c Fri Apr 17 22:16:35 2015 (r281668)
@@ -290,6 +290,22 @@ vdev_queue_offset_compare(const void *x1
return (0);
}
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+ return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+ ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE);
+ if (t == ZIO_TYPE_READ)
+ return (&vq->vq_read_offset_tree);
+ else
+ return (&vq->vq_write_offset_tree);
+}
+
int
vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
@@ -324,19 +340,27 @@ vdev_queue_init(vdev_t *vd)
avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
+ avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+ int (*compfn) (const void *, const void *);
+
/*
- * The synchronous i/o queues are FIFO rather than LBA ordered.
- * This provides more consistent latency for these i/os, and
- * they tend to not be tightly clustered anyway so there is
- * little to no throughput loss.
+ * The synchronous i/o queues are dispatched in FIFO rather
+ * than LBA order. This provides more consistent latency for
+ * these i/os.
*/
- boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
- p == ZIO_PRIORITY_SYNC_WRITE);
- avl_create(&vq->vq_class[p].vqc_queued_tree,
- fifo ? vdev_queue_timestamp_compare :
- vdev_queue_offset_compare,
+ if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
+ compfn = vdev_queue_timestamp_compare;
+ else
+ compfn = vdev_queue_offset_compare;
+
+ avl_create(vdev_queue_class_tree(vq, p), compfn,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
}
@@ -349,8 +373,10 @@ vdev_queue_fini(vdev_t *vd)
vdev_queue_t *vq = &vd->vdev_queue;
for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
- avl_destroy(&vq->vq_class[p].vqc_queued_tree);
+ avl_destroy(vdev_queue_class_tree(vq, p));
avl_destroy(&vq->vq_active_tree);
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+ avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
mutex_destroy(&vq->vq_lock);
}
@@ -361,7 +387,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_
spa_t *spa = zio->io_spa;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+ avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
#ifdef illumos
mutex_enter(&spa->spa_iokstat_lock);
@@ -378,7 +405,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, z
spa_t *spa = zio->io_spa;
ASSERT(MUTEX_HELD(&vq->vq_lock));
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
- avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+ avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+ avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
#ifdef illumos
mutex_enter(&spa->spa_iokstat_lock);
@@ -551,7 +579,7 @@ vdev_queue_class_to_issue(vdev_queue_t *
/* find a queue that has not reached its minimum # outstanding i/os */
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
vq->vq_class[p].vqc_active <
vdev_queue_class_min_active(p))
return (p);
@@ -562,7 +590,7 @@ vdev_queue_class_to_issue(vdev_queue_t *
* maximum # outstanding i/os.
*/
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
- if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+ if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
vq->vq_class[p].vqc_active <
vdev_queue_class_max_active(spa, p))
return (p);
@@ -588,8 +616,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
uint64_t maxgap = 0;
uint64_t size;
boolean_t stretch = B_FALSE;
- vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority];
- avl_tree_t *t = &vqc->vqc_queued_tree;
+ avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
ASSERT(MUTEX_HELD(&vq->vq_lock));
@@ -597,15 +624,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
return (NULL);
- /*
- * The synchronous i/o queues are not sorted by LBA, so we can't
- * find adjacent i/os. These i/os tend to not be tightly clustered,
- * or too large to aggregate, so this has little impact on performance.
- */
- if (zio->io_priority == ZIO_PRIORITY_SYNC_READ ||
- zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
- return (NULL);
-
first = last = zio;
if (zio->io_type == ZIO_TYPE_READ)
@@ -737,7 +755,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
zio_t *zio, *aio;
zio_priority_t p;
avl_index_t idx;
- vdev_queue_class_t *vqc;
+ avl_tree_t *tree;
zio_t search;
again:
@@ -756,13 +774,13 @@ again:
*
* For FIFO queues (sync), issue the i/o with the lowest timestamp.
*/
- vqc = &vq->vq_class[p];
+ tree = vdev_queue_class_tree(vq, p);
search.io_timestamp = 0;
search.io_offset = vq->vq_last_offset + 1;
- VERIFY3P(avl_find(&vqc->vqc_queued_tree, &search, &idx), ==, NULL);
- zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+ VERIFY3P(avl_find(tree, &search, &idx), ==, NULL);
+ zio = avl_nearest(tree, idx, AVL_AFTER);
if (zio == NULL)
- zio = avl_first(&vqc->vqc_queued_tree);
+ zio = avl_first(tree);
ASSERT3U(zio->io_priority, ==, p);
aio = vdev_queue_aggregate(vq, zio);
More information about the svn-src-user
mailing list