svn commit: r281668 - in user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys

Xin LI delphij at FreeBSD.org
Fri Apr 17 22:16:36 UTC 2015


Author: delphij
Date: Fri Apr 17 22:16:35 2015
New Revision: 281668
URL: https://svnweb.freebsd.org/changeset/base/281668

Log:
  MFV r277430:
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h:
      Add two offset/LBA-based AVL trees to the vdev queue
      object.
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h:
      Add a second AVL node within each ZIO so that vdev_queue.c
      can sort ZIOs by both type and priority.
  
  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c:
      Combine reads and writes, irrespective of their priorities,
      into unified, offset-sorted trees.  Selection of the
      ZIO to issue is unchanged, but aggregation now uses the
      unified tree of the appropriate type so that aggregation
      across priority classes is possible.
  
  Original author:    Justin T. Gibbs justing at spectralogic.com
  
  Illumos issue:
      5313 Allow I/Os to be aggregated across ZIO priority classes

Modified:
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
Directory Properties:
  user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/   (props changed)

Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Fri Apr 17 21:21:11 2015	(r281667)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h	Fri Apr 17 22:16:35 2015	(r281668)
@@ -113,6 +113,8 @@ struct vdev_queue {
 	vdev_t		*vq_vdev;
 	vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
 	avl_tree_t	vq_active_tree;
+	avl_tree_t	vq_read_offset_tree;
+	avl_tree_t	vq_write_offset_tree;
 	uint64_t	vq_last_offset;
 	hrtime_t	vq_io_complete_ts; /* time last i/o completed */
 	kmutex_t	vq_lock;

Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Fri Apr 17 21:21:11 2015	(r281667)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zio.h	Fri Apr 17 22:16:35 2015	(r281668)
@@ -454,6 +454,7 @@ struct zio {
 	uint64_t	io_offset;
 	hrtime_t	io_timestamp;
 	avl_node_t	io_queue_node;
+	avl_node_t	io_offset_node;
 
 	/* Internal pipeline state */
 	enum zio_flag	io_flags;

Modified: user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c
==============================================================================
--- user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	Fri Apr 17 21:21:11 2015	(r281667)
+++ user/delphij/zfs-arc-rebase/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c	Fri Apr 17 22:16:35 2015	(r281668)
@@ -290,6 +290,22 @@ vdev_queue_offset_compare(const void *x1
 	return (0);
 }
 
+static inline avl_tree_t *
+vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p)
+{
+	return (&vq->vq_class[p].vqc_queued_tree);
+}
+
+static inline avl_tree_t *
+vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
+{
+	ASSERT(t == ZIO_TYPE_READ || t == ZIO_TYPE_WRITE);
+	if (t == ZIO_TYPE_READ)
+		return (&vq->vq_read_offset_tree);
+	else
+		return (&vq->vq_write_offset_tree);
+}
+
 int
 vdev_queue_timestamp_compare(const void *x1, const void *x2)
 {
@@ -324,19 +340,27 @@ vdev_queue_init(vdev_t *vd)
 
 	avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
 	    sizeof (zio_t), offsetof(struct zio, io_queue_node));
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
+	    vdev_queue_offset_compare, sizeof (zio_t),
+	    offsetof(struct zio, io_offset_node));
+	avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
+	    vdev_queue_offset_compare, sizeof (zio_t),
+	    offsetof(struct zio, io_offset_node));
 
 	for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
+		int (*compfn) (const void *, const void *);
+
 		/*
-		 * The synchronous i/o queues are FIFO rather than LBA ordered.
-		 * This provides more consistent latency for these i/os, and
-		 * they tend to not be tightly clustered anyway so there is
-		 * little to no throughput loss.
+		 * The synchronous i/o queues are dispatched in FIFO rather
+		 * than LBA order.  This provides more consistent latency for
+		 * these i/os.
 		 */
-		boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
-		    p == ZIO_PRIORITY_SYNC_WRITE);
-		avl_create(&vq->vq_class[p].vqc_queued_tree,
-		    fifo ? vdev_queue_timestamp_compare :
-		    vdev_queue_offset_compare,
+		if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE)
+			compfn = vdev_queue_timestamp_compare;
+		else
+			compfn = vdev_queue_offset_compare;
+
+		avl_create(vdev_queue_class_tree(vq, p), compfn,
 		    sizeof (zio_t), offsetof(struct zio, io_queue_node));
 	}
 
@@ -349,8 +373,10 @@ vdev_queue_fini(vdev_t *vd)
 	vdev_queue_t *vq = &vd->vdev_queue;
 
 	for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
-		avl_destroy(&vq->vq_class[p].vqc_queued_tree);
+		avl_destroy(vdev_queue_class_tree(vq, p));
 	avl_destroy(&vq->vq_active_tree);
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ));
+	avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE));
 
 	mutex_destroy(&vq->vq_lock);
 }
@@ -361,7 +387,8 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_
 	spa_t *spa = zio->io_spa;
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
-	avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+	avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	avl_add(vdev_queue_type_tree(vq, zio->io_type), zio);
 
 #ifdef illumos
 	mutex_enter(&spa->spa_iokstat_lock);
@@ -378,7 +405,8 @@ vdev_queue_io_remove(vdev_queue_t *vq, z
 	spa_t *spa = zio->io_spa;
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
 	ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
-	avl_remove(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
+	avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
+	avl_remove(vdev_queue_type_tree(vq, zio->io_type), zio);
 
 #ifdef illumos
 	mutex_enter(&spa->spa_iokstat_lock);
@@ -551,7 +579,7 @@ vdev_queue_class_to_issue(vdev_queue_t *
 
 	/* find a queue that has not reached its minimum # outstanding i/os */
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
 		    vq->vq_class[p].vqc_active <
 		    vdev_queue_class_min_active(p))
 			return (p);
@@ -562,7 +590,7 @@ vdev_queue_class_to_issue(vdev_queue_t *
 	 * maximum # outstanding i/os.
 	 */
 	for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
-		if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
+		if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 &&
 		    vq->vq_class[p].vqc_active <
 		    vdev_queue_class_max_active(spa, p))
 			return (p);
@@ -588,8 +616,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	uint64_t maxgap = 0;
 	uint64_t size;
 	boolean_t stretch = B_FALSE;
-	vdev_queue_class_t *vqc = &vq->vq_class[zio->io_priority];
-	avl_tree_t *t = &vqc->vqc_queued_tree;
+	avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
 	enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
 
 	ASSERT(MUTEX_HELD(&vq->vq_lock));
@@ -597,15 +624,6 @@ vdev_queue_aggregate(vdev_queue_t *vq, z
 	if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
 		return (NULL);
 
-	/*
-	 * The synchronous i/o queues are not sorted by LBA, so we can't
-	 * find adjacent i/os.  These i/os tend to not be tightly clustered,
-	 * or too large to aggregate, so this has little impact on performance.
-	 */
-	if (zio->io_priority == ZIO_PRIORITY_SYNC_READ ||
-	    zio->io_priority == ZIO_PRIORITY_SYNC_WRITE)
-		return (NULL);
-
 	first = last = zio;
 
 	if (zio->io_type == ZIO_TYPE_READ)
@@ -737,7 +755,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq)
 	zio_t *zio, *aio;
 	zio_priority_t p;
 	avl_index_t idx;
-	vdev_queue_class_t *vqc;
+	avl_tree_t *tree;
 	zio_t search;
 
 again:
@@ -756,13 +774,13 @@ again:
 	 *
 	 * For FIFO queues (sync), issue the i/o with the lowest timestamp.
 	 */
-	vqc = &vq->vq_class[p];
+	tree = vdev_queue_class_tree(vq, p);
 	search.io_timestamp = 0;
 	search.io_offset = vq->vq_last_offset + 1;
-	VERIFY3P(avl_find(&vqc->vqc_queued_tree, &search, &idx), ==, NULL);
-	zio = avl_nearest(&vqc->vqc_queued_tree, idx, AVL_AFTER);
+	VERIFY3P(avl_find(tree, &search, &idx), ==, NULL);
+	zio = avl_nearest(tree, idx, AVL_AFTER);
 	if (zio == NULL)
-		zio = avl_first(&vqc->vqc_queued_tree);
+		zio = avl_first(tree);
 	ASSERT3U(zio->io_priority, ==, p);
 
 	aio = vdev_queue_aggregate(vq, zio);


More information about the svn-src-user mailing list