git: 2283206935b8 - main - cam-iosched: Publish parameters of the latency buckets

From: Warner Losh <imp_at_FreeBSD.org>
Date: Mon, 06 Dec 2021 05:47:11 UTC
The branch main has been updated by imp:

URL: https://cgit.FreeBSD.org/src/commit/?id=2283206935b83a2a40d8322d9283af194b4ba5d6

commit 2283206935b83a2a40d8322d9283af194b4ba5d6
Author:     Warner Losh <imp@FreeBSD.org>
AuthorDate: 2021-12-06 04:54:42 +0000
Commit:     Warner Losh <imp@FreeBSD.org>
CommitDate: 2021-12-06 05:19:07 +0000

    cam-iosched: Publish parameters of the latency buckets
    
    Add sysctls to publish the latency bucket size, number, and stride. Move
    to putting all the iosched stuff under kern.cam.iosched as well and move
    kern.cam.do_dynamic_iosched to kern.cam.iosched.dynamic. In addition, move
    kern.cam.iosched_alpha_bits to kern.cam.iosched.alpha_bits. Publish
    kern.cam.iosched.bucket_base_us (the smallest bucket time), .bucket_ratio
    (the geometric progression factor * 100), and .buckets (the total number
    of buckets).
    
    Move to publishing 20 buckets, starting at 20us. This allows us to get
    better resolution on the short end and detect performance degradation of
    the NVMe drives I've tested on, even with the uncertainty of bucketing.
    
    Sponsored by:           Netflix
---
 sys/cam/cam_iosched.c | 79 +++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 61 insertions(+), 18 deletions(-)

diff --git a/sys/cam/cam_iosched.c b/sys/cam/cam_iosched.c
index 928e8095ef01..10caefdb02ea 100644
--- a/sys/cam/cam_iosched.c
+++ b/sys/cam/cam_iosched.c
@@ -58,6 +58,9 @@ __FBSDID("$FreeBSD$");
 static MALLOC_DEFINE(M_CAMSCHED, "CAM I/O Scheduler",
     "CAM I/O Scheduler buffers");
 
+static SYSCTL_NODE(_kern_cam, OID_AUTO, iosched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+    "CAM I/O Scheduler parameters");
+
 /*
  * Default I/O scheduler for FreeBSD. This implementation is just a thin-vineer
  * over the bioq_* interface, with notions of separate calls for normal I/O and
@@ -70,8 +73,8 @@ static MALLOC_DEFINE(M_CAMSCHED, "CAM I/O Scheduler",
 
 #ifdef CAM_IOSCHED_DYNAMIC
 
-static bool do_dynamic_iosched = 1;
-SYSCTL_BOOL(_kern_cam, OID_AUTO, do_dynamic_iosched, CTLFLAG_RD | CTLFLAG_TUN,
+static bool do_dynamic_iosched = true;
+SYSCTL_BOOL(_kern_cam_iosched, OID_AUTO, dynamic, CTLFLAG_RD | CTLFLAG_TUN,
     &do_dynamic_iosched, 1,
     "Enable Dynamic I/O scheduler optimizations.");
 
@@ -96,10 +99,46 @@ SYSCTL_BOOL(_kern_cam, OID_AUTO, do_dynamic_iosched, CTLFLAG_RD | CTLFLAG_TUN,
  * Note: See computation of EMA and EMVAR for acceptable ranges of alpha.
  */
 static int alpha_bits = 9;
-SYSCTL_INT(_kern_cam, OID_AUTO, iosched_alpha_bits, CTLFLAG_RW | CTLFLAG_TUN,
+SYSCTL_INT(_kern_cam_iosched, OID_AUTO, alpha_bits, CTLFLAG_RW | CTLFLAG_TUN,
     &alpha_bits, 1,
     "Bits in EMA's alpha.");
 
+/*
+ * Different parameters for the buckets of latency we keep track of. These are all
+ * published read-only since at present they are compile time constants.
+ *
+ * Bucket base is the upper bound of the first latency bucket. It's currently 20us.
+ * With 20 buckets (see below), that leads to a geometric progression with a max size
+ * of 5.2s which is safely larger than 1s to help diagnose extreme outliers better.
+ */
+#ifndef BUCKET_BASE
+#define BUCKET_BASE (SBT_1S / 50000)	/* 20us */
+#endif
+static sbintime_t bucket_base = BUCKET_BASE;
+SYSCTL_SBINTIME_USEC(_kern_cam_iosched, OID_AUTO, bucket_base_us, CTLFLAG_RD,
+    &bucket_base,
+    "Size of the smallest latency bucket");
+
+/*
+ * Bucket ratio is the geometric progression for the bucket. For a bucket b_n
+ * the size of bucket b_n+1 is b_n * bucket_ratio / 100.
+ */
+static int bucket_ratio = 200;	/* Rather hard coded at the moment */
+SYSCTL_INT(_kern_cam_iosched, OID_AUTO, bucket_ratio, CTLFLAG_RD,
+    &bucket_ratio, 200,
+    "Latency Bucket Ratio for geometric progression.");
+
+/*
+ * Number of total buckets. Starting at BUCKET_BASE, each bound is double the last.
+ */
+#ifndef LAT_BUCKETS
+#define LAT_BUCKETS 20	/* < 20us < 40us ... < 2^(n-1)*20us >= 2^(n-1)*20us */
+#endif
+static int lat_buckets = LAT_BUCKETS;
+SYSCTL_INT(_kern_cam_iosched, OID_AUTO, buckets, CTLFLAG_RD,
+    &lat_buckets, LAT_BUCKETS,
+    "Total number of latency buckets published");
+
 struct iop_stats;
 struct cam_iosched_softc;
 
@@ -233,7 +272,6 @@ struct iop_stats {
 	uint32_t	state_flags;
 #define IOP_RATE_LIMITED		1u
 
-#define LAT_BUCKETS 15			/* < 1ms < 2ms ... < 2^(n-1)ms >= 2^(n-1)ms*/
 	uint64_t	latencies[LAT_BUCKETS];
 
 	struct cam_iosched_softc *softc;
@@ -1790,20 +1828,25 @@ isqrt64(uint64_t val)
 }
 
 static sbintime_t latencies[LAT_BUCKETS - 1] = {
-	SBT_1MS <<  0,
-	SBT_1MS <<  1,
-	SBT_1MS <<  2,
-	SBT_1MS <<  3,
-	SBT_1MS <<  4,
-	SBT_1MS <<  5,
-	SBT_1MS <<  6,
-	SBT_1MS <<  7,
-	SBT_1MS <<  8,
-	SBT_1MS <<  9,
-	SBT_1MS << 10,
-	SBT_1MS << 11,
-	SBT_1MS << 12,
-	SBT_1MS << 13		/* 8.192s */
+	BUCKET_BASE <<  0,	/* 20us */
+	BUCKET_BASE <<  1,
+	BUCKET_BASE <<  2,
+	BUCKET_BASE <<  3,
+	BUCKET_BASE <<  4,
+	BUCKET_BASE <<  5,
+	BUCKET_BASE <<  6,
+	BUCKET_BASE <<  7,
+	BUCKET_BASE <<  8,
+	BUCKET_BASE <<  9,
+	BUCKET_BASE << 10,
+	BUCKET_BASE << 11,
+	BUCKET_BASE << 12,
+	BUCKET_BASE << 13,
+	BUCKET_BASE << 14,
+	BUCKET_BASE << 15,
+	BUCKET_BASE << 16,
+	BUCKET_BASE << 17,
+	BUCKET_BASE << 18	/* 5,242,880us */
 };
 
 static void