git: e82644e59ece - main - cam/iosched: Add a counter of I/Os that take too long
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 20 Jul 2024 02:59:06 UTC
The branch main has been updated by imp: URL: https://cgit.FreeBSD.org/src/commit/?id=e82644e59ece5cdc67250262508e81fa22deea90 commit e82644e59ece5cdc67250262508e81fa22deea90 Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2024-07-20 02:52:40 +0000 Commit: Warner Losh <imp@FreeBSD.org> CommitDate: 2024-07-20 02:53:37 +0000 cam/iosched: Add a counter of I/Os that take too long Add kern.cam.DEV.UNIT.iosched.too_long (to count I/Os taking too long) and kern.cam.DEV.UNIT.bad_latency (to set this threshold, defaults to 500ms). Each class of I/O (read, write, trim) has its own counters and thresholds. Sponsored by: Netflix Reviewed by: jhb Differential Revision: https://reviews.freebsd.org/D46033 --- sys/cam/cam_iosched.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sys/cam/cam_iosched.c b/sys/cam/cam_iosched.c index 471e6c355d69..022eb23cb621 100644 --- a/sys/cam/cam_iosched.c +++ b/sys/cam/cam_iosched.c @@ -271,6 +271,9 @@ struct iop_stats { sbintime_t emvar; sbintime_t sd; /* Last computed sd */ + uint64_t too_long; /* Number of I/Os greater than bad lat threshold */ + sbintime_t bad_latency; /* Latency threshold */ + uint32_t state_flags; #define IOP_RATE_LIMITED 1u @@ -856,6 +859,7 @@ cam_iosched_iop_stats_init(struct cam_iosched_softc *isc, struct iop_stats *ios) ios->total = 0; ios->ema = 0; ios->emvar = 0; + ios->bad_latency = SBT_1S / 2; /* Default to 500ms */ ios->softc = isc; cam_iosched_limiter_init(ios); } @@ -1046,6 +1050,15 @@ cam_iosched_iop_stats_sysctl_init(struct cam_iosched_softc *isc, struct iop_stat OID_AUTO, "errs", CTLFLAG_RD, &ios->errs, 0, "# of transactions completed with an error"); + SYSCTL_ADD_U64(ctx, n, + OID_AUTO, "too_long", CTLFLAG_RD, + &ios->too_long, 0, + "# of transactions completed took too long"); + SYSCTL_ADD_PROC(ctx, n, + OID_AUTO, "bad_latency", + CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, + &ios->bad_latency, 0, cam_iosched_sbintime_sysctl, "A", + "Threshold for counting transactions that took too long (in us)"); SYSCTL_ADD_PROC(ctx, n, OID_AUTO, "limiter", @@ -1916,6 +1929,14 @@ cam_iosched_update(struct iop_stats *iop, sbintime_t sim_latency) sbintime_t y, deltasq, delta; int i; + /* + * Simple threshold: count the number of events that excede the + * configured threshold. + */ + if (sim_latency > iop->bad_latency) { + iop->too_long++; + } + /* * Keep counts for latency. We do it by power of two buckets. * This helps us spot outlier behavior obscured by averages.