git: 8c47d8f53854 - main - prometheus_sysctl_exporter: fix metric aliasing
Date: Tue, 19 Apr 2022 12:56:49 UTC
The branch main has been updated by asomers: URL: https://cgit.FreeBSD.org/src/commit/?id=8c47d8f53854825d8e8591ccd06e32b2c798f81c commit 8c47d8f53854825d8e8591ccd06e32b2c798f81c Author: Alan Somers <asomers@FreeBSD.org> AuthorDate: 2022-04-18 21:29:37 +0000 Commit: Alan Somers <asomers@FreeBSD.org> CommitDate: 2022-04-19 12:56:39 +0000 prometheus_sysctl_exporter: fix metric aliasing When exporting sysctls to Prometheus, the exporter replaces "." with "_". This caused several metrics to alias, confusing the Prometheus server. Fix it by: * Renaming the "tcp_log_bucket" UMA zone to "tcp_log_id_bucket". Also, rename "tcp_log_node" to "tcp_log_id_node" for consistency. * Not exporting sysctls with "(LEGACY)" in the description. That is used by ZFS sysctls that have been replaced by others, many of which alias to the same Prometheus metric name (like "vfs.zfs.arc_max" and "vfs.zfs.arc.max"). PR: 259607 Reported by: delphij MFC after: 2 weeks Sponsored by: Axcient Reviewed by: delphij,rew,thj Differential Revision: https://reviews.freebsd.org/D34952 --- sys/netinet/tcp_log_buf.c | 33 +++++++++++----------- .../prometheus_sysctl_exporter.c | 11 ++++++-- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/sys/netinet/tcp_log_buf.c b/sys/netinet/tcp_log_buf.c index a0a0a97d5fae..5ec4acf367d5 100644 --- a/sys/netinet/tcp_log_buf.c +++ b/sys/netinet/tcp_log_buf.c @@ -65,7 +65,7 @@ __FBSDID("$FreeBSD$"); #define TCP_LOG_EXPIRE_INTVL ((sbintime_t)5 * SBT_1S) bool tcp_log_verbose; -static uma_zone_t tcp_log_bucket_zone, tcp_log_node_zone, tcp_log_zone; +static uma_zone_t tcp_log_id_bucket_zone, tcp_log_id_node_zone, tcp_log_zone; static int tcp_log_session_limit = TCP_LOG_BUF_DEFAULT_SESSION_LIMIT; static uint32_t tcp_log_version = TCP_LOG_BUF_VER; RB_HEAD(tcp_log_id_tree, tcp_log_id_bucket); @@ -99,16 +99,16 @@ SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_global_entries, CTLFLAG_RD, &tcp_log_zone, "Current number of events maintained for all TCP sessions"); SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_limit, CTLFLAG_RW, - &tcp_log_bucket_zone, "Maximum number of log IDs"); + &tcp_log_id_bucket_zone, "Maximum number of log IDs"); SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_entries, CTLFLAG_RD, - &tcp_log_bucket_zone, "Current number of log IDs"); + &tcp_log_id_bucket_zone, "Current number of log IDs"); SYSCTL_UMA_MAX(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_limit, CTLFLAG_RW, - &tcp_log_node_zone, "Maximum number of tcpcbs with log IDs"); + &tcp_log_id_node_zone, "Maximum number of tcpcbs with log IDs"); SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpcb_entries, CTLFLAG_RD, - &tcp_log_node_zone, "Current number of tcpcbs with log IDs"); + &tcp_log_id_node_zone, "Current number of tcpcbs with log IDs"); SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, &tcp_log_version, 0, "Version of log formats exported"); @@ -360,7 +360,7 @@ tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb) } TCPID_BUCKET_LOCK_DESTROY(tlb); counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); - uma_zfree(tcp_log_bucket_zone, tlb); + uma_zfree(tcp_log_id_bucket_zone, tlb); } /* @@ -669,7 +669,7 @@ restart: * will unlock the bucket. */ if (tln != NULL) - uma_zfree(tcp_log_node_zone, tln); + uma_zfree(tcp_log_id_node_zone, tln); tln = tp->t_lin; tlb = NULL; bucket_locked = false; @@ -702,7 +702,8 @@ restart: if (*id) { /* Get a new tln, if we don't already have one to reuse. */ if (tln == NULL) { - tln = uma_zalloc(tcp_log_node_zone, M_NOWAIT | M_ZERO); + tln = uma_zalloc(tcp_log_id_node_zone, + M_NOWAIT | M_ZERO); if (tln == NULL) { rv = ENOBUFS; goto done; @@ -756,7 +757,7 @@ refind: /* If we need to add a new bucket, do it now. */ if (tmp_tlb == NULL) { /* Allocate new bucket. */ - tlb = uma_zalloc(tcp_log_bucket_zone, M_NOWAIT); + tlb = uma_zalloc(tcp_log_id_bucket_zone, M_NOWAIT); if (tlb == NULL) { rv = ENOBUFS; goto done_noinp; @@ -803,7 +804,7 @@ refind: #define FREE_NEW_TLB() do { \ TCPID_BUCKET_LOCK_DESTROY(tlb); \ - uma_zfree(tcp_log_bucket_zone, tlb); \ + uma_zfree(tcp_log_id_bucket_zone, tlb); \ counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); \ counter_u64_add(tcp_log_pcb_ids_tot, (int64_t)-1); \ bucket_locked = false; \ @@ -906,7 +907,7 @@ done_noinp: } else TCPID_TREE_UNLOCK_ASSERT(); if (tln != NULL) - uma_zfree(tcp_log_node_zone, tln); + uma_zfree(tcp_log_id_node_zone, tln); return (rv); } @@ -1146,10 +1147,10 @@ tcp_log_init(void) #endif NULL, UMA_ALIGN_PTR, 0); (void)uma_zone_set_max(tcp_log_zone, TCP_LOG_BUF_DEFAULT_GLOBAL_LIMIT); - tcp_log_bucket_zone = uma_zcreate("tcp_log_bucket", + tcp_log_id_bucket_zone = uma_zcreate("tcp_log_id_bucket", sizeof(struct tcp_log_id_bucket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - tcp_log_node_zone = uma_zcreate("tcp_log_node", + tcp_log_id_node_zone = uma_zcreate("tcp_log_id_node", sizeof(struct tcp_log_id_node), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); #ifdef TCPLOG_DEBUG_COUNTERS @@ -1258,7 +1259,7 @@ tcp_log_expire(void *unused __unused) tcp_log_free_entries(&tln->tln_entries, &tln->tln_count); /* Free the node. */ - uma_zfree(tcp_log_node_zone, tln); + uma_zfree(tcp_log_id_node_zone, tln); /* Relock the expiry queue. */ TCPLOG_EXPIREQ_LOCK(); @@ -2441,7 +2442,7 @@ no_inp: */ tcp_log_free_entries(&cur_tln->tln_entries, &cur_tln->tln_count); - uma_zfree(tcp_log_node_zone, cur_tln); + uma_zfree(tcp_log_id_node_zone, cur_tln); goto done; } @@ -2463,7 +2464,7 @@ no_inp: } /* No matter what, we are done with the node now. */ - uma_zfree(tcp_log_node_zone, cur_tln); + uma_zfree(tcp_log_id_node_zone, cur_tln); /* * Because we removed this entry from the list, prev_tln diff --git a/usr.sbin/prometheus_sysctl_exporter/prometheus_sysctl_exporter.c b/usr.sbin/prometheus_sysctl_exporter/prometheus_sysctl_exporter.c index 9d50c3e58750..e452e305d2bf 100644 --- a/usr.sbin/prometheus_sysctl_exporter/prometheus_sysctl_exporter.c +++ b/usr.sbin/prometheus_sysctl_exporter/prometheus_sysctl_exporter.c @@ -498,6 +498,7 @@ oid_print(const struct oid *o, struct oidname *on, bool print_description, struct oidvalue ov; struct oiddescription od; char metric[BUFSIZ]; + bool has_desc; if (!oid_get_format(o, &of) || !oid_get_value(o, &of, &ov)) return; @@ -511,14 +512,20 @@ oid_print(const struct oid *o, struct oidname *on, bool print_description, if (include && regexec(&inc_regex, metric, 0, NULL, 0) != 0) return; + has_desc = oid_get_description(o, &od); + /* + * Skip metrics with "(LEGACY)" in the name. It's used by several + * redundant ZFS sysctls whose names alias with the non-legacy versions. + */ + if (has_desc && strnstr(od.description, "(LEGACY)", BUFSIZ) != NULL) + return; /* * Print the line with the description. Prometheus expects a * single unique description for every metric, which cannot be * guaranteed by sysctl if labels are present. Omit the * description if labels are present. */ - if (print_description && !oidname_has_labels(on) && - oid_get_description(o, &od)) { + if (print_description && !oidname_has_labels(on) && has_desc) { fprintf(fp, "# HELP "); fprintf(fp, "%s", metric); fputc(' ', fp);