Re: git: 1d2421ad8b6d - main - Correctly measure system load averages > 1024

From: Kubilay Kocak <koobs_at_FreeBSD.org>
Date: Sat, 07 May 2022 00:06:10 UTC

On 7/05/2022 10:04 am, Alan Somers wrote:
> The branch main has been updated by asomers:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=1d2421ad8b6d508ef155752bdfc5948f7373bac3
> 
> commit 1d2421ad8b6d508ef155752bdfc5948f7373bac3
> Author:     Alan Somers <asomers@FreeBSD.org>
> AuthorDate: 2022-05-05 21:35:23 +0000
> Commit:     Alan Somers <asomers@FreeBSD.org>
> CommitDate: 2022-05-06 23:25:43 +0000
> 
>      Correctly measure system load averages > 1024
>      
>      The old fixed-point arithmetic used for calculating load averages had an
>      overflow at 1024.  So on systems with extremely high load, the observed
>      load average would actually fall back to 0 and shoot up again, creating
>      a kind of sawtooth graph.
>      
>      Fix this by using 64-bit math internally, while still reporting the load
>      average to userspace as a 32-bit number.
>      
>      Sponsored by:   Axcient
>      Reviewed by:    imp
>      Differential Revision: https://reviews.freebsd.org/D35134

Can this be MFC'd (merged from current to the stable branches)?

> ---
>   sys/kern/kern_synch.c | 9 +++++----
>   sys/kern/tty_info.c   | 2 +-
>   sys/sys/param.h       | 8 ++++----
>   3 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
> index e78878987b57..381d6315044c 100644
> --- a/sys/kern/kern_synch.c
> +++ b/sys/kern/kern_synch.c
> @@ -87,7 +87,7 @@ struct loadavg averunnable =
>    * Constants for averages over 1, 5, and 15 minutes
>    * when sampling at 5 second intervals.
>    */
> -static fixpt_t cexp[3] = {
> +static uint64_t cexp[3] = {
>   	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
>   	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
>   	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
> @@ -611,14 +611,15 @@ setrunnable(struct thread *td, int srqflags)
>   static void
>   loadav(void *arg)
>   {
> -	int i, nrun;
> +	int i;
> +	uint64_t nrun;
>   	struct loadavg *avg;
>   
> -	nrun = sched_load();
> +	nrun = (uint64_t)sched_load();
>   	avg = &averunnable;
>   
>   	for (i = 0; i < 3; i++)
> -		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
> +		avg->ldavg[i] = (cexp[i] * (uint64_t)avg->ldavg[i] +
>   		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
>   
>   	/*
> diff --git a/sys/kern/tty_info.c b/sys/kern/tty_info.c
> index 60675557e4ed..237aa47a18da 100644
> --- a/sys/kern/tty_info.c
> +++ b/sys/kern/tty_info.c
> @@ -302,7 +302,7 @@ tty_info(struct tty *tp)
>   	sbuf_set_drain(&sb, sbuf_tty_drain, tp);
>   
>   	/* Print load average. */
> -	load = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
> +	load = ((int64_t)averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT;
>   	sbuf_printf(&sb, "%sload: %d.%02d ", tp->t_column == 0 ? "" : "\n",
>   	    load / 100, load % 100);
>   
> diff --git a/sys/sys/param.h b/sys/sys/param.h
> index 2d463b9ac7a2..b0b53f1a7776 100644
> --- a/sys/sys/param.h
> +++ b/sys/sys/param.h
> @@ -361,12 +361,12 @@ __END_DECLS
>    * Scale factor for scaled integers used to count %cpu time and load avgs.
>    *
>    * The number of CPU `tick's that map to a unique `%age' can be expressed
> - * by the formula (1 / (2 ^ (FSHIFT - 11))).  The maximum load average that
> - * can be calculated (assuming 32 bits) can be closely approximated using
> - * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15).
> + * by the formula (1 / (2 ^ (FSHIFT - 11))).  Since the intermediate
> + * calculation is done with 64-bit precision, the maximum load average that can
> + * be calculated is approximately 2^32 / FSCALE.
>    *
>    * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age',
> - * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
> + * FSHIFT must be at least 11.  This gives a maximum load avg of 2 million.
>    */
>   #define	FSHIFT	11		/* bits to right of fixed binary point */
>   #define FSCALE	(1<<FSHIFT)
>