Re: git: 7672cbef2c1e - main - pipes: reserve configured percentage of buffers zone to superuser

From: Mark Johnston <markj_at_freebsd.org>
Date: Fri, 20 Sep 2024 12:43:55 UTC
On Fri, Sep 20, 2024 at 06:46:56AM +0000, Konstantin Belousov wrote:
> The branch main has been updated by kib:
> 
> URL: https://cgit.FreeBSD.org/src/commit/?id=7672cbef2c1e1267e42bb3aad6a6da9380f4347f
> 
> commit 7672cbef2c1e1267e42bb3aad6a6da9380f4347f
> Author:     Konstantin Belousov <kib@FreeBSD.org>
> AuthorDate: 2024-09-15 06:57:34 +0000
> Commit:     Konstantin Belousov <kib@FreeBSD.org>
> CommitDate: 2024-09-20 06:46:07 +0000
> 
>     pipes: reserve configured percentage of buffers zone to superuser
>     
>     Sponsored by:   The FreeBSD Foundation
>     MFC after:      1 week
>     Differential revision:  https://reviews.freebsd.org/D46619
> ---
>  sys/kern/sys_pipe.c | 23 +++++++++++++++++++++--
>  1 file changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
> index 7ee2b5c76da3..68b57708d653 100644
> --- a/sys/kern/sys_pipe.c
> +++ b/sys/kern/sys_pipe.c
> @@ -103,6 +103,7 @@
>  #include <sys/stat.h>
>  #include <sys/malloc.h>
>  #include <sys/poll.h>
> +#include <sys/priv.h>
>  #include <sys/selinfo.h>
>  #include <sys/signalvar.h>
>  #include <sys/syscallsubr.h>
> @@ -206,6 +207,7 @@ static int pipeallocfail;
>  static int piperesizefail;
>  static int piperesizeallowed = 1;
>  static long pipe_mindirect = PIPE_MINDIRECT;
> +static int pipebuf_reserv = 2;
>  
>  SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
>  	   &maxpipekva, 0, "Pipe KVA limit");
> @@ -219,6 +221,9 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD,
>  	  &piperesizefail, 0, "Pipe resize failures");
>  SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
>  	  &piperesizeallowed, 0, "Pipe resizing allowed");
> +SYSCTL_INT(_kern_ipc, OID_AUTO, pipebuf_reserv, CTLFLAG_RW,
> +    &pipebuf_reserv, 0,
> +    "Superuser-reserved percentage of the pipe buffers space");
>  
>  static void pipeinit(void *dummy __unused);
>  static void pipeclose(struct pipe *cpipe);
> @@ -586,8 +591,22 @@ retry:
>  		return (ENOMEM);
>  	}
>  
> -	error = vm_map_find(pipe_map, NULL, 0, (vm_offset_t *)&buffer, size, 0,
> -	    VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
> +	vm_map_lock(pipe_map);
> +	if (priv_check(curthread, PRIV_PIPEBUF) != 0 &&
> +	    (vm_map_max(pipe_map) - vm_map_min(pipe_map)) *
> +	    (100 - pipebuf_reserv) / 100 < pipe_map->size + size) {
> +		vm_map_unlock(pipe_map);
> +		if (cpipe->pipe_buffer.buffer == NULL &&
> +		    size > SMALL_PIPE_SIZE) {
> +			size = SMALL_PIPE_SIZE;
> +			pipefragretry++;
> +			goto retry;
> +		}

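Don't we need a chgpipecnt(..., -size, 0) call in this block too, like
the one in the vm_map_find_locked() failure path just below?  It looks
like the previous increment is leaked when the reservation check fails.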

> +		return (ENOMEM);
> +	}
> +	error = vm_map_find_locked(pipe_map, NULL, 0, (vm_offset_t *)&buffer,
> +	    size, 0, VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
> +	vm_map_unlock(pipe_map);
>  	if (error != KERN_SUCCESS) {
>  		chgpipecnt(cpipe->pipe_pair->pp_owner->cr_ruidinfo, -size, 0);
>  		if (cpipe->pipe_buffer.buffer == NULL &&