Re: git: 7672cbef2c1e - main - pipes: reserve configured percentage of buffers zone to superuser
Date: Fri, 20 Sep 2024 12:43:55 UTC
On Fri, Sep 20, 2024 at 06:46:56AM +0000, Konstantin Belousov wrote: > The branch main has been updated by kib: > > URL: https://cgit.FreeBSD.org/src/commit/?id=7672cbef2c1e1267e42bb3aad6a6da9380f4347f > > commit 7672cbef2c1e1267e42bb3aad6a6da9380f4347f > Author: Konstantin Belousov <kib@FreeBSD.org> > AuthorDate: 2024-09-15 06:57:34 +0000 > Commit: Konstantin Belousov <kib@FreeBSD.org> > CommitDate: 2024-09-20 06:46:07 +0000 > > pipes: reserve configured percentage of buffers zone to superuser > > Sponsored by: The FreeBSD Foundation > MFC after: 1 week > Differential revision: https://reviews.freebsd.org/D46619 > --- > sys/kern/sys_pipe.c | 23 +++++++++++++++++++++-- > 1 file changed, 21 insertions(+), 2 deletions(-) > > diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c > index 7ee2b5c76da3..68b57708d653 100644 > --- a/sys/kern/sys_pipe.c > +++ b/sys/kern/sys_pipe.c > @@ -103,6 +103,7 @@ > #include <sys/stat.h> > #include <sys/malloc.h> > #include <sys/poll.h> > +#include <sys/priv.h> > #include <sys/selinfo.h> > #include <sys/signalvar.h> > #include <sys/syscallsubr.h> > @@ -206,6 +207,7 @@ static int pipeallocfail; > static int piperesizefail; > static int piperesizeallowed = 1; > static long pipe_mindirect = PIPE_MINDIRECT; > +static int pipebuf_reserv = 2; > > SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, > &maxpipekva, 0, "Pipe KVA limit"); > @@ -219,6 +221,9 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, > &piperesizefail, 0, "Pipe resize failures"); > SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, > &piperesizeallowed, 0, "Pipe resizing allowed"); > +SYSCTL_INT(_kern_ipc, OID_AUTO, pipebuf_reserv, CTLFLAG_RW, > + &pipebuf_reserv, 0, > + "Superuser-reserved percentage of the pipe buffers space"); > > static void pipeinit(void *dummy __unused); > static void pipeclose(struct pipe *cpipe); > @@ -586,8 +591,22 @@ retry: > return (ENOMEM); > } > > - error = vm_map_find(pipe_map, NULL, 0, 
(vm_offset_t *)&buffer, size, 0, > - VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0); > + vm_map_lock(pipe_map); > + if (priv_check(curthread, PRIV_PIPEBUF) != 0 && > + (vm_map_max(pipe_map) - vm_map_min(pipe_map)) * > + (100 - pipebuf_reserv) / 100 < pipe_map->size + size) { > + vm_map_unlock(pipe_map); > + if (cpipe->pipe_buffer.buffer == NULL && > + size > SMALL_PIPE_SIZE) { > + size = SMALL_PIPE_SIZE; > + pipefragretry++; > + goto retry; > + }

Don't we need a chgpipecnt() call here too (before both the "goto retry" and the "return (ENOMEM)")? It looks like the previous increment is leaked on this path.

> + return (ENOMEM); > + } > + error = vm_map_find_locked(pipe_map, NULL, 0, (vm_offset_t *)&buffer, > + size, 0, VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0); > + vm_map_unlock(pipe_map); > if (error != KERN_SUCCESS) { > chgpipecnt(cpipe->pipe_pair->pp_owner->cr_ruidinfo, -size, 0); > if (cpipe->pipe_buffer.buffer == NULL &&