git: bbe017e0415a - main - linux(4): Add a dedicated ioprio system calls
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 04 Aug 2023 13:04:58 UTC
The branch main has been updated by dchagin: URL: https://cgit.FreeBSD.org/src/commit/?id=bbe017e0415a60114036f2315605ff6881ed9c46 commit bbe017e0415a60114036f2315605ff6881ed9c46 Author: Dmitry Chagin <dchagin@FreeBSD.org> AuthorDate: 2023-08-04 13:03:57 +0000 Commit: Dmitry Chagin <dchagin@FreeBSD.org> CommitDate: 2023-08-04 13:03:57 +0000 linux(4): Add a dedicated ioprio system calls On Linux these system calls have an effect only when used in conjuction with an I/O scheduler that supports I/O priorities. If no I/O scheduler has been set for a thread, then by defaut the I/O priority will follow the CPU nice value. Due to FreeBSD lack of I/O scheduler facilities, the default Linux behavior is implemented. Ubuntu 23.04 debootstrap requires Linux ionice which depends on these syscalls. Differential Revision: https://reviews.freebsd.org/D41153 MFC after: 1 month --- sys/compat/linux/linux_dummy.c | 2 - sys/compat/linux/linux_misc.c | 270 +++++++++++++++++++++++++++++++++++++++++ sys/compat/linux/linux_misc.h | 24 ++++ 3 files changed, 294 insertions(+), 2 deletions(-) diff --git a/sys/compat/linux/linux_dummy.c b/sys/compat/linux/linux_dummy.c index 861482b67d8f..5556652b31a1 100644 --- a/sys/compat/linux/linux_dummy.c +++ b/sys/compat/linux/linux_dummy.c @@ -78,8 +78,6 @@ DUMMY(add_key); DUMMY(request_key); DUMMY(keyctl); /* Linux 2.6.13: */ -DUMMY(ioprio_set); -DUMMY(ioprio_get); DUMMY(inotify_add_watch); DUMMY(inotify_rm_watch); /* Linux 2.6.16: */ diff --git a/sys/compat/linux/linux_misc.c b/sys/compat/linux/linux_misc.c index 583cc25f1c43..0cea43001a0d 100644 --- a/sys/compat/linux/linux_misc.c +++ b/sys/compat/linux/linux_misc.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <sys/reboot.h> #include <sys/random.h> #include <sys/resourcevar.h> +#include <sys/rtprio.h> #include <sys/sched.h> #include <sys/smp.h> #include <sys/stat.h> @@ -2653,3 +2654,272 @@ linux_execve(struct thread *td, struct linux_execve_args *args) AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } + +static void +linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) +{ + struct rtprio rtp2; + + pri_to_rtp(td1, &rtp2); + if (rtp2.type < rtp->type || + (rtp2.type == rtp->type && + rtp2.prio < rtp->prio)) { + rtp->type = rtp2.type; + rtp->prio = rtp2.prio; + } +} + +#define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX + +static int +linux_rtprio2ioprio(struct rtprio *rtp) +{ + int ioprio, prio; + + switch (rtp->type) { + case RTP_PRIO_IDLE: + prio = RTP_PRIO_MIN; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); + break; + case RTP_PRIO_NORMAL: + prio = rtp->prio / LINUX_PRIO_DIVIDER; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); + break; + case RTP_PRIO_REALTIME: + prio = rtp->prio / LINUX_PRIO_DIVIDER; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); + break; + default: + prio = RTP_PRIO_MIN; + ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); + break; + } + return (ioprio); +} + +static int +linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) +{ + + switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { + case LINUX_IOPRIO_CLASS_IDLE: + rtp->prio = RTP_PRIO_MIN; + rtp->type = RTP_PRIO_IDLE; + break; + case LINUX_IOPRIO_CLASS_BE: + rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; + rtp->type = RTP_PRIO_NORMAL; + break; + case LINUX_IOPRIO_CLASS_RT: + rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; + rtp->type = RTP_PRIO_REALTIME; + break; + default: + return (EINVAL); + } + return (0); +} +#undef LINUX_PRIO_DIVIDER + +int +linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) +{ + struct thread *td1; + struct rtprio rtp; + struct pgrp *pg; + struct proc *p; + int error, found; + + p = NULL; + td1 = NULL; + error = 0; + found = 0; + rtp.type = RTP_PRIO_IDLE; + rtp.prio = RTP_PRIO_MAX; + switch (args->which) { + case LINUX_IOPRIO_WHO_PROCESS: + if (args->who == 0) { + td1 = td; + p = td1->td_proc; + PROC_LOCK(p); + } else if (args->who > PID_MAX) { + td1 = linux_tdfind(td, args->who, -1); + if (td1 != NULL) + p = td1->td_proc; + } else + p = pfind(args->who); + if (p == NULL) + return (ESRCH); + if ((error = p_cansee(td, p))) { + PROC_UNLOCK(p); + break; + } + if (td1 != NULL) { + pri_to_rtp(td1, &rtp); + } else { + FOREACH_THREAD_IN_PROC(p, td1) { + linux_up_rtprio_if(td1, &rtp); + } + } + found++; + PROC_UNLOCK(p); + break; + case LINUX_IOPRIO_WHO_PGRP: + sx_slock(&proctree_lock); + if (args->who == 0) { + pg = td->td_proc->p_pgrp; + PGRP_LOCK(pg); + } else { + pg = pgfind(args->who); + if (pg == NULL) { + sx_sunlock(&proctree_lock); + error = ESRCH; + break; + } + } + sx_sunlock(&proctree_lock); + LIST_FOREACH(p, &pg->pg_members, p_pglist) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p_cansee(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + linux_up_rtprio_if(td1, &rtp); + found++; + } + } + PROC_UNLOCK(p); + } + PGRP_UNLOCK(pg); + break; + case LINUX_IOPRIO_WHO_USER: + if (args->who == 0) + args->who = td->td_ucred->cr_uid; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p->p_ucred->cr_uid == args->who && + p_cansee(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + linux_up_rtprio_if(td1, &rtp); + found++; + } + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + break; + default: + error = EINVAL; + break; + } + if (error == 0) { + if (found != 0) + td->td_retval[0] = linux_rtprio2ioprio(&rtp); + else + error = ESRCH; + } + return (error); +} + +int +linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) +{ + struct thread *td1; + struct rtprio rtp; + struct pgrp *pg; + struct proc *p; + int error; + + if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) + return (error); + /* Attempts to set high priorities (REALTIME) require su privileges. */ + if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && + (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) + return (error); + + p = NULL; + td1 = NULL; + switch (args->which) { + case LINUX_IOPRIO_WHO_PROCESS: + if (args->who == 0) { + td1 = td; + p = td1->td_proc; + PROC_LOCK(p); + } else if (args->who > PID_MAX) { + td1 = linux_tdfind(td, args->who, -1); + if (td1 != NULL) + p = td1->td_proc; + } else + p = pfind(args->who); + if (p == NULL) + return (ESRCH); + if ((error = p_cansched(td, p))) { + PROC_UNLOCK(p); + break; + } + if (td1 != NULL) { + error = rtp_to_pri(&rtp, td1); + } else { + FOREACH_THREAD_IN_PROC(p, td1) { + if ((error = rtp_to_pri(&rtp, td1)) != 0) + break; + } + } + PROC_UNLOCK(p); + break; + case LINUX_IOPRIO_WHO_PGRP: + sx_slock(&proctree_lock); + if (args->who == 0) { + pg = td->td_proc->p_pgrp; + PGRP_LOCK(pg); + } else { + pg = pgfind(args->who); + if (pg == NULL) { + sx_sunlock(&proctree_lock); + error = ESRCH; + break; + } + } + sx_sunlock(&proctree_lock); + LIST_FOREACH(p, &pg->pg_members, p_pglist) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p_cansched(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + if ((error = rtp_to_pri(&rtp, td1)) != 0) + break; + } + } + PROC_UNLOCK(p); + if (error != 0) + break; + } + PGRP_UNLOCK(pg); + break; + case LINUX_IOPRIO_WHO_USER: + if (args->who == 0) + args->who = td->td_ucred->cr_uid; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL && + p->p_ucred->cr_uid == args->who && + p_cansched(td, p) == 0) { + FOREACH_THREAD_IN_PROC(p, td1) { + if ((error = rtp_to_pri(&rtp, td1)) != 0) + break; + } + } + PROC_UNLOCK(p); + if (error != 0) + break; + } + sx_sunlock(&allproc_lock); + break; + default: + error = EINVAL; + break; + } + return (error); +} diff --git a/sys/compat/linux/linux_misc.h b/sys/compat/linux/linux_misc.h index 80f12ef13545..92cc7be636d2 100644 --- a/sys/compat/linux/linux_misc.h +++ b/sys/compat/linux/linux_misc.h @@ -191,4 +191,28 @@ struct syscall_info { }; }; +/* Linux ioprio set/get syscalls */ +#define LINUX_IOPRIO_CLASS_SHIFT 13 +#define LINUX_IOPRIO_CLASS_MASK 0x07 +#define LINUX_IOPRIO_PRIO_MASK ((1UL << LINUX_IOPRIO_CLASS_SHIFT) - 1) + +#define LINUX_IOPRIO_PRIO_CLASS(ioprio) \ + (((ioprio) >> LINUX_IOPRIO_CLASS_SHIFT) & LINUX_IOPRIO_CLASS_MASK) +#define LINUX_IOPRIO_PRIO_DATA(ioprio) ((ioprio) & LINUX_IOPRIO_PRIO_MASK) +#define LINUX_IOPRIO_PRIO(class, data) \ + ((((class) & LINUX_IOPRIO_CLASS_MASK) << LINUX_IOPRIO_CLASS_SHIFT) | \ + ((data) & LINUX_IOPRIO_PRIO_MASK)) + +#define LINUX_IOPRIO_CLASS_NONE 0 +#define LINUX_IOPRIO_CLASS_RT 1 +#define LINUX_IOPRIO_CLASS_BE 2 +#define LINUX_IOPRIO_CLASS_IDLE 3 + +#define LINUX_IOPRIO_MIN 0 +#define LINUX_IOPRIO_MAX 7 + +#define LINUX_IOPRIO_WHO_PROCESS 1 +#define LINUX_IOPRIO_WHO_PGRP 2 +#define LINUX_IOPRIO_WHO_USER 3 + #endif /* _LINUX_MISC_H_ */