git: 8935a3993219 - main - daemon: use kqueue for all events
Date: Fri, 14 Apr 2023 05:13:54 UTC
The branch main has been updated by kevans: URL: https://cgit.FreeBSD.org/src/commit/?id=8935a3993219be76c7ea03e9ad4509657d08af6c commit 8935a3993219be76c7ea03e9ad4509657d08af6c Author: Ihor Antonov <ihor@antonovs.family> AuthorDate: 2023-04-14 05:10:29 +0000 Commit: Kyle Evans <kevans@FreeBSD.org> CommitDate: 2023-04-14 05:12:21 +0000 daemon: use kqueue for all events Refactor daemon to use kqueue/kevent instead of signals. This changes allows to simplify the code in several ways: - the execution flow is now linear, no async events. - several variables became redundant and got removed. - all event handling is now concentrated inside of the event loop, which makes code reading and comprehension easier. - new kqueuex(2) call is used for CLOEXEC, but maintained closing the kq fd prior to execve() to ease later MFC No UX/API changes are intended. Reviewed by: kevans Pull Request: https://github.com/freebsd/freebsd-src/pull/701 --- usr.sbin/daemon/daemon.c | 508 ++++++++++++++++++++++------------------------- 1 file changed, 239 insertions(+), 269 deletions(-) diff --git a/usr.sbin/daemon/daemon.c b/usr.sbin/daemon/daemon.c index 790b0f6fcd0f..89adde8d5e83 100644 --- a/usr.sbin/daemon/daemon.c +++ b/usr.sbin/daemon/daemon.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> +#include <sys/event.h> #include <sys/mman.h> #include <sys/wait.h> @@ -59,11 +60,14 @@ __FBSDID("$FreeBSD$"); #define LBUF_SIZE 4096 +enum daemon_mode { + MODE_DAEMON = 0, /* simply daemonize, no supervision */ + MODE_SUPERVISE, /* initial supervision state */ + MODE_TERMINATING, /* user requested termination */ + MODE_NOCHILD, /* child is terminated, final state of the event loop */ +}; + struct daemon_state { - sigset_t mask_orig; - sigset_t mask_read; - sigset_t mask_term; - sigset_t mask_susp; int pipe_fd[2]; char **argv; const char *child_pidfile; @@ -74,6 +78,8 @@ struct daemon_state { const char *user; struct pidfh *parent_pidfh; struct pidfh *child_pidfh; + enum daemon_mode mode; + int pid; int keep_cur_workdir; int restart_delay; int stdmask; @@ -81,33 +87,25 @@ struct daemon_state { int syslog_facility; int keep_fds_open; int output_fd; - bool supervision_enabled; - bool child_eof; bool restart_enabled; bool syslog_enabled; bool log_reopen; }; -static void setup_signals(struct daemon_state *); static void restrict_process(const char *); -static void handle_term(int); -static void handle_chld(int); -static void handle_hup(int); static int open_log(const char *); static void reopen_log(struct daemon_state *); static bool listen_child(int, struct daemon_state *); static int get_log_mapping(const char *, const CODE *); static void open_pid_files(struct daemon_state *); static void do_output(const unsigned char *, size_t, struct daemon_state *); -static void daemon_sleep(time_t, long); +static void daemon_sleep(struct daemon_state *); static void daemon_state_init(struct daemon_state *); static void daemon_eventloop(struct daemon_state *); static void daemon_terminate(struct daemon_state *); - -static volatile sig_atomic_t terminate = 0; -static volatile sig_atomic_t child_gone = 0; -static volatile sig_atomic_t pid = 0; -static volatile sig_atomic_t do_log_reopen = 0; +static void daemon_exec(struct daemon_state *); +static bool daemon_is_child_dead(struct daemon_state *); +static void daemon_set_child_pipe(struct daemon_state *); static const char shortopts[] = "+cfHSp:P:ru:o:s:l:t:m:R:T:h"; @@ -172,6 +170,10 @@ main(int argc, char *argv[]) daemon_state_init(&state); + /* Signals are processed via kqueue */ + signal(SIGHUP, SIG_IGN); + signal(SIGTERM, SIG_IGN); + /* * Supervision mode is enabled if one of the following options are used: * --child-pidfile -p @@ -207,7 +209,7 @@ main(int argc, char *argv[]) errx(5, "unrecognized syslog facility"); } state.syslog_enabled = true; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'm': state.stdmask = strtol(optarg, &p, 10); @@ -223,19 +225,19 @@ main(int argc, char *argv[]) * daemon could open the specified file and set it's * descriptor as both stderr and stout before execve() */ - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'p': state.child_pidfile = optarg; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'P': state.parent_pidfile = optarg; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'r': state.restart_enabled = true; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'R': state.restart_enabled = true; @@ -251,11 +253,11 @@ main(int argc, char *argv[]) errx(4, "unrecognized syslog priority"); } state.syslog_enabled = true; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'S': state.syslog_enabled = true; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 't': state.title = optarg; @@ -263,7 +265,7 @@ main(int argc, char *argv[]) case 'T': state.syslog_tag = optarg; state.syslog_enabled = true; - state.supervision_enabled = true; + state.mode = MODE_SUPERVISE; break; case 'u': state.user = optarg; @@ -304,246 +306,212 @@ main(int argc, char *argv[]) * to be able to report the error intelligently */ open_pid_files(&state); + + /* + * TODO: add feature to avoid backgrounding + * i.e. --foreground, -f + */ if (daemon(state.keep_cur_workdir, state.keep_fds_open) == -1) { warn("daemon"); daemon_terminate(&state); } - /* Write out parent pidfile if needed. */ - pidfile_write(state.parent_pidfh); - if (state.supervision_enabled) { - /* Block SIGTERM to avoid racing until the child is spawned. */ - if (sigprocmask(SIG_BLOCK, &state.mask_term, &state.mask_orig)) { - warn("sigprocmask"); - daemon_terminate(&state); - } + if (state.mode == MODE_DAEMON) { + daemon_exec(&state); + } - setup_signals(&state); + /* Write out parent pidfile if needed. */ + pidfile_write(state.parent_pidfh); - /* - * Try to protect against pageout kill. Ignore the - * error, madvise(2) will fail only if a process does - * not have superuser privileges. - */ - (void)madvise(NULL, 0, MADV_PROTECT); - } do { + state.mode = MODE_SUPERVISE; daemon_eventloop(&state); - close(state.pipe_fd[0]); - state.pipe_fd[0] = -1; - } while (state.restart_enabled && !terminate); + daemon_sleep(&state); + } while (state.restart_enabled); daemon_terminate(&state); } +static void +daemon_exec(struct daemon_state *state) +{ + pidfile_write(state->child_pidfh); -/* - * Main event loop: fork the child and watch for events. - * In legacy mode simply execve into the target process. - * - * Signal handling logic: - * - * - SIGTERM is masked while there is no child. - * - * - SIGCHLD is masked while reading from the pipe. SIGTERM has to be - * caught, to avoid indefinite blocking on read(). - * - * - Both SIGCHLD and SIGTERM are masked before calling sigsuspend() - * to avoid racing. - * - * - After SIGTERM is recieved and propagated to the child there are - * several options on what to do next: - * - read until EOF - * - read until EOF but only for a while - * - bail immediately - * Currently the third option is used, because otherwise there is no - * guarantee that read() won't block indefinitely if the child refuses - * to depart. To handle the second option, a different approach - * would be needed (procctl()?). - * - * - Child's exit might be detected by receiveing EOF from the pipe. - * But the child might have closed its stdout and stderr, so deamon - * must wait for the SIGCHLD to ensure that the child is actually gone. + if (state->user != NULL) { + restrict_process(state->user); + } + + /* Ignored signals remain ignored after execve, unignore them */ + signal(SIGHUP, SIG_DFL); + signal(SIGTERM, SIG_DFL); + execvp(state->argv[0], state->argv); + /* execvp() failed - report error and exit this process */ + err(1, "%s", state->argv[0]); +} + +/* Main event loop: fork the child and watch for events. + * After SIGTERM is recieved and propagated to the child there are + * several options on what to do next: + * - read until EOF + * - read until EOF but only for a while + * - bail immediately + * Currently the third option is used, because otherwise there is no + * guarantee that read() won't block indefinitely if the child refuses + * to depart. To handle the second option, a different approach + * would be needed (procctl()?). */ static void daemon_eventloop(struct daemon_state *state) { - if (state->supervision_enabled) { - if (pipe(state->pipe_fd)) { - err(1, "pipe"); - } - /* - * Spawn a child to exec the command. - */ - child_gone = 0; - pid = fork(); + struct kevent event; + int kq; + int ret; + + /* + * Try to protect against pageout kill. Ignore the + * error, madvise(2) will fail only if a process does + * not have superuser privileges. + */ + (void)madvise(NULL, 0, MADV_PROTECT); + + if (pipe(state->pipe_fd)) { + err(1, "pipe"); } - /* fork failed, this can only happen when supervision is enabled */ - if (pid == -1) { - warn("fork"); - daemon_terminate(state); + kq = kqueuex(KQUEUE_CLOEXEC); + EV_SET(&event, state->pipe_fd[0], EVFILT_READ, EV_ADD|EV_CLEAR, 0, 0, + NULL); + if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) { + err(EXIT_FAILURE, "failed to register kevent"); } - /* fork succeeded, this is child's branch or supervision is disabled */ - if (pid == 0) { - pidfile_write(state->child_pidfh); + EV_SET(&event, SIGHUP, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) { + err(EXIT_FAILURE, "failed to register kevent"); + } - if (state->user != NULL) { - restrict_process(state->user); - } - /* - * In supervision mode, the child gets the original sigmask, - * and dup'd pipes. - */ - if (state->supervision_enabled) { - close(state->pipe_fd[0]); - if (sigprocmask(SIG_SETMASK, &state->mask_orig, NULL)) { - err(1, "sigprogmask"); - } - if (state->stdmask & STDERR_FILENO) { - if (dup2(state->pipe_fd[1], STDERR_FILENO) == -1) { - err(1, "dup2"); - } - } - if (state->stdmask & STDOUT_FILENO) { - if (dup2(state->pipe_fd[1], STDOUT_FILENO) == -1) { - err(1, "dup2"); - } - } - if (state->pipe_fd[1] != STDERR_FILENO && - state->pipe_fd[1] != STDOUT_FILENO) { - close(state->pipe_fd[1]); - } - } - execvp(state->argv[0], state->argv); - /* execvp() failed - report error and exit this process */ - err(1, "%s", state->argv[0]); + EV_SET(&event, SIGTERM, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) { + err(EXIT_FAILURE, "failed to register kevent"); } - /* - * else: pid > 0 - * fork succeeded, this is the parent branch, this can only happen when - * supervision is enabled. - * - * Unblock SIGTERM - now there is a valid child PID to signal to. - */ - if (sigprocmask(SIG_UNBLOCK, &state->mask_term, NULL)) { - warn("sigprocmask"); - daemon_terminate(state); + EV_SET(&event, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL); + if (kevent(kq, &event, 1, NULL, 0, NULL) == -1) { + err(EXIT_FAILURE, "failed to register kevent"); } - close(state->pipe_fd[1]); - state->pipe_fd[1] = -1; + memset(&event, 0, sizeof(struct kevent)); - setproctitle("%s[%d]", state->title, (int)pid); - for (;;) { - if (child_gone && state->child_eof) { - break; - } + /* Spawn a child to exec the command. */ + state->pid = fork(); - if (terminate) { - daemon_terminate(state); - } + /* fork failed, this can only happen when supervision is enabled */ + switch (state->pid) { + case -1: + warn("fork"); + state->mode = MODE_NOCHILD; + return; + /* fork succeeded, this is child's branch */ + case 0: + close(kq); + daemon_set_child_pipe(state); + daemon_exec(state); + break; + } - if (state->child_eof) { - if (sigprocmask(SIG_BLOCK, &state->mask_susp, NULL)) { - warn("sigprocmask"); - daemon_terminate(state); - } - while (!terminate && !child_gone) { - sigsuspend(&state->mask_orig); - } - if (sigprocmask(SIG_UNBLOCK, &state->mask_susp, NULL)) { - warn("sigprocmask"); - daemon_terminate(state); - } + /* case: pid > 0; fork succeeded */ + close(state->pipe_fd[1]); + state->pipe_fd[1] = -1; + setproctitle("%s[%d]", state->title, (int)state->pid); + + while (state->mode != MODE_NOCHILD) { + ret = kevent(kq, NULL, 0, &event, 1, NULL); + switch (ret) { + case -1: + err(EXIT_FAILURE, "kevent wait"); + case 0: continue; } - if (sigprocmask(SIG_BLOCK, &state->mask_read, NULL)) { - warn("sigprocmask"); - daemon_terminate(state); + if (event.flags & EV_ERROR) { + errx(EXIT_FAILURE, "Event error: %s", + strerror(event.data)); } - state->child_eof = !listen_child(state->pipe_fd[0], state); + switch (event.filter) { + case EVFILT_SIGNAL: + + switch (event.ident) { + case SIGCHLD: + if (daemon_is_child_dead(state)) { + /* child is dead, read all until EOF */ + state->pid = -1; + state->mode = MODE_NOCHILD; + while (listen_child(state->pipe_fd[0], + state)) + ; + } + continue; + case SIGTERM: + if (state->mode != MODE_SUPERVISE) { + /* user is impatient */ + /* TODO: warn about repeated SIGTERM? */ + continue; + } - if (sigprocmask(SIG_UNBLOCK, &state->mask_read, NULL)) { - warn("sigprocmask"); - daemon_terminate(state); - } + state->mode = MODE_TERMINATING; + state->restart_enabled = false; + if (state->pid > 0) { + kill(state->pid, SIGTERM); + } + /* + * TODO set kevent timer to exit + * unconditionally after some time + */ + continue; + case SIGHUP: + if (state->log_reopen && state->output_fd >= 0) { + reopen_log(state); + } + continue; + } + break; - } + case EVFILT_READ: + /* + * detecting EOF is no longer necessary + * if child closes the pipe daemon will stop getting + * EVFILT_READ events + */ - /* - * At the end of the loop the the child is already gone. - * Block SIGTERM to avoid racing until the child is spawned. - */ - if (sigprocmask(SIG_BLOCK, &state->mask_term, NULL)) { - warn("sigprocmask"); - daemon_terminate(state); + if (event.data > 0) { + (void)listen_child(state->pipe_fd[0], state); + } + continue; + default: + continue; + } } - /* sleep before exiting mainloop if restart is enabled */ - if (state->restart_enabled && !terminate) { - daemon_sleep(state->restart_delay, 0); - } + close(kq); + close(state->pipe_fd[0]); + state->pipe_fd[0] = -1; } static void -daemon_sleep(time_t secs, long nsecs) +daemon_sleep(struct daemon_state *state) { - struct timespec ts = { secs, nsecs }; + struct timespec ts = { state->restart_delay, 0 }; - while (!terminate && nanosleep(&ts, &ts) == -1) { + if (!state->restart_enabled) { + return; + } + while (nanosleep(&ts, &ts) == -1) { if (errno != EINTR) { err(1, "nanosleep"); } } } -/* - * Setup SIGTERM, SIGCHLD and SIGHUP handlers. - * To avoid racing SIGCHLD with SIGTERM corresponding - * signal handlers mask the other signal. - */ -static void -setup_signals(struct daemon_state *state) -{ - struct sigaction act_term = { 0 }; - struct sigaction act_chld = { 0 }; - struct sigaction act_hup = { 0 }; - - /* Setup SIGTERM */ - act_term.sa_handler = handle_term; - sigemptyset(&act_term.sa_mask); - sigaddset(&act_term.sa_mask, SIGCHLD); - if (sigaction(SIGTERM, &act_term, NULL) == -1) { - warn("sigaction"); - daemon_terminate(state); - } - - /* Setup SIGCHLD */ - act_chld.sa_handler = handle_chld; - sigemptyset(&act_chld.sa_mask); - sigaddset(&act_chld.sa_mask, SIGTERM); - if (sigaction(SIGCHLD, &act_chld, NULL) == -1) { - warn("sigaction"); - daemon_terminate(state); - } - - /* Setup SIGHUP if configured */ - if (!state->log_reopen || state->output_fd < 0) { - return; - } - - act_hup.sa_handler = handle_hup; - sigemptyset(&act_hup.sa_mask); - if (sigaction(SIGHUP, &act_hup, NULL) == -1) { - warn("sigaction"); - daemon_terminate(state); - } -} - static void open_pid_files(struct daemon_state *state) { @@ -612,6 +580,8 @@ restrict_process(const char *user) * * Return value of false is assumed to mean EOF or error, and true indicates to * continue reading. + * + * TODO: simplify signature - state contains pipefd */ static bool listen_child(int fd, struct daemon_state *state) @@ -623,9 +593,6 @@ listen_child(int fd, struct daemon_state *state) assert(state != NULL); assert(bytes_read < LBUF_SIZE - 1); - if (do_log_reopen) { - reopen_log(state); - } rv = read(fd, buf + bytes_read, LBUF_SIZE - bytes_read - 1); if (rv > 0) { unsigned char *cp; @@ -697,42 +664,6 @@ do_output(const unsigned char *buf, size_t len, struct daemon_state *state) } } -/* - * We use the global PID acquired directly from fork. If there is no valid - * child pid, the handler should be blocked and/or child_gone == 1. - */ -static void -handle_term(int signo) -{ - if (pid > 0 && !child_gone) { - kill(pid, signo); - } - terminate = 1; -} - -static void -handle_chld(int signo __unused) -{ - - for (;;) { - int rv = waitpid(-1, NULL, WNOHANG); - if (pid == rv) { - child_gone = 1; - break; - } else if (rv == -1 && errno != EINTR) { - warn("waitpid"); - return; - } - } -} - -static void -handle_hup(int signo __unused) -{ - - do_log_reopen = 1; -} - static int open_log(const char *outfn) { @@ -745,7 +676,6 @@ reopen_log(struct daemon_state *state) { int outfd; - do_log_reopen = 0; outfd = open_log(state->output_filename); if (state->output_fd >= 0) { close(state->output_fd); @@ -765,9 +695,9 @@ daemon_state_init(struct daemon_state *state) .parent_pidfile = NULL, .title = NULL, .user = NULL, - .supervision_enabled = false, - .child_eof = false, + .mode = MODE_DAEMON, .restart_enabled = false, + .pid = 0, .keep_cur_workdir = 1, .restart_delay = 1, .stdmask = STDOUT_FILENO | STDERR_FILENO, @@ -780,25 +710,23 @@ daemon_state_init(struct daemon_state *state) .output_fd = -1, .output_filename = NULL, }; - - sigemptyset(&state->mask_susp); - sigemptyset(&state->mask_read); - sigemptyset(&state->mask_term); - sigemptyset(&state->mask_orig); - sigaddset(&state->mask_susp, SIGTERM); - sigaddset(&state->mask_susp, SIGCHLD); - sigaddset(&state->mask_term, SIGTERM); - sigaddset(&state->mask_read, SIGCHLD); - } static _Noreturn void daemon_terminate(struct daemon_state *state) { assert(state != NULL); - close(state->output_fd); - close(state->pipe_fd[0]); - close(state->pipe_fd[1]); + + if (state->output_fd >= 0) { + close(state->output_fd); + } + if (state->pipe_fd[0] >= 0) { + close(state->pipe_fd[0]); + } + + if (state->pipe_fd[1] >= 0) { + close(state->pipe_fd[1]); + } if (state->syslog_enabled) { closelog(); } @@ -812,3 +740,45 @@ daemon_terminate(struct daemon_state *state) */ exit(1); } + +/* + * Returns true if SIGCHILD came from state->pid + * This function could hang if SIGCHILD was emittied for a reason other than + * child dying (e.g., ptrace attach). + */ +static bool +daemon_is_child_dead(struct daemon_state *state) +{ + for (;;) { + int who = waitpid(-1, NULL, WNOHANG); + if (state->pid == who) { + return true; + } + if (who == -1 && errno != EINTR) { + warn("waitpid"); + return false; + } + } +} + +static void +daemon_set_child_pipe(struct daemon_state *state) +{ + if (state->stdmask & STDERR_FILENO) { + if (dup2(state->pipe_fd[1], STDERR_FILENO) == -1) { + err(1, "dup2"); + } + } + if (state->stdmask & STDOUT_FILENO) { + if (dup2(state->pipe_fd[1], STDOUT_FILENO) == -1) { + err(1, "dup2"); + } + } + if (state->pipe_fd[1] != STDERR_FILENO && + state->pipe_fd[1] != STDOUT_FILENO) { + close(state->pipe_fd[1]); + } + + /* The child gets dup'd pipes. */ + close(state->pipe_fd[0]); +}