git: 20a51e6073f4 - main - bhyve: Implement the libslirp notify callback
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 07 Jan 2025 14:34:56 UTC
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=20a51e6073f488440e108c7c628231cd6ae6757e commit 20a51e6073f488440e108c7c628231cd6ae6757e Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2025-01-07 14:33:45 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2025-01-07 14:33:45 +0000 bhyve: Implement the libslirp notify callback libslirp can invoke a callback when received data is removed from a socket buffer, generally because the guest ACKed some data. Previously it didn't do anything, but it needs to wake up the poll thread to get reasonable throughput. Suppose one is using scp to copy data into a guest filesystem via the slirp backend. Data is received on libslirp's socket, which we poll for data in slirp_pollfd_td_loop(). That data gets buffered in priv->pipe, and eventually is placed in the device model's RX rings by the backend's mevent handler. When implementing TCP, libslirp holds on to a copy of data until it's ACKed by the guest via slirp_send(), at which point it drops that data and invokes the notify callback. The initial implementation of this backend didn't take into account the fact that slirp_pollfds_fill() will not add libslirp's socket to the pollfd set if more than a threshold amount of data is already buffered. Then poll() needs to time out before the backend sends more data to the guest. With a default timeout of 500ms, this kills throughput. Use a pipe to implement a simple in-band signal to the poll thread so that it reacts quickly when more buffer space becomes available. MFC after: 1 month Differential Revision: https://reviews.freebsd.org/D48192 --- usr.sbin/bhyve/net_backend_slirp.c | 90 ++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/usr.sbin/bhyve/net_backend_slirp.c b/usr.sbin/bhyve/net_backend_slirp.c index d070d2cdfdb6..171c5b5bdbbd 100644 --- a/usr.sbin/bhyve/net_backend_slirp.c +++ b/usr.sbin/bhyve/net_backend_slirp.c @@ -84,6 +84,18 @@ static slirp_new_p_t slirp_new_p; static slirp_pollfds_fill_p_t slirp_pollfds_fill_p; static slirp_pollfds_poll_p_t slirp_pollfds_poll_p; +static void +checked_close(int *fdp) +{ + int error; + + if (*fdp != -1) { + error = close(*fdp); + assert(error == 0); + *fdp = -1; + } +} + static int slirp_init_once(void) { @@ -134,7 +146,8 @@ struct slirp_priv { #define SLIRP_MTU 2048 struct mevent *mevp; - int pipe[2]; + int pipe[2]; /* used to buffer data sent to the guest */ + int wakeup[2]; /* used to wake up the pollfd thread */ pthread_t pollfd_td; struct pollfd *pollfds; @@ -151,6 +164,7 @@ slirp_priv_init(struct slirp_priv *priv) memset(priv, 0, sizeof(*priv)); priv->pipe[0] = priv->pipe[1] = -1; + priv->wakeup[0] = priv->wakeup[1] = -1; error = pthread_mutex_init(&priv->mtx, NULL); assert(error == 0); } @@ -160,14 +174,10 @@ slirp_priv_cleanup(struct slirp_priv *priv) { int error; - if (priv->pipe[0] != -1) { - error = close(priv->pipe[0]); - assert(error == 0); - } - if (priv->pipe[1] != -1) { - error = close(priv->pipe[1]); - assert(error == 0); - } + checked_close(&priv->pipe[0]); + checked_close(&priv->pipe[1]); + checked_close(&priv->wakeup[0]); + checked_close(&priv->wakeup[1]); if (priv->mevp) mevent_delete(priv->mevp); if (priv->slirp != NULL) @@ -188,8 +198,13 @@ slirp_cb_clock_get_ns(void *param __unused) } static void -slirp_cb_notify(void *param __unused) +slirp_cb_notify(void *param) { + struct slirp_priv *priv; + + /* Wake up the poll thread. We assume that priv->mtx is held here. */ + priv = param; + (void)write(priv->wakeup[1], "M", 1); } static void @@ -310,11 +325,19 @@ slirp_poll_revents(int idx, void *param) { struct slirp_priv *priv; struct pollfd *pollfd; + short revents; priv = param; + assert(idx >= 0); + assert((unsigned int)idx < priv->npollfds); pollfd = &priv->pollfds[idx]; assert(pollfd->fd != -1); - return (pollev2slirpev(pollfd->revents)); + + /* The kernel may report POLLHUP even if we didn't ask for it. */ + revents = pollfd->revents; + if ((pollfd->events & POLLHUP) == 0) + revents &= ~POLLHUP; + return (pollev2slirpev(revents)); } static void * @@ -331,9 +354,14 @@ slirp_pollfd_td_loop(void *param) pthread_mutex_lock(&priv->mtx); for (;;) { + int wakeup; + for (size_t i = 0; i < priv->npollfds; i++) priv->pollfds[i].fd = -1; + /* Register for notifications from slirp_cb_notify(). */ + wakeup = slirp_addpoll_cb(priv->wakeup[0], POLLIN, priv); + timeout = UINT32_MAX; slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb, priv); @@ -341,20 +369,32 @@ slirp_pollfd_td_loop(void *param) pollfds = priv->pollfds; npollfds = priv->npollfds; pthread_mutex_unlock(&priv->mtx); - for (;;) { - error = poll(pollfds, npollfds, timeout); - if (error == -1) { - if (errno != EINTR) { - EPRINTLN("poll: %s", strerror(errno)); - exit(1); - } - continue; - } - break; + error = poll(pollfds, npollfds, timeout); + if (error == -1 && errno != EINTR) { + EPRINTLN("poll: %s", strerror(errno)); + exit(1); } pthread_mutex_lock(&priv->mtx); slirp_pollfds_poll_p(priv->slirp, error == -1, slirp_poll_revents, priv); + + /* + * If we were woken up by the notify callback, mask the + * interrupt. + */ + if ((pollfds[wakeup].revents & POLLIN) != 0) { + ssize_t n; + + do { + uint8_t b; + + n = read(priv->wakeup[0], &b, 1); + } while (n == 1); + if (n != -1 || errno != EAGAIN) { + EPRINTLN("read(wakeup): %s", strerror(errno)); + exit(1); + } + } } } @@ -510,12 +550,18 @@ _slirp_init(struct net_backend *be, const char *devname __unused, free(tofree); } - error = socketpair(PF_LOCAL, SOCK_DGRAM, 0, priv->pipe); + error = socketpair(PF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0, priv->pipe); if (error != 0) { EPRINTLN("Unable to create pipe: %s", strerror(errno)); goto err; } + error = pipe2(priv->wakeup, O_CLOEXEC | O_NONBLOCK); + if (error != 0) { + EPRINTLN("Unable to create wakeup pipe: %s", strerror(errno)); + goto err; + } + /* * Try to avoid dropping buffered packets in slirp_cb_send_packet(). */