git: 20a51e6073f4 - main - bhyve: Implement the libslirp notify callback

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Tue, 07 Jan 2025 14:34:56 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=20a51e6073f488440e108c7c628231cd6ae6757e

commit 20a51e6073f488440e108c7c628231cd6ae6757e
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2025-01-07 14:33:45 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2025-01-07 14:33:45 +0000

    bhyve: Implement the libslirp notify callback
    
    libslirp can invoke a callback when received data is removed from a
    socket buffer, generally because the guest ACKed some data.  Previously
    it didn't do anything, but it needs to wake up the poll thread to get
    reasonable throughput.
    
    Suppose one is using scp to copy data into a guest filesystem via the
    slirp backend.  Data is received on libslirp's socket, which we poll for
    data in slirp_pollfd_td_loop().  That data gets buffered in priv->pipe,
    and eventually is placed in the device model's RX rings by the backend's
    mevent handler.  When implementing TCP, libslirp holds on to a copy of
    data until it's ACKed by the guest via slirp_send(), at which point it
    drops that data and invokes the notify callback.
    
    The initial implementation of this backend didn't take into account the
    fact that slirp_pollfds_fill() will not add libslirp's socket to the
    pollfd set if more than a threshold amount of data is already buffered.
    Then poll() needs to time out before the backend sends more data to the
    guest.  With a default timeout of 500ms, this kills throughput.
    
    Use a pipe to implement a simple in-band signal to the poll thread so
    that it reacts quickly when more buffer space becomes available.
    
    MFC after:      1 month
    Differential Revision:  https://reviews.freebsd.org/D48192
---
 usr.sbin/bhyve/net_backend_slirp.c | 90 ++++++++++++++++++++++++++++----------
 1 file changed, 68 insertions(+), 22 deletions(-)

diff --git a/usr.sbin/bhyve/net_backend_slirp.c b/usr.sbin/bhyve/net_backend_slirp.c
index d070d2cdfdb6..171c5b5bdbbd 100644
--- a/usr.sbin/bhyve/net_backend_slirp.c
+++ b/usr.sbin/bhyve/net_backend_slirp.c
@@ -84,6 +84,18 @@ static slirp_new_p_t slirp_new_p;
 static slirp_pollfds_fill_p_t slirp_pollfds_fill_p;
 static slirp_pollfds_poll_p_t slirp_pollfds_poll_p;
 
+static void
+checked_close(int *fdp)
+{
+	int error;
+
+	if (*fdp != -1) {
+		error = close(*fdp);
+		assert(error == 0);
+		*fdp = -1;
+	}
+}
+
 static int
 slirp_init_once(void)
 {
@@ -134,7 +146,8 @@ struct slirp_priv {
 
 #define	SLIRP_MTU	2048
 	struct mevent *mevp;
-	int pipe[2];
+	int pipe[2];		/* used to buffer data sent to the guest */
+	int wakeup[2];		/* used to wake up the pollfd thread */
 
 	pthread_t pollfd_td;
 	struct pollfd *pollfds;
@@ -151,6 +164,7 @@ slirp_priv_init(struct slirp_priv *priv)
 
 	memset(priv, 0, sizeof(*priv));
 	priv->pipe[0] = priv->pipe[1] = -1;
+	priv->wakeup[0] = priv->wakeup[1] = -1;
 	error = pthread_mutex_init(&priv->mtx, NULL);
 	assert(error == 0);
 }
@@ -160,14 +174,10 @@ slirp_priv_cleanup(struct slirp_priv *priv)
 {
 	int error;
 
-	if (priv->pipe[0] != -1) {
-		error = close(priv->pipe[0]);
-		assert(error == 0);
-	}
-	if (priv->pipe[1] != -1) {
-		error = close(priv->pipe[1]);
-		assert(error == 0);
-	}
+	checked_close(&priv->pipe[0]);
+	checked_close(&priv->pipe[1]);
+	checked_close(&priv->wakeup[0]);
+	checked_close(&priv->wakeup[1]);
 	if (priv->mevp)
 		mevent_delete(priv->mevp);
 	if (priv->slirp != NULL)
@@ -188,8 +198,13 @@ slirp_cb_clock_get_ns(void *param __unused)
 }
 
 static void
-slirp_cb_notify(void *param __unused)
+slirp_cb_notify(void *param)
 {
+	struct slirp_priv *priv;
+
+	/* Wake up the poll thread.  We assume that priv->mtx is held here. */
+	priv = param;
+	(void)write(priv->wakeup[1], "M", 1);
 }
 
 static void
@@ -310,11 +325,19 @@ slirp_poll_revents(int idx, void *param)
 {
 	struct slirp_priv *priv;
 	struct pollfd *pollfd;
+	short revents;
 
 	priv = param;
+	assert(idx >= 0);
+	assert((unsigned int)idx < priv->npollfds);
 	pollfd = &priv->pollfds[idx];
 	assert(pollfd->fd != -1);
-	return (pollev2slirpev(pollfd->revents));
+
+	/* The kernel may report POLLHUP even if we didn't ask for it. */
+	revents = pollfd->revents;
+	if ((pollfd->events & POLLHUP) == 0)
+		revents &= ~POLLHUP;
+	return (pollev2slirpev(revents));
 }
 
 static void *
@@ -331,9 +354,14 @@ slirp_pollfd_td_loop(void *param)
 
 	pthread_mutex_lock(&priv->mtx);
 	for (;;) {
+		int wakeup;
+
 		for (size_t i = 0; i < priv->npollfds; i++)
 			priv->pollfds[i].fd = -1;
 
+		/* Register for notifications from slirp_cb_notify(). */
+		wakeup = slirp_addpoll_cb(priv->wakeup[0], POLLIN, priv);
+
 		timeout = UINT32_MAX;
 		slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb,
 		    priv);
@@ -341,20 +369,32 @@ slirp_pollfd_td_loop(void *param)
 		pollfds = priv->pollfds;
 		npollfds = priv->npollfds;
 		pthread_mutex_unlock(&priv->mtx);
-		for (;;) {
-			error = poll(pollfds, npollfds, timeout);
-			if (error == -1) {
-				if (errno != EINTR) {
-					EPRINTLN("poll: %s", strerror(errno));
-					exit(1);
-				}
-				continue;
-			}
-			break;
+		error = poll(pollfds, npollfds, timeout);
+		if (error == -1 && errno != EINTR) {
+			EPRINTLN("poll: %s", strerror(errno));
+			exit(1);
 		}
 		pthread_mutex_lock(&priv->mtx);
 		slirp_pollfds_poll_p(priv->slirp, error == -1,
 		    slirp_poll_revents, priv);
+
+		/*
+		 * If we were woken up by the notify callback, mask the
+		 * interrupt.
+		 */
+		if ((pollfds[wakeup].revents & POLLIN) != 0) {
+			ssize_t n;
+
+			do {
+				uint8_t b;
+
+				n = read(priv->wakeup[0], &b, 1);
+			} while (n == 1);
+			if (n != -1 || errno != EAGAIN) {
+				EPRINTLN("read(wakeup): %s", strerror(errno));
+				exit(1);
+			}
+		}
 	}
 }
 
@@ -510,12 +550,18 @@ _slirp_init(struct net_backend *be, const char *devname __unused,
 		free(tofree);
 	}
 
-	error = socketpair(PF_LOCAL, SOCK_DGRAM, 0, priv->pipe);
+	error = socketpair(PF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0, priv->pipe);
 	if (error != 0) {
 		EPRINTLN("Unable to create pipe: %s", strerror(errno));
 		goto err;
 	}
 
+	error = pipe2(priv->wakeup, O_CLOEXEC | O_NONBLOCK);
+	if (error != 0) {
+		EPRINTLN("Unable to create wakeup pipe: %s", strerror(errno));
+		goto err;
+	}
+
 	/*
 	 * Try to avoid dropping buffered packets in slirp_cb_send_packet().
 	 */