git: dd49816b0d66 - main - bpf: avoid panic on multiple readers

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Wed, 09 Apr 2025 08:17:42 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=dd49816b0d66697ec80dac256c2973d881c39784

commit dd49816b0d66697ec80dac256c2973d881c39784
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2025-02-26 12:50:08 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2025-04-09 07:39:01 +0000

    bpf: avoid panic on multiple readers
    
    If we have multiple simultaneous readers on a single /dev/bpf fd it's possible
    for the assertion after the bpf_uiomove() in bpfread() to fail.
    
    Note that the bpf_uiomove() is done outside of the BPFD_LOCK, because uiomove
    may sleep. As a result it's possible for another thread to have already
    reclaimed toe bd_hbuf, thus causing us to fail the assertion.
    Even without INVARIANTS this may provoke panics.
    
    That results (with INVARIANTS) in a panic such as:
    
            login: panic: bpfread: lost bd_hbuf
            cpuid = 13
            time = 1740567635
            KDB: stack backtrace:
            db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame 0xfffffe003972db10
            vpanic() at vpanic+0x136/frame 0xfffffe003972dc40
            panic() at panic+0x43/frame 0xfffffe003972dca0
            bpfread() at bpfread+0x2e8/frame 0xfffffe003972dce0
            devfs_read_f() at devfs_read_f+0xe4/frame 0xfffffe003972dd40
            dofileread() at dofileread+0x80/frame 0xfffffe003972dd90
            sys_read() at sys_read+0xb7/frame 0xfffffe003972de00
            amd64_syscall() at amd64_syscall+0x15a/frame 0xfffffe003972df30
            fast_syscall_common() at fast_syscall_common+0xf8/frame 0xfffffe003972df30
            --- syscall (3, FreeBSD ELF64, read), rip = 0x302787166afa, rsp = 0x302782638a78, rbp = 0x302782638aa0 ---
    
    Also add a test case replicating the known trigger for this panic.
    
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D49135
---
 etc/mtree/BSD.tests.dist           |  2 +
 sys/net/bpf.c                      | 16 ++++----
 tests/sys/net/Makefile             |  1 +
 tests/sys/net/bpf/Makefile         | 15 ++++++++
 tests/sys/net/bpf/bpf.sh           | 67 +++++++++++++++++++++++++++++++++
 tests/sys/net/bpf/bpf_multi_read.c | 76 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 170 insertions(+), 7 deletions(-)

diff --git a/etc/mtree/BSD.tests.dist b/etc/mtree/BSD.tests.dist
index e0c16bd5e570..f197021bd4e9 100644
--- a/etc/mtree/BSD.tests.dist
+++ b/etc/mtree/BSD.tests.dist
@@ -876,6 +876,8 @@
         mqueue
         ..
         net
+            bpf
+                ..
             if_ovpn
                 ccd
                 ..
diff --git a/sys/net/bpf.c b/sys/net/bpf.c
index 263f241bcf5f..a347dbe2eb73 100644
--- a/sys/net/bpf.c
+++ b/sys/net/bpf.c
@@ -1110,13 +1110,15 @@ bpfread(struct cdev *dev, struct uio *uio, int ioflag)
 	error = bpf_uiomove(d, d->bd_hbuf, d->bd_hlen, uio);
 
 	BPFD_LOCK(d);
-	KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
-	d->bd_fbuf = d->bd_hbuf;
-	d->bd_hbuf = NULL;
-	d->bd_hlen = 0;
-	bpf_buf_reclaimed(d);
-	d->bd_hbuf_in_use = 0;
-	wakeup(&d->bd_hbuf_in_use);
+	if (d->bd_hbuf_in_use) {
+		KASSERT(d->bd_hbuf != NULL, ("bpfread: lost bd_hbuf"));
+		d->bd_fbuf = d->bd_hbuf;
+		d->bd_hbuf = NULL;
+		d->bd_hlen = 0;
+		bpf_buf_reclaimed(d);
+		d->bd_hbuf_in_use = 0;
+		wakeup(&d->bd_hbuf_in_use);
+	}
 	BPFD_UNLOCK(d);
 
 	return (error);
diff --git a/tests/sys/net/Makefile b/tests/sys/net/Makefile
index 95ab86156a0a..a76fca4e61fb 100644
--- a/tests/sys/net/Makefile
+++ b/tests/sys/net/Makefile
@@ -15,6 +15,7 @@ ATF_TESTS_SH+=	if_tun_test
 ATF_TESTS_SH+=	if_vlan
 ATF_TESTS_SH+=	if_wg
 
+TESTS_SUBDIRS+=	bpf
 TESTS_SUBDIRS+=	if_ovpn
 TESTS_SUBDIRS+=	routing
 
diff --git a/tests/sys/net/bpf/Makefile b/tests/sys/net/bpf/Makefile
new file mode 100644
index 000000000000..9c8a25b15d16
--- /dev/null
+++ b/tests/sys/net/bpf/Makefile
@@ -0,0 +1,15 @@
+.include <src.opts.mk>
+
+PACKAGE=	tests
+
+TESTSDIR=	${TESTSBASE}/sys/net/bpf
+BINDIR=		${TESTSDIR}
+
+LIBADD+=	nv
+
+PROGS=	bpf_multi_read
+LIBADD.bpf_multi_read+=	pcap
+
+ATF_TESTS_SH=	bpf
+
+.include <bsd.test.mk>
diff --git a/tests/sys/net/bpf/bpf.sh b/tests/sys/net/bpf/bpf.sh
new file mode 100644
index 000000000000..2830c4862de9
--- /dev/null
+++ b/tests/sys/net/bpf/bpf.sh
@@ -0,0 +1,67 @@
+##
+# SPDX-License-Identifier: BSD-2-Clause
+#
+# Copyright (c) 2025 Rubicon Communications, LLC ("Netgate")
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+
+. $(atf_get_srcdir)/../../common/vnet.subr
+
+atf_test_case "multi_read" "cleanup"
+multi_read_head()
+{
+	atf_set descr 'Test multiple readers on /dev/bpf'
+	atf_set require.user root
+}
+
+multi_read_body()
+{
+	vnet_init
+
+	epair=$(vnet_mkepair)
+	ifconfig ${epair}a inet 192.0.2.1/24 up
+
+	vnet_mkjail alcatraz ${epair}b
+	jexec alcatraz ifconfig ${epair}b inet 192.0.2.2/24 up
+
+	atf_check -s exit:0 -o ignore \
+	    ping -c 1 192.0.2.2
+
+	# Start a multi-thread (or multi-process) read on bpf
+	$(atf_get_srcdir)/bpf_multi_read ${epair}a &
+
+	# Generate traffic
+	ping -f 192.0.2.2 >/dev/null 2>&1 &
+
+	# Now let this run for 10 seconds
+	sleep 10
+}
+
+multi_read_cleanup()
+{
+	vnet_cleanup
+}
+
+atf_init_test_cases()
+{
+	atf_add_test_case "multi_read"
+}
diff --git a/tests/sys/net/bpf/bpf_multi_read.c b/tests/sys/net/bpf/bpf_multi_read.c
new file mode 100644
index 000000000000..3a8edd76d623
--- /dev/null
+++ b/tests/sys/net/bpf/bpf_multi_read.c
@@ -0,0 +1,76 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2025 Rubicon Communications, LLC (Netgate)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <pcap.h>
+#include <unistd.h>
+
+static void
+callback(u_char *arg __unused, const struct pcap_pkthdr *hdr __unused,
+    const unsigned char *bytes __unused)
+{
+}
+
+int
+main(int argc, const char **argv)
+{
+	pcap_t *pcap;
+	const char *interface;
+	char errbuf[PCAP_ERRBUF_SIZE] = { 0 };
+	int ret;
+
+	if (argc != 2)
+		err(1, "Usage: %s <interface>\n", argv[0]);
+
+	interface = argv[1];
+
+	pcap = pcap_create(interface, errbuf);
+	if (! pcap)
+		perror("Failed to pcap interface");
+
+	ret = pcap_set_snaplen(pcap, 86);
+	if (ret != 0)
+		perror("Failed to set snaplen");
+
+	ret = pcap_set_timeout(pcap, 100);
+	if (ret != 0)
+		perror("Failed to set timeout");
+
+	ret = pcap_activate(pcap);
+	if (ret != 0)
+		perror("Failed to activate");
+
+	/* So we have two readers on one /dev/bpf fd */
+	fork();
+
+	printf("Interface open\n");
+	pcap_loop(pcap, 0, callback, NULL);
+
+	return (0);
+}