git: 4f02a7d739b3 - main - inpcb: Remove bogus SO_REUSEPORT(_LB) checks in in_pcbbind()

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Thu, 12 Dec 2024 14:27:34 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=4f02a7d739b354eef38e19b25866f64842d69414

commit 4f02a7d739b354eef38e19b25866f64842d69414
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2024-12-12 14:06:06 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-12-12 14:25:15 +0000

    inpcb: Remove bogus SO_REUSEPORT(_LB) checks in in_pcbbind()
    
    This check for SO_REUSEPORT was added way back in commit 52b65dbe85faf.
    Per the commit log, this commit restricted this port-stealing check to
    unicast addresses, and then only if the existing socket does not have
    SO_REUSEPORT set.  In other words, if there exists a socket bound to
    INADDR_ANY, and we bind a socket to INADDR_ANY with the same port, then
    the two sockets need not be owned by the same user if the existing
    socket has SO_REUSEPORT set.
    
    This is a surprising semantic; bugzilla PR 7713 gives some additional
    context.  That PR makes a case for the behaviour described above when
    binding to a multicast address.  But, the SO_REUSEPORT check is only
    applied when binding to a non-multicast address, so it doesn't really
    make sense.  In the PR the committer notes that "unicast applications
    don't set SO_REUSEPORT", which makes some sense, but also refers to
    "multicast applications that bind to INADDR_ANY", which sounds a bit
    suspicious.
    
    OpenBSD performs the multicast check, but not the SO_REUSEPORT check.
    DragonflyBSD removed the SO_REUSEPORT (and INADDR_ANY) checks back in
    2014 (commit 0323d5fde12a4).  NetBSD explicitly copied our logic and
    still has it.
    
    The plot thickens: 20 years later, SO_REUSEPORT_LB was ported from
    DragonflyBSD: this option provides similar semantics to SO_REUSEPORT,
    but for unicast addresses it causes incoming connections/datagrams to be
    distributed among all sockets in the group.  This commit (1a43cff92a20d)
    inverted the check for SO_REUSEPORT while adding one for
    SO_REUSEPORT_LB; this appears to have been inadvertent.  However:
    - apparently no one has noticed that the semantics were changed;
    - sockets belonging to different users can now be bound to the same port
      so long as they belong to a single lbgroup bound to INADDR_ANY, which
      is not correct.
    
    Simply remove the SO_REUSEPORT(_LB) checks, as their original
    justification was dubious and their current implementation is wrong; add
    some tests.
    
    Reviewed by:    glebius
    MFC after:      1 month
    Sponsored by:   Klara, Inc.
    Sponsored by:   Stormshield
    Differential Revision:  https://reviews.freebsd.org/D47832
---
 sys/netinet/in_pcb.c              |   4 +-
 sys/netinet6/in6_pcb.c            |   4 +-
 tests/sys/netinet/socket_afinet.c | 240 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 241 insertions(+), 7 deletions(-)

diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index cfe3fd65e032..432cae68062a 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -925,9 +925,7 @@ in_pcbbind_avail(struct inpcb *inp, const struct in_addr laddr,
 			    (inp->inp_socket->so_type != SOCK_STREAM ||
 			     in_nullhost(t->inp_faddr)) &&
 			    (!in_nullhost(laddr) ||
-			     !in_nullhost(t->inp_laddr) ||
-			     (t->inp_socket->so_options & SO_REUSEPORT) ||
-			     (t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) &&
+			     !in_nullhost(t->inp_laddr)) &&
 			    (inp->inp_cred->cr_uid != t->inp_cred->cr_uid))
 				return (EADDRINUSE);
 		}
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index ada5058e56b3..51f2a29918fe 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -245,9 +245,7 @@ in6_pcbbind_avail(struct inpcb *inp, const struct sockaddr_in6 *sin6,
 			    (inp->inp_socket->so_type != SOCK_STREAM ||
 			     IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) &&
 			    (!IN6_IS_ADDR_UNSPECIFIED(laddr) ||
-			     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) ||
-			     (t->inp_socket->so_options & SO_REUSEPORT) ||
-			     (t->inp_socket->so_options & SO_REUSEPORT_LB) == 0) &&
+			     !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr)) &&
 			    (inp->inp_cred->cr_uid != t->inp_cred->cr_uid))
 				return (EADDRINUSE);
 
diff --git a/tests/sys/netinet/socket_afinet.c b/tests/sys/netinet/socket_afinet.c
index 7076f084719a..ba8c03af46a6 100644
--- a/tests/sys/netinet/socket_afinet.c
+++ b/tests/sys/netinet/socket_afinet.c
@@ -2,6 +2,7 @@
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2019 Bjoern A. Zeeb
+ * Copyright (c) 2024 Stormshield
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -25,11 +26,17 @@
  * SUCH DAMAGE.
  */
 
-#include <sys/cdefs.h>
+#include <sys/param.h>
 #include <sys/socket.h>
+#include <sys/wait.h>
+
 #include <netinet/in.h>
+
 #include <errno.h>
 #include <poll.h>
+#include <pwd.h>
+#include <stdio.h>
+#include <unistd.h>
 
 #include <atf-c.h>
 
@@ -281,6 +288,235 @@ ATF_TC_BODY(socket_afinet_stream_reconnect, tc)
 	ATF_CHECK_EQ(0, rc);
 }
 
+/*
+ * Make sure that unprivileged users can't set the IP_BINDANY or IPV6_BINDANY
+ * socket options.
+ */
+ATF_TC(socket_afinet_bindany);
+ATF_TC_HEAD(socket_afinet_bindany, tc)
+{
+	atf_tc_set_md_var(tc, "require.user", "unprivileged");
+}
+ATF_TC_BODY(socket_afinet_bindany, tc)
+{
+	int s;
+
+	s = socket(AF_INET, SOCK_STREAM, 0);
+	ATF_REQUIRE(s >= 0);
+	ATF_REQUIRE_ERRNO(EPERM,
+	    setsockopt(s, IPPROTO_IP, IP_BINDANY, &(int){1}, sizeof(int)) ==
+	    -1);
+	ATF_REQUIRE(close(s) == 0);
+
+	s = socket(AF_INET, SOCK_DGRAM, 0);
+	ATF_REQUIRE(s >= 0);
+	ATF_REQUIRE_ERRNO(EPERM,
+	    setsockopt(s, IPPROTO_IP, IP_BINDANY, &(int){1}, sizeof(int)) ==
+	    -1);
+	ATF_REQUIRE(close(s) == 0);
+
+	s = socket(AF_INET6, SOCK_STREAM, 0);
+	ATF_REQUIRE(s >= 0);
+	ATF_REQUIRE_ERRNO(EPERM,
+	    setsockopt(s, IPPROTO_IPV6, IPV6_BINDANY, &(int){1}, sizeof(int)) ==
+	    -1);
+	ATF_REQUIRE(close(s) == 0);
+
+	s = socket(AF_INET6, SOCK_DGRAM, 0);
+	ATF_REQUIRE(s >= 0);
+	ATF_REQUIRE_ERRNO(EPERM,
+	    setsockopt(s, IPPROTO_IPV6, IPV6_BINDANY, &(int){1}, sizeof(int)) ==
+	    -1);
+	ATF_REQUIRE(close(s) == 0);
+}
+
+/*
+ * Bind a socket to the specified address, optionally dropping privileges and
+ * setting one of the SO_REUSE* options first.
+ *
+ * Returns true if the bind succeeded, and false if it failed with EADDRINUSE.
+ */
+static bool
+child_bind(const atf_tc_t *tc, int type, struct sockaddr *sa, int opt, bool unpriv)
+{
+	const char *user;
+	pid_t child;
+
+	if (unpriv) {
+		if (!atf_tc_has_config_var(tc, "unprivileged_user"))
+			atf_tc_skip("unprivileged_user not set");
+		user = atf_tc_get_config_var(tc, "unprivileged_user");
+	} else {
+		user = NULL;
+	}
+
+	child = fork();
+	ATF_REQUIRE(child != -1);
+	if (child == 0) {
+		int s;
+
+		if (user != NULL) {
+			struct passwd *passwd;
+
+			passwd = getpwnam(user);
+			if (seteuid(passwd->pw_uid) != 0)
+				_exit(1);
+		}
+
+		s = socket(sa->sa_family, type, 0);
+		if (s < 0)
+			_exit(2);
+		if (bind(s, sa, sa->sa_len) == 0)
+			_exit(3);
+		if (errno != EADDRINUSE)
+			_exit(4);
+		if (opt != 0) {
+			if (setsockopt(s, SOL_SOCKET, opt, &(int){1},
+			    sizeof(int)) != 0)
+				_exit(5);
+		}
+		if (bind(s, sa, sa->sa_len) == 0)
+			_exit(6);
+		if (errno != EADDRINUSE)
+			_exit(7);
+		_exit(0);
+	} else {
+		int status;
+
+		ATF_REQUIRE_EQ(waitpid(child, &status, 0), child);
+		ATF_REQUIRE(WIFEXITED(status));
+		status = WEXITSTATUS(status);
+		ATF_REQUIRE_MSG(status == 0 || status == 6,
+		    "child exited with %d", status);
+		return (status == 6);
+	}
+}
+
+static bool
+child_bind_priv(const atf_tc_t *tc, int type, struct sockaddr *sa, int opt)
+{
+	return (child_bind(tc, type, sa, opt, false));
+}
+
+static bool
+child_bind_unpriv(const atf_tc_t *tc, int type, struct sockaddr *sa, int opt)
+{
+	return (child_bind(tc, type, sa, opt, true));
+}
+
+static int
+bind_socket(int domain, int type, int opt, bool unspec, struct sockaddr *sa)
+{
+	socklen_t slen;
+	int s;
+
+	s = socket(domain, type, 0);
+	ATF_REQUIRE(s >= 0);
+
+	if (domain == AF_INET) {
+		struct sockaddr_in sin;
+
+		bzero(&sin, sizeof(sin));
+		sin.sin_family = AF_INET;
+		sin.sin_len = sizeof(sin);
+		sin.sin_addr.s_addr = htonl(unspec ?
+		    INADDR_ANY : INADDR_LOOPBACK);
+		sin.sin_port = htons(0);
+		ATF_REQUIRE(bind(s, (struct sockaddr *)&sin, sizeof(sin)) == 0);
+
+		slen = sizeof(sin);
+	} else /* if (domain == AF_INET6) */ {
+		struct sockaddr_in6 sin6;
+
+		bzero(&sin6, sizeof(sin6));
+		sin6.sin6_family = AF_INET6;
+		sin6.sin6_len = sizeof(sin6);
+		sin6.sin6_addr = unspec ? in6addr_any : in6addr_loopback;
+		sin6.sin6_port = htons(0);
+		ATF_REQUIRE(bind(s, (struct sockaddr *)&sin6, sizeof(sin6)) == 0);
+
+		slen = sizeof(sin6);
+	}
+
+	if (opt != 0) {
+		ATF_REQUIRE(setsockopt(s, SOL_SOCKET, opt, &(int){1},
+		    sizeof(int)) == 0);
+	}
+
+	ATF_REQUIRE(getsockname(s, sa, &slen) == 0);
+
+	return (s);
+}
+
+static void
+multibind_test(const atf_tc_t *tc, int domain, int type)
+{
+	struct sockaddr_storage ss;
+	int opts[4] = { 0, SO_REUSEADDR, SO_REUSEPORT, SO_REUSEPORT_LB };
+	int s;
+	bool flags[2] = { false, true };
+	bool res;
+
+	for (size_t flagi = 0; flagi < nitems(flags); flagi++) {
+		for (size_t opti = 0; opti < nitems(opts); opti++) {
+			s = bind_socket(domain, type, opts[opti], flags[flagi],
+			    (struct sockaddr *)&ss);
+			for (size_t optj = 0; optj < nitems(opts); optj++) {
+				int opt;
+
+				opt = opts[optj];
+				res = child_bind_priv(tc, type,
+				    (struct sockaddr *)&ss, opt);
+				/*
+				 * Multi-binding is only allowed when both
+				 * sockets have SO_REUSEPORT or SO_REUSEPORT_LB
+				 * set.
+				 */
+				if (opts[opti] != 0 &&
+				    opts[opti] != SO_REUSEADDR && opti == optj)
+					ATF_REQUIRE(res);
+				else
+					ATF_REQUIRE(!res);
+
+				res = child_bind_unpriv(tc, type,
+				    (struct sockaddr *)&ss, opt);
+				/*
+				 * Multi-binding is only allowed when both
+				 * sockets have the same owner.
+				 * 
+				 * XXX-MJ we for some reason permit this when
+				 * binding to the unspecified address, but I
+				 * don't think that's right
+				 */
+				if (flags[flagi] && opts[opti] != 0 &&
+				    opts[opti] != SO_REUSEADDR && opti == optj)
+					ATF_REQUIRE(res);
+				else
+					ATF_REQUIRE(!res);
+			}
+			ATF_REQUIRE(close(s) == 0);
+		}
+	}
+}
+
+/*
+ * Try to bind two sockets to the same address/port tuple.  Under some
+ * conditions this is permitted.
+ */
+ATF_TC(socket_afinet_multibind);
+ATF_TC_HEAD(socket_afinet_multibind, tc)
+{
+	atf_tc_set_md_var(tc, "require.user", "root");
+	atf_tc_set_md_var(tc, "require.config", "unprivileged_user");
+}
+ATF_TC_BODY(socket_afinet_multibind, tc)
+{
+	multibind_test(tc, AF_INET, SOCK_STREAM);
+	multibind_test(tc, AF_INET, SOCK_DGRAM);
+	multibind_test(tc, AF_INET6, SOCK_STREAM);
+	multibind_test(tc, AF_INET6, SOCK_DGRAM);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 	ATF_TP_ADD_TC(tp, socket_afinet);
@@ -289,6 +525,8 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, socket_afinet_poll_no_rdhup);
 	ATF_TP_ADD_TC(tp, socket_afinet_poll_rdhup);
 	ATF_TP_ADD_TC(tp, socket_afinet_stream_reconnect);
+	ATF_TP_ADD_TC(tp, socket_afinet_bindany);
+	ATF_TP_ADD_TC(tp, socket_afinet_multibind);
 
 	return atf_no_error();
 }