git: 3acf7e0da487 - main - if_ovpn: avoid LOR between ovpn and UDP locks

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Thu, 12 Sep 2024 08:37:10 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=3acf7e0da487c08de39d04ac069ae73fb8a488c0

commit 3acf7e0da487c08de39d04ac069ae73fb8a488c0
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2024-09-09 12:14:03 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2024-09-12 07:44:19 +0000

    if_ovpn: avoid LOR between ovpn and UDP locks
    
    When we install the tunneling function we had the ovpn lock, and then took the
    UDP lock. During normal data flow we are called with the UDP lock held and then
    take the ovpn lock.
    This naturally produces a lock order reversal warning.
    
    Avoid this by releasing the ovpn lock before installing the tunnel function.
    This is safe, in that installing the tunnel function does not fail (other than
    with EBUSY, which would mean another thread has already installed the function).
    
    On cleanup the problem is more difficult, in that we cannot reasonably release
    the ovpn lock before we can remove the tunneling function callback.
    
    Solve this by delaying the removal of the tunnel callback until the ovpn_softc
    is cleaned up. It's still safe for ovpn_udp_input() to be caled when all peers
    are removed. That will only increment counters (which are still allocated),
    discover there are no peers and then pass the message on to userspace, if
    any userspace users of the socket remain.
    
    We ensure that the socket object remains valid by holding a reference, which
    we release when we remove the ovpn_softc. This removes the need for per-peer
    reference counting on the socket, so remove that.
    
    Reviewed by:    zlei
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:: https://reviews.freebsd.org/D46616
---
 sys/net/if_ovpn.c | 46 ++++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 22 deletions(-)

diff --git a/sys/net/if_ovpn.c b/sys/net/if_ovpn.c
index 349fa5b7cd13..e2c1dc7f7fc5 100644
--- a/sys/net/if_ovpn.c
+++ b/sys/net/if_ovpn.c
@@ -390,15 +390,11 @@ ovpn_rele_so(struct ovpn_softc *sc)
 
 	has_peers = ovpn_has_peers(sc);
 
-	/* Only remove the tunnel function if we're releasing the socket for
-	 * the last peer. */
-	if (! has_peers)
-		(void)udp_set_kernel_tunneling(sc->so, NULL, NULL, NULL);
-
-	sorele(sc->so);
-
-	if (! has_peers)
-		sc->so = NULL;
+	if (! has_peers) {
+		MPASS(sc->peercount == 0);
+	} else {
+		MPASS(sc->peercount > 0);
+	}
 }
 
 static void
@@ -628,26 +624,23 @@ ovpn_new_peer(struct ifnet *ifp, const nvlist_t *nvl)
 	if (sc->so != NULL && so != sc->so)
 		goto error_locked;
 
-	if (sc->so == NULL)
+	if (sc->so == NULL) {
 		sc->so = so;
+		/*
+		 * Maintain one extra ref so the socket doesn't go away until
+		 * we're destroying the ifp.
+		 */
+		soref(sc->so);
+	}
 
 	/* Insert the peer into the list. */
 	RB_INSERT(ovpn_kpeers, &sc->peers, peer);
 	sc->peercount++;
-	soref(sc->so);
-
-	ret = udp_set_kernel_tunneling(sc->so, ovpn_udp_input, NULL, sc);
-	if (ret == EBUSY) {
-		/* Fine, another peer already set the input function. */
-		ret = 0;
-	}
-	if (ret != 0) {
-		RB_REMOVE(ovpn_kpeers, &sc->peers, peer);
-		sc->peercount--;
-		goto error_locked;
-	}
 
 	OVPN_WUNLOCK(sc);
+	ret = udp_set_kernel_tunneling(sc->so, ovpn_udp_input, NULL, sc);
+	MPASS(ret == 0 || ret == EBUSY);
+	ret = 0;
 
 	goto done;
 
@@ -2493,12 +2486,21 @@ static void
 ovpn_clone_destroy_cb(struct epoch_context *ctx)
 {
 	struct ovpn_softc *sc;
+	int ret __diagused;
 
 	sc = __containerof(ctx, struct ovpn_softc, epoch_ctx);
 
 	MPASS(sc->peercount == 0);
 	MPASS(RB_EMPTY(&sc->peers));
 
+	if (sc->so != NULL) {
+		CURVNET_SET(sc->ifp->if_vnet);
+		ret = udp_set_kernel_tunneling(sc->so, NULL, NULL, NULL);
+		MPASS(ret == 0);
+		sorele(sc->so);
+		CURVNET_RESTORE();
+	}
+
 	COUNTER_ARRAY_FREE(sc->counters, OVPN_COUNTER_SIZE);
 
 	if_free(sc->ifp);