git: d862b165a6d3 - main - bridge: Add support for emulated netmap mode

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Mon, 10 Apr 2023 16:14:32 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=d862b165a6d3436d01c8ae63ab7879d17a5d143a

commit d862b165a6d3436d01c8ae63ab7879d17a5d143a
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2023-04-10 15:18:25 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2023-04-10 16:14:11 +0000

    bridge: Add support for emulated netmap mode
    
    if_bridge receives packets via a special interface, if_bridge_input,
    rather than by if_input.  Thus, netmap's usual hooking of ifnet routines
    does not work as expected.  Instead, modify bridge_input() to pass
    packets directly to netmap when it is enabled.  This applies to both
    locally delivered packets and forwarded packets.
    
    When a netmap application transmits a packet by writing it to the host
    TX ring, the mbuf chain is passed to if_input, which ordinarily points
    to ether_input().  However, when transmitting via if_bridge,
    bridge_input() needs to see the packet again in order to decide whether
    to deliver or forward.  Thus, introduce a new protocol flag,
    M_BRIDGE_INJECT, which 1) causes the packet to be passed to
    bridge_input() again after Ethernet processing, and 2) avoids passing
    the packet back to netmap.  The source MAC address of the packet is used
    to determine the original "receiving" interface.
    
    Reviewed by:    vmaffione
    MFC after:      2 months
    Sponsored by:   Zenarmor
    Sponsored by:   OPNsense
    Sponsored by:   Klara, Inc.
    Differential Revision:  https://reviews.freebsd.org/D38066
---
 share/man/man4/bridge.4 |  24 +++++++++++
 sys/net/ethernet.h      |   1 +
 sys/net/if_bridge.c     | 108 ++++++++++++++++++++++++++++++++++++++++++++----
 sys/net/if_bridgevar.h  |   4 +-
 sys/net/if_ethersubr.c  |   7 +++-
 5 files changed, 134 insertions(+), 10 deletions(-)

diff --git a/share/man/man4/bridge.4 b/share/man/man4/bridge.4
index b4dabcbb79bc..2671bfaf73cc 100644
--- a/share/man/man4/bridge.4
+++ b/share/man/man4/bridge.4
@@ -421,6 +421,29 @@ interface and not to the bridge members.
 Enabling
 .Va net.link.bridge.pfil_local_phys
 will let you do the additional filtering on the physical interface.
+.Sh NETMAP
+.Xr netmap 4
+applications may open a bridge interface in emulated mode.
+The netmap application will receive all packets which arrive from member
+interfaces.
+In particular, packets which would otherwise be forwarded to another
+member interface will be received by the netmap application.
+.Pp
+When the
+.Xr netmap 4
+application transmits a packet to the host stack via the bridge interface,
+.Nm
+receives it and attempts to determine its
+.Ql source
+interface by looking up the source MAC address in the interface's learning
+tables.
+Packets for which no matching source interface is found are dropped and the
+input error counter is incremented.
+If a matching source interface is found,
+.Nm
+treats the packet as though it was received from the corresponding interface
+and handles it normally without passing the packet back to
+.Xr netmap 4 .
 .Sh EXAMPLES
 The following when placed in the file
 .Pa /etc/rc.conf
@@ -495,6 +518,7 @@ ifconfig bridge0 addm fxp0 addm gif0 up
 .Xr gif 4 ,
 .Xr ipf 4 ,
 .Xr ipfw 4 ,
+.Xr netmap 4 ,
 .Xr pf 4 ,
 .Xr ifconfig 8
 .Sh HISTORY
diff --git a/sys/net/ethernet.h b/sys/net/ethernet.h
index 98b02d71da50..e259ab53e77e 100644
--- a/sys/net/ethernet.h
+++ b/sys/net/ethernet.h
@@ -43,6 +43,7 @@
  * Ethernet-specific mbuf flags.
  */
 #define	M_HASFCS	M_PROTO5	/* FCS included at end of frame */
+#define	M_BRIDGE_INJECT	M_PROTO6	/* if_bridge-injected frame */
 
 /*
  * Ethernet CRC32 polynomials (big- and little-endian versions).
diff --git a/sys/net/if_bridge.c b/sys/net/if_bridge.c
index 78be987d681b..f71d7c0cdfd1 100644
--- a/sys/net/if_bridge.c
+++ b/sys/net/if_bridge.c
@@ -267,6 +267,7 @@ struct bridge_softc {
 	uint32_t		sc_brtexceeded;	/* # of cache drops */
 	struct ifnet		*sc_ifaddr;	/* member mac copied from */
 	struct ether_addr	sc_defaddr;	/* Default MAC address */
+	if_input_fn_t		sc_if_input;	/* Saved copy of if_input */
 	struct epoch_context	sc_epoch_ctx;
 };
 
@@ -298,6 +299,7 @@ static int	bridge_altq_transmit(if_t, struct mbuf *);
 #endif
 static void	bridge_qflush(struct ifnet *);
 static struct mbuf *bridge_input(struct ifnet *, struct mbuf *);
+static void	bridge_inject(struct ifnet *, struct mbuf *);
 static int	bridge_output(struct ifnet *, struct mbuf *, struct sockaddr *,
 		    struct rtentry *);
 static int	bridge_enqueue(struct bridge_softc *, struct ifnet *,
@@ -768,6 +770,15 @@ bridge_clone_create(struct if_clone *ifc, char *name, size_t len,
 #ifdef VIMAGE
 	ifp->if_reassign = bridge_reassign;
 #endif
+	sc->sc_if_input = ifp->if_input;	/* ether_input */
+	ifp->if_input = bridge_inject;
+
+	/*
+	 * Allow BRIDGE_INPUT() to pass in packets originating from the bridge
+	 * itself via bridge_inject().  This is required for netmap but
+	 * otherwise has no effect.
+	 */
+	ifp->if_bridge_input = bridge_input;
 
 	BRIDGE_LIST_LOCK();
 	LIST_INSERT_HEAD(&V_bridge_list, sc, sc_list);
@@ -2355,6 +2366,19 @@ bridge_forward(struct bridge_softc *sc, struct bridge_iflist *sbif,
 	    sbif->bif_stp.bp_state == BSTP_IFSTATE_LEARNING)
 		goto drop;
 
+#ifdef DEV_NETMAP
+	/*
+	 * Hand the packet to netmap only if it wasn't injected by netmap
+	 * itself.
+	 */
+	if ((m->m_flags & M_BRIDGE_INJECT) == 0 &&
+	    (if_getcapenable(ifp) & IFCAP_NETMAP) != 0) {
+		ifp->if_input(ifp, m);
+		return;
+	}
+	m->m_flags &= ~M_BRIDGE_INJECT;
+#endif
+
 	/*
 	 * At this point, the port either doesn't participate
 	 * in spanning tree or it is in the forwarding state.
@@ -2461,7 +2485,7 @@ drop:
 static struct mbuf *
 bridge_input(struct ifnet *ifp, struct mbuf *m)
 {
-	struct bridge_softc *sc = ifp->if_bridge;
+	struct bridge_softc *sc;
 	struct bridge_iflist *bif, *bif2;
 	struct ifnet *bifp;
 	struct ether_header *eh;
@@ -2471,11 +2495,31 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 
 	NET_EPOCH_ASSERT();
 
-	if ((sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
-		return (m);
+	eh = mtod(m, struct ether_header *);
+	vlan = VLANTAGOF(m);
 
+	sc = ifp->if_bridge;
+	if (sc == NULL) {
+		/*
+		 * This packet originated from the bridge itself, so it must
+		 * have been transmitted by netmap.  Derive the "source"
+		 * interface from the source address and drop the packet if the
+		 * source address isn't known.
+		 */
+		KASSERT((m->m_flags & M_BRIDGE_INJECT) != 0,
+		    ("%s: ifnet %p missing a bridge softc", __func__, ifp));
+		sc = if_getsoftc(ifp);
+		ifp = bridge_rtlookup(sc, eh->ether_shost, vlan);
+		if (ifp == NULL) {
+			if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1);
+			m_freem(m);
+			return (NULL);
+		}
+		m->m_pkthdr.rcvif = ifp;
+	}
 	bifp = sc->sc_ifp;
-	vlan = VLANTAGOF(m);
+	if ((bifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
+		return (m);
 
 	/*
 	 * Implement support for bridge monitoring. If this flag has been
@@ -2496,8 +2540,6 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 		return (m);
 	}
 
-	eh = mtod(m, struct ether_header *);
-
 	bridge_span(sc, m);
 
 	if (m->m_flags & (M_BCAST|M_MCAST)) {
@@ -2526,6 +2568,18 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 		/* Perform the bridge forwarding function with the copy. */
 		bridge_forward(sc, bif, mc);
 
+#ifdef DEV_NETMAP
+		/*
+		 * If netmap is enabled and has not already seen this packet,
+		 * then it will be consumed by bridge_forward().
+		 */
+		if ((if_getcapenable(bifp) & IFCAP_NETMAP) != 0 &&
+		    (m->m_flags & M_BRIDGE_INJECT) == 0) {
+			m_freem(m);
+			return (NULL);
+		}
+#endif
+
 		/*
 		 * Reinject the mbuf as arriving on the bridge so we have a
 		 * chance at claiming multicast packets. We can not loop back
@@ -2542,7 +2596,8 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 		}
 		if (mc2 != NULL) {
 			mc2->m_pkthdr.rcvif = bifp;
-			(*bifp->if_input)(bifp, mc2);
+			mc2->m_flags &= ~M_BRIDGE_INJECT;
+			sc->sc_if_input(bifp, mc2);
 		}
 
 		/* Return the original packet for local processing. */
@@ -2570,6 +2625,18 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 #define	PFIL_HOOKED_INET6	false
 #endif
 
+#ifdef DEV_NETMAP
+#define	GRAB_FOR_NETMAP(ifp, m) do {					\
+	if ((if_getcapenable(ifp) & IFCAP_NETMAP) != 0 &&		\
+	    ((m)->m_flags & M_BRIDGE_INJECT) == 0) {			\
+		(ifp)->if_input(ifp, m);				\
+		return (NULL);						\
+	}								\
+} while (0)
+#else
+#define	GRAB_FOR_NETMAP(ifp, m)
+#endif
+
 #define GRAB_OUR_PACKETS(iface)						\
 	if ((iface)->if_type == IFT_GIF)				\
 		continue;						\
@@ -2592,7 +2659,9 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 		/* It's passing over or to the bridge, locally. */	\
 		ETHER_BPF_MTAP(bifp, m);				\
 		if_inc_counter(bifp, IFCOUNTER_IPACKETS, 1);		\
-		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); \
+		if_inc_counter(bifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);\
+		/* Hand the packet over to netmap if necessary. */	\
+		GRAB_FOR_NETMAP(bifp, m);				\
 		/* Filter on the physical interface. */			\
 		if (V_pfil_local_phys && (PFIL_HOOKED_IN(V_inet_pfil_head) || \
 		    PFIL_HOOKED_INET6)) {				\
@@ -2635,6 +2704,7 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 #undef CARP_CHECK_WE_ARE_DST
 #undef CARP_CHECK_WE_ARE_SRC
 #undef PFIL_HOOKED_INET6
+#undef GRAB_FOR_NETMAP
 #undef GRAB_OUR_PACKETS
 
 	/* Perform the bridge forwarding function. */
@@ -2643,6 +2713,28 @@ bridge_input(struct ifnet *ifp, struct mbuf *m)
 	return (NULL);
 }
 
+/*
+ * Inject a packet back into the host ethernet stack.  This will generally only
+ * be used by netmap when an application writes to the host TX ring.  The
+ * M_BRIDGE_INJECT flag ensures that the packet is re-routed to the bridge
+ * interface after ethernet processing.
+ */
+static void
+bridge_inject(struct ifnet *ifp, struct mbuf *m)
+{
+	struct bridge_softc *sc;
+
+	KASSERT((if_getcapenable(ifp) & IFCAP_NETMAP) != 0,
+	    ("%s: iface %s is not running in netmap mode",
+	    __func__, if_name(ifp)));
+	KASSERT((m->m_flags & M_BRIDGE_INJECT) == 0,
+	    ("%s: mbuf %p has M_BRIDGE_INJECT set", __func__, m));
+
+	m->m_flags |= M_BRIDGE_INJECT;
+	sc = if_getsoftc(ifp);
+	sc->sc_if_input(ifp, m);
+}
+
 /*
  * bridge_broadcast:
  *
diff --git a/sys/net/if_bridgevar.h b/sys/net/if_bridgevar.h
index 69b9aa33f133..5696e8a62ae0 100644
--- a/sys/net/if_bridgevar.h
+++ b/sys/net/if_bridgevar.h
@@ -309,8 +309,10 @@ struct ifbpstpconf {
 	KASSERT((_ifp)->if_bridge_input != NULL,	\
 	    ("%s: if_bridge not loaded!", __func__));	\
 	_m = (*(_ifp)->if_bridge_input)(_ifp, _m);	\
-	if (_m != NULL)					\
+	if (_m != NULL)	{				\
 		_ifp = _m->m_pkthdr.rcvif;		\
+		m->m_flags &= ~M_BRIDGE_INJECT;		\
+	}						\
 } while (0)
 
 #define BRIDGE_OUTPUT(_ifp, _m, _err)	do {    	\
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 34ff4ac22e7f..839bae8e9d43 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -667,10 +667,15 @@ ether_input_internal(struct ifnet *ifp, struct mbuf *m)
 
 	/*
 	 * Allow if_bridge(4) to claim this frame.
+	 *
 	 * The BRIDGE_INPUT() macro will update ifp if the bridge changed it
 	 * and the frame should be delivered locally.
+	 *
+	 * If M_BRIDGE_INJECT is set, the packet was received directly by the
+	 * bridge via netmap, so "ifp" is the bridge itself and the packet
+	 * should be re-examined.
 	 */
-	if (ifp->if_bridge != NULL) {
+	if (ifp->if_bridge != NULL || (m->m_flags & M_BRIDGE_INJECT) != 0) {
 		m->m_flags &= ~M_PROMISC;
 		BRIDGE_INPUT(ifp, m);
 		if (m == NULL) {