[patch] RFC: allow divert from layer 2 ipfw (e.g. bridge)
Julian Elischer
julian at elischer.org
Wed Jul 26 18:35:16 UTC 2006
This code is running on quite a few systems, but only in a very limited
environment that may not exercise all possibilities.
Does anyone have comments or suggestions about changes I should make
before checking this into generic FreeBSD? It was originally written for 4.x but
with 6.x in mind.
It is now running on 6.1 and seems to be ok so far.
Certainly I am interested in hearing from Robert and Luigi, and I am
particularly interested in
what people think about how this will handle locking/SMP difficulties.
-------------- next part --------------
Only in ./sys/i386/compile: MESSAGING_GATEWAY.i386
Only in ./sys/i386/conf: MESSAGING_GATEWAY.i386
diff -upr ../src/sys/net/bridge.c ./sys/net/bridge.c
--- ../src/sys/net/bridge.c Thu May 25 13:03:05 2006
+++ ./sys/net/bridge.c Fri Jun 9 11:19:58 2006
@@ -88,6 +88,7 @@
* - loop detection is still not very robust.
*/
+#include "opt_ipdivert.h"
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
@@ -109,12 +110,15 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <net/route.h>
#include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
+#include <netinet/ip_var.h>
#include <net/bridge.h>
/*--------------------*/
@@ -1107,7 +1111,30 @@ bdg_forward(struct mbuf *m0, struct ifne
if (i == 0) /* a PASS rule. */
goto forward;
- if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
+
+ /* To get here it is either a dummynet thing or a divert/tee */
+ if ((i & IP_FW_DUMMYNET) == 0) {
+#ifdef IPDIVERT
+ struct mbuf *clone = NULL;
+
+ /* Deliver packet to divert input routine */
+ /* Clone packet if we're doing a 'tee' */
+ if ((i & IP_FW_TEE) != 0) {
+ clone = m_dup(m0, M_DONTWAIT);
+ if (clone) {
+ if (clone->m_pkthdr.rcvif) {
+ ip_divert_enqueue_ptr(clone);
+ } else {
+ ip_divert_ptr(clone, 0);
+ }
+ }
+ goto forward;
+ } else {
+ ip_divert_enqueue_ptr(m0);
+ return (NULL);
+ }
+#endif
+ } else if (DUMMYNET_LOADED) {
/*
* Pass the pkt to dummynet, which consumes it.
* If shared, make a copy and keep the original.
Only in ./sys/net: bridge.c~
diff -upr ../src/sys/net/if_ethersubr.c ./sys/net/if_ethersubr.c
--- ../src/sys/net/if_ethersubr.c Thu May 25 13:03:19 2006
+++ ./sys/net/if_ethersubr.c Fri Jun 9 11:31:01 2006
@@ -34,6 +34,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipx.h"
+#include "opt_ipdivert.h"
#include "opt_bdg.h"
#include "opt_mac.h"
#include "opt_netgraph.h"
@@ -67,8 +68,10 @@
#if defined(INET) || defined(INET6)
#include <netinet/in.h>
#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
#include <netinet/if_ether.h>
#include <netinet/ip_fw.h>
+#include <netinet/ip_divert.h>
#include <netinet/ip_dummynet.h>
#endif
#ifdef INET6
@@ -377,6 +380,19 @@ ether_output_frame_pre_netgraph(struct i
return (0);
}
+ if (BDG_ACTIVE(ifp)) {
+ /*
+ * Beware, the bridge code notices the null rcvif and
+ * uses that to identify that it's being called from
+ * ether_output as opposed to ether_input. Yech.
+ */
+ m->m_pkthdr.rcvif = NULL;
+ m = bdg_forward_ptr(m, ifp);
+ if (m != NULL)
+ m_freem(m);
+ return (0);
+ }
+
return ether_output_frame(ifp, m);
}
@@ -396,18 +412,33 @@ ether_output_frame(struct ifnet *ifp, st
#endif
int error;
- if (rule == NULL && BDG_ACTIVE(ifp)) {
- /*
- * Beware, the bridge code notices the null rcvif and
- * uses that identify that it's being called from
- * ether_output as opposd to ether_input. Yech.
- */
- m->m_pkthdr.rcvif = NULL;
- m = bdg_forward_ptr(m, ifp);
- if (m != NULL)
- m_freem(m);
- return (0);
+#ifdef IPDIVERT
+ /*
+ * It's either a dummynet thing or a divert (but not both).
+ */
+ if ((IP_FW_DUMMYNET) == 0) {
+ struct mbuf *clone = NULL;
+
+ /* Deliver packet to divert input routine */
+ /* Clone packet if we're doing a 'tee' */
+ if ((IP_FW_TEE) != 0) {
+ clone = m_dup(m, M_DONTWAIT);
+ if (clone) {
+ if (clone->m_pkthdr.rcvif) {
+ ip_divert_enqueue_ptr(clone);
+ } else {
+ ip_divert_ptr(clone, 0);
+ }
+ }
+ return (1);
+ } else {
+ ip_divert_enqueue_ptr(m);
+ m = NULL;
+ return (0);
+ }
}
+#endif
+
#if defined(INET) || defined(INET6)
if (IPFW_LOADED && ether_ipfw != 0) {
if (ether_ipfw_chk(&m, ifp, &rule, 0) == 0) {
@@ -499,6 +530,33 @@ ether_ipfw_chk(struct mbuf **m0, struct
if (i == IP_FW_PASS) /* a PASS rule. */
return 1;
+#ifdef IPDIVERT
+ /*
+ * It's either a dummynet thing or a divert (but not both).
+ */
+ if ((i & IP_FW_DUMMYNET) == 0) {
+ struct mbuf *clone = NULL;
+
+ /* Deliver packet to divert input routine */
+ /* Clone packet if we're doing a 'tee' */
+ if ((i & IP_FW_TEE) != 0) {
+ clone = m_dup(*m0, M_DONTWAIT);
+ if (clone) {
+ if (clone->m_pkthdr.rcvif) {
+ ip_divert_enqueue_ptr(clone);
+ } else {
+ ip_divert_ptr(clone, 0);
+ }
+ }
+ return (1);
+ } else {
+ ip_divert_ptr(*m0, (*m0)->m_pkthdr.rcvif?1:0);
+ *m0 = NULL;
+ return (0);
+ }
+ }
+#endif
+
if (DUMMYNET_LOADED && (i == IP_FW_DUMMYNET)) {
/*
* Pass the pkt to dummynet, which consumes it.
@@ -656,6 +714,11 @@ ether_demux(struct ifnet *ifp, struct mb
#if defined(INET) || defined(INET6)
struct ip_fw *rule = ip_dn_claim_rule(m);
#endif
+ /* Discard packet if interface is not up */
+ if ((ifp->if_flags & IFF_UP) == 0) {
+ m_freem(m);
+ return;
+ }
KASSERT(ifp != NULL, ("ether_demux: NULL interface pointer"));
@@ -667,6 +730,17 @@ ether_demux(struct ifnet *ifp, struct mb
goto post_stats;
#endif
+
+#ifdef DEV_CARP
+pre_stats:
+#endif
+ if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
+ if (bcmp(etherbroadcastaddr, eh->ether_dhost,
+ sizeof(etherbroadcastaddr)) == 0)
+ m->m_flags |= M_BCAST;
+ else
+ m->m_flags |= M_MCAST;
+ } else {
if (!(BDG_ACTIVE(ifp)) && !(ifp->if_bridge) &&
!((ether_type == ETHERTYPE_VLAN || m->m_flags & M_VLANTAG) &&
ifp->if_nvlans > 0)) {
@@ -711,22 +785,7 @@ ether_demux(struct ifnet *ifp, struct mb
}
}
}
-
-#ifdef DEV_CARP
-pre_stats:
-#endif
- /* Discard packet if interface is not up */
- if ((ifp->if_flags & IFF_UP) == 0) {
- m_freem(m);
- return;
- }
- if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
- if (bcmp(etherbroadcastaddr, eh->ether_dhost,
- sizeof(etherbroadcastaddr)) == 0)
- m->m_flags |= M_BCAST;
- else
- m->m_flags |= M_MCAST;
- }
+ }
if (m->m_flags & (M_BCAST|M_MCAST))
ifp->if_imcasts++;
Only in ./sys/net: if_ethersubr.c~
diff -upr ../src/sys/net/netisr.h ./sys/net/netisr.h
--- ../src/sys/net/netisr.h Thu Jan 6 17:45:35 2005
+++ ./sys/net/netisr.h Thu Jun 8 13:51:56 2006
@@ -50,6 +50,7 @@
*/
#define NETISR_POLL 0 /* polling callback, must be first */
#define NETISR_IP 2 /* same as AF_INET */
+#define NETISR_DIVERT 3 /* For diverting level2 packets. */
#define NETISR_ROUTE 14 /* routing socket */
#define NETISR_AARP 15 /* Appletalk ARP */
#define NETISR_ATALK2 16 /* Appletalk phase 2 */
diff -upr ../src/sys/netinet/ip_divert.c ./sys/netinet/ip_divert.c
--- ../src/sys/netinet/ip_divert.c Wed Nov 16 02:31:22 2005
+++ ./sys/netinet/ip_divert.c Fri Jun 9 12:04:13 2006
@@ -61,7 +61,9 @@
#include <vm/uma.h>
#include <net/if.h>
+#include <net/if_var.h>
#include <net/route.h>
+#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
@@ -110,6 +112,9 @@
static struct inpcbhead divcb;
static struct inpcbinfo divcbinfo;
+static void divertintr(struct mbuf *m);
+static struct ifqueue divertintrq;
+static int div_intrqmax = IFQ_MAXLEN; /* was 50 */
static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */
static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */
@@ -132,6 +137,9 @@ div_init(void)
divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
+ divertintrq.ifq_maxlen = div_intrqmax;
+ mtx_init(&divertintrq.ifq_mtx, "div_inq", NULL, MTX_DEF);
+ netisr_register(NETISR_DIVERT, divertintr, &divertintrq, 0);
}
/*
@@ -262,6 +268,53 @@ divert_packet(struct mbuf *m, int incomi
}
}
+void divert_enqueue(struct mbuf *m);
+/*
+ * Queue a divert-tagged packet on NETISR_DIVERT if some divert pcb's inp_lport matches the tag; otherwise free it.
+ */
+void
+divert_enqueue(struct mbuf *m)
+{
+ struct socket *sa;
+ struct inpcb *inp;
+ u_int16_t nport;
+ struct m_tag *mtag;
+
+ mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL);
+ if (mtag == NULL) { /* untagged packets are logged and dropped */
+ printf("%s: no divert tag\n", __func__);
+ m_freem(m);
+ return;
+ }
+
+ /* XXX Only checks whether any listener exists for this packet.
+ * Scanning divcb here with no lock taken is probably not safe,
+ * as the list could be in the middle of being modified.
+ */
+ sa = NULL;
+ nport = htons((u_int16_t)divert_info(mtag)); /* compared against inp_lport below */
+ LIST_FOREACH(inp, &divcb, inp_list) {
+ if (inp->inp_lport == nport) {
+ sa = inp->inp_socket;
+ break;
+ }
+ }
+ if (sa == NULL) {
+ m_freem(m); /* no matching pcb found: drop instead of queueing */
+ } else {
+ netisr_queue(NETISR_DIVERT, m); /* return value is not checked here */
+ }
+}
+
+static void
+divertintr(struct mbuf *m)
+{ /* Handler registered for NETISR_DIVERT by netisr_register() in div_init(). */
+ if (m == 0 || (m->m_flags & M_PKTHDR) == 0)
+ panic("divertintr");
+ /* Pass the packet to divert_packet() with its 'incoming' argument set to 1. */
+ divert_packet(m, 1);
+}
+
/*
* Deliver packet back into the IP processing machinery.
*
@@ -674,6 +740,7 @@ div_modevent(module_t mod, int type, voi
*/
err = pf_proto_register(PF_INET, &div_protosw);
ip_divert_ptr = divert_packet;
+ ip_divert_enqueue_ptr = divert_enqueue;
break;
case MOD_QUIESCE:
/*
@@ -703,7 +770,9 @@ div_modevent(module_t mod, int type, voi
break;
}
ip_divert_ptr = NULL;
+ ip_divert_enqueue_ptr = NULL;
err = pf_proto_unregister(PF_INET, IPPROTO_DIVERT, SOCK_RAW);
+ netisr_unregister(NETISR_DIVERT);
INP_INFO_WUNLOCK(&divcbinfo);
INP_INFO_LOCK_DESTROY(&divcbinfo);
uma_zdestroy(divcbinfo.ipi_zone);
Only in ./sys/netinet: ip_divert.c~
diff -upr ../src/sys/netinet/ip_divert.h ./sys/netinet/ip_divert.h
--- ../src/sys/netinet/ip_divert.h Tue Oct 19 14:14:57 2004
+++ ./sys/netinet/ip_divert.h Fri Jun 9 11:21:06 2006
@@ -80,8 +80,10 @@ divert_find_info(struct mbuf *m)
return mtag ? divert_info(mtag) : 0;
}
+typedef void ip_divert_packet_enqueue_t(struct mbuf *m);
typedef void ip_divert_packet_t(struct mbuf *m, int incoming);
extern ip_divert_packet_t *ip_divert_ptr;
+extern ip_divert_packet_enqueue_t *ip_divert_enqueue_ptr;
extern void div_init(void);
extern void div_input(struct mbuf *, int);
Only in ./sys/netinet: ip_divert.h~
diff -upr ../src/sys/netinet/ip_fw2.c ./sys/netinet/ip_fw2.c
--- ../src/sys/netinet/ip_fw2.c Fri Jun 9 12:08:46 2006
+++ ./sys/netinet/ip_fw2.c Thu Jun 8 13:51:56 2006
@@ -3046,8 +3046,10 @@ check_body:
case O_TEE: {
struct divert_tag *dt;
+#if 0
if (args->eh) /* not on layer 2 */
break;
+#endif
mtag = m_tag_get(PACKET_TAG_DIVERT,
sizeof(struct divert_tag),
M_NOWAIT);
diff -upr ../src/sys/netinet/ip_fw_pfil.c ./sys/netinet/ip_fw_pfil.c
--- ../src/sys/netinet/ip_fw_pfil.c Sat Feb 11 00:19:37 2006
+++ ./sys/netinet/ip_fw_pfil.c Fri Jun 9 12:06:31 2006
@@ -71,6 +71,7 @@ ip_dn_ruledel_t *ip_dn_ruledel_ptr = NUL
/* Divert hooks. */
ip_divert_packet_t *ip_divert_ptr = NULL;
+ip_divert_packet_enqueue_t *ip_divert_enqueue_ptr = NULL;
/* ng_ipfw hooks. */
ng_ipfw_input_t *ng_ipfw_input_p = NULL;
Only in ./sys/netinet: ip_fw_pfil.c~
--- sys/net/bridge.c.orig Tue Jun 13 13:29:27 2006
+++ sys/net/bridge.c Tue Jun 13 13:31:54 2006
@@ -854,8 +854,16 @@ bridge_in(struct ifnet *ifp, struct mbuf
else
dst = BDG_DROP;
} else {
- if (dst == ifp)
- dst = BDG_DROP;
+ /*
+ * This is so that we can use a "half bridge" and not have
+ * packets discarded just because the destination is out the same
+ * interface. We only use this when we are firewalling it so the
+ * packet will get clobbered by the firewall anyhow before we send it.
+ */
+ if (ifp2sc[ifp->if_index].cluster->ports > 1) {
+ if (dst == ifp)
+ dst = BDG_DROP;
+ }
}
DPRINTF(("%s: %6D ->%6D ty 0x%04x dst %s\n", __func__,
eh->ether_shost, ".",
More information about the freebsd-net
mailing list