FIB separation

Alexander V. Chernikov melifaro at ipfw.ru
Sat Jul 16 16:21:57 UTC 2011


-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hiroki Sato wrote:
> Vlad Galu <dudu at dudu.ro> wrote
>   in <A718ADB2-EC52-462C-A114-85053F1B2E55 at dudu.ro>:
> 
> du> Hello,
> du>
> du> A couple of years ago, Stef Walter proposed a patch[1] that enforced
> du> the scope of routing messages. The general consensus was that the best
> du> approach would be the OpenBSD way - transporting the FIB number in the
> du> message and letting the user applications filter out unwanted
> du> messages.
> du>
> du> Are there any plans to tackle this before 9.0?
> 
>  I am looking into this and investigating other possible extensions in
>  rtsock messages such as addition of a fib member to rt_msghdr.  I am
>  not sure it can be done before 9.0, though...
Actually, there was an off-list discussion with bz@ and julian@ about
interface fibs and rtsock changes several weeks ago.

Initial messages:
http://lists.freebsd.org/pipermail/freebsd-net/2011-June/029040.html

I've got 3 different patches:
1) a straightforward kern/134931 fix (no fib in rtsock, no ABI
breakage, sent to bz@)
2) adding the fib to rtsock, with rtsock versioning and other ABI-preserving tricks
3) adding a special RTA which can contain TLV pairs, with a single defined
TLV with routing socket

As a result of the discussion, the first patch was sent to bz@. Since the patches
from kern/134931 are outdated, I am attaching it here.

It is very much like the original patch from kern/134931. The only
difference is that it uses the PACKET_TAG_RTSOCKFAM mbuf_tag more heavily.
This is required to keep raw_input() with the same number of parameters.
Actually, it looks rather hackish now.


> 
> -- Hiroki

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.14 (FreeBSD)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk4hugsACgkQwcJ4iSZ1q2nd1gCcDAgOIEjNbunK9QeADDEvyMa8
WtYAn1rlwUMzeSh1nX8o7Pw5TpZsfCJx
=TsVz
-----END PGP SIGNATURE-----
-------------- next part --------------
Index: netinet/in.c
===================================================================
--- netinet/in.c	(revision 223741)
+++ netinet/in.c	(working copy)
@@ -1009,7 +1009,7 @@ static void in_addralias_rtmsg(int cmd, struct in_
 			(struct sockaddr *)&target->ia_addr;
 		rt_newaddrmsg(cmd, 
 			      (struct ifaddr *)target,
-			      0, &msg_rt);
+			      0, &msg_rt, RT_ALLFIBS);
 		RTFREE(pfx_ro.ro_rt);
 	}
 	return;
Index: net/route.c
===================================================================
--- net/route.c	(revision 223741)
+++ net/route.c	(working copy)
@@ -384,7 +384,7 @@ miss:
 		 */
 		bzero(&info, sizeof(info));
 		info.rti_info[RTAX_DST] = dst;
-		rt_missmsg(msgtype, &info, 0, err);
+		rt_missmsg(msgtype, &info, 0, err, fibnum);
 	}	
 done:
 	if (newrt)
@@ -609,7 +609,7 @@ out:
 	info.rti_info[RTAX_GATEWAY] = gateway;
 	info.rti_info[RTAX_NETMASK] = netmask;
 	info.rti_info[RTAX_AUTHOR] = src;
-	rt_missmsg(RTM_REDIRECT, &info, flags, error);
+	rt_missmsg(RTM_REDIRECT, &info, flags, error, fibnum);
 	if (ifa != NULL)
 		ifa_free(ifa);
 }
@@ -1522,7 +1522,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, in
 			}
 			RT_ADDREF(rt);
 			RT_UNLOCK(rt);
-			rt_newaddrmsg(cmd, ifa, error, rt);
+			rt_newaddrmsg(cmd, ifa, error, rt, fibnum);
 			RT_LOCK(rt);
 			RT_REMREF(rt);
 			if (cmd == RTM_DELETE) {
Index: net/route.h
===================================================================
--- net/route.h	(revision 223741)
+++ net/route.h	(working copy)
@@ -303,6 +303,11 @@ struct rt_addrinfo {
 	struct	ifnet *rti_ifp;
 };
 
+struct rt_dispatch_ctx {
+	unsigned short family;  /* Socket family */
+	int            fibnum;  /* FIB for message or -1 for all */
+};	
+
 /*
  * This macro returns the size of a struct sockaddr when passed
  * through a routing socket. Basically we round up sa_len to
@@ -317,6 +322,8 @@ struct rt_addrinfo {
 
 #ifdef _KERNEL
 
+#define RT_ALLFIBS		-1
+
 #define RT_LINK_IS_UP(ifp)	(!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
 				 || (ifp)->if_link_state == LINK_STATE_UP)
 
@@ -364,8 +371,8 @@ struct ifmultiaddr;
 void	 rt_ieee80211msg(struct ifnet *, int, void *, size_t);
 void	 rt_ifannouncemsg(struct ifnet *, int);
 void	 rt_ifmsg(struct ifnet *);
-void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
-void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
+void	 rt_missmsg(int, struct rt_addrinfo *, int, int, int);
+void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *, int);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
 int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
 void 	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
Index: net/rtsock.c
===================================================================
--- net/rtsock.c	(revision 223741)
+++ net/rtsock.c	(working copy)
@@ -159,7 +159,7 @@ static void	rt_setmetrics(u_long which, const stru
 			struct rt_metrics_lite *out);
 static void	rt_getmetrics(const struct rt_metrics_lite *in,
 			struct rt_metrics *out);
-static void	rt_dispatch(struct mbuf *, const struct sockaddr *);
+static void	rt_dispatch(struct mbuf *, const struct sockaddr *, int);
 
 static struct netisr_handler rtsock_nh = {
 	.nh_name = "rtsock",
@@ -200,17 +200,16 @@ static void
 rts_input(struct mbuf *m)
 {
 	struct sockproto route_proto;
-	unsigned short *family;
+	struct rt_dispatch_ctx *ctx;
 	struct m_tag *tag;
 
 	route_proto.sp_family = PF_ROUTE;
-	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
+	route_proto.sp_protocol = 0;
+	tag = m_tag_find(m, PACKET_TAG_RTSOCK, NULL);
 	if (tag != NULL) {
-		family = (unsigned short *)(tag + 1);
-		route_proto.sp_protocol = *family;
-		m_tag_delete(m, tag);
-	} else
-		route_proto.sp_protocol = 0;
+		ctx = (struct rt_dispatch_ctx*)(tag + 1);
+		route_proto.sp_protocol = ctx->family;
+	}
 
 	raw_input(m, &route_proto, &route_src);
 }
@@ -892,10 +891,10 @@ flush:
 			 */
 			unsigned short family = rp->rcb_proto.sp_family;
 			rp->rcb_proto.sp_family = 0;
-			rt_dispatch(m, info.rti_info[RTAX_DST]);
+			rt_dispatch(m, info.rti_info[RTAX_DST], so->so_fibnum);
 			rp->rcb_proto.sp_family = family;
 		} else
-			rt_dispatch(m, info.rti_info[RTAX_DST]);
+			rt_dispatch(m, info.rti_info[RTAX_DST], so->so_fibnum);
 	}
 	/* info.rti_info[RTAX_DST] (used above) can point inside of rtm */
 	if (rtm)
@@ -1127,7 +1126,7 @@ again:
  * destination.
  */
 void
-rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
+rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error, int fibnum)
 {
 	struct rt_msghdr *rtm;
 	struct mbuf *m;
@@ -1142,7 +1141,7 @@ void
 	rtm->rtm_flags = RTF_DONE | flags;
 	rtm->rtm_errno = error;
 	rtm->rtm_addrs = rtinfo->rti_addrs;
-	rt_dispatch(m, sa);
+	rt_dispatch(m, sa, fibnum);
 }
 
 /*
@@ -1167,7 +1166,7 @@ rt_ifmsg(struct ifnet *ifp)
 	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
 	ifm->ifm_data = ifp->if_data;
 	ifm->ifm_addrs = 0;
-	rt_dispatch(m, NULL);
+	rt_dispatch(m, NULL, RT_ALLFIBS);
 }
 
 /*
@@ -1179,7 +1178,7 @@ rt_ifmsg(struct ifnet *ifp)
  * copies of it.
  */
 void
-rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
+rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt, int fibnum)
 {
 	struct rt_addrinfo info;
 	struct sockaddr *sa = NULL;
@@ -1237,7 +1236,7 @@ void
 			rtm->rtm_errno = error;
 			rtm->rtm_addrs = info.rti_addrs;
 		}
-		rt_dispatch(m, sa);
+		rt_dispatch(m, sa, fibnum);
 	}
 }
 
@@ -1273,7 +1272,7 @@ rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
 	    __func__));
 	ifmam->ifmam_index = ifp->if_index;
 	ifmam->ifmam_addrs = info.rti_addrs;
-	rt_dispatch(m, ifma->ifma_addr);
+	rt_dispatch(m, ifma->ifma_addr, RT_ALLFIBS);
 }
 
 static struct mbuf *
@@ -1333,7 +1332,7 @@ rt_ieee80211msg(struct ifnet *ifp, int what, void
 		if (m->m_flags & M_PKTHDR)
 			m->m_pkthdr.len += data_len;
 		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
-		rt_dispatch(m, NULL);
+		rt_dispatch(m, NULL, RT_ALLFIBS);
 	}
 }
 
@@ -1349,27 +1348,30 @@ rt_ifannouncemsg(struct ifnet *ifp, int what)
 
 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
 	if (m != NULL)
-		rt_dispatch(m, NULL);
+		rt_dispatch(m, NULL, RT_ALLFIBS);
 }
 
 static void
-rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
+rt_dispatch(struct mbuf *m, const struct sockaddr *sa, int fibnum)
 {
+	struct rt_dispatch_ctx *ctx;
 	struct m_tag *tag;
 
 	/*
 	 * Preserve the family from the sockaddr, if any, in an m_tag for
 	 * use when injecting the mbuf into the routing socket buffer from
-	 * the netisr.
+	 * the netisr. Additionally save the fibnum if needed.
 	 */
-	if (sa != NULL) {
-		tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
-		    M_NOWAIT);
+	if (sa != NULL || fibnum >= 0) {
+		tag = m_tag_get(PACKET_TAG_RTSOCK, 
+		                sizeof(struct rt_dispatch_ctx*), M_NOWAIT);
 		if (tag == NULL) {
 			m_freem(m);
 			return;
 		}
-		*(unsigned short *)(tag + 1) = sa->sa_family;
+		ctx = (struct rt_dispatch_ctx*)(tag + 1);
+		ctx->family = sa->sa_family;
+		ctx->fibnum = fibnum;
 		m_tag_prepend(m, tag);
 	}
 #ifdef VIMAGE
Index: net/raw_usrreq.c
===================================================================
--- net/raw_usrreq.c	(revision 223741)
+++ net/raw_usrreq.c	(working copy)
@@ -48,6 +48,7 @@
 #include <net/if.h>
 #include <net/raw_cb.h>
 #include <net/vnet.h>
+#include <net/route.h>
 
 MTX_SYSINIT(rawcb_mtx, &rawcb_mtx, "rawcb", MTX_DEF);
 
@@ -74,7 +75,18 @@ raw_input(struct mbuf *m0, struct sockproto *proto
 	struct rawcb *rp;
 	struct mbuf *m = m0;
 	struct socket *last;
+	struct rt_dispatch_ctx *ctx;
+	struct m_tag *tag = NULL;
+	int fibnum = RT_ALLFIBS;
 
+	if (proto->sp_family == PF_ROUTE) {
+		tag = m_tag_find(m, PACKET_TAG_RTSOCK, NULL);
+		if (tag != NULL) {
+			ctx = (struct rt_dispatch_ctx*)(tag + 1);
+			fibnum = ctx->fibnum;
+		}
+	}
+
 	last = 0;
 	mtx_lock(&rawcb_mtx);
 	LIST_FOREACH(rp, &V_rawcb_list, list) {
@@ -83,6 +95,10 @@ raw_input(struct mbuf *m0, struct sockproto *proto
 		if (rp->rcb_proto.sp_protocol  &&
 		    rp->rcb_proto.sp_protocol != proto->sp_protocol)
 			continue;
+		if ((proto->sp_family == PF_ROUTE) && (fibnum >= 0) && 
+				(rp->rcb_socket != NULL) &&
+				(fibnum != rp->rcb_socket->so_fibnum))
+			continue; 
 		if (last) {
 			struct mbuf *n;
 			n = m_copy(m, 0, (int)M_COPYALL);
Index: netinet6/in6.c
===================================================================
--- netinet6/in6.c	(revision 223741)
+++ netinet6/in6.c	(working copy)
@@ -1280,7 +1280,7 @@ in6_purgeaddr(struct ifaddr *ifa)
 	rt_mask(&rt0) = (struct sockaddr *)&mask;
 	rt_key(&rt0) = (struct sockaddr *)&addr;
 	rt0.rt_flags = RTF_HOST | RTF_STATIC;
-	rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0);
+	rt_newaddrmsg(RTM_DELETE, ifa, 0, &rt0, RT_ALLFIBS);
 
 	/*
 	 * leave from multicast groups we have joined for the interface
@@ -1858,7 +1858,7 @@ in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *i
 		rt_mask(&rt) = (struct sockaddr *)&mask;
 		rt_key(&rt) = (struct sockaddr *)&addr;
 		rt.rt_flags = RTF_UP | RTF_HOST | RTF_STATIC;
-		rt_newaddrmsg(RTM_ADD, &ia->ia_ifa, 0, &rt);
+		rt_newaddrmsg(RTM_ADD, &ia->ia_ifa, 0, &rt, RT_ALLFIBS);
 	}
 
 	return (error);
Index: netinet6/nd6_rtr.c
===================================================================
--- netinet6/nd6_rtr.c	(revision 223741)
+++ netinet6/nd6_rtr.c	(working copy)
@@ -458,7 +458,7 @@ nd6_rtmsg(int cmd, struct rtentry *rt)
 	} else
 		ifa = NULL;
 
-	rt_missmsg(cmd, &info, rt->rt_flags, 0);
+	rt_missmsg(cmd, &info, rt->rt_flags, 0, RT_ALLFIBS);
 	if (ifa != NULL)
 		ifa_free(ifa);
 }
Index: sys/mbuf.h
===================================================================
--- sys/mbuf.h	(revision 223741)
+++ sys/mbuf.h	(working copy)
@@ -945,7 +945,7 @@ struct mbuf	*m_unshare(struct mbuf *, int how);
 #define	PACKET_TAG_IPFORWARD			18 /* ipforward info */
 #define	PACKET_TAG_MACLABEL	(19 | MTAG_PERSISTENT) /* MAC label */
 #define	PACKET_TAG_PF				21 /* PF + ALTQ information */
-#define	PACKET_TAG_RTSOCKFAM			25 /* rtsock sa family */
+#define	PACKET_TAG_RTSOCK			25 /* rtsock info */
 #define	PACKET_TAG_IPOPTIONS			27 /* Saved IP options */
 #define	PACKET_TAG_CARP				28 /* CARP info */
 #define	PACKET_TAG_IPSEC_NAT_T_PORTS		29 /* two uint16_t */


More information about the freebsd-net mailing list