svn commit: r347375 - in head/sys: net netinet
Gleb Smirnoff
glebius at FreeBSD.org
Wed May 8 23:39:26 UTC 2019
Author: glebius
Date: Wed May 8 23:39:24 2019
New Revision: 347375
URL: https://svnweb.freebsd.org/changeset/base/347375
Log:
The existence of PCB route caching doesn't allow us to use the new fast
route lookup KPI in ip_output() the way it is already used in ip_forward().
However, when no PCB is provided we can use the fast KPI, gaining a
performance advantage.
A typical case where ip_output() is called without a PCB pointer is a
sendto(2) on an unconnected UDP socket. In practice DNS servers do
this.
Reviewed by: melifaro
Differential Revision: https://reviews.freebsd.org/D19804
Modified:
head/sys/net/route.h
head/sys/net/route_var.h
head/sys/netinet/in_fib.c
head/sys/netinet/in_fib.h
head/sys/netinet/ip_output.c
Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h Wed May 8 23:24:47 2019 (r347374)
+++ head/sys/net/route.h Wed May 8 23:39:24 2019 (r347375)
@@ -210,6 +210,7 @@ struct rtentry {
#define NHF_DEFAULT 0x0080 /* Default route */
#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
+#define NHF_HOST 0x0400 /* RTF_HOST */
/* Nexthop request flags */
#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
Modified: head/sys/net/route_var.h
==============================================================================
--- head/sys/net/route_var.h Wed May 8 23:24:47 2019 (r347374)
+++ head/sys/net/route_var.h Wed May 8 23:39:24 2019 (r347375)
@@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags)
uint16_t res;
res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
+ res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0;
res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;
Modified: head/sys/netinet/in_fib.c
==============================================================================
--- head/sys/netinet/in_fib.c Wed May 8 23:24:47 2019 (r347374)
+++ head/sys/netinet/in_fib.c Wed May 8 23:39:24 2019 (r347375)
@@ -96,7 +96,6 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in
uint32_t flags, struct nhop4_extended *pnh4)
{
struct sockaddr_in *gw;
- struct in_ifaddr *ia;
if ((flags & NHR_IFAIF) != 0)
pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
@@ -113,10 +112,8 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in
gw = (struct sockaddr_in *)rt_key(rte);
if (gw->sin_addr.s_addr == 0)
pnh4->nh_flags |= NHF_DEFAULT;
- /* XXX: Set RTF_BROADCAST if GW address is broadcast */
-
- ia = ifatoia(rte->rt_ifa);
- pnh4->nh_src = IA_SIN(ia)->sin_addr;
+ pnh4->nh_ia = ifatoia(rte->rt_ifa);
+ pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
}
/*
Modified: head/sys/netinet/in_fib.h
==============================================================================
--- head/sys/netinet/in_fib.h Wed May 8 23:24:47 2019 (r347374)
+++ head/sys/netinet/in_fib.h Wed May 8 23:39:24 2019 (r347375)
@@ -43,12 +43,13 @@ struct nhop4_basic {
/* Extended nexthop info used for control protocols */
struct nhop4_extended {
struct ifnet *nh_ifp; /* Logical egress interface */
+ struct in_ifaddr *nh_ia; /* Associated address */
uint16_t nh_mtu; /* nexthop mtu */
uint16_t nh_flags; /* nhop flags */
uint8_t spare[4];
struct in_addr nh_addr; /* GW/DST IPv4 address */
struct in_addr nh_src; /* default source IPv4 address */
- uint64_t spare2[2];
+ uint64_t spare2;
};
int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c Wed May 8 23:24:47 2019 (r347374)
+++ head/sys/netinet/ip_output.c Wed May 8 23:39:24 2019 (r347375)
@@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/in_fib.h>
#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
@@ -227,13 +228,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
int hlen = sizeof (struct ip);
int mtu;
int error = 0;
- struct sockaddr_in *dst;
+ struct sockaddr_in *dst, sin;
const struct sockaddr_in *gw;
struct in_ifaddr *ia;
+ struct in_addr src;
int isbroadcast;
uint16_t ip_len, ip_off;
- struct route iproute;
- struct rtentry *rte; /* cache for ro->ro_rt */
uint32_t fibnum;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
int no_route_but_check_spd = 0;
@@ -252,11 +252,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
#endif
}
- if (ro == NULL) {
- ro = &iproute;
- bzero(ro, sizeof (*ro));
- }
-
if (opt) {
int len = 0;
m = ip_insertoptions(m, opt, &len);
@@ -281,26 +276,28 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou
/*
* dst/gw handling:
*
- * dst can be rewritten but always points to &ro->ro_dst.
* gw is readonly but can point either to dst OR rt_gateway,
* therefore we need restore gw if we're redoing lookup.
*/
- gw = dst = (struct sockaddr_in *)&ro->ro_dst;
fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
- rte = ro->ro_rt;
- if (rte == NULL) {
+ if (ro != NULL)
+ dst = (struct sockaddr_in *)&ro->ro_dst;
+ else
+ dst = &sin;
+ if (ro == NULL || ro->ro_rt == NULL) {
bzero(dst, sizeof(*dst));
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = ip->ip_dst;
}
+ gw = dst;
NET_EPOCH_ENTER(et);
again:
/*
* Validate route against routing table additions;
* a better/more specific route might have been added.
*/
- if (inp)
+ if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
/*
* If there is a cached route,
@@ -310,15 +307,12 @@ again:
* cache with IPv6.
* Also check whether routing cache needs invalidation.
*/
- rte = ro->ro_rt;
- if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
- rte->rt_ifp == NULL ||
- !RT_LINK_IS_UP(rte->rt_ifp) ||
- dst->sin_family != AF_INET ||
- dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+ if (ro != NULL && ro->ro_rt != NULL &&
+ ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
+ dst->sin_family != AF_INET ||
+ dst->sin_addr.s_addr != ip->ip_dst.s_addr))
RO_INVALIDATE_CACHE(ro);
- rte = NULL;
- }
ia = NULL;
/*
* If routing to interface only, short circuit routing lookup.
@@ -338,8 +332,10 @@ again:
ip->ip_dst.s_addr = INADDR_BROADCAST;
dst->sin_addr = ip->ip_dst;
ifp = ia->ia_ifp;
+ mtu = ifp->if_mtu;
ip->ip_ttl = 1;
isbroadcast = 1;
+ src = IA_SIN(ia)->sin_addr;
} else if (flags & IP_ROUTETOIF) {
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
M_GETFIB(m)))) == NULL &&
@@ -350,9 +346,11 @@ again:
goto bad;
}
ifp = ia->ia_ifp;
+ mtu = ifp->if_mtu;
ip->ip_ttl = 1;
isbroadcast = ifp->if_flags & IFF_BROADCAST ?
in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
+ src = IA_SIN(ia)->sin_addr;
} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
imo != NULL && imo->imo_multicast_ifp != NULL) {
/*
@@ -360,15 +358,17 @@ again:
* packets if the interface is specified.
*/
ifp = imo->imo_multicast_ifp;
+ mtu = ifp->if_mtu;
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
isbroadcast = 0; /* fool gcc */
- } else {
- /*
- * We want to do any cloning requested by the link layer,
- * as this is probably required in all cases for correct
- * operation (as it is for ARP).
- */
- if (rte == NULL) {
+ src = IA_SIN(ia)->sin_addr;
+ } else if (ro != NULL) {
+ if (ro->ro_rt == NULL) {
+ /*
+ * We want to do any cloning requested by the link
+ * layer, as this is probably required in all cases
+ * for correct operation (as it is for ARP).
+ */
#ifdef RADIX_MPATH
rtalloc_mpath_fib(ro,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
@@ -376,12 +376,47 @@ again:
#else
in_rtalloc_ign(ro, 0, fibnum);
#endif
- rte = ro->ro_rt;
+ if (ro->ro_rt == NULL ||
+ (ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ ro->ro_rt->rt_ifp == NULL ||
+ !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
+#if defined(IPSEC) || defined(IPSEC_SUPPORT)
+ /*
+ * There is no route for this packet, but it is
+ * possible that a matching SPD entry exists.
+ */
+ no_route_but_check_spd = 1;
+ mtu = 0; /* Silence GCC warning. */
+ goto sendit;
+#endif
+ IPSTAT_INC(ips_noroute);
+ error = EHOSTUNREACH;
+ goto bad;
+ }
}
- if (rte == NULL ||
- (rte->rt_flags & RTF_UP) == 0 ||
- rte->rt_ifp == NULL ||
- !RT_LINK_IS_UP(rte->rt_ifp)) {
+ ia = ifatoia(ro->ro_rt->rt_ifa);
+ ifp = ro->ro_rt->rt_ifp;
+ counter_u64_add(ro->ro_rt->rt_pksent, 1);
+ rt_update_ro_flags(ro);
+ if (ro->ro_rt->rt_flags & RTF_GATEWAY)
+ gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
+ if (ro->ro_rt->rt_flags & RTF_HOST)
+ isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
+ else if (ifp->if_flags & IFF_BROADCAST)
+ isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
+ else
+ isbroadcast = 0;
+ if (ro->ro_rt->rt_flags & RTF_HOST)
+ mtu = ro->ro_rt->rt_mtu;
+ else
+ mtu = ifp->if_mtu;
+ src = IA_SIN(ia)->sin_addr;
+ } else {
+ struct nhop4_extended nh;
+
+ bzero(&nh, sizeof(nh));
+ if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) !=
+ 0) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* There is no route for this packet, but it is
@@ -395,31 +430,29 @@ again:
error = EHOSTUNREACH;
goto bad;
}
- ia = ifatoia(rte->rt_ifa);
- ifp = rte->rt_ifp;
- counter_u64_add(rte->rt_pksent, 1);
- rt_update_ro_flags(ro);
- if (rte->rt_flags & RTF_GATEWAY)
- gw = (struct sockaddr_in *)rte->rt_gateway;
- if (rte->rt_flags & RTF_HOST)
- isbroadcast = (rte->rt_flags & RTF_BROADCAST);
- else if (ifp->if_flags & IFF_BROADCAST)
- isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
- else
- isbroadcast = 0;
+ ifp = nh.nh_ifp;
+ mtu = nh.nh_mtu;
+ /*
+ * We are rewriting here dst to be gw actually, contradicting
+ * comment at the beginning of the function. However, in this
+ * case we are always dealing with on stack dst.
+ * In case if pfil(9) sends us back to beginning of the
+ * function, the dst would be rewritten by ip_output_pfil().
+ */
+ MPASS(dst == &sin);
+ dst->sin_addr = nh.nh_addr;
+ ia = nh.nh_ia;
+ src = nh.nh_src;
+ isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
+ (NHF_HOST | NHF_BROADCAST)) ||
+ ((ifp->if_flags & IFF_BROADCAST) &&
+ in_ifaddr_broadcast(dst->sin_addr, ia)));
}
- /*
- * Calculate MTU. If we have a route that is up, use that,
- * otherwise use the interface's MTU.
- */
- if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
- mtu = rte->rt_mtu;
- else
- mtu = ifp->if_mtu;
/* Catch a possible divide by zero later. */
- KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
- __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
+ KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
+ __func__, mtu, ro,
+ (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
@@ -455,11 +488,8 @@ again:
* If source address not specified yet, use address
* of outgoing interface.
*/
- if (ip->ip_src.s_addr == INADDR_ANY) {
- /* Interface may have no addresses. */
- if (ia != NULL)
- ip->ip_src = IA_SIN(ia)->sin_addr;
- }
+ if (ip->ip_src.s_addr == INADDR_ANY)
+ ip->ip_src = src;
if ((imo == NULL && in_mcast_loop) ||
(imo && imo->imo_multicast_loop)) {
@@ -522,12 +552,8 @@ again:
* If the source address is not specified yet, use the address
* of the outoing interface.
*/
- if (ip->ip_src.s_addr == INADDR_ANY) {
- /* Interface may have no addresses. */
- if (ia != NULL) {
- ip->ip_src = IA_SIN(ia)->sin_addr;
- }
- }
+ if (ip->ip_src.s_addr == INADDR_ANY)
+ ip->ip_src = src;
/*
* Look for broadcast address and
@@ -587,9 +613,10 @@ sendit:
case -1: /* Need to try again */
/* Reset everything for a new round */
- RO_RTFREE(ro);
- ro->ro_prepend = NULL;
- rte = NULL;
+ if (ro != NULL) {
+ RO_RTFREE(ro);
+ ro->ro_prepend = NULL;
+ }
gw = dst;
ip = mtod(m, struct ip *);
goto again;
@@ -733,15 +760,6 @@ sendit:
IPSTAT_INC(ips_fragmented);
done:
- if (ro == &iproute)
- RO_RTFREE(ro);
- else if (rte == NULL)
- /*
- * If the caller supplied a route but somehow the reference
- * to it has been released need to prevent the caller
- * calling RTFREE on it again.
- */
- ro->ro_rt = NULL;
NET_EPOCH_EXIT(et);
return (error);
bad:
More information about the svn-src-all
mailing list