git: 62e1a437f328 - main - routing: Allow using IPv6 next-hops for IPv4 routes (RFC 5549).

Alexander V. Chernikov melifaro at FreeBSD.org
Sun Aug 22 22:58:41 UTC 2021


The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=62e1a437f3285e785d9b35a476d36a469a90028d

commit 62e1a437f3285e785d9b35a476d36a469a90028d
Author:     Zhenlei Huang <zlei.huang at gmail.com>
AuthorDate: 2021-08-22 22:28:47 +0000
Commit:     Alexander V. Chernikov <melifaro at FreeBSD.org>
CommitDate: 2021-08-22 22:56:08 +0000

    routing: Allow using IPv6 next-hops for IPv4 routes (RFC 5549).
    
    Implement kernel support for RFC 5549/8950.
    
    * Relax control plane restrictions and allow specifying IPv6 gateways
     for IPv4 routes. This behavior is controlled by the
     net.route.ipv6_nexthop sysctl (on by default).
    
    * Always pass final destination in ro->ro_dst in ip_forward().
    
    * Use ro->ro_dst to extract packet family inside if_output() routines.
     Consistently use RO_GET_FAMILY() macro to handle ro=NULL case.
    
    * Pass extracted family to nd6_resolve() to get the LLE with proper encap.
     It leverages recent lltable changes committed in c541bd368f86.
    
    Presence of the functionality can be checked using ipv4_rfc5549_support feature(3).
    Example usage:
      route add -net 192.0.0.0/24 -inet6 fe80::5054:ff:fe14:e319%vtnet0
    
    Differential Revision: https://reviews.freebsd.org/D30398
    MFC after:      2 weeks
---
 sys/contrib/ipfilter/netinet/ip_fil_freebsd.c | 33 +++++++++--------
 sys/dev/cxgbe/tom/t4_listen.c                 |  5 ++-
 sys/dev/iicbus/if_ic.c                        |  2 +-
 sys/net/debugnet.c                            |  1 +
 sys/net/if_disc.c                             |  2 +-
 sys/net/if_ethersubr.c                        | 11 +++---
 sys/net/if_fwsubr.c                           | 28 +++++++++++---
 sys/net/if_gif.c                              |  2 +-
 sys/net/if_gre.c                              |  2 +-
 sys/net/if_infiniband.c                       |  7 ++--
 sys/net/if_loop.c                             |  2 +-
 sys/net/if_me.c                               |  4 +-
 sys/net/if_spppsubr.c                         | 11 +++---
 sys/net/if_tuntap.c                           |  2 +-
 sys/net/route.h                               |  4 ++
 sys/net/route/route_ctl.c                     | 30 ++++++++++++++-
 sys/netgraph/netflow/netflow.c                |  4 ++
 sys/netgraph/ng_iface.c                       |  2 +-
 sys/netinet/ip_fastfwd.c                      | 29 +++++++++------
 sys/netinet/ip_input.c                        | 11 ++++--
 sys/netinet/ip_output.c                       | 53 ++++++++++++---------------
 sys/netinet/toecore.c                         |  3 +-
 sys/ofed/drivers/infiniband/core/ib_addr.c    | 14 +++++--
 23 files changed, 168 insertions(+), 94 deletions(-)

diff --git a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
index 072ab8bcd4e5..7e0ac4bc2927 100644
--- a/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
+++ b/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c
@@ -689,7 +689,9 @@ ipf_fastroute(m0, mpp, fin, fdp)
 	register struct mbuf *m = *mpp;
 	int len, off, error = 0, hlen, code;
 	struct ifnet *ifp, *sifp;
-	struct sockaddr_in dst;
+	struct route ro;
+	struct sockaddr_in *dst;
+	const struct sockaddr *gw;
 	struct nhop_object *nh;
 	u_long fibnum = 0;
 	u_short ip_off;
@@ -739,10 +741,12 @@ ipf_fastroute(m0, mpp, fin, fdp)
 	/*
 	 * Route packet.
 	 */
-	bzero(&dst, sizeof (dst));
-	dst.sin_family = AF_INET;
-	dst.sin_addr = ip->ip_dst;
-	dst.sin_len = sizeof(dst);
+	bzero(&ro, sizeof (ro));
+	dst = (struct sockaddr_in *)&ro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_addr = ip->ip_dst;
+	dst->sin_len = sizeof(*dst);	/* size of the sockaddr_in, not of the pointer */
+	gw = (const struct sockaddr *)dst;
 
 	fr = fin->fin_fr;
 	if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) &&
@@ -762,11 +766,11 @@ ipf_fastroute(m0, mpp, fin, fdp)
 	}
 
 	if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0))
-		dst.sin_addr = fdp->fd_ip;
+		dst->sin_addr = fdp->fd_ip;
 
 	fibnum = M_GETFIB(m0);
 	NET_EPOCH_ASSERT();
-	nh = fib4_lookup(fibnum, dst.sin_addr, 0, NHR_NONE, 0);
+	nh = fib4_lookup(fibnum, dst->sin_addr, 0, NHR_NONE, 0);
 	if (nh == NULL) {
 		if (in_localaddr(ip->ip_dst))
 			error = EHOSTUNREACH;
@@ -777,8 +781,10 @@ ipf_fastroute(m0, mpp, fin, fdp)
 
 	if (ifp == NULL)
 		ifp = nh->nh_ifp;
-	if (nh->nh_flags & NHF_GATEWAY)
-		dst.sin_addr = nh->gw4_sa.sin_addr;
+	if (nh->nh_flags & NHF_GATEWAY) {
+		gw = &nh->gw_sa;
+		ro.ro_flags |= RT_HAS_GW;
+	}
 
 	/*
 	 * For input packets which are being "fastrouted", they won't
@@ -822,9 +828,7 @@ ipf_fastroute(m0, mpp, fin, fdp)
 	if (ntohs(ip->ip_len) <= ifp->if_mtu) {
 		if (!ip->ip_sum)
 			ip->ip_sum = in_cksum(m, hlen);
-		error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst,
-			    NULL
-			);
+		error = (*ifp->if_output)(ifp, m, gw, &ro);
 		goto done;
 	}
 	/*
@@ -904,10 +908,7 @@ sendorfree:
 		m0 = m->m_act;
 		m->m_act = 0;
 		if (error == 0)
-			error = (*ifp->if_output)(ifp, m,
-			    (struct sockaddr *)&dst,
-			    NULL
-			    );
+			error = (*ifp->if_output)(ifp, m, gw, &ro);
 		else
 			FREE_MB_T(m);
 	}
diff --git a/sys/dev/cxgbe/tom/t4_listen.c b/sys/dev/cxgbe/tom/t4_listen.c
index 8623079fe429..4d98597409d6 100644
--- a/sys/dev/cxgbe/tom/t4_listen.c
+++ b/sys/dev/cxgbe/tom/t4_listen.c
@@ -1113,7 +1113,10 @@ get_l2te_for_nexthop(struct port_info *pi, struct ifnet *ifp,
 		if (nh->nh_ifp != ifp)
 			return (NULL);
 		if (nh->nh_flags & NHF_GATEWAY)
-			((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
+			if (nh->gw_sa.sa_family == AF_INET)
+				((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
+			else
+				*((struct sockaddr_in6 *)dst) = nh->gw6_sa;
 		else
 			((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
 	}
diff --git a/sys/dev/iicbus/if_ic.c b/sys/dev/iicbus/if_ic.c
index 4dac86141230..603265a52b13 100644
--- a/sys/dev/iicbus/if_ic.c
+++ b/sys/dev/iicbus/if_ic.c
@@ -372,7 +372,7 @@ icoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &hdr, sizeof(hdr));
 	else 
-		hdr = dst->sa_family;
+		hdr = RO_GET_FAMILY(ro, dst);
 
 	mtx_lock(&sc->ic_lock);
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
diff --git a/sys/net/debugnet.c b/sys/net/debugnet.c
index bb59ff33a93f..8652597c55db 100644
--- a/sys/net/debugnet.c
+++ b/sys/net/debugnet.c
@@ -673,6 +673,7 @@ debugnet_connect(const struct debugnet_conn_params *dcp,
 			goto cleanup;
 		}
 
+		/* TODO support AF_INET6 */
 		if (nh->gw_sa.sa_family == AF_INET)
 			gw_sin = &nh->gw4_sa;
 		else {
diff --git a/sys/net/if_disc.c b/sys/net/if_disc.c
index ac0028c42f70..14d544dfd86a 100644
--- a/sys/net/if_disc.c
+++ b/sys/net/if_disc.c
@@ -185,7 +185,7 @@ discoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m);
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index 6d8b79d4dd12..3209e8a82978 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -236,10 +236,11 @@ ether_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 #endif
 #ifdef INET6
 	case AF_INET6:
-		if ((m->m_flags & M_MCAST) == 0)
-			error = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), m, dst, phdr,
+		if ((m->m_flags & M_MCAST) == 0) {
+			int af = RO_GET_FAMILY(ro, dst);
+			error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
 			    &lleflags, plle);
-		else {
+		} else {
 			const struct in6_addr *a6;
 			a6 = &(((const struct sockaddr_in6 *)dst)->sin6_addr);
 			ETHER_MAP_IPV6_MULTICAST(a6, eh->ether_dhost);
@@ -353,7 +354,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
 
 	if ((pflags & RT_L2_ME) != 0) {
 		update_mbuf_csumflags(m, m);
-		return (if_simloop(ifp, m, dst->sa_family, 0));
+		return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
 	}
 	loop_copy = (pflags & RT_MAY_LOOP) != 0;
 
@@ -400,7 +401,7 @@ ether_output(struct ifnet *ifp, struct mbuf *m,
 		 */
 		if ((n = m_dup(m, M_NOWAIT)) != NULL) {
 			update_mbuf_csumflags(m, n);
-			(void)if_simloop(ifp, n, dst->sa_family, hlen);
+			(void)if_simloop(ifp, n, RO_GET_FAMILY(ro, dst), hlen);
 		} else
 			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
 	}
diff --git a/sys/net/if_fwsubr.c b/sys/net/if_fwsubr.c
index a6c43d4d05a4..321721737d36 100644
--- a/sys/net/if_fwsubr.c
+++ b/sys/net/if_fwsubr.c
@@ -94,6 +94,7 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 #if defined(INET) || defined(INET6)
 	int is_gw = 0;
 #endif
+	int af = RO_GET_FAMILY(ro, dst);
 
 #ifdef MAC
 	error = mac_ifnet_check_transmit(ifp, m);
@@ -137,6 +138,26 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 		destfw = NULL;
 	}
 
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		type = ETHERTYPE_IP;
+		break;
+	case AF_ARP:
+		type = ETHERTYPE_ARP;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		type = ETHERTYPE_IPV6;
+		break;
+#endif
+	default:
+		if_printf(ifp, "can't handle af%d\n", af);
+		error = EAFNOSUPPORT;
+		goto bad;
+	}
+
 	switch (dst->sa_family) {
 #ifdef INET
 	case AF_INET:
@@ -151,7 +172,6 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
-		type = ETHERTYPE_IP;
 		break;
 
 	case AF_ARP:
@@ -159,7 +179,6 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 		struct arphdr *ah;
 		ah = mtod(m, struct arphdr *);
 		ah->ar_hrd = htons(ARPHRD_IEEE1394);
-		type = ETHERTYPE_ARP;
 		if (unicast)
 			*destfw = *(struct fw_hwaddr *) ar_tha(ah);
 
@@ -176,12 +195,11 @@ firewire_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 #ifdef INET6
 	case AF_INET6:
 		if (unicast) {
-			error = nd6_resolve(fc->fc_ifp, LLE_SF(AF_INET6, is_gw),
-			    m, dst, (u_char *) destfw, NULL, NULL);
+			error = nd6_resolve(fc->fc_ifp, LLE_SF(af, is_gw), m,
+			    dst, (u_char *) destfw, NULL, NULL);
 			if (error)
 				return (error == EWOULDBLOCK ? 0 : error);
 		}
-		type = ETHERTYPE_IPV6;
 		break;
 #endif
 
diff --git a/sys/net/if_gif.c b/sys/net/if_gif.c
index 113bcb5c916e..796f427e356b 100644
--- a/sys/net/if_gif.c
+++ b/sys/net/if_gif.c
@@ -409,7 +409,7 @@ gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 	/*
 	 * Now save the af in the inbound pkt csum data, this is a cheat since
 	 * we are using the inbound csum_data field to carry the af over to
diff --git a/sys/net/if_gre.c b/sys/net/if_gre.c
index 19014f9fd3de..5ad452ac38e0 100644
--- a/sys/net/if_gre.c
+++ b/sys/net/if_gre.c
@@ -613,7 +613,7 @@ gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 	/*
 	 * Now save the af in the inbound pkt csum data, this is a cheat since
 	 * we are using the inbound csum_data field to carry the af over to
diff --git a/sys/net/if_infiniband.c b/sys/net/if_infiniband.c
index 244b2a5ba117..4dfbd5272d15 100644
--- a/sys/net/if_infiniband.c
+++ b/sys/net/if_infiniband.c
@@ -253,8 +253,9 @@ infiniband_resolve_addr(struct ifnet *ifp, struct mbuf *m,
 #ifdef INET6
 	case AF_INET6:
 		if ((m->m_flags & M_MCAST) == 0) {
-			error = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), m, dst,
-			    phdr, &lleflags, plle);
+			int af = RO_GET_FAMILY(ro, dst);
+			error = nd6_resolve(ifp, LLE_SF(af, 0), m, dst, phdr,
+			    &lleflags, plle);
 		} else {
 			infiniband_ipv6_multicast_map(
 			    &((const struct sockaddr_in6 *)dst)->sin6_addr,
@@ -371,7 +372,7 @@ infiniband_output(struct ifnet *ifp, struct mbuf *m,
 
 	if ((pflags & RT_L2_ME) != 0) {
 		update_mbuf_csumflags(m, m);
-		return (if_simloop(ifp, m, dst->sa_family, 0));
+		return (if_simloop(ifp, m, RO_GET_FAMILY(ro, dst), 0));
 	}
 
 	/*
diff --git a/sys/net/if_loop.c b/sys/net/if_loop.c
index cbff8200806a..643ef2240fe1 100644
--- a/sys/net/if_loop.c
+++ b/sys/net/if_loop.c
@@ -235,7 +235,7 @@ looutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	if (dst->sa_family == AF_UNSPEC || dst->sa_family == pseudo_AF_HDRCMPLT)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 #if 1	/* XXX */
 	switch (af) {
diff --git a/sys/net/if_me.c b/sys/net/if_me.c
index aafc07c2b203..067ab22cd84d 100644
--- a/sys/net/if_me.c
+++ b/sys/net/if_me.c
@@ -533,14 +533,14 @@ drop:
 
 static int
 me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
-   struct route *ro __unused)
+   struct route *ro)
 {
 	uint32_t af;
 
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 	m->m_pkthdr.csum_data = af;
 	return (ifp->if_transmit(ifp, m));
 }
diff --git a/sys/net/if_spppsubr.c b/sys/net/if_spppsubr.c
index fbf7b0ea8f4c..804367025532 100644
--- a/sys/net/if_spppsubr.c
+++ b/sys/net/if_spppsubr.c
@@ -780,6 +780,7 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	int ipproto = PPP_IP;
 #endif
 	int debug = ifp->if_flags & IFF_DEBUG;
+	int af = RO_GET_FAMILY(ro, dst);
 
 	SPPP_LOCK(sp);
 
@@ -805,7 +806,7 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 		 * dialout event in case IPv6 has been
 		 * administratively disabled on that interface.
 		 */
-		if (dst->sa_family == AF_INET6 &&
+		if (af == AF_INET6 &&
 		    !(sp->confflags & CONF_ENABLE_IPV6))
 			goto drop;
 #endif
@@ -818,7 +819,7 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 	}
 
 #ifdef INET
-	if (dst->sa_family == AF_INET) {
+	if (af == AF_INET) {
 		/* XXX Check mbuf length here? */
 		struct ip *ip = mtod (m, struct ip*);
 		struct tcphdr *tcp = (struct tcphdr*) ((long*)ip + ip->ip_hl);
@@ -888,14 +889,14 @@ sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
 #endif
 
 #ifdef INET6
-	if (dst->sa_family == AF_INET6) {
+	if (af == AF_INET6) {
 		/* XXX do something tricky here? */
 	}
 #endif
 
 	if (sp->pp_mode == PP_FR) {
 		/* Add frame relay header. */
-		m = sppp_fr_header (sp, m, dst->sa_family);
+		m = sppp_fr_header (sp, m, af);
 		if (! m)
 			goto nobufs;
 		goto out;
@@ -926,7 +927,7 @@ nobufs:		if (debug)
 		h->control = PPP_UI;                 /* Unnumbered Info */
 	}
 
-	switch (dst->sa_family) {
+	switch (af) {
 #ifdef INET
 	case AF_INET:   /* Internet Protocol */
 		if (sp->pp_mode == IFF_CISCO)
diff --git a/sys/net/if_tuntap.c b/sys/net/if_tuntap.c
index b4b1b77ddc7c..bd9fc811d19f 100644
--- a/sys/net/if_tuntap.c
+++ b/sys/net/if_tuntap.c
@@ -1402,7 +1402,7 @@ tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst,
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 	if (bpf_peers_present(ifp->if_bpf))
 		bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0);
diff --git a/sys/net/route.h b/sys/net/route.h
index 67217f237e0b..ec77d39b9649 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -394,6 +394,10 @@ struct rt_addrinfo {
 		}							\
 	} while (0)
 
+#define RO_GET_FAMILY(ro, dst)	((ro) != NULL &&		\
+	(ro)->ro_flags & RT_HAS_GW				\
+	? (ro)->ro_dst.sa_family : (dst)->sa_family)
+
 /*
  * Validate a cached route based on a supplied cookie.  If there is an
  * out-of-date cache, simply free it.  Update the generation number
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index a686d1623053..6db088102cd3 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -106,6 +106,14 @@ SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
     &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
 #undef _MP_FLAGS
 
+#if defined(INET) && defined(INET6)
+FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
+#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
+VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
+SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
+    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
+#endif
+
 /* Routing table UMA zone */
 VNET_DEFINE_STATIC(uma_zone_t, rtzone);
 #define	V_rtzone	VNET(rtzone)
@@ -197,6 +205,20 @@ get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
 	return (rnh);
 }
 
+#if defined(INET) && defined(INET6)
+static bool
+rib_can_ipv6_nexthop_address(struct rib_head *rh)
+{
+	int result;
+
+	CURVNET_SET(rh->rib_vnet);
+	result = !!V_rib_route_ipv6_nexthop;
+	CURVNET_RESTORE();
+
+	return (result);
+}
+#endif
+
 #ifdef ROUTE_MPATH
 static bool
 rib_can_multipath(struct rib_head *rh)
@@ -582,7 +604,13 @@ check_gateway(struct rib_head *rnh, struct sockaddr *dst,
 		return (true);
 	else if (gateway->sa_family == AF_LINK)
 		return (true);
-	return (false);
+#if defined(INET) && defined(INET6)
+	else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
+		rib_can_ipv6_nexthop_address(rnh))
+		return (true);
+#endif
+	else
+		return (false);
 }
 
 /*
diff --git a/sys/netgraph/netflow/netflow.c b/sys/netgraph/netflow/netflow.c
index 7d4108ee59a7..5c0358d9b67a 100644
--- a/sys/netgraph/netflow/netflow.c
+++ b/sys/netgraph/netflow/netflow.c
@@ -364,6 +364,10 @@ hash_insert(priv_p priv, struct flow_hash_entry *hsh, struct flow_rec *r,
 			fle->f.fle_o_ifx = nh->nh_ifp->if_index;
 			if (nh->gw_sa.sa_family == AF_INET)
 				fle->f.next_hop = nh->gw4_sa.sin_addr;
+			/*
+			 * XXX we're leaving an empty gateway here for
+			 * IPv6 nexthops.
+			 */
 			fle->f.dst_mask = plen;
 		}
 	}
diff --git a/sys/netgraph/ng_iface.c b/sys/netgraph/ng_iface.c
index 1e586d687244..e6871435fa88 100644
--- a/sys/netgraph/ng_iface.c
+++ b/sys/netgraph/ng_iface.c
@@ -371,7 +371,7 @@ ng_iface_output(struct ifnet *ifp, struct mbuf *m,
 	if (dst->sa_family == AF_UNSPEC)
 		bcopy(dst->sa_data, &af, sizeof(af));
 	else
-		af = dst->sa_family;
+		af = RO_GET_FAMILY(ro, dst);
 
 	/* Berkeley packet filter */
 	ng_iface_bpftap(ifp, m, af);
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c
index 44da6b73e41c..facf876f18cc 100644
--- a/sys/netinet/ip_fastfwd.c
+++ b/sys/netinet/ip_fastfwd.c
@@ -199,7 +199,9 @@ ip_tryforward(struct mbuf *m)
 	struct ip *ip;
 	struct mbuf *m0 = NULL;
 	struct nhop_object *nh = NULL;
-	struct sockaddr_in dst;
+	struct route ro;
+	struct sockaddr_in *dst;
+	const struct sockaddr *gw;
 	struct in_addr dest, odest, rtdest;
 	uint16_t ip_len, ip_off;
 	int error = 0;
@@ -421,19 +423,23 @@ passout:
 	ip_len = ntohs(ip->ip_len);
 	ip_off = ntohs(ip->ip_off);
 
-	bzero(&dst, sizeof(dst));
-	dst.sin_family = AF_INET;
-	dst.sin_len = sizeof(dst);
-	if (nh->nh_flags & NHF_GATEWAY)
-		dst.sin_addr = nh->gw4_sa.sin_addr;
-	else
-		dst.sin_addr = dest;
+	bzero(&ro, sizeof(ro));
+	dst = (struct sockaddr_in *)&ro.ro_dst;
+	dst->sin_family = AF_INET;
+	dst->sin_len = sizeof(*dst);
+	dst->sin_addr = dest;
+	if (nh->nh_flags & NHF_GATEWAY) {
+		gw = &nh->gw_sa;
+		ro.ro_flags |= RT_HAS_GW;
+	} else
+		gw = (const struct sockaddr *)dst;
 
 	/*
 	 * Handle redirect case.
 	 */
 	redest.s_addr = 0;
-	if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif))
+	if (V_ipsendredirects && (nh->nh_ifp == m->m_pkthdr.rcvif) &&
+	    gw->sa_family == AF_INET)
 		mcopy = ip_redir_alloc(m, nh, ip, &redest.s_addr);
 
 	/*
@@ -448,8 +454,7 @@ passout:
 		 * Send off the packet via outgoing interface
 		 */
 		IP_PROBE(send, NULL, NULL, ip, nh->nh_ifp, ip, NULL);
-		error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m,
-		    (struct sockaddr *)&dst, NULL);
+		error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m, gw, &ro);
 	} else {
 		/*
 		 * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery
@@ -484,7 +489,7 @@ passout:
 				    mtod(m, struct ip *), nh->nh_ifp,
 				    mtod(m, struct ip *), NULL);
 				error = (*nh->nh_ifp->if_output)(nh->nh_ifp, m,
-				    (struct sockaddr *)&dst, NULL);
+				    gw, &ro);
 				if (error)
 					break;
 			} while ((m = m0) != NULL);
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 733cc2901879..465c00e4dac7 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1065,13 +1065,16 @@ ip_forward(struct mbuf *m, int srcrt)
 
 			if (nh_ia != NULL &&
 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
-				if (nh->nh_flags & NHF_GATEWAY)
-					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
-				else
-					dest.s_addr = ip->ip_dst.s_addr;
 				/* Router requirements says to only send host redirects */
 				type = ICMP_REDIRECT;
 				code = ICMP_REDIRECT_HOST;
+				if (nh->nh_flags & NHF_GATEWAY) {
+				    if (nh->gw_sa.sa_family == AF_INET)
+					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
+				    else /* Do not redirect in case gw is AF_INET6 */
+					type = 0;
+				} else
+					dest.s_addr = ip->ip_dst.s_addr;
 			}
 		}
 	}
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index 3041232b7223..ad41c9df0b8c 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -212,7 +212,7 @@ ip_output_pfil(struct mbuf **mp, struct ifnet *ifp, int flags,
 
 static int
 ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
-    const struct sockaddr_in *gw, struct route *ro, bool stamp_tag)
+    const struct sockaddr *gw, struct route *ro, bool stamp_tag)
 {
 #ifdef KERN_TLS
 	struct ktls_session *tls = NULL;
@@ -273,7 +273,7 @@ ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
 		m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
 	}
 
-	error = (*ifp->if_output)(ifp, m, (const struct sockaddr *)gw, ro);
+	error = (*ifp->if_output)(ifp, m, gw, ro);
 
 done:
 	/* Check for route change invalidating send tags. */
@@ -329,12 +329,13 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
 	int mtu = 0;
 	int error = 0;
 	int vlan_pcp = -1;
-	struct sockaddr_in *dst, sin;
-	const struct sockaddr_in *gw;
+	struct sockaddr_in *dst;
+	const struct sockaddr *gw;
 	struct in_ifaddr *ia = NULL;
 	struct in_addr src;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
+	struct route iproute;
 	uint32_t fibnum;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	int no_route_but_check_spd = 0;
@@ -386,23 +387,23 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
 	 * therefore we need restore gw if we're redoing lookup.
 	 */
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
-	if (ro != NULL)
-		dst = (struct sockaddr_in *)&ro->ro_dst;
-	else
-		dst = &sin;
-	if (ro == NULL || ro->ro_nh == NULL) {
-		bzero(dst, sizeof(*dst));
+	if (ro == NULL) {
+		ro = &iproute;
+		bzero(ro, sizeof (*ro));
+	}
+	dst = (struct sockaddr_in *)&ro->ro_dst;
+	if (ro->ro_nh == NULL) {
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
-	gw = dst;
+	gw = (const struct sockaddr *)dst;
 again:
 	/*
 	 * Validate route against routing table additions;
 	 * a better/more specific route might have been added.
 	 */
-	if (inp != NULL && ro != NULL && ro->ro_nh != NULL)
+	if (inp != NULL && ro->ro_nh != NULL)
 		NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
 	/*
 	 * If there is a cached route,
@@ -412,7 +413,7 @@ again:
 	 * cache with IPv6.
 	 * Also check whether routing cache needs invalidation.
 	 */
-	if (ro != NULL && ro->ro_nh != NULL &&
+	if (ro->ro_nh != NULL &&
 	    ((!NH_IS_VALID(ro->ro_nh)) || dst->sin_family != AF_INET ||
 	    dst->sin_addr.s_addr != ip->ip_dst.s_addr))
 		RO_INVALIDATE_CACHE(ro);
@@ -469,7 +470,7 @@ again:
 			src = IA_SIN(ia)->sin_addr;
 		else
 			src.s_addr = INADDR_ANY;
-	} else if (ro != NULL) {
+	} else if (ro != &iproute) {
 		if (ro->ro_nh == NULL) {
 			/*
 			 * We want to do any cloning requested by the link
@@ -502,11 +503,11 @@ again:
 		counter_u64_add(nh->nh_pksent, 1);
 		rt_update_ro_flags(ro, nh);
 		if (nh->nh_flags & NHF_GATEWAY)
-			gw = &nh->gw4_sa;
+			gw = &nh->gw_sa;
 		if (nh->nh_flags & NHF_HOST)
 			isbroadcast = (nh->nh_flags & NHF_BROADCAST);
-		else if (ifp->if_flags & IFF_BROADCAST)
-			isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
+		else if ((ifp->if_flags & IFF_BROADCAST) && (gw->sa_family == AF_INET))
+			isbroadcast = in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia);
 		else
 			isbroadcast = 0;
 		mtu = nh->nh_mtu;
@@ -531,22 +532,16 @@ again:
 		}
 		ifp = nh->nh_ifp;
 		mtu = nh->nh_mtu;
-		/*
-		 * We are rewriting here dst to be gw actually, contradicting
-		 * comment at the beginning of the function. However, in this
-		 * case we are always dealing with on stack dst.
-		 * In case if pfil(9) sends us back to beginning of the
-		 * function, the dst would be rewritten by ip_output_pfil().
-		 */
-		MPASS(dst == &sin);
+		rt_update_ro_flags(ro, nh);
 		if (nh->nh_flags & NHF_GATEWAY)
-			dst->sin_addr = nh->gw4_sa.sin_addr;
+			gw = &nh->gw_sa;
 		ia = ifatoia(nh->nh_ifa);
 		src = IA_SIN(ia)->sin_addr;
 		isbroadcast = (((nh->nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
 		    (NHF_HOST | NHF_BROADCAST)) ||
 		    ((ifp->if_flags & IFF_BROADCAST) &&
-		    in_ifaddr_broadcast(dst->sin_addr, ia)));
+		    (gw->sa_family == AF_INET) &&
+		    in_ifaddr_broadcast(((const struct sockaddr_in *)gw)->sin_addr, ia)));
 	}
 
 	/* Catch a possible divide by zero later. */
@@ -561,7 +556,7 @@ again:
 		 * still points to the address in "ro".  (It may have been
 		 * changed to point to a gateway address, above.)
 		 */
-		gw = dst;
+		gw = (const struct sockaddr *)dst;
 		/*
 		 * See if the caller provided any multicast options
 		 */
@@ -721,7 +716,7 @@ sendit:
 				RO_NHFREE(ro);
 				ro->ro_prepend = NULL;
 			}
-			gw = dst;
+			gw = (const struct sockaddr *)dst;
 			ip = mtod(m, struct ip *);
 			goto again;
 		}
diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c
index 1c0be6011253..0bf55958c618 100644
--- a/sys/netinet/toecore.c
+++ b/sys/netinet/toecore.c
@@ -483,7 +483,8 @@ toe_l2_resolve(struct toedev *tod, struct ifnet *ifp, struct sockaddr *sa,
 #endif
 #ifdef INET6
 	case AF_INET6:
-		rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr, NULL, NULL);
+		rc = nd6_resolve(ifp, LLE_SF(AF_INET6, 0), NULL, sa, lladdr,
+		    NULL, NULL);
 		break;
 #endif
 	default:
diff --git a/sys/ofed/drivers/infiniband/core/ib_addr.c b/sys/ofed/drivers/infiniband/core/ib_addr.c
index 297469bd4d87..2ac79ca64664 100644
--- a/sys/ofed/drivers/infiniband/core/ib_addr.c
+++ b/sys/ofed/drivers/infiniband/core/ib_addr.c
@@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
 #include <net/route.h>
 #include <net/route/nhop.h>
 #include <net/netevent.h>
+#include <net/if_llatbl.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib.h>
 
@@ -397,9 +398,16 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	} else {
 		bool is_gw = (nh->nh_flags & NHF_GATEWAY) != 0;
 		memset(edst, 0, MAX_ADDR_LEN);
-		error = arpresolve(ifp, is_gw, NULL, is_gw ?
-		    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
-		    edst, NULL, NULL);
+#ifdef INET6
+		if (is_gw && nh->gw_sa.sa_family == AF_INET6)
+			error = nd6_resolve(ifp, LLE_SF(AF_INET, is_gw), NULL,
+			    &nh->gw_sa, edst, NULL, NULL);
+		else
+#endif
+			error = arpresolve(ifp, is_gw, NULL, is_gw ?
+			    &nh->gw_sa : (const struct sockaddr *)&dst_tmp,
+			    edst, NULL, NULL);
+
 		if (error != 0)
 			goto error_put_ifp;
 		else if (is_gw)


More information about the dev-commits-src-main mailing list