git: fe05d1dd0fdf - main - routing: extend nhop(9) kpi

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Mon, 29 Aug 2022 15:17:53 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=fe05d1dd0fdf7c8d09b9aad4ff74c44b7c90db80

commit fe05d1dd0fdf7c8d09b9aad4ff74c44b7c90db80
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-08-29 14:38:25 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2022-08-29 14:46:03 +0000

    routing: extend nhop(9) kpi
    
    * add nhop_get_unlinked(), used to prepare a referenced but not
      linked nexthop that can later be used as a clone source.
    * add nhop_check_gateway() to check for allowed address family
      combinations between the rib family and neighbor family (useful
      for 4o6 or direct routes).
    * add nhop_set_upper_family() to allow copying IPv6 nexthops to
      an IPv4 rib.
    * add rt_get_rnd() wrapper, returning both the nexthop/group and
      its weight attached to the rtentry.
    * add CHT_SLIST_FOREACH_SAFE(), allowing items to be deleted
      during iteration.
    
    MFC after:      2 weeks
---
 sys/net/route/nhop.h          |  3 ++
 sys/net/route/nhop_ctl.c      | 85 +++++++++++++++++++++++++++++++++++++++----
 sys/net/route/nhop_utils.h    |  5 +++
 sys/net/route/route_ctl.c     | 41 +++------------------
 sys/net/route/route_ctl.h     |  1 +
 sys/net/route/route_rtentry.c |  7 ++++
 sys/net/route/route_var.h     |  2 +
 7 files changed, 102 insertions(+), 42 deletions(-)

diff --git a/sys/net/route/nhop.h b/sys/net/route/nhop.h
index 9d0891c5b978..24ddb692efb8 100644
--- a/sys/net/route/nhop.h
+++ b/sys/net/route/nhop.h
@@ -182,6 +182,7 @@ struct rib_head;
 struct nhop_object *nhop_alloc(uint32_t fibnum, int family);
 void nhop_copy(struct nhop_object *nh, const struct nhop_object *nh_orig);
 struct nhop_object *nhop_get_nhop(struct nhop_object *nh, int *perror);
+int nhop_get_unlinked(struct nhop_object *nh);
 
 void nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp);
 bool nhop_set_gw(struct nhop_object *nh, const struct sockaddr *sa, bool is_gw);
@@ -206,11 +207,13 @@ int nhop_get_rtflags(const struct nhop_object *nh);
 struct vnet *nhop_get_vnet(const struct nhop_object *nh);
 struct nhop_object *nhop_select_func(struct nhop_object *nh, uint32_t flowid);
 int nhop_get_upper_family(const struct nhop_object *nh);
+bool nhop_set_upper_family(struct nhop_object *nh, int family);
 int nhop_get_neigh_family(const struct nhop_object *nh);
 uint32_t nhop_get_fibnum(const struct nhop_object *nh);
 void nhop_set_fibnum(struct nhop_object *nh, uint32_t fibnum);
 uint32_t nhop_get_expire(const struct nhop_object *nh);
 void nhop_set_expire(struct nhop_object *nh, uint32_t expire);
+struct rib_head *nhop_get_rh(const struct nhop_object *nh);
 
 struct nhgrp_object;
 uint32_t nhgrp_get_uidx(const struct nhgrp_object *nhg);
diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c
index 4af57d766ab7..263a71c1322e 100644
--- a/sys/net/route/nhop_ctl.c
+++ b/sys/net/route/nhop_ctl.c
@@ -85,13 +85,12 @@ _DECLARE_DEBUG(LOG_INFO);
 
 static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w);
 
-static int finalize_nhop(struct nh_control *ctl, struct nhop_object *nh);
+static int finalize_nhop(struct nh_control *ctl, struct nhop_object *nh, bool link);
 static struct ifnet *get_aifp(const struct nhop_object *nh);
 static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp);
 
 static void destroy_nhop_epoch(epoch_context_t ctx);
 static void destroy_nhop(struct nhop_object *nh);
-static struct rib_head *nhop_get_rh(const struct nhop_object *nh);
 
 _Static_assert(__offsetof(struct nhop_object, nh_ifp) == 32,
     "nhop_object: wrong nh_ifp offset");
@@ -315,6 +314,12 @@ nhop_get_nhop(struct nhop_object *nh, int *perror)
 {
 	struct rib_head *rnh = nhop_get_rh(nh);
 
+	if (__predict_false(rnh == NULL)) {
+		*perror = EAFNOSUPPORT;
+		nhop_free(nh);
+		return (NULL);
+	}
+
 	return (nhop_get_nhop_internal(rnh, nh, perror));
 }
 
@@ -349,10 +354,33 @@ nhop_get_nhop_internal(struct rib_head *rnh, struct nhop_object *nh, int *perror
 	 *  relative number of such nexthops is significant, which
 	 *  is extremely unlikely.
 	 */
-	*perror = finalize_nhop(rnh->nh_control, nh);
+	*perror = finalize_nhop(rnh->nh_control, nh, true);
 	return (*perror == 0 ? nh : NULL);
 }
 
+/*
+ * Gets referenced but unlinked nhop.
+ * Allocates/references the remaining bits of the nexthop data, so
+ *  it can be safely linked later or used as a clone source.
+ * Frees @nh and returns EAFNOSUPPORT if nhop_get_rh() returns NULL.
+ * Returns 0 on success.
+ */
+int
+nhop_get_unlinked(struct nhop_object *nh)
+{
+	struct rib_head *rnh = nhop_get_rh(nh);
+
+	if (__predict_false(rnh == NULL)) {
+		nhop_free(nh);
+		return (EAFNOSUPPORT);
+	}
+
+	nh->nh_aifp = get_aifp(nh);
+
+	return (finalize_nhop(rnh->nh_control, nh, false));
+}
+
+
 /*
  * Update @nh with data supplied in @info.
  * This is a helper function to support route changes.
@@ -458,7 +486,7 @@ reference_nhop_deps(struct nhop_object *nh)
  *  errno otherwise. @nh_priv is freed in case of error.
  */
 static int
-finalize_nhop(struct nh_control *ctl, struct nhop_object *nh)
+finalize_nhop(struct nh_control *ctl, struct nhop_object *nh, bool link)
 {
 
 	/* Allocate per-cpu packet counter */
@@ -484,9 +512,14 @@ finalize_nhop(struct nh_control *ctl, struct nhop_object *nh)
 	/* Please see nhop_free() comments on the initial value */
 	refcount_init(&nh->nh_priv->nh_linked, 2);
 
-	nh->nh_priv->nh_fibnum = ctl->ctl_rh->rib_fibnum;
+	MPASS(nh->nh_priv->nh_fibnum == ctl->ctl_rh->rib_fibnum);
 
-	if (link_nhop(ctl, nh->nh_priv) == 0) {
+	if (!link) {
+		refcount_release(&nh->nh_priv->nh_linked);
+		NHOPS_WLOCK(ctl);
+		nh->nh_priv->nh_finalized = 1;
+		NHOPS_WUNLOCK(ctl);
+	} else if (link_nhop(ctl, nh->nh_priv) == 0) {
 		/*
 		 * Adding nexthop to the datastructures
 		 *  failed. Call destructor w/o waiting for
@@ -693,6 +726,22 @@ nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp)
 	memset(&nh->gw_buf[nh->gw_sa.sa_len], 0, sizeof(nh->gw_buf) - nh->gw_sa.sa_len);
 }
 
+bool
+nhop_check_gateway(int upper_family, int neigh_family)
+{
+	if (upper_family == neigh_family)
+		return (true);
+	else if (neigh_family == AF_UNSPEC || neigh_family == AF_LINK)
+		return (true);
+#if defined(INET) && defined(INET6)
+	else if (upper_family == AF_INET && neigh_family == AF_INET6 &&
+	    rib_can_4o6_nhop())
+		return (true);
+#endif
+	else
+		return (false);
+}
+
 /*
  * Sets gateway for the nexthop.
  * It can be "normal" gateway with is_gw set or a special form of
@@ -707,6 +756,14 @@ nhop_set_gw(struct nhop_object *nh, const struct sockaddr *gw, bool is_gw)
 		    gw->sa_family, gw->sa_len);
 		return (false);
 	}
+
+	if (!nhop_check_gateway(nh->nh_priv->nh_upper_family, gw->sa_family)) {
+		FIB_NH_LOG(LOG_DEBUG, nh,
+		    "error: invalid dst/gateway family combination (%d, %d)",
+		    nh->nh_priv->nh_upper_family, gw->sa_family);
+		return (false);
+	}
+
 	memcpy(&nh->gw_sa, gw, gw->sa_len);
 	memset(&nh->gw_buf[gw->sa_len], 0, sizeof(nh->gw_buf) - gw->sa_len);
 
@@ -723,6 +780,20 @@ nhop_set_gw(struct nhop_object *nh, const struct sockaddr *gw, bool is_gw)
 	return (true);
 }
 
+bool
+nhop_set_upper_family(struct nhop_object *nh, int family)
+{
+	if (!nhop_check_gateway(nh->nh_priv->nh_upper_family, family)) {
+		FIB_NH_LOG(LOG_DEBUG, nh,
+		    "error: invalid upper/neigh family combination (%d, %d)",
+		    nh->nh_priv->nh_upper_family, family);
+		return (false);
+	}
+
+	nh->nh_priv->nh_upper_family = family;
+	return (true);
+}
+
 void
 nhop_set_broadcast(struct nhop_object *nh, bool is_broadcast)
 {
@@ -930,7 +1001,7 @@ nhop_set_expire(struct nhop_object *nh, uint32_t expire)
 	nh->nh_priv->nh_expire = expire;
 }
 
-static struct rib_head *
+struct rib_head *
 nhop_get_rh(const struct nhop_object *nh)
 {
 	uint32_t fibnum = nhop_get_fibnum(nh);
diff --git a/sys/net/route/nhop_utils.h b/sys/net/route/nhop_utils.h
index 1f56f4cb8b0b..210b1387c5c9 100644
--- a/sys/net/route/nhop_utils.h
+++ b/sys/net/route/nhop_utils.h
@@ -139,6 +139,11 @@ struct _HNAME##_head {				\
 		for (_x = CHT_FIRST(_head, _i); _x; _x = _PX##_next(_x))
 #define	CHT_SLIST_FOREACH_END	}
 
+#define	CHT_SLIST_FOREACH_SAFE(_head, _PX, _x, _tmp)			\
+	for (uint32_t _i = 0; _i < (_head)->hash_size; _i++) {		\
+		for (_x = CHT_FIRST(_head, _i); _x && ((_tmp) = _PX##_next(_x), 1); _x = (_tmp)) /* next accessor is never applied to a NULL _x */
+#define	CHT_SLIST_FOREACH_SAFE_END	}
+
 #define	CHT_SLIST_RESIZE(_head, _PX, _new_void_ptr, _new_hsize)		\
 	uint32_t _new_idx;						\
 	typeof((_head)->ptr) _new_ptr = (void *)_new_void_ptr;		\
diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c
index 6881b8131d08..4b7572ce7980 100644
--- a/sys/net/route/route_ctl.c
+++ b/sys/net/route/route_ctl.c
@@ -134,7 +134,7 @@ uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
 #if defined(INET) && defined(INET6)
 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
-VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
+VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
     &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
 #endif
@@ -157,16 +157,10 @@ get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
 }
 
 #if defined(INET) && defined(INET6)
-static bool
-rib_can_ipv6_nexthop_address(struct rib_head *rh)
+bool
+rib_can_4o6_nhop(void)
 {
-	int result;
-
-	CURVNET_SET(rh->rib_vnet);
-	result = !!V_rib_route_ipv6_nexthop;
-	CURVNET_RESTORE();
-
-	return (result);
+	return (!!V_rib_route_ipv6_nexthop);
 }
 #endif
 
@@ -702,30 +696,6 @@ rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
 	return (error);
 }
 
-/*
- * Checks if @dst and @gateway is valid combination.
- *
- * Returns true if is valid, false otherwise.
- */
-static bool
-check_gateway(struct rib_head *rnh, struct sockaddr *dst,
-    struct sockaddr *gateway)
-{
-	if (dst->sa_family == gateway->sa_family)
-		return (true);
-	else if (gateway->sa_family == AF_UNSPEC)
-		return (true);
-	else if (gateway->sa_family == AF_LINK)
-		return (true);
-#if defined(INET) && defined(INET6)
-	else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
-		rib_can_ipv6_nexthop_address(rnh))
-		return (true);
-#endif
-	else
-		return (false);
-}
-
 static int
 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
     struct rib_cmd_info *rc)
@@ -744,7 +714,7 @@ add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
 		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
 		return (EINVAL);
 	}
-	if (dst && gateway && !check_gateway(rnh, dst, gateway)) {
+	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
 		FIB_RH_LOG(LOG_DEBUG, rnh,
 		    "error: invalid dst/gateway family combination (%d, %d)",
 		    dst->sa_family, gateway->sa_family);
@@ -1578,3 +1548,4 @@ rib_print_family(int family)
 	}
 	return ("unknown");
 }
+
diff --git a/sys/net/route/route_ctl.h b/sys/net/route/route_ctl.h
index 1fc67591c2e8..e65e6be4efda 100644
--- a/sys/net/route/route_ctl.h
+++ b/sys/net/route/route_ctl.h
@@ -136,6 +136,7 @@ const struct rtentry *rib_lookup_lpm(uint32_t fibnum, int family,
 bool rt_is_host(const struct rtentry *rt);
 sa_family_t rt_get_family(const struct rtentry *);
 struct nhop_object *rt_get_raw_nhop(const struct rtentry *rt);
+void rt_get_rnd(const struct rtentry *rt, struct route_nhop_data *rnd);
 #ifdef INET
 struct in_addr;
 void rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
diff --git a/sys/net/route/route_rtentry.c b/sys/net/route/route_rtentry.c
index 35ca304a669e..41e4ff8ac49f 100644
--- a/sys/net/route/route_rtentry.c
+++ b/sys/net/route/route_rtentry.c
@@ -192,6 +192,13 @@ rt_get_raw_nhop(const struct rtentry *rt)
 	return (rt->rt_nhop);
 }
 
+void
+rt_get_rnd(const struct rtentry *rt, struct route_nhop_data *rnd)
+{
+	rnd->rnd_nhop = rt->rt_nhop;
+	rnd->rnd_weight = rt->rt_weight;
+}
+
 #ifdef INET
 /*
  * Stores IPv4 address and prefix length of @rt inside
diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h
index 51a4285e673e..f286e49b4526 100644
--- a/sys/net/route/route_var.h
+++ b/sys/net/route/route_var.h
@@ -228,6 +228,7 @@ bool nhop_can_multipath(const struct nhop_object *nh);
 bool match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw);
 int check_info_match_nhop(const struct rt_addrinfo *info,
     const struct rtentry *rt, const struct nhop_object *nh);
+bool rib_can_4o6_nhop(void);
 
 /* route_rtentry.c */
 void vnet_rtzone_init(void);
@@ -255,6 +256,7 @@ void nhop_free_any(struct nhop_object *nh);
 struct nhop_object *nhop_get_nhop_internal(struct rib_head *rnh,
     struct nhop_object *nh, int *perror);
 
+bool nhop_check_gateway(int upper_family, int neigh_family);
 
 int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
     struct nhop_object **nh_ret);