git: 36e15b717eec - main - routing: Fix crashes with dpdk_lpm[46] algo.
Alexander V. Chernikov
melifaro at FreeBSD.org
Tue Aug 17 21:12:03 UTC 2021
The branch main has been updated by melifaro:
URL: https://cgit.FreeBSD.org/src/commit/?id=36e15b717eec80047fe7442898b5752101f2fbca
commit 36e15b717eec80047fe7442898b5752101f2fbca
Author: Alexander V. Chernikov <melifaro at FreeBSD.org>
AuthorDate: 2021-08-15 22:25:21 +0000
Commit: Alexander V. Chernikov <melifaro at FreeBSD.org>
CommitDate: 2021-08-17 20:46:22 +0000
routing: Fix crashes with dpdk_lpm[46] algo.
When a prefix gets deleted from the RIB, dpdk_lpm algo needs to know
the nexthop of the "parent" prefix to update its internal state.
The glue code, which utilises RIB as a backing route store, uses
fib[46]_lookup_rt() for the prefix destination after its deletion
to fetch the desired nexthop.
This approach does not work when deleting less-specific prefixes
with most-specific ones are still present. For example, if
10.0.0.0/24, 10.0.0.0/23 and 10.0.0.0/22 exist in RIB, deleting
10.0.0.0/23 would result in 10.0.0.0/24 being returned as a search
result instead of 10.0.0.0/22. This, in turn, results in the failed
datastructure update: part of the deleted /23 prefix will still
contain the reference to an old nexthop. This leads to the
use-after-free behaviour, ending with the eventual crashes.
Fix the logic flaw by properly fetching the prefix "parent" via
newly-created rt_get_inet[6]_parent() helpers.
Differential Revision: https://reviews.freebsd.org/D31546
PR: 256882,256833
MFC after: 1 week
---
sys/contrib/dpdk_rte_lpm/dpdk_lpm.c | 32 ++++----
sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c | 42 +++++-----
sys/net/radix.c | 14 ++++
sys/net/radix.h | 1 +
sys/net/route/route_ctl.h | 3 +
sys/net/route/route_helpers.c | 150 +++++++++++++++++++++++++++++++++++
6 files changed, 208 insertions(+), 34 deletions(-)
diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
index af145997c4d6..51cd134132f6 100644
--- a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
+++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
@@ -134,26 +134,27 @@ handle_default_change(struct dpdk_lpm_data *dd, struct rib_cmd_info *rc)
}
static void
-get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, uint32_t *nhop_idx)
+get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen,
+ uint8_t *pplen, uint32_t *nhop_idx)
{
- struct route_nhop_data rnd;
struct rtentry *rt;
- rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+ rt = rt_get_inet_parent(dd->fibnum, addr, plen);
if (rt != NULL) {
struct in_addr addr4;
uint32_t scopeid;
- int inet_plen;
- rt_get_inet_prefix_plen(rt, &addr4, &inet_plen, &scopeid);
- if (inet_plen > 0) {
- *plen = inet_plen;
- *nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
+ int parent_plen;
+
+ rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+ if (parent_plen > 0) {
+ *pplen = parent_plen;
+ *nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
return;
}
}
*nhop_idx = 0;
- *plen = 0;
+ *pplen = 0;
}
static enum flm_op_result
@@ -181,20 +182,23 @@ handle_gu_change(struct dpdk_lpm_data *dd, const struct rib_cmd_info *rc,
}
ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
- FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
+ FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
(rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
- abuf, plen, nhidx, ret);
+ abuf, plen,
+ rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
+ nhidx, ret);
} else {
/*
* Need to lookup parent. Assume deletion happened already
*/
uint8_t parent_plen;
uint32_t parent_nhop_idx;
- get_parent_rule(dd, addr, &parent_plen, &parent_nhop_idx);
+ get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx);
ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx);
- FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d nhop %u = %d",
- "DEL", abuf, plen, nhidx, ret);
+ FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> %u ret: %d",
+ "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
+ parent_nhop_idx, ret);
}
if (ret != 0) {
diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
index 17d35c16346d..ec1a3228d42b 100644
--- a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
+++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
@@ -165,30 +165,26 @@ handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc,
}
static struct rte_lpm6_rule *
-pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
- char *buffer)
+pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int plen,
+ int *pplen, uint32_t *pnhop_idx, char *buffer)
{
struct rte_lpm6_rule *lsp_rule = NULL;
- struct route_nhop_data rnd;
struct rtentry *rt;
- int plen;
- rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd);
+ *pnhop_idx = 0;
+ *pplen = 0;
+
+ rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
/* plen = 0 means default route and it's out of scope */
if (rt != NULL) {
- uint32_t scopeid;
+ uint32_t nhop_idx, scopeid;
struct in6_addr new_addr6;
rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
if (plen > 0) {
- uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
- if (nhidx == 0) {
- /*
- * shouldn't happen as we already have parent route.
- * It will trigger rebuild automatically.
- */
- return (NULL);
- }
- lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhidx);
+ nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
+ lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhop_idx);
+ *pnhop_idx = nhop_idx;
+ *pplen = plen;
}
}
@@ -217,20 +213,26 @@ handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc,
ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
- FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
+ FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
(rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
- abuf, plen, nhidx, ret);
+ abuf, plen,
+ rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
+ nhidx, ret);
} else {
/*
* Need to lookup parent. Assume deletion happened already
*/
char buffer[RTE_LPM6_RULE_SIZE];
struct rte_lpm6_rule *lsp_rule = NULL;
- lsp_rule = pack_parent_rule(dd, addr6, buffer);
+ int parent_plen;
+ uint32_t parent_nhop_idx;
+ lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
+ &parent_nhop_idx, buffer);
ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
- FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop ? = %d",
- "DEL", abuf, plen, ret);
+ FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d",
+ "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
+ parent_nhop_idx, ret);
}
if (ret != 0) {
diff --git a/sys/net/radix.c b/sys/net/radix.c
index 931bf6db871b..2169361c7229 100644
--- a/sys/net/radix.c
+++ b/sys/net/radix.c
@@ -371,6 +371,20 @@ on1:
return (0);
}
+/*
+ * Returns the next (wider) prefix for the key defined by @rn
+ * if exists.
+ */
+struct radix_node *
+rn_nextprefix(struct radix_node *rn)
+{
+ for (rn = rn->rn_dupedkey; rn != NULL; rn = rn->rn_dupedkey) {
+ if (!(rn->rn_flags & RNF_ROOT))
+ return (rn);
+ }
+ return (NULL);
+}
+
#ifdef RN_DEBUG
int rn_nodenum;
struct radix_node *rn_clist;
diff --git a/sys/net/radix.h b/sys/net/radix.h
index a0e5e5c5aa3f..97555ee9e16d 100644
--- a/sys/net/radix.h
+++ b/sys/net/radix.h
@@ -119,6 +119,7 @@ typedef int rn_walktree_t(struct radix_head *head, walktree_f_t *f,
typedef int rn_walktree_from_t(struct radix_head *head,
void *a, void *m, walktree_f_t *f, void *w);
typedef void rn_close_t(struct radix_node *rn, struct radix_head *head);
+struct radix_node *rn_nextprefix(struct radix_node *rn);
struct radix_mask_head;
diff --git a/sys/net/route/route_ctl.h b/sys/net/route/route_ctl.h
index 229c762d4a73..a670979df8c9 100644
--- a/sys/net/route/route_ctl.h
+++ b/sys/net/route/route_ctl.h
@@ -117,6 +117,7 @@ void rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
int *plen, uint32_t *pscopeid);
void rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
struct in_addr *pmask, uint32_t *pscopeid);
+struct rtentry *rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen);
#endif
#ifdef INET6
struct in6_addr;
@@ -124,6 +125,8 @@ void rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
int *plen, uint32_t *pscopeid);
void rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
struct in6_addr *pmask, uint32_t *pscopeid);
+struct rtentry *rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr,
+ int plen);
#endif
/* Nexthops */
diff --git a/sys/net/route/route_helpers.c b/sys/net/route/route_helpers.c
index 5d29197cc4fb..569e13a308eb 100644
--- a/sys/net/route/route_helpers.c
+++ b/sys/net/route/route_helpers.c
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#endif
#ifdef INET6
#include <netinet6/in6_fib.h>
+#include <netinet6/in6_var.h>
#endif
#include <net/vnet.h>
@@ -415,3 +416,152 @@ rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
}
}
#endif
+
+#ifdef INET
+/*
+ * Checks if the found key in the trie contains (<=) a prefix covering
+ * @paddr/@plen.
+ * Returns the most specific rtentry matching the condition or NULL.
+ */
+static struct rtentry *
+get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen)
+{
+ struct route_nhop_data rnd;
+ struct rtentry *rt;
+ struct in_addr addr4;
+ uint32_t scopeid;
+ int parent_plen;
+ struct radix_node *rn;
+
+ rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+ rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+ if (parent_plen <= plen)
+ return (rt);
+
+ /*
+ * There can be multiple prefixes associated with the found key:
+ * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc.
+ * All such prefixes are linked via rn_dupedkey, from most specific
+ * to least specific. Iterate over them to check if any of these
+ * prefixes are wider than desired plen.
+ */
+ rn = (struct radix_node *)rt;
+ while ((rn = rn_nextprefix(rn)) != NULL) {
+ rt = RNTORT(rn);
+ rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+ if (parent_plen <= plen)
+ return (rt);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Returns the most specific prefix containing (>) @paddr/plen.
+ */
+struct rtentry *
+rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen)
+{
+ struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST };
+ struct in_addr addr4 = addr;
+ struct in_addr mask4;
+ struct rtentry *rt;
+
+ while (plen-- > 0) {
+ /* Calculate wider mask & new key to lookup */
+ mask4.s_addr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
+ addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr));
+ if (addr4.s_addr == lookup_addr.s_addr) {
+ /* Skip lookup if the key is the same */
+ continue;
+ }
+ lookup_addr = addr4;
+
+ rt = get_inet_parent_prefix(fibnum, lookup_addr, plen);
+ if (rt != NULL)
+ return (rt);
+ }
+
+ return (NULL);
+}
+#endif
+
+#ifdef INET6
+/*
+ * Checks if the found key in the trie contains (<=) a prefix covering
+ * @paddr/@plen.
+ * Returns the most specific rtentry matching the condition or NULL.
+ */
+static struct rtentry *
+get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen)
+{
+ struct route_nhop_data rnd;
+ struct rtentry *rt;
+ struct in6_addr addr6;
+ uint32_t scopeid;
+ int parent_plen;
+ struct radix_node *rn;
+
+ rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd);
+ rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
+ if (parent_plen <= plen)
+ return (rt);
+
+ /*
+ * There can be multiple prefixes associated with the found key:
+ * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc.
+ * All such prefixes are linked via rn_dupedkey, from most specific
+ * to least specific. Iterate over them to check if any of these
+ * prefixes are wider than desired plen.
+ */
+ rn = (struct radix_node *)rt;
+ while ((rn = rn_nextprefix(rn)) != NULL) {
+ rt = RNTORT(rn);
+ rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
+ if (parent_plen <= plen)
+ return (rt);
+ }
+
+ return (NULL);
+}
+
+static void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+ uint32_t *cp;
+
+ for (cp = (uint32_t *)addr6; mask >= 32; mask -= 32)
+ *cp++ = 0xFFFFFFFF;
+ if (mask > 0)
+ *cp = htonl(mask ? ~((1 << (32 - mask)) - 1) : 0);
+}
+
+/*
+ * Returns the most specific prefix containing (>) @paddr/plen.
+ */
+struct rtentry *
+rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen)
+{
+ struct in6_addr lookup_addr = in6mask128;
+ struct in6_addr addr6 = *paddr;
+ struct in6_addr mask6;
+ struct rtentry *rt;
+
+ while (plen-- > 0) {
+ /* Calculate wider mask & new key to lookup */
+ ipv6_writemask(&mask6, plen);
+ IN6_MASK_ADDR(&addr6, &mask6);
+ if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) {
+ /* Skip lookup if the key is the same */
+ continue;
+ }
+ lookup_addr = addr6;
+
+ rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen);
+ if (rt != NULL)
+ return (rt);
+ }
+
+ return (NULL);
+}
+#endif
More information about the dev-commits-src-main
mailing list