git: 800c68469ba6 - main - routing: add nhop(9) kpi.
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Mon, 01 Aug 2022 08:53:23 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=800c68469ba6a0a9972129777bed71e1c2b50a87

commit 800c68469ba6a0a9972129777bed71e1c2b50a87
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-07-29 12:32:27 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2022-08-01 08:52:26 +0000

    routing: add nhop(9) kpi.

    Differential Revision: https://reviews.freebsd.org/D35985
    MFC after: 1 month
---
 sys/net/route.c           |   2 +-
 sys/net/route/nhgrp.c     |  15 ++
 sys/net/route/nhop.c      |   1 +
 sys/net/route/nhop.h      |  23 +++
 sys/net/route/nhop_ctl.c  | 490 +++++++++++++++++++++++++++-------------------
 sys/net/route/nhop_var.h  |   1 +
 sys/net/route/route_var.h |  15 +-
 sys/netinet/in_rmx.c      |  43 ++--
 sys/netinet6/in6_rmx.c    |  27 ++-
 sys/netinet6/nd6_rtr.c    |   1 +
 10 files changed, 376 insertions(+), 242 deletions(-)

diff --git a/sys/net/route.c b/sys/net/route.c
index b99939179ff2..0cf56fc18364 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -570,7 +570,7 @@ rt_getifa_family(struct rt_addrinfo *info, uint32_t fibnum)
 }
 
 /*
- * Look up rt_addrinfo for a specific fib.
+ * Fills in rti_ifp and rti_ifa for the provided fib.
  *
  * Assume basic consistency checks are executed by callers:
  * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
diff --git a/sys/net/route/nhgrp.c b/sys/net/route/nhgrp.c index 8f6c04b86395..f565842bb7d4 100644 --- a/sys/net/route/nhgrp.c +++ b/sys/net/route/nhgrp.c @@ -179,6 +179,13 @@ link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv) NHOPS_WUNLOCK(ctl); +#if DEBUG_MAX_LEVEL >= LOG_DEBUG2 + { + char nhgrp_buf[NHOP_PRINT_BUFSIZE]; + nhgrp_print_buf(grp_priv->nhg, nhgrp_buf, sizeof(nhgrp_buf)); + FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "linked %s", nhgrp_buf); + } +#endif consider_resize(ctl, new_num_buckets, new_num_items); return (1); @@ -207,6 +214,14 @@ unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key) NHOPS_WUNLOCK(ctl); +#if DEBUG_MAX_LEVEL >= LOG_DEBUG2 + { + char nhgrp_buf[NHOP_PRINT_BUFSIZE]; + nhgrp_print_buf(nhg_priv_ret->nhg, nhgrp_buf, sizeof(nhgrp_buf)); + FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "unlinked idx#%d %s", idx, + nhgrp_buf); + } +#endif return (nhg_priv_ret); } diff --git a/sys/net/route/nhop.c b/sys/net/route/nhop.c index 531eae03638f..843ef64bb141 100644 --- a/sys/net/route/nhop.c +++ b/sys/net/route/nhop.c @@ -304,6 +304,7 @@ link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv) nh_priv->nh_idx = idx; nh_priv->nh_control = ctl; + nh_priv->nh_finalized = 1; CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv); diff --git a/sys/net/route/nhop.h b/sys/net/route/nhop.h index 985e4c32ccd3..bd3c3825ed86 100644 --- a/sys/net/route/nhop.h +++ b/sys/net/route/nhop.h @@ -175,6 +175,29 @@ struct sysctl_req; struct sockaddr_dl; struct rib_head; +/* flags that can be set using nhop_set_rtflags() */ +#define RT_SET_RTFLAGS_MASK (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_STATIC) +#define RT_CHANGE_RTFLAGS_MASK RT_SET_RTFLAGS_MASK + +struct nhop_object *nhop_alloc(uint32_t fibnum, int family); +void nhop_copy(struct nhop_object *nh, const struct nhop_object *nh_orig); +struct nhop_object *nhop_get_nhop(struct nhop_object *nh, int *perror); + +void nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp); +bool nhop_set_gw(struct 
nhop_object *nh, const struct sockaddr *sa, bool is_gw); + + +void nhop_set_mtu(struct nhop_object *nh, uint32_t mtu, bool from_user); +void nhop_set_rtflags(struct nhop_object *nh, int rt_flags); +void nhop_set_pxtype_flag(struct nhop_object *nh, int nh_flag); +void nhop_set_broadcast(struct nhop_object *nh, bool is_broadcast); +void nhop_set_blackhole(struct nhop_object *nh, int blackhole_rt_flag); +void nhop_set_pinned(struct nhop_object *nh, bool is_pinned); +void nhop_set_redirect(struct nhop_object *nh, bool is_redirect); +void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type); +void nhop_set_src(struct nhop_object *nh, struct ifaddr *ifa); +void nhop_set_transmit_ifp(struct nhop_object *nh, struct ifnet *ifp); + uint32_t nhop_get_idx(const struct nhop_object *nh); enum nhop_type nhop_get_type(const struct nhop_object *nh); int nhop_get_rtflags(const struct nhop_object *nh); diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c index 9f612e354fa6..824bf12a903d 100644 --- a/sys/net/route/nhop_ctl.c +++ b/sys/net/route/nhop_ctl.c @@ -85,16 +85,13 @@ _DECLARE_DEBUG(LOG_INFO); static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w); -static struct nhop_priv *alloc_nhop_structure(void); -static int get_nhop(struct rib_head *rnh, struct rt_addrinfo *info, - struct nhop_priv **pnh_priv); -static int finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, - struct nhop_priv *nh_priv); +static int finalize_nhop(struct nh_control *ctl, struct nhop_object *nh); static struct ifnet *get_aifp(const struct nhop_object *nh); static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp); static void destroy_nhop_epoch(epoch_context_t ctx); -static void destroy_nhop(struct nhop_priv *nh_priv); +static void destroy_nhop(struct nhop_object *nh); +static struct rib_head *nhop_get_rh(const struct nhop_object *nh); _Static_assert(__offsetof(struct nhop_object, nh_ifp) == 32, "nhop_object: 
wrong nh_ifp offset"); @@ -172,24 +169,8 @@ cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two) static void set_nhop_mtu_from_info(struct nhop_object *nh, const struct rt_addrinfo *info) { - - if (info->rti_mflags & RTV_MTU) { - if (info->rti_rmx->rmx_mtu != 0) { - /* - * MTU was explicitly provided by user. - * Keep it. - */ - - nh->nh_priv->rt_flags |= RTF_FIXEDMTU; - } else { - /* - * User explicitly sets MTU to 0. - * Assume rollback to default. - */ - nh->nh_priv->rt_flags &= ~RTF_FIXEDMTU; - } - nh->nh_mtu = info->rti_rmx->rmx_mtu; - } + if (info->rti_mflags & RTV_MTU) + nhop_set_mtu(nh, info->rti_rmx->rmx_mtu, true); } /* @@ -213,9 +194,10 @@ set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info) struct sockaddr *gw; gw = info->rti_info[RTAX_GATEWAY]; - KASSERT(gw != NULL, ("gw is NULL")); + MPASS(gw != NULL); + bool is_gw = info->rti_flags & RTF_GATEWAY; - if ((gw->sa_family == AF_LINK) && !(info->rti_flags & RTF_GATEWAY)) { + if ((gw->sa_family == AF_LINK) && !is_gw) { /* * Interface route with interface specified by the interface @@ -233,7 +215,7 @@ set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info) sdl->sdl_index); return (EINVAL); } - fill_sdl_from_ifp(&nh->gwl_sa, ifp); + nhop_set_direct_gw(nh, ifp); } else { /* @@ -247,31 +229,12 @@ set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info) * In both cases, save the original nexthop to make the callers * happy. */ - if (gw->sa_len > sizeof(struct sockaddr_in6)) { - FIB_NH_LOG(LOG_DEBUG, nh, "nhop SA size too big: AF %d len %u", - gw->sa_family, gw->sa_len); + if (!nhop_set_gw(nh, gw, is_gw)) return (EINVAL); - } - memcpy(&nh->gw_sa, gw, gw->sa_len); } return (0); } -static uint16_t -convert_rt_to_nh_flags(int rt_flags) -{ - uint16_t res; - - res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; - res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0; - res |= (rt_flags & RTF_BLACKHOLE) ? 
NHF_BLACKHOLE : 0; - res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; - res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0; - res |= (rt_flags & RTF_GATEWAY) ? NHF_GATEWAY : 0; - - return (res); -} - static void set_nhop_expire_from_info(struct nhop_object *nh, const struct rt_addrinfo *info) { @@ -283,43 +246,6 @@ set_nhop_expire_from_info(struct nhop_object *nh, const struct rt_addrinfo *info nhop_set_expire(nh, nh_expire); } -static int -fill_nhop_from_info(struct nhop_priv *nh_priv, struct rt_addrinfo *info) -{ - int error, rt_flags; - struct nhop_object *nh; - - nh = nh_priv->nh; - - rt_flags = info->rti_flags & NHOP_RT_FLAG_MASK; - - nh->nh_priv->rt_flags = rt_flags; - nh_priv->nh_upper_family = info->rti_info[RTAX_DST]->sa_family; - nh_priv->nh_type = 0; // hook responsibility to set nhop type - nh->nh_flags = convert_rt_to_nh_flags(rt_flags); - - set_nhop_mtu_from_info(nh, info); - if ((error = set_nhop_gw_from_info(nh, info)) != 0) - return (error); - if (nh->gw_sa.sa_family == AF_LINK) - nh_priv->nh_neigh_family = nh_priv->nh_upper_family; - else - nh_priv->nh_neigh_family = nh->gw_sa.sa_family; - set_nhop_expire_from_info(nh, info); - - nh->nh_ifp = (info->rti_ifp != NULL) ? info->rti_ifp : info->rti_ifa->ifa_ifp; - nh->nh_ifa = info->rti_ifa; - /* depends on the gateway */ - nh->nh_aifp = get_aifp(nh); - - /* - * Note some of the remaining data is set by the - * per-address-family pre-add hook. - */ - - return (0); -} - /* * Creates a new nexthop based on the information in @info. 
* @@ -331,81 +257,94 @@ int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info, struct nhop_object **nh_ret) { - struct nhop_priv *nh_priv; int error; NET_EPOCH_ASSERT(); + MPASS(info->rti_ifa != NULL); + MPASS(info->rti_ifp != NULL); + if (info->rti_info[RTAX_GATEWAY] == NULL) { FIB_RH_LOG(LOG_DEBUG, rnh, "error: empty gateway"); return (EINVAL); } - nh_priv = alloc_nhop_structure(); + struct nhop_object *nh = nhop_alloc(rnh->rib_fibnum, rnh->rib_family); + if (nh == NULL) + return (ENOMEM); - error = fill_nhop_from_info(nh_priv, info); - if (error != 0) { - uma_zfree(nhops_zone, nh_priv->nh); + if ((error = set_nhop_gw_from_info(nh, info)) != 0) { + nhop_free(nh); return (error); } + nhop_set_transmit_ifp(nh, info->rti_ifp); - error = get_nhop(rnh, info, &nh_priv); - if (error == 0) - *nh_ret = nh_priv->nh; + nhop_set_blackhole(nh, info->rti_flags & (RTF_BLACKHOLE | RTF_REJECT)); + + error = rnh->rnh_set_nh_pfxflags(rnh->rib_fibnum, info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], nh); + + nhop_set_redirect(nh, info->rti_flags & RTF_DYNAMIC); + nhop_set_pinned(nh, info->rti_flags & RTF_PINNED); + set_nhop_expire_from_info(nh, info); + nhop_set_rtflags(nh, info->rti_flags); + + set_nhop_mtu_from_info(nh, info); + nhop_set_src(nh, info->rti_ifa); + + /* + * The remaining fields are either set from nh_preadd hook + * or are computed from the provided data + */ + *nh_ret = nhop_get_nhop(nh, &error); return (error); } /* - * Gets linked nhop using the provided @pnh_priv nexhop data. + * Gets linked nhop using the provided @nh nexhop data. * If linked nhop is found, returns it, freeing the provided one. * If there is no such nexthop, attaches the remaining data to the * provided nexthop and links it. * - * Returns 0 on success, storing referenced nexthop in @pnh_priv. + * Returns 0 on success, storing referenced nexthop in @pnh. * Otherwise, errno is returned. 
*/ -static int -get_nhop(struct rib_head *rnh, struct rt_addrinfo *info, - struct nhop_priv **pnh_priv) +struct nhop_object * +nhop_get_nhop(struct nhop_object *nh, int *perror) { - const struct sockaddr *dst, *netmask; - struct nhop_priv *nh_priv, *tmp_priv; + struct nhop_priv *tmp_priv; int error; - nh_priv = *pnh_priv; + nh->nh_aifp = get_aifp(nh); - /* Give the protocols chance to augment the request data */ - dst = info->rti_info[RTAX_DST]; - netmask = info->rti_info[RTAX_NETMASK]; + struct rib_head *rnh = nhop_get_rh(nh); - error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, nh_priv->nh); + /* Give the protocols chance to augment nexthop properties */ + error = rnh->rnh_augment_nh(rnh->rib_fibnum, nh); if (error != 0) { - uma_zfree(nhops_zone, nh_priv->nh); - return (error); + nhop_free(nh); + *perror = error; + return (NULL); } - tmp_priv = find_nhop(rnh->nh_control, nh_priv); + tmp_priv = find_nhop(rnh->nh_control, nh->nh_priv); if (tmp_priv != NULL) { - uma_zfree(nhops_zone, nh_priv->nh); - *pnh_priv = tmp_priv; - return (0); + nhop_free(nh); + *perror = 0; + return (tmp_priv->nh); } /* * Existing nexthop not found, need to create new one. - * Note: multiple simultaneous get_nhop() requests + * Note: multiple simultaneous requests * can result in multiple equal nexhops existing in the * nexthop table. This is not a not a problem until the * relative number of such nexthops is significant, which * is extremely unlikely. */ - - error = finalize_nhop(rnh->nh_control, info, nh_priv); - if (error != 0) - return (error); - - return (0); + *perror = finalize_nhop(rnh->nh_control, nh); + return (*perror == 0 ? nh : NULL); } /* @@ -413,28 +352,26 @@ get_nhop(struct rib_head *rnh, struct rt_addrinfo *info, * This is a helper function to support route changes. 
* * It limits the changes that can be done to the route to the following: - * 1) all combination of gateway changes (gw, interface, blackhole/reject) - * 2) route flags (FLAG[123],STATIC,BLACKHOLE,REJECT) + * 1) all combination of gateway changes + * 2) route flags (FLAG[123],STATIC) * 3) route MTU * * Returns: - * 0 on success + * 0 on success, errno otherwise */ static int alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info) { - struct nhop_priv *nh_priv = nh->nh_priv; struct sockaddr *info_gw; int error; /* Update MTU if set in the request*/ set_nhop_mtu_from_info(nh, info); - /* XXX: allow only one of BLACKHOLE,REJECT,GATEWAY */ - - /* Allow some flags (FLAG1,STATIC,BLACKHOLE,REJECT) to be toggled on change. */ - nh_priv->rt_flags &= ~RTF_FMASK; - nh_priv->rt_flags |= info->rti_flags & RTF_FMASK; + /* Only RTF_FLAG[123] and RTF_STATIC */ + uint32_t rt_flags = nhop_get_rtflags(nh) & ~RT_CHANGE_RTFLAGS_MASK; + rt_flags |= info->rti_flags & RT_CHANGE_RTFLAGS_MASK; + nhop_set_rtflags(nh, rt_flags); /* Consider gateway change */ info_gw = info->rti_info[RTAX_GATEWAY]; @@ -442,22 +379,12 @@ alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info) error = set_nhop_gw_from_info(nh, info); if (error != 0) return (error); - if (nh->gw_sa.sa_family == AF_LINK) - nh_priv->nh_neigh_family = nh_priv->nh_upper_family; - else - nh_priv->nh_neigh_family = nh->gw_sa.sa_family; - /* Update RTF_GATEWAY flag status */ - nh_priv->rt_flags &= ~RTF_GATEWAY; - nh_priv->rt_flags |= (RTF_GATEWAY & info->rti_flags); } - /* Update datapath flags */ - nh->nh_flags = convert_rt_to_nh_flags(nh_priv->rt_flags); if (info->rti_ifa != NULL) - nh->nh_ifa = info->rti_ifa; + nhop_set_src(nh, info->rti_ifa); if (info->rti_ifp != NULL) - nh->nh_ifp = info->rti_ifp; - nh->nh_aifp = get_aifp(nh); + nhop_set_transmit_ifp(nh, info->rti_ifp); return (0); } @@ -475,64 +402,28 @@ int nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig, struct 
rt_addrinfo *info, struct nhop_object **pnh) { - struct nhop_priv *nh_priv; struct nhop_object *nh; int error; NET_EPOCH_ASSERT(); - nh_priv = alloc_nhop_structure(); - nh = nh_priv->nh; - - /* Start with copying data from original nexthop */ - nh_priv->nh_upper_family = nh_orig->nh_priv->nh_upper_family; - nh_priv->nh_neigh_family = nh_orig->nh_priv->nh_neigh_family; - nh_priv->rt_flags = nh_orig->nh_priv->rt_flags; - nh_priv->nh_type = nh_orig->nh_priv->nh_type; - nh_priv->nh_fibnum = nh_orig->nh_priv->nh_fibnum; + nh = nhop_alloc(rnh->rib_fibnum, rnh->rib_family); + if (nh == NULL) + return (ENOMEM); - nh->nh_ifp = nh_orig->nh_ifp; - nh->nh_ifa = nh_orig->nh_ifa; - nh->nh_aifp = nh_orig->nh_aifp; - nh->nh_mtu = nh_orig->nh_mtu; - nh->nh_flags = nh_orig->nh_flags; - memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len); + nhop_copy(nh, nh_orig); error = alter_nhop_from_info(nh, info); if (error != 0) { - uma_zfree(nhops_zone, nh_priv->nh); + nhop_free(nh); return (error); } - error = get_nhop(rnh, info, &nh_priv); - if (error == 0) - *pnh = nh_priv->nh; + *pnh = nhop_get_nhop(nh, &error); return (error); } -/* - * Allocates memory for public/private nexthop structures. - * - * Returns pointer to nhop_priv or NULL. 
- */ -static struct nhop_priv * -alloc_nhop_structure(void) -{ - struct nhop_object *nh; - struct nhop_priv *nh_priv; - - nh = (struct nhop_object *)uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO); - if (nh == NULL) - return (NULL); - nh_priv = (struct nhop_priv *)((char *)nh + NHOP_OBJECT_ALIGNED_SIZE); - - nh->nh_priv = nh_priv; - nh_priv->nh = nh; - - return (nh_priv); -} - static bool reference_nhop_deps(struct nhop_object *nh) { @@ -543,7 +434,8 @@ reference_nhop_deps(struct nhop_object *nh) ifa_free(nh->nh_ifa); return (false); } - FIB_NH_LOG(LOG_DEBUG, nh, "AIFP: %p nh_ifp %p", nh->nh_aifp, nh->nh_ifp); + FIB_NH_LOG(LOG_DEBUG2, nh, "nh_aifp: %s nh_ifp %s", + if_name(nh->nh_aifp), if_name(nh->nh_ifp)); if (!if_try_ref(nh->nh_ifp)) { ifa_free(nh->nh_ifa); if_rele(nh->nh_aifp); @@ -560,15 +452,13 @@ reference_nhop_deps(struct nhop_object *nh) * errno otherwise. @nh_priv is freed in case of error. */ static int -finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, - struct nhop_priv *nh_priv) +finalize_nhop(struct nh_control *ctl, struct nhop_object *nh) { - struct nhop_object *nh = nh_priv->nh; /* Allocate per-cpu packet counter */ nh->nh_pksent = counter_u64_alloc(M_NOWAIT); if (nh->nh_pksent == NULL) { - uma_zfree(nhops_zone, nh); + nhop_free(nh); RTSTAT_INC(rts_nh_alloc_failure); FIB_NH_LOG(LOG_WARNING, nh, "counter_u64_alloc() failed"); return (ENOMEM); @@ -576,23 +466,21 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, if (!reference_nhop_deps(nh)) { counter_u64_free(nh->nh_pksent); - uma_zfree(nhops_zone, nh); + nhop_free(nh); RTSTAT_INC(rts_nh_alloc_failure); FIB_NH_LOG(LOG_WARNING, nh, "interface reference failed"); return (EAGAIN); } /* Save vnet to ease destruction */ - nh_priv->nh_vnet = curvnet; - - refcount_init(&nh_priv->nh_refcnt, 1); + nh->nh_priv->nh_vnet = curvnet; /* Please see nhop_free() comments on the initial value */ - refcount_init(&nh_priv->nh_linked, 2); + refcount_init(&nh->nh_priv->nh_linked, 2); - 
nh_priv->nh_fibnum = ctl->ctl_rh->rib_fibnum; + nh->nh_priv->nh_fibnum = ctl->ctl_rh->rib_fibnum; - if (link_nhop(ctl, nh_priv) == 0) { + if (link_nhop(ctl, nh->nh_priv) == 0) { /* * Adding nexthop to the datastructures * failed. Call destructor w/o waiting for @@ -602,7 +490,7 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, char nhbuf[NHOP_PRINT_BUFSIZE]; FIB_NH_LOG(LOG_WARNING, nh, "failed to link %s", nhop_print_buf(nh, nhbuf, sizeof(nhbuf))); - destroy_nhop(nh_priv); + destroy_nhop(nh); return (ENOBUFS); } @@ -616,12 +504,8 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, } static void -destroy_nhop(struct nhop_priv *nh_priv) +destroy_nhop(struct nhop_object *nh) { - struct nhop_object *nh; - - nh = nh_priv->nh; - if_rele(nh->nh_ifp); if_rele(nh->nh_aifp); ifa_free(nh->nh_ifa); @@ -640,7 +524,7 @@ destroy_nhop_epoch(epoch_context_t ctx) nh_priv = __containerof(ctx, struct nhop_priv, nh_epoch_ctx); - destroy_nhop(nh_priv); + destroy_nhop(nh_priv->nh); } void @@ -669,6 +553,12 @@ nhop_free(struct nhop_object *nh) if (!refcount_release(&nh_priv->nh_refcnt)) return; + /* allows to use nhop_free() during nhop init */ + if (__predict_false(nh_priv->nh_finalized == 0)) { + uma_zfree(nhops_zone, nh); + return; + } + #if DEBUG_MAX_LEVEL >= LOG_DEBUG char nhbuf[NHOP_PRINT_BUFSIZE]; FIB_NH_LOG(LOG_DEBUG, nh, "deleting %s", nhop_print_buf(nh, nhbuf, sizeof(nhbuf))); @@ -738,7 +628,144 @@ nhop_free_any(struct nhop_object *nh) #endif } -/* Helper functions */ +/* Nhop-related methods */ + +/* + * Allocates an empty unlinked nhop object. 
+ * Returns object pointer or NULL on failure + */ +struct nhop_object * +nhop_alloc(uint32_t fibnum, int family) +{ + struct nhop_object *nh; + struct nhop_priv *nh_priv; + + nh = (struct nhop_object *)uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO); + if (__predict_false(nh == NULL)) + return (NULL); + + nh_priv = (struct nhop_priv *)((char *)nh + NHOP_OBJECT_ALIGNED_SIZE); + nh->nh_priv = nh_priv; + nh_priv->nh = nh; + + nh_priv->nh_upper_family = family; + nh_priv->nh_fibnum = fibnum; + + /* Setup refcount early to allow nhop_free() to work */ + refcount_init(&nh_priv->nh_refcnt, 1); + + return (nh); +} + +void +nhop_copy(struct nhop_object *nh, const struct nhop_object *nh_orig) +{ + struct nhop_priv *nh_priv = nh->nh_priv; + + nh->nh_flags = nh_orig->nh_flags; + nh->nh_mtu = nh_orig->nh_mtu; + memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len); + nh->nh_ifp = nh_orig->nh_ifp; + nh->nh_ifa = nh_orig->nh_ifa; + nh->nh_aifp = nh_orig->nh_aifp; + + nh_priv->nh_upper_family = nh_orig->nh_priv->nh_upper_family; + nh_priv->nh_neigh_family = nh_orig->nh_priv->nh_neigh_family; + nh_priv->nh_type = nh_orig->nh_priv->nh_type; + nh_priv->rt_flags = nh_orig->nh_priv->rt_flags; + nh_priv->nh_fibnum = nh_orig->nh_priv->nh_fibnum; +} + +void +nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp) +{ + nh->nh_flags &= ~NHF_GATEWAY; + nh->nh_priv->rt_flags &= ~RTF_GATEWAY; + nh->nh_priv->nh_neigh_family = nh->nh_priv->nh_upper_family; + + fill_sdl_from_ifp(&nh->gwl_sa, ifp); + memset(&nh->gw_buf[nh->gw_sa.sa_len], 0, sizeof(nh->gw_buf) - nh->gw_sa.sa_len); +} + +/* + * Sets gateway for the nexthop. + * It can be "normal" gateway with is_gw set or a special form of + * adding interface route, refering to it by specifying local interface + * address. In that case is_gw is set to false. 
+ */ +bool +nhop_set_gw(struct nhop_object *nh, const struct sockaddr *gw, bool is_gw) +{ + if (gw->sa_len > sizeof(nh->gw_buf)) { + FIB_NH_LOG(LOG_DEBUG, nh, "nhop SA size too big: AF %d len %u", + gw->sa_family, gw->sa_len); + return (false); + } + memcpy(&nh->gw_sa, gw, gw->sa_len); + memset(&nh->gw_buf[gw->sa_len], 0, sizeof(nh->gw_buf) - gw->sa_len); + + if (is_gw) { + nh->nh_flags |= NHF_GATEWAY; + nh->nh_priv->rt_flags |= RTF_GATEWAY; + nh->nh_priv->nh_neigh_family = gw->sa_family; + } else { + nh->nh_flags &= ~NHF_GATEWAY; + nh->nh_priv->rt_flags &= ~RTF_GATEWAY; + nh->nh_priv->nh_neigh_family = nh->nh_priv->nh_upper_family; + } + + return (true); +} + +void +nhop_set_broadcast(struct nhop_object *nh, bool is_broadcast) +{ + if (is_broadcast) { + nh->nh_flags |= NHF_BROADCAST; + nh->nh_priv->rt_flags |= RTF_BROADCAST; + } else { + nh->nh_flags &= ~NHF_BROADCAST; + nh->nh_priv->rt_flags &= ~RTF_BROADCAST; + } +} + +void +nhop_set_blackhole(struct nhop_object *nh, int blackhole_rt_flag) +{ + nh->nh_flags &= ~(NHF_BLACKHOLE | NHF_REJECT); + nh->nh_priv->rt_flags &= ~(RTF_BLACKHOLE | RTF_REJECT); + switch (blackhole_rt_flag) { + case RTF_BLACKHOLE: + nh->nh_flags |= NHF_BLACKHOLE; + nh->nh_priv->rt_flags |= RTF_BLACKHOLE; + break; + case RTF_REJECT: + nh->nh_flags |= NHF_REJECT; + nh->nh_priv->rt_flags |= RTF_REJECT; + break; + } +} + +void +nhop_set_redirect(struct nhop_object *nh, bool is_redirect) +{ + if (is_redirect) { + nh->nh_priv->rt_flags |= RTF_DYNAMIC; + nh->nh_flags |= NHF_REDIRECT; + } else { + nh->nh_priv->rt_flags &= ~RTF_DYNAMIC; + nh->nh_flags &= ~NHF_REDIRECT; + } +} + +void +nhop_set_pinned(struct nhop_object *nh, bool is_pinned) +{ + if (is_pinned) + nh->nh_priv->rt_flags |= RTF_PINNED; + else + nh->nh_priv->rt_flags &= ~RTF_PINNED; +} uint32_t nhop_get_idx(const struct nhop_object *nh) @@ -768,13 +795,65 @@ nhop_get_rtflags(const struct nhop_object *nh) return (nh->nh_priv->rt_flags); } +/* + * Sets generic rtflags that are not covered by 
other functions. + */ void nhop_set_rtflags(struct nhop_object *nh, int rt_flags) { + nh->nh_priv->rt_flags &= ~RT_SET_RTFLAGS_MASK; + nh->nh_priv->rt_flags |= (rt_flags & RT_SET_RTFLAGS_MASK); +} - nh->nh_priv->rt_flags = rt_flags; +/* + * Sets flags that are specific to the prefix (NHF_HOST or NHF_DEFAULT). + */ +void +nhop_set_pxtype_flag(struct nhop_object *nh, int nh_flag) +{ + if (nh_flag == NHF_HOST) { + nh->nh_flags |= NHF_HOST; + nh->nh_flags &= ~NHF_DEFAULT; + nh->nh_priv->rt_flags |= RTF_HOST; + } else if (nh_flag == NHF_DEFAULT) { + nh->nh_flags |= NHF_DEFAULT; + nh->nh_flags &= ~NHF_HOST; + nh->nh_priv->rt_flags &= ~RTF_HOST; + } else { + nh->nh_flags &= ~(NHF_HOST | NHF_DEFAULT); + nh->nh_priv->rt_flags &= ~RTF_HOST; + } +} + +/* + * Sets nhop MTU. Sets RTF_FIXEDMTU if mtu is explicitly + * specified by userland. + */ +void +nhop_set_mtu(struct nhop_object *nh, uint32_t mtu, bool from_user) +{ + if (from_user) { + if (mtu != 0) + nh->nh_priv->rt_flags |= RTF_FIXEDMTU; + else + nh->nh_priv->rt_flags &= ~RTF_FIXEDMTU; + } + nh->nh_mtu = mtu; } +void +nhop_set_src(struct nhop_object *nh, struct ifaddr *ifa) +{ + nh->nh_ifa = ifa; +} + +void +nhop_set_transmit_ifp(struct nhop_object *nh, struct ifnet *ifp) +{ + nh->nh_ifp = ifp; +} + + struct vnet * nhop_get_vnet(const struct nhop_object *nh) { @@ -827,6 +906,15 @@ nhop_set_expire(struct nhop_object *nh, uint32_t expire) nh->nh_priv->nh_expire = expire; } +static struct rib_head * +nhop_get_rh(const struct nhop_object *nh) +{ + uint32_t fibnum = nhop_get_fibnum(nh); + int family = nhop_get_neigh_family(nh); + + return (rt_tables_get_rnh(fibnum, family)); +} + void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu) { diff --git a/sys/net/route/nhop_var.h b/sys/net/route/nhop_var.h index 516032cd3756..3cc7da4649a5 100644 --- a/sys/net/route/nhop_var.h +++ b/sys/net/route/nhop_var.h @@ -85,6 +85,7 @@ struct nhop_priv { void *cb_func; /* function handling additional rewrite caps */ u_int 
nh_refcnt; /* number of references, refcount(9) */ u_int nh_linked; /* refcount(9), == 2 if linked to the list */ + int nh_finalized; /* non-zero if finalized() was called */ struct nhop_object *nh; /* backreference to the dataplane nhop */ struct nh_control *nh_control; /* backreference to the rnh */ struct nhop_priv *nh_next; /* hash table membership */ diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h index 2998c51b608d..740729ecb415 100644 --- a/sys/net/route/route_var.h +++ b/sys/net/route/route_var.h @@ -48,8 +48,11 @@ #endif struct nh_control; -typedef int rnh_preadd_entry_f_t(u_int fibnum, const struct sockaddr *addr, +/* Sets prefix-specific nexthop flags (NHF_DEFAULT, RTF/NHF_HOST, RTF_BROADCAST,..) */ +typedef int rnh_set_nh_pfxflags_f_t(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask, struct nhop_object *nh); +/* Fills in family-specific details that are not yet set up (mtu, nhop type, ..) */ +typedef int rnh_augment_nh_f_t(u_int fibnum, struct nhop_object *nh); struct rib_head { struct radix_head head; @@ -59,7 +62,7 @@ struct rib_head { rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */ rn_walktree_t *rnh_walktree; /* traverse tree */ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */ - rnh_preadd_entry_f_t *rnh_preadd; /* hook to alter record prior to insertion */ + rnh_set_nh_pfxflags_f_t *rnh_set_nh_pfxflags; /* hook to alter record prior to insertion */ rt_gen_t rnh_gen; /* datapath generation counter */ int rnh_multipath; /* multipath capable ? 
*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ @@ -76,6 +79,7 @@ struct rib_head { uint32_t rib_algo_fixed:1;/* fixed algorithm */ uint32_t rib_algo_init:1;/* algo init done */ struct nh_control *nh_control; /* nexthop subsystem data */ + rnh_augment_nh_f_t *rnh_augment_nh;/* hook to alter nexthop prior to insertion */ CK_STAILQ_HEAD(, rib_subscription) rnh_subscribers;/* notification subscribers */ }; @@ -204,11 +208,6 @@ struct rtentry { * RTF_PINNED, RTF_REJECT, RTF_BLACKHOLE, RTF_BROADCAST */ -/* Nexthop rt flags mask */ -#define NHOP_RT_FLAG_MASK (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_DYNAMIC | \ - RTF_MODIFIED | RTF_STATIC | RTF_BLACKHOLE | RTF_PROTO1 | RTF_PROTO2 | \ - RTF_PROTO3 | RTF_FIXEDMTU | RTF_PINNED | RTF_BROADCAST) - /* rtentry rt flag mask */ #define RTE_RT_FLAG_MASK (RTF_UP | RTF_HOST) @@ -250,8 +249,6 @@ int nhop_try_ref_object(struct nhop_object *nh); void nhop_ref_any(struct nhop_object *nh); void nhop_free_any(struct nhop_object *nh); -void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type); -void nhop_set_rtflags(struct nhop_object *nh, int rt_flags); int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info, struct nhop_object **nh_ret); diff --git a/sys/netinet/in_rmx.c b/sys/netinet/in_rmx.c index a20bb9392d6a..623e788eec91 100644 --- a/sys/netinet/in_rmx.c +++ b/sys/netinet/in_rmx.c @@ -52,18 +52,15 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip_var.h> static int -rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask, +rib4_set_nh_pfxflags(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask, struct nhop_object *nh) { const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr; - uint16_t nh_type; - int rt_flags; - - /* XXX: RTF_LOCAL && RTF_MULTICAST */ - - rt_flags = nhop_get_rtflags(nh); + const struct sockaddr_in *mask4 = (const struct sockaddr_in *)mask; + bool is_broadcast = false; - if (rt_flags & RTF_HOST) { + if (mask == NULL) 
{ + nhop_set_pxtype_flag(nh, NHF_HOST); /* * Backward compatibility: * if the destination is broadcast, @@ -76,13 +73,21 @@ rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *ma * add these routes to support some cases with active-active * load balancing. Given that, retain this support. */ - if (in_broadcast(addr4->sin_addr, nh->nh_ifp)) { - rt_flags |= RTF_BROADCAST; - nhop_set_rtflags(nh, rt_flags); - nh->nh_flags |= NHF_BROADCAST; - } - } + if (in_broadcast(addr4->sin_addr, nh->nh_ifp)) + is_broadcast = true; + } else if (mask4->sin_addr.s_addr == 0) + nhop_set_pxtype_flag(nh, NHF_DEFAULT); + else + nhop_set_pxtype_flag(nh, 0); + + nhop_set_broadcast(nh, is_broadcast); + + return (0); +} +static int +rib4_augment_nh(u_int fibnum, struct nhop_object *nh) +{ /* * Check route MTU: * inherit interface MTU if not set or @@ -93,14 +98,9 @@ rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *ma } else if (nh->nh_mtu > nh->nh_ifp->if_mtu) nh->nh_mtu = nh->nh_ifp->if_mtu; *** 92 LINES SKIPPED ***