NDP prefix list locking
Vijay Singh
vijju.singh at gmail.com
Tue Feb 19 21:47:58 UTC 2013
Mark, is this the panic you were hitting?
PANIC : Bad list head 0xffffff800057e720 first->prev != head
cpuid = 6
KDB: stack backtrace:
kdb_backtrace() at kdb_backtrace+0x3e
panic() at panic+0x479
nd6_prelist_add() at nd6_prelist_add+0x236
prelist_remove() at prelist_remove+0x4c5
nd6_ra_input() at nd6_ra_input+0x8c2
icmp6_input() at icmp6_input+0x149e
ip6_input() at ip6_input+0x14b2
netisr_dispatch_src() at netisr_dispatch_src+0x14a
netisr_dispatch() at netisr_dispatch+0x20
ether_demux() at ether_demux+0x281
-vijay
On Mon, Feb 18, 2013 at 7:05 PM, Mark Johnston <markj at freebsd.org> wrote:
> Hi Everyone,
>
> For the past little while I've been tracking down some memory corruption
> issues that seem to be caused by a double free that can happen when
> destroying an IPv6-enabled interface or when removing an IPv6 address
> from an interface.
>
> The problem seems to be caused by a lack of locking for nd_prefix, the
> global IPv6 prefix list. There's a callout that periodically executes
> nd6_timer(), which purges expired prefixes (among other things). There's
> nothing preventing an expired prefix from being freed twice if a user
> happens to destroy the corresponding address or IP address at the same
> time. In fact, a prefix with an infinite lifetime can be double freed as
> well, since the first thing prelist_remove() does is set the input
> prefix's vltime field to 0.
>
> I'd like to fix this, and the patch below is my attempt at adding some
> locking to accesses of the prefix and default router lists. It's only
> received very light testing so far, but I was hoping that others could
> either review/test the patch, or suggest alternative approaches to
> fixing this. My approach here has been to add two rw locks - one for
> the prefix list and one for the default router list. I'm not very
> familiar with the NDP code so I'm very much open to comments and
> suggestions. :)
>
> Thanks,
> -Mark
>
> diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
> index e260e5d..3cb327c 100644
> --- a/sys/netinet6/in6.c
> +++ b/sys/netinet6/in6.c
> @@ -805,8 +805,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
> */
> pr = ia->ia6_ndpr;
> in6_purgeaddr(&ia->ia_ifa);
> - if (pr && pr->ndpr_refcnt == 0)
> + if (pr && pr->ndpr_refcnt == 0) {
> + ND_PREFIX_WLOCK();
> prelist_remove(pr);
> + ND_PREFIX_WUNLOCK();
> + }
> EVENTHANDLER_INVOKE(ifaddr_event, ifp);
> break;
> }
> diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
> index fd420d8..d96340c 100644
> --- a/sys/netinet6/nd6.c
> +++ b/sys/netinet6/nd6.c
> @@ -117,6 +117,9 @@ static int nd6_inuse, nd6_allocated;
> VNET_DEFINE(struct nd_drhead, nd_defrouter);
> VNET_DEFINE(struct nd_prhead, nd_prefix);
>
> +struct rwlock nd_prefix_rwlock;
> +struct rwlock nd_defrouter_rwlock;
> +
> VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
> #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
>
> @@ -143,6 +146,7 @@ nd6_init(void)
> {
> int i;
>
> + rw_init(&nd_prefix_rwlock, "nd_prefix rwlock");
> LIST_INIT(&V_nd_prefix);
>
> all1_sa.sin6_family = AF_INET6;
> @@ -151,6 +155,7 @@ nd6_init(void)
> all1_sa.sin6_addr.s6_addr[i] = 0xff;
>
> /* initialization of the default router list */
> + rw_init(&nd_defrouter_rwlock, "nd_defrouter rwlock");
> TAILQ_INIT(&V_nd_defrouter);
>
> /* start timer */
> @@ -586,10 +591,14 @@ nd6_timer(void *arg)
> nd6_timer, curvnet);
>
> /* expire default router list */
> + ND_PREFIX_RLOCK();
> + ND_DEFRTR_WLOCK();
> TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
> if (dr->expire && dr->expire < time_second)
> defrtrlist_del(dr);
> }
> + ND_DEFRTR_WUNLOCK();
> + ND_PREFIX_RUNLOCK();
>
> /*
> * expire interface addresses.
> @@ -664,6 +673,7 @@ nd6_timer(void *arg)
> }
>
> /* expire prefix list */
> + ND_PREFIX_WLOCK();
> LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
> /*
> * check prefix lifetime.
> @@ -680,6 +690,7 @@ nd6_timer(void *arg)
> prelist_remove(pr);
> }
> }
> + ND_PREFIX_WUNLOCK();
> CURVNET_RESTORE();
> }
>
> @@ -770,6 +781,8 @@ nd6_purge(struct ifnet *ifp)
> * in the routing table, in order to keep additional side effects as
> * small as possible.
> */
> + ND_PREFIX_RLOCK();
> + ND_DEFRTR_WLOCK();
> TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
> if (dr->installed)
> continue;
> @@ -785,8 +798,11 @@ nd6_purge(struct ifnet *ifp)
> if (dr->ifp == ifp)
> defrtrlist_del(dr);
> }
> + ND_DEFRTR_WUNLOCK();
> + ND_PREFIX_RUNLOCK();
>
> /* Nuke prefix list entries toward ifp */
> + ND_PREFIX_WLOCK();
> LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
> if (pr->ndpr_ifp == ifp) {
> /*
> @@ -808,6 +824,7 @@ nd6_purge(struct ifnet *ifp)
> prelist_remove(pr);
> }
> }
> + ND_PREFIX_WUNLOCK();
>
> /* cancel default outgoing interface setting */
> if (V_nd6_defifindex == ifp->if_index)
> @@ -898,6 +915,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
> * If the address matches one of our on-link prefixes, it should be a
> * neighbor.
> */
> + ND_PREFIX_RLOCK();
> LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
> if (pr->ndpr_ifp != ifp)
> continue;
> @@ -929,9 +947,12 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
> }
>
> if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
> - &addr->sin6_addr, &pr->ndpr_mask))
> + &addr->sin6_addr, &pr->ndpr_mask)) {
> + ND_PREFIX_RUNLOCK();
> return (1);
> + }
> }
> + ND_PREFIX_RUNLOCK();
>
> /*
> * If the address is assigned on the node of the other side of
> @@ -1229,6 +1250,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
> * obsolete API, use sysctl under net.inet6.icmp6
> */
> bzero(drl, sizeof(*drl));
> + ND_DEFRTR_RLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
> if (i >= DRLSTSIZ)
> break;
> @@ -1241,6 +1263,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
> drl->defrouter[i].if_index = dr->ifp->if_index;
> i++;
> }
> + ND_DEFRTR_RUNLOCK();
> break;
> case SIOCGPRLST_IN6:
> /*
> @@ -1256,6 +1279,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
> * how about separating ioctls into two?
> */
> bzero(oprl, sizeof(*oprl));
> + ND_PREFIX_RLOCK();
> LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
> struct nd_pfxrouter *pfr;
> int j;
> @@ -1301,6 +1325,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
>
> i++;
> }
> + ND_PREFIX_RUNLOCK();
>
> break;
> case OSIOCGIFINFO_IN6:
> @@ -1449,6 +1474,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
> /* flush all the prefix advertised by routers */
> struct nd_prefix *pr, *next;
>
> + ND_PREFIX_WLOCK();
> LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
> struct in6_ifaddr *ia, *ia_next;
>
> @@ -1467,6 +1493,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
> }
> prelist_remove(pr);
> }
> + ND_PREFIX_WUNLOCK();
> break;
> }
> case SIOCSRTRFLUSH_IN6:
> @@ -1475,9 +1502,13 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
> struct nd_defrouter *dr, *next;
>
> defrouter_reset();
> + ND_PERFIX_RLOCK();
> + ND_DEFRTR_WLOCK();
> TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) {
> defrtrlist_del(dr);
> }
> + ND_DEFRTR_WUNLOCK();
> + ND_PREFIX_RUNLOCK();
> defrouter_select();
> break;
> }
> @@ -2268,23 +2299,30 @@ nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
> d.rtaddr.sin6_family = AF_INET6;
> d.rtaddr.sin6_len = sizeof(d.rtaddr);
>
> + error = sysctl_wire_old_buffer(req, 0);
> + if (error != 0)
> + return (error);
> +
> /*
> * XXX locking
> */
> + ND_DEFRTR_RLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
> d.rtaddr.sin6_addr = dr->rtaddr;
> error = sa6_recoverscope(&d.rtaddr);
> if (error != 0)
> - return (error);
> + break;
> d.flags = dr->flags;
> d.rtlifetime = dr->rtlifetime;
> d.expire = dr->expire;
> d.if_index = dr->ifp->if_index;
> error = SYSCTL_OUT(req, &d, sizeof(d));
> if (error != 0)
> - return (error);
> + break;
> }
> - return (0);
> + ND_DEFRTR_RUNLOCK();
> +
> + return (error);
> }
>
> static int
> @@ -2307,9 +2345,14 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
> s6.sin6_family = AF_INET6;
> s6.sin6_len = sizeof(s6);
>
> + error = sysctl_wire_old_buffer(req, 0);
> + if (error != 0)
> + return (error);
> +
> /*
> * XXX locking
> */
> + ND_PREFIX_RLOCK();
> LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
> p.prefix = pr->ndpr_prefix;
> if (sa6_recoverscope(&p.prefix)) {
> @@ -2341,7 +2384,7 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
> p.advrtrs++;
> error = SYSCTL_OUT(req, &p, sizeof(p));
> if (error != 0)
> - return (error);
> + break;
> LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
> s6.sin6_addr = pfr->router->rtaddr;
> if (sa6_recoverscope(&s6))
> @@ -2349,9 +2392,13 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
> "scope error in prefix list (%s)\n",
> ip6_sprintf(ip6buf, &pfr->router->rtaddr));
> error = SYSCTL_OUT(req, &s6, sizeof(s6));
> - if (error != 0)
> + if (error != 0) {
> + ND_PREFIX_RUNLOCK();
> return (error);
> + }
> }
> }
> - return (0);
> + ND_PREFIX_RUNLOCK();
> +
> + return (error);
> }
> diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h
> index 94202e1..bc5ff2d 100644
> --- a/sys/netinet6/nd6.h
> +++ b/sys/netinet6/nd6.h
> @@ -38,6 +38,7 @@
> #define RTF_ANNOUNCE RTF_PROTO2
> #endif
>
> +#include <sys/_rwlock.h>
> #include <sys/queue.h>
> #include <sys/callout.h>
>
> @@ -341,6 +342,35 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
> #define V_nd6_debug VNET(nd6_debug)
> #define V_nd6_onlink_ns_rfc4861 VNET(nd6_onlink_ns_rfc4861)
>
> +/* Lock to protect access to the nd_prefix list. */
> +extern struct rwlock nd_prefix_rwlock;
> +#define ND_PREFIX_RLOCK() rw_rlock(&nd_prefix_rwlock)
> +#define ND_PREFIX_RUNLOCK() rw_runlock(&nd_prefix_rwlock)
> +#define ND_PREFIX_WLOCK() rw_wlock(&nd_prefix_rwlock)
> +#define ND_PREFIX_WUNLOCK() rw_wunlock(&nd_prefix_rwlock)
> +#define ND_PREFIX_LOCK_ASSERT() rw_assert(&nd_prefix_rwlock, RA_LOCKED)
> +#define ND_PREFIX_RLOCK_ASSERT() rw_assert(&nd_prefix_rwlock, RA_RLOCKED)
> +#define ND_PREFIX_WLOCK_ASSERT() rw_assert(&nd_prefix_rwlock, RA_WLOCKED)
> +#define ND_PREFIX_WLOCK_OWNED() rw_wowned(&nd_prefix_rwlock)
> +
> +/*
> + * Lock to protect access to the nd_defrouter list. A shared lock should be
> + * acquired on the prefix list before acquiring an exclusive lock on the router
> + * list and calling defrouterlist_del() in order to avoid lock order reversals.
> + */
> +extern struct rwlock nd_defrouter_rwlock;
> +#define ND_DEFRTR_RLOCK() rw_rlock(&nd_defrouter_rwlock)
> +#define ND_DEFRTR_RUNLOCK() rw_runlock(&nd_defrouter_rwlock)
> +#define ND_DEFRTR_WLOCK() rw_wlock(&nd_defrouter_rwlock)
> +#define ND_DEFRTR_WUNLOCK() rw_wunlock(&nd_defrouter_rwlock)
> +#define ND_DEFRTR_LOCK_ASSERT() rw_assert(&nd_defrouter_rwlock, \
> + RA_LOCKED)
> +#define ND_DEFRTR_RLOCK_ASSERT() rw_assert(&nd_defrouter_rwlock, \
> + RA_RLOCKED)
> +#define ND_DEFRTR_WLOCK_ASSERT() rw_assert(&nd_defrouter_rwlock, \
> + RA_WLOCKED)
> +#define ND_DEFRTR_WLOCK_OWNED() rw_wowned(&nd_defrouter_rwlock)
> +
> #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
>
> VNET_DECLARE(struct callout, nd6_timer_ch);
> diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c
> index f6bae0c..e5c63b9 100644
> --- a/sys/netinet6/nd6_rtr.c
> +++ b/sys/netinet6/nd6_rtr.c
> @@ -499,14 +499,16 @@ defrouter_addreq(struct nd_defrouter *new)
> struct nd_defrouter *
> defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
> {
> - struct nd_defrouter *dr;
> + struct nd_defrouter *dr = NULL;
>
> + ND_DEFRTR_RLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
> if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
> - return (dr);
> + break;
> }
> + ND_DEFRTR_RUNLOCK();
>
> - return (NULL); /* search failed */
> + return (dr); /* search failed */
> }
>
> /*
> @@ -548,8 +550,10 @@ defrouter_reset(void)
> {
> struct nd_defrouter *dr;
>
> + ND_DEFRTR_RLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
> defrouter_delreq(dr);
> + ND_DEFRTR_RUNLOCK();
>
> /*
> * XXX should we also nuke any default routers in the kernel, by
> @@ -574,11 +578,13 @@ defrtrlist_del(struct nd_defrouter *dr)
> deldr = dr;
> defrouter_delreq(dr);
> }
> + ND_DEFRTR_WLOCK_ASSERT();
> TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
>
> /*
> * Also delete all the pointers to the router in each prefix lists.
> */
> + ND_PREFIX_RLOCK_ASSERT();
> LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
> struct nd_pfxrouter *pfxrtr;
> if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
> @@ -636,6 +642,7 @@ defrouter_select(void)
> * We just pick up the first reachable one (if any), assuming that
> * the ordering rule of the list described in defrtrlist_update().
> */
> + ND_DEFRTR_RLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
> IF_AFDATA_RLOCK(dr->ifp);
> if (selected_dr == NULL &&
> @@ -657,6 +664,8 @@ defrouter_select(void)
> " is installed\n");
> }
> }
> + ND_DEFRTR_RUNLOCK();
> +
> /*
> * If none of the default routers was found to be reachable,
> * round-robin the list regardless of preference.
> @@ -758,7 +767,9 @@ defrtrlist_update(struct nd_defrouter *new)
> * defrouter_select() below will handle routing
> * changes later.
> */
> + ND_DEFRTR_WLOCK();
> TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
> + ND_DEFRTR_WUNLOCK();
> n = dr;
> goto insert;
> }
> @@ -784,6 +795,7 @@ insert:
> */
>
> /* insert at the end of the group */
> + ND_DEFRTR_WLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
> if (rtpref(n) > rtpref(dr))
> break;
> @@ -792,6 +804,7 @@ insert:
> TAILQ_INSERT_BEFORE(dr, n, dr_entry);
> else
> TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
> + ND_DEFRTR_WUNLOCK();
>
> defrouter_select();
>
> @@ -839,6 +852,7 @@ nd6_prefix_lookup(struct nd_prefixctl *key)
> {
> struct nd_prefix *search;
>
> + ND_PREFIX_RLOCK();
> LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
> if (key->ndpr_ifp == search->ndpr_ifp &&
> key->ndpr_plen == search->ndpr_plen &&
> @@ -847,6 +861,7 @@ nd6_prefix_lookup(struct nd_prefixctl *key)
> break;
> }
> }
> + ND_PREFIX_RUNLOCK();
>
> return (search);
> }
> @@ -887,7 +902,9 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
> new->ndpr_mask.s6_addr32[i];
>
> /* link ndpr_entry to nd_prefix list */
> + ND_PREFIX_WLOCK();
> LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
> + ND_PREFIX_WUNLOCK();
>
> /* ND_OPT_PI_FLAG_ONLINK processing */
> if (new->ndpr_raf_onlink) {
> @@ -938,6 +955,7 @@ prelist_remove(struct nd_prefix *pr)
> return; /* notice here? */
>
> /* unlink ndpr_entry from nd_prefix list */
> + ND_PREFIX_WLOCK_ASSERT();
> LIST_REMOVE(pr, ndpr_entry);
>
> /* free list of routers that adversed the prefix */
> @@ -1339,6 +1357,8 @@ pfxlist_onlink_check()
> * Check if there is a prefix that has a reachable advertising
> * router.
> */
> + if (!ND_PREFIX_WLOCK_OWNED())
> + ND_PREFIX_RLOCK();
> LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
> if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
> break;
> @@ -1349,6 +1369,7 @@ pfxlist_onlink_check()
> * that does not advertise any prefixes.
> */
> if (pr == NULL) {
> + ND_DEFRTR_RLOCK();
> TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
> struct nd_prefix *pr0;
>
> @@ -1359,6 +1380,7 @@ pfxlist_onlink_check()
> if (pfxrtr != NULL)
> break;
> }
> + ND_DEFRTR_RUNLOCK();
> }
> if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
> /*
> @@ -1454,6 +1476,8 @@ pfxlist_onlink_check()
> }
> }
> }
> + if (!ND_PREFIX_WLOCK_OWNED())
> + ND_PREFIX_RUNLOCK();
>
> /*
> * Changes on the prefix status might affect address status as well.
> @@ -1618,6 +1642,7 @@ nd6_prefix_onlink(struct nd_prefix *pr)
> * Although such a configuration is expected to be rare, we explicitly
> * allow it.
> */
> + ND_PREFIX_RLOCK();
> LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
> if (opr == pr)
> continue;
> @@ -1627,9 +1652,12 @@ nd6_prefix_onlink(struct nd_prefix *pr)
>
> if (opr->ndpr_plen == pr->ndpr_plen &&
> in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
> - &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
> + &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
> + ND_PREFIX_RUNLOCK();
> return (0);
> + }
> }
> + ND_PREFIX_RUNLOCK();
>
> /*
> * We prefer link-local addresses as the associated interface address.
> @@ -1730,6 +1758,7 @@ nd6_prefix_offlink(struct nd_prefix *pr)
> * If there's one, try to make the prefix on-link on the
> * interface.
> */
> + ND_PREFIX_LOCK_ASSERT();
> LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
> if (opr == pr)
> continue;
> _______________________________________________
> freebsd-net at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-net
> To unsubscribe, send any mail to "freebsd-net-unsubscribe at freebsd.org"
More information about the freebsd-net
mailing list