NDP prefix list locking

Mark Johnston markj at freebsd.org
Tue Feb 19 03:06:03 UTC 2013


Hi Everyone,

For the past little while I've been tracking down some memory corruption
issues that seem to be caused by a double free that can happen when
destroying an IPv6-enabled interface or when removing an IPv6 address
from an interface.

The problem seems to be caused by a lack of locking for nd_prefix, the
global IPv6 prefix list. There's a callout that periodically executes
nd6_timer(), which purges expired prefixes (among other things). There's
nothing preventing an expired prefix from being freed twice if a user
happens to destroy the corresponding address or IP address at the same
time. In fact, a prefix with an infinite lifetime can be double freed as
well, since the first thing prelist_remove() does is set the input
prefix's vltime field to 0.

I'd like to fix this, and the patch below is my attempt at adding some
locking to accesses of the prefix and default router lists. It's only
received very light testing so far, but I was hoping that others could
either review/test the patch, or suggest alternative approaches to
fixing this. My approach here has been to add two rw locks - one for
the prefix list and one for the default router list. I'm not very
familiar with the NDP code so I'm very much open to comments and
suggestions. :)

Thanks,
-Mark

diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index e260e5d..3cb327c 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -805,8 +805,11 @@ in6_control(struct socket *so, u_long cmd, caddr_t data,
 		 */
 		pr = ia->ia6_ndpr;
 		in6_purgeaddr(&ia->ia_ifa);
-		if (pr && pr->ndpr_refcnt == 0)
+		if (pr && pr->ndpr_refcnt == 0) {
+			ND_PREFIX_WLOCK();
 			prelist_remove(pr);
+			ND_PREFIX_WUNLOCK();
+		}
 		EVENTHANDLER_INVOKE(ifaddr_event, ifp);
 		break;
 	}
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index fd420d8..d96340c 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -117,6 +117,9 @@ static int nd6_inuse, nd6_allocated;
 VNET_DEFINE(struct nd_drhead, nd_defrouter);
 VNET_DEFINE(struct nd_prhead, nd_prefix);
 
+struct rwlock nd_prefix_rwlock;
+struct rwlock nd_defrouter_rwlock;
+
 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL;
 #define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
 
@@ -143,6 +146,7 @@ nd6_init(void)
 {
 	int i;
 
+	rw_init(&nd_prefix_rwlock, "nd_prefix rwlock");
 	LIST_INIT(&V_nd_prefix);
 
 	all1_sa.sin6_family = AF_INET6;
@@ -151,6 +155,7 @@ nd6_init(void)
 		all1_sa.sin6_addr.s6_addr[i] = 0xff;
 
 	/* initialization of the default router list */
+	rw_init(&nd_defrouter_rwlock, "nd_defrouter rwlock");
 	TAILQ_INIT(&V_nd_defrouter);
 
 	/* start timer */
@@ -586,10 +591,14 @@ nd6_timer(void *arg)
 	    nd6_timer, curvnet);
 
 	/* expire default router list */
+	ND_PREFIX_RLOCK();
+	ND_DEFRTR_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (dr->expire && dr->expire < time_second)
 			defrtrlist_del(dr);
 	}
+	ND_DEFRTR_WUNLOCK();
+	ND_PREFIX_RUNLOCK();
 
 	/*
 	 * expire interface addresses.
@@ -664,6 +673,7 @@ nd6_timer(void *arg)
 	}
 
 	/* expire prefix list */
+	ND_PREFIX_WLOCK();
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		/*
 		 * check prefix lifetime.
@@ -680,6 +690,7 @@ nd6_timer(void *arg)
 			prelist_remove(pr);
 		}
 	}
+	ND_PREFIX_WUNLOCK();
 	CURVNET_RESTORE();
 }
 
@@ -770,6 +781,8 @@ nd6_purge(struct ifnet *ifp)
 	 * in the routing table, in order to keep additional side effects as
 	 * small as possible.
 	 */
+	ND_PREFIX_RLOCK();
+	ND_DEFRTR_WLOCK();
 	TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) {
 		if (dr->installed)
 			continue;
@@ -785,8 +798,11 @@ nd6_purge(struct ifnet *ifp)
 		if (dr->ifp == ifp)
 			defrtrlist_del(dr);
 	}
+	ND_DEFRTR_WUNLOCK();
+	ND_PREFIX_RUNLOCK();
 
 	/* Nuke prefix list entries toward ifp */
+	ND_PREFIX_WLOCK();
 	LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) {
 		if (pr->ndpr_ifp == ifp) {
 			/*
@@ -808,6 +824,7 @@ nd6_purge(struct ifnet *ifp)
 			prelist_remove(pr);
 		}
 	}
+	ND_PREFIX_WUNLOCK();
 
 	/* cancel default outgoing interface setting */
 	if (V_nd6_defifindex == ifp->if_index)
@@ -898,6 +915,7 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
 	 * If the address matches one of our on-link prefixes, it should be a
 	 * neighbor.
 	 */
+	ND_PREFIX_RLOCK();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_ifp != ifp)
 			continue;
@@ -929,9 +947,12 @@ nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
 		}
 
 		if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
-		    &addr->sin6_addr, &pr->ndpr_mask))
+		    &addr->sin6_addr, &pr->ndpr_mask)) {
+			ND_PREFIX_RUNLOCK();
 			return (1);
+		}
 	}
+	ND_PREFIX_RUNLOCK();
 
 	/*
 	 * If the address is assigned on the node of the other side of
@@ -1229,6 +1250,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		 * obsolete API, use sysctl under net.inet6.icmp6
 		 */
 		bzero(drl, sizeof(*drl));
+		ND_DEFRTR_RLOCK();
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			if (i >= DRLSTSIZ)
 				break;
@@ -1241,6 +1263,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 			drl->defrouter[i].if_index = dr->ifp->if_index;
 			i++;
 		}
+		ND_DEFRTR_RUNLOCK();
 		break;
 	case SIOCGPRLST_IN6:
 		/*
@@ -1256,6 +1279,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		 * how about separating ioctls into two?
 		 */
 		bzero(oprl, sizeof(*oprl));
+		ND_PREFIX_RLOCK();
 		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 			struct nd_pfxrouter *pfr;
 			int j;
@@ -1301,6 +1325,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 
 			i++;
 		}
+		ND_PREFIX_RUNLOCK();
 
 		break;
 	case OSIOCGIFINFO_IN6:
@@ -1449,6 +1474,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		/* flush all the prefix advertised by routers */
 		struct nd_prefix *pr, *next;
 
+		ND_PREFIX_WLOCK();
 		LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) {
 			struct in6_ifaddr *ia, *ia_next;
 
@@ -1467,6 +1493,7 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 			}
 			prelist_remove(pr);
 		}
+		ND_PREFIX_WUNLOCK();
 		break;
 	}
 	case SIOCSRTRFLUSH_IN6:
@@ -1475,9 +1502,13 @@ nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
 		struct nd_defrouter *dr, *next;
 
 		defrouter_reset();
+		ND_PERFIX_RLOCK();
+		ND_DEFRTR_WLOCK();
 		TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) {
 			defrtrlist_del(dr);
 		}
+		ND_DEFRTR_WUNLOCK();
+		ND_PREFIX_RUNLOCK();
 		defrouter_select();
 		break;
 	}
@@ -2268,23 +2299,30 @@ nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
 	d.rtaddr.sin6_family = AF_INET6;
 	d.rtaddr.sin6_len = sizeof(d.rtaddr);
 
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
 	/*
 	 * XXX locking
 	 */
+	ND_DEFRTR_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		d.rtaddr.sin6_addr = dr->rtaddr;
 		error = sa6_recoverscope(&d.rtaddr);
 		if (error != 0)
-			return (error);
+			break;
 		d.flags = dr->flags;
 		d.rtlifetime = dr->rtlifetime;
 		d.expire = dr->expire;
 		d.if_index = dr->ifp->if_index;
 		error = SYSCTL_OUT(req, &d, sizeof(d));
 		if (error != 0)
-			return (error);
+			break;
 	}
-	return (0);
+	ND_DEFRTR_RUNLOCK();
+
+	return (error);
 }
 
 static int
@@ -2307,9 +2345,14 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 	s6.sin6_family = AF_INET6;
 	s6.sin6_len = sizeof(s6);
 
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
 	/*
 	 * XXX locking
 	 */
+	ND_PREFIX_RLOCK();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		p.prefix = pr->ndpr_prefix;
 		if (sa6_recoverscope(&p.prefix)) {
@@ -2341,7 +2384,7 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 			p.advrtrs++;
 		error = SYSCTL_OUT(req, &p, sizeof(p));
 		if (error != 0)
-			return (error);
+			break;
 		LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
 			s6.sin6_addr = pfr->router->rtaddr;
 			if (sa6_recoverscope(&s6))
@@ -2349,9 +2392,13 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
 				    "scope error in prefix list (%s)\n",
 				    ip6_sprintf(ip6buf, &pfr->router->rtaddr));
 			error = SYSCTL_OUT(req, &s6, sizeof(s6));
-			if (error != 0)
+			if (error != 0) {
+				ND_PREFIX_RUNLOCK();
 				return (error);
+			}
 		}
 	}
-	return (0);
+	ND_PREFIX_RUNLOCK();
+
+	return (error);
 }
diff --git a/sys/netinet6/nd6.h b/sys/netinet6/nd6.h
index 94202e1..bc5ff2d 100644
--- a/sys/netinet6/nd6.h
+++ b/sys/netinet6/nd6.h
@@ -38,6 +38,7 @@
 #define RTF_ANNOUNCE	RTF_PROTO2
 #endif
 
+#include <sys/_rwlock.h>
 #include <sys/queue.h>
 #include <sys/callout.h>
 
@@ -341,6 +342,35 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861);
 #define	V_nd6_debug			VNET(nd6_debug)
 #define	V_nd6_onlink_ns_rfc4861		VNET(nd6_onlink_ns_rfc4861)
 
+/* Lock to protect access to the nd_prefix list. */
+extern struct rwlock nd_prefix_rwlock;
+#define	ND_PREFIX_RLOCK()		rw_rlock(&nd_prefix_rwlock)
+#define	ND_PREFIX_RUNLOCK()		rw_runlock(&nd_prefix_rwlock)
+#define	ND_PREFIX_WLOCK()		rw_wlock(&nd_prefix_rwlock)
+#define	ND_PREFIX_WUNLOCK()		rw_wunlock(&nd_prefix_rwlock)
+#define	ND_PREFIX_LOCK_ASSERT()		rw_assert(&nd_prefix_rwlock, RA_LOCKED)
+#define	ND_PREFIX_RLOCK_ASSERT()	rw_assert(&nd_prefix_rwlock, RA_RLOCKED)
+#define	ND_PREFIX_WLOCK_ASSERT()	rw_assert(&nd_prefix_rwlock, RA_WLOCKED)
+#define	ND_PREFIX_WLOCK_OWNED()		rw_wowned(&nd_prefix_rwlock)
+
+/*
+ * Lock to protect access to the nd_defrouter list. A shared lock should be
+ * acquired on the prefix list before acquiring an exclusive lock on the router
+ * list and calling defrouterlist_del() in order to avoid lock order reversals.
+ */
+extern struct rwlock nd_defrouter_rwlock;
+#define	ND_DEFRTR_RLOCK()		rw_rlock(&nd_defrouter_rwlock)
+#define	ND_DEFRTR_RUNLOCK()		rw_runlock(&nd_defrouter_rwlock)
+#define	ND_DEFRTR_WLOCK()		rw_wlock(&nd_defrouter_rwlock)
+#define	ND_DEFRTR_WUNLOCK()		rw_wunlock(&nd_defrouter_rwlock)
+#define	ND_DEFRTR_LOCK_ASSERT()		rw_assert(&nd_defrouter_rwlock,        \
+					    RA_LOCKED)
+#define	ND_DEFRTR_RLOCK_ASSERT()	rw_assert(&nd_defrouter_rwlock,        \
+					    RA_RLOCKED)
+#define	ND_DEFRTR_WLOCK_ASSERT()	rw_assert(&nd_defrouter_rwlock,        \
+					    RA_WLOCKED)
+#define	ND_DEFRTR_WLOCK_OWNED()		rw_wowned(&nd_defrouter_rwlock)
+
 #define nd6log(x)	do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
 
 VNET_DECLARE(struct callout, nd6_timer_ch);
diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c
index f6bae0c..e5c63b9 100644
--- a/sys/netinet6/nd6_rtr.c
+++ b/sys/netinet6/nd6_rtr.c
@@ -499,14 +499,16 @@ defrouter_addreq(struct nd_defrouter *new)
 struct nd_defrouter *
 defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
 {
-	struct nd_defrouter *dr;
+	struct nd_defrouter *dr = NULL;
 
+	ND_DEFRTR_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
-			return (dr);
+			break;
 	}
+	ND_DEFRTR_RUNLOCK();
 
-	return (NULL);		/* search failed */
+	return (dr);		/* search failed */
 }
 
 /*
@@ -548,8 +550,10 @@ defrouter_reset(void)
 {
 	struct nd_defrouter *dr;
 
+	ND_DEFRTR_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry)
 		defrouter_delreq(dr);
+	ND_DEFRTR_RUNLOCK();
 
 	/*
 	 * XXX should we also nuke any default routers in the kernel, by
@@ -574,11 +578,13 @@ defrtrlist_del(struct nd_defrouter *dr)
 		deldr = dr;
 		defrouter_delreq(dr);
 	}
+	ND_DEFRTR_WLOCK_ASSERT();
 	TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
 
 	/*
 	 * Also delete all the pointers to the router in each prefix lists.
 	 */
+	ND_PREFIX_RLOCK_ASSERT();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		struct nd_pfxrouter *pfxrtr;
 		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
@@ -636,6 +642,7 @@ defrouter_select(void)
 	 * We just pick up the first reachable one (if any), assuming that
 	 * the ordering rule of the list described in defrtrlist_update().
 	 */
+	ND_DEFRTR_RLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		IF_AFDATA_RLOCK(dr->ifp);
 		if (selected_dr == NULL &&
@@ -657,6 +664,8 @@ defrouter_select(void)
 			    " is installed\n");
 		}
 	}
+	ND_DEFRTR_RUNLOCK();
+
 	/*
 	 * If none of the default routers was found to be reachable,
 	 * round-robin the list regardless of preference.
@@ -758,7 +767,9 @@ defrtrlist_update(struct nd_defrouter *new)
 			 * defrouter_select() below will handle routing
 			 * changes later.
 			 */
+			ND_DEFRTR_WLOCK();
 			TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
+			ND_DEFRTR_WUNLOCK();
 			n = dr;
 			goto insert;
 		}
@@ -784,6 +795,7 @@ insert:
 	 */
 
 	/* insert at the end of the group */
+	ND_DEFRTR_WLOCK();
 	TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 		if (rtpref(n) > rtpref(dr))
 			break;
@@ -792,6 +804,7 @@ insert:
 		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
 	else
 		TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
+	ND_DEFRTR_WUNLOCK();
 
 	defrouter_select();
 
@@ -839,6 +852,7 @@ nd6_prefix_lookup(struct nd_prefixctl *key)
 {
 	struct nd_prefix *search;
 
+	ND_PREFIX_RLOCK();
 	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
 		if (key->ndpr_ifp == search->ndpr_ifp &&
 		    key->ndpr_plen == search->ndpr_plen &&
@@ -847,6 +861,7 @@ nd6_prefix_lookup(struct nd_prefixctl *key)
 			break;
 		}
 	}
+	ND_PREFIX_RUNLOCK();
 
 	return (search);
 }
@@ -887,7 +902,9 @@ nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
 		    new->ndpr_mask.s6_addr32[i];
 
 	/* link ndpr_entry to nd_prefix list */
+	ND_PREFIX_WLOCK();
 	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
+	ND_PREFIX_WUNLOCK();
 
 	/* ND_OPT_PI_FLAG_ONLINK processing */
 	if (new->ndpr_raf_onlink) {
@@ -938,6 +955,7 @@ prelist_remove(struct nd_prefix *pr)
 		return;		/* notice here? */
 
 	/* unlink ndpr_entry from nd_prefix list */
+	ND_PREFIX_WLOCK_ASSERT();
 	LIST_REMOVE(pr, ndpr_entry);
 
 	/* free list of routers that adversed the prefix */
@@ -1339,6 +1357,8 @@ pfxlist_onlink_check()
 	 * Check if there is a prefix that has a reachable advertising
 	 * router.
 	 */
+	if (!ND_PREFIX_WLOCK_OWNED())
+		ND_PREFIX_RLOCK();
 	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
 		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
 			break;
@@ -1349,6 +1369,7 @@ pfxlist_onlink_check()
 	 * that does not advertise any prefixes.
 	 */
 	if (pr == NULL) {
+		ND_DEFRTR_RLOCK();
 		TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) {
 			struct nd_prefix *pr0;
 
@@ -1359,6 +1380,7 @@ pfxlist_onlink_check()
 			if (pfxrtr != NULL)
 				break;
 		}
+		ND_DEFRTR_RUNLOCK();
 	}
 	if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) {
 		/*
@@ -1454,6 +1476,8 @@ pfxlist_onlink_check()
 			}
 		}
 	}
+	if (!ND_PREFIX_WLOCK_OWNED())
+		ND_PREFIX_RUNLOCK();
 
 	/*
 	 * Changes on the prefix status might affect address status as well.
@@ -1618,6 +1642,7 @@ nd6_prefix_onlink(struct nd_prefix *pr)
 	 * Although such a configuration is expected to be rare, we explicitly
 	 * allow it.
 	 */
+	ND_PREFIX_RLOCK();
 	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
 		if (opr == pr)
 			continue;
@@ -1627,9 +1652,12 @@ nd6_prefix_onlink(struct nd_prefix *pr)
 
 		if (opr->ndpr_plen == pr->ndpr_plen &&
 		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
-		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
+		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
+			ND_PREFIX_RUNLOCK();
 			return (0);
+		}
 	}
+	ND_PREFIX_RUNLOCK();
 
 	/*
 	 * We prefer link-local addresses as the associated interface address.
@@ -1730,6 +1758,7 @@ nd6_prefix_offlink(struct nd_prefix *pr)
 		 * If there's one, try to make the prefix on-link on the
 		 * interface.
 		 */
+		ND_PREFIX_LOCK_ASSERT();
 		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
 			if (opr == pr)
 				continue;


More information about the freebsd-net mailing list