svn commit: r291853 - in head/sys: net netinet

Alexander V. Chernikov melifaro at FreeBSD.org
Sat Dec 5 09:50:39 UTC 2015


Author: melifaro
Date: Sat Dec  5 09:50:37 2015
New Revision: 291853
URL: https://svnweb.freebsd.org/changeset/base/291853

Log:
  Remove LLE read lock from IPv4 fast path.
  
  LLE structure is mostly unchanged during its lifecycle.
  To be more specific, there are 2 things relevant for fast path
    lookup code:
  1) link-level address change. Since r286722, these updates are performed
    under AFDATA WLOCK.
  2) Some sort of feedback indicating that this particular entry is used so
    we re-send arp request to perform reachability verification instead of
    expiring entry. The only signal that is needed from fast path is something
    like binary yes/no.
  
  The latter is solved by the following changes:
  1) introduce special r_skip_req field which is read lockless by fast path,
    but updated under (new) req_mutex mutex. If this field is non-zero, then
    fast path will acquire lock and set it back to 0.
  2) introduce simple state machine: incomplete->reachable<->verify->deleted.
    Before that we implicitly had an incomplete->reachable->deleted state machine,
    with V_arpt_keep between "reachable" and "deleted". Verification was performed
    at runtime 5 seconds before V_arpt_keep expired.
    This is changed to "change state to verify 5 seconds before V_arpt_keep,
    set r_skip_req to non-zero value and check it every second". If the value
    is zero - then send arp verification probe.
  These changes do not introduce any significant control plane overhead:
    typically the lle callout timer would fire 1 more time each V_arpt_keep (1200s)
    for used lles and up to arp_maxtries (5) times for dead lles.
  
  As a result, all packets towards "reachable" lle are handled by fast path without
  acquiring lle read lock.
  
  Additional "req_mutex" is needed because callout / arpresolve_slow() or eventhandler
    might keep the LLE lock for a significant amount of time, which might not be feasible
    for fast path locking (e.g. having rmlock as either AFDATA or lltable own lock).
  
  Differential Revision:	https://reviews.freebsd.org/D3688

Modified:
  head/sys/net/if_llatbl.c
  head/sys/net/if_llatbl.h
  head/sys/netinet/if_ether.c
  head/sys/netinet/in.c

Modified: head/sys/net/if_llatbl.c
==============================================================================
--- head/sys/net/if_llatbl.c	Sat Dec  5 09:32:36 2015	(r291852)
+++ head/sys/net/if_llatbl.c	Sat Dec  5 09:50:37 2015	(r291853)
@@ -284,6 +284,7 @@ lltable_set_entry_addr(struct ifnet *ifp
 
 	bcopy(lladdr, &lle->ll_addr, ifp->if_addrlen);
 	lle->la_flags |= LLE_VALID;
+	lle->r_flags |= RLLE_VALID;
 }
 
 /*
@@ -640,6 +641,7 @@ lla_rt_output(struct rt_msghdr *rtm, str
 		if ((rtm->rtm_flags & RTF_ANNOUNCE))
 			lle->la_flags |= LLE_PUB;
 		lle->la_flags |= LLE_VALID;
+		lle->r_flags |= RLLE_VALID;
 		lle->la_expire = rtm->rtm_rmx.rmx_expire;
 
 		laflags = lle->la_flags;

Modified: head/sys/net/if_llatbl.h
==============================================================================
--- head/sys/net/if_llatbl.h	Sat Dec  5 09:32:36 2015	(r291852)
+++ head/sys/net/if_llatbl.h	Sat Dec  5 09:50:37 2015	(r291853)
@@ -63,7 +63,8 @@ struct llentry {
 		uint16_t	mac16[3];
 		uint8_t		mac8[20];	/* IB needs 20 bytes. */
 	} ll_addr;
-	uint32_t		spare0;
+	uint16_t		r_flags;	/* LLE runtime flags */
+	uint16_t		r_skip_req;	/* feedback from fast path */
 	uint64_t		spare1;
 
 	struct lltable		 *lle_tbl;
@@ -83,6 +84,7 @@ struct llentry {
 	LIST_ENTRY(llentry)	lle_chain;	/* chain of deleted items */
 	struct callout		lle_timer;
 	struct rwlock		 lle_lock;
+	struct mtx		req_mtx;
 };
 
 #define	LLE_WLOCK(lle)		rw_wlock(&(lle)->lle_lock)
@@ -95,6 +97,12 @@ struct llentry {
 #define	LLE_LOCK_DESTROY(lle)	rw_destroy(&(lle)->lle_lock)
 #define	LLE_WLOCK_ASSERT(lle)	rw_assert(&(lle)->lle_lock, RA_WLOCKED)
 
+#define	LLE_REQ_INIT(lle)	mtx_init(&(lle)->req_mtx, "lle req", \
+	NULL, MTX_DEF)
+#define	LLE_REQ_DESTROY(lle)	mtx_destroy(&(lle)->req_mtx)
+#define	LLE_REQ_LOCK(lle)	mtx_lock(&(lle)->req_mtx)
+#define	LLE_REQ_UNLOCK(lle)	mtx_unlock(&(lle)->req_mtx)
+
 #define LLE_IS_VALID(lle)	(((lle) != NULL) && ((lle) != (void *)-1))
 
 #define	LLE_ADDREF(lle) do {					\
@@ -187,6 +195,11 @@ MALLOC_DECLARE(M_LLTABLE);
 #define	LLE_LINKED	0x0040	/* linked to lookup structure */
 /* LLE request flags */
 #define	LLE_EXCLUSIVE	0x2000	/* return lle xlocked  */
+#define	LLE_UNLOCKED	0x4000	/* return lle unlocked */
+
+/* LLE flags used by fastpath code */
+#define	RLLE_VALID	0x0001		/* entry is valid */
+#define	RLLE_IFADDR	LLE_IFADDR	/* entry is ifaddr */
 
 #define LLATBL_HASH(key, mask) \
 	(((((((key >> 8) ^ key) >> 8) ^ key) >> 8) ^ key) & mask)

Modified: head/sys/netinet/if_ether.c
==============================================================================
--- head/sys/netinet/if_ether.c	Sat Dec  5 09:32:36 2015	(r291852)
+++ head/sys/netinet/if_ether.c	Sat Dec  5 09:50:37 2015	(r291853)
@@ -78,6 +78,14 @@ static struct timeval arp_lastlog;
 static int arp_curpps;
 static int arp_maxpps = 1;
 
+/* Simple ARP state machine */
+enum arp_llinfo_state {
+	ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */
+	ARP_LLINFO_REACHABLE,	/* LLE is valid */
+	ARP_LLINFO_VERIFY,	/* LLE is valid, need refresh */
+	ARP_LLINFO_DELETED,	/* LLE is deleted */
+};
+
 SYSCTL_DECL(_net_link_ether);
 static SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
 static SYSCTL_NODE(_net_link_ether, PF_ARP, arp, CTLFLAG_RW, 0, "");
@@ -89,6 +97,7 @@ static VNET_DEFINE(int, arp_maxtries) = 
 static VNET_DEFINE(int, arp_proxyall) = 0;
 static VNET_DEFINE(int, arpt_down) = 20;	/* keep incomplete entries for
 						 * 20 seconds */
+static VNET_DEFINE(int, arpt_rexmit) = 1;	/* retransmit arp entries, sec*/
 VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat);  /* ARP statistics, see if_arp.h */
 VNET_PCPUSTAT_SYSINIT(arpstat);
 
@@ -100,6 +109,7 @@ static VNET_DEFINE(int, arp_maxhold) = 1
 
 #define	V_arpt_keep		VNET(arpt_keep)
 #define	V_arpt_down		VNET(arpt_down)
+#define	V_arpt_rexmit		VNET(arpt_rexmit)
 #define	V_arp_maxtries		VNET(arp_maxtries)
 #define	V_arp_proxyall		VNET(arp_proxyall)
 #define	V_arp_maxhold		VNET(arp_maxhold)
@@ -161,6 +171,7 @@ arptimer(void *arg)
 {
 	struct llentry *lle = (struct llentry *)arg;
 	struct ifnet *ifp;
+	int r_skip_req;
 
 	if (lle->la_flags & LLE_STATIC) {
 		return;
@@ -188,6 +199,55 @@ arptimer(void *arg)
 	ifp = lle->lle_tbl->llt_ifp;
 	CURVNET_SET(ifp->if_vnet);
 
+	switch (lle->ln_state) {
+	case ARP_LLINFO_REACHABLE:
+
+		/*
+		 * Expiration time is approaching.
+		 * Let's try to refresh entry if it is still
+		 * in use.
+		 *
+		 * Set r_skip_req to get feedback from
+		 * fast path. Change state and re-schedule
+		 * ourselves.
+		 */
+		LLE_REQ_LOCK(lle);
+		lle->r_skip_req = 1;
+		LLE_REQ_UNLOCK(lle);
+		lle->ln_state = ARP_LLINFO_VERIFY;
+		callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
+		LLE_WUNLOCK(lle);
+		CURVNET_RESTORE();
+		return;
+	case ARP_LLINFO_VERIFY:
+		LLE_REQ_LOCK(lle);
+		r_skip_req = lle->r_skip_req;
+		LLE_REQ_UNLOCK(lle);
+
+		if (r_skip_req == 0 && lle->la_preempt > 0) {
+			/* Entry was used, issue refresh request */
+			struct in_addr dst;
+			dst = lle->r_l3addr.addr4;
+			lle->la_preempt--;
+			callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
+			LLE_WUNLOCK(lle);
+			arprequest(ifp, NULL, &dst, NULL);
+			CURVNET_RESTORE();
+			return;
+		}
+		/* Nothing happened. Reschedule if not too late */
+		if (lle->la_expire > time_uptime) {
+			callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
+			LLE_WUNLOCK(lle);
+			CURVNET_RESTORE();
+			return;
+		}
+		break;
+	case ARP_LLINFO_INCOMPLETE:
+	case ARP_LLINFO_DELETED:
+		break;
+	}
+
 	if ((lle->la_flags & LLE_DELETED) == 0) {
 		int evt;
 
@@ -353,26 +413,16 @@ arpresolve_full(struct ifnet *ifp, int i
 	if ((la->la_flags & LLE_VALID) &&
 	    ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
 		bcopy(&la->ll_addr, desten, ifp->if_addrlen);
-		renew = 0;
-		/*
-		 * If entry has an expiry time and it is approaching,
-		 * see if we need to send an ARP request within this
-		 * arpt_down interval.
-		 */
-		if (!(la->la_flags & LLE_STATIC) &&
-		    time_uptime + la->la_preempt > la->la_expire) {
-			renew = 1;
-			la->la_preempt--;
-		}
 
+		/* Check if we have feedback request from arptimer() */
+		if (la->r_skip_req != 0) {
+			LLE_REQ_LOCK(la);
+			la->r_skip_req = 0; /* Notify that entry was used */
+			LLE_REQ_UNLOCK(la);
+		}
 		if (pflags != NULL)
-			*pflags = la->la_flags;
-
+			*pflags = la->la_flags & (LLE_VALID|LLE_IFADDR);
 		LLE_WUNLOCK(la);
-
-		if (renew == 1)
-			arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
-
 		return (0);
 	}
 
@@ -440,7 +490,7 @@ arpresolve_full(struct ifnet *ifp, int i
  *    m is the mbuf. May be NULL if we don't have a packet.
  *    dst is the next hop,
  *    desten is the storage to put LL address.
- *    flags returns lle entry flags.
+ *    flags returns subset of lle flags: LLE_VALID | LLE_IFADDR
  *
  * On success, desten and flags are filled in and the function returns 0;
  * If the packet must be held pending resolution, we return EWOULDBLOCK
@@ -452,7 +502,6 @@ arpresolve(struct ifnet *ifp, int is_gw,
 	const struct sockaddr *dst, u_char *desten, uint32_t *pflags)
 {
 	struct llentry *la = 0;
-	int renew;
 
 	if (pflags != NULL)
 		*pflags = 0;
@@ -472,40 +521,24 @@ arpresolve(struct ifnet *ifp, int is_gw,
 	}
 
 	IF_AFDATA_RLOCK(ifp);
-	la = lla_lookup(LLTABLE(ifp), 0, dst);
-	IF_AFDATA_RUNLOCK(ifp);
-
-	if (la == NULL)
-		return (arpresolve_full(ifp, is_gw, 1, m, dst, desten, pflags));
-
-	if ((la->la_flags & LLE_VALID) &&
-	    ((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
+	la = lla_lookup(LLTABLE(ifp), LLE_UNLOCKED, dst);
+	if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
+		/* Entry found, let's copy lle info */
 		bcopy(&la->ll_addr, desten, ifp->if_addrlen);
-		renew = 0;
-		/*
-		 * If entry has an expiry time and it is approaching,
-		 * see if we need to send an ARP request within this
-		 * arpt_down interval.
-		 */
-		if (!(la->la_flags & LLE_STATIC) &&
-		    time_uptime + la->la_preempt > la->la_expire) {
-			renew = 1;
-			la->la_preempt--;
-		}
-
 		if (pflags != NULL)
-			*pflags = la->la_flags;
-
-		LLE_RUNLOCK(la);
-
-		if (renew == 1)
-			arprequest(ifp, NULL, &SIN(dst)->sin_addr, NULL);
-
+			*pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR);
+		/* Check if we have feedback request from arptimer() */
+		if (la->r_skip_req != 0) {
+			LLE_REQ_LOCK(la);
+			la->r_skip_req = 0; /* Notify that entry was used */
+			LLE_REQ_UNLOCK(la);
+		}
+		IF_AFDATA_RUNLOCK(ifp);
 		return (0);
 	}
-	LLE_RUNLOCK(la);
+	IF_AFDATA_RUNLOCK(ifp);
 
-	return (arpresolve_full(ifp, is_gw, 0, m, dst, desten, pflags));
+	return (arpresolve_full(ifp, is_gw, 1, m, dst, desten, pflags));
 }
 
 /*
@@ -1042,6 +1075,9 @@ arp_check_update_lle(struct arphdr *ah, 
 
 		IF_AFDATA_WUNLOCK(ifp);
 		LLE_REMREF(la);
+
+		/* Clear fast path feedback request if set */
+		la->r_skip_req = 0;
 	}
 
 	arp_mark_lle_reachable(la);
@@ -1073,17 +1109,21 @@ arp_check_update_lle(struct arphdr *ah, 
 static void
 arp_mark_lle_reachable(struct llentry *la)
 {
-	int canceled;
+	int canceled, wtime;
 
 	LLE_WLOCK_ASSERT(la);
 
+	la->ln_state = ARP_LLINFO_REACHABLE;
 	EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
 
 	if (!(la->la_flags & LLE_STATIC)) {
 		LLE_ADDREF(la);
 		la->la_expire = time_uptime + V_arpt_keep;
+		wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit;
+		if (wtime < 0)
+			wtime = V_arpt_keep;
 		canceled = callout_reset(&la->lle_timer,
-		    hz * V_arpt_keep, arptimer, la);
+		    hz * wtime, arptimer, la);
 		if (canceled)
 			LLE_REMREF(la);
 	}

Modified: head/sys/netinet/in.c
==============================================================================
--- head/sys/netinet/in.c	Sat Dec  5 09:32:36 2015	(r291852)
+++ head/sys/netinet/in.c	Sat Dec  5 09:50:37 2015	(r291853)
@@ -1013,6 +1013,7 @@ in_lltable_destroy_lle(struct llentry *l
 
 	LLE_WUNLOCK(lle);
 	LLE_LOCK_DESTROY(lle);
+	LLE_REQ_DESTROY(lle);
 	free(lle, M_LLTABLE);
 }
 
@@ -1034,6 +1035,7 @@ in_lltable_new(struct in_addr addr4, u_i
 	lle->base.lle_refcnt = 1;
 	lle->base.lle_free = in_lltable_destroy_lle;
 	LLE_LOCK_INIT(&lle->base);
+	LLE_REQ_INIT(&lle->base);
 	callout_init(&lle->base.lle_timer, 1);
 
 	return (&lle->base);
@@ -1257,9 +1259,12 @@ in_lltable_alloc(struct lltable *llt, u_
 		return (NULL);
 	}
 	lle->la_flags = flags;
+	if (flags & LLE_STATIC)
+		lle->r_flags |= RLLE_VALID;
 	if ((flags & LLE_IFADDR) == LLE_IFADDR) {
 		lltable_set_entry_addr(ifp, lle, IF_LLADDR(ifp));
 		lle->la_flags |= LLE_STATIC;
+		lle->r_flags |= (RLLE_VALID | RLLE_IFADDR);
 	}
 
 	return (lle);
@@ -1283,6 +1288,13 @@ in_lltable_lookup(struct lltable *llt, u
 	if (lle == NULL)
 		return (NULL);
 
+	KASSERT((flags & (LLE_UNLOCKED|LLE_EXCLUSIVE)) !=
+	    (LLE_UNLOCKED|LLE_EXCLUSIVE),("wrong lle request flags: 0x%X",
+	    flags));
+
+	if (flags & LLE_UNLOCKED)
+		return (lle);
+
 	if (flags & LLE_EXCLUSIVE)
 		LLE_WLOCK(lle);
 	else


More information about the svn-src-head mailing list