svn commit: r261823 - head/sys/net
Gleb Smirnoff
glebius at FreeBSD.org
Thu Feb 13 04:59:19 UTC 2014
Author: glebius
Date: Thu Feb 13 04:59:18 2014
New Revision: 261823
URL: http://svnweb.freebsd.org/changeset/base/261823
Log:
o Axe the non-pcpu flowtable implementation. It wasn't enabled or used,
and is probably a leftover from the first prototyping by Kip. The
non-pcpu implementation used mutexes, so it is doubtful that it worked
better than a simple routing lookup.
o Use UMA_ZONE_PCPU zone for pointers instead of [MAXCPU] arrays,
use zpcpu_get() to access data in there.
o Replace the home-grown singly-linked list implementation with SLIST().
This has two functional side effects:
- new flows are now inserted at the head of a list; previously they
were appended to the tail.
- a bug where the flow cleaner deleted the wrong flow is fixed.
o Due to cache line alignment, there is no reason to keep
different zones for IPv4 and IPv6 flows. Both consume one
cache line, so the real size of the allocation is equal.
o Rely on the fact that f_fhash, f_rt and f_lle are stable during the
fle lifetime; remove useless volatile qualifiers.
o More INET/INET6 splitting.
Reviewed by: adrian
Sponsored by: Netflix
Sponsored by: Nginx, Inc.
Modified:
head/sys/net/flowtable.c
head/sys/net/flowtable.h
Modified: head/sys/net/flowtable.c
==============================================================================
--- head/sys/net/flowtable.c Thu Feb 13 04:55:46 2014 (r261822)
+++ head/sys/net/flowtable.c Thu Feb 13 04:59:18 2014 (r261823)
@@ -47,13 +47,16 @@ __FBSDID("$FreeBSD$");
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
+#include <sys/pcpu.h>
#include <sys/proc.h>
+#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
+#include <vm/uma.h>
#include <net/if.h>
#include <net/if_llatbl.h>
@@ -76,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <ddb/ddb.h>
+#ifdef INET
struct ipv4_tuple {
uint16_t ip_sport; /* source port */
uint16_t ip_dport; /* destination port */
@@ -87,7 +91,9 @@ union ipv4_flow {
struct ipv4_tuple ipf_ipt;
uint32_t ipf_key[3];
};
+#endif
+#ifdef INET6
struct ipv6_tuple {
uint16_t ip_sport; /* source port */
uint16_t ip_dport; /* destination port */
@@ -99,28 +105,44 @@ union ipv6_flow {
struct ipv6_tuple ipf_ipt;
uint32_t ipf_key[9];
};
+#endif
struct flentry {
- volatile uint32_t f_fhash; /* hash flowing forward */
+ uint32_t f_fhash; /* hash flowing forward */
uint16_t f_flags; /* flow flags */
uint8_t f_pad;
uint8_t f_proto; /* protocol */
uint32_t f_fibnum; /* fib index */
uint32_t f_uptime; /* uptime at last access */
- struct flentry *f_next; /* pointer to collision entry */
- volatile struct rtentry *f_rt; /* rtentry for flow */
- volatile struct llentry *f_lle; /* llentry for flow */
+ SLIST_ENTRY(flentry) f_next; /* pointer to collision entry */
+ struct rtentry *f_rt; /* rtentry for flow */
+ struct llentry *f_lle; /* llentry for flow */
+ union {
+#ifdef INET
+ union ipv4_flow v4;
+#endif
+#ifdef INET6
+ union ipv6_flow v6;
+#endif
+ } f_flow;
+#define f_flow4 f_flow.v4
+#define f_flow6 f_flow.v6
};
+#define KEYLEN(flags) ((((flags) & FL_IPV6) ? 9 : 3) * 4)
-struct flentry_v4 {
- struct flentry fl_entry;
- union ipv4_flow fl_flow;
-};
+/* Make sure f_flow begins with key. */
+#ifdef INET
+CTASSERT(offsetof(struct flentry, f_flow) ==
+ offsetof(struct flentry, f_flow4.ipf_key));
+#endif
+#ifdef INET6
+CTASSERT(offsetof(struct flentry, f_flow) ==
+ offsetof(struct flentry, f_flow6.ipf_key));
+#endif
-struct flentry_v6 {
- struct flentry fl_entry;
- union ipv6_flow fl_flow;
-};
+SLIST_HEAD(flist, flentry);
+/* Make sure we can use pcpu_zone_ptr for struct flist. */
+CTASSERT(sizeof(struct flist) == sizeof(void *));
#define SECS_PER_HOUR 3600
#define SECS_PER_DAY (24*SECS_PER_HOUR)
@@ -130,37 +152,28 @@ struct flentry_v6 {
#define FIN_WAIT_IDLE 600
#define TCP_IDLE SECS_PER_DAY
-
-typedef void fl_lock_t(struct flowtable *, uint32_t);
-
-union flentryp {
- struct flentry **global;
- struct flentry **pcpu[MAXCPU];
-};
-
struct flowtable {
counter_u64_t *ft_stat;
- uma_zone_t ft_zone;
int ft_size;
- int ft_lock_count;
uint32_t ft_flags;
uint32_t ft_max_depth;
- fl_lock_t *ft_lock;
- fl_lock_t *ft_unlock;
+
/*
- * XXX need to pad out
+ * ft_table is a malloc(9)ed array of pointers. Pointers point to
+ * memory from UMA_ZONE_PCPU zone.
+ * ft_masks is per-cpu pointer itself. Each instance points
+ * to a malloc(9)ed bitset, that is private to corresponding CPU.
*/
- struct mtx *ft_locks;
- union flentryp ft_table;
- bitstr_t *ft_masks[MAXCPU];
+ struct flist **ft_table;
+ bitstr_t **ft_masks;
bitstr_t *ft_tmpmask;
- uint32_t ft_udp_idle __aligned(CACHE_LINE_SIZE);
+ uint32_t ft_udp_idle;
uint32_t ft_fin_wait_idle;
uint32_t ft_syn_idle;
uint32_t ft_tcp_idle;
boolean_t ft_full;
-} __aligned(CACHE_LINE_SIZE);
+};
#define FLOWSTAT_ADD(ft, name, v) \
counter_u64_add((ft)->ft_stat[offsetof(struct flowtable_stat, name) / sizeof(uint64_t)], (v))
@@ -190,15 +203,15 @@ static uint32_t flowclean_freq;
*/
#ifdef INET
static VNET_DEFINE(struct flowtable, ip4_ft);
-#define V_ip4_ft VNET(ip4_ft)
-static uma_zone_t flow_ipv4_zone;
+#define V_ip4_ft VNET(ip4_ft)
#endif
#ifdef INET6
static VNET_DEFINE(struct flowtable, ip6_ft);
#define V_ip6_ft VNET(ip6_ft)
-static uma_zone_t flow_ipv6_zone;
#endif
+static uma_zone_t flow_zone;
+
static VNET_DEFINE(int, flowtable_enable) = 1;
static VNET_DEFINE(int, flowtable_syn_expire) = SYN_IDLE;
static VNET_DEFINE(int, flowtable_udp_expire) = UDP_IDLE;
@@ -215,6 +228,8 @@ static SYSCTL_NODE(_net, OID_AUTO, flowt
"flowtable");
SYSCTL_VNET_INT(_net_flowtable, OID_AUTO, enable, CTLFLAG_RW,
&VNET_NAME(flowtable_enable), 0, "enable flowtable caching.");
+SYSCTL_UMA_MAX(_net_flowtable, OID_AUTO, maxflows, CTLFLAG_RW,
+ &flow_zone, "Maximum number of flows allowed");
/*
* XXX This does not end up updating timeouts at runtime
@@ -233,43 +248,10 @@ SYSCTL_VNET_INT(_net_flowtable, OID_AUTO
&VNET_NAME(flowtable_tcp_expire), 0,
"seconds after which to remove flow allocated to a TCP connection.");
-static void
-flowtable_global_lock(struct flowtable *table, uint32_t hash)
-{
- int lock_index = (hash)&(table->ft_lock_count - 1);
-
- mtx_lock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_global_unlock(struct flowtable *table, uint32_t hash)
-{
- int lock_index = (hash)&(table->ft_lock_count - 1);
-
- mtx_unlock(&table->ft_locks[lock_index]);
-}
-
-static void
-flowtable_pcpu_lock(struct flowtable *table, uint32_t hash)
-{
-
- critical_enter();
-}
-
-static void
-flowtable_pcpu_unlock(struct flowtable *table, uint32_t hash)
-{
-
- critical_exit();
-}
-
-#define FL_ENTRY_INDEX(table, hash)((hash) % (table)->ft_size)
-#define FL_ENTRY(table, hash) *flowtable_entry((table), (hash))
-#define FL_ENTRY_LOCK(table, hash) (table)->ft_lock((table), (hash))
-#define FL_ENTRY_UNLOCK(table, hash) (table)->ft_unlock((table), (hash))
-
#define FL_STALE (1<<8)
+static MALLOC_DEFINE(M_FTABLE, "flowtable", "flowtable hashes and bitstrings");
+
static struct flentry *flowtable_lookup_common(struct flowtable *,
struct sockaddr_storage *, struct sockaddr_storage *, struct mbuf *, int);
@@ -320,27 +302,6 @@ flags_to_proto(int flags)
}
#ifdef INET
-#ifdef FLOWTABLE_DEBUG
-static void
-ipv4_flow_print_tuple(int flags, int proto, struct sockaddr_in *ssin,
- struct sockaddr_in *dsin)
-{
- char saddr[4*sizeof "123"], daddr[4*sizeof "123"];
-
- if (flags & FL_HASH_ALL) {
- inet_ntoa_r(ssin->sin_addr, saddr);
- inet_ntoa_r(dsin->sin_addr, daddr);
- printf("proto=%d %s:%d->%s:%d\n",
- proto, saddr, ntohs(ssin->sin_port), daddr,
- ntohs(dsin->sin_port));
- } else {
- inet_ntoa_r(*(struct in_addr *) &dsin->sin_addr, daddr);
- printf("proto=%d %s\n", proto, daddr);
- }
-
-}
-#endif
-
static int
ipv4_mbuf_demarshal(struct mbuf *m, struct sockaddr_in *ssin,
struct sockaddr_in *dsin, uint16_t *flags)
@@ -456,10 +417,10 @@ flow_to_route(struct flentry *fle, struc
sin = (struct sockaddr_in *)&ro->ro_dst;
sin->sin_family = AF_INET;
sin->sin_len = sizeof(*sin);
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
+ hashkey = fle->f_flow4.ipf_key;
sin->sin_addr.s_addr = hashkey[2];
- ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+ ro->ro_rt = fle->f_rt;
+ ro->ro_lle = fle->f_lle;
ro->ro_flags |= RT_NORTREF;
}
#endif /* INET */
@@ -661,10 +622,10 @@ flow_to_route_in6(struct flentry *fle, s
sin6->sin6_family = AF_INET6;
sin6->sin6_len = sizeof(*sin6);
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
+ hashkey = fle->f_flow6.ipf_key;
memcpy(&sin6->sin6_addr, &hashkey[5], sizeof (struct in6_addr));
- ro->ro_rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- ro->ro_lle = __DEVOLATILE(struct llentry *, fle->f_lle);
+ ro->ro_rt = fle->f_rt;
+ ro->ro_lle = fle->f_lle;
ro->ro_flags |= RT_NORTREF;
}
#endif /* INET6 */
@@ -672,31 +633,24 @@ flow_to_route_in6(struct flentry *fle, s
static bitstr_t *
flowtable_mask(struct flowtable *ft)
{
- bitstr_t *mask;
- if (ft->ft_flags & FL_PCPU)
- mask = ft->ft_masks[curcpu];
- else
- mask = ft->ft_masks[0];
+ /*
+ * flowtable_free_stale() calls w/o critical section, but
+ * with sched_bind(). Since pointer is stable throughout
+ * ft lifetime, it is safe, otherwise...
+ *
+ * CRITICAL_ASSERT(curthread);
+ */
- return (mask);
+ return (*(bitstr_t **)zpcpu_get(ft->ft_masks));
}
-static struct flentry **
-flowtable_entry(struct flowtable *ft, uint32_t hash)
+static struct flist *
+flowtable_list(struct flowtable *ft, uint32_t hash)
{
- struct flentry **fle;
- int index = (hash % ft->ft_size);
- if (ft->ft_flags & FL_PCPU) {
- KASSERT(&ft->ft_table.pcpu[curcpu][0] != NULL, ("pcpu not set"));
- fle = &ft->ft_table.pcpu[curcpu][index];
- } else {
- KASSERT(&ft->ft_table.global[0] != NULL, ("global not set"));
- fle = &ft->ft_table.global[index];
- }
-
- return (fle);
+ CRITICAL_ASSERT(curthread);
+ return (zpcpu_get(ft->ft_table[hash % ft->ft_size]));
}
static int
@@ -730,24 +684,6 @@ flow_stale(struct flowtable *ft, struct
return (0);
}
-static void
-flowtable_set_hashkey(struct flentry *fle, uint32_t *key)
-{
- uint32_t *hashkey;
- int i, nwords;
-
- if (fle->f_flags & FL_IPV6) {
- nwords = 9;
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- } else {
- nwords = 3;
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
- }
-
- for (i = 0; i < nwords; i++)
- hashkey[i] = key[i];
-}
-
static int
flow_full(struct flowtable *ft)
{
@@ -755,8 +691,8 @@ flow_full(struct flowtable *ft)
int count, max;
full = ft->ft_full;
- count = uma_zone_get_cur(ft->ft_zone);
- max = uma_zone_get_max(ft->ft_zone);
+ count = uma_zone_get_cur(flow_zone);
+ max = uma_zone_get_max(flow_zone);
if (full && (count < (max - (max >> 3))))
ft->ft_full = FALSE;
@@ -783,26 +719,31 @@ static int
flowtable_insert(struct flowtable *ft, uint32_t hash, uint32_t *key,
uint32_t fibnum, struct route *ro, uint16_t flags)
{
- struct flentry *fle, *fletail, *newfle, **flep;
+ struct flist *flist;
+ struct flentry *fle, *iter;
int depth;
bitstr_t *mask;
- uint8_t proto;
- newfle = uma_zalloc(ft->ft_zone, M_NOWAIT | M_ZERO);
- if (newfle == NULL)
+ fle = uma_zalloc(flow_zone, M_NOWAIT | M_ZERO);
+ if (fle == NULL)
return (ENOMEM);
- newfle->f_flags |= (flags & FL_IPV6);
- proto = flags_to_proto(flags);
+ bcopy(key, &fle->f_flow, KEYLEN(flags));
+ fle->f_flags |= (flags & FL_IPV6);
+ fle->f_proto = flags_to_proto(flags);
+ fle->f_rt = ro->ro_rt;
+ fle->f_lle = ro->ro_lle;
+ fle->f_fhash = hash;
+ fle->f_fibnum = fibnum;
+ fle->f_uptime = time_uptime;
- FL_ENTRY_LOCK(ft, hash);
+ critical_enter();
mask = flowtable_mask(ft);
- flep = flowtable_entry(ft, hash);
- fletail = fle = *flep;
+ flist = flowtable_list(ft, hash);
- if (fle == NULL) {
- bit_set(mask, FL_ENTRY_INDEX(ft, hash));
- *flep = fle = newfle;
+ if (SLIST_EMPTY(flist)) {
+ bit_set(mask, (hash % ft->ft_size));
+ SLIST_INSERT_HEAD(flist, fle, f_next);
goto skip;
}
@@ -812,65 +753,30 @@ flowtable_insert(struct flowtable *ft, u
* find end of list and make sure that we were not
* preempted by another thread handling this flow
*/
- while (fle != NULL) {
- if (fle->f_fhash == hash && !flow_stale(ft, fle)) {
+ SLIST_FOREACH(iter, flist, f_next) {
+ if (iter->f_fhash == hash && !flow_stale(ft, iter)) {
/*
* there was either a hash collision
* or we lost a race to insert
*/
- FL_ENTRY_UNLOCK(ft, hash);
- uma_zfree(ft->ft_zone, newfle);
+ critical_exit();
+ uma_zfree(flow_zone, fle);
return (EEXIST);
}
- /*
- * re-visit this double condition XXX
- */
- if (fletail->f_next != NULL)
- fletail = fle->f_next;
-
depth++;
- fle = fle->f_next;
}
if (depth > ft->ft_max_depth)
ft->ft_max_depth = depth;
- fletail->f_next = newfle;
- fle = newfle;
+
+ SLIST_INSERT_HEAD(flist, fle, f_next);
skip:
- flowtable_set_hashkey(fle, key);
+ critical_exit();
- fle->f_proto = proto;
- fle->f_rt = ro->ro_rt;
- fle->f_lle = ro->ro_lle;
- fle->f_fhash = hash;
- fle->f_fibnum = fibnum;
- fle->f_uptime = time_uptime;
- FL_ENTRY_UNLOCK(ft, hash);
return (0);
}
-static int
-flowtable_key_equal(struct flentry *fle, uint32_t *key)
-{
- uint32_t *hashkey;
- int i, nwords;
-
- if (fle->f_flags & FL_IPV6) {
- nwords = 9;
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- } else {
- nwords = 3;
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
- }
-
- for (i = 0; i < nwords; i++)
- if (hashkey[i] != key[i])
- return (0);
-
- return (1);
-}
-
struct flentry *
flowtable_lookup(sa_family_t sa, struct mbuf *m)
{
@@ -895,6 +801,7 @@ flowtable_lookup_common(struct flowtable
{
struct route_in6 sro6;
struct route sro, *ro;
+ struct flist *flist;
struct flentry *fle;
struct rtentry *rt;
struct llentry *lle;
@@ -974,34 +881,24 @@ flowtable_lookup_common(struct flowtable
return (NULL);
FLOWSTAT_INC(ft, ft_lookups);
- FL_ENTRY_LOCK(ft, hash);
- if ((fle = FL_ENTRY(ft, hash)) == NULL) {
- FL_ENTRY_UNLOCK(ft, hash);
- goto uncached;
- }
-keycheck:
- rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- lle = __DEVOLATILE(struct llentry *, fle->f_lle);
- if ((rt != NULL)
- && lle != NULL
- && fle->f_fhash == hash
- && flowtable_key_equal(fle, key)
- && (proto == fle->f_proto)
- && (fibnum == fle->f_fibnum)
- && (rt->rt_flags & RTF_UP)
- && (rt->rt_ifp != NULL)
- && (lle->la_flags & LLE_VALID)) {
- FLOWSTAT_INC(ft, ft_hits);
- fle->f_uptime = time_uptime;
- fle->f_flags |= flags;
- FL_ENTRY_UNLOCK(ft, hash);
- goto success;
- } else if (fle->f_next != NULL) {
- fle = fle->f_next;
- goto keycheck;
- }
- FL_ENTRY_UNLOCK(ft, hash);
-uncached:
+
+ critical_enter();
+ flist = flowtable_list(ft, hash);
+ SLIST_FOREACH(fle, flist, f_next)
+ if (fle->f_fhash == hash && bcmp(&fle->f_flow, key,
+ KEYLEN(fle->f_flags)) == 0 &&
+ proto == fle->f_proto && fibnum == fle->f_fibnum &&
+ (fle->f_rt->rt_flags & RTF_UP) &&
+ fle->f_rt->rt_ifp != NULL &&
+ (fle->f_lle->la_flags & LLE_VALID)) {
+ fle->f_uptime = time_uptime;
+ fle->f_flags |= flags;
+ critical_exit();
+ FLOWSTAT_INC(ft, ft_hits);
+ goto success;
+ }
+ critical_exit();
+
if (flags & FL_NOAUTO || flow_full(ft))
return (NULL);
@@ -1088,38 +985,22 @@ success:
/*
* used by the bit_alloc macro
*/
-#define calloc(count, size) malloc((count)*(size), M_DEVBUF, M_WAITOK|M_ZERO)
-
+#define calloc(count, size) malloc((count)*(size), M_FTABLE, M_WAITOK | M_ZERO)
static void
flowtable_alloc(struct flowtable *ft)
{
- if (ft->ft_flags & FL_PCPU) {
- ft->ft_lock = flowtable_pcpu_lock;
- ft->ft_unlock = flowtable_pcpu_unlock;
-
- for (int i = 0; i <= mp_maxid; i++) {
- ft->ft_table.pcpu[i] =
- malloc(ft->ft_size * sizeof(struct flentry *),
- M_RTABLE, M_WAITOK | M_ZERO);
- ft->ft_masks[i] = bit_alloc(ft->ft_size);
- }
- } else {
- ft->ft_lock_count = 2*(powerof2(mp_maxid + 1) ? (mp_maxid + 1):
- (fls(mp_maxid + 1) << 1));
-
- ft->ft_lock = flowtable_global_lock;
- ft->ft_unlock = flowtable_global_unlock;
- ft->ft_table.global =
- malloc(ft->ft_size * sizeof(struct flentry *),
- M_RTABLE, M_WAITOK | M_ZERO);
- ft->ft_locks = malloc(ft->ft_lock_count*sizeof(struct mtx),
- M_RTABLE, M_WAITOK | M_ZERO);
- for (int i = 0; i < ft->ft_lock_count; i++)
- mtx_init(&ft->ft_locks[i], "flow", NULL,
- MTX_DEF | MTX_DUPOK);
+ ft->ft_table = malloc(ft->ft_size * sizeof(struct flist),
+ M_FTABLE, M_WAITOK);
+ for (int i = 0; i < ft->ft_size; i++)
+ ft->ft_table[i] = uma_zalloc(pcpu_zone_ptr, M_WAITOK | M_ZERO);
+
+ ft->ft_masks = uma_zalloc(pcpu_zone_ptr, M_WAITOK);
+ for (int i = 0; i < mp_ncpus; i++) {
+ bitstr_t **b;
- ft->ft_masks[0] = bit_alloc(ft->ft_size);
+ b = zpcpu_get_cpu(ft->ft_masks, i);
+ *b = bit_alloc(ft->ft_size);
}
ft->ft_tmpmask = bit_alloc(ft->ft_size);
@@ -1139,41 +1020,22 @@ flowtable_alloc(struct flowtable *ft)
}
}
-
-/*
- * The rest of the code is devoted to garbage collection of expired entries.
- * It is a new additon made necessary by the switch to dynamically allocating
- * flow tables.
- *
- */
-static void
-fle_free(struct flentry *fle, struct flowtable *ft)
-{
- struct rtentry *rt;
- struct llentry *lle;
-
- rt = __DEVOLATILE(struct rtentry *, fle->f_rt);
- lle = __DEVOLATILE(struct llentry *, fle->f_lle);
- if (rt != NULL)
- RTFREE(rt);
- if (lle != NULL)
- LLE_FREE(lle);
- uma_zfree(ft->ft_zone, fle);
-}
+#undef calloc
static void
flowtable_free_stale(struct flowtable *ft, struct rtentry *rt)
{
- int curbit = 0, tmpsize;
- struct flentry *fle, **flehead, *fleprev;
- struct flentry *flefreehead, *flefreetail, *fletmp;
+ struct flist *flist, freelist;
+ struct flentry *fle, *fle1, *fleprev;
bitstr_t *mask, *tmpmask;
+ int curbit, tmpsize;
- flefreehead = flefreetail = NULL;
+ SLIST_INIT(&freelist);
mask = flowtable_mask(ft);
tmpmask = ft->ft_tmpmask;
tmpsize = ft->ft_size;
memcpy(tmpmask, mask, ft->ft_size/8);
+ curbit = 0;
/*
* XXX Note to self, bit_ffs operates at the byte level
* and thus adds gratuitous overhead
@@ -1187,69 +1049,72 @@ flowtable_free_stale(struct flowtable *f
break;
}
- FL_ENTRY_LOCK(ft, curbit);
- flehead = flowtable_entry(ft, curbit);
- fle = fleprev = *flehead;
-
FLOWSTAT_INC(ft, ft_free_checks);
+
+ critical_enter();
+ flist = flowtable_list(ft, curbit);
#ifdef DIAGNOSTIC
- if (fle == NULL && curbit > 0) {
+ if (SLIST_EMPTY(flist) && curbit > 0) {
log(LOG_ALERT,
"warning bit=%d set, but no fle found\n",
curbit);
}
#endif
- while (fle != NULL) {
- if (rt != NULL) {
- if (__DEVOLATILE(struct rtentry *, fle->f_rt) != rt) {
- fleprev = fle;
- fle = fle->f_next;
- continue;
- }
- } else if (!flow_stale(ft, fle)) {
+ SLIST_FOREACH_SAFE(fle, flist, f_next, fle1) {
+ if (rt != NULL && fle->f_rt != rt) {
fleprev = fle;
- fle = fle->f_next;
continue;
}
- /*
- * delete head of the list
- */
- if (fleprev == *flehead) {
- fletmp = fleprev;
- if (fle == fleprev) {
- fleprev = *flehead = fle->f_next;
- } else
- fleprev = *flehead = fle;
- fle = fle->f_next;
- } else {
- /*
- * don't advance fleprev
- */
- fletmp = fle;
- fleprev->f_next = fle->f_next;
- fle = fleprev->f_next;
+ if (!flow_stale(ft, fle)) {
+ fleprev = fle;
+ continue;
}
- if (flefreehead == NULL)
- flefreehead = flefreetail = fletmp;
- else {
- flefreetail->f_next = fletmp;
- flefreetail = fletmp;
- }
- fletmp->f_next = NULL;
+ if (fle == SLIST_FIRST(flist))
+ SLIST_REMOVE_HEAD(flist, f_next);
+ else
+ SLIST_REMOVE_AFTER(fleprev, f_next);
+ SLIST_INSERT_HEAD(&freelist, fle, f_next);
}
- if (*flehead == NULL)
+ if (SLIST_EMPTY(flist))
bit_clear(mask, curbit);
- FL_ENTRY_UNLOCK(ft, curbit);
+ critical_exit();
+
bit_clear(tmpmask, curbit);
tmpmask += (curbit / 8);
tmpsize -= (curbit / 8) * 8;
bit_ffs(tmpmask, tmpsize, &curbit);
}
- while ((fle = flefreehead) != NULL) {
- flefreehead = fle->f_next;
+
+ SLIST_FOREACH_SAFE(fle, &freelist, f_next, fle1) {
FLOWSTAT_INC(ft, ft_frees);
- fle_free(fle, ft);
+ if (fle->f_rt != NULL)
+ RTFREE(fle->f_rt);
+ if (fle->f_lle != NULL)
+ LLE_FREE(fle->f_lle);
+ uma_zfree(flow_zone, fle);
+ }
+}
+
+static void
+flowtable_clean_vnet(struct flowtable *ft, struct rtentry *rt)
+{
+ int i;
+
+ CPU_FOREACH(i) {
+ if (smp_started == 1) {
+ thread_lock(curthread);
+ sched_bind(curthread, i);
+ thread_unlock(curthread);
+ }
+
+ flowtable_free_stale(ft, rt);
+
+ if (smp_started == 1) {
+ thread_lock(curthread);
+ sched_unbind(curthread);
+ thread_unlock(curthread);
+ }
}
}
@@ -1257,7 +1122,6 @@ void
flowtable_route_flush(sa_family_t sa, struct rtentry *rt)
{
struct flowtable *ft;
- int i;
switch (sa) {
#ifdef INET
@@ -1274,51 +1138,7 @@ flowtable_route_flush(sa_family_t sa, st
panic("%s: sa %d", __func__, sa);
}
- if (ft->ft_flags & FL_PCPU) {
- CPU_FOREACH(i) {
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_bind(curthread, i);
- thread_unlock(curthread);
- }
-
- flowtable_free_stale(ft, rt);
-
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- }
- }
- } else {
- flowtable_free_stale(ft, rt);
- }
-}
-
-static void
-flowtable_clean_vnet(struct flowtable *ft)
-{
-
- if (ft->ft_flags & FL_PCPU) {
- int i;
-
- CPU_FOREACH(i) {
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_bind(curthread, i);
- thread_unlock(curthread);
- }
-
- flowtable_free_stale(ft, NULL);
-
- if (smp_started == 1) {
- thread_lock(curthread);
- sched_unbind(curthread);
- thread_unlock(curthread);
- }
- }
- } else
- flowtable_free_stale(ft, NULL);
+ flowtable_clean_vnet(ft, rt);
}
static void
@@ -1335,10 +1155,10 @@ flowtable_cleaner(void)
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
#ifdef INET
- flowtable_clean_vnet(&V_ip4_ft);
+ flowtable_clean_vnet(&V_ip4_ft, NULL);
#endif
#ifdef INET6
- flowtable_clean_vnet(&V_ip6_ft);
+ flowtable_clean_vnet(&V_ip6_ft, NULL);
#endif
CURVNET_RESTORE();
}
@@ -1408,16 +1228,9 @@ flowtable_init(const void *unused __unus
flow_hashjitter = arc4random();
-#ifdef INET
- flow_ipv4_zone = uma_zcreate("ip4flow", sizeof(struct flentry_v4),
+ flow_zone = uma_zcreate("flows", sizeof(struct flentry),
NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_MAXBUCKET);
- uma_zone_set_max(flow_ipv4_zone, 1024 + maxusers * 64 * mp_ncpus);
-#endif
-#ifdef INET6
- flow_ipv6_zone = uma_zcreate("ip6flow", sizeof(struct flentry_v6),
- NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, UMA_ZONE_MAXBUCKET);
- uma_zone_set_max(flow_ipv6_zone, 1024 + maxusers * 64 * mp_ncpus);
-#endif
+ uma_zone_set_max(flow_zone, 1024 + maxusers * 64 * mp_ncpus);
cv_init(&flowclean_c_cv, "c_flowcleanwait");
cv_init(&flowclean_f_cv, "f_flowcleanwait");
@@ -1432,8 +1245,6 @@ SYSINIT(flowtable_init, SI_SUB_PROTO_BEG
#ifdef INET
static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip4, CTLFLAG_RD, NULL,
"Flowtable for IPv4");
-SYSCTL_UMA_MAX(_net_flowtable_ip4, OID_AUTO, maxflows, CTLFLAG_RW,
- &flow_ipv4_zone, "Maximum number of IPv4 flows allowed");
static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip4_ftstat);
VNET_PCPUSTAT_SYSINIT(ip4_ftstat);
@@ -1446,9 +1257,7 @@ static void
flowtable_init_vnet_v4(const void *unused __unused)
{
- V_ip4_ft.ft_zone = flow_ipv4_zone;
V_ip4_ft.ft_size = flowtable_get_size("net.flowtable.ip4.size");
- V_ip4_ft.ft_flags = FL_PCPU;
V_ip4_ft.ft_stat = VNET(ip4_ftstat);
flowtable_alloc(&V_ip4_ft);
}
@@ -1459,8 +1268,6 @@ VNET_SYSINIT(ft_vnet_v4, SI_SUB_PROTO_IF
#ifdef INET6
static SYSCTL_NODE(_net_flowtable, OID_AUTO, ip6, CTLFLAG_RD, NULL,
"Flowtable for IPv6");
-SYSCTL_UMA_MAX(_net_flowtable_ip6, OID_AUTO, maxflows, CTLFLAG_RW,
- &flow_ipv6_zone, "Maximum number of IPv6 flows allowed");
static VNET_PCPUSTAT_DEFINE(struct flowtable_stat, ip6_ftstat);
VNET_PCPUSTAT_SYSINIT(ip6_ftstat);
@@ -1473,9 +1280,7 @@ static void
flowtable_init_vnet_v6(const void *unused __unused)
{
- V_ip6_ft.ft_zone = flow_ipv6_zone;
V_ip6_ft.ft_size = flowtable_get_size("net.flowtable.ip6.size");
- V_ip6_ft.ft_flags = FL_PCPU;
V_ip6_ft.ft_stat = VNET(ip6_ftstat);
flowtable_alloc(&V_ip6_ft);
}
@@ -1484,45 +1289,18 @@ VNET_SYSINIT(flowtable_init_vnet_v6, SI_
#endif /* INET6 */
#ifdef DDB
-static uint32_t *
-flowtable_get_hashkey(struct flentry *fle)
-{
- uint32_t *hashkey;
-
- if (fle->f_flags & FL_IPV6)
- hashkey = ((struct flentry_v4 *)fle)->fl_flow.ipf_key;
- else
- hashkey = ((struct flentry_v6 *)fle)->fl_flow.ipf_key;
-
- return (hashkey);
-}
-
static bitstr_t *
flowtable_mask_pcpu(struct flowtable *ft, int cpuid)
{
- bitstr_t *mask;
- if (ft->ft_flags & FL_PCPU)
- mask = ft->ft_masks[cpuid];
- else
- mask = ft->ft_masks[0];
-
- return (mask);
+ return (zpcpu_get_cpu(*ft->ft_masks, cpuid));
}
-static struct flentry **
-flowtable_entry_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
+static struct flist *
+flowtable_list_pcpu(struct flowtable *ft, uint32_t hash, int cpuid)
{
- struct flentry **fle;
- int index = (hash % ft->ft_size);
-
- if (ft->ft_flags & FL_PCPU) {
- fle = &ft->ft_table.pcpu[cpuid][index];
- } else {
- fle = &ft->ft_table.global[index];
- }
- return (fle);
+ return (zpcpu_get_cpu(&ft->ft_table[hash % ft->ft_size], cpuid));
}
static void
@@ -1542,7 +1320,7 @@ flow_show(struct flowtable *ft, struct f
if (rt_valid)
ifp = rt->rt_ifp;
ifp_valid = ifp != NULL;
- hashkey = flowtable_get_hashkey(fle);
+ hashkey = (uint32_t *)&fle->f_flow;
if (fle->f_flags & FL_IPV6)
goto skipaddr;
@@ -1594,7 +1372,6 @@ static void
flowtable_show(struct flowtable *ft, int cpuid)
{
int curbit = 0;
- struct flentry *fle, **flehead;
bitstr_t *mask, *tmpmask;
if (cpuid != -1)
@@ -1608,20 +1385,19 @@ flowtable_show(struct flowtable *ft, int
*/
bit_ffs(tmpmask, ft->ft_size, &curbit);
while (curbit != -1) {
+ struct flist *flist;
+ struct flentry *fle;
+
if (curbit >= ft->ft_size || curbit < -1) {
db_printf("warning: bad curbit value %d \n",
curbit);
break;
}
- flehead = flowtable_entry_pcpu(ft, curbit, cpuid);
- fle = *flehead;
+ flist = flowtable_list_pcpu(ft, curbit, cpuid);
- while (fle != NULL) {
+ SLIST_FOREACH(fle, flist, f_next)
flow_show(ft, fle);
- fle = fle->f_next;
- continue;
- }
bit_clear(tmpmask, curbit);
bit_ffs(tmpmask, ft->ft_size, &curbit);
}
@@ -1631,14 +1407,10 @@ static void
flowtable_show_vnet(struct flowtable *ft)
{
- if (ft->ft_flags & FL_PCPU) {
- int i;
+ int i;
- CPU_FOREACH(i) {
- flowtable_show(ft, i);
- }
- } else
- flowtable_show(ft, -1);
+ CPU_FOREACH(i)
+ flowtable_show(ft, i);
}
DB_SHOW_COMMAND(flowtables, db_show_flowtables)
Modified: head/sys/net/flowtable.h
==============================================================================
--- head/sys/net/flowtable.h Thu Feb 13 04:55:46 2014 (r261822)
+++ head/sys/net/flowtable.h Thu Feb 13 04:59:18 2014 (r261823)
@@ -44,7 +44,6 @@ struct flowtable_stat {
#ifdef _KERNEL
#define FL_HASH_ALL (1<<0) /* hash 4-tuple + protocol */
-#define FL_PCPU (1<<1) /* pcpu cache */
#define FL_NOAUTO (1<<2) /* don't automatically add flentry on miss */
#define FL_IPV6 (1<<9)
More information about the svn-src-head
mailing list