PERFORCE change 111250 for review
Marko Zec
zec at FreeBSD.org
Thu Dec 7 08:54:43 PST 2006
http://perforce.freebsd.org/chv.cgi?CH=111250
Change 111250 by zec at zec_tca51 on 2006/12/07 16:45:41
Initial attempt at virtualizing tcp_hostcache.
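The change moves the hostcache hash table, UMA zone, tunables and
counters out of file-local statics in tcp_hostcache.c into struct
tcp_hostcache (now declared in tcp_hostcache.h), which is embedded
per-vnet in struct vnet_inet; all accesses go through the
V_tcp_hostcache macro. A minimal sketch of the intended indirection
follows -- the exact VNET_INET() expansion and the example function
are assumptions for illustration, not part of this change:

	/*
	 * Sketch only, assuming VNET_INET() resolves symbols inside
	 * the per-vnet vnet_inet instance that INIT_VNET_INET() binds
	 * to a local pointer:
	 */
	#ifdef VIMAGE
	#define	VNET_INET(sym)	(vnet_inet->_ ## sym)	/* per-vnet copy */
	#else
	#define	VNET_INET(sym)	(sym)			/* single global */
	#endif
	#define	V_tcp_hostcache	VNET_INET(tcp_hostcache)

	/* Hypothetical usage, mirroring the pattern in this diff: */
	void
	tcp_hc_example(void)
	{
		INIT_VNET_INET(curvnetb);	/* bind to current vnet */

		if (V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit)
			return;		/* operate on this vnet's cache */
	}

The SYSCTL_V_INT() variants register the same tunables against the
vnet_inet member, so each network stack instance keeps its own limits
and counters, and tcp_hc_purge() now walks every vnet via
VNETB_ITERLOOP_BEGIN()/VNETB_ITERLOOP_END() from a single global
callout.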
Affected files ...
.. //depot/projects/vimage/src/sys/netinet/tcp_hostcache.c#3 edit
.. //depot/projects/vimage/src/sys/netinet/tcp_hostcache.h#1 add
.. //depot/projects/vimage/src/sys/netinet/tcp_subr.c#5 edit
.. //depot/projects/vimage/src/sys/netinet/vinet.h#3 edit
Differences ...
==== //depot/projects/vimage/src/sys/netinet/tcp_hostcache.c#3 (text+ko) ====
@@ -60,11 +60,6 @@
* memory constraints.
*/
-/*
- * Many thanks to jlemon for basic structure of tcp_syncache which is being
- * followed here.
- */
-
#include "opt_inet6.h"
#include "opt_vimage.h"
@@ -98,57 +93,14 @@
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
+#include <netinet/tcp_hostcache.h>
#include <vm/uma.h>
-TAILQ_HEAD(hc_qhead, hc_metrics);
-
-struct hc_head {
- struct hc_qhead hch_bucket;
- u_int hch_length;
- struct mtx hch_mtx;
-};
-
-struct hc_metrics {
- /* housekeeping */
- TAILQ_ENTRY(hc_metrics) rmx_q;
- struct hc_head *rmx_head; /* head of bucket tail queue */
- struct in_addr ip4; /* IP address */
- struct in6_addr ip6; /* IP6 address */
- /* endpoint specific values for tcp */
- u_long rmx_mtu; /* MTU for this path */
- u_long rmx_ssthresh; /* outbound gateway buffer limit */
- u_long rmx_rtt; /* estimated round trip time */
- u_long rmx_rttvar; /* estimated rtt variance */
- u_long rmx_bandwidth; /* estimated bandwidth */
- u_long rmx_cwnd; /* congestion window */
- u_long rmx_sendpipe; /* outbound delay-bandwidth product */
- u_long rmx_recvpipe; /* inbound delay-bandwidth product */
- /* tcp hostcache internal data */
- int rmx_expire; /* lifetime for object */
- u_long rmx_hits; /* number of hits */
- u_long rmx_updates; /* number of updates */
-};
-
-/* Arbitrary values */
-#define TCP_HOSTCACHE_HASHSIZE 512
-#define TCP_HOSTCACHE_BUCKETLIMIT 30
-#define TCP_HOSTCACHE_EXPIRE 60*60 /* one hour */
-#define TCP_HOSTCACHE_PRUNE 5*60 /* every 5 minutes */
-
-struct tcp_hostcache {
- struct hc_head *hashbase;
- uma_zone_t zone;
- u_int hashsize;
- u_int hashmask;
- u_int bucket_limit;
- u_int cache_count;
- u_int cache_limit;
- int expire;
- int purgeall;
-};
+#ifndef VIMAGE
static struct tcp_hostcache tcp_hostcache;
+#endif
static struct callout tcp_hc_callout;
@@ -157,25 +109,32 @@
static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS);
static void tcp_hc_purge(void *);
-SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache");
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0,
+ "TCP Host cache");
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_RDTUN,
- &tcp_hostcache.cache_limit, 0, "Overall entry limit for hostcache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, cachelimit,
+ CTLFLAG_RDTUN, tcp_hostcache.cache_limit, 0,
+ "Overall entry limit for hostcache");
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, hashsize, CTLFLAG_RDTUN,
- &tcp_hostcache.hashsize, 0, "Size of TCP hostcache hashtable");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, hashsize,
+ CTLFLAG_RDTUN, tcp_hostcache.hashsize, 0,
+ "Size of TCP hostcache hashtable");
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, bucketlimit, CTLFLAG_RDTUN,
- &tcp_hostcache.bucket_limit, 0, "Per-bucket hash limit for hostcache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, bucketlimit,
+ CTLFLAG_RDTUN, tcp_hostcache.bucket_limit, 0,
+ "Per-bucket hash limit for hostcache");
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, count, CTLFLAG_RD,
- &tcp_hostcache.cache_count, 0, "Current number of entries in hostcache");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, count,
+ CTLFLAG_RD, tcp_hostcache.cache_count, 0,
+ "Current number of entries in hostcache");
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, expire, CTLFLAG_RW,
- &tcp_hostcache.expire, 0, "Expire time of TCP hostcache entries");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, expire,
+ CTLFLAG_RW, tcp_hostcache.expire, 0,
+ "Expire time of TCP hostcache entries");
-SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_RW,
- &tcp_hostcache.purgeall, 0, "Expire all entires on next purge run");
+SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_tcp_hostcache, OID_AUTO, purge,
+ CTLFLAG_RW, tcp_hostcache.purgeall, 0,
+ "Expire all entires on next purge run");
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP, 0, 0,
@@ -186,7 +145,7 @@
#define HOSTCACHE_HASH(ip) \
(((ip)->s_addr ^ ((ip)->s_addr >> 7) ^ ((ip)->s_addr >> 17)) & \
- tcp_hostcache.hashmask)
+ V_tcp_hostcache.hashmask)
/* XXX: What is the recommended hash to get good entropy for IPv6 addresses? */
#define HOSTCACHE_HASH6(ip6) \
@@ -194,7 +153,7 @@
(ip6)->s6_addr32[1] ^ \
(ip6)->s6_addr32[2] ^ \
(ip6)->s6_addr32[3]) & \
- tcp_hostcache.hashmask)
+ V_tcp_hostcache.hashmask)
#define THC_LOCK(lp) mtx_lock(lp)
#define THC_UNLOCK(lp) mtx_unlock(lp)
@@ -202,59 +161,64 @@
void
tcp_hc_init(void)
{
+ INIT_VNET_INET(curvnetb);
int i;
/*
* Initialize hostcache structures
*/
- tcp_hostcache.cache_count = 0;
- tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
- tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
- tcp_hostcache.cache_limit =
- tcp_hostcache.hashsize * tcp_hostcache.bucket_limit;
- tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
+ V_tcp_hostcache.cache_count = 0;
+ V_tcp_hostcache.hashsize = TCP_HOSTCACHE_HASHSIZE;
+ V_tcp_hostcache.bucket_limit = TCP_HOSTCACHE_BUCKETLIMIT;
+ V_tcp_hostcache.cache_limit =
+ V_tcp_hostcache.hashsize * V_tcp_hostcache.bucket_limit;
+ V_tcp_hostcache.expire = TCP_HOSTCACHE_EXPIRE;
TUNABLE_INT_FETCH("net.inet.tcp.hostcache.hashsize",
- &tcp_hostcache.hashsize);
+ &V_tcp_hostcache.hashsize);
TUNABLE_INT_FETCH("net.inet.tcp.hostcache.cachelimit",
- &tcp_hostcache.cache_limit);
+ &V_tcp_hostcache.cache_limit);
TUNABLE_INT_FETCH("net.inet.tcp.hostcache.bucketlimit",
- &tcp_hostcache.bucket_limit);
- if (!powerof2(tcp_hostcache.hashsize)) {
+ &V_tcp_hostcache.bucket_limit);
+ if (!powerof2(V_tcp_hostcache.hashsize)) {
printf("WARNING: hostcache hash size is not a power of 2.\n");
- tcp_hostcache.hashsize = 512; /* safe default */
+ V_tcp_hostcache.hashsize = 512; /* safe default */
}
- tcp_hostcache.hashmask = tcp_hostcache.hashsize - 1;
+ V_tcp_hostcache.hashmask = V_tcp_hostcache.hashsize - 1;
/*
* Allocate the hash table
*/
- tcp_hostcache.hashbase = (struct hc_head *)
- malloc(tcp_hostcache.hashsize * sizeof(struct hc_head),
+ V_tcp_hostcache.hashbase = (struct hc_head *)
+ malloc(V_tcp_hostcache.hashsize * sizeof(struct hc_head),
M_HOSTCACHE, M_WAITOK | M_ZERO);
/*
* Initialize the hash buckets
*/
- for (i = 0; i < tcp_hostcache.hashsize; i++) {
- TAILQ_INIT(&tcp_hostcache.hashbase[i].hch_bucket);
- tcp_hostcache.hashbase[i].hch_length = 0;
- mtx_init(&tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+ TAILQ_INIT(&V_tcp_hostcache.hashbase[i].hch_bucket);
+ V_tcp_hostcache.hashbase[i].hch_length = 0;
+ mtx_init(&V_tcp_hostcache.hashbase[i].hch_mtx, "tcp_hc_entry",
NULL, MTX_DEF);
}
/*
* Allocate the hostcache entries.
+ *
+ * XXX don't need a separate zone for each hc instance - revisit!!!
*/
- tcp_hostcache.zone = uma_zcreate("hostcache", sizeof(struct hc_metrics),
- NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
- uma_zone_set_max(tcp_hostcache.zone, tcp_hostcache.cache_limit);
+ V_tcp_hostcache.zone =
+ uma_zcreate("hostcache", sizeof(struct hc_metrics),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+ uma_zone_set_max(V_tcp_hostcache.zone, V_tcp_hostcache.cache_limit);
/*
* Set up periodic cache cleanup.
*/
callout_init(&tcp_hc_callout, CALLOUT_MPSAFE);
- callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
+ callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz,
+ tcp_hc_purge, 0);
}
/*
@@ -266,6 +230,7 @@
static struct hc_metrics *
tcp_hc_lookup(struct in_conninfo *inc)
{
+ INIT_VNET_INET(curvnetb);
int hash;
struct hc_head *hc_head;
struct hc_metrics *hc_entry;
@@ -280,7 +245,7 @@
else
hash = HOSTCACHE_HASH(&inc->inc_faddr);
- hc_head = &tcp_hostcache.hashbase[hash];
+ hc_head = &V_tcp_hostcache.hashbase[hash];
/*
* acquire lock for this bucket row
@@ -336,7 +301,7 @@
else
hash = HOSTCACHE_HASH(&inc->inc_faddr);
- hc_head = &tcp_hostcache.hashbase[hash];
+ hc_head = &V_tcp_hostcache.hashbase[hash];
/*
* acquire lock for this bucket row
@@ -348,8 +313,8 @@
/*
* If the bucket limit is reached reuse the least used element
*/
- if (hc_head->hch_length >= tcp_hostcache.bucket_limit ||
- tcp_hostcache.cache_count >= tcp_hostcache.cache_limit) {
+ if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit ||
+ V_tcp_hostcache.cache_count >= V_tcp_hostcache.cache_limit) {
hc_entry = TAILQ_LAST(&hc_head->hch_bucket, hc_qhead);
/*
* At first we were dropping the last element, just to
@@ -359,17 +324,17 @@
* be "lossy".
*/
TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q);
- tcp_hostcache.hashbase[hash].hch_length--;
- tcp_hostcache.cache_count--;
+ V_tcp_hostcache.hashbase[hash].hch_length--;
+ V_tcp_hostcache.cache_count--;
V_tcpstat.tcps_hc_bucketoverflow++;
#if 0
- uma_zfree(tcp_hostcache.zone, hc_entry);
+ uma_zfree(V_tcp_hostcache.zone, hc_entry);
#endif
} else {
/*
* Allocate a new entry, or balk if not possible
*/
- hc_entry = uma_zalloc(tcp_hostcache.zone, M_NOWAIT);
+ hc_entry = uma_zalloc(V_tcp_hostcache.zone, M_NOWAIT);
if (hc_entry == NULL) {
THC_UNLOCK(&hc_head->hch_mtx);
return NULL;
@@ -385,14 +350,14 @@
else
hc_entry->ip4 = inc->inc_faddr;
hc_entry->rmx_head = hc_head;
- hc_entry->rmx_expire = tcp_hostcache.expire;
+ hc_entry->rmx_expire = V_tcp_hostcache.expire;
/*
* Put it upfront
*/
TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q);
- tcp_hostcache.hashbase[hash].hch_length++;
- tcp_hostcache.cache_count++;
+ V_tcp_hostcache.hashbase[hash].hch_length++;
+ V_tcp_hostcache.cache_count++;
V_tcpstat.tcps_hc_added++;
return hc_entry;
@@ -406,6 +371,7 @@
void
tcp_hc_get(struct in_conninfo *inc, struct hc_metrics_lite *hc_metrics_lite)
{
+ INIT_VNET_INET(curvnetb);
struct hc_metrics *hc_entry;
/*
@@ -421,7 +387,7 @@
return;
}
hc_entry->rmx_hits++;
- hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
hc_metrics_lite->rmx_mtu = hc_entry->rmx_mtu;
hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh;
@@ -446,6 +412,7 @@
u_long
tcp_hc_getmtu(struct in_conninfo *inc)
{
+ INIT_VNET_INET(curvnetb);
struct hc_metrics *hc_entry;
u_long mtu;
@@ -454,7 +421,7 @@
return 0;
}
hc_entry->rmx_hits++;
- hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
mtu = hc_entry->rmx_mtu;
THC_UNLOCK(&hc_entry->rmx_head->hch_mtx);
@@ -468,6 +435,7 @@
void
tcp_hc_updatemtu(struct in_conninfo *inc, u_long mtu)
{
+ INIT_VNET_INET(curvnetb);
struct hc_metrics *hc_entry;
/*
@@ -484,7 +452,7 @@
return;
}
hc_entry->rmx_updates++;
- hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
hc_entry->rmx_mtu = mtu;
@@ -517,7 +485,7 @@
return;
}
hc_entry->rmx_updates++;
- hc_entry->rmx_expire = tcp_hostcache.expire; /* start over again */
+ hc_entry->rmx_expire = V_tcp_hostcache.expire; /* start over again */
if (hcml->rmx_rtt != 0) {
if (hc_entry->rmx_rtt == 0)
@@ -588,13 +556,14 @@
static int
sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS)
{
+ INIT_VNET_INET(curvnetb);
int bufsize;
int linesize = 128;
char *p, *buf;
int len, i, error;
struct hc_metrics *hc_entry;
- bufsize = linesize * (tcp_hostcache.cache_count + 1);
+ bufsize = linesize * (V_tcp_hostcache.cache_count + 1);
p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
@@ -604,9 +573,9 @@
p += len;
#define msec(u) (((u) + 500) / 1000)
- for (i = 0; i < tcp_hostcache.hashsize; i++) {
- THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx);
- TAILQ_FOREACH(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket,
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+ THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ TAILQ_FOREACH(hc_entry, &V_tcp_hostcache.hashbase[i].hch_bucket,
rmx_q) {
len = snprintf(p, linesize,
"%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu "
@@ -632,7 +601,7 @@
hc_entry->rmx_expire);
p += len;
}
- THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+ THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
}
#undef msec
error = SYSCTL_OUT(req, buf, p - buf);
@@ -648,28 +617,32 @@
tcp_hc_purge(void *arg)
{
struct hc_metrics *hc_entry, *hc_next;
- int all = (intptr_t)arg;
+ int all = 0;
int i;
- if (tcp_hostcache.purgeall) {
+ VNETB_ITERLOOP_BEGIN()
+ INIT_VNET_INET(curvnetb);
+ if (V_tcp_hostcache.purgeall) {
all = 1;
- tcp_hostcache.purgeall = 0;
+ V_tcp_hostcache.purgeall = 0;
}
- for (i = 0; i < tcp_hostcache.hashsize; i++) {
- THC_LOCK(&tcp_hostcache.hashbase[i].hch_mtx);
- TAILQ_FOREACH_SAFE(hc_entry, &tcp_hostcache.hashbase[i].hch_bucket,
- rmx_q, hc_next) {
+ for (i = 0; i < V_tcp_hostcache.hashsize; i++) {
+ THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
+ TAILQ_FOREACH_SAFE(hc_entry,
+ &V_tcp_hostcache.hashbase[i].hch_bucket,
+ rmx_q, hc_next) {
if (all || hc_entry->rmx_expire <= 0) {
- TAILQ_REMOVE(&tcp_hostcache.hashbase[i].hch_bucket,
+ TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket,
hc_entry, rmx_q);
- uma_zfree(tcp_hostcache.zone, hc_entry);
- tcp_hostcache.hashbase[i].hch_length--;
- tcp_hostcache.cache_count--;
+ uma_zfree(V_tcp_hostcache.zone, hc_entry);
+ V_tcp_hostcache.hashbase[i].hch_length--;
+ V_tcp_hostcache.cache_count--;
} else
hc_entry->rmx_expire -= TCP_HOSTCACHE_PRUNE;
}
- THC_UNLOCK(&tcp_hostcache.hashbase[i].hch_mtx);
+ THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx);
}
+ VNETB_ITERLOOP_END();
callout_reset(&tcp_hc_callout, TCP_HOSTCACHE_PRUNE * hz, tcp_hc_purge, 0);
}
==== //depot/projects/vimage/src/sys/netinet/tcp_subr.c#5 (text+ko) ====
@@ -387,6 +387,7 @@
#undef TCP_MINPROTOHDR
tcp_timer_init();
+ tcp_hc_init();
#ifdef VIMAGE
if (curvnetb != &vnetb_0)
@@ -394,7 +395,6 @@
#endif
syncache_init();
- tcp_hc_init();
tcp_reass_init();
ISN_LOCK_INIT();
callout_init(&isn_callout, CALLOUT_MPSAFE);
==== //depot/projects/vimage/src/sys/netinet/vinet.h#3 (text+ko) ====
@@ -47,6 +47,7 @@
#include <netinet/icmp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_hostcache.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
@@ -66,6 +67,7 @@
struct inpcbinfo _tcbinfo;
struct tcpstat _tcpstat; /* tcp statistics */
TAILQ_HEAD(, tcptw) _twq_2msl;
+ struct tcp_hostcache _tcp_hostcache;
struct inpcbhead _udb;
struct inpcbinfo _udbinfo;
@@ -110,6 +112,7 @@
#define V_tcbinfo VNET_INET(tcbinfo)
#define V_tcpstat VNET_INET(tcpstat)
#define V_twq_2msl VNET_INET(twq_2msl)
+#define V_tcp_hostcache VNET_INET(tcp_hostcache)
#define V_udb VNET_INET(udb)
#define V_udbinfo VNET_INET(udbinfo)