svn commit: r330001 - in head/sys: conf netinet
Patrick Kelsey
pkelsey at FreeBSD.org
Mon Feb 26 02:53:24 UTC 2018
Author: pkelsey
Date: Mon Feb 26 02:53:22 2018
New Revision: 330001
URL: https://svnweb.freebsd.org/changeset/base/330001
Log:
This is an implementation of the client side of TCP Fast Open (TFO)
[RFC7413]. It also includes a pre-shared key mode of operation in
which the server requires the client to be in possession of a shared
secret in order to successfully open TFO connections with that server.
The names of some existing fastopen sysctls have changed (e.g.,
net.inet.tcp.fastopen.enabled -> net.inet.tcp.fastopen.server_enable).
Reviewed by: tuexen
MFC after: 1 month
Sponsored by: Limelight Networks
Differential Revision: https://reviews.freebsd.org/D14047
Modified:
head/sys/conf/options
head/sys/netinet/tcp.h
head/sys/netinet/tcp_fastopen.c
head/sys/netinet/tcp_fastopen.h
head/sys/netinet/tcp_input.c
head/sys/netinet/tcp_output.c
head/sys/netinet/tcp_subr.c
head/sys/netinet/tcp_syncache.c
head/sys/netinet/tcp_usrreq.c
head/sys/netinet/tcp_var.h
Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options Mon Feb 26 02:43:26 2018 (r330000)
+++ head/sys/conf/options Mon Feb 26 02:53:22 2018 (r330001)
@@ -456,6 +456,7 @@ TCP_HHOOK opt_inet.h
TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading
TCP_RFC7413 opt_inet.h
TCP_RFC7413_MAX_KEYS opt_inet.h
+TCP_RFC7413_MAX_PSKS opt_inet.h
TCP_SIGNATURE opt_ipsec.h
VLAN_ARRAY opt_vlan.h
XBONEHACK
Modified: head/sys/netinet/tcp.h
==============================================================================
--- head/sys/netinet/tcp.h Mon Feb 26 02:43:26 2018 (r330000)
+++ head/sys/netinet/tcp.h Mon Feb 26 02:53:22 2018 (r330001)
@@ -101,8 +101,6 @@ struct tcphdr {
#define TCPOLEN_SIGNATURE 18
#define TCPOPT_FAST_OPEN 34
#define TCPOLEN_FAST_OPEN_EMPTY 2
-#define TCPOLEN_FAST_OPEN_MIN 6
-#define TCPOLEN_FAST_OPEN_MAX 18
/* Miscellaneous constants */
#define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */
@@ -152,6 +150,10 @@ struct tcphdr {
#define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */
#define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr))
/* max space left for options */
+
+#define TCP_FASTOPEN_MIN_COOKIE_LEN 4 /* Per RFC7413 */
+#define TCP_FASTOPEN_MAX_COOKIE_LEN 16 /* Per RFC7413 */
+#define TCP_FASTOPEN_PSK_LEN 16 /* Same as TCP_FASTOPEN_KEY_LEN */
#endif /* __BSD_VISIBLE */
/*
@@ -251,6 +253,16 @@ struct tcp_info {
/* Padding to grow without breaking ABI. */
u_int32_t __tcpi_pad[26]; /* Padding. */
+};
+
+/*
+ * If this structure is provided when setting the TCP_FASTOPEN socket
+ * option, and the enable member is non-zero, a subsequent connect will use
+ * pre-shared key (PSK) mode using the provided key.
+ */
+struct tcp_fastopen {
+ int enable;
+ uint8_t psk[TCP_FASTOPEN_PSK_LEN];
};
#endif
#define TCP_FUNCTION_NAME_LEN_MAX 32
Modified: head/sys/netinet/tcp_fastopen.c
==============================================================================
--- head/sys/netinet/tcp_fastopen.c Mon Feb 26 02:43:26 2018 (r330000)
+++ head/sys/netinet/tcp_fastopen.c Mon Feb 26 02:53:22 2018 (r330001)
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2015 Patrick Kelsey
+ * Copyright (c) 2015-2017 Patrick Kelsey
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,23 +25,44 @@
*/
/*
- * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413].
+ * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include
+ * this code, add the following line to your kernel config:
*
- * This implementation is currently considered to be experimental and is not
- * included in kernel builds by default. To include this code, add the
- * following line to your kernel config:
- *
* options TCP_RFC7413
*
+ *
* The generated TFO cookies are the 64-bit output of
- * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are
- * supported so that time-based rolling cookie invalidation policies can be
- * implemented in the system. The default number of concurrent keys is 2.
- * This can be adjusted in the kernel config as follows:
+ * SipHash24(key=<16-byte-key>, msg=<client-ip>). Multiple concurrent valid
+ * keys are supported so that time-based rolling cookie invalidation
+ * policies can be implemented in the system. The default number of
+ * concurrent keys is 2. This can be adjusted in the kernel config as
+ * follows:
*
* options TCP_RFC7413_MAX_KEYS=<num-keys>
*
*
+ * In addition to the facilities defined in RFC7413, this implementation
+ * supports a pre-shared key (PSK) mode of operation in which the TFO server
+ * requires the client to be in possession of a shared secret in order for
+ * the client to be able to successfully open TFO connections with the
+ * server. This is useful, for example, in environments where TFO servers
+ * are exposed to both internal and external clients and only wish to allow
+ * TFO connections from internal clients.
+ *
+ * In the PSK mode of operation, the server generates and sends TFO cookies
+ * to requesting clients as usual. However, when validating cookies
+ * received in TFO SYNs from clients, the server requires the
+ * client-supplied cookie to equal SipHash24(key=<16-byte-psk>,
+ * msg=<cookie-sent-to-client>).
+ *
+ * Multiple concurrent valid pre-shared keys are supported so that
+ * time-based rolling PSK invalidation policies can be implemented in the
+ * system. The default number of concurrent pre-shared keys is 2. This can
+ * be adjusted in the kernel config as follows:
+ *
+ * options TCP_RFC7413_MAX_PSKS=<num-psks>
+ *
+ *
* The following TFO-specific sysctls are defined:
*
* net.inet.tcp.fastopen.acceptany (RW, default 0)
@@ -49,32 +70,73 @@
* be valid.
*
* net.inet.tcp.fastopen.autokey (RW, default 120)
- * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key
- * will be automatically generated after this many seconds.
+ * When this and net.inet.tcp.fastopen.server_enable are non-zero, a new
+ * key will be automatically generated after this many seconds.
*
- * net.inet.tcp.fastopen.enabled (RW, default 0)
- * When zero, no new TFO connections can be created. On the transition
- * from enabled to disabled, all installed keys are removed. On the
- * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey
- * is non-zero and there are no keys installed, a new key will be
- * generated immediately. The transition from enabled to disabled does
- * not affect any TFO connections in progress; it only prevents new ones
- * from being made.
+ * net.inet.tcp.fastopen.ccache_bucket_limit
+ * (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT)
+ * The maximum number of entries in a client cookie cache bucket.
*
- * net.inet.tcp.fastopen.keylen (RO)
+ * net.inet.tcp.fastopen.ccache_buckets
+ * (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT)
+ * The number of client cookie cache buckets.
+ *
+ * net.inet.tcp.fastopen.client_enable (RW, default 0)
+ * When zero, no new active (i.e., client) TFO connections can be
+ * created. On the transition from enabled to disabled, the client
+ * cookie cache is cleared and disabled. The transition from enabled to
+ * disabled does not affect any active TFO connections in progress; it
+ * only prevents new ones from being made.
+ *
+ * net.inet.tcp.fastopen.keylen (RD)
* The key length in bytes.
*
- * net.inet.tcp.fastopen.maxkeys (RO)
+ * net.inet.tcp.fastopen.maxkeys (RD)
* The maximum number of keys supported.
*
- * net.inet.tcp.fastopen.numkeys (RO)
+ * net.inet.tcp.fastopen.maxpsks (RD)
+ * The maximum number of pre-shared keys supported.
+ *
+ * net.inet.tcp.fastopen.numkeys (RD)
* The current number of keys installed.
*
- * net.inet.tcp.fastopen.setkey (WO)
- * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this
- * sysctl.
+ * net.inet.tcp.fastopen.numpsks (RD)
+ * The current number of pre-shared keys installed.
*
+ * net.inet.tcp.fastopen.path_disable_time
+ * (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT)
+ * When a failure occurs while trying to create a new active (i.e.,
+ * client) TFO connection, new active connections on the same path, as
+ * determined by the tuple {client_ip, server_ip, server_port}, will be
+ * forced to be non-TFO for this many seconds. Note that the path
+ * disable mechanism relies on state stored in client cookie cache
+ * entries, so it is possible for the disable time for a given path to
+ * be reduced if the corresponding client cookie cache entry is reused
+ * due to resource pressure before the disable period has elapsed.
*
+ * net.inet.tcp.fastopen.psk_enable (RW, default 0)
+ * When non-zero, pre-shared key (PSK) mode is enabled for all TFO
+ * servers. On the transition from enabled to disabled, all installed
+ * pre-shared keys are removed.
+ *
+ * net.inet.tcp.fastopen.server_enable (RW, default 0)
+ * When zero, no new passive (i.e., server) TFO connections can be
+ * created. On the transition from enabled to disabled, all installed
+ * keys and pre-shared keys are removed. On the transition from
+ * disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and
+ * there are no keys installed, a new key will be generated immediately.
+ * The transition from enabled to disabled does not affect any passive
+ * TFO connections in progress; it only prevents new ones from being
+ * made.
+ *
+ * net.inet.tcp.fastopen.setkey (WR)
+ * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to
+ * this sysctl.
+ *
+ * net.inet.tcp.fastopen.setpsk (WR)
+ * Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen
+ * bytes to this sysctl.
+ *
* In order for TFO connections to be created via a listen socket, that
* socket must have the TCP_FASTOPEN socket option set on it. This option
* can be set on the socket either before or after the listen() is invoked.
@@ -105,6 +167,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/hash.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
@@ -119,21 +182,56 @@ __FBSDID("$FreeBSD$");
#include <netinet/in.h>
#include <netinet/in_pcb.h>
-#include <netinet/tcp_fastopen.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_fastopen.h>
#define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH
+#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN
+#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN
+#endif
+
+/*
+ * A PSK-mode setsockopt() uses tcpcb.t_tfo_cookie.client to hold the PSK
+ * until the connect occurs, so the maximum cookie length must be at least
+ * the PSK length.
+ */
+#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN
+#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN
+#endif
+
+#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT 16
+#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT 2048 /* must be power of 2 */
+
+#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT 900 /* seconds */
+
#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1)
#define TCP_FASTOPEN_MAX_KEYS 2
#else
#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS
#endif
+#if TCP_FASTOPEN_MAX_KEYS > 10
+#undef TCP_FASTOPEN_MAX_KEYS
+#define TCP_FASTOPEN_MAX_KEYS 10
+#endif
+
+#if !defined(TCP_RFC7413_MAX_PSKS) || (TCP_RFC7413_MAX_PSKS < 1)
+#define TCP_FASTOPEN_MAX_PSKS 2
+#else
+#define TCP_FASTOPEN_MAX_PSKS TCP_RFC7413_MAX_PSKS
+#endif
+
+#if TCP_FASTOPEN_MAX_PSKS > 10
+#undef TCP_FASTOPEN_MAX_PSKS
+#define TCP_FASTOPEN_MAX_PSKS 10
+#endif
+
struct tcp_fastopen_keylist {
unsigned int newest;
+ unsigned int newest_psk;
uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN];
+ uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN];
};
struct tcp_fastopen_callout {
@@ -141,6 +239,16 @@ struct tcp_fastopen_callout {
struct vnet *v;
};
+static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup(
+ struct in_conninfo *, struct tcp_fastopen_ccache_bucket **);
+static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create(
+ struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t,
+ uint8_t *);
+static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *,
+ unsigned int);
+static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *,
+ struct tcp_fastopen_ccache_bucket *);
+
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open");
static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0;
@@ -157,12 +265,25 @@ SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey,
&sysctl_net_inet_tcp_fastopen_autokey, "IU",
"Number of seconds between auto-generation of a new key; zero disables");
-VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0;
-static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS);
-SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled,
+static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU",
+ "Max entries per bucket in client cookie cache");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_ccache_buckets) =
+ TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
+#define V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets,
+ CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0,
+ "Client cookie cache number of buckets (power of 2)");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 0;
+static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable,
CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
- &sysctl_net_inet_tcp_fastopen_enabled, "IU",
- "Enable/disable TCP Fast Open processing");
+ &sysctl_net_inet_tcp_fastopen_client_enable, "IU",
+ "Enable/disable TCP Fast Open client functionality");
SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen,
CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN,
@@ -172,18 +293,56 @@ SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys,
CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS,
"Maximum number of keys supported");
+SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks,
+ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS,
+ "Maximum number of pre-shared keys supported");
+
static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0;
#define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys)
SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys,
CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0,
"Number of keys installed");
+static VNET_DEFINE(unsigned int, tcp_fastopen_numpsks) = 0;
+#define V_tcp_fastopen_numpsks VNET(tcp_fastopen_numpsks)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks,
+ CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0,
+ "Number of pre-shared keys installed");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_path_disable_time) =
+ TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT;
+#define V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time)
+SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0,
+ "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path");
+
+static VNET_DEFINE(unsigned int, tcp_fastopen_psk_enable) = 0;
+#define V_tcp_fastopen_psk_enable VNET(tcp_fastopen_psk_enable)
+static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_psk_enable, "IU",
+ "Enable/disable TCP Fast Open server pre-shared key mode");
+
+VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0;
+static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable,
+ CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_server_enable, "IU",
+ "Enable/disable TCP Fast Open server functionality");
+
static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey,
CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
&sysctl_net_inet_tcp_fastopen_setkey, "",
"Install a new key");
+static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk,
+ CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0,
+ &sysctl_net_inet_tcp_fastopen_setpsk, "",
+ "Install a new pre-shared key");
+
static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock);
#define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock)
@@ -201,9 +360,21 @@ static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fa
static VNET_DEFINE(uma_zone_t, counter_zone);
#define V_counter_zone VNET(counter_zone)
+static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets");
+
+static VNET_DEFINE(struct tcp_fastopen_ccache, tcp_fastopen_ccache);
+#define V_tcp_fastopen_ccache VNET(tcp_fastopen_ccache)
+
+#define CCB_LOCK(ccb) mtx_lock(&(ccb)->ccb_mtx)
+#define CCB_UNLOCK(ccb) mtx_unlock(&(ccb)->ccb_mtx)
+#define CCB_LOCK_ASSERT(ccb) mtx_assert(&(ccb)->ccb_mtx, MA_OWNED)
+
+
void
tcp_fastopen_init(void)
{
+ unsigned int i;
+
V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
rm_init(&V_tcp_fastopen_keylock, "tfo_keylock");
@@ -211,11 +382,67 @@ tcp_fastopen_init(void)
&V_tcp_fastopen_keylock, 0);
V_tcp_fastopen_autokey_ctx.v = curvnet;
V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
+ V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1;
+
+ /* May already be non-zero if kernel tunable was set */
+ if (V_tcp_fastopen_ccache.bucket_limit == 0)
+ V_tcp_fastopen_ccache.bucket_limit =
+ TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT;
+
+ /* May already be non-zero if kernel tunable was set */
+ if ((V_tcp_fastopen_ccache_buckets == 0) ||
+ !powerof2(V_tcp_fastopen_ccache_buckets))
+ V_tcp_fastopen_ccache.buckets =
+ TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT;
+ else
+ V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets;
+
+ V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1;
+ V_tcp_fastopen_ccache.secret = arc4random();
+
+ V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets *
+ sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE,
+ M_WAITOK | M_ZERO);
+
+ for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
+ TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries);
+ mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket",
+ NULL, MTX_DEF);
+ V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1; /* bucket disabled */
+ V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache;
+ }
+
+ /*
+ * Note that while the total number of entries in the cookie cache
+ * is limited by the table management logic to
+ * V_tcp_fastopen_ccache.buckets *
+ * V_tcp_fastopen_ccache.bucket_limit, the total number of items in
+ * this zone can exceed that amount by the number of CPUs in the
+ * system times the maximum number of unallocated items that can be
+ * present in each UMA per-CPU cache for this zone.
+ */
+ V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries",
+ sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL,
+ UMA_ALIGN_CACHE, 0);
}
void
tcp_fastopen_destroy(void)
{
+ struct tcp_fastopen_ccache_bucket *ccb;
+ unsigned int i;
+
+ for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
+ ccb = &V_tcp_fastopen_ccache.base[i];
+ tcp_fastopen_ccache_bucket_trim(ccb, 0);
+ mtx_destroy(&ccb->ccb_mtx);
+ }
+
+ KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0,
+ ("%s: TFO ccache zone allocation count not 0", __func__));
+ uma_zdestroy(V_tcp_fastopen_ccache.zone);
+ free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE);
+
callout_drain(&V_tcp_fastopen_autokey_ctx.c);
rm_destroy(&V_tcp_fastopen_keylock);
uma_zdestroy(V_counter_zone);
@@ -254,6 +481,19 @@ tcp_fastopen_addkey_locked(uint8_t *key)
}
static void
+tcp_fastopen_addpsk_locked(uint8_t *psk)
+{
+
+ V_tcp_fastopen_keys.newest_psk++;
+ if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS)
+ V_tcp_fastopen_keys.newest_psk = 0;
+ memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk,
+ TCP_FASTOPEN_KEY_LEN);
+ if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS)
+ V_tcp_fastopen_numpsks++;
+}
+
+static void
tcp_fastopen_autokey_locked(void)
{
uint8_t newkey[TCP_FASTOPEN_KEY_LEN];
@@ -300,7 +540,50 @@ tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGT
return (siphash);
}
+static uint64_t
+tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len)
+{
+ SIPHASH_CTX ctx;
+ uint64_t psk_cookie;
+ SipHash24_Init(&ctx);
+ SipHash_SetKey(&ctx, psk);
+ SipHash_Update(&ctx, cookie, cookie_len);
+ SipHash_Final((u_int8_t *)&psk_cookie, &ctx);
+
+ return (psk_cookie);
+}
+
+static int
+tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie)
+{
+ unsigned int i, psk_index;
+ uint64_t psk_cookie;
+
+ if (V_tcp_fastopen_psk_enable) {
+ psk_index = V_tcp_fastopen_keys.newest_psk;
+ for (i = 0; i < V_tcp_fastopen_numpsks; i++) {
+ psk_cookie =
+ tcp_fastopen_make_psk_cookie(
+ V_tcp_fastopen_keys.psk[psk_index],
+ (uint8_t *)cur_cookie,
+ TCP_FASTOPEN_COOKIE_LEN);
+
+ if (memcmp(wire_cookie, &psk_cookie,
+ TCP_FASTOPEN_COOKIE_LEN) == 0)
+ return (1);
+
+ if (psk_index == 0)
+ psk_index = TCP_FASTOPEN_MAX_PSKS - 1;
+ else
+ psk_index--;
+ }
+ } else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0)
+ return (1);
+
+ return (0);
+}
+
/*
* Return values:
* -1 the cookie is invalid and no valid cookie is available
@@ -344,10 +627,9 @@ tcp_fastopen_check_cookie(struct in_conninfo *inc, uin
inc);
if (i == 0)
*latest_cookie = cur_cookie;
- if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) {
- rv = 1;
+ rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie);
+ if (rv)
goto out;
- }
if (key_index == 0)
key_index = TCP_FASTOPEN_MAX_KEYS - 1;
else
@@ -355,7 +637,7 @@ tcp_fastopen_check_cookie(struct in_conninfo *inc, uin
}
rv = 0;
-out:
+ out:
TCP_FASTOPEN_KEYS_RUNLOCK(&tracker);
return (rv);
}
@@ -373,7 +655,7 @@ sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_AR
return (EINVAL);
TCP_FASTOPEN_KEYS_WLOCK();
- if (V_tcp_fastopen_enabled) {
+ if (V_tcp_fastopen_server_enable) {
if (V_tcp_fastopen_autokey && !new)
callout_stop(&V_tcp_fastopen_autokey_ctx.c);
else if (new)
@@ -389,24 +671,54 @@ sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_AR
}
static int
-sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS)
+sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS)
{
int error;
unsigned int new;
- new = V_tcp_fastopen_enabled;
+ new = V_tcp_fastopen_psk_enable;
error = sysctl_handle_int(oidp, &new, 0, req);
if (error == 0 && req->newptr) {
- if (V_tcp_fastopen_enabled && !new) {
+ if (V_tcp_fastopen_psk_enable && !new) {
/* enabled -> disabled */
TCP_FASTOPEN_KEYS_WLOCK();
+ V_tcp_fastopen_numpsks = 0;
+ V_tcp_fastopen_keys.newest_psk =
+ TCP_FASTOPEN_MAX_PSKS - 1;
+ V_tcp_fastopen_psk_enable = 0;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ } else if (!V_tcp_fastopen_psk_enable && new) {
+ /* disabled -> enabled */
+ TCP_FASTOPEN_KEYS_WLOCK();
+ V_tcp_fastopen_psk_enable = 1;
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+ }
+ }
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ unsigned int new;
+
+ new = V_tcp_fastopen_server_enable;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (V_tcp_fastopen_server_enable && !new) {
+ /* enabled -> disabled */
+ TCP_FASTOPEN_KEYS_WLOCK();
V_tcp_fastopen_numkeys = 0;
V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1;
if (V_tcp_fastopen_autokey)
callout_stop(&V_tcp_fastopen_autokey_ctx.c);
- V_tcp_fastopen_enabled = 0;
+ V_tcp_fastopen_numpsks = 0;
+ V_tcp_fastopen_keys.newest_psk =
+ TCP_FASTOPEN_MAX_PSKS - 1;
+ V_tcp_fastopen_server_enable = 0;
TCP_FASTOPEN_KEYS_WUNLOCK();
- } else if (!V_tcp_fastopen_enabled && new) {
+ } else if (!V_tcp_fastopen_server_enable && new) {
/* disabled -> enabled */
TCP_FASTOPEN_KEYS_WLOCK();
if (V_tcp_fastopen_autokey &&
@@ -417,7 +729,7 @@ sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_AR
tcp_fastopen_autokey_callout,
&V_tcp_fastopen_autokey_ctx);
}
- V_tcp_fastopen_enabled = 1;
+ V_tcp_fastopen_server_enable = 1;
TCP_FASTOPEN_KEYS_WUNLOCK();
}
}
@@ -446,3 +758,369 @@ sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARG
return (0);
}
+
+static int
+sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ uint8_t newpsk[TCP_FASTOPEN_KEY_LEN];
+
+ if (req->oldptr != NULL || req->oldlen != 0)
+ return (EINVAL);
+ if (req->newptr == NULL)
+ return (EPERM);
+ if (req->newlen != sizeof(newpsk))
+ return (EINVAL);
+ error = SYSCTL_IN(req, newpsk, sizeof(newpsk));
+ if (error)
+ return (error);
+
+ TCP_FASTOPEN_KEYS_WLOCK();
+ tcp_fastopen_addpsk_locked(newpsk);
+ TCP_FASTOPEN_KEYS_WUNLOCK();
+
+ return (0);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS)
+{
+ struct tcp_fastopen_ccache_bucket *ccb;
+ int error;
+ unsigned int new;
+ unsigned int i;
+
+ new = V_tcp_fastopen_ccache.bucket_limit;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if ((new == 0) || (new > INT_MAX))
+ error = EINVAL;
+ else {
+ if (new < V_tcp_fastopen_ccache.bucket_limit) {
+ for (i = 0; i < V_tcp_fastopen_ccache.buckets;
+ i++) {
+ ccb = &V_tcp_fastopen_ccache.base[i];
+ tcp_fastopen_ccache_bucket_trim(ccb, new);
+ }
+ }
+ V_tcp_fastopen_ccache.bucket_limit = new;
+ }
+
+ }
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS)
+{
+ struct tcp_fastopen_ccache_bucket *ccb;
+ int error;
+ unsigned int new, i;
+
+ new = V_tcp_fastopen_client_enable;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr) {
+ if (V_tcp_fastopen_client_enable && !new) {
+ /* enabled -> disabled */
+ for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
+ ccb = &V_tcp_fastopen_ccache.base[i];
+ tcp_fastopen_ccache_bucket_trim(ccb, 0);
+ }
+ V_tcp_fastopen_client_enable = 0;
+ } else if (!V_tcp_fastopen_client_enable && new) {
+ /* disabled -> enabled */
+ for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) {
+ ccb = &V_tcp_fastopen_ccache.base[i];
+ CCB_LOCK(ccb);
+ KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
+ ("%s: ccb->ccb_entries not empty", __func__));
+ KASSERT(ccb->ccb_num_entries == -1,
+ ("%s: ccb->ccb_num_entries %d not -1", __func__,
+ ccb->ccb_num_entries));
+ ccb->ccb_num_entries = 0; /* enable bucket */
+ CCB_UNLOCK(ccb);
+ }
+ V_tcp_fastopen_client_enable = 1;
+ }
+ }
+ return (error);
+}
+
+void
+tcp_fastopen_connect(struct tcpcb *tp)
+{
+ struct inpcb *inp;
+ struct tcp_fastopen_ccache_bucket *ccb;
+ struct tcp_fastopen_ccache_entry *cce;
+ sbintime_t now;
+ uint16_t server_mss;
+ uint64_t psk_cookie;
+
+ inp = tp->t_inpcb;
+ cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb);
+ if (cce) {
+ if (cce->disable_time == 0) {
+ if ((cce->cookie_len > 0) &&
+ (tp->t_tfo_client_cookie_len ==
+ TCP_FASTOPEN_PSK_LEN)) {
+ psk_cookie =
+ tcp_fastopen_make_psk_cookie(
+ tp->t_tfo_cookie.client,
+ cce->cookie, cce->cookie_len);
+ } else {
+ tp->t_tfo_client_cookie_len = cce->cookie_len;
+ memcpy(tp->t_tfo_cookie.client, cce->cookie,
+ cce->cookie_len);
+ }
+ server_mss = cce->server_mss;
+ CCB_UNLOCK(ccb);
+ if (tp->t_tfo_client_cookie_len ==
+ TCP_FASTOPEN_PSK_LEN) {
+ tp->t_tfo_client_cookie_len =
+ TCP_FASTOPEN_COOKIE_LEN;
+ memcpy(tp->t_tfo_cookie.client, &psk_cookie,
+ TCP_FASTOPEN_COOKIE_LEN);
+ }
+ tcp_mss(tp, server_mss ? server_mss : -1);
+ tp->snd_wnd = tp->t_maxseg;
+ } else {
+ /*
+ * The path is disabled. Check the time and
+ * possibly re-enable.
+ */
+ now = getsbinuptime();
+ if (now - cce->disable_time >
+ ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) {
+ /*
+ * Re-enable path. Force a TFO cookie
+ * request. Forget the old MSS as it may be
+ * bogus now, and we will rediscover it in
+ * the SYN|ACK.
+ */
+ cce->disable_time = 0;
+ cce->server_mss = 0;
+ cce->cookie_len = 0;
+ /*
+ * tp->t_tfo... cookie details are already
+ * zero from the tcpcb init.
+ */
+ } else {
+ /*
+ * Path is disabled, so disable TFO on this
+ * connection.
+ */
+ tp->t_flags &= ~TF_FASTOPEN;
+ }
+ CCB_UNLOCK(ccb);
+ tcp_mss(tp, -1);
+ /*
+ * snd_wnd is irrelevant since we are either forcing
+ * a TFO cookie request or disabling TFO - either
+ * way, no data with the SYN.
+ */
+ }
+ } else {
+ /*
+ * A new entry for this path will be created when a SYN|ACK
+ * comes back, or the attempt otherwise fails.
+ */
+ CCB_UNLOCK(ccb);
+ tcp_mss(tp, -1);
+ /*
+ * snd_wnd is irrelevant since we are forcing a TFO cookie
+ * request.
+ */
+ }
+}
+
+void
+tcp_fastopen_disable_path(struct tcpcb *tp)
+{
+ struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
+ struct tcp_fastopen_ccache_bucket *ccb;
+ struct tcp_fastopen_ccache_entry *cce;
+
+ cce = tcp_fastopen_ccache_lookup(inc, &ccb);
+ if (cce) {
+ cce->server_mss = 0;
+ cce->cookie_len = 0;
+ /*
+ * Preserve the existing disable time if it is already
+ * disabled.
+ */
+ if (cce->disable_time == 0)
+ cce->disable_time = getsbinuptime();
+ } else /* use invalid cookie len to create disabled entry */
+ tcp_fastopen_ccache_create(ccb, inc, 0,
+ TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL);
+
+ CCB_UNLOCK(ccb);
+ tp->t_flags &= ~TF_FASTOPEN;
+}
+
+void
+tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss,
+ uint8_t cookie_len, uint8_t *cookie)
+{
+ struct in_conninfo *inc = &tp->t_inpcb->inp_inc;
+ struct tcp_fastopen_ccache_bucket *ccb;
+ struct tcp_fastopen_ccache_entry *cce;
+
+ cce = tcp_fastopen_ccache_lookup(inc, &ccb);
+ if (cce) {
+ if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) &&
+ (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
+ ((cookie_len & 0x1) == 0)) {
+ cce->server_mss = mss;
+ cce->cookie_len = cookie_len;
+ memcpy(cce->cookie, cookie, cookie_len);
+ cce->disable_time = 0;
+ } else {
+ /* invalid cookie length, disable entry */
+ cce->server_mss = 0;
+ cce->cookie_len = 0;
+ /*
+ * Preserve the existing disable time if it is
+ * already disabled.
+ */
+ if (cce->disable_time == 0)
+ cce->disable_time = getsbinuptime();
+ }
+ } else
+ tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie);
+
+ CCB_UNLOCK(ccb);
+}
+
+static struct tcp_fastopen_ccache_entry *
+tcp_fastopen_ccache_lookup(struct in_conninfo *inc,
+ struct tcp_fastopen_ccache_bucket **ccbp)
+{
+ struct tcp_fastopen_ccache_bucket *ccb;
+ struct tcp_fastopen_ccache_entry *cce;
+ uint32_t last_word;
+ uint32_t hash;
+
+ hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4,
+ V_tcp_fastopen_ccache.secret);
+ hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4,
+ hash);
+ last_word = inc->inc_fport;
+ hash = jenkins_hash32(&last_word, 1, hash);
+ ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask];
+ *ccbp = ccb;
+ CCB_LOCK(ccb);
+
+ /*
+ * Always returns with locked bucket.
+ */
+ TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link)
+ if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) &&
+ (cce->server_port == inc->inc_ie.ie_fport) &&
+ (((cce->af == AF_INET) &&
+ (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) &&
+ (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) ||
+ ((cce->af == AF_INET6) &&
+ IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) &&
+ IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr))))
+ break;
+
+ return (cce);
+}
+
+static struct tcp_fastopen_ccache_entry *
+tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb,
+ struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie)
+{
+ struct tcp_fastopen_ccache_entry *cce;
+
+ /*
+ * 1. Create a new entry, or
+ * 2. Reclaim an existing entry, or
+ * 3. Fail
+ */
+
+ CCB_LOCK_ASSERT(ccb);
+
+ cce = NULL;
+ if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit)
+ cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT);
+
+ if (cce == NULL) {
+ /*
+ * At bucket limit, or out of memory - reclaim last
+ * entry in bucket.
+ */
+ cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries);
+ if (cce == NULL) {
+ /* XXX count this event */
+ return (NULL);
+ }
+
+ TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
+ } else
+ ccb->ccb_num_entries++;
+
+ TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link);
+ cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET;
+ if (cce->af == AF_INET) {
+ cce->cce_client_ip.v4 = inc->inc_laddr;
+ cce->cce_server_ip.v4 = inc->inc_faddr;
+ } else {
+ cce->cce_client_ip.v6 = inc->inc6_laddr;
+ cce->cce_server_ip.v6 = inc->inc6_faddr;
+ }
+ cce->server_port = inc->inc_fport;
+ if ((cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) &&
+ ((cookie_len & 0x1) == 0)) {
+ cce->server_mss = mss;
+ cce->cookie_len = cookie_len;
+ memcpy(cce->cookie, cookie, cookie_len);
+ cce->disable_time = 0;
+ } else {
+ /* invalid cookie length, disable cce */
+ cce->server_mss = 0;
+ cce->cookie_len = 0;
+ cce->disable_time = getsbinuptime();
+ }
+
+ return (cce);
+}
+
+static void
+tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
+ unsigned int limit)
+{
+ struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
+ unsigned int entries;
+
+ CCB_LOCK(ccb);
+ entries = 0;
+ TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
+ entries++;
+ if (entries > limit)
+ tcp_fastopen_ccache_entry_drop(cce, ccb);
+ }
+ KASSERT(ccb->ccb_num_entries <= limit,
+ ("%s: ccb->ccb_num_entries %d exceeds limit %d", __func__,
+ ccb->ccb_num_entries, limit));
+ if (limit == 0) {
+ KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
+ ("%s: ccb->ccb_entries not empty", __func__));
+ ccb->ccb_num_entries = -1; /* disable bucket */
+ }
+ CCB_UNLOCK(ccb);
+}
+
+static void
+tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
+ struct tcp_fastopen_ccache_bucket *ccb)
+{
+
+ CCB_LOCK_ASSERT(ccb);
+
+ TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list