git: c68eed82a3dc - main - accf_tls: accept filter that waits for TLS handshake header

From: Gleb Smirnoff <glebius_at_FreeBSD.org>
Date: Thu, 25 Apr 2024 03:19:49 UTC
The branch main has been updated by glebius:

URL: https://cgit.FreeBSD.org/src/commit/?id=c68eed82a3dcadf0c826e9e150f59769f4c44f24

commit c68eed82a3dcadf0c826e9e150f59769f4c44f24
Author:     Gleb Smirnoff <glebius@FreeBSD.org>
AuthorDate: 2024-04-24 20:36:43 +0000
Commit:     Gleb Smirnoff <glebius@FreeBSD.org>
CommitDate: 2024-04-25 00:53:10 +0000

    accf_tls: accept filter that waits for TLS handshake header
---
 share/man/man9/Makefile       |   1 +
 share/man/man9/accf_tls.9     |  95 +++++++++++++++++++++++++++++++++++++
 sys/conf/NOTES                |   1 +
 sys/conf/files                |   1 +
 sys/conf/options              |   1 +
 sys/modules/Makefile          |   1 +
 sys/modules/accf_tls/Makefile |   7 +++
 sys/netinet/accf_tls.c        | 106 ++++++++++++++++++++++++++++++++++++++++++
 tests/sys/kern/socket_accf.c  |  61 +++++++++++++++++++++++-
 9 files changed, 273 insertions(+), 1 deletion(-)

diff --git a/share/man/man9/Makefile b/share/man/man9/Makefile
index b9fbc624d1ec..a5fa777d037c 100644
--- a/share/man/man9/Makefile
+++ b/share/man/man9/Makefile
@@ -5,6 +5,7 @@ MAN=	accept_filter.9 \
 	accf_data.9 \
 	accf_dns.9 \
 	accf_http.9 \
+	accf_tls.9 \
 	acl.9 \
 	alq.9 \
 	altq.9 \
diff --git a/share/man/man9/accf_tls.9 b/share/man/man9/accf_tls.9
new file mode 100644
index 000000000000..331ea2aa4fb8
--- /dev/null
+++ b/share/man/man9/accf_tls.9
@@ -0,0 +1,95 @@
+.\"
+.\" Copyright (c) 2024 Gleb Smirnoff <glebius@FreeBSD.org>
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE DEVELOPERS ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\" "
+.Dd April 24, 2024
+.Dt ACCF_TLS 9
+.Os
+.Sh NAME
+.Nm accf_tls
+.Nd "buffer incoming connections until a TLS handshake-like request arrives"
+.Sh SYNOPSIS
+.Nm options INET
+.Nm options ACCEPT_FILTER_TLS
+.Nm kldload accf_tls
+.Sh DESCRIPTION
+This is a filter to be placed on a socket that will be using
+.Xr accept 2
+to receive incoming HTTPS connections.
+It prevents the application from receiving the connected descriptor via
+.Xr accept 2
+until a full TLS handshake record has been buffered by the kernel.
+The
+.Nm
+filter will first check that the byte at offset 0 is
+.Va 0x16 ,
+which matches the handshake content type.
+Then it will read the 2-byte record length value at offset 3 and will
+continue waiting until the entire length of the record is buffered.
+If something other than
+.Va 0x16
+is at offset 0, the kernel will allow the application to receive the
+connection descriptor via
+.Xr accept 2 .
+.Pp
+The utility of
+.Nm
+is such that a server will not have to context switch several times
+before performing the initial parsing of the request.
+This effectively reduces the amount of required CPU utilization
+to handle incoming requests by keeping active
+processes in preforking servers such as Apache low
+and reducing the size of the file descriptor set that needs
+to be managed by interfaces such as
+.Fn select ,
+.Fn poll
+or
+.Fn kevent
+based servers.
+.Sh EXAMPLES
+Assuming ACCEPT_FILTER_TLS has been included in the kernel config
+file or the
+.Nm
+module
+has been loaded, this will enable the TLS accept filter
+on the socket
+.Fa sok .
+.Bd -literal -offset 0i
+	struct accept_filter_arg afa;
+
+	bzero(&afa, sizeof(afa));
+	strcpy(afa.af_name, "tlsready");
+	setsockopt(sok, SOL_SOCKET, SO_ACCEPTFILTER, &afa, sizeof(afa));
+.Ed
+.Sh SEE ALSO
+.Xr setsockopt 2 ,
+.Xr accept_filter 9
+.Sh HISTORY
+The
+.Nm
+accept filter was introduced in
+.Fx 15.0 .
+.Sh AUTHORS
+The
+.Nm
+filter was written by
+.An Maksim Yevmenkin .
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index cdeee4eb7fd6..216a96c2073c 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -1066,6 +1066,7 @@ options 	MBUF_PROFILING
 options 	ACCEPT_FILTER_DATA
 options 	ACCEPT_FILTER_DNS
 options 	ACCEPT_FILTER_HTTP
+options		ACCEPT_FILTER_TLS
 
 # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are
 # carried in TCP option 19. This option is commonly used to protect
diff --git a/sys/conf/files b/sys/conf/files
index 59d99b9f832b..29c02a503027 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4299,6 +4299,7 @@ netgraph/ng_vlan_rotate.c	optional netgraph_vlan_rotate
 netinet/accf_data.c		optional accept_filter_data inet
 netinet/accf_dns.c		optional accept_filter_dns inet
 netinet/accf_http.c		optional accept_filter_http inet
+netinet/accf_tls.c		optional accept_filter_tls inet
 netinet/if_ether.c		optional inet ether
 netinet/igmp.c			optional inet
 netinet/in.c			optional inet
diff --git a/sys/conf/options b/sys/conf/options
index 4e9d8a5b0741..fcab21ad7e78 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -425,6 +425,7 @@ ISP_FCTAPE_OFF		opt_isp.h
 ACCEPT_FILTER_DATA
 ACCEPT_FILTER_DNS
 ACCEPT_FILTER_HTTP
+ACCEPT_FILTER_TLS
 ALTQ			opt_global.h
 ALTQ_CBQ		opt_altq.h
 ALTQ_CDNR		opt_altq.h
diff --git a/sys/modules/Makefile b/sys/modules/Makefile
index d83a75ef2d4d..8af12fc02860 100644
--- a/sys/modules/Makefile
+++ b/sys/modules/Makefile
@@ -20,6 +20,7 @@ SUBDIR=	\
 	accf_data \
 	accf_dns \
 	accf_http \
+	accf_tls \
 	acl_nfs4 \
 	acl_posix1e \
 	${_acpi} \
diff --git a/sys/modules/accf_tls/Makefile b/sys/modules/accf_tls/Makefile
new file mode 100644
index 000000000000..f45b2f057f0a
--- /dev/null
+++ b/sys/modules/accf_tls/Makefile
@@ -0,0 +1,7 @@
+
+.PATH: ${SRCTOP}/sys/netinet
+
+KMOD=	accf_tls
+SRCS=	accf_tls.c
+
+.include <bsd.kmod.mk>
diff --git a/sys/netinet/accf_tls.c b/sys/netinet/accf_tls.c
new file mode 100644
index 000000000000..9f1ed7000474
--- /dev/null
+++ b/sys/netinet/accf_tls.c
@@ -0,0 +1,106 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2018-2024 Netflix
+ * Author: Maksim Yevmenkin <maksim.yevmenkin@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#define ACCEPT_FILTER_MOD
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/signalvar.h>
+#include <sys/sysctl.h>
+#include <sys/socketvar.h>
+
+static int sbfull(struct sockbuf *sb);
+static uint8_t sbmget8(struct mbuf *m, int offset);
+static int so_hastls(struct socket *so, void *arg, int waitflag);
+
+ACCEPT_FILTER_DEFINE(accf_tls, "tlsready", so_hastls, NULL, NULL, 1);
+
+/* Nonzero once sbused(sb) has reached sb_hiwat or sb_mbcnt has reached sb_mbmax. */
+static int
+sbfull(struct sockbuf *sb)
+{
+	return (sb->sb_mbcnt >= sb->sb_mbmax || sbused(sb) >= sb->sb_hiwat);
+}
+
+/* Return the byte at 'offset' in the chain at 'm', or 0 if out of range. */
+static uint8_t
+sbmget8(struct mbuf *m, int offset)
+{
+	struct mbuf *n = (m != NULL) ? m->m_nextpkt : NULL;
+
+	while (m != NULL && offset >= m->m_len) {
+		offset -= m->m_len;
+		m = m->m_next;
+		if (m == NULL && (m = n) != NULL)
+			n = m->m_nextpkt;	/* switched to the next packet */
+	}
+
+	/* Ran out of mbufs: report 0 instead of dereferencing NULL. */
+	return (m != NULL ? *(mtod(m, uint8_t *) + offset) : 0);
+}
+
+/* Callback: SU_OK keeps the socket waiting, SU_ISCONNECTED stops the wait. */
+static int
+so_hastls(struct socket *so, void *arg, int waitflag)
+{
+	struct sockbuf	*rcv = &so->so_rcv;
+	uint16_t	fraglen;
+	int		have;
+
+	/* Buffer is full or no more data can be received: stop waiting. */
+	if (sbfull(rcv) || (rcv->sb_state & SBS_CANTRCVMORE) != 0)
+		return (SU_ISCONNECTED);
+
+	/*
+	 * Wait for the 5 header bytes that precede the fragment:
+	 * struct {
+	 * 	ContentType type;		- 1 byte, 0x16 handshake
+	 * 	ProtocolVersion version;	- 2 bytes (major, minor)
+	 * 	uint16 length;			- 2 bytes, NBO, 2^14 max
+	 * 	opaque fragment[TLSPlaintext.length];
+	 * } TLSPlaintext;
+	 */
+	have = sbavail(rcv);
+	if (have < 5)
+		return (SU_OK);
+
+	/* Anything but a handshake record is handed over right away. */
+	if (sbmget8(rcv->sb_mb, 0) != 0x16)
+		return (SU_ISCONNECTED);
+
+	/* Big-endian length sits at offsets 3-4; wait for header + fragment. */
+	fraglen = (uint16_t)((sbmget8(rcv->sb_mb, 3) << 8) |
+	    sbmget8(rcv->sb_mb, 4));
+	if (fraglen > 16384)
+		return (SU_ISCONNECTED);	/* above the 2^14 maximum */
+
+	return (have < 5 + (int)fraglen ? SU_OK : SU_ISCONNECTED);
+}
diff --git a/tests/sys/kern/socket_accf.c b/tests/sys/kern/socket_accf.c
index 384062810c8d..747bcda87010 100644
--- a/tests/sys/kern/socket_accf.c
+++ b/tests/sys/kern/socket_accf.c
@@ -1,7 +1,7 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
- * Copyright (c) 2022 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2022-2024 Gleb Smirnoff <glebius@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
 #include <netinet/in.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <stdlib.h>
 
 #include <atf-c.h>
 
@@ -151,10 +152,68 @@ ATF_TC_BODY(http, tc)
 	ATF_REQUIRE((a = accept(l, NULL, 0)) > 0);
 }
 
+ATF_TC_WITHOUT_HEAD(tls);
+ATF_TC_BODY(tls, tc)
+{
+	struct accept_filter_arg afa = {
+		.af_name = "tlsready"
+	};
+	struct sockaddr_in sin;
+	int l, s, a;
+
+	l = listensock(&sin);
+	accfon(l, &afa);
+	s = clientsock(&sin);
+
+	/* 1) No data. */
+	ATF_REQUIRE(accept(l, NULL, 0) == -1);
+	ATF_REQUIRE(errno == EAGAIN);
+
+	/* 2) Less than 5 bytes: accept() must still fail with EAGAIN. */
+	ATF_REQUIRE(usend(s, "foo", sizeof("foo")) == sizeof("foo"));
+	ATF_REQUIRE(accept(l, NULL, 0) == -1);
+	ATF_REQUIRE(errno == EAGAIN);
+
+	/* 3) Something that doesn't look like TLS handshake. */
+	ATF_REQUIRE(usend(s, "bar", sizeof("bar")) == sizeof("bar"));
+	ATF_REQUIRE((a = accept(l, NULL, 0)) > 0);
+
+	close(s);
+	close(a);
+
+	/* 4) Partial TLS record; length is nonzero so the header is partial. */
+	s = clientsock(&sin);
+	struct {
+		uint8_t  type;
+		uint16_t version;
+		uint16_t length;
+	} __attribute__((__packed__)) header = {
+		.type = 0x16,
+		.length = htons((uint16_t)(1 + arc4random() % 16383)),
+	};
+	_Static_assert(sizeof(header) == 5, "");
+	ATF_REQUIRE(usend(s, &header, sizeof(header)) == sizeof(header));
+	ssize_t sent = 0;
+	do {
+		char buf[1024] = { 0 };
+		size_t len;
+
+		ATF_REQUIRE(accept(l, NULL, 0) == -1);
+		ATF_REQUIRE(errno == EAGAIN);
+		/* Feed a random-sized, zero-filled chunk of the fragment. */
+		len = arc4random() % sizeof(buf);
+		ATF_REQUIRE(usend(s, buf, len) == (ssize_t)len);
+		sent += len;
+	} while (sent < ntohs(header.length));
+	/* TLS header with bytes >= declared length. */
+	ATF_REQUIRE((a = accept(l, NULL, 0)) > 0);
+}
+
 ATF_TP_ADD_TCS(tp)
 {
 	ATF_TP_ADD_TC(tp, data);
 	ATF_TP_ADD_TC(tp, http);
+	ATF_TP_ADD_TC(tp, tls);
 
 	return (atf_no_error());
 }