git: 010ee43f5673 - main - pf: initial SCTP support

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Fri, 21 Jul 2023 10:32:37 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=010ee43f5673eea4c86f846893eadc3c5529b2f8

commit 010ee43f5673eea4c86f846893eadc3c5529b2f8
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2023-04-27 08:58:02 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2023-07-21 10:32:18 +0000

    pf: initial SCTP support
    
    Basic state tracking for SCTP. This means we scan through the packet to
    identify the different chunks (so we can identify state changes).
    
    MFC after:      3 weeks
    Sponsored by:   Orange Business Services
    Differential Revision:  https://reviews.freebsd.org/D40862
---
 sbin/pfctl/pf_print_state.c |  34 +++++++++
 sys/net/pfvar.h             |  13 ++++
 sys/netpfil/pf/pf.c         | 135 +++++++++++++++++++++++++++++++++
 sys/netpfil/pf/pf_norm.c    | 180 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 362 insertions(+)

diff --git a/sbin/pfctl/pf_print_state.c b/sbin/pfctl/pf_print_state.c
index f0ad9a427006..cabfc78de97a 100644
--- a/sbin/pfctl/pf_print_state.c
+++ b/sbin/pfctl/pf_print_state.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <net/if.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
+#include <netinet/sctp.h>
 #include <net/pfvar.h>
 #include <arpa/inet.h>
 #include <netdb.h>
@@ -206,6 +207,36 @@ print_seq(struct pfctl_state_peer *p)
 		    p->seqhi - p->seqlo);
 }
 
+/* Return the printable name of an SCTP_* state value, or "?" if unknown. */
+static const char *
+sctp_state_name(int state)
+{
+	switch (state) {
+	case SCTP_CLOSED:
+		return ("CLOSED");
+	case SCTP_BOUND:
+		return ("BOUND");
+	case SCTP_LISTEN:
+		return ("LISTEN");
+	case SCTP_COOKIE_WAIT:
+		return ("COOKIE_WAIT");
+	case SCTP_COOKIE_ECHOED:
+		return ("COOKIE_ECHOED");
+	case SCTP_ESTABLISHED:
+		return ("ESTABLISHED");
+	case SCTP_SHUTDOWN_SENT:
+		return ("SHUTDOWN_SENT");
+	case SCTP_SHUTDOWN_RECEIVED:
+		return ("SHUTDOWN_RECEIVED");
+	case SCTP_SHUTDOWN_ACK_SENT:
+		return ("SHUTDOWN_ACK_SENT");
+	case SCTP_SHUTDOWN_PENDING:
+		return ("SHUTDOWN_PENDING");
+	default:
+		return ("?");
+	}
+}
+
 void
 print_state(struct pfctl_state *s, int opts)
 {
@@ -300,6 +331,9 @@ print_state(struct pfctl_state *s, int opts)
 		const char *states[] = PFUDPS_NAMES;
 
 		printf("   %s:%s\n", states[src->state], states[dst->state]);
+	} else if (proto == IPPROTO_SCTP) {
+		printf("   %s:%s\n", sctp_state_name(src->state),
+		    sctp_state_name(dst->state));
 #ifndef INET6
 	} else if (proto != IPPROTO_ICMP && src->state < PFOTHERS_NSTATES &&
 	    dst->state < PFOTHERS_NSTATES) {
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index e0c5fb5214a8..86e15f7c1709 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -60,6 +60,7 @@
 #include <netinet/ip.h>
 #include <netinet/tcp.h>
 #include <netinet/udp.h>
+#include <netinet/sctp.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp6.h>
 #endif
@@ -1541,6 +1542,7 @@ struct pf_pdesc {
 	union pf_headers {
 		struct tcphdr		tcp;
 		struct udphdr		udp;
+		struct sctphdr		sctp;
 		struct icmp		icmp;
 #ifdef INET6
 		struct icmp6_hdr	icmp6;
@@ -1570,6 +1572,15 @@ struct pf_pdesc {
 	u_int8_t	 dir;		/* direction */
 	u_int8_t	 sidx;		/* key index for source */
 	u_int8_t	 didx;		/* key index for destination */
+#define PFDESC_SCTP_INIT	0x0001
+#define PFDESC_SCTP_INIT_ACK	0x0002
+#define PFDESC_SCTP_COOKIE	0x0004
+#define PFDESC_SCTP_ABORT	0x0008
+#define PFDESC_SCTP_SHUTDOWN	0x0010
+#define PFDESC_SCTP_SHUTDOWN_COMPLETE	0x0020
+#define PFDESC_SCTP_DATA	0x0040
+#define PFDESC_SCTP_OTHER	0x0080
+	u_int16_t	 sctp_flags;	/* PFDESC_SCTP_* bits set by the chunk scan */
 };
 #endif
 
@@ -2270,6 +2281,8 @@ int	pf_normalize_tcp_init(struct mbuf *, int, struct pf_pdesc *,
 int	pf_normalize_tcp_stateful(struct mbuf *, int, struct pf_pdesc *,
 	    u_short *, struct tcphdr *, struct pf_kstate *,
 	    struct pf_state_peer *, struct pf_state_peer *, int *);
+int	pf_normalize_sctp(int, struct pfi_kkif *, struct mbuf *, int,
+	    int, void *, struct pf_pdesc *);
 u_int32_t
 	pf_state_expires(const struct pf_kstate *);
 void	pf_purge_expired_fragments(void);
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 490a4130f025..a721385f761f 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -309,6 +309,9 @@ static int		 pf_test_state_udp(struct pf_kstate **,
 static int		 pf_test_state_icmp(struct pf_kstate **,
 			    struct pfi_kkif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
+static int		 pf_test_state_sctp(struct pf_kstate **,
+			    struct pfi_kkif *, struct mbuf *, int,
+			    void *, struct pf_pdesc *, u_short *);
 static int		 pf_test_state_other(struct pf_kstate **,
 			    struct pfi_kkif *, struct mbuf *, struct pf_pdesc *);
 static u_int16_t	 pf_calc_mss(struct pf_addr *, sa_family_t,
@@ -4244,6 +4247,11 @@ pf_test_rule(struct pf_krule **rm, struct pf_kstate **sm, struct pfi_kkif *kif,
 		dport = pd->hdr.udp.uh_dport;
 		hdrlen = sizeof(pd->hdr.udp);
 		break;
+	case IPPROTO_SCTP:
+		sport = pd->hdr.sctp.src_port;
+		dport = pd->hdr.sctp.dest_port;
+		hdrlen = sizeof(pd->hdr.sctp);
+		break;
 #ifdef INET
 	case IPPROTO_ICMP:
 		if (pd->af != AF_INET)
@@ -4702,6 +4710,11 @@ pf_create_state(struct pf_krule *r, struct pf_krule *nr, struct pf_krule *a,
 		pf_set_protostate(s, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
 		s->timeout = PFTM_UDP_FIRST_PACKET;
 		break;
+	case IPPROTO_SCTP:
+		pf_set_protostate(s, PF_PEER_SRC, SCTP_COOKIE_WAIT);
+		pf_set_protostate(s, PF_PEER_DST, SCTP_CLOSED);
+		s->timeout = PFTM_TCP_FIRST_PACKET;
+		break;
 	case IPPROTO_ICMP:
 #ifdef INET6
 	case IPPROTO_ICMPV6:
@@ -5669,6 +5682,66 @@ pf_test_state_udp(struct pf_kstate **state, struct pfi_kkif *kif,
 	return (PF_PASS);
 }
 
+static int	/* PF_PASS once a matching SCTP state has been updated */
+pf_test_state_sctp(struct pf_kstate **state, struct pfi_kkif *kif,
+    struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
+{
+	struct pf_state_key_cmp	 key;
+	struct pf_state_peer	*src; /* peer record picked by packet direction */
+	struct sctphdr		*sh = &pd->hdr.sctp;
+	u_int8_t		 psrc; /* PF_PEER_SRC or PF_PEER_DST, matching src */
+
+	bzero(&key, sizeof(key));
+	key.af = pd->af;
+	key.proto = IPPROTO_SCTP;
+	if (pd->dir == PF_IN)	{	/* wire side, straight */
+		PF_ACPY(&key.addr[0], pd->src, key.af);
+		PF_ACPY(&key.addr[1], pd->dst, key.af);
+		key.port[0] = sh->src_port;
+		key.port[1] = sh->dest_port;
+	} else {			/* stack side, reverse */
+		PF_ACPY(&key.addr[1], pd->src, key.af);
+		PF_ACPY(&key.addr[0], pd->dst, key.af);
+		key.port[1] = sh->src_port;
+		key.port[0] = sh->dest_port;
+	}
+
+	STATE_LOOKUP(kif, &key, *state, pd);	/* NOTE(review): presumably returns early when no state matches - confirm */
+
+	if (pd->dir == (*state)->direction) {
+		src = &(*state)->src;
+		psrc = PF_PEER_SRC;
+	} else {
+		src = &(*state)->dst;
+		psrc = PF_PEER_DST;
+	}
+
+	/* Track state: only ever raise the chosen peer to a numerically higher state. */
+	if (pd->sctp_flags & PFDESC_SCTP_INIT) {
+		if (src->state < SCTP_COOKIE_WAIT) {
+			pf_set_protostate(*state, psrc, SCTP_COOKIE_WAIT);
+			(*state)->timeout = PFTM_TCP_OPENING;
+		}
+	}
+	if (pd->sctp_flags & PFDESC_SCTP_COOKIE) {
+		if (src->state < SCTP_ESTABLISHED) {
+			pf_set_protostate(*state, psrc, SCTP_ESTABLISHED);
+			(*state)->timeout = PFTM_TCP_ESTABLISHED;
+		}
+	}
+	if (pd->sctp_flags & (PFDESC_SCTP_SHUTDOWN | PFDESC_SCTP_ABORT |
+	    PFDESC_SCTP_SHUTDOWN_COMPLETE)) {
+		if (src->state < SCTP_SHUTDOWN_PENDING) {
+			pf_set_protostate(*state, psrc, SCTP_SHUTDOWN_PENDING);
+			(*state)->timeout = PFTM_TCP_CLOSING;
+		}
+	}
+
+	(*state)->expire = time_uptime;	/* stamped on every packet that reaches here */
+
+	return (PF_PASS);
+}
+
 static int
 pf_test_state_icmp(struct pf_kstate **state, struct pfi_kkif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
@@ -7365,6 +7438,37 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0,
 		break;
 	}
 
+	case IPPROTO_SCTP: {
+		if (!pf_pull_hdr(m, off, &pd.hdr.sctp, sizeof(pd.hdr.sctp),
+		    &action, &reason, AF_INET)) {
+			if (action != PF_PASS)
+				pd.act.log |= PF_LOG_FORCE;
+			goto done;
+		}
+		pd.sport = &pd.hdr.sctp.src_port;
+		pd.dport = &pd.hdr.sctp.dest_port;
+		if (pd.hdr.sctp.src_port == 0 || pd.hdr.sctp.dest_port == 0) {
+			action = PF_DROP;
+			REASON_SET(&reason, PFRES_SHORT);
+			goto done;
+		}
+		action = pf_normalize_sctp(dir, kif, m, 0, off, h, &pd);
+		if (action == PF_DROP)
+			goto done;
+		action = pf_test_state_sctp(&s, kif, m, off, h, &pd,
+		    &reason);
+		if (action == PF_PASS) {
+			if (V_pfsync_update_state_ptr != NULL)
+				V_pfsync_update_state_ptr(s);
+			r = s->rule.ptr;
+			a = s->anchor.ptr;
+		} else {
+			action = pf_test_rule(&r, &s, kif, m, off,
+			    &pd, &a, &ruleset, inp);
+		}
+		break;
+	}
+
 	case IPPROTO_ICMP: {
 		if (!pf_pull_hdr(m, off, &pd.hdr.icmp, ICMP_MINLEN,
 		    &action, &reason, AF_INET)) {
@@ -7882,6 +7986,37 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb
 		break;
 	}
 
+	case IPPROTO_SCTP: {
+		if (!pf_pull_hdr(m, off, &pd.hdr.sctp, sizeof(pd.hdr.sctp),
+		    &action, &reason, AF_INET6)) {
+			if (action != PF_PASS)
+				pd.act.log |= PF_LOG_FORCE;
+			goto done;
+		}
+		pd.sport = &pd.hdr.sctp.src_port;
+		pd.dport = &pd.hdr.sctp.dest_port;
+		if (pd.hdr.sctp.src_port == 0 || pd.hdr.sctp.dest_port == 0) {
+			action = PF_DROP;
+			REASON_SET(&reason, PFRES_SHORT);
+			goto done;
+		}
+		action = pf_normalize_sctp(dir, kif, m, 0, off, h, &pd);
+		if (action == PF_DROP)
+			goto done;
+		action = pf_test_state_sctp(&s, kif, m, off, h, &pd,
+		    &reason);
+		if (action == PF_PASS) {
+			if (V_pfsync_update_state_ptr != NULL)
+				V_pfsync_update_state_ptr(s);
+			r = s->rule.ptr;
+			a = s->anchor.ptr;
+		} else {
+			action = pf_test_rule(&r, &s, kif, m, off,
+			    &pd, &a, &ruleset, inp);
+		}
+		break;
+	}
+
 	case IPPROTO_ICMP: {
 		action = PF_DROP;
 		DPFPRINTF(PF_DEBUG_MISC,
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index 5f6d9c1635cd..38312712a0ad 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -56,6 +56,8 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
+#include <netinet/sctp_constants.h>
+#include <netinet/sctp_header.h>
 
 #ifdef INET6
 #include <netinet/ip6.h>
@@ -2016,6 +2018,184 @@ pf_normalize_mss(struct mbuf *m, int off, struct pf_pdesc *pd)
 	return (0);
 }
 
+static int	/* PF_PASS if all chunks parse and the chunk mix is allowed, else PF_DROP */
+pf_scan_sctp(struct mbuf *m, int ipoff, int off, struct pf_pdesc *pd)
+{
+	struct sctp_chunkhdr ch = { };
+	int chunk_off = sizeof(struct sctphdr);	/* next chunk, relative to off */
+	int chunk_start;			/* current chunk, relative to off */
+
+	while (off + chunk_off < pd->tot_len) {
+		if (!pf_pull_hdr(m, off + chunk_off, &ch, sizeof(ch), NULL,
+		    NULL, pd->af))
+			return (PF_DROP);
+
+		/* Length includes the header, this must be at least 4. */
+		if (ntohs(ch.chunk_length) < 4)
+			return (PF_DROP);
+
+		chunk_start = chunk_off;
+		chunk_off += roundup(ntohs(ch.chunk_length), 4);	/* step over 4-byte padding */
+
+		switch (ch.chunk_type) {
+		case SCTP_INITIATION: {
+			struct sctp_init_chunk init;
+
+			if (!pf_pull_hdr(m, off + chunk_start, &init,
+			    sizeof(init), NULL, NULL, pd->af))
+				return (PF_DROP);
+
+			/*
+			 * RFC 9260, Section 3.3.2, "The Initiate Tag is allowed to have
+			 * any value except 0."
+			 */
+			if (init.init.initiate_tag == 0)
+				return (PF_DROP);
+			if (init.init.num_inbound_streams == 0)
+				return (PF_DROP);
+			if (init.init.num_outbound_streams == 0)
+				return (PF_DROP);
+			if (ntohl(init.init.a_rwnd) < SCTP_MIN_RWND)
+				return (PF_DROP);
+
+			/*
+			 * RFC 9260, Section 3.1, INIT chunks MUST have zero
+			 * verification tag.
+			 */
+			if (pd->hdr.sctp.v_tag != 0)
+				return (PF_DROP);
+
+			pd->sctp_flags |= PFDESC_SCTP_INIT;
+			break;
+		}
+		case SCTP_INITIATION_ACK:
+			pd->sctp_flags |= PFDESC_SCTP_INIT_ACK;
+			break;
+		case SCTP_ABORT_ASSOCIATION:
+			pd->sctp_flags |= PFDESC_SCTP_ABORT;
+			break;
+		case SCTP_SHUTDOWN:
+		case SCTP_SHUTDOWN_ACK:
+			pd->sctp_flags |= PFDESC_SCTP_SHUTDOWN;
+			break;
+		case SCTP_SHUTDOWN_COMPLETE:
+			pd->sctp_flags |= PFDESC_SCTP_SHUTDOWN_COMPLETE;
+			break;
+		case SCTP_COOKIE_ECHO:
+		case SCTP_COOKIE_ACK:
+			pd->sctp_flags |= PFDESC_SCTP_COOKIE;
+			break;
+		case SCTP_DATA:
+			pd->sctp_flags |= PFDESC_SCTP_DATA;
+			break;
+		default:
+			pd->sctp_flags |= PFDESC_SCTP_OTHER;
+			break;
+		}
+	}
+
+	/* Validate chunk lengths vs. packet length. */
+	if (off + chunk_off != pd->tot_len)
+		return (PF_DROP);
+
+	/*
+	 * INIT, INIT_ACK or SHUTDOWN_COMPLETE chunks must always be the only
+	 * one in a packet.
+	 */
+	if ((pd->sctp_flags & PFDESC_SCTP_INIT) &&
+	    (pd->sctp_flags & ~PFDESC_SCTP_INIT))
+		return (PF_DROP);
+	if ((pd->sctp_flags & PFDESC_SCTP_INIT_ACK) &&
+	    (pd->sctp_flags & ~PFDESC_SCTP_INIT_ACK))
+		return (PF_DROP);
+	if ((pd->sctp_flags & PFDESC_SCTP_SHUTDOWN_COMPLETE) &&
+	    (pd->sctp_flags & ~PFDESC_SCTP_SHUTDOWN_COMPLETE))
+		return (PF_DROP);
+
+	return (PF_PASS);
+}
+
+int	/* PF_PASS, or PF_DROP when the SCTP packet is malformed */
+pf_normalize_sctp(int dir, struct pfi_kkif *kif, struct mbuf *m, int ipoff,
+    int off, void *h, struct pf_pdesc *pd)
+{
+	struct pf_krule	*r, *rm = NULL;	/* rm: scrub rule that matched, if any */
+	struct sctphdr	*sh = &pd->hdr.sctp;
+	u_short		 reason;
+	sa_family_t	 af = pd->af;
+	int		 srs;
+
+	PF_RULES_RASSERT();
+
+	/* Unconditionally scan the SCTP packet, because we need to look for
+	 * things like shutdown and asconf chunks. */
+	if (pf_scan_sctp(m, ipoff, off, pd) != PF_PASS)
+		goto sctp_drop;
+
+	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
+	/* Check if there are any scrub rules. Lack of scrub rules means
+	 * enforced packet normalization operation just like in OpenBSD. */
+	srs = (r != NULL);
+	while (r != NULL) {
+		pf_counter_u64_add(&r->evaluations, 1);
+		if (pfi_kkif_match(r->kif, kif) == r->ifnot)
+			r = r->skip[PF_SKIP_IFP].ptr;
+		else if (r->direction && r->direction != dir)
+			r = r->skip[PF_SKIP_DIR].ptr;
+		else if (r->af && r->af != af)
+			r = r->skip[PF_SKIP_AF].ptr;
+		else if (r->proto && r->proto != pd->proto)
+			r = r->skip[PF_SKIP_PROTO].ptr;
+		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
+		    r->src.neg, kif, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
+		else if (r->src.port_op && !pf_match_port(r->src.port_op,
+			    r->src.port[0], r->src.port[1], sh->src_port))
+			r = r->skip[PF_SKIP_SRC_PORT].ptr;
+		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
+		    r->dst.neg, NULL, M_GETFIB(m)))
+			r = r->skip[PF_SKIP_DST_ADDR].ptr;
+		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
+			    r->dst.port[0], r->dst.port[1], sh->dest_port))
+			r = r->skip[PF_SKIP_DST_PORT].ptr;
+		else {
+			rm = r;
+			break;
+		}
+	}
+
+	if (srs) {
+		/* With scrub rules present SCTP normalization happens only
+		 * if one of rules has matched and it's not a "no scrub" rule */
+		if (rm == NULL || rm->action == PF_NOSCRUB)
+			return (PF_PASS);
+
+		pf_counter_u64_critical_enter();
+		pf_counter_u64_add_protected(&rm->packets[dir == PF_OUT], 1);
+		pf_counter_u64_add_protected(&rm->bytes[dir == PF_OUT], pd->tot_len);
+		pf_counter_u64_critical_exit();
+	}
+
+	/* Verify we're a multiple of 4 bytes long */
+	if ((pd->tot_len - off - sizeof(struct sctphdr)) % 4)
+		goto sctp_drop;
+
+	/* INIT chunk needs to be the only chunk */
+	if (pd->sctp_flags & PFDESC_SCTP_INIT)
+		if (pd->sctp_flags & ~PFDESC_SCTP_INIT)
+			goto sctp_drop;
+
+	return (PF_PASS);
+
+sctp_drop:
+	REASON_SET(&reason, PFRES_NORM);
+	if (rm != NULL && rm->log)	/* rm, not r: r is unset if the scan failed */
+		PFLOG_PACKET(kif, m, af, reason, rm, NULL, NULL, pd,
+		    1);	/* log with the packet's own family, not a fixed AF_INET */
+
+	return (PF_DROP);
+}
+
 #ifdef INET
 void
 pf_scrub_ip(struct mbuf **m0, struct pf_pdesc *pd)