git: 7d5e02b01577 - main - pf: allow ICMP messages related to an SCTP state to pass

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Sat, 04 Jan 2025 14:49:54 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=7d5e02b01577047290e937399accc02e6b184ce9

commit 7d5e02b01577047290e937399accc02e6b184ce9
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2024-12-20 13:38:41 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2025-01-04 14:37:41 +0000

    pf: allow ICMP messages related to an SCTP state to pass
    
    Much like we already do for TCP and UDP we should also parse SCTP-in-ICMP
    messages to see if they apply to an SCTP connection we've already allowed. If so
    we should allow the ICMP packet to pass, even if we'd otherwise block it.
    
    Add a test case where we generate an 'ICMP unreachable - need to frag' packet
    and check that it passes through pf.
    
    MFC after:      2 weeks
    Sponsored by:   Orange Business Services
    Differential Revision:  https://reviews.freebsd.org/D48170
---
 sys/netpfil/pf/pf.c          | 137 ++++++++++++++++++++++++++++++++++++++++++-
 tests/sys/netpfil/pf/sctp.sh |  86 +++++++++++++++++++++++++++
 2 files changed, 221 insertions(+), 2 deletions(-)

diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 2a0dd2c3933a..335b192d454d 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -7736,8 +7736,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
 	if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &multi,
 	    &virtual_id, &virtual_type) == 0) {
 		/*
-		 * ICMP query/reply message not related to a TCP/UDP packet.
-		 * Search for an ICMP state.
+		 * ICMP query/reply message not related to a TCP/UDP/SCTP
+		 * packet. Search for an ICMP state.
 		 */
 		ret = pf_icmp_state_lookup(&key, pd, state, pd->dir,
 		    virtual_id, virtual_type, icmp_dir, &iidx,
@@ -8242,6 +8242,139 @@ pf_test_state_icmp(struct pf_kstate **state, struct pf_pdesc *pd,
 			break;
 		}
 #ifdef INET
+		case IPPROTO_SCTP: {
+			struct sctphdr		sh;
+			struct pf_state_peer	*src;
+			int			 copyback = 0;
+
+			if (! pf_pull_hdr(pd->m, pd2.off, &sh, sizeof(sh), NULL, reason,
+			    pd2.af)) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: ICMP error message too short "
+				    "(sctp)\n"));
+				return (PF_DROP);
+			}
+
+			key.af = pd2.af;
+			key.proto = IPPROTO_SCTP;
+			PF_ACPY(&key.addr[pd2.sidx], pd2.src, key.af);
+			PF_ACPY(&key.addr[pd2.didx], pd2.dst, key.af);
+			key.port[pd2.sidx] = sh.src_port;
+			key.port[pd2.didx] = sh.dest_port;
+
+			STATE_LOOKUP(&key, *state, pd);
+
+			if (pd->dir == (*state)->direction) {
+				if (PF_REVERSED_KEY((*state)->key, pd->af))
+					src = &(*state)->src;
+				else
+					src = &(*state)->dst;
+			} else {
+				if (PF_REVERSED_KEY((*state)->key, pd->af))
+					src = &(*state)->dst;
+				else
+					src = &(*state)->src;
+			}
+
+			if (src->scrub->pfss_v_tag != sh.v_tag) {
+				DPFPRINTF(PF_DEBUG_MISC,
+				    ("pf: ICMP error message has incorrect "
+				    "SCTP v_tag\n"));
+				return (PF_DROP);
+			}
+
+			/* translate source/destination address, if necessary */
+			if ((*state)->key[PF_SK_WIRE] !=
+			    (*state)->key[PF_SK_STACK]) {
+
+				struct pf_state_key	*nk;
+
+				if (PF_REVERSED_KEY((*state)->key, pd->af))
+					nk = (*state)->key[pd->sidx];
+				else
+					nk = (*state)->key[pd->didx];
+
+#if defined(INET) && defined(INET6)
+				int	 afto, sidx, didx;
+
+				afto = pd->af != nk->af;
+				sidx = afto ? pd2.didx : pd2.sidx;
+				didx = afto ? pd2.sidx : pd2.didx;
+
+				if (afto) {
+					if (pf_translate_icmp_af(nk->af,
+					    &pd->hdr.icmp))
+						return (PF_DROP);
+					m_copyback(pd->m, pd->off,
+					    sizeof(struct icmp6_hdr),
+					    (c_caddr_t)&pd->hdr.icmp6);
+					if (pf_change_icmp_af(pd->m, ipoff2, pd,
+					    &pd2, &nk->addr[sidx],
+					    &nk->addr[didx], pd->af,
+					    nk->af))
+						return (PF_DROP);
+					if (nk->af == AF_INET)
+						pd->proto = IPPROTO_ICMP;
+					else
+						pd->proto = IPPROTO_ICMPV6;
+					sh.src_port = nk->port[sidx];
+					sh.dest_port = nk->port[didx];
+					m_copyback(pd2.m, pd2.off, sizeof(sh), (c_caddr_t)&sh);
+					PF_ACPY(pd->src,
+					    &nk->addr[pd2.sidx], nk->af);
+					PF_ACPY(pd->dst,
+					    &nk->addr[pd2.didx], nk->af);
+					pd->naf = nk->af;
+					return (PF_AFRT);
+				}
+#endif
+
+				if (PF_ANEQ(pd2.src,
+				    &nk->addr[pd2.sidx], pd2.af) ||
+				    nk->port[pd2.sidx] != sh.src_port)
+					pf_change_icmp(pd2.src, &sh.src_port,
+					    daddr, &nk->addr[pd2.sidx],
+					    nk->port[pd2.sidx], NULL,
+					    pd2.ip_sum, icmpsum,
+					    pd->ip_sum, 0, pd2.af);
+
+				if (PF_ANEQ(pd2.dst,
+				    &nk->addr[pd2.didx], pd2.af) ||
+				    nk->port[pd2.didx] != sh.dest_port)
+					pf_change_icmp(pd2.dst, &sh.dest_port,
+					    saddr, &nk->addr[pd2.didx],
+					    nk->port[pd2.didx], NULL,
+					    pd2.ip_sum, icmpsum,
+					    pd->ip_sum, 0, pd2.af);
+				copyback = 1;
+			}
+
+			if (copyback) {
+				switch (pd2.af) {
+#ifdef INET
+				case AF_INET:
+					m_copyback(pd->m, pd->off, ICMP_MINLEN,
+					    (caddr_t )&pd->hdr.icmp);
+					m_copyback(pd->m, ipoff2, sizeof(h2),
+					    (caddr_t )&h2);
+					break;
+#endif /* INET */
+#ifdef INET6
+				case AF_INET6:
+					m_copyback(pd->m, pd->off,
+					    sizeof(struct icmp6_hdr),
+					    (caddr_t )&pd->hdr.icmp6);
+					m_copyback(pd->m, ipoff2, sizeof(h2_6),
+					    (caddr_t )&h2_6);
+					break;
+#endif /* INET6 */
+				}
+				m_copyback(pd->m, pd2.off, sizeof(sh), (caddr_t)&sh);
+			}
+
+			return (PF_PASS);
+			break;
+		}
 		case IPPROTO_ICMP: {
 			struct icmp	*iih = &pd2.hdr.icmp;
 
diff --git a/tests/sys/netpfil/pf/sctp.sh b/tests/sys/netpfil/pf/sctp.sh
index 95a780747d82..563103827fac 100644
--- a/tests/sys/netpfil/pf/sctp.sh
+++ b/tests/sys/netpfil/pf/sctp.sh
@@ -745,6 +745,91 @@ timeout_cleanup()
 	pft_cleanup
 }
 
+atf_test_case "related_icmp" "cleanup"
+related_icmp_head()
+{
+	atf_set descr 'Verify that ICMP messages related to an SCTP connection are allowed'
+	atf_set require.user root
+}
+
+related_icmp_body()
+{
+	sctp_init
+
+	epair_cl=$(vnet_mkepair)
+	epair_rtr=$(vnet_mkepair)
+	epair_srv=$(vnet_mkepair)
+
+	ifconfig ${epair_cl}a 192.0.2.1/24 up
+	route add default 192.0.2.2
+
+	vnet_mkjail rtr ${epair_cl}b ${epair_rtr}a
+	jexec rtr ifconfig ${epair_cl}b 192.0.2.2/24 up
+	jexec rtr ifconfig ${epair_rtr}a 198.51.100.1/24 up
+	jexec rtr sysctl net.inet.ip.forwarding=1
+	jexec rtr route add default 198.51.100.2
+
+	vnet_mkjail rtr2 ${epair_rtr}b ${epair_srv}a
+	jexec rtr2 ifconfig ${epair_rtr}b 198.51.100.2/24 up
+	jexec rtr2 ifconfig ${epair_srv}a 203.0.113.1/24 up
+	jexec rtr2 ifconfig ${epair_srv}a mtu 1300
+	jexec rtr2 sysctl net.inet.ip.forwarding=1
+	jexec rtr2 route add default 198.51.100.1
+
+	vnet_mkjail srv ${epair_srv}b
+	jexec srv ifconfig ${epair_srv}b 203.0.113.2/24 up
+	jexec srv ifconfig ${epair_srv}b mtu 1300
+	jexec srv route add default 203.0.113.1
+
+	# Sanity checks
+	atf_check -s exit:0 -o ignore \
+	    ping -c 1 192.0.2.2
+	atf_check -s exit:0 -o ignore \
+	    ping -c 1 198.51.100.1
+	atf_check -s exit:0 -o ignore \
+	    ping -c 1 198.51.100.2
+	atf_check -s exit:0 -o ignore \
+	    ping -c 1 203.0.113.1
+	atf_check -s exit:0 -o ignore \
+	    ping -c 1 203.0.113.2
+
+	jexec rtr pfctl -e
+	pft_set_rules rtr \
+	    "block proto icmp" \
+	    "pass proto sctp"
+
+	# Make sure SCTP traffic passes
+	echo "foo" | jexec srv nc --sctp -N -l 1234 &
+	sleep 1
+
+	out=$(nc --sctp -N -w 3 203.0.113.2 1234)
+	if [ "$out" != "foo" ]; then
+		jexec rtr pfctl -ss -vv
+		jexec rtr pfctl -sr -vv
+		atf_fail "SCTP connection failed"
+	fi
+
+	# Do we see ICMP traffic if we send overly large traffic?
+	echo "foo" | jexec srv nc --sctp -N -l 1234 >/dev/null &
+	sleep 1
+
+	atf_check -s exit:0 -o not-match:".*destination unreachable:.*" \
+	    netstat -s -p icmp
+
+	# Generate traffic that will be fragmented by rtr2, and will provoke an
+	# ICMP unreachable - need to frag (mtu 1300) message
+	dd if=/dev/random bs=1600 count=1 | nc --sctp -N -w 3 203.0.113.2 1234
+
+	# We'd expect to see an ICMP message
+	atf_check -s exit:0 -o match:".*destination unreachable: 1" \
+	    netstat -s -p icmp
+}
+
+related_icmp_cleanup()
+{
+	pft_cleanup
+}
+
 atf_init_test_cases()
 {
 	atf_add_test_case "basic_v4"
@@ -757,4 +842,5 @@ atf_init_test_cases()
 	atf_add_test_case "rdr_v4"
 	atf_add_test_case "pfsync"
 	atf_add_test_case "timeout"
+	atf_add_test_case "related_icmp"
 }