git: 5de77e952a2f - main - pf: remove the last hand-rolled IPv6 extension header loop

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Thu, 10 Oct 2024 12:37:19 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=5de77e952a2f497722bb204dcd8b5e669adc1dd7

commit 5de77e952a2f497722bb204dcd8b5e669adc1dd7
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2024-09-30 14:59:24 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2024-10-10 12:10:40 +0000

    pf: remove the last hand-rolled IPv6 extension header loop
    
    Replace the IPv6 header walking loop in pf_test_state_icmp() with
    the common function pf_walk_header6().  For that, pf_walk_header6()
    can now extract both the information wether it is a fragment and
    the final protocol if it is the first fragment.  This allows to
    match the icmp6 too big packet of a first fragment to the reassembled
    packet's state.  This is neccesary if a refragmented fragment is
    to big for the Path-MTU.
    Note that pd.proto contains the real protocol number for the first
    fragment and IPPROTO_FRAGMENT for later fragments.  pd.virtual_protocol
    is set to PF_VPROTO_FRAGMENT for all fragments.
    ok mcbride@
    
    Obtained from:  OpenBSD, bluhm <bluhm@openbsd.org>, 90b3c57e94
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D46931
---
 sys/net/pfvar.h          |   6 +--
 sys/netpfil/pf/pf.c      | 130 +++++++++++++++++++++++++++--------------------
 sys/netpfil/pf/pf_norm.c |  18 +------
 3 files changed, 81 insertions(+), 73 deletions(-)

diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index e27c5e666de8..4b8f7e45e03b 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -1612,9 +1612,9 @@ struct pf_pdesc {
 	u_int16_t	 flags;		/* Let SCRUB trigger behavior in
 					 * state code. Easier than tags */
 #define PFDESC_TCP_NORM	0x0001		/* TCP shall be statefully scrubbed */
-#define PFDESC_IP_REAS	0x0002		/* IP frags would've been reassembled */
 	u_int16_t	 virtual_proto;
 #define PF_VPROTO_FRAGMENT	256
+	int		 extoff;
 	sa_family_t	 af;
 	u_int8_t	 proto;
 	u_int8_t	 tos;
@@ -2362,8 +2362,8 @@ int	pf_normalize_ip(struct mbuf **, struct pfi_kkif *, u_short *,
 #endif /* INET */
 
 #ifdef INET6
-int	pf_walk_header6(struct mbuf *, uint8_t *, int *, int *, uint32_t *,
-	    u_short *);
+int	pf_walk_header6(struct mbuf *, struct ip6_hdr *, int *, int *, int *,
+	    uint8_t *, uint32_t *, u_short *);
 int	pf_normalize_ip6(struct mbuf **, struct pfi_kkif *, int,
 	    u_short *, struct pf_pdesc *);
 void	pf_poolmask(struct pf_addr *, struct pf_addr*,
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 891c490a0b1e..cd90cc1c85c4 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -7026,7 +7026,8 @@ pf_test_state_icmp(struct pf_kstate **state, struct pfi_kkif *kif,
 #endif /* INET */
 #ifdef INET6
 		struct ip6_hdr	h2_6;
-		int		terminal = 0;
+		int		fragoff2, extoff2;
+		u_int32_t	jumbolen;
 #endif /* INET6 */
 		int		ipoff2 = 0;
 		int		off2 = 0;
@@ -7078,47 +7079,16 @@ pf_test_state_icmp(struct pf_kstate **state, struct pfi_kkif *kif,
 				    "(ip6)\n"));
 				return (PF_DROP);
 			}
-			pd2.proto = h2_6.ip6_nxt;
+			off2 = ipoff2;
+			if (pf_walk_header6(m, &h2_6, &off2, &extoff2,
+				&fragoff2, &pd2.proto, &jumbolen,
+				reason) != PF_PASS)
+				return (PF_DROP);
+
 			pd2.src = (struct pf_addr *)&h2_6.ip6_src;
 			pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
 			pd2.ip_sum = NULL;
 			off2 = ipoff2 + sizeof(h2_6);
-			do {
-				switch (pd2.proto) {
-				case IPPROTO_FRAGMENT:
-					/*
-					 * ICMPv6 error messages for
-					 * non-first fragments
-					 */
-					REASON_SET(reason, PFRES_FRAG);
-					return (PF_DROP);
-				case IPPROTO_AH:
-				case IPPROTO_HOPOPTS:
-				case IPPROTO_ROUTING:
-				case IPPROTO_DSTOPTS: {
-					/* get next header and header length */
-					struct ip6_ext opt6;
-
-					if (!pf_pull_hdr(m, off2, &opt6,
-					    sizeof(opt6), NULL, reason,
-					    pd2.af)) {
-						DPFPRINTF(PF_DEBUG_MISC,
-						    ("pf: ICMPv6 short opt\n"));
-						return (PF_DROP);
-					}
-					if (pd2.proto == IPPROTO_AH)
-						off2 += (opt6.ip6e_len + 2) * 4;
-					else
-						off2 += (opt6.ip6e_len + 1) * 8;
-					pd2.proto = opt6.ip6e_nxt;
-					/* goto the next header */
-					break;
-				}
-				default:
-					terminal++;
-					break;
-				}
-			} while (!terminal);
 			break;
 #endif /* INET6 */
 		}
@@ -8537,28 +8507,44 @@ pf_walk_option6(struct mbuf *m, int off, int end, uint32_t *jumbolen,
 }
 
 int
-pf_walk_header6(struct mbuf *m, uint8_t *nxt, int *off, int *extoff,
-    uint32_t *jumbolen, u_short *reason)
+pf_walk_header6(struct mbuf *m, struct ip6_hdr *h, int *off, int *extoff,
+    int *fragoff, uint8_t *nxt, uint32_t *jumbolen, u_short *reason)
 {
+	struct ip6_frag		 frag;
 	struct ip6_ext		 ext;
 	struct ip6_rthdr	 rthdr;
-	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
 	int			 rthdr_cnt = 0;
 
+	*off += sizeof(struct ip6_hdr);
+	*extoff = *fragoff = 0;
 	*nxt = h->ip6_nxt;
-	*off = sizeof(struct ip6_hdr);
-	*extoff = 0;
 	*jumbolen = 0;
 	for (;;) {
 		switch (*nxt) {
 		case IPPROTO_FRAGMENT:
+			if (*fragoff != 0) {
+				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple fragment"));
+				REASON_SET(reason, PFRES_FRAG);
+				return (PF_DROP);
+			}
 			/* jumbo payload packets cannot be fragmented */
 			if (*jumbolen != 0) {
 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 fragmented jumbo"));
 				REASON_SET(reason, PFRES_FRAG);
 				return (PF_DROP);
 			}
-			return (PF_PASS);
+			if (!pf_pull_hdr(m, *off, &frag, sizeof(frag), NULL,
+				reason, AF_INET6)) {
+				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short fragment"));
+				return (PF_DROP);
+			}
+			*fragoff = *off;
+			/* stop walking over non initial fragments */
+			if ((frag.ip6f_offlg & IP6F_OFF_MASK) != 0)
+				return (PF_PASS);
+			*off += sizeof(frag);
+			*nxt = frag.ip6f_nxt;
+			break;
 		case IPPROTO_ROUTING:
 			if (rthdr_cnt++) {
 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 multiple rthdr"));
@@ -8567,6 +8553,12 @@ pf_walk_header6(struct mbuf *m, uint8_t *nxt, int *off, int *extoff,
 			}
 			if (!pf_pull_hdr(m, *off, &rthdr, sizeof(rthdr), NULL,
 				reason, AF_INET6)) {
+				/* fragments may be short */
+				if (*fragoff != 0) {
+					*off = *fragoff;
+					*nxt = IPPROTO_FRAGMENT;
+					return (PF_PASS);
+				}
 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short rthdr"));
 				return (PF_DROP);
 			}
@@ -8581,11 +8573,19 @@ pf_walk_header6(struct mbuf *m, uint8_t *nxt, int *off, int *extoff,
 		case IPPROTO_DSTOPTS:
 			if (!pf_pull_hdr(m, *off, &ext, sizeof(ext), NULL,
 				reason, AF_INET6)) {
+				/* fragments may be short */
+				if (*fragoff != 0) {
+					*off = *fragoff;
+					*nxt = IPPROTO_FRAGMENT;
+					return (PF_PASS);
+				}
 				DPFPRINTF(PF_DEBUG_MISC, ("IPv6 short exthdr"));
 				return (PF_DROP);
 			}
-			*extoff = *off;
-			if (*nxt == IPPROTO_HOPOPTS) {
+			/* reassembly needs the ext header before the frag */
+			if (*fragoff == 0)
+				*extoff = *off;
+			if (*nxt == IPPROTO_HOPOPTS && *fragoff == 0) {
 				if (pf_walk_option6(m, *off + sizeof(ext),
 					*off + (ext.ip6e_len + 1) * 8, jumbolen,
 					reason) != PF_PASS)
@@ -8603,6 +8603,20 @@ pf_walk_header6(struct mbuf *m, uint8_t *nxt, int *off, int *extoff,
 				*off += (ext.ip6e_len + 1) * 8;
 			*nxt = ext.ip6e_nxt;
 			break;
+		case IPPROTO_TCP:
+		case IPPROTO_UDP:
+		case IPPROTO_SCTP:
+		case IPPROTO_ICMPV6:
+			/* fragments may be short, ignore inner header then */
+			if (*fragoff != 0 && ntohs(h->ip6_plen) < *off +
+			    (*nxt == IPPROTO_TCP ? sizeof(struct tcphdr) :
+			    *nxt == IPPROTO_UDP ? sizeof(struct udphdr) :
+			    *nxt == IPPROTO_SCTP ? sizeof(struct sctphdr) :
+			    sizeof(struct icmp6_hdr))) {
+				*off = *fragoff;
+				*nxt = IPPROTO_FRAGMENT;
+			}
+			/* FALLTHROUGH */
 		default:
 			return (PF_PASS);
 		}
@@ -8705,7 +8719,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
 #ifdef INET6
 	case AF_INET6: {
 		struct ip6_hdr *h;
-		int extoff;
+		int fragoff;
 		uint32_t jumbolen;
 		uint8_t nxt;
 
@@ -8719,8 +8733,10 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
 			return (-1);
 		}
 
-		if (pf_walk_header6(m, &nxt, off, &extoff, &jumbolen, reason)
-		    != PF_PASS) {
+		h = mtod(m, struct ip6_hdr *);
+		*off = 0;
+		if (pf_walk_header6(m, h, off, &pd->extoff, &fragoff, &nxt,
+		    &jumbolen, reason) != PF_PASS) {
 			*action = PF_DROP;
 			return (-1);
 		}
@@ -8740,6 +8756,9 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
 		pd->virtual_proto = pd->proto = h->ip6_nxt;
 		pd->act.rtableid = -1;
 
+		if (fragoff != 0)
+			pd->virtual_proto = PF_VPROTO_FRAGMENT;
+
 		/*
 		 * we do not support jumbogram.  if we keep going, zero ip6_plen
 		 * will do something bad, so drop the packet for now.
@@ -8750,7 +8769,7 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
 		}
 
 		/* We do IP header normalization and packet reassembly here */
-		if (pf_normalize_ip6(m0, kif, *off, reason, pd) !=
+		if (pf_normalize_ip6(m0, kif, fragoff, reason, pd) !=
 		    PF_PASS) {
 			*action = PF_DROP;
 			return (-1);
@@ -8769,14 +8788,17 @@ pf_setup_pdesc(sa_family_t af, int dir, struct pf_pdesc *pd, struct mbuf **m0,
 		h = mtod(m, struct ip6_hdr *);
 		pd->virtual_proto = pd->proto = h->ip6_nxt;
 
-		/* recalc offset, refetch header, then update pd */
-		if (pf_walk_header6(m, &nxt, off, &extoff, &jumbolen, reason) !=
-		    PF_PASS) {
+		/* refetch header, recalc offset, then update pd */
+		h = mtod(m, struct ip6_hdr *);
+		*off = 0;
+
+		if (pf_walk_header6(m, h, off, &pd->extoff, &fragoff, &nxt,
+			&jumbolen, reason) != PF_PASS) {
 			*action = PF_DROP;
 			return (-1);
 		}
 
-		if (pd->proto == IPPROTO_FRAGMENT) {
+		if (fragoff != 0) {
 			/*
 			 * handle fragments that aren't reassembled by
 			 * normalization
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index a159528c1756..1bdcdc5573aa 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -1215,20 +1215,12 @@ pf_normalize_ip6(struct mbuf **m0, struct pfi_kkif *kif,
 	struct mbuf		*m;
 	struct pf_krule		*r;
 	struct ip6_frag		 frag;
-	int			 extoff;
-	uint32_t		 jumbolen;
-	uint8_t			 nxt;
 	bool			 scrub_compat;
 
 	PF_RULES_RASSERT();
 
-again:
 	m = *m0;
 
-	if (pf_walk_header6(m, &nxt, &off, &extoff, &jumbolen, reason)
-	    != PF_PASS)
-		return (PF_DROP);
-
 	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
 	/*
 	 * Check if there are any scrub rules, matching or not.
@@ -1280,20 +1272,14 @@ again:
 	/* Offset now points to data portion. */
 	off += sizeof(frag);
 
-	if (nxt == IPPROTO_FRAGMENT) {
-		if (pd->flags & PFDESC_IP_REAS)
-			return (PF_DROP);
-
+	if (pd->virtual_proto == PF_VPROTO_FRAGMENT) {
 		/* Returns PF_DROP or *m0 is NULL or completely reassembled
 		 * mbuf. */
-		if (pf_reassemble6(m0, &frag, off, extoff, reason) != PF_PASS)
+		if (pf_reassemble6(m0, &frag, off, pd->extoff, reason) != PF_PASS)
 			return (PF_DROP);
-
-		pd->flags |= PFDESC_IP_REAS;
 		m = *m0;
 		if (m == NULL)
 			return (PF_DROP);
-		goto again;
 	}
 
 	return (PF_PASS);