git: f7bc2901f97f - stable/13 - pf: fix missing SCTP multihomed states

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Tue, 07 Nov 2023 15:47:23 UTC
The branch stable/13 has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=f7bc2901f97f480950d3763962ad9ed743a838fc

commit f7bc2901f97f480950d3763962ad9ed743a838fc
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2023-10-17 16:10:39 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2023-11-07 15:25:02 +0000

    pf: fix missing SCTP multihomed states
    
    The existing code to create extra states when SCTP endpoints supplied
    extra addresses missed a case. As a result we failed to generate all of
    the required states.
    
    Briefly, if host A has address 1 and 2 and host B has addres 3 and 4 we
    generated 1 - 3 and 2 - 3, as well as 1 - 4, but not 2 - 4.
    
    Store the list of endpoints supplied by each host and use those to
    generate all of the connection permutations.
    
    MFC after:      1 week
    Sponsored by:   Orange Business Services
    Differential Revision:  https://reviews.freebsd.org/D42361
    
    (cherry picked from commit d6d38b02ee57920cc02a306a6d8d2aec9c4b759c)
---
 sys/netpfil/pf/pf.c      | 253 ++++++++++++++++++++++++++++++++++++++++++++---
 sys/netpfil/pf/pf_norm.c |   2 +
 2 files changed, 240 insertions(+), 15 deletions(-)

diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 869c68ad308f..278aa07fab75 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -162,6 +162,34 @@ VNET_DEFINE(size_t, pf_allrulecount);
 VNET_DEFINE(struct pf_krule *, pf_rulemarker);
 #endif
 
+struct pf_sctp_endpoint;
+RB_HEAD(pf_sctp_endpoints, pf_sctp_endpoint);
+struct pf_sctp_source {
+	sa_family_t			af;
+	struct pf_addr			addr;
+	TAILQ_ENTRY(pf_sctp_source)	entry;
+};
+TAILQ_HEAD(pf_sctp_sources, pf_sctp_source);
+struct pf_sctp_endpoint
+{
+	uint32_t		 v_tag;
+	struct pf_sctp_sources	 sources;
+	RB_ENTRY(pf_sctp_endpoint)	entry;
+};
+static int
+pf_sctp_endpoint_compare(struct pf_sctp_endpoint *a, struct pf_sctp_endpoint *b)
+{
+	return (a->v_tag - b->v_tag);
+}
+RB_PROTOTYPE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
+RB_GENERATE(pf_sctp_endpoints, pf_sctp_endpoint, entry, pf_sctp_endpoint_compare);
+VNET_DEFINE_STATIC(struct pf_sctp_endpoints, pf_sctp_endpoints);
+#define V_pf_sctp_endpoints	VNET(pf_sctp_endpoints)
+static struct mtx_padalign pf_sctp_endpoints_mtx;
+MTX_SYSINIT(pf_sctp_endpoints_mtx, &pf_sctp_endpoints_mtx, "SCTP endpoints", MTX_DEF);
+#define	PF_SCTP_ENDPOINTS_LOCK()	mtx_lock(&pf_sctp_endpoints_mtx)
+#define	PF_SCTP_ENDPOINTS_UNLOCK()	mtx_unlock(&pf_sctp_endpoints_mtx)
+
 /*
  * Queue for pf_intr() sends.
  */
@@ -290,6 +318,7 @@ static int		 pf_test_state_udp(struct pf_kstate **, int,
 static int		 pf_test_state_icmp(struct pf_kstate **, int,
 			    struct pfi_kkif *, struct mbuf *, int,
 			    void *, struct pf_pdesc *, u_short *);
+static void		 pf_sctp_multihome_detach_addr(const struct pf_kstate *);
 static void		 pf_sctp_multihome_delayed(struct pf_pdesc *, int,
 			    struct pfi_kkif *, struct pf_kstate *, int);
 static int		 pf_test_state_sctp(struct pf_kstate **,
@@ -1100,6 +1129,7 @@ pf_cleanup(void)
 		m_freem(pfse->pfse_m);
 		free(pfse, M_PFTEMP);
 	}
+	MPASS(RB_EMPTY(&V_pf_sctp_endpoints));
 
 	uma_zdestroy(V_pf_sources_z);
 	uma_zdestroy(V_pf_state_z);
@@ -1319,6 +1349,8 @@ pf_detach_state(struct pf_kstate *s)
 	struct pf_state_key *sks = s->key[PF_SK_STACK];
 	struct pf_keyhash *kh;
 
+	pf_sctp_multihome_detach_addr(s);
+
 	if (sks != NULL) {
 		kh = &V_pf_keyhash[pf_hashkey(sks)];
 		PF_HASHROW_LOCK(kh);
@@ -5259,7 +5291,7 @@ pf_test_state_sctp(struct pf_kstate **state, struct pfi_kkif *kif,
     struct mbuf *m, int off, void *h, struct pf_pdesc *pd, u_short *reason)
 {
 	struct pf_state_key_cmp	 key;
-	struct pf_state_peer	*src; //, *dst;
+	struct pf_state_peer	*src, *dst;
 	struct sctphdr		*sh = &pd->hdr.sctp;
 	u_int8_t		 psrc; //, pdst;
 
@@ -5282,9 +5314,11 @@ pf_test_state_sctp(struct pf_kstate **state, struct pfi_kkif *kif,
 
 	if (pd->dir == (*state)->direction) {
 		src = &(*state)->src;
+		dst = &(*state)->dst;
 		psrc = PF_PEER_SRC;
 	} else {
 		src = &(*state)->dst;
+		dst = &(*state)->src;
 		psrc = PF_PEER_DST;
 	}
 
@@ -5295,6 +5329,12 @@ pf_test_state_sctp(struct pf_kstate **state, struct pfi_kkif *kif,
 			(*state)->timeout = PFTM_TCP_OPENING;
 		}
 	}
+	if (pd->sctp_flags & PFDESC_SCTP_INIT_ACK) {
+		MPASS(dst->scrub != NULL);
+		if (dst->scrub->pfss_v_tag == 0)
+			dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;
+	}
+
 	if (pd->sctp_flags & PFDESC_SCTP_COOKIE) {
 		if (src->state < SCTP_ESTABLISHED) {
 			pf_set_protostate(*state, psrc, SCTP_ESTABLISHED);
@@ -5341,37 +5381,222 @@ pf_test_state_sctp(struct pf_kstate **state, struct pfi_kkif *kif,
 	return (PF_PASS);
 }
 
+static void
+pf_sctp_multihome_detach_addr(const struct pf_kstate *s)
+{
+	struct pf_sctp_endpoint key;
+	struct pf_sctp_endpoint *ep;
+	struct pf_state_key *sks = s->key[PF_SK_STACK];
+	struct pf_sctp_source *i, *tmp;
+
+	if (sks == NULL || sks->proto != IPPROTO_SCTP || s->dst.scrub == NULL)
+		return;
+
+	PF_SCTP_ENDPOINTS_LOCK();
+
+	key.v_tag = s->dst.scrub->pfss_v_tag;
+	ep  = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
+	if (ep != NULL) {
+		/* XXX Actually remove! */
+		TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
+			if (pf_addr_cmp(&i->addr,
+			    &s->key[PF_SK_WIRE]->addr[s->direction == PF_OUT],
+			    s->key[PF_SK_WIRE]->af) == 0) {
+				TAILQ_REMOVE(&ep->sources, i, entry);
+				free(i, M_PFTEMP);
+				break;
+			}
+		}
+
+		if (TAILQ_EMPTY(&ep->sources)) {
+			RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
+			free(ep, M_PFTEMP);
+		}
+	}
+
+	/* Other direction. */
+	key.v_tag = s->src.scrub->pfss_v_tag;
+	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
+	if (ep != NULL) {
+		TAILQ_FOREACH_SAFE(i, &ep->sources, entry, tmp) {
+			if (pf_addr_cmp(&i->addr,
+			    &s->key[PF_SK_WIRE]->addr[s->direction == PF_IN],
+			    s->key[PF_SK_WIRE]->af) == 0) {
+				TAILQ_REMOVE(&ep->sources, i, entry);
+				free(i, M_PFTEMP);
+				break;
+			}
+		}
+
+		if (TAILQ_EMPTY(&ep->sources)) {
+			RB_REMOVE(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
+			free(ep, M_PFTEMP);
+		}
+	}
+
+	PF_SCTP_ENDPOINTS_UNLOCK();
+}
+
+static void
+pf_sctp_multihome_add_addr(struct pf_pdesc *pd, struct pf_addr *a, uint32_t v_tag)
+{
+	struct pf_sctp_endpoint key = {
+		.v_tag = v_tag,
+	};
+	struct pf_sctp_source *i;
+	struct pf_sctp_endpoint *ep;
+
+	PF_SCTP_ENDPOINTS_LOCK();
+
+	ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
+	if (ep == NULL) {
+		ep = malloc(sizeof(struct pf_sctp_endpoint),
+		    M_PFTEMP, M_NOWAIT);
+		if (ep == NULL) {
+			PF_SCTP_ENDPOINTS_UNLOCK();
+			return;
+		}
+
+		ep->v_tag = v_tag;
+		TAILQ_INIT(&ep->sources);
+		RB_INSERT(pf_sctp_endpoints, &V_pf_sctp_endpoints, ep);
+	}
+
+	/* Avoid inserting duplicates. */
+	TAILQ_FOREACH(i, &ep->sources, entry) {
+		if (pf_addr_cmp(&i->addr, a, pd->af) == 0) {
+			PF_SCTP_ENDPOINTS_UNLOCK();
+			return;
+		}
+	}
+
+	i = malloc(sizeof(*i), M_PFTEMP, M_NOWAIT);
+	if (i == NULL) {
+		PF_SCTP_ENDPOINTS_UNLOCK();
+		return;
+	}
+
+	i->af = pd->af;
+	memcpy(&i->addr, a, sizeof(*a));
+	TAILQ_INSERT_TAIL(&ep->sources, i, entry);
+
+	PF_SCTP_ENDPOINTS_UNLOCK();
+}
+
 static void
 pf_sctp_multihome_delayed(struct pf_pdesc *pd, int off, struct pfi_kkif *kif,
     struct pf_kstate *s, int action)
 {
 	struct pf_sctp_multihome_job	*j, *tmp;
+	struct pf_sctp_source		*i;
 	int			 ret __unused;;
 	struct pf_kstate	*sm = NULL;
 	struct pf_krule		*ra = NULL;
 	struct pf_krule		*r = &V_pf_default_rule;
 	struct pf_kruleset	*rs = NULL;
+	bool do_extra = true;
 
 	PF_RULES_RLOCK_TRACKER;
 
+again:
 	TAILQ_FOREACH_SAFE(j, &pd->sctp_multihome_jobs, next, tmp) {
 		if (s == NULL || action != PF_PASS)
 			goto free;
 
+		/* Confirm we don't recurse here. */
+		MPASS(! (pd->sctp_flags & PFDESC_SCTP_ADD_IP));
+
 		switch (j->op) {
 		case  SCTP_ADD_IP_ADDRESS: {
+			uint32_t v_tag = pd->sctp_initiate_tag;
+
+			if (v_tag == 0) {
+				if (s->direction == pd->dir)
+					v_tag = s->src.scrub->pfss_v_tag;
+				else
+					v_tag = s->dst.scrub->pfss_v_tag;
+			}
+
+			/*
+			 * Avoid duplicating states. We'll already have
+			 * created a state based on the source address of
+			 * the packet, but SCTP endpoints may also list this
+			 * address again in the INIT(_ACK) parameters.
+			 */
+			if (pf_addr_cmp(&j->src, pd->src, pd->af) == 0) {
+				break;
+			}
+
 			j->pd.sctp_flags |= PFDESC_SCTP_ADD_IP;
 			PF_RULES_RLOCK();
+			sm = NULL;
+			/* XXX: May generated unwanted abort if we try to insert a duplicate state. */
 			ret = pf_test_rule(&r, &sm, pd->dir, kif,
 			    j->m, off, &j->pd, &ra, &rs, NULL);
 			PF_RULES_RUNLOCK();
 			SDT_PROBE4(pf, sctp, multihome, test, kif, r, j->m, ret);
-			if (sm) {
+			if (ret != PF_DROP && sm != NULL) {
 				/* Inherit v_tag values. */
-				sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_flags;
-				sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_flags;
+				if (sm->direction == s->direction) {
+					sm->src.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
+					sm->dst.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
+				} else {
+					sm->src.scrub->pfss_v_tag = s->dst.scrub->pfss_v_tag;
+					sm->dst.scrub->pfss_v_tag = s->src.scrub->pfss_v_tag;
+				}
 				PF_STATE_UNLOCK(sm);
+			} else {
+				/* If we try duplicate inserts? */
+				break;
+			}
+
+			/* Only add the addres if we've actually allowed the state. */
+			pf_sctp_multihome_add_addr(pd, &j->src, v_tag);
+
+			if (! do_extra) {
+				break;
 			}
+			/*
+			 * We need to do this for each of our source addresses.
+			 * Find those based on the verification tag.
+			 */
+			struct pf_sctp_endpoint key = {
+				.v_tag = pd->hdr.sctp.v_tag,
+			};
+			struct pf_sctp_endpoint *ep;
+
+			PF_SCTP_ENDPOINTS_LOCK();
+			ep = RB_FIND(pf_sctp_endpoints, &V_pf_sctp_endpoints, &key);
+			if (ep == NULL) {
+				PF_SCTP_ENDPOINTS_UNLOCK();
+				break;
+			}
+			MPASS(ep != NULL);
+
+			TAILQ_FOREACH(i, &ep->sources, entry) {
+				struct pf_sctp_multihome_job *nj;
+
+				/* SCTP can intermingle IPv4 and IPv6. */
+				if (i->af != pd->af)
+					continue;
+
+				nj = malloc(sizeof(*nj), M_PFTEMP, M_NOWAIT | M_ZERO);
+				if (! nj) {
+					continue;
+				}
+				memcpy(&nj->pd, &j->pd, sizeof(j->pd));
+				memcpy(&nj->src, &j->src, sizeof(nj->src));
+				nj->pd.src = &nj->src;
+				// New destination address!
+				memcpy(&nj->dst, &i->addr, sizeof(nj->dst));
+				nj->pd.dst = &nj->dst;
+				nj->m = j->m;
+				nj->op = j->op;
+
+				TAILQ_INSERT_TAIL(&pd->sctp_multihome_jobs, nj, next);
+			}
+			PF_SCTP_ENDPOINTS_UNLOCK();
+
 			break;
 		}
 		case SCTP_DEL_IP_ADDRESS: {
@@ -5409,11 +5634,18 @@ pf_sctp_multihome_delayed(struct pf_pdesc *pd, int off, struct pfi_kkif *kif,
 		default:
 			panic("Unknown op %#x", j->op);
 		}
-		}
+	}
 
-free:
+	free:
+		TAILQ_REMOVE(&pd->sctp_multihome_jobs, j, next);
 		free(j, M_PFTEMP);
 	}
+
+	/* We may have inserted extra work while processing the list. */
+	if (! TAILQ_EMPTY(&pd->sctp_multihome_jobs)) {
+		do_extra = false;
+		goto again;
+	}
 }
 
 static int
@@ -5446,15 +5678,6 @@ pf_multihome_scan(struct mbuf *m, int start, int len, struct pf_pdesc *pd,
 			    NULL, NULL, pd->af))
 				return (PF_DROP);
 
-			/*
-			 * Avoid duplicating states. We'll already have
-			 * created a state based on the source address of
-			 * the packet, but SCTP endpoints may also list this
-			 * address again in the INIT(_ACK) parameters.
-			 */
-			if (t.s_addr == pd->src->v4.s_addr)
-				break;
-
 			if (in_nullhost(t))
 				t.s_addr = pd->src->v4.s_addr;
 
diff --git a/sys/netpfil/pf/pf_norm.c b/sys/netpfil/pf/pf_norm.c
index a8ef4dc346b0..071387c12983 100644
--- a/sys/netpfil/pf/pf_norm.c
+++ b/sys/netpfil/pf/pf_norm.c
@@ -1552,6 +1552,8 @@ pf_normalize_sctp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
 		return (1);
 	}
 
+	dst->scrub->pfss_v_tag = pd->sctp_initiate_tag;
+
 	return (0);
 }