git: fcdb520c1b4e - main - pf: nat64

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Tue, 17 Dec 2024 10:07:43 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=fcdb520c1b4e1a5d5a2e54cb916dccbc848d32ba

commit fcdb520c1b4e1a5d5a2e54cb916dccbc848d32ba
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2024-10-07 08:52:01 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2024-12-17 10:07:12 +0000

    pf: nat64
    
    Since the IPv6 madness is not enough, introduce NAT64 -- which is actually
    "af-to", a generic IP version translator for pf(4).
    Not everything is perfect yet, but let's fix these things in the tree.
    Insane amount of work done by sperreault@, mikeb@ and reyk@.
    Looked over by mcbride@ henning@ and myself at eurobsdcon.
    OK mcbride@ and general put it in from deraadt@
    
    Obtained from:  OpenBSD, claudio <claudio@openbsd.org>, 97326e01c9
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D47786
---
 sys/conf/files              |    1 +
 sys/modules/pf/Makefile     |    2 +-
 sys/net/if_pflog.h          |    4 +-
 sys/net/pfvar.h             |   33 +-
 sys/netpfil/pf/inet_nat64.c |  204 ++++++++
 sys/netpfil/pf/pf.c         | 1211 ++++++++++++++++++++++++++++++++++++++-----
 sys/netpfil/pf/pf.h         |    5 +-
 sys/netpfil/pf/pf_ioctl.c   |    1 +
 sys/netpfil/pf/pf_lb.c      |  240 +++++++--
 sys/netpfil/pf/pf_nl.c      |    2 +
 sys/netpfil/pf/pf_nl.h      |    1 +
 11 files changed, 1523 insertions(+), 181 deletions(-)

diff --git a/sys/conf/files b/sys/conf/files
index c1b7aac99c4c..428a2805768c 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4559,6 +4559,7 @@ netpfil/pf/pf_table.c		optional pf inet
 netpfil/pf/pflow.c		optional pflow pf inet
 netpfil/pf/pfsync_nv.c		optional pfsync pf inet
 netpfil/pf/in4_cksum.c		optional pf inet
+netpfil/pf/inet_nat64.c		optional pf inet
 netsmb/smb_conn.c		optional netsmb
 netsmb/smb_crypt.c		optional netsmb
 netsmb/smb_dev.c		optional netsmb
diff --git a/sys/modules/pf/Makefile b/sys/modules/pf/Makefile
index 4a12730f3610..ad9790704cf1 100644
--- a/sys/modules/pf/Makefile
+++ b/sys/modules/pf/Makefile
@@ -2,7 +2,7 @@
 
 KMOD=	pf
 SRCS=	pf.c pf_if.c pf_lb.c pf_osfp.c pf_ioctl.c pf_norm.c pf_table.c \
-	pf_ruleset.c pf_nl.c pf_nv.c pf_syncookies.c in4_cksum.c \
+	pf_ruleset.c pf_nl.c pf_nv.c pf_syncookies.c in4_cksum.c inet_nat64.c \
 	bus_if.h device_if.h \
 	opt_pf.h opt_inet.h opt_inet6.h opt_bpf.h opt_sctp.h opt_global.h \
 	opt_kern_tls.h
diff --git a/sys/net/if_pflog.h b/sys/net/if_pflog.h
index b2052d5bd5f3..9734ca245eda 100644
--- a/sys/net/if_pflog.h
+++ b/sys/net/if_pflog.h
@@ -51,7 +51,9 @@ struct pfloghdr {
 	uid_t		rule_uid;
 	pid_t		rule_pid;
 	u_int8_t	dir;
-	u_int8_t	pad[3];
+	u_int8_t	pad1;	/* rewritten, on OpenBSD */
+	sa_family_t	naf;
+	u_int8_t	pad[1];
 	u_int32_t	ridentifier;
 	u_int8_t	reserve;	/* Appease broken software like Wireshark. */
 	u_int8_t	pad2[3];
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 232fa404e1d9..094bc38c4a1b 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -864,6 +864,7 @@ struct pf_krule {
 	u_int8_t		 flush;
 	u_int8_t		 prio;
 	u_int8_t		 set_prio[2];
+	sa_family_t		 naf;
 
 	struct {
 		struct pf_addr		addr;
@@ -986,6 +987,10 @@ struct pf_state_key {
 	TAILQ_HEAD(, pf_kstate)	 states[2];
 };
 
+/*
+ * True when the wire-side and stack-side state keys in `key' belong to
+ * different address families and the wire-side key's family is not
+ * `family'.
+ */
+#define PF_REVERSED_KEY(key, family)				\
+	((key[PF_SK_WIRE]->af != key[PF_SK_STACK]->af) &&	\
+	    (key[PF_SK_WIRE]->af != (family)))
+
 /* Keep synced with struct pf_kstate. */
 struct pf_state_cmp {
 	u_int64_t		 id;
@@ -1630,6 +1635,7 @@ struct pf_pdesc {
 #define PF_VPROTO_FRAGMENT	256
 	int		 extoff;
 	sa_family_t	 af;
+	sa_family_t	 naf;
 	u_int8_t	 proto;
 	u_int8_t	 tos;
 	u_int8_t	 ttl;
@@ -2429,6 +2435,9 @@ int	pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kkif *,
 	    int);
 int	pf_socket_lookup(struct pf_pdesc *);
 struct pf_state_key *pf_alloc_state_key(int);
+int	pf_translate(struct pf_pdesc *, struct pf_addr *, u_int16_t,
+	    struct pf_addr *, u_int16_t, u_int16_t, int);
+int	pf_translate_af(struct pf_pdesc *);
 void	pfr_initialize(void);
 void	pfr_cleanup(void);
 int	pfr_match_addr(struct pfr_ktable *, struct pf_addr *, sa_family_t);
@@ -2642,18 +2651,23 @@ int			 pf_step_out_of_keth_anchor(struct pf_keth_anchor_stackframe *,
 
 u_short			 pf_map_addr(u_int8_t, struct pf_krule *,
 			    struct pf_addr *, struct pf_addr *,
-			    struct pfi_kkif **nkif, struct pf_addr *);
+			    struct pfi_kkif **nkif, struct pf_addr *,
+			    struct pf_kpool *);
 u_short			 pf_map_addr_sn(u_int8_t, struct pf_krule *,
 			    struct pf_addr *, struct pf_addr *,
 			    struct pfi_kkif **nkif, struct pf_addr *,
-			    struct pf_ksrc_node **, struct pf_srchash **);
+			    struct pf_ksrc_node **, struct pf_srchash **,
+			    struct pf_kpool *);
+int			 pf_get_transaddr_af(struct pf_krule *,
+			    struct pf_pdesc *);
 u_short			 pf_get_translation(struct pf_pdesc *,
 			    int, struct pf_state_key **, struct pf_state_key **,
 			    struct pf_kanchor_stackframe *, struct pf_krule **,
 			    struct pf_udp_mapping **udp_mapping);
 
-struct pf_state_key	*pf_state_key_setup(struct pf_pdesc *,
-			    u_int16_t, u_int16_t);
+int			 pf_state_key_setup(struct pf_pdesc *,
+			    u_int16_t, u_int16_t,
+			    struct pf_state_key **sk, struct pf_state_key **nk);
 struct pf_state_key	*pf_state_key_clone(const struct pf_state_key *);
 void			 pf_rule_to_actions(struct pf_krule *,
 			    struct pf_rule_actions *);
@@ -2665,6 +2679,17 @@ void	pf_scrub(struct pf_pdesc *);
 struct pfi_kkif		*pf_kkif_create(int);
 void			 pf_kkif_free(struct pfi_kkif *);
 void			 pf_kkif_zero(struct pfi_kkif *);
+
+
+/* NAT64 functions. */
+int	  inet_nat64(int, const void *, void *, const void *, u_int8_t);
+int	  inet_nat64_inet(const void *, void *, const void *, u_int8_t);
+int	  inet_nat64_inet6(const void *, void *, const void *, u_int8_t);
+
+int	  inet_nat46(int, const void *, void *, const void *, u_int8_t);
+int	  inet_nat46_inet(const void *, void *, const void *, u_int8_t);
+int	  inet_nat46_inet6(const void *, void *, const void *, u_int8_t);
+
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_H_ */
diff --git a/sys/netpfil/pf/inet_nat64.c b/sys/netpfil/pf/inet_nat64.c
new file mode 100644
index 000000000000..7f62814c2383
--- /dev/null
+++ b/sys/netpfil/pf/inet_nat64.c
@@ -0,0 +1,204 @@
+/*	$OpenBSD: inet_nat64.c,v 1.1 2011/10/13 18:23:40 claudio Exp $	*/
+/*	$vantronix: inet_nat64.c,v 1.2 2011/02/28 14:57:58 mike Exp $	*/
+
+/*
+ * Copyright (c) 2011 Reyk Floeter <reyk@vantronix.net>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/mbuf.h>
+#include <netinet/in.h>
+#include <net/pfvar.h>
+
+/*
+ * View of a 16-octet address buffer either as four 32-bit words or as
+ * sixteen octets.  The translation helpers in this file cast their untyped
+ * src/dst/pfx pointers to this union.
+ */
+union inet_nat64_addr {
+	u_int32_t	 u32[4];
+	u_int8_t	 u8[16];
+};
+
+/*
+ * Merge two 32-bit words: the leading pfxlen bits are taken from pfx and
+ * the remaining bits from src.  A pfxlen of 0 returns src unchanged and
+ * values above 32 are clamped to 32.
+ *
+ * The mask is built in host order and passed through htonl() before it is
+ * applied, so src and pfx are presumably in network byte order -- confirm
+ * against the callers.
+ */
+static u_int32_t
+inet_nat64_mask(u_int32_t src, u_int32_t pfx, u_int8_t pfxlen)
+{
+	u_int32_t	u32;
+	if (pfxlen == 0)
+		return (src);	/* also keeps the shift below under 32 bits */
+	else if (pfxlen > 32)
+		pfxlen = 32;
+	u32 =
+	    (src & ~htonl(0xffffffff << (32 - pfxlen))) |
+	    (pfx & htonl(0xffffffff << (32 - pfxlen)));
+	return (u32);
+
+}
+
+/*
+ * Dispatch on af: AF_INET calls inet_nat64_inet() and AF_INET6 calls
+ * inet_nat64_inet6(), with src, dst, pfx and pfxlen passed through
+ * unchanged.
+ *
+ * Returns the helper's result, or -1 for any other af (outside the kernel
+ * errno is additionally set to EAFNOSUPPORT).
+ */
+int
+inet_nat64(int af, const void *src, void *dst,
+    const void *pfx, u_int8_t pfxlen)
+{
+	switch (af) {
+	case AF_INET:
+		return (inet_nat64_inet(src, dst, pfx, pfxlen));
+	case AF_INET6:
+		return (inet_nat64_inet6(src, dst, pfx, pfxlen));
+	default:
+#ifndef _KERNEL
+		errno = EAFNOSUPPORT;
+#endif
+		return (-1);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Extract four octets from the 16-octet buffer at src into the first four
+ * octets of dst.
+ *
+ * For pfxlen 32, 40, 48, 56, 64 or 96 the octets are copied starting at
+ * octet pfxlen / 8 of src, skipping octet 8; pfx is not consulted.
+ * For pfxlen 97..128 the first word of dst becomes the last word of src
+ * with its leading (pfxlen - 96) bits replaced by those of the last word
+ * of pfx.
+ *
+ * Returns 0 on success, or -1 for any other pfxlen (outside the kernel
+ * errno is additionally set to EINVAL).
+ *
+ * NOTE(review): the buffers are accessed through u_int32_t members, which
+ * assumes suitably aligned pointers -- confirm against the callers.
+ */
+int
+inet_nat64_inet(const void *src, void *dst, const void *pfx, u_int8_t pfxlen)
+{
+	const union inet_nat64_addr	*s = src;
+	const union inet_nat64_addr	*p = pfx;
+	union inet_nat64_addr		*d = dst;
+	int				 i, j;
+
+	switch (pfxlen) {
+	case 32:
+	case 40:
+	case 48:
+	case 56:
+	case 64:
+	case 96:
+		/* index of the first source octet to copy */
+		i = pfxlen / 8;
+		break;
+	default:
+		/* 96 itself is handled above, so this admits 97..128 only */
+		if (pfxlen < 96 || pfxlen > 128) {
+#ifndef _KERNEL
+			errno = EINVAL;
+#endif
+			return (-1);
+		}
+
+		/* as an extension, mask out any other bits */
+		d->u32[0] = inet_nat64_mask(s->u32[3], p->u32[3],
+		    (u_int8_t)(32 - (128 - pfxlen)));
+		return (0);
+	}
+
+	/* fill the octets with the source and skip reserved octet 8 */
+	for (j = 0; j < 4; j++) {
+		if (i == 8)
+			i++;
+		d->u8[j] = s->u8[i++];
+	}
+
+	return (0);
+}
+
+/*
+ * Build a 16-octet address in dst from the prefix pfx and the first four
+ * octets of src.
+ *
+ * dst is first overwritten with a copy of pfx.  For pfxlen 32, 40, 48, 56,
+ * 64 or 96, octet 8 of dst is zeroed and the four source octets are stored
+ * starting at octet pfxlen / 8, skipping octet 8.  For pfxlen 97..128 the
+ * last word of dst becomes the first word of src with its leading
+ * (pfxlen - 96) bits replaced by those of the last word of pfx.
+ *
+ * Returns 0 on success, or -1 for any other pfxlen (outside the kernel
+ * errno is additionally set to EINVAL).  On the error path dst has already
+ * been overwritten with pfx.
+ *
+ * Because dst is written before src is read, src and dst must not refer to
+ * the same buffer.
+ */
+int
+inet_nat64_inet6(const void *src, void *dst, const void *pfx, u_int8_t pfxlen)
+{
+	const union inet_nat64_addr	*s = src;
+	const union inet_nat64_addr	*p = pfx;
+	union inet_nat64_addr		*d = dst;
+	int				 i, j;
+
+	/* first copy the prefix octets to the destination */
+	*d = *p;
+
+	switch (pfxlen) {
+	case 32:
+	case 40:
+	case 48:
+	case 56:
+	case 64:
+	case 96:
+		/* index of the first destination octet to fill */
+		i = pfxlen / 8;
+		break;
+	default:
+		/* 96 itself is handled above, so this admits 97..128 only */
+		if (pfxlen < 96 || pfxlen > 128) {
+#ifndef _KERNEL
+			errno = EINVAL;
+#endif
+			return (-1);
+		}
+
+		/* as an extension, mask out any other bits */
+		d->u32[3] = inet_nat64_mask(s->u32[0], p->u32[3],
+		    (u_int8_t)(32 - (128 - pfxlen)));
+		return (0);
+	}
+
+	/* octet 8 is reserved and must be set to zero */
+	d->u8[8] = 0;
+
+	/* fill the other octets with the source and skip octet 8 */
+	for (j = 0; j < 4; j++) {
+		if (i == 8)
+			i++;
+		d->u8[i++] = s->u8[j];
+	}
+
+	return (0);
+}
+
+/*
+ * Validate pfxlen and dispatch on af: AF_INET calls inet_nat46_inet() and
+ * AF_INET6 calls inet_nat46_inet6(), with src, dst, pfx and pfxlen passed
+ * through unchanged.
+ *
+ * Returns the helper's result, or -1 when pfxlen exceeds 32 or af is
+ * neither family (outside the kernel errno is additionally set to EINVAL
+ * or EAFNOSUPPORT respectively).
+ */
+int
+inet_nat46(int af, const void *src, void *dst,
+    const void *pfx, u_int8_t pfxlen)
+{
+	/* the helpers do not check pfxlen themselves */
+	if (pfxlen > 32) {
+#ifndef _KERNEL
+		errno = EINVAL;
+#endif
+		return (-1);
+	}
+
+	switch (af) {
+	case AF_INET:
+		return (inet_nat46_inet(src, dst, pfx, pfxlen));
+	case AF_INET6:
+		return (inet_nat46_inet6(src, dst, pfx, pfxlen));
+	default:
+#ifndef _KERNEL
+		errno = EAFNOSUPPORT;
+#endif
+		return (-1);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Store into the first word of dst the last word of the 16-octet src, with
+ * its leading pfxlen bits replaced by those of the first word of pfx.
+ * pfxlen is not validated here.  Always returns 0.
+ */
+int
+inet_nat46_inet(const void *src, void *dst, const void *pfx, u_int8_t pfxlen)
+{
+	const union inet_nat64_addr	*s = src;
+	const union inet_nat64_addr	*p = pfx;
+	union inet_nat64_addr		*d = dst;
+
+	/* set the remaining bits to the source */
+	d->u32[0] = inet_nat64_mask(s->u32[3], p->u32[0], pfxlen);
+
+	return (0);
+}
+
+/*
+ * Build a 16-octet address in dst whose first three words are zero and
+ * whose last word is the first word of src, with its leading pfxlen bits
+ * replaced by those of the first word of pfx.  pfxlen is not validated
+ * here.  Always returns 0.
+ *
+ * Because the leading words of dst are cleared before src is read, src and
+ * dst must not refer to the same buffer.
+ */
+int
+inet_nat46_inet6(const void *src, void *dst, const void *pfx, u_int8_t pfxlen)
+{
+	const union inet_nat64_addr	*s = src;
+	const union inet_nat64_addr	*p = pfx;
+	union inet_nat64_addr		*d = dst;
+
+	/* set the initial octets to zero */
+	d->u32[0] = d->u32[1] = d->u32[2] = 0;
+
+	/* now set the remaining bits to the source */
+	d->u32[3] = inet_nat64_mask(s->u32[0], p->u32[0], pfxlen);
+
+	return (0);
+}
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 95000bf0fd48..860f9a8fce64 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -301,7 +301,7 @@ static int		 pf_check_threshold(struct pf_threshold *);
 
 static void		 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
 			    u_int16_t *, u_int16_t *, struct pf_addr *,
-			    u_int16_t, u_int8_t, sa_family_t);
+			    u_int16_t, u_int8_t, sa_family_t, sa_family_t);
 static int		 pf_modulate_sack(struct pf_pdesc *,
 			    struct tcphdr *, struct pf_state_peer *);
 int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
@@ -310,6 +310,11 @@ static void		 pf_change_icmp(struct pf_addr *, u_int16_t *,
 			    struct pf_addr *, struct pf_addr *, u_int16_t,
 			    u_int16_t *, u_int16_t *, u_int16_t *,
 			    u_int16_t *, u_int8_t, sa_family_t);
+int			 pf_change_icmp_af(struct mbuf *, int,
+			    struct pf_pdesc *, struct pf_pdesc *,
+			    struct pf_addr *, struct pf_addr *, sa_family_t,
+			    sa_family_t);
+int			 pf_translate_icmp_af(int, void *);
 static void		 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t,
 			    sa_family_t, struct pf_krule *, int);
 static void		 pf_detach_state(struct pf_kstate *);
@@ -607,11 +612,11 @@ pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off,
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
 			pf_change_ap(m, pd->src, &th->th_sport, pd->ip_sum,
 			    &th->th_sum, &nk->addr[pd->sidx],
-			    nk->port[pd->sidx], 0, pd->af);
+			    nk->port[pd->sidx], 0, pd->af, pd->naf);
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
 			pf_change_ap(m, pd->dst, &th->th_dport, pd->ip_sum,
 			    &th->th_sum, &nk->addr[pd->didx],
-			    nk->port[pd->didx], 0, pd->af);
+			    nk->port[pd->didx], 0, pd->af, pd->naf);
 		m_copyback(m, off, sizeof(*th), (caddr_t)th);
 		break;
 	}
@@ -621,11 +626,11 @@ pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off,
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af))
 			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->sidx],
-			    nk->port[pd->sidx], 1, pd->af);
+			    nk->port[pd->sidx], 1, pd->af, pd->naf);
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af))
 			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->didx],
-			    nk->port[pd->didx], 1, pd->af);
+			    nk->port[pd->didx], 1, pd->af, pd->naf);
 		m_copyback(m, off, sizeof(*uh), (caddr_t)uh);
 		break;
 	}
@@ -636,12 +641,12 @@ pf_packet_rework_nat(struct mbuf *m, struct pf_pdesc *pd, int off,
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af)) {
 			pf_change_ap(m, pd->src, &sh->src_port, pd->ip_sum,
 			    &checksum, &nk->addr[pd->sidx],
-			    nk->port[pd->sidx], 1, pd->af);
+			    nk->port[pd->sidx], 1, pd->af, pd->naf);
 		}
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af)) {
 			pf_change_ap(m, pd->dst, &sh->dest_port, pd->ip_sum,
 			    &checksum, &nk->addr[pd->didx],
-			    nk->port[pd->didx], 1, pd->af);
+			    nk->port[pd->didx], 1, pd->af, pd->naf);
 		}
 
 		break;
@@ -1423,7 +1428,12 @@ keyattach:
 
 			PF_HASHROW_LOCK(ih);
 			if (si->kif == s->kif &&
-			    si->direction == s->direction) {
+			    ((si->key[PF_SK_WIRE]->af == sk->af &&
+			    si->direction == s->direction) ||
+			    (si->key[PF_SK_WIRE]->af !=
+			    si->key[PF_SK_STACK]->af &&
+			    sk->af == si->key[PF_SK_STACK]->af &&
+			    si->direction != s->direction))) {
 				if (sk->proto == IPPROTO_TCP &&
 				    si->src.state >= TCPS_FIN_WAIT_2 &&
 				    si->dst.state >= TCPS_FIN_WAIT_2) {
@@ -1652,27 +1662,65 @@ copy:
 	return (0);
 }
 
-struct pf_state_key *
-pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport)
+/*
+ * Allocate and fill a pair of state keys for the packet described by pd.
+ *
+ * *sk is allocated with M_NOWAIT, populated by pf_state_key_addr_setup()
+ * and given sport/dport, pd->proto and pd->af.  *nk is always a clone of
+ * *sk.  When pd->af differs from pd->naf, *sk's ports are then reset to
+ * pd->osport/pd->odport and *nk is rewritten for the new family from
+ * pd->nsaddr, pd->ndaddr, pd->nsport and pd->ndport, with ICMP and ICMPv6
+ * swapped in its protocol field.
+ *
+ * Returns 0 on success.  Returns ENOMEM if an allocation fails or if
+ * pf_state_key_addr_setup() reports failure; in that case *sk is freed and
+ * set to NULL.  *nk is only written once the clone has been attempted, so
+ * it is left untouched on the two earlier failure paths.
+ */
+int
+pf_state_key_setup(struct pf_pdesc *pd, u_int16_t sport, u_int16_t dport,
+    struct pf_state_key **sk, struct pf_state_key **nk)
 {
-	struct pf_state_key *sk;
-
-	sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
-	if (sk == NULL)
-		return (NULL);
+	*sk = uma_zalloc(V_pf_state_key_z, M_NOWAIT);
+	if (*sk == NULL)
+		return (ENOMEM);
 
-	if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)sk,
+	if (pf_state_key_addr_setup(pd, (struct pf_state_key_cmp *)*sk,
 	    0)) {
-		uma_zfree(V_pf_state_key_z, sk);
-		return (NULL);
+		uma_zfree(V_pf_state_key_z, *sk);
+		*sk = NULL;
+		return (ENOMEM);
 	}
 
-	sk->port[pd->sidx] = sport;
-	sk->port[pd->didx] = dport;
-	sk->proto = pd->proto;
-	sk->af = pd->af;
+	(*sk)->port[pd->sidx] = sport;
+	(*sk)->port[pd->didx] = dport;
+	(*sk)->proto = pd->proto;
+	(*sk)->af = pd->af;
 
-	return (sk);
+	*nk = pf_state_key_clone(*sk);
+	if (*nk == NULL) {
+		uma_zfree(V_pf_state_key_z, *sk);
+		*sk = NULL;
+		return (ENOMEM);
+	}
+
+	if (pd->af != pd->naf) {
+		(*sk)->port[pd->sidx] = pd->osport;
+		(*sk)->port[pd->didx] = pd->odport;
+
+		(*nk)->af = pd->naf;
+
+		/*
+		 * We're overwriting an address here, so there may still be
+		 * bits of an IPv6 address left in it.  Clear that out first.
+		 */
+		bzero(&(*nk)->addr[0], sizeof((*nk)->addr[0]));
+		bzero(&(*nk)->addr[1], sizeof((*nk)->addr[1]));
+
+		/*
+		 * pd->af != pd->naf in this branch, so every conditional
+		 * index below takes its second operand: the translated
+		 * source lands in the didx slot and the translated
+		 * destination in the sidx slot.
+		 */
+		PF_ACPY(&(*nk)->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
+		    &pd->nsaddr, pd->naf);
+		PF_ACPY(&(*nk)->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
+		    &pd->ndaddr, pd->naf);
+		(*nk)->port[pd->af == pd->naf ? pd->sidx : pd->didx] = pd->nsport;
+		(*nk)->port[pd->af == pd->naf ? pd->didx : pd->sidx] = pd->ndport;
+		/* the translated key carries the other family's ICMP protocol */
+		switch (pd->proto) {
+		case IPPROTO_ICMP:
+			(*nk)->proto = IPPROTO_ICMPV6;
+			break;
+		case IPPROTO_ICMPV6:
+			(*nk)->proto = IPPROTO_ICMP;
+			break;
+		default:
+			(*nk)->proto = pd->proto;
+		}
+	}
+
+	return (0);
 }
 
 struct pf_state_key *
@@ -1816,6 +1864,28 @@ pf_find_state(struct pfi_kkif *kif, const struct pf_state_key_cmp *key,
 			}
 			return (s);
 		}
+
+	/* Look through the other list, in case of AF-TO */
+	idx = idx == PF_SK_WIRE ? PF_SK_STACK : PF_SK_WIRE;
+	TAILQ_FOREACH(s, &sk->states[idx], key_list[idx]) {
+		if (s->key[PF_SK_WIRE]->af == s->key[PF_SK_STACK]->af)
+			continue;
+		if (s->kif == V_pfi_all || s->kif == kif || s->orig_kif == kif) {
+			PF_STATE_LOCK(s);
+			PF_HASHROW_UNLOCK(kh);
+			if (__predict_false(s->timeout >= PFTM_MAX)) {
+				/*
+				 * State is either being processed by
+				 * pf_unlink_state() in an other thread, or
+				 * is scheduled for immediate expiry.
+				 */
+				PF_STATE_UNLOCK(s);
+				return (NULL);
+			}
+			return (s);
+		}
+	}
+
 	PF_HASHROW_UNLOCK(kh);
 
 	return (NULL);
@@ -3024,6 +3094,7 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
 		return (0);
 	case PF_ADDR_DYNIFTL:
 		return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
+	case PF_ADDR_NONE:
 	case PF_ADDR_NOROUTE:
 	case PF_ADDR_URPFFAILED:
 		return (0);
@@ -3123,13 +3194,14 @@ pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
 static void
 pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
         u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
-        sa_family_t af)
+        sa_family_t af, sa_family_t naf)
 {
 	struct pf_addr	ao;
 	u_int16_t	po = *p;
 
 	PF_ACPY(&ao, a, af);
-	PF_ACPY(a, an, af);
+	if (af == naf)
+		PF_ACPY(a, an, af);
 
 	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
 		*pc = ~*pc;
@@ -3139,33 +3211,77 @@ pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
 	switch (af) {
 #ifdef INET
 	case AF_INET:
-		*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
-		    ao.addr16[0], an->addr16[0], 0),
-		    ao.addr16[1], an->addr16[1], 0);
-		*p = pn;
+		switch (naf) {
+		case AF_INET:
+			*ic = pf_cksum_fixup(pf_cksum_fixup(*ic,
+			    ao.addr16[0], an->addr16[0], 0),
+			    ao.addr16[1], an->addr16[1], 0);
+			*p = pn;
 
-		*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
-		    ao.addr16[0], an->addr16[0], u),
-		    ao.addr16[1], an->addr16[1], u);
+			*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
+			    ao.addr16[0], an->addr16[0], u),
+			    ao.addr16[1], an->addr16[1], u);
 
-		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
+			*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
+			break;
+#ifdef INET6
+		case AF_INET6:
+			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+			   pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+			    ao.addr16[0], an->addr16[0], u),
+			    ao.addr16[1], an->addr16[1], u),
+			    0,            an->addr16[2], u),
+			    0,            an->addr16[3], u),
+			    0,            an->addr16[4], u),
+			    0,            an->addr16[5], u),
+			    0,            an->addr16[6], u),
+			    0,            an->addr16[7], u),
+			    po, pn, u);
+
+			/* XXXKP TODO *ic checksum? */
+			break;
+#endif /* INET6 */
+		}
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
-		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-		    pf_cksum_fixup(pf_cksum_fixup(*pc,
-		    ao.addr16[0], an->addr16[0], u),
-		    ao.addr16[1], an->addr16[1], u),
-		    ao.addr16[2], an->addr16[2], u),
-		    ao.addr16[3], an->addr16[3], u),
-		    ao.addr16[4], an->addr16[4], u),
-		    ao.addr16[5], an->addr16[5], u),
-		    ao.addr16[6], an->addr16[6], u),
-		    ao.addr16[7], an->addr16[7], u);
-
-		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
+		switch (naf) {
+#ifdef INET
+		case AF_INET:
+			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+			    ao.addr16[0], an->addr16[0], u),
+			    ao.addr16[1], an->addr16[1], u),
+			    ao.addr16[2], 0,             u),
+			    ao.addr16[3], 0,             u),
+			    ao.addr16[4], 0,             u),
+			    ao.addr16[5], 0,             u),
+			    ao.addr16[6], 0,             u),
+			    ao.addr16[7], 0,             u),
+			    po, pn, u);
+
+			/* XXXKP TODO *ic checksum? */
+			break;
+#endif /* INET */
+		case AF_INET6:
+			*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+			    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
+			    pf_cksum_fixup(pf_cksum_fixup(*pc,
+			    ao.addr16[0], an->addr16[0], u),
+			    ao.addr16[1], an->addr16[1], u),
+			    ao.addr16[2], an->addr16[2], u),
+			    ao.addr16[3], an->addr16[3], u),
+			    ao.addr16[4], an->addr16[4], u),
+			    ao.addr16[5], an->addr16[5], u),
+			    ao.addr16[6], an->addr16[6], u),
+			    ao.addr16[7], an->addr16[7], u);
+
+			*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
+			break;
+		}
 		break;
 #endif /* INET6 */
 	}
@@ -3314,6 +3430,394 @@ pf_change_icmp(struct pf_addr *ia, u_int16_t *ip, struct pf_addr *oa,
 	}
 }
 
+/*
+ * Replace the network-layer header of the packet in pd->m with a fresh
+ * header of family pd->naf.
+ *
+ * The first pd->off bytes are trimmed and a struct ip or struct ip6_hdr is
+ * prepended and filled in from pd (tot_len, off, ttl, proto, nsaddr,
+ * ndaddr).  pd->src and pd->dst are repointed at the addresses inside the
+ * new header.  For ICMP and ICMPv6 payloads the checksum is recomputed
+ * from scratch.
+ *
+ * Returns 0 on success and -1 on failure.  pd->m is NULL after a failed
+ * M_PREPEND or m_pulldown.  When the kernel is not built with both INET
+ * and INET6 the body is compiled out and the function just returns 0.
+ */
+int
+pf_translate_af(struct pf_pdesc *pd)
+{
+#if defined(INET) && defined(INET6)
+	struct mbuf		*mp;
+	struct ip		*ip4;
+	struct ip6_hdr		*ip6;
+	struct icmp6_hdr	*icmp;
+	int			 hlen;
+
+	hlen = pd->naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
+
+	/* trim the old header */
+	m_adj(pd->m, pd->off);
+
+	/* prepend a new one */
+	M_PREPEND(pd->m, hlen, M_NOWAIT);
+	if (pd->m == NULL)
+		return (-1);
+
+	switch (pd->naf) {
+	case AF_INET:
+		ip4 = mtod(pd->m, struct ip *);
+		/* ip_sum stays zero: it is not computed in this function */
+		bzero(ip4, hlen);
+		ip4->ip_v = IPVERSION;
+		ip4->ip_hl = hlen >> 2;
+		/* new header plus everything that followed the old one */
+		ip4->ip_len = htons(hlen + (pd->tot_len - pd->off));
+		ip_fillid(ip4);
+		ip4->ip_off = htons(IP_DF);
+		ip4->ip_ttl = pd->ttl;
+		ip4->ip_p = pd->proto;
+		ip4->ip_src = pd->nsaddr.v4;
+		ip4->ip_dst = pd->ndaddr.v4;
+		pd->src = (struct pf_addr *)&ip4->ip_src;
+		pd->dst = (struct pf_addr *)&ip4->ip_dst;
+		break;
+	case AF_INET6:
+		ip6 = mtod(pd->m, struct ip6_hdr *);
+		bzero(ip6, hlen);
+		ip6->ip6_vfc = IPV6_VERSION;
+		ip6->ip6_plen = htons(pd->tot_len - pd->off);
+		ip6->ip6_nxt = pd->proto;
+		/* a zero or too-large TTL is replaced by the default hop limit */
+		if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
+			ip6->ip6_hlim = IPV6_DEFHLIM;
+		else
+			ip6->ip6_hlim = pd->ttl;
+		ip6->ip6_src = pd->nsaddr.v6;
+		ip6->ip6_dst = pd->ndaddr.v6;
+		pd->src = (struct pf_addr *)&ip6->ip6_src;
+		pd->dst = (struct pf_addr *)&ip6->ip6_dst;
+		break;
+	default:
+		return (-1);
+	}
+
+	/* recalculate icmp/icmp6 checksums */
+	if (pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6) {
+		int off;
+		/*
+		 * NOTE(review): pd->m is cleared on failure, presumably
+		 * because m_pulldown() has already freed the chain --
+		 * confirm against mbuf(9).
+		 */
+		if ((mp = m_pulldown(pd->m, hlen, sizeof(*icmp), &off)) ==
+		    NULL) {
+			pd->m = NULL;
+			return (-1);
+		}
+		/*
+		 * The checksum field is reached through struct icmp6_hdr for
+		 * both families; this assumes the ICMPv4 checksum sits at the
+		 * same offset -- TODO confirm.
+		 */
+		icmp = (struct icmp6_hdr *)(mp->m_data + off);
+		icmp->icmp6_cksum = 0;
+		/* only the header pointer set in the switch above is read */
+		icmp->icmp6_cksum = pd->naf == AF_INET ?
+		    in4_cksum(pd->m, 0, hlen, ntohs(ip4->ip_len) - hlen) :
+		    in6_cksum(pd->m, IPPROTO_ICMPV6, hlen,
+			ntohs(ip6->ip6_plen));
+	}
+#endif /* INET && INET6 */
+
+	return (0);
+}
+
+/*
+ * Rewrite the IP header embedded in an ICMP error payload from address
+ * family af to naf.
+ *
+ * m	packet holding the ICMP error
+ * off	offset in m at which the embedded IP/IPv6 header starts
+ * pd	descriptor of the outer packet; tot_len is adjusted by the header
+ *	size difference
+ * pd2	descriptor of the embedded packet; off is adjusted by the header
+ *	size difference, ttl and proto are copied into the new header
+ * src, dst	addresses to place in the new embedded header
+ *
+ * The chain is split at off, the old embedded header (pd2->off - off
+ * bytes) is trimmed from the tail, a new struct ip or struct ip6_hdr is
+ * prepended and filled in, and the tail is concatenated back onto m.
+ *
+ * Returns 0 on success.  Returns -1 if af equals naf, if either family is
+ * not AF_INET/AF_INET6, or if m_split or M_PREPEND fails.  When the kernel
+ * is not built with both INET and INET6 the body is compiled out and the
+ * function just returns 0.
+ *
+ * NOTE(review): if M_PREPEND fails, m has already been split at off and
+ * is not restored; callers presumably drop the packet -- confirm.
+ */
+int
+pf_change_icmp_af(struct mbuf *m, int off, struct pf_pdesc *pd,
+    struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
+    sa_family_t af, sa_family_t naf)
+{
+#if defined(INET) && defined(INET6)
+	struct mbuf	*n = NULL;
+	struct ip	*ip4;
+	struct ip6_hdr	*ip6;
+	int		 hlen, olen, mlen;
+
+	if (af == naf || (af != AF_INET && af != AF_INET6) ||
+	    (naf != AF_INET && naf != AF_INET6))
+		return (-1);
+
+	/*
+	 * data length: the bytes that follow the old embedded header.
+	 * This has to be taken before the split below.  Afterwards
+	 * m->m_pkthdr.len only covers the head of the chain (the tail's
+	 * length is added back when the halves are merged at the end),
+	 * which would make this value negative.
+	 *
+	 * NOTE(review): this counts the bytes actually present in the
+	 * mbuf; whether the length declared by the embedded header would
+	 * be more appropriate for truncated quotes needs confirming.
+	 */
+	mlen = m->m_pkthdr.len - pd2->off;
+
+	/* split the mbuf chain on the inner ip/ip6 header boundary */
+	if ((n = m_split(m, off, M_NOWAIT)) == NULL)
+		return (-1);
+
+	/* old header */
+	olen = pd2->off - off;
+	/* new header */
+	hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
+
+	/* trim old header */
+	m_adj(n, olen);
+
+	/* prepend a new one */
+	M_PREPEND(n, hlen, M_NOWAIT);
+	if (n == NULL)
+		return (-1);
+
+	/* translate inner ip/ip6 header */
+	switch (naf) {
+	case AF_INET:
+		ip4 = mtod(n, struct ip *);
+		bzero(ip4, sizeof(*ip4));
+		ip4->ip_v = IPVERSION;
+		ip4->ip_hl = sizeof(*ip4) >> 2;
+		ip4->ip_len = htons(sizeof(*ip4) + mlen);
+		ip_fillid(ip4);
+		ip4->ip_off = htons(IP_DF);
+		ip4->ip_ttl = pd2->ttl;
+		/* an embedded ICMPv6 packet becomes ICMP in the new header */
+		if (pd2->proto == IPPROTO_ICMPV6)
+			ip4->ip_p = IPPROTO_ICMP;
+		else
+			ip4->ip_p = pd2->proto;
+		ip4->ip_src = src->v4;
+		ip4->ip_dst = dst->v4;
+		ip4->ip_sum = in_cksum(n, ip4->ip_hl << 2);
+		break;
+	case AF_INET6:
+		ip6 = mtod(n, struct ip6_hdr *);
+		bzero(ip6, sizeof(*ip6));
+		ip6->ip6_vfc = IPV6_VERSION;
+		ip6->ip6_plen = htons(mlen);
+		/* an embedded ICMP packet becomes ICMPv6 in the new header */
+		if (pd2->proto == IPPROTO_ICMP)
+			ip6->ip6_nxt = IPPROTO_ICMPV6;
+		else
+			ip6->ip6_nxt = pd2->proto;
+		/* a zero or too-large TTL is replaced by the default hop limit */
+		if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
+			ip6->ip6_hlim = IPV6_DEFHLIM;
+		else
+			ip6->ip6_hlim = pd2->ttl;
+		ip6->ip6_src = src->v6;
+		ip6->ip6_dst = dst->v6;
+		break;
+	}
+
+	/* adjust payload offset and total packet length */
+	pd2->off += hlen - olen;
+	pd->tot_len += hlen - olen;
+
+	/* merge modified inner packet with the original header */
+	mlen = n->m_pkthdr.len;
+	m_cat(m, n);
+	m->m_pkthdr.len += mlen;
+#endif /* INET && INET6 */
+
+	return (0);
+}
+
+#define PTR_IP(field)	(offsetof(struct ip, field))
+#define PTR_IP6(field)	(offsetof(struct ip6_hdr, field))
+
+int
+pf_translate_icmp_af(int af, void *arg)
+{
+#if defined(INET) && defined(INET6)
+	struct icmp		*icmp4;
+	struct icmp6_hdr	*icmp6;
+	u_int32_t		 mtu;
+	int32_t			 ptr = -1;
+	u_int8_t		 type;
+	u_int8_t		 code;
+
+	switch (af) {
+	case AF_INET:
+		icmp6 = arg;
+		type = icmp6->icmp6_type;
+		code = icmp6->icmp6_code;
+		mtu = ntohl(icmp6->icmp6_mtu);
+
+		switch (type) {
+		case ICMP6_ECHO_REQUEST:
+			type = ICMP_ECHO;
+			break;
+		case ICMP6_ECHO_REPLY:
+			type = ICMP_ECHOREPLY;
+			break;
+		case ICMP6_DST_UNREACH:
+			type = ICMP_UNREACH;
+			switch (code) {
+			case ICMP6_DST_UNREACH_NOROUTE:
+			case ICMP6_DST_UNREACH_BEYONDSCOPE:
+			case ICMP6_DST_UNREACH_ADDR:
+				code = ICMP_UNREACH_HOST;
+				break;
+			case ICMP6_DST_UNREACH_ADMIN:
+				code = ICMP_UNREACH_HOST_PROHIB;
+				break;
+			case ICMP6_DST_UNREACH_NOPORT:
+				code = ICMP_UNREACH_PORT;
+				break;
+			default:
+				return (-1);
+			}
+			break;
+		case ICMP6_PACKET_TOO_BIG:
+			type = ICMP_UNREACH;
+			code = ICMP_UNREACH_NEEDFRAG;
+			mtu -= 20;
+			break;
+		case ICMP6_TIME_EXCEEDED:
+			type = ICMP_TIMXCEED;
+			break;
+		case ICMP6_PARAM_PROB:
+			switch (code) {
+			case ICMP6_PARAMPROB_HEADER:
+				type = ICMP_PARAMPROB;
+				code = ICMP_PARAMPROB_ERRATPTR;
+				ptr = ntohl(icmp6->icmp6_pptr);
+
+				if (ptr == PTR_IP6(ip6_vfc))
+					; /* preserve */
+				else if (ptr == PTR_IP6(ip6_vfc) + 1)
+					ptr = PTR_IP(ip_tos);
+				else if (ptr == PTR_IP6(ip6_plen) ||
+				    ptr == PTR_IP6(ip6_plen) + 1)
+					ptr = PTR_IP(ip_len);
+				else if (ptr == PTR_IP6(ip6_nxt))
+					ptr = PTR_IP(ip_p);
+				else if (ptr == PTR_IP6(ip6_hlim))
+					ptr = PTR_IP(ip_ttl);
+				else if (ptr >= PTR_IP6(ip6_src) &&
+				    ptr < PTR_IP6(ip6_dst))
+					ptr = PTR_IP(ip_src);
+				else if (ptr >= PTR_IP6(ip6_dst) &&
+				    ptr < sizeof(struct ip6_hdr))
+					ptr = PTR_IP(ip_dst);
+				else {
+					return (-1);
+				}
+				break;
+			case ICMP6_PARAMPROB_NEXTHEADER:
+				type = ICMP_UNREACH;
+				code = ICMP_UNREACH_PROTOCOL;
+				break;
+			default:
+				return (-1);
+			}
+			break;
+		default:
+			return (-1);
+		}
+		if (icmp6->icmp6_type != type) {
+			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
+			    icmp6->icmp6_type, type, 0);
+			icmp6->icmp6_type = type;
+		}
+		if (icmp6->icmp6_code != code) {
+			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
+			    icmp6->icmp6_code, code, 0);
+			icmp6->icmp6_code = code;
+		}
+		if (icmp6->icmp6_mtu != htonl(mtu)) {
+			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
+			    htons(ntohl(icmp6->icmp6_mtu)), htons(mtu), 0);
+			/* aligns well with a icmpv4 nextmtu */
+			icmp6->icmp6_mtu = htonl(mtu);
+		}
+		if (ptr >= 0 && icmp6->icmp6_pptr != htonl(ptr)) {
+			icmp6->icmp6_cksum = pf_cksum_fixup(icmp6->icmp6_cksum,
+			    htons(ntohl(icmp6->icmp6_pptr)), htons(ptr), 0);
+			/* icmpv4 pptr is a one most significant byte */
+			icmp6->icmp6_pptr = htonl(ptr << 24);
+		}
+		break;
+	case AF_INET6:
+		icmp4 = arg;
+		type = icmp4->icmp_type;
+		code = icmp4->icmp_code;
+		mtu = ntohs(icmp4->icmp_nextmtu);
+
+		switch (type) {
+		case ICMP_ECHO:
+			type = ICMP6_ECHO_REQUEST;
+			break;
+		case ICMP_ECHOREPLY:
+			type = ICMP6_ECHO_REPLY;
+			break;
+		case ICMP_UNREACH:
+			type = ICMP6_DST_UNREACH;
+			switch (code) {
+			case ICMP_UNREACH_NET:
+			case ICMP_UNREACH_HOST:
+			case ICMP_UNREACH_NET_UNKNOWN:
*** 1551 LINES SKIPPED ***