git: e5c4987e3fc1 - main - pf: fix dummynet + NAT

From: Kristof Provost <kp_at_FreeBSD.org>
Date: Thu, 28 Oct 2021 08:50:41 UTC
The branch main has been updated by kp:

URL: https://cgit.FreeBSD.org/src/commit/?id=e5c4987e3fc1997264f4c3a10d3047a9379b9b15

commit e5c4987e3fc1997264f4c3a10d3047a9379b9b15
Author:     Kristof Provost <kp@FreeBSD.org>
AuthorDate: 2021-10-26 07:59:42 +0000
Commit:     Kristof Provost <kp@FreeBSD.org>
CommitDate: 2021-10-28 08:41:17 +0000

    pf: fix dummynet + NAT
    
    Dummynet differs from ALTQ in that ALTQ schedules packets after they
    leave pf. Dummynet schedules them after they leave pf, but then
    re-injects them.
    We currently deal with this by ensuring we don't re-schedule a packet we
    get from dummynet, but this produces unexpected results when combined
    with NAT, as dummynet processing is done after the NAT transformation.
    In other words, the second time the packet is handed to pf it may have a
    different source and destination address.
    
    Simplify this by moving dummynet processing to after all other pf
    processing, and not re-processing (but always passing) packets from
    dummynet.
    
    This fixes NAT of dummynet delayed packets, and also reduces processing
    overhead (because we only do state/rule lookup for each dummynet packet
    once, rather than twice).
    
    MFC after:      3 weeks
    Sponsored by:   Rubicon Communications, LLC ("Netgate")
    Differential Revision:  https://reviews.freebsd.org/D32665
---
 sys/netpfil/pf/pf.c | 73 +++++++++++++++++++++++------------------------------
 1 file changed, 32 insertions(+), 41 deletions(-)

diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index bb7667a3e270..61eea329a7f7 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -6456,6 +6456,13 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb *
 			}
 			pd.pf_mtag->flags |= PF_PACKET_LOOPED;
 			m_tag_delete(m, ipfwtag);
+			if (rr->info & IPFW_IS_DUMMYNET) {
+				/* Dummynet re-injects packets after they've
+				 * completed their delay. We've already
+				 * processed them, so pass unconditionally. */
+				PF_RULES_RUNLOCK();
+				return (PF_PASS);
+			}
 		}
 		if (pd.pf_mtag && pd.pf_mtag->flags & PF_FASTFWD_OURS_PRESENT) {
 			m->m_flags |= M_FASTFWD_OURS;
@@ -6704,47 +6711,6 @@ done:
 	}
 #endif /* ALTQ */
 
-	if (s && (s->dnpipe || s->dnrpipe)) {
-		pd.act.dnpipe = s->dnpipe;
-		pd.act.dnrpipe = s->dnrpipe;
-		pd.act.flags = s->state_flags;
-	} else if (r->dnpipe || r->dnrpipe) {
-		pd.act.dnpipe = r->dnpipe;
-		pd.act.dnrpipe = r->dnrpipe;
-		pd.act.flags = r->free_flags;
-	}
-	if ((pd.act.dnpipe || pd.act.dnrpipe) && !PACKET_LOOPED(&pd)) {
-		if (ip_dn_io_ptr == NULL) {
-			action = PF_DROP;
-			REASON_SET(&reason, PFRES_MEMORY);
-		} else {
-			struct ip_fw_args dnflow;
-
-			if (pd.pf_mtag == NULL &&
-			    ((pd.pf_mtag = pf_get_mtag(m)) == NULL)) {
-				action = PF_DROP;
-				REASON_SET(&reason, PFRES_MEMORY);
-				if (s)
-					PF_STATE_UNLOCK(s);
-				return (action);
-			}
-
-			if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) {
-				ip_dn_io_ptr(m0, &dnflow);
-
-				if (*m0 == NULL) {
-					if (s)
-						PF_STATE_UNLOCK(s);
-					return (action);
-				} else {
-					/* This is dummynet fast io processing */
-					m_tag_delete(*m0, m_tag_first(*m0));
-					pd.pf_mtag->flags &= ~PF_PACKET_LOOPED;
-				}
-			}
-		}
-	}
-
 	/*
 	 * connections redirected to loopback should not match sockets
 	 * bound specifically to loopback due to security implications,
@@ -6885,6 +6851,31 @@ done:
 			pf_route(m0, r, dir, kif->pfik_ifp, s, &pd, inp);
 			return (action);
 		}
+		/* Dummynet processing. */
+		if (s && (s->dnpipe || s->dnrpipe)) {
+			pd.act.dnpipe = s->dnpipe;
+			pd.act.dnrpipe = s->dnrpipe;
+			pd.act.flags = s->state_flags;
+		} else if (r->dnpipe || r->dnrpipe) {
+			pd.act.dnpipe = r->dnpipe;
+			pd.act.dnrpipe = r->dnrpipe;
+			pd.act.flags = r->free_flags;
+		}
+		if (pd.act.dnpipe || pd.act.dnrpipe) {
+			if (ip_dn_io_ptr == NULL) {
+				m_freem(*m0);
+				*m0 = NULL;
+				REASON_SET(&reason, PFRES_MEMORY);
+			} else {
+				struct ip_fw_args dnflow;
+
+				if (pf_pdesc_to_dnflow(dir, &pd, r, s, &dnflow)) {
+					ip_dn_io_ptr(m0, &dnflow);
+					if (*m0 == NULL)
+						action = PF_DROP;
+				}
+			}
+		}
 		break;
 	}