kern/176763: [pf] [patch] Removing pf Source entries locks kernel.
Kajetan Staszkiewicz
vegeta at tuxpowered.net
Mon Nov 18 17:20:02 UTC 2013
The following reply was made to PR kern/176763; it has been noted by GNATS.
From: Kajetan Staszkiewicz <vegeta at tuxpowered.net>
To: bug-followup at freebsd.org
Cc:
Subject: Re: kern/176763: [pf] [patch] Removing pf Source entries locks kernel.
Date: Mon, 18 Nov 2013 18:12:36 +0100
--Boundary-00=_EqkiSNwqoUJOzB0
Content-Type: Text/Plain;
charset="us-ascii"
Content-Transfer-Encoding: 7bit
The attached patch is for FreeBSD 10. It adds a new parameter "-c" to pfctl
which when killing src_nodes, also kills states linked to the found nodes.
--
| pozdrawiam / greetings | powered by Debian, FreeBSD and CentOS |
| Kajetan Staszkiewicz | jabber,email: vegeta()tuxpowered net |
| Vegeta | www: http://vegeta.tuxpowered.net |
`------------------------^---------------------------------------'
--Boundary-00=_EqkiSNwqoUJOzB0
Content-Type: text/x-patch;
charset="UTF-8";
name="link-states-to-src-nodes.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="link-states-to-src-nodes.patch"
# Removing src_nodes causes the list of states to be fully searched through
# to find ones linked to the given src_node. With large amount of src_nodes
# and states (for example when under a DDoS attack) this operation can take
# many seconds to complete.
#
# Provide a list of states linked to each src_node and use the list to make
# the operation faster. Add new parameter "-c" to pfctl which, when
# killing src_nodes, also kills states linked to found nodes.
#
# kajetan.staszkiewicz at innogames.de
# Work sponsored by InnoGames GmbH
#
diff --git a/sbin/pfctl/pfctl.8 b/sbin/pfctl/pfctl.8
index 5c0e7b3..61c5711 100644
--- a/sbin/pfctl/pfctl.8
+++ b/sbin/pfctl/pfctl.8
@@ -42,7 +42,8 @@
.Op Fl F Ar modifier
.Op Fl f Ar file
.Op Fl i Ar interface
-.Op Fl K Ar host | network
+.Oo Fl K Ar host | network
+.Op Fl c Oc
.Xo
.Oo Fl k
.Ar host | network | label | id
@@ -189,6 +190,10 @@ as the anchor name:
.Bd -literal -offset indent
# pfctl -a '*' -sr
.Ed
+.It Fl c
+When removing source tracking entries, remove state entries linked to
+them. This option can be only used in conjunction with
+.Fl K .
.It Fl D Ar macro Ns = Ns Ar value
Define
.Ar macro
diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
index 90a2bb5..6a2dd90 100644
--- a/sbin/pfctl/pfctl.c
+++ b/sbin/pfctl/pfctl.c
@@ -236,7 +236,7 @@ usage(void)
fprintf(stderr, "usage: %s [-AdeghmNnOPqRrvz] ", __progname);
fprintf(stderr, "[-a anchor] [-D macro=value] [-F modifier]\n");
- fprintf(stderr, "\t[-f file] [-i interface] [-K host | network]\n");
+ fprintf(stderr, "\t[-f file] [-i interface] [-K host | network [-c]]\n");
fprintf(stderr, "\t[-k host | network | label | id] ");
fprintf(stderr, "[-o level] [-p device]\n");
fprintf(stderr, "\t[-s modifier] ");
@@ -449,10 +449,10 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
struct pfioc_src_node_kill psnk;
struct addrinfo *res[2], *resp[2];
struct sockaddr last_src, last_dst;
- int killed, sources, dests;
+ int killed, killed_states, sources, dests;
int ret_ga;
- killed = sources = dests = 0;
+ killed = killed_states = sources = dests = 0;
memset(&psnk, 0, sizeof(psnk));
memset(&psnk.psnk_src.addr.v.a.mask, 0xff,
@@ -462,6 +462,8 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
pfctl_addrprefix(src_node_kill[0], &psnk.psnk_src.addr.v.a.mask);
+ psnk.psnk_kill_linked_states = opts & PF_OPT_KILLLINKEDSTATES;
+
if ((ret_ga = getaddrinfo(src_node_kill[0], NULL, NULL, &res[0]))) {
errx(1, "getaddrinfo: %s", gai_strerror(ret_ga));
/* NOTREACHED */
@@ -529,20 +531,23 @@ pfctl_kill_src_nodes(int dev, const char *iface, int opts)
if (ioctl(dev, DIOCKILLSRCNODES, &psnk))
err(1, "DIOCKILLSRCNODES");
killed += psnk.psnk_killed;
+ killed_states += psnk.psnk_killed_states;
}
freeaddrinfo(res[1]);
} else {
if (ioctl(dev, DIOCKILLSRCNODES, &psnk))
err(1, "DIOCKILLSRCNODES");
killed += psnk.psnk_killed;
+ killed_states += psnk.psnk_killed_states;
}
}
freeaddrinfo(res[0]);
if ((opts & PF_OPT_QUIET) == 0)
- fprintf(stderr, "killed %d src nodes from %d sources and %d "
- "destinations\n", killed, sources, dests);
+ fprintf(stderr, "killed %d src nodes and %d linked states "
+ "from %d sources and %d destinations\n",
+ killed, killed_states, sources, dests);
return (0);
}
@@ -2002,11 +2007,14 @@ main(int argc, char *argv[])
usage();
while ((ch = getopt(argc, argv,
- "a:AdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) {
+ "a:AcdD:eqf:F:ghi:k:K:mnNOo:Pp:rRs:t:T:vx:z")) != -1) {
switch (ch) {
case 'a':
anchoropt = optarg;
break;
+ case 'c':
+ opts |= PF_OPT_KILLLINKEDSTATES;
+ break;
case 'd':
opts |= PF_OPT_DISABLE;
mode = O_RDWR;
diff --git a/sbin/pfctl/pfctl_parser.h b/sbin/pfctl/pfctl_parser.h
index 4560d66..b272b0b 100644
--- a/sbin/pfctl/pfctl_parser.h
+++ b/sbin/pfctl/pfctl_parser.h
@@ -51,6 +51,7 @@
#define PF_OPT_NUMERIC 0x1000
#define PF_OPT_MERGE 0x2000
#define PF_OPT_RECURSE 0x4000
+#define PF_OPT_KILLLINKEDSTATES 0x8000
#define PF_TH_ALL 0xFF
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index c16591b..e5395e3 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -697,6 +697,7 @@ struct pf_threshold {
struct pf_src_node {
LIST_ENTRY(pf_src_node) entry;
+ TAILQ_HEAD(, pf_state) state_list;
struct pf_addr addr;
struct pf_addr raddr;
union pf_rule_ptr rule;
@@ -787,6 +788,7 @@ struct pf_state {
TAILQ_ENTRY(pf_state) sync_list;
TAILQ_ENTRY(pf_state) key_list[2];
LIST_ENTRY(pf_state) entry;
+ TAILQ_ENTRY(pf_state) srcnode_link;
struct pf_state_peer src;
struct pf_state_peer dst;
union pf_rule_ptr rule;
@@ -1445,6 +1447,8 @@ struct pfioc_src_node_kill {
struct pf_rule_addr psnk_src;
struct pf_rule_addr psnk_dst;
u_int psnk_killed;
+ u_int psnk_killed_states;
+ u_int psnk_kill_linked_states;
};
struct pfioc_state_kill {
diff --git a/sys/netpfil/pf/pf.c b/sys/netpfil/pf/pf.c
index 2de8c40..9da73c5 100644
--- a/sys/netpfil/pf/pf.c
+++ b/sys/netpfil/pf/pf.c
@@ -652,6 +652,8 @@ pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
rule->max_src_conn_rate.limit,
rule->max_src_conn_rate.seconds);
+ TAILQ_INIT(&(*sn)->state_list);
+
(*sn)->af = af;
(*sn)->rule.ptr = rule;
PF_ACPY(&(*sn)->addr, src, af);
@@ -1482,6 +1484,7 @@ static void
pf_src_tree_remove_state(struct pf_state *s)
{
u_int32_t timeout;
+ struct pf_srchash *sh = NULL;
if (s->src_node != NULL) {
if (s->src.tcp_est)
@@ -1493,6 +1496,12 @@ pf_src_tree_remove_state(struct pf_state *s)
V_pf_default_rule.timeout[PFTM_SRC_NODE];
s->src_node->expire = time_uptime + timeout;
}
+ sh = &V_pf_srchash[pf_hashsrc(&s->src_node->addr, s->src_node->af)];
+ PF_HASHROW_LOCK(sh);
+ if (!TAILQ_EMPTY(&s->src_node->state_list))
+ TAILQ_REMOVE(&s->src_node->state_list, s, srcnode_link);
+ PF_HASHROW_UNLOCK(sh);
+
}
if (s->nat_src_node != s->src_node && s->nat_src_node != NULL) {
if (--s->nat_src_node->states <= 0) {
@@ -1502,6 +1511,11 @@ pf_src_tree_remove_state(struct pf_state *s)
V_pf_default_rule.timeout[PFTM_SRC_NODE];
s->nat_src_node->expire = time_uptime + timeout;
}
+ sh = &V_pf_srchash[pf_hashsrc(&s->nat_src_node->addr, s->nat_src_node->af)];
+ PF_HASHROW_LOCK(sh);
+ if (!TAILQ_EMPTY(&s->nat_src_node->state_list))
+ TAILQ_REMOVE(&s->nat_src_node->state_list, s, srcnode_link);
+ PF_HASHROW_UNLOCK(sh);
}
s->src_node = s->nat_src_node = NULL;
}
@@ -3407,6 +3421,7 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
int tag, u_int16_t bproto_sum, u_int16_t bip_sum, int hdrlen)
{
struct pf_state *s = NULL;
+ struct pf_srchash *sh = NULL;
struct pf_src_node *sn = NULL;
struct tcphdr *th = pd->hdr.tcp;
u_int16_t mss = V_tcp_mssdflt;
@@ -3505,14 +3520,22 @@ pf_create_state(struct pf_rule *r, struct pf_rule *nr, struct pf_rule *a,
s->expire = time_uptime;
if (sn != NULL) {
+ sh = &V_pf_srchash[pf_hashsrc(&sn->addr, sn->af)];
+ PF_HASHROW_LOCK(sh);
s->src_node = sn;
s->src_node->states++;
+ TAILQ_INSERT_HEAD(&sn->state_list, s, srcnode_link);
+ PF_HASHROW_UNLOCK(sh);
}
if (nsn != NULL) {
/* XXX We only modify one side for now. */
+ sh = &V_pf_srchash[pf_hashsrc(&nsn->addr, nsn->af)];
+ PF_HASHROW_LOCK(sh);
PF_ACPY(&nsn->raddr, &nk->addr[1], pd->af);
s->nat_src_node = nsn;
s->nat_src_node->states++;
+ TAILQ_INSERT_HEAD(&nsn->state_list, s, srcnode_link);
+ PF_HASHROW_UNLOCK(sh);
}
if (pd->proto == IPPROTO_TCP) {
if ((pd->flags & PFDESC_TCP_NORM) && pf_normalize_tcp_init(m,
diff --git a/sys/netpfil/pf/pf_ioctl.c b/sys/netpfil/pf/pf_ioctl.c
index 2b0f2cd..0267ef0 100644
--- a/sys/netpfil/pf/pf_ioctl.c
+++ b/sys/netpfil/pf/pf_ioctl.c
@@ -150,7 +150,8 @@ struct cdev *pf_dev;
*/
static void pf_clear_states(void);
static int pf_clear_tables(void);
-static void pf_clear_srcnodes(struct pf_src_node *);
+static u_int32_t pf_clear_srcnodes(struct pf_src_node *,
+ int kill_states);
static void pf_tbladdr_copyout(struct pf_addr_wrap *);
/*
@@ -3134,7 +3135,7 @@ DIOCCHANGEADDR_error:
case DIOCCLRSRCNODES: {
- pf_clear_srcnodes(NULL);
+ pf_clear_srcnodes(NULL, 0);
pf_purge_expired_src_nodes();
V_pf_status.src_nodes = 0;
break;
@@ -3145,7 +3146,7 @@ DIOCCHANGEADDR_error:
(struct pfioc_src_node_kill *)addr;
struct pf_srchash *sh;
struct pf_src_node *sn;
- u_int i, killed = 0;
+ u_int i, killed = 0, killed_states = 0;
for (i = 0, sh = V_pf_srchash; i < V_pf_srchashmask;
i++, sh++) {
@@ -3166,7 +3167,7 @@ DIOCCHANGEADDR_error:
&sn->raddr, sn->af)) {
/* Handle state to src_node linkage */
if (sn->states != 0)
- pf_clear_srcnodes(sn);
+ killed_states += pf_clear_srcnodes(sn, psnk->psnk_kill_linked_states);
sn->expire = 1;
killed++;
}
@@ -3177,6 +3178,7 @@ DIOCCHANGEADDR_error:
pf_purge_expired_src_nodes();
psnk->psnk_killed = killed;
+ psnk->psnk_killed_states = killed_states;
break;
}
@@ -3360,24 +3362,12 @@ pf_clear_tables(void)
return (error);
}
-static void
-pf_clear_srcnodes(struct pf_src_node *n)
+static u_int32_t
+pf_clear_srcnodes(struct pf_src_node *n, int kill_states)
{
struct pf_state *s;
int i;
-
- for (i = 0; i <= V_pf_hashmask; i++) {
- struct pf_idhash *ih = &V_pf_idhash[i];
-
- PF_HASHROW_LOCK(ih);
- LIST_FOREACH(s, &ih->states, entry) {
- if (n == NULL || n == s->src_node)
- s->src_node = NULL;
- if (n == NULL || n == s->nat_src_node)
- s->nat_src_node = NULL;
- }
- PF_HASHROW_UNLOCK(ih);
- }
+ int killed_states = 0;
if (n == NULL) {
struct pf_srchash *sh;
@@ -3386,6 +3376,19 @@ pf_clear_srcnodes(struct pf_src_node *n)
i++, sh++) {
PF_HASHROW_LOCK(sh);
LIST_FOREACH(n, &sh->nodes, entry) {
+ while (!TAILQ_EMPTY(&n->state_list)) {
+ s = TAILQ_FIRST(&n->state_list);
+ if (kill_states) {
+ pf_unlink_state(s, 0);
+ killed_states++;
+ } else {
+ PF_STATE_LOCK(s);
+ TAILQ_REMOVE(&n->state_list, s, srcnode_link);
+ s->src_node = NULL;
+ s->nat_src_node = NULL;
+ PF_STATE_UNLOCK(s);
+ }
+ }
n->expire = 1;
n->states = 0;
}
@@ -3393,9 +3396,24 @@ pf_clear_srcnodes(struct pf_src_node *n)
}
} else {
/* XXX: hash slot should already be locked here. */
+ while (!TAILQ_EMPTY(&n->state_list)) {
+ s = TAILQ_FIRST(&n->state_list);
+ if (kill_states) {
+ pf_unlink_state(s, 0);
+ killed_states++;
+ } else {
+ PF_STATE_LOCK(s);
+ TAILQ_REMOVE(&n->state_list, s, srcnode_link);
+ s->src_node = NULL;
+ s->nat_src_node = NULL;
+ PF_STATE_UNLOCK(s);
+ }
+ }
n->expire = 1;
n->states = 0;
}
+
+ return killed_states;
}
/*
* XXX - Check for version missmatch!!!
@@ -3459,7 +3477,7 @@ shutdown_pf(void)
pf_clear_states();
- pf_clear_srcnodes(NULL);
+ pf_clear_srcnodes(NULL, 0);
/* status does not use malloced mem so no need to cleanup */
/* fingerprints and interfaces have thier own cleanup code */
--Boundary-00=_EqkiSNwqoUJOzB0--
More information about the freebsd-pf
mailing list