[patch] ipfw interface tracking and opcode rewriting
Alexander V. Chernikov
melifaro at FreeBSD.org
Wed Apr 24 16:02:25 UTC 2013
Hello list!
Currently ipfw uses the strncmp() function for interface matching, which is
quite slow.
Additionally, the ipfw_insn_if opcode is quite big, and given that struct
ip_fw occupies 48 bytes
(without the first instruction), there is a good chance that part of the
interface name will land on the second cache line on amd64.
Pure synthetic testing (ipfw with 1 and 2 'ipfw count ip from any to any
recv ifaceX') shows about 3.8% performance loss (190kpps out of 5.1
mpps) for each rule,
while indexed version shows about 2.0% and 1.2% for first and second rule.
Additionally, our production (8.3-based firewalls with old strncmp)
shows about 40% kernel time spent in strncmp on 1-2mpps (each packet
traverses 5-6 such rules).
Here is the patch which does the following:
1) adds interface tracking for ipfw. Every interface is tracked
regardless of its usage in the ruleset. This simplifies locking and
makes it easier to port such functionality to userland.
2) adds a general opcode rewriting system permitting the kernel to
algorithmically (stateless) or statefully (involving external data)
rewrite user-supplied opcodes with a possible size change.
This can be used to deprecate opcodes which are now superseded by newer
ones while keeping ABI (and we currently have such opcodes).
3) stores (and tracks) the interface index for non-wildcard interfaces inside
the opcode.
If there are no objections I would like to commit a (possibly updated)
version in the middle of next week.
-------------- next part --------------
Index: sys/netinet/ip_fw.h
===================================================================
--- sys/netinet/ip_fw.h (revision 248704)
+++ sys/netinet/ip_fw.h (working copy)
@@ -341,6 +341,7 @@ typedef struct _ipfw_insn_if {
union {
struct in_addr ip;
int glob;
+ unsigned int if_idx; /* Interface index (kernel) */
} p;
char name[IFNAMSIZ];
} ipfw_insn_if;
@@ -495,6 +496,8 @@ typedef struct _ipfw_insn_icmp6 {
* queue(3) macros for portability and readability.
*/
+#define IP_FW_RULE_REWRITTEN 0x01 /* Rule is modified by rewriter */
+
struct ip_fw {
struct ip_fw *x_next; /* linked list of rules */
struct ip_fw *next_rule; /* ptr to next [skipto] rule */
@@ -505,7 +508,7 @@ struct ip_fw {
uint16_t rulenum; /* rule number */
uint8_t set; /* rule set (0..31) */
#define RESVD_SET 31 /* set for default and persistent rules */
- uint8_t _pad; /* padding */
+ uint8_t flags; /* rule flags (IP_FW_RULE_*), formerly padding */
uint32_t id; /* rule id */
/* These fields are present in all rules. */
Index: sys/modules/ipfw/Makefile
===================================================================
--- sys/modules/ipfw/Makefile (revision 248704)
+++ sys/modules/ipfw/Makefile (working copy)
@@ -8,6 +8,7 @@ KMOD= ipfw
SRCS= ip_fw2.c ip_fw_pfil.c
SRCS+= ip_fw_dynamic.c ip_fw_log.c
SRCS+= ip_fw_sockopt.c ip_fw_table.c
+SRCS+= ip_fw_iface.c ip_fw_rewrite.c
SRCS+= opt_inet.h opt_inet6.h opt_ipdivert.h opt_ipfw.h opt_ipsec.h
CFLAGS+= -DIPFIREWALL
Index: sys/netpfil/ipfw/ip_fw2.c
===================================================================
--- sys/netpfil/ipfw/ip_fw2.c (revision 248704)
+++ sys/netpfil/ipfw/ip_fw2.c (working copy)
@@ -353,17 +353,17 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd,
if (ifp == NULL) /* no iface with this packet, match fails */
return 0;
/* Check by name or by IP address */
- if (cmd->name[0] != '\0') { /* match by name */
- if (cmd->name[0] == '\1') /* use tablearg to match */
+ if (cmd->o.arg1 != 0) { /* match by name */
+ if (cmd->o.arg1 == 1) /* use tablearg to match */
return ipfw_lookup_table_extended(chain, cmd->p.glob,
ifp->if_xname, tablearg, IPFW_TABLE_INTERFACE);
/* Check name */
- if (cmd->p.glob) {
+ if (cmd->p.if_idx) {
+ if (ifp->if_index == cmd->p.if_idx)
+ return (1);
+ } else {
if (fnmatch(cmd->name, ifp->if_xname, 0) == 0)
return(1);
- } else {
- if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0)
- return(1);
}
} else {
#ifdef __FreeBSD__ /* and OSX too ? */
@@ -2667,6 +2667,8 @@ vnet_ipfw_init(const void *unused)
IPFW_LOCK_INIT(chain);
ipfw_dyn_init(chain);
+ ipfw_ifhash_init(chain);
+ ipfw_rewrite_init(chain);
/* First set up some values that are compile time options */
V_ipfw_vnet_ready = 1; /* Open for business */
@@ -2708,6 +2710,7 @@ vnet_ipfw_uninit(const void *unused)
(void)ipfw_attach_hooks(0 /* detach */);
V_ip_fw_ctl_ptr = NULL;
IPFW_UH_WLOCK(chain);
+ ipfw_ifhash_detach(chain); /* detach eventhandlers */
IPFW_UH_WUNLOCK(chain);
IPFW_UH_WLOCK(chain);
@@ -2722,9 +2725,14 @@ vnet_ipfw_uninit(const void *unused)
rule = chain->map[i];
rule->x_next = reap;
reap = rule;
+ /* Clear rewrites if any */
+ if (rule->flags & IP_FW_RULE_REWRITTEN)
+ ipfw_relocate_rewrite(chain, rule->cmd, NULL);
}
if (chain->map)
free(chain->map, M_IPFW);
+ ipfw_rewrite_free(chain);
+ ipfw_ifhash_free(chain);
IPFW_WUNLOCK(chain);
IPFW_UH_WUNLOCK(chain);
if (reap != NULL)
Index: sys/netpfil/ipfw/ip_fw_private.h
===================================================================
--- sys/netpfil/ipfw/ip_fw_private.h (revision 248704)
+++ sys/netpfil/ipfw/ip_fw_private.h (working copy)
@@ -212,6 +212,13 @@ VNET_DECLARE(int, autoinc_step);
VNET_DECLARE(unsigned int, fw_tables_max);
#define V_fw_tables_max VNET(fw_tables_max)
+
+#define CMDSIZE(rule) (((struct ip_fw *)(rule))->cmd_len * sizeof(uint32_t))
+
+
+struct ip_fw_if_data;
+struct ip_fw_rw_data;
+
struct ip_fw_chain {
struct ip_fw *rules; /* list of rules */
struct ip_fw *reap; /* list of rules to reap */
@@ -232,8 +239,42 @@ struct ip_fw_chain {
#endif
uint32_t id; /* ruleset id */
uint32_t gencnt; /* generation count */
+ struct ip_fw_if_data *if_data; /* Interface tracking data */
+ struct ip_fw_rw_data *rewrite_data; /* Rule rewrite data */
};
+/* ip_fw_rewrite.c */
+/*
+ * Per-rule rewrite summary. The counters are accumulated by
+ * ipfw_check_rewrite() while a rule is validated; sptr is filled in from
+ * ipfw_prepare_rewrite() and later handed to ipfw_add_rule().
+ */
+struct ip_fw_rw_info {
+ void *sptr; /* State created by ipfw_prepare_rewrite() */
+ int count; /* Number of opcodes requesting rewrite */
+ int states; /* Number of opcodes with stateful rewrite */
+ int lendiff; /* Difference with original rule len (insns) */
+};
+
+void ipfw_rewrite_init(struct ip_fw_chain *chain);
+void ipfw_rewrite_free(struct ip_fw_chain *chain);
+int ipfw_rewrite_len(struct ip_fw_chain *chain);
+void *ipfw_prepare_rewrite(struct ip_fw_chain *chain, ipfw_insn *cmd,
+ int cmd_len, struct ip_fw_rw_info *rwi);
+void ipfw_perform_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd,
+ void *state);
+void ipfw_relocate_rewrite(struct ip_fw_chain *chain, ipfw_insn *old,
+ ipfw_insn *new);
+int ipfw_export_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd,
+ ipfw_insn *target);
+
+void ipfw_check_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
+ struct ip_fw_rw_info *rwi);
+void ipfw_update_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
+ void *state, uintptr_t val);
+
+
+/* ip_fw_iface.c */
+void ipfw_ifhash_init(struct ip_fw_chain *chain);
+void ipfw_ifhash_free(struct ip_fw_chain *chain);
+void ipfw_ifhash_detach(struct ip_fw_chain *chain);
+
+
struct sockopt; /* used by tcp_var.h */
/* Macro for working with various counters */
@@ -295,7 +336,8 @@ struct sockopt; /* used by tcp_var.h */
/* In ip_fw_sockopt.c */
int ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id);
-int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule);
+int ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule,
+ struct ip_fw_rw_info *rwi);
int ipfw_ctl(struct sockopt *sopt);
int ipfw_chk(struct ip_fw_args *args);
void ipfw_reap_rules(struct ip_fw *head);
Index: sys/netpfil/ipfw/ip_fw_sockopt.c
===================================================================
--- sys/netpfil/ipfw/ip_fw_sockopt.c (revision 248971)
+++ sys/netpfil/ipfw/ip_fw_sockopt.c (working copy)
@@ -73,6 +73,8 @@ MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct
* static variables followed by global ones (none in this file)
*/
+static void ipfw_export_header(struct ip_fw *krule, struct ip_fw *dst);
+
/*
* Find the smallest rule >= key, id.
* We could use bsearch but it is so simple that we code it directly
@@ -153,7 +155,8 @@ swap_map(struct ip_fw_chain *chain, struct ip_fw *
* Must be called without IPFW_UH held
*/
int
-ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
+ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule,
+ struct ip_fw_rw_info *rwi)
{
struct ip_fw *rule;
int i, l, insert_before;
@@ -163,7 +166,8 @@ int
return (EINVAL);
l = RULESIZE(input_rule);
- rule = malloc(l, M_IPFW, M_WAITOK | M_ZERO);
+ rule = malloc(l + rwi->lendiff * sizeof(uint32_t), M_IPFW,
+ M_WAITOK | M_ZERO);
/* get_map returns with IPFW_UH_WLOCK if successful */
map = get_map(chain, 1, 0 /* not locked */);
if (map == NULL) {
@@ -171,7 +175,15 @@ int
return ENOSPC;
}
- bcopy(input_rule, rule, l);
+ if (rwi->sptr == NULL)
+ bcopy(input_rule, rule, l);
+ else {
+ /* Copy header and first instuction */
+ bcopy(input_rule, rule, sizeof(struct ip_fw));
+ rule->flags |= IP_FW_RULE_REWRITTEN;
+ ipfw_perform_rewrite(chain, rule->cmd, rwi->sptr);
+ }
+
/* clear fields not settable from userland */
rule->x_next = NULL;
rule->next_rule = NULL;
@@ -366,6 +378,14 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
rule = chain->map[i];
if (keep_rule(rule, cmd, new_set, num))
map[ofs++] = rule;
+ else {
+ /* Clear rewrites if any */
+ if (rule->flags & IP_FW_RULE_REWRITTEN) {
+ printf("Moving rule %p to clear list\n", rule);
+ ipfw_relocate_rewrite(chain,
+ rule->cmd, NULL);
+ }
+ }
}
/* 3. copy the final part of the map */
bcopy(chain->map + end, map + ofs,
@@ -384,6 +404,7 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
ipfw_expire_dyn_rules(chain, rule, RESVD_SET);
rule->x_next = chain->reap;
chain->reap = rule;
+ printf("Adding rule %p to reap list\n", rule);
}
break;
@@ -517,7 +538,8 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t ar
* Rules are simple, so this mostly need to check rule sizes.
*/
static int
-check_ipfw_struct(struct ip_fw *rule, int size)
+check_ipfw_struct(struct ip_fw_chain *chain, struct ip_fw *rule, int size,
+ struct ip_fw_rw_info *rwi)
{
int l, cmdlen = 0;
int have_action=0;
@@ -696,6 +718,7 @@ static int
case O_VIA:
if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
goto bad_size;
+ ipfw_check_rewrite(chain, cmd, rwi);
break;
case O_ALTQ:
@@ -868,6 +891,13 @@ int convert_rule_to_8(struct ip_fw *rule);
#endif
+/*
+ * Copies the fixed part of kernel rule @krule into @dst: the whole
+ * struct ip_fw minus the size of one instruction. Opcodes are exported
+ * separately by the caller.
+ */
+static void
+ipfw_export_header(struct ip_fw *krule, struct ip_fw *dst)
+{
+	const size_t hdr_len = sizeof(struct ip_fw) - sizeof(ipfw_insn);
+
+	memcpy(dst, krule, hdr_len);
+}
+
/*
* Copy the static and dynamic rules to the supplied buffer
* and return the amount of space actually used.
@@ -887,11 +917,28 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf
rule = chain->map[i];
if (is7) {
- /* Convert rule to FreeBSd 7.2 format */
- l = RULESIZE7(rule);
+ /* Convert rule to FreeBSD 7.2 format */
+ if (rule->flags & IP_FW_RULE_REWRITTEN)
+ l = ipfw_export_rewrite(chain, rule->cmd, NULL);
+ else
+ l = CMDSIZE(rule);
+
+ /*
+ * Add header length.
+ * v.8 rule header is 4 bytes bigger.
+ */
+ l += sizeof(struct ip_fw7) - sizeof(ipfw_insn);
+
if (bp + l + sizeof(uint32_t) <= ep) {
int error;
bcopy(rule, bp, l + sizeof(uint32_t));
+
+ if (rule->flags & IP_FW_RULE_REWRITTEN) {
+ ipfw_export_rewrite(chain, rule->cmd, dst->cmd);
+ ipfw_export_header(rule, dst);
+ } else
+ bcopy(rule, bp, l + sizeof(uint32_t));
+
error = convert_rule_to_7((struct ip_fw *) bp);
if (error)
return 0; /*XXX correct? */
@@ -910,14 +957,23 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf
continue; /* go to next rule */
}
- /* normal mode, don't touch rules */
- l = RULESIZE(rule);
+ if (rule->flags & IP_FW_RULE_REWRITTEN)
+ l = ipfw_export_rewrite(chain, rule->cmd, NULL);
+ else
+ l = CMDSIZE(rule);
+ /* Add header length */
+ l += sizeof(struct ip_fw) - sizeof(ipfw_insn);
+
if (bp + l > ep) { /* should not happen */
printf("overflow dumping static rules\n");
break;
}
dst = (struct ip_fw *)bp;
- bcopy(rule, dst, l);
+ if (rule->flags & IP_FW_RULE_REWRITTEN) {
+ ipfw_export_rewrite(chain, rule->cmd, dst->cmd);
+ ipfw_export_header(rule, dst);
+ } else
+ bcopy(rule, dst, l);
/*
* XXX HACK. Store the disable mask in the "next"
* pointer in a wild attempt to keep the ABI the same.
@@ -949,6 +1005,7 @@ ipfw_ctl(struct sockopt *sopt)
uint32_t opt;
char xbuf[128];
ip_fw3_opheader *op3 = NULL;
+ struct ip_fw_rw_info rwi;
error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
if (error)
@@ -998,7 +1055,7 @@ ipfw_ctl(struct sockopt *sopt)
for (;;) {
int len = 0, want;
- size = chain->static_len;
+ size = chain->static_len + ipfw_rewrite_len(chain);
size += ipfw_dyn_len();
if (size >= sopt->sopt_valsize)
break;
@@ -1027,6 +1084,8 @@ ipfw_ctl(struct sockopt *sopt)
error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
sizeof(struct ip_fw7) );
+ memset(&rwi, 0, sizeof(rwi));
+
/*
* If the size of commands equals RULESIZE7 then we assume
* a FreeBSD7.2 binary is talking to us (set is7=1).
@@ -1042,15 +1101,21 @@ ipfw_ctl(struct sockopt *sopt)
if (error)
return error;
if (error == 0)
- error = check_ipfw_struct(rule, RULESIZE(rule));
+ error = check_ipfw_struct(chain, rule, RULESIZE(rule),
+ &rwi);
} else {
is7 = 0;
if (error == 0)
- error = check_ipfw_struct(rule, sopt->sopt_valsize);
+ error = check_ipfw_struct(chain, rule,
+ sopt->sopt_valsize, &rwi);
}
if (error == 0) {
+ /* Prepare rewrite, if needed */
+ if (rwi.count > 0)
+ rwi.sptr = ipfw_prepare_rewrite(chain,
+ rule->cmd, rule->cmd_len, &rwi);
/* locking is done within ipfw_add_rule() */
- error = ipfw_add_rule(chain, rule);
+ error = ipfw_add_rule(chain, rule, &rwi);
size = RULESIZE(rule);
if (!error && sopt->sopt_dir == SOPT_GET) {
if (is7) {
@@ -1350,7 +1415,7 @@ convert_rule_to_7(struct ip_fw *rule)
bcopy(rule, tmp, RULE_MAXSIZE);
/* Copy fields */
- rule7->_pad = tmp->_pad;
+ rule7->_pad = 0;
rule7->set = tmp->set;
rule7->rulenum = tmp->rulenum;
rule7->cmd_len = tmp->cmd_len;
@@ -1423,7 +1488,7 @@ convert_rule_to_8(struct ip_fw *rule)
}
}
- rule->_pad = tmp->_pad;
+ rule->flags = 0;
rule->set = tmp->set;
rule->rulenum = tmp->rulenum;
rule->cmd_len = tmp->cmd_len;
--- /dev/null 2013-04-24 17:20:19.000000000 +0400
+++ sys/netpfil/ipfw/ip_fw_rewrite.c 2013-04-24 17:19:15.278097243 +0400
@@ -0,0 +1,835 @@
+/*-
+ * Copyright (c) 2013 Yandex LLC.
+ * Author: Alexander V. Chernikov <melifaro at yandex-team.ru>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Rule opcode rewriting system for ipfw.
+ * The system permits automatic algorithmic (stateless) or stateful
+ * (requiring access to/modification of external data) rewriting of opcodes.
+ * Modification is done by calling special per-opcode
+ * callbacks. Saving unmodified user-supplied rules, size recalculation,
+ * rule export and relocation are handled by the subsystem.
+ * Writing an opcode modifier requires adding it to the rewrites[] array
+ * and filling in the appropriate callbacks (at least the 'convert' one).
+ */
+
+#include "opt_ipfw.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/fnv_hash.h>
+#include <sys/socket.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* hooks */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_iface.h>
+
+#define NO_REWRITE 0
+#define STATELESS_REWRITE 1
+#define STATEFUL_REWRITE 2
+
+/*
+ * Per-opcode rewrite descriptor. One entry of the rewrites[] table; the
+ * generic code looks entries up by opcode and invokes the callbacks below.
+ */
+struct ip_fw_rewrite {
+ uint32_t opcode; /* Opcode handled; lookup key in rewrites[] */
+
+ /*
+ * Checks if given opcode needs to be changed. Called (indirectly)
+ * from check_ipfw_struct() without holding any locks. Function should
+ * quickly check if given opcode needs to be rewritten and set @len to
+ * the size difference between the new (altered) opcode and the old
+ * one. The value is accumulated into ip_fw_rw_info.lendiff, which is
+ * counted in instructions (32-bit words), not bytes.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @insn - given ipfw_insn
+ * @len - pointer to length diff (in insns)
+ *
+ * Returns:
+ * NO_REWRITE - no need to convert
+ * STATELESS_REWRITE - (algorithmic) conversion required.
+ * STATEFUL_REWRITE - stateful conversion required.
+ *
+ * Callback is OPTIONAL, defaults to STATELESS_REWRITE if not set.
+ */
+ int (*check)(struct ip_fw_chain *, ipfw_insn *, int *);
+
+ /*
+ * Prepares state for given opcode if needed. Called without
+ * holding any locks, permitting allocation of any amount of memory.
+ * Note that the result (and actual state usage) has to be consistent
+ * with the 'check' (and other) callbacks.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @insn - given ipfw_insn
+ * @pstate - pointer to pointer to state
+ *
+ * Returns:
+ * NO_REWRITE - no need to convert
+ * STATELESS_REWRITE - (algorithmic) conversion can be done.
+ * STATEFUL_REWRITE - stateful conversion required, state is saved to
+ * given pointer.
+ *
+ * Callback is OPTIONAL; when not set, no state is prepared for the
+ * opcode.
+ */
+ int (*prepare)(struct ip_fw_chain *, ipfw_insn *insn, void **);
+
+ /*
+ * Performs opcode conversion. Called with chain WLOCK held.
+ * Note that opcode copy is handled automatically if
+ * NO_REWRITE is returned. @len has to be filled otherwise.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @_old - userland ipfw_insn
+ * @_new - kernel ipfw_insn
+ * @state - pointer to state saved
+ * @len - pointer to opcode length (in instructions)
+ *
+ * Returns:
+ * NO_REWRITE - no need to convert
+ * STATELESS_REWRITE - (algorithmic) conversion is done.
+ * STATEFUL_REWRITE - stateful conversion is done, state is consumed.
+ *
+ * Callback is MANDATORY.
+ */
+ int (*convert)(struct ip_fw_chain *, ipfw_insn *, ipfw_insn *, void *, int *);
+
+ /*
+ * Performs state cleanup (rule deletion). Called with chain WLOCK held.
+ * State hint can be provided.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @insn - kernel ipfw_insn
+ * @state - pointer to state hint
+ *
+ * Callback is OPTIONAL.
+ */
+ void (*clear)(struct ip_fw_chain *, ipfw_insn *, void *);
+
+ /*
+ * Performs opcode-dependent update.
+ * Flag/argument can be provided.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @insn - kernel ipfw_insn
+ * @state - pointer to opcode-dependent data
+ * @val - opcode-dependent value
+ *
+ * Callback is OPTIONAL.
+ */
+ void (*update)(struct ip_fw_chain *, ipfw_insn *, void *, uintptr_t);
+
+ /*
+ * Dispatches memory relocation of given opcode. Called with WLOCK held.
+ * Actual copy is already done at the moment of call.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @_old - old kernel ipfw_insn
+ * @_new - new kernel ipfw_insn
+ *
+ * Callback is OPTIONAL.
+ */
+ void (*move)(struct ip_fw_chain *, ipfw_insn *, ipfw_insn *);
+};
+
+/* Opcode callbacks */
+static int
+convertable_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn);
+
+static void move_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new);
+static void update_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn,
+ void *_iface_mask, uintptr_t new_id);
+static void clear_insn_if(struct ip_fw_chain *chain, ipfw_insn *_src, void *data);
+static int convert_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new,
+ void *state, int *len);
+static int prepare_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, void **pstate);
+static int check_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, int *len);
+
+
+/*
+ * Table of opcodes subject to rewriting.
+ * Entries MUST be kept in ascending opcode order: ipfw_find_rewrite()
+ * locates them with bsearch().
+ */
+struct ip_fw_rewrite rewrites[] = {
+	{
+		.opcode = O_RECV,
+		.check = check_insn_if,
+		.prepare = prepare_insn_if,
+		.convert = convert_insn_if,
+		.clear = clear_insn_if,
+		.update = update_insn_if,
+		.move = move_insn_if,
+	},
+	{
+		.opcode = O_XMIT,
+		.check = check_insn_if,
+		.prepare = prepare_insn_if,
+		.convert = convert_insn_if,
+		.clear = clear_insn_if,
+		.update = update_insn_if,
+		.move = move_insn_if,
+	},
+	{
+		.opcode = O_VIA,
+		.check = check_insn_if,
+		.prepare = prepare_insn_if,
+		.convert = convert_insn_if,
+		.clear = clear_insn_if,
+		.update = update_insn_if,
+		.move = move_insn_if,
+	},
+};
+
+/*
+ * Per-rule rewrite record. Keeps the original (user-supplied) opcodes of a
+ * rewritten rule together with a pointer to the kernel opcodes built from
+ * them. Records are linked into the ip_fw_rw_data hash buckets, keyed by
+ * the kernel opcode pointer (see hash_ptr()).
+ */
+struct rewrite_rule_ptr {
+ TAILQ_ENTRY(rewrite_rule_ptr) next;
+ int cmd_klen; /* Kernel opcodes len (insns) */
+ int cmd_len; /* Original opcodes len (insns) */
+ ipfw_insn *kcmd; /* Kernel rule version */
+ void **states; /* opcode states (stored after cmd[] in the same allocation) */
+ int states_count; /* number of states */
+ ipfw_insn cmd[1]; /* Original opcodes (cmd_len insns follow inline) */
+};
+TAILQ_HEAD(rewrite_rule_head, rewrite_rule_ptr);
+
+/*
+ * Per-chain rewrite data: hash of rewrite_rule_ptr records plus the
+ * accumulated size difference between kernel and user rule versions.
+ * NOTE(review): lendiff is accumulated in instructions (cmd_klen - cmd_len)
+ * but ipfw_rewrite_len() callers appear to add it to a byte count --
+ * confirm the intended unit.
+ */
+struct ip_fw_rw_data {
+ struct rewrite_rule_head *hash; /* Array of hsize bucket lists */
+ size_t hsize; /* Number of allocated buckets */
+ int lendiff; /* sizeof(kern) - sizeof(user) */
+};
+
+#define DEFAULT_HASH_SIZE 32
+#define PTR_HASH_PRIME 31
+
+static struct ip_fw_rewrite *ipfw_find_rewrite(uint32_t opcode);
+
+/*
+ * Allocates the per-chain rewrite data: a zeroed ip_fw_rw_data with
+ * DEFAULT_HASH_SIZE empty bucket lists, then attaches it to @chain.
+ */
+void
+ipfw_rewrite_init(struct ip_fw_chain *chain)
+{
+	struct ip_fw_rw_data *data;
+	size_t slot;
+
+	data = malloc(sizeof(struct ip_fw_rw_data), M_IPFW, M_WAITOK | M_ZERO);
+	data->hsize = DEFAULT_HASH_SIZE;
+	data->hash = malloc(sizeof(struct rewrite_rule_head) * data->hsize,
+	    M_IPFW, M_WAITOK | M_ZERO);
+
+	/* Every bucket starts out as an empty tail queue. */
+	for (slot = 0; slot < data->hsize; slot++)
+		TAILQ_INIT(&data->hash[slot]);
+
+	chain->rewrite_data = data;
+}
+
+/*
+ * Detaches and releases the per-chain rewrite data.
+ * Individual rewrite records are not walked here: every rule is assumed
+ * to have been removed already.
+ */
+void
+ipfw_rewrite_free(struct ip_fw_chain *chain)
+{
+	struct ip_fw_rw_data *data = chain->rewrite_data;
+
+	chain->rewrite_data = NULL;
+
+	free(data->hash, M_IPFW);
+	free(data, M_IPFW);
+}
+
+/*
+ * Returns the accumulated kernel-vs-user size difference recorded for
+ * all rewritten rules of @chain.
+ */
+int
+ipfw_rewrite_len(struct ip_fw_chain *chain)
+{
+
+	return (chain->rewrite_data->lendiff);
+}
+
+/*
+ * Prepares a rule for rewriting.
+ * Allocates one record holding a copy of the user opcodes followed by an
+ * array of @rwi->states state pointers, then runs the 'prepare' callback
+ * of every opcode that has one. Each callback reporting STATEFUL_REWRITE
+ * consumes the next slot of the state array.
+ *
+ * Returns the record; the caller stores it in @rwi->sptr and it is later
+ * consumed by ipfw_perform_rewrite().
+ */
+void *
+ipfw_prepare_rewrite(struct ip_fw_chain *chain, ipfw_insn *ucmd,
+    int cmd_len, struct ip_fw_rw_info *rwi)
+{
+	struct rewrite_rule_ptr *rptr;
+	struct ip_fw_rewrite *rewrite;
+	ipfw_insn *cmd;
+	void **pstate;
+	int cmdlen, remaining, size, states_count;
+
+	/*
+	 * Record header plus opcodes (the header already embeds one insn),
+	 * rounded up so that the state array behind it is pointer-aligned.
+	 */
+	size = roundup(sizeof(struct rewrite_rule_ptr) +
+	    (cmd_len - 1) * sizeof(uint32_t), sizeof(void *));
+
+	rptr = malloc(size + rwi->states * sizeof(void *), M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	/* Keep the original opcodes for later export */
+	memcpy(rptr->cmd, ucmd, cmd_len * sizeof(uint32_t));
+	rptr->cmd_len = cmd_len;
+	rptr->cmd_klen = rptr->cmd_len + rwi->lendiff;
+
+	rptr->states = (void **)((char *)rptr + size);
+	rptr->states_count = rwi->states;
+
+	pstate = rptr->states;
+	states_count = rptr->states_count;
+
+	CTR4(KTR_NET, "Prepare rule rewrite: cmd %p len %d klen %d rptr %p",
+	    ucmd, rptr->cmd_len, rptr->cmd_klen, rptr);
+
+	cmd = ucmd;
+	remaining = cmd_len;
+	while (remaining > 0) {
+		cmdlen = F_LEN(cmd);
+
+		rewrite = ipfw_find_rewrite(cmd->opcode);
+		if (rewrite != NULL && rewrite->prepare != NULL &&
+		    rewrite->prepare(chain, cmd, pstate) == STATEFUL_REWRITE) {
+			CTR3(KTR_NET, "New stateful rewrite %p val %p count %d",
+			    pstate, *pstate, states_count);
+			pstate++;
+			states_count--;
+
+			KASSERT(states_count >= 0,
+			    ("prepare_rewrite state overflow"));
+		}
+
+		remaining -= cmdlen;
+		cmd += cmdlen;
+	}
+
+	return ((void *)rptr);
+}
+
+/*
+ * Maps a kernel opcode pointer to a bucket index.
+ * The index is the pointer value modulo PTR_HASH_PRIME; @rwd is not
+ * consulted.
+ */
+static int
+hash_ptr(struct ip_fw_rw_data *rwd, ipfw_insn *cmd)
+{
+	uintptr_t addr = (uintptr_t)cmd;
+
+	return (addr % PTR_HASH_PRIME);
+}
+
+/*
+ * Fills in the kernel opcodes at @kcmd from the user opcodes saved in
+ * @state (the record returned by ipfw_prepare_rewrite()).
+ * Opcodes without a rewrites[] entry, or whose 'convert' callback returns
+ * NO_REWRITE, are copied verbatim; the others are produced by 'convert'.
+ * The record is linked into the pointer hash under @kcmd and the per-chain
+ * size difference is updated. Only opcodes are written here; the rule
+ * header is copied by the caller.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @kcmd - destination (kernel) opcodes
+ * @state - record created by ipfw_prepare_rewrite()
+ */
+void
+ipfw_perform_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd, void *state)
+{
+	struct rewrite_rule_ptr *rptr;
+	struct rewrite_rule_head *rh;
+	struct ip_fw_rw_data *rwd;
+	struct ip_fw_rewrite *rewrite;
+	ipfw_insn *ucmd;
+	void **pstate, *opstate;
+	int i, l, ucmdlen, kcmdlen, states_count;
+
+	rwd = chain->rewrite_data;
+
+	rptr = (struct rewrite_rule_ptr *)state;
+	rptr->kcmd = kcmd;
+	pstate = rptr->states;
+	states_count = rptr->states_count;
+
+	CTR3(KTR_NET, "Linking kcmd %p to orig %p idx %d",
+	    kcmd, rptr, hash_ptr(rwd, kcmd));
+
+	rh = &rwd->hash[hash_ptr(rwd, kcmd)];
+	TAILQ_INSERT_TAIL(rh, rptr, next);
+
+	ucmd = rptr->cmd;
+
+	for (l = rptr->cmd_len; l > 0 ;
+	    l -= ucmdlen, ucmd += ucmdlen, kcmd += kcmdlen) {
+		ucmdlen = F_LEN(ucmd);
+
+		if ((rewrite = ipfw_find_rewrite(ucmd->opcode)) == NULL) {
+			/* No conversion required, copy as is */
+			kcmdlen = ucmdlen;
+			memcpy(kcmd, ucmd, ucmdlen * sizeof(ipfw_insn));
+			continue;
+		}
+
+		/*
+		 * Read a state slot only while unconsumed slots remain.
+		 * The state array holds exactly states_count entries, so
+		 * dereferencing pstate once they are used up (or when the
+		 * rule has stateless rewrites only) would read past the
+		 * end of the allocation.
+		 */
+		opstate = (states_count > 0) ? *pstate : NULL;
+
+		i = rewrite->convert(chain, ucmd, kcmd, opstate, &kcmdlen);
+		CTR3(KTR_NET, "RW for %d st %p returned %d",
+		    ucmd->opcode, opstate, i);
+
+		if (i == NO_REWRITE) {
+			kcmdlen = ucmdlen;
+			memcpy(kcmd, ucmd, ucmdlen * sizeof(ipfw_insn));
+		} else if (i == STATEFUL_REWRITE) {
+			/* The callback consumed one prepared state */
+			pstate++;
+			states_count--;
+
+			KASSERT(states_count >= 0, ("rewrite state overflow"));
+		}
+	}
+
+	/* Save size difference */
+	rwd->lendiff += rptr->cmd_klen - rptr->cmd_len;
+	CTR2(KTR_NET, "old len: %d, new: %d", rptr->cmd_len, rptr->cmd_klen);
+}
+
+/*
+ * Handles a rewritten rule moving to a new place, or being deleted.
+ * Looks up the rewrite record by the old kernel opcode pointer @old and
+ * unlinks it from its bucket. Then:
+ *  - if @new is NULL (deletion): runs the 'clear' callback of every kernel
+ *    opcode that has one, subtracts the rule's size difference from the
+ *    per-chain total and frees the record;
+ *  - otherwise: re-links the record under @new and runs the 'move'
+ *    callback of every kernel opcode that has one.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @old - current kernel opcodes of the rule
+ * @new - new location of the kernel opcodes, or NULL on deletion
+ */
+void
+ipfw_relocate_rewrite(struct ip_fw_chain *chain, ipfw_insn *old, ipfw_insn *new)
+{
+	struct rewrite_rule_ptr *rptr;
+	struct rewrite_rule_head *rh;
+	struct ip_fw_rw_data *rwd;
+	struct ip_fw_rewrite *rewrite;
+	int l, cmdlen;
+
+	rwd = chain->rewrite_data;
+
+	rh = &rwd->hash[hash_ptr(rwd, old)];
+
+	TAILQ_FOREACH(rptr, rh, next) {
+		if (rptr->kcmd == old)
+			break;
+	}
+
+	KASSERT(rptr != NULL, ("ipfw_relocate_rewrite: old rule not found"));
+
+	/* The bucket index is an int, so trace it with %d rather than %p */
+	CTR3(KTR_NET, "Moving %p idx %d to %p", rptr, hash_ptr(rwd, old), new);
+
+	TAILQ_REMOVE(rh, rptr, next);
+
+	if (new == NULL) {
+		/* Clear states (if any) and delete original rule */
+		for (l = rptr->cmd_klen; l > 0; l -= cmdlen, old += cmdlen) {
+			cmdlen = F_LEN(old);
+
+			if ((rewrite = ipfw_find_rewrite(old->opcode)) == NULL)
+				continue;
+
+			if (rewrite->clear == NULL)
+				continue;
+
+			CTR1(KTR_NET, "clear-state for opcode %u", old->opcode);
+			rewrite->clear(chain, old, NULL);
+		}
+
+		/* Update size difference */
+		rwd->lendiff -= rptr->cmd_klen - rptr->cmd_len;
+		free(rptr, M_IPFW);
+	} else {
+		/* Put to new slot */
+		rptr->kcmd = new;
+		rh = &rwd->hash[hash_ptr(rwd, new)];
+		TAILQ_INSERT_TAIL(rh, rptr, next);
+
+		/* Update instructions pointers */
+		for (l = rptr->cmd_klen; l > 0 ;
+		    l -= cmdlen, old += cmdlen, new += cmdlen) {
+			cmdlen = F_LEN(old);
+
+			if ((rewrite = ipfw_find_rewrite(old->opcode)) == NULL)
+				continue;
+
+			if (rewrite->move == NULL)
+				continue;
+
+			rewrite->move(chain, old, new);
+		}
+	}
+}
+
+/*
+ * Exports the original (user-supplied) opcodes of a rewritten rule.
+ * Looks up the rewrite record by kernel opcode pointer @kcmd and, when
+ * @target is not NULL, copies the saved original opcodes there.
+ * Passing a NULL @target only queries the size.
+ *
+ * Params:
+ * @chain - pointer to current ipfw chain
+ * @kcmd - kernel opcodes of the rule
+ * @target - destination buffer, or NULL
+ *
+ * Returns length of the original opcodes in bytes.
+ */
+int
+ipfw_export_rewrite(struct ip_fw_chain *chain, ipfw_insn *kcmd, ipfw_insn *target)
+{
+	struct rewrite_rule_ptr *rptr;
+	struct rewrite_rule_head *rh;
+	struct ip_fw_rw_data *rwd;
+
+	rwd = chain->rewrite_data;
+
+	KASSERT(rwd != NULL, ("ipfw_export_rewrite: rewrite not initialized"));
+
+	rh = &rwd->hash[hash_ptr(rwd, kcmd)];
+
+	TAILQ_FOREACH(rptr, rh, next) {
+		if (rptr->kcmd == kcmd)
+			break;
+	}
+
+	KASSERT(rptr != NULL, ("ipfw_export_rewrite: kcmd not found"));
+
+	if (target != NULL)
+		memcpy(target, rptr->cmd, rptr->cmd_len * sizeof(uint32_t));
+
+	return (rptr->cmd_len * sizeof(uint32_t));
+}
+
+/*
+ * bsearch() comparator: orders the opcode pointed to by @_key against
+ * the opcode of rewrites[] entry @_member. Returns -1, 0 or 1.
+ */
+static int
+rewrite_comp(const void *_key, const void *_member)
+{
+	const uint32_t wanted = *(const uint32_t *)_key;
+	const struct ip_fw_rewrite *entry = _member;
+
+	if (wanted == entry->opcode)
+		return (0);
+
+	return ((wanted < entry->opcode) ? -1 : 1);
+}
+
+
+/*
+ * Looks up the rewrites[] entry for @opcode.
+ * Returns NULL when the opcode has no rewrite handler.
+ */
+static struct ip_fw_rewrite *
+ipfw_find_rewrite(uint32_t opcode)
+{
+	const size_t nentries = sizeof(rewrites) / sizeof(struct ip_fw_rewrite);
+
+	return ((struct ip_fw_rewrite *)bsearch(&opcode, rewrites,
+	    nentries, sizeof(struct ip_fw_rewrite), rewrite_comp));
+}
+
+
+/*
+ * Decides whether @insn has to be rewritten and accounts for it in @rwi:
+ * the length difference reported by the 'check' callback is added to
+ * lendiff, every rewrite bumps count, and stateful ones also bump states.
+ * Opcodes with a rewrites[] entry but no 'check' callback are treated as
+ * stateless rewrites.
+ */
+void
+ipfw_check_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
+    struct ip_fw_rw_info *rwi)
+{
+	struct ip_fw_rewrite *handler;
+	int verdict, delta = 0;
+
+	handler = ipfw_find_rewrite(insn->opcode);
+	if (handler == NULL)
+		verdict = NO_REWRITE;
+	else if (handler->check != NULL)
+		verdict = handler->check(chain, insn, &delta);
+	else
+		verdict = STATELESS_REWRITE;
+
+	rwi->lendiff += delta;
+
+	switch (verdict) {
+	case STATEFUL_REWRITE:
+		rwi->states++;
+		/* FALLTHROUGH */
+	case STATELESS_REWRITE:
+		rwi->count++;
+		break;
+	default:
+		break;
+	}
+
+	if (verdict != NO_REWRITE)
+		CTR4(KTR_NET, "opcode %d: count=%d states=%d len=%d",
+		    insn->opcode, rwi->count, rwi->states, rwi->lendiff);
+}
+
+/*
+ * Forwards (@state, @val) to the opcode-dependent 'update' callback of
+ * @insn. Does nothing when the opcode has no rewrites[] entry or the
+ * entry has no such callback.
+ */
+void
+ipfw_update_rewrite(struct ip_fw_chain *chain, ipfw_insn *insn,
+    void *state, uintptr_t val)
+{
+	struct ip_fw_rewrite *handler = ipfw_find_rewrite(insn->opcode);
+
+	if (handler != NULL && handler->update != NULL)
+		handler->update(chain, insn, state, val);
+}
+
+/*******************************************************************
+ * *
+ * O_RECV | O_VIA | O_XMIT rewrite handling. *
+ * *
+ *******************************************************************/
+/*
+ * Converts insns_if to more compact form. Currently instruction
+ * is used to specify
+ * 1) interface name ( ->name[0] != ('\0' | '\1') AND p.glob == 0)
+ * 2) interface pattern ( ->name[0] != ('\0' | '\1') AND p.glob != 0)
+ * 3) eXtended table number ( ->name[0] == '\1')
+ * 4) interface address ( ->name[0] == '\0')
+ *
+ * We want to save iface index in case 1 (and to eliminate interface name at all).
+ * Given that, we do the following:
+ *
+ * p.glob is now p.if_idx (u_int) (glob if zero, iface index otherwise)
+ * o.arg1 works like ->name[0], so:
+ *
+ * 1) interface name (o.arg1 == 2, p.if_idx contains index)
+ * 2) interface pattern (o.arg1 == 2, p.if_idx == 0)
+ * 3) eXtended table number (o.arg1 == 1)
+ * 4) interface address (o.arg1 == 0)
+ */
+
+/*
+ * Tells whether @insn names an interface (exact name or glob pattern).
+ * Returns 0 when name[0] is '\0' (interface address) or '\1' (extended
+ * table), 1 otherwise.
+ */
+static int
+convertable_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn)
+{
+	const char tag = ((ipfw_insn_if *)insn)->name[0];
+
+	return (tag != '\0' && tag != '\1');
+}
+
+/*
+ * 'check' callback for interface opcodes. The opcode size never changes,
+ * so *@len_diff is always set to 0. Only an exact interface name (name
+ * present and p.glob == 0) asks for a stateful rewrite; every other form
+ * is rewritten statelessly.
+ */
+static int
+check_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, int *len_diff)
+{
+	ipfw_insn_if *ifcmd = (ipfw_insn_if *)insn;
+
+	*len_diff = 0;
+
+	if (convertable_insn_if(chain, insn) != 0 && ifcmd->p.glob == 0)
+		return (STATEFUL_REWRITE);
+
+	return (STATELESS_REWRITE);
+}
+
+/*
+ * Allocate the state later consumed by convert_insn_if().
+ *
+ * For a plain interface name a zeroed iface_mask is allocated whose
+ * instruction list holds a single zeroed ipfw_insn_ptr; the mask is
+ * returned through *pstate together with STATEFUL_REWRITE.  All other
+ * forms return STATELESS_REWRITE and leave *pstate untouched.
+ */
+static int
+prepare_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, void **pstate)
+{
+	ipfw_insn_if *ifcmd = (ipfw_insn_if *)insn;
+	struct ipfw_insn_ptr *link;
+	struct iface_mask *mask;
+
+	/* Tables and addresses (3, 4) and glob patterns (2) need no state. */
+	if (convertable_insn_if(chain, insn) == 0 || ifcmd->p.glob != 0)
+		return (STATELESS_REWRITE);
+
+	/* Allocate data used by convert callback */
+	link = malloc(sizeof(*link), M_IPFW, M_WAITOK | M_ZERO);
+	mask = malloc(sizeof(*mask), M_IPFW, M_WAITOK | M_ZERO);
+
+	TAILQ_INIT(&mask->instructions);
+	TAILQ_INSERT_TAIL(&mask->instructions, link, next);
+
+	CTR3(KTR_NET, "pstate %p, val %p insns %p", pstate, mask, insn);
+
+	*pstate = mask;
+
+	return (STATEFUL_REWRITE);
+}
+
+/*
+ * Build the rewritten form of an interface instruction in _new from the
+ * original instruction _old.
+ *
+ * *len is always set to F_INSN_SIZE(ipfw_insn_if) and _new starts out as
+ * a verbatim copy of _old; only o.arg1 and p.if_idx are then altered:
+ *   (1) interface name:  o.arg1 = 2, p.if_idx = interface index
+ *   (2) glob pattern:    o.arg1 = 2, p.if_idx = 0
+ *   (3) extended table:  o.arg1 = 1
+ *   (4) interface addr:  o.arg1 = 0
+ *
+ * For case (1) 'state' must be the iface_mask allocated by
+ * prepare_insn_if().  Its ipfw_insn_ptr is pointed at _new and linked to
+ * the tracking record of the named interface.  If the name is not known
+ * yet, 'state' itself is registered as a fake interface record;
+ * otherwise 'state' is freed.
+ *
+ * Returns STATEFUL_REWRITE for case (1), STATELESS_REWRITE otherwise.
+ */
+static int
+convert_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new,
+    void *state, int *len)
+{
+	struct iface_mask *ifm, *ifm2;
+	struct ipfw_insn_ptr *insn_ptr;
+	ipfw_insn_if *cmd_old = (ipfw_insn_if *)_old;
+	ipfw_insn_if *cmd_new = (ipfw_insn_if *)_new;
+
+	/* Set length anyway */
+	*len = F_INSN_SIZE(ipfw_insn_if);
+	/* One copy serves every case below. */
+	memcpy(cmd_new, cmd_old, sizeof(ipfw_insn_if));
+
+	if (convertable_insn_if(chain, _old) == 0) {
+		/*
+		 * case (3, eX table): o.arg1 = 1
+		 * case (4, ifaddr): o.arg1 = 0
+		 */
+		cmd_new->o.arg1 = (cmd_old->name[0] == '\1') ? 1 : 0;
+
+		return (STATELESS_REWRITE);
+	}
+
+	/*
+	 * case (1, ifname): o.arg1 = 2, p.if_idx = interface index
+	 * case (2, glob): o.arg1 = 2, p.if_idx = 0
+	 */
+	cmd_new->o.arg1 = 2;
+
+	if (cmd_old->p.glob) {
+		/* Interface mask (2). Copy as is and set index */
+		cmd_new->p.if_idx = 0;
+		return (STATELESS_REWRITE);
+	}
+
+	/* Interface name. */
+	ifm = (struct iface_mask *)state;
+	KASSERT(ifm != NULL, ("no state for interface name instruction"));
+	insn_ptr = TAILQ_FIRST(&ifm->instructions);
+
+	insn_ptr->insn = _new;
+
+	if ((ifm2 = ipfw_search_ifname(chain, cmd_old->name)) != NULL) {
+		/*
+		 * Interface found.  Unlink the entry from the temporary
+		 * state before that state is freed, then link it to the
+		 * existing record.
+		 */
+		TAILQ_REMOVE(&ifm->instructions, insn_ptr, next);
+		TAILQ_INSERT_TAIL(&ifm2->instructions, insn_ptr, next);
+		ifm2->refcount++;
+		cmd_new->p.if_idx = ifm2->idx;
+		if (ifm2->flags & IPFW_IFLAG_FAKE)
+			cmd_new->p.if_idx |= IPFW_FAKE_IDX;
+
+		free(ifm, M_IPFW);
+		return (STATEFUL_REWRITE);
+	}
+
+	/* Interface not found, add and mark as nonexistent */
+	strlcpy(ifm->name, cmd_old->name, IFNAMSIZ);
+	ifm->flags |= IPFW_IFLAG_FAKE;
+	ifm->refcount++;
+	ipfw_add_ifname(chain, ifm);
+	cmd_new->p.if_idx = ifm->idx | IPFW_FAKE_IDX;
+	/* Add instruction back (add_ifname reinits list) */
+	TAILQ_INSERT_TAIL(&ifm->instructions, insn_ptr, next);
+
+	return (STATEFUL_REWRITE);
+}
+
+/*
+ * Release the per-instruction state of a rewritten interface instruction.
+ *
+ * _src is the rewritten instruction.  'data', when not NULL, is the
+ * ipfw_insn_ptr linked for it; when NULL the entry is looked up in the
+ * interface's instruction list.  The entry is unlinked and freed, and the
+ * interface usage count is decremented.
+ */
+static void
+clear_insn_if(struct ip_fw_chain *chain, ipfw_insn *_src, void *data)
+{
+	struct iface_mask *ifm;
+	ipfw_insn_if *cmd;
+	struct ipfw_insn_ptr *insn_ptr = (struct ipfw_insn_ptr *)data;
+
+	cmd = (ipfw_insn_if *)_src;
+
+	/*
+	 * State is used for interface names only, skip other cases.
+	 * Glob patterns share o.arg1 == 2 with interface names but are
+	 * stored with p.if_idx == 0 and have no state to release.
+	 */
+	if (cmd->o.arg1 != 2 || cmd->p.if_idx == 0)
+		return;
+
+	ifm = ipfw_search_ifindex(chain, cmd->p.if_idx);
+	KASSERT(ifm != NULL, ("no ifp found for index %u", cmd->p.if_idx));
+
+	if (insn_ptr == NULL) {
+		TAILQ_FOREACH(insn_ptr, &ifm->instructions, next) {
+			if (insn_ptr->insn == _src)
+				break;
+		}
+
+		KASSERT(insn_ptr != NULL, ("no insns found"));
+	}
+
+	/* Remove instruction from interface */
+	TAILQ_REMOVE(&ifm->instructions, insn_ptr, next);
+	ifm->refcount--;
+
+	free(insn_ptr, M_IPFW);
+}
+
+/*
+ * Store a new interface index into a rewritten interface instruction.
+ * new_id is truncated to 32 bits and written to p.if_idx.  The
+ * _iface_mask argument is not used here.  The chain write lock must be
+ * held (asserted below).
+ */
+static void
+update_insn_if(struct ip_fw_chain *chain, ipfw_insn *insn, void *_iface_mask,
+    uintptr_t new_id)
+{
+	ipfw_insn_if *cmd;
+
+	IPFW_WLOCK_ASSERT(chain);
+
+	cmd = (ipfw_insn_if *)insn;
+
+	CTR2(KTR_NET, "updating insn: ifi %u -> %u",
+	    cmd->p.if_idx, (uint32_t)new_id);
+
+	cmd->p.if_idx = (uint32_t)new_id;
+}
+
+/*
+ * Re-point the tracking entry of an interface-name instruction from its
+ * old location (_old) to its new one (_new).  The entry is searched in
+ * the instruction list of the interface referenced by _old.
+ */
+static void
+move_insn_if(struct ip_fw_chain *chain, ipfw_insn *_old, ipfw_insn *_new)
+{
+	struct iface_mask *ifm;
+	ipfw_insn_if *cmd;
+	struct ipfw_insn_ptr *insn_ptr;
+
+	cmd = (ipfw_insn_if *)_old;
+
+	/*
+	 * State is used for interface names only, skip other cases.
+	 * Glob patterns share o.arg1 == 2 with interface names but are
+	 * stored with p.if_idx == 0 and are not tracked.
+	 */
+	if (cmd->o.arg1 != 2 || cmd->p.if_idx == 0)
+		return;
+
+	ifm = ipfw_search_ifindex(chain, cmd->p.if_idx);
+	KASSERT(ifm != NULL, ("no ifp found for index %u", cmd->p.if_idx));
+
+	TAILQ_FOREACH(insn_ptr, &ifm->instructions, next) {
+		if (insn_ptr->insn == _old)
+			break;
+	}
+
+	KASSERT(insn_ptr != NULL, ("no insns found"));
+
+	insn_ptr->insn = _new;
+}
+
+
--- /dev/null 2013-04-24 17:20:19.000000000 +0400
+++ sys/netpfil/ipfw/ip_fw_iface.c 2013-04-24 17:18:35.546357594 +0400
@@ -0,0 +1,467 @@
+/*-
+ * Copyright (c) 2013 Yandex LLC.
+ * Author: Alexander V. Chernikov <melifaro at yandex-team.ru>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ * Interface tracking for ipfw.
+ */
+
+#include "opt_ipfw.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/fnv_hash.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* hooks */
+#include <netinet/ip_fw.h>
+
+#include <netpfil/ipfw/ip_fw_private.h>
+#include <netpfil/ipfw/ip_fw_iface.h>
+
+/* Bucket selectors: by interface index and by fnv_32_str() hash of the name. */
+#define	IPFW_IFHASH_IDX(idx, hsize)	((idx) % (hsize))
+#define	IPFW_IFHASH_NAME(name, hsize)	\
+	(fnv_32_str((name), FNV1_32_INIT) % (hsize))
+
+TAILQ_HEAD(iface_mask_head, iface_mask);
+
+/*
+ * Per-chain interface tracking data (hung off chain->if_data).
+ * Every iface_mask is linked into the name hash and into exactly one of
+ * the two index hashes.
+ */
+struct ip_fw_if_data {
+	struct iface_mask_head	*masks;		/* Interface name hash */
+	size_t			masks_count;
+	size_t			masks_hsize;	/* Buckets in 'masks' */
+	struct iface_mask_head	*real_ifaces;	/* 'Real' interface index hash */
+	size_t			real_count;
+	size_t			real_hsize;	/* Buckets in 'real_ifaces' */
+	struct iface_mask_head	*fake_ifaces;	/* Nonexistent interface index hash */
+	size_t			fake_count;
+	size_t			fake_hsize;	/* Buckets in 'fake_ifaces' */
+	eventhandler_tag	arrival;	/* ifnet_arrival_event handler */
+	eventhandler_tag	departure;	/* ifnet_departure_event handler */
+	/*
+	 * Last fake index handed out.  Kept as wide as iface_mask.idx:
+	 * a 16-bit counter wraps after 65535 allocations and would start
+	 * handing out indices that may still be in use.
+	 */
+	uint32_t		fake_idx;
+};
+
+/* Helpers defined later in this file. */
+static void ipfw_ifhash_init_int(struct iface_mask_head **phash, size_t hsize);
+static void ipfw_ifnet_init(struct ip_fw_chain *chain, struct iface_mask *ifm);
+
+/*
+ * Mappings kept in struct ip_fw_if_data:
+ * interface name -> iface_mask ('masks' hash)
+ * if_index -> iface_mask ('real_ifaces' hash)
+ * fake_index -> iface_mask ('fake_ifaces' hash)
+ *
+ * Each iface_mask carries the list of instructions referencing it.
+ */
+/* ifnet arrival/departure event handlers; 'arg' is the ip_fw_chain. */
+static void ifnet_arrival(void *arg, struct ifnet *ifp);
+static void ifnet_departure(void *arg, struct ifnet *ifp);
+
+/*
+ * Look up the tracking record for interface 'name' in the name hash.
+ * Returns the matching record or NULL when there is none.
+ * Called with either UH or chain readlock held.
+ */
+struct iface_mask *
+ipfw_search_ifname(struct ip_fw_chain *chain, char *name)
+{
+	struct ip_fw_if_data *ifd = chain->if_data;
+	struct iface_mask_head *bucket;
+	struct iface_mask *entry;
+	int slot;
+
+	slot = IPFW_IFHASH_NAME(name, ifd->masks_hsize);
+	bucket = &ifd->masks[slot];
+
+	for (entry = TAILQ_FIRST(bucket); entry != NULL;
+	    entry = TAILQ_NEXT(entry, name_next)) {
+		if (strcmp(name, entry->name) == 0)
+			break;
+	}
+
+	/* NULL here means the whole bucket was walked without a match. */
+	return (entry);
+}
+
+/*
+ * Look up a tracking record by interface index.  An index carrying the
+ * IPFW_FAKE_IDX bit is searched (with the bit stripped) in the fake
+ * index hash, any other index in the real index hash.
+ * Returns the matching record or NULL when there is none.
+ * Called with either UH or chain readlock held.
+ */
+struct iface_mask *
+ipfw_search_ifindex(struct ip_fw_chain *chain, uint32_t idx)
+{
+	struct ip_fw_if_data *ifd = chain->if_data;
+	struct iface_mask_head *bucket;
+	struct iface_mask *entry;
+	int slot;
+
+	if ((idx & IPFW_FAKE_IDX) == 0) {
+		slot = IPFW_IFHASH_IDX(idx, ifd->real_hsize);
+		bucket = &ifd->real_ifaces[slot];
+	} else {
+		idx &= ~IPFW_FAKE_IDX;
+		slot = IPFW_IFHASH_IDX(idx, ifd->fake_hsize);
+		bucket = &ifd->fake_ifaces[slot];
+	}
+
+	for (entry = TAILQ_FIRST(bucket); entry != NULL;
+	    entry = TAILQ_NEXT(entry, idx_next)) {
+		if (entry->idx == idx)
+			break;
+	}
+
+	return (entry);
+}
+
+/*
+ * Register a tracking record in the name hash and in the matching index
+ * hash.  The record's instruction list is re-initialized first, so any
+ * entries linked to it beforehand have to be re-added by the caller.
+ * Records flagged IPFW_IFLAG_FAKE get the next fake index assigned;
+ * other records keep the index already stored in ifm->idx.
+ */
+void
+ipfw_add_ifname(struct ip_fw_chain *chain, struct iface_mask *ifm)
+{
+	struct ip_fw_if_data *ifd = chain->if_data;
+	struct iface_mask_head *bucket;
+	struct iface_mask *other;
+	int slot;
+
+	ipfw_ifnet_init(chain, ifm);
+
+	/* Name hash first. */
+	slot = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
+	bucket = &ifd->masks[slot];
+	TAILQ_INSERT_TAIL(bucket, ifm, name_next);
+
+	if ((ifm->flags & IPFW_IFLAG_FAKE) == 0) {
+		/* Real interface: pick the bucket of the real index hash. */
+		slot = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
+		bucket = &ifd->real_ifaces[slot];
+
+		/* The index must not be registered yet. */
+		TAILQ_FOREACH(other, bucket, idx_next) {
+			KASSERT(other->idx != ifm->idx,
+			    ("Non-fake if %s w idx %d found (%s)!",
+			    other->name, ifm->idx, ifm->name));
+		}
+	} else {
+		/* Nonexistent interface: allocate a fake index for it. */
+		ifm->idx = ++ifd->fake_idx;
+		slot = IPFW_IFHASH_IDX(ifm->idx, ifd->fake_hsize);
+		bucket = &ifd->fake_ifaces[slot];
+	}
+
+	TAILQ_INSERT_TAIL(bucket, ifm, idx_next);
+}
+
+/*
+ * ifnet arrival event handler; 'arg' is the ip_fw_chain.
+ *
+ * If a record with the interface's name already exists it is expected
+ * to be a fake one.  It is moved from the fake index hash to the real
+ * one, every instruction linked to it is notified of the new index, and
+ * the record takes over the arriving ifnet.  Otherwise a new record is
+ * added to the name hash and to the real index hash.
+ */
+static void
+ifnet_arrival(void *arg, struct ifnet *ifp)
+{
+	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
+	struct ip_fw_if_data *ifd;
+	struct iface_mask *iftemp, *ifm, *ifdup;
+	struct iface_mask_head *ifh;
+	struct ipfw_insn_ptr *insn_ptr;
+	int i;
+
+	/* Allocate before taking the locks: M_WAITOK may sleep. */
+	iftemp = malloc(sizeof(struct iface_mask), M_IPFW, M_WAITOK | M_ZERO);
+
+	iftemp->ifp = ifp;
+	iftemp->idx = ifp->if_index;
+	strlcpy(iftemp->name, ifp->if_xname, IFNAMSIZ);
+
+	IPFW_UH_WLOCK(chain);
+	IPFW_WLOCK(chain);
+
+	ifd = chain->if_data;
+
+	if (ifd == NULL || ifd->arrival == NULL) {
+		/* We're shutting down */
+		IPFW_WUNLOCK(chain);
+		IPFW_UH_WUNLOCK(chain);
+		free(iftemp, M_IPFW);
+		return;
+	}
+
+	ifm = ipfw_search_ifname(chain, iftemp->name);
+
+	if (ifm != NULL) {
+		/* Found. Let's update index */
+		KASSERT(ifm->flags & IPFW_IFLAG_FAKE,
+		    ("Non-fake interface found for %s", ifm->name));
+
+		ifm->flags &= ~IPFW_IFLAG_FAKE;
+		/* Relink to real index */
+		i = IPFW_IFHASH_IDX(ifm->idx, ifd->fake_hsize);
+		ifh = &ifd->fake_ifaces[i];
+		TAILQ_REMOVE(ifh, ifm, idx_next);
+
+		i = IPFW_IFHASH_IDX(iftemp->idx, ifd->real_hsize);
+		ifh = &ifd->real_ifaces[i];
+		TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
+
+		CTR2(KTR_NET, "ifnet upgrade: fake %u -> %u", ifm->idx,
+		    iftemp->idx);
+		/* Notify consumers */
+		TAILQ_FOREACH(insn_ptr, &ifm->instructions, next)
+			ipfw_update_rewrite(chain, insn_ptr->insn, ifm,
+			    (uintptr_t)iftemp->idx);
+
+		ifm->idx = iftemp->idx;
+		/* The record now describes the interface that just arrived. */
+		ifm->ifp = ifp;
+	} else {
+		/* Not found. Add to list */
+		ifm = iftemp;
+		/* Ownership moved to the hashes; nothing to free below. */
+		iftemp = NULL;
+
+		ipfw_ifnet_init(chain, ifm);
+
+		CTR2(KTR_NET, "ifmp=%p uc=%u", ifm, ifm->refcount);
+
+		/* Add to named hash */
+		i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
+		ifh = &ifd->masks[i];
+		TAILQ_INSERT_TAIL(ifh, ifm, name_next);
+
+		/* Add to real interfaces hash */
+		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
+		ifh = &ifd->real_ifaces[i];
+
+		/*
+		 * Check index for consistency.  A dedicated iterator is
+		 * used so that 'iftemp' keeps meaning "record to free".
+		 */
+		TAILQ_FOREACH(ifdup, ifh, idx_next) {
+			KASSERT(ifdup->idx != ifm->idx,
+			    ("Non-fake if %s w idx %d found (%s)!",
+			    ifdup->name, ifm->idx, ifm->name));
+		}
+
+		TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
+
+		CTR3(KTR_NET, "new iface %p, idx %u uc=%u", ifm->name,
+		    ifm->idx, ifm->refcount);
+	}
+	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+
+	/* The preallocated record was not needed (existing one upgraded). */
+	if (iftemp != NULL)
+		free(iftemp, M_IPFW);
+}
+
+/*
+ * ifnet departure event handler; 'arg' is the ip_fw_chain.
+ *
+ * A record with no users (refcount == 0) is removed from both hashes and
+ * freed.  A record still in use is kept: it is flagged fake, moved from
+ * the real index hash to the fake one under a newly allocated fake
+ * index, and every instruction linked to it is notified of that index
+ * (with IPFW_FAKE_IDX set).
+ */
+static void
+ifnet_departure(void *arg, struct ifnet *ifp)
+{
+	struct ip_fw_chain *chain = (struct ip_fw_chain *)arg;
+	struct ip_fw_if_data *ifd;
+	struct iface_mask *ifm;
+	struct iface_mask_head *ifh;
+	struct ipfw_insn_ptr *insn_ptr;
+	int i;
+
+	IPFW_UH_WLOCK(chain);
+	IPFW_WLOCK(chain);
+
+	if ((ifd = chain->if_data) == NULL) {
+		/* We're shutting down */
+		IPFW_WUNLOCK(chain);
+		IPFW_UH_WUNLOCK(chain);
+		return;
+	}
+
+	ifm = ipfw_search_ifname(chain, ifp->if_xname);
+
+	if (ifm == NULL) {
+		IPFW_WUNLOCK(chain);
+		IPFW_UH_WUNLOCK(chain);
+		printf("ipfw: unknown iface %s departure\n", ifp->if_xname);
+		return;
+	}
+
+	KASSERT((ifm->flags & IPFW_IFLAG_FAKE) == 0,
+	    ("Fake interface found for %s", ifm->name));
+
+	/* Check if we need to save given interface. */
+	if (ifm->refcount == 0) {
+		CTR1(KTR_NET, "Deleting interface %p", ifm);
+		/* Delete from name hash */
+		i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
+		ifh = &ifd->masks[i];
+		TAILQ_REMOVE(ifh, ifm, name_next);
+
+		/* Delete from real iface hash */
+		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
+		ifh = &ifd->real_ifaces[i];
+		TAILQ_REMOVE(ifh, ifm, idx_next);
+
+		IPFW_WUNLOCK(chain);
+		IPFW_UH_WUNLOCK(chain);
+
+		free(ifm, M_IPFW);
+		return;
+	}
+
+	CTR1(KTR_NET, "Interface uc=%u", ifm->refcount);
+
+	/* Interface is used. Move to fake hash */
+	ifm->flags |= IPFW_IFLAG_FAKE;
+	/* The ifnet is going away; do not keep a pointer to it. */
+	ifm->ifp = NULL;
+	/* Relink to fake index */
+	i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
+	ifh = &ifd->real_ifaces[i];
+	TAILQ_REMOVE(ifh, ifm, idx_next);
+
+	/* Alloc fake index */
+	ifd->fake_idx++;
+	i = IPFW_IFHASH_IDX(ifd->fake_idx, ifd->fake_hsize);
+	ifh = &ifd->fake_ifaces[i];
+	TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
+
+	CTR2(KTR_NET, "Interface %p departure, fake index %u",
+	    ifm, ifd->fake_idx);
+
+	/* Notify consumers */
+	TAILQ_FOREACH(insn_ptr, &ifm->instructions, next)
+		ipfw_update_rewrite(chain, insn_ptr->insn, ifm,
+		    (uintptr_t)(ifd->fake_idx | IPFW_FAKE_IDX));
+
+	/* The record itself stores the index without the fake bit. */
+	ifm->idx = ifd->fake_idx;
+
+	IPFW_WUNLOCK(chain);
+	IPFW_UH_WUNLOCK(chain);
+}
+
+/*
+ * Reset the list of instructions attached to an interface record to an
+ * empty list.  The chain argument is not used.
+ */
+static void
+ipfw_ifnet_init(struct ip_fw_chain *chain, struct iface_mask *ifm)
+{
+	struct rule_list *insns = &ifm->instructions;
+
+	TAILQ_INIT(insns);
+}
+
+
+/*
+ * Allocate a hash table of 'hsize' empty list heads and store it in
+ * *phash.
+ */
+static void
+ipfw_ifhash_init_int(struct iface_mask_head **phash, size_t hsize)
+{
+	struct iface_mask_head *ifh;
+	size_t i;	/* same type as hsize: no signed/unsigned compare */
+
+	ifh = malloc(sizeof(struct iface_mask_head) * hsize, M_IPFW,
+	    M_WAITOK | M_ZERO);
+
+	*phash = ifh;
+
+	for (i = 0; i < hsize; i++)
+		TAILQ_INIT(&ifh[i]);
+}
+
+/*
+ * Set up interface tracking for a chain: allocate the tracking data and
+ * its three hashes, create a record for every interface currently on
+ * the V_ifnet list, and register the ifnet arrival/departure event
+ * handlers.
+ */
+void
+ipfw_ifhash_init(struct ip_fw_chain *chain)
+{
+	struct ip_fw_if_data *ifd;
+	struct iface_mask_head *ifh;
+	struct iface_mask *ifm;
+	struct ifnet *ifp;
+	int i;
+
+	ifd = malloc(sizeof(struct ip_fw_if_data), M_IPFW, M_WAITOK | M_ZERO);
+	chain->if_data = ifd;
+
+	ifd->masks_hsize = ifd->real_hsize = ifd->fake_hsize = 32;
+
+	/* Each table is sized by its own hsize field. */
+	ipfw_ifhash_init_int(&ifd->masks, ifd->masks_hsize);
+	ipfw_ifhash_init_int(&ifd->real_ifaces, ifd->real_hsize);
+	ipfw_ifhash_init_int(&ifd->fake_ifaces, ifd->fake_hsize);
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		ifm = malloc(sizeof(struct iface_mask), M_IPFW, M_WAITOK | M_ZERO);
+		strlcpy(ifm->name, ifp->if_xname, IFNAMSIZ);
+		ifm->ifp = ifp;
+		ifm->idx = ifp->if_index;
+
+		ipfw_ifnet_init(chain, ifm);
+
+		/* Link into the real index hash... */
+		i = IPFW_IFHASH_IDX(ifm->idx, ifd->real_hsize);
+		ifh = &ifd->real_ifaces[i];
+		TAILQ_INSERT_TAIL(ifh, ifm, idx_next);
+
+		/* ...and into the name hash. */
+		i = IPFW_IFHASH_NAME(ifm->name, ifd->masks_hsize);
+		ifh = &ifd->masks[i];
+		TAILQ_INSERT_TAIL(ifh, ifm, name_next);
+
+		CTR2(KTR_NET, "init iface %p idx %u", ifm, ifm->idx);
+
+	}
+	IFNET_RUNLOCK();
+
+	/* XXX: there is a gap between RUNLOCK and interface registration */
+
+	ifd->arrival = EVENTHANDLER_REGISTER(ifnet_arrival_event,
+	    ifnet_arrival, chain, EVENTHANDLER_PRI_ANY);
+
+	ifd->departure = EVENTHANDLER_REGISTER(ifnet_departure_event,
+	    ifnet_departure, chain, EVENTHANDLER_PRI_ANY);
+}
+
+/*
+ * Deregister the ifnet arrival/departure event handlers and clear their
+ * tags.  A NULL 'arrival' tag is what ifnet_arrival() treats as
+ * "shutting down".
+ */
+void
+ipfw_ifhash_detach(struct ip_fw_chain *chain)
+{
+	struct ip_fw_if_data *ifd = chain->if_data;
+
+	/* Stop receiving events first... */
+	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifd->arrival);
+	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifd->departure);
+
+	/* ...then forget the tags. */
+	ifd->arrival = NULL;
+	ifd->departure = NULL;
+}
+
+
+/*
+ * Free all interface tracking data of a chain.  chain->if_data is
+ * cleared, every record reachable from the name hash is freed, then the
+ * three hash tables and the tracking data itself.
+ */
+void
+ipfw_ifhash_free(struct ip_fw_chain *chain)
+{
+	struct ip_fw_if_data *ifd = chain->if_data;
+	struct iface_mask *ifm, *ifm_next;
+	int bucket;
+
+	chain->if_data = NULL;
+
+	for (bucket = 0; bucket < ifd->masks_hsize; bucket++) {
+		/*
+		 * Assume every consumer to free its
+		 * iface-specific data beforehand.
+		 */
+		for (ifm = TAILQ_FIRST(&ifd->masks[bucket]); ifm != NULL;
+		    ifm = ifm_next) {
+			/* Fetch the successor before the record goes away. */
+			ifm_next = TAILQ_NEXT(ifm, name_next);
+			free(ifm, M_IPFW);
+		}
+	}
+
+	free(ifd->masks, M_IPFW);
+	free(ifd->real_ifaces, M_IPFW);
+	free(ifd->fake_ifaces, M_IPFW);
+
+	free(ifd, M_IPFW);
+}
+
--- /dev/null 2013-04-24 17:22:00.000000000 +0400
+++ sys/netpfil/ipfw/ip_fw_iface.h 2013-04-22 19:09:56.624996491 +0400
@@ -0,0 +1,55 @@
+/*-
+ * Copyright (c) 2013 Yandex LLC.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _IP_FW_IFACE_H_
+#define _IP_FW_IFACE_H_
+
+/*
+ * List entry tying one rule instruction to the interface record
+ * (struct iface_mask) it references.
+ */
+struct ipfw_insn_ptr {
+ TAILQ_ENTRY(ipfw_insn_ptr) next; /* Linkage in iface_mask.instructions */
+ ipfw_insn *insn; /* The instruction using the interface */
+};
+
+/*
+ * Tracking record for one interface name, whether or not an interface
+ * with that name currently exists.
+ */
+struct iface_mask {
+ struct ifnet *ifp; /* Interface the record was created for, if any */
+ uint32_t idx; /* Saved interface index (real or fake, without IPFW_FAKE_IDX) */
+ uint32_t flags; /* IPFW_IFLAG_* */
+ uint32_t refcount; /* Usage count */
+ char name[IFNAMSIZ]; /* Interface/mask */
+ TAILQ_ENTRY(iface_mask) idx_next; /* Linkage in the real or fake index hash */
+ TAILQ_ENTRY(iface_mask) name_next; /* Linkage in the name hash */
+ TAILQ_HEAD(rule_list, ipfw_insn_ptr) instructions; /* instructions using given mask */
+};
+/* iface_mask.flags: no interface with this name currently exists. */
+#define	IPFW_IFLAG_FAKE	0x01
+
+/*
+ * Bit set in an instruction's interface index to mark it as a fake
+ * index.  Unsigned constant: shifting 1 into the sign bit of an int is
+ * undefined behaviour.
+ */
+#define	IPFW_FAKE_IDX	(1U << 31)
+
+/* Find an interface record by name; returns NULL when not found. */
+struct iface_mask *ipfw_search_ifname(struct ip_fw_chain *chain, char *name);
+/* Find an interface record by real or fake (IPFW_FAKE_IDX set) index; returns NULL when not found. */
+struct iface_mask *ipfw_search_ifindex(struct ip_fw_chain *chain, uint32_t idx);
+/* Link an interface record into the name hash and the matching index hash. */
+void ipfw_add_ifname(struct ip_fw_chain *chain, struct iface_mask *ifm);
+
+#endif
+
More information about the freebsd-ipfw
mailing list