git: fc727ad63d3f - main - ipfw: add [fw]mark implementation for ipfw

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Tue, 25 Apr 2023 12:42:24 UTC
The branch main has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=fc727ad63d3f8d3a4be693c2c9c36200a37200ec

commit fc727ad63d3f8d3a4be693c2c9c36200a37200ec
Author:     Boris Lytochkin <lytboris@gmail.com>
AuthorDate: 2023-04-25 12:38:36 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2023-04-25 12:40:23 +0000

    ipfw: add [fw]mark implementation for ipfw
    
    Packet Mark is an analogue to ipfw tags with O(1) lookup from mbuf while
    regular tags require a singly-linked list traversal.
    Mark is a 32-bit number that can be looked up in a table
    [with 'number' table-type], matched or compared with a number with optional
    mask applied before comparison.
    Being generic in nature, Mark can be used for a variety of needs.
    For example, it could be used as a security group: mark will hold a security
    group id and represent a group of packet flows that share the same access
    control policy.
    
    Reviewed By: pauamma_gundo.com
    Differential Revision: https://reviews.freebsd.org/D39555
    MFC after:      1 month
---
 sbin/ipfw/ipfw.8                     | 54 +++++++++++++++++++---
 sbin/ipfw/ipfw2.c                    | 87 +++++++++++++++++++++++++++++++++++-
 sbin/ipfw/ipfw2.h                    |  3 ++
 sbin/ipfw/tables.c                   | 49 ++++++++------------
 sys/netinet/ip_fw.h                  | 11 ++++-
 sys/netinet/ip_var.h                 | 17 +++++++
 sys/netpfil/ipfw/ip_fw2.c            | 30 +++++++++++++
 sys/netpfil/ipfw/ip_fw_log.c         | 26 ++++++++---
 sys/netpfil/ipfw/ip_fw_pfil.c        |  3 +-
 sys/netpfil/ipfw/ip_fw_private.h     |  7 +--
 sys/netpfil/ipfw/ip_fw_sockopt.c     |  9 ++++
 sys/netpfil/ipfw/ip_fw_table.c       |  1 +
 sys/netpfil/ipfw/ip_fw_table_value.c | 16 ++++---
 13 files changed, 259 insertions(+), 54 deletions(-)

diff --git a/sbin/ipfw/ipfw.8 b/sbin/ipfw/ipfw.8
index 884797304b78..b7a55088c751 100644
--- a/sbin/ipfw/ipfw.8
+++ b/sbin/ipfw/ipfw.8
@@ -759,7 +759,7 @@ A packet can have multiple tags at the same time.
 Tags are "sticky", meaning once a tag is applied to a packet by a
 matching rule it exists until explicit removal.
 Tags are kept with the packet everywhere within the kernel, but are
-lost when packet leaves the kernel, for example, on transmitting
+lost when the packet leaves the kernel, for example, on transmitting
 packet out to the network or sending packet to a
 .Xr divert 4
 socket.
@@ -793,6 +793,27 @@ keyword, the tag with the number
 is searched among the tags attached to this packet and,
 if found, removed from it.
 Other tags bound to packet, if present, are left untouched.
+.It Cm setmark Ar value | tablearg
+When a packet matches a rule with the
+.Cm setmark
+keyword, a 32-bit numeric mark is assigned to the packet.
+The mark is an extension to the tags.
+Like tags, the mark is "sticky": its value is kept while the packet stays
+within the kernel and is lost when the packet leaves the kernel.
+Unlike tags, mark can be matched as a lookup table key or compared with bitwise
+mask applied against another value.
+Each packet can have only one mark, so
+.Cm setmark
+always overwrites the previous mark value.
+.Pp
+The initial mark value is 0.
+To check the current mark value, use the
+.Cm mark
+rule option.
+Mark
+.Ar value
+can be entered as decimal or hexadecimal (if prefixed by 0x), and it
+is always printed as hexadecimal.
 .It Cm altq Ar queue
 When a packet matches a rule with the
 .Cm altq
@@ -1845,7 +1866,8 @@ set of parameters as specified in the rule.
 One or more
 of source and destination addresses and ports can be
 specified.
-.It Cm lookup Bro Cm dst-ip | dst-port | dst-mac | src-ip | src-port | src-mac | uid | jail Brc Ar name
+.It Cm lookup Bro Cm dst-ip | dst-port | dst-mac | src-ip | src-port | src-mac | uid |
+.Cm jail | dscp | mark Brc Ar name
 Search an entry in lookup table
 .Ar name
 that matches the field specified as argument.
@@ -2017,6 +2039,23 @@ specified in the same way as
 Tags can be applied to the packet using
 .Cm tag
 rule action parameter (see it's description for details on tags).
+.It Cm mark Ar value[:bitmask] | tablearg[:bitmask]
+Matches packets whose mark is equal to
+.Ar value
+with optional
+.Ar bitmask
+applied to it.
+.Cm tablearg
+can also be used instead of an explicit
+.Ar value
+to match a value supplied by the last table lookup.
+.Pp
+Both
+.Ar value
+and
+.Ar bitmask
+can be entered as decimal or hexadecimal (if prefixed by 0x), and they
+are always printed as hexadecimal.
 .It Cm tcpack Ar ack
 TCP packets only.
 Match if the TCP header acknowledgment number field is set to
@@ -2359,7 +2398,7 @@ The following value types are supported:
 .Bl -tag -width indent
 .It Ar value-mask : Ar value-type Ns Op , Ns Ar value-mask
 .It Ar value-type : Ar skipto | pipe | fib | nat | dscp | tag | divert |
-.Ar netgraph | limit | ipv4
+.Ar netgraph | limit | ipv4 | ipv6 | mark
 .It Cm skipto
 rule number to jump to.
 .It Cm pipe
@@ -2382,16 +2421,19 @@ maximum number of connections.
 IPv4 nexthop to fwd packets to.
 .It Cm ipv6
 IPv6 nexthop to fwd packets to.
+.It Cm mark
+mark value to match/set.
 .El
 .Pp
 The
 .Cm tablearg
 argument can be used with the following actions:
 .Cm nat, pipe, queue, divert, tee, netgraph, ngtee, fwd, skipto, setfib ,
+.Cm setmark ,
 action parameters:
 .Cm tag, untag ,
 rule options:
-.Cm limit, tagged .
+.Cm limit, tagged, mark .
 .Pp
 When used with the
 .Cm skipto
@@ -3326,8 +3368,8 @@ Obey transparent proxy rules only, packet aliasing is not performed.
 .It Cm skip_global
 Skip instance in case of global state lookup (see below).
 .It Cm port_range Ar lower-upper
-Set the aliasing ports between the ranges given. Upper port has to be greater
-than lower.
+Set the aliasing ports between the ranges given.
+Upper port has to be greater than lower.
 .El
 .Pp
 Some special values can be supplied instead of
diff --git a/sbin/ipfw/ipfw2.c b/sbin/ipfw/ipfw2.c
index 4ef724160625..3a50fb40bc19 100644
--- a/sbin/ipfw/ipfw2.c
+++ b/sbin/ipfw/ipfw2.c
@@ -288,6 +288,7 @@ static struct _s_x rule_actions[] = {
 	{ "return",		TOK_RETURN },
 	{ "eaction",		TOK_EACTION },
 	{ "tcp-setmss",		TOK_TCPSETMSS },
+	{ "setmark",		TOK_SETMARK },
 	{ NULL, 0 }	/* terminator */
 };
 
@@ -313,6 +314,7 @@ static struct _s_x lookup_keys[] = {
 	{ "uid",		LOOKUP_UID },
 	{ "jail",		LOOKUP_JAIL },
 	{ "dscp",		LOOKUP_DSCP },
+	{ "mark",		LOOKUP_MARK },
 	{ NULL,			0 },
 };
 
@@ -391,6 +393,7 @@ static struct _s_x rule_options[] = {
 	{ "src-ip6",		TOK_SRCIP6 },
 	{ "lookup",		TOK_LOOKUP },
 	{ "flow",		TOK_FLOW },
+	{ "mark",		TOK_MARK },
 	{ "defer-action",	TOK_SKIPACTION },
 	{ "defer-immediate-action",	TOK_SKIPACTION },
 	{ "//",			TOK_COMMENT },
@@ -1102,6 +1105,45 @@ fill_dscp(ipfw_insn *cmd, char *av, int cblen)
 	}
 }
 
+/*
+ * Fill an O_MARK instruction from "value[:mask]" or "tablearg[:mask]".
+ */
+static void
+fill_mark(ipfw_insn *cmd, char *av, int cblen)
+{
+	uint32_t *value, *mask;
+	char *value_str;
+
+	cmd->opcode = O_MARK;
+	cmd->len |= F_INSN_SIZE(ipfw_insn_u32) + 1;
+
+	CHECK_CMDLEN;
+
+	value = (uint32_t *)(cmd + 1);	/* first u32 after the insn header */
+	mask = value + 1;		/* second u32 holds the bitmask */
+
+	value_str = strsep(&av, ":");	/* av: text after ':' or NULL */
+
+	if (strcmp(value_str, "tablearg") == 0) {
+		cmd->arg1 = IP_FW_TARG;
+		*value = 0;
+	} else {
+		/* This is not a tablearg */
+		cmd->arg1 |= 0x8000;
+		*value = strtoul(value_str, NULL, 0);
+	}
+	if (av)
+		*mask = strtoul(av, NULL, 0);
+	else
+		*mask = 0xFFFFFFFF;	/* no mask given: all 32 bits */
+
+	if ((*value & *mask) != *value)
+		errx(EX_DATAERR, "Static mark value: some bits in value are"
+		    " set that will be masked out by mask "
+		    "(%#x & %#x) = %#x != %#x",
+		    *value, *mask, (*value & *mask), *value);
+}
+
 static struct _s_x icmpcodes[] = {
       { "net",			ICMP_UNREACH_NET },
       { "host",			ICMP_UNREACH_HOST },
@@ -1788,6 +1830,19 @@ print_instruction(struct buf_pr *bp, const struct format_opts *fo,
 	case O_SKIP_ACTION:
 		bprintf(bp, " defer-immediate-action");
 		break;
+	case O_MARK:
+		bprintf(bp, " mark");
+		if (cmd->arg1 == IP_FW_TARG)
+			bprintf(bp, " tablearg");
+		else
+			bprintf(bp, " %#x",
+			    ((const ipfw_insn_u32 *)cmd)->d[0]);
+
+		if (((const ipfw_insn_u32 *)cmd)->d[1] != 0xFFFFFFFF)
+			bprintf(bp, ":%#x",
+			    ((const ipfw_insn_u32 *)cmd)->d[1]);
+		break;
+
 	default:
 		bprintf(bp, " [opcode %d len %d]", cmd->opcode,
 		    cmd->len);
@@ -2031,6 +2086,13 @@ print_action_instruction(struct buf_pr *bp, const struct format_opts *fo,
 		else
 			bprint_uint_arg(bp, "call ", cmd->arg1);
 		break;
+	case O_SETMARK:
+		if (cmd->arg1 == IP_FW_TARG) {
+			bprintf(bp, "setmark tablearg");
+			break;
+		}
+		bprintf(bp, "setmark %#x", ((const ipfw_insn_u32 *)cmd)->d[0]);
+		break;
 	default:
 		bprintf(bp, "** unrecognized action %d len %d ",
 			cmd->opcode, cmd->len);
@@ -2175,7 +2237,7 @@ static const int action_opcodes[] = {
 	O_CHECK_STATE, O_ACCEPT, O_COUNT, O_DENY, O_REJECT,
 	O_UNREACH6, O_SKIPTO, O_PIPE, O_QUEUE, O_DIVERT, O_TEE,
 	O_NETGRAPH, O_NGTEE, O_FORWARD_IP, O_FORWARD_IP6, O_NAT,
-	O_SETFIB, O_SETDSCP, O_REASS, O_CALLRETURN,
+	O_SETFIB, O_SETDSCP, O_REASS, O_CALLRETURN, O_SETMARK,
 	/* keep the following opcodes at the end of the list */
 	O_EXTERNAL_ACTION, O_EXTERNAL_INSTANCE, O_EXTERNAL_DATA
 };
@@ -4244,6 +4306,23 @@ chkarg:
 		fill_cmd(action, O_CALLRETURN, F_NOT, 0);
 		break;
 
+	case TOK_SETMARK: {
+		action->opcode = O_SETMARK;
+		action->len = F_INSN_SIZE(ipfw_insn_u32);
+		NEED1("missing mark");
+		if (strcmp(*av, "tablearg") == 0) {
+			action->arg1 = IP_FW_TARG;
+		} else {
+		        ((ipfw_insn_u32 *)action)->d[0] =
+		            strtoul(*av, NULL, 0);
+			/* This is not a tablearg */
+			action->arg1 |= 0x8000;
+		}
+		av++;
+		CHECK_CMDLEN;
+		break;
+	}
+
 	case TOK_TCPSETMSS: {
 		u_long mss;
 		uint16_t idx;
@@ -5131,6 +5210,12 @@ read_options:
 			fill_cmd(cmd, O_SKIP_ACTION, 0, 0);
 			break;
 
+		case TOK_MARK:
+			NEED1("missing mark value:mask");
+			fill_mark(cmd, *av, cblen);
+			av++;
+			break;
+
 		default:
 			errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s);
 		}
diff --git a/sbin/ipfw/ipfw2.h b/sbin/ipfw/ipfw2.h
index dd7699987434..a554f9b9f6fc 100644
--- a/sbin/ipfw/ipfw2.h
+++ b/sbin/ipfw/ipfw2.h
@@ -321,6 +321,9 @@ enum tokens {
 
 	TOK_TCPSETMSS,
 
+	TOK_MARK,
+	TOK_SETMARK,
+
 	TOK_SKIPACTION,
 };
 
diff --git a/sbin/ipfw/tables.c b/sbin/ipfw/tables.c
index 9e6390492e96..37cbd7a2d7ae 100644
--- a/sbin/ipfw/tables.c
+++ b/sbin/ipfw/tables.c
@@ -106,6 +106,7 @@ static struct _s_x tablevaltypes[] = {
       { "limit",	IPFW_VTYPE_LIMIT },
       { "ipv4",		IPFW_VTYPE_NH4 },
       { "ipv6",		IPFW_VTYPE_NH6 },
+      { "mark",		IPFW_VTYPE_MARK },
       { NULL, 0 }
 };
 
@@ -916,7 +917,7 @@ table_do_modify_record(int cmd, ipfw_obj_header *oh,
 
 	memcpy(pbuf, oh, sizeof(*oh));
 	oh = (ipfw_obj_header *)pbuf;
-	oh->opheader.version = 1;
+	oh->opheader.version = 1; /* Current version */
 
 	ctlv = (ipfw_obj_ctlv *)(oh + 1);
 	ctlv->count = count;
@@ -1662,6 +1663,11 @@ tentry_fill_value(ipfw_obj_header *oh __unused, ipfw_obj_tentry *tent,
 			}
 			etype = "ipv6";
 			break;
+		case IPFW_VTYPE_MARK:
+			v->mark = strtol(n, &e, 16);
+			if (*e != '\0')
+				etype = "mark";
+			break;
 		}
 
 		if (etype != NULL)
@@ -1878,6 +1884,9 @@ table_show_value(char *buf, size_t bufsize, ipfw_table_value *v,
 			    NI_NUMERICHOST) == 0)
 				l = snprintf(buf, sz, "%s,", abuf);
 			break;
+		case IPFW_VTYPE_MARK:
+			l = snprintf(buf, sz, "%#x,", v->mark);
+			break;
 		}
 
 		buf += l;
@@ -2034,37 +2043,17 @@ ipfw_list_ta(int ac __unused, char *av[] __unused)
 }
 
 
-/* Copy of current kernel table_value structure */
-struct _table_value {
-	uint32_t	tag;		/* O_TAG/O_TAGGED */
-	uint32_t	pipe;		/* O_PIPE/O_QUEUE */
-	uint16_t	divert;		/* O_DIVERT/O_TEE */
-	uint16_t	skipto;		/* skipto, CALLRET */
-	uint32_t	netgraph;	/* O_NETGRAPH/O_NGTEE */
-	uint32_t	fib;		/* O_SETFIB */
-	uint32_t	nat;		/* O_NAT */
-	uint32_t	nh4;
-	uint8_t		dscp;
-	uint8_t		spare0;
-	uint16_t	spare1;
-	/* -- 32 bytes -- */
-	struct in6_addr	nh6;
-	uint32_t	limit;		/* O_LIMIT */
-	uint32_t	zoneid;
-	uint64_t	refcnt;		/* Number of references */
-};
-
 static int
 compare_values(const void *_a, const void *_b)
 {
-	const struct _table_value *a, *b;
+	const ipfw_table_value *a, *b;
 
-	a = (const struct _table_value *)_a;
-	b = (const struct _table_value *)_b;
+	a = (const ipfw_table_value *)_a;
+	b = (const ipfw_table_value *)_b;
 
-	if (a->spare1 < b->spare1)
+	if (a->kidx < b->kidx)
 		return (-1);
-	else if (a->spare1 > b->spare1)
+	else if (a->kidx > b->kidx)
 		return (1);
 
 	return (0);
@@ -2075,7 +2064,7 @@ ipfw_list_values(int ac __unused, char *av[] __unused)
 {
 	char buf[128];
 	ipfw_obj_lheader *olh;
-	struct _table_value *v;
+	ipfw_table_value *v;
 	uint32_t i, vmask;
 	int error;
 
@@ -2087,13 +2076,13 @@ ipfw_list_values(int ac __unused, char *av[] __unused)
 
 	table_print_valheader(buf, sizeof(buf), vmask);
 	printf("HEADER: %s\n", buf);
-	v = (struct _table_value *)(olh + 1);
+	v = (ipfw_table_value *)(olh + 1);
 	qsort(v, olh->count, olh->objsize, compare_values);
 	for (i = 0; i < olh->count; i++) {
 		table_show_value(buf, sizeof(buf), (ipfw_table_value *)v,
 		    vmask, 0);
-		printf("[%u] refs=%lu %s\n", v->spare1, (u_long)v->refcnt, buf);
-		v = (struct _table_value *)((caddr_t)v + olh->objsize);
+		printf("[%u] refs=%lu %s\n", v->kidx, (u_long)v->refcnt, buf);
+		v = (ipfw_table_value *)((caddr_t)v + olh->objsize);
 	}
 
 	free(olh);
diff --git a/sys/netinet/ip_fw.h b/sys/netinet/ip_fw.h
index cbf03a5a6f8e..fff76b5f840d 100644
--- a/sys/netinet/ip_fw.h
+++ b/sys/netinet/ip_fw.h
@@ -298,6 +298,9 @@ enum ipfw_opcodes {		/* arguments (4 byte each)	*/
 	O_MAC_SRC_LOOKUP,	/* arg1=table number, u32=value */
 	O_MAC_DST_LOOKUP,	/* arg1=table number, u32=value */
 
+	O_SETMARK,		/* u32 = value */
+	O_MARK,			/* 2 u32 = value, bitmask */
+
 	O_LAST_OPCODE		/* not an opcode!		*/
 };
 
@@ -314,6 +317,7 @@ enum ipfw_table_lookup_type {
 	LOOKUP_DSCP,
 	LOOKUP_DST_MAC,
 	LOOKUP_SRC_MAC,
+	LOOKUP_MARK,
 };
 
 /*
@@ -790,6 +794,7 @@ struct _ipfw_dyn_rule {
 #define	IPFW_VTYPE_LIMIT	0x00000100	/* limit */
 #define	IPFW_VTYPE_NH4		0x00000200	/* IPv4 nexthop */
 #define	IPFW_VTYPE_NH6		0x00000400	/* IPv6 nexthop */
+#define	IPFW_VTYPE_MARK		0x00000800	/* [fw]mark */
 
 /* MAC/InfiniBand/etc address length */
 #define	IPFW_MAX_L2_ADDR_LEN	20
@@ -888,6 +893,7 @@ struct tflow_entry {
 	} a;
 };
 
+/* 64-byte structure representing multi-field table value */
 typedef struct _ipfw_table_value {
 	uint32_t	tag;		/* O_TAG/O_TAGGED */
 	uint32_t	pipe;		/* O_PIPE/O_QUEUE */
@@ -899,11 +905,12 @@ typedef struct _ipfw_table_value {
 	uint32_t	nh4;
 	uint8_t		dscp;
 	uint8_t		spare0;
-	uint16_t	spare1;
+	uint16_t	kidx;		/* value kernel index */
 	struct in6_addr	nh6;
 	uint32_t	limit;		/* O_LIMIT */
 	uint32_t	zoneid;		/* scope zone id for nh6 */
-	uint64_t	reserved;
+	uint32_t	mark;		/* O_SETMARK/O_MARK */
+	uint32_t	refcnt;		/* XXX 64-bit in kernel */
 } ipfw_table_value;
 
 /* Table entry TLV */
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 2dfd7ddb4822..c25bae4b394f 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -272,13 +272,30 @@ void	in_delayed_cksum(struct mbuf *m);
  * On entry, the structure is valid if slot>0, and refers to the starting
  * rules. 'info' contains the reason for reinject, e.g. divert port,
  * divert direction, and so on.
+ *
+ * Packet Mark is an analogue to ipfw tags with O(1) lookup from mbuf while
+ * regular tags require a singly-linked list traversal. Mark is a 32-bit
+ * number that can be looked up in a table [with 'number' table-type], matched
+ * or compared with a number with optional mask applied before comparison.
+ * Being generic in nature, Mark can be used for a variety of needs.
+ * For example, it could be used as a security group: mark will hold a
+ * security group id and represent a group of packet flows that share the same
+ * access control policy.
+ * O_MARK opcode can match mark value bitwise so one can build a hierarchical
+ * model designating different meanings for different bit ranges.
  */
 struct ipfw_rule_ref {
+/* struct m_tag spans 24 bytes above this point, see mbuf_tags(9) */
+	/* spare space just to be safe in case struct m_tag grows */
+/* -- 32 bytes -- */
 	uint32_t	slot;		/* slot for matching rule	*/
 	uint32_t	rulenum;	/* matching rule number		*/
 	uint32_t	rule_id;	/* matching rule id		*/
 	uint32_t	chain_id;	/* ruleset id			*/
 	uint32_t	info;		/* see below			*/
+	uint32_t	pkt_mark;	/* packet mark			*/
+	uint32_t	spare[2];
+/* -- 64 bytes -- */
 };
 
 enum {
diff --git a/sys/netpfil/ipfw/ip_fw2.c b/sys/netpfil/ipfw/ip_fw2.c
index f2e914e24007..4b347b2d7d2d 100644
--- a/sys/netpfil/ipfw/ip_fw2.c
+++ b/sys/netpfil/ipfw/ip_fw2.c
@@ -2127,6 +2127,11 @@ do {								\
 							eh->ether_shost;
 						keylen = ETHER_ADDR_LEN;
 						break;
+					case LOOKUP_MARK:
+						key = args->rule.pkt_mark;
+						pkey = &key;
+						keylen = sizeof(key);
+						break;
 					}
 					if (keylen == 0)
 						break;
@@ -2773,6 +2778,19 @@ do {								\
 				}
 				break;
 			}
+
+			case O_MARK: {
+				uint32_t mark;
+				if (cmd->arg1 == IP_FW_TARG)
+					mark = TARG_VAL(chain, tablearg, mark);
+				else
+					mark = ((ipfw_insn_u32 *)cmd)->d[0];
+				match =
+				    (args->rule.pkt_mark &
+				    ((ipfw_insn_u32 *)cmd)->d[1]) ==
+				    (mark & ((ipfw_insn_u32 *)cmd)->d[1]);
+				break;
+			}
 				
 			/*
 			 * The second set of opcodes represents 'actions',
@@ -3276,6 +3294,18 @@ do {								\
 				done = 1;	/* exit outer loop */
 				break;
 			}
+
+			case O_SETMARK: {
+				l = 0;		/* exit inner loop */
+				args->rule.pkt_mark = (
+				    (cmd->arg1 == IP_FW_TARG) ?
+				    TARG_VAL(chain, tablearg, mark) :
+				    ((ipfw_insn_u32 *)cmd)->d[0]);
+
+				IPFW_INC_RULE_COUNTER(f, pktlen);
+				break;
+			}
+
 			case O_EXTERNAL_ACTION:
 				l = 0; /* in any case exit inner loop */
 				retval = ipfw_run_eaction(chain, args,
diff --git a/sys/netpfil/ipfw/ip_fw_log.c b/sys/netpfil/ipfw/ip_fw_log.c
index b5d2f998adc8..4fe95ea9a6fd 100644
--- a/sys/netpfil/ipfw/ip_fw_log.c
+++ b/sys/netpfil/ipfw/ip_fw_log.c
@@ -104,7 +104,7 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 {
 	char *action;
 	int limit_reached = 0;
-	char action2[92], proto[128], fragment[32];
+	char action2[92], proto[128], fragment[32], mark_str[24];
 
 	if (V_fw_verbose == 0) {
 		if (args->flags & IPFW_ARGS_LENMASK)
@@ -276,6 +276,14 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 				snprintf(SNPARGS(action2, 0), "Call %d",
 				    cmd->arg1);
 			break;
+		case O_SETMARK:
+			if (cmd->arg1 == IP_FW_TARG)
+				snprintf(SNPARGS(action2, 0), "SetMark %#x",
+				    TARG(cmd->arg1, mark));
+			else
+				snprintf(SNPARGS(action2, 0), "SetMark %#x",
+				    ((ipfw_insn_u32 *)cmd)->d[0]);
+			break;
 		case O_EXTERNAL_ACTION:
 			snprintf(SNPARGS(action2, 0), "Eaction %s",
 			    ((struct named_object *)SRV_OBJECT(chain,
@@ -410,14 +418,22 @@ ipfw_log(struct ip_fw_chain *chain, struct ip_fw *f, u_int hlen,
 				    (ipoff & IP_MF) ? "+" : "");
 		}
 	}
+
+	/* [fw]mark */
+	if (args->rule.pkt_mark)
+		snprintf(SNPARGS(mark_str, 0), " mark:%#x",
+		    args->rule.pkt_mark);
+	else
+		mark_str[0] = '\0';
+
 #ifdef __FreeBSD__
-	log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s %s via %s%s\n",
-	    f ? f->rulenum : -1, action, proto,
+	log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s%s %s via %s%s\n",
+	    f ? f->rulenum : -1, action, proto, mark_str,
 	    args->flags & IPFW_ARGS_OUT ? "out" : "in", args->ifp->if_xname,
 	    fragment);
 #else
-	log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s [no if info]%s\n",
-	    f ? f->rulenum : -1, action, proto, fragment);
+	log(LOG_SECURITY | LOG_INFO, "ipfw: %d %s %s%s [no if info]%s\n",
+	    f ? f->rulenum : -1, action, proto, mark_str, fragment);
 #endif
 	if (limit_reached)
 		log(LOG_SECURITY | LOG_NOTICE,
diff --git a/sys/netpfil/ipfw/ip_fw_pfil.c b/sys/netpfil/ipfw/ip_fw_pfil.c
index ec46c077d8bb..72cc25f647fe 100644
--- a/sys/netpfil/ipfw/ip_fw_pfil.c
+++ b/sys/netpfil/ipfw/ip_fw_pfil.c
@@ -146,6 +146,7 @@ again:
 	args.m = *m0;
 	args.ifp = ifp;
 	args.inp = inp;
+	args.rule.pkt_mark = 0;
 
 	ipfw = ipfw_chk(&args);
 	*m0 = args.m;
@@ -356,7 +357,7 @@ again:
 			return (PFIL_PASS);
 		args.flags |= IPFW_ARGS_REF;
 	}
-	args.m = *m0,
+	args.m = *m0;
 
 	ipfw = ipfw_chk(&args);
 	*m0 = args.m;
diff --git a/sys/netpfil/ipfw/ip_fw_private.h b/sys/netpfil/ipfw/ip_fw_private.h
index 87a40c940c23..abb3cd965680 100644
--- a/sys/netpfil/ipfw/ip_fw_private.h
+++ b/sys/netpfil/ipfw/ip_fw_private.h
@@ -330,12 +330,13 @@ struct table_value {
 	uint16_t	divert;		/* O_DIVERT/O_TEE */
 	uint16_t	skipto;		/* skipto, CALLRET */
 	uint32_t	netgraph;	/* O_NETGRAPH/O_NGTEE */
-	uint32_t	fib;		/* O_SETFIB */
-	uint32_t	nat;		/* O_NAT */
+	uint16_t	fib;		/* O_SETFIB */
+	uint16_t	nat;		/* O_NAT */
+	uint32_t	mark;		/* O_SETMARK/O_MARK */
 	uint32_t	nh4;
 	uint8_t		dscp;
 	uint8_t		spare0;
-	uint16_t	spare1;
+	uint16_t	kidx;		/* value kernel index */
 	/* -- 32 bytes -- */
 	struct in6_addr	nh6;
 	uint32_t	limit;		/* O_LIMIT */
diff --git a/sys/netpfil/ipfw/ip_fw_sockopt.c b/sys/netpfil/ipfw/ip_fw_sockopt.c
index e8dd59eacc09..0065f7d95c00 100644
--- a/sys/netpfil/ipfw/ip_fw_sockopt.c
+++ b/sys/netpfil/ipfw/ip_fw_sockopt.c
@@ -566,6 +566,8 @@ import_rule0(struct rule_check_info *ci)
 			break;
 		case O_SETFIB:
 		case O_SETDSCP:
+		case O_SETMARK:
+		case O_MARK:
 			if (cmd->arg1 == IP_FW_TABLEARG)
 				cmd->arg1 = IP_FW_TARG;
 			else
@@ -650,6 +652,8 @@ export_rule0(struct ip_fw *krule, struct ip_fw_rule0 *urule, int len)
 			break;
 		case O_SETFIB:
 		case O_SETDSCP:
+		case O_SETMARK:
+		case O_MARK:
 			if (cmd->arg1 == IP_FW_TARG)
 				cmd->arg1 = IP_FW_TABLEARG;
 			else
@@ -1939,6 +1943,7 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
 			break;
 
 		case O_DSCP:
+		case O_MARK:
 			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1)
 				goto bad_size;
 			break;
@@ -2001,6 +2006,10 @@ check_ipfw_rule_body(ipfw_insn *cmd, int cmd_len, struct rule_check_info *ci)
 		case O_CHECK_STATE:
 			ci->object_opcodes++;
 			goto check_size;
+		case O_SETMARK:
+			if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+				goto bad_size;
+			goto check_action;
 		case O_REJECT:
 			/* "unreach needfrag" has variable len. */
 			if ((cmdlen == F_INSN_SIZE(ipfw_insn) ||
diff --git a/sys/netpfil/ipfw/ip_fw_table.c b/sys/netpfil/ipfw/ip_fw_table.c
index 202a49840b38..52955410fef2 100644
--- a/sys/netpfil/ipfw/ip_fw_table.c
+++ b/sys/netpfil/ipfw/ip_fw_table.c
@@ -2760,6 +2760,7 @@ classify_srcdst(ipfw_insn *cmd, uint16_t *puidx, uint8_t *ptype)
 		case LOOKUP_UID:
 		case LOOKUP_JAIL:
 		case LOOKUP_DSCP:
+		case LOOKUP_MARK:
 			*ptype = IPFW_TABLE_NUMBER;
 			break;
 		case LOOKUP_DST_MAC:
diff --git a/sys/netpfil/ipfw/ip_fw_table_value.c b/sys/netpfil/ipfw/ip_fw_table_value.c
index 025ee5923a10..eb268ab9b98d 100644
--- a/sys/netpfil/ipfw/ip_fw_table_value.c
+++ b/sys/netpfil/ipfw/ip_fw_table_value.c
@@ -114,6 +114,7 @@ mask_table_value(struct table_value *src, struct table_value *dst,
 	_MCPY(netgraph, IPFW_VTYPE_NETGRAPH);
 	_MCPY(fib, IPFW_VTYPE_FIB);
 	_MCPY(nat, IPFW_VTYPE_NAT);
+	_MCPY(mark, IPFW_VTYPE_MARK);
 	_MCPY(dscp, IPFW_VTYPE_DSCP);
 	_MCPY(nh4, IPFW_VTYPE_NH4);
 	_MCPY(nh6, IPFW_VTYPE_NH6);
@@ -615,6 +616,7 @@ ipfw_import_table_value_legacy(uint32_t value, struct table_value *v)
 	v->nh4 = value; /* host format */
 	v->dscp = value;
 	v->limit = value;
+	v->mark = value;
 }
 
 /*
@@ -653,6 +655,7 @@ ipfw_import_table_value_v1(ipfw_table_value *iv)
 	v.nh6 = iv->nh6;
 	v.limit = iv->limit;
 	v.zoneid = iv->zoneid;
+	v.mark = iv->mark;
 
 	memcpy(iv, &v, sizeof(ipfw_table_value));
 }
@@ -679,33 +682,34 @@ ipfw_export_table_value_v1(struct table_value *v, ipfw_table_value *piv)
 	iv.nh4 = v->nh4;
 	iv.nh6 = v->nh6;
 	iv.zoneid = v->zoneid;
+	iv.mark = v->mark;
 
 	memcpy(piv, &iv, sizeof(iv));
 }
 
 /*
- * Exports real value data into ipfw_table_value structure.
- * Utilizes "spare1" field to store kernel index.
+ * Exports real value data into ipfw_table_value structure including refcnt.
  */
 static int
 dump_tvalue(struct namedobj_instance *ni, struct named_object *no, void *arg)
 {
 	struct vdump_args *da;
 	struct table_val_link *ptv;
-	struct table_value *v;
+	ipfw_table_value *v;
 
 	da = (struct vdump_args *)arg;
 	ptv = (struct table_val_link *)no;
 
-	v = (struct table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v));
+	v = (ipfw_table_value *)ipfw_get_sopt_space(da->sd, sizeof(*v));
 	/* Out of memory, returning */
 	if (v == NULL) {
 		da->error = ENOMEM;
 		return (ENOMEM);
 	}
 
-	memcpy(v, ptv->pval, sizeof(*v));
-	v->spare1 = ptv->no.kidx;
+	ipfw_export_table_value_v1(ptv->pval, v);
+	v->refcnt = ptv->pval->refcnt;
+	v->kidx = ptv->no.kidx;
 	return (0);
 }