git: 080f68d0ab0c - main - mlx5_core: Add steering support for IPsec with IPv6

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Tue, 07 Jan 2025 00:53:55 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=080f68d0ab0c87950ecd9b393a156b1e4d12c825

commit 080f68d0ab0c87950ecd9b393a156b1e4d12c825
Author:     Ariel Ehrenberg <aehrenberg@nvidia.com>
AuthorDate: 2024-12-04 09:32:54 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2025-01-07 00:53:37 +0000

    mlx5_core: Add steering support for IPsec with IPv6
    
    IPv6 flow tables were not connected to the previous flow steering
    (FS) tables.  Create an additional table to serve as the IPsec RX
    root.  This table has two rules that redirect received packets to
    the IPv4 or IPv6 tables based on the IP version in the packet header.
    
    Sponsored by:      NVidia networking
---
 sys/dev/mlx5/mlx5_accel/ipsec.h           |   2 +
 sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c   | 157 +++++++++++++++++++++++++++---
 sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c |   4 +-
 3 files changed, 149 insertions(+), 14 deletions(-)

diff --git a/sys/dev/mlx5/mlx5_accel/ipsec.h b/sys/dev/mlx5/mlx5_accel/ipsec.h
index 95742c4099f1..361b9f72d873 100644
--- a/sys/dev/mlx5/mlx5_accel/ipsec.h
+++ b/sys/dev/mlx5/mlx5_accel/ipsec.h
@@ -43,6 +43,7 @@ struct mlx5e_priv;
 struct mlx5e_tx_wqe;
 struct mlx5e_ipsec_tx;
 struct mlx5e_ipsec_rx;
+struct mlx5e_ipsec_rx_ip_type;
 
 struct aes_gcm_keymat {
 	u64   seq_iv;
@@ -128,6 +129,7 @@ struct mlx5e_ipsec {
 	struct mlx5e_ipsec_tx *tx;
 	struct mlx5e_ipsec_rx *rx_ipv4;
 	struct mlx5e_ipsec_rx *rx_ipv6;
+	struct mlx5e_ipsec_rx_ip_type *rx_ip_type;
 	struct mlx5e_ipsec_aso *aso;
 	u32 pdn;
 	u32 mkey;
diff --git a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c
index f7950bf61269..fb9ca94278db 100644
--- a/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c
+++ b/sys/dev/mlx5/mlx5_accel/mlx5_ipsec_fs.c
@@ -138,6 +138,14 @@ struct mlx5e_ipsec_rx_roce {
 	struct mlx5_flow_namespace *ns_rdma;
 };
 
+struct mlx5e_ipsec_rx_ip_type {
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_handle *ipv4_rule;
+	struct mlx5_flow_handle *ipv6_rule;
+	struct mlx5e_ipsec_miss miss;
+};
+
 struct mlx5e_ipsec_rx {
 	struct mlx5e_ipsec_ft ft;
 	struct mlx5e_ipsec_miss pol;
@@ -497,6 +505,16 @@ static void setup_fte_addr6(struct mlx5_flow_spec *spec, __be32 *saddr,
                             outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 0xff, 16);
 }
 
+static void
+setup_fte_ip_version(struct mlx5_flow_spec *spec, u8 family)
+{
+        spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
+
+        MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version);
+        MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version,
+                 family == AF_INET ? 4 : 6);
+}
+
 static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
@@ -1598,9 +1616,18 @@ static void ipsec_fs_rx_roce_table_destroy(struct mlx5e_ipsec_rx_roce *rx_roce)
 	mlx5_destroy_flow_table(rx_roce->ft);
 }
 
+static void
+ipsec_fs_rx_ip_type_catchall_rule_destroy(struct mlx5e_ipsec_rx_ip_type* rx_ip_type)
+{
+	mlx5_del_flow_rules(&rx_ip_type->ipv4_rule);
+	mlx5_del_flow_rules(&rx_ip_type->ipv6_rule);
+	mlx5_del_flow_rules(&rx_ip_type->miss.rule);
+	mlx5_destroy_flow_group(rx_ip_type->miss.group);
+	rx_ip_type->miss.group = NULL;
+}
+
 static void ipsec_fs_rx_table_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx)
 {
-	mutex_lock(&rx->ft.mutex);
 	if (rx->chains) {
 		ipsec_chains_destroy(rx->chains);
 	} else {
@@ -1610,7 +1637,6 @@ static void ipsec_fs_rx_table_destroy(struct mlx5_core_dev *mdev, struct mlx5e_i
 	mlx5_destroy_flow_table(rx->ft.sa);
 	mlx5_destroy_flow_table(rx->ft.status);
 	ipsec_fs_rx_roce_table_destroy(&rx->roce);
-	mutex_unlock(&rx->ft.mutex);
 }
 
 static void ipsec_roce_setup_udp_dport(struct mlx5_flow_spec *spec, u16 dport)
@@ -1831,6 +1857,90 @@ out:
 	return err;
 }
 
+static int
+ipsec_fs_rx_ip_type_catchall_rules_create(struct mlx5e_priv *priv,
+                                          struct mlx5_flow_destination *defdst)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+	struct mlx5_flow_destination dst = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		return -ENOMEM;
+	}
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+	/* Set rule for ipv4 packets */
+	dst.ft = ipsec->rx_ipv4->ft.pol;
+	setup_fte_ip_version(spec, AF_INET);
+	rule = mlx5_add_flow_rules(ipsec->rx_ip_type->ft, spec, &flow_act, &dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add ipv4 rule to ip_type table err=%d\n",
+			      err);
+		goto out;
+	}
+	ipsec->rx_ip_type->ipv4_rule = rule;
+
+	/* Set rule for ipv6 packets */
+	dst.ft = ipsec->rx_ipv6->ft.pol;
+	setup_fte_ip_version(spec, AF_INET6);
+	rule = mlx5_add_flow_rules(ipsec->rx_ip_type->ft, spec, &flow_act, &dst, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		mlx5_core_err(mdev, "Failed to add ipv6 rule to ip_type table err=%d\n",
+			      err);
+		goto fail_add_ipv6_rule;
+	}
+	ipsec->rx_ip_type->ipv6_rule = rule;
+
+	/* set miss rule */
+	err = ipsec_miss_create(mdev, ipsec->rx_ip_type->ft, &ipsec->rx_ip_type->miss, defdst);
+	if (err) {
+		mlx5_core_err(mdev, "Failed to add miss rule to ip_type table err=%d\n",
+			          err);
+		goto fail_miss_rule;
+	}
+
+	goto out;
+
+fail_miss_rule:
+	mlx5_del_flow_rules(&ipsec->rx_ip_type->ipv6_rule);
+fail_add_ipv6_rule:
+	mlx5_del_flow_rules(&ipsec->rx_ip_type->ipv4_rule);
+out:
+	kvfree(spec);
+	return err;
+}
+
+static int
+ipsec_fs_rx_ip_type_table_create(struct mlx5e_priv *priv,
+                                 int level)
+{
+	struct mlx5e_ipsec *ipsec = priv->ipsec;
+	struct mlx5_flow_table *ft;
+	int err = 0;
+
+	/* Create rx ip type table */
+	ft = ipsec_rx_ft_create(ipsec->rx_ip_type->ns, level, 0, 1);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		goto out;
+	}
+	ipsec->rx_ip_type->ft = ft;
+
+	priv->fts.ipsec_ft = priv->ipsec->rx_ip_type->ft;
+
+out:
+	return err;
+}
+
 static int ipsec_fs_rx_table_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx,
 				    int rx_init_level, int rdma_init_level)
 {
@@ -1996,6 +2106,7 @@ void mlx5e_accel_ipsec_fs_rx_catchall_rules_destroy(struct mlx5e_priv *priv)
 	if (!priv->ipsec)
 		return;
 
+	ipsec_fs_rx_ip_type_catchall_rule_destroy(priv->ipsec->rx_ip_type);
 	ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv4);
 	ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
 }
@@ -2019,6 +2130,13 @@ int mlx5e_accel_ipsec_fs_rx_catchall_rules(struct mlx5e_priv *priv)
 	err = ipsec_fs_rx_catchall_rules(priv, ipsec->rx_ipv4, &dest);
 	if (err)
 		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
+
+	err = ipsec_fs_rx_ip_type_catchall_rules_create(priv, &dest);
+	if (err) {
+		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv6);
+		ipsec_fs_rx_catchall_rules_destroy(priv->mdev, priv->ipsec->rx_ipv4);
+	}
+
 out:
 	return err;
 }
@@ -2032,6 +2150,7 @@ void mlx5e_accel_ipsec_fs_rx_tables_destroy(struct mlx5e_priv *priv)
 	if (!ipsec)
 		return;
 
+	mlx5_destroy_flow_table(ipsec->rx_ip_type->ft);
 	ipsec_fs_rx_table_destroy(mdev, ipsec->rx_ipv6);
 	ipsec_fs_rx_table_destroy(mdev, ipsec->rx_ipv4);
 }
@@ -2045,18 +2164,24 @@ int mlx5e_accel_ipsec_fs_rx_tables_create(struct mlx5e_priv *priv)
 	if (!ipsec)
 		return 0;
 
-	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv4, 0, 0);
+	err = ipsec_fs_rx_ip_type_table_create(priv, 0);
 	if (err)
-		goto out;
+		return err;
 
-	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv6, 4, 1);
-	if (err) {
-		ipsec_fs_rx_table_destroy(priv->mdev, ipsec->rx_ipv4);
-		goto out;
-	}
+	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv4, 1, 0);
+	if (err)
+		goto err_ipv4_table;
 
-	priv->fts.ipsec_ft = priv->ipsec->rx_ipv4->ft.pol;
-out:
+	err = ipsec_fs_rx_table_create(ipsec->mdev, ipsec->rx_ipv6, 5, 1);
+	if (err)
+		goto err_ipv6_table;
+
+	return 0;
+
+err_ipv6_table:
+	ipsec_fs_rx_table_destroy(priv->mdev, ipsec->rx_ipv4);
+err_ipv4_table:
+	mlx5_destroy_flow_table(ipsec->rx_ip_type->ft);
 	return err;
 }
 
@@ -2067,6 +2192,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec)
 	mutex_destroy(&ipsec->rx_ipv4->ft.mutex);
 	mutex_destroy(&ipsec->tx->ft.mutex);
 	ipsec_fs_destroy_counters(ipsec);
+	kfree(ipsec->rx_ip_type);
 	kfree(ipsec->rx_ipv6);
 	kfree(ipsec->rx_ipv4);
 	kfree(ipsec->tx);
@@ -2089,9 +2215,13 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 	if (!ipsec->tx)
 		return -ENOMEM;
 
+	ipsec->rx_ip_type = kzalloc(sizeof(*ipsec->rx_ip_type), GFP_KERNEL);
+	if (!ipsec->rx_ip_type)
+		goto err_tx;
+
 	ipsec->rx_ipv4 = kzalloc(sizeof(*ipsec->rx_ipv4), GFP_KERNEL);
 	if (!ipsec->rx_ipv4)
-		goto err_tx;
+		goto err_ip_type;
 
 	ipsec->rx_ipv6 = kzalloc(sizeof(*ipsec->rx_ipv6), GFP_KERNEL);
 	if (!ipsec->rx_ipv6)
@@ -2103,6 +2233,7 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec)
 
 	ipsec->tx->ns = tns;
 	mutex_init(&ipsec->tx->ft.mutex);
+	ipsec->rx_ip_type->ns = rns;
 	ipsec->rx_ipv4->ns = rns;
 	ipsec->rx_ipv6->ns = rns;
 	mutex_init(&ipsec->rx_ipv4->ft.mutex);
@@ -2116,6 +2247,8 @@ err_rx_ipv6:
 	kfree(ipsec->rx_ipv6);
 err_rx_ipv4:
 	kfree(ipsec->rx_ipv4);
+err_ip_type:
+	kfree(ipsec->rx_ip_type);
 err_tx:
 	kfree(ipsec->tx);
 	return err;
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
index f8be5b9e881c..6e24395b5577 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c
@@ -1626,7 +1626,7 @@ mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE;
-	ft_attr.level = (priv->ipsec) ? 8 : 0;
+	ft_attr.level = (priv->ipsec) ? 9 : 0;
 	ft->t = mlx5_create_flow_table(priv->fts.ns, &ft_attr);
 
 	if (IS_ERR(ft->t)) {
@@ -2014,7 +2014,7 @@ mlx5e_create_vxlan_flow_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft_attr.max_fte = MLX5E_VXLAN_TABLE_SIZE;
-	ft_attr.level = (priv->ipsec) ? 9 : 1;
+	ft_attr.level = (priv->ipsec) ? 10 : 1;
 	ft->t = mlx5_create_flow_table(priv->fts.ns, &ft_attr);
 
 	if (IS_ERR(ft->t)) {