git: 81b38bce0794 - main - mlx5e tls: Ensure all allocated tags have a hw context associated

From: Konstantin Belousov <kib_at_FreeBSD.org>
Date: Sat, 23 Nov 2024 10:05:07 UTC
The branch main has been updated by kib:

URL: https://cgit.FreeBSD.org/src/commit/?id=81b38bce07940b7a4001dfeb8cd63774229ca950

commit 81b38bce07940b7a4001dfeb8cd63774229ca950
Author:     Andrew Gallatin <gallatin@FreeBSD.org>
AuthorDate: 2024-11-23 09:43:17 +0000
Commit:     Konstantin Belousov <kib@FreeBSD.org>
CommitDate: 2024-11-23 10:01:50 +0000

    mlx5e tls: Ensure all allocated tags have a hw context associated
    
    Ensure all allocated tags have a hardware context associated.
    The hardware context allocation is moved into the zone import
    routine, as suggested by kib.  This is safe because these zone
    allocations are always done in a sleepable context.
    
    I have removed the now pointless num_resources tracking,
    and added sysctls / tunables to control UMA zone limits
    for these tls tags, as well as a tunable to let the
    driver pre-allocate tags at boot.
    
    MFC after:      2 weeks
---
 sys/dev/mlx5/mlx5_en/en_hw_tls.h      |   2 +-
 sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c | 132 ++++++++++++++++++++++++----------
 2 files changed, 95 insertions(+), 39 deletions(-)

diff --git a/sys/dev/mlx5/mlx5_en/en_hw_tls.h b/sys/dev/mlx5/mlx5_en/en_hw_tls.h
index 2018198e5e52..d637314e040e 100644
--- a/sys/dev/mlx5/mlx5_en/en_hw_tls.h
+++ b/sys/dev/mlx5/mlx5_en/en_hw_tls.h
@@ -84,7 +84,7 @@ struct mlx5e_tls {
 	struct workqueue_struct *wq;
 	uma_zone_t zone;
 	uint32_t max_resources;		/* max number of resources */
-	volatile uint32_t num_resources;	/* current number of resources */
+	int zone_max;
 	int init;			/* set when ready */
 	char zname[32];
 };
diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
index b5caa3ba53dd..8b5121066bdf 100644
--- a/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
+++ b/sys/dev/mlx5/mlx5_en/mlx5_en_hw_tls.c
@@ -81,17 +81,57 @@ static const char *mlx5e_tls_stats_desc[] = {
 
 static void mlx5e_tls_work(struct work_struct *);
 
+/*
+ * Expand the tls tag UMA zone in a sleepable context
+ */
+
+static void
+mlx5e_prealloc_tags(struct mlx5e_priv *priv, int nitems)
+{
+	struct mlx5e_tls_tag **tags;
+	int i;
+
+	tags = malloc(sizeof(tags[0]) * nitems,
+	    M_MLX5E_TLS, M_WAITOK);
+	for (i = 0; i < nitems; i++)
+		tags[i] = uma_zalloc(priv->tls.zone, M_WAITOK);
+	__compiler_membar();
+	for (i = 0; i < nitems; i++)
+		uma_zfree(priv->tls.zone, tags[i]);
+	free(tags, M_MLX5E_TLS);
+}
+
 static int
 mlx5e_tls_tag_import(void *arg, void **store, int cnt, int domain, int flags)
 {
 	struct mlx5e_tls_tag *ptag;
-	int i;
+	struct mlx5e_priv *priv = arg;
+	int err, i;
+
+	/*
+	 * mlx5_tls_open_tis() sleeps on a firmware command, so
+	 * zone allocations must be done from a sleepable context.
+	 * Note that the uma_zalloc() in mlx5e_tls_snd_tag_alloc()
+	 * is done with M_NOWAIT so that hitting the zone limit does
+	 * not cause the allocation to pause forever.
+	 */
 
 	for (i = 0; i != cnt; i++) {
 		ptag = malloc_domainset(sizeof(*ptag), M_MLX5E_TLS,
 		    mlx5_dev_domainset(arg), flags | M_ZERO);
+		if (ptag == NULL)
+			return (i);
+		ptag->tls = &priv->tls;
 		mtx_init(&ptag->mtx, "mlx5-tls-tag-mtx", NULL, MTX_DEF);
 		INIT_WORK(&ptag->work, mlx5e_tls_work);
+		err = mlx5_tls_open_tis(priv->mdev, 0, priv->tdn,
+		    priv->pdn, &ptag->tisn);
+		if (err) {
+			MLX5E_TLS_STAT_INC(ptag, tx_error, 1);
+			free(ptag, M_MLX5E_TLS);
+			return (i);
+		}
+
 		store[i] = ptag;
 	}
 	return (i);
@@ -114,7 +154,6 @@ mlx5e_tls_tag_release(void *arg, void **store, int cnt)
 
 		if (ptag->tisn != 0) {
 			mlx5_tls_close_tis(priv->mdev, ptag->tisn);
-			atomic_add_32(&ptls->num_resources, -1U);
 		}
 
 		mtx_destroy(&ptag->mtx);
@@ -136,20 +175,38 @@ mlx5e_tls_tag_zfree(struct mlx5e_tls_tag *ptag)
 	/* avoid leaking keys */
 	memset(ptag->crypto_params, 0, sizeof(ptag->crypto_params));
 
-	/* update number of TIS contexts */
-	if (ptag->tisn == 0)
-		atomic_add_32(&ptag->tls->num_resources, -1U);
-
 	/* return tag to UMA */
 	uma_zfree(ptag->tls->zone, ptag);
 }
 
+static int
+mlx5e_max_tag_proc(SYSCTL_HANDLER_ARGS)
+{
+	struct mlx5e_priv *priv = (struct mlx5e_priv *)arg1;
+	struct mlx5e_tls *ptls = &priv->tls;
+	int err;
+	unsigned int max_tags;
+
+	max_tags = ptls->zone_max;
+	err = sysctl_handle_int(oidp, &max_tags, arg2, req);
+	if (err != 0 || req->newptr == NULL )
+		return err;
+	if (max_tags == ptls->zone_max)
+		return 0;
+	if (max_tags > priv->tls.max_resources || max_tags == 0)
+		return (EINVAL);
+	ptls->zone_max = max_tags;
+	uma_zone_set_max(ptls->zone, ptls->zone_max);
+	return 0;
+}
+
 int
 mlx5e_tls_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tls *ptls = &priv->tls;
 	struct sysctl_oid *node;
-	uint32_t x;
+	uint32_t max_dek, max_tis, x;
+	int zone_max = 0, prealloc_tags = 0;
 
 	if (MLX5_CAP_GEN(priv->mdev, tls_tx) == 0 ||
 	    MLX5_CAP_GEN(priv->mdev, log_max_dek) == 0)
@@ -164,13 +221,31 @@ mlx5e_tls_init(struct mlx5e_priv *priv)
 	snprintf(ptls->zname, sizeof(ptls->zname),
 	    "mlx5_%u_tls", device_get_unit(priv->mdev->pdev->dev.bsddev));
 
+
+	TUNABLE_INT_FETCH("hw.mlx5.tls_max_tags", &zone_max);
+	TUNABLE_INT_FETCH("hw.mlx5.tls_prealloc_tags", &prealloc_tags);
+
 	ptls->zone = uma_zcache_create(ptls->zname,
 	     sizeof(struct mlx5e_tls_tag), NULL, NULL, NULL, NULL,
-	     mlx5e_tls_tag_import, mlx5e_tls_tag_release, priv->mdev,
-	     UMA_ZONE_UNMANAGED);
+	     mlx5e_tls_tag_import, mlx5e_tls_tag_release, priv,
+	     UMA_ZONE_UNMANAGED | (prealloc_tags ? UMA_ZONE_NOFREE : 0));
 
 	/* shared between RX and TX TLS */
-	ptls->max_resources = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);
+	max_dek = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_dek) - 1);
+	max_tis = 1U << (MLX5_CAP_GEN(priv->mdev, log_max_tis) - 1);
+	ptls->max_resources = MIN(max_dek, max_tis);
+
+	if (zone_max != 0) {
+		ptls->zone_max = zone_max;
+		if (ptls->zone_max > priv->tls.max_resources)
+			ptls->zone_max = priv->tls.max_resources;
+	} else {
+		ptls->zone_max = priv->tls.max_resources;
+	}
+
+	uma_zone_set_max(ptls->zone, ptls->zone_max);
+	if (prealloc_tags != 0)
+		mlx5e_prealloc_tags(priv, ptls->zone_max);
 
 	for (x = 0; x != MLX5E_TLS_STATS_NUM; x++)
 		ptls->stats.arg[x] = counter_u64_alloc(M_WAITOK);
@@ -183,6 +258,10 @@ mlx5e_tls_init(struct mlx5e_priv *priv)
 	if (node == NULL)
 		return (0);
 
+	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tls_max_tag",
+	    CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_MPSAFE, priv, 0, mlx5e_max_tag_proc,
+	    "IU", "Max number of TLS offload session tags");
+
 	mlx5e_create_counter_stats(&ptls->ctx,
 	    SYSCTL_CHILDREN(node), "stats",
 	    mlx5e_tls_stats_desc, MLX5E_TLS_STATS_NUM,
@@ -206,9 +285,6 @@ mlx5e_tls_cleanup(struct mlx5e_priv *priv)
 	uma_zdestroy(ptls->zone);
 	destroy_workqueue(ptls->wq);
 
-	/* check if all resources are freed */
-	MPASS(priv->tls.num_resources == 0);
-
 	for (x = 0; x != MLX5E_TLS_STATS_NUM; x++)
 		counter_u64_free(ptls->stats.arg[x]);
 }
@@ -334,28 +410,16 @@ mlx5e_tls_snd_tag_alloc(if_t ifp,
 	if (priv->gone != 0 || priv->tls.init == 0)
 		return (EOPNOTSUPP);
 
-	/* allocate new tag from zone, if any */
 	ptag = uma_zalloc(priv->tls.zone, M_WAITOK);
+ 	if (ptag == NULL)
+ 		return (ENOMEM);
 
 	/* sanity check default values */
 	MPASS(ptag->dek_index == 0);
 	MPASS(ptag->dek_index_ok == 0);
 
-	/* setup TLS tag */
-	ptag->tls = &priv->tls;
-
 	/* check if there is no TIS context */
-	if (ptag->tisn == 0) {
-		uint32_t value;
-
-		value = atomic_fetchadd_32(&priv->tls.num_resources, 1U);
-
-		/* check resource limits */
-		if (value >= priv->tls.max_resources) {
-			error = ENOMEM;
-			goto failure;
-		}
-	}
+	KASSERT(ptag->tisn != 0, ("ptag %p w/0 tisn", ptag));
 
 	en = &params->tls.tls->params;
 
@@ -448,17 +512,9 @@ mlx5e_tls_snd_tag_alloc(if_t ifp,
 	/* reset state */
 	ptag->state = MLX5E_TLS_ST_INIT;
 
-	/*
-	 *  Try to immediately init the tag.  We may fail if the NIC's
-	 *  resources are tied up with send tags that are in the work
-	 *  queue, waiting to be freed.  So if we fail, put ourselves
-	 *  on the queue so as to try again after resouces have been freed.
-	 */
 	error = mlx5e_tls_st_init(priv, ptag);
-	if (error != 0) {
-		queue_work(priv->tls.wq, &ptag->work);
-		flush_work(&ptag->work);
-	}
+	if (error != 0)
+		goto failure;
 
 	return (0);