git: 22fe926a62b7 - main - gve: Add feature to change TX/RX ring size
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 04 Apr 2025 23:25:06 UTC
The branch main has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=22fe926a62b7bca771d46502dd6a8c202f25b5be commit 22fe926a62b7bca771d46502dd6a8c202f25b5be Author: Vee Agarwal <veethebee@google.com> AuthorDate: 2025-04-04 22:53:33 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2025-04-04 23:24:49 +0000 gve: Add feature to change TX/RX ring size This change introduces new sysctl handlers that allow the user to change RX/TX ring sizes. As before, the default ring sizes will come from the device (usually 1024). We also get the max/min limits from the device. In the case min values are not provided we have statically defined constants for the min values. Additionally, if the modify ring option is not enabled on the device, changing ring sizes via sysctl will not be possible. When changing ring sizes, the interface turns down momentarily while allocating/freeing resources as necessary. Signed-off-by: Vee Agarwal <veethebee@google.com> Reviewed by: markj MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D49428 --- share/man/man4/gve.4 | 17 ++++++++++ sys/dev/gve/gve.h | 10 ++++++ sys/dev/gve/gve_adminq.c | 53 ++++++++++++++++++++++++++++- sys/dev/gve/gve_adminq.h | 14 ++++++-- sys/dev/gve/gve_main.c | 52 +++++++++++++++++++++++++++-- sys/dev/gve/gve_sysctl.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 227 insertions(+), 6 deletions(-) diff --git a/share/man/man4/gve.4 b/share/man/man4/gve.4 index 2ae96c93e37d..754071e2fad8 100644 --- a/share/man/man4/gve.4 +++ b/share/man/man4/gve.4 @@ -86,6 +86,12 @@ Change the TX queue count to 4 for the gve0 interface: .Pp Change the RX queue count to 4 for the gve0 interface: .D1 sysctl dev.gve.0.num_rx_queues=4 +.Pp +Change the TX ring size to 512 for the gve0 interface: +.D1 sysctl dev.gve.0.tx_ring_size=512 +.Pp +Change the RX ring size to 512 for the gve0 interface: +.D1 sysctl dev.gve.0.rx_ring_size=512 .Sh DIAGNOSTICS The following messages are recorded during driver initialization: .Bl -diag @@ -230,6 +236,17 @@ If this also fails, a device reset will be triggered. .Pp Note: sysctl nodes for queue stats remain available even if a queue is removed. .Pp +.It Va dev.gve.X.rx_ring_size and dev.gve.X.tx_ring_size +Run-time tunables that represent the current ring size for RX/TX queues. +The default value is set to device defaults for ring size. +.Pp +This call turns down the interface while setting up the queues with the new ring size, +which may potentially cause any new packets to be dropped. +This call can fail if the system is not able to provide the driver with enough resources. +In that situation, the driver will try to revert to the previous ring size for RX/TX queues. +If this also fails, the device will be in an unhealthy state and will need to be reloaded. +This value must be a power of 2 and within the defined range. +.Pp .El .Sh LIMITATIONS .Nm diff --git a/sys/dev/gve/gve.h b/sys/dev/gve/gve.h index 2b49ee5ad45a..5b298b889ed6 100644 --- a/sys/dev/gve/gve.h +++ b/sys/dev/gve/gve.h @@ -63,6 +63,10 @@ */ #define GVE_QPL_DIVISOR 16 +/* Ring Size Limits */ +#define GVE_DEFAULT_MIN_RX_RING_SIZE 512 +#define GVE_DEFAULT_MIN_TX_RING_SIZE 256 + static MALLOC_DEFINE(M_GVE, "gve", "gve allocations"); struct gve_dma_handle { @@ -529,12 +533,17 @@ struct gve_priv { uint16_t num_event_counters; uint16_t default_num_queues; uint16_t tx_desc_cnt; + uint16_t max_tx_desc_cnt; + uint16_t min_tx_desc_cnt; uint16_t rx_desc_cnt; + uint16_t max_rx_desc_cnt; + uint16_t min_rx_desc_cnt; uint16_t rx_pages_per_qpl; uint64_t max_registered_pages; uint64_t num_registered_pages; uint32_t supported_features; uint16_t max_mtu; + bool modify_ringsize_enabled; struct gve_dma_handle counter_array_mem; __be32 *counters; @@ -622,6 +631,7 @@ gve_is_qpl(struct gve_priv *priv) void gve_schedule_reset(struct gve_priv *priv); int gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt); int gve_adjust_rx_queues(struct gve_priv *priv, uint16_t new_queue_cnt); +int gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx); /* Register access functions defined in gve_utils.c */ uint32_t gve_reg_bar_read_4(struct gve_priv *priv, bus_size_t offset); diff --git a/sys/dev/gve/gve_adminq.c b/sys/dev/gve/gve_adminq.c index dd03f817f45a..3415d2fa4b60 100644 --- a/sys/dev/gve/gve_adminq.c +++ b/sys/dev/gve/gve_adminq.c @@ -59,6 +59,7 @@ void gve_parse_device_option(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_modify_ring **dev_op_modify_ring, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) { uint32_t req_feat_mask = be32toh(option->required_features_mask); @@ -121,6 +122,34 @@ void gve_parse_device_option(struct gve_priv *priv, *dev_op_dqo_qpl = (void *)(option + 1); break; + case GVE_DEV_OPT_ID_MODIFY_RING: + if (option_length < (sizeof(**dev_op_modify_ring) - + sizeof(struct gve_ring_size_bound)) || + req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING) { + device_printf(priv->dev, GVE_DEVICE_OPTION_ERROR_FMT, + "Modify Ring", (int)sizeof(**dev_op_modify_ring), + GVE_DEV_OPT_REQ_FEAT_MASK_MODIFY_RING, + option_length, req_feat_mask); + break; + } + + if (option_length > sizeof(**dev_op_modify_ring)) { + device_printf(priv->dev, GVE_DEVICE_OPTION_TOO_BIG_FMT, + "Modify Ring"); + } + *dev_op_modify_ring = (void *)(option + 1); + + /* Min ring size included; set the minimum ring size. */ + if (option_length == sizeof(**dev_op_modify_ring)) { + priv->min_rx_desc_cnt = max( + be16toh((*dev_op_modify_ring)->min_ring_size.rx), + GVE_DEFAULT_MIN_RX_RING_SIZE); + priv->min_tx_desc_cnt = max( + be16toh((*dev_op_modify_ring)->min_ring_size.tx), + GVE_DEFAULT_MIN_TX_RING_SIZE); + } + break; + case GVE_DEV_OPT_ID_JUMBO_FRAMES: if (option_length < sizeof(**dev_op_jumbo_frames) || req_feat_mask != GVE_DEV_OPT_REQ_FEAT_MASK_JUMBO_FRAMES) { @@ -155,6 +184,7 @@ gve_process_device_options(struct gve_priv *priv, struct gve_device_option_gqi_qpl **dev_op_gqi_qpl, struct gve_device_option_dqo_rda **dev_op_dqo_rda, struct gve_device_option_dqo_qpl **dev_op_dqo_qpl, + struct gve_device_option_modify_ring **dev_op_modify_ring, struct gve_device_option_jumbo_frames **dev_op_jumbo_frames) { char *desc_end = (char *)descriptor + be16toh(descriptor->total_length); @@ -176,6 +206,7 @@ gve_process_device_options(struct gve_priv *priv, dev_op_gqi_qpl, dev_op_dqo_rda, dev_op_dqo_qpl, + dev_op_modify_ring, dev_op_jumbo_frames); dev_opt = (void *)((char *)(dev_opt + 1) + be16toh(dev_opt->option_length)); } @@ -390,8 +421,18 @@ gve_adminq_set_mtu(struct gve_priv *priv, uint32_t mtu) { static void gve_enable_supported_features(struct gve_priv *priv, uint32_t supported_features_mask, + const struct gve_device_option_modify_ring *dev_op_modify_ring, const struct gve_device_option_jumbo_frames *dev_op_jumbo_frames) { + if (dev_op_modify_ring && + (supported_features_mask & GVE_SUP_MODIFY_RING_MASK)) { + if (bootverbose) + device_printf(priv->dev, "MODIFY RING device option enabled.\n"); + priv->modify_ringsize_enabled = true; + priv->max_rx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.rx); + priv->max_tx_desc_cnt = be16toh(dev_op_modify_ring->max_ring_size.tx); + } + if (dev_op_jumbo_frames && (supported_features_mask & GVE_SUP_JUMBO_FRAMES_MASK)) { if (bootverbose) @@ -410,6 +451,7 @@ gve_adminq_describe_device(struct gve_priv *priv) struct gve_device_option_gqi_qpl *dev_op_gqi_qpl = NULL; struct gve_device_option_dqo_rda *dev_op_dqo_rda = NULL; struct gve_device_option_dqo_qpl *dev_op_dqo_qpl = NULL; + struct gve_device_option_modify_ring *dev_op_modify_ring = NULL; struct gve_device_option_jumbo_frames *dev_op_jumbo_frames = NULL; uint32_t supported_features_mask = 0; int rc; @@ -438,10 +480,15 @@ gve_adminq_describe_device(struct gve_priv *priv) bus_dmamap_sync(desc_mem.tag, desc_mem.map, BUS_DMASYNC_POSTREAD); + /* Default min in case device options don't have min values */ + priv->min_rx_desc_cnt = GVE_DEFAULT_MIN_RX_RING_SIZE; + priv->min_tx_desc_cnt = GVE_DEFAULT_MIN_TX_RING_SIZE; + rc = gve_process_device_options(priv, desc, &dev_op_gqi_qpl, &dev_op_dqo_rda, &dev_op_dqo_qpl, + &dev_op_modify_ring, &dev_op_jumbo_frames); if (rc != 0) goto free_device_descriptor; @@ -489,8 +536,12 @@ gve_adminq_describe_device(struct gve_priv *priv) priv->default_num_queues = be16toh(desc->default_num_queues); priv->supported_features = supported_features_mask; + /* Default max to current in case modify ring size option is disabled */ + priv->max_rx_desc_cnt = priv->rx_desc_cnt; + priv->max_tx_desc_cnt = priv->tx_desc_cnt; + gve_enable_supported_features(priv, supported_features_mask, - dev_op_jumbo_frames); + dev_op_modify_ring, dev_op_jumbo_frames); for (i = 0; i < ETHER_ADDR_LEN; i++) priv->mac[i] = desc->mac[i]; diff --git a/sys/dev/gve/gve_adminq.h b/sys/dev/gve/gve_adminq.h index 37a7cb3ecbb8..bc51046a3037 100644 --- a/sys/dev/gve/gve_adminq.h +++ b/sys/dev/gve/gve_adminq.h @@ -153,13 +153,21 @@ struct gve_device_option_dqo_qpl { _Static_assert(sizeof(struct gve_device_option_dqo_qpl) == 8, "gve: bad admin queue struct length"); +struct gve_ring_size_bound { + __be16 rx; + __be16 tx; +}; + +_Static_assert(sizeof(struct gve_ring_size_bound) == 4, + "gve: bad admin queue struct length"); + struct gve_device_option_modify_ring { __be32 supported_features_mask; - __be16 max_rx_ring_size; - __be16 max_tx_ring_size; + struct gve_ring_size_bound max_ring_size; + struct gve_ring_size_bound min_ring_size; }; -_Static_assert(sizeof(struct gve_device_option_modify_ring) == 8, +_Static_assert(sizeof(struct gve_device_option_modify_ring) == 12, "gve: bad admin queue struct length"); struct gve_device_option_jumbo_frames { diff --git a/sys/dev/gve/gve_main.c b/sys/dev/gve/gve_main.c index 39556b85f493..8a00deedef36 100644 --- a/sys/dev/gve/gve_main.c +++ b/sys/dev/gve/gve_main.c @@ -32,10 +32,10 @@ #include "gve_adminq.h" #include "gve_dqo.h" -#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.2\n" +#define GVE_DRIVER_VERSION "GVE-FBSD-1.3.3\n" #define GVE_VERSION_MAJOR 1 #define GVE_VERSION_MINOR 3 -#define GVE_VERSION_SUB 2 +#define GVE_VERSION_SUB 3 #define GVE_DEFAULT_RX_COPYBREAK 256 @@ -260,6 +260,54 @@ gve_adjust_tx_queues(struct gve_priv *priv, uint16_t new_queue_cnt) return (err); } +int +gve_adjust_ring_sizes(struct gve_priv *priv, uint16_t new_desc_cnt, bool is_rx) +{ + int err; + uint16_t prev_desc_cnt; + + GVE_IFACE_LOCK_ASSERT(priv->gve_iface_lock); + + gve_down(priv); + + if (is_rx) { + gve_free_rx_rings(priv, 0, priv->rx_cfg.num_queues); + prev_desc_cnt = priv->rx_desc_cnt; + priv->rx_desc_cnt = new_desc_cnt; + err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues); + if (err != 0) { + device_printf(priv->dev, + "Failed to allocate rings. Trying to start back up with previous ring size."); + priv->rx_desc_cnt = prev_desc_cnt; + err = gve_alloc_rx_rings(priv, 0, priv->rx_cfg.num_queues); + } + } else { + gve_free_tx_rings(priv, 0, priv->tx_cfg.num_queues); + prev_desc_cnt = priv->tx_desc_cnt; + priv->tx_desc_cnt = new_desc_cnt; + err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues); + if (err != 0) { + device_printf(priv->dev, + "Failed to allocate rings. Trying to start back up with previous ring size."); + priv->tx_desc_cnt = prev_desc_cnt; + err = gve_alloc_tx_rings(priv, 0, priv->tx_cfg.num_queues); + } + } + + if (err != 0) { + device_printf(priv->dev, "Failed to allocate rings! Cannot start device back up!"); + return (err); + } + + err = gve_up(priv); + if (err != 0) { + gve_schedule_reset(priv); + return (err); + } + + return (0); +} + static int gve_set_mtu(if_t ifp, uint32_t new_mtu) { diff --git a/sys/dev/gve/gve_sysctl.c b/sys/dev/gve/gve_sysctl.c index 8f52ffad6f3e..f7c7b5803865 100644 --- a/sys/dev/gve/gve_sysctl.c +++ b/sys/dev/gve/gve_sysctl.c @@ -354,6 +354,83 @@ gve_sysctl_num_rx_queues(SYSCTL_HANDLER_ARGS) return (err); } +static int +gve_check_ring_size(struct gve_priv *priv, int val, bool is_rx) +{ + if (!powerof2(val) || val == 0) { + device_printf(priv->dev, + "Requested ring size (%u) must be a power of 2\n", val); + return (EINVAL); + } + + if (val < (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt)) { + device_printf(priv->dev, + "Requested ring size (%u) cannot be less than %d\n", val, + (is_rx ? priv->min_rx_desc_cnt : priv->min_tx_desc_cnt)); + return (EINVAL); + } + + + if (val > (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt)) { + device_printf(priv->dev, + "Requested ring size (%u) cannot be greater than %d\n", val, + (is_rx ? priv->max_rx_desc_cnt : priv->max_tx_desc_cnt)); + return (EINVAL); + } + + return (0); +} + +static int +gve_sysctl_tx_ring_size(SYSCTL_HANDLER_ARGS) +{ + struct gve_priv *priv = arg1; + int val; + int err; + + val = priv->tx_desc_cnt; + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + err = gve_check_ring_size(priv, val, /*is_rx=*/false); + if (err != 0) + return (err); + + if (val != priv->tx_desc_cnt) { + GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); + err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/false); + GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); + } + + return (err); +} + +static int +gve_sysctl_rx_ring_size(SYSCTL_HANDLER_ARGS) +{ + struct gve_priv *priv = arg1; + int val; + int err; + + val = priv->rx_desc_cnt; + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + err = gve_check_ring_size(priv, val, /*is_rx=*/true); + if (err != 0) + return (err); + + if (val != priv->rx_desc_cnt) { + GVE_IFACE_LOCK_LOCK(priv->gve_iface_lock); + err = gve_adjust_ring_sizes(priv, val, /*is_rx=*/true); + GVE_IFACE_LOCK_UNLOCK(priv->gve_iface_lock); + } + + return (err); +} + static void gve_setup_sysctl_writables(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct gve_priv *priv) @@ -365,6 +442,16 @@ gve_setup_sysctl_writables(struct sysctl_ctx_list *ctx, SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "num_rx_queues", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, gve_sysctl_num_rx_queues, "I", "Number of RX queues"); + + if (priv->modify_ringsize_enabled) { + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_ring_size", + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + gve_sysctl_tx_ring_size, "I", "TX ring size"); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ring_size", + CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, + gve_sysctl_rx_ring_size, "I", "RX ring size"); + } } void gve_setup_sysctl(struct gve_priv *priv)