svn commit: r292094 - stable/10/sys/dev/ixl
Steven Hartland
steven.hartland at multiplay.co.uk
Fri Dec 11 12:22:40 UTC 2015
This should have referenced https://reviews.freebsd.org/D4265
On 11/12/2015 12:16, Steven Hartland wrote:
> Author: smh
> Date: Fri Dec 11 12:16:05 2015
> New Revision: 292094
> URL: https://svnweb.freebsd.org/changeset/base/292094
>
> Log:
> MFC r277084,r277088,r277130,r277143,r277151,r277262
>
> r277084: Intel I40E updates ixl v1.3.0 and ixlv 1.2.0 featuring RSS
> r277088, r277130, r277143, r277151 & r277262: Misc RSS fixes
>
> The main change is the addition of RSS, which is not supported in stable/10,
> however these commits also include other bug fixes. In order to bring those
> fixes in and facilitate easier merge of future updates the RSS changes are
> maintained but left disabled by the removal of the opt_rss.h include.
>
> Sponsored by: Multiplay
>
> Modified:
> stable/10/sys/dev/ixl/if_ixl.c
> stable/10/sys/dev/ixl/if_ixlv.c
> stable/10/sys/dev/ixl/ixl.h
> stable/10/sys/dev/ixl/ixl_txrx.c
> Directory Properties:
> stable/10/ (props changed)
>
> Modified: stable/10/sys/dev/ixl/if_ixl.c
> ==============================================================================
> --- stable/10/sys/dev/ixl/if_ixl.c Fri Dec 11 11:08:00 2015 (r292093)
> +++ stable/10/sys/dev/ixl/if_ixl.c Fri Dec 11 12:16:05 2015 (r292094)
> @@ -37,10 +37,14 @@
> #include "ixl.h"
> #include "ixl_pf.h"
>
> +#ifdef RSS
> +#include <net/rss_config.h>
> +#endif
> +
> /*********************************************************************
> * Driver version
> *********************************************************************/
> -char ixl_driver_version[] = "1.2.8";
> +char ixl_driver_version[] = "1.3.1";
>
> /*********************************************************************
> * PCI Device ID Table
> @@ -174,7 +178,7 @@ static void ixl_stat_update48(struct i40
> static void ixl_stat_update32(struct i40e_hw *, u32, bool,
> u64 *, u64 *);
>
> -#ifdef IXL_DEBUG
> +#ifdef IXL_DEBUG_SYSCTL
> static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS);
> static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS);
> static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS);
> @@ -427,7 +431,7 @@ ixl_attach(device_t dev)
> OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW,
> &ixl_dynamic_tx_itr, 0, "Dynamic TX ITR");
>
> -#ifdef IXL_DEBUG
> +#ifdef IXL_DEBUG_SYSCTL
> SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
> SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
> OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD,
> @@ -662,8 +666,9 @@ ixl_attach(device_t dev)
>
> /* Reset port's advertised speeds */
> if (!i40e_is_40G_device(hw->device_id)) {
> - pf->advertised_speed = 0x7;
> - ixl_set_advertised_speeds(pf, 0x7);
> + pf->advertised_speed =
> + (hw->device_id == I40E_DEV_ID_10G_BASE_T) ? 0x7 : 0x6;
> + ixl_set_advertised_speeds(pf, pf->advertised_speed);
> }
>
> /* Register for VLAN events */
> @@ -1407,6 +1412,12 @@ ixl_media_status(struct ifnet * ifp, str
> case I40E_PHY_TYPE_10GBASE_SFPP_CU:
> ifmr->ifm_active |= IFM_10G_TWINAX;
> break;
> + case I40E_PHY_TYPE_10GBASE_KR:
> + /*
> + ** this is not technically correct
> + ** but FreeBSD does not have the media
> + ** type defined yet, so its a compromise.
> + */
> case I40E_PHY_TYPE_10GBASE_SR:
> ifmr->ifm_active |= IFM_10G_SR;
> break;
> @@ -1721,8 +1732,10 @@ ixl_local_timer(void *arg)
> vsi->active_queues |= ((u64)1 << que->me);
> }
> if (que->busy >= IXL_MAX_TX_BUSY) {
> +#ifdef IXL_DEBUG
> device_printf(dev,"Warning queue %d "
> "appears to be hung!\n", i);
> +#endif
> que->busy = IXL_QUEUE_HUNG;
> ++hung;
> }
> @@ -1765,6 +1778,15 @@ ixl_update_link_status(struct ixl_pf *pf
> "Full Duplex", ixl_fc_string[fc]);
> }
> vsi->link_active = TRUE;
> + /*
> + ** Warn user if link speed on NPAR enabled
> + ** partition is not at least 10GB
> + */
> + if (hw->func_caps.npar_enable &&
> + (hw->phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
> + hw->phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
> + device_printf(dev, "The partition detected link"
> + "speed that is less than 10Gbps\n");
> if_link_state_change(ifp, LINK_STATE_UP);
> }
> } else { /* Link down */
> @@ -1901,6 +1923,7 @@ ixl_assign_vsi_msix(struct ixl_pf *pf)
>
> /* Now set up the stations */
> for (int i = 0; i < vsi->num_queues; i++, vector++, que++) {
> + int cpu_id = i;
> rid = vector + 1;
> txr = &que->txr;
> que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
> @@ -1921,14 +1944,23 @@ ixl_assign_vsi_msix(struct ixl_pf *pf)
> }
> bus_describe_intr(dev, que->res, que->tag, "q%d", i);
> /* Bind the vector to a CPU */
> - bus_bind_intr(dev, que->res, i);
> +#ifdef RSS
> + cpu_id = rss_getcpu(i % rss_getnumbuckets());
> +#endif
> + bus_bind_intr(dev, que->res, cpu_id);
> que->msix = vector;
> TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
> TASK_INIT(&que->task, 0, ixl_handle_que, que);
> que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT,
> taskqueue_thread_enqueue, &que->tq);
> - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
> - device_get_nameunit(pf->dev));
> +#ifdef RSS
> + taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
> + cpu_id, "%s (bucket %d)",
> + device_get_nameunit(dev), cpu_id);
> +#else
> + taskqueue_start_threads(&que->tq, 1, PI_NET,
> + "%s que", device_get_nameunit(dev));
> +#endif
> }
>
> return (0);
> @@ -1995,6 +2027,12 @@ ixl_init_msix(struct ixl_pf *pf)
> if ((ixl_max_queues != 0) && (ixl_max_queues <= queues))
> queues = ixl_max_queues;
>
> +#ifdef RSS
> + /* If we're doing RSS, clamp at the number of RSS buckets */
> + if (queues > rss_getnumbuckets())
> + queues = rss_getnumbuckets();
> +#endif
> +
> /*
> ** Want one vector (RX/TX pair) per queue
> ** plus an additional for the admin queue.
> @@ -2015,6 +2053,25 @@ ixl_init_msix(struct ixl_pf *pf)
> "Using MSIX interrupts with %d vectors\n", vectors);
> pf->msix = vectors;
> pf->vsi.num_queues = queues;
> +#ifdef RSS
> + /*
> + * If we're doing RSS, the number of queues needs to
> + * match the number of RSS buckets that are configured.
> + *
> + * + If there's more queues than RSS buckets, we'll end
> + * up with queues that get no traffic.
> + *
> + * + If there's more RSS buckets than queues, we'll end
> + * up having multiple RSS buckets map to the same queue,
> + * so there'll be some contention.
> + */
> + if (queues != rss_getnumbuckets()) {
> + device_printf(dev,
> + "%s: queues (%d) != RSS buckets (%d)"
> + "; performance will be impacted.\n",
> + __func__, queues, rss_getnumbuckets());
> + }
> +#endif
> return (vectors);
> }
> msi:
> @@ -2383,7 +2440,8 @@ ixl_setup_interface(device_t dev, struct
> if (aq_error == I40E_ERR_UNKNOWN_PHY) {
> /* Need delay to detect fiber correctly */
> i40e_msec_delay(200);
> - aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities_resp, NULL);
> + aq_error = i40e_aq_get_phy_capabilities(hw, FALSE,
> + TRUE, &abilities_resp, NULL);
> if (aq_error == I40E_ERR_UNKNOWN_PHY)
> device_printf(dev, "Unknown PHY type detected!\n");
> else
> @@ -3043,7 +3101,6 @@ ixl_add_sysctls_eth_stats(struct sysctl_
> "Multicast Packets Transmitted"},
> {&eth_stats->tx_broadcast, "bcast_pkts_txd",
> "Broadcast Packets Transmitted"},
> - {&eth_stats->tx_discards, "tx_discards", "Discarded TX packets"},
> // end
> {0,0,0}
> };
> @@ -3126,19 +3183,45 @@ static void ixl_config_rss(struct ixl_vs
> struct ixl_pf *pf = (struct ixl_pf *)vsi->back;
> struct i40e_hw *hw = vsi->hw;
> u32 lut = 0;
> - u64 set_hena, hena;
> - int i, j;
> + u64 set_hena = 0, hena;
> + int i, j, que_id;
> +#ifdef RSS
> + u32 rss_hash_config;
> + u32 rss_seed[IXL_KEYSZ];
> +#else
> + u32 rss_seed[IXL_KEYSZ] = {0x41b01687,
> + 0x183cfd8c, 0xce880440, 0x580cbc3c,
> + 0x35897377, 0x328b25e1, 0x4fa98922,
> + 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1};
> +#endif
>
> - static const u32 seed[I40E_PFQF_HKEY_MAX_INDEX + 1] = {0x41b01687,
> - 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377,
> - 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d,
> - 0xcd15a2c1, 0xe8580225, 0x4a1e9d11, 0xfe5731be};
> +#ifdef RSS
> + /* Fetch the configured RSS key */
> + rss_getkey((uint8_t *) &rss_seed);
> +#endif
>
> /* Fill out hash function seed */
> - for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
> - wr32(hw, I40E_PFQF_HKEY(i), seed[i]);
> + for (i = 0; i < IXL_KEYSZ; i++)
> + wr32(hw, I40E_PFQF_HKEY(i), rss_seed[i]);
>
> /* Enable PCTYPES for RSS: */
> +#ifdef RSS
> + rss_hash_config = rss_gethashconfig();
> + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
> +#else
> set_hena =
> ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
> ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
> @@ -3151,7 +3234,7 @@ static void ixl_config_rss(struct ixl_vs
> ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
> ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) |
> ((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD);
> -
> +#endif
> hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
> ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32);
> hena |= set_hena;
> @@ -3162,8 +3245,19 @@ static void ixl_config_rss(struct ixl_vs
> for (i = j = 0; i < pf->hw.func_caps.rss_table_size; i++, j++) {
> if (j == vsi->num_queues)
> j = 0;
> +#ifdef RSS
> + /*
> + * Fetch the RSS bucket id for the given indirection entry.
> + * Cap it at the number of configured buckets (which is
> + * num_queues.)
> + */
> + que_id = rss_get_indirection_to_bucket(i);
> + que_id = que_id % vsi->num_queues;
> +#else
> + que_id = j;
> +#endif
> /* lut = 4-byte sliding window of 4 lut entries */
> - lut = (lut << 8) | (j &
> + lut = (lut << 8) | (que_id &
> ((0x1 << pf->hw.func_caps.rss_table_entry_width) - 1));
> /* On i = 3, we have 4 entries in lut; write to the register */
> if ((i & 3) == 3)
> @@ -3401,7 +3495,7 @@ ixl_add_hw_filters(struct ixl_vsi *vsi,
> a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt,
> M_DEVBUF, M_NOWAIT | M_ZERO);
> if (a == NULL) {
> - device_printf(dev, "add hw filter failed to get memory\n");
> + device_printf(dev, "add_hw_filters failed to get memory\n");
> return;
> }
>
> @@ -3426,8 +3520,8 @@ ixl_add_hw_filters(struct ixl_vsi *vsi,
> if (j > 0) {
> err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL);
> if (err)
> - device_printf(dev, "aq_add_macvlan failure %d\n",
> - hw->aq.asq_last_status);
> + device_printf(dev, "aq_add_macvlan err %d, aq_error %d\n",
> + err, hw->aq.asq_last_status);
> else
> vsi->hw_filters_add += j;
> }
> @@ -3476,6 +3570,7 @@ ixl_del_hw_filters(struct ixl_vsi *vsi,
> err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL);
> /* NOTE: returns ENOENT every time but seems to work fine,
> so we'll ignore that specific error. */
> + // TODO: Does this still occur on current firmwares?
> if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) {
> int sc = 0;
> for (int i = 0; i < j; i++)
> @@ -3828,29 +3923,6 @@ ixl_update_stats_counters(struct ixl_pf
> pf->stat_offsets_loaded,
> &osd->link_xoff_tx, &nsd->link_xoff_tx);
>
> - /* Priority flow control stats */
> -#if 0
> - for (int i = 0; i < 8; i++) {
> - ixl_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
> - pf->stat_offsets_loaded,
> - &osd->priority_xon_rx[i],
> - &nsd->priority_xon_rx[i]);
> - ixl_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
> - pf->stat_offsets_loaded,
> - &osd->priority_xon_tx[i],
> - &nsd->priority_xon_tx[i]);
> - ixl_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
> - pf->stat_offsets_loaded,
> - &osd->priority_xoff_tx[i],
> - &nsd->priority_xoff_tx[i]);
> - ixl_stat_update32(hw,
> - I40E_GLPRT_RXON2OFFCNT(hw->port, i),
> - pf->stat_offsets_loaded,
> - &osd->priority_xon_2_xoff[i],
> - &nsd->priority_xon_2_xoff[i]);
> - }
> -#endif
> -
> /* Packet size stats rx */
> ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
> I40E_GLPRT_PRC64L(hw->port),
> @@ -4377,6 +4449,15 @@ ixl_set_advertised_speeds(struct ixl_pf
> return (EAGAIN);
> }
>
> + /*
> + ** This seems a bit heavy handed, but we
> + ** need to get a reinit on some devices
> + */
> + IXL_PF_LOCK(pf);
> + ixl_stop(pf);
> + ixl_init_locked(pf);
> + IXL_PF_UNLOCK(pf);
> +
> return (0);
> }
>
> @@ -4521,7 +4602,7 @@ ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS)
> }
>
>
> -#ifdef IXL_DEBUG
> +#ifdef IXL_DEBUG_SYSCTL
> static int
> ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS)
> {
> @@ -4630,6 +4711,16 @@ ixl_sysctl_sw_filter_list(SYSCTL_HANDLER
>
> #define IXL_SW_RES_SIZE 0x14
> static int
> +ixl_res_alloc_cmp(const void *a, const void *b)
> +{
> + const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two;
> + one = (struct i40e_aqc_switch_resource_alloc_element_resp *)a;
> + two = (struct i40e_aqc_switch_resource_alloc_element_resp *)b;
> +
> + return ((int)one->resource_type - (int)two->resource_type);
> +}
> +
> +static int
> ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS)
> {
> struct ixl_pf *pf = (struct ixl_pf *)arg1;
> @@ -4647,6 +4738,7 @@ ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_A
> return (ENOMEM);
> }
>
> + bzero(resp, sizeof(resp));
> error = i40e_aq_get_switch_resource_alloc(hw, &num_entries,
> resp,
> IXL_SW_RES_SIZE,
> @@ -4657,9 +4749,14 @@ ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_A
> sbuf_delete(buf);
> return error;
> }
> - device_printf(dev, "Num_entries: %d\n", num_entries);
> +
> + /* Sort entries by type for display */
> + qsort(resp, num_entries,
> + sizeof(struct i40e_aqc_switch_resource_alloc_element_resp),
> + &ixl_res_alloc_cmp);
>
> sbuf_cat(buf, "\n");
> + sbuf_printf(buf, "# of entries: %d\n", num_entries);
> sbuf_printf(buf,
> "Type | Guaranteed | Total | Used | Un-allocated\n"
> " | (this) | (all) | (this) | (all) \n");
> @@ -4847,5 +4944,5 @@ ixl_sysctl_dump_txd(SYSCTL_HANDLER_ARGS)
> sbuf_delete(buf);
> return error;
> }
> -#endif
> +#endif /* IXL_DEBUG_SYSCTL */
>
>
> Modified: stable/10/sys/dev/ixl/if_ixlv.c
> ==============================================================================
> --- stable/10/sys/dev/ixl/if_ixlv.c Fri Dec 11 11:08:00 2015 (r292093)
> +++ stable/10/sys/dev/ixl/if_ixlv.c Fri Dec 11 12:16:05 2015 (r292094)
> @@ -37,10 +37,14 @@
> #include "ixl.h"
> #include "ixlv.h"
>
> +#ifdef RSS
> +#include <net/rss_config.h>
> +#endif
> +
> /*********************************************************************
> * Driver version
> *********************************************************************/
> -char ixlv_driver_version[] = "1.1.18";
> +char ixlv_driver_version[] = "1.2.1";
>
> /*********************************************************************
> * PCI Device ID Table
> @@ -1161,7 +1165,11 @@ ixlv_init_msix(struct ixlv_sc *sc)
> /* Override with hardcoded value if sane */
> if ((ixlv_max_queues != 0) && (ixlv_max_queues <= queues))
> queues = ixlv_max_queues;
> -
> +#ifdef RSS
> + /* If we're doing RSS, clamp at the number of RSS buckets */
> + if (queues > rss_getnumbuckets())
> + queues = rss_getnumbuckets();
> +#endif
> /* Enforce the VF max value */
> if (queues > IXLV_MAX_QUEUES)
> queues = IXLV_MAX_QUEUES;
> @@ -1181,6 +1189,26 @@ ixlv_init_msix(struct ixlv_sc *sc)
> goto fail;
> }
>
> +#ifdef RSS
> + /*
> + * If we're doing RSS, the number of queues needs to
> + * match the number of RSS buckets that are configured.
> + *
> + * + If there's more queues than RSS buckets, we'll end
> + * up with queues that get no traffic.
> + *
> + * + If there's more RSS buckets than queues, we'll end
> + * up having multiple RSS buckets map to the same queue,
> + * so there'll be some contention.
> + */
> + if (queues != rss_getnumbuckets()) {
> + device_printf(dev,
> + "%s: queues (%d) != RSS buckets (%d)"
> + "; performance will be impacted.\n",
> + __func__, queues, rss_getnumbuckets());
> + }
> +#endif
> +
> if (pci_alloc_msix(dev, &vectors) == 0) {
> device_printf(sc->dev,
> "Using MSIX interrupts with %d vectors\n", vectors);
> @@ -1352,6 +1380,7 @@ ixlv_assign_msix(struct ixlv_sc *sc)
> int error, rid, vector = 1;
>
> for (int i = 0; i < vsi->num_queues; i++, vector++, que++) {
> + int cpu_id = i;
> rid = vector + 1;
> txr = &que->txr;
> que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
> @@ -1372,15 +1401,25 @@ ixlv_assign_msix(struct ixlv_sc *sc)
> }
> bus_describe_intr(dev, que->res, que->tag, "que %d", i);
> /* Bind the vector to a CPU */
> - bus_bind_intr(dev, que->res, i);
> +#ifdef RSS
> + cpu_id = rss_getcpu(i % rss_getnumbuckets());
> +#endif
> + bus_bind_intr(dev, que->res, cpu_id);
> que->msix = vector;
> vsi->que_mask |= (u64)(1 << que->msix);
> TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que);
> TASK_INIT(&que->task, 0, ixlv_handle_que, que);
> que->tq = taskqueue_create_fast("ixlv_que", M_NOWAIT,
> taskqueue_thread_enqueue, &que->tq);
> - taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que",
> - device_get_nameunit(sc->dev));
> +#ifdef RSS
> + taskqueue_start_threads_pinned(&que->tq, 1, PI_NET,
> + cpu_id, "%s (bucket %d)",
> + device_get_nameunit(dev), cpu_id);
> +#else
> + taskqueue_start_threads(&que->tq, 1, PI_NET,
> + "%s que", device_get_nameunit(dev));
> +#endif
> +
> }
>
> return (0);
> @@ -2521,16 +2560,18 @@ ixlv_config_rss(struct ixlv_sc *sc)
> struct i40e_hw *hw = &sc->hw;
> struct ixl_vsi *vsi = &sc->vsi;
> u32 lut = 0;
> - u64 set_hena, hena;
> - int i, j;
> -
> - /* set up random bits */
> - static const u32 seed[I40E_VFQF_HKEY_MAX_INDEX + 1] = {
> - 0x794221b4, 0xbca0c5ab, 0x6cd5ebd9, 0x1ada6127,
> - 0x983b3aa1, 0x1c4e71eb, 0x7f6328b2, 0xfcdc0da0,
> - 0xc135cafa, 0x7a6f7e2d, 0xe7102d28, 0x163cd12e,
> - 0x4954b126 };
> -
> + u64 set_hena = 0, hena;
> + int i, j, que_id;
> +#ifdef RSS
> + u32 rss_hash_config;
> + u32 rss_seed[IXL_KEYSZ];
> +#else
> + u32 rss_seed[IXL_KEYSZ] = {0x41b01687,
> + 0x183cfd8c, 0xce880440, 0x580cbc3c,
> + 0x35897377, 0x328b25e1, 0x4fa98922,
> + 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1};
> +#endif
> +
> /* Don't set up RSS if using a single queue */
> if (vsi->num_queues == 1) {
> wr32(hw, I40E_VFQF_HENA(0), 0);
> @@ -2539,11 +2580,32 @@ ixlv_config_rss(struct ixlv_sc *sc)
> return;
> }
>
> +#ifdef RSS
> + /* Fetch the configured RSS key */
> + rss_getkey((uint8_t *) &rss_seed);
> +#endif
> /* Fill out hash function seed */
> - for (i = 0; i <= I40E_VFQF_HKEY_MAX_INDEX; i++)
> - wr32(hw, I40E_VFQF_HKEY(i), seed[i]);
> + for (i = 0; i <= IXL_KEYSZ; i++)
> + wr32(hw, I40E_VFQF_HKEY(i), rss_seed[i]);
>
> /* Enable PCTYPES for RSS: */
> +#ifdef RSS
> + rss_hash_config = rss_gethashconfig();
> + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
> + if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6)
> + set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP);
> +#else
> set_hena =
> ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
> ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
> @@ -2556,7 +2618,7 @@ ixlv_config_rss(struct ixlv_sc *sc)
> ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
> ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6) |
> ((u64)1 << I40E_FILTER_PCTYPE_L2_PAYLOAD);
> -
> +#endif
> hena = (u64)rd32(hw, I40E_VFQF_HENA(0)) |
> ((u64)rd32(hw, I40E_VFQF_HENA(1)) << 32);
> hena |= set_hena;
> @@ -2564,16 +2626,26 @@ ixlv_config_rss(struct ixlv_sc *sc)
> wr32(hw, I40E_VFQF_HENA(1), (u32)(hena >> 32));
>
> /* Populate the LUT with max no. of queues in round robin fashion */
> - for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; j++) {
> + for (i = 0, j = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++, j++) {
> if (j == vsi->num_queues)
> j = 0;
> +#ifdef RSS
> + /*
> + * Fetch the RSS bucket id for the given indirection entry.
> + * Cap it at the number of configured buckets (which is
> + * num_queues.)
> + */
> + que_id = rss_get_indirection_to_bucket(i);
> + que_id = que_id % vsi->num_queues;
> +#else
> + que_id = j;
> +#endif
> /* lut = 4-byte sliding window of 4 lut entries */
> - lut = (lut << 8) | (j & 0xF);
> + lut = (lut << 8) | (que_id & 0xF);
> /* On i = 3, we have 4 entries in lut; write to the register */
> - if ((j & 3) == 3) {
> + if ((i & 3) == 3) {
> wr32(hw, I40E_VFQF_HLUT(i), lut);
> DDPRINTF(sc->dev, "HLUT(%2d): %#010x", i, lut);
> - i++;
> }
> }
> ixl_flush(hw);
>
> Modified: stable/10/sys/dev/ixl/ixl.h
> ==============================================================================
> --- stable/10/sys/dev/ixl/ixl.h Fri Dec 11 11:08:00 2015 (r292093)
> +++ stable/10/sys/dev/ixl/ixl.h Fri Dec 11 12:16:05 2015 (r292094)
> @@ -93,7 +93,7 @@
> #include "i40e_type.h"
> #include "i40e_prototype.h"
>
> -#ifdef IXL_DEBUG
> +#if defined(IXL_DEBUG) || defined(IXL_DEBUG_SYSCTL)
> #include <sys/sbuf.h>
>
> #define MAC_FORMAT "%02x:%02x:%02x:%02x:%02x:%02x"
> @@ -101,7 +101,13 @@
> (mac_addr)[0], (mac_addr)[1], (mac_addr)[2], (mac_addr)[3], \
> (mac_addr)[4], (mac_addr)[5]
> #define ON_OFF_STR(is_set) ((is_set) ? "On" : "Off")
> +#endif /* IXL_DEBUG || IXL_DEBUG_SYSCTL */
>
> +#ifdef IXL_DEBUG
> +/* Enable debug sysctls */
> +#ifndef IXL_DEBUG_SYSCTL
> +#define IXL_DEBUG_SYSCTL 1
> +#endif
>
> #define _DBG_PRINTF(S, ...) printf("%s: " S "\n", __func__, ##__VA_ARGS__)
> #define _DEV_DBG_PRINTF(dev, S, ...) device_printf(dev, "%s: " S "\n", __func__, ##__VA_ARGS__)
> @@ -128,7 +134,7 @@
>
> #define HW_DEBUGOUT(...) if (DEBUG_HW) _DBG_PRINTF(__VA_ARGS__)
>
> -#else
> +#else /* no IXL_DEBUG */
> #define DEBUG_INIT 0
> #define DEBUG_IOCTL 0
> #define DEBUG_HW 0
> @@ -144,7 +150,7 @@
> #define IOCTL_DBG_IF2(...)
> #define IOCTL_DBG_IF(...)
> #define HW_DEBUGOUT(...)
> -#endif
> +#endif /* IXL_DEBUG */
>
> /* Tunables */
>
> @@ -214,6 +220,7 @@
> #define IXL_MAX_TSO_SEGS 66
> #define IXL_SPARSE_CHAIN 6
> #define IXL_QUEUE_HUNG 0x80000000
> +#define IXL_KEYSZ 10
>
> /* ERJ: hardware can support ~1.5k filters between all functions */
> #define IXL_MAX_FILTERS 256
>
> Modified: stable/10/sys/dev/ixl/ixl_txrx.c
> ==============================================================================
> --- stable/10/sys/dev/ixl/ixl_txrx.c Fri Dec 11 11:08:00 2015 (r292093)
> +++ stable/10/sys/dev/ixl/ixl_txrx.c Fri Dec 11 12:16:05 2015 (r292094)
> @@ -42,6 +42,10 @@
> #include "opt_inet6.h"
> #include "ixl.h"
>
> +#ifdef RSS
> +#include <net/rss_config.h>
> +#endif
> +
> /* Local Prototypes */
> static void ixl_rx_checksum(struct mbuf *, u32, u32, u8);
> static void ixl_refresh_mbufs(struct ixl_queue *, int);
> @@ -65,14 +69,33 @@ ixl_mq_start(struct ifnet *ifp, struct m
> struct ixl_queue *que;
> struct tx_ring *txr;
> int err, i;
> +#ifdef RSS
> + u32 bucket_id;
> +#endif
>
> - /* check if flowid is set */
> - if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
> - i = m->m_pkthdr.flowid % vsi->num_queues;
> - else
> + /*
> + ** Which queue to use:
> + **
> + ** When doing RSS, map it to the same outbound
> + ** queue as the incoming flow would be mapped to.
> + ** If everything is setup correctly, it should be
> + ** the same bucket that the current CPU we're on is.
> + */
> + if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
> +#ifdef RSS
> + if (rss_hash2bucket(m->m_pkthdr.flowid,
> + M_HASHTYPE_GET(m), &bucket_id) == 0) {
> + i = bucket_id % vsi->num_queues;
> + } else
> +#endif
> + i = m->m_pkthdr.flowid % vsi->num_queues;
> + } else
> i = curcpu % vsi->num_queues;
> -
> - /* Check for a hung queue and pick alternative */
> + /*
> + ** This may not be perfect, but until something
> + ** better comes along it will keep from scheduling
> + ** on stalled queues.
> + */
> if (((1 << i) & vsi->active_queues) == 0)
> i = ffsl(vsi->active_queues);
>
> @@ -1089,8 +1112,8 @@ int
> ixl_init_rx_ring(struct ixl_queue *que)
> {
> struct rx_ring *rxr = &que->rxr;
> -#if defined(INET6) || defined(INET)
> struct ixl_vsi *vsi = que->vsi;
> +#if defined(INET6) || defined(INET)
> struct ifnet *ifp = vsi->ifp;
> struct lro_ctrl *lro = &rxr->lro;
> #endif
> @@ -1345,6 +1368,63 @@ ixl_rx_discard(struct rx_ring *rxr, int
> return;
> }
>
> +#ifdef RSS
> +/*
> +** ixl_ptype_to_hash: parse the packet type
> +** to determine the appropriate hash.
> +*/
> +static inline int
> +ixl_ptype_to_hash(u8 ptype)
> +{
> + struct i40e_rx_ptype_decoded decoded;
> + u8 ex = 0;
> +
> + decoded = decode_rx_desc_ptype(ptype);
> + ex = decoded.outer_frag;
> +
> + if (!decoded.known)
> + return M_HASHTYPE_OPAQUE;
> +
> + if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
> + return M_HASHTYPE_OPAQUE;
> +
> + /* Note: anything that gets to this point is IP */
> + if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
> + switch (decoded.inner_prot) {
> + case I40E_RX_PTYPE_INNER_PROT_TCP:
> + if (ex)
> + return M_HASHTYPE_RSS_TCP_IPV6_EX;
> + else
> + return M_HASHTYPE_RSS_TCP_IPV6;
> + case I40E_RX_PTYPE_INNER_PROT_UDP:
> + if (ex)
> + return M_HASHTYPE_RSS_UDP_IPV6_EX;
> + else
> + return M_HASHTYPE_RSS_UDP_IPV6;
> + default:
> + if (ex)
> + return M_HASHTYPE_RSS_IPV6_EX;
> + else
> + return M_HASHTYPE_RSS_IPV6;
> + }
> + }
> + if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
> + switch (decoded.inner_prot) {
> + case I40E_RX_PTYPE_INNER_PROT_TCP:
> + return M_HASHTYPE_RSS_TCP_IPV4;
> + case I40E_RX_PTYPE_INNER_PROT_UDP:
> + if (ex)
> + return M_HASHTYPE_RSS_UDP_IPV4_EX;
> + else
> + return M_HASHTYPE_RSS_UDP_IPV4;
> + default:
> + return M_HASHTYPE_RSS_IPV4;
> + }
> + }
> + /* We should never get here!! */
> + return M_HASHTYPE_OPAQUE;
> +}
> +#endif /* RSS */
>
> /*********************************************************************
> *
> @@ -1542,8 +1622,14 @@ ixl_rxeof(struct ixl_queue *que, int cou
> rxr->bytes += sendmp->m_pkthdr.len;
> if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
> ixl_rx_checksum(sendmp, status, error, ptype);
> +#ifdef RSS
> + sendmp->m_pkthdr.flowid =
> + le32toh(cur->wb.qword0.hi_dword.rss);
> + M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
> +#else
> sendmp->m_pkthdr.flowid = que->msix;
> M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
> +#endif
> }
> next_desc:
> bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
>
More information about the svn-src-stable-10
mailing list