svn commit: r366442 - in stable/12: share/man/man4 sys/dev/cxgbe sys/dev/cxgbe/common sys/dev/cxgbe/firmware
From: Navdeep Parhar <np at FreeBSD.org>
Date: Mon Oct 5 08:51:05 UTC 2020
Author: np
Date: Mon Oct 5 08:51:03 2020
New Revision: 366442
URL: https://svnweb.freebsd.org/changeset/base/366442
Log:
MFC r365871. This needs a couple of other revisions that aren't in
stable/12 yet to actually work, but it is being committed out of order
to ease other cxgbe MFCs.
r365871:
cxgbe(4): add support for stateless offloads for VXLAN traffic.
Hardware assistance includes checksumming (tx and rx), TSO, and RSS on
the inner traffic in a VXLAN tunnel.
Sponsored by: Chelsio Communications
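
[Editor's note, not part of the commit: on the transmit side the stack marks a
VXLAN-encapsulated packet by setting CSUM_ENCAP_VXLAN together with the
CSUM_INNER_* bits in m_pkthdr.csum_flags, and the driver keys off those bits,
as the needs_vxlan_csum()/needs_vxlan_tso() helpers added to t4_sge.c below
do. A minimal, hedged sketch of that check, using the stock flag names from
sys/mbuf.h; the helper name here is illustrative only.]

    /*
     * Hedged sketch: distinguishing a VXLAN (inner) TSO request from a
     * plain TSO request via the csum_flags contract this commit relies on.
     */
    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/mbuf.h>

    static inline bool
    example_needs_vxlan_tso(struct mbuf *m)
    {
            const uint32_t inner_tso = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;

            /* VXLAN TSO: packet is encapsulated and the inner payload is TSO. */
            return ((m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN) != 0 &&
                (m->m_pkthdr.csum_flags & inner_tso) != 0);
    }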
Modified:
stable/12/share/man/man4/cxgbe.4
stable/12/sys/dev/cxgbe/adapter.h
stable/12/sys/dev/cxgbe/common/common.h
stable/12/sys/dev/cxgbe/common/t4_hw.c
stable/12/sys/dev/cxgbe/firmware/t6fw_cfg.txt
stable/12/sys/dev/cxgbe/t4_main.c
stable/12/sys/dev/cxgbe/t4_sge.c
Directory Properties:
stable/12/ (props changed)
Modified: stable/12/share/man/man4/cxgbe.4
==============================================================================
--- stable/12/share/man/man4/cxgbe.4 Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/share/man/man4/cxgbe.4 Mon Oct 5 08:51:03 2020 (r366442)
@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd Dec 10, 2019
+.Dd September 17, 2020
.Dt CXGBE 4
.Os
.Sh NAME
@@ -61,8 +61,8 @@ driver provides support for PCI Express Ethernet adapt
the Chelsio Terminator 4, Terminator 5, and Terminator 6 ASICs (T4, T5, and T6).
The driver supports Jumbo Frames, Transmit/Receive checksum offload,
TCP segmentation offload (TSO), Large Receive Offload (LRO), VLAN
-tag insertion/extraction, VLAN checksum offload, VLAN TSO, and
-Receive Side Steering (RSS).
+tag insertion/extraction, VLAN checksum offload, VLAN TSO, VXLAN checksum
+offload, VXLAN TSO, and Receive Side Steering (RSS).
For further hardware information and questions related to hardware
requirements, see
.Pa http://www.chelsio.com/ .
Modified: stable/12/sys/dev/cxgbe/adapter.h
==============================================================================
--- stable/12/sys/dev/cxgbe/adapter.h Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/sys/dev/cxgbe/adapter.h Mon Oct 5 08:51:03 2020 (r366442)
@@ -121,6 +121,7 @@ enum {
TX_SGL_SEGS = 39,
TX_SGL_SEGS_TSO = 38,
TX_SGL_SEGS_EO_TSO = 30, /* XXX: lower for IPv6. */
+ TX_SGL_SEGS_VXLAN_TSO = 37,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
};
@@ -286,6 +287,7 @@ struct port_info {
int nvi;
int up_vis;
int uld_vis;
+ bool vxlan_tcam_entry;
struct tx_sched_params *sched_params;
@@ -593,6 +595,8 @@ struct sge_txq {
uint64_t txpkts0_pkts; /* # of frames in type0 coalesced tx WRs */
uint64_t txpkts1_pkts; /* # of frames in type1 coalesced tx WRs */
uint64_t raw_wrs; /* # of raw work requests (alloc_wr_mbuf) */
+ uint64_t vxlan_tso_wrs; /* # of VXLAN TSO work requests */
+ uint64_t vxlan_txcsum;
/* stats for not-that-common events */
} __aligned(CACHE_LINE_SIZE);
@@ -611,6 +615,7 @@ struct sge_rxq {
uint64_t rxcsum; /* # of times hardware assisted with checksum */
uint64_t vlan_extraction;/* # of times VLAN tag was extracted */
+ uint64_t vxlan_rxcsum;
/* stats for not-that-common events */
@@ -833,6 +838,11 @@ struct adapter {
struct sge sge;
int lro_timeout;
int sc_do_rxcopy;
+
+ int vxlan_port;
+ u_int vxlan_refcount;
+ int rawf_base;
+ int nrawf;
struct taskqueue *tq[MAX_NCHAN]; /* General purpose taskqueues */
struct port_info *port[MAX_NPORTS];
Modified: stable/12/sys/dev/cxgbe/common/common.h
==============================================================================
--- stable/12/sys/dev/cxgbe/common/common.h Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/sys/dev/cxgbe/common/common.h Mon Oct 5 08:51:03 2020 (r366442)
@@ -249,7 +249,7 @@ struct tp_params {
uint32_t max_rx_pdu;
uint32_t max_tx_pdu;
uint64_t hash_filter_mask;
- __be16 err_vec_mask;
+ bool rx_pkt_encap;
int8_t fcoe_shift;
int8_t port_shift;
Modified: stable/12/sys/dev/cxgbe/common/t4_hw.c
==============================================================================
--- stable/12/sys/dev/cxgbe/common/t4_hw.c Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/sys/dev/cxgbe/common/t4_hw.c Mon Oct 5 08:51:03 2020 (r366442)
@@ -9627,19 +9627,11 @@ int t4_init_tp_params(struct adapter *adap, bool sleep
read_filter_mode_and_ingress_config(adap, sleep_ok);
- /*
- * Cache a mask of the bits that represent the error vector portion of
- * rx_pkt.err_vec. T6+ can use a compressed error vector to make room
- * for information about outer encapsulation (GENEVE/VXLAN/NVGRE).
- */
- tpp->err_vec_mask = htobe16(0xffff);
if (chip_id(adap) > CHELSIO_T5) {
v = t4_read_reg(adap, A_TP_OUT_CONFIG);
- if (v & F_CRXPKTENC) {
- tpp->err_vec_mask =
- htobe16(V_T6_COMPR_RXERR_VEC(M_T6_COMPR_RXERR_VEC));
- }
- }
+ tpp->rx_pkt_encap = v & F_CRXPKTENC;
+ } else
+ tpp->rx_pkt_encap = false;
rx_len = t4_read_reg(adap, A_TP_PMM_RX_PAGE_SIZE);
tx_len = t4_read_reg(adap, A_TP_PMM_TX_PAGE_SIZE);
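
[Editor's note, not part of the commit: with rx_pkt_encap set, the 16-bit
err_vec field of the rx CPL no longer carries a plain error vector; T6 packs a
compressed error vector plus the outer tunnel type and tunnel header length
into it. A hedged sketch of the decode, assuming the cxgbe driver headers and
using the same G_T6_* macros that the new eth_rx() code in t4_sge.c (further
down in this diff) uses; the function name is illustrative only.]

    /*
     * Hedged sketch: decoding cpl_rx_pkt.err_vec on T6 when the compressed
     * error vector is enabled (params.tp.rx_pkt_encap).
     */
    static void
    example_decode_err_vec(const struct adapter *sc, const struct cpl_rx_pkt *cpl,
        uint16_t *err_vec, uint16_t *tnl_type, uint16_t *tnlhdr_len)
    {
            const uint16_t ev = be16toh(cpl->err_vec);

            if (sc->params.tp.rx_pkt_encap) {
                    *err_vec = G_T6_COMPR_RXERR_VEC(ev);    /* real rx errors */
                    *tnl_type = G_T6_RX_TNL_TYPE(ev);       /* e.g. VXLAN */
                    *tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev);   /* outer hdr length */
            } else {
                    *err_vec = ev;                          /* pre-T6 layout */
                    *tnl_type = 0;
                    *tnlhdr_len = 0;
            }
    }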
Modified: stable/12/sys/dev/cxgbe/firmware/t6fw_cfg.txt
==============================================================================
--- stable/12/sys/dev/cxgbe/firmware/t6fw_cfg.txt Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/sys/dev/cxgbe/firmware/t6fw_cfg.txt Mon Oct 5 08:51:03 2020 (r366442)
@@ -146,7 +146,8 @@
nethctrl = 1024
neq = 2048
nqpcq = 8192
- nexactf = 456
+ nexactf = 454
+ nrawf = 2
cmask = all
pmask = all
ncrypto_lookaside = 16
@@ -272,7 +273,7 @@
[fini]
version = 0x1
- checksum = 0x13640470
+ checksum = 0xa92352a8
#
# $FreeBSD$
#
Modified: stable/12/sys/dev/cxgbe/t4_main.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_main.c Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/sys/dev/cxgbe/t4_main.c Mon Oct 5 08:51:03 2020 (r366442)
@@ -41,9 +41,11 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/bus.h>
+#include <sys/eventhandler.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/queue.h>
+#include <sys/refcount.h>
#include <sys/taskqueue.h>
#include <sys/pciio.h>
#include <dev/pci/pcireg.h>
@@ -1021,6 +1023,8 @@ t4_attach(device_t dev)
sc->policy = NULL;
rw_init(&sc->policy_lock, "connection offload policy");
+ refcount_init(&sc->vxlan_refcount, 0);
+
rc = t4_map_bars_0_and_4(sc);
if (rc != 0)
goto done; /* error message displayed already */
@@ -1661,6 +1665,7 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
{
struct ifnet *ifp;
struct sbuf *sb;
+ struct adapter *sc = vi->adapter;
vi->xact_addr_filt = -1;
callout_init(&vi->tick, 1);
@@ -1691,23 +1696,31 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
ifp->if_capabilities = T4_CAP;
ifp->if_capenable = T4_CAP_ENABLE;
+ ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
+ CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
+ if (chip_id(sc) >= CHELSIO_T6) {
+ ifp->if_capabilities |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
+ ifp->if_capenable |= IFCAP_VXLAN_HWCSUM | IFCAP_VXLAN_HWTSO;
+ ifp->if_hwassist |= CSUM_INNER_IP6_UDP | CSUM_INNER_IP6_TCP |
+ CSUM_INNER_IP6_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
+ CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_ENCAP_VXLAN;
+ }
+
#ifdef TCP_OFFLOAD
if (vi->nofldrxq != 0)
ifp->if_capabilities |= IFCAP_TOE;
#endif
#ifdef RATELIMIT
- if (is_ethoffload(vi->adapter) && vi->nofldtxq != 0) {
+ if (is_ethoffload(sc) && vi->nofldtxq != 0) {
ifp->if_capabilities |= IFCAP_TXRTLMT;
ifp->if_capenable |= IFCAP_TXRTLMT;
}
#endif
- ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
- CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
ifp->if_hw_tsomax = IP_MAXPACKET;
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
- if (is_ethoffload(vi->adapter) && vi->nofldtxq != 0)
+ if (is_ethoffload(sc) && vi->nofldtxq != 0)
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
#endif
ifp->if_hw_tsomaxsegsize = 65536;
@@ -2021,6 +2034,18 @@ cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, cadd
if (mask & IFCAP_NOMAP)
ifp->if_capenable ^= IFCAP_NOMAP;
+ if (mask & IFCAP_VXLAN_HWCSUM) {
+ ifp->if_capenable ^= IFCAP_VXLAN_HWCSUM;
+ ifp->if_hwassist ^= CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP |
+ CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP;
+ }
+ if (mask & IFCAP_VXLAN_HWTSO) {
+ ifp->if_capenable ^= IFCAP_VXLAN_HWTSO;
+ ifp->if_hwassist ^= CSUM_INNER_IP6_TSO |
+ CSUM_INNER_IP_TSO;
+ }
+
#ifdef VLAN_CAPABILITIES
VLAN_CAPABILITIES(ifp);
#endif
@@ -4231,6 +4256,19 @@ get_params__post_init(struct adapter *sc)
MPASS(sc->tids.hpftid_base == 0);
MPASS(sc->tids.tid_base == sc->tids.nhpftids);
}
+
+ param[0] = FW_PARAM_PFVF(RAWF_START);
+ param[1] = FW_PARAM_PFVF(RAWF_END);
+ rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val);
+ if (rc != 0) {
+ device_printf(sc->dev,
+ "failed to query rawf parameters: %d.\n", rc);
+ return (rc);
+ }
+ if ((int)val[1] > (int)val[0]) {
+ sc->rawf_base = val[0];
+ sc->nrawf = val[1] - val[0] + 1;
+ }
}
/*
@@ -4856,6 +4894,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
struct port_info *pi = vi->pi;
struct adapter *sc = pi->adapter;
int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1;
+ uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
ASSERT_SYNCHRONIZED_OP(sc);
KASSERT(flags, ("%s: not told what to update.", __func__));
@@ -4942,7 +4981,7 @@ update_mac_settings(struct ifnet *ifp, int flags)
rc = -rc;
for (j = 0; j < i; j++) {
if_printf(ifp,
- "failed to add mc address"
+ "failed to add mcast address"
" %02x:%02x:%02x:"
"%02x:%02x:%02x rc=%d\n",
mcaddr[j][0], mcaddr[j][1],
@@ -4952,15 +4991,37 @@ update_mac_settings(struct ifnet *ifp, int flags)
}
goto mcfail;
}
+ del = 0;
}
rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0);
if (rc != 0)
- if_printf(ifp, "failed to set mc address hash: %d", rc);
+ if_printf(ifp, "failed to set mcast address hash: %d\n",
+ rc);
+ if (del == 0) {
+ /* We clobbered the VXLAN entry if there was one. */
+ pi->vxlan_tcam_entry = false;
+ }
mcfail:
if_maddr_runlock(ifp);
}
+ if (IS_MAIN_VI(vi) && sc->vxlan_refcount > 0 &&
+ pi->vxlan_tcam_entry == false) {
+ rc = t4_alloc_raw_mac_filt(sc, vi->viid, match_all_mac,
+ match_all_mac, sc->rawf_base + pi->port_id, 1, pi->port_id,
+ true);
+ if (rc < 0) {
+ rc = -rc;
+ if_printf(ifp, "failed to add VXLAN TCAM entry: %d.\n",
+ rc);
+ } else {
+ MPASS(rc == sc->rawf_base + pi->port_id);
+ rc = 0;
+ pi->vxlan_tcam_entry = true;
+ }
+ }
+
return (rc);
}
@@ -10080,6 +10141,7 @@ clear_stats(struct adapter *sc, u_int port_id)
#endif
rxq->rxcsum = 0;
rxq->vlan_extraction = 0;
+ rxq->vxlan_rxcsum = 0;
rxq->fl.cl_allocated = 0;
rxq->fl.cl_recycled = 0;
@@ -10098,6 +10160,8 @@ clear_stats(struct adapter *sc, u_int port_id)
txq->txpkts0_pkts = 0;
txq->txpkts1_pkts = 0;
txq->raw_wrs = 0;
+ txq->vxlan_tso_wrs = 0;
+ txq->vxlan_txcsum = 0;
mp_ring_reset_stats(txq->r);
}
@@ -10873,6 +10937,116 @@ DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL)
}
#endif
+static eventhandler_tag vxlan_start_evtag;
+static eventhandler_tag vxlan_stop_evtag;
+
+struct vxlan_evargs {
+ struct ifnet *ifp;
+ uint16_t port;
+};
+
+static void
+t4_vxlan_start(struct adapter *sc, void *arg)
+{
+ struct vxlan_evargs *v = arg;
+ struct port_info *pi;
+ uint8_t match_all_mac[ETHER_ADDR_LEN] = {0};
+ int i, rc;
+
+ if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
+ return;
+ if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxst") != 0)
+ return;
+
+ if (sc->vxlan_refcount == 0) {
+ sc->vxlan_port = v->port;
+ sc->vxlan_refcount = 1;
+ t4_write_reg(sc, A_MPS_RX_VXLAN_TYPE,
+ V_VXLAN(v->port) | F_VXLAN_EN);
+ for_each_port(sc, i) {
+ pi = sc->port[i];
+ if (pi->vxlan_tcam_entry == true)
+ continue;
+ rc = t4_alloc_raw_mac_filt(sc, pi->vi[0].viid,
+ match_all_mac, match_all_mac,
+ sc->rawf_base + pi->port_id, 1, pi->port_id, true);
+ if (rc < 0) {
+ rc = -rc;
+ log(LOG_ERR,
+ "%s: failed to add VXLAN TCAM entry: %d.\n",
+ device_get_name(pi->vi[0].dev), rc);
+ } else {
+ MPASS(rc == sc->rawf_base + pi->port_id);
+ rc = 0;
+ pi->vxlan_tcam_entry = true;
+ }
+ }
+ } else if (sc->vxlan_port == v->port) {
+ sc->vxlan_refcount++;
+ } else {
+ log(LOG_ERR, "%s: VXLAN already configured on port %d; "
+ "ignoring attempt to configure it on port %d\n",
+ device_get_nameunit(sc->dev), sc->vxlan_port, v->port);
+ }
+ end_synchronized_op(sc, 0);
+}
+
+static void
+t4_vxlan_stop(struct adapter *sc, void *arg)
+{
+ struct vxlan_evargs *v = arg;
+
+ if (sc->nrawf == 0 || chip_id(sc) <= CHELSIO_T5)
+ return;
+ if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4vxsp") != 0)
+ return;
+
+ /*
+ * VXLANs may have been configured before the driver was loaded so we
+ * may see more stops than starts. This is not handled cleanly but at
+ * least we keep the refcount sane.
+ */
+ if (sc->vxlan_port != v->port)
+ goto done;
+ if (sc->vxlan_refcount == 0) {
+ log(LOG_ERR,
+ "%s: VXLAN operation on port %d was stopped earlier; "
+ "ignoring attempt to stop it again.\n",
+ device_get_nameunit(sc->dev), sc->vxlan_port);
+ } else if (--sc->vxlan_refcount == 0) {
+ t4_set_reg_field(sc, A_MPS_RX_VXLAN_TYPE, F_VXLAN_EN, 0);
+ }
+done:
+ end_synchronized_op(sc, 0);
+}
+
+static void
+t4_vxlan_start_handler(void *arg __unused, struct ifnet *ifp,
+ sa_family_t family, u_int port)
+{
+ struct vxlan_evargs v;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+ v.ifp = ifp;
+ v.port = port;
+
+ t4_iterate(t4_vxlan_start, &v);
+}
+
+static void
+t4_vxlan_stop_handler(void *arg __unused, struct ifnet *ifp, sa_family_t family,
+ u_int port)
+{
+ struct vxlan_evargs v;
+
+ MPASS(family == AF_INET || family == AF_INET6);
+ v.ifp = ifp;
+ v.port = port;
+
+ t4_iterate(t4_vxlan_stop, &v);
+}
+
+
static struct sx mlu; /* mod load unload */
SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload");
@@ -10913,6 +11087,14 @@ mod_event(module_t mod, int cmd, void *arg)
#endif
t4_tracer_modload();
tweak_tunables();
+ vxlan_start_evtag =
+ EVENTHANDLER_REGISTER(vxlan_start,
+ t4_vxlan_start_handler, NULL,
+ EVENTHANDLER_PRI_ANY);
+ vxlan_stop_evtag =
+ EVENTHANDLER_REGISTER(vxlan_stop,
+ t4_vxlan_stop_handler, NULL,
+ EVENTHANDLER_PRI_ANY);
}
sx_xunlock(&mlu);
break;
@@ -10949,6 +11131,10 @@ mod_event(module_t mod, int cmd, void *arg)
sx_sunlock(&t4_list_lock);
if (t4_sge_extfree_refs() == 0) {
+ EVENTHANDLER_DEREGISTER(vxlan_start,
+ vxlan_start_evtag);
+ EVENTHANDLER_DEREGISTER(vxlan_stop,
+ vxlan_stop_evtag);
t4_tracer_modunload();
#ifdef INET6
t4_clip_modunload();
Modified: stable/12/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- stable/12/sys/dev/cxgbe/t4_sge.c Mon Oct 5 07:26:06 2020 (r366441)
+++ stable/12/sys/dev/cxgbe/t4_sge.c Mon Oct 5 08:51:03 2020 (r366442)
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_vlan_var.h>
+#include <net/if_vxlan.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
@@ -271,8 +272,9 @@ static int find_refill_source(struct adapter *, int, b
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static inline void get_pkt_gl(struct mbuf *, struct sglist *);
-static inline u_int txpkt_len16(u_int, u_int);
-static inline u_int txpkt_vm_len16(u_int, u_int);
+static inline u_int txpkt_len16(u_int, const u_int);
+static inline u_int txpkt_vm_len16(u_int, const u_int);
+static inline void calculate_mbuf_len16(struct adapter *, struct mbuf *);
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
@@ -1867,13 +1869,42 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const
#if defined(INET) || defined(INET6)
struct lro_ctrl *lro = &rxq->lro;
#endif
+ uint16_t err_vec, tnl_type, tnlhdr_len;
static const int sw_hashtype[4][2] = {
{M_HASHTYPE_NONE, M_HASHTYPE_NONE},
{M_HASHTYPE_RSS_IPV4, M_HASHTYPE_RSS_IPV6},
{M_HASHTYPE_RSS_TCP_IPV4, M_HASHTYPE_RSS_TCP_IPV6},
{M_HASHTYPE_RSS_UDP_IPV4, M_HASHTYPE_RSS_UDP_IPV6},
};
+ static const int sw_csum_flags[2][2] = {
+ {
+ /* IP, inner IP */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L3_CALC | CSUM_L3_VALID |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+ /* IP, inner IP6 */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L3_CALC | CSUM_L3_VALID |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+ },
+ {
+ /* IP6, inner IP */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L3_CALC | CSUM_INNER_L3_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+
+ /* IP6, inner IP6 */
+ CSUM_ENCAP_VXLAN |
+ CSUM_L4_CALC | CSUM_L4_VALID |
+ CSUM_INNER_L4_CALC | CSUM_INNER_L4_VALID,
+ },
+ };
+
MPASS(plen > sc->params.sge.fl_pktshift);
m0 = get_fl_payload(sc, fl, plen);
if (__predict_false(m0 == NULL))
@@ -1888,23 +1919,73 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const
m0->m_pkthdr.flowid = be32toh(d->rss.hash_val);
cpl = (const void *)(&d->rss + 1);
- if (cpl->csum_calc && !(cpl->err_vec & sc->params.tp.err_vec_mask)) {
- if (ifp->if_capenable & IFCAP_RXCSUM &&
- cpl->l2info & htobe32(F_RXF_IP)) {
- m0->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
- CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ if (sc->params.tp.rx_pkt_encap) {
+ const uint16_t ev = be16toh(cpl->err_vec);
+
+ err_vec = G_T6_COMPR_RXERR_VEC(ev);
+ tnl_type = G_T6_RX_TNL_TYPE(ev);
+ tnlhdr_len = G_T6_RX_TNLHDR_LEN(ev);
+ } else {
+ err_vec = be16toh(cpl->err_vec);
+ tnl_type = 0;
+ tnlhdr_len = 0;
+ }
+ if (cpl->csum_calc && err_vec == 0) {
+ int ipv6 = !!(cpl->l2info & htobe32(F_RXF_IP6));
+
+ /* checksum(s) calculated and found to be correct. */
+
+ MPASS((cpl->l2info & htobe32(F_RXF_IP)) ^
+ (cpl->l2info & htobe32(F_RXF_IP6)));
+ m0->m_pkthdr.csum_data = be16toh(cpl->csum);
+ if (tnl_type == 0) {
+ if (!ipv6 && ifp->if_capenable & IFCAP_RXCSUM) {
+ m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
+ CSUM_L3_VALID | CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ } else if (ipv6 && ifp->if_capenable & IFCAP_RXCSUM_IPV6) {
+ m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ }
rxq->rxcsum++;
- } else if (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
- cpl->l2info & htobe32(F_RXF_IP6)) {
- m0->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
- CSUM_PSEUDO_HDR);
- rxq->rxcsum++;
- }
+ } else {
+ MPASS(tnl_type == RX_PKT_TNL_TYPE_VXLAN);
+ if (__predict_false(cpl->ip_frag)) {
+ /*
+ * csum_data is for the inner frame (which is an
+ * IP fragment) and is not 0xffff. There is no
+ * way to pass the inner csum_data to the stack.
+ * We don't want the stack to use the inner
+ * csum_data to validate the outer frame or it
+ * will get rejected. So we fix csum_data here
+ * and let sw do the checksum of inner IP
+ * fragments.
+ *
+ * XXX: Need 32b for csum_data2 in an rx mbuf.
+ * Maybe stuff it into rcv_tstmp?
+ */
+ m0->m_pkthdr.csum_data = 0xffff;
+ if (ipv6) {
+ m0->m_pkthdr.csum_flags = CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ } else {
+ m0->m_pkthdr.csum_flags = CSUM_L3_CALC |
+ CSUM_L3_VALID | CSUM_L4_CALC |
+ CSUM_L4_VALID;
+ }
+ } else {
+ int outer_ipv6;
- if (__predict_false(cpl->ip_frag))
- m0->m_pkthdr.csum_data = be16toh(cpl->csum);
- else
- m0->m_pkthdr.csum_data = 0xffff;
+ MPASS(m0->m_pkthdr.csum_data == 0xffff);
+
+ outer_ipv6 = tnlhdr_len >=
+ sizeof(struct ether_header) +
+ sizeof(struct ip6_hdr);
+ m0->m_pkthdr.csum_flags =
+ sw_csum_flags[outer_ipv6][ipv6];
+ }
+ rxq->vxlan_rxcsum++;
+ }
}
if (cpl->vlan_ex) {
@@ -1929,7 +2010,7 @@ eth_rx(struct adapter *sc, struct sge_rxq *rxq, const
}
#if defined(INET) || defined(INET6)
- if (rxq->iq.flags & IQ_LRO_ENABLED &&
+ if (rxq->iq.flags & IQ_LRO_ENABLED && tnl_type == 0 &&
(M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV4 ||
M_HASHTYPE_GET(m0) == M_HASHTYPE_RSS_TCP_IPV6)) {
if (sort_before_lro(lro)) {
@@ -2101,10 +2182,10 @@ mbuf_nsegs(struct mbuf *m)
{
M_ASSERTPKTHDR(m);
- KASSERT(m->m_pkthdr.l5hlen > 0,
+ KASSERT(m->m_pkthdr.inner_l5hlen > 0,
("%s: mbuf %p missing information on # of segments.", __func__, m));
- return (m->m_pkthdr.l5hlen);
+ return (m->m_pkthdr.inner_l5hlen);
}
static inline void
@@ -2112,7 +2193,7 @@ set_mbuf_nsegs(struct mbuf *m, uint8_t nsegs)
{
M_ASSERTPKTHDR(m);
- m->m_pkthdr.l5hlen = nsegs;
+ m->m_pkthdr.inner_l5hlen = nsegs;
}
static inline int
@@ -2237,63 +2318,108 @@ alloc_wr_mbuf(int len, int how)
return (m);
}
-static inline int
+static inline bool
needs_hwcsum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP |
+ CSUM_IP_TSO | CSUM_INNER_IP | CSUM_INNER_IP_UDP |
+ CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO | CSUM_IP6_UDP |
+ CSUM_IP6_TCP | CSUM_IP6_TSO | CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_IP |
- CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6));
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
+static inline bool
needs_tso(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_TSO | CSUM_IP6_TSO |
+ CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & CSUM_TSO);
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
+static inline bool
+needs_vxlan_csum(struct mbuf *m)
+{
+
+ M_ASSERTPKTHDR(m);
+
+ return (m->m_pkthdr.csum_flags & CSUM_ENCAP_VXLAN);
+}
+
+static inline bool
+needs_vxlan_tso(struct mbuf *m)
+{
+ const uint32_t csum_flags = CSUM_ENCAP_VXLAN | CSUM_INNER_IP_TSO |
+ CSUM_INNER_IP6_TSO;
+
+ M_ASSERTPKTHDR(m);
+
+ return ((m->m_pkthdr.csum_flags & csum_flags) != 0 &&
+ (m->m_pkthdr.csum_flags & csum_flags) != CSUM_ENCAP_VXLAN);
+}
+
+static inline bool
+needs_inner_tcp_csum(struct mbuf *m)
+{
+ const uint32_t csum_flags = CSUM_INNER_IP_TSO | CSUM_INNER_IP6_TSO;
+
+ M_ASSERTPKTHDR(m);
+
+ return (m->m_pkthdr.csum_flags & csum_flags);
+}
+
+static inline bool
needs_l3_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP | CSUM_IP_TSO | CSUM_INNER_IP |
+ CSUM_INNER_IP_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TSO));
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
-needs_tcp_csum(struct mbuf *m)
+static inline bool
+needs_outer_tcp_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_TCP | CSUM_IP_TSO | CSUM_IP6_TCP |
+ CSUM_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO));
+
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
#ifdef RATELIMIT
-static inline int
-needs_l4_csum(struct mbuf *m)
+static inline bool
+needs_outer_l4_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP_TCP | CSUM_IP_TSO |
+ CSUM_IP6_UDP | CSUM_IP6_TCP | CSUM_IP6_TSO;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_UDP_IPV6 |
- CSUM_TCP_IPV6 | CSUM_TSO));
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
-static inline int
-needs_udp_csum(struct mbuf *m)
+static inline bool
+needs_outer_udp_csum(struct mbuf *m)
{
+ const uint32_t csum_flags = CSUM_IP_UDP | CSUM_IP6_UDP;
M_ASSERTPKTHDR(m);
- return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6));
+
+ return (m->m_pkthdr.csum_flags & csum_flags);
}
#endif
-static inline int
+static inline bool
needs_vlan_insertion(struct mbuf *m)
{
@@ -2440,6 +2566,23 @@ count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cf
}
/*
+ * The maximum number of segments that can fit in a WR.
+ */
+static int
+max_nsegs_allowed(struct mbuf *m)
+{
+
+ if (needs_tso(m)) {
+ if (needs_vxlan_tso(m))
+ return (TX_SGL_SEGS_VXLAN_TSO);
+ else
+ return (TX_SGL_SEGS_TSO);
+ }
+
+ return (TX_SGL_SEGS);
+}
+
+/*
* Analyze the mbuf to determine its tx needs. The mbuf passed in may change:
* a) caller can assume it's been freed if this function returns with an error.
* b) it may get defragged up if the gather list is too long for the hardware.
@@ -2474,7 +2617,7 @@ restart:
M_ASSERTPKTHDR(m0);
MPASS(m0->m_pkthdr.len > 0);
nsegs = count_mbuf_nsegs(m0, 0, &cflags);
- if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) {
+ if (nsegs > max_nsegs_allowed(m0)) {
if (defragged++ > 0) {
rc = EFBIG;
goto fail;
@@ -2502,18 +2645,15 @@ restart:
}
set_mbuf_nsegs(m0, nsegs);
set_mbuf_cflags(m0, cflags);
- if (sc->flags & IS_VF)
- set_mbuf_len16(m0, txpkt_vm_len16(nsegs, needs_tso(m0)));
- else
- set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0)));
+ calculate_mbuf_len16(sc, m0);
#ifdef RATELIMIT
/*
* Ethofld is limited to TCP and UDP for now, and only when L4 hw
- * checksumming is enabled. needs_l4_csum happens to check for all the
- * right things.
+ * checksumming is enabled. needs_outer_l4_csum happens to check for
+ * all the right things.
*/
- if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0)))
+ if (__predict_false(needs_eo(m0) && !needs_outer_l4_csum(m0)))
m0->m_pkthdr.snd_tag = NULL;
#endif
@@ -2541,21 +2681,27 @@ restart:
switch (eh_type) {
#ifdef INET6
case ETHERTYPE_IPV6:
- {
- struct ip6_hdr *ip6 = l3hdr;
-
- MPASS(!needs_tso(m0) || ip6->ip6_nxt == IPPROTO_TCP);
-
- m0->m_pkthdr.l3hlen = sizeof(*ip6);
+ m0->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
break;
- }
#endif
#ifdef INET
case ETHERTYPE_IP:
{
struct ip *ip = l3hdr;
- m0->m_pkthdr.l3hlen = ip->ip_hl * 4;
+ if (needs_vxlan_csum(m0)) {
+ /* Driver will do the outer IP hdr checksum. */
+ ip->ip_sum = 0;
+ if (needs_vxlan_tso(m0)) {
+ const uint16_t ipl = ip->ip_len;
+
+ ip->ip_len = 0;
+ ip->ip_sum = ~in_cksum_hdr(ip);
+ ip->ip_len = ipl;
+ } else
+ ip->ip_sum = in_cksum_hdr(ip);
+ }
+ m0->m_pkthdr.l3hlen = ip->ip_hl << 2;
break;
}
#endif
@@ -2565,8 +2711,59 @@ restart:
__func__, eh_type);
}
+ if (needs_vxlan_csum(m0)) {
+ m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
+ m0->m_pkthdr.l5hlen = sizeof(struct vxlan_header);
+
+ /* Inner headers. */
+ eh = m_advance(&m, &offset, m0->m_pkthdr.l3hlen +
+ sizeof(struct udphdr) + sizeof(struct vxlan_header));
+ eh_type = ntohs(eh->ether_type);
+ if (eh_type == ETHERTYPE_VLAN) {
+ struct ether_vlan_header *evh = (void *)eh;
+
+ eh_type = ntohs(evh->evl_proto);
+ m0->m_pkthdr.inner_l2hlen = sizeof(*evh);
+ } else
+ m0->m_pkthdr.inner_l2hlen = sizeof(*eh);
+ l3hdr = m_advance(&m, &offset, m0->m_pkthdr.inner_l2hlen);
+
+ switch (eh_type) {
+#ifdef INET6
+ case ETHERTYPE_IPV6:
+ m0->m_pkthdr.inner_l3hlen = sizeof(struct ip6_hdr);
+ break;
+#endif
+#ifdef INET
+ case ETHERTYPE_IP:
+ {
+ struct ip *ip = l3hdr;
+
+ m0->m_pkthdr.inner_l3hlen = ip->ip_hl << 2;
+ break;
+ }
+#endif
+ default:
+ panic("%s: VXLAN hw offload requested with unknown "
+ "ethertype 0x%04x. if_cxgbe must be compiled"
+ " with the same INET/INET6 options as the kernel.",
+ __func__, eh_type);
+ }
#if defined(INET) || defined(INET6)
- if (needs_tcp_csum(m0)) {
+ if (needs_inner_tcp_csum(m0)) {
+ tcp = m_advance(&m, &offset, m0->m_pkthdr.inner_l3hlen);
+ m0->m_pkthdr.inner_l4hlen = tcp->th_off * 4;
+ }
+#endif
+ MPASS((m0->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
+ m0->m_pkthdr.csum_flags &= CSUM_INNER_IP6_UDP |
+ CSUM_INNER_IP6_TCP | CSUM_INNER_IP6_TSO | CSUM_INNER_IP |
+ CSUM_INNER_IP_UDP | CSUM_INNER_IP_TCP | CSUM_INNER_IP_TSO |
+ CSUM_ENCAP_VXLAN;
+ }
+
+#if defined(INET) || defined(INET6)
+ if (needs_outer_tcp_csum(m0)) {
tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen);
m0->m_pkthdr.l4hlen = tcp->th_off * 4;
#ifdef RATELIMIT
@@ -2576,7 +2773,7 @@ restart:
V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1));
} else
set_mbuf_eo_tsclk_tsoff(m0, 0);
- } else if (needs_udp_csum(m)) {
+ } else if (needs_outer_udp_csum(m0)) {
m0->m_pkthdr.l4hlen = sizeof(struct udphdr);
#endif
}
@@ -3524,6 +3721,9 @@ alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vlan_extraction",
CTLFLAG_RD, &rxq->vlan_extraction,
"# of times hardware extracted 802.1Q tag");
+ SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_rxcsum",
+ CTLFLAG_RD, &rxq->vxlan_rxcsum,
+ "# of times hardware assisted with inner checksum (VXLAN) ");
add_fl_sysctls(sc, &vi->ctx, oid, &rxq->fl);
@@ -4178,6 +4378,11 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int
"# of frames tx'd using type1 txpkts work requests");
SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "raw_wrs", CTLFLAG_RD,
&txq->raw_wrs, "# of raw work requests (non-packets)");
+ SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_tso_wrs",
+ CTLFLAG_RD, &txq->vxlan_tso_wrs, "# of VXLAN TSO work requests");
+ SYSCTL_ADD_UQUAD(&vi->ctx, children, OID_AUTO, "vxlan_txcsum",
+ CTLFLAG_RD, &txq->vxlan_txcsum,
+ "# of times hardware assisted with inner checksums (VXLAN)");
mp_ring_sysctls(txq->r, &vi->ctx, children);
@@ -4427,27 +4632,25 @@ get_pkt_gl(struct mbuf *m, struct sglist *gl)
KASSERT(gl->sg_nseg == mbuf_nsegs(m),
("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
mbuf_nsegs(m), gl->sg_nseg));
- KASSERT(gl->sg_nseg > 0 &&
- gl->sg_nseg <= (needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS),
+ KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m),
("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
- gl->sg_nseg, needs_tso(m) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS));
+ gl->sg_nseg, max_nsegs_allowed(m)));
}
/*
* len16 for a txpkt WR with a GL. Includes the firmware work request header.
*/
static inline u_int
-txpkt_len16(u_int nsegs, u_int tso)
+txpkt_len16(u_int nsegs, const u_int extra)
{
u_int n;
MPASS(nsegs > 0);
nsegs--; /* first segment is part of ulptx_sgl */
- n = sizeof(struct fw_eth_tx_pkt_wr) + sizeof(struct cpl_tx_pkt_core) +
+ n = extra + sizeof(struct fw_eth_tx_pkt_wr) +
+ sizeof(struct cpl_tx_pkt_core) +
sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
- if (tso)
- n += sizeof(struct cpl_tx_pkt_lso_core);
return (howmany(n, 16));
}
@@ -4457,22 +4660,43 @@ txpkt_len16(u_int nsegs, u_int tso)
* request header.
*/
static inline u_int
-txpkt_vm_len16(u_int nsegs, u_int tso)
+txpkt_vm_len16(u_int nsegs, const u_int extra)
{
u_int n;
MPASS(nsegs > 0);
nsegs--; /* first segment is part of ulptx_sgl */
- n = sizeof(struct fw_eth_tx_pkt_vm_wr) +
+ n = extra + sizeof(struct fw_eth_tx_pkt_vm_wr) +
sizeof(struct cpl_tx_pkt_core) +
sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1));
- if (tso)
- n += sizeof(struct cpl_tx_pkt_lso_core);
return (howmany(n, 16));
}
+static inline void
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***