svn commit: r359000 - head/sys/dev/vmware/vmxnet3

Patrick Kelsey pkelsey at FreeBSD.org
Sat Mar 14 20:08:06 UTC 2020


Author: pkelsey
Date: Sat Mar 14 20:08:04 2020
New Revision: 359000
URL: https://svnweb.freebsd.org/changeset/base/359000

Log:
  Fix if_vmx receive checksum offload bug and harden against the device skipping receive descriptors
  
  This fixes a bug where the checksum offload status of received packets
  was being taken from the first descriptor instead of the last, which
  affected LRO packets.
  
  The driver has been hardened against the device skipping receive
  descriptors, although it is not believed that this can occur given the
  way this implementation configures the receive rings.
  
  Additionally, for packets received with the error indicator set, the
  driver now forces the length of all fragments in that packet to zero
  prior to passing it to iflib.  Such packets should wind up being
  discarded at some point in the stack anyway, but this removes any
  questions by killing them in the driver.
  
  Counters have been added (and exposed via sysctls) for skipped receive
  descriptors, zero-length packets received, and packets received with
  the error indicator set so that these conditions can be easily
  observed in the field.
  
  PR:		243126, 243392, 240628
  Reported by:	avg, alexandr.oleynikov at gmail.com, Harald Schmalzbauer
  Reviewed by:	gallatin
  MFC after:	1 week
  Differential Revision:	https://reviews.freebsd.org/D23949

Modified:
  head/sys/dev/vmware/vmxnet3/if_vmx.c
  head/sys/dev/vmware/vmxnet3/if_vmxvar.h

Modified: head/sys/dev/vmware/vmxnet3/if_vmx.c
==============================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmx.c	Sat Mar 14 19:58:50 2020	(r358999)
+++ head/sys/dev/vmware/vmxnet3/if_vmx.c	Sat Mar 14 20:08:04 2020	(r359000)
@@ -1494,6 +1494,7 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
 	int cqidx;
 	uint16_t total_len;
 	uint8_t nfrags;
+	uint8_t i;
 	uint8_t flid;
 
 	sc = vsc;
@@ -1517,6 +1518,7 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
 		KASSERT(rxcd->sop && rxcd->eop,
 		    ("%s: zero-length packet without both sop and eop set",
 			__func__));
+		rxc->vxcr_zero_length++;
 		if (++cqidx == rxc->vxcr_ndesc) {
 			cqidx = 0;
 			rxc->vxcr_gen ^= 1;
@@ -1572,31 +1574,6 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
 		}
 	}
 
-	/* VLAN */
-	if (rxcd->vlan) {
-		ri->iri_flags |= M_VLANTAG;
-		ri->iri_vtag = rxcd->vtag;
-	}
-
-	/* Checksum offload */
-	if (!rxcd->no_csum) {
-		uint32_t csum_flags = 0;
-
-		if (rxcd->ipv4) {
-			csum_flags |= CSUM_IP_CHECKED;
-			if (rxcd->ipcsum_ok)
-				csum_flags |= CSUM_IP_VALID;
-		}
-		if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
-			csum_flags |= CSUM_L4_CALC;
-			if (rxcd->csum_ok) {
-				csum_flags |= CSUM_L4_VALID;
-				ri->iri_csum_data = 0xffff;
-			}
-		}
-		ri->iri_csum_flags = csum_flags;
-	}
-
 	/*
 	 * The queue numbering scheme used for rxcd->qid is as follows:
 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
@@ -1632,6 +1609,46 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
 	ri->iri_nfrags = nfrags;
 	ri->iri_len = total_len;
 
+	/*
+	 * If there's an error, the last descriptor in the packet will
+	 * have the error indicator set.  In this case, set all
+	 * fragment lengths to zero.  This will cause iflib to discard
+	 * the packet, but process all associated descriptors through
+	 * the refill mechanism.
+	 */
+	if (__predict_false(rxcd->error)) {
+		rxc->vxcr_pkt_errors++;
+		for (i = 0; i < nfrags; i++) {
+			frag = &ri->iri_frags[i];
+			frag->irf_len = 0;
+		}
+	} else {
+		/* Checksum offload information is in the last descriptor. */
+		if (!rxcd->no_csum) {
+			uint32_t csum_flags = 0;
+
+			if (rxcd->ipv4) {
+				csum_flags |= CSUM_IP_CHECKED;
+				if (rxcd->ipcsum_ok)
+					csum_flags |= CSUM_IP_VALID;
+			}
+			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
+				csum_flags |= CSUM_L4_CALC;
+				if (rxcd->csum_ok) {
+					csum_flags |= CSUM_L4_VALID;
+					ri->iri_csum_data = 0xffff;
+				}
+			}
+			ri->iri_csum_flags = csum_flags;
+		}
+
+		/* VLAN information is in the last descriptor. */
+		if (rxcd->vlan) {
+			ri->iri_flags |= M_VLANTAG;
+			ri->iri_vtag = rxcd->vtag;
+		}
+	}
+
 	return (0);
 }
 
@@ -1645,14 +1662,13 @@ vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
 	uint64_t *paddrs;
 	int count;
 	int len;
-	int pidx;
+	int idx;
 	int i;
 	uint8_t flid;
 	uint8_t btype;
 
 	count = iru->iru_count;
 	len = iru->iru_buf_size;
-	pidx = iru->iru_pidx;
 	flid = iru->iru_flidx;
 	paddrs = iru->iru_paddrs;
 
@@ -1666,17 +1682,32 @@ vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
 	 * command ring 1 is filled with BTYPE_BODY descriptors.
 	 */
 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
-	for (i = 0; i < count; i++) {
-		rxd[pidx].addr = paddrs[i];
-		rxd[pidx].len = len;
-		rxd[pidx].btype = btype;
-		rxd[pidx].gen = rxr->vxrxr_gen;
+	/*
+	 * The refill entries from iflib will advance monotonically,
+	 * but the refilled descriptors may not be contiguous due to
+	 * earlier skipping of descriptors by the device.  The refill
+	 * entries from iflib need an entire state update, while the
+	 * descriptors previously skipped by the device only need to
+	 * have their generation numbers updated.
+	 */
+	idx = rxr->vxrxr_refill_start;
+	i = 0;
+	do {
+		if (idx == iru->iru_idxs[i]) {
+			rxd[idx].addr = paddrs[i];
+			rxd[idx].len = len;
+			rxd[idx].btype = btype;
+			i++;
+		} else
+			rxr->vxrxr_desc_skips++;
+		rxd[idx].gen = rxr->vxrxr_gen;
 
-		if (++pidx == rxr->vxrxr_ndesc) {
-			pidx = 0;
+		if (++idx == rxr->vxrxr_ndesc) {
+			idx = 0;
 			rxr->vxrxr_gen ^= 1;
 		}
-	}
+	} while (i != count);
+	rxr->vxrxr_refill_start = idx;
 }
 
 static void
@@ -1825,6 +1856,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
+		rxr->vxrxr_desc_skips = 0;
+		rxr->vxrxr_refill_start = 0;
 		/*
 		 * iflib has zeroed out the descriptor array during the
 		 * prior attach or stop
@@ -1834,6 +1867,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_gen = 0;
+		rxr->vxrxr_desc_skips = 0;
+		rxr->vxrxr_refill_start = 0;
 		bzero(rxr->vxrxr_rxd,
 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
 	}
@@ -1841,6 +1876,8 @@ vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet
 	rxc = &rxq->vxrxq_comp_ring;
 	rxc->vxcr_next = 0;
 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
+	rxc->vxcr_zero_length = 0;
+	rxc->vxcr_pkt_errors = 0;
 	/*
 	 * iflib has zeroed out the descriptor array during the prior attach
 	 * or stop
@@ -2285,14 +2322,22 @@ vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
+		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
+		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
+		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
+		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
+		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
 	}
 }
 

Modified: head/sys/dev/vmware/vmxnet3/if_vmxvar.h
==============================================================================
--- head/sys/dev/vmware/vmxnet3/if_vmxvar.h	Sat Mar 14 19:58:50 2020	(r358999)
+++ head/sys/dev/vmware/vmxnet3/if_vmxvar.h	Sat Mar 14 20:08:04 2020	(r359000)
@@ -63,6 +63,8 @@ struct vmxnet3_rxring {
 	u_int			 vxrxr_ndesc;
 	int			 vxrxr_gen;
 	bus_addr_t		 vxrxr_paddr;
+	uint64_t		 vxrxr_desc_skips;
+	uint16_t		 vxrxr_refill_start;
 };
 
 struct vmxnet3_comp_ring {
@@ -78,6 +80,8 @@ struct vmxnet3_comp_ring {
 	u_int			 vxcr_ndesc;
 	int			 vxcr_gen;
 	bus_addr_t		 vxcr_paddr;
+	uint64_t		 vxcr_zero_length;
+	uint64_t		 vxcr_pkt_errors;
 };
 
 struct vmxnet3_txqueue {


More information about the svn-src-head mailing list