svn commit: r318647 - in head: share/man/man4 sys/conf sys/dev/ena sys/modules sys/modules/ena

Zbigniew Bodek zbb at FreeBSD.org
Mon May 22 14:46:15 UTC 2017


Author: zbb
Date: Mon May 22 14:46:13 2017
New Revision: 318647
URL: https://svnweb.freebsd.org/changeset/base/318647

Log:
  Add support for Amazon Elastic Network Adapter (ENA) NIC
  
  ENA is a networking interface designed to make good use of modern CPU
  features and system architectures.
  
  The ENA device exposes a lightweight management interface with a
  minimal set of memory mapped registers and extendable command set
  through an Admin Queue.
  
  The driver supports a range of ENA devices, is link-speed independent
  (i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
  a negotiated and extendable feature set.
  
  Some ENA devices support SR-IOV. This driver is used for both the
  SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
  
  ENA devices enable high speed and low overhead network traffic
  processing by providing multiple Tx/Rx queue pairs (the maximum number
  is advertised by the device via the Admin Queue), a dedicated MSI-X
  interrupt vector per Tx/Rx queue pair, and CPU cacheline optimized
  data placement.
  
  The ENA driver supports industry standard TCP/IP offload features such
  as checksum offload and TCP transmit segmentation offload (TSO).
  Receive-side scaling (RSS) is supported for multi-core scaling.
  
  The ENA driver and its corresponding devices implement health
  monitoring mechanisms such as watchdog, enabling the device and driver
  to recover in a manner transparent to the application, as well as
  debug logs.
  
  Some of the ENA devices support a working mode called Low-latency
  Queue (LLQ), which saves several more microseconds. This feature will
  be implemented for driver in future releases.
  
  Submitted by:	Michal Krawczyk <mk at semihalf.com>
  		Jakub Palider <jpa at semihalf.com>
  		Jan Medala <jan at semihalf.com>
  Obtained from: Semihalf
  Sponsored by: Amazon.com Inc.
  Differential revision: https://reviews.freebsd.org/D10427

Added:
  head/share/man/man4/ena.4   (contents, props changed)
  head/sys/dev/ena/
  head/sys/dev/ena/ena.c   (contents, props changed)
  head/sys/dev/ena/ena.h   (contents, props changed)
  head/sys/dev/ena/ena_sysctl.c   (contents, props changed)
  head/sys/dev/ena/ena_sysctl.h   (contents, props changed)
  head/sys/modules/ena/
  head/sys/modules/ena/Makefile   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/modules/Makefile

Added: head/share/man/man4/ena.4
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/share/man/man4/ena.4	Mon May 22 14:46:13 2017	(r318647)
@@ -0,0 +1,255 @@
+.\" Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\"
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\"
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in
+.\"    the documentation and/or other materials provided with the
+.\"    distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+.\" "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+.\" LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+.\" A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+.\" OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+.\" OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd May 04, 2017
+.Dt ENA 4
+.Os
+.Sh NAME
+.Nm ena
+.Nd "FreeBSD kernel driver for Elastic Network Adapter (ENA) family"
+.Sh SYNOPSIS
+To compile this driver into the kernel,
+place the following line in your
+kernel configuration file:
+.Bd -ragged -offset indent
+.Cd "device ena"
+.Ed
+.Pp
+Alternatively, to load the driver as a
+module at boot time, place the following line in
+.Xr loader.conf 5 :
+.Bd -literal -offset indent
+if_ena_load="YES"
+.Ed
+.Sh DESCRIPTION
+The ENA is a networking interface designed to make good use of modern CPU
+features and system architectures.
+.Pp
+The ENA device exposes a lightweight management interface with a
+minimal set of memory mapped registers and extendable command set
+through an Admin Queue.
+.Pp
+The driver supports a range of ENA devices, is link-speed independent
+(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
+a negotiated and extendable feature set.
+.Pp
+Some ENA devices support SR-IOV. This driver is used for both the
+SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
+.Pp
+The ENA devices enable high speed and low overhead network traffic
+processing by providing multiple Tx/Rx queue pairs (the maximum number
+is advertised by the device via the Admin Queue), a dedicated MSI-X
+interrupt vector per Tx/Rx queue pair, and CPU cacheline optimized
+data placement.
+.Pp
+The
+.Nm
+driver supports industry standard TCP/IP offload features such
+as checksum offload and TCP transmit segmentation offload (TSO).
+Receive-side scaling (RSS) is supported for multi-core scaling.
+.Pp
+The
+.Nm
+driver and its corresponding devices implement health
+monitoring mechanisms such as watchdog, enabling the device and driver
+to recover in a manner transparent to the application, as well as
+debug logs.
+.Pp
+Some of the ENA devices support a working mode called Low-latency
+Queue (LLQ), which saves several more microseconds. This feature will
+be implemented for the driver in future releases.
+.Sh HARDWARE
+Supported PCI vendor ID/device IDs:
+.Pp
+.Bl -bullet -compact
+.It
+1d0f:0ec2 - ENA PF
+.It
+1d0f:1ec2 - ENA PF with LLQ support
+.It
+1d0f:ec20 - ENA VF
+.It
+1d0f:ec21 - ENA VF with LLQ support
+.El
+.Sh DIAGNOSTICS
+.Ss Device initialization phase:
+.Bl -diag
+.It ena%d: failed to init mmio read less
+.Pp
+Error occurred during initialization of the mmio register read request.
+.It ena%d: Can not reset device
+.Pp
+Device could not be reset; device may not be responding or is already
+being reset.
+.It ena%d: device version is too low
+.Pp
+Version of the controller is too low and it is not supported by the driver.
+.It ena%d: Invalid dma width value %d
+.Pp
+The controller is able to request a DMA transaction width. Device stopped
+responding or it demanded an invalid value.
+.It ena%d: Can not initialize ena admin queue with device
+.Pp
+Initialization of the Admin Queue failed; device may not be responding or there
+was a problem with initialization of the resources.
+.It ena%d: Cannot get attribute for ena device rc: %d
+.Pp
+Failed to get attributes of the device from the controller.
+.It ena%d: Cannot configure aenq groups rc: %d
+.Pp
+Errors occurred when trying to configure AENQ groups.
+.El
+.Ss Driver initialization/shutdown phase:
+.Bl -diag
+.It ena%d: PCI resource allocation failed!
+.It ena%d: allocating ena_dev failed
+.It ena%d: failed to pmap registers bar
+.It ena%d: Error while setting up bufring
+.It ena%d: Error with initialization of IO rings
+.It ena%d: can not allocate ifnet structure
+.It ena%d: Error with network interface setup
+.It ena%d: Failed to enable and set the admin interrupts
+.It ena%d: Failed to allocate %d, vectors %d
+.It ena%d: Failed to enable MSIX, vectors %d rc %d
+.It ena%d: Error with MSI-X enablement
+.It ena%d: could not allocate irq vector: %d
+.It ena%d: Unable to allocate bus resource: registers
+.Pp
+Resource allocation failed when initializing the device; driver will not
+be attached.
+.It ena%d: ENA device init failed (err: %d)
+.Pp
+Device initialization failed; driver will not be attached.
+.It ena%d: could not activate irq vector: %d
+.Pp
+Error occurred when trying to activate interrupt vectors for Admin Queue.
+.It ena%d: failed to register interrupt handler for irq %ju: %d
+.Pp
+Error occurred when trying to register Admin Queue interrupt handler.
+.It ena%d: Cannot setup mgmnt queue intr
+.Pp
+Error occurred during configuration of the Admin Queue interrupts.
+.It ena%d: Enable MSI-X failed
+.Pp
+Configuration of the MSI-X for Admin Queue failed; there could be lack
+of resources or interrupts could not have been configured; driver will
+not be attached.
+.It ena%d: VLAN is in use, detach first
+.Pp
+VLANs are being used when trying to detach the driver; VLANs should be detached
+first and then detach routine should be called again.
+.It ena%d: Unmapped RX DMA tag associations
+.It ena%d: Unmapped TX DMA tag associations
+.Pp
+Error occurred when trying to destroy RX/TX DMA tag.
+.It ena%d: Cannot init RSS
+.It ena%d: Cannot fill indirect table
+.It ena%d: Cannot fill indirect table
+.It ena%d: Cannot fill hash function
+.It ena%d: Cannot fill hash control
+.It ena%d: WARNING: RSS was not properly initialized, it will affect bandwidth
+.Pp
+Error occurred during initialization of one of RSS resources; device is still
+going to work but it will affect performance because all RX packets will be
+passed to queue 0 and there will be no hash information.
+.It ena%d: failed to tear down irq: %d
+.It ena%d: dev has no parent while releasing res for irq: %d
+.Pp
+Release of the interrupts failed.
+.El
+.Ss Additional diagnostic:
+.Bl -diag
+.It ena%d: Cannot get attribute for ena device
+.Pp
+This message appears when trying to change MTU and driver is unable to get
+attributes from the device.
+.It ena%d: Invalid MTU setting. new_mtu: %d
+.Pp
+Requested MTU value is not supported and will not be set.
+.It ena%d: keep alive watchdog timeout
+.Pp
+Device stopped responding and will be reset.
+.It ena%d: Found a Tx that wasn't completed on time, qid %d, index %d.
+.Pp
+Packet was pushed to the NIC but not sent within given time limit; it may
+be caused by hang of the IO queue.
+.It ena%d: The number of lost tx completion is aboce the threshold (%d > %d). Reset the device
+.Pp
+If too many Tx packets were not completed on time, the device is going to be
+reset; it may be caused by a hung queue or device.
+.It ena%d: trigger reset is on
+.Pp
+Device will be reset; reset is triggered either by watchdog or if too many TX
+packets were not completed on time.
+.It ena%d: invalid value recvd
+.Pp
+Link status received from the device in the AENQ handler is invalid.
+.It ena%d: Allocation for Tx Queue %u failed
+.It ena%d: Allocation for Rx Queue %u failed
+.It ena%d: Unable to create Rx DMA map for buffer %d
+.It ena%d: Failed to create io TX queue #%d rc: %d
+.It ena%d: Failed to get TX queue handlers. TX queue num %d rc: %d
+.It ena%d: Failed to create io RX queue[%d] rc: %d
+.It ena%d: Failed to get RX queue handlers. RX queue num %d rc: %d
+.It ena%d: failed to request irq
+.It ena%d: could not allocate irq vector: %d
+.It ena%d: failed to register interrupt handler for irq %ju: %d
+.Pp
+IO resources initialization failed. Interface will not be brought up.
+.It ena%d: LRO[%d] Initialization failed!
+.Pp
+Initialization of the LRO for the RX ring failed.
+.It ena%d: failed to alloc buffer for rx queue
+.It ena%d: failed to add buffer for rx queue %d
+.It ena%d: refilled rx queue %d with %d pages only
+.Pp
+Allocation of resources used on RX path failed; if happened during
+initialization of the IO queue, the interface will not be brought up.
+.It ena%d: ioctl promisc/allmulti
+.Pp
+IOCTL request for the device to work in promiscuous/allmulti mode; see
+.Xr ifconfig 8
+for more details.
+.It ena%d: too many fragments. Last fragment: %d!
+.Pp
+Packet with unsupported number of segments was queued for sending to the
+device; packet will be dropped.
+.El
+.Sh SUPPORT
+If an issue is identified with the released source code with a supported adapter,
+email the specific information related to the issue to
+.Aq Mt mk at semihalf.com
+and
+.Aq Mt mw at semihalf.com .
+.Sh SEE ALSO
+.Xr vlan 4 ,
+.Xr ifconfig 8
+.Sh AUTHORS
+The
+.Nm
+driver was written by
+.An Semihalf.

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Mon May 22 14:21:45 2017	(r318646)
+++ head/sys/conf/files	Mon May 22 14:46:13 2017	(r318647)
@@ -1584,6 +1584,12 @@ dev/e1000/e1000_mbx.c		optional em \
 dev/e1000/e1000_osdep.c		optional em \
 	compile-with "${NORMAL_C} -I$S/dev/e1000"
 dev/et/if_et.c			optional et
+dev/ena/ena.c			optional ena \
+	compile-with "${NORMAL_C} -I$S/contrib"
+dev/ena/ena_sysctl.c 		optional ena \
+	compile-with "${NORMAL_C} -I$S/contrib"
+contrib/ena-com/ena_com.c	optional ena
+contrib/ena-com/ena_eth_com.c	optional ena
 dev/ep/if_ep.c			optional ep
 dev/ep/if_ep_isa.c		optional ep isa
 dev/ep/if_ep_pccard.c		optional ep pccard

Added: head/sys/dev/ena/ena.c
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/sys/dev/ena/ena.c	Mon May 22 14:46:13 2017	(r318647)
@@ -0,0 +1,3769 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) 2015-2017 Amazon.com, Inc. or its affiliates.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/endian.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/rman.h>
+#include <sys/smp.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/sysctl.h>
+#include <sys/taskqueue.h>
+#include <sys/time.h>
+#include <sys/eventhandler.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/in_cksum.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_arp.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+#include <net/rss_config.h>
+#include <net/if_types.h>
+#include <net/if_vlan_var.h>
+
+#include <netinet/in_rss.h>
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcireg.h>
+
+#include "ena.h"
+#include "ena_sysctl.h"
+
+/*********************************************************
+ *  Function prototypes
+ *********************************************************/
+static int	ena_probe(device_t);
+static void	ena_intr_msix_mgmnt(void *);
+static int	ena_allocate_pci_resources(struct ena_adapter*);
+static void	ena_free_pci_resources(struct ena_adapter *);
+static int	ena_change_mtu(if_t, int);
+static inline void ena_alloc_counters(counter_u64_t *, int);
+static inline void ena_free_counters(counter_u64_t *, int);
+static inline void ena_reset_counters(counter_u64_t *, int);
+static void	ena_init_io_rings_common(struct ena_adapter *,
+    struct ena_ring *, uint16_t);
+static int	ena_init_io_rings(struct ena_adapter *);
+static void	ena_free_io_ring_resources(struct ena_adapter *, unsigned int);
+static void	ena_free_all_io_rings_resources(struct ena_adapter *);
+static int	ena_setup_tx_dma_tag(struct ena_adapter *);
+static int	ena_free_tx_dma_tag(struct ena_adapter *);
+static int	ena_setup_rx_dma_tag(struct ena_adapter *);
+static int	ena_free_rx_dma_tag(struct ena_adapter *);
+static int	ena_setup_tx_resources(struct ena_adapter *, int);
+static void	ena_free_tx_resources(struct ena_adapter *, int);
+static int	ena_setup_all_tx_resources(struct ena_adapter *);
+static void	ena_free_all_tx_resources(struct ena_adapter *);
+static int	ena_setup_rx_resources(struct ena_adapter *, unsigned int);
+static void	ena_free_rx_resources(struct ena_adapter *, unsigned int);
+static int	ena_setup_all_rx_resources(struct ena_adapter *);
+static void	ena_free_all_rx_resources(struct ena_adapter *);
+static inline int ena_alloc_rx_mbuf(struct ena_adapter *, struct ena_ring *,
+    struct ena_rx_buffer *);
+static void	ena_free_rx_mbuf(struct ena_adapter *, struct ena_ring *,
+    struct ena_rx_buffer *);
+static int	ena_refill_rx_bufs(struct ena_ring *, uint32_t);
+static void	ena_free_rx_bufs(struct ena_adapter *, unsigned int);
+static void	ena_refill_all_rx_bufs(struct ena_adapter *);
+static void	ena_free_all_rx_bufs(struct ena_adapter *);
+static void	ena_free_tx_bufs(struct ena_adapter *, unsigned int);
+static void	ena_free_all_tx_bufs(struct ena_adapter *);
+static void	ena_destroy_all_tx_queues(struct ena_adapter *);
+static void	ena_destroy_all_rx_queues(struct ena_adapter *);
+static void	ena_destroy_all_io_queues(struct ena_adapter *);
+static int	ena_create_io_queues(struct ena_adapter *);
+static int	ena_tx_cleanup(struct ena_ring *);
+static int	ena_rx_cleanup(struct ena_ring *);
+static int	validate_tx_req_id(struct ena_ring *, uint16_t);
+static void	ena_rx_hash_mbuf(struct ena_ring *, struct ena_com_rx_ctx *,
+    struct mbuf *);
+static struct mbuf* ena_rx_mbuf(struct ena_ring *, struct ena_com_rx_buf_info *,
+    struct ena_com_rx_ctx *, uint16_t *);
+static inline void ena_rx_checksum(struct ena_ring *, struct ena_com_rx_ctx *,
+    struct mbuf *);
+static void	ena_handle_msix(void *);
+static int	ena_enable_msix(struct ena_adapter *);
+static void	ena_setup_mgmnt_intr(struct ena_adapter *);
+static void	ena_setup_io_intr(struct ena_adapter *);
+static int	ena_request_mgmnt_irq(struct ena_adapter *);
+static int	ena_request_io_irq(struct ena_adapter *);
+static void	ena_free_mgmnt_irq(struct ena_adapter *);
+static void	ena_free_io_irq(struct ena_adapter *);
+static void	ena_free_irqs(struct ena_adapter*);
+static void	ena_disable_msix(struct ena_adapter *);
+static void	ena_unmask_all_io_irqs(struct ena_adapter *);
+static int	ena_rss_configure(struct ena_adapter *);
+static int	ena_up_complete(struct ena_adapter *);
+static int	ena_up(struct ena_adapter *);
+static void	ena_down(struct ena_adapter *);
+static uint64_t	ena_get_counter(if_t, ift_counter);
+static int	ena_media_change(if_t);
+static void	ena_media_status(if_t, struct ifmediareq *);
+static void	ena_init(void *);
+static int	ena_ioctl(if_t, u_long, caddr_t);
+static int	ena_get_dev_offloads(struct ena_com_dev_get_features_ctx *);
+static void	ena_update_host_info(struct ena_admin_host_info *, if_t);
+static void	ena_update_hwassist(struct ena_adapter *);
+static int	ena_setup_ifnet(device_t, struct ena_adapter *,
+    struct ena_com_dev_get_features_ctx *);
+static void	ena_tx_csum(struct ena_com_tx_ctx *, struct mbuf *);
+static int	ena_xmit_mbuf(struct ena_ring *, struct mbuf *);
+static void	ena_start_xmit(struct ena_ring *);
+static int	ena_mq_start(if_t, struct mbuf *);
+static void	ena_deferred_mq_start(void *, int);
+static void	ena_qflush(if_t);
+static int	ena_calc_io_queue_num(struct ena_adapter *,
+    struct ena_com_dev_get_features_ctx *);
+static int	ena_calc_queue_size(struct ena_adapter *, uint16_t *,
+    uint16_t *, struct ena_com_dev_get_features_ctx *);
+static int	ena_rss_init_default(struct ena_adapter *);
+static void	ena_rss_init_default_deferred(void *);
+static void	ena_config_host_info(struct ena_com_dev *);
+static int	ena_attach(device_t);
+static int	ena_detach(device_t);
+static int	ena_device_init(struct ena_adapter *, device_t,
+    struct ena_com_dev_get_features_ctx *, int *);
+static int	ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *,
+    int);
+static void ena_update_on_link_change(void *, struct ena_admin_aenq_entry *);
+static void	unimplemented_aenq_handler(void *,
+    struct ena_admin_aenq_entry *);
+static void	ena_timer_service(void *);
+
+static char ena_version[] = DEVICE_NAME DRV_MODULE_NAME " v" DRV_MODULE_VERSION;
+
+static SYSCTL_NODE(_hw, OID_AUTO, ena, CTLFLAG_RD, 0, "ENA driver parameters");
+
+/*
+ * Tunable number of buffers in the buf-ring (drbr), exported as the
+ * buf_ring_size integer below the _hw_ena sysctl node declared above.
+ * The value is passed to buf_ring_alloc() when the Tx rings are
+ * initialized in ena_init_io_rings().
+ */
+static int ena_buf_ring_size = 4096;
+SYSCTL_INT(_hw_ena, OID_AUTO, buf_ring_size, CTLFLAG_RWTUN,
+    &ena_buf_ring_size, 0, "Size of the bufring");
+
+
+static ena_vendor_info_t ena_vendor_info_array[] = {
+    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_PF, 0},
+    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_PF, 0},
+    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_VF, 0},
+    { PCI_VENDOR_ID_AMAZON, PCI_DEV_ID_ENA_LLQ_VF, 0},
+    /* Last entry: all-zero sentinel; ena_probe() stops at vendor_id == 0 */
+    { 0, 0, 0 }
+};
+
+/*
+ * Contains pointers to event handlers, e.g. link state change.
+ */
+static struct ena_aenq_handlers aenq_handlers;
+
+/*
+ * Callback handed to bus_dmamap_load(): when the load reports no error,
+ * store the bus address of the first DMA segment into the bus_addr_t that
+ * arg points to.  On error the destination is left untouched.
+ */
+void
+ena_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+	bus_addr_t *paddr = arg;
+
+	if (error == 0)
+		*paddr = segs[0].ds_addr;
+}
+
+/*
+ * Allocate a zeroed, single-segment DMA buffer and load it.
+ *
+ * dmadev:   device whose softc (struct ena_adapter) supplies dma_width and
+ *           whose parent DMA tag is used.
+ * size:     requested size in bytes; the tag is created with the size
+ *           rounded up to a multiple of PAGE_SIZE.
+ * dma:      handle that receives the tag, map, virtual address and bus
+ *           address (paddr) of the buffer.
+ * mapflags: flags passed through to bus_dmamap_load().
+ *
+ * Returns 0 on success.  On failure a non-zero errno value is returned,
+ * everything acquired so far is released and dma->tag is set to NULL.
+ */
+int
+ena_dma_alloc(device_t dmadev, bus_size_t size,
+    ena_mem_handle_t *dma , int mapflags)
+{
+	struct ena_adapter* adapter = device_get_softc(dmadev);
+	/* Keep the full bus_size_t width so large requests are not truncated. */
+	bus_size_t maxsize = ((size - 1)/PAGE_SIZE + 1) * PAGE_SIZE;
+	uint64_t dma_space_addr = ENA_DMA_BIT_MASK(adapter->dma_width);
+	int error;
+
+	/* A zero mask means no width restriction is known yet. */
+	if (dma_space_addr == 0)
+		dma_space_addr = BUS_SPACE_MAXADDR;
+	error = bus_dma_tag_create(bus_get_dma_tag(dmadev), /* parent */
+	    8, 0,	      /* alignment, bounds */
+	    dma_space_addr,   /* lowaddr */
+	    dma_space_addr,   /* highaddr */
+	    NULL, NULL,	      /* filter, filterarg */
+	    maxsize,	      /* maxsize */
+	    1,		      /* nsegments */
+	    maxsize,	      /* maxsegsize */
+	    BUS_DMA_ALLOCNOW, /* flags */
+	    NULL,	      /* lockfunc */
+	    NULL,	      /* lockarg */
+	    &dma->tag);
+	if (error) {
+		device_printf(dmadev,
+		"%s: bus_dma_tag_create failed: %d\n",
+		__func__, error);
+		goto fail_tag;
+	}
+
+	error = bus_dmamem_alloc(dma->tag, (void**) &dma->vaddr,
+	    BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->map);
+	if (error) {
+		device_printf(dmadev,
+		"%s: bus_dmamem_alloc(%ju) failed: %d\n",
+		__func__, (uintmax_t)size, error);
+		/* Nothing was allocated: only the tag has to go. */
+		goto fail_map_create;
+	}
+
+	dma->paddr = 0;
+	error = bus_dmamap_load(dma->tag, dma->map, dma->vaddr,
+	    size, ena_dmamap_callback, &dma->paddr, mapflags);
+	if (error || dma->paddr == 0) {
+		device_printf(dmadev,
+		"%s: bus_dmamap_load failed: %d\n",
+		__func__, error);
+		if (error == 0) {
+			/*
+			 * The load itself succeeded but the callback did not
+			 * deliver an address: undo the load and make sure the
+			 * caller sees a failure instead of 0.
+			 */
+			bus_dmamap_unload(dma->tag, dma->map);
+			error = ENOMEM;
+		}
+		goto fail_map_load;
+	}
+
+	return (0);
+
+fail_map_load:
+	/* Memory was allocated but is not (or no longer) loaded. */
+	bus_dmamem_free(dma->tag, dma->vaddr, dma->map);
+fail_map_create:
+	bus_dma_tag_destroy(dma->tag);
+fail_tag:
+	dma->tag = NULL;
+
+	return (error);
+}
+
+/*
+ * Allocate and activate the register BAR (ENA_REG_BAR) of the device and
+ * store it in adapter->registers.  adapter->memory is set to NULL; no
+ * memory BAR is allocated here.
+ *
+ * Returns 0 on success, ENXIO when the BAR cannot be allocated.
+ */
+static int
+ena_allocate_pci_resources(struct ena_adapter* adapter)
+{
+	device_t dev = adapter->pdev;
+	int bar_rid = PCIR_BAR(ENA_REG_BAR);
+
+	adapter->memory = NULL;
+	adapter->registers = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+	    &bar_rid, RF_ACTIVE);
+	if (adapter->registers != NULL)
+		return (0);
+
+	device_printf(dev, "Unable to allocate bus resource: "
+	    "registers\n");
+	return (ENXIO);
+}
+
+/*
+ * Release the memory and register BARs held by the adapter, skipping
+ * whichever of the two is NULL.
+ */
+static void
+ena_free_pci_resources(struct ena_adapter *adapter)
+{
+	device_t dev = adapter->pdev;
+
+	if (adapter->memory != NULL)
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    PCIR_BAR(ENA_MEM_BAR), adapter->memory);
+
+	if (adapter->registers != NULL)
+		bus_release_resource(dev, SYS_RES_MEMORY,
+		    PCIR_BAR(ENA_REG_BAR), adapter->registers);
+}
+
+/*
+ * Device probe routine: compare the PCI vendor/device ID of dev against
+ * ena_vendor_info_array (terminated by an entry with vendor_id == 0).
+ *
+ * Returns BUS_PROBE_DEFAULT and sets the device description when a match
+ * is found, ENXIO otherwise.
+ */
+static int
+ena_probe(device_t dev)
+{
+	ena_vendor_info_t *ent;
+	char		adapter_name[60];
+	uint16_t	pci_vendor_id;
+	uint16_t	pci_device_id;
+
+	pci_vendor_id = pci_get_vendor(dev);
+	pci_device_id = pci_get_device(dev);
+
+	for (ent = ena_vendor_info_array; ent->vendor_id != 0; ent++) {
+		if ((pci_vendor_id != ent->vendor_id) ||
+		    (pci_device_id != ent->device_id))
+			continue;
+
+		ena_trace(ENA_DBG, "vendor=%x device=%x ",
+		    pci_vendor_id, pci_device_id);
+
+		/*
+		 * Bounded copy with a literal format: the description is
+		 * never interpreted as a format string and cannot overrun
+		 * adapter_name.
+		 */
+		snprintf(adapter_name, sizeof(adapter_name), "%s",
+		    DEVICE_DESC);
+		device_set_desc_copy(dev, adapter_name);
+		return (BUS_PROBE_DEFAULT);
+	}
+
+	return (ENXIO);
+}
+
+/*
+ * Change the interface MTU.
+ *
+ * The new value is applied to the ifnet first because ETHER_MAX_FRAME() is
+ * evaluated on ifp; if validation or the device command fails, the
+ * previous MTU is put back.
+ *
+ * Returns 0 on success, ENXIO when the device attributes cannot be read,
+ * EINVAL when the MTU is rejected or the device refuses it.
+ */
+static int
+ena_change_mtu(if_t ifp, int new_mtu)
+{
+	struct ena_adapter *adapter = if_getsoftc(ifp);
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
+	int prev_mtu, frame_len;
+
+	if (ena_com_get_dev_attr_feat(adapter->ena_dev, &get_feat_ctx) != 0) {
+		device_printf(adapter->pdev,
+		    "Cannot get attribute for ena device\n");
+		return (ENXIO);
+	}
+
+	/* Remember the current MTU so it can be restored on failure. */
+	prev_mtu = if_getmtu(ifp);
+
+	if_setmtu(ifp, new_mtu);
+	frame_len = ETHER_MAX_FRAME(ifp, ETHERTYPE_VLAN, 1);
+
+	if ((new_mtu < ENA_MIN_FRAME_LEN) ||
+	    (new_mtu > get_feat_ctx.dev_attr.max_mtu) ||
+	    (frame_len > ENA_MAX_FRAME_LEN)) {
+		device_printf(adapter->pdev, "Invalid MTU setting. "
+		    "new_mtu: %d\n", new_mtu);
+	} else if (ena_com_set_dev_mtu(adapter->ena_dev, new_mtu) == 0) {
+		return (0);
+	}
+
+	/* Validation or the device command failed: roll back. */
+	if_setmtu(ifp, prev_mtu);
+	return (EINVAL);
+}
+
+/*
+ * Allocate one counter(9) object for every counter_u64_t slot in the
+ * region of size bytes that starts at begin.
+ */
+static inline void
+ena_alloc_counters(counter_u64_t *begin, int size)
+{
+	counter_u64_t *const limit = (counter_u64_t *)((char *)begin + size);
+	counter_u64_t *slot = begin;
+
+	while (slot < limit) {
+		*slot = counter_u64_alloc(M_WAITOK);
+		++slot;
+	}
+}
+
+/*
+ * Free every counter stored in the region of size bytes that starts at
+ * begin.
+ */
+static inline void
+ena_free_counters(counter_u64_t *begin, int size)
+{
+	counter_u64_t *const limit = (counter_u64_t *)((char *)begin + size);
+	counter_u64_t *slot = begin;
+
+	while (slot < limit) {
+		counter_u64_free(*slot);
+		++slot;
+	}
+}
+
+/*
+ * Zero every counter stored in the region of size bytes that starts at
+ * begin.
+ */
+static inline void
+ena_reset_counters(counter_u64_t *begin, int size)
+{
+	counter_u64_t *const limit = (counter_u64_t *)((char *)begin + size);
+	counter_u64_t *slot = begin;
+
+	while (slot < limit) {
+		counter_u64_zero(*slot);
+		++slot;
+	}
+}
+
+/*
+ * Fill in the ring fields that Tx and Rx rings have in common: the owning
+ * adapter, its ena_dev and the queue id.
+ */
+static void
+ena_init_io_rings_common(struct ena_adapter *adapter, struct ena_ring *ring,
+    uint16_t qid)
+{
+
+	ring->adapter = adapter;
+	ring->ena_dev = adapter->ena_dev;
+	ring->qid = qid;
+}
+
+/*
+ * Initialize the software state of every Tx/Rx ring pair of the adapter:
+ * ring sizes, moderation intervals, the Tx buf_ring, statistics counters,
+ * ring mutexes and the ena_que that ties a Tx ring to its Rx ring.
+ *
+ * Returns 0 on success.  If a buf_ring cannot be allocated, the rings
+ * initialized by earlier iterations are released and ENOMEM is returned.
+ */
+static int
+ena_init_io_rings(struct ena_adapter *adapter)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	struct ena_ring *tx, *rx;
+	struct ena_que *queue;
+	int qid;
+
+	for (qid = 0; qid < adapter->num_queues; qid++) {
+		tx = &adapter->tx_ring[qid];
+		rx = &adapter->rx_ring[qid];
+
+		/* State shared by both directions. */
+		ena_init_io_rings_common(adapter, tx, qid);
+		ena_init_io_rings_common(adapter, rx, qid);
+
+		/* Tx-only state. */
+		tx->ring_size = adapter->tx_ring_size;
+		tx->tx_max_header_size = ena_dev->tx_max_header_size;
+		tx->tx_mem_queue_type = ena_dev->tx_mem_queue_type;
+		tx->smoothed_interval =
+		    ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
+
+		/* The buf_ring is handed the (not yet initialized) Tx mutex. */
+		tx->br = buf_ring_alloc(ena_buf_ring_size, M_DEVBUF,
+		    M_WAITOK, &tx->ring_mtx);
+		if (tx->br == NULL) {
+			device_printf(adapter->pdev,
+			    "Error while setting up bufring\n");
+			goto unwind;
+		}
+
+		/* Tx statistics counters. */
+		ena_alloc_counters((counter_u64_t *)&tx->tx_stats,
+		    sizeof(tx->tx_stats));
+
+		/* Rx-only state. */
+		rx->ring_size = adapter->rx_ring_size;
+		rx->rx_small_copy_len = adapter->small_copy_len;
+		rx->smoothed_interval =
+		    ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+
+		/* Rx statistics counters. */
+		ena_alloc_counters((counter_u64_t *)&rx->rx_stats,
+		    sizeof(rx->rx_stats));
+
+		/* Name and create the per-ring mutexes. */
+		snprintf(tx->mtx_name, nitems(tx->mtx_name), "%s:tx(%d)",
+		    device_get_nameunit(adapter->pdev), qid);
+		snprintf(rx->mtx_name, nitems(rx->mtx_name), "%s:rx(%d)",
+		    device_get_nameunit(adapter->pdev), qid);
+
+		mtx_init(&tx->ring_mtx, tx->mtx_name, NULL, MTX_DEF);
+		mtx_init(&rx->ring_mtx, rx->mtx_name, NULL, MTX_DEF);
+
+		/* Cross-link the queue with its two rings. */
+		queue = &adapter->que[qid];
+		queue->adapter = adapter;
+		queue->id = qid;
+		queue->tx_ring = tx;
+		queue->rx_ring = rx;
+
+		tx->que = queue;
+		rx->que = queue;
+	}
+
+	return (0);
+
+unwind:
+	/* Release only the rings completed by earlier iterations. */
+	while (qid-- > 0)
+		ena_free_io_ring_resources(adapter, qid);
+
+	return (ENOMEM);
+}
+
+/*
+ * Release what ena_init_io_rings() set up for queue qid: the Tx and Rx
+ * statistics counters, both ring mutexes and the Tx buf_ring.
+ */
+static void
+ena_free_io_ring_resources(struct ena_adapter *adapter, unsigned int qid)
+{
+	struct ena_ring *tx_ring, *rx_ring;
+
+	tx_ring = &adapter->tx_ring[qid];
+	rx_ring = &adapter->rx_ring[qid];
+
+	ena_free_counters((counter_u64_t *)&tx_ring->tx_stats,
+	    sizeof(tx_ring->tx_stats));
+	ena_free_counters((counter_u64_t *)&rx_ring->rx_stats,
+	    sizeof(rx_ring->rx_stats));
+
+	mtx_destroy(&tx_ring->ring_mtx);
+	mtx_destroy(&rx_ring->ring_mtx);
+
+	drbr_free(tx_ring->br, M_DEVBUF);
+}
+
+/*
+ * Release the ring resources of every queue of the adapter, in ascending
+ * queue order.
+ */
+static void
+ena_free_all_io_rings_resources(struct ena_adapter *adapter)
+{
+	int qid = 0;
+
+	while (qid < adapter->num_queues) {
+		ena_free_io_ring_resources(adapter, qid);
+		qid++;
+	}
+}
+
+/*
+ * Create the DMA tag used for Tx buffers and store it in
+ * adapter->tx_buf_tag.  A mapping may use up to max_tx_sgl_size segments
+ * and ENA_TSO_MAXSIZE bytes in total.
+ *
+ * Returns the bus_dma_tag_create() result (0 on success).
+ */
+static int
+ena_setup_tx_dma_tag(struct ena_adapter *adapter)
+{
+	int rc;
+
+	rc = bus_dma_tag_create(
+	    bus_get_dma_tag(adapter->pdev),	  /* parent */
+	    1,					  /* alignment */
+	    0,					  /* bounds */
+	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr */
+	    ENA_DMA_BIT_MASK(adapter->dma_width), /* highaddr */
+	    NULL,				  /* filter */
+	    NULL,				  /* filterarg */
+	    ENA_TSO_MAXSIZE,			  /* maxsize */
+	    adapter->max_tx_sgl_size,		  /* nsegments */
+	    ENA_TSO_MAXSIZE,			  /* maxsegsize */
+	    0,					  /* flags */
+	    NULL,				  /* lockfunc */
+	    NULL,				  /* lockfuncarg */
+	    &adapter->tx_buf_tag);
+	if (rc == 0)
+		return (0);
+
+	device_printf(adapter->pdev, "Unable to create Tx DMA tag\n");
+	return (rc);
+}
+
+/*
+ * Destroy the Tx buffer DMA tag.  The stored tag pointer is cleared only
+ * when the destroy call succeeds.
+ *
+ * Returns the bus_dma_tag_destroy() result (0 on success).
+ */
+static int
+ena_free_tx_dma_tag(struct ena_adapter *adapter)
+{
+	int rc = bus_dma_tag_destroy(adapter->tx_buf_tag);
+
+	if (rc != 0)
+		return (rc);
+
+	adapter->tx_buf_tag = NULL;
+	return (0);
+}
+
+/*
+ * Create the DMA tag used for Rx buffers and store it in
+ * adapter->rx_buf_tag.  Each mapping is a single segment of at most
+ * MJUM16BYTES bytes.
+ *
+ * Returns the bus_dma_tag_create() result (0 on success).
+ */
+static int
+ena_setup_rx_dma_tag(struct ena_adapter *adapter)
+{
+	int rc;
+
+	rc = bus_dma_tag_create(
+	    bus_get_dma_tag(adapter->pdev),	  /* parent */
+	    1,					  /* alignment */
+	    0,					  /* bounds */
+	    ENA_DMA_BIT_MASK(adapter->dma_width), /* lowaddr */
+	    ENA_DMA_BIT_MASK(adapter->dma_width), /* highaddr */
+	    NULL,				  /* filter */
+	    NULL,				  /* filterarg */
+	    MJUM16BYTES,			  /* maxsize */
+	    1,					  /* nsegments */
+	    MJUM16BYTES,			  /* maxsegsize */
+	    0,					  /* flags */
+	    NULL,				  /* lockfunc */
+	    NULL,				  /* lockarg */
+	    &adapter->rx_buf_tag);
+	if (rc == 0)
+		return (0);
+
+	device_printf(adapter->pdev, "Unable to create Rx DMA tag\n");
+	return (rc);
+}
+
+/*
+ * Destroy the Rx buffer DMA tag.  The stored tag pointer is cleared only
+ * when the destroy call succeeds.
+ *
+ * Returns the bus_dma_tag_destroy() result (0 on success).
+ */
+static int
+ena_free_rx_dma_tag(struct ena_adapter *adapter)
+{
+	int rc = bus_dma_tag_destroy(adapter->rx_buf_tag);
+
+	if (rc != 0)
+		return (rc);
+
+	adapter->rx_buf_tag = NULL;
+	return (0);
+}
+
+
+/**
+ * ena_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Allocates the Tx buffer info array and the free request-id stack,
+ * resets the Tx counters and ring indexes, flushes the drbr, creates one
+ * DMA map per ring entry and starts the enqueue taskqueue thread.
+ *
+ * Returns 0 on success, ENOMEM on any failure (everything allocated up to
+ * that point is released again).
+ **/
+static int
+ena_setup_tx_resources(struct ena_adapter *adapter, int qid)
+{
+	struct ena_que *que = &adapter->que[qid];
+	struct ena_ring *tx_ring = que->tx_ring;
+	int bytes, idx;
+#ifdef	RSS
+	cpuset_t cpu_mask;
+#endif
+
+	bytes = sizeof(struct ena_tx_buffer) * tx_ring->ring_size;
+	tx_ring->tx_buffer_info = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (tx_ring->tx_buffer_info == NULL)
+		goto fail_buffer_info;
+
+	bytes = sizeof(uint16_t) * tx_ring->ring_size;
+	tx_ring->free_tx_ids = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (tx_ring->free_tx_ids == NULL)
+		goto fail_req_ids;
+
+	/* Req id stack for TX OOO completions */
+	for (idx = 0; idx < tx_ring->ring_size; idx++)
+		tx_ring->free_tx_ids[idx] = idx;
+
+	/* Start from clean statistics and ring positions. */
+	ena_reset_counters((counter_u64_t *)&tx_ring->tx_stats,
+	    sizeof(tx_ring->tx_stats));
+
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
+	/* Drop anything still sitting in the drbr. */
+	drbr_flush(adapter->ifp, tx_ring->br);
+
+	/*
+	 * One DMA map per ring entry.  On failure idx holds the number of
+	 * maps created so far, which is what the unwind loop needs.
+	 */
+	for (idx = 0; idx < tx_ring->ring_size; idx++) {
+		if (bus_dmamap_create(adapter->tx_buf_tag, 0,
+		    &tx_ring->tx_buffer_info[idx].map) != 0) {
+			device_printf(adapter->pdev,
+			    "Unable to create Tx DMA map for buffer %d\n", idx);
+			goto fail_maps;
+		}
+	}
+
+	/* Deferred-start task and the taskqueue that runs it. */
+	TASK_INIT(&tx_ring->enqueue_task, 0, ena_deferred_mq_start, tx_ring);
+	tx_ring->enqueue_tq = taskqueue_create_fast("ena_tx_enque", M_NOWAIT,
+	    taskqueue_thread_enqueue, &tx_ring->enqueue_tq);
+	if (tx_ring->enqueue_tq == NULL) {
+		device_printf(adapter->pdev,
+		    "Unable to create taskqueue for enqueue task\n");
+		/* All maps exist at this point: destroy every one of them. */
+		idx = tx_ring->ring_size;
+		goto fail_maps;
+	}
+
+	/* RSS set cpu for thread */
+#ifdef RSS
+	CPU_SETOF(que->cpu, &cpu_mask);
+	taskqueue_start_threads_cpuset(&tx_ring->enqueue_tq, 1, PI_NET,
+	    &cpu_mask, "%s tx_ring enq (bucket %d)",
+	    device_get_nameunit(adapter->pdev), que->cpu);
+#else /* RSS */
+	taskqueue_start_threads(&tx_ring->enqueue_tq, 1, PI_NET,
+	    "%s txeq %d", device_get_nameunit(adapter->pdev), que->cpu);
+#endif /* RSS */
+
+	return (0);
+
+fail_maps:
+	while (idx-- > 0) {
+		bus_dmamap_destroy(adapter->tx_buf_tag,
+		    tx_ring->tx_buffer_info[idx].map);
+	}
+	ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->free_tx_ids);
+fail_req_ids:
+	ENA_MEM_FREE(adapter->ena_dev->dmadev, tx_ring->tx_buffer_info);
+fail_buffer_info:
+	return (ENOMEM);
+}
+
+/**
+ * ena_free_tx_resources - Free Tx Resources per Queue
+ * @adapter: network interface device structure
+ * @qid: queue index
+ *
+ * Free all transmit software resources
+ **/
+static void
+ena_free_tx_resources(struct ena_adapter *adapter, int qid)
+{
+	struct ena_ring *tx_ring = &adapter->tx_ring[qid];
+
+	while (taskqueue_cancel(tx_ring->enqueue_tq, &tx_ring->enqueue_task,
+	    NULL))
+		taskqueue_drain(tx_ring->enqueue_tq, &tx_ring->enqueue_task);
+
+	taskqueue_free(tx_ring->enqueue_tq);
+
+	/* Flush buffer ring, */
+	drbr_flush(adapter->ifp, tx_ring->br);
+
+	/* Free buffer DMA maps, */
+	for (int i = 0; i < tx_ring->ring_size; i++) {
+		m_freem(tx_ring->tx_buffer_info[i].mbuf);
+		tx_ring->tx_buffer_info[i].mbuf = NULL;
+		bus_dmamap_unload(adapter->tx_buf_tag,
+		    tx_ring->tx_buffer_info[i].map);
+		bus_dmamap_destroy(adapter->tx_buf_tag,
+		    tx_ring->tx_buffer_info[i].map);
+	}
+
+	/* And free allocated memory. */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list