svn commit: r265481 - stable/9/sys/dev/cxgbe
Navdeep Parhar
np at FreeBSD.org
Wed May 7 04:00:06 UTC 2014
Author: np
Date: Wed May 7 04:00:05 2014
New Revision: 265481
URL: http://svnweb.freebsd.org/changeset/base/265481
Log:
MFC r255050, r255052.
r255050:
Implement support for rx buffer packing. Enable it by default for T5
cards.
This is a T4 and T5 chip feature that lets the chip deliver multiple
Ethernet frames in a single buffer. It is more efficient within the
chip and in the driver, and it reduces wasted space in rx buffers.
- Always allocate rx buffers from the jumbop zone, no matter what the
MTU is. Do not use the normal cluster refcounting mechanism.
- Reserve space for an mbuf and a refcount in the cluster itself and let
the chip DMA multiple frames in the rest.
- Use the embedded mbuf for the first frame and allocate mbufs on the
fly for any additional frames delivered in the cluster. Each of these
mbufs has a reference on the underlying cluster.
r255052:
Fix the sysctl that displays whether buffer packing is enabled
or not.
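
A minimal sketch of the packed-cluster layout described above, pieced
together from the find_buf_mbuf()/find_buf_refcnt() helpers and the
MJUMPAGESIZE - MSIZE buffer size that appear in the diff below; the
names packed_dma_size and packed_cluster_base are illustrative, not
driver symbols.

#include <sys/param.h>		/* MSIZE, MJUMPAGESIZE */
#include <sys/types.h>		/* uintptr_t */

/*
 * One packed rx cluster, allocated from the jumbop zone, so it is
 * MJUMPAGESIZE bytes long and MJUMPAGESIZE-aligned:
 *
 *  base             base + MSIZE                 base + MJUMPAGESIZE
 *   | struct mbuf ... u_int refcount | DMA area (packed frames) |
 *
 * The refcount sits at base + MSIZE - sizeof(u_int), and the chip is
 * told the buffer is only MJUMPAGESIZE - MSIZE bytes, so it never
 * touches the reserved header region.
 */
static const int packed_dma_size = MJUMPAGESIZE - MSIZE;

/*
 * Natural alignment means any address inside the cluster can be masked
 * back to the base, which is what the driver's helpers rely on.
 */
static inline void *
packed_cluster_base(void *addr)
{

	return ((void *)((uintptr_t)addr & ~(uintptr_t)(MJUMPAGESIZE - 1)));
}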
Modified:
stable/9/sys/dev/cxgbe/adapter.h
stable/9/sys/dev/cxgbe/t4_sge.c
Directory Properties:
stable/9/sys/ (props changed)
stable/9/sys/dev/ (props changed)
Modified: stable/9/sys/dev/cxgbe/adapter.h
==============================================================================
--- stable/9/sys/dev/cxgbe/adapter.h Wed May 7 03:17:21 2014 (r265480)
+++ stable/9/sys/dev/cxgbe/adapter.h Wed May 7 04:00:05 2014 (r265481)
@@ -128,9 +128,9 @@ enum {
RX_FL_ESIZE = EQ_ESIZE, /* 8 64bit addresses */
#if MJUMPAGESIZE != MCLBYTES
- FL_BUF_SIZES = 4, /* cluster, jumbop, jumbo9k, jumbo16k */
+ FL_BUF_SIZES_MAX = 5, /* cluster, jumbop, jumbo9k, jumbo16k, extra */
#else
- FL_BUF_SIZES = 3, /* cluster, jumbo9k, jumbo16k */
+ FL_BUF_SIZES_MAX = 4, /* cluster, jumbo9k, jumbo16k, extra */
#endif
CTRL_EQ_QSIZE = 128,
@@ -165,6 +165,7 @@ enum {
MASTER_PF = (1 << 3),
ADAP_SYSCTL_CTX = (1 << 4),
TOM_INIT_DONE = (1 << 5),
+ BUF_PACKING_OK = (1 << 6),
CXGBE_BUSY = (1 << 9),
@@ -231,12 +232,11 @@ struct port_info {
};
struct fl_sdesc {
- struct mbuf *m;
bus_dmamap_t map;
caddr_t cl;
- uint8_t tag_idx; /* the sc->fl_tag this map comes from */
+ uint8_t tag_idx; /* the fl->tag entry this map comes from */
#ifdef INVARIANTS
- __be64 ba_tag;
+ __be64 ba_hwtag;
#endif
};
@@ -358,9 +358,22 @@ struct sge_eq {
uint32_t unstalled; /* recovered from stall */
};
+struct fl_buf_info {
+ u_int size;
+ int type;
+ int hwtag:4; /* tag in low 4 bits of the pa. */
+ uma_zone_t zone;
+};
+#define FL_BUF_SIZES(sc) (sc->sge.fl_buf_sizes)
+#define FL_BUF_SIZE(sc, x) (sc->sge.fl_buf_info[x].size)
+#define FL_BUF_TYPE(sc, x) (sc->sge.fl_buf_info[x].type)
+#define FL_BUF_HWTAG(sc, x) (sc->sge.fl_buf_info[x].hwtag)
+#define FL_BUF_ZONE(sc, x) (sc->sge.fl_buf_info[x].zone)
+
enum {
FL_STARVING = (1 << 0), /* on the adapter's list of starving fl's */
FL_DOOMED = (1 << 1), /* about to be destroyed */
+ FL_BUF_PACKING = (1 << 2), /* buffer packing enabled */
};
#define FL_RUNNING_LOW(fl) (fl->cap - fl->needed <= fl->lowat)
@@ -369,7 +382,8 @@ enum {
struct sge_fl {
bus_dma_tag_t desc_tag;
bus_dmamap_t desc_map;
- bus_dma_tag_t tag[FL_BUF_SIZES];
+ bus_dma_tag_t tag[FL_BUF_SIZES_MAX]; /* only first FL_BUF_SIZES(sc) are
+ valid */
uint8_t tag_idx;
struct mtx fl_lock;
char lockname[16];
@@ -382,11 +396,13 @@ struct sge_fl {
uint16_t qsize; /* size (# of entries) of the queue */
uint16_t cntxt_id; /* SGE context id for the freelist */
uint32_t cidx; /* consumer idx (buffer idx, NOT hw desc idx) */
+ uint32_t rx_offset; /* offset in fl buf (when buffer packing) */
uint32_t pidx; /* producer idx (buffer idx, NOT hw desc idx) */
uint32_t needed; /* # of buffers needed to fill up fl. */
uint32_t lowat; /* # of buffers <= this means fl needs help */
uint32_t pending; /* # of bufs allocated since last doorbell */
- unsigned int dmamap_failed;
+ u_int dmamap_failed;
+ struct mbuf *mstash[8];
TAILQ_ENTRY(sge_fl) link; /* All starving freelists */
};
@@ -518,6 +534,9 @@ struct sge {
int eq_start;
struct sge_iq **iqmap; /* iq->cntxt_id to iq mapping */
struct sge_eq **eqmap; /* eq->cntxt_id to eq mapping */
+
+ u_int fl_buf_sizes __aligned(CACHE_LINE_SIZE);
+ struct fl_buf_info fl_buf_info[FL_BUF_SIZES_MAX];
};
struct rss_header;
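
The hwtag bit-field above ("tag in low 4 bits of the pa") works because
freelist buffers are at least 16-byte aligned (see the DMA tag creation
in the t4_sge.c diff below), so the buffer-size index can ride in the
low bits of the bus address written to a freelist descriptor. A
hypothetical encode/decode pair, for illustration only: fl_desc_encode
and fl_desc_hwtag are not driver functions, and the INVARIANTS-only
ba_hwtag field is what records the combined value for sanity checks.

#include <sys/types.h>
#include <sys/endian.h>		/* htobe64, be64toh */

static inline uint64_t
fl_desc_encode(uint64_t ba, u_int hwtag)
{

	return (htobe64(ba | (hwtag & 0xf)));
}

static inline u_int
fl_desc_hwtag(uint64_t desc)
{

	return (be64toh(desc) & 0xf);
}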
Modified: stable/9/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- stable/9/sys/dev/cxgbe/t4_sge.c Wed May 7 03:17:21 2014 (r265480)
+++ stable/9/sys/dev/cxgbe/t4_sge.c Wed May 7 04:00:05 2014 (r265481)
@@ -56,19 +56,6 @@ __FBSDID("$FreeBSD$");
#include "common/t4_regs_values.h"
#include "common/t4_msg.h"
-struct fl_buf_info {
- int size;
- int type;
- uma_zone_t zone;
-};
-
-/* Filled up by t4_sge_modload */
-static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];
-
-#define FL_BUF_SIZE(x) (fl_buf_info[x].size)
-#define FL_BUF_TYPE(x) (fl_buf_info[x].type)
-#define FL_BUF_ZONE(x) (fl_buf_info[x].zone)
-
#ifdef T4_PKT_TIMESTAMP
#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
#else
@@ -85,7 +72,8 @@ TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_
/*
* Pad ethernet payload up to this boundary.
* -1: driver should figure out a good value.
- * Any power of 2, from 32 to 4096 (both inclusive) is a valid value.
+ * 0: disable padding.
+ * Any power of 2 from 32 to 4096 (both inclusive) is also a valid value.
*/
static int fl_pad = -1;
TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);
@@ -107,6 +95,33 @@ TUNABLE_INT("hw.cxgbe.spg_len", &spg_len
static int cong_drop = 0;
TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);
+/*
+ * Deliver multiple frames in the same free list buffer if they fit.
+ * -1: let the driver decide whether to enable buffer packing or not.
+ * 0: disable buffer packing.
+ * 1: enable buffer packing.
+ */
+static int buffer_packing = -1;
+TUNABLE_INT("hw.cxgbe.buffer_packing", &buffer_packing);
+
+/*
+ * Start next frame in a packed buffer at this boundary.
+ * -1: driver should figure out a good value.
+ * T4:
+ * ---
+ * if fl_pad != 0
+ * value specified here will be overridden by fl_pad.
+ * else
+ * power of 2 from 32 to 4096 (both inclusive) is a valid value here.
+ * T5:
+ * ---
+ * 16, or a power of 2 from 64 to 4096 (both inclusive) is a valid value.
+ */
+static int fl_pack = -1;
+static int t4_fl_pack;
+static int t5_fl_pack;
+TUNABLE_INT("hw.cxgbe.fl_pack", &fl_pack);
+
/* Used to track coalesced tx work request */
struct txpkts {
uint64_t *flitp; /* ptr to flit where next pkt should start */
@@ -123,12 +138,15 @@ struct sgl {
};
static int service_iq(struct sge_iq *, int);
-static struct mbuf *get_fl_payload(struct adapter *, struct sge_fl *, uint32_t,
+static struct mbuf *get_fl_payload1(struct adapter *, struct sge_fl *, uint32_t,
+ int *);
+static struct mbuf *get_fl_payload2(struct adapter *, struct sge_fl *, uint32_t,
int *);
static int t4_eth_rx(struct sge_iq *, const struct rss_header *, struct mbuf *);
static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
int);
-static inline void init_fl(struct sge_fl *, int, int, char *);
+static inline void init_fl(struct adapter *, struct sge_fl *, int, int, int,
+ char *);
static inline void init_eq(struct sge_eq *, int, int, uint8_t, uint16_t,
char *);
static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
@@ -170,8 +188,8 @@ static inline void ring_fl_db(struct ada
static int refill_fl(struct adapter *, struct sge_fl *, int);
static void refill_sfl(void *);
static int alloc_fl_sdesc(struct sge_fl *);
-static void free_fl_sdesc(struct sge_fl *);
-static void set_fl_tag_idx(struct sge_fl *, int);
+static void free_fl_sdesc(struct adapter *, struct sge_fl *);
+static void set_fl_tag_idx(struct adapter *, struct sge_fl *, int);
static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
@@ -198,27 +216,20 @@ static int handle_fw_msg(struct sge_iq *
static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
/*
- * Called on MOD_LOAD. Fills up fl_buf_info[] and validates/calculates the SGE
- * tunables.
+ * Called on MOD_LOAD. Validates and calculates the SGE tunables.
*/
void
t4_sge_modload(void)
{
- int i;
- int bufsize[FL_BUF_SIZES] = {
- MCLBYTES,
-#if MJUMPAGESIZE != MCLBYTES
- MJUMPAGESIZE,
-#endif
- MJUM9BYTES,
- MJUM16BYTES
- };
+ int pad;
- for (i = 0; i < FL_BUF_SIZES; i++) {
- FL_BUF_SIZE(i) = bufsize[i];
- FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
- FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
- }
+ /* set pad to a reasonable powerof2 between 16 and 4096 (inclusive) */
+#if defined(__i386__) || defined(__amd64__)
+ pad = max(cpu_clflush_line_size, 16);
+#else
+ pad = max(CACHE_LINE_SIZE, 16);
+#endif
+ pad = min(pad, 4096);
if (fl_pktshift < 0 || fl_pktshift > 7) {
printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
@@ -226,23 +237,35 @@ t4_sge_modload(void)
fl_pktshift = 2;
}
- if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
- int pad;
-
-#if defined(__i386__) || defined(__amd64__)
- pad = max(cpu_clflush_line_size, 32);
-#else
- pad = max(CACHE_LINE_SIZE, 32);
-#endif
- pad = min(pad, 4096);
+ if (fl_pad != 0 &&
+ (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad))) {
if (fl_pad != -1) {
printf("Invalid hw.cxgbe.fl_pad value (%d),"
- " using %d instead.\n", fl_pad, pad);
+ " using %d instead.\n", fl_pad, max(pad, 32));
}
- fl_pad = pad;
+ fl_pad = max(pad, 32);
}
+ /*
+ * T4 has the same pad and pack boundary. If a pad boundary is set,
+ * pack boundary must be set to the same value. Otherwise take the
+ * specified value or auto-calculate something reasonable.
+ */
+ if (fl_pad)
+ t4_fl_pack = fl_pad;
+ else if (fl_pack < 32 || fl_pack > 4096 || !powerof2(fl_pack))
+ t4_fl_pack = max(pad, 32);
+ else
+ t4_fl_pack = fl_pack;
+
+ /* T5's pack boundary is independent of the pad boundary. */
+ if (fl_pack < 16 || fl_pack == 32 || fl_pack > 4096 ||
+ !powerof2(fl_pack))
+ t5_fl_pack = max(pad, 64);
+ else
+ t5_fl_pack = fl_pack;
+
if (spg_len != 64 && spg_len != 128) {
int len;
@@ -289,17 +312,41 @@ t4_tweak_chip_settings(struct adapter *s
int timer_max = M_TIMERVALUE0 * 1000 / sc->params.vpd.cclk;
int intr_pktcount[SGE_NCOUNTERS] = {1, 8, 16, 32}; /* 63 max */
uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
+ int sw_flbuf_sizes[] = {
+ MCLBYTES,
+#if MJUMPAGESIZE != MCLBYTES
+ MJUMPAGESIZE,
+#endif
+ MJUM9BYTES,
+ MJUM16BYTES,
+ MJUMPAGESIZE - MSIZE
+ };
KASSERT(sc->flags & MASTER_PF,
("%s: trying to change chip settings when not master.", __func__));
- m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
- V_INGPADBOUNDARY(M_INGPADBOUNDARY) | F_EGRSTATUSPAGESIZE;
+ m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
- V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
V_EGRSTATUSPAGESIZE(spg_len == 128);
+ if (is_t4(sc) && (fl_pad || buffer_packing)) {
+ /* t4_fl_pack has the correct value even when fl_pad = 0 */
+ m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+ v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
+ } else if (is_t5(sc) && fl_pad) {
+ m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+ v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
+ }
t4_set_reg_field(sc, A_SGE_CONTROL, m, v);
+ if (is_t5(sc) && buffer_packing) {
+ m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
+ if (t5_fl_pack == 16)
+ v = V_INGPACKBOUNDARY(0);
+ else
+ v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
+ t4_set_reg_field(sc, A_SGE_CONTROL2, m, v);
+ }
+
v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
@@ -310,9 +357,9 @@ t4_tweak_chip_settings(struct adapter *s
V_HOSTPAGESIZEPF7(PAGE_SHIFT - 10);
t4_write_reg(sc, A_SGE_HOST_PAGE_SIZE, v);
- for (i = 0; i < FL_BUF_SIZES; i++) {
+ for (i = 0; i < min(nitems(sw_flbuf_sizes), 16); i++) {
t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
- FL_BUF_SIZE(i));
+ sw_flbuf_sizes[i]);
}
v = V_THRESHOLD_0(intr_pktcount[0]) | V_THRESHOLD_1(intr_pktcount[1]) |
@@ -373,21 +420,48 @@ int
t4_read_chip_settings(struct adapter *sc)
{
struct sge *s = &sc->sge;
- int i, rc = 0;
+ int i, j, n, rc = 0;
uint32_t m, v, r;
uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
+ uint32_t sge_flbuf_sizes[16], sw_flbuf_sizes[] = {
+ MCLBYTES,
+#if MJUMPAGESIZE != MCLBYTES
+ MJUMPAGESIZE,
+#endif
+ MJUM9BYTES,
+ MJUM16BYTES
+ };
- m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
- V_INGPADBOUNDARY(M_INGPADBOUNDARY) | F_EGRSTATUSPAGESIZE;
+ m = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE | F_EGRSTATUSPAGESIZE;
v = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
- V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
V_EGRSTATUSPAGESIZE(spg_len == 128);
+ if (is_t4(sc) && (fl_pad || buffer_packing)) {
+ m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+ v |= V_INGPADBOUNDARY(ilog2(t4_fl_pack) - 5);
+ } else if (is_t5(sc) && fl_pad) {
+ m |= V_INGPADBOUNDARY(M_INGPADBOUNDARY);
+ v |= V_INGPADBOUNDARY(ilog2(fl_pad) - 5);
+ }
r = t4_read_reg(sc, A_SGE_CONTROL);
if ((r & m) != v) {
device_printf(sc->dev, "invalid SGE_CONTROL(0x%x)\n", r);
rc = EINVAL;
}
+ if (is_t5(sc) && buffer_packing) {
+ m = V_INGPACKBOUNDARY(M_INGPACKBOUNDARY);
+ if (t5_fl_pack == 16)
+ v = V_INGPACKBOUNDARY(0);
+ else
+ v = V_INGPACKBOUNDARY(ilog2(t5_fl_pack) - 5);
+ r = t4_read_reg(sc, A_SGE_CONTROL2);
+ if ((r & m) != v) {
+ device_printf(sc->dev,
+ "invalid SGE_CONTROL2(0x%x)\n", r);
+ rc = EINVAL;
+ }
+ }
+
v = V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10) |
V_HOSTPAGESIZEPF1(PAGE_SHIFT - 10) |
V_HOSTPAGESIZEPF2(PAGE_SHIFT - 10) |
@@ -402,14 +476,45 @@ t4_read_chip_settings(struct adapter *sc
rc = EINVAL;
}
- for (i = 0; i < FL_BUF_SIZES; i++) {
- v = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
- if (v != FL_BUF_SIZE(i)) {
- device_printf(sc->dev,
- "invalid SGE_FL_BUFFER_SIZE[%d](0x%x)\n", i, v);
- rc = EINVAL;
+ /*
+ * Make a list of SGE FL buffer sizes programmed in the chip and tally
+ * it with the FL buffer sizes that we'd like to use.
+ */
+ n = 0;
+ for (i = 0; i < nitems(sge_flbuf_sizes); i++) {
+ r = t4_read_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i));
+ sge_flbuf_sizes[i] = r;
+ if (r == MJUMPAGESIZE - MSIZE &&
+ (sc->flags & BUF_PACKING_OK) == 0) {
+ sc->flags |= BUF_PACKING_OK;
+ FL_BUF_HWTAG(sc, n) = i;
+ FL_BUF_SIZE(sc, n) = MJUMPAGESIZE - MSIZE;
+ FL_BUF_TYPE(sc, n) = m_gettype(MJUMPAGESIZE);
+ FL_BUF_ZONE(sc, n) = m_getzone(MJUMPAGESIZE);
+ n++;
+ }
+ }
+ for (i = 0; i < nitems(sw_flbuf_sizes); i++) {
+ for (j = 0; j < nitems(sge_flbuf_sizes); j++) {
+ if (sw_flbuf_sizes[i] != sge_flbuf_sizes[j])
+ continue;
+ FL_BUF_HWTAG(sc, n) = j;
+ FL_BUF_SIZE(sc, n) = sw_flbuf_sizes[i];
+ FL_BUF_TYPE(sc, n) = m_gettype(sw_flbuf_sizes[i]);
+ FL_BUF_ZONE(sc, n) = m_getzone(sw_flbuf_sizes[i]);
+ n++;
+ break;
}
}
+ if (n == 0) {
+ device_printf(sc->dev, "no usable SGE FL buffer size.\n");
+ rc = EINVAL;
+ } else if (n == 1 && (sc->flags & BUF_PACKING_OK)) {
+ device_printf(sc->dev,
+ "no usable SGE FL buffer size when not packing buffers.\n");
+ rc = EINVAL;
+ }
+ FL_BUF_SIZES(sc) = n;
r = t4_read_reg(sc, A_SGE_INGRESS_RX_THRESHOLD);
s->counter_val[0] = G_THRESHOLD_0(r);
@@ -496,6 +601,17 @@ t4_create_dma_tag(struct adapter *sc)
return (rc);
}
+static inline int
+enable_buffer_packing(struct adapter *sc)
+{
+
+ if (sc->flags & BUF_PACKING_OK &&
+ ((is_t5(sc) && buffer_packing) || /* 1 or -1 both ok for T5 */
+ (is_t4(sc) && buffer_packing == 1)))
+ return (1);
+ return (0);
+}
+
void
t4_sge_sysctls(struct adapter *sc, struct sysctl_ctx_list *ctx,
struct sysctl_oid_list *children)
@@ -512,6 +628,14 @@ t4_sge_sysctls(struct adapter *sc, struc
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_drop", CTLFLAG_RD,
NULL, cong_drop, "congestion drop setting");
+
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "buffer_packing", CTLFLAG_RD,
+ NULL, enable_buffer_packing(sc),
+ "pack multiple frames in one fl buffer");
+
+ SYSCTL_ADD_INT(ctx, children, OID_AUTO, "fl_pack", CTLFLAG_RD,
+ NULL, is_t5(sc) ? t5_fl_pack : t4_fl_pack,
+ "payload pack boundary (bytes)");
}
int
@@ -703,7 +827,7 @@ t4_setup_port_queues(struct port_info *p
struct ifnet *ifp = pi->ifp;
struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
- int bufsize;
+ int bufsize, pack;
oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
NULL, "rx queues");
@@ -725,6 +849,7 @@ t4_setup_port_queues(struct port_info *p
* b) allocate queue iff it will take direct interrupts.
*/
bufsize = mtu_to_bufsize(ifp->if_mtu);
+ pack = enable_buffer_packing(sc);
for_each_rxq(pi, i, rxq) {
init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx, pi->qsize_rxq,
@@ -732,7 +857,7 @@ t4_setup_port_queues(struct port_info *p
snprintf(name, sizeof(name), "%s rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(&rxq->fl, pi->qsize_rxq / 8, bufsize, name);
+ init_fl(sc, &rxq->fl, pi->qsize_rxq / 8, bufsize, pack, name);
if (sc->flags & INTR_DIRECT
#ifdef TCP_OFFLOAD
@@ -749,6 +874,7 @@ t4_setup_port_queues(struct port_info *p
#ifdef TCP_OFFLOAD
bufsize = mtu_to_bufsize_toe(sc, ifp->if_mtu);
+ pack = 0; /* XXX: think about this some more */
for_each_ofld_rxq(pi, i, ofld_rxq) {
init_iq(&ofld_rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
@@ -756,7 +882,8 @@ t4_setup_port_queues(struct port_info *p
snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
device_get_nameunit(pi->dev), i);
- init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, bufsize, name);
+ init_fl(sc, &ofld_rxq->fl, pi->qsize_rxq / 8, bufsize, pack,
+ name);
if (sc->flags & INTR_DIRECT ||
(sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
@@ -1030,7 +1157,12 @@ service_iq(struct sge_iq *iq, int budget
("%s: data for an iq (%p) with no freelist",
__func__, iq));
- m0 = get_fl_payload(sc, fl, lq, &fl_bufs_used);
+ m0 = fl->flags & FL_BUF_PACKING ?
+ get_fl_payload1(sc, fl, lq, &fl_bufs_used) :
+ get_fl_payload2(sc, fl, lq, &fl_bufs_used);
+
+ if (__predict_false(m0 == NULL))
+ goto process_iql;
#ifdef T4_PKT_TIMESTAMP
/*
* 60 bit timestamp for the payload is
@@ -1106,6 +1238,7 @@ service_iq(struct sge_iq *iq, int budget
}
}
+process_iql:
if (STAILQ_EMPTY(&iql))
break;
@@ -1151,13 +1284,100 @@ service_iq(struct sge_iq *iq, int budget
return (0);
}
+static int
+fill_mbuf_stash(struct sge_fl *fl)
+{
+ int i;
+
+ for (i = 0; i < nitems(fl->mstash); i++) {
+ if (fl->mstash[i] == NULL) {
+ struct mbuf *m;
+ if ((m = m_get(M_NOWAIT, MT_NOINIT)) == NULL)
+ return (ENOBUFS);
+ fl->mstash[i] = m;
+ }
+ }
+ return (0);
+}
+
static struct mbuf *
-get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
+get_mbuf_from_stash(struct sge_fl *fl)
+{
+ int i;
+
+ for (i = 0; i < nitems(fl->mstash); i++) {
+ if (fl->mstash[i] != NULL) {
+ struct mbuf *m;
+
+ m = fl->mstash[i];
+ fl->mstash[i] = NULL;
+ return (m);
+ } else
+ fl->mstash[i] = m_get(M_NOWAIT, MT_NOINIT);
+ }
+
+ return (m_get(M_NOWAIT, MT_NOINIT));
+}
+
+static void
+return_mbuf_to_stash(struct sge_fl *fl, struct mbuf *m)
+{
+ int i;
+
+ if (m == NULL)
+ return;
+
+ for (i = 0; i < nitems(fl->mstash); i++) {
+ if (fl->mstash[i] == NULL) {
+ fl->mstash[i] = m;
+ return;
+ }
+ }
+ m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
+ m_free(m);
+}
+
+/* buf can be any address within the buffer */
+static inline u_int *
+find_buf_refcnt(caddr_t buf)
+{
+ uintptr_t ptr = (uintptr_t)buf;
+
+ return ((u_int *)((ptr & ~(MJUMPAGESIZE - 1)) + MSIZE - sizeof(u_int)));
+}
+
+static inline struct mbuf *
+find_buf_mbuf(caddr_t buf)
+{
+ uintptr_t ptr = (uintptr_t)buf;
+
+ return ((struct mbuf *)(ptr & ~(MJUMPAGESIZE - 1)));
+}
+
+static void
+rxb_free(void *arg1, void *arg2)
+{
+ uma_zone_t zone = arg1;
+ caddr_t cl = arg2;
+#ifdef INVARIANTS
+ u_int refcount;
+
+ refcount = *find_buf_refcnt(cl);
+ KASSERT(refcount == 0, ("%s: cl %p refcount is %u", __func__,
+ cl - MSIZE, refcount));
+#endif
+ cl -= MSIZE;
+ uma_zfree(zone, cl);
+}
+
+static struct mbuf *
+get_fl_payload1(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
int *fl_bufs_used)
{
struct mbuf *m0, *m;
struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
unsigned int nbuf, len;
+ int pack_boundary = is_t4(sc) ? t4_fl_pack : t5_fl_pack;
/*
* No assertion for the fl lock because we don't need it. This routine
@@ -1168,29 +1388,194 @@ get_fl_payload(struct adapter *sc, struc
* lock but this routine does not).
*/
+ KASSERT(fl->flags & FL_BUF_PACKING,
+ ("%s: buffer packing disabled for fl %p", __func__, fl));
+
+ len = G_RSPD_LEN(len_newbuf);
+
+ if ((len_newbuf & F_RSPD_NEWBUF) == 0) {
+ KASSERT(fl->rx_offset > 0,
+ ("%s: packed frame but driver at offset=0", __func__));
+
+ /* A packed frame is guaranteed to fit entirely in this buf. */
+ KASSERT(FL_BUF_SIZE(sc, sd->tag_idx) - fl->rx_offset >= len,
+ ("%s: packing error. bufsz=%u, offset=%u, len=%u",
+ __func__, FL_BUF_SIZE(sc, sd->tag_idx), fl->rx_offset,
+ len));
+
+ m0 = get_mbuf_from_stash(fl);
+ if (m0 == NULL ||
+ m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) {
+ return_mbuf_to_stash(fl, m0);
+ return (NULL);
+ }
+
+ bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
+ BUS_DMASYNC_POSTREAD);
+ if (len < RX_COPY_THRESHOLD) {
+#ifdef T4_PKT_TIMESTAMP
+ /* Leave room for a timestamp */
+ m0->m_data += 8;
+#endif
+ bcopy(sd->cl + fl->rx_offset, mtod(m0, caddr_t), len);
+ m0->m_pkthdr.len = len;
+ m0->m_len = len;
+ } else {
+ m0->m_pkthdr.len = len;
+ m0->m_len = len;
+ m_extaddref(m0, sd->cl + fl->rx_offset,
+ roundup2(m0->m_len, fl_pad),
+ find_buf_refcnt(sd->cl), rxb_free,
+ FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);
+ }
+ fl->rx_offset += len;
+ fl->rx_offset = roundup2(fl->rx_offset, fl_pad);
+ fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
+ if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
+ fl->rx_offset = 0;
+ (*fl_bufs_used) += 1;
+ if (__predict_false(++fl->cidx == fl->cap))
+ fl->cidx = 0;
+ }
+
+ return (m0);
+ }
+
+ KASSERT(len_newbuf & F_RSPD_NEWBUF,
+ ("%s: only new buffer handled here", __func__));
+
+ nbuf = 0;
+
+ /*
+ * Move to the start of the next buffer if we are still in the middle of
+ * some buffer. This is the case where there was some room left in the
+ * previous buffer but not enough to fit this frame in its entirety.
+ */
+ if (fl->rx_offset > 0) {
+ KASSERT(roundup2(len, fl_pad) > FL_BUF_SIZE(sc, sd->tag_idx) -
+ fl->rx_offset, ("%s: frame (%u bytes) should have fit at "
+ "cidx %u offset %u bufsize %u", __func__, len, fl->cidx,
+ fl->rx_offset, FL_BUF_SIZE(sc, sd->tag_idx)));
+ nbuf++;
+ fl->rx_offset = 0;
+ sd++;
+ if (__predict_false(++fl->cidx == fl->cap)) {
+ sd = fl->sdesc;
+ fl->cidx = 0;
+ }
+ }
+
+ m0 = find_buf_mbuf(sd->cl);
+ if (m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR | M_NOFREE))
+ goto done;
+ bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
+ m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
+ m_extaddref(m0, sd->cl, roundup2(m0->m_len, fl_pad),
+ find_buf_refcnt(sd->cl), rxb_free, FL_BUF_ZONE(sc, sd->tag_idx),
+ sd->cl);
+ m0->m_pkthdr.len = len;
+
+ fl->rx_offset = roundup2(m0->m_len, fl_pad);
+ fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
+ if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
+ fl->rx_offset = 0;
+ nbuf++;
+ sd++;
+ if (__predict_false(++fl->cidx == fl->cap)) {
+ sd = fl->sdesc;
+ fl->cidx = 0;
+ }
+ }
+
+ m = m0;
+ len -= m->m_len;
+
+ while (len > 0) {
+ m->m_next = find_buf_mbuf(sd->cl);
+ m = m->m_next;
+
+ bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
+ BUS_DMASYNC_POSTREAD);
+
+ /* m_init for !M_PKTHDR can't fail so don't bother */
+ m_init(m, NULL, 0, M_NOWAIT, MT_DATA, M_NOFREE);
+ m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
+ m_extaddref(m, sd->cl, roundup2(m->m_len, fl_pad),
+ find_buf_refcnt(sd->cl), rxb_free,
+ FL_BUF_ZONE(sc, sd->tag_idx), sd->cl);
+
+ fl->rx_offset = roundup2(m->m_len, fl_pad);
+ fl->rx_offset = roundup2(fl->rx_offset, pack_boundary);
+ if (fl->rx_offset >= FL_BUF_SIZE(sc, sd->tag_idx)) {
+ fl->rx_offset = 0;
+ nbuf++;
+ sd++;
+ if (__predict_false(++fl->cidx == fl->cap)) {
+ sd = fl->sdesc;
+ fl->cidx = 0;
+ }
+ }
+
+ len -= m->m_len;
+ }
+done:
+ (*fl_bufs_used) += nbuf;
+ return (m0);
+}
+
+static struct mbuf *
+get_fl_payload2(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
+ int *fl_bufs_used)
+{
+ struct mbuf *m0, *m;
+ struct fl_sdesc *sd = &fl->sdesc[fl->cidx];
+ unsigned int nbuf, len;
+
+ /*
+ * No assertion for the fl lock because we don't need it. This routine
+ * is called only from the rx interrupt handler and it only updates
+ * fl->cidx. (Contrast that with fl->pidx/fl->needed which could be
+ * updated in the rx interrupt handler or the starvation helper routine.
+ * That's why code that manipulates fl->pidx/fl->needed needs the fl
+ * lock but this routine does not).
+ */
+
+ KASSERT((fl->flags & FL_BUF_PACKING) == 0,
+ ("%s: buffer packing enabled for fl %p", __func__, fl));
if (__predict_false((len_newbuf & F_RSPD_NEWBUF) == 0))
panic("%s: cannot handle packed frames", __func__);
len = G_RSPD_LEN(len_newbuf);
- m0 = sd->m;
- sd->m = NULL; /* consumed */
+ /*
+ * We never want to run out of mbufs in between a frame when a frame
+ * spans multiple fl buffers. If the fl's mbuf stash isn't full and
+ * can't be filled up to the brim then fail early.
+ */
+ if (len > FL_BUF_SIZE(sc, sd->tag_idx) && fill_mbuf_stash(fl) != 0)
+ return (NULL);
+
+ m0 = get_mbuf_from_stash(fl);
+ if (m0 == NULL ||
+ m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR) != 0) {
+ return_mbuf_to_stash(fl, m0);
+ return (NULL);
+ }
bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map, BUS_DMASYNC_POSTREAD);
- m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
-#ifdef T4_PKT_TIMESTAMP
- /* Leave room for a timestamp */
- m0->m_data += 8;
-#endif
if (len < RX_COPY_THRESHOLD) {
+#ifdef T4_PKT_TIMESTAMP
+ /* Leave room for a timestamp */
+ m0->m_data += 8;
+#endif
/* copy data to mbuf, buffer will be recycled */
bcopy(sd->cl, mtod(m0, caddr_t), len);
m0->m_len = len;
} else {
bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
- m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
+ m_cljset(m0, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx));
sd->cl = NULL; /* consumed */
- m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
+ m0->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
}
m0->m_pkthdr.len = len;
@@ -1205,23 +1590,23 @@ get_fl_payload(struct adapter *sc, struc
nbuf = 1; /* # of fl buffers used */
while (len > 0) {
- m->m_next = sd->m;
- sd->m = NULL; /* consumed */
+ /* Can't fail, we checked earlier that the stash was full. */
+ m->m_next = get_mbuf_from_stash(fl);
m = m->m_next;
bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
BUS_DMASYNC_POSTREAD);
+ /* m_init for !M_PKTHDR can't fail so don't bother */
m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
if (len <= MLEN) {
bcopy(sd->cl, mtod(m, caddr_t), len);
m->m_len = len;
} else {
- bus_dmamap_unload(fl->tag[sd->tag_idx],
- sd->map);
- m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
+ bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
+ m_cljset(m, sd->cl, FL_BUF_TYPE(sc, sd->tag_idx));
sd->cl = NULL; /* consumed */
- m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
+ m->m_len = min(len, FL_BUF_SIZE(sc, sd->tag_idx));
}
sd++;
@@ -1586,6 +1971,7 @@ void
t4_update_fl_bufsize(struct ifnet *ifp)
{
struct port_info *pi = ifp->if_softc;
+ struct adapter *sc = pi->adapter;
struct sge_rxq *rxq;
#ifdef TCP_OFFLOAD
struct sge_ofld_rxq *ofld_rxq;
@@ -1598,7 +1984,7 @@ t4_update_fl_bufsize(struct ifnet *ifp)
fl = &rxq->fl;
FL_LOCK(fl);
- set_fl_tag_idx(fl, bufsize);
+ set_fl_tag_idx(sc, fl, bufsize);
FL_UNLOCK(fl);
}
#ifdef TCP_OFFLOAD
@@ -1607,7 +1993,7 @@ t4_update_fl_bufsize(struct ifnet *ifp)
fl = &ofld_rxq->fl;
FL_LOCK(fl);
- set_fl_tag_idx(fl, bufsize);
+ set_fl_tag_idx(sc, fl, bufsize);
FL_UNLOCK(fl);
}
#endif
@@ -1641,11 +2027,15 @@ init_iq(struct sge_iq *iq, struct adapte
}
static inline void
-init_fl(struct sge_fl *fl, int qsize, int bufsize, char *name)
+init_fl(struct adapter *sc, struct sge_fl *fl, int qsize, int bufsize, int pack,
+ char *name)
{
+
fl->qsize = qsize;
strlcpy(fl->lockname, name, sizeof(fl->lockname));
- set_fl_tag_idx(fl, bufsize);
+ if (pack)
+ fl->flags |= FL_BUF_PACKING;
+ set_fl_tag_idx(sc, fl, bufsize);
}
static inline void
@@ -1774,7 +2164,7 @@ alloc_iq_fl(struct port_info *pi, struct
if (fl) {
mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
- for (i = 0; i < FL_BUF_SIZES; i++) {
+ for (i = 0; i < FL_BUF_SIZES(sc); i++) {
/*
* A freelist buffer must be 16 byte aligned as the SGE
@@ -1783,8 +2173,8 @@ alloc_iq_fl(struct port_info *pi, struct
*/
rc = bus_dma_tag_create(sc->dmat, 16, 0,
BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
- FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
- NULL, NULL, &fl->tag[i]);
+ FL_BUF_SIZE(sc, i), 1, FL_BUF_SIZE(sc, i),
+ BUS_DMA_ALLOCNOW, NULL, NULL, &fl->tag[i]);
if (rc != 0) {
device_printf(sc->dev,
"failed to create fl DMA tag[%d]: %d\n",
@@ -1813,7 +2203,9 @@ alloc_iq_fl(struct port_info *pi, struct
c.iqns_to_fl0congen |=
htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
- F_FW_IQ_CMD_FL0PADEN);
+ (fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
+ (fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
+ 0));
if (cong >= 0) {
c.iqns_to_fl0congen |=
htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
@@ -1934,12 +2326,21 @@ free_iq_fl(struct port_info *pi, struct
fl->desc);
if (fl->sdesc)
- free_fl_sdesc(fl);
+ free_fl_sdesc(sc, fl);
+
+ for (i = 0; i < nitems(fl->mstash); i++) {
+ struct mbuf *m = fl->mstash[i];
+
+ if (m != NULL) {
+ m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
+ m_free(m);
+ }
+ }
if (mtx_initialized(&fl->fl_lock))
mtx_destroy(&fl->fl_lock);
- for (i = 0; i < FL_BUF_SIZES; i++) {
+ for (i = 0; i < FL_BUF_SIZES(sc); i++) {
if (fl->tag[i])
bus_dma_tag_destroy(fl->tag[i]);
}
@@ -2100,6 +2501,10 @@ alloc_rxq(struct port_info *pi, struct s
"SGE context id of the queue");
SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
&rxq->fl.cidx, 0, "consumer index");
+ if (rxq->fl.flags & FL_BUF_PACKING) {
+ SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "rx_offset",
+ CTLFLAG_RD, &rxq->fl.rx_offset, 0, "packing rx offset");
+ }
SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
&rxq->fl.pidx, 0, "producer index");
@@ -2661,6 +3066,12 @@ refill_fl(struct adapter *sc, struct sge
int rc;
FL_LOCK_ASSERT_OWNED(fl);
+#ifdef INVARIANTS
+ if (fl->flags & FL_BUF_PACKING)
+ KASSERT(sd->tag_idx == 0,
+ ("%s: expected tag 0 but found tag %d at pidx %u instead",
+ __func__, sd->tag_idx, fl->pidx));
+#endif
if (nbufs > fl->needed)
nbufs = fl->needed;
@@ -2669,24 +3080,34 @@ refill_fl(struct adapter *sc, struct sge
if (sd->cl != NULL) {
- /*
- * This happens when a frame small enough to fit
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
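
Although the diff is truncated, the boundary encoding used in
t4_tweak_chip_settings() and t4_read_chip_settings() above is worth
restating: the SGE pad boundary (and T4's shared pad/pack boundary) is
programmed as log2(bytes) - 5, and T5's separate SGE_CONTROL2 pack
boundary uses the same encoding except that a field value of 0 means
16 bytes. A sketch under those assumptions; pad_boundary_field and
t5_pack_boundary_field are illustrative names, and the driver uses its
own ilog2() rather than the flsl()-based stand-in here.

#include <strings.h>		/* flsl */

static inline int
ilog2(long x)			/* log2 of a power of 2 */
{

	return (flsl(x) - 1);
}

static inline int
pad_boundary_field(int bytes)	/* power of 2, 32..4096 */
{

	return (ilog2(bytes) - 5);	/* 32 -> 0, ..., 4096 -> 7 */
}

static inline int
t5_pack_boundary_field(int bytes)	/* 16, or power of 2, 64..4096 */
{

	return (bytes == 16 ? 0 : ilog2(bytes) - 5);
}

On rx, get_fl_payload1() advances fl->rx_offset past each frame, rounds
it up to fl_pad and then to this pack boundary, so the next packed
frame always starts where the chip expects it.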