svn commit: r240169 - in stable/9/sys: dev/cxgb/ulp/tom dev/cxgbe dev/cxgbe/common dev/cxgbe/firmware dev/cxgbe/tom modules/cxgbe/if_cxgbe modules/cxgbe/tom

Navdeep Parhar np at FreeBSD.org
Thu Sep 6 17:28:48 UTC 2012


Author: np
Date: Thu Sep  6 17:28:47 2012
New Revision: 240169
URL: http://svn.freebsd.org/changeset/base/240169

Log:
  MFC many cxgb and cxgbe features and fixes (r239258, r239259, r239264,
  r239266, r239336, r239338, r239339, r239341, r239344, r239514, r239527,
  r239528, r239544).
  
  r239258:
  Convert some fixed parameters to tunables (with reasonable default
  values).
  
  - cong_drop specifies what to do on congestion: nothing, backpressure,
    or drop.
  - fl_pktshift specifies the padding before Ethernet payload.
  - fl_pad specifies the boundary up to which to pad Ethernet payload.
  - spg_len controls the length of the status page.
  
  r239259:
  if_iqdrops should include frames truncated within the chip.
  
  r239264:
  Assume INET, INET6, and TCP_OFFLOAD when the driver is built out of tree and
  KERNBUILDDIR is not set.
  
  r239266:
  The size of the buffers in an Ethernet freelist has to be higher than the
  interface's MTU.  Initialize such freelists with correct values.
  
  This wasn't a problem for common MTUs (1500 and 9000) as the buffers (2048
  and 9216 in size) happened to have enough spare room.  I ran into it when
  playing around with unusual MTUs.
  
  r239336:
  Allow for a different handler for each type of firmware message.
  
  r239338:
  Add a routine (t4_set_tcb_field) to update arbitrary parts of a hardware
  TCB.  Filters are programmed by modifying the TCB too (via a different
  routine) and the reply to any TCB update is delivered via a
  CPL_SET_TCB_RPL.  Figure out whether the reply is for a filter-write or
  something else and route it appropriately.
  
  r239339:
  Make room for DDP page pods in the default configuration profile.  While
  here, bump up the L2 table's size to 4K entries.
  
  r239341:
  Initialize various DDP parameters in the main cxgbe(4) driver:
  
  - Setup multiple DDP page sizes.  When the driver attempts DDP it will
    try to combine physically contiguous pages into regions of these sizes.
  
  - Set the indicate size such that the payload carried in the indicate can
    be copied in the header mbuf (and the 16K rx buffer can be recycled).
  
  - Set DDP threshold to the max payload that the chip will coalesce and
    deliver to the driver (this is ~16K by default, which is also why the
    offload rx queue is backed by 16K buffers).  If the chip is able to
    coalesce up to the max it's allowed to, it's a good sign that the peer
    is transmitting in bulk without any TCP PSH.
  
  r239344:
  Support for TCP DDP (Direct Data Placement) in the T4 TOE module.
  
  Basically, this is automatic rx zero copy when feasible.  TCP payload is
  DMA'd directly into the userspace buffer described by the uio submitted
  in soreceive by an application.
  
  - Works with sockets that are being handled by the TCP offload engine
    of a T4 chip (you need t4_tom.ko module loaded after cxgbe, and an
    "ifconfig +toe" on the cxgbe interface).
  - Does not require any modification to the application.
  - Not enabled by default.  Use hw.t4nex.<X>.toe.ddp="1" to enable it.
  
  r239514:
  Minor cleanup: use bitwise ops instead of pointless wrappers around
  setbit/clrbit.
  
  r239527:
  Cannot hold a mutex around vm_fault_quick_hold_pages, so don't.  Tweak
  some comments while here.
  
  r239528:
  Avoid a NULL pointer dereference.
  
  r239544:
  Deal with the case where a syncache entry added by the TOE driver is
  evicted from the syncache but a later syncache_expand succeeds because
  of syncookies.  The TOE driver has to resort to more direct means to
  install its hooks in the socket in this case.

Added:
  stable/9/sys/dev/cxgbe/tom/t4_ddp.c
     - copied, changed from r239344, head/sys/dev/cxgbe/tom/t4_ddp.c
Modified:
  stable/9/sys/dev/cxgb/ulp/tom/cxgb_listen.c
  stable/9/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
  stable/9/sys/dev/cxgbe/adapter.h
  stable/9/sys/dev/cxgbe/common/t4_hw.h
  stable/9/sys/dev/cxgbe/common/t4_msg.h
  stable/9/sys/dev/cxgbe/firmware/t4fw_cfg.txt
  stable/9/sys/dev/cxgbe/offload.h
  stable/9/sys/dev/cxgbe/t4_main.c
  stable/9/sys/dev/cxgbe/t4_sge.c
  stable/9/sys/dev/cxgbe/tom/t4_connect.c
  stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c
  stable/9/sys/dev/cxgbe/tom/t4_listen.c
  stable/9/sys/dev/cxgbe/tom/t4_tom.c
  stable/9/sys/dev/cxgbe/tom/t4_tom.h
  stable/9/sys/modules/cxgbe/if_cxgbe/Makefile
  stable/9/sys/modules/cxgbe/tom/Makefile
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/dev/   (props changed)
  stable/9/sys/modules/   (props changed)

Modified: stable/9/sys/dev/cxgb/ulp/tom/cxgb_listen.c
==============================================================================
--- stable/9/sys/dev/cxgb/ulp/tom/cxgb_listen.c	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgb/ulp/tom/cxgb_listen.c	Thu Sep  6 17:28:47 2012	(r240169)
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/ip.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_var.h>
+#include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>
@@ -759,6 +760,15 @@ reset:
 		goto reset;
 	}
 
+	if (__predict_false(!(synqe->flags & TP_SYNQE_EXPANDED))) {
+		struct inpcb *new_inp = sotoinpcb(so);
+
+		INP_WLOCK(new_inp);
+		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
+		t3_offload_socket(tod, synqe, so);
+		INP_WUNLOCK(new_inp);
+	}
+
 	/* Remove the synq entry and release its reference on the lctx */
 	TAILQ_REMOVE(&lctx->synq, synqe, link);
 	inp = release_lctx(td, lctx);
@@ -1136,5 +1146,6 @@ t3_offload_socket(struct toedev *tod, vo
 	offload_socket(so, toep);
 	make_established(so, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt);
 	update_tid(td, toep, synqe->tid);
+	synqe->flags |= TP_SYNQE_EXPANDED;
 }
 #endif

Modified: stable/9/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h
==============================================================================
--- stable/9/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgb/ulp/tom/cxgb_toepcb.h	Thu Sep  6 17:28:47 2012	(r240169)
@@ -44,6 +44,7 @@
 #define TP_IS_A_SYNQ_ENTRY	(1 << 9)
 #define TP_ABORT_RPL_SENT	(1 << 10)
 #define TP_SEND_FIN          	(1 << 11)
+#define TP_SYNQE_EXPANDED	(1 << 12)
 
 struct toepcb {
 	TAILQ_ENTRY(toepcb) link; /* toep_list */

Modified: stable/9/sys/dev/cxgbe/adapter.h
==============================================================================
--- stable/9/sys/dev/cxgbe/adapter.h	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/adapter.h	Thu Sep  6 17:28:47 2012	(r240169)
@@ -135,6 +135,7 @@ enum {
 #else
 	FL_BUF_SIZES = 3,	/* cluster, jumbo9k, jumbo16k */
 #endif
+	OFLD_BUF_SIZE = MJUM16BYTES,	/* size of fl buffer for TOE rxq */
 
 	CTRL_EQ_QSIZE = 128,
 
@@ -143,6 +144,12 @@ enum {
 	TX_WR_FLITS = SGE_MAX_WR_LEN / 8
 };
 
+#ifdef T4_PKT_TIMESTAMP
+#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
+#else
+#define RX_COPY_THRESHOLD MINCLSIZE
+#endif
+
 enum {
 	/* adapter intr_type */
 	INTR_INTX	= (1 << 0),
@@ -510,6 +517,7 @@ struct rss_header;
 typedef int (*cpl_handler_t)(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
 typedef int (*an_handler_t)(struct sge_iq *, const struct rsp_ctrl *);
+typedef int (*fw_msg_handler_t)(struct adapter *, const __be64 *);
 
 struct adapter {
 	SLIST_ENTRY(adapter) link;
@@ -582,7 +590,8 @@ struct adapter {
 	struct callout sfl_callout;
 
 	an_handler_t an_handler __aligned(CACHE_LINE_SIZE);
-	cpl_handler_t cpl_handler[256];
+	fw_msg_handler_t fw_msg_handler[4];	/* NUM_FW6_TYPES */
+	cpl_handler_t cpl_handler[0xef];	/* NUM_CPL_CMDS */
 };
 
 #define ADAPTER_LOCK(sc)		mtx_lock(&(sc)->sc_lock)
@@ -741,6 +750,8 @@ void t4_os_link_changed(struct adapter *
 void t4_iterate(void (*)(struct adapter *, void *), void *);
 int t4_register_cpl_handler(struct adapter *, int, cpl_handler_t);
 int t4_register_an_handler(struct adapter *, an_handler_t);
+int t4_register_fw_msg_handler(struct adapter *, int, fw_msg_handler_t);
+int t4_filter_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *);
 
 /* t4_sge.c */
 void t4_sge_modload(void);

Modified: stable/9/sys/dev/cxgbe/common/t4_hw.h
==============================================================================
--- stable/9/sys/dev/cxgbe/common/t4_hw.h	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/common/t4_hw.h	Thu Sep  6 17:28:47 2012	(r240169)
@@ -161,10 +161,12 @@ struct pagepod {
 #define S_PPOD_TAG    6
 #define M_PPOD_TAG    0xFFFFFF
 #define V_PPOD_TAG(x) ((x) << S_PPOD_TAG)
+#define G_PPOD_TAG(x) (((x) >> S_PPOD_TAG) & M_PPOD_TAG)
 
 #define S_PPOD_PGSZ    30
 #define M_PPOD_PGSZ    0x3
 #define V_PPOD_PGSZ(x) ((x) << S_PPOD_PGSZ)
+#define G_PPOD_PGSZ(x) (((x) >> S_PPOD_PGSZ) & M_PPOD_PGSZ)
 
 #define S_PPOD_TID    32
 #define M_PPOD_TID    0xFFFFFF

Modified: stable/9/sys/dev/cxgbe/common/t4_msg.h
==============================================================================
--- stable/9/sys/dev/cxgbe/common/t4_msg.h	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/common/t4_msg.h	Thu Sep  6 17:28:47 2012	(r240169)
@@ -792,6 +792,14 @@ struct cpl_set_tcb_field {
 	__be64 val;
 };
 
+struct cpl_set_tcb_field_core {
+	union opcode_tid ot;
+	__be16 reply_ctrl;
+	__be16 word_cookie;
+	__be64 mask;
+	__be64 val;
+};
+
 /* cpl_set_tcb_field.word_cookie fields */
 #define S_WORD    0
 #define M_WORD    0x1F
@@ -1376,6 +1384,11 @@ struct cpl_rx_data_ack {
 	__be32 credit_dack;
 };
 
+struct cpl_rx_data_ack_core {
+	union opcode_tid ot;
+	__be32 credit_dack;
+};
+
 /* cpl_rx_data_ack.ack_seq fields */
 #define S_RX_CREDITS    0
 #define M_RX_CREDITS    0x3FFFFFF
@@ -2281,6 +2294,8 @@ enum {
 	FW6_TYPE_WR_RPL = 1,
 	FW6_TYPE_CQE = 2,
 	FW6_TYPE_OFLD_CONNECTION_WR_RPL = 3,
+
+	NUM_FW6_TYPES
 };
 
 struct cpl_fw6_msg_ofld_connection_wr_rpl {

Modified: stable/9/sys/dev/cxgbe/firmware/t4fw_cfg.txt
==============================================================================
--- stable/9/sys/dev/cxgbe/firmware/t4fw_cfg.txt	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/firmware/t4fw_cfg.txt	Thu Sep  6 17:28:47 2012	(r240169)
@@ -20,7 +20,7 @@
 	filterMode = fragmentation, mpshittype, protocol, vlan, port, fcoe
 
 	# TP rx and tx payload memory (% of the total EDRAM + DDR3).
-	tp_pmrx = 40
+	tp_pmrx = 38
 	tp_pmtx = 60
 	tp_pmrx_pagesize = 64K
 	tp_pmtx_pagesize = 64K
@@ -67,7 +67,8 @@
 	# driver will mask off features it won't use
 	protocol = ofld
 
-	tp_l2t = 100
+	tp_l2t = 4096
+	tp_ddp = 2
 
 	# TCAM has 8K cells; each region must start at a multiple of 128 cell.
 	# Each entry in these categories takes 4 cells each.  nhash will use the
@@ -136,7 +137,7 @@
 
 [fini]
 	version = 0x1
-	checksum = 0xdb5813f9
+	checksum = 0x162df193
 #
 # $FreeBSD$
 #

Modified: stable/9/sys/dev/cxgbe/offload.h
==============================================================================
--- stable/9/sys/dev/cxgbe/offload.h	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/offload.h	Thu Sep  6 17:28:47 2012	(r240169)
@@ -31,13 +31,16 @@
 #ifndef __T4_OFFLOAD_H__
 #define __T4_OFFLOAD_H__
 
-#define INIT_ULPTX_WR(w, wrlen, atomic, tid) do { \
-	(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
-	(w)->wr.wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
+#define INIT_ULPTX_WRH(w, wrlen, atomic, tid) do { \
+	(w)->wr_hi = htonl(V_FW_WR_OP(FW_ULPTX_WR) | V_FW_WR_ATOMIC(atomic)); \
+	(w)->wr_mid = htonl(V_FW_WR_LEN16(DIV_ROUND_UP(wrlen, 16)) | \
 			       V_FW_WR_FLOWID(tid)); \
-	(w)->wr.wr_lo = cpu_to_be64(0); \
+	(w)->wr_lo = cpu_to_be64(0); \
 } while (0)
 
+#define INIT_ULPTX_WR(w, wrlen, atomic, tid) \
+    INIT_ULPTX_WRH(&((w)->wr), wrlen, atomic, tid)
+
 #define INIT_TP_WR(w, tid) do { \
 	(w)->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | \
                               V_FW_WR_IMMDLEN(sizeof(*w) - sizeof(w->wr))); \

Modified: stable/9/sys/dev/cxgbe/t4_main.c
==============================================================================
--- stable/9/sys/dev/cxgbe/t4_main.c	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/t4_main.c	Thu Sep  6 17:28:47 2012	(r240169)
@@ -306,6 +306,7 @@ static void cxgbe_vlan_config(void *, st
 static int cpl_not_handled(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
 static int an_not_handled(struct sge_iq *, const struct rsp_ctrl *);
+static int fw_msg_not_handled(struct adapter *, const __be64 *);
 static int t4_sysctls(struct adapter *);
 static int cxgbe_sysctls(struct port_info *);
 static int sysctl_int_array(SYSCTL_HANDLER_ARGS);
@@ -345,8 +346,6 @@ static int del_filter(struct adapter *, 
 static void clear_filter(struct filter_entry *);
 static int set_filter_wr(struct adapter *, int);
 static int del_filter_wr(struct adapter *, int);
-static int filter_rpl(struct sge_iq *, const struct rss_header *,
-    struct mbuf *);
 static int get_sge_context(struct adapter *, struct t4_sge_context *);
 static int read_card_mem(struct adapter *, struct t4_mem_range *);
 #ifdef TCP_OFFLOAD
@@ -381,6 +380,10 @@ CTASSERT(offsetof(struct sge_ofld_rxq, i
 CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl));
 #endif
 
+/* No easy way to include t4_msg.h before adapter.h so we check this way */
+CTASSERT(ARRAY_SIZE(((struct adapter *)0)->cpl_handler) == NUM_CPL_CMDS);
+CTASSERT(ARRAY_SIZE(((struct adapter *)0)->fw_msg_handler) == NUM_FW6_TYPES);
+
 static int
 t4_probe(device_t dev)
 {
@@ -458,7 +461,9 @@ t4_attach(device_t dev)
 	sc->an_handler = an_not_handled;
 	for (i = 0; i < ARRAY_SIZE(sc->cpl_handler); i++)
 		sc->cpl_handler[i] = cpl_not_handled;
-	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, filter_rpl);
+	for (i = 0; i < ARRAY_SIZE(sc->fw_msg_handler); i++)
+		sc->fw_msg_handler[i] = fw_msg_not_handled;
+	t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl);
 
 	/* Prepare the adapter for operation */
 	rc = -t4_prep_adapter(sc);
@@ -510,18 +515,24 @@ t4_attach(device_t dev)
 		goto done; /* error message displayed already */
 
 	if (sc->flags & MASTER_PF) {
+		uint16_t indsz = min(RX_COPY_THRESHOLD - 1, M_INDICATESIZE);
 
 		/* final tweaks to some settings */
 
 		t4_load_mtus(sc, sc->params.mtus, sc->params.a_wnd,
 		    sc->params.b_wnd);
-		t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
+		/* 4K, 16K, 64K, 256K DDP "page sizes" */
+		t4_write_reg(sc, A_ULP_RX_TDDP_PSZ, V_HPZ0(0) | V_HPZ1(2) |
+		    V_HPZ2(4) | V_HPZ3(6));
+		t4_set_reg_field(sc, A_ULP_RX_CTL, F_TDDPTAGTCB, F_TDDPTAGTCB);
 		t4_set_reg_field(sc, A_TP_PARA_REG3, F_TUNNELCNGDROP0 |
-		    F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3, 0);
+		    F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 | F_TUNNELCNGDROP3,
+		    F_TUNNELCNGDROP0 | F_TUNNELCNGDROP1 | F_TUNNELCNGDROP2 |
+		    F_TUNNELCNGDROP3);
 		t4_set_reg_field(sc, A_TP_PARA_REG5,
 		    V_INDICATESIZE(M_INDICATESIZE) |
 		    F_REARMDDPOFFSET | F_RESETDDPOFFSET,
-		    V_INDICATESIZE(M_INDICATESIZE) |
+		    V_INDICATESIZE(indsz) |
 		    F_REARMDDPOFFSET | F_RESETDDPOFFSET);
 	} else {
 		/*
@@ -2942,7 +2953,8 @@ cxgbe_tick(void *arg)
 	ifp->if_omcasts = s->tx_mcast_frames - s->tx_pause;
 	ifp->if_imcasts = s->rx_mcast_frames - s->rx_pause;
 	ifp->if_iqdrops = s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 +
-	    s->rx_ovflow3;
+	    s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 +
+	    s->rx_trunc3;
 
 	drops = s->tx_drop;
 	for_each_txq(pi, i, txq)
@@ -2977,7 +2989,7 @@ cpl_not_handled(struct sge_iq *iq, const
 	panic("%s: opcode 0x%02x on iq %p with payload %p",
 	    __func__, rss->opcode, iq, m);
 #else
-	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p",
+	log(LOG_ERR, "%s: opcode 0x%02x on iq %p with payload %p\n",
 	    __func__, rss->opcode, iq, m);
 	m_freem(m);
 #endif
@@ -3006,7 +3018,7 @@ an_not_handled(struct sge_iq *iq, const 
 #ifdef INVARIANTS
 	panic("%s: async notification on iq %p (ctrl %p)", __func__, iq, ctrl);
 #else
-	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)",
+	log(LOG_ERR, "%s: async notification on iq %p (ctrl %p)\n",
 	    __func__, iq, ctrl);
 #endif
 	return (EDOOFUS);
@@ -3025,6 +3037,35 @@ t4_register_an_handler(struct adapter *s
 }
 
 static int
+fw_msg_not_handled(struct adapter *sc, const __be64 *rpl)
+{
+	__be64 *r = __DECONST(__be64 *, rpl);
+	struct cpl_fw6_msg *cpl = member2struct(cpl_fw6_msg, data, r);
+
+#ifdef INVARIANTS
+	panic("%s: fw_msg type %d", __func__, cpl->type);
+#else
+	log(LOG_ERR, "%s: fw_msg type %d\n", __func__, cpl->type);
+#endif
+	return (EDOOFUS);
+}
+
+int
+t4_register_fw_msg_handler(struct adapter *sc, int type, fw_msg_handler_t h)
+{
+	uintptr_t *loc, new;
+
+	if (type >= ARRAY_SIZE(sc->fw_msg_handler))
+		return (EINVAL);
+
+	new = h ? (uintptr_t)h : (uintptr_t)fw_msg_not_handled;
+	loc = (uintptr_t *) &sc->fw_msg_handler[type];
+	atomic_store_rel_ptr(loc, new);
+
+	return (0);
+}
+
+static int
 t4_sysctls(struct adapter *sc)
 {
 	struct sysctl_ctx_list *ctx;
@@ -3191,10 +3232,13 @@ t4_sysctls(struct adapter *sc)
 		sc->tt.ddp = 0;
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW,
 		    &sc->tt.ddp, 0, "DDP allowed");
-		sc->tt.indsz = M_INDICATESIZE;
+
+		sc->tt.indsz = G_INDICATESIZE(t4_read_reg(sc, A_TP_PARA_REG5));
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "indsz", CTLFLAG_RW,
 		    &sc->tt.indsz, 0, "DDP max indicate size allowed");
-		sc->tt.ddp_thres = 3*4096;
+
+		sc->tt.ddp_thres =
+		    G_RXCOALESCESIZE(t4_read_reg(sc, A_TP_PARA_REG2));
 		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp_thres", CTLFLAG_RW,
 		    &sc->tt.ddp_thres, 0, "DDP threshold");
 	}
@@ -4961,8 +5005,8 @@ del_filter_wr(struct adapter *sc, int fi
 	return (0);
 }
 
-static int
-filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+int
+t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
 	struct adapter *sc = iq->adapter;
 	const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1);

Modified: stable/9/sys/dev/cxgbe/t4_sge.c
==============================================================================
--- stable/9/sys/dev/cxgbe/t4_sge.c	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/t4_sge.c	Thu Sep  6 17:28:47 2012	(r240169)
@@ -68,12 +68,37 @@ static struct fl_buf_info fl_buf_info[FL
 #define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
 #define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)
 
-enum {
-	FL_PKTSHIFT = 2
-};
+/*
+ * Ethernet frames are DMA'd at this byte offset into the freelist buffer.
+ * 0-7 are valid values.
+ */
+static int fl_pktshift = 2;
+TUNABLE_INT("hw.cxgbe.fl_pktshift", &fl_pktshift);
 
-static int fl_pad = CACHE_LINE_SIZE;
-static int spg_len = 64;
+/*
+ * Pad ethernet payload up to this boundary.
+ * -1: driver should figure out a good value.
+ *  Any power of 2, from 32 to 4096 (both inclusive) is a valid value.
+ */
+static int fl_pad = -1;
+TUNABLE_INT("hw.cxgbe.fl_pad", &fl_pad);
+
+/*
+ * Status page length.
+ * -1: driver should figure out a good value.
+ *  64 or 128 are the only other valid values.
+ */
+static int spg_len = -1;
+TUNABLE_INT("hw.cxgbe.spg_len", &spg_len);
+
+/*
+ * Congestion drops.
+ * -1: no congestion feedback (not recommended).
+ *  0: backpressure the channel instead of dropping packets right away.
+ *  1: no backpressure, drop packets for the congested queue immediately.
+ */
+static int cong_drop = 0;
+TUNABLE_INT("hw.cxgbe.cong_drop", &cong_drop);
 
 /* Used to track coalesced tx work request */
 struct txpkts {
@@ -160,7 +185,7 @@ static void write_eqflush_wr(struct sge_
 static __be64 get_flit(bus_dma_segment_t *, int, int);
 static int handle_sge_egr_update(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
-static int handle_fw_rpl(struct sge_iq *, const struct rss_header *,
+static int handle_fw_msg(struct sge_iq *, const struct rss_header *,
     struct mbuf *);
 
 static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
@@ -170,7 +195,8 @@ extern u_int cpu_clflush_line_size;
 #endif
 
 /*
- * Called on MOD_LOAD and fills up fl_buf_info[].
+ * Called on MOD_LOAD.  Fills up fl_buf_info[] and validates/calculates the SGE
+ * tunables.
  */
 void
 t4_sge_modload(void)
@@ -191,10 +217,49 @@ t4_sge_modload(void)
 		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
 	}
 
+	if (fl_pktshift < 0 || fl_pktshift > 7) {
+		printf("Invalid hw.cxgbe.fl_pktshift value (%d),"
+		    " using 2 instead.\n", fl_pktshift);
+		fl_pktshift = 2;
+	}
+
+	if (fl_pad < 32 || fl_pad > 4096 || !powerof2(fl_pad)) {
+		int pad;
+
 #if defined(__i386__) || defined(__amd64__)
-	fl_pad = max(cpu_clflush_line_size, 32);
-	spg_len = cpu_clflush_line_size > 64 ? 128 : 64;
+		pad = max(cpu_clflush_line_size, 32);
+#else
+		pad = max(CACHE_LINE_SIZE, 32);
 #endif
+		pad = min(pad, 4096);
+
+		if (fl_pad != -1) {
+			printf("Invalid hw.cxgbe.fl_pad value (%d),"
+			    " using %d instead.\n", fl_pad, pad);
+		}
+		fl_pad = pad;
+	}
+
+	if (spg_len != 64 && spg_len != 128) {
+		int len;
+
+#if defined(__i386__) || defined(__amd64__)
+		len = cpu_clflush_line_size > 64 ? 128 : 64;
+#else
+		len = 64;
+#endif
+		if (spg_len != -1) {
+			printf("Invalid hw.cxgbe.spg_len value (%d),"
+			    " using %d instead.\n", spg_len, len);
+		}
+		spg_len = len;
+	}
+
+	if (cong_drop < -1 || cong_drop > 1) {
+		printf("Invalid hw.cxgbe.cong_drop value (%d),"
+		    " using 0 instead.\n", cong_drop);
+		cong_drop = 0;
+	}
 }
 
 /**
@@ -215,7 +280,7 @@ t4_sge_init(struct adapter *sc)
 	ctrl_mask = V_PKTSHIFT(M_PKTSHIFT) | F_RXPKTCPLMODE |
 	    V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
 	    F_EGRSTATUSPAGESIZE;
-	ctrl_val = V_PKTSHIFT(FL_PKTSHIFT) | F_RXPKTCPLMODE |
+	ctrl_val = V_PKTSHIFT(fl_pktshift) | F_RXPKTCPLMODE |
 	    V_INGPADBOUNDARY(ilog2(fl_pad) - 5) |
 	    V_EGRSTATUSPAGESIZE(spg_len == 128);
 
@@ -296,11 +361,13 @@ t4_sge_init(struct adapter *sc)
 	sc->sge.timer_val[4] = G_TIMERVALUE4(v) / core_ticks_per_usec(sc);
 	sc->sge.timer_val[5] = G_TIMERVALUE5(v) / core_ticks_per_usec(sc);
 
-	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_rpl);
-	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_rpl);
+	t4_register_cpl_handler(sc, CPL_FW4_MSG, handle_fw_msg);
+	t4_register_cpl_handler(sc, CPL_FW6_MSG, handle_fw_msg);
 	t4_register_cpl_handler(sc, CPL_SGE_EGR_UPDATE, handle_sge_egr_update);
 	t4_register_cpl_handler(sc, CPL_RX_PKT, t4_eth_rx);
 
+	t4_register_fw_msg_handler(sc, FW6_TYPE_CMD_RPL, t4_handle_fw_rpl);
+
 	return (rc);
 }
 
@@ -477,6 +544,18 @@ port_intr_iq(struct port_info *pi, int i
 	return (iq);
 }
 
+static inline int
+mtu_to_bufsize(int mtu)
+{
+	int bufsize;
+
+	/* large enough for a frame even when VLAN extraction is disabled */
+	bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + mtu;
+	bufsize = roundup(bufsize + fl_pktshift, fl_pad);
+
+	return (bufsize);
+}
+
 int
 t4_setup_port_queues(struct port_info *pi)
 {
@@ -493,6 +572,7 @@ t4_setup_port_queues(struct port_info *p
 	struct adapter *sc = pi->adapter;
 	struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
 	struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
+	int bufsize = mtu_to_bufsize(pi->ifp->if_mtu);
 
 	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "rxq", CTLFLAG_RD,
 	    NULL, "rx queues");
@@ -522,7 +602,7 @@ t4_setup_port_queues(struct port_info *p
 
 		snprintf(name, sizeof(name), "%s rxq%d-fl",
 		    device_get_nameunit(pi->dev), i);
-		init_fl(&rxq->fl, pi->qsize_rxq / 8, pi->ifp->if_mtu, name);
+		init_fl(&rxq->fl, pi->qsize_rxq / 8, bufsize, name);
 
 		if (sc->flags & INTR_DIRECT
 #ifdef TCP_OFFLOAD
@@ -547,7 +627,7 @@ t4_setup_port_queues(struct port_info *p
 
 		snprintf(name, sizeof(name), "%s ofld_rxq%d-fl",
 		    device_get_nameunit(pi->dev), i);
-		init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, MJUM16BYTES, name);
+		init_fl(&ofld_rxq->fl, pi->qsize_rxq / 8, OFLD_BUF_SIZE, name);
 
 		if (sc->flags & INTR_DIRECT ||
 		    (sc->intr_count > 1 && pi->nofldrxq > pi->nrxq)) {
@@ -942,13 +1022,6 @@ service_iq(struct sge_iq *iq, int budget
 	return (0);
 }
 
-
-#ifdef T4_PKT_TIMESTAMP
-#define RX_COPY_THRESHOLD (MINCLSIZE - 8)
-#else
-#define RX_COPY_THRESHOLD MINCLSIZE
-#endif
-
 static struct mbuf *
 get_fl_payload(struct adapter *sc, struct sge_fl *fl, uint32_t len_newbuf,
     int *fl_bufs_used)
@@ -1050,9 +1123,9 @@ t4_eth_rx(struct sge_iq *iq, const struc
 	KASSERT(m0 != NULL, ("%s: no payload with opcode %02x", __func__,
 	    rss->opcode));
 
-	m0->m_pkthdr.len -= FL_PKTSHIFT;
-	m0->m_len -= FL_PKTSHIFT;
-	m0->m_data += FL_PKTSHIFT;
+	m0->m_pkthdr.len -= fl_pktshift;
+	m0->m_len -= fl_pktshift;
+	m0->m_data += fl_pktshift;
 
 	m0->m_pkthdr.rcvif = ifp;
 	m0->m_flags |= M_FLOWID;
@@ -1386,11 +1459,8 @@ t4_update_fl_bufsize(struct ifnet *ifp)
 	struct port_info *pi = ifp->if_softc;
 	struct sge_rxq *rxq;
 	struct sge_fl *fl;
-	int i, bufsize;
+	int i, bufsize = mtu_to_bufsize(ifp->if_mtu);
 
-	/* large enough for a frame even when VLAN extraction is disabled */
-	bufsize = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ifp->if_mtu;
-	bufsize = roundup(bufsize + FL_PKTSHIFT, fl_pad);
 	for_each_rxq(pi, i, rxq) {
 		fl = &rxq->fl;
 
@@ -1793,6 +1863,18 @@ free_mgmtq(struct adapter *sc)
 	return free_wrq(sc, &sc->sge.mgmtq);
 }
 
+static inline int
+tnl_cong(struct port_info *pi)
+{
+
+	if (cong_drop == -1)
+		return (-1);
+	else if (cong_drop == 1)
+		return (0);
+	else
+		return (1 << pi->tx_chan);
+}
+
 static int
 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx,
     struct sysctl_oid *oid)
@@ -1801,7 +1883,7 @@ alloc_rxq(struct port_info *pi, struct s
 	struct sysctl_oid_list *children;
 	char name[16];
 
-	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan);
+	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, tnl_cong(pi));
 	if (rc != 0)
 		return (rc);
 
@@ -3433,17 +3515,15 @@ handle_sge_egr_update(struct sge_iq *iq,
 }
 
 static int
-handle_fw_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
+handle_fw_msg(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
 {
+	struct adapter *sc = iq->adapter;
 	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
 
 	KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__,
 	    rss->opcode));
 
-	if (cpl->type == FW6_TYPE_CMD_RPL)
-		t4_handle_fw_rpl(iq->adapter, cpl->data);
-
-	return (0);
+	return (sc->fw_msg_handler[cpl->type](sc, &cpl->data[0]));
 }
 
 static int

Modified: stable/9/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- stable/9/sys/dev/cxgbe/tom/t4_connect.c	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/tom/t4_connect.c	Thu Sep  6 17:28:47 2012	(r240169)
@@ -247,10 +247,14 @@ calc_opt2a(struct socket *so)
 	opt2 |= F_RX_COALESCE_VALID | V_RX_COALESCE(M_RX_COALESCE);
 	opt2 |= F_RSS_QUEUE_VALID | V_RSS_QUEUE(toep->ofld_rxq->iq.abs_id);
 
+#ifdef USE_DDP_RX_FLOW_CONTROL
+	if (toep->ulp_mode == ULP_MODE_TCPDDP)
+		opt2 |= F_RX_FC_VALID | F_RX_FC_DDP;
+#endif
+
 	return (htobe32(opt2));
 }
 
-
 void
 t4_init_connect_cpl_handlers(struct adapter *sc)
 {
@@ -320,7 +324,10 @@ t4_connect(struct toedev *tod, struct so
 
 	toep->tid = atid;
 	toep->l2te = e;
-	toep->ulp_mode = ULP_MODE_NONE;
+	if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0)
+		set_tcpddp_ulp_mode(toep);
+	else
+		toep->ulp_mode = ULP_MODE_NONE;
 	SOCKBUF_LOCK(&so->so_rcv);
 	/* opt0 rcv_bufsiz initially, assumes its normal meaning later */
 	toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
@@ -354,7 +361,7 @@ t4_connect(struct toedev *tod, struct so
 
 	rc = t4_l2t_send(sc, wr, e);
 	if (rc == 0) {
-		toepcb_set_flag(toep, TPF_CPL_PENDING);
+		toep->flags |= TPF_CPL_PENDING;
 		return (0);
 	}
 

Modified: stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Sep  6 16:38:55 2012	(r240168)
+++ stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Sep  6 17:28:47 2012	(r240169)
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
 #include "common/common.h"
 #include "common/t4_msg.h"
 #include "common/t4_regs.h"
+#include "common/t4_tcb.h"
 #include "tom/t4_tom_l2t.h"
 #include "tom/t4_tom.h"
 
@@ -80,7 +81,7 @@ send_flowc_wr(struct toepcb *toep, struc
 	unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN;
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 
-	KASSERT(!toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+	KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT),
 	    ("%s: flowc for tid %u sent already", __func__, toep->tid));
 
 	CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid);
@@ -130,7 +131,7 @@ send_flowc_wr(struct toepcb *toep, struc
 		toep->txsd_pidx = 0;
 	toep->txsd_avail--;
 
-	toepcb_set_flag(toep, TPF_FLOWC_WR_SENT);
+	toep->flags |= TPF_FLOWC_WR_SENT;
         t4_wrq_tx(sc, wr);
 }
 
@@ -150,15 +151,15 @@ send_reset(struct adapter *sc, struct to
 	    inp->inp_flags & INP_DROPPED ? "inp dropped" :
 	    tcpstates[tp->t_state],
 	    toep->flags, inp->inp_flags,
-	    toepcb_flag(toep, TPF_ABORT_SHUTDOWN) ?
+	    toep->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 
-	if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
+	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		return;	/* abort already in progress */
 
-	toepcb_set_flag(toep, TPF_ABORT_SHUTDOWN);
+	toep->flags |= TPF_ABORT_SHUTDOWN;
 
-	KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %d.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
@@ -173,7 +174,7 @@ send_reset(struct adapter *sc, struct to
 		req->rsvd0 = htobe32(snd_nxt);
 	else
 		req->rsvd0 = htobe32(tp->snd_nxt);
-	req->rsvd1 = !toepcb_flag(toep, TPF_TX_DATA_SENT);
+	req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT);
 	req->cmd = CPL_ABORT_SEND_RST;
 
 	/*
@@ -299,12 +300,14 @@ make_established(struct toepcb *toep, ui
 }
 
 static int
-send_rx_credits(struct adapter *sc, struct toepcb *toep, uint32_t credits)
+send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits)
 {
 	struct wrqe *wr;
 	struct cpl_rx_data_ack *req;
 	uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1);
 
+	KASSERT(credits >= 0, ("%s: %d credits", __func__, credits));
+
 	wr = alloc_wrqe(sizeof(*req), toep->ctrlq);
 	if (wr == NULL)
 		return (0);
@@ -323,25 +326,28 @@ t4_rcvd(struct toedev *tod, struct tcpcb
 	struct adapter *sc = tod->tod_softc;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
-	struct sockbuf *so_rcv = &so->so_rcv;
+	struct sockbuf *sb = &so->so_rcv;
 	struct toepcb *toep = tp->t_toe;
-	int must_send;
+	int credits;
 
 	INP_WLOCK_ASSERT(inp);
 
-	SOCKBUF_LOCK(so_rcv);
-	KASSERT(toep->enqueued >= so_rcv->sb_cc,
-	    ("%s: so_rcv->sb_cc > enqueued", __func__));
-	toep->rx_credits += toep->enqueued - so_rcv->sb_cc;
-	toep->enqueued = so_rcv->sb_cc;
-	SOCKBUF_UNLOCK(so_rcv);
-
-	must_send = toep->rx_credits + 16384 >= tp->rcv_wnd;
-	if (must_send || toep->rx_credits >= 15 * 1024) {
-		int credits;
+	SOCKBUF_LOCK(sb);
+	KASSERT(toep->sb_cc >= sb->sb_cc,
+	    ("%s: sb %p has more data (%d) than last time (%d).",
+	    __func__, sb, sb->sb_cc, toep->sb_cc));
+	toep->rx_credits += toep->sb_cc - sb->sb_cc;
+	toep->sb_cc = sb->sb_cc;
+	credits = toep->rx_credits;
+	SOCKBUF_UNLOCK(sb);
+
+	if (credits > 0 &&
+	    (credits + 16384 >= tp->rcv_wnd || credits >= 15 * 1024)) {
 
-		credits = send_rx_credits(sc, toep, toep->rx_credits);
+		credits = send_rx_credits(sc, toep, credits);
+		SOCKBUF_LOCK(sb);
 		toep->rx_credits -= credits;
+		SOCKBUF_UNLOCK(sb);
 		tp->rcv_wnd += credits;
 		tp->rcv_adv += credits;
 	}
@@ -358,12 +364,12 @@ close_conn(struct adapter *sc, struct to
 	unsigned int tid = toep->tid;
 
 	CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid,
-	    toepcb_flag(toep, TPF_FIN_SENT) ? ", IGNORED" : "");
+	    toep->flags & TPF_FIN_SENT ? ", IGNORED" : "");
 
-	if (toepcb_flag(toep, TPF_FIN_SENT))
+	if (toep->flags & TPF_FIN_SENT)
 		return (0);
 
-	KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, tid));
 
 	wr = alloc_wrqe(sizeof(*req), toep->ofld_txq);
@@ -381,8 +387,8 @@ close_conn(struct adapter *sc, struct to
         OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
 	req->rsvd = 0;
 
-	toepcb_set_flag(toep, TPF_FIN_SENT);
-	toepcb_clr_flag(toep, TPF_SEND_FIN);
+	toep->flags |= TPF_FIN_SENT;
+	toep->flags &= ~TPF_SEND_FIN;
 	t4_l2t_send(sc, wr, toep->l2te);
 
 	return (0);
@@ -534,17 +540,18 @@ t4_push_frames(struct adapter *sc, struc
 	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];
 
 	INP_WLOCK_ASSERT(inp);
-	KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
 
-	if (toep->ulp_mode != ULP_MODE_NONE)
+	if (__predict_false(toep->ulp_mode != ULP_MODE_NONE &&
+	    toep->ulp_mode != ULP_MODE_TCPDDP))
 		CXGBE_UNIMPLEMENTED("ulp_mode");
 
 	/*
 	 * This function doesn't resume by itself.  Someone else must clear the
 	 * flag and call this function.
 	 */
-	if (__predict_false(toepcb_flag(toep, TPF_TX_SUSPENDED)))
+	if (__predict_false(toep->flags & TPF_TX_SUSPENDED))
 		return;
 
 	do {
@@ -570,7 +577,7 @@ t4_push_frames(struct adapter *sc, struc
 				plen -= m->m_len;
 				if (plen == 0) {
 					/* Too few credits */
-					toepcb_set_flag(toep, TPF_TX_SUSPENDED);
+					toep->flags |= TPF_TX_SUSPENDED;
 					SOCKBUF_UNLOCK(sb);
 					return;
 				}
@@ -613,7 +620,7 @@ unlocked:
 			break;
 		}
 
-		if (__predict_false(toepcb_flag(toep, TPF_FIN_SENT)))
+		if (__predict_false(toep->flags & TPF_FIN_SENT))
 			panic("%s: excess tx.", __func__);
 
 		if (plen <= max_imm) {
@@ -624,7 +631,7 @@ unlocked:
 					toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
-				toepcb_set_flag(toep, TPF_TX_SUSPENDED);
+				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
@@ -642,7 +649,7 @@ unlocked:
 			wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq);
 			if (wr == NULL) {
 				/* XXX: how will we recover from this? */
-				toepcb_set_flag(toep, TPF_TX_SUSPENDED);
+				toep->flags |= TPF_TX_SUSPENDED;
 				return;
 			}
 			txwr = wrtod(wr);
@@ -671,7 +678,7 @@ unlocked:
 		sb->sb_sndptr = sb_sndptr;
 		SOCKBUF_UNLOCK(sb);
 
-		toepcb_set_flag(toep, TPF_TX_DATA_SENT);
+		toep->flags |= TPF_TX_DATA_SENT;
 
 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
 		txsd->plen = plen;
@@ -687,7 +694,7 @@ unlocked:
 	} while (m != NULL);
 
 	/* Send a FIN if requested, but only if there's no more data to send */
-	if (m == NULL && toepcb_flag(toep, TPF_SEND_FIN))
+	if (m == NULL && toep->flags & TPF_SEND_FIN)
 		close_conn(sc, toep);
 }
 
@@ -724,7 +731,7 @@ t4_send_fin(struct toedev *tod, struct t
 	    ("%s: inp %p dropped.", __func__, inp));
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
-	toepcb_set_flag(toep, TPF_SEND_FIN);
+	toep->flags |= TPF_SEND_FIN;
 	t4_push_frames(sc, toep);
 
 	return (0);
@@ -745,7 +752,7 @@ t4_send_rst(struct toedev *tod, struct t
 	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
 
 	/* hmmmm */
-	KASSERT(toepcb_flag(toep, TPF_FLOWC_WR_SENT),
+	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
 	    ("%s: flowc for tid %u [%s] not sent already",
 	    __func__, toep->tid, tcpstates[tp->t_state]));
 
@@ -765,7 +772,8 @@ do_peer_close(struct sge_iq *iq, const s
 	struct toepcb *toep = lookup_tid(sc, tid);
 	struct inpcb *inp = toep->inp;
 	struct tcpcb *tp = NULL;
-	struct socket *so = NULL;
+	struct socket *so;
+	struct sockbuf *sb;
 #ifdef INVARIANTS
 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
 #endif
@@ -782,13 +790,38 @@ do_peer_close(struct sge_iq *iq, const s
 	CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__,
 	    tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp);
 
-	if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
+	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
+	tp->rcv_nxt++;	/* FIN */
+
 	so = inp->inp_socket;
+	sb = &so->so_rcv;
+	SOCKBUF_LOCK(sb);
+	if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) {
+		m = m_get(M_NOWAIT, MT_DATA);
+		if (m == NULL)
+			CXGBE_UNIMPLEMENTED("mbuf alloc failure");
+
+		m->m_len = be32toh(cpl->rcv_nxt) - tp->rcv_nxt;
+		m->m_flags |= M_DDP;	/* Data is already where it should be */
+		m->m_data = "nothing to see here";
+		tp->rcv_nxt = be32toh(cpl->rcv_nxt);
+
+		toep->ddp_flags &= ~(DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE);
+
+		KASSERT(toep->sb_cc >= sb->sb_cc,
+		    ("%s: sb %p has more data (%d) than last time (%d).",
+		    __func__, sb, sb->sb_cc, toep->sb_cc));
+		toep->rx_credits += toep->sb_cc - sb->sb_cc;
+#ifdef USE_DDP_RX_FLOW_CONTROL
+		toep->rx_credits -= m->m_len;	/* adjust for F_RX_FC_DDP */
+#endif
+		sbappendstream_locked(sb, m);
+		toep->sb_cc = sb->sb_cc;
+	}
+	socantrcvmore_locked(so);	/* unlocks the sockbuf */
 
-	socantrcvmore(so);
-	tp->rcv_nxt++;	/* FIN */
 	KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt),
 	    ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt,
 	    be32toh(cpl->rcv_nxt)));
@@ -855,7 +888,7 @@ do_close_con_rpl(struct sge_iq *iq, cons
 	CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x",
 	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags);
 
-	if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN))
+	if (toep->flags & TPF_ABORT_SHUTDOWN)
 		goto done;
 
 	so = inp->inp_socket;
@@ -953,7 +986,7 @@ do_abort_req(struct sge_iq *iq, const st
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
 
-	if (toepcb_flag(toep, TPF_SYNQE))
+	if (toep->flags & TPF_SYNQE)
 		return (do_abort_req_synqe(iq, rss, m));
 
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
@@ -974,19 +1007,19 @@ do_abort_req(struct sge_iq *iq, const st
 
 	CTR6(KTR_CXGBE,
 	    "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d",
-	    __func__, tid, tcpstates[tp->t_state], toep->flags, inp->inp_flags,
-	    cpl->status);
+	    __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags,
+	    inp->inp_flags, cpl->status);
 
 	/*
 	 * If we'd initiated an abort earlier the reply to it is responsible for
 	 * cleaning up resources.  Otherwise we tear everything down right here
 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
 	 */
-	if (toepcb_flag(toep, TPF_ABORT_SHUTDOWN)) {
+	if (toep->flags & TPF_ABORT_SHUTDOWN) {
 		INP_WUNLOCK(inp);
 		goto done;
 	}
-	toepcb_set_flag(toep, TPF_ABORT_SHUTDOWN);
+	toep->flags |= TPF_ABORT_SHUTDOWN;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-stable-9 mailing list