svn commit: r348704 - in stable/11/sys/dev/cxgbe: cxgbei tom
Navdeep Parhar
np at FreeBSD.org
Wed Jun 5 21:46:59 UTC 2019
Author: np
Date: Wed Jun 5 21:46:56 2019
New Revision: 348704
URL: https://svnweb.freebsd.org/changeset/base/348704
Log:
MFC r348491:

cxgbe/t4_tom: adjust the hardware receive window to match changes to the
receive sockbuf's high water mark.

Calculate rx credits on the spot instead of tracking sbused/sb_cc and
rx_credits in the toepcb. The previous method worked when the high
water mark changed due to SB_AUTOSIZE but not when it was adjusted
directly (for example, by the soreserve in nfsrvd_addsock).

This fixes a connection hang while running iozone over an NFS mounted
share where nfsd's TCP sockets are being handled by t4_tom.

Sponsored by: Chelsio Communications
Approved by: re@ (gjb@)
Modified:
stable/11/sys/dev/cxgbe/cxgbei/cxgbei.c
stable/11/sys/dev/cxgbe/tom/t4_connect.c
stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
stable/11/sys/dev/cxgbe/tom/t4_ddp.c
stable/11/sys/dev/cxgbe/tom/t4_listen.c
stable/11/sys/dev/cxgbe/tom/t4_tls.c
stable/11/sys/dev/cxgbe/tom/t4_tom.h
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/dev/cxgbe/cxgbei/cxgbei.c
==============================================================================
--- stable/11/sys/dev/cxgbe/cxgbei/cxgbei.c Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/cxgbei/cxgbei.c Wed Jun 5 21:46:56 2019 (r348704)
@@ -651,7 +651,6 @@ do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_he
tp->t_rcvtime = ticks;
/* update rx credits */
- toep->rx_credits += pdu_len;
t4_rcvd(&toep->td->tod, tp); /* XXX: sc->tom_softc.tod */
so = inp->inp_socket;
Modified: stable/11/sys/dev/cxgbe/tom/t4_connect.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_connect.c Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/tom/t4_connect.c Wed Jun 5 21:46:56 2019 (r348704)
@@ -380,8 +380,7 @@ t4_connect(struct toedev *tod, struct socket *so, stru
toep->vnet = so->so_vnet;
set_ulp_mode(toep, select_ulp_mode(so, sc, &settings));
SOCKBUF_LOCK(&so->so_rcv);
- /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
- toep->rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
+ toep->opt0_rcv_bufsize = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
SOCKBUF_UNLOCK(&so->so_rcv);
/*
@@ -435,7 +434,7 @@ t4_connect(struct toedev *tod, struct socket *so, stru
cpl->peer_ip_hi = *(uint64_t *)&inp->in6p_faddr.s6_addr[0];
cpl->peer_ip_lo = *(uint64_t *)&inp->in6p_faddr.s6_addr[8];
cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale,
- toep->rx_credits, toep->ulp_mode, &settings);
+ toep->opt0_rcv_bufsize, toep->ulp_mode, &settings);
cpl->opt2 = calc_opt2a(so, toep, &settings);
} else {
struct cpl_act_open_req *cpl = wrtod(wr);
@@ -464,7 +463,7 @@ t4_connect(struct toedev *tod, struct socket *so, stru
inp_4tuple_get(inp, &cpl->local_ip, &cpl->local_port,
&cpl->peer_ip, &cpl->peer_port);
cpl->opt0 = calc_opt0(so, vi, toep->l2te, mtu_idx, rscale,
- toep->rx_credits, toep->ulp_mode, &settings);
+ toep->opt0_rcv_bufsize, toep->ulp_mode, &settings);
cpl->opt2 = calc_opt2a(so, toep, &settings);
}
Modified: stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jun 5 21:46:56 2019 (r348704)
@@ -409,20 +409,10 @@ make_established(struct toepcb *toep, uint32_t iss, ui
tp->irs = irs;
tcp_rcvseqinit(tp);
- tp->rcv_wnd = toep->rx_credits << 10;
+ tp->rcv_wnd = toep->opt0_rcv_bufsize << 10;
tp->rcv_adv += tp->rcv_wnd;
tp->last_ack_sent = tp->rcv_nxt;
- /*
- * If we were unable to send all rx credits via opt0, save the remainder
- * in rx_credits so that they can be handed over with the next credit
- * update.
- */
- SOCKBUF_LOCK(&so->so_rcv);
- bufsize = select_rcv_wnd(so);
- SOCKBUF_UNLOCK(&so->so_rcv);
- toep->rx_credits = bufsize - tp->rcv_wnd;
-
tp->iss = iss;
tcp_sendseqinit(tp);
tp->snd_una = iss + 1;
@@ -493,37 +483,29 @@ t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp)
struct socket *so = inp->inp_socket;
struct sockbuf *sb = &so->so_rcv;
struct toepcb *toep = tp->t_toe;
- int credits;
+ int rx_credits;
INP_WLOCK_ASSERT(inp);
-
SOCKBUF_LOCK_ASSERT(sb);
- KASSERT(toep->sb_cc >= sbused(sb),
- ("%s: sb %p has more data (%d) than last time (%d).",
- __func__, sb, sbused(sb), toep->sb_cc));
- credits = toep->sb_cc - sbused(sb);
- toep->sb_cc = sbused(sb);
+ rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
if (toep->ulp_mode == ULP_MODE_TLS) {
- if (toep->tls.rcv_over >= credits) {
- toep->tls.rcv_over -= credits;
- credits = 0;
+ if (toep->tls.rcv_over >= rx_credits) {
+ toep->tls.rcv_over -= rx_credits;
+ rx_credits = 0;
} else {
- credits -= toep->tls.rcv_over;
+ rx_credits -= toep->tls.rcv_over;
toep->tls.rcv_over = 0;
}
}
- toep->rx_credits += credits;
- if (toep->rx_credits > 0 &&
- (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 ||
- (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
- toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) {
-
- credits = send_rx_credits(sc, toep, toep->rx_credits);
- toep->rx_credits -= credits;
- tp->rcv_wnd += credits;
- tp->rcv_adv += credits;
+ if (rx_credits > 0 &&
+ (tp->rcv_wnd <= 32 * 1024 || rx_credits >= 64 * 1024 ||
+ (rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) ||
+ sbused(sb) + tp->rcv_wnd < sb->sb_lowat)) {
+ rx_credits = send_rx_credits(sc, toep, rx_credits);
+ tp->rcv_wnd += rx_credits;
+ tp->rcv_adv += rx_credits;
} else if (toep->flags & TPF_FORCE_CREDITS)
send_rx_modulate(sc, toep);
}
@@ -1557,7 +1539,7 @@ do_rx_data(struct sge_iq *iq, const struct rss_header
struct tcpcb *tp;
struct socket *so;
struct sockbuf *sb;
- int len;
+ int len, rx_credits;
uint32_t ddp_placed = 0;
if (__predict_false(toep->flags & TPF_SYNQE)) {
@@ -1642,8 +1624,6 @@ do_rx_data(struct sge_iq *iq, const struct rss_header
if (!sbreserve_locked(sb, newsize, so, NULL))
sb->sb_flags &= ~SB_AUTOSIZE;
- else
- toep->rx_credits += newsize - hiwat;
}
if (toep->ulp_mode == ULP_MODE_TCPDDP) {
@@ -1681,19 +1661,12 @@ do_rx_data(struct sge_iq *iq, const struct rss_header
}
}
- KASSERT(toep->sb_cc >= sbused(sb),
- ("%s: sb %p has more data (%d) than last time (%d).",
- __func__, sb, sbused(sb), toep->sb_cc));
- toep->rx_credits += toep->sb_cc - sbused(sb);
sbappendstream_locked(sb, m, 0);
- toep->sb_cc = sbused(sb);
- if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
- int credits;
-
- credits = send_rx_credits(sc, toep, toep->rx_credits);
- toep->rx_credits -= credits;
- tp->rcv_wnd += credits;
- tp->rcv_adv += credits;
+ rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
+ if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
+ rx_credits = send_rx_credits(sc, toep, rx_credits);
+ tp->rcv_wnd += rx_credits;
+ tp->rcv_adv += rx_credits;
}
if (toep->ulp_mode == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 &&
Modified: stable/11/sys/dev/cxgbe/tom/t4_ddp.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_ddp.c Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/tom/t4_ddp.c Wed Jun 5 21:46:56 2019 (r348704)
@@ -309,9 +309,6 @@ insert_ddp_data(struct toepcb *toep, uint32_t n)
KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__));
tp->rcv_wnd -= n;
#endif
-#ifndef USE_DDP_RX_FLOW_CONTROL
- toep->rx_credits += n;
-#endif
CTR2(KTR_CXGBE, "%s: placed %u bytes before falling out of DDP",
__func__, n);
while (toep->ddp.active_count > 0) {
@@ -560,16 +557,10 @@ handle_ddp_data(struct toepcb *toep, __be32 ddp_report
if (!sbreserve_locked(sb, newsize, so, NULL))
sb->sb_flags &= ~SB_AUTOSIZE;
- else
- toep->rx_credits += newsize - hiwat;
}
SOCKBUF_UNLOCK(sb);
CURVNET_RESTORE();
-#ifndef USE_DDP_RX_FLOW_CONTROL
- toep->rx_credits += len;
-#endif
-
job->msgrcv = 1;
if (db->cancel_pending) {
/*
@@ -718,12 +709,9 @@ handle_ddp_close(struct toepcb *toep, struct tcpcb *tp
INP_WLOCK_ASSERT(toep->inp);
DDP_ASSERT_LOCKED(toep);
- len = be32toh(rcv_nxt) - tp->rcv_nxt;
+ len = be32toh(rcv_nxt) - tp->rcv_nxt;
tp->rcv_nxt += len;
-#ifndef USE_DDP_RX_FLOW_CONTROL
- toep->rx_credits += len;
-#endif
while (toep->ddp.active_count > 0) {
MPASS(toep->ddp.active_id != -1);
Modified: stable/11/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_listen.c Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/tom/t4_listen.c Wed Jun 5 21:46:56 2019 (r348704)
@@ -1395,7 +1395,6 @@ found:
mtu_idx = find_best_mtu_idx(sc, &inc, &settings);
rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0;
- /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
rx_credits = min(select_rcv_wnd(so) >> 10, M_RCV_BUFSIZ);
save_qids_in_synqe(synqe, vi, &settings);
@@ -1544,8 +1543,7 @@ reset:
toep->l2te = &sc->l2t->l2tab[synqe->l2e_idx];
toep->vnet = lctx->vnet;
set_ulp_mode(toep, synqe->ulp_mode);
- /* opt0 rcv_bufsiz initially, assumes its normal meaning later */
- toep->rx_credits = synqe->rcv_bufsize;
+ toep->opt0_rcv_bufsize = synqe->rcv_bufsize;
MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
Modified: stable/11/sys/dev/cxgbe/tom/t4_tls.c
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_tls.c Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/tom/t4_tls.c Wed Jun 5 21:46:56 2019 (r348704)
@@ -1469,7 +1469,7 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_head
struct socket *so;
struct sockbuf *sb;
struct mbuf *tls_data;
- int len, pdu_length, pdu_overhead, sb_length;
+ int len, pdu_length, rx_credits;
KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
KASSERT(!(toep->flags & TPF_SYNQE),
@@ -1571,24 +1571,10 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_head
}
/*
- * Not all of the bytes on the wire are included in the socket
- * buffer (e.g. the MAC of the TLS record). However, those
- * bytes are included in the TCP sequence space. To handle
- * this, compute the delta for this TLS record in
- * 'pdu_overhead' and treat those bytes as having already been
- * "read" by the application for the purposes of expanding the
- * window. The meat of the TLS record passed to the
- * application ('sb_length') will still not be counted as
- * "read" until userland actually reads the bytes.
- *
- * XXX: Some of the calculations below are probably still not
- * really correct.
+ * Not all of the bytes on the wire are included in the socket buffer
+ * (e.g. the MAC of the TLS record). However, those bytes are included
+ * in the TCP sequence space.
*/
- sb_length = m->m_pkthdr.len;
- pdu_overhead = pdu_length - sb_length;
- toep->rx_credits += pdu_overhead;
- tp->rcv_wnd += pdu_overhead;
- tp->rcv_adv += pdu_overhead;
/* receive buffer autosize */
MPASS(toep->vnet == so->so_vnet);
@@ -1596,34 +1582,25 @@ do_rx_tls_cmp(struct sge_iq *iq, const struct rss_head
if (sb->sb_flags & SB_AUTOSIZE &&
V_tcp_do_autorcvbuf &&
sb->sb_hiwat < V_tcp_autorcvbuf_max &&
- sb_length > (sbspace(sb) / 8 * 7)) {
+ m->m_pkthdr.len > (sbspace(sb) / 8 * 7)) {
unsigned int hiwat = sb->sb_hiwat;
unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
V_tcp_autorcvbuf_max);
if (!sbreserve_locked(sb, newsize, so, NULL))
sb->sb_flags &= ~SB_AUTOSIZE;
- else
- toep->rx_credits += newsize - hiwat;
}
- KASSERT(toep->sb_cc >= sbused(sb),
- ("%s: sb %p has more data (%d) than last time (%d).",
- __func__, sb, sbused(sb), toep->sb_cc));
- toep->rx_credits += toep->sb_cc - sbused(sb);
sbappendstream_locked(sb, m, 0);
- toep->sb_cc = sbused(sb);
+ rx_credits = sbspace(sb) > tp->rcv_wnd ? sbspace(sb) - tp->rcv_wnd : 0;
#ifdef VERBOSE_TRACES
CTR5(KTR_CXGBE, "%s: tid %u PDU overhead %d rx_credits %u rcv_wnd %u",
- __func__, tid, pdu_overhead, toep->rx_credits, tp->rcv_wnd);
+ __func__, tid, pdu_overhead, rx_credits, tp->rcv_wnd);
#endif
- if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) {
- int credits;
-
- credits = send_rx_credits(sc, toep, toep->rx_credits);
- toep->rx_credits -= credits;
- tp->rcv_wnd += credits;
- tp->rcv_adv += credits;
+ if (rx_credits > 0 && sbused(sb) + tp->rcv_wnd < sb->sb_lowat) {
+ rx_credits = send_rx_credits(sc, toep, rx_credits);
+ tp->rcv_wnd += rx_credits;
+ tp->rcv_adv += rx_credits;
}
sorwakeup_locked(so);
Modified: stable/11/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- stable/11/sys/dev/cxgbe/tom/t4_tom.h Wed Jun 5 20:31:09 2019 (r348703)
+++ stable/11/sys/dev/cxgbe/tom/t4_tom.h Wed Jun 5 21:46:56 2019 (r348704)
@@ -178,9 +178,7 @@ struct toepcb {
u_int tx_nocompl; /* tx WR credits since last compl request */
u_int plen_nocompl; /* payload since last compl request */
- /* rx credit handling */
- u_int sb_cc; /* last noted value of so_rcv->sb_cc */
- int rx_credits; /* rx credits (in bytes) to be returned to hw */
+ int opt0_rcv_bufsize; /* XXX: save full opt0/opt2 for later? */
u_int ulp_mode; /* ULP mode */
void *ulpcb;
More information about the svn-src-stable-11 mailing list