[please review] TSO mbuf chain length limiting patch
Colin Percival
cperciva at freebsd.org
Sun Jun 3 20:59:38 UTC 2012
On 06/03/12 12:05, Andrew Gallatin wrote:
> On 06/03/12 12:51, Colin Percival wrote:
>> I've attached a new patch which:
>> 1. adds a IFCAP_TSO_MSS "capability" and a if_tx_tso_mss field to struct ifnet,
>> 2. sets these in netfront when the IFCAP_TSO4 flag is set,
>> 3. extends tcp_maxmtu to read this value,
>> 4. adds a tx_tso_mss field to struct tcpcb,
>> 5. makes tcp_mss_update set tx_tso_mss using tcp_maxmtu, and
>> 6. limits TSO lengths to tx_tso_mss in tcp_output.
>
> Looks good to me. I don't pretend to understand the bowels of
> the TCP stack, so I can't comment on the "sendalot" stuff to
> force segmentation.
The "sendalot" bit is rather confusing, and I'm not at all certain
that it's doing what it's supposed to be doing, but my reading of
the code is that it really means "there's more data buffered than
what we're sending right now so after we issue this write we need
to go back to the top and do another".
> I assume it works as well as your
> previous patch to solve the problems you were seeing with Xen?
Yes.
> One minor nit is that I envision this limit to always be
> 64K or less, so you could probably get away with stealing
> 16 bits from ifcspare.
With IPv6 I could imagine larger limits happening, so I figured
it was better to take one of the ints.
> The other trivial nit is that I would have made these new
> if_tx_tso_mss and t_tx_tso_mss fields unsigned.
Oops, quite right. I was trying to make sure I didn't break ABI,
but of course int and u_int should have the same size no matter
what platform we're on.
> Thanks so much for doing this!
Thanks for reviewing. Updated patch attached with s/int/u_int/ and
some other minor cleanups.
Can anyone review this for ABI safety?
--
Colin Percival
Security Officer Emeritus, FreeBSD | The power to serve
Founder, Tarsnap | www.tarsnap.com | Online backups for the truly paranoid
-------------- next part --------------
Index: netinet/tcp_input.c
===================================================================
--- netinet/tcp_input.c (revision 235780)
+++ netinet/tcp_input.c (working copy)
@@ -3298,6 +3298,7 @@
{
int mss = 0;
u_long maxmtu = 0;
+ u_int tx_tso_mss = 0;
struct inpcb *inp = tp->t_inpcb;
struct hc_metrics_lite metrics;
int origoffer;
@@ -3321,8 +3322,10 @@
/* Initialize. */
#ifdef INET6
if (isipv6) {
- maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+ maxmtu = tcp_maxmtu6_ext(&inp->inp_inc, mtuflags,
+ &tx_tso_mss);
tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+ tp->t_tx_tso_mss = tx_tso_mss;
}
#endif
#if defined(INET) && defined(INET6)
@@ -3330,8 +3333,10 @@
#endif
#ifdef INET
{
- maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+ maxmtu = tcp_maxmtu_ext(&inp->inp_inc, mtuflags,
+ &tx_tso_mss);
tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+ tp->t_tx_tso_mss = tx_tso_mss;
}
#endif
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c (revision 235780)
+++ netinet/tcp_subr.c (working copy)
@@ -1708,7 +1708,7 @@
* tcp_mss_update to get the peer/interface MTU.
*/
u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu_ext(struct in_conninfo *inc, int *flags, u_int * tx_tso_mss)
{
struct route sro;
struct sockaddr_in *dst;
@@ -1738,15 +1738,26 @@
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
+ if (tx_tso_mss != NULL) {
+ if (ifp->if_capabilities & IFCAP_TSO_MSS)
+ *tx_tso_mss = ifp->if_tx_tso_mss;
+ }
RTFREE(sro.ro_rt);
}
return (maxmtu);
}
+
+u_long
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
+{
+
+ return (tcp_maxmtu_ext(inc, flags, NULL));
+}
#endif /* INET */
#ifdef INET6
u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6_ext(struct in_conninfo *inc, int *flags, u_int * tx_tso_mss)
{
struct route_in6 sro6;
struct ifnet *ifp;
@@ -1775,11 +1786,22 @@
ifp->if_hwassist & CSUM_TSO)
*flags |= CSUM_TSO;
}
+ if (tx_tso_mss != NULL) {
+ if (ifp->if_capabilities & IFCAP_TSO_MSS)
+ *tx_tso_mss = ifp->if_tx_tso_mss;
+ }
RTFREE(sro6.ro_rt);
}
return (maxmtu);
}
+
+u_long
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+{
+
+ return (tcp_maxmtu6_ext(inc, flags, NULL));
+}
#endif /* INET6 */
#ifdef IPSEC
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h (revision 235780)
+++ netinet/tcp_var.h (working copy)
@@ -208,7 +208,9 @@
u_int t_keepintvl; /* interval between keepalives */
u_int t_keepcnt; /* number of keepalives before close */
- uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */
+ uint32_t t_ispare[5]; /* 5 UTO */
+ uint32_t t_tx_tso_mss; /* max segment size for TSO offload */
+ uint32_t t_ispare2[2]; /* 2 TBD */
void *t_pspare2[4]; /* 4 TBD */
uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */
};
@@ -674,7 +676,9 @@
#endif
void tcp_input(struct mbuf *, int);
u_long tcp_maxmtu(struct in_conninfo *, int *);
+u_long tcp_maxmtu_ext(struct in_conninfo *, int *, u_int *);
u_long tcp_maxmtu6(struct in_conninfo *, int *);
+u_long tcp_maxmtu6_ext(struct in_conninfo *, int *, u_int *);
void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
int *);
void tcp_mss(struct tcpcb *, int);
Index: netinet/tcp_output.c
===================================================================
--- netinet/tcp_output.c (revision 235780)
+++ netinet/tcp_output.c (working copy)
@@ -748,6 +748,15 @@
}
/*
+ * Some drivers want an even shorter limit to the
+ * length sent via TSO; respect their wishes.
+ */
+ if (tp->t_tx_tso_mss != 0 && len > tp->t_tx_tso_mss) {
+ len = tp->t_tx_tso_mss;
+ sendalot = 1;
+ }
+
+ /*
* Prevent the last segment from being
* fractional unless the send sockbuf can
* be emptied.
Index: dev/xen/netfront/netfront.c
===================================================================
--- dev/xen/netfront/netfront.c (revision 235780)
+++ dev/xen/netfront/netfront.c (working copy)
@@ -135,6 +135,7 @@
* to mirror the Linux MAX_SKB_FRAGS constant.
*/
#define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
+#define TX_TSO_MSS (MAX_TX_REQ_FRAGS - 2) * MCLBYTES
#define RX_COPY_THRESHOLD 256
@@ -2040,6 +2041,9 @@
if (val) {
np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
printf(" feature-gso-tcp4");
+
+ np->xn_ifp->if_capabilities |= IFCAP_TSO_MSS;
+ np->xn_ifp->if_tx_tso_mss = TX_TSO_MSS;
}
printf("\n");
Index: net/if.h
===================================================================
--- net/if.h (revision 235780)
+++ net/if.h (working copy)
@@ -230,6 +230,7 @@
#define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */
#define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */
#define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */
+#define IFCAP_TSO_MSS 0x200000 /* TSO size is limited */
#define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM)
#define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6)
Index: net/if_var.h
===================================================================
--- net/if_var.h (revision 235780)
+++ net/if_var.h (working copy)
@@ -205,7 +205,8 @@
* be used with care where binary compatibility is required.
*/
char if_cspare[3];
- int if_ispare[4];
+ u_int if_tx_tso_mss; /* if IFCAP_TSO_MSS */
+ int if_ispare[3];
void *if_pspare[8]; /* 1 netmap, 7 TDB */
};
More information about the freebsd-net
mailing list