svn commit: r303656 - head/sys/netinet
Sepherosa Ziehau
sephe at FreeBSD.org
Tue Aug 2 06:36:48 UTC 2016
Author: sephe
Date: Tue Aug 2 06:36:47 2016
New Revision: 303656
URL: https://svnweb.freebsd.org/changeset/base/303656
Log:
tcp/lro: Implement hash table for LRO entries.

Previously each incoming segment was matched against the active LRO
entries by walking a single list; hashing entries into buckets keyed
on the connection keeps that lookup cheap even with many concurrent
connections. This significantly improves performance and reduces
latency for HTTP workloads.
Reviewed by: rrs, gallatin, hps
Obtained from: rrs, gallatin
Sponsored by: Netflix (rrs, gallatin), Microsoft (sephe)
Differential Revision: https://reviews.freebsd.org/D6689
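The change is transparent to drivers: they keep calling the same entry
points, and the bucket lookup happens inside tcp_lro_rx(). Below is a
minimal sketch of such a consumer, assuming a hypothetical receive-queue
structure (the myq names are invented; the tcp_lro_*() calls are the
real KPI from <netinet/tcp_lro.h>):

/*
 * Sketch of an LRO consumer; "myq" and its fields are hypothetical,
 * the tcp_lro_*() calls are the real KPI.
 */
#include <sys/param.h>
#include <sys/mbuf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <netinet/tcp_lro.h>

struct myq {
	struct lro_ctrl	lro;
	struct ifnet	*ifp;
};

static int
myq_setup(struct myq *q)
{
	/* 8 LRO entries, 128 queued mbufs; after this change the same
	 * call also allocates max(entries, mbufs) hash buckets. */
	return (tcp_lro_init_args(&q->lro, q->ifp, 8, 128));
}

static void
myq_rx(struct myq *q, struct mbuf *m)
{
	/* Lookup now walks one hash bucket instead of the whole active
	 * list; on failure, hand the packet up unaggregated. */
	if (tcp_lro_rx(&q->lro, m, 0) != 0)
		(*q->ifp->if_input)(q->ifp, m);
}

static void
myq_rx_done(struct myq *q)
{
	tcp_lro_flush_all(&q->lro);	/* flush pending aggregates */
}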
Modified:
head/sys/netinet/tcp_lro.c
head/sys/netinet/tcp_lro.h
Modified: head/sys/netinet/tcp_lro.c
==============================================================================
--- head/sys/netinet/tcp_lro.c Tue Aug 2 03:05:59 2016 (r303655)
+++ head/sys/netinet/tcp_lro.c Tue Aug 2 06:36:47 2016 (r303656)
@@ -68,19 +68,24 @@ static MALLOC_DEFINE(M_LRO, "LRO", "LRO
 #endif

 static void	tcp_lro_rx_done(struct lro_ctrl *lc);
+static int	tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m,
+		    uint32_t csum, int use_hash);

 static __inline void
-tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_entry *le)
+tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
+    struct lro_entry *le)
 {
 	LIST_INSERT_HEAD(&lc->lro_active, le, next);
+	LIST_INSERT_HEAD(bucket, le, hash_next);
 }

 static __inline void
 tcp_lro_active_remove(struct lro_entry *le)
 {
-	LIST_REMOVE(le, next);
+	LIST_REMOVE(le, next);		/* active list */
+	LIST_REMOVE(le, hash_next);	/* hash bucket */
 }

 int
@@ -95,7 +100,7 @@ tcp_lro_init_args(struct lro_ctrl *lc, s
 {
 	struct lro_entry *le;
 	size_t size;
-	unsigned i;
+	unsigned i, elements;

 	lc->lro_bad_csum = 0;
 	lc->lro_queued = 0;
@@ -110,6 +115,18 @@ tcp_lro_init_args(struct lro_ctrl *lc, s
 	LIST_INIT(&lc->lro_free);
 	LIST_INIT(&lc->lro_active);

+	/* create hash table to accelerate entry lookup */
+	if (lro_entries > lro_mbufs)
+		elements = lro_entries;
+	else
+		elements = lro_mbufs;
+	lc->lro_hash = phashinit_flags(elements, M_LRO, &lc->lro_hashsz,
+	    HASH_NOWAIT);
+	if (lc->lro_hash == NULL) {
+		memset(lc, 0, sizeof(*lc));
+		return (ENOMEM);
+	}
+
 	/* compute size to allocate */
 	size = (lro_mbufs * sizeof(struct lro_mbuf_sort)) +
 	    (lro_entries * sizeof(*le));
@@ -147,6 +164,13 @@ tcp_lro_free(struct lro_ctrl *lc)
 		m_freem(le->m_head);
 	}

+	/* free hash table */
+	if (lc->lro_hash != NULL) {
+		free(lc->lro_hash, M_LRO);
+		lc->lro_hash = NULL;
+	}
+	lc->lro_hashsz = 0;
+
 	/* free mbuf array, if any */
 	for (x = 0; x != lc->lro_mbuf_count; x++)
 		m_freem(lc->lro_mbuf_data[x].mb);
@@ -487,7 +511,7 @@ tcp_lro_flush_all(struct lro_ctrl *lc)
 		}

 		/* add packet to LRO engine */
-		if (tcp_lro_rx(lc, mb, 0) != 0) {
+		if (tcp_lro_rx2(lc, mb, 0, 0) != 0) {
 			/* input packet to network layer */
 			(*lc->ifp->if_input)(lc->ifp, mb);
 			lc->lro_queued++;
@@ -561,8 +585,8 @@ tcp_lro_rx_ipv4(struct lro_ctrl *lc, str
 }
 #endif

-int
-tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+static int
+tcp_lro_rx2(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum, int use_hash)
 {
 	struct lro_entry *le;
 	struct ether_header *eh;
@@ -578,6 +602,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
 	tcp_seq seq;
 	int error, ip_len, l;
 	uint16_t eh_type, tcp_data_len;
+	struct lro_head *bucket;

 	/* We expect a contiguous header [eh, ip, tcp]. */
@@ -670,8 +695,41 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
 	seq = ntohl(th->th_seq);

+	if (!use_hash) {
+		bucket = &lc->lro_hash[0];
+	} else if (M_HASHTYPE_ISHASH(m)) {
+		bucket = &lc->lro_hash[m->m_pkthdr.flowid % lc->lro_hashsz];
+	} else {
+		uint32_t hash;
+
+		switch (eh_type) {
+#ifdef INET
+		case ETHERTYPE_IP:
+			hash = ip4->ip_src.s_addr + ip4->ip_dst.s_addr;
+			break;
+#endif
+#ifdef INET6
+		case ETHERTYPE_IPV6:
+			hash = ip6->ip6_src.s6_addr32[0] +
+			    ip6->ip6_dst.s6_addr32[0];
+			hash += ip6->ip6_src.s6_addr32[1] +
+			    ip6->ip6_dst.s6_addr32[1];
+			hash += ip6->ip6_src.s6_addr32[2] +
+			    ip6->ip6_dst.s6_addr32[2];
+			hash += ip6->ip6_src.s6_addr32[3] +
+			    ip6->ip6_dst.s6_addr32[3];
+			break;
+#endif
+		default:
+			hash = 0;
+			break;
+		}
+		hash += th->th_sport + th->th_dport;
+		bucket = &lc->lro_hash[hash % lc->lro_hashsz];
+	}
+
 	/* Try to find a matching previous segment. */
-	LIST_FOREACH(le, &lc->lro_active, next) {
+	LIST_FOREACH(le, bucket, hash_next) {
 		if (le->eh_type != eh_type)
 			continue;
 		if (le->source_port != th->th_sport ||
		    le->dest_port != th->th_dport)
@@ -779,7 +837,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
 	/* Start a new segment chain. */
 	le = LIST_FIRST(&lc->lro_free);
 	LIST_REMOVE(le, next);
-	tcp_lro_active_insert(lc, le);
+	tcp_lro_active_insert(lc, bucket, le);
 	getmicrotime(&le->mtime);

 	/* Start filling in details. */
@@ -837,6 +895,13 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m
 	return (0);
 }

+int
+tcp_lro_rx(struct lro_ctrl *lc, struct mbuf *m, uint32_t csum)
+{
+
+	return tcp_lro_rx2(lc, m, csum, 1);
+}
+
 void
 tcp_lro_queue_mbuf(struct lro_ctrl *lc, struct mbuf *mb)
 {
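
The bucket selection above boils down to a little integer arithmetic.
Here is a standalone userland rendering of it, assuming an invented
lro_bucket() helper that mirrors the IPv4 fallback branch of
tcp_lro_rx2(); when the NIC supplies a flow ID (M_HASHTYPE_ISHASH()),
that value is used directly and the software mix is skipped:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Invented helper for illustration only; not part of the kernel
 * code above.
 */
static uint32_t
lro_bucket(int have_flowid, uint32_t flowid, uint32_t saddr,
    uint32_t daddr, uint16_t sport, uint16_t dport, unsigned long hashsz)
{
	uint32_t hash;

	if (have_flowid)		/* M_HASHTYPE_ISHASH() case */
		return ((uint32_t)(flowid % hashsz));
	hash = saddr + daddr;		/* additive mix, as above */
	hash += sport + dport;
	return ((uint32_t)(hash % hashsz));
}

int
main(void)
{
	/* Segments of one connection always select the same bucket. */
	printf("%" PRIu32 "\n",
	    lro_bucket(0, 0, 0x0a000001, 0x0a000002, 80, 54321, 1021));
	/* A different peer usually selects another bucket. */
	printf("%" PRIu32 "\n",
	    lro_bucket(0, 0, 0x0a000001, 0x0a000003, 80, 54321, 1021));
	return (0);
}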
Modified: head/sys/netinet/tcp_lro.h
==============================================================================
--- head/sys/netinet/tcp_lro.h Tue Aug 2 03:05:59 2016 (r303655)
+++ head/sys/netinet/tcp_lro.h Tue Aug 2 06:36:47 2016 (r303656)
@@ -40,6 +40,7 @@
 struct lro_entry {
 	LIST_ENTRY(lro_entry) next;
+	LIST_ENTRY(lro_entry) hash_next;
 	struct mbuf	*m_head;
 	struct mbuf	*m_tail;
 	union {
@@ -95,6 +96,8 @@ struct lro_ctrl {
 	unsigned short	lro_ackcnt_lim;		/* max # of aggregated ACKs */
 	unsigned	lro_length_lim;		/* max len of aggregated data */

+	u_long		lro_hashsz;
+	struct lro_head	*lro_hash;
 	struct lro_head	lro_active;
 	struct lro_head	lro_free;
 };
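
With the new hash_next field, each lro_entry sits on two lists at once:
the per-ctrl lro_active list (walked when flushing everything) and one
hash bucket (walked on lookup). A small userland demonstration of that
<sys/queue.h> pattern, with invented node and list names:

#include <sys/queue.h>
#include <stdio.h>

struct node {
	LIST_ENTRY(node) next;		/* global "active" list */
	LIST_ENTRY(node) hash_next;	/* per-bucket list */
	int id;
};
LIST_HEAD(nodehead, node);

int
main(void)
{
	struct nodehead active = LIST_HEAD_INITIALIZER(active);
	struct nodehead bucket = LIST_HEAD_INITIALIZER(bucket);
	struct node n = { .id = 1 };
	struct node *p;

	/* Mirrors tcp_lro_active_insert(): onto both lists. */
	LIST_INSERT_HEAD(&active, &n, next);
	LIST_INSERT_HEAD(&bucket, &n, hash_next);

	/* Lookup walks only the bucket, not the whole active list. */
	LIST_FOREACH(p, &bucket, hash_next)
		printf("bucket: %d\n", p->id);

	/* Mirrors tcp_lro_active_remove(): off both lists. */
	LIST_REMOVE(&n, next);
	LIST_REMOVE(&n, hash_next);
	return (0);
}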