Flow ID, LACP, and igb
T.C. Gubatayao
tgubatayao at barracuda.com
Thu Aug 29 21:51:44 UTC 2013
On Aug 29, 2013, at 5:40 PM, T.C. Gubatayao <tgubatayao at barracuda.com> wrote:
> On Aug 29, 2013, at 4:21 PM, Alan Somers <asomers at freebsd.org> wrote:
>
>> They're faster, but even with this change, jenkins_hash is still 6 times
>> slower than FNV hash.
>
> Actually, I think your test isn't accurately simulating memory access, which
> might be skewing the results.
>
> For example, from net/if_lagg.c:
>
> p = hash32_buf(&eh->ether_shost, ETHER_ADDR_LEN, p);
> p = hash32_buf(&eh->ether_dhost, ETHER_ADDR_LEN, p);
>
> These two calls can't both be 4-byte aligned, since ETHER_ADDR_LEN is 6 octets. The
> same is true for the other hashed fields in the IP and TCP/UDP headers.
> Assuming the mbuf data pointer is aligned, the IP addresses and ports are both
> on 2-byte alignments (without VLAN or IP options). In your test, they're all
> aligned and in the same cache line.
>
> When I modify the test to simulate an mbuf, lookup3 beats FNV and hash32, and
> SipHash is only 2-3 times slower.
>
>> Also, your technique of copying the hashable fields into a separate buffer
>> would need modification to work with different types of packet and different
>> LAGG_F_HASH[234] flags. Because different packets have different hashable
>> fields, struct key would need to be expanded to include the VLAN tag, IPv6
>> addresses, and IPv6 flowid. lagg_hashmbuf would then have to zero the unused
>> fields.
>
> Agreed, but this is relatively simple with a buffer on the stack, and does not
> require zeroes or padding. See my modified test, attached.
>
> T.C.
Attachment was stripped.
--- a/lagg_hash.c 2013-08-29 14:21:17.255307349 -0400
+++ b/lagg_hash.c 2013-08-29 17:26:14.055404918 -0400
@@ -7,35 +7,63 @@
#include <sys/hash.h>
#include <sys/fnv_hash.h>
#include <sys/time.h>
-
-uint32_t jenkins_hash32(const uint32_t *, size_t, uint32_t);
+#include <string.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
#define ITERATIONS 100000000
-typedef uint32_t do_hash_t(void);
+typedef uint32_t do_hash_t(uint32_t);
+
+/*
+ * Simulate mbuf data for a packet.
+ * No VLAN tagging and no IP options.
+ */
+struct _mbuf {
+ struct ether_header eh;
+ struct ip ip;
+ struct tcphdr th;
+} __attribute__((packed)) m = {
+ {
+ .ether_dhost = { 181, 16, 73, 9, 219, 22 },
+ .ether_shost = { 69, 170, 210, 11, 24, 120 },
+ .ether_type = 0x008
+ },
+ {
+ .ip_src.s_addr = 1329258245,
+ .ip_dst.s_addr = 1319097119,
+ .ip_p = 0x06
+ },
+ {
+ .th_sport = 12506,
+ .th_dport = 47804
+ }
+};
-// Pad the MACs with 0s because jenkins_hash operates on 32-bit inputs
-const uint8_t ether_shost[] = {181, 16, 73, 9, 219, 22, 0, 0};
-const uint8_t ether_dhost[] = {69, 170, 210, 111, 24, 120, 0, 0};
-const struct in_addr ip_src = {.s_addr = 1329258245};
-const struct in_addr ip_dst = {.s_addr = 1319097119};
-const uint32_t ports = 3132895450;
const uint8_t sipkey[16] = {7, 239, 255, 43, 68, 53, 56, 225,
98, 81, 177, 80, 92, 235, 242, 39};
+#define LAGG_F_HASHL2 0x1
+#define LAGG_F_HASHL3 0x2
+#define LAGG_F_HASHL4 0x4
+#define LAGG_F_HASHALL (LAGG_F_HASHL2|LAGG_F_HASHL3|LAGG_F_HASHL4)
+
/*
* Simulate how lagg_hashmbuf uses FNV hash for a TCP/IP packet
* No VLAN tagging
*/
-uint32_t do_fnv(void)
+uint32_t do_fnv(uint32_t flags)
{
uint32_t p = FNV1_32_INIT;
- p = fnv_32_buf(ether_shost, 6, p);
- p = fnv_32_buf(ether_dhost, 6, p);
- p = fnv_32_buf(&ip_src, sizeof(struct in_addr), p);
- p = fnv_32_buf(&ip_dst, sizeof(struct in_addr), p);
- p = fnv_32_buf(&ports, sizeof(ports), p);
+ if (flags & LAGG_F_HASHL2)
+ p = fnv_32_buf(&m.eh.ether_dhost, 12, p);
+ if (flags & LAGG_F_HASHL3)
+ p = fnv_32_buf(&m.ip.ip_src, 8, p);
+ if (flags & LAGG_F_HASHL4)
+ p = fnv_32_buf(&m.th.th_sport, 4, p);
+
return (p);
}
@@ -43,59 +71,74 @@
* Simulate how lagg_hashmbuf uses hash32 for a TCP/IP packet
* No VLAN tagging
*/
-uint32_t do_hash32(void)
+uint32_t do_hash32(uint32_t flags)
{
// Actually, if_lagg used a pseudorandom number determined at interface
// creation time. But this should have the same timing
// characteristics.
uint32_t p = HASHINIT;
- p = hash32_buf(ether_shost, 6, p);
- p = hash32_buf(ether_dhost, 6, p);
- p = hash32_buf(&ip_src, sizeof(struct in_addr), p);
- p = hash32_buf(&ip_dst, sizeof(struct in_addr), p);
- p = hash32_buf(&ports, sizeof(ports), p);
+ if (flags & LAGG_F_HASHL2)
+ p = hash32_buf(&m.eh.ether_dhost, 12, p);
+ if (flags & LAGG_F_HASHL3)
+ p = hash32_buf(&m.ip.ip_src, 8, p);
+ if (flags & LAGG_F_HASHL4)
+ p = hash32_buf(&m.th.th_sport, 4, p);
+
return (p);
}
+/* Simulate copying the info out of the mbuf. */
+static __inline size_t init_key(char *key, uint32_t flags)
+{
+ uint16_t etype;
+ size_t len = 0;
+
+ if (flags & LAGG_F_HASHL2) {
+ memcpy(key + len, &m.eh.ether_dhost, 12);
+ len += 12;
+ }
+
+ if (flags & LAGG_F_HASHL3) {
+ memcpy(key + len, &m.ip.ip_src, 8);
+ len += 8;
+ }
+
+ if (flags & LAGG_F_HASHL4) {
+ memcpy(key + len, &m.th.th_sport, 4);
+ len += 4;
+ }
+
+ return (len);
+}
+
/*
* Simulate how lagg_hashmbuf would use siphash24 for a TCP/IP packet
* No VLAN tagging
*/
-uint32_t do_siphash24(void)
+uint32_t do_siphash24(uint32_t flags)
{
SIPHASH_CTX ctx;
+ char key[26];
+ size_t len;
- SipHash24_Init(&ctx);
- SipHash_SetKey(&ctx, sipkey);
+ len = init_key(key, flags);
- SipHash_Update(&ctx, ether_shost, 6);
- SipHash_Update(&ctx, ether_dhost, 6);
- SipHash_Update(&ctx, &ip_src, sizeof(struct in_addr));
- SipHash_Update(&ctx, &ip_dst, sizeof(struct in_addr));
- SipHash_Update(&ctx, &ports, sizeof(ports));
- return (SipHash_End(&ctx) & 0xFFFFFFFF);
+ return (SipHash24(&ctx, sipkey, key, len) & 0xFFFFFFFF);
}
/*
* Simulate how lagg_hashmbuf would use lookup3 aka jenkins_hash
* No VLAN tagging
*/
-uint32_t do_jenkins(void)
+uint32_t do_jenkins(uint32_t flags)
{
- /* Jenkins hash does not recommend any specific initializer */
- uint32_t p = FNV1_32_INIT;
+ char key[26];
+ size_t len;
- /*
- * jenkins_hash uses 32-bit inputs, so we need to present the MACs as
- * arrays of 2 32-bit values
- */
- p = jenkins_hash32((uint32_t*)ether_shost, 2, p);
- p = jenkins_hash32((uint32_t*)ether_dhost, 2, p);
- p = jenkins_hash32((uint32_t*)&ip_src, sizeof(struct in_addr) / 4, p);
- p = jenkins_hash32((uint32_t*)&ip_dst, sizeof(struct in_addr) / 4, p);
- p = jenkins_hash32(&ports, sizeof(ports) / 4, p);
- return (p);
+ len = init_key(key, flags);
+
+ return (jenkins_hash(key, len, FNV1_32_INIT));
}
@@ -120,7 +163,7 @@
gettimeofday(&tv_old, NULL);
for (j=0; j<ITERATIONS; j++)
- funcs[i].f();
+ funcs[i].f(LAGG_F_HASHALL);
gettimeofday(&tv_new, NULL);
timersub(&tv_new, &tv_old, &tv_diff);
t = tv_diff.tv_sec + tv_diff.tv_usec / 1000000.;
More information about the freebsd-net
mailing list