svn commit: r301339 - in head: contrib/libucl/src contrib/libucl/uthash lib/libucl

Sat Jun 4 14:57:27 UTC 2016

Author: bapt
Date: Sat Jun  4 14:57:25 2016
New Revision: 301339
URL: https://svnweb.freebsd.org/changeset/base/301339

Log:
  Import libucl snapshot 20160604
  
  It replaces xxhash with mumhash
  It fixes issues with msgpack on non x86

Added:
  head/contrib/libucl/src/mum.h
     - copied unchanged from r301336, vendor/libucl/dist/src/mum.h
Deleted:
  head/contrib/libucl/src/xxhash.c
  head/contrib/libucl/src/xxhash.h
Modified:
  head/contrib/libucl/src/Makefile.am
  head/contrib/libucl/src/ucl_hash.c
  head/contrib/libucl/src/ucl_internal.h
  head/contrib/libucl/src/ucl_msgpack.c
  head/contrib/libucl/src/ucl_parser.c
  head/contrib/libucl/src/ucl_util.c
  head/contrib/libucl/uthash/uthash.h
  head/lib/libucl/Makefile
Directory Properties:
  head/contrib/libucl/   (props changed)

Modified: head/contrib/libucl/src/Makefile.am
==============================================================================

--- head/contrib/libucl/src/Makefile.am	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/src/Makefile.am	Sat Jun  4 14:57:25 2016	(r301339)
@@ -12,8 +12,7 @@ libucl_la_SOURCES=	ucl_emitter.c \
 					ucl_schema.c \
 					ucl_util.c \
 					ucl_msgpack.c \
-					ucl_sexp.c \
-					xxhash.c
+					ucl_sexp.c
 libucl_la_CFLAGS=	$(libucl_common_cflags) \
 					@CURL_CFLAGS@
 libucl_la_LDFLAGS = -version-info @SO_VERSION@
@@ -25,7 +24,7 @@ libucl_la_LIBADD=	@LIBFETCH_LIBS@ \
 include_HEADERS=	$(top_srcdir)/include/ucl.h \
 					$(top_srcdir)/include/ucl++.h
 noinst_HEADERS=	ucl_internal.h \
-				xxhash.h \
+				mum.h \
 				ucl_hash.h \
 				ucl_chartable.h \
 				tree.h

Copied: head/contrib/libucl/src/mum.h (from r301336, vendor/libucl/dist/src/mum.h)
==============================================================================
--- /dev/null	00:00:00 1970	(empty, because file is newly added)
+++ head/contrib/libucl/src/mum.h	Sat Jun  4 14:57:25 2016	(r301339, copy of r301336, vendor/libucl/dist/src/mum.h)
@@ -0,0 +1,417 @@
+/* Copyright (c) 2016 Vladimir Makarov <vmakarov at gcc.gnu.org>
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* This file implements MUM (MUltiply and Mix) hashing.  We randomize
+   input data by 64x64-bit multiplication and mixing hi- and low-parts
+   of the multiplication result by using an addition and then mix it
+   into the current state.  We use prime numbers randomly generated
+   with the equal probability of their bit values for the
+   multiplication.  When all primes are used once, the state is
+   randomized and the same prime numbers are used again for data
+   randomization.
+
+   The MUM hashing passes all SMHasher tests.  Pseudo Random Number
+   Generator based on MUM also passes NIST Statistical Test Suite for
+   Random and Pseudorandom Number Generators for Cryptographic
+   Applications (version 2.2.1) with 1000 bitstreams each containing
+   1M bits.  MUM hashing is also faster than Spooky64 and City64 on small
+   strings (at least up to 512 bits) on Haswell and Power7.  The MUM bulk
+   speed (speed on very long data) is higher than Spooky and City on
+   Power7.  On Haswell the bulk speed is higher than that of Spooky and
+   close to that of City.  */
+
+#ifndef __MUM_HASH__
+#define __MUM_HASH__
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef _MSC_VER
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Macro saying to use 128-bit integers implemented by GCC for some
+   targets.  */
+#ifndef _MUM_USE_INT128
+/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
+   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
+   otherwise int. */
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+#define _MUM_USE_INT128 1
+#else
+#define _MUM_USE_INT128 0
+#endif
+#endif
+
+#if defined(__GNUC__) && ((__GNUC__ == 4) &&  (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
+#define _MUM_FRESH_GCC
+#endif
+
+#if defined(__GNUC__) && !defined(__llvm__)
+#define _MUM_ATTRIBUTE_UNUSED  __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
+#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#else
+#define _MUM_ATTRIBUTE_UNUSED
+#define _MUM_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#endif
+
+
+/* Here are different primes randomly generated with the equal
+   probability of their bit values.  They are used to randomize input
+   values.  */
+static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
+static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
+static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
+static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
+static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
+static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
+static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
+
+static uint64_t _mum_primes [] = {
+  0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
+  0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
+  0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
+  0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
+};
+
+/* Multiply 64-bit V and P and return sum of high and low parts of the
+   result.  */
+static inline uint64_t
+_mum (uint64_t v, uint64_t p) {
+  uint64_t hi, lo;
+#if _MUM_USE_INT128
+#if defined(__aarch64__)
+  /* AARCH64 needs 2 insns to calculate 128-bit result of the
+     multiplication.  If we use a generic code we actually call a
+     function doing 128x128->128 bit multiplication.  The function is
+     very slow.  */
+  lo = v * p;  /* low 64 bits; UMULH below yields the high 64 bits */
+  __asm__ ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+#else
+  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+  hi = (uint64_t) (r >> 64);
+  lo = (uint64_t) r;
+#endif
+#else
+  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+     bit multiplication.  */
+  uint64_t hv = v >> 32, hp = p >> 32;
+  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+  uint64_t rh =  hv * hp;
+  uint64_t rm_0 = hv * lp;
+  uint64_t rm_1 = hp * lv;
+  uint64_t rl =  lv * lp;
+  uint64_t t, carry = 0;
+
+  /* We could ignore a carry bit here if we did not care about the
+     same hash for 32-bit and 64-bit targets.  */
+  t = rl + (rm_0 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry = t < rl;
+#endif
+  lo = t + (rm_1 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry += lo < t;
+#endif
+  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+#endif
+  /* We could use XOR here too but, for some reasons, on Haswell and
+     Power7 using an addition improves hashing performance by 10% for
+     small strings.  */
+  return hi + lo;
+}
+
+#if defined(_MSC_VER) /* MSVC byte-swap intrinsics, declared in <stdlib.h> */
+#define _mum_bswap32(x) _byteswap_ulong (x)
+#define _mum_bswap64(x) _byteswap_uint64 (x)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define _mum_bswap32(x) OSSwapInt32 (x)
+#define _mum_bswap64(x) OSSwapInt64 (x)
+#elif defined(__GNUC__)
+#define _mum_bswap32(x) __builtin_bswap32 (x)
+#define _mum_bswap64(x) __builtin_bswap64 (x)
+#else /* fall back to the bswap_32/bswap_64 macros from <byteswap.h> */
+#include <byteswap.h>
+#define _mum_bswap32(x) bswap_32 (x)
+#define _mum_bswap64(x) bswap_64 (x)
+#endif
+
+static inline uint64_t
+_mum_le (uint64_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+  return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return _mum_bswap64 (v);
+#else
+#error "Unknown endianess"
+#endif
+}
+
+static inline uint32_t
+_mum_le32 (uint32_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+  return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  return _mum_bswap32 (v);
+#else
+#error "Unknown endianess"
+#endif
+}
+
+/* Macro defining how many times the most nested loop in
+   _mum_hash_aligned will be unrolled by the compiler (although it can
+   make its own decision).  Use only a constant here to help a
+   compiler to unroll a major loop.
+
+   The macro value affects the result hash for strings > 128 bit.  The
+   unroll factor greatly affects the hashing speed.  We prefer the
+   speed.  */
+#ifndef _MUM_UNROLL_FACTOR_POWER
+#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 3
+#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 4
+#else
+#define _MUM_UNROLL_FACTOR_POWER 2
+#endif
+#endif
+
+#if _MUM_UNROLL_FACTOR_POWER < 1
+#error "too small unroll factor"
+#elif _MUM_UNROLL_FACTOR_POWER > 4
+#error "We have not enough primes for such unroll factor"
+#endif
+
+#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
+
+static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
+_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
+  uint64_t result = start;
+  const unsigned char *str = (const unsigned char *) key;
+  uint64_t u64;
+  int i;
+  size_t n;
+
+  result = _mum (result, _mum_block_start_prime);
+  while  (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
+    /* This loop could be vectorized when we have vector insns for
+       64x64->128-bit multiplication.  AVX2 currently only has a
+       vector insn for four 32x32->64-bit multiplications.  */
+    for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
+      result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+    len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+    str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+    /* We will use the same prime numbers on the next iterations --
+       randomize the state.  */
+    result = _mum (result, _mum_unroll_prime);
+  }
+  n = len / sizeof (uint64_t);
+  for (i = 0; i < (int)n; i++)
+    result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+  len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
+  switch (len) {
+  case 7:
+    u64 = _mum_le32 (*(uint32_t *) str);
+    u64 |= (uint64_t) str[4] << 32;
+    u64 |= (uint64_t) str[5] << 40;
+    u64 |= (uint64_t) str[6] << 48;
+    return result ^ _mum (u64, _mum_tail_prime);
+  case 6:
+    u64 = _mum_le32 (*(uint32_t *) str);
+    u64 |= (uint64_t) str[4] << 32;
+    u64 |= (uint64_t) str[5] << 40;
+    return result ^ _mum (u64, _mum_tail_prime);
+  case 5:
+    u64 = _mum_le32 (*(uint32_t *) str);
+    u64 |= (uint64_t) str[4] << 32;
+    return result ^ _mum (u64, _mum_tail_prime);
+  case 4:
+    u64 = _mum_le32 (*(uint32_t *) str);
+    return result ^ _mum (u64, _mum_tail_prime);
+  case 3:
+    u64 = str[0];
+    u64 |= (uint64_t) str[1] << 8;
+    u64 |= (uint64_t) str[2] << 16;
+    return result ^ _mum (u64, _mum_tail_prime);
+  case 2:
+    u64 = str[0];
+    u64 |= (uint64_t) str[1] << 8;
+    return result ^ _mum (u64, _mum_tail_prime);
+  case 1:
+    u64 = str[0];
+    return result ^ _mum (u64, _mum_tail_prime);
+  }
+  return result;
+}
+
+/* Final randomization of H.  */
+static inline uint64_t
+_mum_final (uint64_t h) {
+  h ^= _mum (h, _mum_finish_prime1);
+  h ^= _mum (h, _mum_finish_prime2);
+  return h;
+}
+
+#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
+
+/* We want to use the MULX insn (BMI2, available on Haswell and later)
+   instead of generic x86-64 MULQ where it is possible.  Although on
+   modern Intel processors MULQ takes 3 cycles vs. 4 for MULX, MULX permits
+   more freedom in insn scheduling as it uses fewer fixed registers.  */
+static inline uint64_t _MUM_TARGET("arch=haswell")
+_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
+  return _mum_final (_mum_hash_aligned (seed + len, key, len));
+}
+#endif
+
+#ifndef _MUM_UNALIGNED_ACCESS
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
+    || defined(__s390__) || defined(__m32c__) || defined(cris)     \
+    || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
+    || defined(__aarch64__)
+#define _MUM_UNALIGNED_ACCESS 1
+#else
+#define _MUM_UNALIGNED_ACCESS 0
+#endif
+#endif
+
+/* When we need an aligned access to data being hashed we move part of
+   the unaligned data to an aligned block of given size and then
+   process it, repeating processing the data by the block.  */
+#ifndef _MUM_BLOCK_LEN
+#define _MUM_BLOCK_LEN 1024
+#endif
+
+#if _MUM_BLOCK_LEN < 8
+#error "too small block length"
+#endif
+
+static inline uint64_t
+#if defined(__x86_64__)
+_MUM_TARGET("inline-all-stringops")
+#endif
+_mum_hash_default (const void *key, size_t len, uint64_t seed) {
+  uint64_t result;
+  const unsigned char *str = (const unsigned char *) key;
+  size_t block_len;
+  uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
+
+  result = seed + len;
+  if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+    result = _mum_hash_aligned (result, key, len);
+  else {
+    while (len != 0) {
+      block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
+      memmove (buf, str, block_len);
+      result = _mum_hash_aligned (result, buf, block_len);
+      len -= block_len;
+      str += block_len;
+    }
+  }
+  return _mum_final (result);
+}
+
+static inline uint64_t
+_mum_next_factor (void) {
+  uint64_t start = 0;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    start = (start << 8) | rand() % 256;
+  return start;
+}
+
+/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++  */
+
+/* Set random multipliers depending on SEED.  */
+static inline void
+mum_hash_randomize (uint64_t seed) {
+  int i;
+
+  srand (seed);
+  _mum_hash_step_prime = _mum_next_factor ();
+  _mum_key_step_prime = _mum_next_factor ();
+  _mum_finish_prime1 = _mum_next_factor ();
+  _mum_finish_prime2 = _mum_next_factor ();
+  _mum_block_start_prime = _mum_next_factor ();
+  _mum_unroll_prime = _mum_next_factor ();
+  _mum_tail_prime = _mum_next_factor ();
+  for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
+    _mum_primes[i] = _mum_next_factor ();
+}
+
+/* Start hashing data with SEED.  Return the state.  */
+static inline uint64_t
+mum_hash_init (uint64_t seed) {
+  return seed;
+}
+
+/* Process data KEY with the state H and return the updated state.  */
+static inline uint64_t
+mum_hash_step (uint64_t h, uint64_t key)
+{
+  return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
+}
+
+/* Return the result of hashing using the current state H.  */
+static inline uint64_t
+mum_hash_finish (uint64_t h) {
+  return _mum_final (h);
+}
+
+/* Fast hashing of KEY with SEED.  The hash is always the same for the
+   same key on any target. */
+static inline size_t
+mum_hash64 (uint64_t key, uint64_t seed) {
+  return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
+}
+
+/* Hash data KEY of length LEN with SEED.  The hash depends on the
+   target endianness and the unroll factor.  */
+static inline uint64_t
+mum_hash (const void *key, size_t len, uint64_t seed) {
+#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
+  static int avx2_support = 0;
+
+  if (avx2_support > 0)
+    return _mum_hash_avx2 (key, len, seed);
+  else if (! avx2_support) {
+    __builtin_cpu_init ();
+    avx2_support =  __builtin_cpu_supports ("avx2") ? 1 : -1;
+    if (avx2_support > 0)
+      return _mum_hash_avx2 (key, len, seed);
+  }
+#endif
+  return _mum_hash_default (key, len, seed);
+}
+
+#endif

Modified: head/contrib/libucl/src/ucl_hash.c
==============================================================================
--- head/contrib/libucl/src/ucl_hash.c	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/src/ucl_hash.c	Sat Jun  4 14:57:25 2016	(r301339)
@@ -25,6 +25,7 @@
 #include "ucl_hash.h"
 #include "khash.h"
 #include "kvec.h"
+#include "mum.h"
 
 #include <time.h>
 #include <limits.h>
@@ -99,20 +100,11 @@ static const unsigned char lc_map[256] =
 #define UCL64_BIT_HASH 1
 #endif
 
-#ifdef UCL64_BIT_HASH
 static inline uint32_t
 ucl_hash_func (const ucl_object_t *o)
 {
-	return XXH64 (o->key, o->keylen, ucl_hash_seed ());
+	return mum_hash (o->key, o->keylen, ucl_hash_seed ());
 }
-#else
-static inline uint32_t
-ucl_hash_func (const ucl_object_t *o)
-{
-	return XXH32 (o->key, o->keylen, ucl_hash_seed ());
-}
-#endif
-
 static inline int
 ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
 {
@@ -126,91 +118,60 @@ ucl_hash_equal (const ucl_object_t *k1, 
 KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1,
 		ucl_hash_func, ucl_hash_equal)
 
-#ifdef UCL64_BIT_HASH
 static inline uint32_t
 ucl_hash_caseless_func (const ucl_object_t *o)
 {
 	unsigned len = o->keylen;
-	unsigned leftover = o->keylen % 4;
+	unsigned leftover = o->keylen % 8;
 	unsigned fp, i;
 	const uint8_t* s = (const uint8_t*)o->key;
 	union {
 		struct {
-			unsigned char c1, c2, c3, c4;
+			unsigned char c1, c2, c3, c4, c5, c6, c7, c8;
 		} c;
-		uint32_t pp;
+		uint64_t pp;
 	} u;
-	XXH64_state_t st;
+	uint64_t r;
 
 	fp = len - leftover;
-	XXH64_reset (&st, ucl_hash_seed ());
+	r = ucl_hash_seed ();
 
-	for (i = 0; i != fp; i += 4) {
+	for (i = 0; i != fp; i += 8) {
 		u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
+		u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7];
 		u.c.c1 = lc_map[u.c.c1];
 		u.c.c2 = lc_map[u.c.c2];
 		u.c.c3 = lc_map[u.c.c3];
 		u.c.c4 = lc_map[u.c.c4];
-		XXH64_update (&st, &u.pp, sizeof (u));
+		u.c.c5 = lc_map[u.c.c5];	/* fold bytes 5..8 in place; must not clobber c1..c4 */
+		u.c.c6 = lc_map[u.c.c6];
+		u.c.c7 = lc_map[u.c.c7];
+		u.c.c8 = lc_map[u.c.c8];
+		r = mum_hash_step (r, u.pp);
 	}
 
 	u.pp = 0;
 	switch (leftover) {
+	case 7:
+		u.c.c7 = lc_map[(unsigned char)s[i++]];
+	case 6:
+		u.c.c6 = lc_map[(unsigned char)s[i++]];
+	case 5:
+		u.c.c5 = lc_map[(unsigned char)s[i++]];
+	case 4:
+		u.c.c4 = lc_map[(unsigned char)s[i++]];
 	case 3:
 		u.c.c3 = lc_map[(unsigned char)s[i++]];
 	case 2:
 		u.c.c2 = lc_map[(unsigned char)s[i++]];
 	case 1:
 		u.c.c1 = lc_map[(unsigned char)s[i]];
-		XXH64_update (&st, &u.pp, leftover);
+		r = mum_hash_step (r, u.pp);
 		break;
 	}
 
-	return XXH64_digest (&st);
+	return mum_hash_finish (r);
 }
-#else
-static inline uint32_t
-ucl_hash_caseless_func (const ucl_object_t *o)
-{
-	unsigned len = o->keylen;
-	unsigned leftover = o->keylen % 4;
-	unsigned fp, i;
-	const uint8_t* s = (const uint8_t*)o->key;
-	union {
-		struct {
-			unsigned char c1, c2, c3, c4;
-		} c;
-		uint32_t pp;
-	} u;
-	XXH32_state_t st;
-
-	fp = len - leftover;
-	XXH32_reset (&st, ucl_hash_seed ());
-
-	for (i = 0; i != fp; i += 4) {
-		u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
-		u.c.c1 = lc_map[u.c.c1];
-		u.c.c2 = lc_map[u.c.c2];
-		u.c.c3 = lc_map[u.c.c3];
-		u.c.c4 = lc_map[u.c.c4];
-		XXH32_update (&st, &u.pp, sizeof (u));
-	}
-
-	u.pp = 0;
-	switch (leftover) {
-	case 3:
-		u.c.c3 = lc_map[(unsigned char)s[i++]];
-	case 2:
-		u.c.c2 = lc_map[(unsigned char)s[i++]];
-	case 1:
-		u.c.c1 = lc_map[(unsigned char)s[i]];
-		XXH32_update (&st, &u.pp, leftover);
-		break;
-	}
-
-	return XXH32_digest (&st);
-}
-#endif
 
 static inline int
 ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2)

Modified: head/contrib/libucl/src/ucl_internal.h
==============================================================================
--- head/contrib/libucl/src/ucl_internal.h	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/src/ucl_internal.h	Sat Jun  4 14:57:25 2016	(r301339)
@@ -93,7 +93,6 @@
 #include "uthash.h"
 #include "ucl.h"
 #include "ucl_hash.h"
-#include "xxhash.h"
 
 #ifdef HAVE_OPENSSL
 #include <openssl/evp.h>

Modified: head/contrib/libucl/src/ucl_msgpack.c
==============================================================================
--- head/contrib/libucl/src/ucl_msgpack.c	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/src/ucl_msgpack.c	Sat Jun  4 14:57:25 2016	(r301339)
@@ -1423,6 +1423,10 @@ ucl_msgpack_parse_int (struct ucl_parser
 	int16_t iv16;
 	int32_t iv32;
 	int64_t iv64;
+	uint16_t uiv16;
+	uint32_t uiv32;
+	uint64_t uiv64;
+
 
 	if (len > remain) {
 		return -1;
@@ -1455,7 +1459,9 @@ ucl_msgpack_parse_int (struct ucl_parser
 		len = 2;
 		break;
 	case msgpack_uint16:
-		obj->value.iv = FROM_BE16 (*(uint16_t *)pos);
+		memcpy (&uiv16, pos, sizeof (uiv16));
+		uiv16 = FROM_BE16 (uiv16);
+		obj->value.iv = uiv16;
 		len = 2;
 		break;
 	case msgpack_int32:
@@ -1465,7 +1471,9 @@ ucl_msgpack_parse_int (struct ucl_parser
 		len = 4;
 		break;
 	case msgpack_uint32:
-		obj->value.iv = FROM_BE32 (*(uint32_t *)pos);
+		memcpy(&uiv32, pos, sizeof(uiv32));
+		uiv32 = FROM_BE32(uiv32);
+		obj->value.iv = uiv32;
 		len = 4;
 		break;
 	case msgpack_int64:
@@ -1475,7 +1483,9 @@ ucl_msgpack_parse_int (struct ucl_parser
 		len = 8;
 		break;
 	case msgpack_uint64:
-		obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+		memcpy(&uiv64, pos, sizeof(uiv64));
+		uiv64 = FROM_BE64(uiv64);
+		obj->value.iv = uiv64;
 		len = 8;
 		break;
 	default:
@@ -1498,6 +1508,7 @@ ucl_msgpack_parse_float (struct ucl_pars
 		uint32_t i;
 		float f;
 	} d;
+	uint64_t uiv64;
 
 	if (len > remain) {
 		return -1;
@@ -1507,13 +1518,16 @@ ucl_msgpack_parse_float (struct ucl_pars
 
 	switch (fmt) {
 	case msgpack_float32:
-		d.i = FROM_BE32 (*(uint32_t *)pos);
+		memcpy(&d.i, pos, sizeof(d.i));
+		d.i = FROM_BE32(d.i);
 		/* XXX: can be slow */
 		obj->value.dv = d.f;
 		len = 4;
 		break;
 	case msgpack_float64:
-		obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+		memcpy(&uiv64, pos, sizeof(uiv64));
+		uiv64 = FROM_BE64(uiv64);
+		obj->value.iv = uiv64;
 		len = 8;
 		break;
 	default:

Modified: head/contrib/libucl/src/ucl_parser.c
==============================================================================
--- head/contrib/libucl/src/ucl_parser.c	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/src/ucl_parser.c	Sat Jun  4 14:57:25 2016	(r301339)
@@ -2597,12 +2597,7 @@ ucl_parser_add_chunk_full (struct ucl_pa
 		return false;
 	}
 
-	if (len == 0) {
-		parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
-		return true;
-	}
-
-	if (data == NULL) {
+	if (data == NULL && len != 0) {
 		ucl_create_err (&parser->err, "invalid chunk added");
 		return false;
 	}
@@ -2613,6 +2608,7 @@ ucl_parser_add_chunk_full (struct ucl_pa
 			ucl_create_err (&parser->err, "cannot allocate chunk structure");
 			return false;
 		}
+
 		chunk->begin = data;
 		chunk->remain = len;
 		chunk->pos = chunk->begin;
@@ -2631,12 +2627,27 @@ ucl_parser_add_chunk_full (struct ucl_pa
 			return false;
 		}
 
-		switch (parse_type) {
-		default:
-		case UCL_PARSE_UCL:
-			return ucl_state_machine (parser);
-		case UCL_PARSE_MSGPACK:
-			return ucl_parse_msgpack (parser);
+		if (len > 0) {
+			/* Need to parse something */
+			switch (parse_type) {
+			default:
+			case UCL_PARSE_UCL:
+				return ucl_state_machine (parser);
+			case UCL_PARSE_MSGPACK:
+				return ucl_parse_msgpack (parser);
+			}
+		}
+		else {
+			/* Just add empty chunk and go forward */
+			if (parser->top_obj == NULL) {
+				/*
+				 * In case of empty object, create one to indicate that we've
+				 * read something
+				 */
+				parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
+			}
+
+			return true;
 		}
 	}
 

Modified: head/contrib/libucl/src/ucl_util.c
==============================================================================
--- head/contrib/libucl/src/ucl_util.c	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/src/ucl_util.c	Sat Jun  4 14:57:25 2016	(r301339)
@@ -975,6 +975,7 @@ ucl_include_file_single (const unsigned 
 		if (params->soft_fail) {
 			return false;
 		}
+
 		return (!params->must_exist || false);
 	}
 
@@ -1172,11 +1173,14 @@ ucl_include_file_single (const unsigned 
 
 	res = ucl_parser_add_chunk_full (parser, buf, buflen, params->priority,
 			params->strat, params->parse_type);
-	if (!res && !params->must_exist) {
-		/* Free error */
-		utstring_free (parser->err);
-		parser->err = NULL;
-		parser->state = UCL_STATE_AFTER_VALUE;
+
+	if (!res) {
+		if (!params->must_exist) {
+			/* Free error */
+			utstring_free (parser->err);
+			parser->err = NULL;
+			res = true;
+		}
 	}
 
 	/* Stop nesting the include, take 1 level off the stack */
@@ -1849,6 +1853,9 @@ ucl_parser_add_fd_priority (struct ucl_p
 			fd, strerror (errno));
 		return false;
 	}
+	if (st.st_size == 0) {
+		return true;
+	}
 	if ((buf = ucl_mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
 		ucl_create_err (&parser->err, "cannot mmap fd %d: %s",
 			fd, strerror (errno));

Modified: head/contrib/libucl/uthash/uthash.h
==============================================================================
--- head/contrib/libucl/uthash/uthash.h	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/contrib/libucl/uthash/uthash.h	Sat Jun  4 14:57:25 2016	(r301339)
@@ -22,12 +22,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI
 */
 
 #ifndef UTHASH_H
-#define UTHASH_H 
+#define UTHASH_H
 
 #include <string.h>   /* memcmp,strlen */
 #include <stddef.h>   /* ptrdiff_t */
 #include <stdlib.h>   /* exit() */
-#include "xxhash.h"
+#include "mum.h"
 
 /* These macros use decltype or the earlier __typeof GNU extension.
    As decltype is only available in newer compilers (VS2010 or gcc 4.3+
@@ -50,7 +50,7 @@ do {                                    
   char **_da_dst = (char**)(&(dst));                                             \
   *_da_dst = (char*)(src);                                                       \
 } while(0)
-#else 
+#else
 #define DECLTYPE_ASSIGN(dst,src)                                                 \
 do {                                                                             \
   (dst) = DECLTYPE(dst)(src);                                                    \
@@ -115,12 +115,12 @@ do {                                    
   if (!((tbl)->bloom_bv))  { uthash_fatal( "out of memory"); }                   \
   memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN);                                \
   (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE;                                       \
-} while (0) 
+} while (0)
 
 #define HASH_BLOOM_FREE(tbl)                                                     \
 do {                                                                             \
   uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN);                              \
-} while (0) 
+} while (0)
 
 #define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
 #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
@@ -132,9 +132,9 @@ do {                                    
   HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
 
 #else
-#define HASH_BLOOM_MAKE(tbl) 
-#define HASH_BLOOM_FREE(tbl) 
-#define HASH_BLOOM_ADD(tbl,hashv) 
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
 #define HASH_BLOOM_TEST(tbl,hashv) (1)
 #define HASH_BLOOM_BYTELEN 0
 #endif
@@ -170,7 +170,7 @@ do {                                    
   };                                                                             \
   HASH_ADD(hh,head,fieldname,keylen_in,add);                                     \
 } while(0)
- 
+
 #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add)                            \
 do {                                                                             \
  unsigned _ha_bkt;                                                               \
@@ -328,10 +328,10 @@ do {                                    
     }                                                                            \
 } while (0)
 #else
-#define HASH_FSCK(hh,head) 
+#define HASH_FSCK(hh,head)
 #endif
 
-/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to 
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
  * the descriptor to which this macro is defined for tuning the hash function.
  * The app can #include <unistd.h> to get the prototype for write(2). */
 #ifdef HASH_EMIT_KEYS
@@ -341,12 +341,12 @@ do {                                    
     write(HASH_EMIT_KEYS, &_klen, sizeof(_klen));                                \
     write(HASH_EMIT_KEYS, keyptr, fieldlen);                                     \
 } while (0)
-#else 
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)                    
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
 #endif
 
 /* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
-#ifdef HASH_FUNCTION 
+#ifdef HASH_FUNCTION
 #define HASH_FCN HASH_FUNCTION
 #else
 #define HASH_FCN HASH_XX
@@ -356,14 +356,14 @@ do {                                    
 
 #define HASH_XX(key,keylen,num_bkts,hashv,bkt)                                  \
 do {                                                                             \
-  hashv = XXH32 (key, keylen, XX_HASH_PRIME);                                    \
+  hashv = mum_hash (key, keylen, XX_HASH_PRIME);                                 \
   bkt = (hashv) & (num_bkts-1);                                                  \
 } while (0)
 
 
 
 /* key comparison function; return 0 if keys equal */
-#define HASH_KEYCMP(a,b,len) memcmp(a,b,len) 
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
 
 /* iterate over items in a known bucket to find desired item */
 #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out)                       \
@@ -404,36 +404,36 @@ do {                                    
     }                                                                            \
     if (hh_del->hh_next) {                                                       \
         hh_del->hh_next->hh_prev = hh_del->hh_prev;                              \
-    }                                                                
+    }
 
 /* Bucket expansion has the effect of doubling the number of buckets
  * and redistributing the items into the new buckets. Ideally the
  * items will distribute more or less evenly into the new buckets
  * (the extent to which this is true is a measure of the quality of
- * the hash function as it applies to the key domain). 
- * 
+ * the hash function as it applies to the key domain).
+ *
  * With the items distributed into more buckets, the chain length
  * (item count) in each bucket is reduced. Thus by expanding buckets
- * the hash keeps a bound on the chain length. This bounded chain 
+ * the hash keeps a bound on the chain length. This bounded chain
  * length is the essence of how a hash provides constant time lookup.
- * 
+ *
  * The calculation of tbl->ideal_chain_maxlen below deserves some
  * explanation. First, keep in mind that we're calculating the ideal
  * maximum chain length based on the *new* (doubled) bucket count.
  * In fractions this is just n/b (n=number of items,b=new num buckets).
- * Since the ideal chain length is an integer, we want to calculate 
+ * Since the ideal chain length is an integer, we want to calculate
  * ceil(n/b). We don't depend on floating point arithmetic in this
  * hash, so to calculate ceil(n/b) with integers we could write
- * 
+ *
  *      ceil(n/b) = (n/b) + ((n%b)?1:0)
- * 
+ *
  * and in fact a previous version of this hash did just that.
  * But now we have improved things a bit by recognizing that b is
  * always a power of two. We keep its base 2 log handy (call it lb),
  * so now we can write this with a bit shift and logical AND:
- * 
+ *
  *      ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
- * 
+ *
  */
 #define HASH_EXPAND_BUCKETS(tbl)                                                 \
 do {                                                                             \
@@ -485,7 +485,7 @@ do {                                    
 
 
 /* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
-/* Note that HASH_SORT assumes the hash handle name to be hh. 
+/* Note that HASH_SORT assumes the hash handle name to be hh.
  * HASH_SRT was added to allow the hash handle name to be passed in. */
 #define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
 #define HASH_SRT(hh,head,cmpfcn)                                                 \
@@ -575,10 +575,10 @@ do {                                    
  }                                                                               \
 } while (0)
 
-/* This function selects items from one hash into another hash. 
- * The end result is that the selected items have dual presence 
- * in both hashes. There is no copy of the items made; rather 
- * they are added into the new hash through a secondary hash 
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary hash
  * hash handle that must be present in the structure. */
 #define HASH_SELECT(hh_dst, dst, hh_src, src, cond)                              \
 do {                                                                             \
@@ -638,7 +638,7 @@ do {                                    
 #ifdef NO_DECLTYPE
 #define HASH_ITER(hh,head,el,tmp)                                                \
 for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL);       \
-  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL)) 
+  el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
 #else
 #define HASH_ITER(hh,head,el,tmp)                                                \
 for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);                 \
@@ -646,7 +646,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head
 #endif
 
 /* obtain a count of items in the hash */
-#define HASH_COUNT(head) HASH_CNT(hh,head) 
+#define HASH_COUNT(head) HASH_CNT(hh,head)
 #define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
 
 typedef struct UT_hash_bucket {
@@ -655,7 +655,7 @@ typedef struct UT_hash_bucket {
 
    /* expand_mult is normally set to 0. In this situation, the max chain length
     * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
-    * the bucket's chain exceeds this length, bucket expansion is triggered). 
+    * the bucket's chain exceeds this length, bucket expansion is triggered).
     * However, setting expand_mult to a non-zero value delays bucket expansion
     * (that would be triggered by additions to this particular bucket)
     * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
@@ -663,7 +663,7 @@ typedef struct UT_hash_bucket {
     * multiplier is to reduce bucket expansions, since they are expensive, in
     * situations where we know that a particular bucket tends to be overused.
     * It is better to let its chain length grow to a longer yet-still-bounded
-    * value, than to do an O(n) bucket expansion too often. 
+    * value, than to do an O(n) bucket expansion too often.
     */
    unsigned expand_mult;
 
@@ -689,7 +689,7 @@ typedef struct UT_hash_table {
     * hash distribution; reaching them in a chain traversal takes >ideal steps */
    unsigned nonideal_items;
 
-   /* ineffective expands occur when a bucket doubling was performed, but 
+   /* ineffective expands occur when a bucket doubling was performed, but
     * afterward, more than half the items in the hash had nonideal chain
     * positions. If this happens on two consecutive expansions we inhibit any
     * further expansion, as it's not helping; this happens when the hash

Modified: head/lib/libucl/Makefile
==============================================================================
--- head/lib/libucl/Makefile	Sat Jun  4 14:57:08 2016	(r301338)
+++ head/lib/libucl/Makefile	Sat Jun  4 14:57:25 2016	(r301339)
@@ -14,8 +14,7 @@ SRCS=		ucl_emitter_streamline.c \
 		ucl_parser.c  \
 		ucl_schema.c \
 		ucl_sexp.c \
-		ucl_util.c \

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***