svn commit: r301339 - in head: contrib/libucl/src contrib/libucl/uthash lib/libucl
Baptiste Daroussin
bapt at FreeBSD.org
Sat Jun 4 14:57:27 UTC 2016
Author: bapt
Date: Sat Jun 4 14:57:25 2016
New Revision: 301339
URL: https://svnweb.freebsd.org/changeset/base/301339
Log:
Import libucl snapshot 20160604
It replaces xxhash with mumhash
It fixes issues with msgpack on non x86
Added:
head/contrib/libucl/src/mum.h
- copied unchanged from r301336, vendor/libucl/dist/src/mum.h
Deleted:
head/contrib/libucl/src/xxhash.c
head/contrib/libucl/src/xxhash.h
Modified:
head/contrib/libucl/src/Makefile.am
head/contrib/libucl/src/ucl_hash.c
head/contrib/libucl/src/ucl_internal.h
head/contrib/libucl/src/ucl_msgpack.c
head/contrib/libucl/src/ucl_parser.c
head/contrib/libucl/src/ucl_util.c
head/contrib/libucl/uthash/uthash.h
head/lib/libucl/Makefile
Directory Properties:
head/contrib/libucl/ (props changed)
Modified: head/contrib/libucl/src/Makefile.am
==============================================================================
--- head/contrib/libucl/src/Makefile.am Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/src/Makefile.am Sat Jun 4 14:57:25 2016 (r301339)
@@ -12,8 +12,7 @@ libucl_la_SOURCES= ucl_emitter.c \
ucl_schema.c \
ucl_util.c \
ucl_msgpack.c \
- ucl_sexp.c \
- xxhash.c
+ ucl_sexp.c
libucl_la_CFLAGS= $(libucl_common_cflags) \
@CURL_CFLAGS@
libucl_la_LDFLAGS = -version-info @SO_VERSION@
@@ -25,7 +24,7 @@ libucl_la_LIBADD= @LIBFETCH_LIBS@ \
include_HEADERS= $(top_srcdir)/include/ucl.h \
$(top_srcdir)/include/ucl++.h
noinst_HEADERS= ucl_internal.h \
- xxhash.h \
+ mum.h \
ucl_hash.h \
ucl_chartable.h \
tree.h
Copied: head/contrib/libucl/src/mum.h (from r301336, vendor/libucl/dist/src/mum.h)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/contrib/libucl/src/mum.h Sat Jun 4 14:57:25 2016 (r301339, copy of r301336, vendor/libucl/dist/src/mum.h)
@@ -0,0 +1,417 @@
+/* Copyright (c) 2016 Vladimir Makarov <vmakarov at gcc.gnu.org>
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/* This file implements MUM (MUltiply and Mix) hashing. We randomize
+ input data by 64x64-bit multiplication and mixing hi- and low-parts
+ of the multiplication result by using an addition and then mix it
+ into the current state. We use prime numbers randomly generated
+ with the equal probability of their bit values for the
+ multiplication. When all primes are used once, the state is
+ randomized and the same prime numbers are used again for data
+ randomization.
+
+ The MUM hashing passes all SMHasher tests. Pseudo Random Number
+ Generator based on MUM also passes NIST Statistical Test Suite for
+ Random and Pseudorandom Number Generators for Cryptographic
+ Applications (version 2.2.1) with 1000 bitstreams each containing
+ 1M bits. MUM hashing is also faster than Spooky64 and City64 on small
+ strings (at least up to 512-bit) on Haswell and Power7. The MUM bulk
+ speed (speed on very long data) is higher than Spooky and City on
+ Power7. On Haswell the bulk speed is higher than Spooky's and
+ close to City's speed. */
+
+#ifndef __MUM_HASH__
+#define __MUM_HASH__
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef _MSC_VER
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Macro saying to use 128-bit integers implemented by GCC for some
+ targets. */
+#ifndef _MUM_USE_INT128
+/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
+ HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
+ otherwise int. */
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+#define _MUM_USE_INT128 1
+#else
+#define _MUM_USE_INT128 0
+#endif
+#endif
+
+#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
+#define _MUM_FRESH_GCC
+#endif
+
+#if defined(__GNUC__) && !defined(__llvm__)
+#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
+#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#else
+#define _MUM_ATTRIBUTE_UNUSED
+#define _MUM_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#endif
+
+
+/* Here are different primes randomly generated with the equal
+ probability of their bit values. They are used to randomize input
+ values. */
+static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
+static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
+static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
+static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
+static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
+static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
+static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
+
+static uint64_t _mum_primes [] = {
+ 0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
+ 0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
+ 0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
+ 0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
+};
+
+/* Multiply 64-bit V and P as a full 128-bit product and return the
+ (wrapping) sum of the high and low 64-bit halves of the result. */
+static inline uint64_t
+_mum (uint64_t v, uint64_t p) {
+ uint64_t hi, lo;
+#if _MUM_USE_INT128
+#if defined(__aarch64__)
+ /* AARCH64 needs 2 insns to calculate 128-bit result of the
+ multiplication. If we use a generic code we actually call a
+ function doing 128x128->128 bit multiplication. The function is
+ very slow. */
+ lo = v * p; /* low 64 bits; umulh below supplies the high 64 bits */
+ __asm__ ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+#else
+ __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+ hi = (uint64_t) (r >> 64);
+ lo = (uint64_t) r;
+#endif
+#else
+ /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+ bit multiplication. */
+ uint64_t hv = v >> 32, hp = p >> 32;
+ uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+ uint64_t rh = hv * hp;
+ uint64_t rm_0 = hv * lp;
+ uint64_t rm_1 = hp * lv;
+ uint64_t rl = lv * lp;
+ uint64_t t, carry = 0;
+
+ /* We could ignore a carry bit here if we did not care about the
+ same hash for 32-bit and 64-bit targets. */
+ t = rl + (rm_0 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+ carry = t < rl;
+#endif
+ lo = t + (rm_1 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+ carry += lo < t;
+#endif
+ hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+#endif
+ /* We could use XOR here too but, for some reasons, on Haswell and
+ Power7 using an addition improves hashing performance by 10% for
+ small strings. */
+ return hi + lo;
+}
+
+#if defined(_MSC_VER) /* byte-swap helpers; names must match the calls in _mum_le/_mum_le32 */
+#define _mum_bswap32(x) _byteswap_ulong (x)
+#define _mum_bswap64(x) _byteswap_uint64 (x)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define _mum_bswap32(x) OSSwapInt32 (x)
+#define _mum_bswap64(x) OSSwapInt64 (x)
+#elif defined(__GNUC__)
+#define _mum_bswap32(x) __builtin_bswap32 (x)
+#define _mum_bswap64(x) __builtin_bswap64 (x)
+#else /* fall back to the <byteswap.h> bswap_32/bswap_64 functions */
+#include <byteswap.h>
+#define _mum_bswap32(x) bswap_32 (x)
+#define _mum_bswap64(x) bswap_64 (x)
+#endif
+
+static inline uint64_t
+_mum_le (uint64_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+ return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return _mum_bswap64 (v);
+#else
+#error "Unknown endianess"
+#endif
+}
+
+static inline uint32_t
+_mum_le32 (uint32_t v) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || !defined(MUM_TARGET_INDEPENDENT_HASH)
+ return v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ return _mum_bswap32 (v);
+#else
+#error "Unknown endianess"
+#endif
+}
+
+/* Macro defining how many times the most nested loop in
+ _mum_hash_aligned will be unrolled by the compiler (although it can
+ make an own decision:). Use only a constant here to help a
+ compiler to unroll a major loop.
+
+ The macro value affects the result hash for strings > 128 bit. The
+ unroll factor greatly affects the hashing speed. We prefer the
+ speed. */
+#ifndef _MUM_UNROLL_FACTOR_POWER
+#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 3
+#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 4
+#else
+#define _MUM_UNROLL_FACTOR_POWER 2
+#endif
+#endif
+
+#if _MUM_UNROLL_FACTOR_POWER < 1
+#error "too small unroll factor"
+#elif _MUM_UNROLL_FACTOR_POWER > 4
+#error "We have not enough primes for such unroll factor"
+#endif
+
+#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
+
+static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
+_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
+ uint64_t result = start;
+ const unsigned char *str = (const unsigned char *) key;
+ uint64_t u64;
+ int i;
+ size_t n;
+
+ result = _mum (result, _mum_block_start_prime);
+ while (len > _MUM_UNROLL_FACTOR * sizeof (uint64_t)) {
+ /* This loop could be vectorized when we have vector insns for
+ 64x64->128-bit multiplication. AVX2 currently only have a
+ vector insn for 4 32x32->64-bit multiplication. */
+ for (i = 0; i < _MUM_UNROLL_FACTOR; i++)
+ result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+ len -= _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+ str += _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+ /* We will use the same prime numbers on the next iterations --
+ randomize the state. */
+ result = _mum (result, _mum_unroll_prime);
+ }
+ n = len / sizeof (uint64_t);
+ for (i = 0; i < (int)n; i++)
+ result ^= _mum (_mum_le (((uint64_t *) str)[i]), _mum_primes[i]);
+ len -= n * sizeof (uint64_t); str += n * sizeof (uint64_t);
+ switch (len) {
+ case 7:
+ u64 = _mum_le32 (*(uint32_t *) str);
+ u64 |= (uint64_t) str[4] << 32;
+ u64 |= (uint64_t) str[5] << 40;
+ u64 |= (uint64_t) str[6] << 48;
+ return result ^ _mum (u64, _mum_tail_prime);
+ case 6:
+ u64 = _mum_le32 (*(uint32_t *) str);
+ u64 |= (uint64_t) str[4] << 32;
+ u64 |= (uint64_t) str[5] << 40;
+ return result ^ _mum (u64, _mum_tail_prime);
+ case 5:
+ u64 = _mum_le32 (*(uint32_t *) str);
+ u64 |= (uint64_t) str[4] << 32;
+ return result ^ _mum (u64, _mum_tail_prime);
+ case 4:
+ u64 = _mum_le32 (*(uint32_t *) str);
+ return result ^ _mum (u64, _mum_tail_prime);
+ case 3:
+ u64 = str[0];
+ u64 |= (uint64_t) str[1] << 8;
+ u64 |= (uint64_t) str[2] << 16;
+ return result ^ _mum (u64, _mum_tail_prime);
+ case 2:
+ u64 = str[0];
+ u64 |= (uint64_t) str[1] << 8;
+ return result ^ _mum (u64, _mum_tail_prime);
+ case 1:
+ u64 = str[0];
+ return result ^ _mum (u64, _mum_tail_prime);
+ }
+ return result;
+}
+
+/* Final randomization of H. */
+static inline uint64_t
+_mum_final (uint64_t h) {
+ h ^= _mum (h, _mum_finish_prime1);
+ h ^= _mum (h, _mum_finish_prime2);
+ return h;
+}
+
+#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
+
+/* We want to use BMI2 insn MULX instead of generic x86-64 MULQ where
+ it is possible. Although on modern Intel processors MULQ takes
+ 3-cycles vs. 4 for MULX, MULX permits more freedom in insn
+ scheduling as it uses less fixed registers. */
+static inline uint64_t _MUM_TARGET("arch=haswell")
+_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
+ return _mum_final (_mum_hash_aligned (seed + len, key, len));
+}
+#endif
+
+#ifndef _MUM_UNALIGNED_ACCESS
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
+ || defined(__s390__) || defined(__m32c__) || defined(cris) \
+ || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
+ || defined(__aarch64__)
+#define _MUM_UNALIGNED_ACCESS 1
+#else
+#define _MUM_UNALIGNED_ACCESS 0
+#endif
+#endif
+
+/* When we need an aligned access to data being hashed we move part of
+ the unaligned data to an aligned block of given size and then
+ process it, repeating processing the data by the block. */
+#ifndef _MUM_BLOCK_LEN
+#define _MUM_BLOCK_LEN 1024
+#endif
+
+#if _MUM_BLOCK_LEN < 8
+#error "too small block length"
+#endif
+
+static inline uint64_t
+#if defined(__x86_64__)
+_MUM_TARGET("inline-all-stringops")
+#endif
+_mum_hash_default (const void *key, size_t len, uint64_t seed) {
+ uint64_t result;
+ const unsigned char *str = (const unsigned char *) key;
+ size_t block_len;
+ uint64_t buf[_MUM_BLOCK_LEN / sizeof (uint64_t)];
+
+ result = seed + len;
+ if (_MUM_UNALIGNED_ACCESS || ((size_t) str & 0x7) == 0)
+ result = _mum_hash_aligned (result, key, len);
+ else {
+ while (len != 0) {
+ block_len = len < _MUM_BLOCK_LEN ? len : _MUM_BLOCK_LEN;
+ memmove (buf, str, block_len);
+ result = _mum_hash_aligned (result, buf, block_len);
+ len -= block_len;
+ str += block_len;
+ }
+ }
+ return _mum_final (result);
+}
+
+static inline uint64_t
+_mum_next_factor (void) {
+ uint64_t start = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ start = (start << 8) | rand() % 256;
+ return start;
+}
+
+/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
+
+/* Set random multiplicators depending on SEED. */
+static inline void
+mum_hash_randomize (uint64_t seed) {
+ int i;
+
+ srand (seed);
+ _mum_hash_step_prime = _mum_next_factor ();
+ _mum_key_step_prime = _mum_next_factor ();
+ _mum_finish_prime1 = _mum_next_factor ();
+ _mum_finish_prime2 = _mum_next_factor ();
+ _mum_block_start_prime = _mum_next_factor ();
+ _mum_unroll_prime = _mum_next_factor ();
+ _mum_tail_prime = _mum_next_factor ();
+ for (i = 0; i < (int)(sizeof (_mum_primes) / sizeof (uint64_t)); i++)
+ _mum_primes[i] = _mum_next_factor ();
+}
+
+/* Start hashing data with SEED. Return the state. */
+static inline uint64_t
+mum_hash_init (uint64_t seed) {
+ return seed;
+}
+
+/* Process data KEY with the state H and return the updated state. */
+static inline uint64_t
+mum_hash_step (uint64_t h, uint64_t key)
+{
+ return _mum (h, _mum_hash_step_prime) ^ _mum (key, _mum_key_step_prime);
+}
+
+/* Return the result of hashing using the current state H. */
+static inline uint64_t
+mum_hash_finish (uint64_t h) {
+ return _mum_final (h);
+}
+
+/* Fast hashing of KEY with SEED. The hash is always the same for the
+ same key on any target. */
+static inline size_t
+mum_hash64 (uint64_t key, uint64_t seed) {
+ return mum_hash_finish (mum_hash_step (mum_hash_init (seed), key));
+}
+
+/* Hash data KEY of length LEN and SEED. The hash depends on the
+ target endianness and the unroll factor. */
+static inline uint64_t
+mum_hash (const void *key, size_t len, uint64_t seed) {
+#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
+ static int avx2_support = 0;
+
+ if (avx2_support > 0)
+ return _mum_hash_avx2 (key, len, seed);
+ else if (! avx2_support) {
+ __builtin_cpu_init ();
+ avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
+ if (avx2_support > 0)
+ return _mum_hash_avx2 (key, len, seed);
+ }
+#endif
+ return _mum_hash_default (key, len, seed);
+}
+
+#endif
Modified: head/contrib/libucl/src/ucl_hash.c
==============================================================================
--- head/contrib/libucl/src/ucl_hash.c Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/src/ucl_hash.c Sat Jun 4 14:57:25 2016 (r301339)
@@ -25,6 +25,7 @@
#include "ucl_hash.h"
#include "khash.h"
#include "kvec.h"
+#include "mum.h"
#include <time.h>
#include <limits.h>
@@ -99,20 +100,11 @@ static const unsigned char lc_map[256] =
#define UCL64_BIT_HASH 1
#endif
-#ifdef UCL64_BIT_HASH
static inline uint32_t
ucl_hash_func (const ucl_object_t *o)
{
- return XXH64 (o->key, o->keylen, ucl_hash_seed ());
+ return mum_hash (o->key, o->keylen, ucl_hash_seed ());
}
-#else
-static inline uint32_t
-ucl_hash_func (const ucl_object_t *o)
-{
- return XXH32 (o->key, o->keylen, ucl_hash_seed ());
-}
-#endif
-
static inline int
ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
{
@@ -126,91 +118,60 @@ ucl_hash_equal (const ucl_object_t *k1,
KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1,
ucl_hash_func, ucl_hash_equal)
-#ifdef UCL64_BIT_HASH
static inline uint32_t
ucl_hash_caseless_func (const ucl_object_t *o)
{
unsigned len = o->keylen;
- unsigned leftover = o->keylen % 4;
+ unsigned leftover = o->keylen % 8;
unsigned fp, i;
const uint8_t* s = (const uint8_t*)o->key;
union {
struct {
- unsigned char c1, c2, c3, c4;
+ unsigned char c1, c2, c3, c4, c5, c6, c7, c8;
} c;
- uint32_t pp;
+ uint64_t pp;
} u;
- XXH64_state_t st;
+ uint64_t r;
fp = len - leftover;
- XXH64_reset (&st, ucl_hash_seed ());
+ r = ucl_hash_seed ();
- for (i = 0; i != fp; i += 4) {
+ for (i = 0; i != fp; i += 8) {
u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
+ u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7];
u.c.c1 = lc_map[u.c.c1];
u.c.c2 = lc_map[u.c.c2];
u.c.c3 = lc_map[u.c.c3];
u.c.c4 = lc_map[u.c.c4];
- XXH64_update (&st, &u.pp, sizeof (u));
+ u.c.c5 = lc_map[u.c.c5]; /* lowercase bytes 5..8 in place, as is done for bytes 1..4 */
+ u.c.c6 = lc_map[u.c.c6];
+ u.c.c7 = lc_map[u.c.c7];
+ u.c.c8 = lc_map[u.c.c8];
+ r = mum_hash_step (r, u.pp);
}
u.pp = 0;
switch (leftover) {
+ case 7:
+ u.c.c7 = lc_map[(unsigned char)s[i++]];
+ case 6:
+ u.c.c6 = lc_map[(unsigned char)s[i++]];
+ case 5:
+ u.c.c5 = lc_map[(unsigned char)s[i++]];
+ case 4:
+ u.c.c4 = lc_map[(unsigned char)s[i++]];
case 3:
u.c.c3 = lc_map[(unsigned char)s[i++]];
case 2:
u.c.c2 = lc_map[(unsigned char)s[i++]];
case 1:
u.c.c1 = lc_map[(unsigned char)s[i]];
- XXH64_update (&st, &u.pp, leftover);
+ r = mum_hash_step (r, u.pp);
break;
}
- return XXH64_digest (&st);
+ return mum_hash_finish (r);
}
-#else
-static inline uint32_t
-ucl_hash_caseless_func (const ucl_object_t *o)
-{
- unsigned len = o->keylen;
- unsigned leftover = o->keylen % 4;
- unsigned fp, i;
- const uint8_t* s = (const uint8_t*)o->key;
- union {
- struct {
- unsigned char c1, c2, c3, c4;
- } c;
- uint32_t pp;
- } u;
- XXH32_state_t st;
-
- fp = len - leftover;
- XXH32_reset (&st, ucl_hash_seed ());
-
- for (i = 0; i != fp; i += 4) {
- u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
- u.c.c1 = lc_map[u.c.c1];
- u.c.c2 = lc_map[u.c.c2];
- u.c.c3 = lc_map[u.c.c3];
- u.c.c4 = lc_map[u.c.c4];
- XXH32_update (&st, &u.pp, sizeof (u));
- }
-
- u.pp = 0;
- switch (leftover) {
- case 3:
- u.c.c3 = lc_map[(unsigned char)s[i++]];
- case 2:
- u.c.c2 = lc_map[(unsigned char)s[i++]];
- case 1:
- u.c.c1 = lc_map[(unsigned char)s[i]];
- XXH32_update (&st, &u.pp, leftover);
- break;
- }
-
- return XXH32_digest (&st);
-}
-#endif
static inline int
ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2)
Modified: head/contrib/libucl/src/ucl_internal.h
==============================================================================
--- head/contrib/libucl/src/ucl_internal.h Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/src/ucl_internal.h Sat Jun 4 14:57:25 2016 (r301339)
@@ -93,7 +93,6 @@
#include "uthash.h"
#include "ucl.h"
#include "ucl_hash.h"
-#include "xxhash.h"
#ifdef HAVE_OPENSSL
#include <openssl/evp.h>
Modified: head/contrib/libucl/src/ucl_msgpack.c
==============================================================================
--- head/contrib/libucl/src/ucl_msgpack.c Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/src/ucl_msgpack.c Sat Jun 4 14:57:25 2016 (r301339)
@@ -1423,6 +1423,10 @@ ucl_msgpack_parse_int (struct ucl_parser
int16_t iv16;
int32_t iv32;
int64_t iv64;
+ uint16_t uiv16;
+ uint32_t uiv32;
+ uint64_t uiv64;
+
if (len > remain) {
return -1;
@@ -1455,7 +1459,9 @@ ucl_msgpack_parse_int (struct ucl_parser
len = 2;
break;
case msgpack_uint16:
- obj->value.iv = FROM_BE16 (*(uint16_t *)pos);
+ memcpy (&uiv16, pos, sizeof (uiv16));
+ uiv16 = FROM_BE16 (uiv16);
+ obj->value.iv = uiv16;
len = 2;
break;
case msgpack_int32:
@@ -1465,7 +1471,9 @@ ucl_msgpack_parse_int (struct ucl_parser
len = 4;
break;
case msgpack_uint32:
- obj->value.iv = FROM_BE32 (*(uint32_t *)pos);
+ memcpy(&uiv32, pos, sizeof(uiv32));
+ uiv32 = FROM_BE32(uiv32);
+ obj->value.iv = uiv32;
len = 4;
break;
case msgpack_int64:
@@ -1475,7 +1483,9 @@ ucl_msgpack_parse_int (struct ucl_parser
len = 8;
break;
case msgpack_uint64:
- obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+ memcpy(&uiv64, pos, sizeof(uiv64));
+ uiv64 = FROM_BE64(uiv64);
+ obj->value.iv = uiv64;
len = 8;
break;
default:
@@ -1498,6 +1508,7 @@ ucl_msgpack_parse_float (struct ucl_pars
uint32_t i;
float f;
} d;
+ uint64_t uiv64;
if (len > remain) {
return -1;
@@ -1507,13 +1518,16 @@ ucl_msgpack_parse_float (struct ucl_pars
switch (fmt) {
case msgpack_float32:
- d.i = FROM_BE32 (*(uint32_t *)pos);
+ memcpy(&d.i, pos, sizeof(d.i));
+ d.i = FROM_BE32(d.i);
/* XXX: can be slow */
obj->value.dv = d.f;
len = 4;
break;
case msgpack_float64:
- obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+ memcpy(&uiv64, pos, sizeof(uiv64));
+ uiv64 = FROM_BE64(uiv64);
+ obj->value.iv = uiv64;
len = 8;
break;
default:
Modified: head/contrib/libucl/src/ucl_parser.c
==============================================================================
--- head/contrib/libucl/src/ucl_parser.c Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/src/ucl_parser.c Sat Jun 4 14:57:25 2016 (r301339)
@@ -2597,12 +2597,7 @@ ucl_parser_add_chunk_full (struct ucl_pa
return false;
}
- if (len == 0) {
- parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
- return true;
- }
-
- if (data == NULL) {
+ if (data == NULL && len != 0) {
ucl_create_err (&parser->err, "invalid chunk added");
return false;
}
@@ -2613,6 +2608,7 @@ ucl_parser_add_chunk_full (struct ucl_pa
ucl_create_err (&parser->err, "cannot allocate chunk structure");
return false;
}
+
chunk->begin = data;
chunk->remain = len;
chunk->pos = chunk->begin;
@@ -2631,12 +2627,27 @@ ucl_parser_add_chunk_full (struct ucl_pa
return false;
}
- switch (parse_type) {
- default:
- case UCL_PARSE_UCL:
- return ucl_state_machine (parser);
- case UCL_PARSE_MSGPACK:
- return ucl_parse_msgpack (parser);
+ if (len > 0) {
+ /* Need to parse something */
+ switch (parse_type) {
+ default:
+ case UCL_PARSE_UCL:
+ return ucl_state_machine (parser);
+ case UCL_PARSE_MSGPACK:
+ return ucl_parse_msgpack (parser);
+ }
+ }
+ else {
+ /* Just add empty chunk and go forward */
+ if (parser->top_obj == NULL) {
+ /*
+ * In case of empty object, create one to indicate that we've
+ * read something
+ */
+ parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
+ }
+
+ return true;
}
}
Modified: head/contrib/libucl/src/ucl_util.c
==============================================================================
--- head/contrib/libucl/src/ucl_util.c Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/src/ucl_util.c Sat Jun 4 14:57:25 2016 (r301339)
@@ -975,6 +975,7 @@ ucl_include_file_single (const unsigned
if (params->soft_fail) {
return false;
}
+
return (!params->must_exist || false);
}
@@ -1172,11 +1173,14 @@ ucl_include_file_single (const unsigned
res = ucl_parser_add_chunk_full (parser, buf, buflen, params->priority,
params->strat, params->parse_type);
- if (!res && !params->must_exist) {
- /* Free error */
- utstring_free (parser->err);
- parser->err = NULL;
- parser->state = UCL_STATE_AFTER_VALUE;
+
+ if (!res) {
+ if (!params->must_exist) {
+ /* Free error */
+ utstring_free (parser->err);
+ parser->err = NULL;
+ res = true;
+ }
}
/* Stop nesting the include, take 1 level off the stack */
@@ -1849,6 +1853,9 @@ ucl_parser_add_fd_priority (struct ucl_p
fd, strerror (errno));
return false;
}
+ if (st.st_size == 0) {
+ return true;
+ }
if ((buf = ucl_mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
ucl_create_err (&parser->err, "cannot mmap fd %d: %s",
fd, strerror (errno));
Modified: head/contrib/libucl/uthash/uthash.h
==============================================================================
--- head/contrib/libucl/uthash/uthash.h Sat Jun 4 14:57:08 2016 (r301338)
+++ head/contrib/libucl/uthash/uthash.h Sat Jun 4 14:57:25 2016 (r301339)
@@ -22,12 +22,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI
*/
#ifndef UTHASH_H
-#define UTHASH_H
+#define UTHASH_H
#include <string.h> /* memcmp,strlen */
#include <stddef.h> /* ptrdiff_t */
#include <stdlib.h> /* exit() */
-#include "xxhash.h"
+#include "mum.h"
/* These macros use decltype or the earlier __typeof GNU extension.
As decltype is only available in newer compilers (VS2010 or gcc 4.3+
@@ -50,7 +50,7 @@ do {
char **_da_dst = (char**)(&(dst)); \
*_da_dst = (char*)(src); \
} while(0)
-#else
+#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
@@ -115,12 +115,12 @@ do {
if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
(tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
-} while (0)
+} while (0)
#define HASH_BLOOM_FREE(tbl) \
do { \
uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
-} while (0)
+} while (0)
#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
@@ -132,9 +132,9 @@ do {
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#else
-#define HASH_BLOOM_MAKE(tbl)
-#define HASH_BLOOM_FREE(tbl)
-#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#define HASH_BLOOM_BYTELEN 0
#endif
@@ -170,7 +170,7 @@ do {
}; \
HASH_ADD(hh,head,fieldname,keylen_in,add); \
} while(0)
-
+
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
@@ -328,10 +328,10 @@ do {
} \
} while (0)
#else
-#define HASH_FSCK(hh,head)
+#define HASH_FSCK(hh,head)
#endif
-/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
* the descriptor to which this macro is defined for tuning the hash function.
* The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
@@ -341,12 +341,12 @@ do {
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
-#else
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif
/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
-#ifdef HASH_FUNCTION
+#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_XX
@@ -356,14 +356,14 @@ do {
#define HASH_XX(key,keylen,num_bkts,hashv,bkt) \
do { \
- hashv = XXH32 (key, keylen, XX_HASH_PRIME); \
+ hashv = mum_hash (key, keylen, XX_HASH_PRIME); \
bkt = (hashv) & (num_bkts-1); \
} while (0)
/* key comparison function; return 0 if keys equal */
-#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
@@ -404,36 +404,36 @@ do {
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
- }
+ }
/* Bucket expansion has the effect of doubling the number of buckets
* and redistributing the items into the new buckets. Ideally the
* items will distribute more or less evenly into the new buckets
* (the extent to which this is true is a measure of the quality of
- * the hash function as it applies to the key domain).
- *
+ * the hash function as it applies to the key domain).
+ *
* With the items distributed into more buckets, the chain length
* (item count) in each bucket is reduced. Thus by expanding buckets
- * the hash keeps a bound on the chain length. This bounded chain
+ * the hash keeps a bound on the chain length. This bounded chain
* length is the essence of how a hash provides constant time lookup.
- *
+ *
* The calculation of tbl->ideal_chain_maxlen below deserves some
* explanation. First, keep in mind that we're calculating the ideal
* maximum chain length based on the *new* (doubled) bucket count.
* In fractions this is just n/b (n=number of items,b=new num buckets).
- * Since the ideal chain length is an integer, we want to calculate
+ * Since the ideal chain length is an integer, we want to calculate
* ceil(n/b). We don't depend on floating point arithmetic in this
* hash, so to calculate ceil(n/b) with integers we could write
- *
+ *
* ceil(n/b) = (n/b) + ((n%b)?1:0)
- *
+ *
* and in fact a previous version of this hash did just that.
* But now we have improved things a bit by recognizing that b is
* always a power of two. We keep its base 2 log handy (call it lb),
* so now we can write this with a bit shift and logical AND:
- *
+ *
* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
- *
+ *
*/
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
@@ -485,7 +485,7 @@ do {
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
-/* Note that HASH_SORT assumes the hash handle name to be hh.
+/* Note that HASH_SORT assumes the hash handle name to be hh.
* HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
@@ -575,10 +575,10 @@ do {
} \
} while (0)
-/* This function selects items from one hash into another hash.
- * The end result is that the selected items have dual presence
- * in both hashes. There is no copy of the items made; rather
- * they are added into the new hash through a secondary hash
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary hash
* hash handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
@@ -638,7 +638,7 @@ do {
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
- el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
+ el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
@@ -646,7 +646,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head
#endif
/* obtain a count of items in the hash */
-#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
typedef struct UT_hash_bucket {
@@ -655,7 +655,7 @@ typedef struct UT_hash_bucket {
/* expand_mult is normally set to 0. In this situation, the max chain length
* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
- * the bucket's chain exceeds this length, bucket expansion is triggered).
+ * the bucket's chain exceeds this length, bucket expansion is triggered).
* However, setting expand_mult to a non-zero value delays bucket expansion
* (that would be triggered by additions to this particular bucket)
* until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
@@ -663,7 +663,7 @@ typedef struct UT_hash_bucket {
* multiplier is to reduce bucket expansions, since they are expensive, in
* situations where we know that a particular bucket tends to be overused.
* It is better to let its chain length grow to a longer yet-still-bounded
- * value, than to do an O(n) bucket expansion too often.
+ * value, than to do an O(n) bucket expansion too often.
*/
unsigned expand_mult;
@@ -689,7 +689,7 @@ typedef struct UT_hash_table {
* hash distribution; reaching them in a chain traversal takes >ideal steps */
unsigned nonideal_items;
- /* ineffective expands occur when a bucket doubling was performed, but
+ /* ineffective expands occur when a bucket doubling was performed, but
* afterward, more than half the items in the hash had nonideal chain
* positions. If this happens on two consecutive expansions we inhibit any
* further expansion, as it's not helping; this happens when the hash
Modified: head/lib/libucl/Makefile
==============================================================================
--- head/lib/libucl/Makefile Sat Jun 4 14:57:08 2016 (r301338)
+++ head/lib/libucl/Makefile Sat Jun 4 14:57:25 2016 (r301339)
@@ -14,8 +14,7 @@ SRCS= ucl_emitter_streamline.c \
ucl_parser.c \
ucl_schema.c \
ucl_sexp.c \
- ucl_util.c \
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list