svn commit: r354371 - in vendor/zstd/dist: . contrib/adaptive-compression contrib/gen_html contrib/pzstd contrib/seekable_format/examples doc doc/educational_decoder examples lib lib/common lib/com...
Conrad Meyer
cem at FreeBSD.org
Wed Nov 6 06:42:03 UTC 2019
Author: cem
Date: Wed Nov 6 06:42:00 2019
New Revision: 354371
URL: https://svnweb.freebsd.org/changeset/base/354371
Log:
Import Zstd 1.4.4
Added:
vendor/zstd/dist/lib/compress/zstd_cwksp.h (contents, props changed)
vendor/zstd/dist/tests/fuzz/dictionary_loader.c (contents, props changed)
vendor/zstd/dist/tests/fuzz/fuzz_data_producer.c (contents, props changed)
vendor/zstd/dist/tests/fuzz/fuzz_data_producer.h (contents, props changed)
vendor/zstd/dist/tests/golden-compression/
vendor/zstd/dist/tests/golden-compression/huffman-compressed-larger
- copied unchanged from r354370, vendor/zstd/dist/tests/files/huffman-compressed-larger
vendor/zstd/dist/tests/golden-decompression/
vendor/zstd/dist/tests/golden-decompression/rle-first-block.zst (contents, props changed)
Deleted:
vendor/zstd/dist/contrib/adaptive-compression/Makefile
vendor/zstd/dist/contrib/adaptive-compression/README.md
vendor/zstd/dist/contrib/adaptive-compression/adapt.c
vendor/zstd/dist/contrib/adaptive-compression/datagencli.c
vendor/zstd/dist/contrib/adaptive-compression/test-correctness.sh
vendor/zstd/dist/contrib/adaptive-compression/test-performance.sh
vendor/zstd/dist/contrib/gen_html/.gitignore
vendor/zstd/dist/contrib/pzstd/.gitignore
vendor/zstd/dist/contrib/seekable_format/examples/.gitignore
vendor/zstd/dist/examples/.gitignore
vendor/zstd/dist/lib/.gitignore
vendor/zstd/dist/programs/.gitignore
vendor/zstd/dist/tests/.gitignore
vendor/zstd/dist/tests/files/huffman-compressed-larger
vendor/zstd/dist/zlibWrapper/.gitignore
Modified:
vendor/zstd/dist/CHANGELOG
vendor/zstd/dist/Makefile
vendor/zstd/dist/README.md
vendor/zstd/dist/appveyor.yml
vendor/zstd/dist/doc/educational_decoder/Makefile
vendor/zstd/dist/doc/educational_decoder/harness.c
vendor/zstd/dist/doc/educational_decoder/zstd_decompress.c
vendor/zstd/dist/doc/educational_decoder/zstd_decompress.h
vendor/zstd/dist/doc/zstd_compression_format.md
vendor/zstd/dist/doc/zstd_manual.html
vendor/zstd/dist/examples/streaming_compression.c
vendor/zstd/dist/examples/streaming_decompression.c
vendor/zstd/dist/lib/Makefile
vendor/zstd/dist/lib/README.md
vendor/zstd/dist/lib/common/bitstream.h
vendor/zstd/dist/lib/common/compiler.h
vendor/zstd/dist/lib/common/fse.h
vendor/zstd/dist/lib/common/fse_decompress.c
vendor/zstd/dist/lib/common/mem.h
vendor/zstd/dist/lib/common/pool.c
vendor/zstd/dist/lib/common/threading.c
vendor/zstd/dist/lib/common/threading.h
vendor/zstd/dist/lib/common/xxhash.c
vendor/zstd/dist/lib/common/zstd_internal.h
vendor/zstd/dist/lib/compress/zstd_compress.c
vendor/zstd/dist/lib/compress/zstd_compress_internal.h
vendor/zstd/dist/lib/compress/zstd_compress_literals.c
vendor/zstd/dist/lib/compress/zstd_compress_literals.h
vendor/zstd/dist/lib/compress/zstd_compress_sequences.c
vendor/zstd/dist/lib/compress/zstd_compress_sequences.h
vendor/zstd/dist/lib/compress/zstd_double_fast.c
vendor/zstd/dist/lib/compress/zstd_fast.c
vendor/zstd/dist/lib/compress/zstd_lazy.c
vendor/zstd/dist/lib/compress/zstd_ldm.c
vendor/zstd/dist/lib/compress/zstd_opt.c
vendor/zstd/dist/lib/compress/zstdmt_compress.c
vendor/zstd/dist/lib/decompress/huf_decompress.c
vendor/zstd/dist/lib/decompress/zstd_decompress.c
vendor/zstd/dist/lib/decompress/zstd_decompress_block.c
vendor/zstd/dist/lib/deprecated/zbuff.h
vendor/zstd/dist/lib/dictBuilder/cover.c
vendor/zstd/dist/lib/dictBuilder/zdict.c
vendor/zstd/dist/lib/legacy/zstd_v01.c
vendor/zstd/dist/lib/legacy/zstd_v02.c
vendor/zstd/dist/lib/legacy/zstd_v03.c
vendor/zstd/dist/lib/legacy/zstd_v04.c
vendor/zstd/dist/lib/legacy/zstd_v05.c
vendor/zstd/dist/lib/legacy/zstd_v06.c
vendor/zstd/dist/lib/legacy/zstd_v07.c
vendor/zstd/dist/lib/libzstd.pc.in
vendor/zstd/dist/lib/zstd.h
vendor/zstd/dist/programs/README.md
vendor/zstd/dist/programs/benchzstd.c
vendor/zstd/dist/programs/benchzstd.h
vendor/zstd/dist/programs/datagen.c
vendor/zstd/dist/programs/dibio.c
vendor/zstd/dist/programs/fileio.c
vendor/zstd/dist/programs/fileio.h
vendor/zstd/dist/programs/platform.h
vendor/zstd/dist/programs/timefn.h
vendor/zstd/dist/programs/util.c
vendor/zstd/dist/programs/util.h
vendor/zstd/dist/programs/zstd.1
vendor/zstd/dist/programs/zstd.1.md
vendor/zstd/dist/programs/zstdcli.c
vendor/zstd/dist/programs/zstdgrep.1
vendor/zstd/dist/programs/zstdless.1
vendor/zstd/dist/tests/Makefile
vendor/zstd/dist/tests/decodecorpus.c
vendor/zstd/dist/tests/fullbench.c
vendor/zstd/dist/tests/fuzz/Makefile
vendor/zstd/dist/tests/fuzz/README.md
vendor/zstd/dist/tests/fuzz/block_decompress.c
vendor/zstd/dist/tests/fuzz/block_round_trip.c
vendor/zstd/dist/tests/fuzz/dictionary_decompress.c
vendor/zstd/dist/tests/fuzz/dictionary_round_trip.c
vendor/zstd/dist/tests/fuzz/fuzz.h
vendor/zstd/dist/tests/fuzz/fuzz.py
vendor/zstd/dist/tests/fuzz/fuzz_helpers.h
vendor/zstd/dist/tests/fuzz/regression_driver.c
vendor/zstd/dist/tests/fuzz/simple_compress.c
vendor/zstd/dist/tests/fuzz/simple_decompress.c
vendor/zstd/dist/tests/fuzz/simple_round_trip.c
vendor/zstd/dist/tests/fuzz/stream_decompress.c
vendor/zstd/dist/tests/fuzz/stream_round_trip.c
vendor/zstd/dist/tests/fuzz/zstd_frame_info.c
vendor/zstd/dist/tests/fuzz/zstd_helpers.c
vendor/zstd/dist/tests/fuzz/zstd_helpers.h
vendor/zstd/dist/tests/fuzzer.c
vendor/zstd/dist/tests/playTests.sh
vendor/zstd/dist/tests/poolTests.c
vendor/zstd/dist/tests/regression/method.c
vendor/zstd/dist/tests/regression/results.csv
vendor/zstd/dist/tests/zbufftest.c
vendor/zstd/dist/tests/zstreamtest.c
vendor/zstd/dist/zlibWrapper/Makefile
vendor/zstd/dist/zlibWrapper/examples/fitblk.c
vendor/zstd/dist/zlibWrapper/examples/zwrapbench.c
vendor/zstd/dist/zlibWrapper/gzclose.c
vendor/zstd/dist/zlibWrapper/gzlib.c
vendor/zstd/dist/zlibWrapper/gzread.c
vendor/zstd/dist/zlibWrapper/gzwrite.c
vendor/zstd/dist/zlibWrapper/zstd_zlibwrapper.c
Modified: vendor/zstd/dist/CHANGELOG
==============================================================================
--- vendor/zstd/dist/CHANGELOG Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/CHANGELOG Wed Nov 6 06:42:00 2019 (r354371)
@@ -1,3 +1,34 @@
+v1.4.4
+perf: Improved decompression speed, by > 10%, by @terrelln
+perf: Better compression speed when re-using a context, by @felixhandte
+perf: Fix compression ratio when compressing large files with small dictionary, by @senhuang42
+perf: zstd reference encoder can generate RLE blocks, by @bimbashrestha
+perf: minor generic speed optimization, by @davidbolvansky
+api: new ability to extract sequences from the parser for analysis, by @bimbashrestha
+api: fixed decoding of magic-less frames, by @terrelln
+api: fixed ZSTD_initCStream_advanced() performance with fast modes, reported by @QrczakMK
+cli: Named pipes support, by @bimbashrestha
+cli: short tar's extension support, by @stokito
+cli: command --output-dir-flat= , generates target files into requested directory, by @senhuang42
+cli: commands --stream-size=# and --size-hint=#, by @nmagerko
+cli: command --exclude-compressed, by @shashank0791
+cli: faster `-t` test mode
+cli: improved some error messages, by @vangyzen
+cli: rare deadlock condition within dictionary builder, by @terrelln
+build: single-file decoder with emscripten compilation script, by @cwoffenden
+build: fixed zlibWrapper compilation on Visual Studio, reported by @bluenlive
+build: fixed deprecation warning for certain gcc version, reported by @jasonma163
+build: fix compilation on old gcc versions, by @cemeyer
+build: improved installation directories for cmake script, by Dmitri Shubin
+pack: modified pkgconfig, for better integration into openwrt, requested by @neheb
+misc: Improved documentation : ZSTD_CLEVEL, DYNAMIC_BMI2, ZSTD_CDict, function deprecation, zstd format
+misc: fixed educational decoder : accept larger literals section, and removed UNALIGNED() macro
+
+v1.4.3
+bug: Fix Dictionary Compression Ratio Regression by @cyan4973 (#1709)
+bug: Fix Buffer Overflow in legacy v0.3 decompression by @felixhandte (#1722)
+build: Add support for IAR C/C++ Compiler for Arm by @joseph0918 (#1705)
+
v1.4.2
bug: Fix bug in zstd-0.5 decoder by @terrelln (#1696)
bug: Fix seekable decompression in-memory API by @iburinoc (#1695)
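
Of the 1.4.4 API additions listed above, the sequence-extraction entry point is the least self-describing. Below is a minimal sketch of how it can be driven, assuming the experimental ZSTD_getSequences()/ZSTD_Sequence declarations that appear later in the zstd_manual.html hunk of this diff (ZSTD_STATIC_LINKING_ONLY is required for experimental symbols; error handling is kept minimal):

    /* Illustrative only: dumps the sequences zstd's parser finds in a buffer. */
    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>
    #include <stdlib.h>

    static void dump_sequences(const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        /* one sequence per source byte is a generous upper bound for this sketch */
        ZSTD_Sequence* const seqs = malloc(srcSize * sizeof(ZSTD_Sequence));
        if (!cctx || !seqs) exit(1);

        size_t const nbSeqs = ZSTD_getSequences(cctx, seqs, srcSize, src, srcSize);
        for (size_t i = 0; i < nbSeqs; i++) {
            printf("seq %zu: litLength=%u matchLength=%u offset=%u\n",
                   i, seqs[i].litLength, seqs[i].matchLength, seqs[i].offset);
        }
        free(seqs);
        ZSTD_freeCCtx(cctx);
    }
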
Modified: vendor/zstd/dist/Makefile
==============================================================================
--- vendor/zstd/dist/Makefile Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/Makefile Wed Nov 6 06:42:00 2019 (r354371)
@@ -69,6 +69,7 @@ test: MOREFLAGS += -g -DDEBUGLEVEL=$(DEBUGLEVEL) -Werr
test:
MOREFLAGS="$(MOREFLAGS)" $(MAKE) -j -C $(PRGDIR) allVariants
$(MAKE) -C $(TESTDIR) $@
+ ZSTD=../../programs/zstd $(MAKE) -C doc/educational_decoder test
## shortest: same as `make check`
.PHONY: shortest
@@ -99,8 +100,8 @@ man:
contrib: lib
$(MAKE) -C contrib/pzstd all
$(MAKE) -C contrib/seekable_format/examples all
- $(MAKE) -C contrib/adaptive-compression all
$(MAKE) -C contrib/largeNbDicts all
+ cd contrib/single_file_decoder/ ; ./build_test.sh
.PHONY: cleanTabs
cleanTabs:
@@ -116,7 +117,6 @@ clean:
@$(MAKE) -C contrib/gen_html $@ > $(VOID)
@$(MAKE) -C contrib/pzstd $@ > $(VOID)
@$(MAKE) -C contrib/seekable_format/examples $@ > $(VOID)
- @$(MAKE) -C contrib/adaptive-compression $@ > $(VOID)
@$(MAKE) -C contrib/largeNbDicts $@ > $(VOID)
@$(RM) zstd$(EXT) zstdmt$(EXT) tmp*
@$(RM) -r lz4
Modified: vendor/zstd/dist/README.md
==============================================================================
--- vendor/zstd/dist/README.md Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/README.md Wed Nov 6 06:42:00 2019 (r354371)
@@ -15,6 +15,7 @@ a list of known ports and bindings is provided on [Zst
[![Build status][AppveyorDevBadge]][AppveyorLink]
[![Build status][CircleDevBadge]][CircleLink]
[![Build status][CirrusDevBadge]][CirrusLink]
+[![Fuzzing Status][OSSFuzzBadge]][OSSFuzzLink]
[travisDevBadge]: https://travis-ci.org/facebook/zstd.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/facebook/zstd
@@ -24,6 +25,8 @@ a list of known ports and bindings is provided on [Zst
[CircleLink]: https://circleci.com/gh/facebook/zstd
[CirrusDevBadge]: https://api.cirrus-ci.com/github/facebook/zstd.svg?branch=dev
[CirrusLink]: https://cirrus-ci.com/github/facebook/zstd
+[OSSFuzzBadge]: https://oss-fuzz-build-logs.storage.googleapis.com/badges/zstd.svg
+[OSSFuzzLink]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:zstd
## Benchmarks
Modified: vendor/zstd/dist/appveyor.yml
==============================================================================
--- vendor/zstd/dist/appveyor.yml Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/appveyor.yml Wed Nov 6 06:42:00 2019 (r354371)
@@ -1,3 +1,7 @@
+# Following tests are run _only_ on master branch
+# To reproduce these tests, it's possible to push into a branch `appveyorTest`
+# or a branch `visual*`, they will intentionnally trigger `master` tests
+
-
version: 1.0.{build}
branches:
@@ -169,13 +173,16 @@
sh -e playTests.sh --test-large-data &&
fullbench.exe -i1 &&
fullbench.exe -i1 -P0 &&
- fuzzer_VS2008_%PLATFORM%_Release.exe %FUZZERTEST% &&
- fuzzer_VS2010_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2012_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2013_%PLATFORM%_Release.exe %FUZZERTEST% &&
fuzzer_VS2015_%PLATFORM%_Release.exe %FUZZERTEST%
)
+
+# The following tests are for regular pushes
+# into `dev` or some feature branch
+# There run less tests, for shorter feedback loop
+
-
version: 1.0.{build}
environment:
@@ -248,4 +255,12 @@
MD5sum build/VS2010/bin/%PLATFORM%_%CONFIGURATION%/*.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\fuzzer.exe tests\fuzzer_VS2015_%PLATFORM%_%CONFIGURATION%.exe &&
COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe tests\
+ )
+
+
+ test_script:
+ - ECHO Testing %COMPILER% %PLATFORM% %CONFIGURATION%
+ - if [%HOST%]==[mingw] (
+ set "CC=%COMPILER%" &&
+ make check
)
Modified: vendor/zstd/dist/doc/educational_decoder/Makefile
==============================================================================
--- vendor/zstd/dist/doc/educational_decoder/Makefile Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/doc/educational_decoder/Makefile Wed Nov 6 06:42:00 2019 (r354371)
@@ -1,15 +1,33 @@
+# ################################################################
+# Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+# All rights reserved.
+#
+# This source code is licensed under both the BSD-style license (found in the
+# LICENSE file in the root directory of this source tree) and the GPLv2 (found
+# in the COPYING file in the root directory of this source tree).
+# ################################################################
+
+ZSTD ?= zstd # note: requires zstd installation on local system
+
+UNAME?= $(shell uname)
+ifeq ($(UNAME), SunOS)
+DIFF ?= gdiff
+else
+DIFF ?= diff
+endif
+
HARNESS_FILES=*.c
MULTITHREAD_LDFLAGS = -pthread
DEBUGFLAGS= -g -DZSTD_DEBUG=1
CPPFLAGS += -I$(ZSTDDIR) -I$(ZSTDDIR)/common -I$(ZSTDDIR)/compress \
-I$(ZSTDDIR)/dictBuilder -I$(ZSTDDIR)/deprecated -I$(PRGDIR)
-CFLAGS ?= -O3
+CFLAGS ?= -O2
CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
- -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
- -Wstrict-prototypes -Wundef \
+ -Wstrict-aliasing=1 -Wswitch-enum \
+ -Wredundant-decls -Wstrict-prototypes -Wundef \
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
- -Wredundant-decls
+ -std=c99
CFLAGS += $(DEBUGFLAGS)
CFLAGS += $(MOREFLAGS)
FLAGS = $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $(MULTITHREAD_LDFLAGS)
@@ -18,17 +36,27 @@ harness: $(HARNESS_FILES)
$(CC) $(FLAGS) $^ -o $@
clean:
- @$(RM) -f harness
- @$(RM) -rf harness.dSYM
+ @$(RM) harness
+ @$(RM) -rf harness.dSYM # MacOS specific
test: harness
- @zstd README.md -o tmp.zst
+ #
+ # Testing single-file decompression with educational decoder
+ #
+ @$(ZSTD) -f README.md -o tmp.zst
@./harness tmp.zst tmp
- @diff -s tmp README.md
- @$(RM) -f tmp*
- @zstd --train harness.c zstd_decompress.c zstd_decompress.h README.md
- @zstd -D dictionary README.md -o tmp.zst
+ @$(DIFF) -s tmp README.md
+ @$(RM) tmp*
+ #
+ # Testing dictionary decompression with education decoder
+ #
+ # note : files are presented multiple for training, to reach minimum threshold
+ @$(ZSTD) --train harness.c zstd_decompress.c zstd_decompress.h README.md \
+ harness.c zstd_decompress.c zstd_decompress.h README.md \
+ harness.c zstd_decompress.c zstd_decompress.h README.md \
+ -o dictionary
+ @$(ZSTD) -f README.md -D dictionary -o tmp.zst
@./harness tmp.zst tmp dictionary
- @diff -s tmp README.md
- @$(RM) -f tmp* dictionary
- @make clean
+ @$(DIFF) -s tmp README.md
+ @$(RM) tmp* dictionary
+ @$(MAKE) clean
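
The training rule above lists the same four files three times so the sample count clears the dictionary trainer's minimum-input threshold. The same idea expressed through the library rather than the CLI, as a rough sketch assuming lib/dictBuilder's public ZDICT_trainFromBuffer() entry point (error handling elided):

    /* Sketch only: trains a dictionary from repeated in-memory samples,
     * mirroring the Makefile's "list the files several times" trick. */
    #include <stdlib.h>
    #include <string.h>
    #include <zdict.h>

    #define REPEATS 3   /* same repetition count as the Makefile rule */

    static size_t train_from_samples(void* dictBuf, size_t dictCap,
                                     const char** samples, unsigned nbSamples)
    {
        size_t total = 0;
        for (unsigned i = 0; i < nbSamples; i++) total += strlen(samples[i]);

        char*   const flat  = malloc(total * REPEATS);
        size_t* const sizes = malloc((size_t)nbSamples * REPEATS * sizeof(size_t));
        size_t pos = 0; unsigned n = 0;

        for (int r = 0; r < REPEATS; r++)           /* repeat samples to reach threshold */
            for (unsigned i = 0; i < nbSamples; i++) {
                size_t const len = strlen(samples[i]);
                memcpy(flat + pos, samples[i], len);
                sizes[n++] = len;
                pos += len;
            }

        size_t const dictSize = ZDICT_trainFromBuffer(dictBuf, dictCap, flat, sizes, n);
        free(flat); free(sizes);
        return dictSize;   /* dictionary size, or an error code (check ZDICT_isError) */
    }
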
Modified: vendor/zstd/dist/doc/educational_decoder/harness.c
==============================================================================
--- vendor/zstd/dist/doc/educational_decoder/harness.c Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/doc/educational_decoder/harness.c Wed Nov 6 06:42:00 2019 (r354371)
@@ -21,88 +21,90 @@ typedef unsigned char u8;
// Protect against allocating too much memory for output
#define MAX_OUTPUT_SIZE ((size_t)1024 * 1024 * 1024)
-u8 *input;
-u8 *output;
-u8 *dict;
-
-size_t read_file(const char *path, u8 **ptr) {
- FILE *f = fopen(path, "rb");
+static size_t read_file(const char *path, u8 **ptr)
+{
+ FILE* const f = fopen(path, "rb");
if (!f) {
- fprintf(stderr, "failed to open file %s\n", path);
+ fprintf(stderr, "failed to open file %s \n", path);
exit(1);
}
fseek(f, 0L, SEEK_END);
- size_t size = ftell(f);
+ size_t const size = (size_t)ftell(f);
rewind(f);
*ptr = malloc(size);
if (!ptr) {
- fprintf(stderr, "failed to allocate memory to hold %s\n", path);
+ fprintf(stderr, "failed to allocate memory to hold %s \n", path);
exit(1);
}
- size_t pos = 0;
- while (!feof(f)) {
- size_t read = fread(&(*ptr)[pos], 1, size, f);
- if (ferror(f)) {
- fprintf(stderr, "error while reading file %s\n", path);
- exit(1);
- }
- pos += read;
+ size_t const read = fread(*ptr, 1, size, f);
+ if (read != size) { /* must read everything in one pass */
+ fprintf(stderr, "error while reading file %s \n", path);
+ exit(1);
}
fclose(f);
- return pos;
+ return read;
}
-void write_file(const char *path, const u8 *ptr, size_t size) {
- FILE *f = fopen(path, "wb");
+static void write_file(const char *path, const u8 *ptr, size_t size)
+{
+ FILE* const f = fopen(path, "wb");
+ if (!f) {
+ fprintf(stderr, "failed to open file %s \n", path);
+ exit(1);
+ }
size_t written = 0;
while (written < size) {
- written += fwrite(&ptr[written], 1, size, f);
+ written += fwrite(ptr+written, 1, size, f);
if (ferror(f)) {
fprintf(stderr, "error while writing file %s\n", path);
exit(1);
- }
- }
+ } }
fclose(f);
}
-int main(int argc, char **argv) {
+int main(int argc, char **argv)
+{
if (argc < 3) {
- fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary]\n",
+ fprintf(stderr, "usage: %s <file.zst> <out_path> [dictionary] \n",
argv[0]);
return 1;
}
- size_t input_size = read_file(argv[1], &input);
+ u8* input;
+ size_t const input_size = read_file(argv[1], &input);
+
+ u8* dict = NULL;
size_t dict_size = 0;
if (argc >= 4) {
dict_size = read_file(argv[3], &dict);
}
- size_t decompressed_size = ZSTD_get_decompressed_size(input, input_size);
- if (decompressed_size == (size_t)-1) {
- decompressed_size = MAX_COMPRESSION_RATIO * input_size;
+ size_t out_capacity = ZSTD_get_decompressed_size(input, input_size);
+ if (out_capacity == (size_t)-1) {
+ out_capacity = MAX_COMPRESSION_RATIO * input_size;
fprintf(stderr, "WARNING: Compressed data does not contain "
"decompressed size, going to assume the compression "
"ratio is at most %d (decompressed size of at most "
- "%zu)\n",
- MAX_COMPRESSION_RATIO, decompressed_size);
+ "%u) \n",
+ MAX_COMPRESSION_RATIO, (unsigned)out_capacity);
}
- if (decompressed_size > MAX_OUTPUT_SIZE) {
+ if (out_capacity > MAX_OUTPUT_SIZE) {
fprintf(stderr,
- "Required output size too large for this implementation\n");
+ "Required output size too large for this implementation \n");
return 1;
}
- output = malloc(decompressed_size);
+
+ u8* const output = malloc(out_capacity);
if (!output) {
- fprintf(stderr, "failed to allocate memory\n");
+ fprintf(stderr, "failed to allocate memory \n");
return 1;
}
@@ -110,16 +112,17 @@ int main(int argc, char **argv) {
if (dict) {
parse_dictionary(parsed_dict, dict, dict_size);
}
- size_t decompressed =
- ZSTD_decompress_with_dict(output, decompressed_size,
- input, input_size, parsed_dict);
+ size_t const decompressed_size =
+ ZSTD_decompress_with_dict(output, out_capacity,
+ input, input_size,
+ parsed_dict);
free_dictionary(parsed_dict);
- write_file(argv[2], output, decompressed);
+ write_file(argv[2], output, decompressed_size);
free(input);
free(output);
free(dict);
- input = output = dict = NULL;
+ return 0;
}
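
Taken together, these changes make harness.c a small reference client of the educational decoder. For readers who only want the decode path, here is a self-contained sketch of the same API, assuming zstd_decompress.h/.c from this directory are compiled in; the 16x fallback mirrors the harness's MAX_COMPRESSION_RATIO assumption:

    /* Minimal use of the educational decoder's API (sketch). */
    #include <stdlib.h>
    #include "zstd_decompress.h"

    static int decode_buffer(const void* src, size_t srcSize,
                             void** dst, size_t* dstSize)
    {
        size_t capacity = ZSTD_get_decompressed_size(src, srcSize);
        if (capacity == (size_t)-1)     /* frame does not declare its content size */
            capacity = 16 * srcSize;    /* assumed worst-case ratio for this sketch */

        void* const out = malloc(capacity);
        if (out == NULL) return 1;

        dictionary_t* const dict = create_dictionary();   /* empty: no dictionary used */
        size_t const written = ZSTD_decompress_with_dict(out, capacity, src, srcSize, dict);
        free_dictionary(dict);

        *dst = out;
        *dstSize = written;
        return 0;
    }
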
Modified: vendor/zstd/dist/doc/educational_decoder/zstd_decompress.c
==============================================================================
--- vendor/zstd/dist/doc/educational_decoder/zstd_decompress.c Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/doc/educational_decoder/zstd_decompress.c Wed Nov 6 06:42:00 2019 (r354371)
@@ -395,7 +395,7 @@ size_t ZSTD_decompress_with_dict(void *const dst, cons
/* this decoder assumes decompression of a single frame */
decode_frame(&out, &in, parsed_dict);
- return out.ptr - (u8 *)dst;
+ return (size_t)(out.ptr - (u8 *)dst);
}
/******* FRAME DECODING ******************************************************/
@@ -416,7 +416,7 @@ static void decompress_data(frame_context_t *const ctx
static void decode_frame(ostream_t *const out, istream_t *const in,
const dictionary_t *const dict) {
- const u32 magic_number = IO_read_bits(in, 32);
+ const u32 magic_number = (u32)IO_read_bits(in, 32);
// Zstandard frame
//
// "Magic_Number
@@ -497,7 +497,7 @@ static void parse_frame_header(frame_header_t *const h
// 3 Reserved_bit
// 2 Content_Checksum_flag
// 1-0 Dictionary_ID_flag"
- const u8 descriptor = IO_read_bits(in, 8);
+ const u8 descriptor = (u8)IO_read_bits(in, 8);
// decode frame header descriptor into flags
const u8 frame_content_size_flag = descriptor >> 6;
@@ -521,7 +521,7 @@ static void parse_frame_header(frame_header_t *const h
//
// Bit numbers 7-3 2-0
// Field name Exponent Mantissa"
- u8 window_descriptor = IO_read_bits(in, 8);
+ u8 window_descriptor = (u8)IO_read_bits(in, 8);
u8 exponent = window_descriptor >> 3;
u8 mantissa = window_descriptor & 7;
@@ -541,7 +541,7 @@ static void parse_frame_header(frame_header_t *const h
const int bytes_array[] = {0, 1, 2, 4};
const int bytes = bytes_array[dictionary_id_flag];
- header->dictionary_id = IO_read_bits(in, bytes * 8);
+ header->dictionary_id = (u32)IO_read_bits(in, bytes * 8);
} else {
header->dictionary_id = 0;
}
@@ -633,8 +633,8 @@ static void decompress_data(frame_context_t *const ctx
//
// The next 2 bits represent the Block_Type, while the remaining 21 bits
// represent the Block_Size. Format is little-endian."
- last_block = IO_read_bits(in, 1);
- const int block_type = IO_read_bits(in, 2);
+ last_block = (int)IO_read_bits(in, 1);
+ const int block_type = (int)IO_read_bits(in, 2);
const size_t block_len = IO_read_bits(in, 21);
switch (block_type) {
@@ -748,8 +748,8 @@ static size_t decode_literals(frame_context_t *const c
// types"
//
// size_format takes between 1 and 2 bits
- int block_type = IO_read_bits(in, 2);
- int size_format = IO_read_bits(in, 2);
+ int block_type = (int)IO_read_bits(in, 2);
+ int size_format = (int)IO_read_bits(in, 2);
if (block_type <= 1) {
// Raw or RLE literals block
@@ -833,6 +833,7 @@ static size_t decode_literals_compressed(frame_context
// bits (0-1023)."
num_streams = 1;
// Fall through as it has the same size format
+ /* fallthrough */
case 1:
// "4 streams. Both Compressed_Size and Regenerated_Size use 10 bits
// (0-1023)."
@@ -855,8 +856,7 @@ static size_t decode_literals_compressed(frame_context
// Impossible
IMPOSSIBLE();
}
- if (regenerated_size > MAX_LITERALS_SIZE ||
- compressed_size >= regenerated_size) {
+ if (regenerated_size > MAX_LITERALS_SIZE) {
CORRUPTION();
}
@@ -1005,7 +1005,7 @@ static const i16 SEQ_MATCH_LENGTH_DEFAULT_DIST[53] = {
static const u32 SEQ_LITERAL_LENGTH_BASELINES[36] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 18, 20, 22, 24, 28, 32, 40,
- 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65538};
+ 48, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536};
static const u8 SEQ_LITERAL_LENGTH_EXTRA_BITS[36] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
@@ -1021,7 +1021,7 @@ static const u8 SEQ_MATCH_LENGTH_EXTRA_BITS[53] = {
2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
/// Offset decoding is simpler so we just need a maximum code value
-static const u8 SEQ_MAX_CODES[3] = {35, -1, 52};
+static const u8 SEQ_MAX_CODES[3] = {35, (u8)-1, 52};
static void decompress_sequences(frame_context_t *const ctx,
istream_t *const in,
@@ -1132,7 +1132,7 @@ static void decompress_sequences(frame_context_t *cons
// a single 1-bit and then fills the byte with 0-7 0 bits of padding."
const int padding = 8 - highest_set_bit(src[len - 1]);
// The offset starts at the end because FSE streams are read backwards
- i64 bit_offset = len * 8 - padding;
+ i64 bit_offset = (i64)(len * 8 - (size_t)padding);
// "The bitstream starts with initial state values, each using the required
// number of bits in their respective accuracy, decoded previously from
@@ -1409,7 +1409,7 @@ size_t ZSTD_get_decompressed_size(const void *src, con
// get decompressed size from ZSTD frame header
{
- const u32 magic_number = IO_read_bits(&in, 32);
+ const u32 magic_number = (u32)IO_read_bits(&in, 32);
if (magic_number == 0xFD2FB528U) {
// ZSTD frame
@@ -1418,7 +1418,7 @@ size_t ZSTD_get_decompressed_size(const void *src, con
if (header.frame_content_size == 0 && !header.single_segment_flag) {
// Content size not provided, we can't tell
- return -1;
+ return (size_t)-1;
}
return header.frame_content_size;
@@ -1529,7 +1529,7 @@ void free_dictionary(dictionary_t *const dict) {
/******* END DICTIONARY PARSING ***********************************************/
/******* IO STREAM OPERATIONS *************************************************/
-#define UNALIGNED() ERROR("Attempting to operate on a non-byte aligned stream")
+
/// Reads `num` bits from a bitstream, and updates the internal offset
static inline u64 IO_read_bits(istream_t *const in, const int num_bits) {
if (num_bits > 64 || num_bits <= 0) {
@@ -1608,7 +1608,7 @@ static inline const u8 *IO_get_read_ptr(istream_t *con
INP_SIZE();
}
if (in->bit_offset != 0) {
- UNALIGNED();
+ ERROR("Attempting to operate on a non-byte aligned stream");
}
const u8 *const ptr = in->ptr;
in->ptr += len;
@@ -1634,7 +1634,7 @@ static inline void IO_advance_input(istream_t *const i
INP_SIZE();
}
if (in->bit_offset != 0) {
- UNALIGNED();
+ ERROR("Attempting to operate on a non-byte aligned stream");
}
in->ptr += len;
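
One fix above is worth spelling out: the last literal-length baseline changes from 65538 to 65536. The extra-bits table in the same hunk shows why:

    code 34 : baseline 32768, 15 extra bits  ->  covers 32768 .. 32768 + 2^15 - 1 = 65535
    code 35 : 16 extra bits                  ->  must therefore start at 65536, not 65538

With the old value, every literal length decoded through code 35 came out two bytes too long, and lengths 65536 and 65537 were unreachable.
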
Modified: vendor/zstd/dist/doc/educational_decoder/zstd_decompress.h
==============================================================================
--- vendor/zstd/dist/doc/educational_decoder/zstd_decompress.h Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/doc/educational_decoder/zstd_decompress.h Wed Nov 6 06:42:00 2019 (r354371)
@@ -7,6 +7,8 @@
* in the COPYING file in the root directory of this source tree).
*/
+#include <stddef.h> /* size_t */
+
/******* EXPOSED TYPES ********************************************************/
/*
* Contains the parsed contents of a dictionary
@@ -39,7 +41,7 @@ size_t ZSTD_get_decompressed_size(const void *const sr
* Return a valid dictionary_t pointer for use with dictionary initialization
* or decompression
*/
-dictionary_t* create_dictionary();
+dictionary_t* create_dictionary(void);
/*
* Parse a provided dictionary blob for use in decompression
Modified: vendor/zstd/dist/doc/zstd_compression_format.md
==============================================================================
--- vendor/zstd/dist/doc/zstd_compression_format.md Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/doc/zstd_compression_format.md Wed Nov 6 06:42:00 2019 (r354371)
@@ -16,7 +16,7 @@ Distribution of this document is unlimited.
### Version
-0.3.2 (17/07/19)
+0.3.4 (16/08/19)
Introduction
@@ -358,6 +358,7 @@ It may be followed by an optional `Content_Checksum`
__`Block_Type`__
The next 2 bits represent the `Block_Type`.
+`Block_Type` influences the meaning of `Block_Size`.
There are 4 block types :
| Value | 0 | 1 | 2 | 3 |
@@ -384,9 +385,12 @@ There are 4 block types :
__`Block_Size`__
The upper 21 bits of `Block_Header` represent the `Block_Size`.
-`Block_Size` is the size of the block excluding the header.
-A block can contain any number of bytes (even zero), up to
-`Block_Maximum_Decompressed_Size`, which is the smallest of:
+When `Block_Type` is `Compressed_Block` or `Raw_Block`,
+`Block_Size` is the size of `Block_Content`, hence excluding `Block_Header`.
+When `Block_Type` is `RLE_Block`, `Block_Content`’s size is always 1,
+and `Block_Size` represents the number of times this byte must be repeated.
+A block can contain and decompress into any number of bytes (even zero),
+up to `Block_Maximum_Decompressed_Size`, which is the smallest of:
- Window_Size
- 128 KB
@@ -1103,18 +1107,18 @@ It follows the following build rule :
The table has a size of `Table_Size = 1 << Accuracy_Log`.
Each cell describes the symbol decoded,
-and instructions to get the next state.
+and instructions to get the next state (`Number_of_Bits` and `Baseline`).
Symbols are scanned in their natural order for "less than 1" probabilities.
Symbols with this probability are being attributed a single cell,
starting from the end of the table and retreating.
These symbols define a full state reset, reading `Accuracy_Log` bits.
-All remaining symbols are allocated in their natural order.
-Starting from symbol `0` and table position `0`,
+Then, all remaining symbols, sorted in natural order, are allocated cells.
+Starting from symbol `0` (if it exists), and table position `0`,
each symbol gets allocated as many cells as its probability.
Cell allocation is spreaded, not linear :
-each successor position follow this rule :
+each successor position follows this rule :
```
position += (tableSize>>1) + (tableSize>>3) + 3;
@@ -1126,40 +1130,41 @@ A position is skipped if already occupied by a "less t
each position in the table, switching to the next symbol when enough
states have been allocated to the current one.
-The result is a list of state values.
-Each state will decode the current symbol.
+The process guarantees that the table is entirely filled.
+Each cell corresponds to a state value, which contains the symbol being decoded.
-To get the `Number_of_Bits` and `Baseline` required for next state,
-it's first necessary to sort all states in their natural order.
-The lower states will need 1 more bit than higher ones.
+To add the `Number_of_Bits` and `Baseline` required to retrieve next state,
+it's first necessary to sort all occurrences of each symbol in state order.
+Lower states will need 1 more bit than higher ones.
The process is repeated for each symbol.
__Example__ :
-Presuming a symbol has a probability of 5.
-It receives 5 state values. States are sorted in natural order.
+Presuming a symbol has a probability of 5,
+it receives 5 cells, corresponding to 5 state values.
+These state values are then sorted in natural order.
-Next power of 2 is 8.
-Space of probabilities is divided into 8 equal parts.
-Presuming the `Accuracy_Log` is 7, it defines 128 states.
+Next power of 2 after 5 is 8.
+Space of probabilities must be divided into 8 equal parts.
+Presuming the `Accuracy_Log` is 7, it defines a space of 128 states.
Divided by 8, each share is 16 large.
-In order to reach 8, 8-5=3 lowest states will count "double",
-doubling the number of shares (32 in width),
-requiring one more bit in the process.
+In order to reach 8 shares, 8-5=3 lowest states will count "double",
+doubling their shares (32 in width), hence requiring one more bit.
Baseline is assigned starting from the higher states using fewer bits,
-and proceeding naturally, then resuming at the first state,
-each takes its allocated width from Baseline.
+increasing at each state, then resuming at the first state,
+each state takes its allocated width from Baseline.
-| state order | 0 | 1 | 2 | 3 | 4 |
-| ---------------- | ----- | ----- | ------ | ---- | ----- |
-| width | 32 | 32 | 32 | 16 | 16 |
-| `Number_of_Bits` | 5 | 5 | 5 | 4 | 4 |
-| range number | 2 | 4 | 6 | 0 | 1 |
-| `Baseline` | 32 | 64 | 96 | 0 | 16 |
-| range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
+| state value | 1 | 39 | 77 | 84 | 122 |
+| state order | 0 | 1 | 2 | 3 | 4 |
+| ---------------- | ----- | ----- | ------ | ---- | ------ |
+| width | 32 | 32 | 32 | 16 | 16 |
+| `Number_of_Bits` | 5 | 5 | 5 | 4 | 4 |
+| range number | 2 | 4 | 6 | 0 | 1 |
+| `Baseline` | 32 | 64 | 96 | 0 | 16 |
+| range | 32-63 | 64-95 | 96-127 | 0-15 | 16-31 |
-The next state is determined from current state
+During decoding, the next state value is determined from current state value,
by reading the required `Number_of_Bits`, and adding the specified `Baseline`.
See [Appendix A] for the results of this process applied to the default distributions.
@@ -1653,6 +1658,8 @@ or at least provide a meaningful error code explaining
Version changes
---------------
+- 0.3.4 : clarifications for FSE decoding table
+- 0.3.3 : clarifications for field Block_Size
- 0.3.2 : remove additional block size restriction on compressed blocks
- 0.3.1 : minor clarification regarding offset history update rules
- 0.3.0 : minor edits to match RFC8478
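
The FSE table-build clarifications above are easier to follow with the spreading step written out. A minimal sketch of the cell-allocation loop the specification describes, using the position-update rule quoted in the hunk (illustrative only; `occupied` models the cells already taken by "less than 1" probability symbols at the end of the table):

    /* Sketch of the FSE decoding-table spread step.
     * symbolCells[s] = number of cells symbol s receives (its normalized probability). */
    #include <stddef.h>

    static void fse_spread_symbols(unsigned char* table, size_t tableSize,
                                   const unsigned* symbolCells, unsigned nbSymbols,
                                   const unsigned char* occupied)
    {
        size_t const step = (tableSize >> 1) + (tableSize >> 3) + 3;  /* rule from the spec */
        size_t const mask = tableSize - 1;                            /* tableSize is a power of 2 */
        size_t position = 0;

        for (unsigned s = 0; s < nbSymbols; s++) {
            for (unsigned c = 0; c < symbolCells[s]; c++) {
                table[position] = (unsigned char)s;
                do {
                    position = (position + step) & mask;
                } while (occupied[position]);   /* skip cells reserved for low-probability symbols */
            }
        }
    }

Number_of_Bits and Baseline are then derived per symbol by sorting that symbol's states in natural order and giving the lowest ones one extra bit, as in the worked table above.
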
Modified: vendor/zstd/dist/doc/zstd_manual.html
==============================================================================
--- vendor/zstd/dist/doc/zstd_manual.html Wed Nov 6 06:38:34 2019 (r354370)
+++ vendor/zstd/dist/doc/zstd_manual.html Wed Nov 6 06:42:00 2019 (r354371)
@@ -1,10 +1,10 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>zstd 1.4.2 Manual</title>
+<title>zstd 1.4.4 Manual</title>
</head>
<body>
-<h1>zstd 1.4.2 Manual</h1>
+<h1>zstd 1.4.4 Manual</h1>
<hr>
<a name="Contents"></a><h2>Contents</h2>
<ol>
@@ -27,10 +27,16 @@
<li><a href="#Chapter17">Advanced compression functions</a></li>
<li><a href="#Chapter18">Advanced decompression functions</a></li>
<li><a href="#Chapter19">Advanced streaming functions</a></li>
-<li><a href="#Chapter20">Buffer-less and synchronous inner streaming functions</a></li>
-<li><a href="#Chapter21">Buffer-less streaming compression (synchronous mode)</a></li>
-<li><a href="#Chapter22">Buffer-less streaming decompression (synchronous mode)</a></li>
-<li><a href="#Chapter23">Block level API</a></li>
+<li><a href="#Chapter20">! ZSTD_initCStream_usingDict() :</a></li>
+<li><a href="#Chapter21">! ZSTD_initCStream_advanced() :</a></li>
+<li><a href="#Chapter22">! ZSTD_initCStream_usingCDict() :</a></li>
+<li><a href="#Chapter23">! ZSTD_initCStream_usingCDict_advanced() :</a></li>
+<li><a href="#Chapter24">This function is deprecated, and is equivalent to:</a></li>
+<li><a href="#Chapter25">This function is deprecated, and is equivalent to:</a></li>
+<li><a href="#Chapter26">Buffer-less and synchronous inner streaming functions</a></li>
+<li><a href="#Chapter27">Buffer-less streaming compression (synchronous mode)</a></li>
+<li><a href="#Chapter28">Buffer-less streaming decompression (synchronous mode)</a></li>
+<li><a href="#Chapter29">Block level API</a></li>
</ol>
<hr>
<a name="Chapter1"></a><h2>Introduction</h2><pre>
@@ -157,9 +163,13 @@ size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx);
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
int compressionLevel);
-</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx
- The function will compress at requested compression level,
- ignoring any other parameter
+</b><p> Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
+ Important : in order to behave similarly to `ZSTD_compress()`,
+ this function compresses at requested compression level,
+ __ignoring any other parameter__ .
+ If any advanced parameter was set using the advanced API,
+ they will all be reset. Only `compressionLevel` remains.
+
</p></pre><BR>
<h3>Decompression context</h3><pre> When decompressing many times,
@@ -199,18 +209,26 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
</b>/* compression parameters<b>
* Note: When compressing with a ZSTD_CDict these parameters are superseded
- * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict()
- * for more info (superseded-by-cdict). */
- ZSTD_c_compressionLevel=100, </b>/* Update all compression parameters according to pre-defined cLevel table<b>
+ * by the parameters used to construct the ZSTD_CDict.
+ * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */
+ ZSTD_c_compressionLevel=100, </b>/* Set compression parameters according to pre-defined cLevel table.<b>
+ * Note that exact compression parameters are dynamically determined,
+ * depending on both compression level and srcSize (when known).
* Default level is ZSTD_CLEVEL_DEFAULT==3.
* Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
* Note 1 : it's possible to pass a negative compression level.
- * Note 2 : setting a level sets all default values of other compression parameters */
+ * Note 2 : setting a level resets all other compression parameters to default */
+ </b>/* Advanced compression parameters :<b>
+ * It's possible to pin down compression parameters to some specific values.
+ * In which case, these values are no longer dynamically selected by the compressor */
ZSTD_c_windowLog=101, </b>/* Maximum allowed back-reference distance, expressed as power of 2.<b>
+ * This will set a memory budget for streaming decompression,
+ * with larger values requiring more memory
+ * and typically compressing more.
* Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX.
* Special: value 0 means "use default windowLog".
* Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT
- * requires explicitly allowing such window size at decompression stage if using streaming. */
+ * requires explicitly allowing such size at streaming decompression stage. */
ZSTD_c_hashLog=102, </b>/* Size of the initial probe table, as a power of 2.<b>
* Resulting memory usage is (1 << (hashLog+2)).
* Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
@@ -221,13 +239,13 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* Resulting memory usage is (1 << (chainLog+2)).
* Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
* Larger tables result in better and slower compression.
- * This parameter is useless when using "fast" strategy.
+ * This parameter is useless for "fast" strategy.
* It's still useful when using "dfast" strategy,
* in which case it defines a secondary probe table.
* Special: value 0 means "use default chainLog". */
ZSTD_c_searchLog=104, </b>/* Number of search attempts, as a power of 2.<b>
* More attempts result in better and slower compression.
- * This parameter is useless when using "fast" and "dFast" strategies.
+ * This parameter is useless for "fast" and "dFast" strategies.
* Special: value 0 means "use default searchLog". */
ZSTD_c_minMatch=105, </b>/* Minimum size of searched matches.<b>
* Note that Zstandard can still find matches of smaller size,
@@ -282,7 +300,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTD_c_contentSizeFlag=200, </b>/* Content size will be written into frame header _whenever known_ (default:1)<b>
* Content size must be known at the beginning of compression.
* This is automatically the case when using ZSTD_compress2(),
- * For streaming variants, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
+ * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */
ZSTD_c_checksumFlag=201, </b>/* A 32-bits checksum of content is written at end of frame (default:0) */<b>
ZSTD_c_dictIDFlag=202, </b>/* When applicable, dictionary's ID is written into frame header (default:1) */<b>
@@ -301,7 +319,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
* 0 means default, which is dynamically determined based on compression parameters.
* Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
- * The minimum size is automatically and transparently enforced */
+ * The minimum size is automatically and transparently enforced. */
ZSTD_c_overlapLog=402, </b>/* Control the overlap size, as a fraction of window size.<b>
* The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
* It helps preserve compression ratio, while each job is compressed in parallel.
@@ -324,6 +342,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
* ZSTD_c_forceAttachDict
* ZSTD_c_literalCompressionMode
* ZSTD_c_targetCBlockSize
+ * ZSTD_c_srcSizeHint
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
* note : never ever use experimentalParam? names directly;
* also, the enums values themselves are unstable and can still change.
@@ -334,6 +353,7 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx);
ZSTD_c_experimentalParam4=1001,
ZSTD_c_experimentalParam5=1002,
ZSTD_c_experimentalParam6=1003,
+ ZSTD_c_experimentalParam7=1004
} ZSTD_cParameter;
</b></pre><BR>
<pre><b>typedef struct {
@@ -672,12 +692,17 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
<pre><b>ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
int compressionLevel);
-</b><p> When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once.
- ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost.
+</b><p> When compressing multiple messages or blocks using the same dictionary,
+ it's recommended to digest the dictionary only once, since it's a costly operation.
+ ZSTD_createCDict() will create a state from digesting a dictionary.
+ The resulting state can be used for future compression operations with very limited startup cost.
ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict.
- Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content.
- Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data.
+ @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
+ Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
+ Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
+ in which case the only thing that it transports is the @compressionLevel.
+ This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
+ expecting a ZSTD_CDict parameter with any data, including those without a known dictionary.
</p></pre><BR>
<pre><b>size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
@@ -794,7 +819,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
It's a CPU consuming operation, with non-negligible impact on latency.
If there is a need to use the same prefix multiple times, consider loadDictionary instead.
- Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dm_rawContent).
+ Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent).
Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation.
</p></pre><BR>
@@ -838,7 +863,7 @@ size_t ZSTD_freeDStream(ZSTD_DStream* zds);
Note 2 : Prefix buffer is referenced. It **must** outlive decompression.
Prefix buffer must remain unmodified up to the end of frame,
reached when ZSTD_decompressStream() returns 0.
- Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
+ Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent).
Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section)
Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
A full dictionary is more costly, as it requires building tables.
@@ -865,6 +890,24 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
<BR></pre>
<pre><b>typedef struct {
+ unsigned int matchPos; </b>/* Match pos in dst */<b>
+ </b>/* If seqDef.offset > 3, then this is seqDef.offset - 3<b>
+ * If seqDef.offset < 3, then this is the corresponding repeat offset
+ * But if seqDef.offset < 3 and litLength == 0, this is the
+ * repeat offset before the corresponding repeat offset
+ * And if seqDef.offset == 3 and litLength == 0, this is the
+ * most recent repeat offset - 1
+ */
+ unsigned int offset;
+ unsigned int litLength; </b>/* Literal length */<b>
+ unsigned int matchLength; </b>/* Match length */<b>
+ </b>/* 0 when seq not rep and seqDef.offset otherwise<b>
+ * when litLength == 0 this will be <= 4, otherwise <= 3 like normal
+ */
+ unsigned int rep;
+} ZSTD_Sequence;
+</b></pre><BR>
+<pre><b>typedef struct {
unsigned windowLog; </b>/**< largest match distance : larger == more compression, more memory needed during decompression */<b>
unsigned chainLog; </b>/**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */<b>
unsigned hashLog; </b>/**< dispatch table : larger == faster, more memory */<b>
@@ -893,21 +936,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
</b></pre><BR>
<pre><b>typedef enum {
ZSTD_dlm_byCopy = 0, </b>/**< Copy dictionary content internally */<b>
- ZSTD_dlm_byRef = 1, </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
+ ZSTD_dlm_byRef = 1 </b>/**< Reference dictionary content -- the dictionary buffer must outlive its users. */<b>
} ZSTD_dictLoadMethod_e;
</b></pre><BR>
<pre><b>typedef enum {
- </b>/* Opened question : should we have a format ZSTD_f_auto ?<b>
- * Today, it would mean exactly the same as ZSTD_f_zstd1.
- * But, in the future, should several formats become supported,
- * on the compression side, it would mean "default format".
- * On the decompression side, it would mean "automatic format detection",
- * so that ZSTD_f_zstd1 would mean "accept *only* zstd frames".
- * Since meaning is a little different, another option could be to define different enums for compression and decompression.
- * This question could be kept for later, when there are actually multiple formats to support,
- * but there is also the question of pinning enum values, and pinning value `0` is especially important */
ZSTD_f_zstd1 = 0, </b>/* zstd frame format, specified in zstd_compression_format.md (default) */<b>
- ZSTD_f_zstd1_magicless = 1, </b>/* Variant of zstd frame format, without initial 4-bytes magic number.<b>
+ ZSTD_f_zstd1_magicless = 1 </b>/* Variant of zstd frame format, without initial 4-bytes magic number.<b>
* Useful to save 4 bytes per generated frame.
* Decoder cannot recognise automatically this format, requiring this instruction. */
} ZSTD_format_e;
@@ -918,7 +952,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
- * Zstd currently supports the use of a CDict in two ways:
+ * Zstd currently supports the use of a CDict in three ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
@@ -934,6 +968,12 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
+ * - The CDict's tables are not used at all, and instead we use the working
+ * context alone to reload the dictionary and use params based on the source
+ * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict().
+ * This method is effective when the dictionary sizes are very small relative
+ * to the input size, and the input size is fairly large to begin with.
+ *
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
@@ -942,6 +982,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
ZSTD_dictDefaultAttach = 0, </b>/* Use the default heuristic. */<b>
ZSTD_dictForceAttach = 1, </b>/* Never copy the dictionary. */<b>
ZSTD_dictForceCopy = 2, </b>/* Always copy the dictionary. */<b>
+ ZSTD_dictForceLoad = 3 </b>/* Always reload the dictionary */<b>
} ZSTD_dictAttachPref_e;
</b></pre><BR>
<pre><b>typedef enum {
@@ -950,7 +991,7 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
* levels will be compressed. */
ZSTD_lcm_huffman = 1, </b>/**< Always attempt Huffman compression. Uncompressed literals will still be<b>
* emitted if Huffman compression is not profitable. */
- ZSTD_lcm_uncompressed = 2, </b>/**< Always emit uncompressed literals. */<b>
+ ZSTD_lcm_uncompressed = 2 </b>/**< Always emit uncompressed literals. */<b>
} ZSTD_literalCompressionMode_e;
</b></pre><BR>
<a name="Chapter15"></a><h2>Frame size functions</h2><pre></pre>
@@ -999,20 +1040,38 @@ size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
or an error code (if srcSize is too small)
</p></pre><BR>
+<pre><b>size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize);
+</b><p> Extract sequences from the sequence store
+ zc can be used to insert custom compression params.
+ This function invokes ZSTD_compress2
+ @return : number of sequences extracted
+
+</p></pre><BR>
+
<a name="Chapter16"></a><h2>Memory management</h2><pre></pre>
<pre><b>size_t ZSTD_estimateCCtxSize(int compressionLevel);
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
size_t ZSTD_estimateDCtxSize(void);
-</b><p> These functions make it possible to estimate memory usage
- of a future {D,C}Ctx, before its creation.
- ZSTD_estimateCCtxSize() will provide a budget large enough for any compression level up to selected one.
- It will also consider src size to be arbitrarily "large", which is worst case.
- If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
- ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
- ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
- Note : CCtx size estimation is only correct for single-threaded compression.
+</b><p> These functions make it possible to estimate memory usage of a future
+ {D,C}Ctx, before its creation.
+
+ ZSTD_estimateCCtxSize() will provide a budget large enough for any
+ compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(),
+ this estimate does not include space for a window buffer, so this estimate
+ is guaranteed to be enough for single-shot compressions, but not streaming
+ compressions. It will however assume the input may be arbitrarily large,
+ which is the worst case. If srcSize is known to always be small,
+ ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation.
+ ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with
+ ZSTD_getCParams() to create cParams from compressionLevel.
+ ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with
+ ZSTD_CCtxParams_setParameter().
+
+ Note: only single-threaded compression is supported. This function will
+ return an error code if ZSTD_c_nbWorkers is >= 1.
</p></pre><BR>
<pre><b>size_t ZSTD_estimateCStreamSize(int compressionLevel);
@@ -1085,7 +1144,8 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL,
</b><p> Create a digested dictionary for compression
Dictionary content is just referenced, not duplicated.
As a consequence, `dictBuffer` **must** outlive CDict,
- and its content must remain unmodified throughout the lifetime of CDict.
+ and its content must remain unmodified throughout the lifetime of CDict.
+ note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef
</p></pre><BR>
<pre><b>ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
@@ -1116,7 +1176,9 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL,
const void* src, size_t srcSize,
const void* dict,size_t dictSize,
ZSTD_parameters params);
-</b><p> Same as ZSTD_compress_usingDict(), with fine-tune control over compression parameters (by structure)
+</b><p> Note : this function is now DEPRECATED.
+ It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
+ This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x
</p></pre><BR>
<pre><b>size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
@@ -1124,7 +1186,9 @@ static ZSTD_customMem const ZSTD_defaultCMem = { NULL,
const void* src, size_t srcSize,
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
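
For readers skimming the (truncated) zstd_manual.html changes above, the advanced-parameter documentation they touch boils down to the following usage pattern, sketched here with error handling reduced to the return value:

    /* Sketch of the advanced one-shot API whose notes are updated above:
     * set parameters on the context, then compress with ZSTD_compress2(). */
    #include <zstd.h>

    static size_t compress_advanced(void* dst, size_t dstCap,
                                    const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL) return (size_t)-1;

        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 27);  /* pinned; streaming decoders must allow it */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);

        size_t const r = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
        ZSTD_freeCCtx(cctx);
        return r;   /* compressed size, or an error code (check with ZSTD_isError) */
    }
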