svn commit: r342845 - head/usr.bin/gzip
Xin LI
delphij at FreeBSD.org
Mon Jan 7 08:27:13 UTC 2019
Author: delphij
Date: Mon Jan 7 08:27:11 2019
New Revision: 342845
URL: https://svnweb.freebsd.org/changeset/base/342845
Log:
Port NetBSD improvements:
- Add -l support for xz files
- Add lzip support to gzip based on the example lzip decoder.
Obtained from: NetBSD
MFC after: 2 weeks
Relnotes: yes
Added:
head/usr.bin/gzip/unlz.c (contents, props changed)
Modified:
head/usr.bin/gzip/gzip.1
head/usr.bin/gzip/gzip.c
head/usr.bin/gzip/unxz.c
Modified: head/usr.bin/gzip/gzip.1
==============================================================================
--- head/usr.bin/gzip/gzip.1 Mon Jan 7 07:12:51 2019 (r342844)
+++ head/usr.bin/gzip/gzip.1 Mon Jan 7 08:27:11 2019 (r342845)
@@ -1,4 +1,4 @@
-.\" $NetBSD: gzip.1,v 1.30 2017/10/22 17:36:49 abhinav Exp $
+.\" $NetBSD: gzip.1,v 1.31 2018/10/26 22:10:15 christos Exp $
.\"
.\" Copyright (c) 1997, 2003, 2004, 2008, 2009, 2015, 2017 Matthew R. Green
.\" All rights reserved.
@@ -25,7 +25,7 @@
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
-.Dd November 21, 2017
+.Dd January 7, 2018
.Dt GZIP 1
.Os
.Sh NAME
@@ -109,6 +109,7 @@ This version of
is also capable of decompressing files compressed using
.Xr compress 1 ,
.Xr bzip2 1 ,
+.Ar lzip ,
or
.Xr xz 1 .
.Sh OPTIONS
@@ -224,7 +225,7 @@ This implementation of
was ported based on the
.Nx
.Nm
-version 20170803,
+version 20181111,
and first appeared in
.Fx 7.0 .
.Sh AUTHORS
Modified: head/usr.bin/gzip/gzip.c
==============================================================================
--- head/usr.bin/gzip/gzip.c Mon Jan 7 07:12:51 2019 (r342844)
+++ head/usr.bin/gzip/gzip.c Mon Jan 7 08:27:11 2019 (r342845)
@@ -1,4 +1,4 @@
-/* $NetBSD: gzip.c,v 1.113 2018/06/12 00:42:17 kamil Exp $ */
+/* $NetBSD: gzip.c,v 1.116 2018/10/27 11:39:12 skrll Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
@@ -84,6 +84,9 @@ enum filetype {
#ifndef NO_XZ_SUPPORT
FT_XZ,
#endif
+#ifndef NO_LZ_SUPPORT
+ FT_LZ,
+#endif
FT_LAST,
FT_UNKNOWN
};
@@ -110,6 +113,11 @@ enum filetype {
#define XZ_MAGIC "\3757zXZ"
#endif
+#ifndef NO_LZ_SUPPORT
+#define LZ_SUFFIX ".lz"
+#define LZ_MAGIC "LZIP"
+#endif
+
#define GZ_SUFFIX ".gz"
#define BUFLEN (64 * 1024)
@@ -155,6 +163,9 @@ static suffixes_t suffixes[] = {
#ifndef NO_XZ_SUPPORT
SUFFIX(XZ_SUFFIX, ""),
#endif
+#ifndef NO_LZ_SUPPORT
+ SUFFIX(LZ_SUFFIX, ""),
+#endif
SUFFIX(GZ_SUFFIX, ""), /* Overwritten by -S "" */
#endif /* SMALL */
#undef SUFFIX
@@ -162,7 +173,7 @@ static suffixes_t suffixes[] = {
#define NUM_SUFFIXES (nitems(suffixes))
#define SUFFIX_MAXLEN 30
-static const char gzip_version[] = "FreeBSD gzip 20171121";
+static const char gzip_version[] = "FreeBSD gzip 20190107";
#ifndef SMALL
static const char gzip_copyright[] = \
@@ -246,6 +257,7 @@ static void display_license(void);
static const suffixes_t *check_suffix(char *, int);
static ssize_t read_retry(int, void *, size_t);
static ssize_t write_retry(int, const void *, size_t);
+static void print_list_out(off_t, off_t, const char*);
#ifdef SMALL
#define infile_set(f,t) infile_set(f)
@@ -289,8 +301,13 @@ static off_t unpack(int, int, char *, size_t, off_t *)
#ifndef NO_XZ_SUPPORT
static off_t unxz(int, int, char *, size_t, off_t *);
+static off_t unxz_len(int);
#endif
+#ifndef NO_LZ_SUPPORT
+static off_t unlz(int, int, char *, size_t, off_t *);
+#endif
+
#ifdef SMALL
#define getopt_long(a,b,c,d,e) getopt(a,b,c)
#else
@@ -1159,6 +1176,11 @@ file_gettype(u_char *buf)
return FT_XZ;
else
#endif
+#ifndef NO_LZ_SUPPORT
+ if (memcmp(buf, LZ_MAGIC, 4) == 0)
+ return FT_LZ;
+ else
+#endif
return FT_UNKNOWN;
}
@@ -1632,14 +1654,23 @@ file_uncompress(char *file, char *outfile, size_t outs
#ifndef NO_XZ_SUPPORT
case FT_XZ:
if (lflag) {
- maybe_warnx("no -l with xz files");
- goto lose;
+ size = unxz_len(fd);
+ print_list_out(in_size, size, file);
+ return -1;
}
-
size = unxz(fd, zfd, NULL, 0, NULL);
break;
#endif
+#ifndef NO_LZ_SUPPORT
+ case FT_LZ:
+ if (lflag) {
+ maybe_warnx("no -l with lzip files");
+ goto lose;
+ }
+ size = unlz(fd, zfd, NULL, 0, NULL);
+ break;
+#endif
#ifndef SMALL
case FT_UNKNOWN:
if (lflag) {
@@ -1872,6 +1903,12 @@ handle_stdin(void)
(char *)header1, sizeof header1, &gsize);
break;
#endif
+#ifndef NO_LZ_SUPPORT
+ case FT_LZ:
+ usize = unlz(STDIN_FILENO, STDOUT_FILENO,
+ (char *)header1, sizeof header1, &gsize);
+ break;
+#endif
}
#ifndef SMALL
@@ -2197,6 +2234,12 @@ print_list(int fd, off_t out, const char *outfile, tim
#else
(void)&ts; /* XXX */
#endif
+ print_list_out(out, in, outfile);
+}
+
+static void
+print_list_out(off_t out, off_t in, const char *outfile)
+{
printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
print_ratio(in, out, stdout);
printf(" %s\n", outfile);
@@ -2270,6 +2313,9 @@ display_version(void)
#endif
#ifndef NO_XZ_SUPPORT
#include "unxz.c"
+#endif
+#ifndef NO_LZ_SUPPORT
+#include "unlz.c"
#endif
static ssize_t
Added: head/usr.bin/gzip/unlz.c
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/usr.bin/gzip/unlz.c Mon Jan 7 08:27:11 2019 (r342845)
@@ -0,0 +1,646 @@
+/* $NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $ */
+
+/*-
+ * Copyright (c) 2018 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Christos Zoulas.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Lzd - Educational decompressor for the lzip format
+ Copyright (C) 2013-2018 Antonio Diaz Diaz.
+
+ This program is free software. Redistribution and use in source and
+ binary forms, with or without modification, are permitted provided
+ that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+
+#define LZ_STATES 12
+
+#define LITERAL_CONTEXT_BITS 3
+#define POS_STATE_BITS 2
+#define POS_STATES (1 << POS_STATE_BITS)
+#define POS_STATE_MASK (POS_STATES - 1)
+
+#define STATES 4
+#define DIS_SLOT_BITS 6
+
+#define DIS_MODEL_START 4
+#define DIS_MODEL_END 14
+
+#define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2))
+#define DIS_ALIGN_BITS 4
+#define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS)
+
+#define LOW_BITS 3
+#define MID_BITS 3
+#define HIGH_BITS 8
+
+#define LOW_SYMBOLS (1 << LOW_BITS)
+#define MID_SYMBOLS (1 << MID_BITS)
+#define HIGH_SYMBOLS (1 << HIGH_BITS)
+
+#define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
+
+#define MIN_MATCH_LEN 2
+
+#define BIT_MODEL_MOVE_BITS 5
+#define BIT_MODEL_TOTAL_BITS 11
+#define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS)
+#define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2)
+
+/*
+ * Transition table consulted by lz_st_get_char(): entry i is the state
+ * the decoder moves to when a literal is decoded while in state i.
+ */
+static const int lz_st_next[] = {
+ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
+};
+
+/*
+ * Tell whether `st' is one of the states (0-6) in which
+ * lz_decode_member() reads a literal with the plain tree decoder
+ * instead of the matched decoder.
+ */
+static bool
+lz_st_is_char(int st)
+{
+	if (st >= 7)
+		return false;
+	return true;
+}
+
+/* Look up the state that follows a literal decoded in state `st'. */
+static int
+lz_st_get_char(int st)
+{
+	const int after_literal = lz_st_next[st];
+
+	return after_literal;
+}
+
+/*
+ * State entered after a match whose distance was freshly decoded:
+ * 7 when coming from states 0-6, 10 otherwise.
+ */
+static int
+lz_st_get_match(int st)
+{
+	if (st < 7)
+		return 7;
+	return 10;
+}
+
+/*
+ * State entered after a match that reuses a remembered distance:
+ * 8 when coming from states 0-6, 11 otherwise.
+ */
+static int
+lz_st_get_rep(int st)
+{
+	if (st < 7)
+		return 8;
+	return 11;
+}
+
+/*
+ * State entered after a one-byte copy from the most recent distance:
+ * 9 when coming from states 0-6, 11 otherwise.
+ */
+static int
+lz_st_get_short_rep(int st)
+{
+	if (st < 7)
+		return 9;
+	return 11;
+}
+
+/*
+ * Bit models used by lz_rd_decode_len() to decode a match length.
+ */
+struct lz_len_model {
+ int choice1; /* decoded first: 0 selects the bm_low tree */
+ int choice2; /* decoded when choice1 is 1: 0 selects bm_mid, 1 bm_high */
+ int bm_low[POS_STATES][LOW_SYMBOLS]; /* LOW_BITS-deep tree per pos_state */
+ int bm_mid[POS_STATES][MID_SYMBOLS]; /* MID_BITS-deep tree per pos_state */
+ int bm_high[HIGH_SYMBOLS]; /* HIGH_BITS-deep tree, shared by all pos_states */
+};
+
+/* Lookup table used by lz_crc_update(); filled in by lz_crc_init(). */
+static uint32_t lz_crc[256];
+
+/*
+ * Populate lz_crc[]: entry n is n pushed through eight shift-right
+ * steps, XOR-ing in 0xEDB88320 whenever the bit shifted out is set.
+ */
+static void
+lz_crc_init(void)
+{
+	unsigned idx = 0;
+
+	while (idx < nitems(lz_crc)) {
+		unsigned rem = idx;
+
+		for (unsigned step = 8; step != 0; step--) {
+			const unsigned low = rem & 1;
+
+			rem >>= 1;
+			if (low)
+				rem ^= 0xEDB88320U;
+		}
+		lz_crc[idx] = rem;
+		idx++;
+	}
+}
+
+/*
+ * Fold `len' bytes starting at `buf' into the running CRC at `*crc',
+ * one table lookup per byte.
+ */
+static void
+lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
+{
+	size_t done = 0;
+
+	while (done < len) {
+		const uint32_t cur = *crc;
+
+		*crc = lz_crc[(cur ^ buf[done]) & 0xFF] ^ (cur >> 8);
+		done++;
+	}
+}
+
+/*
+ * Range decoder state shared by the lz_rd_*() helpers.
+ */
+struct lz_range_decoder {
+ FILE *fp; /* compressed input, read one byte at a time with getc() */
+ uint32_t code; /* value assembled from the input bytes read so far */
+ uint32_t range; /* current interval width; refilled once it fits in 24 bits */
+};
+
+/*
+ * Attach the range decoder to `fp' and prime it: the range starts at
+ * all ones and the code is loaded from the next five input bytes.
+ * Returns -1 if the stream reports a read error afterwards, else 0.
+ */
+static int
+lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
+{
+	uint32_t primed = 0;
+
+	rd->fp = fp;
+	rd->range = 0xFFFFFFFFU;
+	for (int left = 5; left > 0; left--)
+		primed = (primed << 8) | (uint8_t)getc(fp);
+	rd->code = primed;
+
+	if (ferror(fp))
+		return -1;
+	return 0;
+}
+
+/*
+ * Decode `num_bits' bits without a probability model (the range is
+ * simply halved for each one) and return them most significant first.
+ */
+static unsigned
+lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
+{
+	unsigned value = 0;
+
+	for (int left = num_bits; left > 0; left--) {
+		unsigned bit = 0;
+
+		rd->range >>= 1;
+		if (rd->code >= rd->range) {
+			rd->code -= rd->range;
+			bit = 1;
+		}
+		value = (value << 1) | bit;
+
+		/* Pull in another input byte once the range gets small. */
+		if (rd->range <= 0x00FFFFFFU) {
+			rd->range <<= 8;
+			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
+		}
+	}
+
+	return value;
+}
+
+/*
+ * Decode one bit using the adaptive model `*bm' and update the model:
+ * it moves towards BIT_MODEL_TOTAL after a 0 and towards 0 after a 1.
+ */
+static unsigned
+lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
+{
+	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
+	unsigned bit;
+
+	if (rd->code >= bound) {
+		rd->range -= bound;
+		rd->code -= bound;
+		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
+		bit = 1;
+	} else {
+		rd->range = bound;
+		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
+		bit = 0;
+	}
+
+	/* Nothing to refill while the range still needs more than 24 bits. */
+	if (rd->range > 0x00FFFFFFU)
+		return bit;
+
+	rd->range <<= 8;
+	rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
+	return bit;
+}
+
+/*
+ * Decode a `num_bits'-bit symbol by walking the model tree `bm' from
+ * node 1; every decoded bit selects the next node.
+ */
+static unsigned
+lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
+{
+	unsigned node = 1;
+
+	for (int depth = num_bits; depth > 0; depth--) {
+		const unsigned bit = lz_rd_decode_bit(rd, &bm[node]);
+
+		node = (node << 1) | bit;
+	}
+
+	/* Drop the leading marker bit the walk started with. */
+	return node - (1 << num_bits);
+}
+
+/*
+ * Like lz_rd_decode_tree(), but hand back the symbol with the order of
+ * its `num_bits' bits reversed.
+ */
+static unsigned
+lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
+{
+	unsigned forward = lz_rd_decode_tree(rd, bm, num_bits);
+	unsigned mirrored = 0;
+
+	for (int left = num_bits; left > 0; left--, forward >>= 1)
+		mirrored = (mirrored << 1) | (forward & 1);
+
+	return mirrored;
+}
+
+/*
+ * Decode a literal byte using `match_byte' as context.  While the
+ * decoded bits agree with the corresponding bits of `match_byte' the
+ * models are picked from the match-dependent part of `bm'; after the
+ * first disagreement the rest of the byte is decoded from the plain
+ * tree.
+ */
+static unsigned
+lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
+{
+	unsigned node = 1;
+	int shift = 8;
+
+	while (shift-- > 0) {
+		const unsigned want = (match_byte >> shift) & 1;
+		const unsigned got = lz_rd_decode_bit(rd,
+		    &bm[node + (want << 8) + 0x100]);
+
+		node = (node << 1) | got;
+		if (want == got)
+			continue;
+
+		/* Diverged from the match byte: finish without context. */
+		while (node < 0x100) {
+			node = (node << 1) |
+			    lz_rd_decode_bit(rd, &bm[node]);
+		}
+		break;
+	}
+	return node & 0xFF;
+}
+
+/*
+ * Decode a length code with model `lm'.  Two choice bits select the
+ * low, mid or high tree; the result is the symbol from that tree plus
+ * the number of symbols covered by the trees before it.
+ */
+static unsigned
+lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
+    int pos_state)
+{
+	unsigned base;
+	int *tree;
+	int bits;
+
+	if (lz_rd_decode_bit(rd, &lm->choice1) == 0) {
+		base = 0;
+		tree = lm->bm_low[pos_state];
+		bits = LOW_BITS;
+	} else if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
+		base = LOW_SYMBOLS;
+		tree = lm->bm_mid[pos_state];
+		bits = MID_BITS;
+	} else {
+		base = LOW_SYMBOLS + MID_SYMBOLS;
+		tree = lm->bm_high;
+		bits = HIGH_BITS;
+	}
+
+	return base + lz_rd_decode_tree(rd, tree, bits);
+}
+
+/*
+ * Decoder state: the two stdio streams, the output window and the
+ * counters that lz_put(), lz_peek() and lz_flush() maintain.
+ */
+struct lz_decoder {
+ FILE *fin, *fout; /* streams opened by lz_create(), closed by lz_destroy() */
+ off_t pos, ppos, spos, dict_size; /* pos: next write index in obuf; ppos: bytes emitted before the last wrap; spos: start of the not-yet-flushed data; dict_size: size of obuf */
+ bool wrapped; /* set by lz_flush() when the window was full at flush time */
+ uint32_t crc; /* running CRC of the flushed output, starts as ~0 */
+ uint8_t *obuf; /* output window of dict_size bytes */
+ struct lz_range_decoder rdec; /* range decoder reading from fin */
+};
+
+/*
+ * Write the part of the window produced since the previous flush to
+ * the output stream, folding it into the running CRC first.  If the
+ * window is full the write position wraps back to its start.
+ *
+ * Returns 0 on success, -1 when there is nothing to write or the
+ * write comes up short.
+ */
+static int
+lz_flush(struct lz_decoder *lz)
+{
+	const off_t pending = lz->pos - lz->spos;
+
+	if (pending <= 0)
+		return -1;
+
+	const size_t nbytes = (size_t)pending;
+	const uint8_t *chunk = lz->obuf + lz->spos;
+
+	lz_crc_update(&lz->crc, chunk, nbytes);
+	if (fwrite(chunk, 1, nbytes, lz->fout) != nbytes)
+		return -1;
+
+	if (lz->pos >= lz->dict_size) {
+		lz->wrapped = true;
+		lz->ppos += lz->pos;
+		lz->pos = 0;
+	} else {
+		lz->wrapped = false;
+	}
+	lz->spos = lz->pos;
+	return 0;
+}
+
+/*
+ * Release what lz_create() acquired: close whichever streams were
+ * opened (input first, then output) and free the output window.
+ */
+static void
+lz_destroy(struct lz_decoder *lz)
+{
+	FILE *const streams[] = { lz->fin, lz->fout };
+
+	for (size_t i = 0; i < sizeof(streams) / sizeof(streams[0]); i++) {
+		if (streams[i] != NULL)
+			fclose(streams[i]);
+	}
+	free(lz->obuf);
+}
+
+/*
+ * Set up a decoder: wrap private duplicates of the two descriptors in
+ * stdio streams, allocate a `dict_size'-byte output window and prime
+ * the range decoder from the input.
+ *
+ * Returns 0 on success.  On failure returns -1 after releasing
+ * everything acquired so far; the caller's own descriptors are never
+ * closed.
+ */
+static int
+lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
+{
+	int fd, serrno;
+
+	/*
+	 * A zeroed structure (positions 0, not wrapped, NULL pointers)
+	 * keeps lz_destroy() safe on every early exit below.
+	 */
+	memset(lz, 0, sizeof(*lz));
+
+	/* A non-positive size would turn into a huge malloc() request. */
+	if (dict_size <= 0)
+		goto out;
+
+	if ((fd = dup(fin)) == -1)
+		goto out;
+	lz->fin = fdopen(fd, "r");
+	if (lz->fin == NULL) {
+		/* The stream never took ownership of the duplicate. */
+		serrno = errno;
+		(void)close(fd);
+		errno = serrno;
+		goto out;
+	}
+
+	if ((fd = dup(fdout)) == -1)
+		goto out;
+	lz->fout = fdopen(fd, "w");
+	if (lz->fout == NULL) {
+		serrno = errno;
+		(void)close(fd);
+		errno = serrno;
+		goto out;
+	}
+
+	lz->crc = ~0;
+	lz->dict_size = dict_size;
+
+	lz->obuf = malloc(dict_size);
+	if (lz->obuf == NULL)
+		goto out;
+
+	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
+		goto out;
+	return 0;
+out:
+	lz_destroy(lz);
+	return -1;
+}
+
+/*
+ * Return the byte `ahead' + 1 positions behind the write position.
+ * When that reaches before the start of the window, the byte comes
+ * from the window's tail if it has wrapped, and is 0 otherwise.
+ */
+static uint8_t
+lz_peek(const struct lz_decoder *lz, unsigned ahead)
+{
+	off_t idx = lz->pos - ahead - 1;
+
+	if (idx < 0) {
+		if (!lz->wrapped)
+			return 0;
+		idx = lz->dict_size + idx;
+	}
+	return lz->obuf[idx];
+}
+
+/*
+ * Append one byte to the output window and flush the window as soon
+ * as it becomes full.  The result of that flush is not examined.
+ */
+static void
+lz_put(struct lz_decoder *lz, uint8_t b)
+{
+	lz->obuf[lz->pos] = b;
+	lz->pos++;
+
+	if (lz->pos != lz->dict_size)
+		return;
+	(void)lz_flush(lz);
+}
+
+/*
+ * Number of bytes decoded so far: what earlier window wraps
+ * accounted for plus the current write position.
+ */
+static off_t
+lz_get_data_position(const struct lz_decoder *lz)
+{
+	const off_t before_wrap = lz->ppos;
+	const off_t in_window = lz->pos;
+
+	return before_wrap + in_window;
+}
+
+/* Return the running CRC with all 32 bits inverted. */
+static unsigned
+lz_get_crc(const struct lz_decoder *lz)
+{
+	const unsigned inverted = lz->crc ^ 0xffffffffU;
+
+	return inverted;
+}
+
+/* Reset the `l' bit models starting at `a' to BIT_MODEL_INIT. */
+static void
+lz_bm_init(int *a, size_t l)
+{
+	size_t remaining = l;
+	int *slot = a;
+
+	while (remaining-- > 0)
+		*slot++ = BIT_MODEL_INIT;
+}
+
+/*
+ * Initialisation helpers for bit models.  They size their argument with
+ * nitems(), so `a' has to be an actual array (or, for LZ_MODEL_INIT, a
+ * struct lz_len_model object), not a pointer.
+ */
+/* Reset every element of the one-dimensional model array `a'. */
+#define LZ_BM_INIT(a) lz_bm_init(a, nitems(a))
+/* Reset every element of the two-dimensional model array `a', row by row. */
+#define LZ_BM_INIT2(a) do { \
+ size_t l = nitems(a[0]); \
+ for (size_t i = 0; i < nitems(a); i++) \
+ lz_bm_init(a[i], l); \
+} while (/*CONSTCOND*/0)
+
+/* Reset both choice models and all three trees of the length model `a'. */
+#define LZ_MODEL_INIT(a) do { \
+ a.choice1 = BIT_MODEL_INIT; \
+ a.choice2 = BIT_MODEL_INIT; \
+ LZ_BM_INIT2(a.bm_low); \
+ LZ_BM_INIT2(a.bm_mid); \
+ LZ_BM_INIT(a.bm_high); \
+} while (/*CONSTCOND*/0)
+
+/*
+ * Decode the compressed stream of one member from lz->fin, emitting the
+ * output through lz_put() and lz_flush().
+ *
+ * Returns true only when the end marker (a distance of 0xFFFFFFFF) is
+ * decoded together with a length of MIN_MATCH_LEN.  Returns false when
+ * the marker carries any other length, when a freshly decoded distance
+ * lies outside the dictionary or beyond the data produced so far, or
+ * when the input reaches EOF or a read error first.  Pending output is
+ * flushed on every exit path.
+ */
+static bool
+lz_decode_member(struct lz_decoder *lz)
+{
+ int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
+ int bm_match[LZ_STATES][POS_STATES];
+ int bm_rep[4][LZ_STATES];
+ int bm_len[LZ_STATES][POS_STATES];
+ int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
+ int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
+ int bm_align[DIS_ALIGN_SIZE];
+
+ LZ_BM_INIT2(bm_literal);
+ LZ_BM_INIT2(bm_match);
+ LZ_BM_INIT2(bm_rep);
+ LZ_BM_INIT2(bm_len);
+ LZ_BM_INIT2(bm_dis_slot);
+ LZ_BM_INIT(bm_dis);
+ LZ_BM_INIT(bm_align);
+
+ struct lz_len_model match_len_model;
+ struct lz_len_model rep_len_model;
+
+ LZ_MODEL_INIT(match_len_model);
+ LZ_MODEL_INIT(rep_len_model);
+
+ struct lz_range_decoder *rd = &lz->rdec;
+ /* The four most recently used distances; rep[0] is the newest. */
+ unsigned rep[4] = { 0 };
+
+
+ int state = 0;
+
+ while (!feof(lz->fin) && !ferror(lz->fin)) {
+ const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
+ // bit 1
+ if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
+ /* Literal: the model set is chosen by the top bits of the previous byte. */
+ const uint8_t prev_byte = lz_peek(lz, 0);
+ const int literal_state =
+ prev_byte >> (8 - LITERAL_CONTEXT_BITS);
+ int *bm = bm_literal[literal_state];
+ if (lz_st_is_char(state))
+ lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
+ else {
+ int peek = lz_peek(lz, rep[0]);
+ lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
+ }
+ state = lz_st_get_char(state);
+ continue;
+ }
+ int len;
+ // bit 2
+ if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
+ // bit 3
+ if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
+ // bit 4
+ if (lz_rd_decode_bit(rd,
+ &bm_len[state][pos_state]) == 0)
+ {
+ /* Copy a single byte from distance rep[0]. */
+ state = lz_st_get_short_rep(state);
+ lz_put(lz, lz_peek(lz, rep[0]));
+ continue;
+ }
+ } else {
+ /* Pick rep[1], rep[2] or rep[3] and move it to the front. */
+ unsigned distance;
+ // bit 4
+ if (lz_rd_decode_bit(rd, &bm_rep[2][state])
+ == 0)
+ distance = rep[1];
+ else {
+ // bit 5
+ if (lz_rd_decode_bit(rd,
+ &bm_rep[3][state]) == 0)
+ distance = rep[2];
+ else {
+ distance = rep[3];
+ rep[3] = rep[2];
+ }
+ rep[2] = rep[1];
+ }
+ rep[1] = rep[0];
+ rep[0] = distance;
+ }
+ state = lz_st_get_rep(state);
+ len = MIN_MATCH_LEN +
+ lz_rd_decode_len(rd, &rep_len_model, pos_state);
+ } else {
+ /* Match with a new distance: age the history, then decode length and distance. */
+ rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
+ len = MIN_MATCH_LEN +
+ lz_rd_decode_len(rd, &match_len_model, pos_state);
+ const int len_state =
+ MIN(len - MIN_MATCH_LEN, STATES - 1);
+ rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
+ DIS_SLOT_BITS);
+ if (rep[0] >= DIS_MODEL_START) {
+ const unsigned dis_slot = rep[0];
+ const int direct_bits = (dis_slot >> 1) - 1;
+ rep[0] = (2 | (dis_slot & 1)) << direct_bits;
+ if (dis_slot < DIS_MODEL_END)
+ rep[0] += lz_rd_decode_tree_reversed(rd,
+ &bm_dis[rep[0] - dis_slot],
+ direct_bits);
+ else {
+ rep[0] += lz_rd_decode(rd, direct_bits
+ - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
+ rep[0] += lz_rd_decode_tree_reversed(rd,
+ bm_align, DIS_ALIGN_BITS);
+ /* An all-ones distance is the end marker. */
+ if (rep[0] == 0xFFFFFFFFU) {
+ lz_flush(lz);
+ return len == MIN_MATCH_LEN;
+ }
+ }
+ }
+ state = lz_st_get_match(state);
+ /* Reject distances outside the window or before the first output byte. */
+ if (rep[0] >= lz->dict_size ||
+ (rep[0] >= lz->pos && !lz->wrapped)) {
+ lz_flush(lz);
+ return false;
+ }
+ }
+ /* Copy byte by byte so that overlapping matches repeat their own output. */
+ for (int i = 0; i < len; i++)
+ lz_put(lz, lz_peek(lz, rep[0]));
+ }
+ lz_flush(lz);
+ return false;
+}
+
+/*
+ * 0-3 CRC32 of the uncompressed data
+ * 4-11 size of the uncompressed data
+ * 12-19 member size including header and trailer
+ */
+#define TRAILER_SIZE 20
+
+
+/*
+ * Decode a single member whose header has already been consumed from
+ * `fin', writing the data to `fdout', and verify its trailer.
+ *
+ * Returns the uncompressed size recorded in the trailer, or -1 if the
+ * decoder cannot be set up, the stream is corrupt or truncated, the
+ * CRC or size in the trailer disagrees with what was decoded, or the
+ * output could not be written.  On success, and if `insize' is not
+ * NULL, the member size from the trailer is stored in `*insize'.
+ */
+static off_t
+lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
+{
+	struct lz_decoder lz;
+	uint8_t trailer[TRAILER_SIZE];
+	uint64_t data_size = 0;
+	uint64_t member_size = 0;
+	uint32_t crc = 0;
+	off_t rv = -1;
+
+	if (lz_create(&lz, fin, fdout, dict_size) == -1)
+		return -1;
+
+	if (!lz_decode_member(&lz))
+		goto out;
+
+	/* A trailer cut short by EOF is an error, not a run of 0xFF bytes. */
+	for (size_t i = 0; i < nitems(trailer); i++) {
+		const int c = getc(lz.fin);
+
+		if (c == EOF)
+			goto out;
+		trailer[i] = (uint8_t)c;
+	}
+
+	/*
+	 * All trailer fields are little-endian.  They are assembled in
+	 * unsigned variables so that no shift can overflow a signed type.
+	 */
+	for (int i = 3; i >= 0; --i)
+		crc = (crc << 8) | trailer[i];
+	for (int i = 11; i >= 4; --i)
+		data_size = (data_size << 8) | trailer[i];
+	for (int i = 19; i >= 12; --i)
+		member_size = (member_size << 8) | trailer[i];
+
+	if (crc != lz_get_crc(&lz) ||
+	    data_size != (uint64_t)lz_get_data_position(&lz))
+		goto out;
+
+	/*
+	 * The flushes performed while decoding do not report failures to
+	 * their callers, so check the output stream before claiming success.
+	 */
+	if (ferror(lz.fout) || fflush(lz.fout) == EOF)
+		goto out;
+
+	if (insize)
+		*insize = (off_t)member_size;
+	/* ftello(lz.fout) would not work with pipes; use the trailer's size. */
+	rv = (off_t)data_size;
+out:
+	lz_destroy(&lz);
+	return rv;
+}
+
+
+/*
+ * 0-3 magic
+ * 4 version
+ * 5 coded dict_size
+ */
+#define HDR_SIZE 6
+#define MIN_DICTIONARY_SIZE (1 << 12)
+#define MAX_DICTIONARY_SIZE (1 << 29)
+
+static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
+
+/*
+ * Expand the coded dictionary size stored in byte 5 of the header.
+ *
+ * Bits 4-0 hold the base-2 logarithm of a base size and bits 7-5 hold
+ * how many sixteenths of that base size to subtract from it, e.g.
+ * 0xD3 means 2^19 - 6 * 2^15 = 320 KiB.
+ *
+ * Returns the dictionary size in bytes, or 0 if it falls outside
+ * MIN_DICTIONARY_SIZE .. MAX_DICTIONARY_SIZE.
+ */
+static unsigned
+lz_get_dict_size(unsigned char c)
+{
+	/* Unsigned 1 so that an exponent of 31 is a well-defined shift. */
+	unsigned dict_size = 1U << (c & 0x1f);
+
+	/* The fraction is counted in sixteenths (>> 4), not quarters. */
+	dict_size -= (dict_size >> 4) * ((c >> 5) & 0x7);
+	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
+		return 0;
+	return dict_size;
+}
+
+/*
+ * Decompress one lzip member from `fin' to `fout'.
+ *
+ * `pre'/`prelen' describe header bytes the caller has already read
+ * from `fin'; the remainder of the HDR_SIZE-byte header is read here.
+ *
+ * Returns the uncompressed size on success; 0 if the input is empty
+ * and no bytes were passed in; -1 if `prelen' exceeds the header size,
+ * the header cannot be read in full, the magic or version does not
+ * match, the coded dictionary size is invalid, or decoding fails.
+ * `bytes_in' is handed on to lz_decode(), which stores the member size
+ * there on success.
+ */
+static off_t
+unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
+{
+	/*
+	 * Build the CRC table on first use.  Entry 0 is 0 even in a
+	 * populated table, so test entry 1, which is non-zero once
+	 * lz_crc_init() has run.
+	 */
+	if (lz_crc[1] == 0)
+		lz_crc_init();
+
+	char header[HDR_SIZE];
+
+	if (prelen > sizeof(header))
+		return -1;
+	if (pre && prelen)
+		memcpy(header, pre, prelen);
+
+	ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
+	switch (nr) {
+	case -1:
+		return -1;
+	case 0:
+		/* EOF: acceptable only if nothing had been read before. */
+		return prelen ? -1 : 0;
+	default:
+		if ((size_t)nr != sizeof(header) - prelen)
+			return -1;
+		break;
+	}
+
+	/* hdrmagic covers the four magic bytes plus the version byte. */
+	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
+		return -1;
+
+	unsigned dict_size = lz_get_dict_size(header[5]);
+	if (dict_size == 0)
+		return -1;
+
+	return lz_decode(fin, fout, dict_size, bytes_in);
+}
Modified: head/usr.bin/gzip/unxz.c
==============================================================================
--- head/usr.bin/gzip/unxz.c Mon Jan 7 07:12:51 2019 (r342844)
+++ head/usr.bin/gzip/unxz.c Mon Jan 7 08:27:11 2019 (r342845)
@@ -1,4 +1,4 @@
-/* $NetBSD: unxz.c,v 1.7 2017/08/04 07:27:08 mrg Exp $ */
+/* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
/*-
* SPDX-License-Identifier: BSD-2-Clause-NetBSD
@@ -156,3 +156,322 @@ unxz(int i, int o, char *pre, size_t prelen, off_t *by
}
}
}
+
+#include <stdbool.h>
+
+/*
+ * Copied various bits and pieces from xz support code or brute force
+ * replacements.
+ */
+
+#define my_min(A,B) ((A)<(B)?(A):(B))
+
+// Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
+// We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
+#if BUFSIZ <= 1024
+# define IO_BUFFER_SIZE 8192
+#else
+# define IO_BUFFER_SIZE (BUFSIZ & ~7U)
+#endif
+
+/// is_sparse() accesses the buffer as uint64_t for maximum speed.
+/// Use an union to make sure that the buffer is properly aligned.
+typedef union {
+ uint8_t u8[IO_BUFFER_SIZE];
+ uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
+ uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
+} io_buf;
+
+
+/*
+ * Read exactly `size' bytes from offset `pos' of `fd' into `buf',
+ * using lseek() followed by read() rather than pread().
+ *
+ * Returns false on success and true if the seek fails, the read
+ * fails, or fewer than `size' bytes are read.
+ */
+static bool
+io_pread(int fd, io_buf *buf, size_t size, off_t pos)
+{
+	if (lseek(fd, pos, SEEK_SET) == pos) {
+		const size_t got = read(fd, buf, size);
+
+		/* SIZE_MAX is what read()'s -1 turns into as a size_t. */
+		if (got != SIZE_MAX && got == size)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Most of the following is copied (mostly verbatim) from the xz
+ * distribution, from file src/xz/list.c
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+//
+/// \file list.c
+/// \brief Listing information about .xz files
+//
+// Author: Lasse Collin
+//
+// This file has been put into the public domain.
+// You can do whatever you want with this file.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+/// Information about a .xz file
+typedef struct {
+ /// Combined Index of all Streams in the file
+ lzma_index *idx;
+
+ /// Total amount of Stream Padding
+ uint64_t stream_padding;
+
+ /// Highest memory usage so far
+ uint64_t memusage_max;
+
+ /// True if all Blocks so far have Compressed Size and
+ /// Uncompressed Size fields
+ bool all_have_sizes;
+
+ /// Oldest XZ Utils version that will decompress the file
+ uint32_t min_version;
+
+} xz_file_info;
+
+#define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
+
+
+/// \brief Parse the Index(es) from the given .xz file
+///
+/// \param xfi Pointer to structure where the decoded information
+/// is stored.
+/// \param pair Input file
+///
+/// \return On success, false is returned. On error, true is returned.
+///
+// TODO: This function is pretty big. liblzma should have a function that
+// takes a callback function to parse the Index(es) from a .xz file to make
+// it easy for applications.
+static bool
+parse_indexes(xz_file_info *xfi, int src_fd)
+{
+ struct stat st;
+
+ fstat(src_fd, &st);
+ if (st.st_size <= 0) {
+ return true;
+ }
+
+ if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
+ return true;
+ }
+
+ io_buf buf;
+ lzma_stream_flags header_flags;
+ lzma_stream_flags footer_flags;
+ lzma_ret ret;
+
+ // lzma_stream for the Index decoder
+ lzma_stream strm = LZMA_STREAM_INIT;
+
+ // All Indexes decoded so far
+ lzma_index *combined_index = NULL;
+
+ // The Index currently being decoded
+ lzma_index *this_index = NULL;
+
+ // Current position in the file. We parse the file backwards so
+ // initialize it to point to the end of the file.
+ off_t pos = st.st_size;
+
+ // Each loop iteration decodes one Index.
+ do {
+ // Check that there is enough data left to contain at least
+ // the Stream Header and Stream Footer. This check cannot
+ // fail in the first pass of this loop.
+ if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ pos -= LZMA_STREAM_HEADER_SIZE;
+ lzma_vli stream_padding = 0;
+
+ // Locate the Stream Footer. There may be Stream Padding which
+ // we must skip when reading backwards.
+ while (true) {
+ if (pos < LZMA_STREAM_HEADER_SIZE) {
+ goto error;
+ }
+
+ if (io_pread(src_fd, &buf,
+ LZMA_STREAM_HEADER_SIZE, pos))
+ goto error;
+
+ // Stream Padding is always a multiple of four bytes.
+ int i = 2;
+ if (buf.u32[i] != 0)
+ break;
+
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list