From 73b16af8feec390afbabd9356d6e5e83c0390838 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 15 May 2015 10:20:47 +0200 Subject: busybox: imported from http://www.busybox.net/downloads/busybox-1.13.3.tar.bz2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bjørn Mork --- archival/Config.in | 292 +++ archival/Kbuild | 23 + archival/ar.c | 87 + archival/bbunzip.c | 384 ++++ archival/bbunzip_test.sh | 61 + archival/bbunzip_test2.sh | 10 + archival/bbunzip_test3.sh | 23 + archival/bz/LICENSE | 44 + archival/bz/README | 90 + archival/bz/blocksort.c | 1072 ++++++++++ archival/bz/bzlib.c | 429 ++++ archival/bz/bzlib.h | 65 + archival/bz/bzlib_private.h | 219 ++ archival/bz/compress.c | 687 +++++++ archival/bz/huffman.c | 229 +++ archival/bzip2.c | 185 ++ archival/cpio.c | 354 ++++ archival/dpkg.c | 1781 +++++++++++++++++ archival/dpkg_deb.c | 104 + archival/gzip.c | 2102 ++++++++++++++++++++ archival/libunarchive/Kbuild | 51 + archival/libunarchive/data_align.c | 15 + archival/libunarchive/data_extract_all.c | 148 ++ archival/libunarchive/data_extract_to_buffer.c | 17 + archival/libunarchive/data_extract_to_stdout.c | 14 + archival/libunarchive/data_skip.c | 12 + archival/libunarchive/decompress_bunzip2.c | 724 +++++++ archival/libunarchive/decompress_uncompress.c | 307 +++ archival/libunarchive/decompress_unlzma.c | 503 +++++ archival/libunarchive/decompress_unzip.c | 1253 ++++++++++++ archival/libunarchive/filter_accept_all.c | 17 + archival/libunarchive/filter_accept_list.c | 19 + .../libunarchive/filter_accept_list_reassign.c | 51 + archival/libunarchive/filter_accept_reject_list.c | 36 + archival/libunarchive/find_list_entry.c | 54 + archival/libunarchive/get_header_ar.c | 127 ++ archival/libunarchive/get_header_cpio.c | 182 ++ archival/libunarchive/get_header_tar.c | 378 ++++ archival/libunarchive/get_header_tar_bz2.c | 21 + archival/libunarchive/get_header_tar_gz.c | 36 + archival/libunarchive/get_header_tar_lzma.c | 24 + archival/libunarchive/header_list.c | 11 + archival/libunarchive/header_skip.c | 10 + archival/libunarchive/header_verbose_list.c | 58 + archival/libunarchive/init_handle.c | 22 + archival/libunarchive/open_transformer.c | 64 + archival/libunarchive/seek_by_jump.c | 19 + archival/libunarchive/seek_by_read.c | 16 + archival/libunarchive/unpack_ar_archive.c | 21 + archival/rpm.c | 407 ++++ archival/rpm2cpio.c | 89 + archival/tar.c | 988 +++++++++ archival/unzip.c | 565 ++++++ archival/unzip_doc.txt.bz2 | Bin 0 -> 11359 bytes 54 files changed, 14500 insertions(+) create mode 100644 archival/Config.in create mode 100644 archival/Kbuild create mode 100644 archival/ar.c create mode 100644 archival/bbunzip.c create mode 100644 archival/bbunzip_test.sh create mode 100644 archival/bbunzip_test2.sh create mode 100644 archival/bbunzip_test3.sh create mode 100644 archival/bz/LICENSE create mode 100644 archival/bz/README create mode 100644 archival/bz/blocksort.c create mode 100644 archival/bz/bzlib.c create mode 100644 archival/bz/bzlib.h create mode 100644 archival/bz/bzlib_private.h create mode 100644 archival/bz/compress.c create mode 100644 archival/bz/huffman.c create mode 100644 archival/bzip2.c create mode 100644 archival/cpio.c create mode 100644 archival/dpkg.c create mode 100644 archival/dpkg_deb.c create mode 100644 archival/gzip.c create mode 100644 archival/libunarchive/Kbuild create mode 100644 archival/libunarchive/data_align.c create mode 100644 archival/libunarchive/data_extract_all.c create mode 100644 archival/libunarchive/data_extract_to_buffer.c create mode 100644 archival/libunarchive/data_extract_to_stdout.c create mode 100644 archival/libunarchive/data_skip.c create mode 100644 archival/libunarchive/decompress_bunzip2.c create mode 100644 archival/libunarchive/decompress_uncompress.c create mode 100644 archival/libunarchive/decompress_unlzma.c create mode 100644 archival/libunarchive/decompress_unzip.c create mode 100644 archival/libunarchive/filter_accept_all.c create mode 100644 archival/libunarchive/filter_accept_list.c create mode 100644 archival/libunarchive/filter_accept_list_reassign.c create mode 100644 archival/libunarchive/filter_accept_reject_list.c create mode 100644 archival/libunarchive/find_list_entry.c create mode 100644 archival/libunarchive/get_header_ar.c create mode 100644 archival/libunarchive/get_header_cpio.c create mode 100644 archival/libunarchive/get_header_tar.c create mode 100644 archival/libunarchive/get_header_tar_bz2.c create mode 100644 archival/libunarchive/get_header_tar_gz.c create mode 100644 archival/libunarchive/get_header_tar_lzma.c create mode 100644 archival/libunarchive/header_list.c create mode 100644 archival/libunarchive/header_skip.c create mode 100644 archival/libunarchive/header_verbose_list.c create mode 100644 archival/libunarchive/init_handle.c create mode 100644 archival/libunarchive/open_transformer.c create mode 100644 archival/libunarchive/seek_by_jump.c create mode 100644 archival/libunarchive/seek_by_read.c create mode 100644 archival/libunarchive/unpack_ar_archive.c create mode 100644 archival/rpm.c create mode 100644 archival/rpm2cpio.c create mode 100644 archival/tar.c create mode 100644 archival/unzip.c create mode 100644 archival/unzip_doc.txt.bz2 (limited to 'archival') diff --git a/archival/Config.in b/archival/Config.in new file mode 100644 index 0000000..0b5cf37 --- /dev/null +++ b/archival/Config.in @@ -0,0 +1,292 @@ +# +# For a description of the syntax of this configuration file, +# see scripts/kbuild/config-language.txt. +# + +menu "Archival Utilities" + +config FEATURE_SEAMLESS_LZMA + bool "Make tar, rpm, modprobe etc understand .lzma data" + default n + help + Make tar, rpm, modprobe etc understand .lzma data. + +config FEATURE_SEAMLESS_BZ2 + bool "Make tar, rpm, modprobe etc understand .bz2 data" + default n + help + Make tar, rpm, modprobe etc understand .bz2 data. + +config FEATURE_SEAMLESS_GZ + bool "Make tar, rpm, modprobe etc understand .gz data" + default n + help + Make tar, rpm, modprobe etc understand .gz data. + +config FEATURE_SEAMLESS_Z + bool "Make tar and gunzip understand .Z data" + default n + help + Make tar and gunzip understand .Z data. + +config AR + bool "ar" + default n + help + ar is an archival utility program used to create, modify, and + extract contents from archives. An archive is a single file holding + a collection of other files in a structure that makes it possible to + retrieve the original individual files (called archive members). + The original files' contents, mode (permissions), timestamp, owner, + and group are preserved in the archive, and can be restored on + extraction. + + The stored filename is limited to 15 characters. (for more information + see long filename support). + ar has 60 bytes of overheads for every stored file. + + This implementation of ar can extract archives, it cannot create or + modify them. + On an x86 system, the ar applet adds about 1K. + + Unless you have a specific application which requires ar, you should + probably say N here. + +config FEATURE_AR_LONG_FILENAMES + bool "Support for long filenames (not need for debs)" + default n + depends on AR + help + By default the ar format can only store the first 15 characters of + the filename, this option removes that limitation. + It supports the GNU ar long filename method which moves multiple long + filenames into a the data section of a new ar entry. + +config BUNZIP2 + bool "bunzip2" + default n + help + bunzip2 is a compression utility using the Burrows-Wheeler block + sorting text compression algorithm, and Huffman coding. Compression + is generally considerably better than that achieved by more + conventional LZ77/LZ78-based compressors, and approaches the + performance of the PPM family of statistical compressors. + + Unless you have a specific application which requires bunzip2, you + should probably say N here. + +config BZIP2 + bool "bzip2" + default n + help + bzip2 is a compression utility using the Burrows-Wheeler block + sorting text compression algorithm, and Huffman coding. Compression + is generally considerably better than that achieved by more + conventional LZ77/LZ78-based compressors, and approaches the + performance of the PPM family of statistical compressors. + + Unless you have a specific application which requires bzip2, you + should probably say N here. + +config CPIO + bool "cpio" + default n + help + cpio is an archival utility program used to create, modify, and + extract contents from archives. + cpio has 110 bytes of overheads for every stored file. + + This implementation of cpio can extract cpio archives created in the + "newc" or "crc" format, it cannot create or modify them. + + Unless you have a specific application which requires cpio, you + should probably say N here. + +config FEATURE_CPIO_O + bool "Support for archive creation" + default n + depends on CPIO + help + This implementation of cpio can create cpio archives in the "newc" + format only. + +config DPKG + bool "dpkg" + default n + select FEATURE_SEAMLESS_GZ + help + dpkg is a medium-level tool to install, build, remove and manage + Debian packages. + + This implementation of dpkg has a number of limitations, + you should use the official dpkg if possible. + +config DPKG_DEB + bool "dpkg_deb" + default n + select FEATURE_SEAMLESS_GZ + help + dpkg-deb unpacks and provides information about Debian archives. + + This implementation of dpkg-deb cannot pack archives. + + Unless you have a specific application which requires dpkg-deb, + say N here. + +config FEATURE_DPKG_DEB_EXTRACT_ONLY + bool "Extract only (-x)" + default n + depends on DPKG_DEB + help + This reduces dpkg-deb to the equivalent of + "ar -p data.tar.gz | tar -zx". However it saves space as none + of the extra dpkg-deb, ar or tar options are needed, they are linked + to internally. + +config GUNZIP + bool "gunzip" + default n + help + gunzip is used to decompress archives created by gzip. + You can use the `-t' option to test the integrity of + an archive, without decompressing it. + +config GZIP + bool "gzip" + default n + help + gzip is used to compress files. + It's probably the most widely used UNIX compression program. + +config RPM2CPIO + bool "rpm2cpio" + default n + help + Converts an RPM file into a CPIO archive. + +config RPM + bool "rpm" + default n + help + Mini RPM applet - queries and extracts RPM packages. + +config TAR + bool "tar" + default n + help + tar is an archiving program. It's commonly used with gzip to + create compressed archives. It's probably the most widely used + UNIX archive program. + +if TAR + +config FEATURE_TAR_CREATE + bool "Enable archive creation" + default y + depends on TAR + help + If you enable this option you'll be able to create + tar archives using the `-c' option. + +config FEATURE_TAR_AUTODETECT + bool "Autodetect gz/bz2 compressed tarballs" + default n + depends on FEATURE_SEAMLESS_Z || FEATURE_SEAMLESS_GZ || FEATURE_SEAMLESS_BZ2 || FEATURE_SEAMLESS_LZMA + help + With this option tar can automatically detect gzip/bzip2 compressed + tarballs. Currently it works only on files (not pipes etc). + +config FEATURE_TAR_FROM + bool "Enable -X (exclude from) and -T (include from) options)" + default n + depends on TAR + help + If you enable this option you'll be able to specify + a list of files to include or exclude from an archive. + +config FEATURE_TAR_OLDGNU_COMPATIBILITY + bool "Support for old tar header format" + default N + depends on TAR + help + This option is required to unpack archives created in + the old GNU format; help to kill this old format by + repacking your ancient archives with the new format. + +config FEATURE_TAR_OLDSUN_COMPATIBILITY + bool "Enable untarring of tarballs with checksums produced by buggy Sun tar" + default N + depends on TAR + help + This option is required to unpack archives created by some old + version of Sun's tar (it was calculating checksum using signed + arithmetic). It is said to be fixed in newer Sun tar, but "old" + tarballs still exist. + +config FEATURE_TAR_GNU_EXTENSIONS + bool "Support for GNU tar extensions (long filenames)" + default y + depends on TAR + help + With this option busybox supports GNU long filenames and + linknames. + +config FEATURE_TAR_LONG_OPTIONS + bool "Enable long options" + default n + depends on TAR && GETOPT_LONG + help + Enable use of long options, increases size by about 400 Bytes + +config FEATURE_TAR_UNAME_GNAME + bool "Enable use of user and group names" + default n + depends on TAR + help + Enables use of user and group names in tar. This affects contents + listings (-t) and preserving permissions when unpacking (-p). + +200 bytes. + +endif #tar + +config UNCOMPRESS + bool "uncompress" + default n + help + uncompress is used to decompress archives created by compress. + Not much used anymore, replaced by gzip/gunzip. + +config UNLZMA + bool "unlzma" + default n + help + unlzma is a compression utility using the Lempel-Ziv-Markov chain + compression algorithm, and range coding. Compression + is generally considerably better than that achieved by the bzip2 + compressors. + + The BusyBox unlzma applet is limited to de-compression only. + On an x86 system, this applet adds about 4K. + + Unless you have a specific application which requires unlzma, you + should probably say N here. + +config FEATURE_LZMA_FAST + bool "Optimize unlzma for speed" + default n + depends on UNLZMA + help + This option reduces decompression time by about 33% at the cost of + a 2K bigger binary. + +config UNZIP + bool "unzip" + default n + help + unzip will list or extract files from a ZIP archive, + commonly found on DOS/WIN systems. The default behavior + (with no options) is to extract the archive into the + current directory. Use the `-d' option to extract to a + directory of your choice. + +endmenu diff --git a/archival/Kbuild b/archival/Kbuild new file mode 100644 index 0000000..72dbdda --- /dev/null +++ b/archival/Kbuild @@ -0,0 +1,23 @@ +# Makefile for busybox +# +# Copyright (C) 1999-2005 by Erik Andersen +# +# Licensed under the GPL v2, see the file LICENSE in this tarball. + +libs-y += libunarchive/ + +lib-y:= +lib-$(CONFIG_AR) += ar.o +lib-$(CONFIG_BUNZIP2) += bbunzip.o +lib-$(CONFIG_BZIP2) += bzip2.o bbunzip.o +lib-$(CONFIG_UNLZMA) += bbunzip.o +lib-$(CONFIG_CPIO) += cpio.o +lib-$(CONFIG_DPKG) += dpkg.o +lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o +lib-$(CONFIG_GUNZIP) += bbunzip.o +lib-$(CONFIG_GZIP) += gzip.o bbunzip.o +lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o +lib-$(CONFIG_RPM) += rpm.o +lib-$(CONFIG_TAR) += tar.o +lib-$(CONFIG_UNCOMPRESS) += bbunzip.o +lib-$(CONFIG_UNZIP) += unzip.o diff --git a/archival/ar.c b/archival/ar.c new file mode 100644 index 0000000..dbff677 --- /dev/null +++ b/archival/ar.c @@ -0,0 +1,87 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini ar implementation for busybox + * + * Copyright (C) 2000 by Glenn McGrath + * + * Based in part on BusyBox tar, Debian dpkg-deb and GNU ar. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * + * There is no single standard to adhere to so ar may not portable + * between different systems + * http://www.unix-systems.org/single_unix_specification_v2/xcu/ar.html + */ + +#include "libbb.h" +#include "unarchive.h" + +static void FAST_FUNC header_verbose_list_ar(const file_header_t *file_header) +{ + const char *mode = bb_mode_string(file_header->mode); + char *mtime; + + mtime = ctime(&file_header->mtime); + mtime[16] = ' '; + memmove(&mtime[17], &mtime[20], 4); + mtime[21] = '\0'; + printf("%s %d/%d%7d %s %s\n", &mode[1], file_header->uid, file_header->gid, + (int) file_header->size, &mtime[4], file_header->name); +} + +#define AR_CTX_PRINT 0x01 +#define AR_CTX_LIST 0x02 +#define AR_CTX_EXTRACT 0x04 +#define AR_OPT_PRESERVE_DATE 0x08 +#define AR_OPT_VERBOSE 0x10 +#define AR_OPT_CREATE 0x20 +#define AR_OPT_INSERT 0x40 + +int ar_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int ar_main(int argc, char **argv) +{ + static const char msg_unsupported_err[] ALIGN1 = + "archive %s is not supported"; + + archive_handle_t *archive_handle; + unsigned opt; + + archive_handle = init_handle(); + + /* Prepend '-' to the first argument if required */ + opt_complementary = "--:p:t:x:-1:p--tx:t--px:x--pt"; + opt = getopt32(argv, "ptxovcr"); + + if (opt & AR_CTX_PRINT) { + archive_handle->action_data = data_extract_to_stdout; + } + if (opt & AR_CTX_LIST) { + archive_handle->action_header = header_list; + } + if (opt & AR_CTX_EXTRACT) { + archive_handle->action_data = data_extract_all; + } + if (opt & AR_OPT_PRESERVE_DATE) { + archive_handle->ah_flags |= ARCHIVE_PRESERVE_DATE; + } + if (opt & AR_OPT_VERBOSE) { + archive_handle->action_header = header_verbose_list_ar; + } + if (opt & AR_OPT_CREATE) { + bb_error_msg_and_die(msg_unsupported_err, "creation"); + } + if (opt & AR_OPT_INSERT) { + bb_error_msg_and_die(msg_unsupported_err, "insertion"); + } + + archive_handle->src_fd = xopen(argv[optind++], O_RDONLY); + + while (optind < argc) { + archive_handle->filter = filter_accept_list; + llist_add_to(&(archive_handle->accept), argv[optind++]); + } + + unpack_ar_archive(archive_handle); + + return EXIT_SUCCESS; +} diff --git a/archival/bbunzip.c b/archival/bbunzip.c new file mode 100644 index 0000000..75489f2 --- /dev/null +++ b/archival/bbunzip.c @@ -0,0 +1,384 @@ +/* vi: set sw=4 ts=4: */ +/* + * Common code for gunzip-like applets + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +enum { + OPT_STDOUT = 0x1, + OPT_FORCE = 0x2, +/* gunzip and bunzip2 only: */ + OPT_VERBOSE = 0x4, + OPT_DECOMPRESS = 0x8, + OPT_TEST = 0x10, +}; + +static +int open_to_or_warn(int to_fd, const char *filename, int flags, int mode) +{ + int fd = open3_or_warn(filename, flags, mode); + if (fd < 0) { + return 1; + } + xmove_fd(fd, to_fd); + return 0; +} + +int FAST_FUNC bbunpack(char **argv, + char* (*make_new_name)(char *filename), + USE_DESKTOP(long long) int (*unpacker)(unpack_info_t *info) +) +{ + struct stat stat_buf; + USE_DESKTOP(long long) int status; + char *filename, *new_name; + smallint exitcode = 0; + unpack_info_t info; + + do { + /* NB: new_name is *maybe* malloc'ed! */ + new_name = NULL; + filename = *argv; /* can be NULL - 'streaming' bunzip2 */ + + if (filename && LONE_DASH(filename)) + filename = NULL; + + /* Open src */ + if (filename) { + if (stat(filename, &stat_buf) != 0) { + bb_simple_perror_msg(filename); + err: + exitcode = 1; + goto free_name; + } + if (open_to_or_warn(STDIN_FILENO, filename, O_RDONLY, 0)) + goto err; + } + + /* Special cases: test, stdout */ + if (option_mask32 & (OPT_STDOUT|OPT_TEST)) { + if (option_mask32 & OPT_TEST) + if (open_to_or_warn(STDOUT_FILENO, bb_dev_null, O_WRONLY, 0)) + goto err; + filename = NULL; + } + + /* Open dst if we are going to unpack to file */ + if (filename) { + new_name = make_new_name(filename); + if (!new_name) { + bb_error_msg("%s: unknown suffix - ignored", filename); + goto err; + } + + /* -f: overwrite existing output files */ + if (option_mask32 & OPT_FORCE) { + unlink(new_name); + } + + /* O_EXCL: "real" bunzip2 doesn't overwrite files */ + /* GNU gunzip does not bail out, but goes to next file */ + if (open_to_or_warn(STDOUT_FILENO, new_name, O_WRONLY | O_CREAT | O_EXCL, + stat_buf.st_mode)) + goto err; + } + + /* Check that the input is sane */ + if (isatty(STDIN_FILENO) && (option_mask32 & OPT_FORCE) == 0) { + bb_error_msg_and_die("compressed data not read from terminal, " + "use -f to force it"); + } + + /* memset(&info, 0, sizeof(info)); */ + info.mtime = 0; /* so far it has one member only */ + status = unpacker(&info); + if (status < 0) + exitcode = 1; + + if (filename) { + char *del = new_name; + if (status >= 0) { + /* TODO: restore other things? */ + if (info.mtime) { + struct utimbuf times; + + times.actime = info.mtime; + times.modtime = info.mtime; + /* Close first. + * On some systems calling utime + * then closing resets the mtime. */ + close(STDOUT_FILENO); + /* Ignoring errors */ + utime(new_name, ×); + } + + /* Delete _compressed_ file */ + del = filename; + /* restore extension (unless tgz -> tar case) */ + if (new_name == filename) + filename[strlen(filename)] = '.'; + } + xunlink(del); + +#if 0 /* Currently buggy - wrong name: "a.gz: 261% - replaced with a.gz" */ + /* Extreme bloat for gunzip compat */ + if (ENABLE_DESKTOP && (option_mask32 & OPT_VERBOSE) && status >= 0) { + fprintf(stderr, "%s: %u%% - replaced with %s\n", + filename, (unsigned)(stat_buf.st_size*100 / (status+1)), new_name); + } +#endif + + free_name: + if (new_name != filename) + free(new_name); + } + } while (*argv && *++argv); + + return exitcode; +} + +#if ENABLE_BUNZIP2 || ENABLE_UNLZMA || ENABLE_UNCOMPRESS + +static +char* make_new_name_generic(char *filename, const char *expected_ext) +{ + char *extension = strrchr(filename, '.'); + if (!extension || strcmp(extension + 1, expected_ext) != 0) { + /* Mimic GNU gunzip - "real" bunzip2 tries to */ + /* unpack file anyway, to file.out */ + return NULL; + } + *extension = '\0'; + return filename; +} + +#endif + + +/* + * Modified for busybox by Glenn McGrath + * Added support output to stdout by Thomas Lundquist + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#if ENABLE_BUNZIP2 + +static +char* make_new_name_bunzip2(char *filename) +{ + return make_new_name_generic(filename, "bz2"); +} + +static +USE_DESKTOP(long long) int unpack_bunzip2(unpack_info_t *info UNUSED_PARAM) +{ + return unpack_bz2_stream_prime(STDIN_FILENO, STDOUT_FILENO); +} + +int bunzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int bunzip2_main(int argc UNUSED_PARAM, char **argv) +{ + getopt32(argv, "cfvdt"); + argv += optind; + if (applet_name[2] == 'c') + option_mask32 |= OPT_STDOUT; + + return bbunpack(argv, make_new_name_bunzip2, unpack_bunzip2); +} + +#endif + + +/* + * Gzip implementation for busybox + * + * Based on GNU gzip v1.2.4 Copyright (C) 1992-1993 Jean-loup Gailly. + * + * Originally adjusted for busybox by Sven Rudolph + * based on gzip sources + * + * Adjusted further by Erik Andersen to support files as + * well as stdin/stdout, and to generally behave itself wrt command line + * handling. + * + * General cleanup to better adhere to the style guide and make use of standard + * busybox functions by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * + * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface + * Copyright (C) 1992-1993 Jean-loup Gailly + * The unzip code was written and put in the public domain by Mark Adler. + * Portions of the lzw code are derived from the public domain 'compress' + * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, + * Ken Turkowski, Dave Mack and Peter Jannesen. + * + * See the license_msg below and the file COPYING for the software license. + * See the file algorithm.doc for the compression algorithms and file formats. + */ + +#if ENABLE_GUNZIP + +static +char* make_new_name_gunzip(char *filename) +{ + char *extension = strrchr(filename, '.'); + + if (!extension) + return NULL; + + extension++; + if (strcmp(extension, "tgz" + 1) == 0 +#if ENABLE_FEATURE_SEAMLESS_Z + || (extension[0] == 'Z' && extension[1] == '\0') +#endif + ) { + extension[-1] = '\0'; + } else if (strcmp(extension, "tgz") == 0) { + filename = xstrdup(filename); + extension = strrchr(filename, '.'); + extension[2] = 'a'; + extension[3] = 'r'; + } else { + return NULL; + } + return filename; +} + +static +USE_DESKTOP(long long) int unpack_gunzip(unpack_info_t *info) +{ + USE_DESKTOP(long long) int status = -1; + + /* do the decompression, and cleanup */ + if (xread_char(STDIN_FILENO) == 0x1f) { + unsigned char magic2; + + magic2 = xread_char(STDIN_FILENO); + if (ENABLE_FEATURE_SEAMLESS_Z && magic2 == 0x9d) { + status = unpack_Z_stream(STDIN_FILENO, STDOUT_FILENO); + } else if (magic2 == 0x8b) { + status = unpack_gz_stream_with_info(STDIN_FILENO, STDOUT_FILENO, info); + } else { + goto bad_magic; + } + if (status < 0) { + bb_error_msg("error inflating"); + } + } else { + bad_magic: + bb_error_msg("invalid magic"); + /* status is still == -1 */ + } + return status; +} + +/* + * Linux kernel build uses gzip -d -n. We accept and ignore it. + * Man page says: + * -n --no-name + * gzip: do not save the original file name and time stamp. + * (The original name is always saved if the name had to be truncated.) + * gunzip: do not restore the original file name/time even if present + * (remove only the gzip suffix from the compressed file name). + * This option is the default when decompressing. + * -N --name + * gzip: always save the original file name and time stamp (this is the default) + * gunzip: restore the original file name and time stamp if present. + */ + +int gunzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int gunzip_main(int argc UNUSED_PARAM, char **argv) +{ + getopt32(argv, "cfvdtn"); + argv += optind; + /* if called as zcat */ + if (applet_name[1] == 'c') + option_mask32 |= OPT_STDOUT; + + return bbunpack(argv, make_new_name_gunzip, unpack_gunzip); +} + +#endif + + +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Based on bunzip.c from busybox + * + * Licensed under GPL v2, see file LICENSE in this tarball for details. + */ + +#if ENABLE_UNLZMA + +static +char* make_new_name_unlzma(char *filename) +{ + return make_new_name_generic(filename, "lzma"); +} + +static +USE_DESKTOP(long long) int unpack_unlzma(unpack_info_t *info UNUSED_PARAM) +{ + return unpack_lzma_stream(STDIN_FILENO, STDOUT_FILENO); +} + +int unlzma_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int unlzma_main(int argc UNUSED_PARAM, char **argv) +{ + getopt32(argv, "cf"); + argv += optind; + /* lzmacat? */ + if (applet_name[4] == 'c') + option_mask32 |= OPT_STDOUT; + + return bbunpack(argv, make_new_name_unlzma, unpack_unlzma); +} + +#endif + + +/* + * Uncompress applet for busybox (c) 2002 Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#if ENABLE_UNCOMPRESS + +static +char* make_new_name_uncompress(char *filename) +{ + return make_new_name_generic(filename, "Z"); +} + +static +USE_DESKTOP(long long) int unpack_uncompress(unpack_info_t *info UNUSED_PARAM) +{ + USE_DESKTOP(long long) int status = -1; + + if ((xread_char(STDIN_FILENO) != 0x1f) || (xread_char(STDIN_FILENO) != 0x9d)) { + bb_error_msg("invalid magic"); + } else { + status = unpack_Z_stream(STDIN_FILENO, STDOUT_FILENO); + } + return status; +} + +int uncompress_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int uncompress_main(int argc UNUSED_PARAM, char **argv) +{ + getopt32(argv, "cf"); + argv += optind; + + return bbunpack(argv, make_new_name_uncompress, unpack_uncompress); +} + +#endif diff --git a/archival/bbunzip_test.sh b/archival/bbunzip_test.sh new file mode 100644 index 0000000..b8e31bf --- /dev/null +++ b/archival/bbunzip_test.sh @@ -0,0 +1,61 @@ +#!/bin/sh +# Test that concatenated gz files are unpacking correctly. +# It also tests that unpacking in general is working right. +# Since zip code has many corner cases, run it for a few hours +# to get a decent coverage (200000 tests or more). + +gzip="gzip" +gunzip="../busybox gunzip" +# Or the other way around: +#gzip="../busybox gzip" +#gunzip="gunzip" + +c=0 +i=$PID +while true; do + c=$((c+1)) + + # RANDOM is not very random on some shells. Spice it up. + # 100003 is prime + len1=$(( (((RANDOM*RANDOM)^i) & 0x7ffffff) % 100003 )) + i=$((i * 1664525 + 1013904223)) + len2=$(( (((RANDOM*RANDOM)^i) & 0x7ffffff) % 100003 )) + + # Just using urandom will make gzip use method 0 (store) - + # not good for test coverage! + cat /dev/urandom | while true; do read junk; echo "junk $c $i $junk"; done \ + | dd bs=$len1 count=1 >z1 2>/dev/null + cat /dev/urandom | while true; do read junk; echo "junk $c $i $junk"; done \ + | dd bs=$len2 count=1 >z2 2>/dev/null + + $gzip zz.gz + $gzip >zz.gz + $gunzip -c zz.gz >z9 || { + echo "Exitcode $?" + exit + } + sum=`cat z1 z2 | md5sum` + sum9=`md5sum /dev/null + $gzip z1 + $gzip z2 + cat z1.gz z2.gz z1.gz z2.gz >zz.gz + $gunzip -c zz.gz >z9 || { + echo "Exitcode $? (2)" + exit + } + sum9=`md5sum /dev/null diff --git a/archival/bbunzip_test3.sh b/archival/bbunzip_test3.sh new file mode 100644 index 0000000..2dc4afd --- /dev/null +++ b/archival/bbunzip_test3.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# Leak test for gunzip. Watch top for growing process size. +# In this case we look for leaks in "concatenated .gz" code - +# we feed gunzip with a stream of .gz files. + +i=$PID +c=0 +while true; do + c=$((c + 1)) + echo "Block# $c" >&2 + # RANDOM is not very random on some shells. Spice it up. + i=$((i * 1664525 + 1013904223)) + # 100003 is prime + len=$(( (((RANDOM*RANDOM)^i) & 0x7ffffff) % 100003 )) + + # Just using urandom will make gzip use method 0 (store) - + # not good for test coverage! + cat /dev/urandom \ + | while true; do read junk; echo "junk $c $i $junk"; done \ + | dd bs=$len count=1 2>/dev/null \ + | gzip >xxx.gz + cat xxx.gz xxx.gz xxx.gz xxx.gz xxx.gz xxx.gz xxx.gz xxx.gz +done | ../busybox gunzip -c >/dev/null diff --git a/archival/bz/LICENSE b/archival/bz/LICENSE new file mode 100644 index 0000000..da43465 --- /dev/null +++ b/archival/bz/LICENSE @@ -0,0 +1,44 @@ +bzip2 applet in busybox is based on lightly-modified source +of bzip2 version 1.0.4. bzip2 source is distributed +under the following conditions (copied verbatim from LICENSE file) +=========================================================== + + +This program, "bzip2", the associated library "libbzip2", and all +documentation, are copyright (C) 1996-2006 Julian R Seward. All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + +4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, Cambridge, UK. +jseward@bzip.org +bzip2/libbzip2 version 1.0.4 of 20 December 2006 diff --git a/archival/bz/README b/archival/bz/README new file mode 100644 index 0000000..fffd47b --- /dev/null +++ b/archival/bz/README @@ -0,0 +1,90 @@ +This file is an abridged version of README from bzip2 1.0.4 +Build instructions (which are not relevant to busyboxed bzip2) +are removed. +=========================================================== + + +This is the README for bzip2/libzip2. +This version is fully compatible with the previous public releases. + +------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in this file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ + +Please read and be aware of the following: + + +WARNING: + + This program and library (attempts to) compress data by + performing several non-trivial transformations on it. + Unless you are 100% familiar with *all* the algorithms + contained herein, and with the consequences of modifying them, + you should NOT meddle with the compression or decompression + machinery. Incorrect changes can and very likely *will* + lead to disastrous loss of data. + + +DISCLAIMER: + + I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE + USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED. + + Every compression of a file implies an assumption that the + compressed file can be decompressed to reproduce the original. + Great efforts in design, coding and testing have been made to + ensure that this program works correctly. However, the complexity + of the algorithms, and, in particular, the presence of various + special cases in the code which occur with very low but non-zero + probability make it impossible to rule out the possibility of bugs + remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS + PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER + SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. + + That is not to say this program is inherently unreliable. + Indeed, I very much hope the opposite is true. bzip2/libbzip2 + has been carefully constructed and extensively tested. + + +PATENTS: + + To the best of my knowledge, bzip2/libbzip2 does not use any + patented algorithms. However, I do not have the resources + to carry out a patent search. Therefore I cannot give any + guarantee of the above statement. + + +I hope you find bzip2 useful. Feel free to contact me at + jseward@bzip.org +if you have any suggestions or queries. Many people mailed me with +comments, suggestions and patches after the releases of bzip-0.15, +bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1, +1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this +feedback. I thank you for your comments. + +bzip2's "home" is http://www.bzip.org/ + +Julian Seward +jseward@bzip.org +Cambridge, UK. + +18 July 1996 (version 0.15) +25 August 1996 (version 0.21) + 7 August 1997 (bzip2, version 0.1) +29 August 1997 (bzip2, version 0.1pl2) +23 August 1998 (bzip2, version 0.9.0) + 8 June 1999 (bzip2, version 0.9.5) + 4 Sept 1999 (bzip2, version 0.9.5d) + 5 May 2000 (bzip2, version 1.0pre8) +30 December 2001 (bzip2, version 1.0.2pre1) +15 February 2005 (bzip2, version 1.0.3) +20 December 2006 (bzip2, version 1.0.4) diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c new file mode 100644 index 0000000..0e73ffe --- /dev/null +++ b/archival/bz/blocksort.c @@ -0,0 +1,1072 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +#define mswap(zz1, zz2) \ +{ \ + int32_t zztmp = zz1; \ + zz1 = zz2; \ + zz2 = zztmp; \ +} + +static +/* No measurable speed gain with inlining */ +/* ALWAYS_INLINE */ +void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn) +{ + while (zzn > 0) { + mswap(ptr[zzp1], ptr[zzp2]); + zzp1++; + zzp2++; + zzn--; + } +} + +static +ALWAYS_INLINE +int32_t mmin(int32_t a, int32_t b) +{ + return (a < b) ? a : b; +} + + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +inline +void fallbackSimpleSort(uint32_t* fmap, + uint32_t* eclass, + int32_t lo, + int32_t hi) +{ + int32_t i, j, tmp; + uint32_t ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for (i = hi-4; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for (i = hi-1; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fpush(lz,hz) { \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; \ +} + +#define fpop(lz,hz) { \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ +} + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + +static +void fallbackQSort3(uint32_t* fmap, + uint32_t* eclass, + int32_t loSt, + int32_t hiSt) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m; + int32_t sp, lo, hi; + uint32_t med, r, r3; + int32_t stackLo[FALLBACK_QSORT_STACK_SIZE]; + int32_t stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush(loSt, hiSt); + + while (sp > 0) { + AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004); + + fpop(lo, hi); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + * avoid bad cases. Median of 9 seems to help but + * looks rather expensive. This too seems to work but + * is cheaper. Guidance for the magic constants + * 7621 and 32768 is taken from Sedgewick's algorithms + * book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) + med = eclass[fmap[lo]]; + else if (r3 == 1) + med = eclass[fmap[(lo+hi)>>1]]; + else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unLo]] - (int32_t)med; + if (n == 0) { + mswap(fmap[unLo], fmap[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unHi]] - (int32_t)med; + if (n == 0) { + mswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD(unHi == unLo-1, "fallbackQSort3(2)"); + + if (gtHi < ltLo) continue; + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(lo, n); + fpush(m, hi); + } else { + fpush(m, hi); + fpush(lo, n); + } + } +} + +#undef fpush +#undef fpop +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * eclass exists for [0 .. nblock-1] + * ((uint8_t*)eclass) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)eclass) [0 .. nblock-1] holds block + * All other areas of eclass destroyed + * fmap [0 .. nblock-1] holds sorted order + * bhtab[0 .. 2+(nblock/32)] destroyed +*/ + +#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) +#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) +#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort(uint32_t* fmap, + uint32_t* eclass, + uint32_t* bhtab, + int32_t nblock) +{ + int32_t ftab[257]; + int32_t ftabCopy[256]; + int32_t H, i, j, k, l, r, cc, cc1; + int32_t nNotDone; + int32_t nBhtab; + uint8_t* eclass8 = (uint8_t*)eclass; + + /* + * Initial 1-char radix sort to generate + * initial fmap and initial BH bits. + */ + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i < 257; i++) { + j += ftab[i]; + ftab[i] = j; + } + + for (i = 0; i < nblock; i++) { + j = eclass8[i]; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /* + * Inductively refine the buckets. Kind-of an + * "exponential radix sort" (!), inspired by the + * Manber-Myers suffix array construction algorithm. + */ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) + j = i; + k = fmap[i] - H; + if (k < 0) + k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) + break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) + break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + SET_BH(i); + cc = cc1; + }; + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) + break; + } + + /* + * Reconstruct the original block in + * eclass8 [0 .. nblock-1], since the + * previous phase destroyed it. + */ + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) + j++; + ftabCopy[j]--; + eclass8[fmap[i]] = (uint8_t)j; + } + AssertH(j < 256, 1005); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +NOINLINE +int mainGtU( + uint32_t i1, + uint32_t i2, + uint8_t* block, + uint16_t* quadrant, + uint32_t nblock, + int32_t* budget) +{ + int32_t k; + uint8_t c1, c2; + uint16_t s1, s2; + +/* Loop unrolling here is actually very useful + * (generated code is much simpler), + * code size increase is only 270 bytes (i386) + * but speeds up compression 10% overall + */ + +#if CONFIG_BZIP2_FEATURE_SPEED >= 1 + +#define TIMES_8(code) \ + code; code; code; code; \ + code; code; code; code; +#define TIMES_12(code) \ + code; code; code; code; \ + code; code; code; code; \ + code; code; code; code; + +#else + +#define TIMES_8(code) \ +{ \ + int nn = 8; \ + do { \ + code; \ + } while (--nn); \ +} +#define TIMES_12(code) \ +{ \ + int nn = 12; \ + do { \ + code; \ + } while (--nn); \ +} + +#endif + + AssertD(i1 != i2, "mainGtU"); + TIMES_12( + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + ) + + k = nblock + 8; + + do { + TIMES_8( + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + ) + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + (*budget)--; + k -= 8; + } while (k >= 0); + + return False; +} +#undef TIMES_8 +#undef TIMES_12 + +/*---------------------------------------------*/ +/* + * Knuth's increments seem to work better + * than Incerpi-Sedgewick here. Possibly + * because the number of elems to sort is + * usually small, typically <= 20. + */ +static +const int32_t incs[14] = { + 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 +}; + +static +void mainSimpleSort(uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + int32_t nblock, + int32_t lo, + int32_t hi, + int32_t d, + int32_t* budget) +{ + int32_t i, j, h, bigN, hp; + uint32_t v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (1) { + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + +/* 1.5% overall speedup, +290 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 3 + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; +#endif + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/* + * The following is an implementation of + * an elegant 3-way quicksort for strings, + * described in a paper "Fast Algorithms for + * Sorting and Searching Strings", by Robert + * Sedgewick and Jon L. Bentley. + */ + +static +ALWAYS_INLINE +uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c) +{ + uint8_t t; + if (a > b) { + t = a; + a = b; + b = t; + }; + /* here b >= a */ + if (b > c) { + b = c; + if (a > b) + b = a; + } + return b; +} + +#define mpush(lz,hz,dz) \ +{ \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; \ +} + +#define mpop(lz,hz,dz) \ +{ \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; \ +} + +#define mnextsize(az) (nextHi[az] - nextLo[az]) + +#define mnextswap(az,bz) \ +{ \ + int32_t tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \ +} + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static +void mainQSort3(uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + int32_t nblock, + int32_t loSt, + int32_t hiSt, + int32_t dSt, + int32_t* budget) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m, med; + int32_t sp, lo, hi, d; + + int32_t stackLo[MAIN_QSORT_STACK_SIZE]; + int32_t stackHi[MAIN_QSORT_STACK_SIZE]; + int32_t stackD [MAIN_QSORT_STACK_SIZE]; + + int32_t nextLo[3]; + int32_t nextHi[3]; + int32_t nextD [3]; + + sp = 0; + mpush(loSt, hiSt, dSt); + + while (sp > 0) { + AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001); + + mpop(lo, hi, d); + if (hi - lo < MAIN_QSORT_SMALL_THRESH + || d > MAIN_QSORT_DEPTH_THRESH + ) { + mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); + if (*budget < 0) + return; + continue; + } + med = (int32_t) mmed3(block[ptr[lo ] + d], + block[ptr[hi ] + d], + block[ptr[(lo+hi) >> 1] + d]); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; + unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) + break; + mswap(ptr[unLo], ptr[unHi]); + unLo++; + unHi--; + } + + AssertD(unHi == unLo-1, "mainQSort3(2)"); + + if (gtHi < ltLo) { + mpush(lo, hi, d + 1); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)"); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)"); + + mpush(nextLo[0], nextHi[0], nextD[0]); + mpush(nextLo[1], nextHi[1], nextD[1]); + mpush(nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mpush +#undef mpop +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > N_OVERSHOOT + * block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((uint8_t*)block32) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)block32) [0 .. nblock-1] holds block + * All other areas of block32 destroyed + * ftab[0 .. 65536] destroyed + * ptr [0 .. nblock-1] holds sorted order + * if (*budget < 0), sorting was abandoned + */ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static NOINLINE +void mainSort(EState* state, + uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + uint32_t* ftab, + int32_t nblock, + int32_t* budget) +{ + int32_t i, j, k, ss, sb; + uint8_t c1; + int32_t numQSorted; + uint16_t s; + Bool bigDone[256]; + /* bbox: moved to EState to save stack + int32_t runningOrder[256]; + int32_t copyStart[256]; + int32_t copyEnd [256]; + */ +#define runningOrder (state->mainSort__runningOrder) +#define copyStart (state->mainSort__copyStart) +#define copyEnd (state->mainSort__copyEnd) + + /*-- set up the 2-byte frequency table --*/ + /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ + memset(ftab, 0, 65537 * sizeof(ftab[0])); + + j = block[0] << 8; + i = nblock - 1; +/* 3%, +300 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 2 + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) | (((uint16_t)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) | (((uint16_t)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) | (((uint16_t)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) | (((uint16_t)block[i-3]) << 8); + ftab[j]++; + } +#endif + for (; i >= 0; i--) { + quadrant[i] = 0; + j = (j >> 8) | (((uint16_t)block[i]) << 8); + ftab[j]++; + } + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + /*-- Complete the initial radix sort --*/ + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i <= 65536; i++) { + j += ftab[i]; + ftab[i] = j; + } + + s = block[0] << 8; + i = nblock - 1; +#if CONFIG_BZIP2_FEATURE_SPEED >= 2 + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-3; + } +#endif + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + } + + /* + * Now ftab contains the first loc of every small bucket. + * Calculate the running order, from smallest to largest + * big bucket. + */ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + int32_t vv; + /* bbox: was: int32_t h = 1; */ + /* do h = 3 * h + 1; while (h <= 256); */ + uint32_t h = 364; + + do { + /*h = h / 3;*/ + h = (h * 171) >> 9; /* bbox: fast h/3 */ + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) + goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /* + * The main sorting loop. + */ + + numQSorted = 0; + + for (i = 0; i <= 255; i++) { + + /* + * Process big buckets, starting with the least full. + * Basically this is a 3-step process in which we call + * mainQSort3 to sort the small buckets [ss, j], but + * also make a big effort to avoid the calls if we can. + */ + ss = runningOrder[i]; + + /* + * Step 1: + * Complete the big bucket [ss] by quicksorting + * any unsorted small buckets [ss, j], for j != ss. + * Hopefully previous pointer-scanning phases have already + * completed many of the small buckets [ss, j], so + * we don't have to sort them at all. + */ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if (!(ftab[sb] & SETMASK)) { + int32_t lo = ftab[sb] & CLEARMASK; + int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + if (*budget < 0) return; + numQSorted += (hi - lo + 1); + } + } + ftab[sb] |= SETMASK; + } + } + + AssertH(!bigDone[ss], 1006); + + /* + * Step 2: + * Now scan this big bucket [ss] so as to synthesise the + * sorted order for small buckets [t, ss] for all t, + * including, magically, the bucket [ss,ss] too. + * This will avoid doing Real Work in subsequent Step 1's. + */ + { + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j] - 1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyStart[c1]++] = k; + } + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyEnd[c1]--] = k; + } + } + + /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. + * Necessity for this case is demonstrated by compressing + * a sequence of approximately 48.5 million of character + * 251; 1.0.0/1.0.1 will then die here. */ + AssertH((copyStart[ss]-1 == copyEnd[ss]) \ + || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007); + + for (j = 0; j <= 255; j++) + ftab[(j << 8) + ss] |= SETMASK; + + /* + * Step 3: + * The [ss] big bucket is now done. Record this fact, + * and update the quadrant descriptors. Remember to + * update quadrants in the overshoot area too, if + * necessary. The "if (i < 255)" test merely skips + * this updating for the last bucket processed, since + * updating for the last bucket is pointless. + * + * The quadrant array provides a way to incrementally + * cache sort orderings, as they appear, so as to + * make subsequent comparisons in fullGtU() complete + * faster. For repetitive blocks this makes a big + * difference (but not big enough to be able to avoid + * the fallback sorting mechanism, exponential radix sort). + * + * The precise meaning is: at all times: + * + * for 0 <= i < nblock and 0 <= j <= nblock + * + * if block[i] != block[j], + * + * then the relative values of quadrant[i] and + * quadrant[j] are meaningless. + * + * else { + * if quadrant[i] < quadrant[j] + * then the string starting at i lexicographically + * precedes the string starting at j + * + * else if quadrant[i] > quadrant[j] + * then the string starting at j lexicographically + * precedes the string starting at i + * + * else + * the relative ordering of the strings starting + * at i and j has not yet been determined. + * } + */ + bigDone[ss] = True; + + if (i < 255) { + int32_t bbStart = ftab[ss << 8] & CLEARMASK; + int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + int32_t shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = bbSize-1; j >= 0; j--) { + int32_t a2update = ptr[bbStart + j]; + uint16_t qVal = (uint16_t)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH(((bbSize-1) >> shifts) <= 65535, 1002); + } + } +#undef runningOrder +#undef copyStart +#undef copyEnd +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((uint8_t*)arr2)[0 .. nblock-1] holds block + * arr1 exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)arr2) [0 .. nblock-1] holds block + * All other areas of block destroyed + * ftab[0 .. 65536] destroyed + * arr1[0 .. nblock-1] holds sorted order + */ +static NOINLINE +void BZ2_blockSort(EState* s) +{ + /* In original bzip2 1.0.4, it's a parameter, but 30 + * (which was the default) should work ok. */ + enum { wfact = 30 }; + + uint32_t* ptr = s->ptr; + uint8_t* block = s->block; + uint32_t* ftab = s->ftab; + int32_t nblock = s->nblock; + uint16_t* quadrant; + int32_t budget; + int32_t i; + + if (nblock < 10000) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } else { + /* Calculate the location for quadrant, remembering to get + * the alignment right. Assumes that &(block[0]) is at least + * 2-byte aligned -- this should be ok since block is really + * the first section of arr2. + */ + i = nblock + BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (uint16_t*)(&(block[i])); + + /* (wfact-1) / 3 puts the default-factor-30 + * transition point at very roughly the same place as + * with v0.1 and v0.9.0. + * Not that it particularly matters any more, since the + * resulting compressed stream is now the same regardless + * of whether or not we use the main sort or fallback sort. + */ + budget = nblock * ((wfact-1) / 3); + + mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); + if (budget < 0) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) { + s->origPtr = i; + break; + }; + + AssertH(s->origPtr != -1, 1003); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib.c b/archival/bz/bzlib.c new file mode 100644 index 0000000..9957c2f --- /dev/null +++ b/archival/bz/bzlib.c @@ -0,0 +1,429 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). + * fixed bzWrite/bzRead to ignore zero-length requests. + * fixed bzread to correctly handle read requests after EOF. + * wrong parameter order in call to bzDecompressInit in + * bzBuffToBuffDecompress. Fixed. + */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#if BZ_LIGHT_DEBUG +static +void bz_assert_fail(int errcode) +{ + /* if (errcode == 1007) bb_error_msg_and_die("probably bad RAM"); */ + bb_error_msg_and_die("internal error %d", errcode); +} +#endif + +/*---------------------------------------------------*/ +static +void prepare_new_block(EState* s) +{ + int i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC(s->blockCRC); + /* inlined memset would be nice to have here */ + for (i = 0; i < 256; i++) + s->inUse[i] = 0; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +ALWAYS_INLINE +void init_RL(EState* s) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static +int isempty_RL(EState* s) +{ + return (s->state_in_ch >= 256 || s->state_in_len <= 0); +} + + +/*---------------------------------------------------*/ +static +void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) +{ + int32_t n; + EState* s; + + s = xzalloc(sizeof(EState)); + s->strm = strm; + + n = 100000 * blockSize100k; + s->arr1 = xmalloc(n * sizeof(uint32_t)); + s->mtfv = (uint16_t*)s->arr1; + s->ptr = (uint32_t*)s->arr1; + s->arr2 = xmalloc((n + BZ_N_OVERSHOOT) * sizeof(uint32_t)); + s->block = (uint8_t*)s->arr2; + s->ftab = xmalloc(65537 * sizeof(uint32_t)); + + s->crc32table = crc32_filltable(NULL, 1); + + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->blockSize100k = blockSize100k; + s->nblockMAX = n - 19; + + strm->state = s; + /*strm->total_in = 0;*/ + strm->total_out = 0; + init_RL(s); + prepare_new_block(s); +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block(EState* s) +{ + int32_t i; + uint8_t ch = (uint8_t)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC(s, s->blockCRC, ch); + } + s->inUse[s->state_in_ch] = 1; + switch (s->state_in_len) { + case 3: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + /* fall through */ + case 2: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + /* fall through */ + case 1: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len - 4] = 1; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)(s->state_in_len - 4); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL(EState* s) +{ + if (s->state_in_ch < 256) add_pair_to_block(s); + init_RL(s); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs, zchh0) \ +{ \ + uint32_t zchh = (uint32_t)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && zs->state_in_len == 1) { \ + uint8_t ch = (uint8_t)(zs->state_in_ch); \ + BZ_UPDATE_CRC(zs, zs->blockCRC, ch); \ + zs->inUse[zs->state_in_ch] = 1; \ + zs->block[zs->nblock] = (uint8_t)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block(zs); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ copy_input_until_stop(EState* s) +{ + /*Bool progress_in = False;*/ + +#ifdef SAME_CODE_AS_BELOW + if (s->mode == BZ_M_RUNNING) { + /*-- fast track the common case --*/ + while (1) { + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*progress_in = True;*/ + ADD_CHAR_TO_BLOCK(s, (uint32_t)(*(uint8_t*)(s->strm->next_in))); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + } + } else +#endif + { + /*-- general, uncommon case --*/ + while (1) { + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + //# /*-- flush/finish end? --*/ + //# if (s->avail_in_expect == 0) break; + /*progress_in = True;*/ + ADD_CHAR_TO_BLOCK(s, *(uint8_t*)(s->strm->next_in)); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + //# s->avail_in_expect--; + } + } + /*return progress_in;*/ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ copy_output_until_stop(EState* s) +{ + /*Bool progress_out = False;*/ + + while (1) { + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? --*/ + if (s->state_out_pos >= s->numZ) break; + + /*progress_out = True;*/ + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out++; + } + /*return progress_out;*/ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ handle_compress(bz_stream *strm) +{ + /*Bool progress_in = False;*/ + /*Bool progress_out = False;*/ + EState* s = strm->state; + + while (1) { + if (s->state == BZ_S_OUTPUT) { + /*progress_out |=*/ copy_output_until_stop(s); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING + //# && s->avail_in_expect == 0 + && s->strm->avail_in == 0 + && isempty_RL(s)) + break; + prepare_new_block(s); + s->state = BZ_S_INPUT; +#ifdef FLUSH_IS_UNUSED + if (s->mode == BZ_M_FLUSHING + && s->avail_in_expect == 0 + && isempty_RL(s)) + break; +#endif + } + + if (s->state == BZ_S_INPUT) { + /*progress_in |=*/ copy_input_until_stop(s); + //#if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + if (s->mode != BZ_M_RUNNING && s->strm->avail_in == 0) { + flush_RL(s); + BZ2_compressBlock(s, (s->mode == BZ_M_FINISHING)); + s->state = BZ_S_OUTPUT; + } else + if (s->nblock >= s->nblockMAX) { + BZ2_compressBlock(s, 0); + s->state = BZ_S_OUTPUT; + } else + if (s->strm->avail_in == 0) { + break; + } + } + } + + /*return progress_in || progress_out;*/ +} + + +/*---------------------------------------------------*/ +static +int BZ2_bzCompress(bz_stream *strm, int action) +{ + /*Bool progress;*/ + EState* s; + + s = strm->state; + + switch (s->mode) { + case BZ_M_RUNNING: + if (action == BZ_RUN) { + /*progress =*/ handle_compress(strm); + /*return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;*/ + return BZ_RUN_OK; + } +#ifdef FLUSH_IS_UNUSED + else + if (action == BZ_FLUSH) { + //#s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto case_BZ_M_FLUSHING; + } +#endif + else + /*if (action == BZ_FINISH)*/ { + //#s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto case_BZ_M_FINISHING; + } + +#ifdef FLUSH_IS_UNUSED + case_BZ_M_FLUSHING: + case BZ_M_FLUSHING: + /*if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR;*/ + /*progress =*/ handle_compress(strm); + if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; +#endif + + case_BZ_M_FINISHING: + /*case BZ_M_FINISHING:*/ + default: + /*if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR;*/ + /*progress =*/ handle_compress(strm); + /*if (!progress) return BZ_SEQUENCE_ERROR;*/ + //#if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + //# return BZ_FINISH_OK; + if (s->strm->avail_in > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FINISH_OK; + /*s->mode = BZ_M_IDLE;*/ + return BZ_STREAM_END; + } + /* return BZ_OK; --not reached--*/ +} + + +/*---------------------------------------------------*/ +#if ENABLE_FEATURE_CLEAN_UP +static +void BZ2_bzCompressEnd(bz_stream *strm) +{ + EState* s; + + s = strm->state; + free(s->arr1); + free(s->arr2); + free(s->ftab); + free(s->crc32table); + free(strm->state); +} +#endif + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION +static +int BZ2_bzBuffToBuffCompress(char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL || + source == NULL || + blockSize100k < 1 || blockSize100k > 9) + return BZ_PARAM_ERROR; + + BZ2_bzCompressInit(&strm, blockSize100k); + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzCompress(&strm, BZ_FINISH); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzCompressEnd(&strm); + return BZ_OK; + + output_overflow: + BZ2_bzCompressEnd(&strm); + return BZ_OUTBUFF_FULL; + + errhandler: + BZ2_bzCompressEnd(&strm); + return ret; +} +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib.h b/archival/bz/bzlib.h new file mode 100644 index 0000000..1bb811c --- /dev/null +++ b/archival/bz/bzlib.h @@ -0,0 +1,65 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. ---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) + +typedef struct bz_stream { + void *state; + char *next_in; + char *next_out; + unsigned avail_in; + unsigned avail_out; + /*unsigned long long total_in;*/ + unsigned long long total_out; +} bz_stream; + +/*-- Core (low-level) library functions --*/ + +static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k); +static int BZ2_bzCompress(bz_stream *strm, int action); +#if ENABLE_FEATURE_CLEAN_UP +static void BZ2_bzCompressEnd(bz_stream *strm); +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib_private.h b/archival/bz/bzlib_private.h new file mode 100644 index 0000000..6430ce4 --- /dev/null +++ b/archival/bz/bzlib_private.h @@ -0,0 +1,219 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib.h" */ + +/*-- General stuff. --*/ + +typedef unsigned char Bool; + +#define True ((Bool)1) +#define False ((Bool)0) + +#if BZ_LIGHT_DEBUG +static void bz_assert_fail(int errcode) NORETURN; +#define AssertH(cond, errcode) \ +do { \ + if (!(cond)) \ + bz_assert_fail(errcode); \ +} while (0) +#else +#define AssertH(cond, msg) do { } while (0) +#endif + +#if BZ_DEBUG +#define AssertD(cond, msg) \ +do { \ + if (!(cond)) \ + bb_error_msg_and_die("(debug build): internal error %s", msg); \ +} while (0) +#else +#define AssertD(cond, msg) do { } while (0) +#endif + + +/*-- Header bytes. --*/ + +#define BZ_HDR_B 0x42 /* 'B' */ +#define BZ_HDR_Z 0x5a /* 'Z' */ +#define BZ_HDR_h 0x68 /* 'h' */ +#define BZ_HDR_0 0x30 /* '0' */ + +#define BZ_HDR_BZh0 0x425a6830 + +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + +/*-- Stuff for doing CRCs. --*/ + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(s, crcVar, cha) \ +{ \ + crcVar = (crcVar << 8) ^ s->crc32table[(crcVar >> 24) ^ ((uint8_t)cha)]; \ +} + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + +/*-- Structure holding all the compression-side stuff. --*/ + +typedef struct EState { + /* pointer back to the struct bz_stream */ + bz_stream *strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + int32_t mode; + int32_t state; + + /* remembers avail_in when flush/finish requested */ +/* bbox: not needed, strm->avail_in always has the same value */ +/* commented out with '//#' throughout the code */ + /* uint32_t avail_in_expect; */ + + /* for doing the block sorting */ + int32_t origPtr; + uint32_t *arr1; + uint32_t *arr2; + uint32_t *ftab; + + /* aliases for arr1 and arr2 */ + uint32_t *ptr; + uint8_t *block; + uint16_t *mtfv; + uint8_t *zbits; + + /* guess what */ + uint32_t *crc32table; + + /* run-length-encoding of the input */ + uint32_t state_in_ch; + int32_t state_in_len; + + /* input and output limits and current posns */ + int32_t nblock; + int32_t nblockMAX; + int32_t numZ; + int32_t state_out_pos; + + /* the buffer for bit stream creation */ + uint32_t bsBuff; + int32_t bsLive; + + /* block and combined CRCs */ + uint32_t blockCRC; + uint32_t combinedCRC; + + /* misc administratium */ + int32_t blockNo; + int32_t blockSize100k; + + /* stuff for coding the MTF values */ + int32_t nMTF; + + /* map of bytes used in block */ + int32_t nInUse; + Bool inUse[256] ALIGNED(sizeof(long)); + uint8_t unseqToSeq[256]; + + /* stuff for coding the MTF values */ + int32_t mtfFreq [BZ_MAX_ALPHA_SIZE]; + uint8_t selector [BZ_MAX_SELECTORS]; + uint8_t selectorMtf[BZ_MAX_SELECTORS]; + + uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + /* stack-saving measures: these can be local, but they are too big */ + int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + /* second dimension: only 3 needed; 4 makes index calculations faster */ + uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; +#endif + int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2]; + + int32_t mainSort__runningOrder[256]; + int32_t mainSort__copyStart[256]; + int32_t mainSort__copyEnd[256]; +} EState; + + +/*-- compression. --*/ + +static void +BZ2_blockSort(EState*); + +static void +BZ2_compressBlock(EState*, int); + +static void +BZ2_bsInitWrite(EState*); + +static void +BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t); + +static void +BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t); + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/compress.c b/archival/bz/compress.c new file mode 100644 index 0000000..640b887 --- /dev/null +++ b/archival/bz/compress.c @@ -0,0 +1,687 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- changed setting of nGroups in sendMTFValues() + * so as to do a bit better on small files +*/ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void BZ2_bsInitWrite(EState* s) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static NOINLINE +void bsFinishWrite(EState* s) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +static +/* Helps only on level 5, on other levels hurts. ? */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +ALWAYS_INLINE +#endif +void bsW(EState* s, int32_t n, uint32_t v) +{ + while (s->bsLive >= 8) { + s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutU32(EState* s, unsigned u) +{ + bsW(s, 8, (u >> 24) & 0xff); + bsW(s, 8, (u >> 16) & 0xff); + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +static +void bsPutU16(EState* s, unsigned u) +{ + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e(EState* s) +{ + int i; + s->nInUse = 0; + for (i = 0; i < 256; i++) { + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } + } +} + + +/*---------------------------------------------------*/ +static NOINLINE +void generateMTFValues(EState* s) +{ + uint8_t yy[256]; + int32_t i, j; + int32_t zPend; + int32_t wr; + int32_t EOB; + + /* + * After sorting (eg, here), + * s->arr1[0 .. s->nblock-1] holds sorted order, + * and + * ((uint8_t*)s->arr2)[0 .. s->nblock-1] + * holds the original block data. + * + * The first thing to do is generate the MTF values, + * and put them in + * ((uint16_t*)s->arr1)[0 .. s->nblock-1]. + * Because there are strictly fewer or equal MTF values + * than block values, ptr values in this area are overwritten + * with MTF values only when they are no longer needed. + * + * The final compressed bitstream is generated into the + * area starting at + * &((uint8_t*)s->arr2)[s->nblock] + * + * These storage aliases are set up in bzCompressInit(), + * except for the last one, which is arranged in + * compressBlock(). + */ + uint32_t* ptr = s->ptr; + uint8_t* block = s->block; + uint16_t* mtfv = s->mtfv; + + makeMaps_e(s); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) + s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) + yy[i] = (uint8_t) i; + + for (i = 0; i < s->nblock; i++) { + uint8_t ll_i; + AssertD(wr <= i, "generateMTFValues(1)"); + j = ptr[i] - 1; + if (j < 0) + j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); + + if (yy[0] == ll_i) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ + }; + zPend = 0; + } + { + register uint8_t rtmp; + register uint8_t* ryy_j; + register uint8_t rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while (rll_i != rtmp) { + register uint8_t rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; + wr++; + s->mtfFreq[j+1]++; + } + + } + } + + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; + wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; + wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) + break; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ + }; + zPend = 0; + } + + mtfv[wr] = EOB; + wr++; + s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static NOINLINE +void sendMTFValues(EState* s) +{ + int32_t v, t, i, j, gs, ge, totc, bt, bc, iter; + int32_t nSelectors, alphaSize, minLen, maxLen, selCtr; + int32_t nGroups, nBytes; + + /* + * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * is a global since the decoder also needs it. + * + * int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * are also globals only used in this proc. + * Made global to keep stack frame size small. + */ +#define code sendMTFValues__code +#define rfreq sendMTFValues__rfreq +#define len_pack sendMTFValues__len_pack + + uint16_t cost[BZ_N_GROUPS]; + int32_t fave[BZ_N_GROUPS]; + + uint16_t* mtfv = s->mtfv; + + alphaSize = s->nInUse + 2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH(s->nMTF > 0, 3001); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + int32_t nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs - 1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */ + ) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; + else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge + 1; + remF -= aFreq; + } + } + + /* + * Iterate up to BZ_N_ITERS times to improve the tables. + */ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + for (t = 0; t < nGroups; t++) + fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + /* + * Set up an auxiliary length table which is used to fast-track + * the common case (nGroups == 6). + */ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } +#endif + nSelectors = 0; + totc = 0; + gs = 0; + while (1) { + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + + /* + * Calculate the cost of this group as coded + * by each of the coding tables. + */ + for (t = 0; t < nGroups; t++) + cost[t] = 0; +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register uint32_t cost01, cost23, cost45; + register uint16_t icv; + cost01 = cost23 = cost45 = 0; +#define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); +#undef BZ_ITER + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + uint16_t icv = mtfv[i]; + for (t = 0; t < nGroups; t++) + cost[t] += s->len[t][icv]; + } + } + /* + * Find the coding table which is best for this group, + * and record its identity in the selector table. + */ + /*bc = 999999999;*/ + /*bt = -1;*/ + bc = cost[0]; + bt = 0; + for (t = 1 /*0*/; t < nGroups; t++) { + if (cost[t] < bc) { + bc = cost[t]; + bt = t; + } + } + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ +/* 1% faster compress. +800 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 4 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ +#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); +#undef BZ_ITUR + gs = ge + 1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + while (gs <= ge) { + s->rfreq[bt][mtfv[gs]]++; + gs++; + } + /* already is: gs = ge + 1; */ + } + } + + /* + * Recompute the tables based on the accumulated frequencies. + */ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + * comment in huffman.c for details. */ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); + } + + AssertH(nGroups < 8, 3002); + AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003); + + /*--- Compute MTF values for the selectors. ---*/ + { + uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + + for (i = 0; i < nGroups; i++) + pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while (ll_i != tmp) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH(!(maxLen > 17 /*20*/), 3004); + AssertH(!(minLen < 1), 3005); + BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize); + } + + /*--- Transmit the mapping table. ---*/ + { + /* bbox: optimized a bit more than in bzip2 */ + int inUse16 = 0; + for (i = 0; i < 16; i++) { + if (sizeof(long) <= 4) { + inUse16 = inUse16*2 + + ((*(uint32_t*)&(s->inUse[i * 16 + 0]) + | *(uint32_t*)&(s->inUse[i * 16 + 4]) + | *(uint32_t*)&(s->inUse[i * 16 + 8]) + | *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0); + } else { /* Our CPU can do better */ + inUse16 = inUse16*2 + + ((*(uint64_t*)&(s->inUse[i * 16 + 0]) + | *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0); + } + } + + nBytes = s->numZ; + bsW(s, 16, inUse16); + + inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */ + for (i = 0; i < 16; i++) { + if (inUse16 < 0) { + unsigned v16 = 0; + for (j = 0; j < 16; j++) + v16 = v16*2 + s->inUse[i * 16 + j]; + bsW(s, 16, v16); + } + inUse16 <<= 1; + } + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW(s, 3, nGroups); + bsW(s, 15, nSelectors); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) + bsW(s, 1, 1); + bsW(s, 1, 0); + } + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + int32_t curr = s->len[t][0]; + bsW(s, 5, curr); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }; + bsW(s, 1, 0); + } + } + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (1) { + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + AssertH(s->selector[selCtr] < nGroups, 3006); + +/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */ +#if 0 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + uint16_t mtfv_i; + uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); +#define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i]) + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); +#undef BZ_ITAH + gs = ge+1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + /* code is bit bigger, but moves multiply out of the loop */ + uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); + while (gs <= ge) { + bsW(s, + s_len_sel_selCtr[mtfv[gs]], + s_code_sel_selCtr[mtfv[gs]] + ); + gs++; + } + /* already is: gs = ge+1; */ + } + selCtr++; + } + AssertH(selCtr == nSelectors, 3007); +#undef code +#undef rfreq +#undef len_pack +} + + +/*---------------------------------------------------*/ +static +void BZ2_compressBlock(EState* s, int is_last_block) +{ + if (s->nblock > 0) { + BZ_FINALISE_CRC(s->blockCRC); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) + s->numZ = 0; + + BZ2_blockSort(s); + } + + s->zbits = &((uint8_t*)s->arr2)[s->nblock]; + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite(s); + /*bsPutU8(s, BZ_HDR_B);*/ + /*bsPutU8(s, BZ_HDR_Z);*/ + /*bsPutU8(s, BZ_HDR_h);*/ + /*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/ + bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k); + } + + if (s->nblock > 0) { + /*bsPutU8(s, 0x31);*/ + /*bsPutU8(s, 0x41);*/ + /*bsPutU8(s, 0x59);*/ + /*bsPutU8(s, 0x26);*/ + bsPutU32(s, 0x31415926); + /*bsPutU8(s, 0x53);*/ + /*bsPutU8(s, 0x59);*/ + bsPutU16(s, 0x5359); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutU32(s, s->blockCRC); + + /* + * Now a single bit indicating (non-)randomisation. + * As of version 0.9.5, we use a better sorting algorithm + * which makes randomisation unnecessary. So always set + * the randomised bit to 'no'. Of course, the decoder + * still needs to be able to handle randomised blocks + * so as to maintain backwards compatibility with + * older versions of bzip2. + */ + bsW(s, 1, 0); + + bsW(s, 24, s->origPtr); + generateMTFValues(s); + sendMTFValues(s); + } + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + /*bsPutU8(s, 0x17);*/ + /*bsPutU8(s, 0x72);*/ + /*bsPutU8(s, 0x45);*/ + /*bsPutU8(s, 0x38);*/ + bsPutU32(s, 0x17724538); + /*bsPutU8(s, 0x50);*/ + /*bsPutU8(s, 0x90);*/ + bsPutU16(s, 0x5090); + bsPutU32(s, s->combinedCRC); + bsFinishWrite(s); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c new file mode 100644 index 0000000..676b1af --- /dev/null +++ b/archival/bz/huffman.c @@ -0,0 +1,229 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + int32_t zz, tmp; \ + zz = z; \ + tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + + +/* 90 bytes, 0.3% of overall compress speed */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 1 + +/* macro works better than inline (gcc 4.2.1) */ +#define DOWNHEAP1(heap, weight, Heap) \ +{ \ + int32_t zz, yy, tmp; \ + zz = 1; \ + tmp = heap[zz]; \ + while (1) { \ + yy = zz << 1; \ + if (yy > nHeap) \ + break; \ + if (yy < nHeap \ + && weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) \ + break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + +#else + +static +void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap) +{ + int32_t zz, yy, tmp; + zz = 1; + tmp = heap[zz]; + while (1) { + yy = zz << 1; + if (yy > nHeap) + break; + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) + yy++; + if (weight[tmp] < weight[heap[yy]]) + break; + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; +} + +#endif + +/*---------------------------------------------------*/ +static +void BZ2_hbMakeCodeLengths(EState *s, + uint8_t *len, + int32_t *freq, + int32_t alphaSize, + int32_t maxLen) +{ + /* + * Nodes and heap entries run from 1. Entry 0 + * for both the heap and nodes is a sentinel. + */ + int32_t nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + /* bbox: moved to EState to save stack + int32_t heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t parent[BZ_MAX_ALPHA_SIZE * 2]; + */ +#define heap (s->BZ2_hbMakeCodeLengths__heap) +#define weight (s->BZ2_hbMakeCodeLengths__weight) +#define parent (s->BZ2_hbMakeCodeLengths__parent) + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + + while (1) { + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; + j++; + } + len[i-1] = j; + if (j > maxLen) + tooLong = True; + } + + if (!tooLong) + break; + + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. */ + + for (i = 1; i <= alphaSize; i++) { + j = weight[i] >> 8; + /* bbox: yes, it is a signed division. + * don't replace with shift! */ + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +#undef heap +#undef weight +#undef parent +} + + +/*---------------------------------------------------*/ +static +void BZ2_hbAssignCodes(int32_t *code, + uint8_t *length, + int32_t minLen, + int32_t maxLen, + int32_t alphaSize) +{ + int32_t n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) { + if (length[i] == n) { + code[i] = vec; + vec++; + }; + } + vec <<= 1; + } +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/bzip2.c b/archival/bzip2.c new file mode 100644 index 0000000..8eb5ca9 --- /dev/null +++ b/archival/bzip2.c @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2007 Denys Vlasenko + * + * This file uses bzip2 library code which is written + * by Julian Seward . + * See README and LICENSE files in bz/ directory for more information + * about bzip2 library code. + */ + +#include "libbb.h" +#include "unarchive.h" + +#define CONFIG_BZIP2_FEATURE_SPEED 1 + +/* Speed test: + * Compiled with gcc 4.2.1, run on Athlon 64 1800 MHz (512K L2 cache). + * Stock bzip2 is 26.4% slower than bbox bzip2 at SPEED 1 + * (time to compress gcc-4.2.1.tar is 126.4% compared to bbox). + * At SPEED 5 difference is 32.7%. + * + * Test run of all CONFIG_BZIP2_FEATURE_SPEED values on a 11Mb text file: + * Size Time (3 runs) + * 0: 10828 4.145 4.146 4.148 + * 1: 11097 3.845 3.860 3.861 + * 2: 11392 3.763 3.767 3.768 + * 3: 11892 3.722 3.724 3.727 + * 4: 12740 3.637 3.640 3.644 + * 5: 17273 3.497 3.509 3.509 + */ + + +#define BZ_DEBUG 0 +/* Takes ~300 bytes, detects corruption caused by bad RAM etc */ +#define BZ_LIGHT_DEBUG 0 + +#include "bz/bzlib.h" + +#include "bz/bzlib_private.h" + +#include "bz/blocksort.c" +#include "bz/bzlib.c" +#include "bz/compress.c" +#include "bz/huffman.c" + +/* No point in being shy and having very small buffer here. + * bzip2 internal buffers are much bigger anyway, hundreds of kbytes. + * If iobuf is several pages long, malloc() may use mmap, + * making iobuf is page aligned and thus (maybe) have one memcpy less + * if kernel is clever enough. + */ +enum { + IOBUF_SIZE = 8 * 1024 +}; + +static uint8_t level; + +/* NB: compressStream() has to return -1 on errors, not die. + * bbunpack() will correctly clean up in this case + * (delete incomplete .bz2 file) + */ + +/* Returns: + * -1 on errors + * total written bytes so far otherwise + */ +static +USE_DESKTOP(long long) int bz_write(bz_stream *strm, void* rbuf, ssize_t rlen, void *wbuf) +{ + int n, n2, ret; + + strm->avail_in = rlen; + strm->next_in = rbuf; + while (1) { + strm->avail_out = IOBUF_SIZE; + strm->next_out = wbuf; + + ret = BZ2_bzCompress(strm, rlen ? BZ_RUN : BZ_FINISH); + if (ret != BZ_RUN_OK /* BZ_RUNning */ + && ret != BZ_FINISH_OK /* BZ_FINISHing, but not done yet */ + && ret != BZ_STREAM_END /* BZ_FINISHed */ + ) { + bb_error_msg_and_die("internal error %d", ret); + } + + n = IOBUF_SIZE - strm->avail_out; + if (n) { + n2 = full_write(STDOUT_FILENO, wbuf, n); + if (n2 != n) { + if (n2 >= 0) + errno = 0; /* prevent bogus error message */ + bb_perror_msg(n2 >= 0 ? "short write" : "write error"); + return -1; + } + } + + if (ret == BZ_STREAM_END) + break; + if (rlen && strm->avail_in == 0) + break; + } + return 0 USE_DESKTOP( + strm->total_out ); +} + +static +USE_DESKTOP(long long) int compressStream(unpack_info_t *info UNUSED_PARAM) +{ + USE_DESKTOP(long long) int total; + ssize_t count; + bz_stream bzs; /* it's small */ +#define strm (&bzs) + char *iobuf; +#define rbuf iobuf +#define wbuf (iobuf + IOBUF_SIZE) + + iobuf = xmalloc(2 * IOBUF_SIZE); + BZ2_bzCompressInit(strm, level); + + while (1) { + count = full_read(STDIN_FILENO, rbuf, IOBUF_SIZE); + if (count < 0) { + bb_perror_msg("read error"); + total = -1; + break; + } + /* if count == 0, bz_write finalizes compression */ + total = bz_write(strm, rbuf, count, wbuf); + if (count == 0 || total < 0) + break; + } + +#if ENABLE_FEATURE_CLEAN_UP + BZ2_bzCompressEnd(strm); + free(iobuf); +#endif + return total; +} + +static +char* make_new_name_bzip2(char *filename) +{ + return xasprintf("%s.bz2", filename); +} + +int bzip2_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int bzip2_main(int argc UNUSED_PARAM, char **argv) +{ + unsigned opt; + + /* standard bzip2 flags + * -d --decompress force decompression + * -z --compress force compression + * -k --keep keep (don't delete) input files + * -f --force overwrite existing output files + * -t --test test compressed file integrity + * -c --stdout output to standard out + * -q --quiet suppress noncritical error messages + * -v --verbose be verbose (a 2nd -v gives more) + * -s --small use less memory (at most 2500k) + * -1 .. -9 set block size to 100k .. 900k + * --fast alias for -1 + * --best alias for -9 + */ + + opt_complementary = "s2"; /* -s means -2 (compatibility) */ + /* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */ + opt = getopt32(argv, "cfv" USE_BUNZIP2("dt") "123456789qzs"); +#if ENABLE_BUNZIP2 /* bunzip2_main may not be visible... */ + if (opt & 0x18) // -d and/or -t + return bunzip2_main(argc, argv); + opt >>= 5; +#else + opt >>= 3; +#endif + opt = (uint8_t)opt; /* isolate bits for -1..-8 */ + opt |= 0x100; /* if nothing else, assume -9 */ + level = 1; + while (!(opt & 1)) { + level++; + opt >>= 1; + } + + argv += optind; + option_mask32 &= 0x7; /* ignore all except -cfv */ + return bbunpack(argv, make_new_name_bzip2, compressStream); +} diff --git a/archival/cpio.c b/archival/cpio.c new file mode 100644 index 0000000..1c30d89 --- /dev/null +++ b/archival/cpio.c @@ -0,0 +1,354 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini cpio implementation for busybox + * + * Copyright (C) 2001 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * + * Limitations: + * Doesn't check CRC's + * Only supports new ASCII and CRC formats + * + */ +#include "libbb.h" +#include "unarchive.h" + +#if ENABLE_FEATURE_CPIO_O +static off_t cpio_pad4(off_t size) +{ + int i; + + i = (- size) & 3; + size += i; + while (--i >= 0) + bb_putchar('\0'); + return size; +} + +/* Return value will become exit code. + * It's ok to exit instead of return. */ +static int cpio_o(void) +{ + static const char trailer[] ALIGN1 = "TRAILER!!!"; + struct name_s { + struct name_s *next; + char name[1]; + }; + struct inodes_s { + struct inodes_s *next; + struct name_s *names; + struct stat st; + }; + + struct inodes_s *links = NULL; + off_t bytes = 0; /* output bytes count */ + + while (1) { + const char *name; + char *line; + struct stat st; + + line = xmalloc_fgetline(stdin); + + if (line) { + /* Strip leading "./[./]..." from the filename */ + name = line; + while (name[0] == '.' && name[1] == '/') { + while (*++name == '/') + continue; + } + if (!*name) { /* line is empty */ + free(line); + continue; + } + if (lstat(name, &st)) { + abort_cpio_o: + bb_simple_perror_msg_and_die(name); + } + + if (!(S_ISLNK(st.st_mode) || S_ISREG(st.st_mode))) + st.st_size = 0; /* paranoia */ + + /* Store hardlinks for later processing, dont output them */ + if (!S_ISDIR(st.st_mode) && st.st_nlink > 1) { + struct name_s *n; + struct inodes_s *l; + + /* Do we have this hardlink remembered? */ + l = links; + while (1) { + if (l == NULL) { + /* Not found: add new item to "links" list */ + l = xzalloc(sizeof(*l)); + l->st = st; + l->next = links; + links = l; + break; + } + if (l->st.st_ino == st.st_ino) { + /* found */ + break; + } + l = l->next; + } + /* Add new name to "l->names" list */ + n = xmalloc(sizeof(*n) + strlen(name)); + strcpy(n->name, name); + n->next = l->names; + l->names = n; + + free(line); + continue; + } + + } else { /* line == NULL: EOF */ + next_link: + if (links) { + /* Output hardlink's data */ + st = links->st; + name = links->names->name; + links->names = links->names->next; + /* GNU cpio is reported to emit file data + * only for the last instance. Mimic that. */ + if (links->names == NULL) + links = links->next; + else + st.st_size = 0; + /* NB: we leak links->names and/or links, + * this is intended (we exit soon anyway) */ + } else { + /* If no (more) hardlinks to output, + * output "trailer" entry */ + name = trailer; + /* st.st_size == 0 is a must, but for uniformity + * in the output, we zero out everything */ + memset(&st, 0, sizeof(st)); + /* st.st_nlink = 1; - GNU cpio does this */ + } + } + + bytes += printf("070701" + "%08X%08X%08X%08X%08X%08X%08X" + "%08X%08X%08X%08X" /* GNU cpio uses uppercase hex */ + /* strlen+1: */ "%08X" + /* chksum: */ "00000000" /* (only for "070702" files) */ + /* name,NUL: */ "%s%c", + (unsigned)(uint32_t) st.st_ino, + (unsigned)(uint32_t) st.st_mode, + (unsigned)(uint32_t) st.st_uid, + (unsigned)(uint32_t) st.st_gid, + (unsigned)(uint32_t) st.st_nlink, + (unsigned)(uint32_t) st.st_mtime, + (unsigned)(uint32_t) st.st_size, + (unsigned)(uint32_t) major(st.st_dev), + (unsigned)(uint32_t) minor(st.st_dev), + (unsigned)(uint32_t) major(st.st_rdev), + (unsigned)(uint32_t) minor(st.st_rdev), + (unsigned)(strlen(name) + 1), + name, '\0'); + bytes = cpio_pad4(bytes); + + if (st.st_size) { + if (S_ISLNK(st.st_mode)) { + char *lpath = xmalloc_readlink_or_warn(name); + if (!lpath) + goto abort_cpio_o; + bytes += printf("%s", lpath); + free(lpath); + } else { /* S_ISREG */ + int fd = xopen(name, O_RDONLY); + fflush(stdout); + /* We must abort if file got shorter too! */ + bb_copyfd_exact_size(fd, STDOUT_FILENO, st.st_size); + bytes += st.st_size; + close(fd); + } + bytes = cpio_pad4(bytes); + } + + if (!line) { + if (name != trailer) + goto next_link; + /* TODO: GNU cpio pads trailer to 512 bytes, do we want that? */ + return EXIT_SUCCESS; + } + + free(line); + } /* end of "while (1)" */ +} +#endif + +/* GNU cpio 2.9 --help (abridged): + + Modes: + -i, --extract Extract files from an archive + -o, --create Create the archive + -p, --pass-through Copy-pass mode [was ist das?!] + -t, --list List the archive + + Options valid in any mode: + --block-size=SIZE I/O block size = SIZE * 512 bytes + -B I/O block size = 5120 bytes + -c Use the old portable (ASCII) archive format + -C, --io-size=NUMBER I/O block size in bytes + -f, --nonmatching Only copy files that do not match given pattern + -F, --file=FILE Use FILE instead of standard input or output + -H, --format=FORMAT Use given archive FORMAT + -M, --message=STRING Print STRING when the end of a volume of the + backup media is reached + -n, --numeric-uid-gid If -v, show numeric UID and GID + --quiet Do not print the number of blocks copied + --rsh-command=COMMAND Use remote COMMAND instead of rsh + -v, --verbose Verbosely list the files processed + -V, --dot Print a "." for each file processed + -W, --warning=FLAG Control warning display: 'none','truncate','all'; + multiple options accumulate + + Options valid only in --extract mode: + -b, --swap Swap both halfwords of words and bytes of + halfwords in the data (equivalent to -sS) + -r, --rename Interactively rename files + -s, --swap-bytes Swap the bytes of each halfword in the files + -S, --swap-halfwords Swap the halfwords of each word (4 bytes) + --to-stdout Extract files to standard output + -E, --pattern-file=FILE Read additional patterns specifying filenames to + extract or list from FILE + --only-verify-crc Verify CRC's, don't actually extract the files + + Options valid only in --create mode: + -A, --append Append to an existing archive + -O FILE File to use instead of standard output + + Options valid only in --pass-through mode: + -l, --link Link files instead of copying them, when possible + + Options valid in --extract and --create modes: + --absolute-filenames Do not strip file system prefix components from + the file names + --no-absolute-filenames Create all files relative to the current dir + + Options valid in --create and --pass-through modes: + -0, --null A list of filenames is terminated by a NUL + -a, --reset-access-time Reset the access times of files after reading them + -I FILE File to use instead of standard input + -L, --dereference Dereference symbolic links (copy the files + that they point to instead of copying the links) + -R, --owner=[USER][:.][GROUP] Set owner of created files + + Options valid in --extract and --pass-through modes: + -d, --make-directories Create leading directories where needed + -m, --preserve-modification-time Retain mtime when creating files + --no-preserve-owner Do not change the ownership of the files + --sparse Write files with blocks of zeros as sparse files + -u, --unconditional Replace all files unconditionally + */ + +int cpio_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int cpio_main(int argc UNUSED_PARAM, char **argv) +{ + archive_handle_t *archive_handle; + char *cpio_filename; + USE_FEATURE_CPIO_O(const char *cpio_fmt = "";) + unsigned opt; + enum { + CPIO_OPT_EXTRACT = (1 << 0), + CPIO_OPT_TEST = (1 << 1), + CPIO_OPT_UNCONDITIONAL = (1 << 2), + CPIO_OPT_VERBOSE = (1 << 3), + CPIO_OPT_FILE = (1 << 4), + CPIO_OPT_CREATE_LEADING_DIR = (1 << 5), + CPIO_OPT_PRESERVE_MTIME = (1 << 6), + CPIO_OPT_CREATE = (1 << 7), + CPIO_OPT_FORMAT = (1 << 8), + }; + +#if ENABLE_GETOPT_LONG + applet_long_options = + "extract\0" No_argument "i" + "list\0" No_argument "t" +#if ENABLE_FEATURE_CPIO_O + "create\0" No_argument "o" + "format\0" Required_argument "H" +#endif + ; +#endif + + /* Initialize */ + archive_handle = init_handle(); + archive_handle->src_fd = STDIN_FILENO; + archive_handle->seek = seek_by_read; + archive_handle->ah_flags = ARCHIVE_EXTRACT_NEWER; + +#if ENABLE_FEATURE_CPIO_O + opt = getopt32(argv, "ituvF:dmoH:", &cpio_filename, &cpio_fmt); + + if (opt & CPIO_OPT_CREATE) { + if (*cpio_fmt != 'n') + bb_show_usage(); + if (opt & CPIO_OPT_FILE) { + fclose(stdout); + stdout = fopen_for_write(cpio_filename); + /* Paranoia: I don't trust libc that much */ + xdup2(fileno(stdout), STDOUT_FILENO); + } + return cpio_o(); + } +#else + opt = getopt32(argv, "ituvF:dm", &cpio_filename); +#endif + argv += optind; + + /* One of either extract or test options must be given */ + if ((opt & (CPIO_OPT_TEST | CPIO_OPT_EXTRACT)) == 0) { + bb_show_usage(); + } + + if (opt & CPIO_OPT_TEST) { + /* if both extract and test options are given, ignore extract option */ + if (opt & CPIO_OPT_EXTRACT) { + opt &= ~CPIO_OPT_EXTRACT; + } + archive_handle->action_header = header_list; + } + if (opt & CPIO_OPT_EXTRACT) { + archive_handle->action_data = data_extract_all; + } + if (opt & CPIO_OPT_UNCONDITIONAL) { + archive_handle->ah_flags |= ARCHIVE_EXTRACT_UNCONDITIONAL; + archive_handle->ah_flags &= ~ARCHIVE_EXTRACT_NEWER; + } + if (opt & CPIO_OPT_VERBOSE) { + if (archive_handle->action_header == header_list) { + archive_handle->action_header = header_verbose_list; + } else { + archive_handle->action_header = header_list; + } + } + if (opt & CPIO_OPT_FILE) { /* -F */ + archive_handle->src_fd = xopen(cpio_filename, O_RDONLY); + archive_handle->seek = seek_by_jump; + } + if (opt & CPIO_OPT_CREATE_LEADING_DIR) { + archive_handle->ah_flags |= ARCHIVE_CREATE_LEADING_DIRS; + } + if (opt & CPIO_OPT_PRESERVE_MTIME) { + archive_handle->ah_flags |= ARCHIVE_PRESERVE_DATE; + } + + while (*argv) { + archive_handle->filter = filter_accept_list; + llist_add_to(&(archive_handle->accept), *argv); + argv++; + } + + /* see get_header_cpio */ + archive_handle->ah_priv[2] = (void*) ~(ptrdiff_t)0; + while (get_header_cpio(archive_handle) == EXIT_SUCCESS) + continue; + + if (archive_handle->ah_priv[2] != (void*) ~(ptrdiff_t)0) + printf("%lu blocks\n", (unsigned long)(ptrdiff_t)(archive_handle->ah_priv[2])); + + return EXIT_SUCCESS; +} diff --git a/archival/dpkg.c b/archival/dpkg.c new file mode 100644 index 0000000..1bc8d27 --- /dev/null +++ b/archival/dpkg.c @@ -0,0 +1,1781 @@ +/* vi: set sw=4 ts=4: */ +/* + * mini dpkg implementation for busybox. + * this is not meant as a replacement for dpkg + * + * written by glenn mcgrath with the help of others + * copyright (c) 2001 by glenn mcgrath + * + * started life as a busybox implementation of udpkg + * + * licensed under gplv2 or later, see file license in this tarball for details. + */ + +/* + * known difference between busybox dpkg and the official dpkg that i don't + * consider important, its worth keeping a note of differences anyway, just to + * make it easier to maintain. + * - the first value for the confflile: field isnt placed on a new line. + * - when installing a package the status: field is placed at the end of the + * section, rather than just after the package: field. + * + * bugs that need to be fixed + * - (unknown, please let me know when you find any) + * + */ + +#include "libbb.h" +#include +#include "unarchive.h" + +/* note: if you vary hash_prime sizes be aware, + * 1) tweaking these will have a big effect on how much memory this program uses. + * 2) for computational efficiency these hash tables should be at least 20% + * larger than the maximum number of elements stored in it. + * 3) all _hash_prime's must be a prime number or chaos is assured, if your looking + * for a prime, try http://www.utm.edu/research/primes/lists/small/10000.txt + * 4) if you go bigger than 15 bits you may get into trouble (untested) as its + * sometimes cast to an unsigned, if you go to 16 bit you will overlap + * int's and chaos is assured, 16381 is the max prime for 14 bit field + */ + +/* NAME_HASH_PRIME, Stores package names and versions, + * I estimate it should be at least 50% bigger than PACKAGE_HASH_PRIME, + * as there a lot of duplicate version numbers */ +#define NAME_HASH_PRIME 16381 + +/* PACKAGE_HASH_PRIME, Maximum number of unique packages, + * It must not be smaller than STATUS_HASH_PRIME, + * Currently only packages from status_hashtable are stored in here, but in + * future this may be used to store packages not only from a status file, + * but an available_hashtable, and even multiple packages files. + * Package can be stored more than once if they have different versions. + * e.g. The same package may have different versions in the status file + * and available file */ +#define PACKAGE_HASH_PRIME 10007 +typedef struct edge_s { + unsigned operator:4; /* was:3 */ + unsigned type:4; + unsigned name:16; /* was:14 */ + unsigned version:16; /* was:14 */ +} edge_t; + +typedef struct common_node_s { + unsigned name:16; /* was:14 */ + unsigned version:16; /* was:14 */ + unsigned num_of_edges:16; /* was:14 */ + edge_t **edge; +} common_node_t; + +/* Currently it doesnt store packages that have state-status of not-installed + * So it only really has to be the size of the maximum number of packages + * likely to be installed at any one time, so there is a bit of leeway here */ +#define STATUS_HASH_PRIME 8191 +typedef struct status_node_s { + unsigned package:16; /* was:14 */ /* has to fit PACKAGE_HASH_PRIME */ + unsigned status:16; /* was:14 */ /* has to fit STATUS_HASH_PRIME */ +} status_node_t; + + +/* Globals */ +struct globals { + char *name_hashtable[NAME_HASH_PRIME + 1]; + common_node_t *package_hashtable[PACKAGE_HASH_PRIME + 1]; + status_node_t *status_hashtable[STATUS_HASH_PRIME + 1]; +}; +#define G (*ptr_to_globals) +#define name_hashtable (G.name_hashtable ) +#define package_hashtable (G.package_hashtable) +#define status_hashtable (G.status_hashtable ) +#define INIT_G() do { \ + SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \ +} while (0) + + +/* Even numbers are for 'extras', like ored dependencies or null */ +enum edge_type_e { + EDGE_NULL = 0, + EDGE_PRE_DEPENDS = 1, + EDGE_OR_PRE_DEPENDS = 2, + EDGE_DEPENDS = 3, + EDGE_OR_DEPENDS = 4, + EDGE_REPLACES = 5, + EDGE_PROVIDES = 7, + EDGE_CONFLICTS = 9, + EDGE_SUGGESTS = 11, + EDGE_RECOMMENDS = 13, + EDGE_ENHANCES = 15 +}; +enum operator_e { + VER_NULL = 0, + VER_EQUAL = 1, + VER_LESS = 2, + VER_LESS_EQUAL = 3, + VER_MORE = 4, + VER_MORE_EQUAL = 5, + VER_ANY = 6 +}; + +typedef struct deb_file_s { + char *control_file; + char *filename; + unsigned package:16; /* was:14 */ +} deb_file_t; + + +static void make_hash(const char *key, unsigned *start, unsigned *decrement, const int hash_prime) +{ + unsigned long hash_num = key[0]; + int len = strlen(key); + int i; + + /* Maybe i should have uses a "proper" hashing algorithm here instead + * of making one up myself, seems to be working ok though. */ + for (i = 1; i < len; i++) { + /* shifts the ascii based value and adds it to previous value + * shift amount is mod 24 because long int is 32 bit and data + * to be shifted is 8, don't want to shift data to where it has + * no effect*/ + hash_num += (key[i] + key[i-1]) << ((key[i] * i) % 24); + } + *start = (unsigned) hash_num % hash_prime; + *decrement = (unsigned) 1 + (hash_num % (hash_prime - 1)); +} + +/* this adds the key to the hash table */ +static int search_name_hashtable(const char *key) +{ + unsigned probe_address; + unsigned probe_decrement; + + make_hash(key, &probe_address, &probe_decrement, NAME_HASH_PRIME); + while (name_hashtable[probe_address] != NULL) { + if (strcmp(name_hashtable[probe_address], key) == 0) { + return probe_address; + } + probe_address -= probe_decrement; + if ((int)probe_address < 0) { + probe_address += NAME_HASH_PRIME; + } + } + name_hashtable[probe_address] = xstrdup(key); + return probe_address; +} + +/* this DOESNT add the key to the hashtable + * TODO make it consistent with search_name_hashtable + */ +static unsigned search_status_hashtable(const char *key) +{ + unsigned probe_address; + unsigned probe_decrement; + + make_hash(key, &probe_address, &probe_decrement, STATUS_HASH_PRIME); + while (status_hashtable[probe_address] != NULL) { + if (strcmp(key, name_hashtable[package_hashtable[status_hashtable[probe_address]->package]->name]) == 0) { + break; + } + probe_address -= probe_decrement; + if ((int)probe_address < 0) { + probe_address += STATUS_HASH_PRIME; + } + } + return probe_address; +} + +/* Need to rethink version comparison, maybe the official dpkg has something i can use ? */ +static int version_compare_part(const char *version1, const char *version2) +{ + int upstream_len1 = 0; + int upstream_len2 = 0; + char *name1_char; + char *name2_char; + int len1 = 0; + int len2 = 0; + int tmp_int; + int ver_num1; + int ver_num2; + + if (version1 == NULL) { + version1 = xstrdup(""); + } + if (version2 == NULL) { + version2 = xstrdup(""); + } + upstream_len1 = strlen(version1); + upstream_len2 = strlen(version2); + + while ((len1 < upstream_len1) || (len2 < upstream_len2)) { + /* Compare non-digit section */ + tmp_int = strcspn(&version1[len1], "0123456789"); + name1_char = xstrndup(&version1[len1], tmp_int); + len1 += tmp_int; + tmp_int = strcspn(&version2[len2], "0123456789"); + name2_char = xstrndup(&version2[len2], tmp_int); + len2 += tmp_int; + tmp_int = strcmp(name1_char, name2_char); + free(name1_char); + free(name2_char); + if (tmp_int != 0) { + return tmp_int; + } + + /* Compare digits */ + tmp_int = strspn(&version1[len1], "0123456789"); + name1_char = xstrndup(&version1[len1], tmp_int); + len1 += tmp_int; + tmp_int = strspn(&version2[len2], "0123456789"); + name2_char = xstrndup(&version2[len2], tmp_int); + len2 += tmp_int; + ver_num1 = atoi(name1_char); + ver_num2 = atoi(name2_char); + free(name1_char); + free(name2_char); + if (ver_num1 < ver_num2) { + return -1; + } + if (ver_num1 > ver_num2) { + return 1; + } + } + return 0; +} + +/* if ver1 < ver2 return -1, + * if ver1 = ver2 return 0, + * if ver1 > ver2 return 1, + */ +static int version_compare(const unsigned ver1, const unsigned ver2) +{ + char *ch_ver1 = name_hashtable[ver1]; + char *ch_ver2 = name_hashtable[ver2]; + + char epoch1, epoch2; + char *deb_ver1, *deb_ver2; + char *ver1_ptr, *ver2_ptr; + char *upstream_ver1; + char *upstream_ver2; + int result; + + /* Compare epoch */ + if (ch_ver1[1] == ':') { + epoch1 = ch_ver1[0]; + ver1_ptr = strchr(ch_ver1, ':') + 1; + } else { + epoch1 = '0'; + ver1_ptr = ch_ver1; + } + if (ch_ver2[1] == ':') { + epoch2 = ch_ver2[0]; + ver2_ptr = strchr(ch_ver2, ':') + 1; + } else { + epoch2 = '0'; + ver2_ptr = ch_ver2; + } + if (epoch1 < epoch2) { + return -1; + } + else if (epoch1 > epoch2) { + return 1; + } + + /* Compare upstream version */ + upstream_ver1 = xstrdup(ver1_ptr); + upstream_ver2 = xstrdup(ver2_ptr); + + /* Chop off debian version, and store for later use */ + deb_ver1 = strrchr(upstream_ver1, '-'); + deb_ver2 = strrchr(upstream_ver2, '-'); + if (deb_ver1) { + deb_ver1[0] = '\0'; + deb_ver1++; + } + if (deb_ver2) { + deb_ver2[0] = '\0'; + deb_ver2++; + } + result = version_compare_part(upstream_ver1, upstream_ver2); + if (!result) + /* Compare debian versions */ + result = version_compare_part(deb_ver1, deb_ver2); + + free(upstream_ver1); + free(upstream_ver2); + return result; +} + +static int test_version(const unsigned version1, const unsigned version2, const unsigned operator) +{ + const int version_result = version_compare(version1, version2); + switch (operator) { + case VER_ANY: + return TRUE; + case VER_EQUAL: + return (version_result == 0); + case VER_LESS: + return (version_result < 0); + case VER_LESS_EQUAL: + return (version_result <= 0); + case VER_MORE: + return (version_result > 0); + case VER_MORE_EQUAL: + return (version_result >= 0); + } + return FALSE; +} + +static int search_package_hashtable(const unsigned name, const unsigned version, const unsigned operator) +{ + unsigned probe_address; + unsigned probe_decrement; + + make_hash(name_hashtable[name], &probe_address, &probe_decrement, PACKAGE_HASH_PRIME); + while (package_hashtable[probe_address] != NULL) { + if (package_hashtable[probe_address]->name == name) { + if (operator == VER_ANY) { + return probe_address; + } + if (test_version(package_hashtable[probe_address]->version, version, operator)) { + return probe_address; + } + } + probe_address -= probe_decrement; + if ((int)probe_address < 0) { + probe_address += PACKAGE_HASH_PRIME; + } + } + return probe_address; +} + +/* + * This function searches through the entire package_hashtable looking + * for a package which provides "needle". It returns the index into + * the package_hashtable for the providing package. + * + * needle is the index into name_hashtable of the package we are + * looking for. + * + * start_at is the index in the package_hashtable to start looking + * at. If start_at is -1 then start at the beginning. This is to allow + * for repeated searches since more than one package might provide + * needle. + * + * FIXME: I don't think this is very efficient, but I thought I'd keep + * it simple for now until it proves to be a problem. + */ +static int search_for_provides(int needle, int start_at) +{ + int i, j; + common_node_t *p; + for (i = start_at + 1; i < PACKAGE_HASH_PRIME; i++) { + p = package_hashtable[i]; + if (p == NULL) + continue; + for (j = 0; j < p->num_of_edges; j++) + if (p->edge[j]->type == EDGE_PROVIDES && p->edge[j]->name == needle) + return i; + } + return -1; +} + +/* + * Add an edge to a node + */ +static void add_edge_to_node(common_node_t *node, edge_t *edge) +{ + node->edge = xrealloc_vector(node->edge, 2, node->num_of_edges); + node->edge[node->num_of_edges++] = edge; +} + +/* + * Create one new node and one new edge for every dependency. + * + * Dependencies which contain multiple alternatives are represented as + * an EDGE_OR_PRE_DEPENDS or EDGE_OR_DEPENDS node, followed by a + * number of EDGE_PRE_DEPENDS or EDGE_DEPENDS nodes. The name field of + * the OR edge contains the full dependency string while the version + * field contains the number of EDGE nodes which follow as part of + * this alternative. + */ +static void add_split_dependencies(common_node_t *parent_node, const char *whole_line, unsigned edge_type) +{ + char *line = xstrdup(whole_line); + char *line2; + char *line_ptr1 = NULL; + char *line_ptr2 = NULL; + char *field; + char *field2; + char *version; + edge_t *edge; + edge_t *or_edge; + int offset_ch; + + field = strtok_r(line, ",", &line_ptr1); + do { + /* skip leading spaces */ + field += strspn(field, " "); + line2 = xstrdup(field); + field2 = strtok_r(line2, "|", &line_ptr2); + or_edge = NULL; + if ((edge_type == EDGE_DEPENDS || edge_type == EDGE_PRE_DEPENDS) + && (strcmp(field, field2) != 0) + ) { + or_edge = xzalloc(sizeof(edge_t)); + or_edge->type = edge_type + 1; + or_edge->name = search_name_hashtable(field); + //or_edge->version = 0; // tracks the number of alternatives + add_edge_to_node(parent_node, or_edge); + } + + do { + edge = xmalloc(sizeof(edge_t)); + edge->type = edge_type; + + /* Skip any extra leading spaces */ + field2 += strspn(field2, " "); + + /* Get dependency version info */ + version = strchr(field2, '('); + if (version == NULL) { + edge->operator = VER_ANY; + /* Get the versions hash number, adding it if the number isnt already in there */ + edge->version = search_name_hashtable("ANY"); + } else { + /* Skip leading ' ' or '(' */ + version += strspn(version, " ("); + /* Calculate length of any operator characters */ + offset_ch = strspn(version, "<=>"); + /* Determine operator */ + if (offset_ch > 0) { + if (strncmp(version, "=", offset_ch) == 0) { + edge->operator = VER_EQUAL; + } else if (strncmp(version, "<<", offset_ch) == 0) { + edge->operator = VER_LESS; + } else if (strncmp(version, "<=", offset_ch) == 0) { + edge->operator = VER_LESS_EQUAL; + } else if (strncmp(version, ">>", offset_ch) == 0) { + edge->operator = VER_MORE; + } else if (strncmp(version, ">=", offset_ch) == 0) { + edge->operator = VER_MORE_EQUAL; + } else { + bb_error_msg_and_die("illegal operator"); + } + } + /* skip to start of version numbers */ + version += offset_ch; + version += strspn(version, " "); + + /* Truncate version at trailing ' ' or ')' */ + version[strcspn(version, " )")] = '\0'; + /* Get the versions hash number, adding it if the number isnt already in there */ + edge->version = search_name_hashtable(version); + } + + /* Get the dependency name */ + field2[strcspn(field2, " (")] = '\0'; + edge->name = search_name_hashtable(field2); + + if (or_edge) + or_edge->version++; + + add_edge_to_node(parent_node, edge); + field2 = strtok_r(NULL, "|", &line_ptr2); + } while (field2 != NULL); + + free(line2); + field = strtok_r(NULL, ",", &line_ptr1); + } while (field != NULL); + + free(line); +} + +static void free_package(common_node_t *node) +{ + unsigned i; + if (node) { + for (i = 0; i < node->num_of_edges; i++) { + free(node->edge[i]); + } + free(node->edge); + free(node); + } +} + +/* + * Gets the next package field from package_buffer, seperated into the field name + * and field value, it returns the int offset to the first character of the next field + */ +static int read_package_field(const char *package_buffer, char **field_name, char **field_value) +{ + int offset_name_start = 0; + int offset_name_end = 0; + int offset_value_start = 0; + int offset_value_end = 0; + int offset = 0; + int next_offset; + int name_length; + int value_length; + int exit_flag = FALSE; + + if (package_buffer == NULL) { + *field_name = NULL; + *field_value = NULL; + return -1; + } + while (1) { + next_offset = offset + 1; + switch (package_buffer[offset]) { + case '\0': + exit_flag = TRUE; + break; + case ':': + if (offset_name_end == 0) { + offset_name_end = offset; + offset_value_start = next_offset; + } + /* TODO: Name might still have trailing spaces if ':' isnt + * immediately after name */ + break; + case '\n': + /* TODO: The char next_offset may be out of bounds */ + if (package_buffer[next_offset] != ' ') { + exit_flag = TRUE; + break; + } + case '\t': + case ' ': + /* increment the value start point if its a just filler */ + if (offset_name_start == offset) { + offset_name_start++; + } + if (offset_value_start == offset) { + offset_value_start++; + } + break; + } + if (exit_flag) { + /* Check that the names are valid */ + offset_value_end = offset; + name_length = offset_name_end - offset_name_start; + value_length = offset_value_end - offset_value_start; + if (name_length == 0) { + break; + } + if ((name_length > 0) && (value_length > 0)) { + break; + } + + /* If not valid, start fresh with next field */ + exit_flag = FALSE; + offset_name_start = offset + 1; + offset_name_end = 0; + offset_value_start = offset + 1; + offset_value_end = offset + 1; + offset++; + } + offset++; + } + *field_name = NULL; + if (name_length) { + *field_name = xstrndup(&package_buffer[offset_name_start], name_length); + } + *field_value = NULL; + if (value_length > 0) { + *field_value = xstrndup(&package_buffer[offset_value_start], value_length); + } + return next_offset; +} + +static unsigned fill_package_struct(char *control_buffer) +{ + static const char field_names[] ALIGN1 = + "Package\0""Version\0" + "Pre-Depends\0""Depends\0""Replaces\0""Provides\0" + "Conflicts\0""Suggests\0""Recommends\0""Enhances\0"; + + common_node_t *new_node = xzalloc(sizeof(common_node_t)); + char *field_name; + char *field_value; + int field_start = 0; + int num = -1; + int buffer_length = strlen(control_buffer); + + new_node->version = search_name_hashtable("unknown"); + while (field_start < buffer_length) { + unsigned field_num; + + field_start += read_package_field(&control_buffer[field_start], + &field_name, &field_value); + + if (field_name == NULL) { + goto fill_package_struct_cleanup; + } + + field_num = index_in_strings(field_names, field_name); + switch (field_num) { + case 0: /* Package */ + new_node->name = search_name_hashtable(field_value); + break; + case 1: /* Version */ + new_node->version = search_name_hashtable(field_value); + break; + case 2: /* Pre-Depends */ + add_split_dependencies(new_node, field_value, EDGE_PRE_DEPENDS); + break; + case 3: /* Depends */ + add_split_dependencies(new_node, field_value, EDGE_DEPENDS); + break; + case 4: /* Replaces */ + add_split_dependencies(new_node, field_value, EDGE_REPLACES); + break; + case 5: /* Provides */ + add_split_dependencies(new_node, field_value, EDGE_PROVIDES); + break; + case 6: /* Conflicts */ + add_split_dependencies(new_node, field_value, EDGE_CONFLICTS); + break; + case 7: /* Suggests */ + add_split_dependencies(new_node, field_value, EDGE_SUGGESTS); + break; + case 8: /* Recommends */ + add_split_dependencies(new_node, field_value, EDGE_RECOMMENDS); + break; + case 9: /* Enhances */ + add_split_dependencies(new_node, field_value, EDGE_ENHANCES); + break; + } + fill_package_struct_cleanup: + free(field_name); + free(field_value); + } + + if (new_node->version == search_name_hashtable("unknown")) { + free_package(new_node); + return -1; + } + num = search_package_hashtable(new_node->name, new_node->version, VER_EQUAL); + free_package(package_hashtable[num]); + package_hashtable[num] = new_node; + return num; +} + +/* if num = 1, it returns the want status, 2 returns flag, 3 returns status */ +static unsigned get_status(const unsigned status_node, const int num) +{ + char *status_string = name_hashtable[status_hashtable[status_node]->status]; + char *state_sub_string; + unsigned state_sub_num; + int len; + int i; + + /* set tmp_string to point to the start of the word number */ + for (i = 1; i < num; i++) { + /* skip past a word */ + status_string += strcspn(status_string, " "); + /* skip past the separating spaces */ + status_string += strspn(status_string, " "); + } + len = strcspn(status_string, " \n"); + state_sub_string = xstrndup(status_string, len); + state_sub_num = search_name_hashtable(state_sub_string); + free(state_sub_string); + return state_sub_num; +} + +static void set_status(const unsigned status_node_num, const char *new_value, const int position) +{ + const unsigned new_value_len = strlen(new_value); + const unsigned new_value_num = search_name_hashtable(new_value); + unsigned want = get_status(status_node_num, 1); + unsigned flag = get_status(status_node_num, 2); + unsigned status = get_status(status_node_num, 3); + int want_len = strlen(name_hashtable[want]); + int flag_len = strlen(name_hashtable[flag]); + int status_len = strlen(name_hashtable[status]); + char *new_status; + + switch (position) { + case 1: + want = new_value_num; + want_len = new_value_len; + break; + case 2: + flag = new_value_num; + flag_len = new_value_len; + break; + case 3: + status = new_value_num; + status_len = new_value_len; + break; + default: + bb_error_msg_and_die("DEBUG ONLY: this shouldnt happen"); + } + + new_status = xasprintf("%s %s %s", name_hashtable[want], name_hashtable[flag], name_hashtable[status]); + status_hashtable[status_node_num]->status = search_name_hashtable(new_status); + free(new_status); +} + +static const char *describe_status(int status_num) +{ + int status_want, status_state; + if (status_hashtable[status_num] == NULL || status_hashtable[status_num]->status == 0) + return "is not installed or flagged to be installed"; + + status_want = get_status(status_num, 1); + status_state = get_status(status_num, 3); + + if (status_state == search_name_hashtable("installed")) { + if (status_want == search_name_hashtable("install")) + return "is installed"; + if (status_want == search_name_hashtable("deinstall")) + return "is marked to be removed"; + if (status_want == search_name_hashtable("purge")) + return "is marked to be purged"; + } + if (status_want == search_name_hashtable("unknown")) + return "is in an indeterminate state"; + if (status_want == search_name_hashtable("install")) + return "is marked to be installed"; + + return "is not installed or flagged to be installed"; +} + +static void index_status_file(const char *filename) +{ + FILE *status_file; + char *control_buffer; + char *status_line; + status_node_t *status_node = NULL; + unsigned status_num; + + status_file = xfopen_for_read(filename); + while ((control_buffer = xmalloc_fgetline_str(status_file, "\n\n")) != NULL) { + const unsigned package_num = fill_package_struct(control_buffer); + if (package_num != -1) { + status_node = xmalloc(sizeof(status_node_t)); + /* fill_package_struct doesnt handle the status field */ + status_line = strstr(control_buffer, "Status:"); + if (status_line != NULL) { + status_line += 7; + status_line += strspn(status_line, " \n\t"); + status_line = xstrndup(status_line, strcspn(status_line, "\n")); + status_node->status = search_name_hashtable(status_line); + free(status_line); + } + status_node->package = package_num; + status_num = search_status_hashtable(name_hashtable[package_hashtable[status_node->package]->name]); + status_hashtable[status_num] = status_node; + } + free(control_buffer); + } + fclose(status_file); +} + +static void write_buffer_no_status(FILE *new_status_file, const char *control_buffer) +{ + char *name; + char *value; + int start = 0; + while (1) { + start += read_package_field(&control_buffer[start], &name, &value); + if (name == NULL) { + break; + } + if (strcmp(name, "Status") != 0) { + fprintf(new_status_file, "%s: %s\n", name, value); + } + } +} + +/* This could do with a cleanup */ +static void write_status_file(deb_file_t **deb_file) +{ + FILE *old_status_file = xfopen_for_read("/var/lib/dpkg/status"); + FILE *new_status_file = xfopen_for_write("/var/lib/dpkg/status.udeb"); + char *package_name; + char *status_from_file; + char *control_buffer = NULL; + char *tmp_string; + int status_num; + int field_start = 0; + int write_flag; + int i = 0; + + /* Update previously known packages */ + while ((control_buffer = xmalloc_fgetline_str(old_status_file, "\n\n")) != NULL) { + tmp_string = strstr(control_buffer, "Package:"); + if (tmp_string == NULL) { + continue; + } + + tmp_string += 8; + tmp_string += strspn(tmp_string, " \n\t"); + package_name = xstrndup(tmp_string, strcspn(tmp_string, "\n")); + write_flag = FALSE; + tmp_string = strstr(control_buffer, "Status:"); + if (tmp_string != NULL) { + /* Seperate the status value from the control buffer */ + tmp_string += 7; + tmp_string += strspn(tmp_string, " \n\t"); + status_from_file = xstrndup(tmp_string, strcspn(tmp_string, "\n")); + } else { + status_from_file = NULL; + } + + /* Find this package in the status hashtable */ + status_num = search_status_hashtable(package_name); + if (status_hashtable[status_num] != NULL) { + const char *status_from_hashtable = name_hashtable[status_hashtable[status_num]->status]; + if (strcmp(status_from_file, status_from_hashtable) != 0) { + /* New status isnt exactly the same as old status */ + const int state_status = get_status(status_num, 3); + if ((strcmp("installed", name_hashtable[state_status]) == 0) + || (strcmp("unpacked", name_hashtable[state_status]) == 0) + ) { + /* We need to add the control file from the package */ + i = 0; + while (deb_file[i] != NULL) { + if (strcmp(package_name, name_hashtable[package_hashtable[deb_file[i]->package]->name]) == 0) { + /* Write a status file entry with a modified status */ + /* remove trailing \n's */ + write_buffer_no_status(new_status_file, deb_file[i]->control_file); + set_status(status_num, "ok", 2); + fprintf(new_status_file, "Status: %s\n\n", + name_hashtable[status_hashtable[status_num]->status]); + write_flag = TRUE; + break; + } + i++; + } + /* This is temperary, debugging only */ + if (deb_file[i] == NULL) { + bb_error_msg_and_die("ALERT: cannot find a control file, " + "your status file may be broken, status may be " + "incorrect for %s", package_name); + } + } + else if (strcmp("not-installed", name_hashtable[state_status]) == 0) { + /* Only write the Package, Status, Priority and Section lines */ + fprintf(new_status_file, "Package: %s\n", package_name); + fprintf(new_status_file, "Status: %s\n", status_from_hashtable); + + while (1) { + char *field_name; + char *field_value; + field_start += read_package_field(&control_buffer[field_start], &field_name, &field_value); + if (field_name == NULL) { + break; + } + if ((strcmp(field_name, "Priority") == 0) || + (strcmp(field_name, "Section") == 0)) { + fprintf(new_status_file, "%s: %s\n", field_name, field_value); + } + } + write_flag = TRUE; + fputs("\n", new_status_file); + } + else if (strcmp("config-files", name_hashtable[state_status]) == 0) { + /* only change the status line */ + while (1) { + char *field_name; + char *field_value; + field_start += read_package_field(&control_buffer[field_start], &field_name, &field_value); + if (field_name == NULL) { + break; + } + /* Setup start point for next field */ + if (strcmp(field_name, "Status") == 0) { + fprintf(new_status_file, "Status: %s\n", status_from_hashtable); + } else { + fprintf(new_status_file, "%s: %s\n", field_name, field_value); + } + } + write_flag = TRUE; + fputs("\n", new_status_file); + } + } + } + /* If the package from the status file wasnt handle above, do it now*/ + if (!write_flag) { + fprintf(new_status_file, "%s\n\n", control_buffer); + } + + free(status_from_file); + free(package_name); + free(control_buffer); + } + + /* Write any new packages */ + for (i = 0; deb_file[i] != NULL; i++) { + status_num = search_status_hashtable(name_hashtable[package_hashtable[deb_file[i]->package]->name]); + if (strcmp("reinstreq", name_hashtable[get_status(status_num, 2)]) == 0) { + write_buffer_no_status(new_status_file, deb_file[i]->control_file); + set_status(status_num, "ok", 2); + fprintf(new_status_file, "Status: %s\n\n", name_hashtable[status_hashtable[status_num]->status]); + } + } + fclose(old_status_file); + fclose(new_status_file); + + /* Create a separate backfile to dpkg */ + if (rename("/var/lib/dpkg/status", "/var/lib/dpkg/status.udeb.bak") == -1) { + if (errno != ENOENT) + bb_error_msg_and_die("cannot create backup status file"); + /* Its ok if renaming the status file fails because status + * file doesnt exist, maybe we are starting from scratch */ + bb_error_msg("no status file found, creating new one"); + } + + xrename("/var/lib/dpkg/status.udeb", "/var/lib/dpkg/status"); +} + +/* This function returns TRUE if the given package can satisfy a + * dependency of type depend_type. + * + * A pre-depends is satisfied only if a package is already installed, + * which a regular depends can be satisfied by a package which we want + * to install. + */ +static int package_satisfies_dependency(int package, int depend_type) +{ + int status_num = search_status_hashtable(name_hashtable[package_hashtable[package]->name]); + + /* status could be unknown if package is a pure virtual + * provides which cannot satisfy any dependency by itself. + */ + if (status_hashtable[status_num] == NULL) + return 0; + + switch (depend_type) { + case EDGE_PRE_DEPENDS: return get_status(status_num, 3) == search_name_hashtable("installed"); + case EDGE_DEPENDS: return get_status(status_num, 1) == search_name_hashtable("install"); + } + return 0; +} + +static int check_deps(deb_file_t **deb_file, int deb_start /*, int dep_max_count - ?? */) +{ + int *conflicts = NULL; + int conflicts_num = 0; + int i = deb_start; + int j; + + /* Check for conflicts + * TODO: TEST if conflicts with other packages to be installed + * + * Add install packages and the packages they provide + * to the list of files to check conflicts for + */ + + /* Create array of package numbers to check against + * installed package for conflicts*/ + while (deb_file[i] != NULL) { + const unsigned package_num = deb_file[i]->package; + conflicts = xrealloc_vector(conflicts, 2, conflicts_num); + conflicts[conflicts_num] = package_num; + conflicts_num++; + /* add provides to conflicts list */ + for (j = 0; j < package_hashtable[package_num]->num_of_edges; j++) { + if (package_hashtable[package_num]->edge[j]->type == EDGE_PROVIDES) { + const int conflicts_package_num = search_package_hashtable( + package_hashtable[package_num]->edge[j]->name, + package_hashtable[package_num]->edge[j]->version, + package_hashtable[package_num]->edge[j]->operator); + if (package_hashtable[conflicts_package_num] == NULL) { + /* create a new package */ + common_node_t *new_node = xzalloc(sizeof(common_node_t)); + new_node->name = package_hashtable[package_num]->edge[j]->name; + new_node->version = package_hashtable[package_num]->edge[j]->version; + package_hashtable[conflicts_package_num] = new_node; + } + conflicts = xrealloc_vector(conflicts, 2, conflicts_num); + conflicts[conflicts_num] = conflicts_package_num; + conflicts_num++; + } + } + i++; + } + + /* Check conflicts */ + i = 0; + while (deb_file[i] != NULL) { + const common_node_t *package_node = package_hashtable[deb_file[i]->package]; + int status_num = 0; + status_num = search_status_hashtable(name_hashtable[package_node->name]); + + if (get_status(status_num, 3) == search_name_hashtable("installed")) { + i++; + continue; + } + + for (j = 0; j < package_node->num_of_edges; j++) { + const edge_t *package_edge = package_node->edge[j]; + + if (package_edge->type == EDGE_CONFLICTS) { + const unsigned package_num = + search_package_hashtable(package_edge->name, + package_edge->version, + package_edge->operator); + int result = 0; + if (package_hashtable[package_num] != NULL) { + status_num = search_status_hashtable(name_hashtable[package_hashtable[package_num]->name]); + + if (get_status(status_num, 1) == search_name_hashtable("install")) { + result = test_version(package_hashtable[deb_file[i]->package]->version, + package_edge->version, package_edge->operator); + } + } + + if (result) { + bb_error_msg_and_die("package %s conflicts with %s", + name_hashtable[package_node->name], + name_hashtable[package_edge->name]); + } + } + } + i++; + } + + + /* Check dependendcies */ + for (i = 0; i < PACKAGE_HASH_PRIME; i++) { + int status_num = 0; + int number_of_alternatives = 0; + const edge_t * root_of_alternatives = NULL; + const common_node_t *package_node = package_hashtable[i]; + + /* If the package node does not exist then this + * package is a virtual one. In which case there are + * no dependencies to check. + */ + if (package_node == NULL) continue; + + status_num = search_status_hashtable(name_hashtable[package_node->name]); + + /* If there is no status then this package is a + * virtual one provided by something else. In which + * case there are no dependencies to check. + */ + if (status_hashtable[status_num] == NULL) continue; + + /* If we don't want this package installed then we may + * as well ignore it's dependencies. + */ + if (get_status(status_num, 1) != search_name_hashtable("install")) { + continue; + } + + /* This code is tested only for EDGE_DEPENDS, since I + * have no suitable pre-depends available. There is no + * reason that it shouldn't work though :-) + */ + for (j = 0; j < package_node->num_of_edges; j++) { + const edge_t *package_edge = package_node->edge[j]; + unsigned package_num; + + if (package_edge->type == EDGE_OR_PRE_DEPENDS + || package_edge->type == EDGE_OR_DEPENDS + ) { /* start an EDGE_OR_ list */ + number_of_alternatives = package_edge->version; + root_of_alternatives = package_edge; + continue; + } + if (number_of_alternatives == 0) { /* not in the middle of an EDGE_OR_ list */ + number_of_alternatives = 1; + root_of_alternatives = NULL; + } + + package_num = search_package_hashtable(package_edge->name, package_edge->version, package_edge->operator); + + if (package_edge->type == EDGE_PRE_DEPENDS || + package_edge->type == EDGE_DEPENDS) { + int result=1; + status_num = 0; + + /* If we are inside an alternative then check + * this edge is the right type. + * + * EDGE_DEPENDS == OR_DEPENDS -1 + * EDGE_PRE_DEPENDS == OR_PRE_DEPENDS -1 + */ + if (root_of_alternatives && package_edge->type != root_of_alternatives->type - 1) + bb_error_msg_and_die("fatal error, package dependencies corrupt: %d != %d - 1", + package_edge->type, root_of_alternatives->type); + + if (package_hashtable[package_num] != NULL) + result = !package_satisfies_dependency(package_num, package_edge->type); + + if (result) { /* check for other package which provide what we are looking for */ + int provider = -1; + + while ((provider = search_for_provides(package_edge->name, provider)) > -1) { + if (package_hashtable[provider] == NULL) { + puts("Have a provider but no package information for it"); + continue; + } + result = !package_satisfies_dependency(provider, package_edge->type); + + if (result == 0) + break; + } + } + + /* It must be already installed, or to be installed */ + number_of_alternatives--; + if (result && number_of_alternatives == 0) { + if (root_of_alternatives) + bb_error_msg_and_die( + "package %s %sdepends on %s, " + "which cannot be satisfied", + name_hashtable[package_node->name], + package_edge->type == EDGE_PRE_DEPENDS ? "pre-" : "", + name_hashtable[root_of_alternatives->name]); + bb_error_msg_and_die( + "package %s %sdepends on %s, which %s\n", + name_hashtable[package_node->name], + package_edge->type == EDGE_PRE_DEPENDS ? "pre-" : "", + name_hashtable[package_edge->name], + describe_status(status_num)); + } + if (result == 0 && number_of_alternatives) { + /* we've found a package which + * satisfies the dependency, + * so skip over the rest of + * the alternatives. + */ + j += number_of_alternatives; + number_of_alternatives = 0; + } + } + } + } + free(conflicts); + return TRUE; +} + +static char **create_list(const char *filename) +{ + FILE *list_stream; + char **file_list; + char *line; + int count; + + /* don't use [xw]fopen here, handle error ourself */ + list_stream = fopen_for_read(filename); + if (list_stream == NULL) { + return NULL; + } + + file_list = NULL; + count = 0; + while ((line = xmalloc_fgetline(list_stream)) != NULL) { + file_list = xrealloc_vector(file_list, 2, count); + file_list[count++] = line; + /*file_list[count] = NULL; - xrealloc_vector did it */ + } + fclose(list_stream); + + return file_list; +} + +/* maybe i should try and hook this into remove_file.c somehow */ +static int remove_file_array(char **remove_names, char **exclude_names) +{ + struct stat path_stat; + int remove_flag = 1; /* not removed anything yet */ + int i, j; + + if (remove_names == NULL) { + return 0; + } + for (i = 0; remove_names[i] != NULL; i++) { + if (exclude_names != NULL) { + for (j = 0; exclude_names[j] != NULL; j++) { + if (strcmp(remove_names[i], exclude_names[j]) == 0) { + goto skip; + } + } + } + /* TODO: why we are checking lstat? we can just try rm/rmdir */ + if (lstat(remove_names[i], &path_stat) < 0) { + continue; + } + if (S_ISDIR(path_stat.st_mode)) { + remove_flag &= rmdir(remove_names[i]); /* 0 if no error */ + } else { + remove_flag &= unlink(remove_names[i]); /* 0 if no error */ + } + skip: + continue; + } + return (remove_flag == 0); +} + +static void run_package_script_or_die(const char *package_name, const char *script_type) +{ + char *script_path; + int result; + + script_path = xasprintf("/var/lib/dpkg/info/%s.%s", package_name, script_type); + + /* If the file doesnt exist is isnt fatal */ + result = access(script_path, F_OK) ? EXIT_SUCCESS : system(script_path); + free(script_path); + if (result) + bb_error_msg_and_die("%s failed, exit code %d", script_type, result); +} + +/* +The policy manual defines what scripts get called when and with +what arguments. I realize that busybox does not support all of +these scenarios, but it does support some of them; it does not, +however, run them with any parameters in run_package_script_or_die(). +Here are the scripts: + +preinst install +preinst install +preinst upgrade +preinst abort_upgrade +postinst configure +postinst abort-upgade +postinst abort-remove +postinst abort-remove in-favour +postinst abort-deconfigure in-favor removing +prerm remove +prerm upgrade +prerm failed-upgrade +prerm remove in-favor +prerm deconfigure in-favour removing +postrm remove +postrm purge +postrm upgrade +postrm failed-upgrade +postrm abort-install +postrm abort-install +postrm abort-upgrade +postrm disappear +*/ +static const char *const all_control_files[] = { + "preinst", "postinst", "prerm", "postrm", + "list", "md5sums", "shlibs", "conffiles", + "config", "templates" +}; + +static char **all_control_list(const char *package_name) +{ + unsigned i = 0; + char **remove_files; + + /* Create a list of all /var/lib/dpkg/info/ files */ + remove_files = xzalloc(sizeof(all_control_files) + sizeof(char*)); + while (i < ARRAY_SIZE(all_control_files)) { + remove_files[i] = xasprintf("/var/lib/dpkg/info/%s.%s", + package_name, all_control_files[i]); + i++; + } + + return remove_files; +} + +static void free_array(char **array) +{ + if (array) { + unsigned i = 0; + while (array[i]) { + free(array[i]); + i++; + } + free(array); + } +} + +/* This function lists information on the installed packages. It loops through + * the status_hashtable to retrieve the info. This results in smaller code than + * scanning the status file. The resulting list, however, is unsorted. + */ +static void list_packages(const char *pattern) +{ + int i; + + puts(" Name Version"); + puts("+++-==============-=============="); + + /* go through status hash, dereference package hash and finally strings */ + for (i = 0; i < STATUS_HASH_PRIME+1; i++) { + if (status_hashtable[i]) { + const char *stat_str; /* status string */ + const char *name_str; /* package name */ + const char *vers_str; /* version */ + char s1, s2; /* status abbreviations */ + int spccnt; /* space count */ + int j; + + stat_str = name_hashtable[status_hashtable[i]->status]; + name_str = name_hashtable[package_hashtable[status_hashtable[i]->package]->name]; + vers_str = name_hashtable[package_hashtable[status_hashtable[i]->package]->version]; + + if (pattern && fnmatch(pattern, name_str, 0)) + continue; + + /* get abbreviation for status field 1 */ + s1 = stat_str[0] == 'i' ? 'i' : 'r'; + + /* get abbreviation for status field 2 */ + for (j = 0, spccnt = 0; stat_str[j] && spccnt < 2; j++) { + if (stat_str[j] == ' ') spccnt++; + } + s2 = stat_str[j]; + + /* print out the line formatted like Debian dpkg */ + printf("%c%c %-14s %s\n", s1, s2, name_str, vers_str); + } + } +} + +static void remove_package(const unsigned package_num, int noisy) +{ + const char *package_name = name_hashtable[package_hashtable[package_num]->name]; + const char *package_version = name_hashtable[package_hashtable[package_num]->version]; + const unsigned status_num = search_status_hashtable(package_name); + const int package_name_length = strlen(package_name); + char **remove_files; + char **exclude_files; + char list_name[package_name_length + 25]; + char conffile_name[package_name_length + 30]; + + if (noisy) + printf("Removing %s (%s)...\n", package_name, package_version); + + /* Run prerm script */ + run_package_script_or_die(package_name, "prerm"); + + /* Create a list of files to remove, and a separate list of those to keep */ + sprintf(list_name, "/var/lib/dpkg/info/%s.%s", package_name, "list"); + remove_files = create_list(list_name); + + sprintf(conffile_name, "/var/lib/dpkg/info/%s.%s", package_name, "conffiles"); + exclude_files = create_list(conffile_name); + + /* Some directories can't be removed straight away, so do multiple passes */ + while (remove_file_array(remove_files, exclude_files)) + continue; + free_array(exclude_files); + free_array(remove_files); + + /* Create a list of files in /var/lib/dpkg/info/.* to keep */ + exclude_files = xzalloc(sizeof(char*) * 3); + exclude_files[0] = xstrdup(conffile_name); + exclude_files[1] = xasprintf("/var/lib/dpkg/info/%s.%s", package_name, "postrm"); + + /* Create a list of all /var/lib/dpkg/info/ files */ + remove_files = all_control_list(package_name); + + remove_file_array(remove_files, exclude_files); + free_array(remove_files); + free_array(exclude_files); + + /* rename .conffiles to .list + * The conffiles control file isn't required in Debian packages, so don't + * error out if it's missing. */ + rename(conffile_name, list_name); + + /* Change package status */ + set_status(status_num, "config-files", 3); +} + +static void purge_package(const unsigned package_num) +{ + const char *package_name = name_hashtable[package_hashtable[package_num]->name]; + const char *package_version = name_hashtable[package_hashtable[package_num]->version]; + const unsigned status_num = search_status_hashtable(package_name); + char **remove_files; + char **exclude_files; + char list_name[strlen(package_name) + 25]; + + printf("Purging %s (%s)...\n", package_name, package_version); + + /* Run prerm script */ + run_package_script_or_die(package_name, "prerm"); + + /* Create a list of files to remove */ + sprintf(list_name, "/var/lib/dpkg/info/%s.%s", package_name, "list"); + remove_files = create_list(list_name); + + exclude_files = xzalloc(sizeof(char*)); + + /* Some directories cant be removed straight away, so do multiple passes */ + while (remove_file_array(remove_files, exclude_files)) /* repeat */; + free_array(remove_files); + + /* Create a list of all /var/lib/dpkg/info/ files */ + remove_files = all_control_list(package_name); + remove_file_array(remove_files, exclude_files); + free_array(remove_files); + free(exclude_files); + + /* Run postrm script */ + run_package_script_or_die(package_name, "postrm"); + + /* Change package status */ + set_status(status_num, "not-installed", 3); +} + +static archive_handle_t *init_archive_deb_ar(const char *filename) +{ + archive_handle_t *ar_handle; + + /* Setup an ar archive handle that refers to the gzip sub archive */ + ar_handle = init_handle(); + ar_handle->filter = filter_accept_list_reassign; + ar_handle->src_fd = xopen(filename, O_RDONLY); + + return ar_handle; +} + +static void init_archive_deb_control(archive_handle_t *ar_handle) +{ + archive_handle_t *tar_handle; + + /* Setup the tar archive handle */ + tar_handle = init_handle(); + tar_handle->src_fd = ar_handle->src_fd; + + /* We don't care about data.tar.* or debian-binary, just control.tar.* */ +#if ENABLE_FEATURE_SEAMLESS_GZ + llist_add_to(&(ar_handle->accept), (char*)"control.tar.gz"); +#endif +#if ENABLE_FEATURE_SEAMLESS_BZ2 + llist_add_to(&(ar_handle->accept), (char*)"control.tar.bz2"); +#endif + + /* Assign the tar handle as a subarchive of the ar handle */ + ar_handle->sub_archive = tar_handle; +} + +static void init_archive_deb_data(archive_handle_t *ar_handle) +{ + archive_handle_t *tar_handle; + + /* Setup the tar archive handle */ + tar_handle = init_handle(); + tar_handle->src_fd = ar_handle->src_fd; + + /* We don't care about control.tar.* or debian-binary, just data.tar.* */ +#if ENABLE_FEATURE_SEAMLESS_GZ + llist_add_to(&(ar_handle->accept), (char*)"data.tar.gz"); +#endif +#if ENABLE_FEATURE_SEAMLESS_BZ2 + llist_add_to(&(ar_handle->accept), (char*)"data.tar.bz2"); +#endif + + /* Assign the tar handle as a subarchive of the ar handle */ + ar_handle->sub_archive = tar_handle; +} + +static char *deb_extract_control_file_to_buffer(archive_handle_t *ar_handle, llist_t *myaccept) +{ + ar_handle->sub_archive->action_data = data_extract_to_buffer; + ar_handle->sub_archive->accept = myaccept; + ar_handle->sub_archive->filter = filter_accept_list; + + unpack_ar_archive(ar_handle); + close(ar_handle->src_fd); + + return ar_handle->sub_archive->buffer; +} + +static void FAST_FUNC data_extract_all_prefix(archive_handle_t *archive_handle) +{ + char *name_ptr = archive_handle->file_header->name; + + name_ptr += strspn(name_ptr, "./"); + if (name_ptr[0] != '\0') { + archive_handle->file_header->name = xasprintf("%s%s", archive_handle->buffer, name_ptr); + data_extract_all(archive_handle); + } +} + +static void unpack_package(deb_file_t *deb_file) +{ + const char *package_name = name_hashtable[package_hashtable[deb_file->package]->name]; + const unsigned status_num = search_status_hashtable(package_name); + const unsigned status_package_num = status_hashtable[status_num]->package; + char *info_prefix; + char *list_filename; + archive_handle_t *archive_handle; + FILE *out_stream; + llist_t *accept_list; + int i; + + /* If existing version, remove it first */ + if (strcmp(name_hashtable[get_status(status_num, 3)], "installed") == 0) { + /* Package is already installed, remove old version first */ + printf("Preparing to replace %s %s (using %s)...\n", package_name, + name_hashtable[package_hashtable[status_package_num]->version], + deb_file->filename); + remove_package(status_package_num, 0); + } else { + printf("Unpacking %s (from %s)...\n", package_name, deb_file->filename); + } + + /* Extract control.tar.gz to /var/lib/dpkg/info/.filename */ + info_prefix = xasprintf("/var/lib/dpkg/info/%s.%s", package_name, ""); + archive_handle = init_archive_deb_ar(deb_file->filename); + init_archive_deb_control(archive_handle); + + accept_list = NULL; + i = 0; + while (i < ARRAY_SIZE(all_control_files)) { + char *c = xasprintf("./%s", all_control_files[i]); + llist_add_to(&accept_list, c); + i++; + } + archive_handle->sub_archive->accept = accept_list; + archive_handle->sub_archive->filter = filter_accept_list; + archive_handle->sub_archive->action_data = data_extract_all_prefix; + archive_handle->sub_archive->buffer = info_prefix; + archive_handle->sub_archive->ah_flags |= ARCHIVE_EXTRACT_UNCONDITIONAL; + unpack_ar_archive(archive_handle); + + /* Run the preinst prior to extracting */ + run_package_script_or_die(package_name, "preinst"); + + /* Extract data.tar.gz to the root directory */ + archive_handle = init_archive_deb_ar(deb_file->filename); + init_archive_deb_data(archive_handle); + archive_handle->sub_archive->action_data = data_extract_all_prefix; + archive_handle->sub_archive->buffer = (char*)"/"; /* huh? */ + archive_handle->sub_archive->ah_flags |= ARCHIVE_EXTRACT_UNCONDITIONAL; + unpack_ar_archive(archive_handle); + + /* Create the list file */ + list_filename = xasprintf("/var/lib/dpkg/info/%s.%s", package_name, "list"); + out_stream = xfopen_for_write(list_filename); + while (archive_handle->sub_archive->passed) { + /* the leading . has been stripped by data_extract_all_prefix already */ + fputs(archive_handle->sub_archive->passed->data, out_stream); + fputc('\n', out_stream); + archive_handle->sub_archive->passed = archive_handle->sub_archive->passed->link; + } + fclose(out_stream); + + /* change status */ + set_status(status_num, "install", 1); + set_status(status_num, "unpacked", 3); + + free(info_prefix); + free(list_filename); +} + +static void configure_package(deb_file_t *deb_file) +{ + const char *package_name = name_hashtable[package_hashtable[deb_file->package]->name]; + const char *package_version = name_hashtable[package_hashtable[deb_file->package]->version]; + const int status_num = search_status_hashtable(package_name); + + printf("Setting up %s (%s)...\n", package_name, package_version); + + /* Run the postinst script */ + /* TODO: handle failure gracefully */ + run_package_script_or_die(package_name, "postinst"); + + /* Change status to reflect success */ + set_status(status_num, "install", 1); + set_status(status_num, "installed", 3); +} + +int dpkg_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int dpkg_main(int argc UNUSED_PARAM, char **argv) +{ + deb_file_t **deb_file = NULL; + status_node_t *status_node; + char *str_f; + int opt; + int package_num; + int deb_count = 0; + int state_status; + int status_num; + int i; + enum { + OPT_configure = 0x1, + OPT_force_ignore_depends = 0x2, + OPT_install = 0x4, + OPT_list_installed = 0x8, + OPT_purge = 0x10, + OPT_remove = 0x20, + OPT_unpack = 0x40, + }; + + INIT_G(); + + opt = getopt32(argv, "CF:ilPru", &str_f); + //if (opt & OPT_configure) ... // -C + if (opt & OPT_force_ignore_depends) { // -F (--force in official dpkg) + if (strcmp(str_f, "depends")) + opt &= ~OPT_force_ignore_depends; + } + //if (opt & OPT_install) ... // -i + //if (opt & OPT_list_installed) ... // -l + //if (opt & OPT_purge) ... // -P + //if (opt & OPT_remove) ... // -r + //if (opt & OPT_unpack) ... // -u (--unpack in official dpkg) + argv += optind; + /* check for non-option argument if expected */ + if (!opt || (!argv[0] && !(opt && OPT_list_installed))) + bb_show_usage(); + +/* puts("(Reading database ... xxxxx files and directories installed.)"); */ + index_status_file("/var/lib/dpkg/status"); + + /* if the list action was given print the installed packages and exit */ + if (opt & OPT_list_installed) { + list_packages(argv[0]); + return EXIT_SUCCESS; + } + + /* Read arguments and store relevant info in structs */ + while (*argv) { + /* deb_count = nb_elem - 1 and we need nb_elem + 1 to allocate terminal node [NULL pointer] */ + deb_file = xrealloc_vector(deb_file, 2, deb_count); + deb_file[deb_count] = xzalloc(sizeof(deb_file[0][0])); + if (opt & (OPT_install | OPT_unpack)) { + /* -i/-u: require filename */ + archive_handle_t *archive_handle; + llist_t *control_list = NULL; + + /* Extract the control file */ + llist_add_to(&control_list, (char*)"./control"); + archive_handle = init_archive_deb_ar(argv[0]); + init_archive_deb_control(archive_handle); + deb_file[deb_count]->control_file = deb_extract_control_file_to_buffer(archive_handle, control_list); + if (deb_file[deb_count]->control_file == NULL) { + bb_error_msg_and_die("cannot extract control file"); + } + deb_file[deb_count]->filename = xstrdup(argv[0]); + package_num = fill_package_struct(deb_file[deb_count]->control_file); + + if (package_num == -1) { + bb_error_msg("invalid control file in %s", argv[0]); + argv++; + continue; + } + deb_file[deb_count]->package = (unsigned) package_num; + + /* Add the package to the status hashtable */ + if (opt & (OPT_unpack | OPT_install)) { + /* Try and find a currently installed version of this package */ + status_num = search_status_hashtable(name_hashtable[package_hashtable[deb_file[deb_count]->package]->name]); + /* If no previous entry was found initialise a new entry */ + if (status_hashtable[status_num] == NULL + || status_hashtable[status_num]->status == 0 + ) { + status_node = xmalloc(sizeof(status_node_t)); + status_node->package = deb_file[deb_count]->package; + /* reinstreq isnt changed to "ok" until the package control info + * is written to the status file*/ + status_node->status = search_name_hashtable("install reinstreq not-installed"); + status_hashtable[status_num] = status_node; + } else { + set_status(status_num, "install", 1); + set_status(status_num, "reinstreq", 2); + } + } + } else if (opt & (OPT_configure | OPT_purge | OPT_remove)) { + /* -C/-p/-r: require package name */ + deb_file[deb_count]->package = search_package_hashtable( + search_name_hashtable(argv[0]), + search_name_hashtable("ANY"), VER_ANY); + if (package_hashtable[deb_file[deb_count]->package] == NULL) { + bb_error_msg_and_die("package %s is uninstalled or unknown", argv[0]); + } + package_num = deb_file[deb_count]->package; + status_num = search_status_hashtable(name_hashtable[package_hashtable[package_num]->name]); + state_status = get_status(status_num, 3); + + /* check package status is "installed" */ + if (opt & OPT_remove) { + if (strcmp(name_hashtable[state_status], "not-installed") == 0 + || strcmp(name_hashtable[state_status], "config-files") == 0 + ) { + bb_error_msg_and_die("%s is already removed", name_hashtable[package_hashtable[package_num]->name]); + } + set_status(status_num, "deinstall", 1); + } else if (opt & OPT_purge) { + /* if package status is "conf-files" then its ok */ + if (strcmp(name_hashtable[state_status], "not-installed") == 0) { + bb_error_msg_and_die("%s is already purged", name_hashtable[package_hashtable[package_num]->name]); + } + set_status(status_num, "purge", 1); + } + } + deb_count++; + argv++; + } + if (!deb_count) + bb_error_msg_and_die("no package files specified"); + deb_file[deb_count] = NULL; + + /* Check that the deb file arguments are installable */ + if (!(opt & OPT_force_ignore_depends)) { + if (!check_deps(deb_file, 0 /*, deb_count*/)) { + bb_error_msg_and_die("dependency check failed"); + } + } + + /* TODO: install or remove packages in the correct dependency order */ + for (i = 0; i < deb_count; i++) { + /* Remove or purge packages */ + if (opt & OPT_remove) { + remove_package(deb_file[i]->package, 1); + } + else if (opt & OPT_purge) { + purge_package(deb_file[i]->package); + } + else if (opt & OPT_unpack) { + unpack_package(deb_file[i]); + } + else if (opt & OPT_install) { + unpack_package(deb_file[i]); + /* package is configured in second pass below */ + } + else if (opt & OPT_configure) { + configure_package(deb_file[i]); + } + } + /* configure installed packages */ + if (opt & OPT_install) { + for (i = 0; i < deb_count; i++) + configure_package(deb_file[i]); + } + + write_status_file(deb_file); + + if (ENABLE_FEATURE_CLEAN_UP) { + for (i = 0; i < deb_count; i++) { + free(deb_file[i]->control_file); + free(deb_file[i]->filename); + free(deb_file[i]); + } + + free(deb_file); + + for (i = 0; i < NAME_HASH_PRIME; i++) { + free(name_hashtable[i]); + } + + for (i = 0; i < PACKAGE_HASH_PRIME; i++) { + free_package(package_hashtable[i]); + } + + for (i = 0; i < STATUS_HASH_PRIME; i++) { + free(status_hashtable[i]); + } + + free(status_hashtable); + free(package_hashtable); + free(name_hashtable); + } + + return EXIT_SUCCESS; +} diff --git a/archival/dpkg_deb.c b/archival/dpkg_deb.c new file mode 100644 index 0000000..f94c90c --- /dev/null +++ b/archival/dpkg_deb.c @@ -0,0 +1,104 @@ +/* vi: set sw=4 ts=4: */ +/* + * dpkg-deb packs, unpacks and provides information about Debian archives. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ +#include "libbb.h" +#include "unarchive.h" + +#define DPKG_DEB_OPT_CONTENTS 1 +#define DPKG_DEB_OPT_CONTROL 2 +#define DPKG_DEB_OPT_FIELD 4 +#define DPKG_DEB_OPT_EXTRACT 8 +#define DPKG_DEB_OPT_EXTRACT_VERBOSE 16 + +int dpkg_deb_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int dpkg_deb_main(int argc, char **argv) +{ + archive_handle_t *ar_archive; + archive_handle_t *tar_archive; + llist_t *control_tar_llist = NULL; + unsigned opt; + const char *extract_dir; + int need_args; + + /* Setup the tar archive handle */ + tar_archive = init_handle(); + + /* Setup an ar archive handle that refers to the gzip sub archive */ + ar_archive = init_handle(); + ar_archive->sub_archive = tar_archive; + ar_archive->filter = filter_accept_list_reassign; + +#if ENABLE_FEATURE_SEAMLESS_GZ + llist_add_to(&(ar_archive->accept), (char*)"data.tar.gz"); + llist_add_to(&control_tar_llist, (char*)"control.tar.gz"); +#endif + +#if ENABLE_FEATURE_SEAMLESS_BZ2 + llist_add_to(&(ar_archive->accept), (char*)"data.tar.bz2"); + llist_add_to(&control_tar_llist, (char*)"control.tar.bz2"); +#endif + + opt_complementary = "c--efXx:e--cfXx:f--ceXx:X--cefx:x--cefX"; + opt = getopt32(argv, "cefXx"); + argv += optind; + argc -= optind; + + if (opt & DPKG_DEB_OPT_CONTENTS) { + tar_archive->action_header = header_verbose_list; + } + extract_dir = NULL; + need_args = 1; + if (opt & DPKG_DEB_OPT_CONTROL) { + ar_archive->accept = control_tar_llist; + tar_archive->action_data = data_extract_all; + if (1 == argc) { + extract_dir = "./DEBIAN"; + } else { + need_args++; + } + } + if (opt & DPKG_DEB_OPT_FIELD) { + /* Print the entire control file + * it should accept a second argument which specifies a + * specific field to print */ + ar_archive->accept = control_tar_llist; + llist_add_to(&(tar_archive->accept), (char*)"./control"); + tar_archive->filter = filter_accept_list; + tar_archive->action_data = data_extract_to_stdout; + } + if (opt & DPKG_DEB_OPT_EXTRACT) { + tar_archive->action_header = header_list; + } + if (opt & (DPKG_DEB_OPT_EXTRACT_VERBOSE | DPKG_DEB_OPT_EXTRACT)) { + tar_archive->action_data = data_extract_all; + need_args = 2; + } + + if (need_args != argc) { + bb_show_usage(); + } + + tar_archive->src_fd = ar_archive->src_fd = xopen(argv[0], O_RDONLY); + + /* Work out where to extract the files */ + /* 2nd argument is a dir name */ + if (argv[1]) { + extract_dir = argv[1]; + } + if (extract_dir) { + mkdir(extract_dir, 0777); /* bb_make_directory(extract_dir, 0777, 0) */ + xchdir(extract_dir); + } + + /* Do it */ + unpack_ar_archive(ar_archive); + + /* Cleanup */ + if (ENABLE_FEATURE_CLEAN_UP) + close(ar_archive->src_fd); + + return EXIT_SUCCESS; +} diff --git a/archival/gzip.c b/archival/gzip.c new file mode 100644 index 0000000..43804b2 --- /dev/null +++ b/archival/gzip.c @@ -0,0 +1,2102 @@ +/* vi: set sw=4 ts=4: */ +/* + * Gzip implementation for busybox + * + * Based on GNU gzip Copyright (C) 1992-1993 Jean-loup Gailly. + * + * Originally adjusted for busybox by Charles P. Wright + * "this is a stripped down version of gzip I put into busybox, it does + * only standard in to standard out with -9 compression. It also requires + * the zcat module for some important functions." + * + * Adjusted further by Erik Andersen to support + * files as well as stdin/stdout, and to generally behave itself wrt + * command line handling. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +/* big objects in bss: + * 00000020 b bl_count + * 00000074 b base_length + * 00000078 b base_dist + * 00000078 b static_dtree + * 0000009c b bl_tree + * 000000f4 b dyn_dtree + * 00000100 b length_code + * 00000200 b dist_code + * 0000023d b depth + * 00000400 b flag_buf + * 0000047a b heap + * 00000480 b static_ltree + * 000008f4 b dyn_ltree + */ + +/* TODO: full support for -v for DESKTOP + * "/usr/bin/gzip -v a bogus aa" should say: +a: 85.1% -- replaced with a.gz +gzip: bogus: No such file or directory +aa: 85.1% -- replaced with aa.gz +*/ + +#include "libbb.h" +#include "unarchive.h" + + +/* =========================================================================== + */ +//#define DEBUG 1 +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) { if (!(cond)) bb_error_msg(msg); } +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x; } +# define Tracevv(x) {if (verbose > 1) fprintf x; } +# define Tracec(c,x) {if (verbose && (c)) fprintf x; } +# define Tracecv(c,x) {if (verbose > 1 && (c)) fprintf x; } +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +/* =========================================================================== + */ +#define SMALL_MEM + +#ifndef INBUFSIZ +# ifdef SMALL_MEM +# define INBUFSIZ 0x2000 /* input buffer size */ +# else +# define INBUFSIZ 0x8000 /* input buffer size */ +# endif +#endif + +#ifndef OUTBUFSIZ +# ifdef SMALL_MEM +# define OUTBUFSIZ 8192 /* output buffer size */ +# else +# define OUTBUFSIZ 16384 /* output buffer size */ +# endif +#endif + +#ifndef DIST_BUFSIZE +# ifdef SMALL_MEM +# define DIST_BUFSIZE 0x2000 /* buffer for distances, see trees.c */ +# else +# define DIST_BUFSIZE 0x8000 /* buffer for distances, see trees.c */ +# endif +#endif + +/* gzip flag byte */ +#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ +#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */ +#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +#define ORIG_NAME 0x08 /* bit 3 set: original file name present */ +#define COMMENT 0x10 /* bit 4 set: file comment present */ +#define RESERVED 0xC0 /* bit 6,7: reserved */ + +/* internal file attribute */ +#define UNKNOWN 0xffff +#define BINARY 0 +#define ASCII 1 + +#ifndef WSIZE +# define WSIZE 0x8000 /* window size--must be a power of two, and */ +#endif /* at least 32K for zip's deflate method */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST (WSIZE-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#ifndef MAX_PATH_LEN +# define MAX_PATH_LEN 1024 /* max pathname length */ +#endif + +#define seekable() 0 /* force sequential output */ +#define translate_eol 0 /* no option -a yet */ + +#ifndef BITS +# define BITS 16 +#endif +#define INIT_BITS 9 /* Initial number of bits per code */ + +#define BIT_MASK 0x1f /* Mask for 'number of compression bits' */ +/* Mask 0x20 is reserved to mean a fourth header byte, and 0x40 is free. + * It's a pity that old uncompress does not check bit 0x20. That makes + * extension of the format actually undesirable because old compress + * would just crash on the new format instead of giving a meaningful + * error message. It does check the number of bits, but it's more + * helpful to say "unsupported format, get a new version" than + * "can only handle 16 bits". + */ + +#ifdef MAX_EXT_CHARS +# define MAX_SUFFIX MAX_EXT_CHARS +#else +# define MAX_SUFFIX 30 +#endif + + +/* =========================================================================== + * Compile with MEDIUM_MEM to reduce the memory requirements or + * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the + * entire input file can be held in memory (not possible on 16 bit systems). + * Warning: defining these symbols affects HASH_BITS (see below) and thus + * affects the compression ratio. The compressed output + * is still correct, and might even be smaller in some cases. + */ + +#ifdef SMALL_MEM +# define HASH_BITS 13 /* Number of bits used to hash strings */ +#endif +#ifdef MEDIUM_MEM +# define HASH_BITS 14 +#endif +#ifndef HASH_BITS +# define HASH_BITS 15 + /* For portability to 16 bit machines, do not use values above 15. */ +#endif + +#define HASH_SIZE (unsigned)(1<= 4. + */ + + max_insert_length = max_lazy_match, +/* Insert new strings in the hash table only if the match length + * is not greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + good_match = 32, +/* Use a faster search when the previous match is longer than this */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ + + nice_match = 258, /* Stop searching when current match exceeds this */ +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ +}; + + +struct globals { + + lng block_start; + +/* window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + unsigned ins_h; /* hash index of string to be inserted */ + +#define H_SHIFT ((HASH_BITS+MIN_MATCH-1) / MIN_MATCH) +/* Number of bits by which ins_h and del_h must be shifted at each + * input step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * H_SHIFT * MIN_MATCH >= HASH_BITS + */ + + unsigned prev_length; + +/* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + unsigned strstart; /* start of string to insert */ + unsigned match_start; /* start of matching string */ + unsigned lookahead; /* number of valid bytes ahead in window */ + +/* =========================================================================== + */ +#define DECLARE(type, array, size) \ + type * array +#define ALLOC(type, array, size) \ + array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); +#define FREE(array) \ + do { free(array); array = NULL; } while (0) + + /* global buffers */ + + /* buffer for literals or lengths */ + /* DECLARE(uch, l_buf, LIT_BUFSIZE); */ + DECLARE(uch, l_buf, INBUFSIZ); + + DECLARE(ush, d_buf, DIST_BUFSIZE); + DECLARE(uch, outbuf, OUTBUFSIZ); + +/* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least WSIZE + * bytes. With this organization, matches are limited to a distance of + * WSIZE-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would + * be less efficient). + */ + DECLARE(uch, window, 2L * WSIZE); + +/* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + /* DECLARE(Pos, prev, WSIZE); */ + DECLARE(ush, prev, 1L << BITS); + +/* Heads of the hash chains or 0. */ + /* DECLARE(Pos, head, 1<> 8; + } else { + put_8bit(w); + put_8bit(w >> 8); + } +} + +static void put_32bit(ulg n) +{ + put_16bit(n); + put_16bit(n >> 16); +} + +/* =========================================================================== + * Clear input and output buffers + */ +static void clear_bufs(void) +{ + G1.outcnt = 0; +#ifdef DEBUG + G1.insize = 0; +#endif + G1.isize = 0; +} + + +/* =========================================================================== + * Run a set of bytes through the crc shift register. If s is a NULL + * pointer, then initialize the crc shift register contents instead. + * Return the current crc in either case. + */ +static uint32_t updcrc(uch * s, unsigned n) +{ + uint32_t c = G1.crc; + while (n) { + c = G1.crc_32_tab[(uch)(c ^ *s++)] ^ (c >> 8); + n--; + } + G1.crc = c; + return c; +} + + +/* =========================================================================== + * Read a new buffer from the current input file, perform end-of-line + * translation, and update the crc and input file size. + * IN assertion: size >= 2 (for end-of-line translation) + */ +static unsigned file_read(void *buf, unsigned size) +{ + unsigned len; + + Assert(G1.insize == 0, "l_buf not empty"); + + len = safe_read(ifd, buf, size); + if (len == (unsigned)(-1) || len == 0) + return len; + + updcrc(buf, len); + G1.isize += len; + return len; +} + + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. + */ +static void send_bits(int value, int length) +{ +#ifdef DEBUG + Tracev((stderr, " l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + G1.bits_sent += length; +#endif + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) + * unused bits in value. + */ + if (G1.bi_valid > (int) BUF_SIZE - length) { + G1.bi_buf |= (value << G1.bi_valid); + put_16bit(G1.bi_buf); + G1.bi_buf = (ush) value >> (BUF_SIZE - G1.bi_valid); + G1.bi_valid += length - BUF_SIZE; + } else { + G1.bi_buf |= value << G1.bi_valid; + G1.bi_valid += length; + } +} + + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +static unsigned bi_reverse(unsigned code, int len) +{ + unsigned res = 0; + + while (1) { + res |= code & 1; + if (--len <= 0) return res; + code >>= 1; + res <<= 1; + } +} + + +/* =========================================================================== + * Write out any remaining bits in an incomplete byte. + */ +static void bi_windup(void) +{ + if (G1.bi_valid > 8) { + put_16bit(G1.bi_buf); + } else if (G1.bi_valid > 0) { + put_8bit(G1.bi_buf); + } + G1.bi_buf = 0; + G1.bi_valid = 0; +#ifdef DEBUG + G1.bits_sent = (G1.bits_sent + 7) & ~7; +#endif +} + + +/* =========================================================================== + * Copy a stored block to the zip file, storing first the length and its + * one's complement if requested. + */ +static void copy_block(char *buf, unsigned len, int header) +{ + bi_windup(); /* align on byte boundary */ + + if (header) { + put_16bit(len); + put_16bit(~len); +#ifdef DEBUG + G1.bits_sent += 2 * 16; +#endif + } +#ifdef DEBUG + G1.bits_sent += (ulg) len << 3; +#endif + while (len--) { + put_8bit(*buf++); + } +} + + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead, and sets eofile if end of input file. + * IN assertion: lookahead < MIN_LOOKAHEAD && strstart + lookahead > 0 + * OUT assertions: at least one byte has been read, or eofile is set; + * file reads are performed for at least two bytes (required for the + * translate_eol option). + */ +static void fill_window(void) +{ + unsigned n, m; + unsigned more = WINDOW_SIZE - G1.lookahead - G1.strstart; + /* Amount of free space at the end of the window. */ + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (more == (unsigned) -1) { + /* Very unlikely, but possible on 16 bit machine if strstart == 0 + * and lookahead == 1 (input done one byte at time) + */ + more--; + } else if (G1.strstart >= WSIZE + MAX_DIST) { + /* By the IN assertion, the window is not empty so we can't confuse + * more == 0 with more == 64K on a 16 bit machine. + */ + Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM"); + + memcpy(G1.window, G1.window + WSIZE, WSIZE); + G1.match_start -= WSIZE; + G1.strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ + + G1.block_start -= WSIZE; + + for (n = 0; n < HASH_SIZE; n++) { + m = head[n]; + head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); + } + for (n = 0; n < WSIZE; n++) { + m = G1.prev[n]; + G1.prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } + more += WSIZE; + } + /* At this point, more >= 2 */ + if (!G1.eofile) { + n = file_read(G1.window + G1.strstart + G1.lookahead, more); + if (n == 0 || n == (unsigned) -1) { + G1.eofile = 1; + } else { + G1.lookahead += n; + } + } +} + + +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter or equal to prev_length are discarded, + * in which case the result is equal to prev_length and match_start is + * garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + */ + +/* For MSDOS, OS/2 and 386 Unix, an optimized version is in match.asm or + * match.s. The code is functionally equivalent, so you can use the C version + * if desired. + */ +static int longest_match(IPos cur_match) +{ + unsigned chain_length = max_chain_length; /* max hash chain length */ + uch *scan = G1.window + G1.strstart; /* current string */ + uch *match; /* matched string */ + int len; /* length of current match */ + int best_len = G1.prev_length; /* best match length so far */ + IPos limit = G1.strstart > (IPos) MAX_DIST ? G1.strstart - (IPos) MAX_DIST : 0; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + +/* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ +#if HASH_BITS < 8 || MAX_MATCH != 258 +# error Code too clever +#endif + uch *strend = G1.window + G1.strstart + MAX_MATCH; + uch scan_end1 = scan[best_len - 1]; + uch scan_end = scan[best_len]; + + /* Do not waste too much time if we already have a good match: */ + if (G1.prev_length >= good_match) { + chain_length >>= 2; + } + Assert(G1.strstart <= WINDOW_SIZE - MIN_LOOKAHEAD, "insufficient lookahead"); + + do { + Assert(cur_match < G1.strstart, "no future"); + match = G1.window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2: + */ + if (match[best_len] != scan_end || + match[best_len - 1] != scan_end1 || + *match != *scan || *++match != scan[1]) + continue; + + /* The check at best_len-1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match++; + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart+258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && scan < strend); + + len = MAX_MATCH - (int) (strend - scan); + scan = strend - MAX_MATCH; + + if (len > best_len) { + G1.match_start = cur_match; + best_len = len; + if (len >= nice_match) + break; + scan_end1 = scan[best_len - 1]; + scan_end = scan[best_len]; + } + } while ((cur_match = G1.prev[cur_match & WMASK]) > limit + && --chain_length != 0); + + return best_len; +} + + +#ifdef DEBUG +/* =========================================================================== + * Check that the match at match_start is indeed a match. + */ +static void check_match(IPos start, IPos match, int length) +{ + /* check that the match is indeed a match */ + if (memcmp(G1.window + match, G1.window + start, length) != 0) { + bb_error_msg(" start %d, match %d, length %d", start, match, length); + bb_error_msg("invalid match"); + } + if (verbose > 1) { + bb_error_msg("\\[%d,%d]", start - match, length); + do { + fputc(G1.window[start++], stderr); + } while (--length != 0); + } +} +#else +# define check_match(start, match, length) ((void)0) +#endif + + +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1992-1993 Jean-loup Gailly + * This is free software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License, see the file COPYING. + */ + +/* PURPOSE + * Encode various sets of source values using variable-length + * binary code trees. + * + * DISCUSSION + * The PKZIP "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in the ZIP file in a compressed form + * which is itself a Huffman encoding of the lengths of + * all the code strings (in ascending order by source values). + * The actual code strings are reconstructed from the lengths in + * the UNZIP process, as described in the "application note" + * (APPNOTE.TXT) distributed as part of PKWARE's PKZIP program. + * + * REFERENCES + * Lynch, Thomas J. + * Data Compression: Techniques and Applications, pp. 53-55. + * Lifetime Learning Publications, 1985. ISBN 0-534-03418-7. + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. + * + * INTERFACE + * void ct_init() + * Allocate the match buffer, initialize the various tables [and save + * the location of the internal file attribute (ascii/binary) and + * method (DEFLATE/STORE) -- deleted in bbox] + * + * void ct_tally(int dist, int lc); + * Save the match info and tally the frequency counts. + * + * ulg flush_block(char *buf, ulg stored_len, int eof) + * Determine the best encoding for the current block: dynamic trees, + * static trees or store, and output the encoded block to the zip + * file. Returns the total compressed length for the file so far. + */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define MAX_BL_BITS 7 +/* Bit length codes must not exceed MAX_BL_BITS bits */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define END_BLOCK 256 +/* end of block literal code */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +/* extra bits for each length code */ +static const uint8_t extra_lbits[LENGTH_CODES] ALIGN1 = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, + 4, 4, 5, 5, 5, 5, 0 +}; + +/* extra bits for each distance code */ +static const uint8_t extra_dbits[D_CODES] ALIGN1 = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, + 10, 10, 11, 11, 12, 12, 13, 13 +}; + +/* extra bits for each bit length code */ +static const uint8_t extra_blbits[BL_CODES] ALIGN1 = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7 }; + +/* number of codes at each bit length for an optimal tree */ +static const uint8_t bl_order[BL_CODES] ALIGN1 = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#ifndef LIT_BUFSIZE +# ifdef SMALL_MEM +# define LIT_BUFSIZE 0x2000 +# else +# ifdef MEDIUM_MEM +# define LIT_BUFSIZE 0x4000 +# else +# define LIT_BUFSIZE 0x8000 +# endif +# endif +#endif +#ifndef DIST_BUFSIZE +# define DIST_BUFSIZE LIT_BUFSIZE +#endif +/* Sizes of match buffers for literals/lengths and distances. There are + * 4 reasons for limiting LIT_BUFSIZE to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input data is + * still in the window so we can still emit a stored block even when input + * comes from standard input. (This can also be done for all blocks if + * LIT_BUFSIZE is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting trees + * more frequently. + * - I can't count above 4 + * The current code is general and allows DIST_BUFSIZE < LIT_BUFSIZE (to save + * memory at the expense of compression). Some optimizations would be possible + * if we rely on DIST_BUFSIZE == LIT_BUFSIZE. + */ +#define REP_3_6 16 +/* repeat previous bit length 3-6 times (2 bits of repeat count) */ +#define REPZ_3_10 17 +/* repeat a zero length 3-10 times (3 bits of repeat count) */ +#define REPZ_11_138 18 +/* repeat a zero length 11-138 times (7 bits of repeat count) */ + +/* =========================================================================== +*/ +/* Data structure describing a single value and its code string. */ +typedef struct ct_data { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +#define HEAP_SIZE (2*L_CODES + 1) +/* maximum heap size */ + +typedef struct tree_desc { + ct_data *dyn_tree; /* the dynamic tree */ + ct_data *static_tree; /* corresponding static tree or NULL */ + const uint8_t *extra_bits; /* extra bits for each code or NULL */ + int extra_base; /* base index for extra_bits */ + int elems; /* max number of elements in the tree */ + int max_length; /* max bit length for the codes */ + int max_code; /* largest code with non zero frequency */ +} tree_desc; + +struct globals2 { + + ush heap[HEAP_SIZE]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + +/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + ct_data dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + ct_data dyn_dtree[2 * D_CODES + 1]; /* distance tree */ + + ct_data static_ltree[L_CODES + 2]; + +/* The static literal tree. Since the bit lengths are imposed, there is no + * need for the L_CODES extra codes used during heap construction. However + * The codes 286 and 287 are needed to build a canonical tree (see ct_init + * below). + */ + + ct_data static_dtree[D_CODES]; + +/* The static distance tree. (Actually a trivial tree since all codes use + * 5 bits.) + */ + + ct_data bl_tree[2 * BL_CODES + 1]; + +/* Huffman tree for the bit lengths */ + + tree_desc l_desc; + tree_desc d_desc; + tree_desc bl_desc; + + ush bl_count[MAX_BITS + 1]; + +/* The lengths of the bit length codes are sent in order of decreasing + * probability, to avoid transmitting the lengths for unused bit length codes. + */ + + uch depth[2 * L_CODES + 1]; + +/* Depth of each subtree used as tie breaker for trees of equal frequency */ + + uch length_code[MAX_MATCH - MIN_MATCH + 1]; + +/* length code for each normalized match length (0 == MIN_MATCH) */ + + uch dist_code[512]; + +/* distance codes. The first 256 values correspond to the distances + * 3 .. 258, the last 256 values correspond to the top 8 bits of + * the 15 bit distances. + */ + + int base_length[LENGTH_CODES]; + +/* First normalized length for each code (0 = MIN_MATCH) */ + + int base_dist[D_CODES]; + +/* First normalized distance for each code (0 = distance of 1) */ + + uch flag_buf[LIT_BUFSIZE / 8]; + +/* flag_buf is a bit array distinguishing literals from lengths in + * l_buf, thus indicating the presence or absence of a distance. + */ + + unsigned last_lit; /* running index in l_buf */ + unsigned last_dist; /* running index in d_buf */ + unsigned last_flags; /* running index in flag_buf */ + uch flags; /* current flags not yet saved in flag_buf */ + uch flag_bit; /* current bit used in flags */ + +/* bits are filled in flags starting at bit 0 (least significant). + * Note: these flags are overkill in the current code since we don't + * take advantage of DIST_BUFSIZE == LIT_BUFSIZE. + */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + + ulg compressed_len; /* total bit length of compressed file */ +}; + +#define G2ptr ((struct globals2*)(ptr_to_globals)) +#define G2 (*G2ptr) + + +/* =========================================================================== + */ +static void gen_codes(ct_data * tree, int max_code); +static void build_tree(tree_desc * desc); +static void scan_tree(ct_data * tree, int max_code); +static void send_tree(ct_data * tree, int max_code); +static int build_bl_tree(void); +static void send_all_trees(int lcodes, int dcodes, int blcodes); +static void compress_block(ct_data * ltree, ct_data * dtree); + + +#ifndef DEBUG +/* Send a code of the given tree. c and tree must not have side effects */ +# define SEND_CODE(c, tree) send_bits(tree[c].Code, tree[c].Len) +#else +# define SEND_CODE(c, tree) \ +{ \ + if (verbose > 1) bb_error_msg("\ncd %3d ",(c)); \ + send_bits(tree[c].Code, tree[c].Len); \ +} +#endif + +#define D_CODE(dist) \ + ((dist) < 256 ? G2.dist_code[dist] : G2.dist_code[256 + ((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. dist_code[256] and dist_code[257] are never + * used. + * The arguments must not have side effects. + */ + + +/* =========================================================================== + * Initialize a new block. + */ +static void init_block(void) +{ + int n; /* iterates over tree elements */ + + /* Initialize the trees. */ + for (n = 0; n < L_CODES; n++) + G2.dyn_ltree[n].Freq = 0; + for (n = 0; n < D_CODES; n++) + G2.dyn_dtree[n].Freq = 0; + for (n = 0; n < BL_CODES; n++) + G2.bl_tree[n].Freq = 0; + + G2.dyn_ltree[END_BLOCK].Freq = 1; + G2.opt_len = G2.static_len = 0; + G2.last_lit = G2.last_dist = G2.last_flags = 0; + G2.flags = 0; + G2.flag_bit = 1; +} + + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ + +/* Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. */ +#define SMALLER(tree, n, m) \ + (tree[n].Freq < tree[m].Freq \ + || (tree[n].Freq == tree[m].Freq && G2.depth[n] <= G2.depth[m])) + +static void pqdownheap(ct_data * tree, int k) +{ + int v = G2.heap[k]; + int j = k << 1; /* left son of k */ + + while (j <= G2.heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < G2.heap_len && SMALLER(tree, G2.heap[j + 1], G2.heap[j])) + j++; + + /* Exit if v is smaller than both sons */ + if (SMALLER(tree, v, G2.heap[j])) + break; + + /* Exchange v with the smallest son */ + G2.heap[k] = G2.heap[j]; + k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + G2.heap[k] = v; +} + + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +static void gen_bitlen(tree_desc * desc) +{ + ct_data *tree = desc->dyn_tree; + const uint8_t *extra = desc->extra_bits; + int base = desc->extra_base; + int max_code = desc->max_code; + int max_length = desc->max_length; + ct_data *stree = desc->static_tree; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) + G2.bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). + */ + tree[G2.heap[G2.heap_max]].Len = 0; /* root of the heap */ + + for (h = G2.heap_max + 1; h < HEAP_SIZE; h++) { + n = G2.heap[h]; + bits = tree[tree[n].Dad].Len + 1; + if (bits > max_length) { + bits = max_length; + overflow++; + } + tree[n].Len = (ush) bits; + /* We overwrite tree[n].Dad which is no longer needed */ + + if (n > max_code) + continue; /* not a leaf node */ + + G2.bl_count[bits]++; + xbits = 0; + if (n >= base) + xbits = extra[n - base]; + f = tree[n].Freq; + G2.opt_len += (ulg) f *(bits + xbits); + + if (stree) + G2.static_len += (ulg) f * (stree[n].Len + xbits); + } + if (overflow == 0) + return; + + Trace((stderr, "\nbit length overflow\n")); + /* This happens for example on obj2 and pic of the Calgary corpus */ + + /* Find the first bit length which could increase: */ + do { + bits = max_length - 1; + while (G2.bl_count[bits] == 0) + bits--; + G2.bl_count[bits]--; /* move one leaf down the tree */ + G2.bl_count[bits + 1] += 2; /* move one overflow item as its brother */ + G2.bl_count[max_length]--; + /* The brother of the overflow item also moves one step up, + * but this does not affect bl_count[max_length] + */ + overflow -= 2; + } while (overflow > 0); + + /* Now recompute all bit lengths, scanning in increasing frequency. + * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all + * lengths instead of fixing only the wrong ones. This idea is taken + * from 'ar' written by Haruhiko Okumura.) + */ + for (bits = max_length; bits != 0; bits--) { + n = G2.bl_count[bits]; + while (n != 0) { + m = G2.heap[--h]; + if (m > max_code) + continue; + if (tree[m].Len != (unsigned) bits) { + Trace((stderr, "code %d bits %d->%d\n", m, tree[m].Len, bits)); + G2.opt_len += ((int32_t) bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = bits; + } + n--; + } + } +} + + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +static void gen_codes(ct_data * tree, int max_code) +{ + ush next_code[MAX_BITS + 1]; /* next code value for each bit length */ + ush code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + next_code[bits] = code = (code + G2.bl_count[bits - 1]) << 1; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert(code + G2.bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, + "inconsistent bit counts"); + Tracev((stderr, "\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + + if (len == 0) + continue; + /* Now reverse the bits */ + tree[n].Code = bi_reverse(next_code[len]++, len); + + Tracec(tree != G2.static_ltree, + (stderr, "\nn %3d %c l %2d c %4x (%x) ", n, + (isgraph(n) ? n : ' '), len, tree[n].Code, + next_code[len] - 1)); + } +} + + +/* =========================================================================== + * Construct one Huffman tree and assigns the code bit strings and lengths. + * Update the total bit length for the current block. + * IN assertion: the field freq is set for all tree elements. + * OUT assertions: the fields len and code are set to the optimal bit length + * and corresponding code. The length opt_len is updated; static_len is + * also updated if stree is not null. The field max_code is set. + */ + +/* Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. */ + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + +#define PQREMOVE(tree, top) \ +do { \ + top = G2.heap[SMALLEST]; \ + G2.heap[SMALLEST] = G2.heap[G2.heap_len--]; \ + pqdownheap(tree, SMALLEST); \ +} while (0) + +static void build_tree(tree_desc * desc) +{ + ct_data *tree = desc->dyn_tree; + ct_data *stree = desc->static_tree; + int elems = desc->elems; + int n, m; /* iterate over heap elements */ + int max_code = -1; /* largest code with non zero frequency */ + int node = elems; /* next internal node of the tree */ + + /* Construct the initial heap, with least frequent element in + * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. + * heap[0] is not used. + */ + G2.heap_len = 0; + G2.heap_max = HEAP_SIZE; + + for (n = 0; n < elems; n++) { + if (tree[n].Freq != 0) { + G2.heap[++G2.heap_len] = max_code = n; + G2.depth[n] = 0; + } else { + tree[n].Len = 0; + } + } + + /* The pkzip format requires that at least one distance code exists, + * and that at least one bit should be sent even if there is only one + * possible code. So to avoid special checks later on we force at least + * two codes of non zero frequency. + */ + while (G2.heap_len < 2) { + int new = G2.heap[++G2.heap_len] = (max_code < 2 ? ++max_code : 0); + + tree[new].Freq = 1; + G2.depth[new] = 0; + G2.opt_len--; + if (stree) + G2.static_len -= stree[new].Len; + /* new is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = G2.heap_len / 2; n >= 1; n--) + pqdownheap(tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + do { + PQREMOVE(tree, n); /* n = node of least frequency */ + m = G2.heap[SMALLEST]; /* m = node of next least frequency */ + + G2.heap[--G2.heap_max] = n; /* keep the nodes sorted by frequency */ + G2.heap[--G2.heap_max] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + G2.depth[node] = MAX(G2.depth[n], G2.depth[m]) + 1; + tree[n].Dad = tree[m].Dad = (ush) node; +#ifdef DUMP_BL_TREE + if (tree == G2.bl_tree) { + bb_error_msg("\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + G2.heap[SMALLEST] = node++; + pqdownheap(tree, SMALLEST); + + } while (G2.heap_len >= 2); + + G2.heap[--G2.heap_max] = G2.heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen((tree_desc *) desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes((ct_data *) tree, max_code); +} + + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. Updates opt_len to take into account the repeat + * counts. (The contribution of the bit length codes will be added later + * during the construction of bl_tree.) + */ +static void scan_tree(ct_data * tree, int max_code) +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) { + max_count = 138; + min_count = 3; + } + tree[max_code + 1].Len = 0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; + nextlen = tree[n + 1].Len; + if (++count < max_count && curlen == nextlen) + continue; + + if (count < min_count) { + G2.bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) + G2.bl_tree[curlen].Freq++; + G2.bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + G2.bl_tree[REPZ_3_10].Freq++; + } else { + G2.bl_tree[REPZ_11_138].Freq++; + } + count = 0; + prevlen = curlen; + + max_count = 7; + min_count = 4; + if (nextlen == 0) { + max_count = 138; + min_count = 3; + } else if (curlen == nextlen) { + max_count = 6; + min_count = 3; + } + } +} + + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. + */ +static void send_tree(ct_data * tree, int max_code) +{ + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + +/* tree[max_code+1].Len = -1; *//* guard already set */ + if (nextlen == 0) + max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; + nextlen = tree[n + 1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { + SEND_CODE(curlen, G2.bl_tree); + } while (--count); + } else if (curlen != 0) { + if (curlen != prevlen) { + SEND_CODE(curlen, G2.bl_tree); + count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + SEND_CODE(REP_3_6, G2.bl_tree); + send_bits(count - 3, 2); + } else if (count <= 10) { + SEND_CODE(REPZ_3_10, G2.bl_tree); + send_bits(count - 3, 3); + } else { + SEND_CODE(REPZ_11_138, G2.bl_tree); + send_bits(count - 11, 7); + } + count = 0; + prevlen = curlen; + if (nextlen == 0) { + max_count = 138; + min_count = 3; + } else if (curlen == nextlen) { + max_count = 6; + min_count = 3; + } else { + max_count = 7; + min_count = 4; + } + } +} + + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +static int build_bl_tree(void) +{ + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(G2.dyn_ltree, G2.l_desc.max_code); + scan_tree(G2.dyn_dtree, G2.d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(&G2.bl_desc); + /* opt_len now includes the length of the tree representations, except + * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES - 1; max_blindex >= 3; max_blindex--) { + if (G2.bl_tree[bl_order[max_blindex]].Len != 0) + break; + } + /* Update opt_len to include the bit length tree and counts */ + G2.opt_len += 3 * (max_blindex + 1) + 5 + 5 + 4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", G2.opt_len, G2.static_len)); + + return max_blindex; +} + + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. + */ +static void send_all_trees(int lcodes, int dcodes, int blcodes) +{ + int rank; /* index in bl_order */ + + Assert(lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert(lcodes <= L_CODES && dcodes <= D_CODES + && blcodes <= BL_CODES, "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(lcodes - 257, 5); /* not +255 as stated in appnote.txt */ + send_bits(dcodes - 1, 5); + send_bits(blcodes - 4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(G2.bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", G1.bits_sent)); + + send_tree((ct_data *) G2.dyn_ltree, lcodes - 1); /* send the literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", G1.bits_sent)); + + send_tree((ct_data *) G2.dyn_dtree, dcodes - 1); /* send the distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", G1.bits_sent)); +} + + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +static int ct_tally(int dist, int lc) +{ + G1.l_buf[G2.last_lit++] = lc; + if (dist == 0) { + /* lc is the unmatched char */ + G2.dyn_ltree[lc].Freq++; + } else { + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush) dist < (ush) MAX_DIST + && (ush) lc <= (ush) (MAX_MATCH - MIN_MATCH) + && (ush) D_CODE(dist) < (ush) D_CODES, "ct_tally: bad match" + ); + + G2.dyn_ltree[G2.length_code[lc] + LITERALS + 1].Freq++; + G2.dyn_dtree[D_CODE(dist)].Freq++; + + G1.d_buf[G2.last_dist++] = dist; + G2.flags |= G2.flag_bit; + } + G2.flag_bit <<= 1; + + /* Output the flags if they fill a byte: */ + if ((G2.last_lit & 7) == 0) { + G2.flag_buf[G2.last_flags++] = G2.flags; + G2.flags = 0; + G2.flag_bit = 1; + } + /* Try to guess if it is profitable to stop the current block here */ + if ((G2.last_lit & 0xfff) == 0) { + /* Compute an upper bound for the compressed length */ + ulg out_length = G2.last_lit * 8L; + ulg in_length = (ulg) G1.strstart - G1.block_start; + int dcode; + + for (dcode = 0; dcode < D_CODES; dcode++) { + out_length += G2.dyn_dtree[dcode].Freq * (5L + extra_dbits[dcode]); + } + out_length >>= 3; + Trace((stderr, + "\nlast_lit %u, last_dist %u, in %ld, out ~%ld(%ld%%) ", + G2.last_lit, G2.last_dist, in_length, out_length, + 100L - out_length * 100L / in_length)); + if (G2.last_dist < G2.last_lit / 2 && out_length < in_length / 2) + return 1; + } + return (G2.last_lit == LIT_BUFSIZE - 1 || G2.last_dist == DIST_BUFSIZE); + /* We avoid equality with LIT_BUFSIZE because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +static void compress_block(ct_data * ltree, ct_data * dtree) +{ + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned lx = 0; /* running index in l_buf */ + unsigned dx = 0; /* running index in d_buf */ + unsigned fx = 0; /* running index in flag_buf */ + uch flag = 0; /* current flags */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (G2.last_lit != 0) do { + if ((lx & 7) == 0) + flag = G2.flag_buf[fx++]; + lc = G1.l_buf[lx++]; + if ((flag & 1) == 0) { + SEND_CODE(lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr, " '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = G2.length_code[lc]; + SEND_CODE(code + LITERALS + 1, ltree); /* send the length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= G2.base_length[code]; + send_bits(lc, extra); /* send the extra length bits */ + } + dist = G1.d_buf[dx++]; + /* Here, dist is the match distance - 1 */ + code = D_CODE(dist); + Assert(code < D_CODES, "bad d_code"); + + SEND_CODE(code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= G2.base_dist[code]; + send_bits(dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + flag >>= 1; + } while (lx < G2.last_lit); + + SEND_CODE(END_BLOCK, ltree); +} + + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and output the encoded block to the zip file. This function + * returns the total compressed length for the file so far. + */ +static ulg flush_block(char *buf, ulg stored_len, int eof) +{ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex; /* index of last bit length code of non zero freq */ + + G2.flag_buf[G2.last_flags] = G2.flags; /* Save the flags for the last 8 items */ + + /* Construct the literal and distance trees */ + build_tree(&G2.l_desc); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", G2.opt_len, G2.static_len)); + + build_tree(&G2.d_desc); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", G2.opt_len, G2.static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(); + + /* Determine the best encoding. Compute first the block length in bytes */ + opt_lenb = (G2.opt_len + 3 + 7) >> 3; + static_lenb = (G2.static_len + 3 + 7) >> 3; + + Trace((stderr, + "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u dist %u ", + opt_lenb, G2.opt_len, static_lenb, G2.static_len, stored_len, + G2.last_lit, G2.last_dist)); + + if (static_lenb <= opt_lenb) + opt_lenb = static_lenb; + + /* If compression failed and this is the first and last block, + * and if the zip file can be seeked (to rewrite the local header), + * the whole file is transformed into a stored file: + */ + if (stored_len <= opt_lenb && eof && G2.compressed_len == 0L && seekable()) { + /* Since LIT_BUFSIZE <= 2*WSIZE, the input data must be there: */ + if (buf == NULL) + bb_error_msg("block vanished"); + + copy_block(buf, (unsigned) stored_len, 0); /* without header */ + G2.compressed_len = stored_len << 3; + + } else if (stored_len + 4 <= opt_lenb && buf != NULL) { + /* 4: two words for the lengths */ + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + send_bits((STORED_BLOCK << 1) + eof, 3); /* send block type */ + G2.compressed_len = (G2.compressed_len + 3 + 7) & ~7L; + G2.compressed_len += (stored_len + 4) << 3; + + copy_block(buf, (unsigned) stored_len, 1); /* with header */ + + } else if (static_lenb == opt_lenb) { + send_bits((STATIC_TREES << 1) + eof, 3); + compress_block((ct_data *) G2.static_ltree, (ct_data *) G2.static_dtree); + G2.compressed_len += 3 + G2.static_len; + } else { + send_bits((DYN_TREES << 1) + eof, 3); + send_all_trees(G2.l_desc.max_code + 1, G2.d_desc.max_code + 1, + max_blindex + 1); + compress_block((ct_data *) G2.dyn_ltree, (ct_data *) G2.dyn_dtree); + G2.compressed_len += 3 + G2.opt_len; + } + Assert(G2.compressed_len == G1.bits_sent, "bad compressed size"); + init_block(); + + if (eof) { + bi_windup(); + G2.compressed_len += 7; /* align on byte boundary */ + } + Tracev((stderr, "\ncomprlen %lu(%lu) ", G2.compressed_len >> 3, + G2.compressed_len - 7 * eof)); + + return G2.compressed_len >> 3; +} + + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(h, c) (h = (((h)<= 0L \ + ? (char*)&G1.window[(unsigned)G1.block_start] \ + : (char*)NULL, \ + (ulg)G1.strstart - G1.block_start, \ + (eof) \ + ) + +/* Insert string s in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * IN assertion: all calls to to INSERT_STRING are made with consecutive + * input characters and the first MIN_MATCH bytes of s are valid + * (except for the last MIN_MATCH-1 bytes of the input file). */ +#define INSERT_STRING(s, match_head) \ +do { \ + UPDATE_HASH(G1.ins_h, G1.window[(s) + MIN_MATCH-1]); \ + G1.prev[(s) & WMASK] = match_head = head[G1.ins_h]; \ + head[G1.ins_h] = (s); \ +} while (0) + +static ulg deflate(void) +{ + IPos hash_head; /* head of hash chain */ + IPos prev_match; /* previous match */ + int flush; /* set if current block must be flushed */ + int match_available = 0; /* set if previous match exists */ + unsigned match_length = MIN_MATCH - 1; /* length of best match */ + + /* Process the input block. */ + while (G1.lookahead != 0) { + /* Insert the string window[strstart .. strstart+2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + INSERT_STRING(G1.strstart, hash_head); + + /* Find the longest match, discarding those <= prev_length. + */ + G1.prev_length = match_length; + prev_match = G1.match_start; + match_length = MIN_MATCH - 1; + + if (hash_head != 0 && G1.prev_length < max_lazy_match + && G1.strstart - hash_head <= MAX_DIST + ) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + match_length = longest_match(hash_head); + /* longest_match() sets match_start */ + if (match_length > G1.lookahead) + match_length = G1.lookahead; + + /* Ignore a length 3 match if it is too distant: */ + if (match_length == MIN_MATCH && G1.strstart - G1.match_start > TOO_FAR) { + /* If prev_match is also MIN_MATCH, G1.match_start is garbage + * but we will ignore the current match anyway. + */ + match_length--; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (G1.prev_length >= MIN_MATCH && match_length <= G1.prev_length) { + check_match(G1.strstart - 1, prev_match, G1.prev_length); + flush = ct_tally(G1.strstart - 1 - prev_match, G1.prev_length - MIN_MATCH); + + /* Insert in hash table all strings up to the end of the match. + * strstart-1 and strstart are already inserted. + */ + G1.lookahead -= G1.prev_length - 1; + G1.prev_length -= 2; + do { + G1.strstart++; + INSERT_STRING(G1.strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. If lookahead < MIN_MATCH + * these bytes are garbage, but it does not matter since the + * next lookahead bytes will always be emitted as literals. + */ + } while (--G1.prev_length != 0); + match_available = 0; + match_length = MIN_MATCH - 1; + G1.strstart++; + if (flush) { + FLUSH_BLOCK(0); + G1.block_start = G1.strstart; + } + } else if (match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr, "%c", G1.window[G1.strstart - 1])); + if (ct_tally(0, G1.window[G1.strstart - 1])) { + FLUSH_BLOCK(0); + G1.block_start = G1.strstart; + } + G1.strstart++; + G1.lookahead--; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + match_available = 1; + G1.strstart++; + G1.lookahead--; + } + Assert(G1.strstart <= G1.isize && lookahead <= G1.isize, "a bit too far"); + + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + while (G1.lookahead < MIN_LOOKAHEAD && !G1.eofile) + fill_window(); + } + if (match_available) + ct_tally(0, G1.window[G1.strstart - 1]); + + return FLUSH_BLOCK(1); /* eof */ +} + + +/* =========================================================================== + * Initialize the bit string routines. + */ +static void bi_init(void) +{ + G1.bi_buf = 0; + G1.bi_valid = 0; +#ifdef DEBUG + G1.bits_sent = 0L; +#endif +} + + +/* =========================================================================== + * Initialize the "longest match" routines for a new file + */ +static void lm_init(ush * flagsp) +{ + unsigned j; + + /* Initialize the hash table. */ + memset(head, 0, HASH_SIZE * sizeof(*head)); + /* prev will be initialized on the fly */ + + /* speed options for the general purpose bit flag */ + *flagsp |= 2; /* FAST 4, SLOW 2 */ + /* ??? reduce max_chain_length for binary files */ + + G1.strstart = 0; + G1.block_start = 0L; + + G1.lookahead = file_read(G1.window, + sizeof(int) <= 2 ? (unsigned) WSIZE : 2 * WSIZE); + + if (G1.lookahead == 0 || G1.lookahead == (unsigned) -1) { + G1.eofile = 1; + G1.lookahead = 0; + return; + } + G1.eofile = 0; + /* Make sure that we always have enough lookahead. This is important + * if input comes from a device such as a tty. + */ + while (G1.lookahead < MIN_LOOKAHEAD && !G1.eofile) + fill_window(); + + G1.ins_h = 0; + for (j = 0; j < MIN_MATCH - 1; j++) + UPDATE_HASH(G1.ins_h, G1.window[j]); + /* If lookahead < MIN_MATCH, ins_h is garbage, but this is + * not important since only literal bytes will be emitted. + */ +} + + +/* =========================================================================== + * Allocate the match buffer, initialize the various tables and save the + * location of the internal file attribute (ascii/binary) and method + * (DEFLATE/STORE). + * One callsite in zip() + */ +static void ct_init(void) +{ + int n; /* iterates over tree elements */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + + G2.compressed_len = 0L; + +#ifdef NOT_NEEDED + if (G2.static_dtree[0].Len != 0) + return; /* ct_init already called */ +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES - 1; code++) { + G2.base_length[code] = length; + for (n = 0; n < (1 << extra_lbits[code]); n++) { + G2.length_code[length++] = code; + } + } + Assert(length == 256, "ct_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + G2.length_code[length - 1] = code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0; code < 16; code++) { + G2.base_dist[code] = dist; + for (n = 0; n < (1 << extra_dbits[code]); n++) { + G2.dist_code[dist++] = code; + } + } + Assert(dist == 256, "ct_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for (; code < D_CODES; code++) { + G2.base_dist[code] = dist << 7; + for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { + G2.dist_code[256 + dist++] = code; + } + } + Assert(dist == 256, "ct_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + /* already zeroed - it's in bss + for (n = 0; n <= MAX_BITS; n++) + G2.bl_count[n] = 0; */ + + n = 0; + while (n <= 143) { + G2.static_ltree[n++].Len = 8; + G2.bl_count[8]++; + } + while (n <= 255) { + G2.static_ltree[n++].Len = 9; + G2.bl_count[9]++; + } + while (n <= 279) { + G2.static_ltree[n++].Len = 7; + G2.bl_count[7]++; + } + while (n <= 287) { + G2.static_ltree[n++].Len = 8; + G2.bl_count[8]++; + } + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *) G2.static_ltree, L_CODES + 1); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + G2.static_dtree[n].Len = 5; + G2.static_dtree[n].Code = bi_reverse(n, 5); + } + + /* Initialize the first block of the first file: */ + init_block(); +} + + +/* =========================================================================== + * Deflate in to out. + * IN assertions: the input and output buffers are cleared. + */ + +static void zip(ulg time_stamp) +{ + ush deflate_flags = 0; /* pkzip -es, -en or -ex equivalent */ + + G1.outcnt = 0; + + /* Write the header to the gzip file. See algorithm.doc for the format */ + /* magic header for gzip files: 1F 8B */ + /* compression method: 8 (DEFLATED) */ + /* general flags: 0 */ + put_32bit(0x00088b1f); + put_32bit(time_stamp); + + /* Write deflated file to zip file */ + G1.crc = ~0; + + bi_init(); + ct_init(); + lm_init(&deflate_flags); + + put_8bit(deflate_flags); /* extra flags */ + put_8bit(3); /* OS identifier = 3 (Unix) */ + + deflate(); + + /* Write the crc and uncompressed size */ + put_32bit(~G1.crc); + put_32bit(G1.isize); + + flush_outbuf(); +} + + +/* ======================================================================== */ +static +char* make_new_name_gzip(char *filename) +{ + return xasprintf("%s.gz", filename); +} + +static +USE_DESKTOP(long long) int pack_gzip(unpack_info_t *info UNUSED_PARAM) +{ + struct stat s; + + clear_bufs(); + s.st_ctime = 0; + fstat(STDIN_FILENO, &s); + zip(s.st_ctime); + return 0; +} + +/* + * Linux kernel build uses gzip -d -n. We accept and ignore it. + * Man page says: + * -n --no-name + * gzip: do not save the original file name and time stamp. + * (The original name is always saved if the name had to be truncated.) + * gunzip: do not restore the original file name/time even if present + * (remove only the gzip suffix from the compressed file name). + * This option is the default when decompressing. + * -N --name + * gzip: always save the original file name and time stamp (this is the default) + * gunzip: restore the original file name and time stamp if present. + */ + +int gzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +#if ENABLE_GUNZIP +int gzip_main(int argc, char **argv) +#else +int gzip_main(int argc UNUSED_PARAM, char **argv) +#endif +{ + unsigned opt; + + /* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */ + opt = getopt32(argv, "cfv" USE_GUNZIP("dt") "q123456789n"); +#if ENABLE_GUNZIP /* gunzip_main may not be visible... */ + if (opt & 0x18) // -d and/or -t + return gunzip_main(argc, argv); +#endif + option_mask32 &= 0x7; /* ignore -q, -0..9 */ + //if (opt & 0x1) // -c + //if (opt & 0x2) // -f + //if (opt & 0x4) // -v + argv += optind; + + SET_PTR_TO_GLOBALS(xzalloc(sizeof(struct globals) + sizeof(struct globals2)) + + sizeof(struct globals)); + barrier(); + G2.l_desc.dyn_tree = G2.dyn_ltree; + G2.l_desc.static_tree = G2.static_ltree; + G2.l_desc.extra_bits = extra_lbits; + G2.l_desc.extra_base = LITERALS + 1; + G2.l_desc.elems = L_CODES; + G2.l_desc.max_length = MAX_BITS; + //G2.l_desc.max_code = 0; + + G2.d_desc.dyn_tree = G2.dyn_dtree; + G2.d_desc.static_tree = G2.static_dtree; + G2.d_desc.extra_bits = extra_dbits; + //G2.d_desc.extra_base = 0; + G2.d_desc.elems = D_CODES; + G2.d_desc.max_length = MAX_BITS; + //G2.d_desc.max_code = 0; + + G2.bl_desc.dyn_tree = G2.bl_tree; + //G2.bl_desc.static_tree = NULL; + G2.bl_desc.extra_bits = extra_blbits, + //G2.bl_desc.extra_base = 0; + G2.bl_desc.elems = BL_CODES; + G2.bl_desc.max_length = MAX_BL_BITS; + //G2.bl_desc.max_code = 0; + + /* Allocate all global buffers (for DYN_ALLOC option) */ + ALLOC(uch, G1.l_buf, INBUFSIZ); + ALLOC(uch, G1.outbuf, OUTBUFSIZ); + ALLOC(ush, G1.d_buf, DIST_BUFSIZE); + ALLOC(uch, G1.window, 2L * WSIZE); + ALLOC(ush, G1.prev, 1L << BITS); + + /* Initialise the CRC32 table */ + G1.crc_32_tab = crc32_filltable(NULL, 0); + + return bbunpack(argv, make_new_name_gzip, pack_gzip); +} diff --git a/archival/libunarchive/Kbuild b/archival/libunarchive/Kbuild new file mode 100644 index 0000000..364f917 --- /dev/null +++ b/archival/libunarchive/Kbuild @@ -0,0 +1,51 @@ +# Makefile for busybox +# +# Copyright (C) 1999-2004 by Erik Andersen +# +# Licensed under the GPL v2 or later, see the file LICENSE in this tarball. + +lib-y:= \ +\ + data_skip.o \ + data_extract_all.o \ + data_extract_to_stdout.o \ + data_extract_to_buffer.o \ +\ + filter_accept_all.o \ + filter_accept_list.o \ + filter_accept_reject_list.o \ +\ + header_skip.o \ + header_list.o \ + header_verbose_list.o \ +\ + seek_by_read.o \ + seek_by_jump.o \ +\ + data_align.o \ + find_list_entry.o \ + init_handle.o + +DPKG_FILES:= \ + get_header_ar.o \ + unpack_ar_archive.o \ + get_header_tar.o \ + filter_accept_list_reassign.o + +lib-$(CONFIG_AR) += get_header_ar.o unpack_ar_archive.o +lib-$(CONFIG_BUNZIP2) += decompress_bunzip2.o +lib-$(CONFIG_UNLZMA) += decompress_unlzma.o +lib-$(CONFIG_CPIO) += get_header_cpio.o +lib-$(CONFIG_DPKG) += $(DPKG_FILES) +lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES) +lib-$(CONFIG_GUNZIP) += decompress_unzip.o +lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_RPM) += open_transformer.o decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_TAR) += get_header_tar.o +lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o +lib-$(CONFIG_UNZIP) += decompress_unzip.o +lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompress.o +lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o +lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o +lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o +lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o diff --git a/archival/libunarchive/data_align.c b/archival/libunarchive/data_align.c new file mode 100644 index 0000000..9f2e843 --- /dev/null +++ b/archival/libunarchive/data_align.c @@ -0,0 +1,15 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary) +{ + unsigned skip_amount = (boundary - (archive_handle->offset % boundary)) % boundary; + + archive_handle->seek(archive_handle, skip_amount); + archive_handle->offset += skip_amount; +} diff --git a/archival/libunarchive/data_extract_all.c b/archival/libunarchive/data_extract_all.c new file mode 100644 index 0000000..8b1ee2a --- /dev/null +++ b/archival/libunarchive/data_extract_all.c @@ -0,0 +1,148 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + int dst_fd; + int res; + + if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) { + char *name = xstrdup(file_header->name); + bb_make_directory(dirname(name), -1, FILEUTILS_RECUR); + free(name); + } + + /* Check if the file already exists */ + if (archive_handle->ah_flags & ARCHIVE_EXTRACT_UNCONDITIONAL) { + /* Remove the entry if it exists */ + if (((file_header->mode & S_IFMT) != S_IFDIR) + && (unlink(file_header->name) == -1) + && (errno != ENOENT) + ) { + bb_perror_msg_and_die("cannot remove old file %s", + file_header->name); + } + } + else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) { + /* Remove the existing entry if its older than the extracted entry */ + struct stat statbuf; + if (lstat(file_header->name, &statbuf) == -1) { + if (errno != ENOENT) { + bb_perror_msg_and_die("cannot stat old file"); + } + } + else if (statbuf.st_mtime <= file_header->mtime) { + if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { + bb_error_msg("%s not created: newer or " + "same age file exists", file_header->name); + } + data_skip(archive_handle); + return; + } + else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) { + bb_perror_msg_and_die("cannot remove old file %s", + file_header->name); + } + } + + /* Handle hard links separately + * We identified hard links as regular files of size 0 with a symlink */ + if (S_ISREG(file_header->mode) && (file_header->link_target) + && (file_header->size == 0) + ) { + /* hard link */ + res = link(file_header->link_target, file_header->name); + if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { + bb_perror_msg("cannot create %slink " + "from %s to %s", "hard", + file_header->name, + file_header->link_target); + } + } else { + /* Create the filesystem entry */ + switch (file_header->mode & S_IFMT) { + case S_IFREG: { + /* Regular file */ + dst_fd = xopen3(file_header->name, O_WRONLY | O_CREAT | O_EXCL, + file_header->mode); + bb_copyfd_exact_size(archive_handle->src_fd, dst_fd, file_header->size); + close(dst_fd); + break; + } + case S_IFDIR: + res = mkdir(file_header->name, file_header->mode); + if ((res == -1) + && (errno != EISDIR) /* btw, Linux doesn't return this */ + && (errno != EEXIST) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("cannot make dir %s", file_header->name); + } + break; + case S_IFLNK: + /* Symlink */ + res = symlink(file_header->link_target, file_header->name); + if ((res == -1) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("cannot create %slink " + "from %s to %s", "sym", + file_header->name, + file_header->link_target); + } + break; + case S_IFSOCK: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + res = mknod(file_header->name, file_header->mode, file_header->device); + if ((res == -1) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("cannot create node %s", file_header->name); + } + break; + default: + bb_error_msg_and_die("unrecognized file type"); + } + } + + if (!(archive_handle->ah_flags & ARCHIVE_NOPRESERVE_OWN)) { +#if ENABLE_FEATURE_TAR_UNAME_GNAME + uid_t uid = file_header->uid; + gid_t gid = file_header->gid; + + if (file_header->uname) { + struct passwd *pwd = getpwnam(file_header->uname); + if (pwd) uid = pwd->pw_uid; + } + if (file_header->gname) { + struct group *grp = getgrnam(file_header->gname); + if (grp) gid = grp->gr_gid; + } + lchown(file_header->name, uid, gid); +#else + lchown(file_header->name, file_header->uid, file_header->gid); +#endif + } + if ((file_header->mode & S_IFMT) != S_IFLNK) { + /* uclibc has no lchmod, glibc is even stranger - + * it has lchmod which seems to do nothing! + * so we use chmod... */ + if (!(archive_handle->ah_flags & ARCHIVE_NOPRESERVE_PERM)) { + chmod(file_header->name, file_header->mode); + } + /* same for utime */ + if (archive_handle->ah_flags & ARCHIVE_PRESERVE_DATE) { + struct utimbuf t; + t.actime = t.modtime = file_header->mtime; + utime(file_header->name, &t); + } + } +} diff --git a/archival/libunarchive/data_extract_to_buffer.c b/archival/libunarchive/data_extract_to_buffer.c new file mode 100644 index 0000000..1d74e03 --- /dev/null +++ b/archival/libunarchive/data_extract_to_buffer.c @@ -0,0 +1,17 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright 2002 Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC data_extract_to_buffer(archive_handle_t *archive_handle) +{ + unsigned int size = archive_handle->file_header->size; + + archive_handle->buffer = xzalloc(size + 1); + xread(archive_handle->src_fd, archive_handle->buffer, size); +} diff --git a/archival/libunarchive/data_extract_to_stdout.c b/archival/libunarchive/data_extract_to_stdout.c new file mode 100644 index 0000000..a3efea1 --- /dev/null +++ b/archival/libunarchive/data_extract_to_stdout.c @@ -0,0 +1,14 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle) +{ + bb_copyfd_exact_size(archive_handle->src_fd, + STDOUT_FILENO, + archive_handle->file_header->size); +} diff --git a/archival/libunarchive/data_skip.c b/archival/libunarchive/data_skip.c new file mode 100644 index 0000000..438750f --- /dev/null +++ b/archival/libunarchive/data_skip.c @@ -0,0 +1,12 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC data_skip(archive_handle_t *archive_handle) +{ + archive_handle->seek(archive_handle, archive_handle->file_header->size); +} diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c new file mode 100644 index 0000000..b53720f --- /dev/null +++ b/archival/libunarchive/decompress_bunzip2.c @@ -0,0 +1,724 @@ +/* vi: set sw=4 ts=4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + Licensed under GPLv2 or later, see file LICENSE in this tarball for details. +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_unzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization (www.hospiceacadiana.com) in the name of + the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003. + + Manuel + */ + +#include "libbb.h" +#include "unarchive.h" + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_SHORT_WRITE (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinel value. */ + int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + * memory that persists between calls to bunzip + * Found the most used member: + * cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \ + * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER + * and moved it (inbufBitCount) to offset 0. + */ +struct bunzip_data { + /* I/O tracking data (file handles, buffers, positions, etc.) */ + unsigned inbufBitCount, inbufBits; + int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; + unsigned char *inbuf /*,*outbuf*/; + + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; + + /* The CRC values stored in the block header and calculated from the data */ + uint32_t headerCRC, totalCRC, writeCRC; + + /* Intermediate buffer and its size (in bytes) */ + unsigned *dbuf, dbufSize; + + /* For I/O error handling */ + jmp_buf jmpbuf; + + /* Big things go last (register-relative addressing can be larger for big offsets) */ + uint32_t crc32Table[256]; + unsigned char selectors[32768]; /* nSelectors=15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ +}; +/* typedef struct bunzip_data bunzip_data; -- done in .h file */ + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ + +static unsigned get_bits(bunzip_data *bd, int bits_wanted) +{ + unsigned bits = 0; + + /* If we need to get more data from the byte buffer, do so. (Loop getting + one byte at a time to enforce endianness and avoid unaligned access.) */ + while ((int)(bd->inbufBitCount) < bits_wanted) { + + /* If we need to read more data from file into byte buffer, do so */ + if (bd->inbufPos == bd->inbufCount) { + /* if "no input fd" case: in_fd == -1, read fails, we jump */ + bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); + if (bd->inbufCount <= 0) + longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); + bd->inbufPos = 0; + } + + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bd->inbufBitCount >= 24) { + bits = bd->inbufBits & ((1 << bd->inbufBitCount) - 1); + bits_wanted -= bd->inbufBitCount; + bits <<= bits_wanted; + bd->inbufBitCount = 0; + } + + /* Grab next 8 bits of input from buffer. */ + bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + } + + /* Calculate result */ + bd->inbufBitCount -= bits_wanted; + bits |= (bd->inbufBits >> bd->inbufBitCount) & ((1 << bits_wanted) - 1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ +static int get_next_block(bunzip_data *bd) +{ + struct group_data *hufGroup; + int dbufCount, nextSym, dbufSize, groupCount, *base, *limit, selector, + i, j, k, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; + unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; + unsigned *dbuf, origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + + /* Reset longjmp I/O error handling */ + i = setjmp(bd->jmpbuf); + if (i) return i; + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; + + /* We can add support for blockRandomised if anybody complains. There was + some code for this in busybox 1.0.0-pre3, but nobody ever noticed that + it didn't actually work. */ + if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; + + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the symbols + back to the corresponding bytes. */ + t = get_bits(bd, 16); + symTotal = 0; + for (i = 0; i < 16; i++) { + if (t & (1 << (15-i))) { + k = get_bits(bd, 16); + for (j = 0; j < 16; j++) + if (k & (1 << (15-j))) + symToByte[symTotal++] = (16*i) + j; + } + } + + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + + /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding + group. Read in the group selector list, which is stored as MTF encoded + bit runs. (MTF=Move To Front, as each value is used it's moved to the + start of the list.) */ + nSelectors = get_bits(bd, 15); + if (!nSelectors) return RETVAL_DATA_ERROR; + for (i = 0; i < groupCount; i++) mtfSymbol[i] = i; + for (i = 0; i < nSelectors; i++) { + + /* Get next value */ + for (j = 0; get_bits(bd, 1); j++) + if (j >= groupCount) return RETVAL_DATA_ERROR; + + /* Decode MTF to get the next selector */ + uc = mtfSymbol[j]; + for (;j;j--) mtfSymbol[j] = mtfSymbol[j-1]; + mtfSymbol[0] = selectors[i] = uc; + } + + /* Read the Huffman coding tables for each group, which code for symTotal + literal symbols, plus two run symbols (RUNA, RUNB) */ + symCount = symTotal + 2; + for (j = 0; j < groupCount; j++) { + unsigned char length[MAX_SYMBOLS]; + /* 8 bits is ALMOST enough for temp[], see below */ + unsigned temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp; + + /* Read Huffman code lengths for each symbol. They're stored in + a way similar to mtf; record a starting value for the first symbol, + and an offset from the previous value for everys symbol after that. + (Subtracting 1 before the loop and then adding it back at the end is + an optimization that makes the test inside the loop simpler: symbol + length 0 becomes negative, so an unsigned inequality catches it.) */ + t = get_bits(bd, 5) - 1; + for (i = 0; i < symCount; i++) { + for (;;) { + if ((unsigned)t > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. Else second bit indicates whether + to increment or decrement the value. Optimization: grab 2 + bits and unget the second if the first was 0. */ + k = get_bits(bd, 2); + if (k < 2) { + bd->inbufBitCount++; + break; + } + + /* Add one if second bit 1, else subtract 1. Avoids if/else */ + t += (((k+1) & 2) - 1); + } + + /* Correct for the initial -1, to get the final symbol length */ + length[i] = t + 1; + } + + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) maxLen = length[i]; + else if (length[i] < minLen) minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from length[]. + * + * permute[] is the lookup table for converting Huffman coded symbols + * into decoded symbols. base[] is the amount to subtract from the + * value of a Huffman symbol of a given length when using permute[]. + * + * limit[] indicates the largest numerical value a symbol with a given + * number of bits can have. This is how the Huffman codes can vary in + * length: each code with a value>limit[length] needs another bit. + */ + hufGroup = bd->groups + j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + + /* Note that minLen can't be smaller than 1, so we adjust the base + and limit array pointers so we're not always wasting the first + entry. We do this again when using them (during symbol decoding).*/ + base = hufGroup->base - 1; + limit = hufGroup->limit - 1; + + /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + temp[i] = limit[i] = 0; + for (t = 0; t < symCount; t++) + if (length[t] == i) + hufGroup->permute[pp++] = t; + } + + /* Count symbols coded for at each bit length */ + /* NB: in pathological cases, temp[8] can end ip being 256. + * That's why uint8_t is too small for temp[]. */ + for (i = 0; i < symCount; i++) temp[length[i]]++; + + /* Calculate limit[] (the largest symbol-coding value at each bit + * length, which is (previous limit<<1)+symbols at this level), and + * base[] (number of symbols to ignore at each bit length, which is + * limit minus the cumulative count of symbols coded for already). */ + pp = t = 0; + for (i = minLen; i < maxLen; i++) { + pp += temp[i]; + + /* We read the largest possible symbol size and then unget bits + after determining how many we need, and those extra bits could + be set to anything. (They're noise from future symbols.) At + each level we're really only interested in the first few bits, + so here we set all the trailing to-be-ignored bits to 1 so they + don't affect the value>limit[length] comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + t += temp[i]; + base[i+1] = pp - t; + } + limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ + limit[maxLen] = pp + temp[maxLen] - 1; + base[minLen] = 0; + } + + /* We've finished reading and digesting the block header. Now read this + block's Huffman coded symbols from the file and undo the Huffman coding + and run length encoding, saving the result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To Front table */ + memset(byteCount, 0, sizeof(byteCount)); /* smaller, maybe slower? */ + for (i = 0; i < 256; i++) { + //byteCount[i] = 0; + mtfSymbol[i] = (unsigned char)i; + } + + /* Loop through compressed symbols. */ + + runPos = dbufCount = selector = 0; + for (;;) { + + /* Fetch next Huffman coding group from list. */ + symCount = GROUP_SIZE - 1; + if (selector >= nSelectors) return RETVAL_DATA_ERROR; + hufGroup = bd->groups + selectors[selector++]; + base = hufGroup->base - 1; + limit = hufGroup->limit - 1; + continue_this_group: + + /* Read next Huffman-coded symbol. */ + + /* Note: It is far cheaper to read maxLen bits and back up than it is + to read minLen bits and then an additional bit at a time, testing + as we go. Because there is a trailing last block (with file CRC), + there is no danger of the overread causing an unexpected EOF for a + valid compressed file. As a further optimization, we do the read + inline (falling back to a call to get_bits if the buffer runs + dry). The following (up to got_huff_bits:) is equivalent to + j = get_bits(bd, hufGroup->maxLen); + */ + while ((int)(bd->inbufBitCount) < hufGroup->maxLen) { + if (bd->inbufPos == bd->inbufCount) { + j = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount -= hufGroup->maxLen; + j = (bd->inbufBits >> bd->inbufBitCount) & ((1 << hufGroup->maxLen) - 1); + + got_huff_bits: + + /* Figure how how many bits are in next symbol and unget extras */ + i = hufGroup->minLen; + while (j > limit[i]) ++i; + bd->inbufBitCount += (hufGroup->maxLen - i); + + /* Huffman decode value to get nextSym (with bounds checking) */ + if (i > hufGroup->maxLen) + return RETVAL_DATA_ERROR; + j = (j >> (hufGroup->maxLen - i)) - base[i]; + if ((unsigned)j >= MAX_SYMBOLS) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[j]; + + /* We have now decoded the symbol, which indicates either a new literal + byte, or a repeated run of the most recent literal byte. First, + check if nextSym indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. */ + if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ + + /* If this is the start of a new run, zero out counter */ + if (!runPos) { + runPos = 1; + t = 0; + } + + /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at + each bit position, add 1 or 2 instead. For example, + 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. + You can make any bit pattern that way using 1 less symbol than + the basic or 0/1 method (except all bits 0, which would use no + symbols, but a run of length 0 doesn't mean anything in this + context). Thus space is saved. */ + t += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ + if (runPos < dbufSize) runPos <<= 1; + goto end_of_huffman_loop; + } + + /* When we hit the first non-run symbol after a run, we now know + how many times to repeat the last literal, so append that many + copies to our buffer of decoded symbols (dbuf) now. (The last + literal used is the one at the head of the mtfSymbol array.) */ + if (runPos) { + runPos = 0; + if (dbufCount + t >= dbufSize) return RETVAL_DATA_ERROR; + + uc = symToByte[mtfSymbol[0]]; + byteCount[uc] += t; + while (t--) dbuf[dbufCount++] = uc; + } + + /* Is this the terminating symbol? */ + if (nextSym > symTotal) break; + + /* At this point, nextSym indicates a new literal character. Subtract + one to get the position in the MTF array at which this literal is + currently to be found. (Note that the result can't be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another instance of the + first symbol in the mtf array, position 0, would have been handled + as part of a run above. Therefore 1 unused mtf position minus + 2 non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + + /* Adjust the MTF array. Since we typically expect to move only a + * small number of symbols, and are bound by 256 in any case, using + * memmove here would typically be bigger and slower due to function + * call overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (unsigned)uc; + + /* Skip group initialization if we're not done with this group. Done + * this way to avoid compiler warning. */ + end_of_huffman_loop: + if (symCount--) goto continue_this_group; + } + + /* At this point, we've read all the Huffman-coded symbols (and repeated + runs) for this block from the input stream, and decoded them into the + intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. + Now undo the Burrows-Wheeler transform on dbuf. + See http://dogma.net/markn/articles/bwt/bwt.htm + */ + + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + k = j + byteCount[i]; + byteCount[i] = j; + j = k; + } + + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uc = (unsigned char)(dbuf[i] & 0xff); + dbuf[byteCount[uc]] |= (i << 8); + byteCount[uc]++; + } + + /* Decode first byte by hand to initialize "previous" byte. Note that it + doesn't get output, and if the first three characters are identical + it doesn't qualify as a run (hence writeRunCountdown=5). */ + if (dbufCount) { + if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; + bd->writePos = dbuf[origPtr]; + bd->writeCurrent = (unsigned char)(bd->writePos & 0xff); + bd->writePos >>= 8; + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo burrows-wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd=-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. +*/ +int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) +{ + const unsigned *dbuf; + int pos, current, previous, gotcount; + + /* If last read was short due to end of file, return last block now */ + if (bd->writeCount < 0) return bd->writeCount; + + gotcount = 0; + dbuf = bd->dbuf; + pos = bd->writePos; + current = bd->writeCurrent; + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven't + Huffman-decoded a block into the intermediate buffer yet). */ + if (bd->writeCopies) { + + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + + /* Loop outputting bytes */ + for (;;) { + + /* If the output buffer is full, snapshot state and return */ + if (gotcount >= len) { + bd->writePos = pos; + bd->writeCurrent = current; + bd->writeCopies++; + return len; + } + + /* Write next byte into output buffer, updating CRC */ + outbuf[gotcount++] = current; + bd->writeCRC = (bd->writeCRC << 8) + ^ bd->crc32Table[(bd->writeCRC >> 24) ^ current]; + + /* Loop now if we're outputting multiple copies of this byte */ + if (bd->writeCopies) { + --bd->writeCopies; + continue; + } + decode_next_byte: + if (!bd->writeCount--) break; + /* Follow sequence vector to undo Burrows-Wheeler transform */ + previous = current; + pos = dbuf[pos]; + current = pos & 0xff; + pos >>= 8; + + /* After 3 consecutive copies of the same byte, the 4th + * is a repeat count. We count down from 4 instead + * of counting up because testing for non-zero is faster */ + if (--bd->writeRunCountdown) { + if (current != previous) + bd->writeRunCountdown = 4; + } else { + + /* We have a repeated run, this byte indicates the count */ + bd->writeCopies = current; + current = previous; + bd->writeRunCountdown = 5; + + /* Sometimes there are just 3 bytes (run length 0) */ + if (!bd->writeCopies) goto decode_next_byte; + + /* Subtract the 1 copy we'd output anyway to get extras */ + --bd->writeCopies; + } + } + + /* Decompression of this block completed successfully */ + bd->writeCRC = ~bd->writeCRC; + bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; + + /* If this block had a CRC error, force file level CRC error. */ + if (bd->writeCRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC + 1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next block of input */ + /* (previous is just a convenient unused temp variable here) */ + previous = get_next_block(bd); + if (previous) { + bd->writeCount = previous; + return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; + } + bd->writeCRC = ~0; + pos = bd->writePos; + current = bd->writeCurrent; + goto decode_next_byte; +} + +/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. */ + +/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() + should work for NOFORK applets too, we must be extremely careful to not leak + any allocations! */ +int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, + int len) +{ + bunzip_data *bd; + unsigned i; + enum { + BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0', + h0 = ('h' << 8) + '0', + }; + + /* Figure out how much data to allocate */ + i = sizeof(bunzip_data); + if (in_fd != -1) i += IOBUF_SIZE; + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = xzalloc(i); + + /* Setup input buffer */ + bd->in_fd = in_fd; + if (-1 == in_fd) { + /* in this case, bd->inbuf is read-only */ + bd->inbuf = (void*)inbuf; /* cast away const-ness */ + bd->inbufCount = len; + } else + bd->inbuf = (unsigned char *)(bd + 1); + + /* Init the CRC32 table (big endian) */ + crc32_filltable(bd->crc32Table, 1); + + /* Setup for I/O error handling via longjmp */ + i = setjmp(bd->jmpbuf); + if (i) return i; + + /* Ensure that file starts with "BZh['1'-'9']." */ + /* Update: now caller verifies 1st two bytes, makes .gz/.bz2 + * integration easier */ + /* was: */ + /* i = get_bits(bd, 32); */ + /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */ + i = get_bits(bd, 16); + if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + /* bd->dbufSize = 100000 * (i - BZh0); */ + bd->dbufSize = 100000 * (i - h0); + + /* Cannot use xmalloc - may leak bd in NOFORK case! */ + bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(int)); + if (!bd->dbuf) { + free(bd); + xfunc_die(); + } + return RETVAL_OK; +} + +void FAST_FUNC dealloc_bunzip(bunzip_data *bd) +{ + free(bd->dbuf); + free(bd); +} + + +/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ +USE_DESKTOP(long long) int FAST_FUNC +unpack_bz2_stream(int src_fd, int dst_fd) +{ + USE_DESKTOP(long long total_written = 0;) + char *outbuf; + bunzip_data *bd; + int i; + + outbuf = xmalloc(IOBUF_SIZE); + i = start_bunzip(&bd, src_fd, NULL, 0); + if (!i) { + for (;;) { + i = read_bunzip(bd, outbuf, IOBUF_SIZE); + if (i <= 0) break; + if (i != full_write(dst_fd, outbuf, i)) { + i = RETVAL_SHORT_WRITE; + break; + } + USE_DESKTOP(total_written += i;) + } + } + + /* Check CRC and release memory */ + + if (i == RETVAL_LAST_BLOCK) { + if (bd->headerCRC != bd->totalCRC) { + bb_error_msg("CRC error"); + } else { + i = RETVAL_OK; + } + } else if (i == RETVAL_SHORT_WRITE) { + bb_error_msg("short write"); + } else { + bb_error_msg("bunzip error %d", i); + } + dealloc_bunzip(bd); + free(outbuf); + + return i ? i : USE_DESKTOP(total_written) + 0; +} + +USE_DESKTOP(long long) int FAST_FUNC +unpack_bz2_stream_prime(int src_fd, int dst_fd) +{ + unsigned char magic[2]; + xread(src_fd, magic, 2); + if (magic[0] != 'B' || magic[1] != 'Z') { + bb_error_msg_and_die("invalid magic"); + } + return unpack_bz2_stream(src_fd, dst_fd); +} + +#ifdef TESTING + +static char *const bunzip_errors[] = { + NULL, "Bad file checksum", "Not bzip data", + "Unexpected input EOF", "Unexpected output EOF", "Data error", + "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported" +}; + +/* Dumb little test thing, decompress stdin to stdout */ +int main(int argc, char **argv) +{ + int i; + char c; + + int i = unpack_bz2_stream_prime(0, 1); + if (i < 0) + fprintf(stderr, "%s\n", bunzip_errors[-i]); + else if (read(STDIN_FILENO, &c, 1)) + fprintf(stderr, "Trailing garbage ignored\n"); + return -i; +} +#endif diff --git a/archival/libunarchive/decompress_uncompress.c b/archival/libunarchive/decompress_uncompress.c new file mode 100644 index 0000000..fe1491e --- /dev/null +++ b/archival/libunarchive/decompress_uncompress.c @@ -0,0 +1,307 @@ +/* vi: set sw=4 ts=4: */ +/* uncompress for busybox -- (c) 2002 Robert Griebl + * + * based on the original compress42.c source + * (see disclaimer below) + */ + +/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. + * + * Authors: + * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) + * Jim McKie (decvax!mcvax!jim) + * Steve Davies (decvax!vax135!petsd!peora!srd) + * Ken Turkowski (decvax!decwrl!turtlevax!ken) + * James A. Woods (decvax!ihnp4!ames!jaw) + * Joe Orost (decvax!vax135!petsd!joe) + * Dave Mack (csu@alembic.acs.com) + * Peter Jannesen, Network Communication Systems + * (peter@ncs.nl) + * + * marc@suse.de : a small security fix for a buffer overflow + * + * [... History snipped ...] + * + */ + +#include "libbb.h" +#include "unarchive.h" + + +/* Default input buffer size */ +#define IBUFSIZ 2048 + +/* Default output buffer size */ +#define OBUFSIZ 2048 + +/* Defines for third byte of header */ +#define BIT_MASK 0x1f /* Mask for 'number of compresssion bits' */ + /* Masks 0x20 and 0x40 are free. */ + /* I think 0x20 should mean that there is */ + /* a fourth header byte (for expansion). */ +#define BLOCK_MODE 0x80 /* Block compression if table is full and */ + /* compression rate is dropping flush tables */ + /* the next two codes should not be changed lightly, as they must not */ + /* lie within the contiguous general code space. */ +#define FIRST 257 /* first free entry */ +#define CLEAR 256 /* table clear output code */ + +#define INIT_BITS 9 /* initial number of bits/code */ + + +/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */ +#define HBITS 17 /* 50% occupancy */ +#define HSIZE (1< BITS) { + bb_error_msg("compressed with %d bits, can only handle " + BITS_STR" bits", maxbits); + goto err; + } + + n_bits = INIT_BITS; + maxcode = MAXCODE(INIT_BITS) - 1; + bitmask = (1 << INIT_BITS) - 1; + oldcode = -1; + finchar = 0; + outpos = 0; + posbits = 0 << 3; + + free_ent = ((block_mode) ? FIRST : 256); + + /* As above, initialize the first 256 entries in the table. */ + /*clear_tab_prefixof(); - done by xzalloc */ + + for (code = 255; code >= 0; --code) { + tab_suffixof(code) = (unsigned char) code; + } + + do { + resetbuf: + { + int i; + int e; + int o; + + o = posbits >> 3; + e = insize - o; + + for (i = 0; i < e; ++i) + inbuf[i] = inbuf[i + o]; + + insize = e; + posbits = 0; + } + + if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) { + rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ); +//error check?? + insize += rsize; + } + + inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 : + (insize << 3) - (n_bits - 1)); + + while (inbits > posbits) { + if (free_ent > maxcode) { + posbits = + ((posbits - 1) + + ((n_bits << 3) - + (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); + ++n_bits; + if (n_bits == maxbits) { + maxcode = maxmaxcode; + } else { + maxcode = MAXCODE(n_bits) - 1; + } + bitmask = (1 << n_bits) - 1; + goto resetbuf; + } + { + unsigned char *p = &inbuf[posbits >> 3]; + + code = ((((long) (p[0])) | ((long) (p[1]) << 8) | + ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask; + } + posbits += n_bits; + + + if (oldcode == -1) { + oldcode = code; + finchar = (int) oldcode; + outbuf[outpos++] = (unsigned char) finchar; + continue; + } + + if (code == CLEAR && block_mode) { + clear_tab_prefixof(); + free_ent = FIRST - 1; + posbits = + ((posbits - 1) + + ((n_bits << 3) - + (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); + n_bits = INIT_BITS; + maxcode = MAXCODE(INIT_BITS) - 1; + bitmask = (1 << INIT_BITS) - 1; + goto resetbuf; + } + + incode = code; + stackp = de_stack; + + /* Special case for KwKwK string. */ + if (code >= free_ent) { + if (code > free_ent) { + unsigned char *p; + + posbits -= n_bits; + p = &inbuf[posbits >> 3]; + + bb_error_msg + ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", + insize, posbits, p[-1], p[0], p[1], p[2], p[3], + (posbits & 07)); + bb_error_msg("uncompress: corrupt input"); + goto err; + } + + *--stackp = (unsigned char) finchar; + code = oldcode; + } + + /* Generate output characters in reverse order */ + while ((long) code >= (long) 256) { + *--stackp = tab_suffixof(code); + code = tab_prefixof(code); + } + + finchar = tab_suffixof(code); + *--stackp = (unsigned char) finchar; + + /* And put them out in forward order */ + { + int i; + + i = de_stack - stackp; + if (outpos + i >= OBUFSIZ) { + do { + if (i > OBUFSIZ - outpos) { + i = OBUFSIZ - outpos; + } + + if (i > 0) { + memcpy(outbuf + outpos, stackp, i); + outpos += i; + } + + if (outpos >= OBUFSIZ) { + full_write(fd_out, outbuf, outpos); +//error check?? + USE_DESKTOP(total_written += outpos;) + outpos = 0; + } + stackp += i; + i = de_stack - stackp; + } while (i > 0); + } else { + memcpy(outbuf + outpos, stackp, i); + outpos += i; + } + } + + /* Generate the new entry. */ + code = free_ent; + if (code < maxmaxcode) { + tab_prefixof(code) = (unsigned short) oldcode; + tab_suffixof(code) = (unsigned char) finchar; + free_ent = code + 1; + } + + /* Remember previous code. */ + oldcode = incode; + } + + } while (rsize > 0); + + if (outpos > 0) { + full_write(fd_out, outbuf, outpos); +//error check?? + USE_DESKTOP(total_written += outpos;) + } + + retval = USE_DESKTOP(total_written) + 0; + err: + free(inbuf); + free(outbuf); + free(htab); + free(codetab); + return retval; +} diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c new file mode 100644 index 0000000..2cfcd9b --- /dev/null +++ b/archival/libunarchive/decompress_unlzma.c @@ -0,0 +1,503 @@ +/* vi: set sw=4 ts=4: */ +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +#if ENABLE_FEATURE_LZMA_FAST +# define speed_inline ALWAYS_INLINE +#else +# define speed_inline +#endif + + +typedef struct { + int fd; + uint8_t *ptr; + +/* Was keeping rc on stack in unlzma and separately allocating buffer, + * but with "buffer 'attached to' allocated rc" code is smaller: */ + /* uint8_t *buffer; */ +#define RC_BUFFER ((uint8_t*)(rc+1)) + + uint8_t *buffer_end; + +/* Had provisions for variable buffer, but we don't need it here */ + /* int buffer_size; */ +#define RC_BUFFER_SIZE 0x10000 + + uint32_t code; + uint32_t range; + uint32_t bound; +} rc_t; + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +/* Called twice: once at startup and once in rc_normalize() */ +static void rc_read(rc_t *rc) +{ + int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); + if (buffer_size <= 0) + bb_error_msg_and_die("unexpected EOF"); + rc->ptr = RC_BUFFER; + rc->buffer_end = RC_BUFFER + buffer_size; +} + +/* Called once */ +static rc_t* rc_init(int fd) /*, int buffer_size) */ +{ + int i; + rc_t *rc; + + rc = xmalloc(sizeof(*rc) + RC_BUFFER_SIZE); + + rc->fd = fd; + /* rc->buffer_size = buffer_size; */ + rc->buffer_end = RC_BUFFER + RC_BUFFER_SIZE; + rc->ptr = rc->buffer_end; + + rc->code = 0; + rc->range = 0xFFFFFFFF; + for (i = 0; i < 5; i++) { + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; + } + return rc; +} + +/* Called once */ +static ALWAYS_INLINE void rc_free(rc_t *rc) +{ + free(rc); +} + +/* Called twice, but one callsite is in speed_inline'd rc_is_bit_0_helper() */ +static void rc_do_normalize(rc_t *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} +static ALWAYS_INLINE void rc_normalize(rc_t *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) { + rc_do_normalize(rc); + } +} + +/* rc_is_bit_0 is called 9 times */ +/* Why rc_is_bit_0_helper exists? + * Because we want to always expose (rc->code < rc->bound) to optimizer. + * Thus rc_is_bit_0 is always inlined, and rc_is_bit_0_helper is inlined + * only if we compile for speed. + */ +static speed_inline uint32_t rc_is_bit_0_helper(rc_t *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + return rc->bound; +} +static ALWAYS_INLINE int rc_is_bit_0(rc_t *rc, uint16_t *p) +{ + uint32_t t = rc_is_bit_0_helper(rc, p); + return rc->code < t; +} + +/* Called ~10 times, but very small, thus inlined */ +static speed_inline void rc_update_bit_0(rc_t *rc, uint16_t *p) +{ + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; +} +static speed_inline void rc_update_bit_1(rc_t *rc, uint16_t *p) +{ + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; +} + +/* Called 4 times in unlzma loop */ +static int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) +{ + if (rc_is_bit_0(rc, p)) { + rc_update_bit_0(rc, p); + *symbol *= 2; + return 0; + } else { + rc_update_bit_1(rc, p); + *symbol = *symbol * 2 + 1; + return 1; + } +} + +/* Called once */ +static ALWAYS_INLINE int rc_direct_bit(rc_t *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static speed_inline void +rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +typedef struct { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} __attribute__ ((packed)) lzma_header_t; + + +/* #defines will force compiler to compute/optimize each one with each usage. + * Have heart and use enum instead. */ +enum { + LZMA_BASE_SIZE = 1846, + LZMA_LIT_SIZE = 768, + + LZMA_NUM_POS_BITS_MAX = 4, + + LZMA_LEN_NUM_LOW_BITS = 3, + LZMA_LEN_NUM_MID_BITS = 3, + LZMA_LEN_NUM_HIGH_BITS = 8, + + LZMA_LEN_CHOICE = 0, + LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1), + LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1), + LZMA_LEN_MID = (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))), + LZMA_LEN_HIGH = (LZMA_LEN_MID \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))), + LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)), + + LZMA_NUM_STATES = 12, + LZMA_NUM_LIT_STATES = 7, + + LZMA_START_POS_MODEL_INDEX = 4, + LZMA_END_POS_MODEL_INDEX = 14, + LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)), + + LZMA_NUM_POS_SLOT_BITS = 6, + LZMA_NUM_LEN_TO_POS_STATES = 4, + + LZMA_NUM_ALIGN_BITS = 4, + + LZMA_MATCH_MIN_LEN = 2, + + LZMA_IS_MATCH = 0, + LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), + LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES), + LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES), + LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES), + LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES), + LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), + LZMA_SPEC_POS = (LZMA_POS_SLOT \ + + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)), + LZMA_ALIGN = (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX), + LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)), + LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS), + LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS), +}; + + +USE_DESKTOP(long long) int FAST_FUNC +unpack_lzma_stream(int src_fd, int dst_fd) +{ + USE_DESKTOP(long long total_written = 0;) + lzma_header_t header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint32_t pos; + uint16_t *p; + uint16_t *prob; + uint16_t *prob_lit; + int num_bits; + int num_probs; + rc_t *rc; + int i, mi; + uint8_t *buffer; + uint8_t previous_byte = 0; + size_t buffer_pos = 0, global_pos = 0; + int len = 0; + int state = 0; + uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; + + xread(src_fd, &header, sizeof(header)); + + if (header.pos >= (9 * 5 * 5)) + bb_error_msg_and_die("bad header"); + mi = header.pos / 9; + lc = header.pos % 9; + pb = mi / 5; + lp = mi % 5; + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + header.dict_size = SWAP_LE32(header.dict_size); + header.dst_size = SWAP_LE64(header.dst_size); + + if (header.dict_size == 0) + header.dict_size = 1; + + buffer = xmalloc(MIN(header.dst_size, header.dict_size)); + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = xmalloc(num_probs * sizeof(*p)); + num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */ + + while (global_pos + buffer_pos < header.dst_size) { + int pos_state = (buffer_pos + global_pos) & pos_state_mask; + + prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; + if (rc_is_bit_0(rc, prob)) { + mi = 1; + rc_update_bit_0(rc, prob); + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) + + (previous_byte >> (8 - lc)) + ) + ) + ); + + if (state >= LZMA_NUM_LIT_STATES) { + int match_byte; + + pos = buffer_pos - rep0; + while (pos >= header.dict_size) + pos += header.dict_size; + match_byte = buffer[pos]; + do { + int bit; + + match_byte <<= 1; + bit = match_byte & 0x100; + prob_lit = prob + 0x100 + bit + mi; + bit ^= (rc_get_bit(rc, prob_lit, &mi) << 8); /* 0x100 or 0 */ + if (bit) + break; + } while (mi < 0x100); + } + while (mi < 0x100) { + prob_lit = prob + mi; + rc_get_bit(rc, prob_lit, &mi); + } + + state -= 3; + if (state < 4-3) + state = 0; + if (state >= 10-3) + state -= 6-3; + + previous_byte = (uint8_t) mi; +#if ENABLE_FEATURE_LZMA_FAST + one_byte1: + buffer[buffer_pos++] = previous_byte; + if (buffer_pos == header.dict_size) { + buffer_pos = 0; + global_pos += header.dict_size; + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) + goto bad; + USE_DESKTOP(total_written += header.dict_size;) + } +#else + len = 1; + goto one_byte2; +#endif + } else { + int offset; + uint16_t *prob_len; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP + state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + state = state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob = p + LZMA_LEN_CODER; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G0 + state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + prob = (p + LZMA_IS_REP_0_LONG + + (state << LZMA_NUM_POS_BITS_MAX) + + pos_state + ); + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; +#if ENABLE_FEATURE_LZMA_FAST + pos = buffer_pos - rep0; + while (pos >= header.dict_size) + pos += header.dict_size; + previous_byte = buffer[pos]; + goto one_byte1; +#else + len = 1; + goto string; +#endif + } else { + rc_update_bit_1(rc, prob); + } + } else { + uint32_t distance; + + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G1 + state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = rep1; + } else { + rc_update_bit_1(rc, prob); + prob = p + LZMA_IS_REP_G2 + state; + if (rc_is_bit_0(rc, prob)) { + rc_update_bit_0(rc, prob); + distance = rep2; + } else { + rc_update_bit_1(rc, prob); + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob + LZMA_LEN_CHOICE; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_LOW + + (pos_state << LZMA_LEN_NUM_LOW_BITS)); + offset = 0; + num_bits = LZMA_LEN_NUM_LOW_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_CHOICE_2; + if (rc_is_bit_0(rc, prob_len)) { + rc_update_bit_0(rc, prob_len); + prob_len = (prob + LZMA_LEN_MID + + (pos_state << LZMA_LEN_NUM_MID_BITS)); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits = LZMA_LEN_NUM_MID_BITS; + } else { + rc_update_bit_1(rc, prob_len); + prob_len = prob + LZMA_LEN_HIGH; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits = LZMA_LEN_NUM_HIGH_BITS; + } + } + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (state < 4) { + int pos_slot; + + state += LZMA_NUM_LIT_STATES; + prob = p + LZMA_POS_SLOT + + ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob, LZMA_NUM_POS_SLOT_BITS, + &pos_slot); + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + num_bits = (pos_slot >> 1) - 1; + rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + rep0 <<= num_bits; + prob = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; + } else { + num_bits -= LZMA_NUM_ALIGN_BITS; + while (num_bits--) + rep0 = (rep0 << 1) | rc_direct_bit(rc); + prob = p + LZMA_ALIGN; + rep0 <<= LZMA_NUM_ALIGN_BITS; + num_bits = LZMA_NUM_ALIGN_BITS; + } + i = 1; + mi = 1; + while (num_bits--) { + if (rc_get_bit(rc, prob + mi, &mi)) + rep0 |= i; + i <<= 1; + } + } else + rep0 = pos_slot; + if (++rep0 == 0) + break; + } + + len += LZMA_MATCH_MIN_LEN; + SKIP_FEATURE_LZMA_FAST(string:) + do { + pos = buffer_pos - rep0; + while (pos >= header.dict_size) + pos += header.dict_size; + previous_byte = buffer[pos]; + SKIP_FEATURE_LZMA_FAST(one_byte2:) + buffer[buffer_pos++] = previous_byte; + if (buffer_pos == header.dict_size) { + buffer_pos = 0; + global_pos += header.dict_size; + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) + goto bad; + USE_DESKTOP(total_written += header.dict_size;) + } + len--; + } while (len != 0 && buffer_pos < header.dst_size); + } + } + + { + SKIP_DESKTOP(int total_written = 0; /* success */) + USE_DESKTOP(total_written += buffer_pos;) + if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) { + bad: + total_written = -1; /* failure */ + } + rc_free(rc); + free(p); + free(buffer); + return total_written; + } +} diff --git a/archival/libunarchive/decompress_unzip.c b/archival/libunarchive/decompress_unzip.c new file mode 100644 index 0000000..e83cd4f --- /dev/null +++ b/archival/libunarchive/decompress_unzip.c @@ -0,0 +1,1253 @@ +/* vi: set sw=4 ts=4: */ +/* + * gunzip implementation for busybox + * + * Based on GNU gzip v1.2.4 Copyright (C) 1992-1993 Jean-loup Gailly. + * + * Originally adjusted for busybox by Sven Rudolph + * based on gzip sources + * + * Adjusted further by Erik Andersen to support + * files as well as stdin/stdout, and to generally behave itself wrt + * command line handling. + * + * General cleanup to better adhere to the style guide and make use of standard + * busybox functions by Glenn McGrath + * + * read_gz interface + associated hacking by Laurence Anderson + * + * Fixed huft_build() so decoding end-of-block code does not grab more bits + * than necessary (this is required by unzip applet), added inflate_cleanup() + * to free leaked bytebuffer memory (used in unzip.c), and some minor style + * guide cleanups by Ed Clark + * + * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface + * Copyright (C) 1992-1993 Jean-loup Gailly + * The unzip code was written and put in the public domain by Mark Adler. + * Portions of the lzw code are derived from the public domain 'compress' + * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, + * Ken Turkowski, Dave Mack and Peter Jannesen. + * + * See the file algorithm.doc for the compression algorithms and file formats. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include +#include "libbb.h" +#include "unarchive.h" + +typedef struct huft_t { + unsigned char e; /* number of extra bits or operation */ + unsigned char b; /* number of bits in this code or subcode */ + union { + unsigned short n; /* literal, length base, or distance base */ + struct huft_t *t; /* pointer to next level of table */ + } v; +} huft_t; + +enum { + /* gunzip_window size--must be a power of two, and + * at least 32K for zip's deflate method */ + GUNZIP_WSIZE = 0x8000, + /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ + BMAX = 16, /* maximum bit length of any code (16 for explode) */ + N_MAX = 288, /* maximum number of codes in any set */ +}; + + +/* This is somewhat complex-looking arrangement, but it allows + * to place decompressor state either in bss or in + * malloc'ed space simply by changing #defines below. + * Sizes on i386: + * text data bss dec hex + * 5256 0 108 5364 14f4 - bss + * 4915 0 0 4915 1333 - malloc + */ +#define STATE_IN_BSS 0 +#define STATE_IN_MALLOC 1 + + +typedef struct state_t { + off_t gunzip_bytes_out; /* number of output bytes */ + uint32_t gunzip_crc; + + int gunzip_src_fd; + unsigned gunzip_outbuf_count; /* bytes in output buffer */ + + unsigned char *gunzip_window; + + uint32_t *gunzip_crc_table; + + /* bitbuffer */ + unsigned gunzip_bb; /* bit buffer */ + unsigned char gunzip_bk; /* bits in bit buffer */ + + /* input (compressed) data */ + unsigned char *bytebuffer; /* buffer itself */ + off_t to_read; /* compressed bytes to read (unzip only, -1 for gunzip) */ +// unsigned bytebuffer_max; /* buffer size */ + unsigned bytebuffer_offset; /* buffer position */ + unsigned bytebuffer_size; /* how much data is there (size <= max) */ + + /* private data of inflate_codes() */ + unsigned inflate_codes_ml; /* masks for bl and bd bits */ + unsigned inflate_codes_md; /* masks for bl and bd bits */ + unsigned inflate_codes_bb; /* bit buffer */ + unsigned inflate_codes_k; /* number of bits in bit buffer */ + unsigned inflate_codes_w; /* current gunzip_window position */ + huft_t *inflate_codes_tl; + huft_t *inflate_codes_td; + unsigned inflate_codes_bl; + unsigned inflate_codes_bd; + unsigned inflate_codes_nn; /* length and index for copy */ + unsigned inflate_codes_dd; + + smallint resume_copy; + + /* private data of inflate_get_next_window() */ + smallint method; /* method == -1 for stored, -2 for codes */ + smallint need_another_block; + smallint end_reached; + + /* private data of inflate_stored() */ + unsigned inflate_stored_n; + unsigned inflate_stored_b; + unsigned inflate_stored_k; + unsigned inflate_stored_w; + + const char *error_msg; + jmp_buf error_jmp; +} state_t; +#define gunzip_bytes_out (S()gunzip_bytes_out ) +#define gunzip_crc (S()gunzip_crc ) +#define gunzip_src_fd (S()gunzip_src_fd ) +#define gunzip_outbuf_count (S()gunzip_outbuf_count) +#define gunzip_window (S()gunzip_window ) +#define gunzip_crc_table (S()gunzip_crc_table ) +#define gunzip_bb (S()gunzip_bb ) +#define gunzip_bk (S()gunzip_bk ) +#define to_read (S()to_read ) +// #define bytebuffer_max (S()bytebuffer_max ) +// Both gunzip and unzip can use constant buffer size now (16k): +#define bytebuffer_max 0x4000 +#define bytebuffer (S()bytebuffer ) +#define bytebuffer_offset (S()bytebuffer_offset ) +#define bytebuffer_size (S()bytebuffer_size ) +#define inflate_codes_ml (S()inflate_codes_ml ) +#define inflate_codes_md (S()inflate_codes_md ) +#define inflate_codes_bb (S()inflate_codes_bb ) +#define inflate_codes_k (S()inflate_codes_k ) +#define inflate_codes_w (S()inflate_codes_w ) +#define inflate_codes_tl (S()inflate_codes_tl ) +#define inflate_codes_td (S()inflate_codes_td ) +#define inflate_codes_bl (S()inflate_codes_bl ) +#define inflate_codes_bd (S()inflate_codes_bd ) +#define inflate_codes_nn (S()inflate_codes_nn ) +#define inflate_codes_dd (S()inflate_codes_dd ) +#define resume_copy (S()resume_copy ) +#define method (S()method ) +#define need_another_block (S()need_another_block ) +#define end_reached (S()end_reached ) +#define inflate_stored_n (S()inflate_stored_n ) +#define inflate_stored_b (S()inflate_stored_b ) +#define inflate_stored_k (S()inflate_stored_k ) +#define inflate_stored_w (S()inflate_stored_w ) +#define error_msg (S()error_msg ) +#define error_jmp (S()error_jmp ) + +/* This is a generic part */ +#if STATE_IN_BSS /* Use global data segment */ +#define DECLARE_STATE /*nothing*/ +#define ALLOC_STATE /*nothing*/ +#define DEALLOC_STATE ((void)0) +#define S() state. +#define PASS_STATE /*nothing*/ +#define PASS_STATE_ONLY /*nothing*/ +#define STATE_PARAM /*nothing*/ +#define STATE_PARAM_ONLY void +static state_t state; +#endif + +#if STATE_IN_MALLOC /* Use malloc space */ +#define DECLARE_STATE state_t *state +#define ALLOC_STATE (state = xzalloc(sizeof(*state))) +#define DEALLOC_STATE free(state) +#define S() state-> +#define PASS_STATE state, +#define PASS_STATE_ONLY state +#define STATE_PARAM state_t *state, +#define STATE_PARAM_ONLY state_t *state +#endif + + +static const uint16_t mask_bits[] ALIGN2 = { + 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +/* Copy lengths for literal codes 257..285 */ +static const uint16_t cplens[] ALIGN2 = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 +}; + +/* note: see note #13 above about the 258 in this list. */ +/* Extra bits for literal codes 257..285 */ +static const uint8_t cplext[] ALIGN1 = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, + 5, 5, 5, 0, 99, 99 +}; /* 99 == invalid */ + +/* Copy offsets for distance codes 0..29 */ +static const uint16_t cpdist[] ALIGN2 = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 +}; + +/* Extra bits for distance codes */ +static const uint8_t cpdext[] ALIGN1 = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13 +}; + +/* Tables for deflate from PKZIP's appnote.txt. */ +/* Order of the bit length code lengths */ +static const uint8_t border[] ALIGN1 = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + + +/* + * Free the malloc'ed tables built by huft_build(), which makes a linked + * list of the tables it made, with the links in a dummy first entry of + * each table. + * t: table to free + */ +static void huft_free(huft_t *p) +{ + huft_t *q; + + /* Go through linked list, freeing from the malloced (t[-1]) address. */ + while (p) { + q = (--p)->v.t; + free(p); + p = q; + } +} + +static void huft_free_all(STATE_PARAM_ONLY) +{ + huft_free(inflate_codes_tl); + huft_free(inflate_codes_td); + inflate_codes_tl = NULL; + inflate_codes_td = NULL; +} + +static void abort_unzip(STATE_PARAM_ONLY) NORETURN; +static void abort_unzip(STATE_PARAM_ONLY) +{ + huft_free_all(PASS_STATE_ONLY); + longjmp(error_jmp, 1); +} + +static unsigned fill_bitbuffer(STATE_PARAM unsigned bitbuffer, unsigned *current, const unsigned required) +{ + while (*current < required) { + if (bytebuffer_offset >= bytebuffer_size) { + unsigned sz = bytebuffer_max - 4; + if (to_read >= 0 && to_read < sz) /* unzip only */ + sz = to_read; + /* Leave the first 4 bytes empty so we can always unwind the bitbuffer + * to the front of the bytebuffer */ + bytebuffer_size = safe_read(gunzip_src_fd, &bytebuffer[4], sz); + if ((int)bytebuffer_size < 1) { + error_msg = "unexpected end of file"; + abort_unzip(PASS_STATE_ONLY); + } + if (to_read >= 0) /* unzip only */ + to_read -= bytebuffer_size; + bytebuffer_size += 4; + bytebuffer_offset = 4; + } + bitbuffer |= ((unsigned) bytebuffer[bytebuffer_offset]) << *current; + bytebuffer_offset++; + *current += 8; + } + return bitbuffer; +} + + +/* Given a list of code lengths and a maximum table size, make a set of + * tables to decode that set of codes. Return zero on success, one if + * the given code set is incomplete (the tables are still built in this + * case), two if the input is invalid (all zero length codes or an + * oversubscribed set of lengths) - in this case stores NULL in *t. + * + * b: code lengths in bits (all assumed <= BMAX) + * n: number of codes (assumed <= N_MAX) + * s: number of simple-valued codes (0..s-1) + * d: list of base values for non-simple codes + * e: list of extra bits for non-simple codes + * t: result: starting table + * m: maximum lookup bits, returns actual + */ +static int huft_build(const unsigned *b, const unsigned n, + const unsigned s, const unsigned short *d, + const unsigned char *e, huft_t **t, unsigned *m) +{ + unsigned a; /* counter for codes of length k */ + unsigned c[BMAX + 1]; /* bit length count table */ + unsigned eob_len; /* length of end-of-block code (value 256) */ + unsigned f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int htl; /* table level */ + unsigned i; /* counter, current code */ + unsigned j; /* counter */ + int k; /* number of bits in current code */ + unsigned *p; /* pointer into c[], b[], or v[] */ + huft_t *q; /* points to current table */ + huft_t r; /* table entry for structure assignment */ + huft_t *u[BMAX]; /* table stack */ + unsigned v[N_MAX]; /* values in order of bit length */ + int ws[BMAX + 1]; /* bits decoded stack */ + int w; /* bits decoded */ + unsigned x[BMAX + 1]; /* bit offsets, then code stack */ + unsigned *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + unsigned z; /* number of entries in current table */ + + /* Length of EOB code, if any */ + eob_len = n > 256 ? b[256] : BMAX; + + *t = NULL; + + /* Generate counts for each bit length */ + memset(c, 0, sizeof(c)); + p = (unsigned *) b; /* cast allows us to reuse p for pointing to b */ + i = n; + do { + c[*p]++; /* assume all entries <= BMAX */ + p++; /* can't combine with above line (Solaris bug) */ + } while (--i); + if (c[0] == n) { /* null input - all zero length codes */ + *m = 0; + return 2; + } + + /* Find minimum and maximum length, bound *m by those */ + for (j = 1; (c[j] == 0) && (j <= BMAX); j++) + continue; + k = j; /* minimum code length */ + for (i = BMAX; (c[i] == 0) && i; i--) + continue; + g = i; /* maximum code length */ + *m = (*m < j) ? j : ((*m > i) ? i : *m); + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) { + y -= c[j]; + if (y < 0) + return 2; /* bad input: more codes than bits */ + } + y -= c[i]; + if (y < 0) + return 2; + c[i] += y; + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; + xp = x + 2; + while (--i) { /* note that i == g from above */ + j += *p++; + *xp++ = j; + } + + /* Make a table of values in order of bit lengths */ + p = (unsigned *) b; + i = 0; + do { + j = *p++; + if (j != 0) { + v[x[j]++] = i; + } + } while (++i < n); + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + htl = -1; /* no tables yet--level -1 */ + w = ws[0] = 0; /* bits decoded */ + u[0] = NULL; /* just to keep compilers happy */ + q = NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) { + a = c[k]; + while (a--) { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > ws[htl + 1]) { + w = ws[++htl]; + + /* compute minimum size table less than or equal to *m bits */ + z = g - w; + z = z > *m ? *m : z; /* upper limit on table size */ + j = k - w; + f = 1 << j; + if (f > a + 1) { /* try a k-w bit table */ + /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + while (++j < z) { /* try smaller tables up to z bits */ + f <<= 1; + if (f <= *++xp) { + break; /* enough codes to use up j bits */ + } + f -= *xp; /* else deduct codes from patterns */ + } + } + j = (w + j > eob_len && w < eob_len) ? eob_len - w : j; /* make EOB code end at table */ + z = 1 << j; /* table entries for j-bit table */ + ws[htl+1] = w + j; /* set bits decoded in stack */ + + /* allocate and link in new table */ + q = xzalloc((z + 1) * sizeof(huft_t)); + *t = q + 1; /* link to list for huft_free() */ + t = &(q->v.t); + u[htl] = ++q; /* table starts after link */ + + /* connect to last table, if there is one */ + if (htl) { + x[htl] = i; /* save pattern for backing up */ + r.b = (unsigned char) (w - ws[htl - 1]); /* bits to dump before this table */ + r.e = (unsigned char) (16 + j); /* bits in this table */ + r.v.t = q; /* pointer to this table */ + j = (i & ((1 << w) - 1)) >> ws[htl - 1]; + u[htl - 1][j] = r; /* connect to last table */ + } + } + + /* set up table entry in r */ + r.b = (unsigned char) (k - w); + if (p >= v + n) { + r.e = 99; /* out of values--invalid code */ + } else if (*p < s) { + r.e = (unsigned char) (*p < 256 ? 16 : 15); /* 256 is EOB code */ + r.v.n = (unsigned short) (*p++); /* simple code is just the value */ + } else { + r.e = (unsigned char) e[*p - s]; /* non-simple--look up in lists */ + r.v.n = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) { + q[j] = r; + } + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) { + i ^= j; + } + i ^= j; + + /* backup over finished tables */ + while ((i & ((1 << w) - 1)) != x[htl]) { + w = ws[--htl]; + } + } + } + + /* return actual size of base table */ + *m = ws[1]; + + /* Return 1 if we were given an incomplete table */ + return y != 0 && g != 1; +} + + +/* + * inflate (decompress) the codes in a deflated (compressed) block. + * Return an error code or zero if it all goes ok. + * + * tl, td: literal/length and distance decoder tables + * bl, bd: number of bits decoded by tl[] and td[] + */ +/* called once from inflate_block */ + +/* map formerly local static variables to globals */ +#define ml inflate_codes_ml +#define md inflate_codes_md +#define bb inflate_codes_bb +#define k inflate_codes_k +#define w inflate_codes_w +#define tl inflate_codes_tl +#define td inflate_codes_td +#define bl inflate_codes_bl +#define bd inflate_codes_bd +#define nn inflate_codes_nn +#define dd inflate_codes_dd +static void inflate_codes_setup(STATE_PARAM unsigned my_bl, unsigned my_bd) +{ + bl = my_bl; + bd = my_bd; + /* make local copies of globals */ + bb = gunzip_bb; /* initialize bit buffer */ + k = gunzip_bk; + w = gunzip_outbuf_count; /* initialize gunzip_window position */ + /* inflate the coded data */ + ml = mask_bits[bl]; /* precompute masks for speed */ + md = mask_bits[bd]; +} +/* called once from inflate_get_next_window */ +static int inflate_codes(STATE_PARAM_ONLY) +{ + unsigned e; /* table entry flag/number of extra bits */ + huft_t *t; /* pointer to table entry */ + + if (resume_copy) + goto do_copy; + + while (1) { /* do until end of block */ + bb = fill_bitbuffer(PASS_STATE bb, &k, bl); + t = tl + ((unsigned) bb & ml); + e = t->e; + if (e > 16) + do { + if (e == 99) + abort_unzip(PASS_STATE_ONLY);; + bb >>= t->b; + k -= t->b; + e -= 16; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + t = t->v.t + ((unsigned) bb & mask_bits[e]); + e = t->e; + } while (e > 16); + bb >>= t->b; + k -= t->b; + if (e == 16) { /* then it's a literal */ + gunzip_window[w++] = (unsigned char) t->v.n; + if (w == GUNZIP_WSIZE) { + gunzip_outbuf_count = w; + //flush_gunzip_window(); + w = 0; + return 1; // We have a block to read + } + } else { /* it's an EOB or a length */ + /* exit if end of block */ + if (e == 15) { + break; + } + + /* get length of block to copy */ + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + nn = t->v.n + ((unsigned) bb & mask_bits[e]); + bb >>= e; + k -= e; + + /* decode distance of block to copy */ + bb = fill_bitbuffer(PASS_STATE bb, &k, bd); + t = td + ((unsigned) bb & md); + e = t->e; + if (e > 16) + do { + if (e == 99) + abort_unzip(PASS_STATE_ONLY); + bb >>= t->b; + k -= t->b; + e -= 16; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + t = t->v.t + ((unsigned) bb & mask_bits[e]); + e = t->e; + } while (e > 16); + bb >>= t->b; + k -= t->b; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + dd = w - t->v.n - ((unsigned) bb & mask_bits[e]); + bb >>= e; + k -= e; + + /* do the copy */ + do_copy: + do { + /* Was: nn -= (e = (e = GUNZIP_WSIZE - ((dd &= GUNZIP_WSIZE - 1) > w ? dd : w)) > nn ? nn : e); */ + /* Who wrote THAT?? rewritten as: */ + dd &= GUNZIP_WSIZE - 1; + e = GUNZIP_WSIZE - (dd > w ? dd : w); + if (e > nn) e = nn; + nn -= e; + + /* copy to new buffer to prevent possible overwrite */ + if (w - dd >= e) { /* (this test assumes unsigned comparison) */ + memcpy(gunzip_window + w, gunzip_window + dd, e); + w += e; + dd += e; + } else { + /* do it slow to avoid memcpy() overlap */ + /* !NOMEMCPY */ + do { + gunzip_window[w++] = gunzip_window[dd++]; + } while (--e); + } + if (w == GUNZIP_WSIZE) { + gunzip_outbuf_count = w; + resume_copy = (nn != 0); + //flush_gunzip_window(); + w = 0; + return 1; + } + } while (nn); + resume_copy = 0; + } + } + + /* restore the globals from the locals */ + gunzip_outbuf_count = w; /* restore global gunzip_window pointer */ + gunzip_bb = bb; /* restore global bit buffer */ + gunzip_bk = k; + + /* normally just after call to inflate_codes, but save code by putting it here */ + /* free the decoding tables (tl and td), return */ + huft_free_all(PASS_STATE_ONLY); + + /* done */ + return 0; +} +#undef ml +#undef md +#undef bb +#undef k +#undef w +#undef tl +#undef td +#undef bl +#undef bd +#undef nn +#undef dd + + +/* called once from inflate_block */ +static void inflate_stored_setup(STATE_PARAM int my_n, int my_b, int my_k) +{ + inflate_stored_n = my_n; + inflate_stored_b = my_b; + inflate_stored_k = my_k; + /* initialize gunzip_window position */ + inflate_stored_w = gunzip_outbuf_count; +} +/* called once from inflate_get_next_window */ +static int inflate_stored(STATE_PARAM_ONLY) +{ + /* read and output the compressed data */ + while (inflate_stored_n--) { + inflate_stored_b = fill_bitbuffer(PASS_STATE inflate_stored_b, &inflate_stored_k, 8); + gunzip_window[inflate_stored_w++] = (unsigned char) inflate_stored_b; + if (inflate_stored_w == GUNZIP_WSIZE) { + gunzip_outbuf_count = inflate_stored_w; + //flush_gunzip_window(); + inflate_stored_w = 0; + inflate_stored_b >>= 8; + inflate_stored_k -= 8; + return 1; /* We have a block */ + } + inflate_stored_b >>= 8; + inflate_stored_k -= 8; + } + + /* restore the globals from the locals */ + gunzip_outbuf_count = inflate_stored_w; /* restore global gunzip_window pointer */ + gunzip_bb = inflate_stored_b; /* restore global bit buffer */ + gunzip_bk = inflate_stored_k; + return 0; /* Finished */ +} + + +/* + * decompress an inflated block + * e: last block flag + * + * GLOBAL VARIABLES: bb, kk, + */ +/* Return values: -1 = inflate_stored, -2 = inflate_codes */ +/* One callsite in inflate_get_next_window */ +static int inflate_block(STATE_PARAM smallint *e) +{ + unsigned ll[286 + 30]; /* literal/length and distance code lengths */ + unsigned t; /* block type */ + unsigned b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ + + /* make local bit buffer */ + + b = gunzip_bb; + k = gunzip_bk; + + /* read in last block bit */ + b = fill_bitbuffer(PASS_STATE b, &k, 1); + *e = b & 1; + b >>= 1; + k -= 1; + + /* read in block type */ + b = fill_bitbuffer(PASS_STATE b, &k, 2); + t = (unsigned) b & 3; + b >>= 2; + k -= 2; + + /* restore the global bit buffer */ + gunzip_bb = b; + gunzip_bk = k; + + /* Do we see block type 1 often? Yes! + * TODO: fix performance problem (see below) */ + //bb_error_msg("blktype %d", t); + + /* inflate that block type */ + switch (t) { + case 0: /* Inflate stored */ + { + unsigned n; /* number of bytes in block */ + unsigned b_stored; /* bit buffer */ + unsigned k_stored; /* number of bits in bit buffer */ + + /* make local copies of globals */ + b_stored = gunzip_bb; /* initialize bit buffer */ + k_stored = gunzip_bk; + + /* go to byte boundary */ + n = k_stored & 7; + b_stored >>= n; + k_stored -= n; + + /* get the length and its complement */ + b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); + n = ((unsigned) b_stored & 0xffff); + b_stored >>= 16; + k_stored -= 16; + + b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); + if (n != (unsigned) ((~b_stored) & 0xffff)) { + abort_unzip(PASS_STATE_ONLY); /* error in compressed data */ + } + b_stored >>= 16; + k_stored -= 16; + + inflate_stored_setup(PASS_STATE n, b_stored, k_stored); + + return -1; + } + case 1: + /* Inflate fixed + * decompress an inflated type 1 (fixed Huffman codes) block. We should + * either replace this with a custom decoder, or at least precompute the + * Huffman tables. TODO */ + { + int i; /* temporary variable */ + unsigned bl; /* lookup bits for tl */ + unsigned bd; /* lookup bits for td */ + /* gcc 4.2.1 is too dumb to reuse stackspace. Moved up... */ + //unsigned ll[288]; /* length list for huft_build */ + + /* set up literal table */ + for (i = 0; i < 144; i++) + ll[i] = 8; + for (; i < 256; i++) + ll[i] = 9; + for (; i < 280; i++) + ll[i] = 7; + for (; i < 288; i++) /* make a complete, but wrong code set */ + ll[i] = 8; + bl = 7; + huft_build(ll, 288, 257, cplens, cplext, &inflate_codes_tl, &bl); + /* huft_build() never return nonzero - we use known data */ + + /* set up distance table */ + for (i = 0; i < 30; i++) /* make an incomplete code set */ + ll[i] = 5; + bd = 5; + huft_build(ll, 30, 0, cpdist, cpdext, &inflate_codes_td, &bd); + + /* set up data for inflate_codes() */ + inflate_codes_setup(PASS_STATE bl, bd); + + /* huft_free code moved into inflate_codes */ + + return -2; + } + case 2: /* Inflate dynamic */ + { + enum { dbits = 6 }; /* bits in base distance lookup table */ + enum { lbits = 9 }; /* bits in base literal/length lookup table */ + + huft_t *td; /* distance code table */ + unsigned i; /* temporary variables */ + unsigned j; + unsigned l; /* last length */ + unsigned m; /* mask for bit lengths table */ + unsigned n; /* number of lengths to get */ + unsigned bl; /* lookup bits for tl */ + unsigned bd; /* lookup bits for td */ + unsigned nb; /* number of bit length codes */ + unsigned nl; /* number of literal/length codes */ + unsigned nd; /* number of distance codes */ + + //unsigned ll[286 + 30];/* literal/length and distance code lengths */ + unsigned b_dynamic; /* bit buffer */ + unsigned k_dynamic; /* number of bits in bit buffer */ + + /* make local bit buffer */ + b_dynamic = gunzip_bb; + k_dynamic = gunzip_bk; + + /* read in table lengths */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); + nl = 257 + ((unsigned) b_dynamic & 0x1f); /* number of literal/length codes */ + + b_dynamic >>= 5; + k_dynamic -= 5; + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); + nd = 1 + ((unsigned) b_dynamic & 0x1f); /* number of distance codes */ + + b_dynamic >>= 5; + k_dynamic -= 5; + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 4); + nb = 4 + ((unsigned) b_dynamic & 0xf); /* number of bit length codes */ + + b_dynamic >>= 4; + k_dynamic -= 4; + if (nl > 286 || nd > 30) + abort_unzip(PASS_STATE_ONLY); /* bad lengths */ + + /* read in bit-length-code lengths */ + for (j = 0; j < nb; j++) { + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); + ll[border[j]] = (unsigned) b_dynamic & 7; + b_dynamic >>= 3; + k_dynamic -= 3; + } + for (; j < 19; j++) + ll[border[j]] = 0; + + /* build decoding table for trees - single level, 7 bit lookup */ + bl = 7; + i = huft_build(ll, 19, 19, NULL, NULL, &inflate_codes_tl, &bl); + if (i != 0) { + abort_unzip(PASS_STATE_ONLY); //return i; /* incomplete code set */ + } + + /* read in literal and distance code lengths */ + n = nl + nd; + m = mask_bits[bl]; + i = l = 0; + while ((unsigned) i < n) { + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, (unsigned)bl); + td = inflate_codes_tl + ((unsigned) b_dynamic & m); + j = td->b; + b_dynamic >>= j; + k_dynamic -= j; + j = td->v.n; + if (j < 16) { /* length of code in bits (0..15) */ + ll[i++] = l = j; /* save last length in l */ + } else if (j == 16) { /* repeat last length 3 to 6 times */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 2); + j = 3 + ((unsigned) b_dynamic & 3); + b_dynamic >>= 2; + k_dynamic -= 2; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = l; + } + } else if (j == 17) { /* 3 to 10 zero length codes */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); + j = 3 + ((unsigned) b_dynamic & 7); + b_dynamic >>= 3; + k_dynamic -= 3; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = 0; + } + l = 0; + } else { /* j == 18: 11 to 138 zero length codes */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 7); + j = 11 + ((unsigned) b_dynamic & 0x7f); + b_dynamic >>= 7; + k_dynamic -= 7; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = 0; + } + l = 0; + } + } + + /* free decoding table for trees */ + huft_free(inflate_codes_tl); + + /* restore the global bit buffer */ + gunzip_bb = b_dynamic; + gunzip_bk = k_dynamic; + + /* build the decoding tables for literal/length and distance codes */ + bl = lbits; + + i = huft_build(ll, nl, 257, cplens, cplext, &inflate_codes_tl, &bl); + if (i != 0) + abort_unzip(PASS_STATE_ONLY); + bd = dbits; + i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &inflate_codes_td, &bd); + if (i != 0) + abort_unzip(PASS_STATE_ONLY); + + /* set up data for inflate_codes() */ + inflate_codes_setup(PASS_STATE bl, bd); + + /* huft_free code moved into inflate_codes */ + + return -2; + } + default: + abort_unzip(PASS_STATE_ONLY); + } +} + +/* Two callsites, both in inflate_get_next_window */ +static void calculate_gunzip_crc(STATE_PARAM_ONLY) +{ + unsigned n; + for (n = 0; n < gunzip_outbuf_count; n++) { + gunzip_crc = gunzip_crc_table[((int) gunzip_crc ^ (gunzip_window[n])) & 0xff] ^ (gunzip_crc >> 8); + } + gunzip_bytes_out += gunzip_outbuf_count; +} + +/* One callsite in inflate_unzip_internal */ +static int inflate_get_next_window(STATE_PARAM_ONLY) +{ + gunzip_outbuf_count = 0; + + while (1) { + int ret; + + if (need_another_block) { + if (end_reached) { + calculate_gunzip_crc(PASS_STATE_ONLY); + end_reached = 0; + /* NB: need_another_block is still set */ + return 0; /* Last block */ + } + method = inflate_block(PASS_STATE &end_reached); + need_another_block = 0; + } + + switch (method) { + case -1: + ret = inflate_stored(PASS_STATE_ONLY); + break; + case -2: + ret = inflate_codes(PASS_STATE_ONLY); + break; + default: /* cannot happen */ + abort_unzip(PASS_STATE_ONLY); + } + + if (ret == 1) { + calculate_gunzip_crc(PASS_STATE_ONLY); + return 1; /* more data left */ + } + need_another_block = 1; /* end of that block */ + } + /* Doesnt get here */ +} + + +/* Called from unpack_gz_stream() and inflate_unzip() */ +static USE_DESKTOP(long long) int +inflate_unzip_internal(STATE_PARAM int in, int out) +{ + USE_DESKTOP(long long) int n = 0; + ssize_t nwrote; + + /* Allocate all global buffers (for DYN_ALLOC option) */ + gunzip_window = xmalloc(GUNZIP_WSIZE); + gunzip_outbuf_count = 0; + gunzip_bytes_out = 0; + gunzip_src_fd = in; + + /* (re) initialize state */ + method = -1; + need_another_block = 1; + resume_copy = 0; + gunzip_bk = 0; + gunzip_bb = 0; + + /* Create the crc table */ + gunzip_crc_table = crc32_filltable(NULL, 0); + gunzip_crc = ~0; + + error_msg = "corrupted data"; + if (setjmp(error_jmp)) { + /* Error from deep inside zip machinery */ + n = -1; + goto ret; + } + + while (1) { + int r = inflate_get_next_window(PASS_STATE_ONLY); + nwrote = full_write(out, gunzip_window, gunzip_outbuf_count); + if (nwrote != (ssize_t)gunzip_outbuf_count) { + bb_perror_msg("write"); + n = -1; + goto ret; + } + USE_DESKTOP(n += nwrote;) + if (r == 0) break; + } + + /* Store unused bytes in a global buffer so calling applets can access it */ + if (gunzip_bk >= 8) { + /* Undo too much lookahead. The next read will be byte aligned + * so we can discard unused bits in the last meaningful byte. */ + bytebuffer_offset--; + bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff; + gunzip_bb >>= 8; + gunzip_bk -= 8; + } + ret: + /* Cleanup */ + free(gunzip_window); + free(gunzip_crc_table); + return n; +} + + +/* External entry points */ + +/* For unzip */ + +USE_DESKTOP(long long) int FAST_FUNC +inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out) +{ + USE_DESKTOP(long long) int n; + DECLARE_STATE; + + ALLOC_STATE; + + to_read = compr_size; +// bytebuffer_max = 0x8000; + bytebuffer_offset = 4; + bytebuffer = xmalloc(bytebuffer_max); + n = inflate_unzip_internal(PASS_STATE in, out); + free(bytebuffer); + + res->crc = gunzip_crc; + res->bytes_out = gunzip_bytes_out; + DEALLOC_STATE; + return n; +} + + +/* For gunzip */ + +/* helpers first */ + +/* Top up the input buffer with at least n bytes. */ +static int top_up(STATE_PARAM unsigned n) +{ + int count = bytebuffer_size - bytebuffer_offset; + + if (count < (int)n) { + memmove(bytebuffer, &bytebuffer[bytebuffer_offset], count); + bytebuffer_offset = 0; + bytebuffer_size = full_read(gunzip_src_fd, &bytebuffer[count], bytebuffer_max - count); + if ((int)bytebuffer_size < 0) { + bb_error_msg("read error"); + return 0; + } + bytebuffer_size += count; + if (bytebuffer_size < n) + return 0; + } + return 1; +} + +static uint16_t buffer_read_le_u16(STATE_PARAM_ONLY) +{ + uint16_t res; +#if BB_LITTLE_ENDIAN + /* gcc 4.2.1 is very clever */ + memcpy(&res, &bytebuffer[bytebuffer_offset], 2); +#else + res = bytebuffer[bytebuffer_offset]; + res |= bytebuffer[bytebuffer_offset + 1] << 8; +#endif + bytebuffer_offset += 2; + return res; +} + +static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY) +{ + uint32_t res; +#if BB_LITTLE_ENDIAN + memcpy(&res, &bytebuffer[bytebuffer_offset], 4); +#else + res = bytebuffer[bytebuffer_offset]; + res |= bytebuffer[bytebuffer_offset + 1] << 8; + res |= bytebuffer[bytebuffer_offset + 2] << 16; + res |= bytebuffer[bytebuffer_offset + 3] << 24; +#endif + bytebuffer_offset += 4; + return res; +} + +static int check_header_gzip(STATE_PARAM unpack_info_t *info) +{ + union { + unsigned char raw[8]; + struct { + uint8_t gz_method; + uint8_t flags; + uint32_t mtime; + uint8_t xtra_flags_UNUSED; + uint8_t os_flags_UNUSED; + } __attribute__((packed)) formatted; + } header; + struct BUG_header { + char BUG_header[sizeof(header) == 8 ? 1 : -1]; + }; + + /* + * Rewind bytebuffer. We use the beginning because the header has 8 + * bytes, leaving enough for unwinding afterwards. + */ + bytebuffer_size -= bytebuffer_offset; + memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size); + bytebuffer_offset = 0; + + if (!top_up(PASS_STATE 8)) + return 0; + memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8); + bytebuffer_offset += 8; + + /* Check the compression method */ + if (header.formatted.gz_method != 8) { + return 0; + } + + if (header.formatted.flags & 0x04) { + /* bit 2 set: extra field present */ + unsigned extra_short; + + if (!top_up(PASS_STATE 2)) + return 0; + extra_short = buffer_read_le_u16(PASS_STATE_ONLY); + if (!top_up(PASS_STATE extra_short)) + return 0; + /* Ignore extra field */ + bytebuffer_offset += extra_short; + } + + /* Discard original name and file comment if any */ + /* bit 3 set: original file name present */ + /* bit 4 set: file comment present */ + if (header.formatted.flags & 0x18) { + while (1) { + do { + if (!top_up(PASS_STATE 1)) + return 0; + } while (bytebuffer[bytebuffer_offset++] != 0); + if ((header.formatted.flags & 0x18) != 0x18) + break; + header.formatted.flags &= ~0x18; + } + } + + if (info) + info->mtime = SWAP_LE32(header.formatted.mtime); + + /* Read the header checksum */ + if (header.formatted.flags & 0x02) { + if (!top_up(PASS_STATE 2)) + return 0; + bytebuffer_offset += 2; + } + return 1; +} + +USE_DESKTOP(long long) int FAST_FUNC +unpack_gz_stream_with_info(int in, int out, unpack_info_t *info) +{ + uint32_t v32; + USE_DESKTOP(long long) int n; + DECLARE_STATE; + + n = 0; + + ALLOC_STATE; + to_read = -1; +// bytebuffer_max = 0x8000; + bytebuffer = xmalloc(bytebuffer_max); + gunzip_src_fd = in; + + again: + if (!check_header_gzip(PASS_STATE info)) { + bb_error_msg("corrupted data"); + n = -1; + goto ret; + } + n += inflate_unzip_internal(PASS_STATE in, out); + if (n < 0) + goto ret; + + if (!top_up(PASS_STATE 8)) { + bb_error_msg("corrupted data"); + n = -1; + goto ret; + } + + /* Validate decompression - crc */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((~gunzip_crc) != v32) { + bb_error_msg("crc error"); + n = -1; + goto ret; + } + + /* Validate decompression - size */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((uint32_t)gunzip_bytes_out != v32) { + bb_error_msg("incorrect length"); + n = -1; + } + + if (!top_up(PASS_STATE 2)) + goto ret; /* EOF */ + + if (bytebuffer[bytebuffer_offset] == 0x1f + && bytebuffer[bytebuffer_offset + 1] == 0x8b + ) { + bytebuffer_offset += 2; + goto again; + } + /* GNU gzip says: */ + /*bb_error_msg("decompression OK, trailing garbage ignored");*/ + + ret: + free(bytebuffer); + DEALLOC_STATE; + return n; +} + +USE_DESKTOP(long long) int FAST_FUNC +unpack_gz_stream(int in, int out) +{ + return unpack_gz_stream_with_info(in, out, NULL); +} diff --git a/archival/libunarchive/filter_accept_all.c b/archival/libunarchive/filter_accept_all.c new file mode 100644 index 0000000..21f9c5c --- /dev/null +++ b/archival/libunarchive/filter_accept_all.c @@ -0,0 +1,17 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +/* Accept any non-null name, its not really a filter at all */ +char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle) +{ + if (archive_handle->file_header->name) + return EXIT_SUCCESS; + return EXIT_FAILURE; +} diff --git a/archival/libunarchive/filter_accept_list.c b/archival/libunarchive/filter_accept_list.c new file mode 100644 index 0000000..afa0b4c --- /dev/null +++ b/archival/libunarchive/filter_accept_list.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +/* + * Accept names that are in the accept list, ignoring reject list. + */ +char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle) +{ + if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) + return EXIT_SUCCESS; + return EXIT_FAILURE; +} diff --git a/archival/libunarchive/filter_accept_list_reassign.c b/archival/libunarchive/filter_accept_list_reassign.c new file mode 100644 index 0000000..f1de4e8 --- /dev/null +++ b/archival/libunarchive/filter_accept_list_reassign.c @@ -0,0 +1,51 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +/* Built and used only if ENABLE_DPKG || ENABLE_DPKG_DEB */ + +/* + * Reassign the subarchive metadata parser based on the filename extension + * e.g. if its a .tar.gz modify archive_handle->sub_archive to process a .tar.gz + * or if its a .tar.bz2 make archive_handle->sub_archive handle that + */ +char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle) +{ + /* Check the file entry is in the accept list */ + if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) { + const char *name_ptr; + + /* Find extension */ + name_ptr = strrchr(archive_handle->file_header->name, '.'); + if (!name_ptr) + return EXIT_FAILURE; + name_ptr++; + + /* Modify the subarchive handler based on the extension */ + if (ENABLE_FEATURE_SEAMLESS_GZ + && strcmp(name_ptr, "gz") == 0 + ) { + archive_handle->action_data_subarchive = get_header_tar_gz; + return EXIT_SUCCESS; + } + if (ENABLE_FEATURE_SEAMLESS_BZ2 + && strcmp(name_ptr, "bz2") == 0 + ) { + archive_handle->action_data_subarchive = get_header_tar_bz2; + return EXIT_SUCCESS; + } + if (ENABLE_FEATURE_SEAMLESS_LZMA + && strcmp(name_ptr, "lzma") == 0 + ) { + archive_handle->action_data_subarchive = get_header_tar_lzma; + return EXIT_SUCCESS; + } + } + return EXIT_FAILURE; +} diff --git a/archival/libunarchive/filter_accept_reject_list.c b/archival/libunarchive/filter_accept_reject_list.c new file mode 100644 index 0000000..aa601e1 --- /dev/null +++ b/archival/libunarchive/filter_accept_reject_list.c @@ -0,0 +1,36 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +/* + * Accept names that are in the accept list and not in the reject list + */ +char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle) +{ + const char *key; + const llist_t *reject_entry; + const llist_t *accept_entry; + + key = archive_handle->file_header->name; + + /* If the key is in a reject list fail */ + reject_entry = find_list_entry2(archive_handle->reject, key); + if (reject_entry) { + return EXIT_FAILURE; + } + accept_entry = find_list_entry2(archive_handle->accept, key); + + /* Fail if an accept list was specified and the key wasnt in there */ + if ((accept_entry == NULL) && archive_handle->accept) { + return EXIT_FAILURE; + } + + /* Accepted */ + return EXIT_SUCCESS; +} diff --git a/archival/libunarchive/find_list_entry.c b/archival/libunarchive/find_list_entry.c new file mode 100644 index 0000000..bc7bc64 --- /dev/null +++ b/archival/libunarchive/find_list_entry.c @@ -0,0 +1,54 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include +#include "libbb.h" +#include "unarchive.h" + +/* Find a string in a shell pattern list */ +const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename) +{ + while (list) { + if (fnmatch(list->data, filename, 0) == 0) { + return list; + } + list = list->link; + } + return NULL; +} + +/* Same, but compares only path components present in pattern + * (extra trailing path components in filename are assumed to match) + */ +const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename) +{ + char buf[PATH_MAX]; + int pattern_slash_cnt; + const char *c; + char *d; + + while (list) { + c = list->data; + pattern_slash_cnt = 0; + while (*c) + if (*c++ == '/') pattern_slash_cnt++; + c = filename; + d = buf; + /* paranoia is better than buffer overflows */ + while (*c && d != buf + sizeof(buf)-1) { + if (*c == '/' && --pattern_slash_cnt < 0) + break; + *d++ = *c++; + } + *d = '\0'; + if (fnmatch(list->data, buf, 0) == 0) { + return list; + } + list = list->link; + } + return NULL; +} diff --git a/archival/libunarchive/get_header_ar.c b/archival/libunarchive/get_header_ar.c new file mode 100644 index 0000000..d476a9d --- /dev/null +++ b/archival/libunarchive/get_header_ar.c @@ -0,0 +1,127 @@ +/* vi: set sw=4 ts=4: */ +/* Copyright 2001 Glenn McGrath. + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +char FAST_FUNC get_header_ar(archive_handle_t *archive_handle) +{ + int err; + file_header_t *typed = archive_handle->file_header; + union { + char raw[60]; + struct { + char name[16]; + char date[12]; + char uid[6]; + char gid[6]; + char mode[8]; + char size[10]; + char magic[2]; + } formatted; + } ar; +#if ENABLE_FEATURE_AR_LONG_FILENAMES + static char *ar_long_names; + static unsigned ar_long_name_size; +#endif + + /* dont use xread as we want to handle the error ourself */ + if (read(archive_handle->src_fd, ar.raw, 60) != 60) { + /* End Of File */ + return EXIT_FAILURE; + } + + /* ar header starts on an even byte (2 byte aligned) + * '\n' is used for padding + */ + if (ar.raw[0] == '\n') { + /* fix up the header, we started reading 1 byte too early */ + memmove(ar.raw, &ar.raw[1], 59); + ar.raw[59] = xread_char(archive_handle->src_fd); + archive_handle->offset++; + } + archive_handle->offset += 60; + + /* align the headers based on the header magic */ + if (ar.formatted.magic[0] != '`' || ar.formatted.magic[1] != '\n') + bb_error_msg_and_die("invalid ar header"); + + /* FIXME: more thorough routine would be in order here */ + /* (we have something like that in tar) */ + /* but for now we are lax. This code works because */ + /* on misformatted numbers bb_strtou returns all-ones */ + typed->mode = err = bb_strtou(ar.formatted.mode, NULL, 8); + if (err == -1) bb_error_msg_and_die("invalid ar header"); + typed->mtime = err = bb_strtou(ar.formatted.date, NULL, 10); + if (err == -1) bb_error_msg_and_die("invalid ar header"); + typed->uid = err = bb_strtou(ar.formatted.uid, NULL, 10); + if (err == -1) bb_error_msg_and_die("invalid ar header"); + typed->gid = err = bb_strtou(ar.formatted.gid, NULL, 10); + if (err == -1) bb_error_msg_and_die("invalid ar header"); + typed->size = err = bb_strtou(ar.formatted.size, NULL, 10); + if (err == -1) bb_error_msg_and_die("invalid ar header"); + + /* long filenames have '/' as the first character */ + if (ar.formatted.name[0] == '/') { +#if ENABLE_FEATURE_AR_LONG_FILENAMES + unsigned long_offset; + + if (ar.formatted.name[1] == '/') { + /* If the second char is a '/' then this entries data section + * stores long filename for multiple entries, they are stored + * in static variable long_names for use in future entries */ + ar_long_name_size = typed->size; + ar_long_names = xmalloc(ar_long_name_size); + xread(archive_handle->src_fd, ar_long_names, ar_long_name_size); + archive_handle->offset += ar_long_name_size; + /* This ar entries data section only contained filenames for other records + * they are stored in the static ar_long_names for future reference */ + return get_header_ar(archive_handle); /* Return next header */ + } + + if (ar.formatted.name[1] == ' ') { + /* This is the index of symbols in the file for compilers */ + data_skip(archive_handle); + archive_handle->offset += typed->size; + return get_header_ar(archive_handle); /* Return next header */ + } + + /* The number after the '/' indicates the offset in the ar data section + * (saved in variable long_name) that conatains the real filename */ + long_offset = atoi(&ar.formatted.name[1]); + if (long_offset >= ar_long_name_size) { + bb_error_msg_and_die("can't resolve long filename"); + } + typed->name = xstrdup(ar_long_names + long_offset); +#else + bb_error_msg_and_die("long filenames not supported"); +#endif + } else { + /* short filenames */ + typed->name = xstrndup(ar.formatted.name, 16); + } + + typed->name[strcspn(typed->name, " /")] = '\0'; + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_header(typed); +#if ENABLE_DPKG || ENABLE_DPKG_DEB + if (archive_handle->sub_archive) { + while (archive_handle->action_data_subarchive(archive_handle->sub_archive) == EXIT_SUCCESS) + continue; + } else +#endif + archive_handle->action_data(archive_handle); + } else { + data_skip(archive_handle); + } + + archive_handle->offset += typed->size; + /* Set the file pointer to the correct spot, we may have been reading a compressed file */ + lseek(archive_handle->src_fd, archive_handle->offset, SEEK_SET); + + return EXIT_SUCCESS; +} diff --git a/archival/libunarchive/get_header_cpio.c b/archival/libunarchive/get_header_cpio.c new file mode 100644 index 0000000..302f122 --- /dev/null +++ b/archival/libunarchive/get_header_cpio.c @@ -0,0 +1,182 @@ +/* vi: set sw=4 ts=4: */ +/* Copyright 2002 Laurence Anderson + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +typedef struct hardlinks_t { + struct hardlinks_t *next; + int inode; /* TODO: must match maj/min too! */ + int mode ; + int mtime; /* These three are useful only in corner case */ + int uid ; /* of hardlinks with zero size body */ + int gid ; + char name[1]; +} hardlinks_t; + +char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + char cpio_header[110]; + int namesize; + int major, minor, nlink, mode, inode; + unsigned size, uid, gid, mtime; + +#define hardlinks_to_create (*(hardlinks_t **)(&archive_handle->ah_priv[0])) +#define created_hardlinks (*(hardlinks_t **)(&archive_handle->ah_priv[1])) +#define block_count (archive_handle->ah_priv[2]) +// if (!archive_handle->ah_priv_inited) { +// archive_handle->ah_priv_inited = 1; +// hardlinks_to_create = NULL; +// created_hardlinks = NULL; +// } + + /* There can be padding before archive header */ + data_align(archive_handle, 4); + + size = full_read(archive_handle->src_fd, cpio_header, 110); + if (size == 0) { + goto create_hardlinks; + } + if (size != 110) { + bb_error_msg_and_die("short read"); + } + archive_handle->offset += 110; + + if (strncmp(&cpio_header[0], "07070", 5) != 0 + || (cpio_header[5] != '1' && cpio_header[5] != '2') + ) { + bb_error_msg_and_die("unsupported cpio format, use newc or crc"); + } + + if (sscanf(cpio_header + 6, + "%8x" "%8x" "%8x" "%8x" + "%8x" "%8x" "%8x" /*maj,min:*/ "%*16c" + /*rmaj,rmin:*/"%8x" "%8x" "%8x" /*chksum: "%*8c"*/, + &inode, &mode, &uid, &gid, + &nlink, &mtime, &size, + &major, &minor, &namesize) != 10) + bb_error_msg_and_die("damaged cpio file"); + file_header->mode = mode; + file_header->uid = uid; + file_header->gid = gid; + file_header->mtime = mtime; + file_header->size = size; + + namesize &= 0x1fff; /* paranoia: limit names to 8k chars */ + file_header->name = xzalloc(namesize + 1); + /* Read in filename */ + xread(archive_handle->src_fd, file_header->name, namesize); + archive_handle->offset += namesize; + + /* Update offset amount and skip padding before file contents */ + data_align(archive_handle, 4); + + if (strcmp(file_header->name, "TRAILER!!!") == 0) { + /* Always round up. ">> 9" divides by 512 */ + block_count = (void*)(ptrdiff_t) ((archive_handle->offset + 511) >> 9); + goto create_hardlinks; + } + + file_header->link_target = NULL; + if (S_ISLNK(file_header->mode)) { + file_header->size &= 0x1fff; /* paranoia: limit names to 8k chars */ + file_header->link_target = xzalloc(file_header->size + 1); + xread(archive_handle->src_fd, file_header->link_target, file_header->size); + archive_handle->offset += file_header->size; + file_header->size = 0; /* Stop possible seeks in future */ + } + +// TODO: data_extract_all can't deal with hardlinks to non-files... +// when fixed, change S_ISREG to !S_ISDIR here + + if (nlink > 1 && S_ISREG(file_header->mode)) { + hardlinks_t *new = xmalloc(sizeof(*new) + namesize); + new->inode = inode; + new->mode = mode ; + new->mtime = mtime; + new->uid = uid ; + new->gid = gid ; + strcpy(new->name, file_header->name); + /* Put file on a linked list for later */ + if (size == 0) { + new->next = hardlinks_to_create; + hardlinks_to_create = new; + return EXIT_SUCCESS; /* Skip this one */ + /* TODO: this breaks cpio -t (it does not show hardlinks) */ + } + new->next = created_hardlinks; + created_hardlinks = new; + } + file_header->device = makedev(major, minor); + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_data(archive_handle); + archive_handle->action_header(file_header); + } else { + data_skip(archive_handle); + } + + archive_handle->offset += file_header->size; + + free(file_header->link_target); + free(file_header->name); + file_header->link_target = NULL; + file_header->name = NULL; + + return EXIT_SUCCESS; + + create_hardlinks: + free(file_header->link_target); + free(file_header->name); + + while (hardlinks_to_create) { + hardlinks_t *cur; + hardlinks_t *make_me = hardlinks_to_create; + + hardlinks_to_create = make_me->next; + + memset(file_header, 0, sizeof(*file_header)); + file_header->mtime = make_me->mtime; + file_header->name = make_me->name; + file_header->mode = make_me->mode; + file_header->uid = make_me->uid; + file_header->gid = make_me->gid; + /*file_header->size = 0;*/ + /*file_header->link_target = NULL;*/ + + /* Try to find a file we are hardlinked to */ + cur = created_hardlinks; + while (cur) { + /* TODO: must match maj/min too! */ + if (cur->inode == make_me->inode) { + file_header->link_target = cur->name; + /* link_target != NULL, size = 0: "I am a hardlink" */ + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) + archive_handle->action_data(archive_handle); + free(make_me); + goto next_link; + } + cur = cur->next; + } + /* Oops... no file with such inode was created... do it now + * (happens when hardlinked files are empty (zero length)) */ + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) + archive_handle->action_data(archive_handle); + /* Move to the list of created hardlinked files */ + make_me->next = created_hardlinks; + created_hardlinks = make_me; + next_link: ; + } + + while (created_hardlinks) { + hardlinks_t *p = created_hardlinks; + created_hardlinks = p->next; + free(p); + } + + return EXIT_FAILURE; /* "No more files to process" */ +} diff --git a/archival/libunarchive/get_header_tar.c b/archival/libunarchive/get_header_tar.c new file mode 100644 index 0000000..bf0f92b --- /dev/null +++ b/archival/libunarchive/get_header_tar.c @@ -0,0 +1,378 @@ +/* vi: set sw=4 ts=4: */ +/* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * + * FIXME: + * In privileged mode if uname and gname map to a uid and gid then use the + * mapped value instead of the uid/gid values in tar header + * + * References: + * GNU tar and star man pages, + * Opengroup's ustar interchange format, + * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html + */ + +#include "libbb.h" +#include "unarchive.h" + +/* NB: _DESTROYS_ str[len] character! */ +static unsigned long long getOctal(char *str, int len) +{ + unsigned long long v; + /* NB: leading spaces are allowed. Using strtoull to handle that. + * The downside is that we accept e.g. "-123" too :) + */ + str[len] = '\0'; + v = strtoull(str, &str, 8); + if (*str && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY || *str != ' ')) + bb_error_msg_and_die("corrupted octal value in tar header"); + return v; +} +#define GET_OCTAL(a) getOctal((a), sizeof(a)) + +void BUG_tar_header_size(void); +char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + struct { + /* ustar header, Posix 1003.1 */ + char name[100]; /* 0-99 */ + char mode[8]; /* 100-107 */ + char uid[8]; /* 108-115 */ + char gid[8]; /* 116-123 */ + char size[12]; /* 124-135 */ + char mtime[12]; /* 136-147 */ + char chksum[8]; /* 148-155 */ + char typeflag; /* 156-156 */ + char linkname[100]; /* 157-256 */ + /* POSIX: "ustar" NUL "00" */ + /* GNU tar: "ustar " NUL */ + /* Normally it's defined as magic[6] followed by + * version[2], but we put them together to simplify code + */ + char magic[8]; /* 257-264 */ + char uname[32]; /* 265-296 */ + char gname[32]; /* 297-328 */ + char devmajor[8]; /* 329-336 */ + char devminor[8]; /* 337-344 */ + char prefix[155]; /* 345-499 */ + char padding[12]; /* 500-512 */ + } tar; + char *cp; + int i, sum_u, sum; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + int sum_s; +#endif + int parse_names; + + /* Our "private data" */ +#define p_end (*(smallint *)(&archive_handle->ah_priv[0])) +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS +#define p_longname (*(char* *)(&archive_handle->ah_priv[1])) +#define p_linkname (*(char* *)(&archive_handle->ah_priv[2])) +#else +#define p_longname 0 +#define p_linkname 0 +#endif +// if (!archive_handle->ah_priv_inited) { +// archive_handle->ah_priv_inited = 1; +// p_end = 0; +// USE_FEATURE_TAR_GNU_EXTENSIONS(p_longname = NULL;) +// USE_FEATURE_TAR_GNU_EXTENSIONS(p_linkname = NULL;) +// } + + if (sizeof(tar) != 512) + BUG_tar_header_size(); + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + again: +#endif + /* Align header */ + data_align(archive_handle, 512); + + again_after_align: + +#if ENABLE_DESKTOP + /* to prevent misdetection of bz2 sig */ + *(uint32_t*)(&tar) = 0; + i = full_read(archive_handle->src_fd, &tar, 512); + /* If GNU tar sees EOF in above read, it says: + * "tar: A lone zero block at N", where N = kilobyte + * where EOF was met (not EOF block, actual EOF!), + * and exits with EXIT_SUCCESS. + * We will mimic exit(EXIT_SUCCESS), although we will not mimic + * the message and we don't check whether we indeed + * saw zero block directly before this. */ + if (i == 0) { + xfunc_error_retval = 0; + short_read: + bb_error_msg_and_die("short read"); + } + if (i != 512) { + USE_FEATURE_TAR_AUTODETECT(goto autodetect;) + goto short_read; + } + +#else + i = 512; + xread(archive_handle->src_fd, &tar, i); +#endif + archive_handle->offset += i; + + /* If there is no filename its an empty header */ + if (tar.name[0] == 0 && tar.prefix[0] == 0) { + if (p_end) { + /* Second consecutive empty header - end of archive. + * Read until the end to empty the pipe from gz or bz2 + */ + while (full_read(archive_handle->src_fd, &tar, 512) == 512) + continue; + return EXIT_FAILURE; + } + p_end = 1; + return EXIT_SUCCESS; + } + p_end = 0; + + /* Check header has valid magic, "ustar" is for the proper tar, + * five NULs are for the old tar format */ + if (strncmp(tar.magic, "ustar", 5) != 0 + && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + || memcmp(tar.magic, "\0\0\0\0", 5) != 0) + ) { +#if ENABLE_FEATURE_TAR_AUTODETECT + char FAST_FUNC (*get_header_ptr)(archive_handle_t *); + + USE_DESKTOP(autodetect:) + /* tar gz/bz autodetect: check for gz/bz2 magic. + * If we see the magic, and it is the very first block, + * we can switch to get_header_tar_gz/bz2/lzma(). + * Needs seekable fd. I wish recv(MSG_PEEK) works + * on any fd... */ +#if ENABLE_FEATURE_SEAMLESS_GZ + if (tar.name[0] == 0x1f && tar.name[1] == (char)0x8b) { /* gzip */ + get_header_ptr = get_header_tar_gz; + } else +#endif +#if ENABLE_FEATURE_SEAMLESS_BZ2 + if (tar.name[0] == 'B' && tar.name[1] == 'Z' + && tar.name[2] == 'h' && isdigit(tar.name[3]) + ) { /* bzip2 */ + get_header_ptr = get_header_tar_bz2; + } else +#endif + goto err; + /* Two different causes for lseek() != 0: + * unseekable fd (would like to support that too, but...), + * or not first block (false positive, it's not .gz/.bz2!) */ + if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0) + goto err; + while (get_header_ptr(archive_handle) == EXIT_SUCCESS) + continue; + return EXIT_FAILURE; + err: +#endif /* FEATURE_TAR_AUTODETECT */ + bb_error_msg_and_die("invalid tar magic"); + } + + /* Do checksum on headers. + * POSIX says that checksum is done on unsigned bytes, but + * Sun and HP-UX gets it wrong... more details in + * GNU tar source. */ +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s = ' ' * sizeof(tar.chksum); +#endif + sum_u = ' ' * sizeof(tar.chksum); + for (i = 0; i < 148; i++) { + sum_u += ((unsigned char*)&tar)[i]; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s += ((signed char*)&tar)[i]; +#endif + } + for (i = 156; i < 512; i++) { + sum_u += ((unsigned char*)&tar)[i]; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s += ((signed char*)&tar)[i]; +#endif + } +#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + sum = strtoul(tar.chksum, &cp, 8); + if ((*cp && *cp != ' ') + || (sum_u != sum USE_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) + ) { + bb_error_msg_and_die("invalid tar header checksum"); + } +#else + /* This field does not need special treatment (getOctal) */ + sum = xstrtoul(tar.chksum, 8); + if (sum_u != sum USE_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) { + bb_error_msg_and_die("invalid tar header checksum"); + } +#endif + + /* 0 is reserved for high perf file, treat as normal file */ + if (!tar.typeflag) tar.typeflag = '0'; + parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7'); + + /* getOctal trashes subsequent field, therefore we call it + * on fields in reverse order */ + if (tar.devmajor[0]) { + char t = tar.prefix[0]; + /* we trash prefix[0] here, but we DO need it later! */ + unsigned minor = GET_OCTAL(tar.devminor); + unsigned major = GET_OCTAL(tar.devmajor); + file_header->device = makedev(major, minor); + tar.prefix[0] = t; + } + file_header->link_target = NULL; + if (!p_linkname && parse_names && tar.linkname[0]) { + file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname)); + /* FIXME: what if we have non-link object with link_target? */ + /* Will link_target be free()ed? */ + } +#if ENABLE_FEATURE_TAR_UNAME_GNAME + file_header->uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL; + file_header->gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL; +#endif + file_header->mtime = GET_OCTAL(tar.mtime); + file_header->size = GET_OCTAL(tar.size); + file_header->gid = GET_OCTAL(tar.gid); + file_header->uid = GET_OCTAL(tar.uid); + /* Set bits 0-11 of the files mode */ + file_header->mode = 07777 & GET_OCTAL(tar.mode); + + file_header->name = NULL; + if (!p_longname && parse_names) { + /* we trash mode[0] here, it's ok */ + //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain + tar.mode[0] = '\0'; + if (tar.prefix[0]) { + /* and padding[0] */ + //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain + tar.padding[0] = '\0'; + file_header->name = concat_path_file(tar.prefix, tar.name); + } else + file_header->name = xstrdup(tar.name); + } + + /* Set bits 12-15 of the files mode */ + /* (typeflag was not trashed because chksum does not use getOctal) */ + switch (tar.typeflag) { + /* busybox identifies hard links as being regular files with 0 size and a link name */ + case '1': + file_header->mode |= S_IFREG; + break; + case '7': + /* case 0: */ + case '0': +#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + if (last_char_is(file_header->name, '/')) { + goto set_dir; + } +#endif + file_header->mode |= S_IFREG; + break; + case '2': + file_header->mode |= S_IFLNK; + /* have seen tarballs with size field containing + * the size of the link target's name */ + size0: + file_header->size = 0; + break; + case '3': + file_header->mode |= S_IFCHR; + goto size0; /* paranoia */ + case '4': + file_header->mode |= S_IFBLK; + goto size0; + case '5': + USE_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:) + file_header->mode |= S_IFDIR; + goto size0; + case '6': + file_header->mode |= S_IFIFO; + goto size0; +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + case 'L': + /* free: paranoia: tar with several consecutive longnames */ + free(p_longname); + /* For paranoia reasons we allocate extra NUL char */ + p_longname = xzalloc(file_header->size + 1); + /* We read ASCIZ string, including NUL */ + xread(archive_handle->src_fd, p_longname, file_header->size); + archive_handle->offset += file_header->size; + /* return get_header_tar(archive_handle); */ + /* gcc 4.1.1 didn't optimize it into jump */ + /* so we will do it ourself, this also saves stack */ + goto again; + case 'K': + free(p_linkname); + p_linkname = xzalloc(file_header->size + 1); + xread(archive_handle->src_fd, p_linkname, file_header->size); + archive_handle->offset += file_header->size; + /* return get_header_tar(archive_handle); */ + goto again; + case 'D': /* GNU dump dir */ + case 'M': /* Continuation of multi volume archive */ + case 'N': /* Old GNU for names > 100 characters */ + case 'S': /* Sparse file */ + case 'V': /* Volume header */ +#endif + case 'g': /* pax global header */ + case 'x': { /* pax extended header */ + off_t sz; + bb_error_msg("warning: skipping header '%c'", tar.typeflag); + sz = (file_header->size + 511) & ~(off_t)511; + archive_handle->offset += sz; + sz >>= 9; /* sz /= 512 but w/o contortions for signed div */ + while (sz--) + xread(archive_handle->src_fd, &tar, 512); + /* return get_header_tar(archive_handle); */ + goto again_after_align; + } + default: + bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag); + } + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + if (p_longname) { + file_header->name = p_longname; + p_longname = NULL; + } + if (p_linkname) { + file_header->link_target = p_linkname; + p_linkname = NULL; + } +#endif + if (strncmp(file_header->name, "/../"+1, 3) == 0 + || strstr(file_header->name, "/../") + ) { + bb_error_msg_and_die("name with '..' encountered: '%s'", + file_header->name); + } + + /* Strip trailing '/' in directories */ + /* Must be done after mode is set as '/' is used to check if it's a directory */ + cp = last_char_is(file_header->name, '/'); + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_header(/*archive_handle->*/ file_header); + /* Note that we kill the '/' only after action_header() */ + /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */ + if (cp) *cp = '\0'; + archive_handle->ah_flags |= ARCHIVE_EXTRACT_QUIET; + archive_handle->action_data(archive_handle); + llist_add_to(&(archive_handle->passed), file_header->name); + } else { + data_skip(archive_handle); + free(file_header->name); + } + archive_handle->offset += file_header->size; + + free(file_header->link_target); + /* Do not free(file_header->name)! (why?) */ +#if ENABLE_FEATURE_TAR_UNAME_GNAME + free(file_header->uname); + free(file_header->gname); +#endif + return EXIT_SUCCESS; +} diff --git a/archival/libunarchive/get_header_tar_bz2.c b/archival/libunarchive/get_header_tar_bz2.c new file mode 100644 index 0000000..615bbba --- /dev/null +++ b/archival/libunarchive/get_header_tar_bz2.c @@ -0,0 +1,21 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libunarchive/get_header_tar_gz.c b/archival/libunarchive/get_header_tar_gz.c new file mode 100644 index 0000000..e88b720 --- /dev/null +++ b/archival/libunarchive/get_header_tar_gz.c @@ -0,0 +1,36 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle) +{ +#if BB_MMU + unsigned char magic[2]; +#endif + + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + /* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case). + * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will + * need the header. */ +#if BB_MMU + xread(archive_handle->src_fd, &magic, 2); + /* Can skip this check, but error message will be less clear */ + if ((magic[0] != 0x1f) || (magic[1] != 0x8b)) { + bb_error_msg_and_die("invalid gzip magic"); + } +#endif + + open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libunarchive/get_header_tar_lzma.c b/archival/libunarchive/get_header_tar_lzma.c new file mode 100644 index 0000000..03b1b79 --- /dev/null +++ b/archival/libunarchive/get_header_tar_lzma.c @@ -0,0 +1,24 @@ +/* vi: set sw=4 ts=4: */ +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Licensed under GPL v2, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libunarchive/header_list.c b/archival/libunarchive/header_list.c new file mode 100644 index 0000000..6ec2df3 --- /dev/null +++ b/archival/libunarchive/header_list.c @@ -0,0 +1,11 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC header_list(const file_header_t *file_header) +{ + puts(file_header->name); +} diff --git a/archival/libunarchive/header_skip.c b/archival/libunarchive/header_skip.c new file mode 100644 index 0000000..a97a9ce --- /dev/null +++ b/archival/libunarchive/header_skip.c @@ -0,0 +1,10 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC header_skip(const file_header_t *file_header UNUSED_PARAM) +{ +} diff --git a/archival/libunarchive/header_verbose_list.c b/archival/libunarchive/header_verbose_list.c new file mode 100644 index 0000000..f059dd9 --- /dev/null +++ b/archival/libunarchive/header_verbose_list.c @@ -0,0 +1,58 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC header_verbose_list(const file_header_t *file_header) +{ + struct tm *mtime = localtime(&(file_header->mtime)); + +#if ENABLE_FEATURE_TAR_UNAME_GNAME + char uid[8]; + char gid[8]; + char *user = file_header->uname; + char *group = file_header->gname; + + if (user == NULL) { + snprintf(uid, sizeof(uid), "%u", (unsigned)file_header->uid); + user = uid; + } + if (group == NULL) { + snprintf(gid, sizeof(gid), "%u", (unsigned)file_header->gid); + group = gid; + } + printf("%s %s/%s %9u %4u-%02u-%02u %02u:%02u:%02u %s", + bb_mode_string(file_header->mode), + user, + group, + (unsigned int) file_header->size, + 1900 + mtime->tm_year, + 1 + mtime->tm_mon, + mtime->tm_mday, + mtime->tm_hour, + mtime->tm_min, + mtime->tm_sec, + file_header->name); +#else /* !FEATURE_TAR_UNAME_GNAME */ + printf("%s %d/%d %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", + bb_mode_string(file_header->mode), + file_header->uid, + file_header->gid, + file_header->size, + 1900 + mtime->tm_year, + 1 + mtime->tm_mon, + mtime->tm_mday, + mtime->tm_hour, + mtime->tm_min, + mtime->tm_sec, + file_header->name); +#endif /* FEATURE_TAR_UNAME_GNAME */ + + if (file_header->link_target) { + printf(" -> %s", file_header->link_target); + } + bb_putchar('\n'); +} diff --git a/archival/libunarchive/init_handle.c b/archival/libunarchive/init_handle.c new file mode 100644 index 0000000..ff7d484 --- /dev/null +++ b/archival/libunarchive/init_handle.c @@ -0,0 +1,22 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +archive_handle_t* FAST_FUNC init_handle(void) +{ + archive_handle_t *archive_handle; + + /* Initialize default values */ + archive_handle = xzalloc(sizeof(archive_handle_t)); + archive_handle->file_header = xzalloc(sizeof(file_header_t)); + archive_handle->action_header = header_skip; + archive_handle->action_data = data_skip; + archive_handle->filter = filter_accept_all; + archive_handle->seek = seek_by_jump; + + return archive_handle; +} diff --git a/archival/libunarchive/open_transformer.c b/archival/libunarchive/open_transformer.c new file mode 100644 index 0000000..42fdd96 --- /dev/null +++ b/archival/libunarchive/open_transformer.c @@ -0,0 +1,64 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +/* transformer(), more than meets the eye */ +/* + * On MMU machine, the transform_prog is removed by macro magic + * in include/unarchive.h. On NOMMU, transformer is removed. + */ +void FAST_FUNC open_transformer(int fd, + USE_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd), + const char *transform_prog) +{ + struct fd_pair fd_pipe; + int pid; + + xpiped_pair(fd_pipe); + +#if BB_MMU + pid = fork(); + if (pid == -1) + bb_perror_msg_and_die("vfork" + 1); +#else + pid = vfork(); + if (pid == -1) + bb_perror_msg_and_die("vfork"); +#endif + + if (pid == 0) { + /* child process */ + close(fd_pipe.rd); /* we don't want to read from the parent */ + // FIXME: error check? +#if BB_MMU + transformer(fd, fd_pipe.wr); + if (ENABLE_FEATURE_CLEAN_UP) { + close(fd_pipe.wr); /* send EOF */ + close(fd); + } + /* must be _exit! bug was actually seen here */ + _exit(EXIT_SUCCESS); +#else + { + char *argv[4]; + xmove_fd(fd, 0); + xmove_fd(fd_pipe.wr, 1); + argv[0] = (char*)transform_prog; + argv[1] = (char*)"-cf"; + argv[2] = (char*)"-"; + argv[3] = NULL; + BB_EXECVP(transform_prog, argv); + bb_perror_msg_and_die("can't exec %s", transform_prog); + } +#endif + /* notreached */ + } + + /* parent process */ + close(fd_pipe.wr); /* don't want to write to the child */ + xmove_fd(fd_pipe.rd, fd); +} diff --git a/archival/libunarchive/seek_by_jump.c b/archival/libunarchive/seek_by_jump.c new file mode 100644 index 0000000..0a259c9 --- /dev/null +++ b/archival/libunarchive/seek_by_jump.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC seek_by_jump(const archive_handle_t *archive_handle, unsigned amount) +{ + if (amount + && lseek(archive_handle->src_fd, (off_t) amount, SEEK_CUR) == (off_t) -1 + ) { + if (errno == ESPIPE) + seek_by_read(archive_handle, amount); + else + bb_perror_msg_and_die("seek failure"); + } +} diff --git a/archival/libunarchive/seek_by_read.c b/archival/libunarchive/seek_by_read.c new file mode 100644 index 0000000..2326a75 --- /dev/null +++ b/archival/libunarchive/seek_by_read.c @@ -0,0 +1,16 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +/* If we are reading through a pipe, or from stdin then we can't lseek, + * we must read and discard the data to skip over it. + */ +void FAST_FUNC seek_by_read(const archive_handle_t *archive_handle, unsigned jump_size) +{ + if (jump_size) + bb_copyfd_exact_size(archive_handle->src_fd, -1, jump_size); +} diff --git a/archival/libunarchive/unpack_ar_archive.c b/archival/libunarchive/unpack_ar_archive.c new file mode 100644 index 0000000..dc2eec2 --- /dev/null +++ b/archival/libunarchive/unpack_ar_archive.c @@ -0,0 +1,21 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive) +{ + char magic[7]; + + xread(ar_archive->src_fd, magic, 7); + if (strncmp(magic, "!", 7) != 0) { + bb_error_msg_and_die("invalid ar magic"); + } + ar_archive->offset += 7; + + while (get_header_ar(ar_archive) == EXIT_SUCCESS) + continue; +} diff --git a/archival/rpm.c b/archival/rpm.c new file mode 100644 index 0000000..4c36067 --- /dev/null +++ b/archival/rpm.c @@ -0,0 +1,407 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini rpm applet for busybox + * + * Copyright (C) 2001,2002 by Laurence Anderson + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include "libbb.h" +#include "unarchive.h" + +#define RPM_HEADER_MAGIC "\216\255\350" +#define RPM_CHAR_TYPE 1 +#define RPM_INT8_TYPE 2 +#define RPM_INT16_TYPE 3 +#define RPM_INT32_TYPE 4 +/* #define RPM_INT64_TYPE 5 ---- These aren't supported (yet) */ +#define RPM_STRING_TYPE 6 +#define RPM_BIN_TYPE 7 +#define RPM_STRING_ARRAY_TYPE 8 +#define RPM_I18NSTRING_TYPE 9 + +#define TAG_NAME 1000 +#define TAG_VERSION 1001 +#define TAG_RELEASE 1002 +#define TAG_SUMMARY 1004 +#define TAG_DESCRIPTION 1005 +#define TAG_BUILDTIME 1006 +#define TAG_BUILDHOST 1007 +#define TAG_SIZE 1009 +#define TAG_VENDOR 1011 +#define TAG_LICENSE 1014 +#define TAG_PACKAGER 1015 +#define TAG_GROUP 1016 +#define TAG_URL 1020 +#define TAG_PREIN 1023 +#define TAG_POSTIN 1024 +#define TAG_FILEFLAGS 1037 +#define TAG_FILEUSERNAME 1039 +#define TAG_FILEGROUPNAME 1040 +#define TAG_SOURCERPM 1044 +#define TAG_PREINPROG 1085 +#define TAG_POSTINPROG 1086 +#define TAG_PREFIXS 1098 +#define TAG_DIRINDEXES 1116 +#define TAG_BASENAMES 1117 +#define TAG_DIRNAMES 1118 +#define RPMFILE_CONFIG (1 << 0) +#define RPMFILE_DOC (1 << 1) + +enum rpm_functions_e { + rpm_query = 1, + rpm_install = 2, + rpm_query_info = 4, + rpm_query_package = 8, + rpm_query_list = 16, + rpm_query_list_doc = 32, + rpm_query_list_config = 64 +}; + +typedef struct { + uint32_t tag; /* 4 byte tag */ + uint32_t type; /* 4 byte type */ + uint32_t offset; /* 4 byte offset */ + uint32_t count; /* 4 byte count */ +} rpm_index; + +static void *map; +static rpm_index **mytags; +static int tagcount; + +static void extract_cpio_gz(int fd); +static rpm_index **rpm_gettags(int fd, int *num_tags); +static int bsearch_rpmtag(const void *key, const void *item); +static char *rpm_getstr(int tag, int itemindex); +static int rpm_getint(int tag, int itemindex); +static int rpm_getcount(int tag); +static void fileaction_dobackup(char *filename, int fileref); +static void fileaction_setowngrp(char *filename, int fileref); +static void loop_through_files(int filetag, void (*fileaction)(char *filename, int fileref)); + +int rpm_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int rpm_main(int argc, char **argv) +{ + int opt = 0, func = 0, rpm_fd, offset; + const int pagesize = getpagesize(); + + while ((opt = getopt(argc, argv, "iqpldc")) != -1) { + switch (opt) { + case 'i': /* First arg: Install mode, with q: Information */ + if (!func) func = rpm_install; + else func |= rpm_query_info; + break; + case 'q': /* First arg: Query mode */ + if (func) bb_show_usage(); + func = rpm_query; + break; + case 'p': /* Query a package */ + func |= rpm_query_package; + break; + case 'l': /* List files in a package */ + func |= rpm_query_list; + break; + case 'd': /* List doc files in a package (implies list) */ + func |= rpm_query_list; + func |= rpm_query_list_doc; + break; + case 'c': /* List config files in a package (implies list) */ + func |= rpm_query_list; + func |= rpm_query_list_config; + break; + default: + bb_show_usage(); + } + } + argv += optind; + //argc -= optind; + if (!argv[0]) bb_show_usage(); + + while (*argv) { + rpm_fd = xopen(*argv++, O_RDONLY); + mytags = rpm_gettags(rpm_fd, &tagcount); + if (!mytags) + bb_error_msg_and_die("error reading rpm header"); + offset = xlseek(rpm_fd, 0, SEEK_CUR); + /* Mimimum is one page */ + map = mmap(0, offset > pagesize ? (offset + offset % pagesize) : pagesize, PROT_READ, MAP_PRIVATE, rpm_fd, 0); + + if (func & rpm_install) { + /* Backup any config files */ + loop_through_files(TAG_BASENAMES, fileaction_dobackup); + /* Extact the archive */ + extract_cpio_gz(rpm_fd); + /* Set the correct file uid/gid's */ + loop_through_files(TAG_BASENAMES, fileaction_setowngrp); + } + else if ((func & (rpm_query|rpm_query_package)) == (rpm_query|rpm_query_package)) { + if (!(func & (rpm_query_info|rpm_query_list))) { + /* If just a straight query, just give package name */ + printf("%s-%s-%s\n", rpm_getstr(TAG_NAME, 0), rpm_getstr(TAG_VERSION, 0), rpm_getstr(TAG_RELEASE, 0)); + } + if (func & rpm_query_info) { + /* Do the nice printout */ + time_t bdate_time; + struct tm *bdate; + char bdatestring[50]; + printf("Name : %-29sRelocations: %s\n", rpm_getstr(TAG_NAME, 0), rpm_getstr(TAG_PREFIXS, 0) ? rpm_getstr(TAG_PREFIXS, 0) : "(not relocateable)"); + printf("Version : %-34sVendor: %s\n", rpm_getstr(TAG_VERSION, 0), rpm_getstr(TAG_VENDOR, 0) ? rpm_getstr(TAG_VENDOR, 0) : "(none)"); + bdate_time = rpm_getint(TAG_BUILDTIME, 0); + bdate = localtime((time_t *) &bdate_time); + strftime(bdatestring, 50, "%a %d %b %Y %T %Z", bdate); + printf("Release : %-30sBuild Date: %s\n", rpm_getstr(TAG_RELEASE, 0), bdatestring); + printf("Install date: %-30sBuild Host: %s\n", "(not installed)", rpm_getstr(TAG_BUILDHOST, 0)); + printf("Group : %-30sSource RPM: %s\n", rpm_getstr(TAG_GROUP, 0), rpm_getstr(TAG_SOURCERPM, 0)); + printf("Size : %-33dLicense: %s\n", rpm_getint(TAG_SIZE, 0), rpm_getstr(TAG_LICENSE, 0)); + printf("URL : %s\n", rpm_getstr(TAG_URL, 0)); + printf("Summary : %s\n", rpm_getstr(TAG_SUMMARY, 0)); + printf("Description :\n%s\n", rpm_getstr(TAG_DESCRIPTION, 0)); + } + if (func & rpm_query_list) { + int count, it, flags; + count = rpm_getcount(TAG_BASENAMES); + for (it = 0; it < count; it++) { + flags = rpm_getint(TAG_FILEFLAGS, it); + switch (func & (rpm_query_list_doc|rpm_query_list_config)) { + case rpm_query_list_doc: + if (!(flags & RPMFILE_DOC)) continue; + break; + case rpm_query_list_config: + if (!(flags & RPMFILE_CONFIG)) continue; + break; + case rpm_query_list_doc|rpm_query_list_config: + if (!(flags & (RPMFILE_CONFIG|RPMFILE_DOC))) continue; + break; + } + printf("%s%s\n", + rpm_getstr(TAG_DIRNAMES, rpm_getint(TAG_DIRINDEXES, it)), + rpm_getstr(TAG_BASENAMES, it)); + } + } + } + free(mytags); + } + return 0; +} + +static void extract_cpio_gz(int fd) +{ + archive_handle_t *archive_handle; + unsigned char magic[2]; +#if BB_MMU + USE_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd); + enum { xformer_prog = 0 }; +#else + enum { xformer = 0 }; + const char *xformer_prog; +#endif + + /* Initialize */ + archive_handle = init_handle(); + archive_handle->seek = seek_by_read; + //archive_handle->action_header = header_list; + archive_handle->action_data = data_extract_all; + archive_handle->ah_flags = ARCHIVE_PRESERVE_DATE | ARCHIVE_CREATE_LEADING_DIRS + /* compat: overwrite existing files. + * try "rpm -i foo.src.rpm" few times in a row - + * standard rpm will not complain. + * (TODO? real rpm creates "file;1234" and then renames it) */ + | ARCHIVE_EXTRACT_UNCONDITIONAL; + archive_handle->src_fd = fd; + /*archive_handle->offset = 0; - init_handle() did it */ + +// TODO: open_zipped does the same + + xread(archive_handle->src_fd, &magic, 2); +#if BB_MMU + xformer = unpack_gz_stream; +#else + xformer_prog = "gunzip"; +#endif + if (magic[0] != 0x1f || magic[1] != 0x8b) { + if (!ENABLE_FEATURE_SEAMLESS_BZ2 + || magic[0] != 'B' || magic[1] != 'Z' + ) { + bb_error_msg_and_die("no gzip" + USE_FEATURE_SEAMLESS_BZ2("/bzip2") + " magic"); + } +#if BB_MMU + xformer = unpack_bz2_stream; +#else + xformer_prog = "bunzip2"; +#endif + } else { +#if !BB_MMU + /* NOMMU version of open_transformer execs an external unzipper that should + * have the file position at the start of the file */ + xlseek(archive_handle->src_fd, 0, SEEK_SET); +#endif + } + + xchdir("/"); /* Install RPM's to root */ + open_transformer(archive_handle->src_fd, xformer, xformer_prog); + archive_handle->offset = 0; + while (get_header_cpio(archive_handle) == EXIT_SUCCESS) + continue; +} + + +static rpm_index **rpm_gettags(int fd, int *num_tags) +{ + /* We should never need mode than 200, and realloc later */ + rpm_index **tags = xzalloc(200 * sizeof(tags[0])); + int pass, tagindex = 0; + + xlseek(fd, 96, SEEK_CUR); /* Seek past the unused lead */ + + /* 1st pass is the signature headers, 2nd is the main stuff */ + for (pass = 0; pass < 2; pass++) { + struct { + char magic[3]; /* 3 byte magic: 0x8e 0xad 0xe8 */ + uint8_t version; /* 1 byte version number */ + uint32_t reserved; /* 4 bytes reserved */ + uint32_t entries; /* Number of entries in header (4 bytes) */ + uint32_t size; /* Size of store (4 bytes) */ + } header; + struct BUG_header { + char BUG_header[sizeof(header) == 16 ? 1 : -1]; + }; + rpm_index *tmpindex; + int storepos; + + xread(fd, &header, sizeof(header)); + if (strncmp((char *) &header.magic, RPM_HEADER_MAGIC, 3) != 0) + return NULL; /* Invalid magic */ + if (header.version != 1) + return NULL; /* This program only supports v1 headers */ + header.size = ntohl(header.size); + header.entries = ntohl(header.entries); + storepos = xlseek(fd,0,SEEK_CUR) + header.entries * 16; + + while (header.entries--) { + tmpindex = tags[tagindex++] = xmalloc(sizeof(*tmpindex)); + xread(fd, tmpindex, sizeof(*tmpindex)); + tmpindex->tag = ntohl(tmpindex->tag); + tmpindex->type = ntohl(tmpindex->type); + tmpindex->count = ntohl(tmpindex->count); + tmpindex->offset = storepos + ntohl(tmpindex->offset); + if (pass == 0) + tmpindex->tag -= 743; + } + xlseek(fd, header.size, SEEK_CUR); /* Seek past store */ + /* Skip padding to 8 byte boundary after reading signature headers */ + if (pass == 0) + xlseek(fd, (8 - (xlseek(fd,0,SEEK_CUR) % 8)) % 8, SEEK_CUR); + } + tags = xrealloc(tags, tagindex * sizeof(tags[0])); /* realloc tags to save space */ + *num_tags = tagindex; + return tags; /* All done, leave the file at the start of the gzipped cpio archive */ +} + +static int bsearch_rpmtag(const void *key, const void *item) +{ + int *tag = (int *)key; + rpm_index **tmp = (rpm_index **) item; + return (*tag - tmp[0]->tag); +} + +static int rpm_getcount(int tag) +{ + rpm_index **found; + found = bsearch(&tag, mytags, tagcount, sizeof(struct rpmtag *), bsearch_rpmtag); + if (!found) + return 0; + return found[0]->count; +} + +static char *rpm_getstr(int tag, int itemindex) +{ + rpm_index **found; + found = bsearch(&tag, mytags, tagcount, sizeof(struct rpmtag *), bsearch_rpmtag); + if (!found || itemindex >= found[0]->count) + return NULL; + if (found[0]->type == RPM_STRING_TYPE || found[0]->type == RPM_I18NSTRING_TYPE || found[0]->type == RPM_STRING_ARRAY_TYPE) { + int n; + char *tmpstr = (char *) (map + found[0]->offset); + for (n=0; n < itemindex; n++) + tmpstr = tmpstr + strlen(tmpstr) + 1; + return tmpstr; + } + return NULL; +} + +static int rpm_getint(int tag, int itemindex) +{ + rpm_index **found; + int *tmpint; /* NB: using int8_t* would be easier to code */ + + /* gcc throws warnings here when sizeof(void*)!=sizeof(int) ... + * it's ok to ignore it because tag won't be used as a pointer */ + found = bsearch(&tag, mytags, tagcount, sizeof(struct rpmtag *), bsearch_rpmtag); + if (!found || itemindex >= found[0]->count) + return -1; + + tmpint = (int *) (map + found[0]->offset); + + if (found[0]->type == RPM_INT32_TYPE) { + tmpint = (int *) ((char *) tmpint + itemindex*4); + /*return ntohl(*tmpint);*/ + /* int can be != int32_t */ + return ntohl(*(int32_t*)tmpint); + } + if (found[0]->type == RPM_INT16_TYPE) { + tmpint = (int *) ((char *) tmpint + itemindex*2); + /* ??? read int, and THEN ntohs() it?? */ + /*return ntohs(*tmpint);*/ + return ntohs(*(int16_t*)tmpint); + } + if (found[0]->type == RPM_INT8_TYPE) { + tmpint = (int *) ((char *) tmpint + itemindex); + /* ??? why we don't read byte here??? */ + /*return ntohs(*tmpint);*/ + return *(int8_t*)tmpint; + } + return -1; +} + +static void fileaction_dobackup(char *filename, int fileref) +{ + struct stat oldfile; + int stat_res; + char *newname; + if (rpm_getint(TAG_FILEFLAGS, fileref) & RPMFILE_CONFIG) { + /* Only need to backup config files */ + stat_res = lstat(filename, &oldfile); + if (stat_res == 0 && S_ISREG(oldfile.st_mode)) { + /* File already exists - really should check MD5's etc to see if different */ + newname = xasprintf("%s.rpmorig", filename); + copy_file(filename, newname, FILEUTILS_RECUR | FILEUTILS_PRESERVE_STATUS); + remove_file(filename, FILEUTILS_RECUR | FILEUTILS_FORCE); + free(newname); + } + } +} + +static void fileaction_setowngrp(char *filename, int fileref) +{ + /* real rpm warns: "user foo does not exist - using " */ + struct passwd *pw = getpwnam(rpm_getstr(TAG_FILEUSERNAME, fileref)); + int uid = pw ? pw->pw_uid : getuid(); /* or euid? */ + struct group *gr = getgrnam(rpm_getstr(TAG_FILEGROUPNAME, fileref)); + int gid = gr ? gr->gr_gid : getgid(); + chown(filename, uid, gid); +} + +static void loop_through_files(int filetag, void (*fileaction)(char *filename, int fileref)) +{ + int count = 0; + while (rpm_getstr(filetag, count)) { + char* filename = xasprintf("%s%s", + rpm_getstr(TAG_DIRNAMES, rpm_getint(TAG_DIRINDEXES, count)), + rpm_getstr(TAG_BASENAMES, count)); + fileaction(filename, count++); + free(filename); + } +} diff --git a/archival/rpm2cpio.c b/archival/rpm2cpio.c new file mode 100644 index 0000000..ee93871 --- /dev/null +++ b/archival/rpm2cpio.c @@ -0,0 +1,89 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini rpm2cpio implementation for busybox + * + * Copyright (C) 2001 by Laurence Anderson + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ +#include "libbb.h" +#include "unarchive.h" + +#define RPM_MAGIC "\355\253\356\333" +#define RPM_HEADER_MAGIC "\216\255\350" + +struct rpm_lead { + unsigned char magic[4]; + uint8_t major, minor; + uint16_t type; + uint16_t archnum; + char name[66]; + uint16_t osnum; + uint16_t signature_type; + char reserved[16]; +}; + +struct rpm_header { + char magic[3]; /* 3 byte magic: 0x8e 0xad 0xe8 */ + uint8_t version; /* 1 byte version number */ + uint32_t reserved; /* 4 bytes reserved */ + uint32_t entries; /* Number of entries in header (4 bytes) */ + uint32_t size; /* Size of store (4 bytes) */ +}; + +static void skip_header(int rpm_fd) +{ + struct rpm_header header; + + xread(rpm_fd, &header, sizeof(struct rpm_header)); + if (strncmp((char *) &header.magic, RPM_HEADER_MAGIC, 3) != 0) { + bb_error_msg_and_die("invalid RPM header magic"); /* Invalid magic */ + } + if (header.version != 1) { + bb_error_msg_and_die("unsupported RPM header version"); /* This program only supports v1 headers */ + } + header.entries = ntohl(header.entries); + header.size = ntohl(header.size); + lseek (rpm_fd, 16 * header.entries, SEEK_CUR); /* Seek past index entries */ + lseek (rpm_fd, header.size, SEEK_CUR); /* Seek past store */ +} + +/* No getopt required */ +int rpm2cpio_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int rpm2cpio_main(int argc, char **argv) +{ + struct rpm_lead lead; + int rpm_fd; + unsigned char magic[2]; + + if (argc == 1) { + rpm_fd = STDIN_FILENO; + } else { + rpm_fd = xopen(argv[1], O_RDONLY); + } + + xread(rpm_fd, &lead, sizeof(struct rpm_lead)); + if (strncmp((char *) &lead.magic, RPM_MAGIC, 4) != 0) { + bb_error_msg_and_die("invalid RPM magic"); /* Just check the magic, the rest is irrelevant */ + } + + /* Skip the signature header */ + skip_header(rpm_fd); + lseek(rpm_fd, (8 - (lseek(rpm_fd, 0, SEEK_CUR) % 8)) % 8, SEEK_CUR); + + /* Skip the main header */ + skip_header(rpm_fd); + + xread(rpm_fd, &magic, 2); + if ((magic[0] != 0x1f) || (magic[1] != 0x8b)) { + bb_error_msg_and_die("invalid gzip magic"); + } + + if (unpack_gz_stream(rpm_fd, STDOUT_FILENO) < 0) { + bb_error_msg("error inflating"); + } + + close(rpm_fd); + + return 0; +} diff --git a/archival/tar.c b/archival/tar.c new file mode 100644 index 0000000..47cc39c --- /dev/null +++ b/archival/tar.c @@ -0,0 +1,988 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini tar implementation for busybox + * + * Modified to use common extraction code used by ar, cpio, dpkg-deb, dpkg + * by Glenn McGrath + * + * Note, that as of BusyBox-0.43, tar has been completely rewritten from the + * ground up. It still has remnants of the old code lying about, but it is + * very different now (i.e., cleaner, less global variables, etc.) + * + * Copyright (C) 1999-2004 by Erik Andersen + * + * Based in part in the tar implementation in sash + * Copyright (c) 1999 by David I. Bell + * Permission is granted to use, distribute, or modify this source, + * provided that this copyright notice remains intact. + * Permission to distribute sash derived code under the GPL has been granted. + * + * Based in part on the tar implementation from busybox-0.28 + * Copyright (C) 1995 Bruce Perens + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + */ + +#include +#include "libbb.h" +#include "unarchive.h" + +/* FIXME: Stop using this non-standard feature */ +#ifndef FNM_LEADING_DIR +#define FNM_LEADING_DIR 0 +#endif + + +#define block_buf bb_common_bufsiz1 + + +#if !ENABLE_FEATURE_SEAMLESS_GZ && !ENABLE_FEATURE_SEAMLESS_BZ2 +/* Do not pass gzip flag to writeTarFile() */ +#define writeTarFile(tar_fd, verboseFlag, dereferenceFlag, include, exclude, gzip) \ + writeTarFile(tar_fd, verboseFlag, dereferenceFlag, include, exclude) +#endif + + +#if ENABLE_FEATURE_TAR_CREATE + +/* Tar file constants */ + +#define TAR_BLOCK_SIZE 512 + +/* POSIX tar Header Block, from POSIX 1003.1-1990 */ +#define NAME_SIZE 100 +#define NAME_SIZE_STR "100" +typedef struct TarHeader TarHeader; +struct TarHeader { /* byte offset */ + char name[NAME_SIZE]; /* 0-99 */ + char mode[8]; /* 100-107 */ + char uid[8]; /* 108-115 */ + char gid[8]; /* 116-123 */ + char size[12]; /* 124-135 */ + char mtime[12]; /* 136-147 */ + char chksum[8]; /* 148-155 */ + char typeflag; /* 156-156 */ + char linkname[NAME_SIZE]; /* 157-256 */ + /* POSIX: "ustar" NUL "00" */ + /* GNU tar: "ustar " NUL */ + /* Normally it's defined as magic[6] followed by + * version[2], but we put them together to save code. + */ + char magic[8]; /* 257-264 */ + char uname[32]; /* 265-296 */ + char gname[32]; /* 297-328 */ + char devmajor[8]; /* 329-336 */ + char devminor[8]; /* 337-344 */ + char prefix[155]; /* 345-499 */ + char padding[12]; /* 500-512 (pad to exactly TAR_BLOCK_SIZE) */ +}; + +/* +** writeTarFile(), writeFileToTarball(), and writeTarHeader() are +** the only functions that deal with the HardLinkInfo structure. +** Even these functions use the xxxHardLinkInfo() functions. +*/ +typedef struct HardLinkInfo HardLinkInfo; +struct HardLinkInfo { + HardLinkInfo *next; /* Next entry in list */ + dev_t dev; /* Device number */ + ino_t ino; /* Inode number */ + short linkCount; /* (Hard) Link Count */ + char name[1]; /* Start of filename (must be last) */ +}; + +/* Some info to be carried along when creating a new tarball */ +typedef struct TarBallInfo TarBallInfo; +struct TarBallInfo { + int tarFd; /* Open-for-write file descriptor + * for the tarball */ + struct stat statBuf; /* Stat info for the tarball, letting + * us know the inode and device that the + * tarball lives, so we can avoid trying + * to include the tarball into itself */ + int verboseFlag; /* Whether to print extra stuff or not */ + const llist_t *excludeList; /* List of files to not include */ + HardLinkInfo *hlInfoHead; /* Hard Link Tracking Information */ + HardLinkInfo *hlInfo; /* Hard Link Info for the current file */ +}; + +/* A nice enum with all the possible tar file content types */ +enum TarFileType { + REGTYPE = '0', /* regular file */ + REGTYPE0 = '\0', /* regular file (ancient bug compat) */ + LNKTYPE = '1', /* hard link */ + SYMTYPE = '2', /* symbolic link */ + CHRTYPE = '3', /* character special */ + BLKTYPE = '4', /* block special */ + DIRTYPE = '5', /* directory */ + FIFOTYPE = '6', /* FIFO special */ + CONTTYPE = '7', /* reserved */ + GNULONGLINK = 'K', /* GNU long (>100 chars) link name */ + GNULONGNAME = 'L', /* GNU long (>100 chars) file name */ +}; +typedef enum TarFileType TarFileType; + +/* Might be faster (and bigger) if the dev/ino were stored in numeric order;) */ +static void addHardLinkInfo(HardLinkInfo **hlInfoHeadPtr, + struct stat *statbuf, + const char *fileName) +{ + /* Note: hlInfoHeadPtr can never be NULL! */ + HardLinkInfo *hlInfo; + + hlInfo = xmalloc(sizeof(HardLinkInfo) + strlen(fileName)); + hlInfo->next = *hlInfoHeadPtr; + *hlInfoHeadPtr = hlInfo; + hlInfo->dev = statbuf->st_dev; + hlInfo->ino = statbuf->st_ino; + hlInfo->linkCount = statbuf->st_nlink; + strcpy(hlInfo->name, fileName); +} + +static void freeHardLinkInfo(HardLinkInfo **hlInfoHeadPtr) +{ + HardLinkInfo *hlInfo; + HardLinkInfo *hlInfoNext; + + if (hlInfoHeadPtr) { + hlInfo = *hlInfoHeadPtr; + while (hlInfo) { + hlInfoNext = hlInfo->next; + free(hlInfo); + hlInfo = hlInfoNext; + } + *hlInfoHeadPtr = NULL; + } +} + +/* Might be faster (and bigger) if the dev/ino were stored in numeric order;) */ +static HardLinkInfo *findHardLinkInfo(HardLinkInfo *hlInfo, struct stat *statbuf) +{ + while (hlInfo) { + if ((statbuf->st_ino == hlInfo->ino) && (statbuf->st_dev == hlInfo->dev)) + break; + hlInfo = hlInfo->next; + } + return hlInfo; +} + +/* Put an octal string into the specified buffer. + * The number is zero padded and possibly null terminated. + * Stores low-order bits only if whole value does not fit. */ +static void putOctal(char *cp, int len, off_t value) +{ + char tempBuffer[sizeof(off_t)*3+1]; + char *tempString = tempBuffer; + int width; + + width = sprintf(tempBuffer, "%0*"OFF_FMT"o", len, value); + tempString += (width - len); + + /* If string has leading zeroes, we can drop one */ + /* and field will have trailing '\0' */ + /* (increases chances of compat with other tars) */ + if (tempString[0] == '0') + tempString++; + + /* Copy the string to the field */ + memcpy(cp, tempString, len); +} +#define PUT_OCTAL(a, b) putOctal((a), sizeof(a), (b)) + +static void chksum_and_xwrite(int fd, struct TarHeader* hp) +{ + /* POSIX says that checksum is done on unsigned bytes + * (Sun and HP-UX gets it wrong... more details in + * GNU tar source) */ + const unsigned char *cp; + int chksum, size; + + strcpy(hp->magic, "ustar "); + + /* Calculate and store the checksum (i.e., the sum of all of the bytes of + * the header). The checksum field must be filled with blanks for the + * calculation. The checksum field is formatted differently from the + * other fields: it has 6 digits, a null, then a space -- rather than + * digits, followed by a null like the other fields... */ + memset(hp->chksum, ' ', sizeof(hp->chksum)); + cp = (const unsigned char *) hp; + chksum = 0; + size = sizeof(*hp); + do { chksum += *cp++; } while (--size); + putOctal(hp->chksum, sizeof(hp->chksum)-1, chksum); + + /* Now write the header out to disk */ + xwrite(fd, hp, sizeof(*hp)); +} + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS +static void writeLongname(int fd, int type, const char *name, int dir) +{ + static const struct { + char mode[8]; /* 100-107 */ + char uid[8]; /* 108-115 */ + char gid[8]; /* 116-123 */ + char size[12]; /* 124-135 */ + char mtime[12]; /* 136-147 */ + } prefilled = { + "0000000", + "0000000", + "0000000", + "00000000000", + "00000000000", + }; + struct TarHeader header; + int size; + + dir = !!dir; /* normalize: 0/1 */ + size = strlen(name) + 1 + dir; /* GNU tar uses strlen+1 */ + /* + dir: account for possible '/' */ + + memset(&header, 0, sizeof(header)); + strcpy(header.name, "././@LongLink"); + memcpy(header.mode, prefilled.mode, sizeof(prefilled)); + PUT_OCTAL(header.size, size); + header.typeflag = type; + chksum_and_xwrite(fd, &header); + + /* Write filename[/] and pad the block. */ + /* dir=0: writes 'name', pads */ + /* dir=1: writes 'name', writes '/', pads */ + dir *= 2; + xwrite(fd, name, size - dir); + xwrite(fd, "/", dir); + size = (-size) & (TAR_BLOCK_SIZE-1); + memset(&header, 0, size); + xwrite(fd, &header, size); +} +#endif + +/* Write out a tar header for the specified file/directory/whatever */ +void BUG_tar_header_size(void); +static int writeTarHeader(struct TarBallInfo *tbInfo, + const char *header_name, const char *fileName, struct stat *statbuf) +{ + struct TarHeader header; + + if (sizeof(header) != 512) + BUG_tar_header_size(); + + memset(&header, 0, sizeof(struct TarHeader)); + + strncpy(header.name, header_name, sizeof(header.name)); + + /* POSIX says to mask mode with 07777. */ + PUT_OCTAL(header.mode, statbuf->st_mode & 07777); + PUT_OCTAL(header.uid, statbuf->st_uid); + PUT_OCTAL(header.gid, statbuf->st_gid); + memset(header.size, '0', sizeof(header.size)-1); /* Regular file size is handled later */ + PUT_OCTAL(header.mtime, statbuf->st_mtime); + + /* Enter the user and group names */ + safe_strncpy(header.uname, get_cached_username(statbuf->st_uid), sizeof(header.uname)); + safe_strncpy(header.gname, get_cached_groupname(statbuf->st_gid), sizeof(header.gname)); + + if (tbInfo->hlInfo) { + /* This is a hard link */ + header.typeflag = LNKTYPE; + strncpy(header.linkname, tbInfo->hlInfo->name, + sizeof(header.linkname)); +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + /* Write out long linkname if needed */ + if (header.linkname[sizeof(header.linkname)-1]) + writeLongname(tbInfo->tarFd, GNULONGLINK, + tbInfo->hlInfo->name, 0); +#endif + } else if (S_ISLNK(statbuf->st_mode)) { + char *lpath = xmalloc_readlink_or_warn(fileName); + if (!lpath) + return FALSE; + header.typeflag = SYMTYPE; + strncpy(header.linkname, lpath, sizeof(header.linkname)); +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + /* Write out long linkname if needed */ + if (header.linkname[sizeof(header.linkname)-1]) + writeLongname(tbInfo->tarFd, GNULONGLINK, lpath, 0); +#else + /* If it is larger than 100 bytes, bail out */ + if (header.linkname[sizeof(header.linkname)-1]) { + free(lpath); + bb_error_msg("names longer than "NAME_SIZE_STR" chars not supported"); + return FALSE; + } +#endif + free(lpath); + } else if (S_ISDIR(statbuf->st_mode)) { + header.typeflag = DIRTYPE; + /* Append '/' only if there is a space for it */ + if (!header.name[sizeof(header.name)-1]) + header.name[strlen(header.name)] = '/'; + } else if (S_ISCHR(statbuf->st_mode)) { + header.typeflag = CHRTYPE; + PUT_OCTAL(header.devmajor, major(statbuf->st_rdev)); + PUT_OCTAL(header.devminor, minor(statbuf->st_rdev)); + } else if (S_ISBLK(statbuf->st_mode)) { + header.typeflag = BLKTYPE; + PUT_OCTAL(header.devmajor, major(statbuf->st_rdev)); + PUT_OCTAL(header.devminor, minor(statbuf->st_rdev)); + } else if (S_ISFIFO(statbuf->st_mode)) { + header.typeflag = FIFOTYPE; + } else if (S_ISREG(statbuf->st_mode)) { + if (sizeof(statbuf->st_size) > 4 + && statbuf->st_size > (off_t)0777777777777LL + ) { + bb_error_msg_and_die("cannot store file '%s' " + "of size %"OFF_FMT"d, aborting", + fileName, statbuf->st_size); + } + header.typeflag = REGTYPE; + PUT_OCTAL(header.size, statbuf->st_size); + } else { + bb_error_msg("%s: unknown file type", fileName); + return FALSE; + } + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + /* Write out long name if needed */ + /* (we, like GNU tar, output long linkname *before* long name) */ + if (header.name[sizeof(header.name)-1]) + writeLongname(tbInfo->tarFd, GNULONGNAME, + header_name, S_ISDIR(statbuf->st_mode)); +#endif + + /* Now write the header out to disk */ + chksum_and_xwrite(tbInfo->tarFd, &header); + + /* Now do the verbose thing (or not) */ + if (tbInfo->verboseFlag) { + FILE *vbFd = stdout; + + if (tbInfo->tarFd == STDOUT_FILENO) /* If the archive goes to stdout, verbose to stderr */ + vbFd = stderr; + /* GNU "tar cvvf" prints "extended" listing a-la "ls -l" */ + /* We don't have such excesses here: for us "v" == "vv" */ + /* '/' is probably a GNUism */ + fprintf(vbFd, "%s%s\n", header_name, + S_ISDIR(statbuf->st_mode) ? "/" : ""); + } + + return TRUE; +} + +#if ENABLE_FEATURE_TAR_FROM +static int exclude_file(const llist_t *excluded_files, const char *file) +{ + while (excluded_files) { + if (excluded_files->data[0] == '/') { + if (fnmatch(excluded_files->data, file, + FNM_PATHNAME | FNM_LEADING_DIR) == 0) + return 1; + } else { + const char *p; + + for (p = file; p[0] != '\0'; p++) { + if ((p == file || p[-1] == '/') && p[0] != '/' && + fnmatch(excluded_files->data, p, + FNM_PATHNAME | FNM_LEADING_DIR) == 0) + return 1; + } + } + excluded_files = excluded_files->link; + } + + return 0; +} +#else +#define exclude_file(excluded_files, file) 0 +#endif + +static int FAST_FUNC writeFileToTarball(const char *fileName, struct stat *statbuf, + void *userData, int depth UNUSED_PARAM) +{ + struct TarBallInfo *tbInfo = (struct TarBallInfo *) userData; + const char *header_name; + int inputFileFd = -1; + + /* Strip leading '/' (must be before memorizing hardlink's name) */ + header_name = fileName; + while (header_name[0] == '/') { + static smallint warned; + + if (!warned) { + bb_error_msg("removing leading '/' from member names"); + warned = 1; + } + header_name++; + } + + if (header_name[0] == '\0') + return TRUE; + + /* It is against the rules to archive a socket */ + if (S_ISSOCK(statbuf->st_mode)) { + bb_error_msg("%s: socket ignored", fileName); + return TRUE; + } + + /* + * Check to see if we are dealing with a hard link. + * If so - + * Treat the first occurance of a given dev/inode as a file while + * treating any additional occurances as hard links. This is done + * by adding the file information to the HardLinkInfo linked list. + */ + tbInfo->hlInfo = NULL; + if (statbuf->st_nlink > 1) { + tbInfo->hlInfo = findHardLinkInfo(tbInfo->hlInfoHead, statbuf); + if (tbInfo->hlInfo == NULL) + addHardLinkInfo(&tbInfo->hlInfoHead, statbuf, header_name); + } + + /* It is a bad idea to store the archive we are in the process of creating, + * so check the device and inode to be sure that this particular file isn't + * the new tarball */ + if (tbInfo->statBuf.st_dev == statbuf->st_dev + && tbInfo->statBuf.st_ino == statbuf->st_ino + ) { + bb_error_msg("%s: file is the archive; skipping", fileName); + return TRUE; + } + + if (exclude_file(tbInfo->excludeList, header_name)) + return SKIP; + +#if !ENABLE_FEATURE_TAR_GNU_EXTENSIONS + if (strlen(header_name) >= NAME_SIZE) { + bb_error_msg("names longer than "NAME_SIZE_STR" chars not supported"); + return TRUE; + } +#endif + + /* Is this a regular file? */ + if (tbInfo->hlInfo == NULL && S_ISREG(statbuf->st_mode)) { + /* open the file we want to archive, and make sure all is well */ + inputFileFd = open_or_warn(fileName, O_RDONLY); + if (inputFileFd < 0) { + return FALSE; + } + } + + /* Add an entry to the tarball */ + if (writeTarHeader(tbInfo, header_name, fileName, statbuf) == FALSE) { + return FALSE; + } + + /* If it was a regular file, write out the body */ + if (inputFileFd >= 0) { + size_t readSize; + /* Write the file to the archive. */ + /* We record size into header first, */ + /* and then write out file. If file shrinks in between, */ + /* tar will be corrupted. So we don't allow for that. */ + /* NB: GNU tar 1.16 warns and pads with zeroes */ + /* or even seeks back and updates header */ + bb_copyfd_exact_size(inputFileFd, tbInfo->tarFd, statbuf->st_size); + ////off_t readSize; + ////readSize = bb_copyfd_size(inputFileFd, tbInfo->tarFd, statbuf->st_size); + ////if (readSize != statbuf->st_size && readSize >= 0) { + //// bb_error_msg_and_die("short read from %s, aborting", fileName); + ////} + + /* Check that file did not grow in between? */ + /* if (safe_read(inputFileFd, 1) == 1) warn but continue? */ + + close(inputFileFd); + + /* Pad the file up to the tar block size */ + /* (a few tricks here in the name of code size) */ + readSize = (-(int)statbuf->st_size) & (TAR_BLOCK_SIZE-1); + memset(block_buf, 0, readSize); + xwrite(tbInfo->tarFd, block_buf, readSize); + } + + return TRUE; +} + +#if ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 +#if !(ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_FEATURE_SEAMLESS_BZ2) +#define vfork_compressor(tar_fd, gzip) vfork_compressor(tar_fd) +#endif +/* Don't inline: vfork scares gcc and pessimizes code */ +static void NOINLINE vfork_compressor(int tar_fd, int gzip) +{ + pid_t gzipPid; +#if ENABLE_FEATURE_SEAMLESS_GZ && ENABLE_FEATURE_SEAMLESS_BZ2 + const char *zip_exec = (gzip == 1) ? "gzip" : "bzip2"; +#elif ENABLE_FEATURE_SEAMLESS_GZ + const char *zip_exec = "gzip"; +#else /* only ENABLE_FEATURE_SEAMLESS_BZ2 */ + const char *zip_exec = "bzip2"; +#endif + // On Linux, vfork never unpauses parent early, although standard + // allows for that. Do we want to waste bytes checking for it? +#define WAIT_FOR_CHILD 0 + volatile int vfork_exec_errno = 0; + struct fd_pair gzipDataPipe; +#if WAIT_FOR_CHILD + struct fd_pair gzipStatusPipe; + xpiped_pair(gzipStatusPipe); +#endif + xpiped_pair(gzipDataPipe); + + signal(SIGPIPE, SIG_IGN); /* we only want EPIPE on errors */ + +#if defined(__GNUC__) && __GNUC__ + /* Avoid vfork clobbering */ + (void) &zip_exec; +#endif + + gzipPid = vfork(); + if (gzipPid < 0) + bb_perror_msg_and_die("vfork"); + + if (gzipPid == 0) { + /* child */ + /* NB: close _first_, then move fds! */ + close(gzipDataPipe.wr); +#if WAIT_FOR_CHILD + close(gzipStatusPipe.rd); + /* gzipStatusPipe.wr will close only on exec - + * parent waits for this close to happen */ + fcntl(gzipStatusPipe.wr, F_SETFD, FD_CLOEXEC); +#endif + xmove_fd(gzipDataPipe.rd, 0); + xmove_fd(tar_fd, 1); + /* exec gzip/bzip2 program/applet */ + BB_EXECLP(zip_exec, zip_exec, "-f", NULL); + vfork_exec_errno = errno; + _exit(EXIT_FAILURE); + } + + /* parent */ + xmove_fd(gzipDataPipe.wr, tar_fd); + close(gzipDataPipe.rd); +#if WAIT_FOR_CHILD + close(gzipStatusPipe.wr); + while (1) { + char buf; + int n; + + /* Wait until child execs (or fails to) */ + n = full_read(gzipStatusPipe.rd, &buf, 1); + if (n < 0 /* && errno == EAGAIN */) + continue; /* try it again */ + } + close(gzipStatusPipe.rd); +#endif + if (vfork_exec_errno) { + errno = vfork_exec_errno; + bb_perror_msg_and_die("cannot exec %s", zip_exec); + } +} +#endif /* ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 */ + + +/* gcc 4.2.1 inlines it, making code bigger */ +static NOINLINE int writeTarFile(int tar_fd, int verboseFlag, + int dereferenceFlag, const llist_t *include, + const llist_t *exclude, int gzip) +{ + int errorFlag = FALSE; + struct TarBallInfo tbInfo; + + tbInfo.hlInfoHead = NULL; + tbInfo.tarFd = tar_fd; + tbInfo.verboseFlag = verboseFlag; + + /* Store the stat info for the tarball's file, so + * can avoid including the tarball into itself.... */ + if (fstat(tbInfo.tarFd, &tbInfo.statBuf) < 0) + bb_perror_msg_and_die("cannot stat tar file"); + +#if ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 + if (gzip) + vfork_compressor(tbInfo.tarFd, gzip); +#endif + + tbInfo.excludeList = exclude; + + /* Read the directory/files and iterate over them one at a time */ + while (include) { + if (!recursive_action(include->data, ACTION_RECURSE | + (dereferenceFlag ? ACTION_FOLLOWLINKS : 0), + writeFileToTarball, writeFileToTarball, &tbInfo, 0)) + { + errorFlag = TRUE; + } + include = include->link; + } + /* Write two empty blocks to the end of the archive */ + memset(block_buf, 0, 2*TAR_BLOCK_SIZE); + xwrite(tbInfo.tarFd, block_buf, 2*TAR_BLOCK_SIZE); + + /* To be pedantically correct, we would check if the tarball + * is smaller than 20 tar blocks, and pad it if it was smaller, + * but that isn't necessary for GNU tar interoperability, and + * so is considered a waste of space */ + + /* Close so the child process (if any) will exit */ + close(tbInfo.tarFd); + + /* Hang up the tools, close up shop, head home */ + if (ENABLE_FEATURE_CLEAN_UP) + freeHardLinkInfo(&tbInfo.hlInfoHead); + + if (errorFlag) + bb_error_msg("error exit delayed from previous errors"); + +#if ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 + if (gzip) { + int status; + if (safe_waitpid(-1, &status, 0) == -1) + bb_perror_msg("waitpid"); + else if (!WIFEXITED(status) || WEXITSTATUS(status)) + /* gzip was killed or has exited with nonzero! */ + errorFlag = TRUE; + } +#endif + return errorFlag; +} +#else +int writeTarFile(int tar_fd, int verboseFlag, + int dereferenceFlag, const llist_t *include, + const llist_t *exclude, int gzip); +#endif /* FEATURE_TAR_CREATE */ + +#if ENABLE_FEATURE_TAR_FROM +static llist_t *append_file_list_to_list(llist_t *list) +{ + FILE *src_stream; + char *line; + llist_t *newlist = NULL; + + while (list) { + src_stream = xfopen_for_read(llist_pop(&list)); + while ((line = xmalloc_fgetline(src_stream)) != NULL) { + /* kill trailing '/' unless the string is just "/" */ + char *cp = last_char_is(line, '/'); + if (cp > line) + *cp = '\0'; + llist_add_to(&newlist, line); + } + fclose(src_stream); + } + return newlist; +} +#else +#define append_file_list_to_list(x) 0 +#endif + +#if ENABLE_FEATURE_SEAMLESS_Z +static char FAST_FUNC get_header_tar_Z(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + /* do the decompression, and cleanup */ + if (xread_char(archive_handle->src_fd) != 0x1f + || xread_char(archive_handle->src_fd) != 0x9d + ) { + bb_error_msg_and_die("invalid magic"); + } + + open_transformer(archive_handle->src_fd, unpack_Z_stream, "uncompress"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} +#else +#define get_header_tar_Z NULL +#endif + +#ifdef CHECK_FOR_CHILD_EXITCODE +/* Looks like it isn't needed - tar detects malformed (truncated) + * archive if e.g. bunzip2 fails */ +static int child_error; + +static void handle_SIGCHLD(int status) +{ + /* Actually, 'status' is a signo. We reuse it for other needs */ + + /* Wait for any child without blocking */ + if (wait_any_nohang(&status) < 0) + /* wait failed?! I'm confused... */ + return; + + if (WIFEXITED(status) && WEXITSTATUS(status)==0) + /* child exited with 0 */ + return; + /* Cannot happen? + if (!WIFSIGNALED(status) && !WIFEXITED(status)) return; */ + child_error = 1; +} +#endif + +enum { + OPTBIT_KEEP_OLD = 7, + USE_FEATURE_TAR_CREATE( OPTBIT_CREATE ,) + USE_FEATURE_TAR_CREATE( OPTBIT_DEREFERENCE ,) + USE_FEATURE_SEAMLESS_BZ2( OPTBIT_BZIP2 ,) + USE_FEATURE_SEAMLESS_LZMA(OPTBIT_LZMA ,) + USE_FEATURE_TAR_FROM( OPTBIT_INCLUDE_FROM,) + USE_FEATURE_TAR_FROM( OPTBIT_EXCLUDE_FROM,) + USE_FEATURE_SEAMLESS_GZ( OPTBIT_GZIP ,) + USE_FEATURE_SEAMLESS_Z( OPTBIT_COMPRESS ,) + OPTBIT_NOPRESERVE_OWN, + OPTBIT_NOPRESERVE_PERM, + OPT_TEST = 1 << 0, // t + OPT_EXTRACT = 1 << 1, // x + OPT_BASEDIR = 1 << 2, // C + OPT_TARNAME = 1 << 3, // f + OPT_2STDOUT = 1 << 4, // O + OPT_P = 1 << 5, // p + OPT_VERBOSE = 1 << 6, // v + OPT_KEEP_OLD = 1 << 7, // k + OPT_CREATE = USE_FEATURE_TAR_CREATE( (1 << OPTBIT_CREATE )) + 0, // c + OPT_DEREFERENCE = USE_FEATURE_TAR_CREATE( (1 << OPTBIT_DEREFERENCE )) + 0, // h + OPT_BZIP2 = USE_FEATURE_SEAMLESS_BZ2( (1 << OPTBIT_BZIP2 )) + 0, // j + OPT_LZMA = USE_FEATURE_SEAMLESS_LZMA((1 << OPTBIT_LZMA )) + 0, // a + OPT_INCLUDE_FROM = USE_FEATURE_TAR_FROM( (1 << OPTBIT_INCLUDE_FROM)) + 0, // T + OPT_EXCLUDE_FROM = USE_FEATURE_TAR_FROM( (1 << OPTBIT_EXCLUDE_FROM)) + 0, // X + OPT_GZIP = USE_FEATURE_SEAMLESS_GZ( (1 << OPTBIT_GZIP )) + 0, // z + OPT_COMPRESS = USE_FEATURE_SEAMLESS_Z( (1 << OPTBIT_COMPRESS )) + 0, // Z + OPT_NOPRESERVE_OWN = 1 << OPTBIT_NOPRESERVE_OWN , // no-same-owner + OPT_NOPRESERVE_PERM = 1 << OPTBIT_NOPRESERVE_PERM, // no-same-permissions +}; +#if ENABLE_FEATURE_TAR_LONG_OPTIONS +static const char tar_longopts[] ALIGN1 = + "list\0" No_argument "t" + "extract\0" No_argument "x" + "directory\0" Required_argument "C" + "file\0" Required_argument "f" + "to-stdout\0" No_argument "O" + "same-permissions\0" No_argument "p" + "verbose\0" No_argument "v" + "keep-old\0" No_argument "k" +# if ENABLE_FEATURE_TAR_CREATE + "create\0" No_argument "c" + "dereference\0" No_argument "h" +# endif +# if ENABLE_FEATURE_SEAMLESS_BZ2 + "bzip2\0" No_argument "j" +# endif +# if ENABLE_FEATURE_SEAMLESS_LZMA + "lzma\0" No_argument "a" +# endif +# if ENABLE_FEATURE_TAR_FROM + "files-from\0" Required_argument "T" + "exclude-from\0" Required_argument "X" +# endif +# if ENABLE_FEATURE_SEAMLESS_GZ + "gzip\0" No_argument "z" +# endif +# if ENABLE_FEATURE_SEAMLESS_Z + "compress\0" No_argument "Z" +# endif + "no-same-owner\0" No_argument "\xfd" + "no-same-permissions\0" No_argument "\xfe" + /* --exclude takes next bit position in option mask, */ + /* therefore we have to either put it _after_ --no-same-perm */ + /* or add OPT[BIT]_EXCLUDE before OPT[BIT]_NOPRESERVE_OWN */ +# if ENABLE_FEATURE_TAR_FROM + "exclude\0" Required_argument "\xff" +# endif + ; +#endif + +int tar_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int tar_main(int argc UNUSED_PARAM, char **argv) +{ + char FAST_FUNC (*get_header_ptr)(archive_handle_t *) = get_header_tar; + archive_handle_t *tar_handle; + char *base_dir = NULL; + const char *tar_filename = "-"; + unsigned opt; + int verboseFlag = 0; +#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM + llist_t *excludes = NULL; +#endif + + /* Initialise default values */ + tar_handle = init_handle(); + tar_handle->ah_flags = ARCHIVE_CREATE_LEADING_DIRS + | ARCHIVE_PRESERVE_DATE + | ARCHIVE_EXTRACT_UNCONDITIONAL; + + /* Apparently only root's tar preserves perms (see bug 3844) */ + if (getuid() != 0) + tar_handle->ah_flags |= ARCHIVE_NOPRESERVE_PERM; + + /* Prepend '-' to the first argument if required */ + opt_complementary = "--:" // first arg is options + "tt:vv:" // count -t,-v + "?:" // bail out with usage instead of error return + "X::T::" // cumulative lists +#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM + "\xff::" // cumulative lists for --exclude +#endif + USE_FEATURE_TAR_CREATE("c:") "t:x:" // at least one of these is reqd + USE_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive + SKIP_FEATURE_TAR_CREATE("t--x:x--t"); // mutually exclusive +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + applet_long_options = tar_longopts; +#endif + opt = getopt32(argv, + "txC:f:Opvk" + USE_FEATURE_TAR_CREATE( "ch" ) + USE_FEATURE_SEAMLESS_BZ2( "j" ) + USE_FEATURE_SEAMLESS_LZMA("a" ) + USE_FEATURE_TAR_FROM( "T:X:") + USE_FEATURE_SEAMLESS_GZ( "z" ) + USE_FEATURE_SEAMLESS_Z( "Z" ) + , &base_dir // -C dir + , &tar_filename // -f filename + USE_FEATURE_TAR_FROM(, &(tar_handle->accept)) // T + USE_FEATURE_TAR_FROM(, &(tar_handle->reject)) // X +#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM + , &excludes // --exclude +#endif + , &verboseFlag // combined count for -t and -v + , &verboseFlag // combined count for -t and -v + ); + argv += optind; + + if (verboseFlag) tar_handle->action_header = header_verbose_list; + if (verboseFlag == 1) tar_handle->action_header = header_list; + + if (opt & OPT_EXTRACT) + tar_handle->action_data = data_extract_all; + + if (opt & OPT_2STDOUT) + tar_handle->action_data = data_extract_to_stdout; + + if (opt & OPT_KEEP_OLD) + tar_handle->ah_flags &= ~ARCHIVE_EXTRACT_UNCONDITIONAL; + + if (opt & OPT_NOPRESERVE_OWN) + tar_handle->ah_flags |= ARCHIVE_NOPRESERVE_OWN; + + if (opt & OPT_NOPRESERVE_PERM) + tar_handle->ah_flags |= ARCHIVE_NOPRESERVE_PERM; + + if (opt & OPT_GZIP) + get_header_ptr = get_header_tar_gz; + + if (opt & OPT_BZIP2) + get_header_ptr = get_header_tar_bz2; + + if (opt & OPT_LZMA) + get_header_ptr = get_header_tar_lzma; + + if (opt & OPT_COMPRESS) + get_header_ptr = get_header_tar_Z; + +#if ENABLE_FEATURE_TAR_FROM + tar_handle->reject = append_file_list_to_list(tar_handle->reject); +#if ENABLE_FEATURE_TAR_LONG_OPTIONS + /* Append excludes to reject */ + while (excludes) { + llist_t *next = excludes->link; + excludes->link = tar_handle->reject; + tar_handle->reject = excludes; + excludes = next; + } +#endif + tar_handle->accept = append_file_list_to_list(tar_handle->accept); +#endif + + /* Setup an array of filenames to work with */ + /* TODO: This is the same as in ar, separate function ? */ + while (*argv) { + /* kill trailing '/' unless the string is just "/" */ + char *cp = last_char_is(*argv, '/'); + if (cp > *argv) + *cp = '\0'; + llist_add_to_end(&tar_handle->accept, *argv); + argv++; + } + + if (tar_handle->accept || tar_handle->reject) + tar_handle->filter = filter_accept_reject_list; + + /* Open the tar file */ + { + FILE *tar_stream; + int flags; + + if (opt & OPT_CREATE) { + /* Make sure there is at least one file to tar up. */ + if (tar_handle->accept == NULL) + bb_error_msg_and_die("empty archive"); + + tar_stream = stdout; + /* Mimicking GNU tar 1.15.1: */ + flags = O_WRONLY | O_CREAT | O_TRUNC; + } else { + tar_stream = stdin; + flags = O_RDONLY; + } + + if (LONE_DASH(tar_filename)) { + tar_handle->src_fd = fileno(tar_stream); + tar_handle->seek = seek_by_read; + } else { + if (ENABLE_FEATURE_TAR_AUTODETECT && flags == O_RDONLY) { + get_header_ptr = get_header_tar; + tar_handle->src_fd = open_zipped(tar_filename); + if (tar_handle->src_fd < 0) + bb_perror_msg_and_die("can't open '%s'", tar_filename); + } else { + tar_handle->src_fd = xopen(tar_filename, flags); + } + } + } + + if (base_dir) + xchdir(base_dir); + +#ifdef CHECK_FOR_CHILD_EXITCODE + /* We need to know whether child (gzip/bzip/etc) exits abnormally */ + signal(SIGCHLD, handle_SIGCHLD); +#endif + + /* create an archive */ + if (opt & OPT_CREATE) { +#if ENABLE_FEATURE_SEAMLESS_GZ || ENABLE_FEATURE_SEAMLESS_BZ2 + int zipMode = 0; + if (ENABLE_FEATURE_SEAMLESS_GZ && (opt & OPT_GZIP)) + zipMode = 1; + if (ENABLE_FEATURE_SEAMLESS_BZ2 && (opt & OPT_BZIP2)) + zipMode = 2; +#endif + /* NB: writeTarFile() closes tar_handle->src_fd */ + return writeTarFile(tar_handle->src_fd, verboseFlag, opt & OPT_DEREFERENCE, + tar_handle->accept, + tar_handle->reject, zipMode); + } + + while (get_header_ptr(tar_handle) == EXIT_SUCCESS) + continue; + + /* Check that every file that should have been extracted was */ + while (tar_handle->accept) { + if (!find_list_entry(tar_handle->reject, tar_handle->accept->data) + && !find_list_entry(tar_handle->passed, tar_handle->accept->data) + ) { + bb_error_msg_and_die("%s: not found in archive", + tar_handle->accept->data); + } + tar_handle->accept = tar_handle->accept->link; + } + if (ENABLE_FEATURE_CLEAN_UP /* && tar_handle->src_fd != STDIN_FILENO */) + close(tar_handle->src_fd); + + return EXIT_SUCCESS; +} diff --git a/archival/unzip.c b/archival/unzip.c new file mode 100644 index 0000000..7b47a8a --- /dev/null +++ b/archival/unzip.c @@ -0,0 +1,565 @@ +/* vi: set sw=4 ts=4: */ +/* + * Mini unzip implementation for busybox + * + * Copyright (C) 2004 by Ed Clark + * + * Loosely based on original busybox unzip applet by Laurence Anderson. + * All options and features should work in this version. + * + * Licensed under the GPL v2 or later, see the file LICENSE in this tarball. + */ + +/* For reference see + * http://www.pkware.com/company/standards/appnote/ + * http://www.info-zip.org/pub/infozip/doc/appnote-iz-latest.zip + */ + +/* TODO + * Zip64 + other methods + */ + +#include "libbb.h" +#include "unarchive.h" + +enum { +#if BB_BIG_ENDIAN + ZIP_FILEHEADER_MAGIC = 0x504b0304, + ZIP_CDS_MAGIC = 0x504b0102, + ZIP_CDE_MAGIC = 0x504b0506, + ZIP_DD_MAGIC = 0x504b0708, +#else + ZIP_FILEHEADER_MAGIC = 0x04034b50, + ZIP_CDS_MAGIC = 0x02014b50, + ZIP_CDE_MAGIC = 0x06054b50, + ZIP_DD_MAGIC = 0x08074b50, +#endif +}; + +#define ZIP_HEADER_LEN 26 + +typedef union { + uint8_t raw[ZIP_HEADER_LEN]; + struct { + uint16_t version; /* 0-1 */ + uint16_t flags; /* 2-3 */ + uint16_t method; /* 4-5 */ + uint16_t modtime; /* 6-7 */ + uint16_t moddate; /* 8-9 */ + uint32_t crc32 PACKED; /* 10-13 */ + uint32_t cmpsize PACKED; /* 14-17 */ + uint32_t ucmpsize PACKED; /* 18-21 */ + uint16_t filename_len; /* 22-23 */ + uint16_t extra_len; /* 24-25 */ + } formatted PACKED; +} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */ + +/* Check the offset of the last element, not the length. This leniency + * allows for poor packing, whereby the overall struct may be too long, + * even though the elements are all in the right place. + */ +struct BUG_zip_header_must_be_26_bytes { + char BUG_zip_header_must_be_26_bytes[ + offsetof(zip_header_t, formatted.extra_len) + 2 + == ZIP_HEADER_LEN ? 1 : -1]; +}; + +#define FIX_ENDIANNESS_ZIP(zip_header) do { \ + (zip_header).formatted.version = SWAP_LE16((zip_header).formatted.version ); \ + (zip_header).formatted.flags = SWAP_LE16((zip_header).formatted.flags ); \ + (zip_header).formatted.method = SWAP_LE16((zip_header).formatted.method ); \ + (zip_header).formatted.modtime = SWAP_LE16((zip_header).formatted.modtime ); \ + (zip_header).formatted.moddate = SWAP_LE16((zip_header).formatted.moddate ); \ + (zip_header).formatted.crc32 = SWAP_LE32((zip_header).formatted.crc32 ); \ + (zip_header).formatted.cmpsize = SWAP_LE32((zip_header).formatted.cmpsize ); \ + (zip_header).formatted.ucmpsize = SWAP_LE32((zip_header).formatted.ucmpsize ); \ + (zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \ + (zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \ +} while (0) + +#define CDS_HEADER_LEN 42 + +typedef union { + uint8_t raw[CDS_HEADER_LEN]; + struct { + /* uint32_t signature; 50 4b 01 02 */ + uint16_t version_made_by; /* 0-1 */ + uint16_t version_needed; /* 2-3 */ + uint16_t cds_flags; /* 4-5 */ + uint16_t method; /* 6-7 */ + uint16_t mtime; /* 8-9 */ + uint16_t mdate; /* 10-11 */ + uint32_t crc32; /* 12-15 */ + uint32_t cmpsize; /* 16-19 */ + uint32_t ucmpsize; /* 20-23 */ + uint16_t file_name_length; /* 24-25 */ + uint16_t extra_field_length; /* 26-27 */ + uint16_t file_comment_length; /* 28-29 */ + uint16_t disk_number_start; /* 30-31 */ + uint16_t internal_file_attributes; /* 32-33 */ + uint32_t external_file_attributes PACKED; /* 34-37 */ + uint32_t relative_offset_of_local_header PACKED; /* 38-41 */ + } formatted PACKED; +} cds_header_t; + +struct BUG_cds_header_must_be_42_bytes { + char BUG_cds_header_must_be_42_bytes[ + offsetof(cds_header_t, formatted.relative_offset_of_local_header) + 4 + == CDS_HEADER_LEN ? 1 : -1]; +}; + +#define FIX_ENDIANNESS_CDS(cds_header) do { \ + (cds_header).formatted.crc32 = SWAP_LE32((cds_header).formatted.crc32 ); \ + (cds_header).formatted.cmpsize = SWAP_LE32((cds_header).formatted.cmpsize ); \ + (cds_header).formatted.ucmpsize = SWAP_LE32((cds_header).formatted.ucmpsize ); \ + (cds_header).formatted.file_name_length = SWAP_LE16((cds_header).formatted.file_name_length); \ + (cds_header).formatted.extra_field_length = SWAP_LE16((cds_header).formatted.extra_field_length); \ + (cds_header).formatted.file_comment_length = SWAP_LE16((cds_header).formatted.file_comment_length); \ +} while (0) + +#define CDE_HEADER_LEN 16 + +typedef union { + uint8_t raw[CDE_HEADER_LEN]; + struct { + /* uint32_t signature; 50 4b 05 06 */ + uint16_t this_disk_no; + uint16_t disk_with_cds_no; + uint16_t cds_entries_on_this_disk; + uint16_t cds_entries_total; + uint32_t cds_size; + uint32_t cds_offset; + /* uint16_t file_comment_length; */ + /* .ZIP file comment (variable size) */ + } formatted PACKED; +} cde_header_t; + +struct BUG_cde_header_must_be_16_bytes { + char BUG_cde_header_must_be_16_bytes[ + sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1]; +}; + +#define FIX_ENDIANNESS_CDE(cde_header) do { \ + (cde_header).formatted.cds_offset = SWAP_LE32((cde_header).formatted.cds_offset); \ +} while (0) + +enum { zip_fd = 3 }; + + +#if ENABLE_DESKTOP +/* NB: does not preserve file position! */ +static uint32_t find_cds_offset(void) +{ + unsigned char buf[1024]; + cde_header_t cde_header; + unsigned char *p; + off_t end; + + end = xlseek(zip_fd, 0, SEEK_END); + if (end < 1024) + end = 1024; + end -= 1024; + xlseek(zip_fd, end, SEEK_SET); + full_read(zip_fd, buf, 1024); + + p = buf; + while (p <= buf + 1024 - CDE_HEADER_LEN - 4) { + if (*p != 'P') { + p++; + continue; + } + if (*++p != 'K') + continue; + if (*++p != 5) + continue; + if (*++p != 6) + continue; + /* we found CDE! */ + memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN); + FIX_ENDIANNESS_CDE(cde_header); + return cde_header.formatted.cds_offset; + } + bb_error_msg_and_die("can't find file table"); +}; + +static uint32_t read_next_cds(int count_m1, uint32_t cds_offset, cds_header_t *cds_ptr) +{ + off_t org; + + org = xlseek(zip_fd, 0, SEEK_CUR); + + if (!cds_offset) + cds_offset = find_cds_offset(); + + while (count_m1-- >= 0) { + xlseek(zip_fd, cds_offset + 4, SEEK_SET); + xread(zip_fd, cds_ptr->raw, CDS_HEADER_LEN); + FIX_ENDIANNESS_CDS(*cds_ptr); + cds_offset += 4 + CDS_HEADER_LEN + + cds_ptr->formatted.file_name_length + + cds_ptr->formatted.extra_field_length + + cds_ptr->formatted.file_comment_length; + } + + xlseek(zip_fd, org, SEEK_SET); + return cds_offset; +}; +#endif + +static void unzip_skip(off_t skip) +{ + bb_copyfd_exact_size(zip_fd, -1, skip); +} + +static void unzip_create_leading_dirs(const char *fn) +{ + /* Create all leading directories */ + char *name = xstrdup(fn); + if (bb_make_directory(dirname(name), 0777, FILEUTILS_RECUR)) { + bb_error_msg_and_die("exiting"); /* bb_make_directory is noisy */ + } + free(name); +} + +static void unzip_extract(zip_header_t *zip_header, int dst_fd) +{ + if (zip_header->formatted.method == 0) { + /* Method 0 - stored (not compressed) */ + off_t size = zip_header->formatted.ucmpsize; + if (size) + bb_copyfd_exact_size(zip_fd, dst_fd, size); + } else { + /* Method 8 - inflate */ + inflate_unzip_result res; + if (inflate_unzip(&res, zip_header->formatted.cmpsize, zip_fd, dst_fd) < 0) + bb_error_msg_and_die("inflate error"); + /* Validate decompression - crc */ + if (zip_header->formatted.crc32 != (res.crc ^ 0xffffffffL)) { + bb_error_msg_and_die("crc error"); + } + /* Validate decompression - size */ + if (zip_header->formatted.ucmpsize != res.bytes_out) { + /* Don't die. Who knows, maybe len calculation + * was botched somewhere. After all, crc matched! */ + bb_error_msg("bad length"); + } + } +} + +int unzip_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; +int unzip_main(int argc, char **argv) +{ + enum { O_PROMPT, O_NEVER, O_ALWAYS }; + + zip_header_t zip_header; + smallint verbose = 1; + smallint listing = 0; + smallint overwrite = O_PROMPT; +#if ENABLE_DESKTOP + uint32_t cds_offset; + unsigned cds_entries; +#endif + unsigned total_size; + unsigned total_entries; + int dst_fd = -1; + char *src_fn = NULL; + char *dst_fn = NULL; + llist_t *zaccept = NULL; + llist_t *zreject = NULL; + char *base_dir = NULL; + int i, opt; + int opt_range = 0; + char key_buf[80]; + struct stat stat_buf; + + /* '-' makes getopt return 1 for non-options */ + while ((opt = getopt(argc, argv, "-d:lnopqx")) != -1) { + switch (opt_range) { + case 0: /* Options */ + switch (opt) { + case 'l': /* List */ + listing = 1; + break; + + case 'n': /* Never overwrite existing files */ + overwrite = O_NEVER; + break; + + case 'o': /* Always overwrite existing files */ + overwrite = O_ALWAYS; + break; + + case 'p': /* Extract files to stdout and fall through to set verbosity */ + dst_fd = STDOUT_FILENO; + + case 'q': /* Be quiet */ + verbose = 0; + break; + + case 1: /* The zip file */ + /* +5: space for ".zip" and NUL */ + src_fn = xmalloc(strlen(optarg) + 5); + strcpy(src_fn, optarg); + opt_range++; + break; + + default: + bb_show_usage(); + + } + break; + + case 1: /* Include files */ + if (opt == 1) { + llist_add_to(&zaccept, optarg); + break; + } + if (opt == 'd') { + base_dir = optarg; + opt_range += 2; + break; + } + if (opt == 'x') { + opt_range++; + break; + } + bb_show_usage(); + + case 2 : /* Exclude files */ + if (opt == 1) { + llist_add_to(&zreject, optarg); + break; + } + if (opt == 'd') { /* Extract to base directory */ + base_dir = optarg; + opt_range++; + break; + } + /* fall through */ + + default: + bb_show_usage(); + } + } + + if (src_fn == NULL) { + bb_show_usage(); + } + + /* Open input file */ + if (LONE_DASH(src_fn)) { + xdup2(STDIN_FILENO, zip_fd); + /* Cannot use prompt mode since zip data is arriving on STDIN */ + if (overwrite == O_PROMPT) + overwrite = O_NEVER; + } else { + static const char extn[][5] = {"", ".zip", ".ZIP"}; + int orig_src_fn_len = strlen(src_fn); + int src_fd = -1; + + for (i = 0; (i < 3) && (src_fd == -1); i++) { + strcpy(src_fn + orig_src_fn_len, extn[i]); + src_fd = open(src_fn, O_RDONLY); + } + if (src_fd == -1) { + src_fn[orig_src_fn_len] = '\0'; + bb_error_msg_and_die("can't open %s, %s.zip, %s.ZIP", src_fn, src_fn, src_fn); + } + xmove_fd(src_fd, zip_fd); + } + + /* Change dir if necessary */ + if (base_dir) + xchdir(base_dir); + + if (verbose) { + printf("Archive: %s\n", src_fn); + if (listing){ + puts(" Length Date Time Name\n" + " -------- ---- ---- ----"); + } + } + + total_size = 0; + total_entries = 0; +#if ENABLE_DESKTOP + cds_entries = 0; + cds_offset = 0; +#endif + while (1) { + uint32_t magic; + + /* Check magic number */ + xread(zip_fd, &magic, 4); + /* Central directory? It's at the end, so exit */ + if (magic == ZIP_CDS_MAGIC) + break; +#if ENABLE_DESKTOP + /* Data descriptor? It was a streaming file, go on */ + if (magic == ZIP_DD_MAGIC) { + /* skip over duplicate crc32, cmpsize and ucmpsize */ + unzip_skip(3 * 4); + continue; + } +#endif + if (magic != ZIP_FILEHEADER_MAGIC) + bb_error_msg_and_die("invalid zip magic %08X", (int)magic); + + /* Read the file header */ + xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); + FIX_ENDIANNESS_ZIP(zip_header); + if ((zip_header.formatted.method != 0) && (zip_header.formatted.method != 8)) { + bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method); + } +#if !ENABLE_DESKTOP + if (zip_header.formatted.flags & 0x0009) { + bb_error_msg_and_die("zip flags 1 and 8 are not supported"); + } +#else + if (zip_header.formatted.flags & 0x0001) { + /* 0x0001 - encrypted */ + bb_error_msg_and_die("zip flag 1 (encryption) is not supported"); + } + if (zip_header.formatted.flags & 0x0008) { + cds_header_t cds_header; + /* 0x0008 - streaming. [u]cmpsize can be reliably gotten + * only from Central Directory. See unzip_doc.txt */ + cds_offset = read_next_cds(total_entries - cds_entries, cds_offset, &cds_header); + cds_entries = total_entries + 1; + zip_header.formatted.crc32 = cds_header.formatted.crc32; + zip_header.formatted.cmpsize = cds_header.formatted.cmpsize; + zip_header.formatted.ucmpsize = cds_header.formatted.ucmpsize; + } +#endif + + /* Read filename */ + free(dst_fn); + dst_fn = xzalloc(zip_header.formatted.filename_len + 1); + xread(zip_fd, dst_fn, zip_header.formatted.filename_len); + + /* Skip extra header bytes */ + unzip_skip(zip_header.formatted.extra_len); + + /* Filter zip entries */ + if (find_list_entry(zreject, dst_fn) + || (zaccept && !find_list_entry(zaccept, dst_fn)) + ) { /* Skip entry */ + i = 'n'; + + } else { /* Extract entry */ + if (listing) { /* List entry */ + if (verbose) { + unsigned dostime = zip_header.formatted.modtime | (zip_header.formatted.moddate << 16); + printf("%9u %02u-%02u-%02u %02u:%02u %s\n", + zip_header.formatted.ucmpsize, + (dostime & 0x01e00000) >> 21, + (dostime & 0x001f0000) >> 16, + (((dostime & 0xfe000000) >> 25) + 1980) % 100, + (dostime & 0x0000f800) >> 11, + (dostime & 0x000007e0) >> 5, + dst_fn); + total_size += zip_header.formatted.ucmpsize; + } else { + /* short listing -- filenames only */ + puts(dst_fn); + } + i = 'n'; + } else if (dst_fd == STDOUT_FILENO) { /* Extracting to STDOUT */ + i = -1; + } else if (last_char_is(dst_fn, '/')) { /* Extract directory */ + if (stat(dst_fn, &stat_buf) == -1) { + if (errno != ENOENT) { + bb_perror_msg_and_die("can't stat '%s'", dst_fn); + } + if (verbose) { + printf(" creating: %s\n", dst_fn); + } + unzip_create_leading_dirs(dst_fn); + if (bb_make_directory(dst_fn, 0777, 0)) { + bb_error_msg_and_die("exiting"); + } + } else { + if (!S_ISDIR(stat_buf.st_mode)) { + bb_error_msg_and_die("'%s' exists but is not directory", dst_fn); + } + } + i = 'n'; + + } else { /* Extract file */ + check_file: + if (stat(dst_fn, &stat_buf) == -1) { /* File does not exist */ + if (errno != ENOENT) { + bb_perror_msg_and_die("can't stat '%s'", dst_fn); + } + i = 'y'; + } else { /* File already exists */ + if (overwrite == O_NEVER) { + i = 'n'; + } else if (S_ISREG(stat_buf.st_mode)) { /* File is regular file */ + if (overwrite == O_ALWAYS) { + i = 'y'; + } else { + printf("replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ", dst_fn); + if (!fgets(key_buf, sizeof(key_buf), stdin)) { + bb_perror_msg_and_die("can't read input"); + } + i = key_buf[0]; + } + } else { /* File is not regular file */ + bb_error_msg_and_die("'%s' exists but is not regular file", dst_fn); + } + } + } + } + + switch (i) { + case 'A': + overwrite = O_ALWAYS; + case 'y': /* Open file and fall into unzip */ + unzip_create_leading_dirs(dst_fn); + dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC); + case -1: /* Unzip */ + if (verbose) { + printf(" inflating: %s\n", dst_fn); + } + unzip_extract(&zip_header, dst_fd); + if (dst_fd != STDOUT_FILENO) { + /* closing STDOUT is potentially bad for future business */ + close(dst_fd); + } + break; + + case 'N': + overwrite = O_NEVER; + case 'n': + /* Skip entry data */ + unzip_skip(zip_header.formatted.cmpsize); + break; + + case 'r': + /* Prompt for new name */ + printf("new name: "); + if (!fgets(key_buf, sizeof(key_buf), stdin)) { + bb_perror_msg_and_die("can't read input"); + } + free(dst_fn); + dst_fn = xstrdup(key_buf); + chomp(dst_fn); + goto check_file; + + default: + printf("error: invalid response [%c]\n",(char)i); + goto check_file; + } + + total_entries++; + } + + if (listing && verbose) { + printf(" -------- -------\n" + "%9d %d files\n", + total_size, total_entries); + } + + return 0; +} diff --git a/archival/unzip_doc.txt.bz2 b/archival/unzip_doc.txt.bz2 new file mode 100644 index 0000000..ab77d10 Binary files /dev/null and b/archival/unzip_doc.txt.bz2 differ -- cgit v1.2.3