Import xz 5.2.2 (as of svn r86089)
This commit is contained in:
31
doc/examples/00_README.txt
Normal file
31
doc/examples/00_README.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
|
||||
liblzma example programs
|
||||
========================
|
||||
|
||||
Introduction
|
||||
|
||||
The examples are written so that the same comments aren't
|
||||
repeated (much) in later files.
|
||||
|
||||
On POSIX systems, the examples should build by just typing "make".
|
||||
|
||||
The examples that use stdin or stdout don't set stdin and stdout
|
||||
to binary mode. On systems where it matters (e.g. Windows) it is
|
||||
possible that the examples won't work without modification.
|
||||
|
||||
|
||||
List of examples
|
||||
|
||||
01_compress_easy.c Multi-call compression using
|
||||
a compression preset
|
||||
|
||||
02_decompress.c Multi-call decompression
|
||||
|
||||
03_compress_custom.c Like 01_compress_easy.c but using
|
||||
a custom filter chain
|
||||
(x86 BCJ + LZMA2)
|
||||
|
||||
04_compress_easy_mt.c Multi-threaded multi-call
|
||||
compression using a compression
|
||||
preset
|
||||
|
||||
297
doc/examples/01_compress_easy.c
Normal file
297
doc/examples/01_compress_easy.c
Normal file
@@ -0,0 +1,297 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file 01_compress_easy.c
|
||||
/// \brief Compress from stdin to stdout in multi-call mode
|
||||
///
|
||||
/// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE
|
||||
///
|
||||
/// Example: ./01_compress_easy 6 < foo > foo.xz
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
// This file has been put into the public domain.
|
||||
// You can do whatever you want with this file.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <lzma.h>
|
||||
|
||||
|
||||
static void
|
||||
show_usage_and_exit(const char *argv0)
|
||||
{
|
||||
fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n"
|
||||
"PRESET is a number 0-9 and can optionally be "
|
||||
"followed by `e' to indicate extreme preset\n",
|
||||
argv0);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
|
||||
static uint32_t
|
||||
get_preset(int argc, char **argv)
|
||||
{
|
||||
// One argument whose first char must be 0-9.
|
||||
if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9')
|
||||
show_usage_and_exit(argv[0]);
|
||||
|
||||
// Calculate the preste level 0-9.
|
||||
uint32_t preset = argv[1][0] - '0';
|
||||
|
||||
// If there is a second char, it must be 'e'. It will set
|
||||
// the LZMA_PRESET_EXTREME flag.
|
||||
if (argv[1][1] != '\0') {
|
||||
if (argv[1][1] != 'e' || argv[1][2] != '\0')
|
||||
show_usage_and_exit(argv[0]);
|
||||
|
||||
preset |= LZMA_PRESET_EXTREME;
|
||||
}
|
||||
|
||||
return preset;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
init_encoder(lzma_stream *strm, uint32_t preset)
|
||||
{
|
||||
// Initialize the encoder using a preset. Set the integrity to check
|
||||
// to CRC64, which is the default in the xz command line tool. If
|
||||
// the .xz file needs to be decompressed with XZ Embedded, use
|
||||
// LZMA_CHECK_CRC32 instead.
|
||||
lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);
|
||||
|
||||
// Return successfully if the initialization went fine.
|
||||
if (ret == LZMA_OK)
|
||||
return true;
|
||||
|
||||
// Something went wrong. The possible errors are documented in
|
||||
// lzma/container.h (src/liblzma/api/lzma/container.h in the source
|
||||
// package or e.g. /usr/include/lzma/container.h depending on the
|
||||
// install prefix).
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
msg = "Specified preset is not supported";
|
||||
break;
|
||||
|
||||
case LZMA_UNSUPPORTED_CHECK:
|
||||
msg = "Specified integrity check is not supported";
|
||||
break;
|
||||
|
||||
default:
|
||||
// This is most likely LZMA_PROG_ERROR indicating a bug in
|
||||
// this program or in liblzma. It is inconvenient to have a
|
||||
// separate error message for errors that should be impossible
|
||||
// to occur, but knowing the error code is important for
|
||||
// debugging. That's why it is good to print the error code
|
||||
// at least when there is no good error message to show.
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
|
||||
{
|
||||
// This will be LZMA_RUN until the end of the input file is reached.
|
||||
// This tells lzma_code() when there will be no more input.
|
||||
lzma_action action = LZMA_RUN;
|
||||
|
||||
// Buffers to temporarily hold uncompressed input
|
||||
// and compressed output.
|
||||
uint8_t inbuf[BUFSIZ];
|
||||
uint8_t outbuf[BUFSIZ];
|
||||
|
||||
// Initialize the input and output pointers. Initializing next_in
|
||||
// and avail_in isn't really necessary when we are going to encode
|
||||
// just one file since LZMA_STREAM_INIT takes care of initializing
|
||||
// those already. But it doesn't hurt much and it will be needed
|
||||
// if encoding more than one file like we will in 02_decompress.c.
|
||||
//
|
||||
// While we don't care about strm->total_in or strm->total_out in this
|
||||
// example, it is worth noting that initializing the encoder will
|
||||
// always reset total_in and total_out to zero. But the encoder
|
||||
// initialization doesn't touch next_in, avail_in, next_out, or
|
||||
// avail_out.
|
||||
strm->next_in = NULL;
|
||||
strm->avail_in = 0;
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
|
||||
// Loop until the file has been successfully compressed or until
|
||||
// an error occurs.
|
||||
while (true) {
|
||||
// Fill the input buffer if it is empty.
|
||||
if (strm->avail_in == 0 && !feof(infile)) {
|
||||
strm->next_in = inbuf;
|
||||
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
|
||||
infile);
|
||||
|
||||
if (ferror(infile)) {
|
||||
fprintf(stderr, "Read error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Once the end of the input file has been reached,
|
||||
// we need to tell lzma_code() that no more input
|
||||
// will be coming and that it should finish the
|
||||
// encoding.
|
||||
if (feof(infile))
|
||||
action = LZMA_FINISH;
|
||||
}
|
||||
|
||||
// Tell liblzma do the actual encoding.
|
||||
//
|
||||
// This reads up to strm->avail_in bytes of input starting
|
||||
// from strm->next_in. avail_in will be decremented and
|
||||
// next_in incremented by an equal amount to match the
|
||||
// number of input bytes consumed.
|
||||
//
|
||||
// Up to strm->avail_out bytes of compressed output will be
|
||||
// written starting from strm->next_out. avail_out and next_out
|
||||
// will be incremented by an equal amount to match the number
|
||||
// of output bytes written.
|
||||
//
|
||||
// The encoder has to do internal buffering, which means that
|
||||
// it may take quite a bit of input before the same data is
|
||||
// available in compressed form in the output buffer.
|
||||
lzma_ret ret = lzma_code(strm, action);
|
||||
|
||||
// If the output buffer is full or if the compression finished
|
||||
// successfully, write the data from the output bufffer to
|
||||
// the output file.
|
||||
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
|
||||
// When lzma_code() has returned LZMA_STREAM_END,
|
||||
// the output buffer is likely to be only partially
|
||||
// full. Calculate how much new data there is to
|
||||
// be written to the output file.
|
||||
size_t write_size = sizeof(outbuf) - strm->avail_out;
|
||||
|
||||
if (fwrite(outbuf, 1, write_size, outfile)
|
||||
!= write_size) {
|
||||
fprintf(stderr, "Write error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reset next_out and avail_out.
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
}
|
||||
|
||||
// Normally the return value of lzma_code() will be LZMA_OK
|
||||
// until everything has been encoded.
|
||||
if (ret != LZMA_OK) {
|
||||
// Once everything has been encoded successfully, the
|
||||
// return value of lzma_code() will be LZMA_STREAM_END.
|
||||
//
|
||||
// It is important to check for LZMA_STREAM_END. Do not
|
||||
// assume that getting ret != LZMA_OK would mean that
|
||||
// everything has gone well.
|
||||
if (ret == LZMA_STREAM_END)
|
||||
return true;
|
||||
|
||||
// It's not LZMA_OK nor LZMA_STREAM_END,
|
||||
// so it must be an error code. See lzma/base.h
|
||||
// (src/liblzma/api/lzma/base.h in the source package
|
||||
// or e.g. /usr/include/lzma/base.h depending on the
|
||||
// install prefix) for the list and documentation of
|
||||
// possible values. Most values listen in lzma_ret
|
||||
// enumeration aren't possible in this example.
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_DATA_ERROR:
|
||||
// This error is returned if the compressed
|
||||
// or uncompressed size get near 8 EiB
|
||||
// (2^63 bytes) because that's where the .xz
|
||||
// file format size limits currently are.
|
||||
// That is, the possibility of this error
|
||||
// is mostly theoretical unless you are doing
|
||||
// something very unusual.
|
||||
//
|
||||
// Note that strm->total_in and strm->total_out
|
||||
// have nothing to do with this error. Changing
|
||||
// those variables won't increase or decrease
|
||||
// the chance of getting this error.
|
||||
msg = "File size limits exceeded";
|
||||
break;
|
||||
|
||||
default:
|
||||
// This is most likely LZMA_PROG_ERROR, but
|
||||
// if this program is buggy (or liblzma has
|
||||
// a bug), it may be e.g. LZMA_BUF_ERROR or
|
||||
// LZMA_OPTIONS_ERROR too.
|
||||
//
|
||||
// It is inconvenient to have a separate
|
||||
// error message for errors that should be
|
||||
// impossible to occur, but knowing the error
|
||||
// code is important for debugging. That's why
|
||||
// it is good to print the error code at least
|
||||
// when there is no good error message to show.
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Encoder error: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
// Get the preset number from the command line.
|
||||
uint32_t preset = get_preset(argc, argv);
|
||||
|
||||
// Initialize a lzma_stream structure. When it is allocated on stack,
|
||||
// it is simplest to use LZMA_STREAM_INIT macro like below. When it
|
||||
// is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr))
|
||||
// works (as long as NULL pointers are represented with zero bits
|
||||
// as they are on practically all computers today).
|
||||
lzma_stream strm = LZMA_STREAM_INIT;
|
||||
|
||||
// Initialize the encoder. If it succeeds, compress from
|
||||
// stdin to stdout.
|
||||
bool success = init_encoder(&strm, preset);
|
||||
if (success)
|
||||
success = compress(&strm, stdin, stdout);
|
||||
|
||||
// Free the memory allocated for the encoder. If we were encoding
|
||||
// multiple files, this would only need to be done after the last
|
||||
// file. See 02_decompress.c for handling of multiple files.
|
||||
//
|
||||
// It is OK to call lzma_end() multiple times or when it hasn't been
|
||||
// actually used except initialized with LZMA_STREAM_INIT.
|
||||
lzma_end(&strm);
|
||||
|
||||
// Close stdout to catch possible write errors that can occur
|
||||
// when pending data is flushed from the stdio buffers.
|
||||
if (fclose(stdout)) {
|
||||
fprintf(stderr, "Write error: %s\n", strerror(errno));
|
||||
success = false;
|
||||
}
|
||||
|
||||
return success ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
287
doc/examples/02_decompress.c
Normal file
287
doc/examples/02_decompress.c
Normal file
@@ -0,0 +1,287 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file 02_decompress.c
|
||||
/// \brief Decompress .xz files to stdout
|
||||
///
|
||||
/// Usage: ./02_decompress INPUT_FILES... > OUTFILE
|
||||
///
|
||||
/// Example: ./02_decompress foo.xz bar.xz > foobar
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
// This file has been put into the public domain.
|
||||
// You can do whatever you want with this file.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <lzma.h>
|
||||
|
||||
|
||||
static bool
|
||||
init_decoder(lzma_stream *strm)
|
||||
{
|
||||
// Initialize a .xz decoder. The decoder supports a memory usage limit
|
||||
// and a set of flags.
|
||||
//
|
||||
// The memory usage of the decompressor depends on the settings used
|
||||
// to compress a .xz file. It can vary from less than a megabyte to
|
||||
// a few gigabytes, but in practice (at least for now) it rarely
|
||||
// exceeds 65 MiB because that's how much memory is required to
|
||||
// decompress files created with "xz -9". Settings requiring more
|
||||
// memory take extra effort to use and don't (at least for now)
|
||||
// provide significantly better compression in most cases.
|
||||
//
|
||||
// Memory usage limit is useful if it is important that the
|
||||
// decompressor won't consume gigabytes of memory. The need
|
||||
// for limiting depends on the application. In this example,
|
||||
// no memory usage limiting is used. This is done by setting
|
||||
// the limit to UINT64_MAX.
|
||||
//
|
||||
// The .xz format allows concatenating compressed files as is:
|
||||
//
|
||||
// echo foo | xz > foobar.xz
|
||||
// echo bar | xz >> foobar.xz
|
||||
//
|
||||
// When decompressing normal standalone .xz files, LZMA_CONCATENATED
|
||||
// should always be used to support decompression of concatenated
|
||||
// .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop
|
||||
// after the first .xz stream. This can be useful when .xz data has
|
||||
// been embedded inside another file format.
|
||||
//
|
||||
// Flags other than LZMA_CONCATENATED are supported too, and can
|
||||
// be combined with bitwise-or. See lzma/container.h
|
||||
// (src/liblzma/api/lzma/container.h in the source package or e.g.
|
||||
// /usr/include/lzma/container.h depending on the install prefix)
|
||||
// for details.
|
||||
lzma_ret ret = lzma_stream_decoder(
|
||||
strm, UINT64_MAX, LZMA_CONCATENATED);
|
||||
|
||||
// Return successfully if the initialization went fine.
|
||||
if (ret == LZMA_OK)
|
||||
return true;
|
||||
|
||||
// Something went wrong. The possible errors are documented in
|
||||
// lzma/container.h (src/liblzma/api/lzma/container.h in the source
|
||||
// package or e.g. /usr/include/lzma/container.h depending on the
|
||||
// install prefix).
|
||||
//
|
||||
// Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
|
||||
// specify a very tiny limit, the error will be delayed until
|
||||
// the first headers have been parsed by a call to lzma_code().
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
msg = "Unsupported decompressor flags";
|
||||
break;
|
||||
|
||||
default:
|
||||
// This is most likely LZMA_PROG_ERROR indicating a bug in
|
||||
// this program or in liblzma. It is inconvenient to have a
|
||||
// separate error message for errors that should be impossible
|
||||
// to occur, but knowing the error code is important for
|
||||
// debugging. That's why it is good to print the error code
|
||||
// at least when there is no good error message to show.
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile)
|
||||
{
|
||||
// When LZMA_CONCATENATED flag was used when initializing the decoder,
|
||||
// we need to tell lzma_code() when there will be no more input.
|
||||
// This is done by setting action to LZMA_FINISH instead of LZMA_RUN
|
||||
// in the same way as it is done when encoding.
|
||||
//
|
||||
// When LZMA_CONCATENATED isn't used, there is no need to use
|
||||
// LZMA_FINISH to tell when all the input has been read, but it
|
||||
// is still OK to use it if you want. When LZMA_CONCATENATED isn't
|
||||
// used, the decoder will stop after the first .xz stream. In that
|
||||
// case some unused data may be left in strm->next_in.
|
||||
lzma_action action = LZMA_RUN;
|
||||
|
||||
uint8_t inbuf[BUFSIZ];
|
||||
uint8_t outbuf[BUFSIZ];
|
||||
|
||||
strm->next_in = NULL;
|
||||
strm->avail_in = 0;
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
|
||||
while (true) {
|
||||
if (strm->avail_in == 0 && !feof(infile)) {
|
||||
strm->next_in = inbuf;
|
||||
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
|
||||
infile);
|
||||
|
||||
if (ferror(infile)) {
|
||||
fprintf(stderr, "%s: Read error: %s\n",
|
||||
inname, strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
// Once the end of the input file has been reached,
|
||||
// we need to tell lzma_code() that no more input
|
||||
// will be coming. As said before, this isn't required
|
||||
// if the LZMA_CONATENATED flag isn't used when
|
||||
// initializing the decoder.
|
||||
if (feof(infile))
|
||||
action = LZMA_FINISH;
|
||||
}
|
||||
|
||||
lzma_ret ret = lzma_code(strm, action);
|
||||
|
||||
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
|
||||
size_t write_size = sizeof(outbuf) - strm->avail_out;
|
||||
|
||||
if (fwrite(outbuf, 1, write_size, outfile)
|
||||
!= write_size) {
|
||||
fprintf(stderr, "Write error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
}
|
||||
|
||||
if (ret != LZMA_OK) {
|
||||
// Once everything has been decoded successfully, the
|
||||
// return value of lzma_code() will be LZMA_STREAM_END.
|
||||
//
|
||||
// It is important to check for LZMA_STREAM_END. Do not
|
||||
// assume that getting ret != LZMA_OK would mean that
|
||||
// everything has gone well or that when you aren't
|
||||
// getting more output it must have successfully
|
||||
// decoded everything.
|
||||
if (ret == LZMA_STREAM_END)
|
||||
return true;
|
||||
|
||||
// It's not LZMA_OK nor LZMA_STREAM_END,
|
||||
// so it must be an error code. See lzma/base.h
|
||||
// (src/liblzma/api/lzma/base.h in the source package
|
||||
// or e.g. /usr/include/lzma/base.h depending on the
|
||||
// install prefix) for the list and documentation of
|
||||
// possible values. Many values listen in lzma_ret
|
||||
// enumeration aren't possible in this example, but
|
||||
// can be made possible by enabling memory usage limit
|
||||
// or adding flags to the decoder initialization.
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_FORMAT_ERROR:
|
||||
// .xz magic bytes weren't found.
|
||||
msg = "The input is not in the .xz format";
|
||||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
// For example, the headers specify a filter
|
||||
// that isn't supported by this liblzma
|
||||
// version (or it hasn't been enabled when
|
||||
// building liblzma, but no-one sane does
|
||||
// that unless building liblzma for an
|
||||
// embedded system). Upgrading to a newer
|
||||
// liblzma might help.
|
||||
//
|
||||
// Note that it is unlikely that the file has
|
||||
// accidentally became corrupt if you get this
|
||||
// error. The integrity of the .xz headers is
|
||||
// always verified with a CRC32, so
|
||||
// unintentionally corrupt files can be
|
||||
// distinguished from unsupported files.
|
||||
msg = "Unsupported compression options";
|
||||
break;
|
||||
|
||||
case LZMA_DATA_ERROR:
|
||||
msg = "Compressed file is corrupt";
|
||||
break;
|
||||
|
||||
case LZMA_BUF_ERROR:
|
||||
// Typically this error means that a valid
|
||||
// file has got truncated, but it might also
|
||||
// be a damaged part in the file that makes
|
||||
// the decoder think the file is truncated.
|
||||
// If you prefer, you can use the same error
|
||||
// message for this as for LZMA_DATA_ERROR.
|
||||
msg = "Compressed file is truncated or "
|
||||
"otherwise corrupt";
|
||||
break;
|
||||
|
||||
default:
|
||||
// This is most likely LZMA_PROG_ERROR.
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%s: Decoder error: "
|
||||
"%s (error code %u)\n",
|
||||
inname, msg, ret);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
if (argc <= 1) {
|
||||
fprintf(stderr, "Usage: %s FILES...\n", argv[0]);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
lzma_stream strm = LZMA_STREAM_INIT;
|
||||
|
||||
bool success = true;
|
||||
|
||||
// Try to decompress all files.
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!init_decoder(&strm)) {
|
||||
// Decoder initialization failed. There's no point
|
||||
// to retry it so we need to exit.
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
|
||||
FILE *infile = fopen(argv[i], "rb");
|
||||
|
||||
if (infile == NULL) {
|
||||
fprintf(stderr, "%s: Error opening the "
|
||||
"input file: %s\n",
|
||||
argv[i], strerror(errno));
|
||||
success = false;
|
||||
} else {
|
||||
success &= decompress(&strm, argv[i], infile, stdout);
|
||||
fclose(infile);
|
||||
}
|
||||
}
|
||||
|
||||
// Free the memory allocated for the decoder. This only needs to be
|
||||
// done after the last file.
|
||||
lzma_end(&strm);
|
||||
|
||||
if (fclose(stdout)) {
|
||||
fprintf(stderr, "Write error: %s\n", strerror(errno));
|
||||
success = false;
|
||||
}
|
||||
|
||||
return success ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
193
doc/examples/03_compress_custom.c
Normal file
193
doc/examples/03_compress_custom.c
Normal file
@@ -0,0 +1,193 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file 03_compress_custom.c
|
||||
/// \brief Compress in multi-call mode using x86 BCJ and LZMA2
|
||||
///
|
||||
/// Usage: ./03_compress_custom < INFILE > OUTFILE
|
||||
///
|
||||
/// Example: ./03_compress_custom < foo > foo.xz
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
// This file has been put into the public domain.
|
||||
// You can do whatever you want with this file.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <lzma.h>
|
||||
|
||||
|
||||
static bool
|
||||
init_encoder(lzma_stream *strm)
|
||||
{
|
||||
// Use the default preset (6) for LZMA2.
|
||||
//
|
||||
// The lzma_options_lzma structure and the lzma_lzma_preset() function
|
||||
// are declared in lzma/lzma12.h (src/liblzma/api/lzma/lzma12.h in the
|
||||
// source package or e.g. /usr/include/lzma/lzma12.h depending on
|
||||
// the install prefix).
|
||||
lzma_options_lzma opt_lzma2;
|
||||
if (lzma_lzma_preset(&opt_lzma2, LZMA_PRESET_DEFAULT)) {
|
||||
// It should never fail because the default preset
|
||||
// (and presets 0-9 optionally with LZMA_PRESET_EXTREME)
|
||||
// are supported by all stable liblzma versions.
|
||||
//
|
||||
// (The encoder initialization later in this function may
|
||||
// still fail due to unsupported preset *if* the features
|
||||
// required by the preset have been disabled at build time,
|
||||
// but no-one does such things except on embedded systems.)
|
||||
fprintf(stderr, "Unsupported preset, possibly a bug\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Now we could customize the LZMA2 options if we wanted. For example,
|
||||
// we could set the the dictionary size (opt_lzma2.dict_size) to
|
||||
// something else than the default (8 MiB) of the default preset.
|
||||
// See lzma/lzma12.h for details of all LZMA2 options.
|
||||
//
|
||||
// The x86 BCJ filter will try to modify the x86 instruction stream so
|
||||
// that LZMA2 can compress it better. The x86 BCJ filter doesn't need
|
||||
// any options so it will be set to NULL below.
|
||||
//
|
||||
// Construct the filter chain. The uncompressed data goes first to
|
||||
// the first filter in the array, in this case the x86 BCJ filter.
|
||||
// The array is always terminated by setting .id = LZMA_VLI_UNKNOWN.
|
||||
//
|
||||
// See lzma/filter.h for more information about the lzma_filter
|
||||
// structure.
|
||||
lzma_filter filters[] = {
|
||||
{ .id = LZMA_FILTER_X86, .options = NULL },
|
||||
{ .id = LZMA_FILTER_LZMA2, .options = &opt_lzma2 },
|
||||
{ .id = LZMA_VLI_UNKNOWN, .options = NULL },
|
||||
};
|
||||
|
||||
// Initialize the encoder using the custom filter chain.
|
||||
lzma_ret ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64);
|
||||
|
||||
if (ret == LZMA_OK)
|
||||
return true;
|
||||
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
// We are no longer using a plain preset so this error
|
||||
// message has been edited accordingly compared to
|
||||
// 01_compress_easy.c.
|
||||
msg = "Specified filter chain is not supported";
|
||||
break;
|
||||
|
||||
case LZMA_UNSUPPORTED_CHECK:
|
||||
msg = "Specified integrity check is not supported";
|
||||
break;
|
||||
|
||||
default:
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// This function is identical to the one in 01_compress_easy.c.
|
||||
static bool
|
||||
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
|
||||
{
|
||||
lzma_action action = LZMA_RUN;
|
||||
|
||||
uint8_t inbuf[BUFSIZ];
|
||||
uint8_t outbuf[BUFSIZ];
|
||||
|
||||
strm->next_in = NULL;
|
||||
strm->avail_in = 0;
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
|
||||
while (true) {
|
||||
if (strm->avail_in == 0 && !feof(infile)) {
|
||||
strm->next_in = inbuf;
|
||||
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
|
||||
infile);
|
||||
|
||||
if (ferror(infile)) {
|
||||
fprintf(stderr, "Read error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (feof(infile))
|
||||
action = LZMA_FINISH;
|
||||
}
|
||||
|
||||
lzma_ret ret = lzma_code(strm, action);
|
||||
|
||||
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
|
||||
size_t write_size = sizeof(outbuf) - strm->avail_out;
|
||||
|
||||
if (fwrite(outbuf, 1, write_size, outfile)
|
||||
!= write_size) {
|
||||
fprintf(stderr, "Write error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
}
|
||||
|
||||
if (ret != LZMA_OK) {
|
||||
if (ret == LZMA_STREAM_END)
|
||||
return true;
|
||||
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_DATA_ERROR:
|
||||
msg = "File size limits exceeded";
|
||||
break;
|
||||
|
||||
default:
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Encoder error: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
main(void)
|
||||
{
|
||||
lzma_stream strm = LZMA_STREAM_INIT;
|
||||
|
||||
bool success = init_encoder(&strm);
|
||||
if (success)
|
||||
success = compress(&strm, stdin, stdout);
|
||||
|
||||
lzma_end(&strm);
|
||||
|
||||
if (fclose(stdout)) {
|
||||
fprintf(stderr, "Write error: %s\n", strerror(errno));
|
||||
success = false;
|
||||
}
|
||||
|
||||
return success ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
206
doc/examples/04_compress_easy_mt.c
Normal file
206
doc/examples/04_compress_easy_mt.c
Normal file
@@ -0,0 +1,206 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
/// \file 04_compress_easy_mt.c
|
||||
/// \brief Compress in multi-call mode using LZMA2 in multi-threaded mode
|
||||
///
|
||||
/// Usage: ./04_compress_easy_mt < INFILE > OUTFILE
|
||||
///
|
||||
/// Example: ./04_compress_easy_mt < foo > foo.xz
|
||||
//
|
||||
// Author: Lasse Collin
|
||||
//
|
||||
// This file has been put into the public domain.
|
||||
// You can do whatever you want with this file.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <lzma.h>
|
||||
|
||||
|
||||
static bool
|
||||
init_encoder(lzma_stream *strm)
|
||||
{
|
||||
// The threaded encoder takes the options as pointer to
|
||||
// a lzma_mt structure.
|
||||
lzma_mt mt = {
|
||||
// No flags are needed.
|
||||
.flags = 0,
|
||||
|
||||
// Let liblzma determine a sane block size.
|
||||
.block_size = 0,
|
||||
|
||||
// Use no timeout for lzma_code() calls by setting timeout
|
||||
// to zero. That is, sometimes lzma_code() might block for
|
||||
// a long time (from several seconds to even minutes).
|
||||
// If this is not OK, for example due to progress indicator
|
||||
// needing updates, specify a timeout in milliseconds here.
|
||||
// See the documentation of lzma_mt in lzma/container.h for
|
||||
// information how to choose a reasonable timeout.
|
||||
.timeout = 0,
|
||||
|
||||
// Use the default preset (6) for LZMA2.
|
||||
// To use a preset, filters must be set to NULL.
|
||||
.preset = LZMA_PRESET_DEFAULT,
|
||||
.filters = NULL,
|
||||
|
||||
// Use CRC64 for integrity checking. See also
|
||||
// 01_compress_easy.c about choosing the integrity check.
|
||||
.check = LZMA_CHECK_CRC64,
|
||||
};
|
||||
|
||||
// Detect how many threads the CPU supports.
|
||||
mt.threads = lzma_cputhreads();
|
||||
|
||||
// If the number of CPU cores/threads cannot be detected,
|
||||
// use one thread. Note that this isn't the same as the normal
|
||||
// single-threaded mode as this will still split the data into
|
||||
// blocks and use more RAM than the normal single-threaded mode.
|
||||
// You may want to consider using lzma_easy_encoder() or
|
||||
// lzma_stream_encoder() instead of lzma_stream_encoder_mt() if
|
||||
// lzma_cputhreads() returns 0 or 1.
|
||||
if (mt.threads == 0)
|
||||
mt.threads = 1;
|
||||
|
||||
// If the number of CPU cores/threads exceeds threads_max,
|
||||
// limit the number of threads to keep memory usage lower.
|
||||
// The number 8 is arbitrarily chosen and may be too low or
|
||||
// high depending on the compression preset and the computer
|
||||
// being used.
|
||||
//
|
||||
// FIXME: A better way could be to check the amount of RAM
|
||||
// (or available RAM) and use lzma_stream_encoder_mt_memusage()
|
||||
// to determine if the number of threads should be reduced.
|
||||
const uint32_t threads_max = 8;
|
||||
if (mt.threads > threads_max)
|
||||
mt.threads = threads_max;
|
||||
|
||||
// Initialize the threaded encoder.
|
||||
lzma_ret ret = lzma_stream_encoder_mt(strm, &mt);
|
||||
|
||||
if (ret == LZMA_OK)
|
||||
return true;
|
||||
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_OPTIONS_ERROR:
|
||||
// We are no longer using a plain preset so this error
|
||||
// message has been edited accordingly compared to
|
||||
// 01_compress_easy.c.
|
||||
msg = "Specified filter chain is not supported";
|
||||
break;
|
||||
|
||||
case LZMA_UNSUPPORTED_CHECK:
|
||||
msg = "Specified integrity check is not supported";
|
||||
break;
|
||||
|
||||
default:
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// This function is identical to the one in 01_compress_easy.c.
|
||||
static bool
|
||||
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
|
||||
{
|
||||
lzma_action action = LZMA_RUN;
|
||||
|
||||
uint8_t inbuf[BUFSIZ];
|
||||
uint8_t outbuf[BUFSIZ];
|
||||
|
||||
strm->next_in = NULL;
|
||||
strm->avail_in = 0;
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
|
||||
while (true) {
|
||||
if (strm->avail_in == 0 && !feof(infile)) {
|
||||
strm->next_in = inbuf;
|
||||
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
|
||||
infile);
|
||||
|
||||
if (ferror(infile)) {
|
||||
fprintf(stderr, "Read error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (feof(infile))
|
||||
action = LZMA_FINISH;
|
||||
}
|
||||
|
||||
lzma_ret ret = lzma_code(strm, action);
|
||||
|
||||
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
|
||||
size_t write_size = sizeof(outbuf) - strm->avail_out;
|
||||
|
||||
if (fwrite(outbuf, 1, write_size, outfile)
|
||||
!= write_size) {
|
||||
fprintf(stderr, "Write error: %s\n",
|
||||
strerror(errno));
|
||||
return false;
|
||||
}
|
||||
|
||||
strm->next_out = outbuf;
|
||||
strm->avail_out = sizeof(outbuf);
|
||||
}
|
||||
|
||||
if (ret != LZMA_OK) {
|
||||
if (ret == LZMA_STREAM_END)
|
||||
return true;
|
||||
|
||||
const char *msg;
|
||||
switch (ret) {
|
||||
case LZMA_MEM_ERROR:
|
||||
msg = "Memory allocation failed";
|
||||
break;
|
||||
|
||||
case LZMA_DATA_ERROR:
|
||||
msg = "File size limits exceeded";
|
||||
break;
|
||||
|
||||
default:
|
||||
msg = "Unknown error, possibly a bug";
|
||||
break;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Encoder error: %s (error code %u)\n",
|
||||
msg, ret);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern int
|
||||
main(void)
|
||||
{
|
||||
lzma_stream strm = LZMA_STREAM_INIT;
|
||||
|
||||
bool success = init_encoder(&strm);
|
||||
if (success)
|
||||
success = compress(&strm, stdin, stdout);
|
||||
|
||||
lzma_end(&strm);
|
||||
|
||||
if (fclose(stdout)) {
|
||||
fprintf(stderr, "Write error: %s\n", strerror(errno));
|
||||
success = false;
|
||||
}
|
||||
|
||||
return success ? EXIT_SUCCESS : EXIT_FAILURE;
|
||||
}
|
||||
24
doc/examples/Makefile
Normal file
24
doc/examples/Makefile
Normal file
@@ -0,0 +1,24 @@
|
||||
#
|
||||
# Author: Lasse Collin
|
||||
#
|
||||
# This file has been put into the public domain.
|
||||
# You can do whatever you want with this file.
|
||||
#
|
||||
|
||||
CC = c99
|
||||
CFLAGS = -g
|
||||
LDFLAGS = -llzma
|
||||
|
||||
PROGS = \
|
||||
01_compress_easy \
|
||||
02_decompress \
|
||||
03_compress_custom \
|
||||
04_compress_easy_mt
|
||||
|
||||
all: $(PROGS)
|
||||
|
||||
.c:
|
||||
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
|
||||
|
||||
clean:
|
||||
-rm -f $(PROGS)
|
||||
127
doc/examples_old/xz_pipe_comp.c
Normal file
127
doc/examples_old/xz_pipe_comp.c
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
* xz_pipe_comp.c
|
||||
* A simple example of pipe-only xz compressor implementation.
|
||||
* version: 2010-07-12 - by Daniel Mealha Cabrita
|
||||
* Not copyrighted -- provided to the public domain.
|
||||
*
|
||||
* Compiling:
|
||||
* Link with liblzma. GCC example:
|
||||
* $ gcc -llzma xz_pipe_comp.c -o xz_pipe_comp
|
||||
*
|
||||
* Usage example:
|
||||
* $ cat some_file | ./xz_pipe_comp > some_file.xz
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <lzma.h>
|
||||
|
||||
|
||||
/* COMPRESSION SETTINGS */
|
||||
|
||||
/* analogous to xz CLI options: -0 to -9 */
|
||||
#define COMPRESSION_LEVEL 6
|
||||
|
||||
/* boolean setting, analogous to xz CLI option: -e */
|
||||
#define COMPRESSION_EXTREME true
|
||||
|
||||
/* see: /usr/include/lzma/check.h LZMA_CHECK_* */
|
||||
#define INTEGRITY_CHECK LZMA_CHECK_CRC64
|
||||
|
||||
|
||||
/* read/write buffer sizes */
|
||||
#define IN_BUF_MAX 4096
|
||||
#define OUT_BUF_MAX 4096
|
||||
|
||||
/* error codes */
|
||||
#define RET_OK 0
|
||||
#define RET_ERROR_INIT 1
|
||||
#define RET_ERROR_INPUT 2
|
||||
#define RET_ERROR_OUTPUT 3
|
||||
#define RET_ERROR_COMPRESSION 4
|
||||
|
||||
|
||||
/* note: in_file and out_file must be open already */
|
||||
int xz_compress (FILE *in_file, FILE *out_file)
|
||||
{
|
||||
uint32_t preset = COMPRESSION_LEVEL | (COMPRESSION_EXTREME ? LZMA_PRESET_EXTREME : 0);
|
||||
lzma_check check = INTEGRITY_CHECK;
|
||||
lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
|
||||
uint8_t in_buf [IN_BUF_MAX];
|
||||
uint8_t out_buf [OUT_BUF_MAX];
|
||||
size_t in_len; /* length of useful data in in_buf */
|
||||
size_t out_len; /* length of useful data in out_buf */
|
||||
bool in_finished = false;
|
||||
bool out_finished = false;
|
||||
lzma_action action;
|
||||
lzma_ret ret_xz;
|
||||
int ret;
|
||||
|
||||
ret = RET_OK;
|
||||
|
||||
/* initialize xz encoder */
|
||||
ret_xz = lzma_easy_encoder (&strm, preset, check);
|
||||
if (ret_xz != LZMA_OK) {
|
||||
fprintf (stderr, "lzma_easy_encoder error: %d\n", (int) ret_xz);
|
||||
return RET_ERROR_INIT;
|
||||
}
|
||||
|
||||
while ((! in_finished) && (! out_finished)) {
|
||||
/* read incoming data */
|
||||
in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);
|
||||
|
||||
if (feof (in_file)) {
|
||||
in_finished = true;
|
||||
}
|
||||
if (ferror (in_file)) {
|
||||
in_finished = true;
|
||||
ret = RET_ERROR_INPUT;
|
||||
}
|
||||
|
||||
strm.next_in = in_buf;
|
||||
strm.avail_in = in_len;
|
||||
|
||||
/* if no more data from in_buf, flushes the
|
||||
internal xz buffers and closes the xz data
|
||||
with LZMA_FINISH */
|
||||
action = in_finished ? LZMA_FINISH : LZMA_RUN;
|
||||
|
||||
/* loop until there's no pending compressed output */
|
||||
do {
|
||||
/* out_buf is clean at this point */
|
||||
strm.next_out = out_buf;
|
||||
strm.avail_out = OUT_BUF_MAX;
|
||||
|
||||
/* compress data */
|
||||
ret_xz = lzma_code (&strm, action);
|
||||
|
||||
if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
|
||||
fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
|
||||
out_finished = true;
|
||||
ret = RET_ERROR_COMPRESSION;
|
||||
} else {
|
||||
/* write compressed data */
|
||||
out_len = OUT_BUF_MAX - strm.avail_out;
|
||||
fwrite (out_buf, 1, out_len, out_file);
|
||||
if (ferror (out_file)) {
|
||||
out_finished = true;
|
||||
ret = RET_ERROR_OUTPUT;
|
||||
}
|
||||
}
|
||||
} while (strm.avail_out == 0);
|
||||
}
|
||||
|
||||
lzma_end (&strm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = xz_compress (stdin, stdout);
|
||||
return ret;
|
||||
}
|
||||
|
||||
123
doc/examples_old/xz_pipe_decomp.c
Normal file
123
doc/examples_old/xz_pipe_decomp.c
Normal file
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* xz_pipe_decomp.c
|
||||
* A simple example of pipe-only xz decompressor implementation.
|
||||
* version: 2012-06-14 - by Daniel Mealha Cabrita
|
||||
* Not copyrighted -- provided to the public domain.
|
||||
*
|
||||
* Compiling:
|
||||
* Link with liblzma. GCC example:
|
||||
* $ gcc -llzma xz_pipe_decomp.c -o xz_pipe_decomp
|
||||
*
|
||||
* Usage example:
|
||||
* $ cat some_file.xz | ./xz_pipe_decomp > some_file
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <lzma.h>
|
||||
|
||||
|
||||
/* read/write buffer sizes */
|
||||
#define IN_BUF_MAX 4096
|
||||
#define OUT_BUF_MAX 4096
|
||||
|
||||
/* error codes */
|
||||
#define RET_OK 0
|
||||
#define RET_ERROR_INIT 1
|
||||
#define RET_ERROR_INPUT 2
|
||||
#define RET_ERROR_OUTPUT 3
|
||||
#define RET_ERROR_DECOMPRESSION 4
|
||||
|
||||
|
||||
/* note: in_file and out_file must be open already */
|
||||
int xz_decompress (FILE *in_file, FILE *out_file)
|
||||
{
|
||||
lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
|
||||
const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED;
|
||||
const uint64_t memory_limit = UINT64_MAX; /* no memory limit */
|
||||
uint8_t in_buf [IN_BUF_MAX];
|
||||
uint8_t out_buf [OUT_BUF_MAX];
|
||||
size_t in_len; /* length of useful data in in_buf */
|
||||
size_t out_len; /* length of useful data in out_buf */
|
||||
bool in_finished = false;
|
||||
bool out_finished = false;
|
||||
lzma_action action;
|
||||
lzma_ret ret_xz;
|
||||
int ret;
|
||||
|
||||
ret = RET_OK;
|
||||
|
||||
/* initialize xz decoder */
|
||||
ret_xz = lzma_stream_decoder (&strm, memory_limit, flags);
|
||||
if (ret_xz != LZMA_OK) {
|
||||
fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz);
|
||||
return RET_ERROR_INIT;
|
||||
}
|
||||
|
||||
while ((! in_finished) && (! out_finished)) {
|
||||
/* read incoming data */
|
||||
in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);
|
||||
|
||||
if (feof (in_file)) {
|
||||
in_finished = true;
|
||||
}
|
||||
if (ferror (in_file)) {
|
||||
in_finished = true;
|
||||
ret = RET_ERROR_INPUT;
|
||||
}
|
||||
|
||||
strm.next_in = in_buf;
|
||||
strm.avail_in = in_len;
|
||||
|
||||
/* if no more data from in_buf, flushes the
|
||||
internal xz buffers and closes the decompressed data
|
||||
with LZMA_FINISH */
|
||||
action = in_finished ? LZMA_FINISH : LZMA_RUN;
|
||||
|
||||
/* loop until there's no pending decompressed output */
|
||||
do {
|
||||
/* out_buf is clean at this point */
|
||||
strm.next_out = out_buf;
|
||||
strm.avail_out = OUT_BUF_MAX;
|
||||
|
||||
/* decompress data */
|
||||
ret_xz = lzma_code (&strm, action);
|
||||
|
||||
if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
|
||||
fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
|
||||
out_finished = true;
|
||||
ret = RET_ERROR_DECOMPRESSION;
|
||||
} else {
|
||||
/* write decompressed data */
|
||||
out_len = OUT_BUF_MAX - strm.avail_out;
|
||||
fwrite (out_buf, 1, out_len, out_file);
|
||||
if (ferror (out_file)) {
|
||||
out_finished = true;
|
||||
ret = RET_ERROR_OUTPUT;
|
||||
}
|
||||
}
|
||||
} while (strm.avail_out == 0);
|
||||
}
|
||||
|
||||
/* Bug fix (2012-06-14): If no errors were detected, check
|
||||
that the last lzma_code() call returned LZMA_STREAM_END.
|
||||
If not, the file is probably truncated. */
|
||||
if ((ret == RET_OK) && (ret_xz != LZMA_STREAM_END)) {
|
||||
fprintf (stderr, "Input truncated or corrupt\n");
|
||||
ret = RET_ERROR_DECOMPRESSION;
|
||||
}
|
||||
|
||||
lzma_end (&strm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = xz_decompress (stdin, stdout);
|
||||
return ret;
|
||||
}
|
||||
|
||||
224
doc/faq.txt
Normal file
224
doc/faq.txt
Normal file
@@ -0,0 +1,224 @@
|
||||
|
||||
XZ Utils FAQ
|
||||
============
|
||||
|
||||
Q: What do the letters XZ mean?
|
||||
|
||||
A: Nothing. They are just two letters, which come from the file format
|
||||
suffix .xz. The .xz suffix was selected, because it seemed to be
|
||||
pretty much unused. It has no deeper meaning.
|
||||
|
||||
|
||||
Q: What are LZMA and LZMA2?
|
||||
|
||||
A: LZMA stands for Lempel-Ziv-Markov chain-Algorithm. It is the name
|
||||
of the compression algorithm designed by Igor Pavlov for 7-Zip.
|
||||
LZMA is based on LZ77 and range encoding.
|
||||
|
||||
LZMA2 is an updated version of the original LZMA to fix a couple of
|
||||
practical issues. In context of XZ Utils, LZMA is called LZMA1 to
|
||||
emphasize that LZMA is not the same thing as LZMA2. LZMA2 is the
|
||||
primary compression algorithm in the .xz file format.
|
||||
|
||||
|
||||
Q: There are many LZMA related projects. How does XZ Utils relate to them?
|
||||
|
||||
A: 7-Zip and LZMA SDK are the original projects. LZMA SDK is roughly
|
||||
a subset of the 7-Zip source tree.
|
||||
|
||||
p7zip is 7-Zip's command-line tools ported to POSIX-like systems.
|
||||
|
||||
LZMA Utils provide a gzip-like lzma tool for POSIX-like systems.
|
||||
LZMA Utils are based on LZMA SDK. XZ Utils are the successor to
|
||||
LZMA Utils.
|
||||
|
||||
There are several other projects using LZMA. Most are more or less
|
||||
based on LZMA SDK. See <http://7-zip.org/links.html>.
|
||||
|
||||
|
||||
Q: Why is liblzma named liblzma if its primary file format is .xz?
|
||||
Shouldn't it be e.g. libxz?
|
||||
|
||||
A: When the designing of the .xz format began, the idea was to replace
|
||||
the .lzma format and use the same .lzma suffix. It would have been
|
||||
quite OK to reuse the suffix when there were very few .lzma files
|
||||
around. However, the old .lzma format became popular before the
|
||||
new format was finished. The new format was renamed to .xz but the
|
||||
name of liblzma wasn't changed.
|
||||
|
||||
|
||||
Q: Do XZ Utils support the .7z format?
|
||||
|
||||
A: No. Use 7-Zip (Windows) or p7zip (POSIX-like systems) to handle .7z
|
||||
files.
|
||||
|
||||
|
||||
Q: I have many .tar.7z files. Can I convert them to .tar.xz without
|
||||
spending hours recompressing the data?
|
||||
|
||||
A: In the "extra" directory, there is a script named 7z2lzma.bash which
|
||||
is able to convert some .7z files to the .lzma format (not .xz). It
|
||||
needs the 7za (or 7z) command from p7zip. The script may silently
|
||||
produce corrupt output if certain assumptions are not met, so
|
||||
decompress the resulting .lzma file and compare it against the
|
||||
original before deleting the original file!
|
||||
|
||||
|
||||
Q: I have many .lzma files. Can I quickly convert them to the .xz format?
|
||||
|
||||
A: For now, no. Since XZ Utils supports the .lzma format, it's usually
|
||||
not too bad to keep the old files in the old format. If you want to
|
||||
do the conversion anyway, you need to decompress the .lzma files and
|
||||
then recompress to the .xz format.
|
||||
|
||||
Technically, there is a way to make the conversion relatively fast
|
||||
(roughly twice the time that normal decompression takes). Writing
|
||||
such a tool would take quite a bit of time though, and would probably
|
||||
be useful to only a few people. If you really want such a conversion
|
||||
tool, contact Lasse Collin and offer some money.
|
||||
|
||||
|
||||
Q: I have installed xz, but my tar doesn't recognize .tar.xz files.
|
||||
How can I extract .tar.xz files?
|
||||
|
||||
A: xz -dc foo.tar.xz | tar xf -
|
||||
|
||||
|
||||
Q: Can I recover parts of a broken .xz file (e.g. a corrupted CD-R)?
|
||||
|
||||
A: It may be possible if the file consists of multiple blocks, which
|
||||
typically is not the case if the file was created in single-threaded
|
||||
mode. There is no recovery program yet.
|
||||
|
||||
|
||||
Q: Is (some part of) XZ Utils patented?
|
||||
|
||||
A: Lasse Collin is not aware of any patents that could affect XZ Utils.
|
||||
However, due to the nature of software patents, it's not possible to
|
||||
guarantee that XZ Utils isn't affected by any third party patent(s).
|
||||
|
||||
|
||||
Q: Where can I find documentation about the file format and algorithms?
|
||||
|
||||
A: The .xz format is documented in xz-file-format.txt. It is a container
|
||||
format only, and doesn't include descriptions of any non-trivial
|
||||
filters.
|
||||
|
||||
Documenting LZMA and LZMA2 is planned, but for now, there is no other
|
||||
documentation than the source code. Before you begin, you should know
|
||||
the basics of LZ77 and range-coding algorithms. LZMA is based on LZ77,
|
||||
but LZMA is a lot more complex. Range coding is used to compress
|
||||
the final bitstream like Huffman coding is used in Deflate.
|
||||
|
||||
|
||||
Q: I cannot find BCJ and BCJ2 filters. Don't they exist in liblzma?
|
||||
|
||||
A: BCJ filter is called "x86" in liblzma. BCJ2 is not included,
|
||||
because it requires using more than one encoded output stream.
|
||||
A streamable version of BCJ2-style filtering is planned.
|
||||
|
||||
|
||||
Q: I need to use a script that runs "xz -9". On a system with 256 MiB
|
||||
of RAM, xz says that it cannot allocate memory. Can I make the
|
||||
script work without modifying it?
|
||||
|
||||
A: Set a default memory usage limit for compression. You can do it e.g.
|
||||
in a shell initialization script such as ~/.bashrc or /etc/profile:
|
||||
|
||||
XZ_DEFAULTS=--memlimit-compress=150MiB
|
||||
export XZ_DEFAULTS
|
||||
|
||||
xz will then scale the compression settings down so that the given
|
||||
memory usage limit is not reached. This way xz shouldn't run out
|
||||
of memory.
|
||||
|
||||
Check also that memory-related resource limits are high enough.
|
||||
On most systems, "ulimit -a" will show the current resource limits.
|
||||
|
||||
|
||||
Q: How do I create files that can be decompressed with XZ Embedded?
|
||||
|
||||
A: See the documentation in XZ Embedded. In short, something like
|
||||
this is a good start:
|
||||
|
||||
xz --check=crc32 --lzma2=preset=6e,dict=64KiB
|
||||
|
||||
Or if a BCJ filter is needed too, e.g. if compressing
|
||||
a kernel image for PowerPC:
|
||||
|
||||
xz --check=crc32 --powerpc --lzma2=preset=6e,dict=64KiB
|
||||
|
||||
Adjust the dictionary size to get a good compromise between
|
||||
compression ratio and decompressor memory usage. Note that
|
||||
in single-call decompression mode of XZ Embedded, a big
|
||||
dictionary doesn't increase memory usage.
|
||||
|
||||
|
||||
Q: Will xz support threaded compression?
|
||||
|
||||
A: It is planned and has been taken into account when designing
|
||||
the .xz file format. Eventually there will probably be three types
|
||||
of threading, each method having its own advantages and disadvantages.
|
||||
|
||||
The simplest method is splitting the uncompressed data into blocks
|
||||
and compressing them in parallel independent from each other.
|
||||
Since the blocks are compressed independently, they can also be
|
||||
decompressed independently. Together with the index feature in .xz,
|
||||
this allows using threads to create .xz files for random-access
|
||||
reading. This also makes threaded decompression possible, although
|
||||
it is not clear if threaded decompression will ever be implemented.
|
||||
|
||||
The independent blocks method has a couple of disadvantages too. It
|
||||
will compress worse than a single-block method. Often the difference
|
||||
is not too big (maybe 1-2 %) but sometimes it can be too big. Also,
|
||||
the memory usage of the compressor increases linearly when adding
|
||||
threads.
|
||||
|
||||
Match finder parallelization is another threading method. It has
|
||||
been in 7-Zip for ages. It doesn't affect compression ratio or
|
||||
memory usage significantly. Among the three threading methods, only
|
||||
this is useful when compressing small files (files that are not
|
||||
significantly bigger than the dictionary). Unfortunately this method
|
||||
scales only to about two CPU cores.
|
||||
|
||||
The third method is pigz-style threading (I use that name, because
|
||||
pigz <http://www.zlib.net/pigz/> uses that method). It doesn't
|
||||
affect compression ratio significantly and scales to many cores.
|
||||
The memory usage scales linearly when threads are added. This isn't
|
||||
significant with pigz, because Deflate uses only a 32 KiB dictionary,
|
||||
but with LZMA2 the memory usage will increase dramatically just like
|
||||
with the independent-blocks method. There is also a constant
|
||||
computational overhead, which may make pigz-method a bit dull on
|
||||
dual-core compared to the parallel match finder method, but with more
|
||||
cores the overhead is not a big deal anymore.
|
||||
|
||||
Combining the threading methods will be possible and also useful.
|
||||
E.g. combining match finder parallelization with pigz-style threading
|
||||
can cut the memory usage by 50 %.
|
||||
|
||||
It is possible that the single-threaded method will be modified to
|
||||
create files identical to the pigz-style method. We'll see once
|
||||
pigz-style threading has been implemented in liblzma.
|
||||
|
||||
|
||||
Q: How do I build a program that needs liblzmadec (lzmadec.h)?
|
||||
|
||||
A: liblzmadec is part of LZMA Utils. XZ Utils has liblzma, but no
|
||||
liblzmadec. The code using liblzmadec should be ported to use
|
||||
liblzma instead. If you cannot or don't want to do that, download
|
||||
LZMA Utils from <http://tukaani.org/lzma/>.
|
||||
|
||||
|
||||
Q: The default build of liblzma is too big. How can I make it smaller?
|
||||
|
||||
A: Give --enable-small to the configure script. Use also appropriate
|
||||
--enable or --disable options to include only those filter encoders
|
||||
and decoders and integrity checks that you actually need. Use
|
||||
CFLAGS=-Os (with GCC) or equivalent to tell your compiler to optimize
|
||||
for size. See INSTALL for information about configure options.
|
||||
|
||||
If the result is still too big, take a look at XZ Embedded. It is
|
||||
a separate project, which provides a limited but significantly
|
||||
smaller XZ decoder implementation than XZ Utils. You can find it
|
||||
at <http://tukaani.org/xz/embedded.html>.
|
||||
|
||||
150
doc/history.txt
Normal file
150
doc/history.txt
Normal file
@@ -0,0 +1,150 @@
|
||||
|
||||
History of LZMA Utils and XZ Utils
|
||||
==================================
|
||||
|
||||
Tukaani distribution
|
||||
|
||||
In 2005, there was a small group working on the Tukaani distribution,
|
||||
which was a Slackware fork. One of the project's goals was to fit the
|
||||
distro on a single 700 MiB ISO-9660 image. Using LZMA instead of gzip
|
||||
helped a lot. Roughly speaking, one could fit data that took 1000 MiB
|
||||
in gzipped form into 700 MiB with LZMA. Naturally, the compression
|
||||
ratio varied across packages, but this was what we got on average.
|
||||
|
||||
Slackware packages have traditionally had .tgz as the filename suffix,
|
||||
which is an abbreviation of .tar.gz. A logical naming for LZMA
|
||||
compressed packages was .tlz, being an abbreviation of .tar.lzma.
|
||||
|
||||
At the end of the year 2007, there was no distribution under the
|
||||
Tukaani project anymore, but development of LZMA Utils was kept going.
|
||||
Still, there were .tlz packages around, because at least Vector Linux
|
||||
(a Slackware based distribution) used LZMA for its packages.
|
||||
|
||||
First versions of the modified pkgtools used the LZMA_Alone tool from
|
||||
Igor Pavlov's LZMA SDK as is. It was fine, because users wouldn't need
|
||||
to interact with LZMA_Alone directly. But people soon wanted to use
|
||||
LZMA for other files too, and the interface of LZMA_Alone wasn't
|
||||
comfortable for those used to gzip and bzip2.
|
||||
|
||||
|
||||
First steps of LZMA Utils
|
||||
|
||||
The first version of LZMA Utils (4.22.0) included a shell script called
|
||||
lzmash. It was a wrapper that had a gzip-like command-line interface. It
|
||||
used the LZMA_Alone tool from LZMA SDK to do all the real work. zgrep,
|
||||
zdiff, and related scripts from gzip were adapted to work with LZMA and
|
||||
were part of the first LZMA Utils release too.
|
||||
|
||||
LZMA Utils 4.22.0 included also lzmadec, which was a small (less than
|
||||
10 KiB) decoder-only command-line tool. It was written on top of the
|
||||
decoder-only C code found from the LZMA SDK. lzmadec was convenient in
|
||||
situations where LZMA_Alone (a few hundred KiB) would be too big.
|
||||
|
||||
lzmash and lzmadec were written by Lasse Collin.
|
||||
|
||||
|
||||
Second generation
|
||||
|
||||
The lzmash script was an ugly and not very secure hack. The last
|
||||
version of LZMA Utils to use lzmash was 4.27.1.
|
||||
|
||||
LZMA Utils 4.32.0beta1 introduced a new lzma command-line tool written
|
||||
by Ville Koskinen. It was written in C++, and used the encoder and
|
||||
decoder from C++ LZMA SDK with some little modifications. This tool
|
||||
replaced both the lzmash script and the LZMA_Alone command-line tool
|
||||
in LZMA Utils.
|
||||
|
||||
Introducing this new tool caused some temporary incompatibilities,
|
||||
because the LZMA_Alone executable was simply named lzma like the new
|
||||
command-line tool, but they had a completely different command-line
|
||||
interface. The file format was still the same.
|
||||
|
||||
Lasse wrote liblzmadec, which was a small decoder-only library based
|
||||
on the C code found from LZMA SDK. liblzmadec had an API similar to
|
||||
zlib, although there were some significant differences, which made it
|
||||
non-trivial to use it in some applications designed for zlib and
|
||||
libbzip2.
|
||||
|
||||
The lzmadec command-line tool was converted to use liblzmadec.
|
||||
|
||||
Alexandre Sauvé helped converting the build system to use GNU
|
||||
Autotools. This made it easier to test for certain less portable
|
||||
features needed by the new command-line tool.
|
||||
|
||||
Since the new command-line tool never got completely finished (for
|
||||
example, it didn't support the LZMA_OPT environment variable), the
|
||||
intent was to not call 4.32.x stable. Similarly, liblzmadec wasn't
|
||||
polished, but appeared to work well enough, so some people started
|
||||
using it too.
|
||||
|
||||
Because the development of the third generation of LZMA Utils was
|
||||
delayed considerably (3-4 years), the 4.32.x branch had to be kept
|
||||
maintained. It got some bug fixes now and then, and finally it was
|
||||
decided to call it stable, although most of the missing features were
|
||||
never added.
|
||||
|
||||
|
||||
File format problems
|
||||
|
||||
The file format used by LZMA_Alone was primitive. It was designed with
|
||||
embedded systems in mind, and thus provided only a minimal set of
|
||||
features. The two biggest problems for non-embedded use were the lack
|
||||
of magic bytes and an integrity check.
|
||||
|
||||
Igor and Lasse started developing a new file format with some help
|
||||
from Ville Koskinen. Also Mark Adler, Mikko Pouru, H. Peter Anvin,
|
||||
and Lars Wirzenius helped with some minor things at some point of the
|
||||
development. Designing the new format took quite a long time (actually,
|
||||
too long a time would be a more appropriate expression). It was mostly
|
||||
because Lasse was quite slow at getting things done due to personal
|
||||
reasons.
|
||||
|
||||
Originally the new format was supposed to use the same .lzma suffix
|
||||
that was already used by the old file format. Switching to the new
|
||||
format wouldn't have caused much trouble when the old format wasn't
|
||||
used by many people. But since the development of the new format took
|
||||
such a long time, the old format got quite popular, and it was decided
|
||||
that the new file format must use a different suffix.
|
||||
|
||||
It was decided to use .xz as the suffix of the new file format. The
|
||||
first stable .xz file format specification was finally released in
|
||||
December 2008. In addition to fixing the most obvious problems of
|
||||
the old .lzma format, the .xz format added some new features like
|
||||
support for multiple filters (compression algorithms), filter chaining
|
||||
(like piping on the command line), and limited random-access reading.
|
||||
|
||||
Currently the primary compression algorithm used in .xz is LZMA2.
|
||||
It is an extension on top of the original LZMA to fix some practical
|
||||
problems: LZMA2 adds support for flushing the encoder, uncompressed
|
||||
chunks, eases stateful decoder implementations, and improves support
|
||||
for multithreading. Since LZMA2 is better than the original LZMA, the
|
||||
original LZMA is not supported in .xz.
|
||||
|
||||
|
||||
Transition to XZ Utils
|
||||
|
||||
The early versions of XZ Utils were called LZMA Utils. The first
|
||||
releases were 4.42.0alphas. They dropped the rest of the C++ LZMA SDK.
|
||||
The code was still directly based on LZMA SDK but ported to C and
|
||||
converted from a callback API to a stateful API. Later, Igor Pavlov
|
||||
made a C version of the LZMA encoder too; these ports from C++ to C
|
||||
were independent in LZMA SDK and LZMA Utils.
|
||||
|
||||
The core of the new LZMA Utils was liblzma, a compression library with
|
||||
a zlib-like API. liblzma supported both the old and new file format.
|
||||
The gzip-like lzma command-line tool was rewritten to use liblzma.
|
||||
|
||||
The new LZMA Utils code base was renamed to XZ Utils when the name
|
||||
of the new file format had been decided. The liblzma compression
|
||||
library retained its name though, because changing it would have
|
||||
caused unnecessary breakage in applications already using the early
|
||||
liblzma snapshots.
|
||||
|
||||
The xz command-line tool can emulate the gzip-like lzma tool by
|
||||
creating appropriate symlinks (e.g. lzma -> xz). Thus, practically
|
||||
all scripts using the lzma tool from LZMA Utils will work as is with
|
||||
XZ Utils (and will keep using the old .lzma format). Still, the .lzma
|
||||
format is more or less deprecated. XZ Utils will keep supporting it,
|
||||
but new applications should use the .xz format, and migrating old
|
||||
applications to .xz is often a good idea too.
|
||||
|
||||
166
doc/lzma-file-format.txt
Normal file
166
doc/lzma-file-format.txt
Normal file
@@ -0,0 +1,166 @@
|
||||
|
||||
The .lzma File Format
|
||||
=====================
|
||||
|
||||
0. Preface
|
||||
0.1. Notices and Acknowledgements
|
||||
0.2. Changes
|
||||
1. File Format
|
||||
1.1. Header
|
||||
1.1.1. Properties
|
||||
1.1.2. Dictionary Size
|
||||
1.1.3. Uncompressed Size
|
||||
1.2. LZMA Compressed Data
|
||||
2. References
|
||||
|
||||
|
||||
0. Preface
|
||||
|
||||
This document describes the .lzma file format, which is
|
||||
sometimes also called LZMA_Alone format. It is a legacy file
|
||||
format, which is being or has been replaced by the .xz format.
|
||||
The MIME type of the .lzma format is `application/x-lzma'.
|
||||
|
||||
The most commonly used software to handle .lzma files are
|
||||
LZMA SDK, LZMA Utils, 7-Zip, and XZ Utils. This document
|
||||
describes some of the differences between these implementations
|
||||
and gives hints what subset of the .lzma format is the most
|
||||
portable.
|
||||
|
||||
|
||||
0.1. Notices and Acknowledgements
|
||||
|
||||
This file format was designed by Igor Pavlov for use in
|
||||
LZMA SDK. This document was written by Lasse Collin
|
||||
<lasse.collin@tukaani.org> using the documentation found
|
||||
from the LZMA SDK.
|
||||
|
||||
This document has been put into the public domain.
|
||||
|
||||
|
||||
0.2. Changes
|
||||
|
||||
Last modified: 2011-04-12 11:55+0300
|
||||
|
||||
|
||||
1. File Format
|
||||
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
|
||||
| Header | LZMA Compressed Data |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
|
||||
|
||||
The .lzma format file consist of 13-byte Header followed by
|
||||
the LZMA Compressed Data.
|
||||
|
||||
Unlike the .gz, .bz2, and .xz formats, it is not possible to
|
||||
concatenate multiple .lzma files as is and expect the
|
||||
decompression tool to decode the resulting file as if it were
|
||||
a single .lzma file.
|
||||
|
||||
For example, the command line tools from LZMA Utils and
|
||||
LZMA SDK silently ignore all the data after the first .lzma
|
||||
stream. In contrast, the command line tool from XZ Utils
|
||||
considers the .lzma file to be corrupt if there is data after
|
||||
the first .lzma stream.
|
||||
|
||||
|
||||
1.1. Header
|
||||
|
||||
+------------+----+----+----+----+--+--+--+--+--+--+--+--+
|
||||
| Properties | Dictionary Size | Uncompressed Size |
|
||||
+------------+----+----+----+----+--+--+--+--+--+--+--+--+
|
||||
|
||||
|
||||
1.1.1. Properties
|
||||
|
||||
The Properties field contains three properties. An abbreviation
|
||||
is given in parentheses, followed by the value range of the
|
||||
property. The field consists of
|
||||
|
||||
1) the number of literal context bits (lc, [0, 8]);
|
||||
2) the number of literal position bits (lp, [0, 4]); and
|
||||
3) the number of position bits (pb, [0, 4]).
|
||||
|
||||
The properties are encoded using the following formula:
|
||||
|
||||
Properties = (pb * 5 + lp) * 9 + lc
|
||||
|
||||
The following C code illustrates a straightforward way to
|
||||
decode the Properties field:
|
||||
|
||||
uint8_t lc, lp, pb;
|
||||
uint8_t prop = get_lzma_properties();
|
||||
if (prop > (4 * 5 + 4) * 9 + 8)
|
||||
return LZMA_PROPERTIES_ERROR;
|
||||
|
||||
pb = prop / (9 * 5);
|
||||
prop -= pb * 9 * 5;
|
||||
lp = prop / 9;
|
||||
lc = prop - lp * 9;
|
||||
|
||||
XZ Utils has an additional requirement: lc + lp <= 4. Files
|
||||
which don't follow this requirement cannot be decompressed
|
||||
with XZ Utils. Usually this isn't a problem since the most
|
||||
common lc/lp/pb values are 3/0/2. It is the only lc/lp/pb
|
||||
combination that the files created by LZMA Utils can have,
|
||||
but LZMA Utils can decompress files with any lc/lp/pb.
|
||||
|
||||
|
||||
1.1.2. Dictionary Size
|
||||
|
||||
Dictionary Size is stored as an unsigned 32-bit little endian
|
||||
integer. Any 32-bit value is possible, but for maximum
|
||||
portability, only sizes of 2^n and 2^n + 2^(n-1) should be
|
||||
used.
|
||||
|
||||
LZMA Utils creates only files with dictionary size 2^n,
|
||||
16 <= n <= 25. LZMA Utils can decompress files with any
|
||||
dictionary size.
|
||||
|
||||
XZ Utils creates and decompresses .lzma files only with
|
||||
dictionary sizes 2^n and 2^n + 2^(n-1). If some other
|
||||
dictionary size is specified when compressing, the value
|
||||
stored in the Dictionary Size field is a rounded up, but the
|
||||
specified value is still used in the actual compression code.
|
||||
|
||||
|
||||
1.1.3. Uncompressed Size
|
||||
|
||||
Uncompressed Size is stored as unsigned 64-bit little endian
|
||||
integer. A special value of 0xFFFF_FFFF_FFFF_FFFF indicates
|
||||
that Uncompressed Size is unknown. End of Payload Marker (*)
|
||||
is used if and only if Uncompressed Size is unknown.
|
||||
|
||||
XZ Utils rejects files whose Uncompressed Size field specifies
|
||||
a known size that is 256 GiB or more. This is to reject false
|
||||
positives when trying to guess if the input file is in the
|
||||
.lzma format. When Uncompressed Size is unknown, there is no
|
||||
limit for the uncompressed size of the file.
|
||||
|
||||
(*) Some tools use the term End of Stream (EOS) marker
|
||||
instead of End of Payload Marker.
|
||||
|
||||
|
||||
1.2. LZMA Compressed Data
|
||||
|
||||
Detailed description of the format of this field is out of
|
||||
scope of this document.
|
||||
|
||||
|
||||
2. References
|
||||
|
||||
LZMA SDK - The original LZMA implementation
|
||||
http://7-zip.org/sdk.html
|
||||
|
||||
7-Zip
|
||||
http://7-zip.org/
|
||||
|
||||
LZMA Utils - LZMA adapted to POSIX-like systems
|
||||
http://tukaani.org/lzma/
|
||||
|
||||
XZ Utils - The next generation of LZMA Utils
|
||||
http://tukaani.org/xz/
|
||||
|
||||
The .xz file format - The successor of the .lzma format
|
||||
http://tukaani.org/xz/xz-file-format.txt
|
||||
|
||||
BIN
doc/man/pdf-a4/lzmainfo-a4.pdf
Normal file
BIN
doc/man/pdf-a4/lzmainfo-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-a4/xz-a4.pdf
Normal file
BIN
doc/man/pdf-a4/xz-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-a4/xzdec-a4.pdf
Normal file
BIN
doc/man/pdf-a4/xzdec-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-a4/xzdiff-a4.pdf
Normal file
BIN
doc/man/pdf-a4/xzdiff-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-a4/xzgrep-a4.pdf
Normal file
BIN
doc/man/pdf-a4/xzgrep-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-a4/xzless-a4.pdf
Normal file
BIN
doc/man/pdf-a4/xzless-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-a4/xzmore-a4.pdf
Normal file
BIN
doc/man/pdf-a4/xzmore-a4.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/lzmainfo-letter.pdf
Normal file
BIN
doc/man/pdf-letter/lzmainfo-letter.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/xz-letter.pdf
Normal file
BIN
doc/man/pdf-letter/xz-letter.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/xzdec-letter.pdf
Normal file
BIN
doc/man/pdf-letter/xzdec-letter.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/xzdiff-letter.pdf
Normal file
BIN
doc/man/pdf-letter/xzdiff-letter.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/xzgrep-letter.pdf
Normal file
BIN
doc/man/pdf-letter/xzgrep-letter.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/xzless-letter.pdf
Normal file
BIN
doc/man/pdf-letter/xzless-letter.pdf
Normal file
Binary file not shown.
BIN
doc/man/pdf-letter/xzmore-letter.pdf
Normal file
BIN
doc/man/pdf-letter/xzmore-letter.pdf
Normal file
Binary file not shown.
40
doc/man/txt/lzmainfo.txt
Normal file
40
doc/man/txt/lzmainfo.txt
Normal file
@@ -0,0 +1,40 @@
|
||||
LZMAINFO(1) XZ Utils LZMAINFO(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
lzmainfo - show information stored in the .lzma file header
|
||||
|
||||
SYNOPSIS
|
||||
lzmainfo [--help] [--version] [file...]
|
||||
|
||||
DESCRIPTION
|
||||
lzmainfo shows information stored in the .lzma file header. It reads
|
||||
the first 13 bytes from the specified file, decodes the header, and
|
||||
prints it to standard output in human readable format. If no files are
|
||||
given or file is -, standard input is read.
|
||||
|
||||
Usually the most interesting information is the uncompressed size and
|
||||
the dictionary size. Uncompressed size can be shown only if the file
|
||||
is in the non-streamed .lzma format variant. The amount of memory
|
||||
required to decompress the file is a few dozen kilobytes plus the dic-
|
||||
tionary size.
|
||||
|
||||
lzmainfo is included in XZ Utils primarily for backward compatibility
|
||||
with LZMA Utils.
|
||||
|
||||
EXIT STATUS
|
||||
0 All is good.
|
||||
|
||||
1 An error occurred.
|
||||
|
||||
BUGS
|
||||
lzmainfo uses MB while the correct suffix would be MiB (2^20 bytes).
|
||||
This is to keep the output compatible with LZMA Utils.
|
||||
|
||||
SEE ALSO
|
||||
xz(1)
|
||||
|
||||
|
||||
|
||||
Tukaani 2013-06-30 LZMAINFO(1)
|
||||
1483
doc/man/txt/xz.txt
Normal file
1483
doc/man/txt/xz.txt
Normal file
File diff suppressed because it is too large
Load Diff
80
doc/man/txt/xzdec.txt
Normal file
80
doc/man/txt/xzdec.txt
Normal file
@@ -0,0 +1,80 @@
|
||||
XZDEC(1) XZ Utils XZDEC(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
xzdec, lzmadec - Small .xz and .lzma decompressors
|
||||
|
||||
SYNOPSIS
|
||||
xzdec [option...] [file...]
|
||||
lzmadec [option...] [file...]
|
||||
|
||||
DESCRIPTION
|
||||
xzdec is a liblzma-based decompression-only tool for .xz (and only .xz)
|
||||
files. xzdec is intended to work as a drop-in replacement for xz(1) in
|
||||
the most common situations where a script has been written to use xz
|
||||
--decompress --stdout (and possibly a few other commonly used options)
|
||||
to decompress .xz files. lzmadec is identical to xzdec except that
|
||||
lzmadec supports .lzma files instead of .xz files.
|
||||
|
||||
To reduce the size of the executable, xzdec doesn't support multi-
|
||||
threading or localization, and doesn't read options from XZ_DEFAULTS
|
||||
and XZ_OPT environment variables. xzdec doesn't support displaying
|
||||
intermediate progress information: sending SIGINFO to xzdec does noth-
|
||||
ing, but sending SIGUSR1 terminates the process instead of displaying
|
||||
progress information.
|
||||
|
||||
OPTIONS
|
||||
-d, --decompress, --uncompress
|
||||
Ignored for xz(1) compatibility. xzdec supports only decompres-
|
||||
sion.
|
||||
|
||||
-k, --keep
|
||||
Ignored for xz(1) compatibility. xzdec never creates or removes
|
||||
any files.
|
||||
|
||||
-c, --stdout, --to-stdout
|
||||
Ignored for xz(1) compatibility. xzdec always writes the decom-
|
||||
pressed data to standard output.
|
||||
|
||||
-q, --quiet
|
||||
Specifying this once does nothing since xzdec never displays any
|
||||
warnings or notices. Specify this twice to suppress errors.
|
||||
|
||||
-Q, --no-warn
|
||||
Ignored for xz(1) compatibility. xzdec never uses the exit sta-
|
||||
tus 2.
|
||||
|
||||
-h, --help
|
||||
Display a help message and exit successfully.
|
||||
|
||||
-V, --version
|
||||
Display the version number of xzdec and liblzma.
|
||||
|
||||
EXIT STATUS
|
||||
0 All was good.
|
||||
|
||||
1 An error occurred.
|
||||
|
||||
xzdec doesn't have any warning messages like xz(1) has, thus the exit
|
||||
status 2 is not used by xzdec.
|
||||
|
||||
NOTES
|
||||
Use xz(1) instead of xzdec or lzmadec for normal everyday use. xzdec
|
||||
or lzmadec are meant only for situations where it is important to have
|
||||
a smaller decompressor than the full-featured xz(1).
|
||||
|
||||
xzdec and lzmadec are not really that small. The size can be reduced
|
||||
further by dropping features from liblzma at compile time, but that
|
||||
shouldn't usually be done for executables distributed in typical non-
|
||||
embedded operating system distributions. If you need a truly small .xz
|
||||
decompressor, consider using XZ Embedded.
|
||||
|
||||
SEE ALSO
|
||||
xz(1)
|
||||
|
||||
XZ Embedded: <http://tukaani.org/xz/embedded.html>
|
||||
|
||||
|
||||
|
||||
Tukaani 2013-06-30 XZDEC(1)
|
||||
36
doc/man/txt/xzdiff.txt
Normal file
36
doc/man/txt/xzdiff.txt
Normal file
@@ -0,0 +1,36 @@
|
||||
XZDIFF(1) XZ Utils XZDIFF(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
xzcmp, xzdiff, lzcmp, lzdiff - compare compressed files
|
||||
|
||||
SYNOPSIS
|
||||
xzcmp [cmp_options] file1 [file2]
|
||||
xzdiff [diff_options] file1 [file2]
|
||||
lzcmp [cmp_options] file1 [file2]
|
||||
lzdiff [diff_options] file1 [file2]
|
||||
|
||||
DESCRIPTION
|
||||
xzcmp and xzdiff invoke cmp(1) or diff(1) on files compressed with
|
||||
xz(1), lzma(1), gzip(1), bzip2(1), or lzop(1). All options specified
|
||||
are passed directly to cmp(1) or diff(1). If only one file is speci-
|
||||
fied, then the files compared are file1 (which must have a suffix of a
|
||||
supported compression format) and file1 from which the compression for-
|
||||
mat suffix has been stripped. If two files are specified, then they
|
||||
are uncompressed if necessary and fed to cmp(1) or diff(1). The exit
|
||||
status from cmp(1) or diff(1) is preserved.
|
||||
|
||||
The names lzcmp and lzdiff are provided for backward compatibility with
|
||||
LZMA Utils.
|
||||
|
||||
SEE ALSO
|
||||
cmp(1), diff(1), xz(1), gzip(1), bzip2(1), lzop(1), zdiff(1)
|
||||
|
||||
BUGS
|
||||
Messages from the cmp(1) or diff(1) programs refer to temporary file-
|
||||
names instead of those specified.
|
||||
|
||||
|
||||
|
||||
Tukaani 2011-03-19 XZDIFF(1)
|
||||
39
doc/man/txt/xzgrep.txt
Normal file
39
doc/man/txt/xzgrep.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
XZGREP(1) XZ Utils XZGREP(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
xzgrep - search compressed files for a regular expression
|
||||
|
||||
SYNOPSIS
|
||||
xzgrep [grep_options] [-e] pattern file...
|
||||
xzegrep ...
|
||||
xzfgrep ...
|
||||
lzgrep ...
|
||||
lzegrep ...
|
||||
lzfgrep ...
|
||||
|
||||
DESCRIPTION
|
||||
xzgrep invokes grep(1) on files which may be either uncompressed or
|
||||
compressed with xz(1), lzma(1), gzip(1), bzip2(1), or lzop(1). All
|
||||
options specified are passed directly to grep(1).
|
||||
|
||||
If no file is specified, then standard input is decompressed if neces-
|
||||
sary and fed to grep(1). When reading from standard input, gzip(1),
|
||||
bzip2(1), and lzop(1) compressed files are not supported.
|
||||
|
||||
If xzgrep is invoked as xzegrep or xzfgrep then egrep(1) or fgrep(1) is
|
||||
used instead of grep(1). The same applies to names lzgrep, lzegrep,
|
||||
and lzfgrep, which are provided for backward compatibility with LZMA
|
||||
Utils.
|
||||
|
||||
ENVIRONMENT
|
||||
GREP If the GREP environment variable is set, xzgrep uses it instead
|
||||
of grep(1), egrep(1), or fgrep(1).
|
||||
|
||||
SEE ALSO
|
||||
grep(1), xz(1), gzip(1), bzip2(1), lzop(1), zgrep(1)
|
||||
|
||||
|
||||
|
||||
Tukaani 2011-03-19 XZGREP(1)
|
||||
39
doc/man/txt/xzless.txt
Normal file
39
doc/man/txt/xzless.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
XZLESS(1) XZ Utils XZLESS(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
xzless, lzless - view xz or lzma compressed (text) files
|
||||
|
||||
SYNOPSIS
|
||||
xzless [file...]
|
||||
lzless [file...]
|
||||
|
||||
DESCRIPTION
|
||||
xzless is a filter that displays text from compressed files to a termi-
|
||||
nal. It works on files compressed with xz(1) or lzma(1). If no files
|
||||
are given, xzless reads from standard input.
|
||||
|
||||
xzless uses less(1) to present its output. Unlike xzmore, its choice
|
||||
of pager cannot be altered by setting an environment variable. Com-
|
||||
mands are based on both more(1) and vi(1) and allow back and forth
|
||||
movement and searching. See the less(1) manual for more information.
|
||||
|
||||
The command named lzless is provided for backward compatibility with
|
||||
LZMA Utils.
|
||||
|
||||
ENVIRONMENT
|
||||
LESSMETACHARS
|
||||
A list of characters special to the shell. Set by xzless unless
|
||||
it is already set in the environment.
|
||||
|
||||
LESSOPEN
|
||||
Set to a command line to invoke the xz(1) decompressor for pre-
|
||||
processing the input files to less(1).
|
||||
|
||||
SEE ALSO
|
||||
less(1), xz(1), xzmore(1), zless(1)
|
||||
|
||||
|
||||
|
||||
Tukaani 2010-09-27 XZLESS(1)
|
||||
34
doc/man/txt/xzmore.txt
Normal file
34
doc/man/txt/xzmore.txt
Normal file
@@ -0,0 +1,34 @@
|
||||
XZMORE(1) XZ Utils XZMORE(1)
|
||||
|
||||
|
||||
|
||||
NAME
|
||||
xzmore, lzmore - view xz or lzma compressed (text) files
|
||||
|
||||
SYNOPSIS
|
||||
xzmore [file...]
|
||||
lzmore [file...]
|
||||
|
||||
DESCRIPTION
|
||||
xzmore is a filter which allows examination of xz(1) or lzma(1) com-
|
||||
pressed text files one screenful at a time on a soft-copy terminal.
|
||||
|
||||
To use a pager other than the default more, set environment variable
|
||||
PAGER to the name of the desired program. The name lzmore is provided
|
||||
for backward compatibility with LZMA Utils.
|
||||
|
||||
e or q When the prompt --More--(Next file: file) is printed, this com-
|
||||
mand causes xzmore to exit.
|
||||
|
||||
s When the prompt --More--(Next file: file) is printed, this com-
|
||||
mand causes xzmore to skip the next file and continue.
|
||||
|
||||
For list of keyboard commands supported while actually viewing the con-
|
||||
tent of a file, refer to manual of the pager you use, usually more(1).
|
||||
|
||||
SEE ALSO
|
||||
more(1), xz(1), xzless(1), zmore(1)
|
||||
|
||||
|
||||
|
||||
Tukaani 2013-06-30 XZMORE(1)
|
||||
1150
doc/xz-file-format.txt
Normal file
1150
doc/xz-file-format.txt
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user