Import xz 5.2.2 (as of svn r86089)

This commit is contained in:
Zachary Ware
2017-05-22 16:21:28 -05:00
parent d239d63057
commit 60cae1c5a1
452 changed files with 147392 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
liblzma example programs
========================
Introduction
The examples are written so that the same comments aren't
repeated (much) in later files.
On POSIX systems, the examples should build by just typing "make".
The examples that use stdin or stdout don't set stdin and stdout
to binary mode. On systems where it matters (e.g. Windows) it is
possible that the examples won't work without modification.
List of examples
01_compress_easy.c Multi-call compression using
a compression preset
02_decompress.c Multi-call decompression
03_compress_custom.c Like 01_compress_easy.c but using
a custom filter chain
(x86 BCJ + LZMA2)
04_compress_easy_mt.c Multi-threaded multi-call
compression using a compression
preset

View File

@@ -0,0 +1,297 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file 01_compress_easy.c
/// \brief Compress from stdin to stdout in multi-call mode
///
/// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE
///
/// Example: ./01_compress_easy 6 < foo > foo.xz
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <lzma.h>
static void
show_usage_and_exit(const char *argv0)
{
fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n"
"PRESET is a number 0-9 and can optionally be "
"followed by `e' to indicate extreme preset\n",
argv0);
exit(EXIT_FAILURE);
}
static uint32_t
get_preset(int argc, char **argv)
{
// One argument whose first char must be 0-9.
if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9')
show_usage_and_exit(argv[0]);
// Calculate the preste level 0-9.
uint32_t preset = argv[1][0] - '0';
// If there is a second char, it must be 'e'. It will set
// the LZMA_PRESET_EXTREME flag.
if (argv[1][1] != '\0') {
if (argv[1][1] != 'e' || argv[1][2] != '\0')
show_usage_and_exit(argv[0]);
preset |= LZMA_PRESET_EXTREME;
}
return preset;
}
static bool
init_encoder(lzma_stream *strm, uint32_t preset)
{
// Initialize the encoder using a preset. Set the integrity to check
// to CRC64, which is the default in the xz command line tool. If
// the .xz file needs to be decompressed with XZ Embedded, use
// LZMA_CHECK_CRC32 instead.
lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64);
// Return successfully if the initialization went fine.
if (ret == LZMA_OK)
return true;
// Something went wrong. The possible errors are documented in
// lzma/container.h (src/liblzma/api/lzma/container.h in the source
// package or e.g. /usr/include/lzma/container.h depending on the
// install prefix).
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_OPTIONS_ERROR:
msg = "Specified preset is not supported";
break;
case LZMA_UNSUPPORTED_CHECK:
msg = "Specified integrity check is not supported";
break;
default:
// This is most likely LZMA_PROG_ERROR indicating a bug in
// this program or in liblzma. It is inconvenient to have a
// separate error message for errors that should be impossible
// to occur, but knowing the error code is important for
// debugging. That's why it is good to print the error code
// at least when there is no good error message to show.
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
msg, ret);
return false;
}
static bool
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
{
// This will be LZMA_RUN until the end of the input file is reached.
// This tells lzma_code() when there will be no more input.
lzma_action action = LZMA_RUN;
// Buffers to temporarily hold uncompressed input
// and compressed output.
uint8_t inbuf[BUFSIZ];
uint8_t outbuf[BUFSIZ];
// Initialize the input and output pointers. Initializing next_in
// and avail_in isn't really necessary when we are going to encode
// just one file since LZMA_STREAM_INIT takes care of initializing
// those already. But it doesn't hurt much and it will be needed
// if encoding more than one file like we will in 02_decompress.c.
//
// While we don't care about strm->total_in or strm->total_out in this
// example, it is worth noting that initializing the encoder will
// always reset total_in and total_out to zero. But the encoder
// initialization doesn't touch next_in, avail_in, next_out, or
// avail_out.
strm->next_in = NULL;
strm->avail_in = 0;
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
// Loop until the file has been successfully compressed or until
// an error occurs.
while (true) {
// Fill the input buffer if it is empty.
if (strm->avail_in == 0 && !feof(infile)) {
strm->next_in = inbuf;
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
infile);
if (ferror(infile)) {
fprintf(stderr, "Read error: %s\n",
strerror(errno));
return false;
}
// Once the end of the input file has been reached,
// we need to tell lzma_code() that no more input
// will be coming and that it should finish the
// encoding.
if (feof(infile))
action = LZMA_FINISH;
}
// Tell liblzma do the actual encoding.
//
// This reads up to strm->avail_in bytes of input starting
// from strm->next_in. avail_in will be decremented and
// next_in incremented by an equal amount to match the
// number of input bytes consumed.
//
// Up to strm->avail_out bytes of compressed output will be
// written starting from strm->next_out. avail_out and next_out
// will be incremented by an equal amount to match the number
// of output bytes written.
//
// The encoder has to do internal buffering, which means that
// it may take quite a bit of input before the same data is
// available in compressed form in the output buffer.
lzma_ret ret = lzma_code(strm, action);
// If the output buffer is full or if the compression finished
// successfully, write the data from the output bufffer to
// the output file.
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
// When lzma_code() has returned LZMA_STREAM_END,
// the output buffer is likely to be only partially
// full. Calculate how much new data there is to
// be written to the output file.
size_t write_size = sizeof(outbuf) - strm->avail_out;
if (fwrite(outbuf, 1, write_size, outfile)
!= write_size) {
fprintf(stderr, "Write error: %s\n",
strerror(errno));
return false;
}
// Reset next_out and avail_out.
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
}
// Normally the return value of lzma_code() will be LZMA_OK
// until everything has been encoded.
if (ret != LZMA_OK) {
// Once everything has been encoded successfully, the
// return value of lzma_code() will be LZMA_STREAM_END.
//
// It is important to check for LZMA_STREAM_END. Do not
// assume that getting ret != LZMA_OK would mean that
// everything has gone well.
if (ret == LZMA_STREAM_END)
return true;
// It's not LZMA_OK nor LZMA_STREAM_END,
// so it must be an error code. See lzma/base.h
// (src/liblzma/api/lzma/base.h in the source package
// or e.g. /usr/include/lzma/base.h depending on the
// install prefix) for the list and documentation of
// possible values. Most values listen in lzma_ret
// enumeration aren't possible in this example.
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_DATA_ERROR:
// This error is returned if the compressed
// or uncompressed size get near 8 EiB
// (2^63 bytes) because that's where the .xz
// file format size limits currently are.
// That is, the possibility of this error
// is mostly theoretical unless you are doing
// something very unusual.
//
// Note that strm->total_in and strm->total_out
// have nothing to do with this error. Changing
// those variables won't increase or decrease
// the chance of getting this error.
msg = "File size limits exceeded";
break;
default:
// This is most likely LZMA_PROG_ERROR, but
// if this program is buggy (or liblzma has
// a bug), it may be e.g. LZMA_BUF_ERROR or
// LZMA_OPTIONS_ERROR too.
//
// It is inconvenient to have a separate
// error message for errors that should be
// impossible to occur, but knowing the error
// code is important for debugging. That's why
// it is good to print the error code at least
// when there is no good error message to show.
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Encoder error: %s (error code %u)\n",
msg, ret);
return false;
}
}
}
extern int
main(int argc, char **argv)
{
// Get the preset number from the command line.
uint32_t preset = get_preset(argc, argv);
// Initialize a lzma_stream structure. When it is allocated on stack,
// it is simplest to use LZMA_STREAM_INIT macro like below. When it
// is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr))
// works (as long as NULL pointers are represented with zero bits
// as they are on practically all computers today).
lzma_stream strm = LZMA_STREAM_INIT;
// Initialize the encoder. If it succeeds, compress from
// stdin to stdout.
bool success = init_encoder(&strm, preset);
if (success)
success = compress(&strm, stdin, stdout);
// Free the memory allocated for the encoder. If we were encoding
// multiple files, this would only need to be done after the last
// file. See 02_decompress.c for handling of multiple files.
//
// It is OK to call lzma_end() multiple times or when it hasn't been
// actually used except initialized with LZMA_STREAM_INIT.
lzma_end(&strm);
// Close stdout to catch possible write errors that can occur
// when pending data is flushed from the stdio buffers.
if (fclose(stdout)) {
fprintf(stderr, "Write error: %s\n", strerror(errno));
success = false;
}
return success ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@@ -0,0 +1,287 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file 02_decompress.c
/// \brief Decompress .xz files to stdout
///
/// Usage: ./02_decompress INPUT_FILES... > OUTFILE
///
/// Example: ./02_decompress foo.xz bar.xz > foobar
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <lzma.h>
static bool
init_decoder(lzma_stream *strm)
{
// Initialize a .xz decoder. The decoder supports a memory usage limit
// and a set of flags.
//
// The memory usage of the decompressor depends on the settings used
// to compress a .xz file. It can vary from less than a megabyte to
// a few gigabytes, but in practice (at least for now) it rarely
// exceeds 65 MiB because that's how much memory is required to
// decompress files created with "xz -9". Settings requiring more
// memory take extra effort to use and don't (at least for now)
// provide significantly better compression in most cases.
//
// Memory usage limit is useful if it is important that the
// decompressor won't consume gigabytes of memory. The need
// for limiting depends on the application. In this example,
// no memory usage limiting is used. This is done by setting
// the limit to UINT64_MAX.
//
// The .xz format allows concatenating compressed files as is:
//
// echo foo | xz > foobar.xz
// echo bar | xz >> foobar.xz
//
// When decompressing normal standalone .xz files, LZMA_CONCATENATED
// should always be used to support decompression of concatenated
// .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop
// after the first .xz stream. This can be useful when .xz data has
// been embedded inside another file format.
//
// Flags other than LZMA_CONCATENATED are supported too, and can
// be combined with bitwise-or. See lzma/container.h
// (src/liblzma/api/lzma/container.h in the source package or e.g.
// /usr/include/lzma/container.h depending on the install prefix)
// for details.
lzma_ret ret = lzma_stream_decoder(
strm, UINT64_MAX, LZMA_CONCATENATED);
// Return successfully if the initialization went fine.
if (ret == LZMA_OK)
return true;
// Something went wrong. The possible errors are documented in
// lzma/container.h (src/liblzma/api/lzma/container.h in the source
// package or e.g. /usr/include/lzma/container.h depending on the
// install prefix).
//
// Note that LZMA_MEMLIMIT_ERROR is never possible here. If you
// specify a very tiny limit, the error will be delayed until
// the first headers have been parsed by a call to lzma_code().
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_OPTIONS_ERROR:
msg = "Unsupported decompressor flags";
break;
default:
// This is most likely LZMA_PROG_ERROR indicating a bug in
// this program or in liblzma. It is inconvenient to have a
// separate error message for errors that should be impossible
// to occur, but knowing the error code is important for
// debugging. That's why it is good to print the error code
// at least when there is no good error message to show.
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n",
msg, ret);
return false;
}
static bool
decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile)
{
// When LZMA_CONCATENATED flag was used when initializing the decoder,
// we need to tell lzma_code() when there will be no more input.
// This is done by setting action to LZMA_FINISH instead of LZMA_RUN
// in the same way as it is done when encoding.
//
// When LZMA_CONCATENATED isn't used, there is no need to use
// LZMA_FINISH to tell when all the input has been read, but it
// is still OK to use it if you want. When LZMA_CONCATENATED isn't
// used, the decoder will stop after the first .xz stream. In that
// case some unused data may be left in strm->next_in.
lzma_action action = LZMA_RUN;
uint8_t inbuf[BUFSIZ];
uint8_t outbuf[BUFSIZ];
strm->next_in = NULL;
strm->avail_in = 0;
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
while (true) {
if (strm->avail_in == 0 && !feof(infile)) {
strm->next_in = inbuf;
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
infile);
if (ferror(infile)) {
fprintf(stderr, "%s: Read error: %s\n",
inname, strerror(errno));
return false;
}
// Once the end of the input file has been reached,
// we need to tell lzma_code() that no more input
// will be coming. As said before, this isn't required
// if the LZMA_CONATENATED flag isn't used when
// initializing the decoder.
if (feof(infile))
action = LZMA_FINISH;
}
lzma_ret ret = lzma_code(strm, action);
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
size_t write_size = sizeof(outbuf) - strm->avail_out;
if (fwrite(outbuf, 1, write_size, outfile)
!= write_size) {
fprintf(stderr, "Write error: %s\n",
strerror(errno));
return false;
}
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
}
if (ret != LZMA_OK) {
// Once everything has been decoded successfully, the
// return value of lzma_code() will be LZMA_STREAM_END.
//
// It is important to check for LZMA_STREAM_END. Do not
// assume that getting ret != LZMA_OK would mean that
// everything has gone well or that when you aren't
// getting more output it must have successfully
// decoded everything.
if (ret == LZMA_STREAM_END)
return true;
// It's not LZMA_OK nor LZMA_STREAM_END,
// so it must be an error code. See lzma/base.h
// (src/liblzma/api/lzma/base.h in the source package
// or e.g. /usr/include/lzma/base.h depending on the
// install prefix) for the list and documentation of
// possible values. Many values listen in lzma_ret
// enumeration aren't possible in this example, but
// can be made possible by enabling memory usage limit
// or adding flags to the decoder initialization.
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_FORMAT_ERROR:
// .xz magic bytes weren't found.
msg = "The input is not in the .xz format";
break;
case LZMA_OPTIONS_ERROR:
// For example, the headers specify a filter
// that isn't supported by this liblzma
// version (or it hasn't been enabled when
// building liblzma, but no-one sane does
// that unless building liblzma for an
// embedded system). Upgrading to a newer
// liblzma might help.
//
// Note that it is unlikely that the file has
// accidentally became corrupt if you get this
// error. The integrity of the .xz headers is
// always verified with a CRC32, so
// unintentionally corrupt files can be
// distinguished from unsupported files.
msg = "Unsupported compression options";
break;
case LZMA_DATA_ERROR:
msg = "Compressed file is corrupt";
break;
case LZMA_BUF_ERROR:
// Typically this error means that a valid
// file has got truncated, but it might also
// be a damaged part in the file that makes
// the decoder think the file is truncated.
// If you prefer, you can use the same error
// message for this as for LZMA_DATA_ERROR.
msg = "Compressed file is truncated or "
"otherwise corrupt";
break;
default:
// This is most likely LZMA_PROG_ERROR.
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "%s: Decoder error: "
"%s (error code %u)\n",
inname, msg, ret);
return false;
}
}
}
extern int
main(int argc, char **argv)
{
if (argc <= 1) {
fprintf(stderr, "Usage: %s FILES...\n", argv[0]);
return EXIT_FAILURE;
}
lzma_stream strm = LZMA_STREAM_INIT;
bool success = true;
// Try to decompress all files.
for (int i = 1; i < argc; ++i) {
if (!init_decoder(&strm)) {
// Decoder initialization failed. There's no point
// to retry it so we need to exit.
success = false;
break;
}
FILE *infile = fopen(argv[i], "rb");
if (infile == NULL) {
fprintf(stderr, "%s: Error opening the "
"input file: %s\n",
argv[i], strerror(errno));
success = false;
} else {
success &= decompress(&strm, argv[i], infile, stdout);
fclose(infile);
}
}
// Free the memory allocated for the decoder. This only needs to be
// done after the last file.
lzma_end(&strm);
if (fclose(stdout)) {
fprintf(stderr, "Write error: %s\n", strerror(errno));
success = false;
}
return success ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@@ -0,0 +1,193 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file 03_compress_custom.c
/// \brief Compress in multi-call mode using x86 BCJ and LZMA2
///
/// Usage: ./03_compress_custom < INFILE > OUTFILE
///
/// Example: ./03_compress_custom < foo > foo.xz
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <lzma.h>
static bool
init_encoder(lzma_stream *strm)
{
// Use the default preset (6) for LZMA2.
//
// The lzma_options_lzma structure and the lzma_lzma_preset() function
// are declared in lzma/lzma12.h (src/liblzma/api/lzma/lzma12.h in the
// source package or e.g. /usr/include/lzma/lzma12.h depending on
// the install prefix).
lzma_options_lzma opt_lzma2;
if (lzma_lzma_preset(&opt_lzma2, LZMA_PRESET_DEFAULT)) {
// It should never fail because the default preset
// (and presets 0-9 optionally with LZMA_PRESET_EXTREME)
// are supported by all stable liblzma versions.
//
// (The encoder initialization later in this function may
// still fail due to unsupported preset *if* the features
// required by the preset have been disabled at build time,
// but no-one does such things except on embedded systems.)
fprintf(stderr, "Unsupported preset, possibly a bug\n");
return false;
}
// Now we could customize the LZMA2 options if we wanted. For example,
// we could set the the dictionary size (opt_lzma2.dict_size) to
// something else than the default (8 MiB) of the default preset.
// See lzma/lzma12.h for details of all LZMA2 options.
//
// The x86 BCJ filter will try to modify the x86 instruction stream so
// that LZMA2 can compress it better. The x86 BCJ filter doesn't need
// any options so it will be set to NULL below.
//
// Construct the filter chain. The uncompressed data goes first to
// the first filter in the array, in this case the x86 BCJ filter.
// The array is always terminated by setting .id = LZMA_VLI_UNKNOWN.
//
// See lzma/filter.h for more information about the lzma_filter
// structure.
lzma_filter filters[] = {
{ .id = LZMA_FILTER_X86, .options = NULL },
{ .id = LZMA_FILTER_LZMA2, .options = &opt_lzma2 },
{ .id = LZMA_VLI_UNKNOWN, .options = NULL },
};
// Initialize the encoder using the custom filter chain.
lzma_ret ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64);
if (ret == LZMA_OK)
return true;
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_OPTIONS_ERROR:
// We are no longer using a plain preset so this error
// message has been edited accordingly compared to
// 01_compress_easy.c.
msg = "Specified filter chain is not supported";
break;
case LZMA_UNSUPPORTED_CHECK:
msg = "Specified integrity check is not supported";
break;
default:
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
msg, ret);
return false;
}
// This function is identical to the one in 01_compress_easy.c.
static bool
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
{
lzma_action action = LZMA_RUN;
uint8_t inbuf[BUFSIZ];
uint8_t outbuf[BUFSIZ];
strm->next_in = NULL;
strm->avail_in = 0;
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
while (true) {
if (strm->avail_in == 0 && !feof(infile)) {
strm->next_in = inbuf;
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
infile);
if (ferror(infile)) {
fprintf(stderr, "Read error: %s\n",
strerror(errno));
return false;
}
if (feof(infile))
action = LZMA_FINISH;
}
lzma_ret ret = lzma_code(strm, action);
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
size_t write_size = sizeof(outbuf) - strm->avail_out;
if (fwrite(outbuf, 1, write_size, outfile)
!= write_size) {
fprintf(stderr, "Write error: %s\n",
strerror(errno));
return false;
}
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
}
if (ret != LZMA_OK) {
if (ret == LZMA_STREAM_END)
return true;
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_DATA_ERROR:
msg = "File size limits exceeded";
break;
default:
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Encoder error: %s (error code %u)\n",
msg, ret);
return false;
}
}
}
extern int
main(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
bool success = init_encoder(&strm);
if (success)
success = compress(&strm, stdin, stdout);
lzma_end(&strm);
if (fclose(stdout)) {
fprintf(stderr, "Write error: %s\n", strerror(errno));
success = false;
}
return success ? EXIT_SUCCESS : EXIT_FAILURE;
}

View File

@@ -0,0 +1,206 @@
///////////////////////////////////////////////////////////////////////////////
//
/// \file 04_compress_easy_mt.c
/// \brief Compress in multi-call mode using LZMA2 in multi-threaded mode
///
/// Usage: ./04_compress_easy_mt < INFILE > OUTFILE
///
/// Example: ./04_compress_easy_mt < foo > foo.xz
//
// Author: Lasse Collin
//
// This file has been put into the public domain.
// You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <lzma.h>
static bool
init_encoder(lzma_stream *strm)
{
// The threaded encoder takes the options as pointer to
// a lzma_mt structure.
lzma_mt mt = {
// No flags are needed.
.flags = 0,
// Let liblzma determine a sane block size.
.block_size = 0,
// Use no timeout for lzma_code() calls by setting timeout
// to zero. That is, sometimes lzma_code() might block for
// a long time (from several seconds to even minutes).
// If this is not OK, for example due to progress indicator
// needing updates, specify a timeout in milliseconds here.
// See the documentation of lzma_mt in lzma/container.h for
// information how to choose a reasonable timeout.
.timeout = 0,
// Use the default preset (6) for LZMA2.
// To use a preset, filters must be set to NULL.
.preset = LZMA_PRESET_DEFAULT,
.filters = NULL,
// Use CRC64 for integrity checking. See also
// 01_compress_easy.c about choosing the integrity check.
.check = LZMA_CHECK_CRC64,
};
// Detect how many threads the CPU supports.
mt.threads = lzma_cputhreads();
// If the number of CPU cores/threads cannot be detected,
// use one thread. Note that this isn't the same as the normal
// single-threaded mode as this will still split the data into
// blocks and use more RAM than the normal single-threaded mode.
// You may want to consider using lzma_easy_encoder() or
// lzma_stream_encoder() instead of lzma_stream_encoder_mt() if
// lzma_cputhreads() returns 0 or 1.
if (mt.threads == 0)
mt.threads = 1;
// If the number of CPU cores/threads exceeds threads_max,
// limit the number of threads to keep memory usage lower.
// The number 8 is arbitrarily chosen and may be too low or
// high depending on the compression preset and the computer
// being used.
//
// FIXME: A better way could be to check the amount of RAM
// (or available RAM) and use lzma_stream_encoder_mt_memusage()
// to determine if the number of threads should be reduced.
const uint32_t threads_max = 8;
if (mt.threads > threads_max)
mt.threads = threads_max;
// Initialize the threaded encoder.
lzma_ret ret = lzma_stream_encoder_mt(strm, &mt);
if (ret == LZMA_OK)
return true;
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_OPTIONS_ERROR:
// We are no longer using a plain preset so this error
// message has been edited accordingly compared to
// 01_compress_easy.c.
msg = "Specified filter chain is not supported";
break;
case LZMA_UNSUPPORTED_CHECK:
msg = "Specified integrity check is not supported";
break;
default:
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n",
msg, ret);
return false;
}
// This function is identical to the one in 01_compress_easy.c.
static bool
compress(lzma_stream *strm, FILE *infile, FILE *outfile)
{
lzma_action action = LZMA_RUN;
uint8_t inbuf[BUFSIZ];
uint8_t outbuf[BUFSIZ];
strm->next_in = NULL;
strm->avail_in = 0;
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
while (true) {
if (strm->avail_in == 0 && !feof(infile)) {
strm->next_in = inbuf;
strm->avail_in = fread(inbuf, 1, sizeof(inbuf),
infile);
if (ferror(infile)) {
fprintf(stderr, "Read error: %s\n",
strerror(errno));
return false;
}
if (feof(infile))
action = LZMA_FINISH;
}
lzma_ret ret = lzma_code(strm, action);
if (strm->avail_out == 0 || ret == LZMA_STREAM_END) {
size_t write_size = sizeof(outbuf) - strm->avail_out;
if (fwrite(outbuf, 1, write_size, outfile)
!= write_size) {
fprintf(stderr, "Write error: %s\n",
strerror(errno));
return false;
}
strm->next_out = outbuf;
strm->avail_out = sizeof(outbuf);
}
if (ret != LZMA_OK) {
if (ret == LZMA_STREAM_END)
return true;
const char *msg;
switch (ret) {
case LZMA_MEM_ERROR:
msg = "Memory allocation failed";
break;
case LZMA_DATA_ERROR:
msg = "File size limits exceeded";
break;
default:
msg = "Unknown error, possibly a bug";
break;
}
fprintf(stderr, "Encoder error: %s (error code %u)\n",
msg, ret);
return false;
}
}
}
extern int
main(void)
{
lzma_stream strm = LZMA_STREAM_INIT;
bool success = init_encoder(&strm);
if (success)
success = compress(&strm, stdin, stdout);
lzma_end(&strm);
if (fclose(stdout)) {
fprintf(stderr, "Write error: %s\n", strerror(errno));
success = false;
}
return success ? EXIT_SUCCESS : EXIT_FAILURE;
}

24
doc/examples/Makefile Normal file
View File

@@ -0,0 +1,24 @@
#
# Author: Lasse Collin
#
# This file has been put into the public domain.
# You can do whatever you want with this file.
#
CC = c99
CFLAGS = -g
LDFLAGS = -llzma
PROGS = \
01_compress_easy \
02_decompress \
03_compress_custom \
04_compress_easy_mt
all: $(PROGS)
.c:
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
clean:
-rm -f $(PROGS)

View File

@@ -0,0 +1,127 @@
/*
* xz_pipe_comp.c
* A simple example of pipe-only xz compressor implementation.
* version: 2010-07-12 - by Daniel Mealha Cabrita
* Not copyrighted -- provided to the public domain.
*
* Compiling:
* Link with liblzma. GCC example:
* $ gcc -llzma xz_pipe_comp.c -o xz_pipe_comp
*
* Usage example:
* $ cat some_file | ./xz_pipe_comp > some_file.xz
*/
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdbool.h>
#include <lzma.h>
/* COMPRESSION SETTINGS */
/* analogous to xz CLI options: -0 to -9 */
#define COMPRESSION_LEVEL 6
/* boolean setting, analogous to xz CLI option: -e */
#define COMPRESSION_EXTREME true
/* see: /usr/include/lzma/check.h LZMA_CHECK_* */
#define INTEGRITY_CHECK LZMA_CHECK_CRC64
/* read/write buffer sizes */
#define IN_BUF_MAX 4096
#define OUT_BUF_MAX 4096
/* error codes */
#define RET_OK 0
#define RET_ERROR_INIT 1
#define RET_ERROR_INPUT 2
#define RET_ERROR_OUTPUT 3
#define RET_ERROR_COMPRESSION 4
/* note: in_file and out_file must be open already */
int xz_compress (FILE *in_file, FILE *out_file)
{
uint32_t preset = COMPRESSION_LEVEL | (COMPRESSION_EXTREME ? LZMA_PRESET_EXTREME : 0);
lzma_check check = INTEGRITY_CHECK;
lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
uint8_t in_buf [IN_BUF_MAX];
uint8_t out_buf [OUT_BUF_MAX];
size_t in_len; /* length of useful data in in_buf */
size_t out_len; /* length of useful data in out_buf */
bool in_finished = false;
bool out_finished = false;
lzma_action action;
lzma_ret ret_xz;
int ret;
ret = RET_OK;
/* initialize xz encoder */
ret_xz = lzma_easy_encoder (&strm, preset, check);
if (ret_xz != LZMA_OK) {
fprintf (stderr, "lzma_easy_encoder error: %d\n", (int) ret_xz);
return RET_ERROR_INIT;
}
while ((! in_finished) && (! out_finished)) {
/* read incoming data */
in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);
if (feof (in_file)) {
in_finished = true;
}
if (ferror (in_file)) {
in_finished = true;
ret = RET_ERROR_INPUT;
}
strm.next_in = in_buf;
strm.avail_in = in_len;
/* if no more data from in_buf, flushes the
internal xz buffers and closes the xz data
with LZMA_FINISH */
action = in_finished ? LZMA_FINISH : LZMA_RUN;
/* loop until there's no pending compressed output */
do {
/* out_buf is clean at this point */
strm.next_out = out_buf;
strm.avail_out = OUT_BUF_MAX;
/* compress data */
ret_xz = lzma_code (&strm, action);
if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
out_finished = true;
ret = RET_ERROR_COMPRESSION;
} else {
/* write compressed data */
out_len = OUT_BUF_MAX - strm.avail_out;
fwrite (out_buf, 1, out_len, out_file);
if (ferror (out_file)) {
out_finished = true;
ret = RET_ERROR_OUTPUT;
}
}
} while (strm.avail_out == 0);
}
lzma_end (&strm);
return ret;
}
int main ()
{
int ret;
ret = xz_compress (stdin, stdout);
return ret;
}

View File

@@ -0,0 +1,123 @@
/*
* xz_pipe_decomp.c
* A simple example of pipe-only xz decompressor implementation.
* version: 2012-06-14 - by Daniel Mealha Cabrita
* Not copyrighted -- provided to the public domain.
*
* Compiling:
* Link with liblzma. GCC example:
* $ gcc -llzma xz_pipe_decomp.c -o xz_pipe_decomp
*
* Usage example:
* $ cat some_file.xz | ./xz_pipe_decomp > some_file
*/
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdbool.h>
#include <lzma.h>
/* read/write buffer sizes */
#define IN_BUF_MAX 4096
#define OUT_BUF_MAX 4096
/* error codes */
#define RET_OK 0
#define RET_ERROR_INIT 1
#define RET_ERROR_INPUT 2
#define RET_ERROR_OUTPUT 3
#define RET_ERROR_DECOMPRESSION 4
/* note: in_file and out_file must be open already */
int xz_decompress (FILE *in_file, FILE *out_file)
{
lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */
const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED;
const uint64_t memory_limit = UINT64_MAX; /* no memory limit */
uint8_t in_buf [IN_BUF_MAX];
uint8_t out_buf [OUT_BUF_MAX];
size_t in_len; /* length of useful data in in_buf */
size_t out_len; /* length of useful data in out_buf */
bool in_finished = false;
bool out_finished = false;
lzma_action action;
lzma_ret ret_xz;
int ret;
ret = RET_OK;
/* initialize xz decoder */
ret_xz = lzma_stream_decoder (&strm, memory_limit, flags);
if (ret_xz != LZMA_OK) {
fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz);
return RET_ERROR_INIT;
}
while ((! in_finished) && (! out_finished)) {
/* read incoming data */
in_len = fread (in_buf, 1, IN_BUF_MAX, in_file);
if (feof (in_file)) {
in_finished = true;
}
if (ferror (in_file)) {
in_finished = true;
ret = RET_ERROR_INPUT;
}
strm.next_in = in_buf;
strm.avail_in = in_len;
/* if no more data from in_buf, flushes the
internal xz buffers and closes the decompressed data
with LZMA_FINISH */
action = in_finished ? LZMA_FINISH : LZMA_RUN;
/* loop until there's no pending decompressed output */
do {
/* out_buf is clean at this point */
strm.next_out = out_buf;
strm.avail_out = OUT_BUF_MAX;
/* decompress data */
ret_xz = lzma_code (&strm, action);
if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) {
fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz);
out_finished = true;
ret = RET_ERROR_DECOMPRESSION;
} else {
/* write decompressed data */
out_len = OUT_BUF_MAX - strm.avail_out;
fwrite (out_buf, 1, out_len, out_file);
if (ferror (out_file)) {
out_finished = true;
ret = RET_ERROR_OUTPUT;
}
}
} while (strm.avail_out == 0);
}
/* Bug fix (2012-06-14): If no errors were detected, check
that the last lzma_code() call returned LZMA_STREAM_END.
If not, the file is probably truncated. */
if ((ret == RET_OK) && (ret_xz != LZMA_STREAM_END)) {
fprintf (stderr, "Input truncated or corrupt\n");
ret = RET_ERROR_DECOMPRESSION;
}
lzma_end (&strm);
return ret;
}
int main ()
{
int ret;
ret = xz_decompress (stdin, stdout);
return ret;
}

224
doc/faq.txt Normal file
View File

@@ -0,0 +1,224 @@
XZ Utils FAQ
============
Q: What do the letters XZ mean?
A: Nothing. They are just two letters, which come from the file format
suffix .xz. The .xz suffix was selected, because it seemed to be
pretty much unused. It has no deeper meaning.
Q: What are LZMA and LZMA2?
A: LZMA stands for Lempel-Ziv-Markov chain-Algorithm. It is the name
of the compression algorithm designed by Igor Pavlov for 7-Zip.
LZMA is based on LZ77 and range encoding.
LZMA2 is an updated version of the original LZMA to fix a couple of
practical issues. In context of XZ Utils, LZMA is called LZMA1 to
emphasize that LZMA is not the same thing as LZMA2. LZMA2 is the
primary compression algorithm in the .xz file format.
Q: There are many LZMA related projects. How does XZ Utils relate to them?
A: 7-Zip and LZMA SDK are the original projects. LZMA SDK is roughly
a subset of the 7-Zip source tree.
p7zip is 7-Zip's command-line tools ported to POSIX-like systems.
LZMA Utils provide a gzip-like lzma tool for POSIX-like systems.
LZMA Utils are based on LZMA SDK. XZ Utils are the successor to
LZMA Utils.
There are several other projects using LZMA. Most are more or less
based on LZMA SDK. See <http://7-zip.org/links.html>.
Q: Why is liblzma named liblzma if its primary file format is .xz?
Shouldn't it be e.g. libxz?
A: When the designing of the .xz format began, the idea was to replace
the .lzma format and use the same .lzma suffix. It would have been
quite OK to reuse the suffix when there were very few .lzma files
around. However, the old .lzma format became popular before the
new format was finished. The new format was renamed to .xz but the
name of liblzma wasn't changed.
Q: Do XZ Utils support the .7z format?
A: No. Use 7-Zip (Windows) or p7zip (POSIX-like systems) to handle .7z
files.
Q: I have many .tar.7z files. Can I convert them to .tar.xz without
spending hours recompressing the data?
A: In the "extra" directory, there is a script named 7z2lzma.bash which
is able to convert some .7z files to the .lzma format (not .xz). It
needs the 7za (or 7z) command from p7zip. The script may silently
produce corrupt output if certain assumptions are not met, so
decompress the resulting .lzma file and compare it against the
original before deleting the original file!
Q: I have many .lzma files. Can I quickly convert them to the .xz format?
A: For now, no. Since XZ Utils supports the .lzma format, it's usually
not too bad to keep the old files in the old format. If you want to
do the conversion anyway, you need to decompress the .lzma files and
then recompress to the .xz format.
Technically, there is a way to make the conversion relatively fast
(roughly twice the time that normal decompression takes). Writing
such a tool would take quite a bit of time though, and would probably
be useful to only a few people. If you really want such a conversion
tool, contact Lasse Collin and offer some money.
Q: I have installed xz, but my tar doesn't recognize .tar.xz files.
How can I extract .tar.xz files?
A: xz -dc foo.tar.xz | tar xf -
Q: Can I recover parts of a broken .xz file (e.g. a corrupted CD-R)?
A: It may be possible if the file consists of multiple blocks, which
typically is not the case if the file was created in single-threaded
mode. There is no recovery program yet.
Q: Is (some part of) XZ Utils patented?
A: Lasse Collin is not aware of any patents that could affect XZ Utils.
However, due to the nature of software patents, it's not possible to
guarantee that XZ Utils isn't affected by any third party patent(s).
Q: Where can I find documentation about the file format and algorithms?
A: The .xz format is documented in xz-file-format.txt. It is a container
format only, and doesn't include descriptions of any non-trivial
filters.
Documenting LZMA and LZMA2 is planned, but for now, there is no other
documentation than the source code. Before you begin, you should know
the basics of LZ77 and range-coding algorithms. LZMA is based on LZ77,
but LZMA is a lot more complex. Range coding is used to compress
the final bitstream like Huffman coding is used in Deflate.
Q: I cannot find BCJ and BCJ2 filters. Don't they exist in liblzma?
A: BCJ filter is called "x86" in liblzma. BCJ2 is not included,
because it requires using more than one encoded output stream.
A streamable version of BCJ2-style filtering is planned.
Q: I need to use a script that runs "xz -9". On a system with 256 MiB
of RAM, xz says that it cannot allocate memory. Can I make the
script work without modifying it?
A: Set a default memory usage limit for compression. You can do it e.g.
in a shell initialization script such as ~/.bashrc or /etc/profile:
XZ_DEFAULTS=--memlimit-compress=150MiB
export XZ_DEFAULTS
xz will then scale the compression settings down so that the given
memory usage limit is not reached. This way xz shouldn't run out
of memory.
Check also that memory-related resource limits are high enough.
On most systems, "ulimit -a" will show the current resource limits.
Q: How do I create files that can be decompressed with XZ Embedded?
A: See the documentation in XZ Embedded. In short, something like
this is a good start:
xz --check=crc32 --lzma2=preset=6e,dict=64KiB
Or if a BCJ filter is needed too, e.g. if compressing
a kernel image for PowerPC:
xz --check=crc32 --powerpc --lzma2=preset=6e,dict=64KiB
Adjust the dictionary size to get a good compromise between
compression ratio and decompressor memory usage. Note that
in single-call decompression mode of XZ Embedded, a big
dictionary doesn't increase memory usage.
Q: Will xz support threaded compression?
A: It is planned and has been taken into account when designing
the .xz file format. Eventually there will probably be three types
of threading, each method having its own advantages and disadvantages.
The simplest method is splitting the uncompressed data into blocks
and compressing them in parallel independent from each other.
Since the blocks are compressed independently, they can also be
decompressed independently. Together with the index feature in .xz,
this allows using threads to create .xz files for random-access
reading. This also makes threaded decompression possible, although
it is not clear if threaded decompression will ever be implemented.
The independent blocks method has a couple of disadvantages too. It
will compress worse than a single-block method. Often the difference
is not too big (maybe 1-2 %) but sometimes it can be too big. Also,
the memory usage of the compressor increases linearly when adding
threads.
Match finder parallelization is another threading method. It has
been in 7-Zip for ages. It doesn't affect compression ratio or
memory usage significantly. Among the three threading methods, only
this is useful when compressing small files (files that are not
significantly bigger than the dictionary). Unfortunately this method
scales only to about two CPU cores.
The third method is pigz-style threading (I use that name, because
pigz <http://www.zlib.net/pigz/> uses that method). It doesn't
affect compression ratio significantly and scales to many cores.
The memory usage scales linearly when threads are added. This isn't
significant with pigz, because Deflate uses only a 32 KiB dictionary,
but with LZMA2 the memory usage will increase dramatically just like
with the independent-blocks method. There is also a constant
computational overhead, which may make pigz-method a bit dull on
dual-core compared to the parallel match finder method, but with more
cores the overhead is not a big deal anymore.
Combining the threading methods will be possible and also useful.
E.g. combining match finder parallelization with pigz-style threading
can cut the memory usage by 50 %.
It is possible that the single-threaded method will be modified to
create files identical to the pigz-style method. We'll see once
pigz-style threading has been implemented in liblzma.
Q: How do I build a program that needs liblzmadec (lzmadec.h)?
A: liblzmadec is part of LZMA Utils. XZ Utils has liblzma, but no
liblzmadec. The code using liblzmadec should be ported to use
liblzma instead. If you cannot or don't want to do that, download
LZMA Utils from <http://tukaani.org/lzma/>.
Q: The default build of liblzma is too big. How can I make it smaller?
A: Give --enable-small to the configure script. Use also appropriate
--enable or --disable options to include only those filter encoders
and decoders and integrity checks that you actually need. Use
CFLAGS=-Os (with GCC) or equivalent to tell your compiler to optimize
for size. See INSTALL for information about configure options.
If the result is still too big, take a look at XZ Embedded. It is
a separate project, which provides a limited but significantly
smaller XZ decoder implementation than XZ Utils. You can find it
at <http://tukaani.org/xz/embedded.html>.

150
doc/history.txt Normal file
View File

@@ -0,0 +1,150 @@
History of LZMA Utils and XZ Utils
==================================
Tukaani distribution
In 2005, there was a small group working on the Tukaani distribution,
which was a Slackware fork. One of the project's goals was to fit the
distro on a single 700 MiB ISO-9660 image. Using LZMA instead of gzip
helped a lot. Roughly speaking, one could fit data that took 1000 MiB
in gzipped form into 700 MiB with LZMA. Naturally, the compression
ratio varied across packages, but this was what we got on average.
Slackware packages have traditionally had .tgz as the filename suffix,
which is an abbreviation of .tar.gz. A logical naming for LZMA
compressed packages was .tlz, being an abbreviation of .tar.lzma.
At the end of the year 2007, there was no distribution under the
Tukaani project anymore, but development of LZMA Utils was kept going.
Still, there were .tlz packages around, because at least Vector Linux
(a Slackware based distribution) used LZMA for its packages.
First versions of the modified pkgtools used the LZMA_Alone tool from
Igor Pavlov's LZMA SDK as is. It was fine, because users wouldn't need
to interact with LZMA_Alone directly. But people soon wanted to use
LZMA for other files too, and the interface of LZMA_Alone wasn't
comfortable for those used to gzip and bzip2.
First steps of LZMA Utils
The first version of LZMA Utils (4.22.0) included a shell script called
lzmash. It was a wrapper that had a gzip-like command-line interface. It
used the LZMA_Alone tool from LZMA SDK to do all the real work. zgrep,
zdiff, and related scripts from gzip were adapted to work with LZMA and
were part of the first LZMA Utils release too.
LZMA Utils 4.22.0 included also lzmadec, which was a small (less than
10 KiB) decoder-only command-line tool. It was written on top of the
decoder-only C code found from the LZMA SDK. lzmadec was convenient in
situations where LZMA_Alone (a few hundred KiB) would be too big.
lzmash and lzmadec were written by Lasse Collin.
Second generation
The lzmash script was an ugly and not very secure hack. The last
version of LZMA Utils to use lzmash was 4.27.1.
LZMA Utils 4.32.0beta1 introduced a new lzma command-line tool written
by Ville Koskinen. It was written in C++, and used the encoder and
decoder from C++ LZMA SDK with some little modifications. This tool
replaced both the lzmash script and the LZMA_Alone command-line tool
in LZMA Utils.
Introducing this new tool caused some temporary incompatibilities,
because the LZMA_Alone executable was simply named lzma like the new
command-line tool, but they had a completely different command-line
interface. The file format was still the same.
Lasse wrote liblzmadec, which was a small decoder-only library based
on the C code found from LZMA SDK. liblzmadec had an API similar to
zlib, although there were some significant differences, which made it
non-trivial to use it in some applications designed for zlib and
libbzip2.
The lzmadec command-line tool was converted to use liblzmadec.
Alexandre Sauvé helped converting the build system to use GNU
Autotools. This made it easier to test for certain less portable
features needed by the new command-line tool.
Since the new command-line tool never got completely finished (for
example, it didn't support the LZMA_OPT environment variable), the
intent was to not call 4.32.x stable. Similarly, liblzmadec wasn't
polished, but appeared to work well enough, so some people started
using it too.
Because the development of the third generation of LZMA Utils was
delayed considerably (3-4 years), the 4.32.x branch had to be kept
maintained. It got some bug fixes now and then, and finally it was
decided to call it stable, although most of the missing features were
never added.
File format problems
The file format used by LZMA_Alone was primitive. It was designed with
embedded systems in mind, and thus provided only a minimal set of
features. The two biggest problems for non-embedded use were the lack
of magic bytes and an integrity check.
Igor and Lasse started developing a new file format with some help
from Ville Koskinen. Also Mark Adler, Mikko Pouru, H. Peter Anvin,
and Lars Wirzenius helped with some minor things at some point of the
development. Designing the new format took quite a long time (actually,
too long a time would be a more appropriate expression). It was mostly
because Lasse was quite slow at getting things done due to personal
reasons.
Originally the new format was supposed to use the same .lzma suffix
that was already used by the old file format. Switching to the new
format wouldn't have caused much trouble when the old format wasn't
used by many people. But since the development of the new format took
such a long time, the old format got quite popular, and it was decided
that the new file format must use a different suffix.
It was decided to use .xz as the suffix of the new file format. The
first stable .xz file format specification was finally released in
December 2008. In addition to fixing the most obvious problems of
the old .lzma format, the .xz format added some new features like
support for multiple filters (compression algorithms), filter chaining
(like piping on the command line), and limited random-access reading.
Currently the primary compression algorithm used in .xz is LZMA2.
It is an extension on top of the original LZMA to fix some practical
problems: LZMA2 adds support for flushing the encoder, uncompressed
chunks, eases stateful decoder implementations, and improves support
for multithreading. Since LZMA2 is better than the original LZMA, the
original LZMA is not supported in .xz.
Transition to XZ Utils
The early versions of XZ Utils were called LZMA Utils. The first
releases were 4.42.0alphas. They dropped the rest of the C++ LZMA SDK.
The code was still directly based on LZMA SDK but ported to C and
converted from a callback API to a stateful API. Later, Igor Pavlov
made a C version of the LZMA encoder too; these ports from C++ to C
were independent in LZMA SDK and LZMA Utils.
The core of the new LZMA Utils was liblzma, a compression library with
a zlib-like API. liblzma supported both the old and new file format.
The gzip-like lzma command-line tool was rewritten to use liblzma.
The new LZMA Utils code base was renamed to XZ Utils when the name
of the new file format had been decided. The liblzma compression
library retained its name though, because changing it would have
caused unnecessary breakage in applications already using the early
liblzma snapshots.
The xz command-line tool can emulate the gzip-like lzma tool by
creating appropriate symlinks (e.g. lzma -> xz). Thus, practically
all scripts using the lzma tool from LZMA Utils will work as is with
XZ Utils (and will keep using the old .lzma format). Still, the .lzma
format is more or less deprecated. XZ Utils will keep supporting it,
but new applications should use the .xz format, and migrating old
applications to .xz is often a good idea too.

166
doc/lzma-file-format.txt Normal file
View File

@@ -0,0 +1,166 @@
The .lzma File Format
=====================
0. Preface
0.1. Notices and Acknowledgements
0.2. Changes
1. File Format
1.1. Header
1.1.1. Properties
1.1.2. Dictionary Size
1.1.3. Uncompressed Size
1.2. LZMA Compressed Data
2. References
0. Preface
This document describes the .lzma file format, which is
sometimes also called LZMA_Alone format. It is a legacy file
format, which is being or has been replaced by the .xz format.
The MIME type of the .lzma format is `application/x-lzma'.
The most commonly used software to handle .lzma files are
LZMA SDK, LZMA Utils, 7-Zip, and XZ Utils. This document
describes some of the differences between these implementations
and gives hints what subset of the .lzma format is the most
portable.
0.1. Notices and Acknowledgements
This file format was designed by Igor Pavlov for use in
LZMA SDK. This document was written by Lasse Collin
<lasse.collin@tukaani.org> using the documentation found
from the LZMA SDK.
This document has been put into the public domain.
0.2. Changes
Last modified: 2011-04-12 11:55+0300
1. File Format
+-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
| Header | LZMA Compressed Data |
+-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+
The .lzma format file consist of 13-byte Header followed by
the LZMA Compressed Data.
Unlike the .gz, .bz2, and .xz formats, it is not possible to
concatenate multiple .lzma files as is and expect the
decompression tool to decode the resulting file as if it were
a single .lzma file.
For example, the command line tools from LZMA Utils and
LZMA SDK silently ignore all the data after the first .lzma
stream. In contrast, the command line tool from XZ Utils
considers the .lzma file to be corrupt if there is data after
the first .lzma stream.
1.1. Header
+------------+----+----+----+----+--+--+--+--+--+--+--+--+
| Properties | Dictionary Size | Uncompressed Size |
+------------+----+----+----+----+--+--+--+--+--+--+--+--+
1.1.1. Properties
The Properties field contains three properties. An abbreviation
is given in parentheses, followed by the value range of the
property. The field consists of
1) the number of literal context bits (lc, [0, 8]);
2) the number of literal position bits (lp, [0, 4]); and
3) the number of position bits (pb, [0, 4]).
The properties are encoded using the following formula:
Properties = (pb * 5 + lp) * 9 + lc
The following C code illustrates a straightforward way to
decode the Properties field:
uint8_t lc, lp, pb;
uint8_t prop = get_lzma_properties();
if (prop > (4 * 5 + 4) * 9 + 8)
return LZMA_PROPERTIES_ERROR;
pb = prop / (9 * 5);
prop -= pb * 9 * 5;
lp = prop / 9;
lc = prop - lp * 9;
XZ Utils has an additional requirement: lc + lp <= 4. Files
which don't follow this requirement cannot be decompressed
with XZ Utils. Usually this isn't a problem since the most
common lc/lp/pb values are 3/0/2. It is the only lc/lp/pb
combination that the files created by LZMA Utils can have,
but LZMA Utils can decompress files with any lc/lp/pb.
1.1.2. Dictionary Size
Dictionary Size is stored as an unsigned 32-bit little endian
integer. Any 32-bit value is possible, but for maximum
portability, only sizes of 2^n and 2^n + 2^(n-1) should be
used.
LZMA Utils creates only files with dictionary size 2^n,
16 <= n <= 25. LZMA Utils can decompress files with any
dictionary size.
XZ Utils creates and decompresses .lzma files only with
dictionary sizes 2^n and 2^n + 2^(n-1). If some other
dictionary size is specified when compressing, the value
stored in the Dictionary Size field is a rounded up, but the
specified value is still used in the actual compression code.
1.1.3. Uncompressed Size
Uncompressed Size is stored as unsigned 64-bit little endian
integer. A special value of 0xFFFF_FFFF_FFFF_FFFF indicates
that Uncompressed Size is unknown. End of Payload Marker (*)
is used if and only if Uncompressed Size is unknown.
XZ Utils rejects files whose Uncompressed Size field specifies
a known size that is 256 GiB or more. This is to reject false
positives when trying to guess if the input file is in the
.lzma format. When Uncompressed Size is unknown, there is no
limit for the uncompressed size of the file.
(*) Some tools use the term End of Stream (EOS) marker
instead of End of Payload Marker.
1.2. LZMA Compressed Data
Detailed description of the format of this field is out of
scope of this document.
2. References
LZMA SDK - The original LZMA implementation
http://7-zip.org/sdk.html
7-Zip
http://7-zip.org/
LZMA Utils - LZMA adapted to POSIX-like systems
http://tukaani.org/lzma/
XZ Utils - The next generation of LZMA Utils
http://tukaani.org/xz/
The .xz file format - The successor of the .lzma format
http://tukaani.org/xz/xz-file-format.txt

Binary file not shown.

BIN
doc/man/pdf-a4/xz-a4.pdf Normal file

Binary file not shown.

BIN
doc/man/pdf-a4/xzdec-a4.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

40
doc/man/txt/lzmainfo.txt Normal file
View File

@@ -0,0 +1,40 @@
LZMAINFO(1) XZ Utils LZMAINFO(1)
NAME
lzmainfo - show information stored in the .lzma file header
SYNOPSIS
lzmainfo [--help] [--version] [file...]
DESCRIPTION
lzmainfo shows information stored in the .lzma file header. It reads
the first 13 bytes from the specified file, decodes the header, and
prints it to standard output in human readable format. If no files are
given or file is -, standard input is read.
Usually the most interesting information is the uncompressed size and
the dictionary size. Uncompressed size can be shown only if the file
is in the non-streamed .lzma format variant. The amount of memory
required to decompress the file is a few dozen kilobytes plus the dic-
tionary size.
lzmainfo is included in XZ Utils primarily for backward compatibility
with LZMA Utils.
EXIT STATUS
0 All is good.
1 An error occurred.
BUGS
lzmainfo uses MB while the correct suffix would be MiB (2^20 bytes).
This is to keep the output compatible with LZMA Utils.
SEE ALSO
xz(1)
Tukaani 2013-06-30 LZMAINFO(1)

1483
doc/man/txt/xz.txt Normal file

File diff suppressed because it is too large Load Diff

80
doc/man/txt/xzdec.txt Normal file
View File

@@ -0,0 +1,80 @@
XZDEC(1) XZ Utils XZDEC(1)
NAME
xzdec, lzmadec - Small .xz and .lzma decompressors
SYNOPSIS
xzdec [option...] [file...]
lzmadec [option...] [file...]
DESCRIPTION
xzdec is a liblzma-based decompression-only tool for .xz (and only .xz)
files. xzdec is intended to work as a drop-in replacement for xz(1) in
the most common situations where a script has been written to use xz
--decompress --stdout (and possibly a few other commonly used options)
to decompress .xz files. lzmadec is identical to xzdec except that
lzmadec supports .lzma files instead of .xz files.
To reduce the size of the executable, xzdec doesn't support multi-
threading or localization, and doesn't read options from XZ_DEFAULTS
and XZ_OPT environment variables. xzdec doesn't support displaying
intermediate progress information: sending SIGINFO to xzdec does noth-
ing, but sending SIGUSR1 terminates the process instead of displaying
progress information.
OPTIONS
-d, --decompress, --uncompress
Ignored for xz(1) compatibility. xzdec supports only decompres-
sion.
-k, --keep
Ignored for xz(1) compatibility. xzdec never creates or removes
any files.
-c, --stdout, --to-stdout
Ignored for xz(1) compatibility. xzdec always writes the decom-
pressed data to standard output.
-q, --quiet
Specifying this once does nothing since xzdec never displays any
warnings or notices. Specify this twice to suppress errors.
-Q, --no-warn
Ignored for xz(1) compatibility. xzdec never uses the exit sta-
tus 2.
-h, --help
Display a help message and exit successfully.
-V, --version
Display the version number of xzdec and liblzma.
EXIT STATUS
0 All was good.
1 An error occurred.
xzdec doesn't have any warning messages like xz(1) has, thus the exit
status 2 is not used by xzdec.
NOTES
Use xz(1) instead of xzdec or lzmadec for normal everyday use. xzdec
or lzmadec are meant only for situations where it is important to have
a smaller decompressor than the full-featured xz(1).
xzdec and lzmadec are not really that small. The size can be reduced
further by dropping features from liblzma at compile time, but that
shouldn't usually be done for executables distributed in typical non-
embedded operating system distributions. If you need a truly small .xz
decompressor, consider using XZ Embedded.
SEE ALSO
xz(1)
XZ Embedded: <http://tukaani.org/xz/embedded.html>
Tukaani 2013-06-30 XZDEC(1)

36
doc/man/txt/xzdiff.txt Normal file
View File

@@ -0,0 +1,36 @@
XZDIFF(1) XZ Utils XZDIFF(1)
NAME
xzcmp, xzdiff, lzcmp, lzdiff - compare compressed files
SYNOPSIS
xzcmp [cmp_options] file1 [file2]
xzdiff [diff_options] file1 [file2]
lzcmp [cmp_options] file1 [file2]
lzdiff [diff_options] file1 [file2]
DESCRIPTION
xzcmp and xzdiff invoke cmp(1) or diff(1) on files compressed with
xz(1), lzma(1), gzip(1), bzip2(1), or lzop(1). All options specified
are passed directly to cmp(1) or diff(1). If only one file is speci-
fied, then the files compared are file1 (which must have a suffix of a
supported compression format) and file1 from which the compression for-
mat suffix has been stripped. If two files are specified, then they
are uncompressed if necessary and fed to cmp(1) or diff(1). The exit
status from cmp(1) or diff(1) is preserved.
The names lzcmp and lzdiff are provided for backward compatibility with
LZMA Utils.
SEE ALSO
cmp(1), diff(1), xz(1), gzip(1), bzip2(1), lzop(1), zdiff(1)
BUGS
Messages from the cmp(1) or diff(1) programs refer to temporary file-
names instead of those specified.
Tukaani 2011-03-19 XZDIFF(1)

39
doc/man/txt/xzgrep.txt Normal file
View File

@@ -0,0 +1,39 @@
XZGREP(1) XZ Utils XZGREP(1)
NAME
xzgrep - search compressed files for a regular expression
SYNOPSIS
xzgrep [grep_options] [-e] pattern file...
xzegrep ...
xzfgrep ...
lzgrep ...
lzegrep ...
lzfgrep ...
DESCRIPTION
xzgrep invokes grep(1) on files which may be either uncompressed or
compressed with xz(1), lzma(1), gzip(1), bzip2(1), or lzop(1). All
options specified are passed directly to grep(1).
If no file is specified, then standard input is decompressed if neces-
sary and fed to grep(1). When reading from standard input, gzip(1),
bzip2(1), and lzop(1) compressed files are not supported.
If xzgrep is invoked as xzegrep or xzfgrep then egrep(1) or fgrep(1) is
used instead of grep(1). The same applies to names lzgrep, lzegrep,
and lzfgrep, which are provided for backward compatibility with LZMA
Utils.
ENVIRONMENT
GREP If the GREP environment variable is set, xzgrep uses it instead
of grep(1), egrep(1), or fgrep(1).
SEE ALSO
grep(1), xz(1), gzip(1), bzip2(1), lzop(1), zgrep(1)
Tukaani 2011-03-19 XZGREP(1)

39
doc/man/txt/xzless.txt Normal file
View File

@@ -0,0 +1,39 @@
XZLESS(1) XZ Utils XZLESS(1)
NAME
xzless, lzless - view xz or lzma compressed (text) files
SYNOPSIS
xzless [file...]
lzless [file...]
DESCRIPTION
xzless is a filter that displays text from compressed files to a termi-
nal. It works on files compressed with xz(1) or lzma(1). If no files
are given, xzless reads from standard input.
xzless uses less(1) to present its output. Unlike xzmore, its choice
of pager cannot be altered by setting an environment variable. Com-
mands are based on both more(1) and vi(1) and allow back and forth
movement and searching. See the less(1) manual for more information.
The command named lzless is provided for backward compatibility with
LZMA Utils.
ENVIRONMENT
LESSMETACHARS
A list of characters special to the shell. Set by xzless unless
it is already set in the environment.
LESSOPEN
Set to a command line to invoke the xz(1) decompressor for pre-
processing the input files to less(1).
SEE ALSO
less(1), xz(1), xzmore(1), zless(1)
Tukaani 2010-09-27 XZLESS(1)

34
doc/man/txt/xzmore.txt Normal file
View File

@@ -0,0 +1,34 @@
XZMORE(1) XZ Utils XZMORE(1)
NAME
xzmore, lzmore - view xz or lzma compressed (text) files
SYNOPSIS
xzmore [file...]
lzmore [file...]
DESCRIPTION
xzmore is a filter which allows examination of xz(1) or lzma(1) com-
pressed text files one screenful at a time on a soft-copy terminal.
To use a pager other than the default more, set environment variable
PAGER to the name of the desired program. The name lzmore is provided
for backward compatibility with LZMA Utils.
e or q When the prompt --More--(Next file: file) is printed, this com-
mand causes xzmore to exit.
s When the prompt --More--(Next file: file) is printed, this com-
mand causes xzmore to skip the next file and continue.
For list of keyboard commands supported while actually viewing the con-
tent of a file, refer to manual of the pager you use, usually more(1).
SEE ALSO
more(1), xz(1), xzless(1), zmore(1)
Tukaani 2013-06-30 XZMORE(1)

1150
doc/xz-file-format.txt Normal file

File diff suppressed because it is too large Load Diff