src/xz/coder.c

   1 // SPDX-License-Identifier: 0BSD
   2
   3 ///////////////////////////////////////////////////////////////////////////////
   4 //
   5 /// \file       coder.c
   6 /// \brief      Compresses or uncompresses a file
   7 //
   8 //  Authors:    Lasse Collin
   9 //              Jia Tan
  10 //
  11 ///////////////////////////////////////////////////////////////////////////////
  12
  13 #include "private.h"
  14 #include "tuklib_integer.h"
  15
  16
  17 /// Return value type for coder_init().
  18 enum coder_init_ret {
  19         CODER_INIT_NORMAL,
  20         CODER_INIT_PASSTHRU,
  21         CODER_INIT_ERROR,
  22 };
  23
  24
  25 enum operation_mode opt_mode = MODE_COMPRESS;
  26 enum format_type opt_format = FORMAT_AUTO;
  27 bool opt_auto_adjust = true;
  28 bool opt_single_stream = false;
  29 uint64_t opt_block_size = 0;
  30 block_list_entry *opt_block_list = NULL;
  31 uint64_t block_list_largest;
  32 uint32_t block_list_chain_mask;
  33
  34 /// Stream used to communicate with liblzma
  35 static lzma_stream strm = LZMA_STREAM_INIT;
  36
  37 /// Maximum number of filter chains. The first filter chain is the default,
  38 /// and 9 other filter chains can be specified with --filtersX.
  39 #define NUM_FILTER_CHAIN_MAX 10
  40
  41 /// The default filter chain is in chains[0]. It is used for encoding
  42 /// in all supported formats and also for decdoing raw streams. The other
  43 /// filter chains are set by --filtersX to support changing filters with
  44 /// the --block-list option.
  45 static lzma_filter chains[NUM_FILTER_CHAIN_MAX][LZMA_FILTERS_MAX + 1];
  46
  47 /// Bitmask indicating which filter chains are actually used when encoding
  48 /// in the .xz format. This is needed since the filter chains specified using
  49 /// --filtersX (or the default filter chain) might in reality be unneeded
  50 /// if they are never used in --block-list. When --block-list isn't
  51 /// specified, only the default filter chain is used, thus the initial
  52 /// value of this variable is 1U << 0 (the number of the default chain is 0).
  53 static uint32_t chains_used_mask = 1U << 0;
  54
  55 /// Input and output buffers
  56 static io_buf in_buf;
  57 static io_buf out_buf;
  58
  59 /// Number of filters in the default filter chain. Zero indicates that
  60 /// we are using a preset.
  61 static uint32_t filters_count = 0;
  62
  63 /// Number of the preset (0-9)
  64 static uint32_t preset_number = LZMA_PRESET_DEFAULT;
  65
  66 /// True if the current default filter chain was set using the --filters
  67 /// option. The filter chain is reset if a preset option (like -9) or an
  68 /// old-style filter option (like --lzma2) is used after a --filters option.
  69 static bool string_to_filter_used = false;
  70
  71 /// Integrity check type
  72 static lzma_check check;
  73
  74 /// This becomes false if the --check=CHECK option is used.
  75 static bool check_default = true;
  76
  77 /// Indicates if unconsumed input is allowed to remain after
  78 /// decoding has successfully finished. This is set for each file
  79 /// in coder_init().
  80 static bool allow_trailing_input;
  81
  82 #ifdef MYTHREAD_ENABLED
  83 static lzma_mt mt_options = {
  84         .flags = 0,
  85         .timeout = 300,
  86 };
  87 #endif
  88
  89
  90 extern void
  91 coder_set_check(lzma_check new_check)
  92 {
  93         check = new_check;
  94         check_default = false;
  95         return;
  96 }
  97
  98
  99 static void
 100 forget_filter_chain(void)
 101 {
 102         // Setting a preset or using --filters makes us forget
 103         // the earlier custom filter chain (if any).
 104         if (filters_count > 0) {
 105                 lzma_filters_free(chains[0], NULL);
 106                 filters_count = 0;
 107         }
 108
 109         string_to_filter_used = false;
 110         return;
 111 }
 112
 113
 114 extern void
 115 coder_set_preset(uint32_t new_preset)
 116 {
 117         preset_number &= ~LZMA_PRESET_LEVEL_MASK;
 118         preset_number |= new_preset;
 119         forget_filter_chain();
 120         return;
 121 }
 122
 123
 124 extern void
 125 coder_set_extreme(void)
 126 {
 127         preset_number |= LZMA_PRESET_EXTREME;
 128         forget_filter_chain();
 129         return;
 130 }
 131
 132
 133 extern void
 134 coder_add_filter(lzma_vli id, void *options)
 135 {
 136         if (filters_count == LZMA_FILTERS_MAX)
 137                 message_fatal(_("Maximum number of filters is four"));
 138
 139         if (string_to_filter_used)
 140                 forget_filter_chain();
 141
 142         chains[0][filters_count].id = id;
 143         chains[0][filters_count].options = options;
 144
 145         // Terminate the filter chain with LZMA_VLI_UNKNOWN to simplify
 146         // implementation of forget_filter_chain().
 147         chains[0][++filters_count].id = LZMA_VLI_UNKNOWN;
 148
 149         // Setting a custom filter chain makes us forget the preset options.
 150         // This makes a difference if one specifies e.g. "xz -9 --lzma2 -e"
 151         // where the custom filter chain resets the preset level back to
 152         // the default 6, making the example equivalent to "xz -6e".
 153         preset_number = LZMA_PRESET_DEFAULT;
 154
 155         return;
 156 }
 157
 158
 159 static void
 160 str_to_filters(const char *str, uint32_t index, uint32_t flags)
 161 {
 162         int error_pos;
 163         const char *err = lzma_str_to_filters(str, &error_pos,
 164                         chains[index], flags, NULL);
 165
 166         if (err != NULL) {
 167                 char filter_num[2] = "";
 168                 if (index > 0)
 169                         filter_num[0] = '0' + index;
 170
 171                 // FIXME? The message in err isn't translated.
 172                 // Including the translations in the xz translations is
 173                 // slightly ugly but possible. Creating a new domain for
 174                 // liblzma might not be worth it especially since on some
 175                 // OSes it adds extra dependencies to translation libraries.
 176                 message(V_ERROR, _("Error in --filters%s=FILTERS option:"),
 177                                 filter_num);
 178                 message(V_ERROR, "%s", str);
 179                 message(V_ERROR, "%*s^", error_pos, "");
 180                 message_fatal("%s", err);
 181         }
 182 }
 183
 184
 185 extern void
 186 coder_add_filters_from_str(const char *filter_str)
 187 {
 188         // Forget presets and previously defined filter chain. See
 189         // coder_add_filter() above for why preset_number must be reset too.
 190         forget_filter_chain();
 191         preset_number = LZMA_PRESET_DEFAULT;
 192
 193         string_to_filter_used = true;
 194
 195         // Include LZMA_STR_ALL_FILTERS so this can be used with --format=raw.
 196         str_to_filters(filter_str, 0, LZMA_STR_ALL_FILTERS);
 197
 198         // Set the filters_count to be the number of filters converted from
 199         // the string.
 200         for (filters_count = 0; chains[0][filters_count].id
 201                         != LZMA_VLI_UNKNOWN;
 202                         ++filters_count) ;
 203
 204         assert(filters_count > 0);
 205         return;
 206 }
 207
 208
 209 extern void
 210 coder_add_block_filters(const char *str, size_t slot)
 211 {
 212         // Free old filters first, if they were previously allocated.
 213         if (chains_used_mask & (1U << slot))
 214                 lzma_filters_free(chains[slot], NULL);
 215
 216         str_to_filters(str, slot, 0);
 217
 218         chains_used_mask |= 1U << slot;
 219 }
 220
 221
 222 tuklib_attr_noreturn
 223 static void
 224 memlimit_too_small(uint64_t memory_usage)
 225 {
 226         message(V_ERROR, _("Memory usage limit is too low for the given "
 227                         "filter setup."));
 228         message_mem_needed(V_ERROR, memory_usage);
 229         tuklib_exit(E_ERROR, E_ERROR, false);
 230 }
 231
 232
 233 #ifdef HAVE_ENCODERS
 234 /// \brief      Calculate the memory usage of each filter chain.
 235 ///
 236 /// \param      chains_memusages    If non-NULL, the memusage of the encoder
 237 ///                                 or decoder for each chain is stored in
 238 ///                                 this array.
 239 /// \param      mt                  If non-NULL, calculate memory usage of
 240 ///                                 multithreaded encoder.
 241 /// \param      encode              Whether to calculate encoder or decoder
 242 ///                                 memory usage. This must be true if
 243 ///                                 mt != NULL.
 244 ///
 245 /// \return     Return the highest memory usage of all of the filter chains.
 246 static uint64_t
 247 get_chains_memusage(uint64_t *chains_memusages, const lzma_mt *mt, bool encode)
 248 {
 249         uint64_t max_memusage = 0;
 250
 251 #ifdef MYTHREAD_ENABLED
 252         // Copy multithreading options to a temporary struct since the
 253         // "filters" member needs to be changed.
 254         lzma_mt mt_local;
 255         if (mt != NULL)
 256                 mt_local = *mt;
 257 #else
 258         (void)mt;
 259 #endif
 260
 261         for (uint32_t i = 0; i < ARRAY_SIZE(chains); i++) {
 262                 if (!(chains_used_mask & (1U << i)))
 263                         continue;
 264
 265                 uint64_t memusage = UINT64_MAX;
 266 #ifdef MYTHREAD_ENABLED
 267                 if (mt != NULL) {
 268                         assert(encode);
 269                         mt_local.filters = chains[i];
 270                         memusage = lzma_stream_encoder_mt_memusage(&mt_local);
 271                 } else
 272 #endif
 273                 if (encode) {
 274                         memusage = lzma_raw_encoder_memusage(chains[i]);
 275                 }
 276 #ifdef HAVE_DECODERS
 277                 else {
 278                         memusage = lzma_raw_decoder_memusage(chains[i]);
 279                 }
 280 #endif
 281
 282                 if (chains_memusages != NULL)
 283                         chains_memusages[i] = memusage;
 284
 285                 if (memusage > max_memusage)
 286                         max_memusage = memusage;
 287         }
 288
 289         return max_memusage;
 290 }
 291 #endif
 292
 293
/// \brief      Finalize the coder settings using the parsed options
///
/// The function works in the following order:
///
///   - Pick the default integrity check if coder_set_check() wasn't called.
///   - With --block-list, verify that every filter chain it refers to was
///     specified, and limit chains_used_mask to the chains that are needed.
///   - If no custom default filter chain exists, build one from the preset.
///   - Validate the filters against the file format and --flush-timeout.
///   - Calculate the memory usage of the used chains and compare it with
///     the memory usage limit.
///
/// If the limit is exceeded when compressing, the number of threads is
/// reduced first. After that, unless opt_auto_adjust is false, the function
/// switches to single-threaded mode and finally decreases the LZMA1/LZMA2
/// dictionary size of each chain that needs too much memory. With
/// --format=raw nothing is adjusted. When the limit cannot be met, the
/// function exits via memlimit_too_small(); the exception is the default
/// limit of the multithreaded encoder, which is allowed to be exceeded.
///
/// Invalid or unsupported settings are reported with message_fatal().
extern void
coder_set_compression_settings(void)
{
#ifdef HAVE_LZIP_DECODER
        // .lz compression isn't supported.
        assert(opt_format != FORMAT_LZIP);
#endif

        // The default check type is CRC64, but fallback to CRC32
        // if CRC64 isn't supported by the copy of liblzma we are
        // using. CRC32 is always supported.
        if (check_default) {
                check = LZMA_CHECK_CRC64;
                if (!lzma_check_is_supported(check))
                        check = LZMA_CHECK_CRC32;
        }

#ifdef HAVE_ENCODERS
        if (opt_block_list != NULL) {
                // args.c ensures these.
                assert(opt_mode == MODE_COMPRESS);
                assert(opt_format == FORMAT_XZ);

                // Find out if block_list_chain_mask has a bit set that
                // isn't set in chains_used_mask.
                const uint32_t missing_chains_mask
                                = (block_list_chain_mask ^ chains_used_mask)
                                & block_list_chain_mask;

                // If a filter chain was specified in --block-list but no
                // matching --filtersX option was used, exit with an error.
                if (missing_chains_mask != 0) {
                        // Get the number of the first missing filter chain
                        // and show it in the error message.
                        const unsigned first_missing
                                = (unsigned)ctz32(missing_chains_mask);

                        message_fatal(_("filter chain %u used by "
                                "--block-list but not specified "
                                "with --filters%u="),
                                first_missing, first_missing);
                }

                // Omit the unused filter chains from mask of used chains.
                //
                // (FIXME? When built with debugging, coder_free() will free()
                // the filter chains (except the default chain) which makes
                // Valgrind show fewer reachable allocations. But coder_free()
                // uses this mask to determine which chains to free. Thus it
                // won't free the ones that are cleared here from the mask.
                // In practice this doesn't matter.)
                chains_used_mask &= block_list_chain_mask;
        } else {
                // Reset filters used mask in case --block-list is not
                // used, but --filtersX is used.
                chains_used_mask = 1U << 0;
        }
#endif

        // Options for LZMA1 or LZMA2 in case we are using a preset.
        // This is static because the default filter chain keeps
        // pointing to it after this function has returned.
        static lzma_options_lzma opt_lzma;

        // The first filter in the chains[] array is for the default
        // filter chain.
        lzma_filter *default_filters = chains[0];

        if (filters_count == 0 && chains_used_mask & 1) {
                // We are using a preset. This is not a good idea in raw mode
                // except when playing around with things. Different versions
                // of this software may use different options in presets, and
                // thus make uncompressing the raw data difficult.
                if (opt_format == FORMAT_RAW) {
                        // The message is shown only if warnings are allowed
                        // but the exit status isn't changed.
                        message(V_WARNING, _("Using a preset in raw mode "
                                        "is discouraged."));
                        message(V_WARNING, _("The exact options of the "
                                        "presets may vary between software "
                                        "versions."));
                }

                // Get the preset for LZMA1 or LZMA2.
                if (lzma_lzma_preset(&opt_lzma, preset_number))
                        message_bug();

                // Use LZMA2 except with --format=lzma we use LZMA1.
                default_filters[0].id = opt_format == FORMAT_LZMA
                                ? LZMA_FILTER_LZMA1 : LZMA_FILTER_LZMA2;
                default_filters[0].options = &opt_lzma;

                filters_count = 1;

                // Terminate the filter options array.
                default_filters[1].id = LZMA_VLI_UNKNOWN;
        }

        // If we are using the .lzma format, allow exactly one filter
        // which has to be LZMA1. There is no need to check if the default
        // filter chain is being used since it can only be disabled if
        // --block-list is used, which is incompatible with FORMAT_LZMA.
        if (opt_format == FORMAT_LZMA && (filters_count != 1
                        || default_filters[0].id != LZMA_FILTER_LZMA1))
                message_fatal(_("The .lzma format supports only "
                                "the LZMA1 filter"));

        // If we are using the .xz format, make sure that there is no LZMA1
        // filter to prevent LZMA_PROG_ERROR. With the chains from --filtersX
        // we have already ensured this by calling lzma_str_to_filters()
        // without setting the flags that would allow non-.xz filters.
        if (opt_format == FORMAT_XZ && chains_used_mask & 1)
                for (size_t i = 0; i < filters_count; ++i)
                        if (default_filters[i].id == LZMA_FILTER_LZMA1)
                                message_fatal(_("LZMA1 cannot be used "
                                                "with the .xz format"));

        if (chains_used_mask & 1) {
                // Print the selected default filter chain.
                message_filters_show(V_DEBUG, default_filters);
        }

        // The --flush-timeout option requires LZMA_SYNC_FLUSH support
        // from the filter chain. Currently the threaded encoder doesn't
        // support LZMA_SYNC_FLUSH so single-threaded mode must be used.
        if (opt_mode == MODE_COMPRESS && opt_flush_timeout != 0) {
                for (unsigned i = 0; i < ARRAY_SIZE(chains); ++i) {
                        if (!(chains_used_mask & (1U << i)))
                                continue;

                        // Only LZMA2 and Delta are accepted in the used
                        // chains; any other filter is a fatal error.
                        const lzma_filter *fc = chains[i];
                        for (size_t j = 0; fc[j].id != LZMA_VLI_UNKNOWN; j++) {
                                switch (fc[j].id) {
                                case LZMA_FILTER_LZMA2:
                                case LZMA_FILTER_DELTA:
                                        break;

                                default:
                                        message_fatal(_("Filter chain %u is "
                                                        "incompatible with "
                                                        "--flush-timeout"),
                                                        i);
                                }
                        }
                }

                if (hardware_threads_is_mt()) {
                        message(V_WARNING, _("Switching to single-threaded "
                                        "mode due to --flush-timeout"));
                        hardware_threads_set(1);
                }
        }

        // Get memory limit and the memory usage of the used filter chains.
        // Note that if --format=raw was used, we can be decompressing
        // using the default filter chain.
        //
        // If multithreaded .xz compression is done, the memory limit
        // will be replaced.
        //
        // memory_usage starts as UINT64_MAX, which is also the error
        // value checked further below. It stays that way if none of
        // the conditionally compiled branches below assigns to it.
        uint64_t memory_limit = hardware_memlimit_get(opt_mode);
        uint64_t memory_usage = UINT64_MAX;

#ifdef HAVE_ENCODERS
        // Memory usage for each encoder filter chain (default
        // or --filtersX). The encoder options may need to be
        // scaled down depending on the memory usage limit.
        uint64_t encoder_memusages[ARRAY_SIZE(chains)];
#endif

        if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS
#       ifdef MYTHREAD_ENABLED
                if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
                        memory_limit = hardware_memlimit_mtenc_get();
                        mt_options.threads = hardware_threads_get();

                        uint64_t block_size = opt_block_size;

                        // If opt_block_size is not set, find the maximum
                        // recommended Block size based on the filter chains
                        if (block_size == 0) {
                                for (unsigned i = 0; i < ARRAY_SIZE(chains);
                                                i++) {
                                        if (!(chains_used_mask & (1U << i)))
                                                continue;

                                        uint64_t size = lzma_mt_block_size(
                                                        chains[i]);

                                        // If this returns an error, then one
                                        // of the filter chains in use is
                                        // invalid, so there is no point in
                                        // progressing further.
                                        if (size == UINT64_MAX)
                                                message_fatal(_("Unsupported "
                                                        "options in filter "
                                                        "chain %u"), i);

                                        if (size > block_size)
                                                block_size = size;
                                }

                                // If --block-list was used and our current
                                // Block size exceeds the largest size
                                // in --block-list, reduce the Block size of
                                // the multithreaded encoder. The extra size
                                // would only be a waste of RAM. With a
                                // smaller Block size we might even be able
                                // to use more threads in some cases.
                                if (block_list_largest > 0 && block_size
                                                > block_list_largest)
                                        block_size = block_list_largest;
                        }

                        mt_options.block_size = block_size;
                        mt_options.check = check;

                        memory_usage = get_chains_memusage(encoder_memusages,
                                                &mt_options, true);
                        if (memory_usage != UINT64_MAX)
                                message(V_DEBUG, _("Using up to %" PRIu32
                                                " threads."),
                                                mt_options.threads);
                } else
#       endif
                {
                        memory_usage = get_chains_memusage(encoder_memusages,
                                        NULL, true);
                }
#endif
        } else {
#ifdef HAVE_DECODERS
                memory_usage = lzma_raw_decoder_memusage(default_filters);
#endif
        }

        if (memory_usage == UINT64_MAX)
                message_fatal(_("Unsupported filter chain or filter options"));

        // Print memory usage info before possible dictionary
        // size auto-adjusting.
        //
        // NOTE: If only encoder support was built, we cannot show
        // what the decoder memory usage will be.
        message_mem_needed(V_DEBUG, memory_usage);

#if defined(HAVE_ENCODERS) && defined(HAVE_DECODERS)
        if (opt_mode == MODE_COMPRESS && message_verbosity_get() >= V_DEBUG) {
                const uint64_t decmem = get_chains_memusage(NULL, NULL, false);
                if (decmem != UINT64_MAX)
                        message(V_DEBUG, _("Decompression will need "
                                        "%s MiB of memory."), uint64_to_str(
                                                round_up_to_mib(decmem), 0));
        }
#endif

        // The settings fit within the limit as they are,
        // so nothing needs to be adjusted.
        if (memory_usage <= memory_limit)
                return;

        // With --format=raw settings are never adjusted to meet
        // the memory usage limit.
        if (opt_format == FORMAT_RAW)
                memlimit_too_small(memory_usage);

        // Everything below adjusts encoder settings only.
        assert(opt_mode == MODE_COMPRESS);

#ifdef HAVE_ENCODERS
#       ifdef MYTHREAD_ENABLED
        if (opt_format == FORMAT_XZ && hardware_threads_is_mt()) {
                // Try to reduce the number of threads before
                // adjusting the compression settings down.
                while (mt_options.threads > 1) {
                        // Reduce the number of threads by one and check
                        // the memory usage.
                        --mt_options.threads;
                        memory_usage = get_chains_memusage(encoder_memusages,
                                        &mt_options, true);
                        if (memory_usage == UINT64_MAX)
                                message_bug();

                        if (memory_usage <= memory_limit) {
                                // The memory usage is now low enough.
                                //
                                // Since 5.6.1: This is only shown at
                                // V_DEBUG instead of V_WARNING because
                                // changing the number of threads doesn't
                                // affect the output. On some systems this
                                // message would be too common now that
                                // multithreaded compression is the default.
                                message(V_DEBUG, _("Reduced the number of "
                                        "threads from %s to %s to not exceed "
                                        "the memory usage limit of %s MiB"),
                                        uint64_to_str(
                                                hardware_threads_get(), 0),
                                        uint64_to_str(mt_options.threads, 1),
                                        uint64_to_str(round_up_to_mib(
                                                memory_limit), 2));
                                return;
                        }
                }

                // If the memory usage limit is only a soft limit (automatic
                // number of threads and no --memlimit-compress), the limit
                // is only used to reduce the number of threads and once at
                // just one thread, the limit is completely ignored. This
                // way -T0 won't use insane amount of memory but at the same
                // time the soft limit will never make xz fail and never make
                // xz change settings that would affect the compressed output.
                //
                // Since 5.6.1: Like above, this is now shown at V_DEBUG
                // instead of V_WARNING.
                if (hardware_memlimit_mtenc_is_default()) {
                        message(V_DEBUG, _("Reduced the number of threads "
                                "from %s to one. The automatic memory usage "
                                "limit of %s MiB is still being exceeded. "
                                "%s MiB of memory is required. "
                                "Continuing anyway."),
                                uint64_to_str(hardware_threads_get(), 0),
                                uint64_to_str(
                                        round_up_to_mib(memory_limit), 1),
                                uint64_to_str(
                                        round_up_to_mib(memory_usage), 2));
                        return;
                }

                // If --no-adjust was used, we cannot drop to single-threaded
                // mode since it produces different compressed output.
                //
                // NOTE: In xz 5.2.x, --no-adjust also prevented reducing
                // the number of threads. This changed in 5.3.3alpha.
                if (!opt_auto_adjust)
                        memlimit_too_small(memory_usage);

                // Switch to single-threaded mode. It uses
                // less memory than using one thread in
                // the multithreaded mode but the output
                // is also different.
                hardware_threads_set(1);
                memory_usage = get_chains_memusage(encoder_memusages,
                                NULL, true);
                message(V_WARNING, _("Switching to single-threaded mode "
                        "to not exceed the memory usage limit of %s MiB"),
                        uint64_to_str(round_up_to_mib(memory_limit), 0));
        }
#       endif

        if (memory_usage <= memory_limit)
                return;

        // Don't adjust LZMA2 or LZMA1 dictionary size if --no-adjust
        // was specified as that would change the compressed output.
        if (!opt_auto_adjust)
                memlimit_too_small(memory_usage);

        // Adjust each filter chain that is exceeding the memory usage limit.
        for (unsigned i = 0; i < ARRAY_SIZE(chains); i++) {
                // Skip unused chains.
                if (!(chains_used_mask & (1U << i)))
                        continue;

                // Skip chains that already meet the memory usage limit.
                if (encoder_memusages[i] <=  memory_limit)
                        continue;

                // Look for the last filter if it is LZMA2 or LZMA1, so we
                // can make it use less RAM. We cannot adjust other filters.
                unsigned j = 0;
                while (chains[i][j].id != LZMA_FILTER_LZMA2
                                && chains[i][j].id != LZMA_FILTER_LZMA1) {
                        // NOTE: This displays the too high limit of this
                        // particular filter chain. If multiple chains are
                        // specified and another one would need more then
                        // this message could be confusing. As long as LZMA2
                        // is the only memory hungry filter in .xz this
                        // doesn't matter at all in practice.
                        //
                        // FIXME? However, it's sort of odd still if we had
                        // switched from multithreaded mode to single-threaded
                        // mode because single-threaded produces different
                        // output. So the messages could perhaps be clearer.
                        // Another case of this is a few lines below.
                        if (chains[i][j].id == LZMA_VLI_UNKNOWN)
                                memlimit_too_small(encoder_memusages[i]);

                        ++j;
                }

                // Decrease the dictionary size until we meet the memory
                // usage limit. First round down to full mebibytes.
                lzma_options_lzma *opt = chains[i][j].options;
                const uint32_t orig_dict_size = opt->dict_size;
                opt->dict_size &= ~((UINT32_C(1) << 20) - 1);

                while (true) {
                        // If it is below 1 MiB, auto-adjusting failed.
                        //
                        // FIXME? See the FIXME a few lines above.
                        if (opt->dict_size < (UINT32_C(1) << 20))
                                memlimit_too_small(encoder_memusages[i]);

                        encoder_memusages[i]
                                = lzma_raw_encoder_memusage(chains[i]);
                        if (encoder_memusages[i] == UINT64_MAX)
                                message_bug();

                        // Accept it if it is low enough.
                        if (encoder_memusages[i] <= memory_limit)
                                break;

                        // Otherwise adjust it 1 MiB down and try again.
                        opt->dict_size -= UINT32_C(1) << 20;
                }

                // Tell the user that we decreased the dictionary size.
                // The message is slightly different between the default
                // filter chain (0) and the chains from --filtersX.
                //
                // Each uint64_to_str() call below uses a different second
                // argument (0, 1, 2), as do the other messages in this
                // function that format several numbers at once.
                const char lzma_num = chains[i][j].id == LZMA_FILTER_LZMA2
                                        ? '2' : '1';
                const char *from_size = uint64_to_str(orig_dict_size >> 20, 0);
                const char *to_size = uint64_to_str(opt->dict_size >> 20, 1);
                const char *limit_size = uint64_to_str(round_up_to_mib(
                                        memory_limit), 2);
                if (i == 0)
                        message(V_WARNING, _("Adjusted LZMA%c dictionary "
                                "size from %s MiB to %s MiB to not exceed the "
                                "memory usage limit of %s MiB"),
                                lzma_num, from_size, to_size, limit_size);
                else
                        message(V_WARNING, _("Adjusted LZMA%c dictionary size "
                                "for --filters%u from %s MiB to %s MiB to not "
                                "exceed the memory usage limit of %s MiB"),
                                lzma_num, i, from_size, to_size, limit_size);
        }
#endif

        return;
}
 729
 730
 731 #ifdef HAVE_DECODERS
 732 /// Return true if the data in in_buf seems to be in the .xz format.
 733 static bool
 734 is_format_xz(void)
 735 {
 736         // Specify the magic as hex to be compatible with EBCDIC systems.
 737         static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };
 738         return strm.avail_in >= sizeof(magic)
 739                         && memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
 740 }
 741
 742
 743 /// Return true if the data in in_buf seems to be in the .lzma format.
 744 static bool
 745 is_format_lzma(void)
 746 {
 747         // The .lzma header is 13 bytes.
 748         if (strm.avail_in < 13)
 749                 return false;
 750
 751         // Decode the LZMA1 properties.
 752         lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
 753         if (lzma_properties_decode(&filter, NULL, in_buf.u8, 5) != LZMA_OK)
 754                 return false;
 755
 756         // A hack to ditch tons of false positives: We allow only dictionary
 757         // sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
 758         // created only files with 2^n, but accepts any dictionary size.
 759         // If someone complains, this will be reconsidered.
 760         lzma_options_lzma *opt = filter.options;
 761         const uint32_t dict_size = opt->dict_size;
 762         free(opt);
 763
 764         if (dict_size != UINT32_MAX) {
 765                 uint32_t d = dict_size - 1;
 766                 d |= d >> 2;
 767                 d |= d >> 3;
 768                 d |= d >> 4;
 769                 d |= d >> 8;
 770                 d |= d >> 16;
 771                 ++d;
 772                 if (d != dict_size || dict_size == 0)
 773                         return false;
 774         }
 775
 776         // Another hack to ditch false positives: Assume that if the
 777         // uncompressed size is known, it must be less than 256 GiB.
 778         // Again, if someone complains, this will be reconsidered.
 779         uint64_t uncompressed_size = 0;
 780         for (size_t i = 0; i < 8; ++i)
 781                 uncompressed_size |= (uint64_t)(in_buf.u8[5 + i]) << (i * 8);
 782
 783         if (uncompressed_size != UINT64_MAX
 784                         && uncompressed_size > (UINT64_C(1) << 38))
 785                 return false;
 786
 787         return true;
 788 }
 789
 790
 791 #ifdef HAVE_LZIP_DECODER
 792 /// Return true if the data in in_buf seems to be in the .lz format.
 793 static bool
 794 is_format_lzip(void)
 795 {
 796         static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };
 797         return strm.avail_in >= sizeof(magic)
 798                         && memcmp(in_buf.u8, magic, sizeof(magic)) == 0;
 799 }
 800 #endif
 801 #endif
 802
 803
 804 /// Detect the input file type (for now, this done only when decompressing),
 805 /// and initialize an appropriate coder. Return value indicates if a normal
 806 /// liblzma-based coder was initialized (CODER_INIT_NORMAL), if passthru
 807 /// mode should be used (CODER_INIT_PASSTHRU), or if an error occurred
 808 /// (CODER_INIT_ERROR).
 809 static enum coder_init_ret
 810 coder_init(file_pair *pair)
 811 {
 812         lzma_ret ret = LZMA_PROG_ERROR;
 813
 814         // In most cases if there is input left when coding finishes,
 815         // something has gone wrong. Exceptions are --single-stream
 816         // and decoding .lz files which can contain trailing non-.lz data.
 817         // These will be handled later in this function.
 818         allow_trailing_input = false;
 819
 820         // Set the first filter chain. If the --block-list option is not
 821         // used then use the default filter chain (chains[0]).
 822         // Otherwise, use first filter chain from the block list.
 823         lzma_filter *active_filters = opt_block_list == NULL
 824                         ? chains[0]
 825                         : chains[opt_block_list[0].chain_num];
 826
 827         if (opt_mode == MODE_COMPRESS) {
 828 #ifdef HAVE_ENCODERS
 829                 switch (opt_format) {
 830                 case FORMAT_AUTO:
 831                         // args.c ensures this.
 832                         assert(0);
 833                         break;
 834
 835                 case FORMAT_XZ:
 836 #       ifdef MYTHREAD_ENABLED
 837                         mt_options.filters = active_filters;
 838                         if (hardware_threads_is_mt())
 839                                 ret = lzma_stream_encoder_mt(
 840                                                 &strm, &mt_options);
 841                         else
 842 #       endif
 843                                 ret = lzma_stream_encoder(
 844                                                 &strm, active_filters, check);
 845                         break;
 846
 847                 case FORMAT_LZMA:
 848                         ret = lzma_alone_encoder(&strm,
 849                                         active_filters[0].options);
 850                         break;
 851
 852 #       ifdef HAVE_LZIP_DECODER
 853                 case FORMAT_LZIP:
 854                         // args.c should disallow this.
 855                         assert(0);
 856                         ret = LZMA_PROG_ERROR;
 857                         break;
 858 #       endif
 859
 860                 case FORMAT_RAW:
 861                         ret = lzma_raw_encoder(&strm, active_filters);
 862                         break;
 863                 }
 864 #endif
 865         } else {
 866 #ifdef HAVE_DECODERS
 867                 uint32_t flags = 0;
 868
 869                 // It seems silly to warn about unsupported check if the
 870                 // check won't be verified anyway due to --ignore-check.
 871                 if (opt_ignore_check)
 872                         flags |= LZMA_IGNORE_CHECK;
 873                 else
 874                         flags |= LZMA_TELL_UNSUPPORTED_CHECK;
 875
 876                 if (opt_single_stream)
 877                         allow_trailing_input = true;
 878                 else
 879                         flags |= LZMA_CONCATENATED;
 880
 881                 // We abuse FORMAT_AUTO to indicate unknown file format,
 882                 // for which we may consider passthru mode.
 883                 enum format_type init_format = FORMAT_AUTO;
 884
 885                 switch (opt_format) {
 886                 case FORMAT_AUTO:
 887                         // .lz is checked before .lzma since .lzma detection
 888                         // is more complicated (no magic bytes).
 889                         if (is_format_xz())
 890                                 init_format = FORMAT_XZ;
 891 #       ifdef HAVE_LZIP_DECODER
 892                         else if (is_format_lzip())
 893                                 init_format = FORMAT_LZIP;
 894 #       endif
 895                         else if (is_format_lzma())
 896                                 init_format = FORMAT_LZMA;
 897                         break;
 898
 899                 case FORMAT_XZ:
 900                         if (is_format_xz())
 901                                 init_format = FORMAT_XZ;
 902                         break;
 903
 904                 case FORMAT_LZMA:
 905                         if (is_format_lzma())
 906                                 init_format = FORMAT_LZMA;
 907                         break;
 908
 909 #       ifdef HAVE_LZIP_DECODER
 910                 case FORMAT_LZIP:
 911                         if (is_format_lzip())
 912                                 init_format = FORMAT_LZIP;
 913                         break;
 914 #       endif
 915
 916                 case FORMAT_RAW:
 917                         init_format = FORMAT_RAW;
 918                         break;
 919                 }
 920
 921                 switch (init_format) {
 922                 case FORMAT_AUTO:
 923                         // Unknown file format. If --decompress --stdout
 924                         // --force have been given, then we copy the input
 925                         // as is to stdout. Checking for MODE_DECOMPRESS
 926                         // is needed, because we don't want to do use
 927                         // passthru mode with --test.
 928                         if (opt_mode == MODE_DECOMPRESS
 929                                         && opt_stdout && opt_force) {
 930                                 // These are needed for progress info.
 931                                 strm.total_in = 0;
 932                                 strm.total_out = 0;
 933                                 return CODER_INIT_PASSTHRU;
 934                         }
 935
 936                         ret = LZMA_FORMAT_ERROR;
 937                         break;
 938
 939                 case FORMAT_XZ:
 940 #       ifdef MYTHREAD_ENABLED
 941                         mt_options.flags = flags;
 942
 943                         mt_options.threads = hardware_threads_get();
 944                         mt_options.memlimit_stop
 945                                 = hardware_memlimit_get(MODE_DECOMPRESS);
 946
 947                         // If single-threaded mode was requested, set the
 948                         // memlimit for threading to zero. This forces the
 949                         // decoder to use single-threaded mode which matches
 950                         // the behavior of lzma_stream_decoder().
 951                         //
 952                         // Otherwise use the limit for threaded decompression
 953                         // which has a sane default (users are still free to
 954                         // make it insanely high though).
 955                         mt_options.memlimit_threading
 956                                         = mt_options.threads == 1
 957                                         ? 0 : hardware_memlimit_mtdec_get();
 958
 959                         ret = lzma_stream_decoder_mt(&strm, &mt_options);
 960 #       else
 961                         ret = lzma_stream_decoder(&strm,
 962                                         hardware_memlimit_get(
 963                                                 MODE_DECOMPRESS), flags);
 964 #       endif
 965                         break;
 966
 967                 case FORMAT_LZMA:
 968                         ret = lzma_alone_decoder(&strm,
 969                                         hardware_memlimit_get(
 970                                                 MODE_DECOMPRESS));
 971                         break;
 972
 973 #       ifdef HAVE_LZIP_DECODER
 974                 case FORMAT_LZIP:
 975                         allow_trailing_input = true;
 976                         ret = lzma_lzip_decoder(&strm,
 977                                         hardware_memlimit_get(
 978                                                 MODE_DECOMPRESS), flags);
 979                         break;
 980 #       endif
 981
 982                 case FORMAT_RAW:
 983                         // Memory usage has already been checked in
 984                         // coder_set_compression_settings().
 985                         ret = lzma_raw_decoder(&strm, active_filters);
 986                         break;
 987                 }
 988
 989                 // Try to decode the headers. This will catch too low
 990                 // memory usage limit in case it happens in the first
 991                 // Block of the first Stream, which is where it very
 992                 // probably will happen if it is going to happen.
 993                 //
 994                 // This will also catch unsupported check type which
 995                 // we treat as a warning only. If there are empty
 996                 // concatenated Streams with unsupported check type then
 997                 // the message can be shown more than once here. The loop
 998                 // is used in case there is first a warning about
 999                 // unsupported check type and then the first Block
1000                 // would exceed the memlimit.
1001                 if (ret == LZMA_OK && init_format != FORMAT_RAW) {
1002                         strm.next_out = NULL;
1003                         strm.avail_out = 0;
1004                         while ((ret = lzma_code(&strm, LZMA_RUN))
1005                                         == LZMA_UNSUPPORTED_CHECK)
1006                                 message_warning(_("%s: %s"), pair->src_name,
1007                                                 message_strm(ret));
1008
1009                         // With --single-stream lzma_code won't wait for
1010                         // LZMA_FINISH and thus it can return LZMA_STREAM_END
1011                         // if the file has no uncompressed data inside.
1012                         // So treat LZMA_STREAM_END as LZMA_OK here.
1013                         // When lzma_code() is called again in coder_normal()
1014                         // it will return LZMA_STREAM_END again.
1015                         if (ret == LZMA_STREAM_END)
1016                                 ret = LZMA_OK;
1017                 }
1018 #endif
1019         }
1020
1021         if (ret != LZMA_OK) {
1022                 message_error(_("%s: %s"), pair->src_name, message_strm(ret));
1023                 if (ret == LZMA_MEMLIMIT_ERROR)
1024                         message_mem_needed(V_ERROR, lzma_memusage(&strm));
1025
1026                 return CODER_INIT_ERROR;
1027         }
1028
1029         return CODER_INIT_NORMAL;
1030 }
1031
1032
1033 #ifdef HAVE_ENCODERS
1034 /// Resolve conflicts between opt_block_size and opt_block_list in single
1035 /// threaded mode. We want to default to opt_block_list, except when it is
1036 /// larger than opt_block_size. If this is the case for the current Block
1037 /// at *list_pos, then we break into smaller Blocks. Otherwise advance
1038 /// to the next Block in opt_block_list, and break apart if needed.
1039 static void
1040 split_block(uint64_t *block_remaining,
1041             uint64_t *next_block_remaining,
1042             size_t *list_pos)
1043 {
1044         if (*next_block_remaining > 0) {
1045                 // The Block at *list_pos has previously been split up.
1046                 assert(!hardware_threads_is_mt());
1047                 assert(opt_block_size > 0);
1048                 assert(opt_block_list != NULL);
1049
1050                 if (*next_block_remaining > opt_block_size) {
1051                         // We have to split the current Block at *list_pos
1052                         // into another opt_block_size length Block.
1053                         *block_remaining = opt_block_size;
1054                 } else {
1055                         // This is the last remaining split Block for the
1056                         // Block at *list_pos.
1057                         *block_remaining = *next_block_remaining;
1058                 }
1059
1060                 *next_block_remaining -= *block_remaining;
1061
1062         } else {
1063                 // The Block at *list_pos has been finished. Go to the next
1064                 // entry in the list. If the end of the list has been
1065                 // reached, reuse the size and filters of the last Block.
1066                 if (opt_block_list[*list_pos + 1].size != 0) {
1067                         ++*list_pos;
1068
1069                         // Update the filters if needed.
1070                         if (opt_block_list[*list_pos - 1].chain_num
1071                                 != opt_block_list[*list_pos].chain_num) {
1072                                 const unsigned chain_num
1073                                         = opt_block_list[*list_pos].chain_num;
1074                                 const lzma_filter *next = chains[chain_num];
1075                                 const lzma_ret ret = lzma_filters_update(
1076                                                 &strm, next);
1077
1078                                 if (ret != LZMA_OK) {
1079                                         // This message is only possible if
1080                                         // the filter chain has unsupported
1081                                         // options since the filter chain is
1082                                         // validated using
1083                                         // lzma_raw_encoder_memusage() or
1084                                         // lzma_stream_encoder_mt_memusage().
1085                                         // Some options are not validated until
1086                                         // the encoders are initialized.
1087                                         message_fatal(
1088                                                 _("Error changing to "
1089                                                 "filter chain %u: %s"),
1090                                                 chain_num,
1091                                                 message_strm(ret));
1092                                 }
1093                         }
1094                 }
1095
1096                 *block_remaining = opt_block_list[*list_pos].size;
1097
1098                 // If in single-threaded mode, split up the Block if needed.
1099                 // This is not needed in multi-threaded mode because liblzma
1100                 // will do this due to how threaded encoding works.
1101                 if (!hardware_threads_is_mt() && opt_block_size > 0
1102                                 && *block_remaining > opt_block_size) {
1103                         *next_block_remaining
1104                                         = *block_remaining - opt_block_size;
1105                         *block_remaining = opt_block_size;
1106                 }
1107         }
1108 }
1109 #endif
1110
1111
1112 static bool
1113 coder_write_output(file_pair *pair)
1114 {
1115         if (opt_mode != MODE_TEST) {
1116                 if (io_write(pair, &out_buf, IO_BUFFER_SIZE - strm.avail_out))
1117                         return true;
1118         }
1119
1120         strm.next_out = out_buf.u8;
1121         strm.avail_out = IO_BUFFER_SIZE;
1122         return false;
1123 }
1124
1125
1126 /// Compress or decompress using liblzma.
1127 static bool
1128 coder_normal(file_pair *pair)
1129 {
1130         // Encoder needs to know when we have given all the input to it.
1131         // The decoders need to know it too when we are using
1132         // LZMA_CONCATENATED. We need to check for src_eof here, because
1133         // the first input chunk has been already read if decompressing,
1134         // and that may have been the only chunk we will read.
1135         lzma_action action = pair->src_eof ? LZMA_FINISH : LZMA_RUN;
1136
1137         lzma_ret ret;
1138
1139         // Assume that something goes wrong.
1140         bool success = false;
1141
1142 #ifdef HAVE_ENCODERS
1143         // block_remaining indicates how many input bytes to encode before
1144         // finishing the current .xz Block. The Block size is set with
1145         // --block-size=SIZE and --block-list. They have an effect only when
1146         // compressing to the .xz format. If block_remaining == UINT64_MAX,
1147         // only a single block is created.
1148         uint64_t block_remaining = UINT64_MAX;
1149
1150         // next_block_remaining for when we are in single-threaded mode and
1151         // the Block in --block-list is larger than the --block-size=SIZE.
1152         uint64_t next_block_remaining = 0;
1153
1154         // Position in opt_block_list. Unused if --block-list wasn't used.
1155         size_t list_pos = 0;
1156
1157         // Handle --block-size for single-threaded mode and the first step
1158         // of --block-list.
1159         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_XZ) {
1160                 // --block-size doesn't do anything here in threaded mode,
1161                 // because the threaded encoder will take care of splitting
1162                 // to fixed-sized Blocks.
1163                 if (!hardware_threads_is_mt() && opt_block_size > 0)
1164                         block_remaining = opt_block_size;
1165
1166                 // If --block-list was used, start with the first size.
1167                 //
1168                 // For threaded case, --block-size specifies how big Blocks
1169                 // the encoder needs to be prepared to create at maximum
1170                 // and --block-list will simultaneously cause new Blocks
1171                 // to be started at specified intervals. To keep things
1172                 // logical, the same is done in single-threaded mode. The
1173                 // output is still not identical because in single-threaded
1174                 // mode the size info isn't written into Block Headers.
1175                 if (opt_block_list != NULL) {
1176                         if (block_remaining < opt_block_list[list_pos].size) {
1177                                 assert(!hardware_threads_is_mt());
1178                                 next_block_remaining =
1179                                                 opt_block_list[list_pos].size
1180                                                 - block_remaining;
1181                         } else {
1182                                 block_remaining =
1183                                                 opt_block_list[list_pos].size;
1184                         }
1185                 }
1186         }
1187 #endif
1188
1189         strm.next_out = out_buf.u8;
1190         strm.avail_out = IO_BUFFER_SIZE;
1191
1192         while (!user_abort) {
1193                 // Fill the input buffer if it is empty and we aren't
1194                 // flushing or finishing.
1195                 if (strm.avail_in == 0 && action == LZMA_RUN) {
1196                         strm.next_in = in_buf.u8;
1197 #ifdef HAVE_ENCODERS
1198                         const size_t read_size = my_min(block_remaining,
1199                                         IO_BUFFER_SIZE);
1200 #else
1201                         const size_t read_size = IO_BUFFER_SIZE;
1202 #endif
1203                         strm.avail_in = io_read(pair, &in_buf, read_size);
1204
1205                         if (strm.avail_in == SIZE_MAX)
1206                                 break;
1207
1208                         if (pair->src_eof) {
1209                                 action = LZMA_FINISH;
1210                         }
1211 #ifdef HAVE_ENCODERS
1212                         else if (block_remaining != UINT64_MAX) {
1213                                 // Start a new Block after every
1214                                 // opt_block_size bytes of input.
1215                                 block_remaining -= strm.avail_in;
1216                                 if (block_remaining == 0)
1217                                         action = LZMA_FULL_BARRIER;
1218                         }
1219
1220                         if (action == LZMA_RUN && pair->flush_needed)
1221                                 action = LZMA_SYNC_FLUSH;
1222 #endif
1223                 }
1224
1225                 // Let liblzma do the actual work.
1226                 ret = lzma_code(&strm, action);
1227
1228                 // Write out if the output buffer became full.
1229                 if (strm.avail_out == 0) {
1230                         if (coder_write_output(pair))
1231                                 break;
1232                 }
1233
1234 #ifdef HAVE_ENCODERS
1235                 if (ret == LZMA_STREAM_END && (action == LZMA_SYNC_FLUSH
1236                                 || action == LZMA_FULL_BARRIER)) {
1237                         if (action == LZMA_SYNC_FLUSH) {
1238                                 // Flushing completed. Write the pending data
1239                                 // out immediately so that the reading side
1240                                 // can decompress everything compressed so far.
1241                                 if (coder_write_output(pair))
1242                                         break;
1243
1244                                 // Mark that we haven't seen any new input
1245                                 // since the previous flush.
1246                                 pair->src_has_seen_input = false;
1247                                 pair->flush_needed = false;
1248                         } else {
1249                                 // Start a new Block after LZMA_FULL_BARRIER.
1250                                 if (opt_block_list == NULL) {
1251                                         assert(!hardware_threads_is_mt());
1252                                         assert(opt_block_size > 0);
1253                                         block_remaining = opt_block_size;
1254                                 } else {
1255                                         split_block(&block_remaining,
1256                                                         &next_block_remaining,
1257                                                         &list_pos);
1258                                 }
1259                         }
1260
1261                         // Start a new Block after LZMA_FULL_FLUSH or continue
1262                         // the same block after LZMA_SYNC_FLUSH.
1263                         action = LZMA_RUN;
1264                 } else
1265 #endif
1266                 if (ret != LZMA_OK) {
1267                         // Determine if the return value indicates that we
1268                         // won't continue coding. LZMA_NO_CHECK would be
1269                         // here too if LZMA_TELL_ANY_CHECK was used.
1270                         const bool stop = ret != LZMA_UNSUPPORTED_CHECK;
1271
1272                         if (stop) {
1273                                 // Write the remaining bytes even if something
1274                                 // went wrong, because that way the user gets
1275                                 // as much data as possible, which can be good
1276                                 // when trying to get at least some useful
1277                                 // data out of damaged files.
1278                                 if (coder_write_output(pair))
1279                                         break;
1280                         }
1281
1282                         if (ret == LZMA_STREAM_END) {
1283                                 if (allow_trailing_input) {
1284                                         io_fix_src_pos(pair, strm.avail_in);
1285                                         success = true;
1286                                         break;
1287                                 }
1288
1289                                 // Check that there is no trailing garbage.
1290                                 // This is needed for LZMA_Alone and raw
1291                                 // streams. This is *not* done with .lz files
1292                                 // as that format specifically requires
1293                                 // allowing trailing garbage.
1294                                 if (strm.avail_in == 0 && !pair->src_eof) {
1295                                         // Try reading one more byte.
1296                                         // Hopefully we don't get any more
1297                                         // input, and thus pair->src_eof
1298                                         // becomes true.
1299                                         strm.avail_in = io_read(
1300                                                         pair, &in_buf, 1);
1301                                         if (strm.avail_in == SIZE_MAX)
1302                                                 break;
1303
1304                                         assert(strm.avail_in == 0
1305                                                         || strm.avail_in == 1);
1306                                 }
1307
1308                                 if (strm.avail_in == 0) {
1309                                         assert(pair->src_eof);
1310                                         success = true;
1311                                         break;
1312                                 }
1313
1314                                 // We hadn't reached the end of the file.
1315                                 ret = LZMA_DATA_ERROR;
1316                                 assert(stop);
1317                         }
1318
1319                         // If we get here and stop is true, something went
1320                         // wrong and we print an error. Otherwise it's just
1321                         // a warning and coding can continue.
1322                         if (stop) {
1323                                 message_error(_("%s: %s"), pair->src_name,
1324                                                 message_strm(ret));
1325                         } else {
1326                                 message_warning(_("%s: %s"), pair->src_name,
1327                                                 message_strm(ret));
1328
1329                                 // When compressing, all possible errors set
1330                                 // stop to true.
1331                                 assert(opt_mode != MODE_COMPRESS);
1332                         }
1333
1334                         if (ret == LZMA_MEMLIMIT_ERROR) {
1335                                 // Display how much memory it would have
1336                                 // actually needed.
1337                                 message_mem_needed(V_ERROR,
1338                                                 lzma_memusage(&strm));
1339                         }
1340
1341                         if (stop)
1342                                 break;
1343                 }
1344
1345                 // Show progress information under certain conditions.
1346                 message_progress_update();
1347         }
1348
1349         return success;
1350 }
1351
1352
1353 /// Copy from input file to output file without processing the data in any
1354 /// way. This is used only when trying to decompress unrecognized files
1355 /// with --decompress --stdout --force, so the output is always stdout.
1356 static bool
1357 coder_passthru(file_pair *pair)
1358 {
1359         while (strm.avail_in != 0) {
1360                 if (user_abort)
1361                         return false;
1362
1363                 if (io_write(pair, &in_buf, strm.avail_in))
1364                         return false;
1365
1366                 strm.total_in += strm.avail_in;
1367                 strm.total_out = strm.total_in;
1368                 message_progress_update();
1369
1370                 strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1371                 if (strm.avail_in == SIZE_MAX)
1372                         return false;
1373         }
1374
1375         return true;
1376 }
1377
1378
1379 extern void
1380 coder_run(const char *filename)
1381 {
1382         // Set and possibly print the filename for the progress message.
1383         message_filename(filename);
1384
1385         // Try to open the input file.
1386         file_pair *pair = io_open_src(filename);
1387         if (pair == NULL)
1388                 return;
1389
1390         // Assume that something goes wrong.
1391         bool success = false;
1392
1393         if (opt_mode == MODE_COMPRESS) {
1394                 strm.next_in = NULL;
1395                 strm.avail_in = 0;
1396         } else {
1397                 // Read the first chunk of input data. This is needed
1398                 // to detect the input file type.
1399                 strm.next_in = in_buf.u8;
1400                 strm.avail_in = io_read(pair, &in_buf, IO_BUFFER_SIZE);
1401         }
1402
1403         if (strm.avail_in != SIZE_MAX) {
1404                 // Initialize the coder. This will detect the file format
1405                 // and, in decompression or testing mode, check the memory
1406                 // usage of the first Block too. This way we don't try to
1407                 // open the destination file if we see that coding wouldn't
1408                 // work at all anyway. This also avoids deleting the old
1409                 // "target" file if --force was used.
1410                 const enum coder_init_ret init_ret = coder_init(pair);
1411
1412                 if (init_ret != CODER_INIT_ERROR && !user_abort) {
1413                         // Don't open the destination file when --test
1414                         // is used.
1415                         if (opt_mode == MODE_TEST || !io_open_dest(pair)) {
1416                                 // Remember the current time. It is needed
1417                                 // for progress indicator.
1418                                 mytime_set_start_time();
1419
1420                                 // Initialize the progress indicator.
1421                                 //
1422                                 // NOTE: When reading from stdin, fstat()
1423                                 // isn't called on it and thus src_st.st_size
1424                                 // is zero. If stdin pointed to a regular
1425                                 // file, it would still be possible to know
1426                                 // the file size but then we would also need
1427                                 // to take into account the current reading
1428                                 // position since with stdin it isn't
1429                                 // necessarily at the beginning of the file.
1430                                 const bool is_passthru = init_ret
1431                                                 == CODER_INIT_PASSTHRU;
1432                                 const uint64_t in_size
1433                                         = pair->src_st.st_size <= 0
1434                                         ? 0 : (uint64_t)(pair->src_st.st_size);
1435                                 message_progress_start(&strm,
1436                                                 is_passthru, in_size);
1437
1438                                 // Do the actual coding or passthru.
1439                                 if (is_passthru)
1440                                         success = coder_passthru(pair);
1441                                 else
1442                                         success = coder_normal(pair);
1443
1444                                 message_progress_end(success);
1445                         }
1446                 }
1447         }
1448
1449         // Close the file pair. It needs to know if coding was successful to
1450         // know if the source or target file should be unlinked.
1451         io_close(pair, success);
1452
1453         return;
1454 }
1455
1456
1457 #ifndef NDEBUG
1458 extern void
1459 coder_free(void)
1460 {
1461         // Free starting from the second filter chain since the default
1462         // filter chain may have its options set from a static variable
1463         // in coder_set_compression_settings(). Since this is only run in
1464         // debug mode and will be freed when the process ends anyway, we
1465         // don't worry about freeing it.
1466         for (uint32_t i = 1; i < ARRAY_SIZE(chains); i++) {
1467                 if (chains_used_mask & (1U << i))
1468                         lzma_filters_free(chains[i], NULL);
1469         }
1470
1471         lzma_end(&strm);
1472         return;
1473 }
1474 #endif