1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Argument parsing
6 /// \note Filter-specific options parsing is in options.c.
8 // Authors: Lasse Collin
11 // This file has been put into the public domain.
12 // You can do whatever you want with this file.
14 ///////////////////////////////////////////////////////////////////////////////
// Global option flags. Every flag starts out false and is switched on
// while the arguments are being parsed.

/// True when output goes to standard output. args_parse() forces
/// opt_keep_original on whenever this is set, because the source file
/// must never be removed when the destination is not on disk.
bool opt_stdout = false;

/// NOTE(review): presumably set by -f/--force; the assignment is not
/// visible in this part of the file, so confirm in parse_real().
bool opt_force = false;

/// True when the source file must not be removed. args_parse() also
/// sets this when opt_stdout is set or when the mode is MODE_TEST.
bool opt_keep_original = false;

/// NOTE(review): presumably set by --robot (OPT_ROBOT); the assignment
/// is not visible in this part of the file, so confirm in parse_real().
bool opt_robot = false;

/// Set to true by the --ignore-check option (OPT_IGNORE_CHECK).
bool opt_ignore_check = false;
// Placeholder name used instead of a real filename when the list of
// files is read from standard input: args->files_name is pointed at
// this string when no filename argument was given to --files/--files0.
//
// The string is never modified or free()d.
// NOTE(review): the earlier comment said this has to be assignable to
// non-const pointers; the array is declared const here, so confirm the
// pointer types at the assignment sites.
const char stdin_filename[] = "(stdin)";
33 /// Parse and set the memory usage limit for compression, decompression,
34 /// and/or multithreaded decompression.
36 parse_memlimit(const char *name
, const char *name_percentage
, const char *str
,
37 bool set_compress
, bool set_decompress
, bool set_mtdec
)
39 bool is_percentage
= false;
42 const size_t len
= strlen(str
);
43 if (len
> 0 && str
[len
- 1] == '%') {
44 // Make a copy so that we can get rid of %.
46 // In the past str wasn't const and we modified it directly
47 // but that modified argv[] and thus affected what was visible
48 // in "ps auxf" or similar tools which was confusing. For
49 // example, --memlimit=50% would show up as --memlimit=50
50 // since the percent sign was overwritten here.
51 char *s
= xstrdup(str
);
54 value
= str_to_uint64(name_percentage
, s
, 1, 100);
57 // On 32-bit systems, SIZE_MAX would make more sense than
58 // UINT64_MAX. But use UINT64_MAX still so that scripts
59 // that assume > 4 GiB values don't break.
60 value
= str_to_uint64(name
, str
, 0, UINT64_MAX
);
63 hardware_memlimit_set(value
, set_compress
, set_decompress
, set_mtdec
,
70 parse_block_list(const char *str_const
)
72 // We need a modifiable string in the for-loop.
73 char *str_start
= xstrdup(str_const
);
74 char *str
= str_start
;
76 // It must be non-empty and not begin with a comma.
77 if (str
[0] == '\0' || str
[0] == ',')
78 message_fatal(_("%s: Invalid argument to --block-list"), str
);
80 // Count the number of comma-separated strings.
82 for (size_t i
= 0; str
[i
] != '\0'; ++i
)
86 // Prevent an unlikely integer overflow.
87 if (count
> SIZE_MAX
/ sizeof(block_list_entry
) - 1)
88 message_fatal(_("%s: Too many arguments to --block-list"),
91 // Allocate memory to hold all the sizes specified.
92 // If --block-list was specified already, its value is forgotten.
94 opt_block_list
= xmalloc((count
+ 1) * sizeof(block_list_entry
));
96 for (size_t i
= 0; i
< count
; ++i
) {
97 // Locate the next comma and replace it with \0.
98 char *p
= strchr(str
, ',');
102 // Use the default filter chain unless overridden.
103 opt_block_list
[i
].filters_index
= 0;
105 // To specify a filter chain, the block list entry may be
106 // prepended with "[filter-chain-number]:". The size is
107 // still required for every block.
109 // --block-list=2:10MiB,1:5MiB,,8MiB,0:0
112 // 1. Block of 10 MiB using filter chain 2
113 // 2. Block of 5 MiB using filter chain 1
114 // 3. Block of 5 MiB using filter chain 1
115 // 4. Block of 8 MiB using the default filter chain
116 // 5. The last block uses the default filter chain
119 // --block-list=2:MiB,1:,0
121 // Is not allowed because the second block does not specify
122 // the block size, only the filter chain.
123 if (str
[0] >= '0' && str
[0] <= '9' && str
[1] == ':') {
125 message_fatal(_("In --block-list, block "
126 "size is missing after "
127 "filter chain number '%c:'"),
130 int filter_num
= str
[0] - '0';
131 opt_block_list
[i
].filters_index
=
132 (uint32_t)filter_num
;
136 if (str
[0] == '\0') {
137 // There is no string, that is, a comma follows
138 // another comma. Use the previous value.
140 // NOTE: We checked earlier that the first char
141 // of the whole list cannot be a comma.
143 opt_block_list
[i
] = opt_block_list
[i
- 1];
145 opt_block_list
[i
].size
= str_to_uint64("block-list",
148 // Zero indicates no more new Blocks.
149 if (opt_block_list
[i
].size
== 0) {
151 message_fatal(_("0 can only be used "
152 "as the last element "
155 opt_block_list
[i
].size
= UINT64_MAX
;
162 // Terminate the array.
163 opt_block_list
[count
].size
= 0;
171 parse_real(args_info
*args
, int argc
, char **argv
)
174 OPT_FILTERS
= INT_MIN
,
206 OPT_MEM_MT_DECOMPRESS
,
214 static const char short_opts
[]
215 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
217 static const struct option long_opts
[] = {
219 { "compress", no_argument
, NULL
, 'z' },
220 { "decompress", no_argument
, NULL
, 'd' },
221 { "uncompress", no_argument
, NULL
, 'd' },
222 { "test", no_argument
, NULL
, 't' },
223 { "list", no_argument
, NULL
, 'l' },
225 // Operation modifiers
226 { "keep", no_argument
, NULL
, 'k' },
227 { "force", no_argument
, NULL
, 'f' },
228 { "stdout", no_argument
, NULL
, 'c' },
229 { "to-stdout", no_argument
, NULL
, 'c' },
230 { "single-stream", no_argument
, NULL
, OPT_SINGLE_STREAM
},
231 { "no-sparse", no_argument
, NULL
, OPT_NO_SPARSE
},
232 { "suffix", required_argument
, NULL
, 'S' },
233 // { "recursive", no_argument, NULL, 'r' }, // TODO
234 { "files", optional_argument
, NULL
, OPT_FILES
},
235 { "files0", optional_argument
, NULL
, OPT_FILES0
},
237 // Basic compression settings
238 { "format", required_argument
, NULL
, 'F' },
239 { "check", required_argument
, NULL
, 'C' },
240 { "ignore-check", no_argument
, NULL
, OPT_IGNORE_CHECK
},
241 { "block-size", required_argument
, NULL
, OPT_BLOCK_SIZE
},
242 { "block-list", required_argument
, NULL
, OPT_BLOCK_LIST
},
243 { "memlimit-compress", required_argument
, NULL
, OPT_MEM_COMPRESS
},
244 { "memlimit-decompress", required_argument
, NULL
, OPT_MEM_DECOMPRESS
},
245 { "memlimit-mt-decompress", required_argument
, NULL
, OPT_MEM_MT_DECOMPRESS
},
246 { "memlimit", required_argument
, NULL
, 'M' },
247 { "memory", required_argument
, NULL
, 'M' }, // Old alias
248 { "no-adjust", no_argument
, NULL
, OPT_NO_ADJUST
},
249 { "threads", required_argument
, NULL
, 'T' },
250 { "flush-timeout", required_argument
, NULL
, OPT_FLUSH_TIMEOUT
},
252 { "extreme", no_argument
, NULL
, 'e' },
253 { "fast", no_argument
, NULL
, '0' },
254 { "best", no_argument
, NULL
, '9' },
257 { "filters", optional_argument
, NULL
, OPT_FILTERS
},
258 { "filters1", optional_argument
, NULL
, OPT_FILTERS1
},
259 { "filters2", optional_argument
, NULL
, OPT_FILTERS2
},
260 { "filters3", optional_argument
, NULL
, OPT_FILTERS3
},
261 { "filters4", optional_argument
, NULL
, OPT_FILTERS4
},
262 { "filters5", optional_argument
, NULL
, OPT_FILTERS5
},
263 { "filters6", optional_argument
, NULL
, OPT_FILTERS6
},
264 { "filters7", optional_argument
, NULL
, OPT_FILTERS7
},
265 { "filters8", optional_argument
, NULL
, OPT_FILTERS8
},
266 { "filters9", optional_argument
, NULL
, OPT_FILTERS9
},
267 { "filters-help", optional_argument
, NULL
, OPT_FILTERS_HELP
},
269 { "lzma1", optional_argument
, NULL
, OPT_LZMA1
},
270 { "lzma2", optional_argument
, NULL
, OPT_LZMA2
},
271 { "x86", optional_argument
, NULL
, OPT_X86
},
272 { "powerpc", optional_argument
, NULL
, OPT_POWERPC
},
273 { "ia64", optional_argument
, NULL
, OPT_IA64
},
274 { "arm", optional_argument
, NULL
, OPT_ARM
},
275 { "armthumb", optional_argument
, NULL
, OPT_ARMTHUMB
},
276 { "arm64", optional_argument
, NULL
, OPT_ARM64
},
277 { "sparc", optional_argument
, NULL
, OPT_SPARC
},
278 { "riscv", optional_argument
, NULL
, OPT_RISCV
},
279 { "delta", optional_argument
, NULL
, OPT_DELTA
},
282 { "quiet", no_argument
, NULL
, 'q' },
283 { "verbose", no_argument
, NULL
, 'v' },
284 { "no-warn", no_argument
, NULL
, 'Q' },
285 { "robot", no_argument
, NULL
, OPT_ROBOT
},
286 { "info-memory", no_argument
, NULL
, OPT_INFO_MEMORY
},
287 { "help", no_argument
, NULL
, 'h' },
288 { "long-help", no_argument
, NULL
, 'H' },
289 { "version", no_argument
, NULL
, 'V' },
296 while ((c
= getopt_long(argc
, argv
, short_opts
, long_opts
, NULL
))
299 // Compression preset (also for decompression if --format=raw)
300 case '0': case '1': case '2': case '3': case '4':
301 case '5': case '6': case '7': case '8': case '9':
302 coder_set_preset((uint32_t)(c
- '0'));
305 // --memlimit-compress
306 case OPT_MEM_COMPRESS
:
307 parse_memlimit("memlimit-compress",
308 "memlimit-compress%", optarg
,
312 // --memlimit-decompress
313 case OPT_MEM_DECOMPRESS
:
314 parse_memlimit("memlimit-decompress",
315 "memlimit-decompress%", optarg
,
319 // --memlimit-mt-decompress
320 case OPT_MEM_MT_DECOMPRESS
:
321 parse_memlimit("memlimit-mt-decompress",
322 "memlimit-mt-decompress%", optarg
,
328 parse_memlimit("memlimit", "memlimit%", optarg
,
338 // Since xz 5.4.0: Ignore leading '+' first.
339 const char *s
= optarg
;
340 if (optarg
[0] == '+')
343 // The max is from src/liblzma/common/common.h.
344 uint32_t t
= str_to_uint64("threads", s
, 0, 16384);
346 // If leading '+' was used then use multi-threaded
347 // mode even if exactly one thread was specified.
348 if (t
== 1 && optarg
[0] == '+')
351 hardware_threads_set(t
);
357 // This doesn't return.
367 opt_mode
= MODE_DECOMPRESS
;
381 case OPT_INFO_MEMORY
:
382 // This doesn't return.
383 hardware_memlimit_show();
387 // This doesn't return.
392 // This doesn't return.
397 opt_mode
= MODE_LIST
;
402 opt_keep_original
= true;
407 message_verbosity_decrease();
415 opt_mode
= MODE_TEST
;
420 message_verbosity_increase();
427 // This is to make sure that floating point numbers
428 // always have a dot as decimal separator.
429 setlocale(LC_NUMERIC
, "C");
433 opt_mode
= MODE_COMPRESS
;
438 coder_add_filters_from_str(optarg
);
441 // --filters1...--filters9
451 coder_add_block_filters(optarg
,
452 (size_t)(c
- OPT_FILTERS
));
456 case OPT_FILTERS_HELP
:
457 // This doesn't return.
458 message_filters_help();
462 coder_add_filter(LZMA_FILTER_X86
,
463 options_bcj(optarg
));
467 coder_add_filter(LZMA_FILTER_POWERPC
,
468 options_bcj(optarg
));
472 coder_add_filter(LZMA_FILTER_IA64
,
473 options_bcj(optarg
));
477 coder_add_filter(LZMA_FILTER_ARM
,
478 options_bcj(optarg
));
482 coder_add_filter(LZMA_FILTER_ARMTHUMB
,
483 options_bcj(optarg
));
487 coder_add_filter(LZMA_FILTER_ARM64
,
488 options_bcj(optarg
));
492 coder_add_filter(LZMA_FILTER_SPARC
,
493 options_bcj(optarg
));
497 coder_add_filter(LZMA_FILTER_RISCV
,
498 options_bcj(optarg
));
502 coder_add_filter(LZMA_FILTER_DELTA
,
503 options_delta(optarg
));
507 coder_add_filter(LZMA_FILTER_LZMA1
,
508 options_lzma(optarg
));
512 coder_add_filter(LZMA_FILTER_LZMA2
,
513 options_lzma(optarg
));
520 // Just in case, support both "lzma" and "alone" since
521 // the latter was used for forward compatibility in
522 // LZMA Utils 4.32.x.
523 static const struct {
525 enum format_type format
;
527 { "auto", FORMAT_AUTO
},
529 { "lzma", FORMAT_LZMA
},
530 { "alone", FORMAT_LZMA
},
531 #ifdef HAVE_LZIP_DECODER
532 { "lzip", FORMAT_LZIP
},
534 { "raw", FORMAT_RAW
},
538 while (strcmp(types
[i
].str
, optarg
) != 0)
539 if (++i
== ARRAY_SIZE(types
))
540 message_fatal(_("%s: Unknown file "
544 opt_format
= types
[i
].format
;
550 static const struct {
554 { "none", LZMA_CHECK_NONE
},
555 { "crc32", LZMA_CHECK_CRC32
},
556 { "crc64", LZMA_CHECK_CRC64
},
557 { "sha256", LZMA_CHECK_SHA256
},
561 while (strcmp(types
[i
].str
, optarg
) != 0) {
562 if (++i
== ARRAY_SIZE(types
))
563 message_fatal(_("%s: Unsupported "
565 "check type"), optarg
);
568 // Use a separate check in case we are using different
569 // liblzma than what was used to compile us.
570 if (!lzma_check_is_supported(types
[i
].check
))
571 message_fatal(_("%s: Unsupported integrity "
572 "check type"), optarg
);
574 coder_set_check(types
[i
].check
);
578 case OPT_IGNORE_CHECK
:
579 opt_ignore_check
= true;
583 opt_block_size
= str_to_uint64("block-size", optarg
,
587 case OPT_BLOCK_LIST
: {
588 parse_block_list(optarg
);
592 case OPT_SINGLE_STREAM
:
593 opt_single_stream
= true;
601 args
->files_delim
= '\n';
606 if (args
->files_name
!= NULL
)
607 message_fatal(_("Only one file can be "
608 "specified with '--files' "
611 if (optarg
== NULL
) {
612 args
->files_name
= stdin_filename
;
613 args
->files_file
= stdin
;
615 args
->files_name
= optarg
;
616 args
->files_file
= fopen(optarg
,
617 c
== OPT_FILES
? "r" : "rb");
618 if (args
->files_file
== NULL
)
619 // TRANSLATORS: This is a translatable
620 // string because French needs a space
621 // before the colon ("%s : %s").
622 message_fatal(_("%s: %s"), optarg
,
629 opt_auto_adjust
= false;
632 case OPT_FLUSH_TIMEOUT
:
633 opt_flush_timeout
= str_to_uint64("flush-timeout",
634 optarg
, 0, UINT64_MAX
);
639 tuklib_exit(E_ERROR
, E_ERROR
, false);
648 parse_environment(args_info
*args
, char *argv0
, const char *varname
)
650 char *env
= getenv(varname
);
654 // We modify the string, so make a copy of it.
657 // Calculate the number of arguments in env. argc starts at one
658 // to include space for the program name.
660 bool prev_was_space
= true;
661 for (size_t i
= 0; env
[i
] != '\0'; ++i
) {
662 // NOTE: Cast to unsigned char is needed so that correct
663 // value gets passed to isspace(), which expects
664 // unsigned char cast to int. Casting to int is done
665 // automatically due to integer promotion, but we need to
666 // force char to unsigned char manually. Otherwise 8-bit
667 // characters would get promoted to wrong value if char is signed.
669 if (isspace((unsigned char)env
[i
])) {
670 prev_was_space
= true;
671 } else if (prev_was_space
) {
672 prev_was_space
= false;
674 // Keep argc small enough to fit into a signed int
675 // and to keep it usable for memory allocation.
676 if (++argc
== my_min(
677 INT_MAX
, SIZE_MAX
/ sizeof(char *)))
678 message_fatal(_("The environment variable "
679 "%s contains too many "
680 "arguments"), varname
);
684 // Allocate memory to hold pointers to the arguments. Add one to get
685 // space for the terminating NULL (if some systems happen to need it).
686 char **argv
= xmalloc(((size_t)(argc
) + 1) * sizeof(char *));
690 // Go through the string again. Split the arguments using '\0'
691 // characters and add pointers to the resulting strings to argv.
693 prev_was_space
= true;
694 for (size_t i
= 0; env
[i
] != '\0'; ++i
) {
695 if (isspace((unsigned char)env
[i
])) {
696 prev_was_space
= true;
698 } else if (prev_was_space
) {
699 prev_was_space
= false;
700 argv
[argc
++] = env
+ i
;
704 // Parse the argument list we got from the environment. All non-option
705 // arguments i.e. filenames are ignored.
706 parse_real(args
, argc
, argv
);
708 // Reset the state of the getopt_long() so that we can parse the
709 // command line options too. There are two incompatible ways to do it.
720 // We don't need the argument list from environment anymore.
729 args_parse(args_info
*args
, int argc
, char **argv
)
731 // Initialize those parts of *args that we need later.
732 args
->files_name
= NULL
;
733 args
->files_file
= NULL
;
734 args
->files_delim
= '\0';
736 // Check how we were called.
738 // Remove the leading path name, if any.
739 const char *name
= strrchr(argv
[0], '/');
745 // NOTE: It's possible that name[0] is now '\0' if argv[0]
746 // is weird, but it doesn't matter here.
748 // Look for full command names instead of substrings like
749 // "un", "cat", and "lz" to reduce possibility of false
750 // positives when the programs have been renamed.
751 if (strstr(name
, "xzcat") != NULL
) {
752 opt_mode
= MODE_DECOMPRESS
;
754 } else if (strstr(name
, "unxz") != NULL
) {
755 opt_mode
= MODE_DECOMPRESS
;
756 } else if (strstr(name
, "lzcat") != NULL
) {
757 opt_format
= FORMAT_LZMA
;
758 opt_mode
= MODE_DECOMPRESS
;
760 } else if (strstr(name
, "unlzma") != NULL
) {
761 opt_format
= FORMAT_LZMA
;
762 opt_mode
= MODE_DECOMPRESS
;
763 } else if (strstr(name
, "lzma") != NULL
) {
764 opt_format
= FORMAT_LZMA
;
768 // First the flags from the environment
769 parse_environment(args
, argv
[0], "XZ_DEFAULTS");
770 parse_environment(args
, argv
[0], "XZ_OPT");
772 // Then from the command line
773 parse_real(args
, argc
, argv
);
775 // If encoder or decoder support was omitted at build time,
776 // show an error now so that the rest of the code can rely on
777 // that whatever is in opt_mode is also supported.
778 #ifndef HAVE_ENCODERS
779 if (opt_mode
== MODE_COMPRESS
)
780 message_fatal(_("Compression support was disabled "
783 #ifndef HAVE_DECODERS
784 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
785 // is the only valid choice.
786 if (opt_mode
!= MODE_COMPRESS
)
787 message_fatal(_("Decompression support was disabled "
791 #ifdef HAVE_LZIP_DECODER
792 if (opt_mode
== MODE_COMPRESS
&& opt_format
== FORMAT_LZIP
)
793 message_fatal(_("Compression of lzip files (.lz) "
794 "is not supported"));
797 // Never remove the source file when the destination is not on disk.
798 // In test mode the data is written nowhere, but setting opt_stdout
799 // will make the rest of the code behave well.
800 if (opt_stdout
|| opt_mode
== MODE_TEST
) {
801 opt_keep_original
= true;
805 // When compressing, if no --format flag was used, or it
806 // was --format=auto, we compress to the .xz format.
807 if (opt_mode
== MODE_COMPRESS
&& opt_format
== FORMAT_AUTO
)
808 opt_format
= FORMAT_XZ
;
810 // Set opt_block_list to NULL if we are not compressing to the .xz
811 // format. This option cannot be used outside of this case, and
812 // simplifies the implementation later.
813 if ((opt_mode
!= MODE_COMPRESS
|| opt_format
!= FORMAT_XZ
)
814 && opt_block_list
!= NULL
) {
815 message(V_WARNING
, _("--block-list is ignored unless "
816 "compressing to the .xz format"));
817 free(opt_block_list
);
818 opt_block_list
= NULL
;
821 // If raw format is used and a custom suffix is not provided,
822 // then only stdout mode can be used when compressing or decompressing.
824 if (opt_format
== FORMAT_RAW
&& !suffix_is_set() && !opt_stdout
825 && (opt_mode
== MODE_COMPRESS
826 || opt_mode
== MODE_DECOMPRESS
)) {
827 if (args
->files_name
!= NULL
)
828 message_fatal(_("With --format=raw, "
829 "--suffix=.SUF is required "
830 "unless writing to stdout"));
832 // If all of the filenames provided are "-" (more than one
833 // "-" could be specified) or no filenames are provided,
834 // then we are only going to be writing to standard out.
835 for (int i
= optind
; i
< argc
; i
++) {
836 if (strcmp(argv
[i
], "-") != 0)
837 message_fatal(_("With --format=raw, "
838 "--suffix=.SUF is required "
839 "unless writing to stdout"));
843 // Compression settings need to be validated (options themselves and
844 // their memory usage) when compressing to any file format. It has to
845 // be done also when uncompressing raw data, since for raw decoding
846 // the options given on the command line are used to know what kind
847 // of raw data we are supposed to decode.
848 if (opt_mode
== MODE_COMPRESS
|| (opt_format
== FORMAT_RAW
849 && opt_mode
!= MODE_LIST
))
850 coder_set_compression_settings();
852 // If no filenames are given, use stdin.
853 if (argv
[optind
] == NULL
&& args
->files_name
== NULL
) {
854 // We don't modify or free() the "-" constant. The caller
855 // modifies this so don't make the struct itself const.
856 static char *names_stdin
[2] = { (char *)"-", NULL
};
857 args
->arg_names
= names_stdin
;
860 // We got at least one filename from the command line, or
861 // --files or --files0 was specified.
862 args
->arg_names
= argv
+ optind
;
863 args
->arg_count
= (unsigned int)(argc
- optind
);
874 free(opt_block_list
);