1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Argument parsing
6 /// \note Filter-specific options parsing is in options.c.
8 // Authors: Lasse Collin
11 // This file has been put into the public domain.
12 // You can do whatever you want with this file.
14 ///////////////////////////////////////////////////////////////////////////////
// Global option flags, all initially false. Within this file the option
// parser sets opt_keep_original and opt_ignore_check to true, and
// args_parse() reads opt_stdout and may force opt_keep_original to true.
22 bool opt_stdout
= false;
23 bool opt_force
= false;
24 bool opt_keep_original
= false;
25 bool opt_robot
= false;
26 bool opt_ignore_check
= false;
28 // We don't modify or free() this, but we need to assign it to some
29 // non-const pointers.
// The option parser stores it in args->files_name when the file list
// option is given without a filename (optarg == NULL), together with
// stdin as the stream to read from.
30 const char stdin_filename
[] = "(stdin)";
33 /// Parse and set the memory usage limit for compression, decompression,
34 /// and/or multithreaded decompression.
///
/// \param      name            Option name handed to str_to_uint64() when
///                             str is a plain number
/// \param      name_percentage Option name handed to str_to_uint64() when
///                             str ends with a percent sign
/// \param      str             Option argument; it is never modified here
/// \param      set_compress    Forwarded to hardware_memlimit_set()
/// \param      set_decompress  Forwarded to hardware_memlimit_set()
/// \param      set_mtdec       Forwarded to hardware_memlimit_set()
///
/// NOTE(review): the return type, the declaration of value and the code
/// that strips the percent sign and frees the copy are not visible in
/// this listing.
36 parse_memlimit(const char *name
, const char *name_percentage
, const char *str
,
37 bool set_compress
, bool set_decompress
, bool set_mtdec
)
39 bool is_percentage
= false;
42 const size_t len
= strlen(str
);
// The len > 0 test keeps the str[len - 1] access in bounds for an
// empty argument.
43 if (len
> 0 && str
[len
- 1] == '%') {
44 // Make a copy so that we can get rid of %.
46 // In the past str wasn't const and we modified it directly
47 // but that modified argv[] and thus affected what was visible
48 // in "ps auxf" or similar tools which was confusing. For
49 // example, --memlimit=50% would show up as --memlimit=50
50 // since the percent sign was overwritten here.
51 char *s
= xstrdup(str
);
// Percentage form: 1 and 100 are passed as the last two arguments,
// presumably the accepted minimum and maximum (confirm against
// str_to_uint64()).
54 value
= str_to_uint64(name_percentage
, s
, 1, 100);
57 // On 32-bit systems, SIZE_MAX would make more sense than
58 // UINT64_MAX. But use UINT64_MAX still so that scripts
59 // that assume > 4 GiB values don't break.
60 value
= str_to_uint64(name
, str
, 0, UINT64_MAX
);
// Hand the parsed value and the three selector flags onwards.
// NOTE(review): the remaining arguments of this call are cut off in
// this listing.
63 hardware_memlimit_set(value
, set_compress
, set_decompress
, set_mtdec
,
// Parse the argument of --block-list into the global opt_block_list array.
//
// str_const is the raw option argument; a private copy is made because
// the parsing loop needs a modifiable string. The array is allocated with
// xmalloc() for count + 1 entries and the extra entry gets size == 0 as the
// terminator. An empty argument, a leading comma, or too many entries end
// the program through message_fatal().
//
// NOTE(review): the return type, the declaration and counting of count,
// and several closing braces and else branches are not visible in this
// listing.
70 parse_block_list(const char *str_const
)
72 // We need a modifiable string in the for-loop.
73 char *str_start
= xstrdup(str_const
);
74 char *str
= str_start
;
76 // It must be non-empty and not begin with a comma.
77 if (str
[0] == '\0' || str
[0] == ',')
78 message_fatal(_("%s: Invalid argument to --block-list"), str
);
80 // Count the number of comma-separated strings.
82 for (size_t i
= 0; str
[i
] != '\0'; ++i
)
86 // Prevent an unlikely integer overflow.
// The bound leaves room for the count + 1 multiplication in the
// allocation below.
87 if (count
> SIZE_MAX
/ sizeof(block_list_entry
) - 1)
88 message_fatal(_("%s: Too many arguments to --block-list"),
91 // Allocate memory to hold all the sizes specified.
92 // If --block-list was specified already, its value is forgotten.
94 opt_block_list
= xmalloc((count
+ 1) * sizeof(block_list_entry
));
96 for (size_t i
= 0; i
< count
; ++i
) {
97 // Locate the next comma and replace it with \0.
98 char *p
= strchr(str
, ',');
102 // Use the default filter chain unless overridden.
103 opt_block_list
[i
].filters_index
= 0;
105 // To specify a filter chain, the block list entry may be
106 // prepended with "[filter-chain-number]:". The size is
107 // still required for every block.
109 // --block-list=2:10MiB,1:5MiB,,8MiB,0:0
112 // 1. Block of 10 MiB using filter chain 2
113 // 2. Block of 5 MiB using filter chain 1
114 // 3. Block of 5 MiB using filter chain 1
115 // 4. Block of 8 MiB using the default filter chain
116 // 5. The last block uses the default filter chain
119 // --block-list=2:MiB,1:,0
121 // Is not allowed because the second block does not specify
122 // the block size, only the filter chain.
// Only a single decimal digit followed by a colon is recognized as a
// filter chain prefix.
123 if (str
[0] >= '0' && str
[0] <= '9' && str
[1] == ':') {
125 message_fatal(_("In --block-list, block "
126 "size is missing after "
127 "filter chain number `%c:'"),
130 int filter_num
= str
[0] - '0';
131 opt_block_list
[i
].filters_index
=
132 (uint32_t)filter_num
;
136 if (str
[0] == '\0') {
137 // There is no string, that is, a comma follows
138 // another comma. Use the previous value.
140 // NOTE: We checked earlier that the first char
141 // of the whole list cannot be a comma.
143 opt_block_list
[i
] = opt_block_list
[i
- 1];
145 opt_block_list
[i
].size
= str_to_uint64("block-list",
148 // Zero indicates no more new Blocks.
149 if (opt_block_list
[i
].size
== 0) {
151 message_fatal(_("0 can only be used "
152 "as the last element "
// NOTE(review): presumably a zero given as the last element is stored
// as UINT64_MAX so that it cannot be confused with the terminator
// written after the loop; the surrounding condition is not visible.
155 opt_block_list
[i
].size
= UINT64_MAX
;
162 // Terminate the array.
163 opt_block_list
[count
].size
= 0;
// Parse one argument vector with getopt_long() and apply each option.
//
// args receives the file list settings (files_name, files_file,
// files_delim); most other options are applied through global variables
// such as opt_mode and opt_format or through calls into the coder,
// hardware and message modules. argc and argv are passed to getopt_long()
// unchanged.
//
// NOTE(review): the return type, most enum members, most case labels and
// break statements, and the end of the long_opts table are not visible in
// this listing, so the notes below only describe what is shown.
171 parse_real(args_info
*args
, int argc
, char **argv
)
// Codes for long options without a short option letter. OPT_FILTERS is
// pinned to INT_MIN; the filter chain handling further down computes a
// chain number as c - OPT_FILTERS, which presumably relies on
// OPT_FILTERS1 to OPT_FILTERS9 following it directly (confirm against the
// full enum).
174 OPT_FILTERS
= INT_MIN
,
205 OPT_MEM_MT_DECOMPRESS
,
// Short option letters for getopt_long(); a letter followed by a colon
// takes an argument.
213 static const char short_opts
[]
214 = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
// Long option table. Every entry has a NULL flag pointer, so getopt_long()
// returns the last field; long names that share a value with a short
// option are handled by the same case.
216 static const struct option long_opts
[] = {
218 { "compress", no_argument
, NULL
, 'z' },
219 { "decompress", no_argument
, NULL
, 'd' },
220 { "uncompress", no_argument
, NULL
, 'd' },
221 { "test", no_argument
, NULL
, 't' },
222 { "list", no_argument
, NULL
, 'l' },
224 // Operation modifiers
225 { "keep", no_argument
, NULL
, 'k' },
226 { "force", no_argument
, NULL
, 'f' },
227 { "stdout", no_argument
, NULL
, 'c' },
228 { "to-stdout", no_argument
, NULL
, 'c' },
229 { "single-stream", no_argument
, NULL
, OPT_SINGLE_STREAM
},
230 { "no-sparse", no_argument
, NULL
, OPT_NO_SPARSE
},
231 { "suffix", required_argument
, NULL
, 'S' },
232 // { "recursive", no_argument, NULL, 'r' }, // TODO
233 { "files", optional_argument
, NULL
, OPT_FILES
},
234 { "files0", optional_argument
, NULL
, OPT_FILES0
},
236 // Basic compression settings
237 { "format", required_argument
, NULL
, 'F' },
238 { "check", required_argument
, NULL
, 'C' },
239 { "ignore-check", no_argument
, NULL
, OPT_IGNORE_CHECK
},
240 { "block-size", required_argument
, NULL
, OPT_BLOCK_SIZE
},
241 { "block-list", required_argument
, NULL
, OPT_BLOCK_LIST
},
242 { "memlimit-compress", required_argument
, NULL
, OPT_MEM_COMPRESS
},
243 { "memlimit-decompress", required_argument
, NULL
, OPT_MEM_DECOMPRESS
},
244 { "memlimit-mt-decompress", required_argument
, NULL
, OPT_MEM_MT_DECOMPRESS
},
245 { "memlimit", required_argument
, NULL
, 'M' },
246 { "memory", required_argument
, NULL
, 'M' }, // Old alias
247 { "no-adjust", no_argument
, NULL
, OPT_NO_ADJUST
},
248 { "threads", required_argument
, NULL
, 'T' },
249 { "flush-timeout", required_argument
, NULL
, OPT_FLUSH_TIMEOUT
},
251 { "extreme", no_argument
, NULL
, 'e' },
252 { "fast", no_argument
, NULL
, '0' },
253 { "best", no_argument
, NULL
, '9' },
256 { "filters", optional_argument
, NULL
, OPT_FILTERS
},
257 { "filters1", optional_argument
, NULL
, OPT_FILTERS1
},
258 { "filters2", optional_argument
, NULL
, OPT_FILTERS2
},
259 { "filters3", optional_argument
, NULL
, OPT_FILTERS3
},
260 { "filters4", optional_argument
, NULL
, OPT_FILTERS4
},
261 { "filters5", optional_argument
, NULL
, OPT_FILTERS5
},
262 { "filters6", optional_argument
, NULL
, OPT_FILTERS6
},
263 { "filters7", optional_argument
, NULL
, OPT_FILTERS7
},
264 { "filters8", optional_argument
, NULL
, OPT_FILTERS8
},
265 { "filters9", optional_argument
, NULL
, OPT_FILTERS9
},
266 { "filters-help", optional_argument
, NULL
, OPT_FILTERS_HELP
},
268 { "lzma1", optional_argument
, NULL
, OPT_LZMA1
},
269 { "lzma2", optional_argument
, NULL
, OPT_LZMA2
},
270 { "x86", optional_argument
, NULL
, OPT_X86
},
271 { "powerpc", optional_argument
, NULL
, OPT_POWERPC
},
272 { "ia64", optional_argument
, NULL
, OPT_IA64
},
273 { "arm", optional_argument
, NULL
, OPT_ARM
},
274 { "armthumb", optional_argument
, NULL
, OPT_ARMTHUMB
},
275 { "arm64", optional_argument
, NULL
, OPT_ARM64
},
276 { "sparc", optional_argument
, NULL
, OPT_SPARC
},
277 { "delta", optional_argument
, NULL
, OPT_DELTA
},
280 { "quiet", no_argument
, NULL
, 'q' },
281 { "verbose", no_argument
, NULL
, 'v' },
282 { "no-warn", no_argument
, NULL
, 'Q' },
283 { "robot", no_argument
, NULL
, OPT_ROBOT
},
284 { "info-memory", no_argument
, NULL
, OPT_INFO_MEMORY
},
285 { "help", no_argument
, NULL
, 'h' },
286 { "long-help", no_argument
, NULL
, 'H' },
287 { "version", no_argument
, NULL
, 'V' },
// Fetch one option per iteration into c. NOTE(review): the loop end
// condition and the switch statement line are not visible in this listing.
294 while ((c
= getopt_long(argc
, argv
, short_opts
, long_opts
, NULL
))
297 // Compression preset (also for decompression if --format=raw)
298 case '0': case '1': case '2': case '3': case '4':
299 case '5': case '6': case '7': case '8': case '9':
// The digit character is converted to its numeric value 0-9.
300 coder_set_preset((uint32_t)(c
- '0'));
303 // --memlimit-compress
304 case OPT_MEM_COMPRESS
:
305 parse_memlimit("memlimit-compress",
306 "memlimit-compress%", optarg
,
310 // --memlimit-decompress
311 case OPT_MEM_DECOMPRESS
:
312 parse_memlimit("memlimit-decompress",
313 "memlimit-decompress%", optarg
,
317 // --memlimit-mt-decompress
318 case OPT_MEM_MT_DECOMPRESS
:
319 parse_memlimit("memlimit-mt-decompress",
320 "memlimit-mt-decompress%", optarg
,
326 parse_memlimit("memlimit", "memlimit%", optarg
,
336 // Since xz 5.4.0: Ignore leading '+' first.
337 const char *s
= optarg
;
338 if (optarg
[0] == '+')
341 // The max is from src/liblzma/common/common.h.
// NOTE(review): the uint64_t result is narrowed to uint32_t here; the
// 16384 argument presumably bounds the value so the narrowing is
// lossless (confirm that str_to_uint64() enforces it).
342 uint32_t t
= str_to_uint64("threads", s
, 0, 16384);
344 // If leading '+' was used then use multi-threaded
345 // mode even if exactly one thread was specified.
346 if (t
== 1 && optarg
[0] == '+')
349 hardware_threads_set(t
);
355 // This doesn't return.
365 opt_mode
= MODE_DECOMPRESS
;
379 case OPT_INFO_MEMORY
:
380 // This doesn't return.
381 hardware_memlimit_show();
385 // This doesn't return.
390 // This doesn't return.
395 opt_mode
= MODE_LIST
;
400 opt_keep_original
= true;
405 message_verbosity_decrease();
413 opt_mode
= MODE_TEST
;
418 message_verbosity_increase();
425 // This is to make sure that floating point numbers
426 // always have a dot as decimal separator.
427 setlocale(LC_NUMERIC
, "C");
431 opt_mode
= MODE_COMPRESS
;
436 coder_add_filters_from_str(optarg
);
439 // --filters1...--filters9
// The second argument is the distance of the option code from
// OPT_FILTERS, used as the filter chain number.
449 coder_add_block_filters(optarg
,
450 (size_t)(c
- OPT_FILTERS
));
454 case OPT_FILTERS_HELP
:
455 // This doesn't return.
456 message_filters_help();
// Each of the following calls appends one filter to the chain, with its
// options parsed from optarg by options_bcj(), options_delta() or
// options_lzma(). NOTE(review): the case labels are not visible here.
460 coder_add_filter(LZMA_FILTER_X86
,
461 options_bcj(optarg
));
465 coder_add_filter(LZMA_FILTER_POWERPC
,
466 options_bcj(optarg
));
470 coder_add_filter(LZMA_FILTER_IA64
,
471 options_bcj(optarg
));
475 coder_add_filter(LZMA_FILTER_ARM
,
476 options_bcj(optarg
));
480 coder_add_filter(LZMA_FILTER_ARMTHUMB
,
481 options_bcj(optarg
));
485 coder_add_filter(LZMA_FILTER_ARM64
,
486 options_bcj(optarg
));
490 coder_add_filter(LZMA_FILTER_SPARC
,
491 options_bcj(optarg
));
495 coder_add_filter(LZMA_FILTER_DELTA
,
496 options_delta(optarg
));
500 coder_add_filter(LZMA_FILTER_LZMA1
,
501 options_lzma(optarg
));
505 coder_add_filter(LZMA_FILTER_LZMA2
,
506 options_lzma(optarg
));
513 // Just in case, support both "lzma" and "alone" since
514 // the latter was used for forward compatibility in
515 // LZMA Utils 4.32.x.
// Table mapping a format name to its enum format_type value.
516 static const struct {
518 enum format_type format
;
520 { "auto", FORMAT_AUTO
},
522 { "lzma", FORMAT_LZMA
},
523 { "alone", FORMAT_LZMA
},
524 #ifdef HAVE_LZIP_DECODER
525 { "lzip", FORMAT_LZIP
},
527 { "raw", FORMAT_RAW
},
// Linear search for optarg in types[]; stepping past the last entry
// is a fatal error, so i is a valid index after the loop.
531 while (strcmp(types
[i
].str
, optarg
) != 0)
532 if (++i
== ARRAY_SIZE(types
))
533 message_fatal(_("%s: Unknown file "
537 opt_format
= types
[i
].format
;
// Table mapping an integrity check name to its check ID.
543 static const struct {
547 { "none", LZMA_CHECK_NONE
},
548 { "crc32", LZMA_CHECK_CRC32
},
549 { "crc64", LZMA_CHECK_CRC64
},
550 { "sha256", LZMA_CHECK_SHA256
},
// Same linear search pattern as for the format name.
554 while (strcmp(types
[i
].str
, optarg
) != 0) {
555 if (++i
== ARRAY_SIZE(types
))
556 message_fatal(_("%s: Unsupported "
558 "check type"), optarg
);
561 // Use a separate check in case we are using different
562 // liblzma than what was used to compile us.
563 if (!lzma_check_is_supported(types
[i
].check
))
564 message_fatal(_("%s: Unsupported integrity "
565 "check type"), optarg
);
567 coder_set_check(types
[i
].check
);
571 case OPT_IGNORE_CHECK
:
572 opt_ignore_check
= true;
576 opt_block_size
= str_to_uint64("block-size", optarg
,
580 case OPT_BLOCK_LIST
: {
581 parse_block_list(optarg
);
585 case OPT_SINGLE_STREAM
:
586 opt_single_stream
= true;
// File list handling: files_delim is set to a newline here.
// NOTE(review): the case labels around this point are not visible;
// args_parse() initializes files_delim to a null character.
594 args
->files_delim
= '\n';
// A second file list option is rejected because files_name is already
// set by the first one.
599 if (args
->files_name
!= NULL
)
600 message_fatal(_("Only one file can be "
601 "specified with `--files' "
// Without an argument the list of filenames is read from stdin.
604 if (optarg
== NULL
) {
605 args
->files_name
= stdin_filename
;
606 args
->files_file
= stdin
;
// Otherwise open the named file: text mode for OPT_FILES, binary mode
// for any other code reaching this point. A failed fopen() is fatal.
608 args
->files_name
= optarg
;
609 args
->files_file
= fopen(optarg
,
610 c
== OPT_FILES
? "r" : "rb");
611 if (args
->files_file
== NULL
)
612 // TRANSLATORS: This is a translatable
613 // string because French needs a space
614 // before the colon ("%s : %s").
615 message_fatal(_("%s: %s"), optarg
,
622 opt_auto_adjust
= false;
625 case OPT_FLUSH_TIMEOUT
:
626 opt_flush_timeout
= str_to_uint64("flush-timeout",
627 optarg
, 0, UINT64_MAX
);
// NOTE(review): presumably the default case for unrecognized options;
// the case label is not visible in this listing.
632 tuklib_exit(E_ERROR
, E_ERROR
, false);
// Parse options stored in the environment variable named by varname.
//
// The value returned by getenv() is scanned twice: the first pass counts
// the whitespace-separated words, the second pass stores a pointer to the
// start of each word in a freshly allocated argv array. The result is then
// handed to parse_real() with the same args pointer.
//
// NOTE(review): the return type, the handling of an unset variable, the
// copy of the string, the declaration of argc, the use of argv0, the
// insertion of the terminating null characters, and the final cleanup are
// not visible in this listing.
641 parse_environment(args_info
*args
, char *argv0
, const char *varname
)
643 char *env
= getenv(varname
);
647 // We modify the string, so make a copy of it.
650 // Calculate the number of arguments in env. argc starts at one
651 // to include space for the program name.
// prev_was_space starts as true so that the first non-space character
// begins a new argument.
653 bool prev_was_space
= true;
654 for (size_t i
= 0; env
[i
] != '\0'; ++i
) {
655 // NOTE: Cast to unsigned char is needed so that correct
656 // value gets passed to isspace(), which expects
657 // unsigned char cast to int. Casting to int is done
658 // automatically due to integer promotion, but we need to
659 // force char to unsigned char manually. Otherwise 8-bit
660 // characters would get promoted to wrong value if
// char is a signed type.
662 if (isspace((unsigned char)env
[i
])) {
663 prev_was_space
= true;
664 } else if (prev_was_space
) {
665 prev_was_space
= false;
667 // Keep argc small enough to fit into a signed int
668 // and to keep it usable for memory allocation.
669 if (++argc
== my_min(
670 INT_MAX
, SIZE_MAX
/ sizeof(char *)))
671 message_fatal(_("The environment variable "
672 "%s contains too many "
673 "arguments"), varname
);
677 // Allocate memory to hold pointers to the arguments. Add one to get
678 // space for the terminating NULL (if some systems happen to need it).
679 char **argv
= xmalloc(((size_t)(argc
) + 1) * sizeof(char *));
683 // Go through the string again. Split the arguments using '\0'
684 // characters and add pointers to the resulting strings to argv.
686 prev_was_space
= true;
687 for (size_t i
= 0; env
[i
] != '\0'; ++i
) {
688 if (isspace((unsigned char)env
[i
])) {
689 prev_was_space
= true;
691 } else if (prev_was_space
) {
692 prev_was_space
= false;
// Record where this argument begins inside env.
693 argv
[argc
++] = env
+ i
;
697 // Parse the argument list we got from the environment. All non-option
698 // arguments i.e. filenames are ignored.
699 parse_real(args
, argc
, argv
);
701 // Reset the state of the getopt_long() so that we can parse the
702 // command line options too. There are two incompatible ways to
// do it. NOTE(review): the reset code itself is not visible in this
// listing.
713 // We don't need the argument list from environment anymore.
// Top-level argument parsing.
//
// Steps visible in this listing:
//   1. Initialize the files_* fields of *args.
//   2. Choose a default opt_mode and opt_format from the program name in
//      argv[0].
//   3. Parse options from the XZ_DEFAULTS and XZ_OPT environment
//      variables, then from the command line, so later sources are
//      parsed last.
//   4. Reject or adjust option combinations (build-time disabled modes,
//      lzip compression, --block-list outside .xz compression, raw format
//      without a suffix).
//   5. Store the list of filenames in args->arg_names, using a built-in
//      list holding a single dash when no names and no file list option
//      were given.
//
// NOTE(review): the return type, several closing braces, else branches
// and endif lines are not visible in this listing.
722 args_parse(args_info
*args
, int argc
, char **argv
)
724 // Initialize those parts of *args that we need later.
725 args
->files_name
= NULL
;
726 args
->files_file
= NULL
;
727 args
->files_delim
= '\0';
729 // Check how we were called.
731 // Remove the leading path name, if any.
// NOTE(review): strrchr() returns NULL when argv[0] contains no slash;
// the handling of that case is not visible in this listing and must
// exist before name is passed to strstr() below.
732 const char *name
= strrchr(argv
[0], '/');
738 // NOTE: It's possible that name[0] is now '\0' if argv[0]
739 // is weird, but it doesn't matter here.
741 // Look for full command names instead of substrings like
742 // "un", "cat", and "lz" to reduce possibility of false
743 // positives when the programs have been renamed.
// The order of the tests matters: unlzma is tested before lzma because
// the shorter name is a substring of the longer one.
744 if (strstr(name
, "xzcat") != NULL
) {
745 opt_mode
= MODE_DECOMPRESS
;
747 } else if (strstr(name
, "unxz") != NULL
) {
748 opt_mode
= MODE_DECOMPRESS
;
749 } else if (strstr(name
, "lzcat") != NULL
) {
750 opt_format
= FORMAT_LZMA
;
751 opt_mode
= MODE_DECOMPRESS
;
753 } else if (strstr(name
, "unlzma") != NULL
) {
754 opt_format
= FORMAT_LZMA
;
755 opt_mode
= MODE_DECOMPRESS
;
756 } else if (strstr(name
, "lzma") != NULL
) {
757 opt_format
= FORMAT_LZMA
;
761 // First the flags from the environment
762 parse_environment(args
, argv
[0], "XZ_DEFAULTS");
763 parse_environment(args
, argv
[0], "XZ_OPT");
765 // Then from the command line
766 parse_real(args
, argc
, argv
);
768 // If encoder or decoder support was omitted at build time,
769 // show an error now so that the rest of the code can rely on
770 // that whatever is in opt_mode is also supported.
771 #ifndef HAVE_ENCODERS
772 if (opt_mode
== MODE_COMPRESS
)
773 message_fatal(_("Compression support was disabled "
776 #ifndef HAVE_DECODERS
777 // Even MODE_LIST cannot work without decoder support so MODE_COMPRESS
778 // is the only valid choice.
779 if (opt_mode
!= MODE_COMPRESS
)
780 message_fatal(_("Decompression support was disabled "
784 #ifdef HAVE_LZIP_DECODER
785 if (opt_mode
== MODE_COMPRESS
&& opt_format
== FORMAT_LZIP
)
786 message_fatal(_("Compression of lzip files (.lz) "
787 "is not supported"));
790 // Never remove the source file when the destination is not on disk.
791 // In test mode the data is written nowhere, but setting opt_stdout
792 // will make the rest of the code behave well.
793 if (opt_stdout
|| opt_mode
== MODE_TEST
) {
794 opt_keep_original
= true;
798 // When compressing, if no --format flag was used, or it
799 // was --format=auto, we compress to the .xz format.
800 if (opt_mode
== MODE_COMPRESS
&& opt_format
== FORMAT_AUTO
)
801 opt_format
= FORMAT_XZ
;
803 // Set opt_block_list to NULL if we are not compressing to the .xz
804 // format. This option cannot be used outside of this case, and
805 // simplifies the implementation later.
806 if ((opt_mode
!= MODE_COMPRESS
|| opt_format
!= FORMAT_XZ
)
807 && opt_block_list
!= NULL
) {
808 message(V_WARNING
, _("--block-list is ignored unless "
809 "compressing to the .xz format"));
// Release the list and clear the pointer so that it is not used or
// freed again later.
810 free(opt_block_list
);
811 opt_block_list
= NULL
;
814 // Compression settings need to be validated (options themselves and
815 // their memory usage) when compressing to any file format. It has to
816 // be done also when uncompressing raw data, since for raw decoding
817 // the options given on the command line are used to know what kind
818 // of raw data we are supposed to decode.
819 if (opt_mode
== MODE_COMPRESS
|| (opt_format
== FORMAT_RAW
820 && opt_mode
!= MODE_LIST
))
821 coder_set_compression_settings();
823 // If raw format is used and a custom suffix is not provided,
824 // then only stdout mode can be used when compressing or decompressing.
825 if (opt_format
== FORMAT_RAW
&& !suffix_is_set() && !opt_stdout
826 && (opt_mode
== MODE_COMPRESS
827 || opt_mode
== MODE_DECOMPRESS
))
828 message_fatal(_("With --format=raw, --suffix=.SUF is "
829 "required unless writing to stdout"));
831 // If no filenames are given, use stdin.
// optind is the index of the first non-option argument after the
// parse_real() call above; this test relies on argv being terminated
// by a NULL pointer.
832 if (argv
[optind
] == NULL
&& args
->files_name
== NULL
) {
833 // We don't modify or free() the "-" constant. The caller
834 // modifies this so don't make the struct itself const.
835 static char *names_stdin
[2] = { (char *)"-", NULL
};
836 args
->arg_names
= names_stdin
;
839 // We got at least one filename from the command line, or
840 // --files or --files0 was specified.
841 args
->arg_names
= argv
+ optind
;
842 args
->arg_count
= (unsigned int)(argc
- optind
);
853 free(opt_block_list
);