Merge mozilla-central to autoland. CLOSED TREE
[gecko.git] / toolkit / xre / CmdLineAndEnvUtils.h
blob6d26c9b45c4338aba859efe80ffa64c0b0b833f9
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
7 #ifndef mozilla_CmdLineAndEnvUtils_h
8 #define mozilla_CmdLineAndEnvUtils_h
10 // NB: This code may be used outside of xul and thus must not depend on XPCOM
12 #if defined(MOZILLA_INTERNAL_API)
13 # include "prenv.h"
14 # include "prprf.h"
15 # include <string.h>
16 #endif
18 #if defined(XP_WIN)
19 # include "mozilla/UniquePtr.h"
20 # include "mozilla/Vector.h"
21 # include "mozilla/WinHeaderOnlyUtils.h"
23 # include <wchar.h>
24 # include <windows.h>
25 #endif // defined(XP_WIN)
27 #include "mozilla/Maybe.h"
28 #include "mozilla/MemoryChecking.h"
29 #include "mozilla/TypedEnumBits.h"
31 #include <ctype.h>
32 #include <stdint.h>
33 #include <stdlib.h>
35 #ifndef NS_NO_XPCOM
36 # include "nsIFile.h"
37 # include "mozilla/AlreadyAddRefed.h"
38 #endif
40 // Undo X11/X.h's definition of None
41 #undef None
43 namespace mozilla {
// Result of looking for an option on the command line (returned by CheckArg).
enum ArgResult {
  ARG_NONE = 0,   // the option was not found
  ARG_FOUND = 1,  // the option (and its parameter, if requested) was found
  ARG_BAD = 2     // you wanted a param, but there isn't one
};
// Remove the entry that `argv` points at from a null-terminated argument
// vector, and decrement `argc` to match.
//
// `argv` is a pointer to the slot to remove (not necessarily the start of the
// array). Every following entry, including the terminating null pointer, is
// shifted down by one slot. The slot being removed must itself be non-null:
// the loop always reads `argv[1]` before testing for the terminator.
template <typename CharT>
inline void RemoveArg(int& argc, CharT** argv) {
  do {
    *argv = *(argv + 1);
    ++argv;
  } while (*argv);

  --argc;
}
61 namespace internal {
63 #if 'a' == '\x61'
// Valid option characters must have the same representation in every locale
// (which is true for most of ASCII, barring \x5C and \x7E).
//
// Returns true only for ASCII digits, lowercase ASCII letters and '-'.
static inline constexpr bool isValidOptionCharacter(char c) {
  // We specifically avoid the use of `islower` here; it's locale-dependent, and
  // may return true for non-ASCII values in some locales.
  return ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || c == '-';
}
// Convert uppercase to lowercase, locale-insensitively.
//
// Only 'A'..'Z' are changed (by setting the 0x20 bit, i.e. OR-ing with ' ');
// every other value is returned untouched.
static inline constexpr char toLowercase(char c) {
  // We specifically avoid the use of `tolower` here; it's locale-dependent, and
  // may output ASCII values for non-ASCII input (or vice versa) in some
  // locales.
  return ('A' <= c && c <= 'Z') ? char(c | ' ') : c;
}
// Convert a CharT to a char, ensuring that no CharT is mapped to any valid
// option character except the unique CharT naturally corresponding thereto.
//
// Values that survive masking with 0xff unchanged are passed through; anything
// else collapses to 0xff, which is never a valid option character.
template <typename CharT>
static inline constexpr char toNarrow(CharT c) {
  // confirmed to compile down to nothing when `CharT` is `char`
  return (c & static_cast<CharT>(0xff)) == c ? c : 0xff;
}
87 #else
88 // The target system's character set isn't even ASCII-compatible. If you're
89 // porting Gecko to such a platform, you'll have to implement these yourself.
90 # error Character conversion functions not implemented for this platform.
91 #endif
93 // Case-insensitively compare a string taken from the command-line (`mixedstr`)
94 // to the text of some known command-line option (`lowerstr`).
95 template <typename CharT>
96 static inline bool strimatch(const char* lowerstr, const CharT* mixedstr) {
97 while (*lowerstr) {
98 if (!*mixedstr) return false; // mixedstr is shorter
100 // Non-ASCII strings may compare incorrectly depending on the user's locale.
101 // Some ASCII-safe characters are also dispermitted for semantic reasons
102 // and simplicity.
103 if (!isValidOptionCharacter(*lowerstr)) return false;
105 if (toLowercase(toNarrow(*mixedstr)) != *lowerstr) {
106 return false; // no match
109 ++lowerstr;
110 ++mixedstr;
113 if (*mixedstr) return false; // lowerstr is shorter
115 return true;
118 // Given a command-line argument, return Nothing if it isn't structurally a
119 // command-line option, and Some(<the option text>) if it is.
120 template <typename CharT>
121 mozilla::Maybe<const CharT*> ReadAsOption(const CharT* str) {
122 if (!str) {
123 return Nothing();
125 if (*str == '-') {
126 str++;
127 if (*str == '-') {
128 str++;
130 return Some(str);
132 #ifdef XP_WIN
133 if (*str == '/') {
134 return Some(str + 1);
136 #endif
137 return Nothing();
140 } // namespace internal
142 using internal::strimatch;
144 const wchar_t kCommandLineDelimiter[] = L" \t";
// Bit flags controlling the behaviour of CheckArg.
enum class CheckArgFlag : uint32_t {
  None = 0,
  // (1 << 0) Used to be CheckOSInt
  RemoveArg = (1 << 1)  // Remove the argument from the argv array.
};
152 MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(CheckArgFlag)
155 * Check for a commandline flag. If the flag takes a parameter, the
156 * parameter is returned in aParam. Flags may be in the form -arg or
157 * --arg (or /arg on win32).
159 * @param aArgc The argc value.
160 * @param aArgv The original argv.
161 * @param aArg the parameter to check. Must be lowercase.
162 * @param aParam if non-null, the -arg <data> will be stored in this pointer.
163 * This is *not* allocated, but rather a pointer to the argv data.
164 * @param aFlags Flags @see CheckArgFlag
166 template <typename CharT>
167 inline ArgResult CheckArg(int& aArgc, CharT** aArgv, const char* aArg,
168 const CharT** aParam = nullptr,
169 CheckArgFlag aFlags = CheckArgFlag::RemoveArg) {
170 using internal::ReadAsOption;
171 MOZ_ASSERT(aArgv && aArg);
173 CharT** curarg = aArgv + 1; // skip argv[0]
174 ArgResult ar = ARG_NONE;
176 while (*curarg) {
177 if (const auto arg = ReadAsOption(*curarg)) {
178 if (strimatch(aArg, arg.value())) {
179 if (aFlags & CheckArgFlag::RemoveArg) {
180 RemoveArg(aArgc, curarg);
181 } else {
182 ++curarg;
185 if (!aParam) {
186 ar = ARG_FOUND;
187 break;
190 if (*curarg) {
191 if (ReadAsOption(*curarg)) {
192 return ARG_BAD;
195 *aParam = *curarg;
197 if (aFlags & CheckArgFlag::RemoveArg) {
198 RemoveArg(aArgc, curarg);
201 ar = ARG_FOUND;
202 break;
205 return ARG_BAD;
209 ++curarg;
212 return ar;
215 template <typename CharT>
216 inline ArgResult CheckArg(int& aArgc, CharT** aArgv, const char* aArg,
217 std::nullptr_t,
218 CheckArgFlag aFlags = CheckArgFlag::RemoveArg) {
219 return CheckArg<CharT>(aArgc, aArgv, aArg,
220 static_cast<const CharT**>(nullptr), aFlags);
223 namespace internal {
224 // template <typename T>
225 // constexpr bool IsStringRange =
226 // std::convertible_to<std::ranges::range_value_t<T>, const char *>;
// Returns true if `unknown` (an option's text, as taken from the command line)
// case-insensitively matches any entry of `known`, a range of lowercase
// `const char*` option names.
template <typename CharT, typename ListT>
// requires IsStringRange<ListT>
static bool MatchesAnyOf(CharT const* unknown, ListT const& known) {
  for (const char* k : known) {
    if (strimatch(k, unknown)) {
      return true;
    }
  }
  return false;
}
239 template <typename CharT, typename ReqContainerT, typename OptContainerT>
240 // requires IsStringRange<ReqContainerT> && IsStringRange<OptContainerT>
241 inline bool EnsureCommandlineSafeImpl(int aArgc, CharT** aArgv,
242 ReqContainerT const& requiredParams,
243 OptContainerT const& optionalParams) {
244 // We expect either no -osint, or the full commandline to be:
246 // app -osint [<optional-param>...] <required-param> <required-argument>
248 // Otherwise, we abort to avoid abuse of other command-line handlers from apps
249 // that do a poor job escaping links they give to the OS.
251 // Note that the above implies that optional parameters do not themselves take
252 // arguments. This is a security feature, to prevent the possible injection of
253 // additional parameters via such arguments. (See, e.g., bug 384384.)
255 static constexpr const char* osintLit = "osint";
257 // If "-osint" (or the equivalent) is not present, then this is trivially
258 // satisfied.
259 if (CheckArg(aArgc, aArgv, osintLit, nullptr, CheckArgFlag::None) !=
260 ARG_FOUND) {
261 return true;
264 // There should be at least 4 items present:
265 // <app name> -osint <required param> <arg>.
266 if (aArgc < 4) {
267 return false;
270 // The first parameter must be osint.
271 const auto arg1 = ReadAsOption(aArgv[1]);
272 if (!arg1) return false;
273 if (!strimatch(osintLit, arg1.value())) {
274 return false;
276 // Following this is any number of optional parameters, terminated by a
277 // required parameter.
278 int pos = 2;
279 while (true) {
280 if (pos >= aArgc) return false;
282 auto const arg = ReadAsOption(aArgv[pos]);
283 if (!arg) return false;
285 if (MatchesAnyOf(arg.value(), optionalParams)) {
286 ++pos;
287 continue;
290 if (MatchesAnyOf(arg.value(), requiredParams)) {
291 ++pos;
292 break;
295 return false;
298 // There must be one argument remaining...
299 if (pos + 1 != aArgc) return false;
300 // ... which must not be another option.
301 if (ReadAsOption(aArgv[pos])) {
302 return false;
305 // Nothing ill-formed was passed.
306 return true;
// C (and so C++) disallows empty arrays. Rather than require callers to jump
// through hoops to specify an empty optional-argument list, allow either its
// omission or its specification as `nullptr`, and do the hoop-jumping here.
//
// No such facility is provided for requiredParams, which must have at least one
// entry.
template <typename CharT, typename ReqContainerT>
inline bool EnsureCommandlineSafeImpl(int aArgc, CharT** aArgv,
                                      ReqContainerT const& requiredParams,
                                      std::nullptr_t _ = nullptr) {
  // A minimal range type whose begin() and end() are equal, i.e. an empty list
  // of `const char*`.
  struct {
    inline const char** begin() const { return nullptr; }
    inline const char** end() const { return nullptr; }
  } emptyContainer;
  return EnsureCommandlineSafeImpl(aArgc, aArgv, requiredParams,
                                   emptyContainer);
}
326 } // namespace internal
328 template <typename CharT, typename ReqContainerT,
329 typename OptContainerT = std::nullptr_t>
330 inline void EnsureCommandlineSafe(
331 int aArgc, CharT** aArgv, ReqContainerT const& requiredParams,
332 OptContainerT const& optionalParams = nullptr) {
333 if (!internal::EnsureCommandlineSafeImpl(aArgc, aArgv, requiredParams,
334 optionalParams)) {
335 exit(127);
339 #if defined(XP_WIN)
340 namespace internal {
343 * Attempt to copy the string `s` (considered as a command-line argument) into
344 * the buffer `d` with all necessary escaping and quoting. Returns the number of
345 * characters written.
347 * If `d` is NULL, doesn't actually write anything to `d`, and merely returns
348 * the number of characters that _would_ have been written.
350 * (This moderately-awkward conflation ensures that the pre-allocation counting
351 * step and post-allocation copying step use the same algorithm.)
353 inline size_t CopyArgImpl_(wchar_t* d, const wchar_t* s) {
354 size_t len = 0;
356 bool const actuallyCopy = d != nullptr;
357 auto const appendChar = [&](wchar_t c) {
358 if (actuallyCopy) {
359 *d++ = c;
361 len++;
364 bool hasDoubleQuote = wcschr(s, L'"') != nullptr;
365 // Only add doublequotes if...
366 bool addDoubleQuotes =
367 // ... the string is empty, or...
368 *s == '\0' ||
369 // ... the string contains a space or a tab.
370 wcspbrk(s, kCommandLineDelimiter) != nullptr;
372 if (addDoubleQuotes) {
373 appendChar('"');
376 if (hasDoubleQuote) {
377 size_t backslashes = 0;
378 while (*s) {
379 if (*s == '\\') {
380 ++backslashes;
381 } else {
382 if (*s == '"') {
383 // Escape the doublequote and all backslashes preceding the
384 // doublequote
385 for (size_t i = 0; i <= backslashes; ++i) {
386 appendChar('\\');
390 backslashes = 0;
393 appendChar(*s);
394 ++s;
396 } else {
397 // optimization: just blit
398 auto const src_len = wcslen(s);
399 if (actuallyCopy) {
400 ::wcscpy(d, s);
401 d += src_len;
403 len += src_len;
406 if (addDoubleQuotes) {
407 appendChar('"');
410 return len;
414 * Compute the space required for the serialized form of this argument. Includes
415 * any additional space needed for quotes and backslash-escapes.
417 inline size_t ArgStrLen(const wchar_t* s) { return CopyArgImpl_(nullptr, s); }
420 * Copy string "s" to string "d", quoting the argument as appropriate and
421 * escaping doublequotes along with any backslashes that immediately precede
422 * doublequotes.
423 * The CRT parses this to retrieve the original argc/argv that we meant,
424 * see STDARGV.C in the MSVC CRT sources.
426 * @return the end of the string
428 inline wchar_t* ArgToString(wchar_t* d, const wchar_t* s) {
429 return d + CopyArgImpl_(d, s);
432 } // namespace internal
435 * Creates a command line from a list of arguments.
437 * @param argc Number of elements in |argv|
438 * @param argv Array of arguments
439 * @param aArgcExtra Number of elements in |aArgvExtra|
440 * @param aArgvExtra Optional array of arguments to be appended to the resulting
441 * command line after those provided by |argv|.
443 inline UniquePtr<wchar_t[]> MakeCommandLine(
444 int argc, const wchar_t* const* argv, int aArgcExtra = 0,
445 const wchar_t* const* aArgvExtra = nullptr) {
446 int i;
447 size_t len = 0;
449 // The + 1 for each argument reserves space for either a ' ' or the null
450 // terminator, depending on the position of the argument.
451 for (i = 0; i < argc; ++i) {
452 len += internal::ArgStrLen(argv[i]) + 1;
455 for (i = 0; i < aArgcExtra; ++i) {
456 len += internal::ArgStrLen(aArgvExtra[i]) + 1;
459 // Protect against callers that pass 0 arguments
460 if (len == 0) {
461 len = 1;
464 auto s = MakeUnique<wchar_t[]>(len);
466 int totalArgc = argc + aArgcExtra;
468 wchar_t* c = s.get();
469 for (i = 0; i < argc; ++i) {
470 c = internal::ArgToString(c, argv[i]);
471 if (i + 1 != totalArgc) {
472 *c = ' ';
473 ++c;
477 for (i = 0; i < aArgcExtra; ++i) {
478 c = internal::ArgToString(c, aArgvExtra[i]);
479 if (i + 1 != aArgcExtra) {
480 *c = ' ';
481 ++c;
485 *c = '\0';
487 return s;
490 inline bool SetArgv0ToFullBinaryPath(wchar_t* aArgv[]) {
491 if (!aArgv) {
492 return false;
495 UniquePtr<wchar_t[]> newArgv_0(GetFullBinaryPath());
496 if (!newArgv_0) {
497 return false;
500 // We intentionally leak newArgv_0 into argv[0]
501 aArgv[0] = newArgv_0.release();
502 MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(aArgv[0]);
503 return true;
506 # if defined(MOZILLA_INTERNAL_API)
507 // This class converts a command line string into an array of the arguments.
508 // It's basically the opposite of MakeCommandLine. However, the behavior is
509 // different from ::CommandLineToArgvW in several ways, such as escaping a
510 // backslash or quoting an argument containing whitespaces. This satisfies
511 // the examples at:
512 // https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
513 // https://docs.microsoft.com/en-us/previous-versions/17w5ykft(v=vs.85)
514 template <typename T>
515 class CommandLineParserWin final {
516 int mArgc;
517 T** mArgv;
519 void Release() {
520 if (mArgv) {
521 while (mArgc) {
522 delete[] mArgv[--mArgc];
524 delete[] mArgv;
525 mArgv = nullptr;
529 public:
530 CommandLineParserWin() : mArgc(0), mArgv(nullptr) {}
531 ~CommandLineParserWin() { Release(); }
533 CommandLineParserWin(const CommandLineParserWin&) = delete;
534 CommandLineParserWin(CommandLineParserWin&&) = delete;
535 CommandLineParserWin& operator=(const CommandLineParserWin&) = delete;
536 CommandLineParserWin& operator=(CommandLineParserWin&&) = delete;
538 int Argc() const { return mArgc; }
539 const T* const* Argv() const { return mArgv; }
541 // Returns the number of characters handled
542 int HandleCommandLine(const nsTSubstring<T>& aCmdLineString) {
543 Release();
545 if (aCmdLineString.IsEmpty()) {
546 return 0;
549 int justCounting = 1;
550 // Flags, etc.
551 int init = 1;
552 int between, quoted, bSlashCount;
553 const T* p;
554 const T* const pEnd = aCmdLineString.EndReading();
555 nsTAutoString<T> arg;
557 // We loop if we've not finished the second pass through.
558 while (1) {
559 // Initialize if required.
560 if (init) {
561 p = aCmdLineString.BeginReading();
562 between = 1;
563 mArgc = quoted = bSlashCount = 0;
565 init = 0;
568 const T charCurr = (p < pEnd) ? *p : 0;
569 const T charNext = (p + 1 < pEnd) ? *(p + 1) : 0;
571 if (between) {
572 // We are traversing whitespace between args.
573 // Check for start of next arg.
574 if (charCurr != 0 && !wcschr(kCommandLineDelimiter, charCurr)) {
575 // Start of another arg.
576 between = 0;
577 arg.Truncate();
578 switch (charCurr) {
579 case '\\':
580 // Count the backslash.
581 bSlashCount = 1;
582 break;
583 case '"':
584 // Remember we're inside quotes.
585 quoted = 1;
586 break;
587 default:
588 // Add character to arg.
589 arg += charCurr;
590 break;
592 } else {
593 // Another space between args, ignore it.
595 } else {
596 // We are processing the contents of an argument.
597 // Check for whitespace or end.
598 if (charCurr == 0 ||
599 (!quoted && wcschr(kCommandLineDelimiter, charCurr))) {
600 // Process pending backslashes (interpret them
601 // literally since they're not followed by a ").
602 while (bSlashCount) {
603 arg += '\\';
604 bSlashCount--;
606 // End current arg.
607 if (!justCounting) {
608 mArgv[mArgc] = new T[arg.Length() + 1];
609 memcpy(mArgv[mArgc], arg.get(), (arg.Length() + 1) * sizeof(T));
611 mArgc++;
612 // We're now between args.
613 between = 1;
614 } else {
615 // Still inside argument, process the character.
616 switch (charCurr) {
617 case '"':
618 // First, digest preceding backslashes (if any).
619 while (bSlashCount > 1) {
620 // Put one backsplash in arg for each pair.
621 arg += '\\';
622 bSlashCount -= 2;
624 if (bSlashCount) {
625 // Quote is literal.
626 arg += '"';
627 bSlashCount = 0;
628 } else {
629 // Quote starts or ends a quoted section.
630 if (quoted) {
631 // Check for special case of consecutive double
632 // quotes inside a quoted section.
633 if (charNext == '"') {
634 // This implies a literal double-quote. Fake that
635 // out by causing next double-quote to look as
636 // if it was preceded by a backslash.
637 bSlashCount = 1;
638 } else {
639 quoted = 0;
641 } else {
642 quoted = 1;
645 break;
646 case '\\':
647 // Add to count.
648 bSlashCount++;
649 break;
650 default:
651 // Accept any preceding backslashes literally.
652 while (bSlashCount) {
653 arg += '\\';
654 bSlashCount--;
656 // Just add next char to the current arg.
657 arg += charCurr;
658 break;
663 // Check for end of input.
664 if (charCurr) {
665 // Go to next character.
666 p++;
667 } else {
668 // If on first pass, go on to second.
669 if (justCounting) {
670 // Allocate argv array.
671 mArgv = new T*[mArgc];
673 // Start second pass
674 justCounting = 0;
675 init = 1;
676 } else {
677 // Quit.
678 break;
683 return p - aCmdLineString.BeginReading();
686 # endif // defined(MOZILLA_INTERNAL_API)
688 #endif // defined(XP_WIN)
690 // SaveToEnv and EnvHasValue are only available on Windows or when
691 // MOZILLA_INTERNAL_API is defined
692 #if defined(MOZILLA_INTERNAL_API) || defined(XP_WIN)
694 // Save literal putenv string to environment variable.
695 MOZ_NEVER_INLINE inline void SaveToEnv(const char* aEnvString) {
696 # if defined(MOZILLA_INTERNAL_API)
697 char* expr = strdup(aEnvString);
698 if (expr) {
699 PR_SetEnv(expr);
702 // We intentionally leak |expr| here since it is required by PR_SetEnv.
703 MOZ_LSAN_INTENTIONALLY_LEAK_OBJECT(expr);
704 # elif defined(XP_WIN)
705 // This is the same as the NSPR implementation
706 // (Note that we don't need to do a strdup for this case; the CRT makes a
707 // copy)
708 _putenv(aEnvString);
709 # endif
// Returns true if the environment variable `aVarName` is set to a non-empty
// value, and false if it is unset or empty.
inline bool EnvHasValue(const char* aVarName) {
#  if defined(MOZILLA_INTERNAL_API)
  const char* val = PR_GetEnv(aVarName);
  return val && *val;
#  elif defined(XP_WIN)
  // This is the same as the NSPR implementation
  const char* val = getenv(aVarName);
  return val && *val;
#  endif
}
723 #endif // end windows/internal_api-only definitions
725 #ifndef NS_NO_XPCOM
726 already_AddRefed<nsIFile> GetFileFromEnv(const char* name);
727 #endif
729 } // namespace mozilla
731 #endif // mozilla_CmdLineAndEnvUtils_h