Bug 1845134 - Part 4: Update existing ui-icons to use the latest source from acorn...
[gecko.git] / netwerk / base / nsURLHelper.cpp
blob5d0a9e994677f7f9957c328ad7826efe144efc65
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=4 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsURLHelper.h"
9 #include "mozilla/Encoding.h"
10 #include "mozilla/RangedPtr.h"
11 #include "mozilla/TextUtils.h"
13 #include <algorithm>
14 #include <iterator>
16 #include "nsASCIIMask.h"
17 #include "nsIFile.h"
18 #include "nsIURLParser.h"
19 #include "nsCOMPtr.h"
20 #include "nsCRT.h"
21 #include "nsNetCID.h"
22 #include "mozilla/Preferences.h"
23 #include "prnetdb.h"
24 #include "mozilla/StaticPrefs_network.h"
25 #include "mozilla/Tokenizer.h"
26 #include "nsEscape.h"
27 #include "nsDOMString.h"
28 #include "mozilla/net/rust_helper.h"
29 #include "mozilla/net/DNS.h"
31 using namespace mozilla;
33 //----------------------------------------------------------------------------
34 // Init/Shutdown
35 //----------------------------------------------------------------------------
37 static bool gInitialized = false;
38 static StaticRefPtr<nsIURLParser> gNoAuthURLParser;
39 static StaticRefPtr<nsIURLParser> gAuthURLParser;
40 static StaticRefPtr<nsIURLParser> gStdURLParser;
42 static void InitGlobals() {
43 nsCOMPtr<nsIURLParser> parser;
45 parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
46 NS_ASSERTION(parser, "failed getting 'noauth' url parser");
47 if (parser) {
48 gNoAuthURLParser = parser;
51 parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
52 NS_ASSERTION(parser, "failed getting 'auth' url parser");
53 if (parser) {
54 gAuthURLParser = parser;
57 parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
58 NS_ASSERTION(parser, "failed getting 'std' url parser");
59 if (parser) {
60 gStdURLParser = parser;
63 gInitialized = true;
66 void net_ShutdownURLHelper() {
67 if (gInitialized) {
68 gInitialized = false;
70 gNoAuthURLParser = nullptr;
71 gAuthURLParser = nullptr;
72 gStdURLParser = nullptr;
75 //----------------------------------------------------------------------------
76 // nsIURLParser getters
77 //----------------------------------------------------------------------------
79 nsIURLParser* net_GetAuthURLParser() {
80 if (!gInitialized) InitGlobals();
81 return gAuthURLParser;
84 nsIURLParser* net_GetNoAuthURLParser() {
85 if (!gInitialized) InitGlobals();
86 return gNoAuthURLParser;
89 nsIURLParser* net_GetStdURLParser() {
90 if (!gInitialized) InitGlobals();
91 return gStdURLParser;
94 //---------------------------------------------------------------------------
95 // GetFileFromURLSpec implementations
96 //---------------------------------------------------------------------------
97 nsresult net_GetURLSpecFromDir(nsIFile* aFile, nsACString& result) {
98 nsAutoCString escPath;
99 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
100 if (NS_FAILED(rv)) return rv;
102 if (escPath.Last() != '/') {
103 escPath += '/';
106 result = escPath;
107 return NS_OK;
110 nsresult net_GetURLSpecFromFile(nsIFile* aFile, nsACString& result) {
111 nsAutoCString escPath;
112 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
113 if (NS_FAILED(rv)) return rv;
115 // if this file references a directory, then we need to ensure that the
116 // URL ends with a slash. this is important since it affects the rules
117 // for relative URL resolution when this URL is used as a base URL.
118 // if the file does not exist, then we make no assumption about its type,
119 // and simply leave the URL unmodified.
120 if (escPath.Last() != '/') {
121 bool dir;
122 rv = aFile->IsDirectory(&dir);
123 if (NS_SUCCEEDED(rv) && dir) escPath += '/';
126 result = escPath;
127 return NS_OK;
130 //----------------------------------------------------------------------------
131 // file:// URL parsing
132 //----------------------------------------------------------------------------
134 nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,
135 nsACString& outFileBaseName,
136 nsACString& outFileExtension) {
137 nsresult rv;
139 if (inURL.Length() >
140 (uint32_t)StaticPrefs::network_standard_url_max_length()) {
141 return NS_ERROR_MALFORMED_URI;
144 outDirectory.Truncate();
145 outFileBaseName.Truncate();
146 outFileExtension.Truncate();
148 const nsPromiseFlatCString& flatURL = PromiseFlatCString(inURL);
149 const char* url = flatURL.get();
151 nsAutoCString scheme;
152 rv = net_ExtractURLScheme(flatURL, scheme);
153 if (NS_FAILED(rv)) return rv;
155 if (!scheme.EqualsLiteral("file")) {
156 NS_ERROR("must be a file:// url");
157 return NS_ERROR_UNEXPECTED;
160 nsIURLParser* parser = net_GetNoAuthURLParser();
161 NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
163 uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
164 int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
166 // invoke the parser to extract the URL path
167 rv = parser->ParseURL(url, flatURL.Length(), nullptr,
168 nullptr, // don't care about scheme
169 nullptr, nullptr, // don't care about authority
170 &pathPos, &pathLen);
171 if (NS_FAILED(rv)) return rv;
173 // invoke the parser to extract filepath from the path
174 rv = parser->ParsePath(url + pathPos, pathLen, &filepathPos, &filepathLen,
175 nullptr, nullptr, // don't care about query
176 nullptr, nullptr); // don't care about ref
177 if (NS_FAILED(rv)) return rv;
179 filepathPos += pathPos;
181 // invoke the parser to extract the directory and filename from filepath
182 rv = parser->ParseFilePath(url + filepathPos, filepathLen, &directoryPos,
183 &directoryLen, &basenamePos, &basenameLen,
184 &extensionPos, &extensionLen);
185 if (NS_FAILED(rv)) return rv;
187 if (directoryLen > 0) {
188 outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
190 if (basenameLen > 0) {
191 outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
193 if (extensionLen > 0) {
194 outFileExtension =
195 Substring(inURL, filepathPos + extensionPos, extensionLen);
197 // since we are using a no-auth url parser, there will never be a host
198 // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
200 return NS_OK;
203 //----------------------------------------------------------------------------
204 // path manipulation functions
205 //----------------------------------------------------------------------------
207 // Replace all /./ with a / while resolving URLs
208 // But only till #?
209 void net_CoalesceDirs(netCoalesceFlags flags, char* path) {
210 /* Stolen from the old netlib's mkparse.c.
212 * modifies a url of the form /foo/../foo1 -> /foo1
213 * and /foo/./foo1 -> /foo/foo1
214 * and /foo/foo1/.. -> /foo/
216 char* fwdPtr = path;
217 char* urlPtr = path;
218 char* lastslash = path;
219 uint32_t traversal = 0;
220 uint32_t special_ftp_len = 0;
222 /* Remember if this url is a special ftp one: */
223 if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) {
224 /* some schemes (for example ftp) have the speciality that
225 the path can begin // or /%2F to mark the root of the
226 servers filesystem, a simple / only marks the root relative
227 to the user loging in. We remember the length of the marker */
228 if (nsCRT::strncasecmp(path, "/%2F", 4) == 0) {
229 special_ftp_len = 4;
230 } else if (strncmp(path, "//", 2) == 0) {
231 special_ftp_len = 2;
235 /* find the last slash before # or ? */
236 for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {
239 /* found nothing, but go back one only */
240 /* if there is something to go back to */
241 if (fwdPtr != path && *fwdPtr == '\0') {
242 --fwdPtr;
245 /* search the slash */
246 for (; (fwdPtr != path) && (*fwdPtr != '/'); --fwdPtr) {
248 lastslash = fwdPtr;
249 fwdPtr = path;
251 /* replace all %2E or %2e with . in the path */
252 /* but stop at lastchar if non null */
253 for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#') &&
254 (*lastslash == '\0' || fwdPtr != lastslash);
255 ++fwdPtr) {
256 if (*fwdPtr == '%' && *(fwdPtr + 1) == '2' &&
257 (*(fwdPtr + 2) == 'E' || *(fwdPtr + 2) == 'e')) {
258 *urlPtr++ = '.';
259 ++fwdPtr;
260 ++fwdPtr;
261 } else {
262 *urlPtr++ = *fwdPtr;
265 // Copy remaining stuff past the #?;
266 for (; *fwdPtr != '\0'; ++fwdPtr) {
267 *urlPtr++ = *fwdPtr;
269 *urlPtr = '\0'; // terminate the url
271 // start again, this time for real
272 fwdPtr = path;
273 urlPtr = path;
275 for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {
276 if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '/') {
277 // remove . followed by slash
278 ++fwdPtr;
279 } else if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '.' &&
280 (*(fwdPtr + 3) == '/' ||
281 *(fwdPtr + 3) == '\0' || // This will take care of
282 *(fwdPtr + 3) == '?' || // something like foo/bar/..#sometag
283 *(fwdPtr + 3) == '#')) {
284 // remove foo/..
285 // reverse the urlPtr to the previous slash if possible
286 // if url does not allow relative root then drop .. above root
287 // otherwise retain them in the path
288 if (traversal > 0 || !(flags & NET_COALESCE_ALLOW_RELATIVE_ROOT)) {
289 if (urlPtr != path) urlPtr--; // we must be going back at least by one
290 for (; *urlPtr != '/' && urlPtr != path; urlPtr--) {
291 ; // null body
293 --traversal; // count back
294 // forward the fwdPtr past the ../
295 fwdPtr += 2;
296 // if we have reached the beginning of the path
297 // while searching for the previous / and we remember
298 // that it is an url that begins with /%2F then
299 // advance urlPtr again by 3 chars because /%2F already
300 // marks the root of the path
301 if (urlPtr == path && special_ftp_len > 3) {
302 ++urlPtr;
303 ++urlPtr;
304 ++urlPtr;
306 // special case if we have reached the end
307 // to preserve the last /
308 if (*fwdPtr == '.' && *(fwdPtr + 1) == '\0') ++urlPtr;
309 } else {
310 // there are to much /.. in this path, just copy them instead.
311 // forward the urlPtr past the /.. and copying it
313 // However if we remember it is an url that starts with
314 // /%2F and urlPtr just points at the "F" of "/%2F" then do
315 // not overwrite it with the /, just copy .. and move forward
316 // urlPtr.
317 if (special_ftp_len > 3 && urlPtr == path + special_ftp_len - 1) {
318 ++urlPtr;
319 } else {
320 *urlPtr++ = *fwdPtr;
322 ++fwdPtr;
323 *urlPtr++ = *fwdPtr;
324 ++fwdPtr;
325 *urlPtr++ = *fwdPtr;
327 } else {
328 // count the hierachie, but only if we do not have reached
329 // the root of some special urls with a special root marker
330 if (*fwdPtr == '/' && *(fwdPtr + 1) != '.' &&
331 (special_ftp_len != 2 || *(fwdPtr + 1) != '/')) {
332 traversal++;
334 // copy the url incrementaly
335 *urlPtr++ = *fwdPtr;
340 * Now lets remove trailing . case
341 * /foo/foo1/. -> /foo/foo1/
344 if ((urlPtr > (path + 1)) && (*(urlPtr - 1) == '.') &&
345 (*(urlPtr - 2) == '/')) {
346 urlPtr--;
349 // Copy remaining stuff past the #?;
350 for (; *fwdPtr != '\0'; ++fwdPtr) {
351 *urlPtr++ = *fwdPtr;
353 *urlPtr = '\0'; // terminate the url
356 //----------------------------------------------------------------------------
357 // scheme fu
358 //----------------------------------------------------------------------------
360 static bool net_IsValidSchemeChar(const char aChar) {
361 return mozilla::net::rust_net_is_valid_scheme_char(aChar);
364 /* Extract URI-Scheme if possible */
365 nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme) {
366 nsACString::const_iterator start, end;
367 inURI.BeginReading(start);
368 inURI.EndReading(end);
370 // Strip C0 and space from begining
371 while (start != end) {
372 if ((uint8_t)*start > 0x20) {
373 break;
375 start++;
378 Tokenizer p(Substring(start, end), "\r\n\t");
379 p.Record();
380 if (!p.CheckChar(IsAsciiAlpha)) {
381 // First char must be alpha
382 return NS_ERROR_MALFORMED_URI;
385 while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
386 // Skip valid scheme characters or \r\n\t
389 if (!p.CheckChar(':')) {
390 return NS_ERROR_MALFORMED_URI;
393 p.Claim(scheme);
394 scheme.StripTaggedASCII(ASCIIMask::MaskCRLFTab());
395 ToLowerCase(scheme);
396 return NS_OK;
399 bool net_IsValidScheme(const nsACString& scheme) {
400 return mozilla::net::rust_net_is_valid_scheme(&scheme);
403 bool net_IsAbsoluteURL(const nsACString& uri) {
404 nsACString::const_iterator start, end;
405 uri.BeginReading(start);
406 uri.EndReading(end);
408 // Strip C0 and space from begining
409 while (start != end) {
410 if ((uint8_t)*start > 0x20) {
411 break;
413 start++;
416 Tokenizer p(Substring(start, end), "\r\n\t");
418 // First char must be alpha
419 if (!p.CheckChar(IsAsciiAlpha)) {
420 return false;
423 while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
424 // Skip valid scheme characters or \r\n\t
426 if (!p.CheckChar(':')) {
427 return false;
429 p.SkipWhites();
431 if (!p.CheckChar('/')) {
432 return false;
434 p.SkipWhites();
436 if (p.CheckChar('/')) {
437 // aSpec is really absolute. Ignore aBaseURI in this case
438 return true;
440 return false;
443 void net_FilterURIString(const nsACString& input, nsACString& result) {
444 result.Truncate();
446 const auto* start = input.BeginReading();
447 const auto* end = input.EndReading();
449 // Trim off leading and trailing invalid chars.
450 auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
451 const auto* newStart = std::find_if(start, end, charFilter);
452 const auto* newEnd =
453 std::find_if(std::reverse_iterator<decltype(end)>(end),
454 std::reverse_iterator<decltype(newStart)>(newStart),
455 charFilter)
456 .base();
458 // Check if chars need to be stripped.
459 bool needsStrip = false;
460 const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab();
461 for (const auto* itr = start; itr != end; ++itr) {
462 if (ASCIIMask::IsMasked(mask, *itr)) {
463 needsStrip = true;
464 break;
468 // Just use the passed in string rather than creating new copies if no
469 // changes are necessary.
470 if (newStart == start && newEnd == end && !needsStrip) {
471 result = input;
472 return;
475 result.Assign(Substring(newStart, newEnd));
476 if (needsStrip) {
477 result.StripTaggedASCII(mask);
481 nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,
482 const ASCIIMaskArray& aFilterMask,
483 nsACString& aResult) {
484 aResult.Truncate();
486 const auto* start = aInput.BeginReading();
487 const auto* end = aInput.EndReading();
489 // Trim off leading and trailing invalid chars.
490 auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
491 const auto* newStart = std::find_if(start, end, charFilter);
492 const auto* newEnd =
493 std::find_if(std::reverse_iterator<decltype(end)>(end),
494 std::reverse_iterator<decltype(newStart)>(newStart),
495 charFilter)
496 .base();
498 return NS_EscapeAndFilterURL(Substring(newStart, newEnd), aFlags,
499 &aFilterMask, aResult, fallible);
502 #if defined(XP_WIN)
503 bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf) {
504 bool writing = false;
506 nsACString::const_iterator beginIter, endIter;
507 aURL.BeginReading(beginIter);
508 aURL.EndReading(endIter);
510 const char *s, *begin = beginIter.get();
512 for (s = begin; s != endIter.get(); ++s) {
513 if (*s == '\\') {
514 writing = true;
515 if (s > begin) aResultBuf.Append(begin, s - begin);
516 aResultBuf += '/';
517 begin = s + 1;
519 if (*s == '#') {
520 // Don't normalize any backslashes following the hash.
521 s = endIter.get();
522 break;
525 if (writing && s > begin) aResultBuf.Append(begin, s - begin);
527 return writing;
529 #endif
531 //----------------------------------------------------------------------------
532 // miscellaneous (i.e., stuff that should really be elsewhere)
533 //----------------------------------------------------------------------------
// Lower-cases |c| in place if it is an ASCII upper-case letter; any other
// value (including bytes >= 0x80) is left alone.
static inline void ToLower(char& c) {
  if (c >= 'A' && c <= 'Z') {
    c = static_cast<char>(c + ('a' - 'A'));
  }
}

// ASCII-lower-cases the first |length| bytes of |str| in place.
void net_ToLowerCase(char* str, uint32_t length) {
  char* const stop = str + length;
  while (str < stop) {
    ToLower(*str);
    ++str;
  }
}

// ASCII-lower-cases the NUL-terminated string |str| in place.
void net_ToLowerCase(char* str) {
  while (*str) {
    ToLower(*str);
    ++str;
  }
}
// Returns a pointer to the first character in [iter, stop) that occurs in the
// NUL-terminated |set|. The scan also ends at an embedded NUL; if nothing is
// found, the position where the scan ended (|stop| or the NUL) is returned.
char* net_FindCharInSet(const char* iter, const char* stop, const char* set) {
  while (iter != stop && *iter) {
    for (const char* candidate = set; *candidate; ++candidate) {
      if (*candidate == *iter) {
        return (char*)iter;
      }
    }
    ++iter;
  }
  return (char*)iter;
}
// Returns a pointer to the first character in [iter, stop) that does NOT
// occur in the NUL-terminated |set|, or |stop| if every character does.
// A NUL inside the range never matches |set| and therefore ends the scan.
//
// An empty range (iter == stop) yields |stop| without reading *stop, so a
// caller passing an empty, non-terminated range cannot make the scan run
// past the end of the buffer.
char* net_FindCharNotInSet(const char* iter, const char* stop,
                           const char* set) {
  for (; iter != stop; ++iter) {
    bool inSet = false;
    for (const char* s = set; *s; ++s) {
      if (*iter == *s) {
        inSet = true;
        break;
      }
    }
    if (!inSet) {
      break;
    }
  }
  return (char*)iter;
}
// Backward counterpart of net_FindCharNotInSet. Note the parameter order:
// |stop| is the START of the range and |iter| is one past its END.
// Walks from iter - 1 down towards |stop| and returns a pointer to the first
// character that does not occur in |set|; if every character is in |set| (or
// the range is empty) the result is stop - 1.
char* net_RFindCharNotInSet(const char* stop, const char* iter,
                            const char* set) {
  const char* cur = iter - 1;
  const char* const limit = stop - 1;

  while (cur != limit) {
    bool inSet = false;
    for (const char* s = set; *s; ++s) {
      if (*cur == *s) {
        inSet = true;
        break;
      }
    }
    if (!inSet) {
      break;
    }
    --cur;
  }
  return (char*)cur;
}
585 #define HTTP_LWS " \t"
587 // Return the index of the closing quote of the string, if any
588 static uint32_t net_FindStringEnd(const nsCString& flatStr,
589 uint32_t stringStart, char stringDelim) {
590 NS_ASSERTION(stringStart < flatStr.Length() &&
591 flatStr.CharAt(stringStart) == stringDelim &&
592 (stringDelim == '"' || stringDelim == '\''),
593 "Invalid stringStart");
595 const char set[] = {stringDelim, '\\', '\0'};
596 do {
597 // stringStart points to either the start quote or the last
598 // escaped char (the char following a '\\')
600 // Write to searchStart here, so that when we get back to the
601 // top of the loop right outside this one we search from the
602 // right place.
603 uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
604 if (stringEnd == uint32_t(kNotFound)) return flatStr.Length();
606 if (flatStr.CharAt(stringEnd) == '\\') {
607 // Hit a backslash-escaped char. Need to skip over it.
608 stringStart = stringEnd + 1;
609 if (stringStart == flatStr.Length()) return stringStart;
611 // Go back to looking for the next escape or the string end
612 continue;
615 return stringEnd;
617 } while (true);
619 MOZ_ASSERT_UNREACHABLE("How did we get here?");
620 return flatStr.Length();
623 static uint32_t net_FindMediaDelimiter(const nsCString& flatStr,
624 uint32_t searchStart, char delimiter) {
625 do {
626 // searchStart points to the spot from which we should start looking
627 // for the delimiter.
628 const char delimStr[] = {delimiter, '"', '\0'};
629 uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
630 if (curDelimPos == uint32_t(kNotFound)) return flatStr.Length();
632 char ch = flatStr.CharAt(curDelimPos);
633 if (ch == delimiter) {
634 // Found delimiter
635 return curDelimPos;
638 // We hit the start of a quoted string. Look for its end.
639 searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
640 if (searchStart == flatStr.Length()) return searchStart;
642 ++searchStart;
644 // searchStart now points to the first char after the end of the
645 // string, so just go back to the top of the loop and look for
646 // |delimiter| again.
647 } while (true);
649 MOZ_ASSERT_UNREACHABLE("How did we get here?");
650 return flatStr.Length();
653 // aOffset should be added to aCharsetStart and aCharsetEnd if this
654 // function sets them.
655 static void net_ParseMediaType(const nsACString& aMediaTypeStr,
656 nsACString& aContentType,
657 nsACString& aContentCharset, int32_t aOffset,
658 bool* aHadCharset, int32_t* aCharsetStart,
659 int32_t* aCharsetEnd, bool aStrict) {
660 const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
661 const char* start = flatStr.get();
662 const char* end = start + flatStr.Length();
664 // Trim LWS leading and trailing whitespace from type.
665 const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
666 const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";");
668 const char* charset = "";
669 const char* charsetEnd = charset;
670 int32_t charsetParamStart = 0;
671 int32_t charsetParamEnd = 0;
673 uint32_t consumed = typeEnd - type;
675 // Iterate over parameters
676 bool typeHasCharset = false;
677 uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
678 if (paramStart != uint32_t(kNotFound)) {
679 // We have parameters. Iterate over them.
680 uint32_t curParamStart = paramStart + 1;
681 do {
682 uint32_t curParamEnd =
683 net_FindMediaDelimiter(flatStr, curParamStart, ';');
685 const char* paramName = net_FindCharNotInSet(
686 start + curParamStart, start + curParamEnd, HTTP_LWS);
687 static const char charsetStr[] = "charset=";
688 if (nsCRT::strncasecmp(paramName, charsetStr, sizeof(charsetStr) - 1) ==
689 0) {
690 charset = paramName + sizeof(charsetStr) - 1;
691 charsetEnd = start + curParamEnd;
692 typeHasCharset = true;
693 charsetParamStart = curParamStart - 1;
694 charsetParamEnd = curParamEnd;
697 consumed = curParamEnd;
698 curParamStart = curParamEnd + 1;
699 } while (curParamStart < flatStr.Length());
702 bool charsetNeedsQuotedStringUnescaping = false;
703 if (typeHasCharset) {
704 // Trim LWS leading and trailing whitespace from charset.
705 charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
706 if (*charset == '"') {
707 charsetNeedsQuotedStringUnescaping = true;
708 charsetEnd =
709 start + net_FindStringEnd(flatStr, charset - start, *charset);
710 charset++;
711 NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
712 } else {
713 charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";");
717 // if the server sent "*/*", it is meaningless, so do not store it.
718 // also, if type is the same as aContentType, then just update the
719 // charset. however, if charset is empty and aContentType hasn't
720 // changed, then don't wipe-out an existing aContentCharset. We
721 // also want to reject a mime-type if it does not include a slash.
722 // some servers give junk after the charset parameter, which may
723 // include a comma, so this check makes us a bit more tolerant.
725 if (type != typeEnd && memchr(type, '/', typeEnd - type) != nullptr &&
726 (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end)
727 : (strncmp(type, "*/*", typeEnd - type) != 0))) {
728 // Common case here is that aContentType is empty
729 bool eq = !aContentType.IsEmpty() &&
730 aContentType.Equals(Substring(type, typeEnd),
731 nsCaseInsensitiveCStringComparator);
732 if (!eq) {
733 aContentType.Assign(type, typeEnd - type);
734 ToLowerCase(aContentType);
737 if ((!eq && *aHadCharset) || typeHasCharset) {
738 *aHadCharset = true;
739 if (charsetNeedsQuotedStringUnescaping) {
740 // parameters using the "quoted-string" syntax need
741 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
742 aContentCharset.Truncate();
743 for (const char* c = charset; c != charsetEnd; c++) {
744 if (*c == '\\' && c + 1 != charsetEnd) {
745 // eat escape
746 c++;
748 aContentCharset.Append(*c);
750 } else {
751 aContentCharset.Assign(charset, charsetEnd - charset);
753 if (typeHasCharset) {
754 *aCharsetStart = charsetParamStart + aOffset;
755 *aCharsetEnd = charsetParamEnd + aOffset;
758 // Only set a new charset position if this is a different type
759 // from the last one we had and it doesn't already have a
760 // charset param. If this is the same type, we probably want
761 // to leave the charset position on its first occurrence.
762 if (!eq && !typeHasCharset) {
763 int32_t charsetStart = int32_t(paramStart);
764 if (charsetStart == kNotFound) charsetStart = flatStr.Length();
766 *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
771 #undef HTTP_LWS
773 void net_ParseContentType(const nsACString& aHeaderStr,
774 nsACString& aContentType, nsACString& aContentCharset,
775 bool* aHadCharset) {
776 int32_t dummy1, dummy2;
777 net_ParseContentType(aHeaderStr, aContentType, aContentCharset, aHadCharset,
778 &dummy1, &dummy2);
781 void net_ParseContentType(const nsACString& aHeaderStr,
782 nsACString& aContentType, nsACString& aContentCharset,
783 bool* aHadCharset, int32_t* aCharsetStart,
784 int32_t* aCharsetEnd) {
786 // Augmented BNF (from RFC 2616 section 3.7):
788 // header-value = media-type *( LWS "," LWS media-type )
789 // media-type = type "/" subtype *( LWS ";" LWS parameter )
790 // type = token
791 // subtype = token
792 // parameter = attribute "=" value
793 // attribute = token
794 // value = token | quoted-string
797 // Examples:
799 // text/html
800 // text/html, text/html
801 // text/html,text/html; charset=ISO-8859-1
802 // text/html,text/html; charset="ISO-8859-1"
803 // text/html;charset=ISO-8859-1, text/html
804 // text/html;charset='ISO-8859-1', text/html
805 // application/octet-stream
808 *aHadCharset = false;
809 const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
811 // iterate over media-types. Note that ',' characters can happen
812 // inside quoted strings, so we need to watch out for that.
813 uint32_t curTypeStart = 0;
814 do {
815 // curTypeStart points to the start of the current media-type. We want
816 // to look for its end.
817 uint32_t curTypeEnd = net_FindMediaDelimiter(flatStr, curTypeStart, ',');
819 // At this point curTypeEnd points to the spot where the media-type
820 // starting at curTypeEnd ends. Time to parse that!
821 net_ParseMediaType(
822 Substring(flatStr, curTypeStart, curTypeEnd - curTypeStart),
823 aContentType, aContentCharset, curTypeStart, aHadCharset, aCharsetStart,
824 aCharsetEnd, false);
826 // And let's move on to the next media-type
827 curTypeStart = curTypeEnd + 1;
828 } while (curTypeStart < flatStr.Length());
831 void net_ParseRequestContentType(const nsACString& aHeaderStr,
832 nsACString& aContentType,
833 nsACString& aContentCharset,
834 bool* aHadCharset) {
836 // Augmented BNF (from RFC 7231 section 3.1.1.1):
838 // media-type = type "/" subtype *( OWS ";" OWS parameter )
839 // type = token
840 // subtype = token
841 // parameter = token "=" ( token / quoted-string )
843 // Examples:
845 // text/html
846 // text/html; charset=ISO-8859-1
847 // text/html; charset="ISO-8859-1"
848 // application/octet-stream
851 aContentType.Truncate();
852 aContentCharset.Truncate();
853 *aHadCharset = false;
854 const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
856 // At this point curTypeEnd points to the spot where the media-type
857 // starting at curTypeEnd ends. Time to parse that!
858 nsAutoCString contentType, contentCharset;
859 bool hadCharset = false;
860 int32_t dummy1, dummy2;
861 uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
862 if (typeEnd != flatStr.Length()) {
863 // We have some stuff left at the end, so this is not a valid
864 // request Content-Type header.
865 return;
867 net_ParseMediaType(flatStr, contentType, contentCharset, 0, &hadCharset,
868 &dummy1, &dummy2, true);
870 aContentType = contentType;
871 aContentCharset = contentCharset;
872 *aHadCharset = hadCharset;
875 bool net_IsValidHostName(const nsACString& host) {
876 // The host name is limited to 253 ascii characters.
877 if (host.Length() > 253) {
878 return false;
881 const char* end = host.EndReading();
882 // Use explicit whitelists to select which characters we are
883 // willing to send to lower-level DNS logic. This is more
884 // self-documenting, and can also be slightly faster than the
885 // blacklist approach, since DNS names are the common case, and
886 // the commonest characters will tend to be near the start of
887 // the list.
889 // Whitelist for DNS names (RFC 1035) with extra characters added
890 // for pragmatic reasons "$+_"
891 // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
892 if (net_FindCharNotInSet(host.BeginReading(), end,
893 "abcdefghijklmnopqrstuvwxyz"
894 ".-0123456789"
895 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end) {
896 return true;
899 // Might be a valid IPv6 link-local address containing a percent sign
900 return mozilla::net::HostIsIPLiteral(host);
903 bool net_IsValidIPv4Addr(const nsACString& aAddr) {
904 return mozilla::net::rust_net_is_valid_ipv4_addr(&aAddr);
907 bool net_IsValidIPv6Addr(const nsACString& aAddr) {
908 return mozilla::net::rust_net_is_valid_ipv6_addr(&aAddr);
911 bool net_GetDefaultStatusTextForCode(uint16_t aCode, nsACString& aOutText) {
912 switch (aCode) {
913 // start with the most common
914 case 200:
915 aOutText.AssignLiteral("OK");
916 break;
917 case 404:
918 aOutText.AssignLiteral("Not Found");
919 break;
920 case 301:
921 aOutText.AssignLiteral("Moved Permanently");
922 break;
923 case 304:
924 aOutText.AssignLiteral("Not Modified");
925 break;
926 case 307:
927 aOutText.AssignLiteral("Temporary Redirect");
928 break;
929 case 500:
930 aOutText.AssignLiteral("Internal Server Error");
931 break;
933 // also well known
934 case 100:
935 aOutText.AssignLiteral("Continue");
936 break;
937 case 101:
938 aOutText.AssignLiteral("Switching Protocols");
939 break;
940 case 201:
941 aOutText.AssignLiteral("Created");
942 break;
943 case 202:
944 aOutText.AssignLiteral("Accepted");
945 break;
946 case 203:
947 aOutText.AssignLiteral("Non Authoritative");
948 break;
949 case 204:
950 aOutText.AssignLiteral("No Content");
951 break;
952 case 205:
953 aOutText.AssignLiteral("Reset Content");
954 break;
955 case 206:
956 aOutText.AssignLiteral("Partial Content");
957 break;
958 case 207:
959 aOutText.AssignLiteral("Multi-Status");
960 break;
961 case 208:
962 aOutText.AssignLiteral("Already Reported");
963 break;
964 case 300:
965 aOutText.AssignLiteral("Multiple Choices");
966 break;
967 case 302:
968 aOutText.AssignLiteral("Found");
969 break;
970 case 303:
971 aOutText.AssignLiteral("See Other");
972 break;
973 case 305:
974 aOutText.AssignLiteral("Use Proxy");
975 break;
976 case 308:
977 aOutText.AssignLiteral("Permanent Redirect");
978 break;
979 case 400:
980 aOutText.AssignLiteral("Bad Request");
981 break;
982 case 401:
983 aOutText.AssignLiteral("Unauthorized");
984 break;
985 case 402:
986 aOutText.AssignLiteral("Payment Required");
987 break;
988 case 403:
989 aOutText.AssignLiteral("Forbidden");
990 break;
991 case 405:
992 aOutText.AssignLiteral("Method Not Allowed");
993 break;
994 case 406:
995 aOutText.AssignLiteral("Not Acceptable");
996 break;
997 case 407:
998 aOutText.AssignLiteral("Proxy Authentication Required");
999 break;
1000 case 408:
1001 aOutText.AssignLiteral("Request Timeout");
1002 break;
1003 case 409:
1004 aOutText.AssignLiteral("Conflict");
1005 break;
1006 case 410:
1007 aOutText.AssignLiteral("Gone");
1008 break;
1009 case 411:
1010 aOutText.AssignLiteral("Length Required");
1011 break;
1012 case 412:
1013 aOutText.AssignLiteral("Precondition Failed");
1014 break;
1015 case 413:
1016 aOutText.AssignLiteral("Request Entity Too Large");
1017 break;
1018 case 414:
1019 aOutText.AssignLiteral("Request URI Too Long");
1020 break;
1021 case 415:
1022 aOutText.AssignLiteral("Unsupported Media Type");
1023 break;
1024 case 416:
1025 aOutText.AssignLiteral("Requested Range Not Satisfiable");
1026 break;
1027 case 417:
1028 aOutText.AssignLiteral("Expectation Failed");
1029 break;
1030 case 418:
1031 aOutText.AssignLiteral("I'm a teapot");
1032 break;
1033 case 421:
1034 aOutText.AssignLiteral("Misdirected Request");
1035 break;
1036 case 422:
1037 aOutText.AssignLiteral("Unprocessable Entity");
1038 break;
1039 case 423:
1040 aOutText.AssignLiteral("Locked");
1041 break;
1042 case 424:
1043 aOutText.AssignLiteral("Failed Dependency");
1044 break;
1045 case 425:
1046 aOutText.AssignLiteral("Too Early");
1047 break;
1048 case 426:
1049 aOutText.AssignLiteral("Upgrade Required");
1050 break;
1051 case 428:
1052 aOutText.AssignLiteral("Precondition Required");
1053 break;
1054 case 429:
1055 aOutText.AssignLiteral("Too Many Requests");
1056 break;
1057 case 431:
1058 aOutText.AssignLiteral("Request Header Fields Too Large");
1059 break;
1060 case 451:
1061 aOutText.AssignLiteral("Unavailable For Legal Reasons");
1062 break;
1063 case 501:
1064 aOutText.AssignLiteral("Not Implemented");
1065 break;
1066 case 502:
1067 aOutText.AssignLiteral("Bad Gateway");
1068 break;
1069 case 503:
1070 aOutText.AssignLiteral("Service Unavailable");
1071 break;
1072 case 504:
1073 aOutText.AssignLiteral("Gateway Timeout");
1074 break;
1075 case 505:
1076 aOutText.AssignLiteral("HTTP Version Unsupported");
1077 break;
1078 case 506:
1079 aOutText.AssignLiteral("Variant Also Negotiates");
1080 break;
1081 case 507:
1082 aOutText.AssignLiteral("Insufficient Storage ");
1083 break;
1084 case 508:
1085 aOutText.AssignLiteral("Loop Detected");
1086 break;
1087 case 510:
1088 aOutText.AssignLiteral("Not Extended");
1089 break;
1090 case 511:
1091 aOutText.AssignLiteral("Network Authentication Required");
1092 break;
1093 default:
1094 aOutText.AssignLiteral("No Reason Phrase");
1095 return false;
1097 return true;
1100 namespace mozilla {
1101 static auto MakeNameMatcher(const nsAString& aName) {
1102 return [&aName](const auto& param) { return param.mKey.Equals(aName); };
1105 bool URLParams::Has(const nsAString& aName) {
1106 return std::any_of(mParams.cbegin(), mParams.cend(), MakeNameMatcher(aName));
1109 bool URLParams::Has(const nsAString& aName, const nsAString& aValue) {
1110 return std::any_of(
1111 mParams.cbegin(), mParams.cend(), [&aName, &aValue](const auto& param) {
1112 return param.mKey.Equals(aName) && param.mValue.Equals(aValue);
1116 void URLParams::Get(const nsAString& aName, nsString& aRetval) {
1117 SetDOMStringToNull(aRetval);
1119 const auto end = mParams.cend();
1120 const auto it = std::find_if(mParams.cbegin(), end, MakeNameMatcher(aName));
1121 if (it != end) {
1122 aRetval.Assign(it->mValue);
1126 void URLParams::GetAll(const nsAString& aName, nsTArray<nsString>& aRetval) {
1127 aRetval.Clear();
1129 for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {
1130 if (mParams[i].mKey.Equals(aName)) {
1131 aRetval.AppendElement(mParams[i].mValue);
1136 void URLParams::Append(const nsAString& aName, const nsAString& aValue) {
1137 Param* param = mParams.AppendElement();
1138 param->mKey = aName;
1139 param->mValue = aValue;
1142 void URLParams::Set(const nsAString& aName, const nsAString& aValue) {
1143 Param* param = nullptr;
1144 for (uint32_t i = 0, len = mParams.Length(); i < len;) {
1145 if (!mParams[i].mKey.Equals(aName)) {
1146 ++i;
1147 continue;
1149 if (!param) {
1150 param = &mParams[i];
1151 ++i;
1152 continue;
1154 // Remove duplicates.
1155 mParams.RemoveElementAt(i);
1156 --len;
1159 if (!param) {
1160 param = mParams.AppendElement();
1161 param->mKey = aName;
1164 param->mValue = aValue;
1167 void URLParams::Delete(const nsAString& aName) {
1168 mParams.RemoveElementsBy(
1169 [&aName](const auto& param) { return param.mKey.Equals(aName); });
1172 void URLParams::Delete(const nsAString& aName, const nsAString& aValue) {
1173 mParams.RemoveElementsBy([&aName, &aValue](const auto& param) {
1174 return param.mKey.Equals(aName) && param.mValue.Equals(aValue);
1178 /* static */
1179 void URLParams::ConvertString(const nsACString& aInput, nsAString& aOutput) {
1180 if (NS_FAILED(UTF_8_ENCODING->DecodeWithoutBOMHandling(aInput, aOutput))) {
1181 MOZ_CRASH("Out of memory when converting URL params.");
1185 /* static */
1186 void URLParams::DecodeString(const nsACString& aInput, nsAString& aOutput) {
1187 const char* const end = aInput.EndReading();
1189 nsAutoCString unescaped;
1191 for (const char* iter = aInput.BeginReading(); iter != end;) {
1192 // replace '+' with U+0020
1193 if (*iter == '+') {
1194 unescaped.Append(' ');
1195 ++iter;
1196 continue;
1199 // Percent decode algorithm
1200 if (*iter == '%') {
1201 const char* const first = iter + 1;
1202 const char* const second = first + 1;
1204 const auto asciiHexDigit = [](char x) {
1205 return (x >= 0x41 && x <= 0x46) || (x >= 0x61 && x <= 0x66) ||
1206 (x >= 0x30 && x <= 0x39);
1209 const auto hexDigit = [](char x) {
1210 return x >= 0x30 && x <= 0x39
1211 ? x - 0x30
1212 : (x >= 0x41 && x <= 0x46 ? x - 0x37 : x - 0x57);
1215 if (first != end && second != end && asciiHexDigit(*first) &&
1216 asciiHexDigit(*second)) {
1217 unescaped.Append(hexDigit(*first) * 16 + hexDigit(*second));
1218 iter = second + 1;
1219 } else {
1220 unescaped.Append('%');
1221 ++iter;
1224 continue;
1227 unescaped.Append(*iter);
1228 ++iter;
1231 // XXX It seems rather wasteful to first decode into a UTF-8 nsCString and
1232 // then convert the whole string to UTF-16, at least if we exceed the inline
1233 // storage size.
1234 ConvertString(unescaped, aOutput);
1237 /* static */
1238 bool URLParams::ParseNextInternal(const char*& aStart, const char* const aEnd,
1239 bool aShouldDecode, nsAString* aOutputName,
1240 nsAString* aOutputValue) {
1241 nsDependentCSubstring string;
1243 const char* const iter = std::find(aStart, aEnd, '&');
1244 if (iter != aEnd) {
1245 string.Rebind(aStart, iter);
1246 aStart = iter + 1;
1247 } else {
1248 string.Rebind(aStart, aEnd);
1249 aStart = aEnd;
1252 if (string.IsEmpty()) {
1253 return false;
1256 const auto* const eqStart = string.BeginReading();
1257 const auto* const eqEnd = string.EndReading();
1258 const auto* const eqIter = std::find(eqStart, eqEnd, '=');
1260 nsDependentCSubstring name;
1261 nsDependentCSubstring value;
1263 if (eqIter != eqEnd) {
1264 name.Rebind(eqStart, eqIter);
1265 value.Rebind(eqIter + 1, eqEnd);
1266 } else {
1267 name.Rebind(string, 0);
1270 if (aShouldDecode) {
1271 DecodeString(name, *aOutputName);
1272 DecodeString(value, *aOutputValue);
1273 return true;
1276 ConvertString(name, *aOutputName);
1277 ConvertString(value, *aOutputValue);
1278 return true;
1281 /* static */
1282 bool URLParams::Extract(const nsACString& aInput, const nsAString& aName,
1283 nsAString& aValue) {
1284 aValue.SetIsVoid(true);
1285 return !URLParams::Parse(
1286 aInput, true, [&aName, &aValue](const nsAString& name, nsString&& value) {
1287 if (aName == name) {
1288 aValue = std::move(value);
1289 return false;
1291 return true;
1295 void URLParams::ParseInput(const nsACString& aInput) {
1296 // Remove all the existing data before parsing a new input.
1297 DeleteAll();
1299 URLParams::Parse(aInput, true, [this](nsString&& name, nsString&& value) {
1300 mParams.AppendElement(Param{std::move(name), std::move(value)});
1301 return true;
1305 void URLParams::SerializeString(const nsCString& aInput, nsAString& aValue) {
1306 const unsigned char* p = (const unsigned char*)aInput.get();
1307 const unsigned char* end = p + aInput.Length();
1309 while (p != end) {
1310 // ' ' to '+'
1311 if (*p == 0x20) {
1312 aValue.Append(0x2B);
1313 // Percent Encode algorithm
1314 } else if (*p == 0x2A || *p == 0x2D || *p == 0x2E ||
1315 (*p >= 0x30 && *p <= 0x39) || (*p >= 0x41 && *p <= 0x5A) ||
1316 *p == 0x5F || (*p >= 0x61 && *p <= 0x7A)) {
1317 aValue.Append(*p);
1318 } else {
1319 aValue.AppendPrintf("%%%.2X", *p);
1322 ++p;
1326 void URLParams::Serialize(nsAString& aValue, bool aEncode) const {
1327 aValue.Truncate();
1328 bool first = true;
1330 for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {
1331 if (first) {
1332 first = false;
1333 } else {
1334 aValue.Append('&');
1337 // XXX Actually, it's not necessary to build a new string object. Generally,
1338 // such cases could just convert each codepoint one-by-one.
1339 if (aEncode) {
1340 SerializeString(NS_ConvertUTF16toUTF8(mParams[i].mKey), aValue);
1341 aValue.Append('=');
1342 SerializeString(NS_ConvertUTF16toUTF8(mParams[i].mValue), aValue);
1343 } else {
1344 aValue.Append(mParams[i].mKey);
1345 aValue.Append('=');
1346 aValue.Append(mParams[i].mValue);
1351 void URLParams::Sort() {
1352 mParams.StableSort([](const Param& lhs, const Param& rhs) {
1353 return Compare(lhs.mKey, rhs.mKey);
1357 } // namespace mozilla