Merge mozilla-central to autoland. CLOSED TREE
[gecko.git] / netwerk / base / nsURLHelper.cpp
blob109df8f6d137427b54c074e98c82b3d5c32d028a
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=4 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsURLHelper.h"
9 #include "mozilla/Encoding.h"
10 #include "mozilla/RangedPtr.h"
11 #include "mozilla/TextUtils.h"
13 #include <algorithm>
14 #include <iterator>
16 #include "nsASCIIMask.h"
17 #include "nsIFile.h"
18 #include "nsIURLParser.h"
19 #include "nsCOMPtr.h"
20 #include "nsCRT.h"
21 #include "nsNetCID.h"
22 #include "mozilla/Preferences.h"
23 #include "prnetdb.h"
24 #include "mozilla/StaticPrefs_network.h"
25 #include "mozilla/Tokenizer.h"
26 #include "nsEscape.h"
27 #include "nsDOMString.h"
28 #include "mozilla/net/rust_helper.h"
29 #include "mozilla/net/DNS.h"
31 using namespace mozilla;
33 //----------------------------------------------------------------------------
34 // Init/Shutdown
35 //----------------------------------------------------------------------------
// Set to true at the end of InitGlobals(); set back to false by
// net_ShutdownURLHelper(). The getters below use it to decide whether
// InitGlobals() still has to run.
37 static bool gInitialized = false;
// Cached URL parser services. InitGlobals() fills each one only when the
// corresponding service lookup succeeded; net_ShutdownURLHelper() resets all
// three to nullptr.
38 static StaticRefPtr<nsIURLParser> gNoAuthURLParser;
39 static StaticRefPtr<nsIURLParser> gAuthURLParser;
40 static StaticRefPtr<nsIURLParser> gStdURLParser;
42 static void InitGlobals() {
43 nsCOMPtr<nsIURLParser> parser;
45 parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
46 NS_ASSERTION(parser, "failed getting 'noauth' url parser");
47 if (parser) {
48 gNoAuthURLParser = parser;
51 parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
52 NS_ASSERTION(parser, "failed getting 'auth' url parser");
53 if (parser) {
54 gAuthURLParser = parser;
57 parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
58 NS_ASSERTION(parser, "failed getting 'std' url parser");
59 if (parser) {
60 gStdURLParser = parser;
63 gInitialized = true;
66 void net_ShutdownURLHelper() {
67 if (gInitialized) {
68 gInitialized = false;
70 gNoAuthURLParser = nullptr;
71 gAuthURLParser = nullptr;
72 gStdURLParser = nullptr;
75 //----------------------------------------------------------------------------
76 // nsIURLParser getters
77 //----------------------------------------------------------------------------
79 nsIURLParser* net_GetAuthURLParser() {
80 if (!gInitialized) InitGlobals();
81 return gAuthURLParser;
84 nsIURLParser* net_GetNoAuthURLParser() {
85 if (!gInitialized) InitGlobals();
86 return gNoAuthURLParser;
89 nsIURLParser* net_GetStdURLParser() {
90 if (!gInitialized) InitGlobals();
91 return gStdURLParser;
94 //---------------------------------------------------------------------------
95 // GetFileFromURLSpec implementations
96 //---------------------------------------------------------------------------
97 nsresult net_GetURLSpecFromDir(nsIFile* aFile, nsACString& result) {
98 nsAutoCString escPath;
99 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
100 if (NS_FAILED(rv)) return rv;
102 if (escPath.Last() != '/') {
103 escPath += '/';
106 result = escPath;
107 return NS_OK;
110 nsresult net_GetURLSpecFromFile(nsIFile* aFile, nsACString& result) {
111 nsAutoCString escPath;
112 nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
113 if (NS_FAILED(rv)) return rv;
115 // if this file references a directory, then we need to ensure that the
116 // URL ends with a slash. this is important since it affects the rules
117 // for relative URL resolution when this URL is used as a base URL.
118 // if the file does not exist, then we make no assumption about its type,
119 // and simply leave the URL unmodified.
120 if (escPath.Last() != '/') {
121 bool dir;
122 rv = aFile->IsDirectory(&dir);
123 if (NS_SUCCEEDED(rv) && dir) escPath += '/';
126 result = escPath;
127 return NS_OK;
130 //----------------------------------------------------------------------------
131 // file:// URL parsing
132 //----------------------------------------------------------------------------
134 nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,
135 nsACString& outFileBaseName,
136 nsACString& outFileExtension) {
137 nsresult rv;
139 if (inURL.Length() >
140 (uint32_t)StaticPrefs::network_standard_url_max_length()) {
141 return NS_ERROR_MALFORMED_URI;
144 outDirectory.Truncate();
145 outFileBaseName.Truncate();
146 outFileExtension.Truncate();
148 const nsPromiseFlatCString& flatURL = PromiseFlatCString(inURL);
149 const char* url = flatURL.get();
151 nsAutoCString scheme;
152 rv = net_ExtractURLScheme(flatURL, scheme);
153 if (NS_FAILED(rv)) return rv;
155 if (!scheme.EqualsLiteral("file")) {
156 NS_ERROR("must be a file:// url");
157 return NS_ERROR_UNEXPECTED;
160 nsIURLParser* parser = net_GetNoAuthURLParser();
161 NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
163 uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
164 int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
166 // invoke the parser to extract the URL path
167 rv = parser->ParseURL(url, flatURL.Length(), nullptr,
168 nullptr, // don't care about scheme
169 nullptr, nullptr, // don't care about authority
170 &pathPos, &pathLen);
171 if (NS_FAILED(rv)) return rv;
173 // invoke the parser to extract filepath from the path
174 rv = parser->ParsePath(url + pathPos, pathLen, &filepathPos, &filepathLen,
175 nullptr, nullptr, // don't care about query
176 nullptr, nullptr); // don't care about ref
177 if (NS_FAILED(rv)) return rv;
179 filepathPos += pathPos;
181 // invoke the parser to extract the directory and filename from filepath
182 rv = parser->ParseFilePath(url + filepathPos, filepathLen, &directoryPos,
183 &directoryLen, &basenamePos, &basenameLen,
184 &extensionPos, &extensionLen);
185 if (NS_FAILED(rv)) return rv;
187 if (directoryLen > 0) {
188 outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
190 if (basenameLen > 0) {
191 outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
193 if (extensionLen > 0) {
194 outFileExtension =
195 Substring(inURL, filepathPos + extensionPos, extensionLen);
197 // since we are using a no-auth url parser, there will never be a host
198 // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
200 return NS_OK;
203 //----------------------------------------------------------------------------
204 // path manipulation functions
205 //----------------------------------------------------------------------------
207 // Replace all /./ with a / while resolving URLs
208 // But only till #?
//
// Normalizes the NUL-terminated |path| in place. Only the part before the
// first '?' or '#' is normalized; everything from that character on is copied
// through unchanged. The function works in two passes over the buffer:
//   1. "%2E" / "%2e" found before the last '/' of the path portion is
//      rewritten to '.', shifting the rest of the string left.
//   2. "/./" is collapsed, and "/.." followed by '/', '?', '#' or the end of
//      the string removes the previously written segment. A trailing "/."
//      loses its dot.
// flags:
//   NET_COALESCE_DOUBLE_SLASH_IS_ROOT - a leading "//" or "/%2F" (matched
//     case-insensitively) is remembered as a root marker of length 2 or 4.
//   NET_COALESCE_ALLOW_RELATIVE_ROOT  - a ".." segment with nothing left to
//     remove (traversal == 0) is copied to the output instead of dropped.
// If |path| does not start with '/', the function asserts (debug builds) and
// returns without modifying the buffer.
209 void net_CoalesceDirs(netCoalesceFlags flags, char* path) {
210 /* Stolen from the old netlib's mkparse.c.
212 * modifies a url of the form /foo/../foo1 -> /foo1
213 * and /foo/./foo1 -> /foo/foo1
214 * and /foo/foo1/.. -> /foo/
216 char* fwdPtr = path;
217 char* urlPtr = path;
218 char* lastslash = path;
219 uint32_t traversal = 0;
220 uint32_t special_ftp_len = 0;
222 MOZ_ASSERT(*path == '/', "We expect the path to begin with /");
223 if (*path != '/') {
224 return;
227 /* Remember if this url is a special ftp one: */
228 if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) {
229 /* some schemes (for example ftp) have the speciality that
230 the path can begin // or /%2F to mark the root of the
231 servers filesystem, a simple / only marks the root relative
232 to the user logging in. We remember the length of the marker */
233 if (nsCRT::strncasecmp(path, "/%2F", 4) == 0) {
234 special_ftp_len = 4;
235 } else if (strncmp(path, "//", 2) == 0) {
236 special_ftp_len = 2;
240 /* find the last slash before # or ? */
241 for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {
244 /* found nothing, but go back one only */
245 /* if there is something to go back to */
246 if (fwdPtr != path && *fwdPtr == '\0') {
247 --fwdPtr;
250 /* search the slash */
251 for (; (fwdPtr != path) && (*fwdPtr != '/'); --fwdPtr) {
253 lastslash = fwdPtr;
254 fwdPtr = path;
// First pass: fwdPtr reads, urlPtr writes. Each "%2E"/"%2e" consumes three
// input characters and produces a single '.'.
256 /* replace all %2E or %2e with . in the path */
257 /* but stop at lastslash if non null */
258 for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#') &&
259 (*lastslash == '\0' || fwdPtr != lastslash);
260 ++fwdPtr) {
261 if (*fwdPtr == '%' && *(fwdPtr + 1) == '2' &&
262 (*(fwdPtr + 2) == 'E' || *(fwdPtr + 2) == 'e')) {
263 *urlPtr++ = '.';
264 ++fwdPtr;
265 ++fwdPtr;
266 } else {
267 *urlPtr++ = *fwdPtr;
270 // Copy remaining stuff past the #?;
271 for (; *fwdPtr != '\0'; ++fwdPtr) {
272 *urlPtr++ = *fwdPtr;
274 *urlPtr = '\0'; // terminate the url
// Second pass: resolve "." and ".." segments. |traversal| counts the segments
// written so far that a later ".." may remove.
276 // start again, this time for real
277 fwdPtr = path;
278 urlPtr = path;
280 for (; (*fwdPtr != '\0') && (*fwdPtr != '?') && (*fwdPtr != '#'); ++fwdPtr) {
281 if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '/') {
282 // remove . followed by slash
283 ++fwdPtr;
284 } else if (*fwdPtr == '/' && *(fwdPtr + 1) == '.' && *(fwdPtr + 2) == '.' &&
285 (*(fwdPtr + 3) == '/' ||
286 *(fwdPtr + 3) == '\0' || // This will take care of
287 *(fwdPtr + 3) == '?' || // something like foo/bar/..#sometag
288 *(fwdPtr + 3) == '#')) {
289 // remove foo/..
290 // reverse the urlPtr to the previous slash if possible
291 // if url does not allow relative root then drop .. above root
292 // otherwise retain them in the path
// NOTE(review): when NET_COALESCE_ALLOW_RELATIVE_ROOT is not set this branch
// is also taken with traversal == 0, so the unsigned --traversal below wraps
// around. That looks harmless because the branch condition is then true
// regardless of |traversal| - confirm before relying on the counter elsewhere.
293 if (traversal > 0 || !(flags & NET_COALESCE_ALLOW_RELATIVE_ROOT)) {
294 if (urlPtr != path) urlPtr--; // we must be going back at least by one
295 for (; *urlPtr != '/' && urlPtr != path; urlPtr--) {
296 ; // null body
298 --traversal; // count back
299 // forward the fwdPtr past the ../
300 fwdPtr += 2;
301 // if we have reached the beginning of the path
302 // while searching for the previous / and we remember
303 // that it is an url that begins with /%2F then
304 // advance urlPtr again by 3 chars because /%2F already
305 // marks the root of the path
306 if (urlPtr == path && special_ftp_len > 3) {
307 ++urlPtr;
308 ++urlPtr;
309 ++urlPtr;
311 // special case if we have reached the end
312 // to preserve the last /
313 if (*fwdPtr == '.' && *(fwdPtr + 1) == '\0') ++urlPtr;
314 } else {
315 // there are too many /.. in this path, just copy them instead.
316 // forward the urlPtr past the /.. and copying it
318 // However if we remember it is an url that starts with
319 // /%2F and urlPtr just points at the "F" of "/%2F" then do
320 // not overwrite it with the /, just copy .. and move forward
321 // urlPtr.
322 if (special_ftp_len > 3 && urlPtr == path + special_ftp_len - 1) {
323 ++urlPtr;
324 } else {
325 *urlPtr++ = *fwdPtr;
327 ++fwdPtr;
328 *urlPtr++ = *fwdPtr;
329 ++fwdPtr;
330 *urlPtr++ = *fwdPtr;
332 } else {
333 // count the hierarchy, but only if we do not have reached
334 // the root of some special urls with a special root marker
335 if (*fwdPtr == '/' && *(fwdPtr + 1) != '.' &&
336 (special_ftp_len != 2 || *(fwdPtr + 1) != '/')) {
337 traversal++;
339 // copy the url incrementally
340 *urlPtr++ = *fwdPtr;
345 * Now lets remove trailing . case
346 * /foo/foo1/. -> /foo/foo1/
349 if ((urlPtr > (path + 1)) && (*(urlPtr - 1) == '.') &&
350 (*(urlPtr - 2) == '/')) {
351 urlPtr--;
354 // Before we start copying past ?#, we must make sure we don't overwrite
355 // the first / character. If fwdPtr is also unchanged, just copy everything
356 // (this shouldn't happen unless we could get in here without a leading
357 // slash).
358 if (urlPtr == path && fwdPtr != path) {
359 urlPtr++;
362 // Copy remaining stuff past the #?;
363 for (; *fwdPtr != '\0'; ++fwdPtr) {
364 *urlPtr++ = *fwdPtr;
366 *urlPtr = '\0'; // terminate the url
369 //----------------------------------------------------------------------------
370 // scheme fu
371 //----------------------------------------------------------------------------
373 static bool net_IsValidSchemeChar(const char aChar) {
374 return mozilla::net::rust_net_is_valid_scheme_char(aChar);
377 /* Extract URI-Scheme if possible */
378 nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme) {
379 nsACString::const_iterator start, end;
380 inURI.BeginReading(start);
381 inURI.EndReading(end);
383 // Strip C0 and space from begining
384 while (start != end) {
385 if ((uint8_t)*start > 0x20) {
386 break;
388 start++;
391 Tokenizer p(Substring(start, end), "\r\n\t");
392 p.Record();
393 if (!p.CheckChar(IsAsciiAlpha)) {
394 // First char must be alpha
395 return NS_ERROR_MALFORMED_URI;
398 while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
399 // Skip valid scheme characters or \r\n\t
402 if (!p.CheckChar(':')) {
403 return NS_ERROR_MALFORMED_URI;
406 p.Claim(scheme);
407 scheme.StripTaggedASCII(ASCIIMask::MaskCRLFTab());
408 ToLowerCase(scheme);
409 return NS_OK;
412 bool net_IsValidScheme(const nsACString& scheme) {
413 return mozilla::net::rust_net_is_valid_scheme(&scheme);
416 bool net_IsAbsoluteURL(const nsACString& uri) {
417 nsACString::const_iterator start, end;
418 uri.BeginReading(start);
419 uri.EndReading(end);
421 // Strip C0 and space from begining
422 while (start != end) {
423 if ((uint8_t)*start > 0x20) {
424 break;
426 start++;
429 Tokenizer p(Substring(start, end), "\r\n\t");
431 // First char must be alpha
432 if (!p.CheckChar(IsAsciiAlpha)) {
433 return false;
436 while (p.CheckChar(net_IsValidSchemeChar) || p.CheckWhite()) {
437 // Skip valid scheme characters or \r\n\t
439 if (!p.CheckChar(':')) {
440 return false;
442 p.SkipWhites();
444 if (!p.CheckChar('/')) {
445 return false;
447 p.SkipWhites();
449 if (p.CheckChar('/')) {
450 // aSpec is really absolute. Ignore aBaseURI in this case
451 return true;
453 return false;
456 void net_FilterURIString(const nsACString& input, nsACString& result) {
457 result.Truncate();
459 const auto* start = input.BeginReading();
460 const auto* end = input.EndReading();
462 // Trim off leading and trailing invalid chars.
463 auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
464 const auto* newStart = std::find_if(start, end, charFilter);
465 const auto* newEnd =
466 std::find_if(std::reverse_iterator<decltype(end)>(end),
467 std::reverse_iterator<decltype(newStart)>(newStart),
468 charFilter)
469 .base();
471 // Check if chars need to be stripped.
472 bool needsStrip = false;
473 const ASCIIMaskArray& mask = ASCIIMask::MaskCRLFTab();
474 for (const auto* itr = start; itr != end; ++itr) {
475 if (ASCIIMask::IsMasked(mask, *itr)) {
476 needsStrip = true;
477 break;
481 // Just use the passed in string rather than creating new copies if no
482 // changes are necessary.
483 if (newStart == start && newEnd == end && !needsStrip) {
484 result = input;
485 return;
488 result.Assign(Substring(newStart, newEnd));
489 if (needsStrip) {
490 result.StripTaggedASCII(mask);
494 nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,
495 const ASCIIMaskArray& aFilterMask,
496 nsACString& aResult) {
497 aResult.Truncate();
499 const auto* start = aInput.BeginReading();
500 const auto* end = aInput.EndReading();
502 // Trim off leading and trailing invalid chars.
503 auto charFilter = [](char c) { return static_cast<uint8_t>(c) > 0x20; };
504 const auto* newStart = std::find_if(start, end, charFilter);
505 const auto* newEnd =
506 std::find_if(std::reverse_iterator<decltype(end)>(end),
507 std::reverse_iterator<decltype(newStart)>(newStart),
508 charFilter)
509 .base();
511 return NS_EscapeAndFilterURL(Substring(newStart, newEnd), aFlags,
512 &aFilterMask, aResult, fallible);
#if defined(XP_WIN)
// Appends to |aResultBuf| a copy of |aURL| in which every backslash before
// the first '#' is replaced by '/'. Returns true when at least one backslash
// was replaced; when it returns false nothing has been appended. The buffer
// is appended to, not cleared.
bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf) {
  nsACString::const_iterator first, last;
  aURL.BeginReading(first);
  aURL.EndReading(last);

  const char* const stop = last.get();
  const char* pending = first.get();  // start of the run not yet appended
  const char* cur = pending;
  bool replaced = false;

  while (cur != stop) {
    if (*cur == '#') {
      // Don't normalize any backslashes following the hash.
      cur = stop;
      break;
    }
    if (*cur == '\\') {
      replaced = true;
      if (cur > pending) {
        aResultBuf.Append(pending, cur - pending);
      }
      aResultBuf += '/';
      pending = cur + 1;
    }
    ++cur;
  }

  // Flush the tail, but only if something was actually rewritten.
  if (replaced && cur > pending) {
    aResultBuf.Append(pending, cur - pending);
  }

  return replaced;
}
#endif
544 //----------------------------------------------------------------------------
545 // miscellaneous (i.e., stuff that should really be elsewhere)
546 //----------------------------------------------------------------------------
// Lower-cases |c| in place if it is an ASCII upper-case letter; every other
// value is left alone.
static inline void ToLower(char& c) {
  if (c >= 'A' && c <= 'Z') {
    c = static_cast<char>(c + ('a' - 'A'));
  }
}

// Lower-cases the ASCII letters in the first |length| characters of |str|.
void net_ToLowerCase(char* str, uint32_t length) {
  char* const limit = str + length;
  while (str < limit) {
    ToLower(*str);
    ++str;
  }
}

// Lower-cases the ASCII letters of the NUL-terminated string |str|.
void net_ToLowerCase(char* str) {
  while (*str != '\0') {
    ToLower(*str);
    ++str;
  }
}
// Scans [iter, stop) for the first character that occurs in the
// NUL-terminated |set|. The scan also ends at a NUL character in the input.
// Returns a pointer to the match, or to the position where the scan ended
// (|stop| or the NUL) when there is none.
char* net_FindCharInSet(const char* iter, const char* stop, const char* set) {
  while (iter != stop && *iter != '\0') {
    const char* candidate = set;
    while (*candidate != '\0') {
      if (*candidate == *iter) {
        return const_cast<char*>(iter);
      }
      ++candidate;
    }
    ++iter;
  }
  return const_cast<char*>(iter);
}
// Scans [iter, stop) for the first character that does NOT occur in the
// NUL-terminated |set|. Returns a pointer to it, or |stop| when every
// character of the range is in the set.
//
// The range may be empty: |iter| is compared against |stop| before it is
// dereferenced, so nothing at or beyond |stop| is ever read. (The previous
// goto-based version read *iter first and could run past |stop| when called
// with iter == stop and a set member stored there.)
char* net_FindCharNotInSet(const char* iter, const char* stop,
                           const char* set) {
  for (; iter != stop; ++iter) {
    bool inSet = false;
    for (const char* s = set; *s; ++s) {
      if (*iter == *s) {
        inSet = true;
        break;
      }
    }
    if (!inSet) {
      break;
    }
  }
  return const_cast<char*>(iter);
}
// Reverse counterpart of net_FindCharNotInSet: walks backwards from |iter| - 1
// down to |stop| and returns a pointer to the first character met that is not
// in the NUL-terminated |set|. When every character of [stop, iter) is in the
// set (or the range is empty) the result is |stop| - 1 or |iter| - 1
// respectively, i.e. the position just before the scanned range.
char* net_RFindCharNotInSet(const char* stop, const char* iter,
                            const char* set) {
  const char* cur = iter - 1;
  const char* const limit = stop - 1;

  while (cur != limit) {
    bool inSet = false;
    for (const char* s = set; *s != '\0'; ++s) {
      if (*cur == *s) {
        inSet = true;
        break;
      }
    }
    if (!inSet) {
      break;
    }
    --cur;
  }
  return const_cast<char*>(cur);
}
// Characters treated as linear whitespace when trimming media types and
// their parameters: space and horizontal tab.
598 #define HTTP_LWS " \t"
600 // Return the index of the closing quote of the string, if any
601 static uint32_t net_FindStringEnd(const nsCString& flatStr,
602 uint32_t stringStart, char stringDelim) {
603 NS_ASSERTION(stringStart < flatStr.Length() &&
604 flatStr.CharAt(stringStart) == stringDelim &&
605 (stringDelim == '"' || stringDelim == '\''),
606 "Invalid stringStart");
608 const char set[] = {stringDelim, '\\', '\0'};
609 do {
610 // stringStart points to either the start quote or the last
611 // escaped char (the char following a '\\')
613 // Write to searchStart here, so that when we get back to the
614 // top of the loop right outside this one we search from the
615 // right place.
616 uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
617 if (stringEnd == uint32_t(kNotFound)) return flatStr.Length();
619 if (flatStr.CharAt(stringEnd) == '\\') {
620 // Hit a backslash-escaped char. Need to skip over it.
621 stringStart = stringEnd + 1;
622 if (stringStart == flatStr.Length()) return stringStart;
624 // Go back to looking for the next escape or the string end
625 continue;
628 return stringEnd;
630 } while (true);
632 MOZ_ASSERT_UNREACHABLE("How did we get here?");
633 return flatStr.Length();
636 static uint32_t net_FindMediaDelimiter(const nsCString& flatStr,
637 uint32_t searchStart, char delimiter) {
638 do {
639 // searchStart points to the spot from which we should start looking
640 // for the delimiter.
641 const char delimStr[] = {delimiter, '"', '\0'};
642 uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
643 if (curDelimPos == uint32_t(kNotFound)) return flatStr.Length();
645 char ch = flatStr.CharAt(curDelimPos);
646 if (ch == delimiter) {
647 // Found delimiter
648 return curDelimPos;
651 // We hit the start of a quoted string. Look for its end.
652 searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
653 if (searchStart == flatStr.Length()) return searchStart;
655 ++searchStart;
657 // searchStart now points to the first char after the end of the
658 // string, so just go back to the top of the loop and look for
659 // |delimiter| again.
660 } while (true);
662 MOZ_ASSERT_UNREACHABLE("How did we get here?");
663 return flatStr.Length();
666 // aOffset should be added to aCharsetStart and aCharsetEnd if this
667 // function sets them.
//
// Parses one media-type ("type/subtype" followed by optional ';'-separated
// parameters) out of |aMediaTypeStr| and merges it into the caller's state.
//   aContentType    - in/out; replaced (lower-cased) when the parsed type is
//                     accepted and differs case-insensitively from its
//                     current value.
//   aContentCharset - in/out; assigned when the accepted type carries a
//                     charset parameter, or when the type changed while
//                     *aHadCharset was already true.
//   aHadCharset     - in/out; read as well as written.
//   aCharsetStart / aCharsetEnd - out; offsets (plus aOffset) of the charset
//                     parameter, or of the position where parameters
//                     start/would start when the type changed and has no
//                     charset.
//   aStrict         - true: accept the type only when nothing but LWS follows
//                     the consumed text; false: accept unless the type
//                     matches the wildcard pattern tested below.
// A type is never accepted when it is empty or contains no '/'. When several
// charset parameters are present, the last one wins.
668 static void net_ParseMediaType(const nsACString& aMediaTypeStr,
669 nsACString& aContentType,
670 nsACString& aContentCharset, int32_t aOffset,
671 bool* aHadCharset, int32_t* aCharsetStart,
672 int32_t* aCharsetEnd, bool aStrict) {
673 const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
674 const char* start = flatStr.get();
675 const char* end = start + flatStr.Length();
677 // Trim LWS leading and trailing whitespace from type.
678 const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
679 const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";");
681 const char* charset = "";
682 const char* charsetEnd = charset;
683 int32_t charsetParamStart = 0;
684 int32_t charsetParamEnd = 0;
// |consumed| is only evaluated by the aStrict check further down, where it is
// used as an offset from |start|.
686 uint32_t consumed = typeEnd - type;
688 // Iterate over parameters
689 bool typeHasCharset = false;
690 uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
691 if (paramStart != uint32_t(kNotFound)) {
692 // We have parameters. Iterate over them.
693 uint32_t curParamStart = paramStart + 1;
694 do {
695 uint32_t curParamEnd =
696 net_FindMediaDelimiter(flatStr, curParamStart, ';');
698 const char* paramName = net_FindCharNotInSet(
699 start + curParamStart, start + curParamEnd, HTTP_LWS);
700 static const char charsetStr[] = "charset=";
701 if (nsCRT::strncasecmp(paramName, charsetStr, sizeof(charsetStr) - 1) ==
702 0) {
703 charset = paramName + sizeof(charsetStr) - 1;
704 charsetEnd = start + curParamEnd;
705 typeHasCharset = true;
// The recorded range starts at the ';' that introduces the parameter.
706 charsetParamStart = curParamStart - 1;
707 charsetParamEnd = curParamEnd;
710 consumed = curParamEnd;
711 curParamStart = curParamEnd + 1;
712 } while (curParamStart < flatStr.Length());
715 bool charsetNeedsQuotedStringUnescaping = false;
716 if (typeHasCharset) {
717 // Trim LWS leading and trailing whitespace from charset.
718 charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
719 if (*charset == '"') {
720 charsetNeedsQuotedStringUnescaping = true;
721 charsetEnd =
722 start + net_FindStringEnd(flatStr, charset - start, *charset);
723 charset++;
724 NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
725 } else {
726 charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";");
730 // if the server sent "*/*", it is meaningless, so do not store it.
731 // also, if type is the same as aContentType, then just update the
732 // charset. however, if charset is empty and aContentType hasn't
733 // changed, then don't wipe-out an existing aContentCharset. We
734 // also want to reject a mime-type if it does not include a slash.
735 // some servers give junk after the charset parameter, which may
736 // include a comma, so this check makes us a bit more tolerant.
738 if (type != typeEnd && memchr(type, '/', typeEnd - type) != nullptr &&
739 (aStrict ? (net_FindCharNotInSet(start + consumed, end, HTTP_LWS) == end)
740 : (strncmp(type, "*/*", typeEnd - type) != 0))) {
741 // Common case here is that aContentType is empty
742 bool eq = !aContentType.IsEmpty() &&
743 aContentType.Equals(Substring(type, typeEnd),
744 nsCaseInsensitiveCStringComparator);
745 if (!eq) {
746 aContentType.Assign(type, typeEnd - type);
747 ToLowerCase(aContentType);
750 if ((!eq && *aHadCharset) || typeHasCharset) {
751 *aHadCharset = true;
752 if (charsetNeedsQuotedStringUnescaping) {
753 // parameters using the "quoted-string" syntax need
754 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
755 aContentCharset.Truncate();
756 for (const char* c = charset; c != charsetEnd; c++) {
757 if (*c == '\\' && c + 1 != charsetEnd) {
758 // eat escape
759 c++;
761 aContentCharset.Append(*c);
763 } else {
764 aContentCharset.Assign(charset, charsetEnd - charset);
766 if (typeHasCharset) {
767 *aCharsetStart = charsetParamStart + aOffset;
768 *aCharsetEnd = charsetParamEnd + aOffset;
771 // Only set a new charset position if this is a different type
772 // from the last one we had and it doesn't already have a
773 // charset param. If this is the same type, we probably want
774 // to leave the charset position on its first occurrence.
775 if (!eq && !typeHasCharset) {
776 int32_t charsetStart = int32_t(paramStart);
777 if (charsetStart == kNotFound) charsetStart = flatStr.Length();
779 *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
784 #undef HTTP_LWS
786 void net_ParseContentType(const nsACString& aHeaderStr,
787 nsACString& aContentType, nsACString& aContentCharset,
788 bool* aHadCharset) {
789 int32_t dummy1, dummy2;
790 net_ParseContentType(aHeaderStr, aContentType, aContentCharset, aHadCharset,
791 &dummy1, &dummy2);
794 void net_ParseContentType(const nsACString& aHeaderStr,
795 nsACString& aContentType, nsACString& aContentCharset,
796 bool* aHadCharset, int32_t* aCharsetStart,
797 int32_t* aCharsetEnd) {
799 // Augmented BNF (from RFC 2616 section 3.7):
801 // header-value = media-type *( LWS "," LWS media-type )
802 // media-type = type "/" subtype *( LWS ";" LWS parameter )
803 // type = token
804 // subtype = token
805 // parameter = attribute "=" value
806 // attribute = token
807 // value = token | quoted-string
810 // Examples:
812 // text/html
813 // text/html, text/html
814 // text/html,text/html; charset=ISO-8859-1
815 // text/html,text/html; charset="ISO-8859-1"
816 // text/html;charset=ISO-8859-1, text/html
817 // text/html;charset='ISO-8859-1', text/html
818 // application/octet-stream
821 *aHadCharset = false;
822 const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
824 // iterate over media-types. Note that ',' characters can happen
825 // inside quoted strings, so we need to watch out for that.
826 uint32_t curTypeStart = 0;
827 do {
828 // curTypeStart points to the start of the current media-type. We want
829 // to look for its end.
830 uint32_t curTypeEnd = net_FindMediaDelimiter(flatStr, curTypeStart, ',');
832 // At this point curTypeEnd points to the spot where the media-type
833 // starting at curTypeEnd ends. Time to parse that!
834 net_ParseMediaType(
835 Substring(flatStr, curTypeStart, curTypeEnd - curTypeStart),
836 aContentType, aContentCharset, curTypeStart, aHadCharset, aCharsetStart,
837 aCharsetEnd, false);
839 // And let's move on to the next media-type
840 curTypeStart = curTypeEnd + 1;
841 } while (curTypeStart < flatStr.Length());
844 void net_ParseRequestContentType(const nsACString& aHeaderStr,
845 nsACString& aContentType,
846 nsACString& aContentCharset,
847 bool* aHadCharset) {
849 // Augmented BNF (from RFC 7231 section 3.1.1.1):
851 // media-type = type "/" subtype *( OWS ";" OWS parameter )
852 // type = token
853 // subtype = token
854 // parameter = token "=" ( token / quoted-string )
856 // Examples:
858 // text/html
859 // text/html; charset=ISO-8859-1
860 // text/html; charset="ISO-8859-1"
861 // application/octet-stream
864 aContentType.Truncate();
865 aContentCharset.Truncate();
866 *aHadCharset = false;
867 const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
869 // At this point curTypeEnd points to the spot where the media-type
870 // starting at curTypeEnd ends. Time to parse that!
871 nsAutoCString contentType, contentCharset;
872 bool hadCharset = false;
873 int32_t dummy1, dummy2;
874 uint32_t typeEnd = net_FindMediaDelimiter(flatStr, 0, ',');
875 if (typeEnd != flatStr.Length()) {
876 // We have some stuff left at the end, so this is not a valid
877 // request Content-Type header.
878 return;
880 net_ParseMediaType(flatStr, contentType, contentCharset, 0, &hadCharset,
881 &dummy1, &dummy2, true);
883 aContentType = contentType;
884 aContentCharset = contentCharset;
885 *aHadCharset = hadCharset;
888 bool net_IsValidHostName(const nsACString& host) {
889 // The host name is limited to 253 ascii characters.
890 if (host.Length() > 253) {
891 return false;
894 const char* end = host.EndReading();
895 // Use explicit whitelists to select which characters we are
896 // willing to send to lower-level DNS logic. This is more
897 // self-documenting, and can also be slightly faster than the
898 // blacklist approach, since DNS names are the common case, and
899 // the commonest characters will tend to be near the start of
900 // the list.
902 // Whitelist for DNS names (RFC 1035) with extra characters added
903 // for pragmatic reasons "$+_"
904 // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
905 if (net_FindCharNotInSet(host.BeginReading(), end,
906 "abcdefghijklmnopqrstuvwxyz"
907 ".-0123456789"
908 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end) {
909 return true;
912 // Might be a valid IPv6 link-local address containing a percent sign
913 return mozilla::net::HostIsIPLiteral(host);
916 bool net_IsValidIPv4Addr(const nsACString& aAddr) {
917 return mozilla::net::rust_net_is_valid_ipv4_addr(&aAddr);
920 bool net_IsValidIPv6Addr(const nsACString& aAddr) {
921 return mozilla::net::rust_net_is_valid_ipv6_addr(&aAddr);
924 bool net_GetDefaultStatusTextForCode(uint16_t aCode, nsACString& aOutText) {
925 switch (aCode) {
926 // start with the most common
927 case 200:
928 aOutText.AssignLiteral("OK");
929 break;
930 case 404:
931 aOutText.AssignLiteral("Not Found");
932 break;
933 case 301:
934 aOutText.AssignLiteral("Moved Permanently");
935 break;
936 case 304:
937 aOutText.AssignLiteral("Not Modified");
938 break;
939 case 307:
940 aOutText.AssignLiteral("Temporary Redirect");
941 break;
942 case 500:
943 aOutText.AssignLiteral("Internal Server Error");
944 break;
946 // also well known
947 case 100:
948 aOutText.AssignLiteral("Continue");
949 break;
950 case 101:
951 aOutText.AssignLiteral("Switching Protocols");
952 break;
953 case 201:
954 aOutText.AssignLiteral("Created");
955 break;
956 case 202:
957 aOutText.AssignLiteral("Accepted");
958 break;
959 case 203:
960 aOutText.AssignLiteral("Non Authoritative");
961 break;
962 case 204:
963 aOutText.AssignLiteral("No Content");
964 break;
965 case 205:
966 aOutText.AssignLiteral("Reset Content");
967 break;
968 case 206:
969 aOutText.AssignLiteral("Partial Content");
970 break;
971 case 207:
972 aOutText.AssignLiteral("Multi-Status");
973 break;
974 case 208:
975 aOutText.AssignLiteral("Already Reported");
976 break;
977 case 300:
978 aOutText.AssignLiteral("Multiple Choices");
979 break;
980 case 302:
981 aOutText.AssignLiteral("Found");
982 break;
983 case 303:
984 aOutText.AssignLiteral("See Other");
985 break;
986 case 305:
987 aOutText.AssignLiteral("Use Proxy");
988 break;
989 case 308:
990 aOutText.AssignLiteral("Permanent Redirect");
991 break;
992 case 400:
993 aOutText.AssignLiteral("Bad Request");
994 break;
995 case 401:
996 aOutText.AssignLiteral("Unauthorized");
997 break;
998 case 402:
999 aOutText.AssignLiteral("Payment Required");
1000 break;
1001 case 403:
1002 aOutText.AssignLiteral("Forbidden");
1003 break;
1004 case 405:
1005 aOutText.AssignLiteral("Method Not Allowed");
1006 break;
1007 case 406:
1008 aOutText.AssignLiteral("Not Acceptable");
1009 break;
1010 case 407:
1011 aOutText.AssignLiteral("Proxy Authentication Required");
1012 break;
1013 case 408:
1014 aOutText.AssignLiteral("Request Timeout");
1015 break;
1016 case 409:
1017 aOutText.AssignLiteral("Conflict");
1018 break;
1019 case 410:
1020 aOutText.AssignLiteral("Gone");
1021 break;
1022 case 411:
1023 aOutText.AssignLiteral("Length Required");
1024 break;
1025 case 412:
1026 aOutText.AssignLiteral("Precondition Failed");
1027 break;
1028 case 413:
1029 aOutText.AssignLiteral("Request Entity Too Large");
1030 break;
1031 case 414:
1032 aOutText.AssignLiteral("Request URI Too Long");
1033 break;
1034 case 415:
1035 aOutText.AssignLiteral("Unsupported Media Type");
1036 break;
1037 case 416:
1038 aOutText.AssignLiteral("Requested Range Not Satisfiable");
1039 break;
1040 case 417:
1041 aOutText.AssignLiteral("Expectation Failed");
1042 break;
1043 case 418:
1044 aOutText.AssignLiteral("I'm a teapot");
1045 break;
1046 case 421:
1047 aOutText.AssignLiteral("Misdirected Request");
1048 break;
1049 case 422:
1050 aOutText.AssignLiteral("Unprocessable Entity");
1051 break;
1052 case 423:
1053 aOutText.AssignLiteral("Locked");
1054 break;
1055 case 424:
1056 aOutText.AssignLiteral("Failed Dependency");
1057 break;
1058 case 425:
1059 aOutText.AssignLiteral("Too Early");
1060 break;
1061 case 426:
1062 aOutText.AssignLiteral("Upgrade Required");
1063 break;
1064 case 428:
1065 aOutText.AssignLiteral("Precondition Required");
1066 break;
1067 case 429:
1068 aOutText.AssignLiteral("Too Many Requests");
1069 break;
1070 case 431:
1071 aOutText.AssignLiteral("Request Header Fields Too Large");
1072 break;
1073 case 451:
1074 aOutText.AssignLiteral("Unavailable For Legal Reasons");
1075 break;
1076 case 501:
1077 aOutText.AssignLiteral("Not Implemented");
1078 break;
1079 case 502:
1080 aOutText.AssignLiteral("Bad Gateway");
1081 break;
1082 case 503:
1083 aOutText.AssignLiteral("Service Unavailable");
1084 break;
1085 case 504:
1086 aOutText.AssignLiteral("Gateway Timeout");
1087 break;
1088 case 505:
1089 aOutText.AssignLiteral("HTTP Version Unsupported");
1090 break;
1091 case 506:
1092 aOutText.AssignLiteral("Variant Also Negotiates");
1093 break;
1094 case 507:
1095 aOutText.AssignLiteral("Insufficient Storage ");
1096 break;
1097 case 508:
1098 aOutText.AssignLiteral("Loop Detected");
1099 break;
1100 case 510:
1101 aOutText.AssignLiteral("Not Extended");
1102 break;
1103 case 511:
1104 aOutText.AssignLiteral("Network Authentication Required");
1105 break;
1106 default:
1107 aOutText.AssignLiteral("No Reason Phrase");
1108 return false;
1110 return true;
1113 static auto MakeNameMatcher(const nsACString& aName) {
1114 return [&aName](const auto& param) { return param.mKey.Equals(aName); };
1117 static void AssignMaybeInvalidUTF8String(const nsACString& aSource,
1118 nsACString& aDest) {
1119 if (NS_FAILED(UTF_8_ENCODING->DecodeWithoutBOMHandling(aSource, aDest))) {
1120 MOZ_CRASH("Out of memory when converting URL params.");
1124 namespace mozilla {
1126 bool URLParams::Has(const nsACString& aName) {
1127 return std::any_of(mParams.cbegin(), mParams.cend(), MakeNameMatcher(aName));
1130 bool URLParams::Has(const nsACString& aName, const nsACString& aValue) {
1131 return std::any_of(
1132 mParams.cbegin(), mParams.cend(), [&aName, &aValue](const auto& param) {
1133 return param.mKey.Equals(aName) && param.mValue.Equals(aValue);
1137 void URLParams::Get(const nsACString& aName, nsACString& aRetval) {
1138 aRetval.SetIsVoid(true);
1140 const auto end = mParams.cend();
1141 const auto it = std::find_if(mParams.cbegin(), end, MakeNameMatcher(aName));
1142 if (it != end) {
1143 aRetval.Assign(it->mValue);
1147 void URLParams::GetAll(const nsACString& aName, nsTArray<nsCString>& aRetval) {
1148 aRetval.Clear();
1150 for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {
1151 if (mParams[i].mKey.Equals(aName)) {
1152 aRetval.AppendElement(mParams[i].mValue);
1157 void URLParams::Append(const nsACString& aName, const nsACString& aValue) {
1158 Param* param = mParams.AppendElement();
1159 param->mKey = aName;
1160 param->mValue = aValue;
1163 void URLParams::Set(const nsACString& aName, const nsACString& aValue) {
1164 Param* param = nullptr;
1165 for (uint32_t i = 0, len = mParams.Length(); i < len;) {
1166 if (!mParams[i].mKey.Equals(aName)) {
1167 ++i;
1168 continue;
1170 if (!param) {
1171 param = &mParams[i];
1172 ++i;
1173 continue;
1175 // Remove duplicates.
1176 mParams.RemoveElementAt(i);
1177 --len;
1180 if (!param) {
1181 param = mParams.AppendElement();
1182 param->mKey = aName;
1185 param->mValue = aValue;
1188 void URLParams::Delete(const nsACString& aName) {
1189 mParams.RemoveElementsBy(
1190 [&aName](const auto& param) { return param.mKey.Equals(aName); });
1193 void URLParams::Delete(const nsACString& aName, const nsACString& aValue) {
1194 mParams.RemoveElementsBy([&aName, &aValue](const auto& param) {
1195 return param.mKey.Equals(aName) && param.mValue.Equals(aValue);
1199 /* static */
1200 void URLParams::DecodeString(const nsACString& aInput, nsACString& aOutput) {
1201 const char* const end = aInput.EndReading();
1202 for (const char* iter = aInput.BeginReading(); iter != end;) {
1203 // replace '+' with U+0020
1204 if (*iter == '+') {
1205 aOutput.Append(' ');
1206 ++iter;
1207 continue;
1210 // Percent decode algorithm
1211 if (*iter == '%') {
1212 const char* const first = iter + 1;
1213 const char* const second = first + 1;
1215 const auto asciiHexDigit = [](char x) {
1216 return (x >= 0x41 && x <= 0x46) || (x >= 0x61 && x <= 0x66) ||
1217 (x >= 0x30 && x <= 0x39);
1220 const auto hexDigit = [](char x) {
1221 return x >= 0x30 && x <= 0x39
1222 ? x - 0x30
1223 : (x >= 0x41 && x <= 0x46 ? x - 0x37 : x - 0x57);
1226 if (first != end && second != end && asciiHexDigit(*first) &&
1227 asciiHexDigit(*second)) {
1228 aOutput.Append(hexDigit(*first) * 16 + hexDigit(*second));
1229 iter = second + 1;
1230 } else {
1231 aOutput.Append('%');
1232 ++iter;
1235 continue;
1238 aOutput.Append(*iter);
1239 ++iter;
1241 AssignMaybeInvalidUTF8String(aOutput, aOutput);
1244 /* static */
1245 bool URLParams::ParseNextInternal(const char*& aStart, const char* const aEnd,
1246 bool aShouldDecode, nsACString* aOutputName,
1247 nsACString* aOutputValue) {
1248 nsDependentCSubstring string;
1250 const char* const iter = std::find(aStart, aEnd, '&');
1251 if (iter != aEnd) {
1252 string.Rebind(aStart, iter);
1253 aStart = iter + 1;
1254 } else {
1255 string.Rebind(aStart, aEnd);
1256 aStart = aEnd;
1259 if (string.IsEmpty()) {
1260 return false;
1263 const auto* const eqStart = string.BeginReading();
1264 const auto* const eqEnd = string.EndReading();
1265 const auto* const eqIter = std::find(eqStart, eqEnd, '=');
1267 nsDependentCSubstring name;
1268 nsDependentCSubstring value;
1270 if (eqIter != eqEnd) {
1271 name.Rebind(eqStart, eqIter);
1272 value.Rebind(eqIter + 1, eqEnd);
1273 } else {
1274 name.Rebind(string, 0);
1277 if (aShouldDecode) {
1278 DecodeString(name, *aOutputName);
1279 DecodeString(value, *aOutputValue);
1280 return true;
1283 AssignMaybeInvalidUTF8String(name, *aOutputName);
1284 AssignMaybeInvalidUTF8String(value, *aOutputValue);
1285 return true;
1288 /* static */
1289 bool URLParams::Extract(const nsACString& aInput, const nsACString& aName,
1290 nsACString& aValue) {
1291 aValue.SetIsVoid(true);
1292 return !URLParams::Parse(
1293 aInput, true,
1294 [&aName, &aValue](const nsACString& name, nsCString&& value) {
1295 if (aName == name) {
1296 aValue = std::move(value);
1297 return false;
1299 return true;
1303 void URLParams::ParseInput(const nsACString& aInput) {
1304 // Remove all the existing data before parsing a new input.
1305 DeleteAll();
1307 URLParams::Parse(aInput, true, [this](nsCString&& name, nsCString&& value) {
1308 mParams.AppendElement(Param{std::move(name), std::move(value)});
1309 return true;
1313 void URLParams::SerializeString(const nsACString& aInput, nsACString& aValue) {
1314 const unsigned char* p = (const unsigned char*)aInput.BeginReading();
1315 const unsigned char* end = p + aInput.Length();
1317 while (p != end) {
1318 // ' ' to '+'
1319 if (*p == 0x20) {
1320 aValue.Append(0x2B);
1321 // Percent Encode algorithm
1322 } else if (*p == 0x2A || *p == 0x2D || *p == 0x2E ||
1323 (*p >= 0x30 && *p <= 0x39) || (*p >= 0x41 && *p <= 0x5A) ||
1324 *p == 0x5F || (*p >= 0x61 && *p <= 0x7A)) {
1325 aValue.Append(*p);
1326 } else {
1327 aValue.AppendPrintf("%%%.2X", *p);
1330 ++p;
1334 void URLParams::Serialize(nsACString& aValue, bool aEncode) const {
1335 aValue.Truncate();
1336 bool first = true;
1338 for (uint32_t i = 0, len = mParams.Length(); i < len; ++i) {
1339 if (first) {
1340 first = false;
1341 } else {
1342 aValue.Append('&');
1345 // XXX Actually, it's not necessary to build a new string object. Generally,
1346 // such cases could just convert each codepoint one-by-one.
1347 if (aEncode) {
1348 SerializeString(mParams[i].mKey, aValue);
1349 aValue.Append('=');
1350 SerializeString(mParams[i].mValue, aValue);
1351 } else {
1352 aValue.Append(mParams[i].mKey);
1353 aValue.Append('=');
1354 aValue.Append(mParams[i].mValue);
1359 void URLParams::Sort() {
1360 mParams.StableSort([](const Param& lhs, const Param& rhs) {
1361 // FIXME(emilio, bug 1888901): The URLSearchParams.sort() spec requires
1362 // comparing by utf-16 code points... That's a bit unfortunate, maybe we
1363 // can optimize the string conversions here?
1364 return Compare(NS_ConvertUTF8toUTF16(lhs.mKey),
1365 NS_ConvertUTF8toUTF16(rhs.mKey));
1369 } // namespace mozilla