1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=4 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsURLHelper.h"
9 #include "mozilla/Encoding.h"
10 #include "mozilla/RangedPtr.h"
11 #include "mozilla/TextUtils.h"
16 #include "nsASCIIMask.h"
18 #include "nsIURLParser.h"
22 #include "mozilla/Preferences.h"
24 #include "mozilla/StaticPrefs_network.h"
25 #include "mozilla/Tokenizer.h"
27 #include "nsDOMString.h"
28 #include "mozilla/net/rust_helper.h"
29 #include "mozilla/net/DNS.h"
31 using namespace mozilla
;
33 //----------------------------------------------------------------------------
35 //----------------------------------------------------------------------------
37 static bool gInitialized
= false;
38 static StaticRefPtr
<nsIURLParser
> gNoAuthURLParser
;
39 static StaticRefPtr
<nsIURLParser
> gAuthURLParser
;
40 static StaticRefPtr
<nsIURLParser
> gStdURLParser
;
42 static void InitGlobals() {
43 nsCOMPtr
<nsIURLParser
> parser
;
45 parser
= do_GetService(NS_NOAUTHURLPARSER_CONTRACTID
);
46 NS_ASSERTION(parser
, "failed getting 'noauth' url parser");
48 gNoAuthURLParser
= parser
;
51 parser
= do_GetService(NS_AUTHURLPARSER_CONTRACTID
);
52 NS_ASSERTION(parser
, "failed getting 'auth' url parser");
54 gAuthURLParser
= parser
;
57 parser
= do_GetService(NS_STDURLPARSER_CONTRACTID
);
58 NS_ASSERTION(parser
, "failed getting 'std' url parser");
60 gStdURLParser
= parser
;
66 void net_ShutdownURLHelper() {
70 gNoAuthURLParser
= nullptr;
71 gAuthURLParser
= nullptr;
72 gStdURLParser
= nullptr;
75 //----------------------------------------------------------------------------
76 // nsIURLParser getters
77 //----------------------------------------------------------------------------
79 nsIURLParser
* net_GetAuthURLParser() {
80 if (!gInitialized
) InitGlobals();
81 return gAuthURLParser
;
84 nsIURLParser
* net_GetNoAuthURLParser() {
85 if (!gInitialized
) InitGlobals();
86 return gNoAuthURLParser
;
89 nsIURLParser
* net_GetStdURLParser() {
90 if (!gInitialized
) InitGlobals();
94 //---------------------------------------------------------------------------
95 // GetFileFromURLSpec implementations
96 //---------------------------------------------------------------------------
97 nsresult
net_GetURLSpecFromDir(nsIFile
* aFile
, nsACString
& result
) {
98 nsAutoCString escPath
;
99 nsresult rv
= net_GetURLSpecFromActualFile(aFile
, escPath
);
100 if (NS_FAILED(rv
)) return rv
;
102 if (escPath
.Last() != '/') {
110 nsresult
net_GetURLSpecFromFile(nsIFile
* aFile
, nsACString
& result
) {
111 nsAutoCString escPath
;
112 nsresult rv
= net_GetURLSpecFromActualFile(aFile
, escPath
);
113 if (NS_FAILED(rv
)) return rv
;
115 // if this file references a directory, then we need to ensure that the
116 // URL ends with a slash. this is important since it affects the rules
117 // for relative URL resolution when this URL is used as a base URL.
118 // if the file does not exist, then we make no assumption about its type,
119 // and simply leave the URL unmodified.
120 if (escPath
.Last() != '/') {
122 rv
= aFile
->IsDirectory(&dir
);
123 if (NS_SUCCEEDED(rv
) && dir
) escPath
+= '/';
130 //----------------------------------------------------------------------------
131 // file:// URL parsing
132 //----------------------------------------------------------------------------
134 nsresult
net_ParseFileURL(const nsACString
& inURL
, nsACString
& outDirectory
,
135 nsACString
& outFileBaseName
,
136 nsACString
& outFileExtension
) {
140 (uint32_t)StaticPrefs::network_standard_url_max_length()) {
141 return NS_ERROR_MALFORMED_URI
;
144 outDirectory
.Truncate();
145 outFileBaseName
.Truncate();
146 outFileExtension
.Truncate();
148 const nsPromiseFlatCString
& flatURL
= PromiseFlatCString(inURL
);
149 const char* url
= flatURL
.get();
151 nsAutoCString scheme
;
152 rv
= net_ExtractURLScheme(flatURL
, scheme
);
153 if (NS_FAILED(rv
)) return rv
;
155 if (!scheme
.EqualsLiteral("file")) {
156 NS_ERROR("must be a file:// url");
157 return NS_ERROR_UNEXPECTED
;
160 nsIURLParser
* parser
= net_GetNoAuthURLParser();
161 NS_ENSURE_TRUE(parser
, NS_ERROR_UNEXPECTED
);
163 uint32_t pathPos
, filepathPos
, directoryPos
, basenamePos
, extensionPos
;
164 int32_t pathLen
, filepathLen
, directoryLen
, basenameLen
, extensionLen
;
166 // invoke the parser to extract the URL path
167 rv
= parser
->ParseURL(url
, flatURL
.Length(), nullptr,
168 nullptr, // don't care about scheme
169 nullptr, nullptr, // don't care about authority
171 if (NS_FAILED(rv
)) return rv
;
173 // invoke the parser to extract filepath from the path
174 rv
= parser
->ParsePath(url
+ pathPos
, pathLen
, &filepathPos
, &filepathLen
,
175 nullptr, nullptr, // don't care about query
176 nullptr, nullptr); // don't care about ref
177 if (NS_FAILED(rv
)) return rv
;
179 filepathPos
+= pathPos
;
181 // invoke the parser to extract the directory and filename from filepath
182 rv
= parser
->ParseFilePath(url
+ filepathPos
, filepathLen
, &directoryPos
,
183 &directoryLen
, &basenamePos
, &basenameLen
,
184 &extensionPos
, &extensionLen
);
185 if (NS_FAILED(rv
)) return rv
;
187 if (directoryLen
> 0) {
188 outDirectory
= Substring(inURL
, filepathPos
+ directoryPos
, directoryLen
);
190 if (basenameLen
> 0) {
191 outFileBaseName
= Substring(inURL
, filepathPos
+ basenamePos
, basenameLen
);
193 if (extensionLen
> 0) {
195 Substring(inURL
, filepathPos
+ extensionPos
, extensionLen
);
197 // since we are using a no-auth url parser, there will never be a host
198 // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
203 //----------------------------------------------------------------------------
204 // path manipulation functions
205 //----------------------------------------------------------------------------
207 // Replace all /./ with a / while resolving URLs
209 void net_CoalesceDirs(netCoalesceFlags flags
, char* path
) {
210 /* Stolen from the old netlib's mkparse.c.
212 * modifies a url of the form /foo/../foo1 -> /foo1
213 * and /foo/./foo1 -> /foo/foo1
214 * and /foo/foo1/.. -> /foo/
218 char* lastslash
= path
;
219 uint32_t traversal
= 0;
220 uint32_t special_ftp_len
= 0;
222 MOZ_ASSERT(*path
== '/', "We expect the path to begin with /");
227 /* Remember if this url is a special ftp one: */
228 if (flags
& NET_COALESCE_DOUBLE_SLASH_IS_ROOT
) {
229 /* some schemes (for example ftp) have the peculiarity that
230 the path can begin with // or /%2F to mark the root of the
231 server's filesystem; a simple / only marks the root relative
232 to the user logging in. We remember the length of the marker */
233 if (nsCRT::strncasecmp(path
, "/%2F", 4) == 0) {
235 } else if (strncmp(path
, "//", 2) == 0) {
240 /* find the last slash before # or ? */
241 for (; (*fwdPtr
!= '\0') && (*fwdPtr
!= '?') && (*fwdPtr
!= '#'); ++fwdPtr
) {
244 /* found nothing, but go back one only */
245 /* if there is something to go back to */
246 if (fwdPtr
!= path
&& *fwdPtr
== '\0') {
250 /* search the slash */
251 for (; (fwdPtr
!= path
) && (*fwdPtr
!= '/'); --fwdPtr
) {
256 /* replace all %2E or %2e with . in the path */
257 /* but stop at lastslash if non null */
258 for (; (*fwdPtr
!= '\0') && (*fwdPtr
!= '?') && (*fwdPtr
!= '#') &&
259 (*lastslash
== '\0' || fwdPtr
!= lastslash
);
261 if (*fwdPtr
== '%' && *(fwdPtr
+ 1) == '2' &&
262 (*(fwdPtr
+ 2) == 'E' || *(fwdPtr
+ 2) == 'e')) {
270 // Copy remaining stuff past the #?;
271 for (; *fwdPtr
!= '\0'; ++fwdPtr
) {
274 *urlPtr
= '\0'; // terminate the url
276 // start again, this time for real
280 for (; (*fwdPtr
!= '\0') && (*fwdPtr
!= '?') && (*fwdPtr
!= '#'); ++fwdPtr
) {
281 if (*fwdPtr
== '/' && *(fwdPtr
+ 1) == '.' && *(fwdPtr
+ 2) == '/') {
282 // remove . followed by slash
284 } else if (*fwdPtr
== '/' && *(fwdPtr
+ 1) == '.' && *(fwdPtr
+ 2) == '.' &&
285 (*(fwdPtr
+ 3) == '/' ||
286 *(fwdPtr
+ 3) == '\0' || // This will take care of
287 *(fwdPtr
+ 3) == '?' || // something like foo/bar/..#sometag
288 *(fwdPtr
+ 3) == '#')) {
290 // reverse the urlPtr to the previous slash if possible
291 // if url does not allow relative root then drop .. above root
292 // otherwise retain them in the path
293 if (traversal
> 0 || !(flags
& NET_COALESCE_ALLOW_RELATIVE_ROOT
)) {
294 if (urlPtr
!= path
) urlPtr
--; // we must be going back at least by one
295 for (; *urlPtr
!= '/' && urlPtr
!= path
; urlPtr
--) {
298 --traversal
; // count back
299 // forward the fwdPtr past the ../
301 // if we have reached the beginning of the path
302 // while searching for the previous / and we remember
303 // that it is an url that begins with /%2F then
304 // advance urlPtr again by 3 chars because /%2F already
305 // marks the root of the path
306 if (urlPtr
== path
&& special_ftp_len
> 3) {
311 // special case if we have reached the end
312 // to preserve the last /
313 if (*fwdPtr
== '.' && *(fwdPtr
+ 1) == '\0') ++urlPtr
;
315 // there are too many /.. in this path, just copy them instead.
316 // forward the urlPtr past the /.. and copying it
318 // However if we remember it is an url that starts with
319 // /%2F and urlPtr just points at the "F" of "/%2F" then do
320 // not overwrite it with the /, just copy .. and move forward
322 if (special_ftp_len
> 3 && urlPtr
== path
+ special_ftp_len
- 1) {
333 // count the hierarchy, but only if we have not reached
334 // the root of some special urls with a special root marker
335 if (*fwdPtr
== '/' && *(fwdPtr
+ 1) != '.' &&
336 (special_ftp_len
!= 2 || *(fwdPtr
+ 1) != '/')) {
339 // copy the url incrementally
345 * Now lets remove trailing . case
346 * /foo/foo1/. -> /foo/foo1/
349 if ((urlPtr
> (path
+ 1)) && (*(urlPtr
- 1) == '.') &&
350 (*(urlPtr
- 2) == '/')) {
354 // Before we start copying past ?#, we must make sure we don't overwrite
355 // the first / character. If fwdPtr is also unchanged, just copy everything
356 // (this shouldn't happen unless we could get in here without a leading
358 if (urlPtr
== path
&& fwdPtr
!= path
) {
362 // Copy remaining stuff past the #?;
363 for (; *fwdPtr
!= '\0'; ++fwdPtr
) {
366 *urlPtr
= '\0'; // terminate the url
369 //----------------------------------------------------------------------------
371 //----------------------------------------------------------------------------
373 static bool net_IsValidSchemeChar(const char aChar
) {
374 return mozilla::net::rust_net_is_valid_scheme_char(aChar
);
377 /* Extract URI-Scheme if possible */
378 nsresult
net_ExtractURLScheme(const nsACString
& inURI
, nsACString
& scheme
) {
379 nsACString::const_iterator start
, end
;
380 inURI
.BeginReading(start
);
381 inURI
.EndReading(end
);
383 // Strip C0 and space from beginning
384 while (start
!= end
) {
385 if ((uint8_t)*start
> 0x20) {
391 Tokenizer
p(Substring(start
, end
), "\r\n\t");
393 if (!p
.CheckChar(IsAsciiAlpha
)) {
394 // First char must be alpha
395 return NS_ERROR_MALFORMED_URI
;
398 while (p
.CheckChar(net_IsValidSchemeChar
) || p
.CheckWhite()) {
399 // Skip valid scheme characters or \r\n\t
402 if (!p
.CheckChar(':')) {
403 return NS_ERROR_MALFORMED_URI
;
407 scheme
.StripTaggedASCII(ASCIIMask::MaskCRLFTab());
412 bool net_IsValidScheme(const nsACString
& scheme
) {
413 return mozilla::net::rust_net_is_valid_scheme(&scheme
);
416 bool net_IsAbsoluteURL(const nsACString
& uri
) {
417 nsACString::const_iterator start
, end
;
418 uri
.BeginReading(start
);
421 // Strip C0 and space from beginning
422 while (start
!= end
) {
423 if ((uint8_t)*start
> 0x20) {
429 Tokenizer
p(Substring(start
, end
), "\r\n\t");
431 // First char must be alpha
432 if (!p
.CheckChar(IsAsciiAlpha
)) {
436 while (p
.CheckChar(net_IsValidSchemeChar
) || p
.CheckWhite()) {
437 // Skip valid scheme characters or \r\n\t
439 if (!p
.CheckChar(':')) {
444 if (!p
.CheckChar('/')) {
449 if (p
.CheckChar('/')) {
450 // aSpec is really absolute. Ignore aBaseURI in this case
456 void net_FilterURIString(const nsACString
& input
, nsACString
& result
) {
459 const auto* start
= input
.BeginReading();
460 const auto* end
= input
.EndReading();
462 // Trim off leading and trailing invalid chars.
463 auto charFilter
= [](char c
) { return static_cast<uint8_t>(c
) > 0x20; };
464 const auto* newStart
= std::find_if(start
, end
, charFilter
);
466 std::find_if(std::reverse_iterator
<decltype(end
)>(end
),
467 std::reverse_iterator
<decltype(newStart
)>(newStart
),
471 // Check if chars need to be stripped.
472 bool needsStrip
= false;
473 const ASCIIMaskArray
& mask
= ASCIIMask::MaskCRLFTab();
474 for (const auto* itr
= start
; itr
!= end
; ++itr
) {
475 if (ASCIIMask::IsMasked(mask
, *itr
)) {
481 // Just use the passed in string rather than creating new copies if no
482 // changes are necessary.
483 if (newStart
== start
&& newEnd
== end
&& !needsStrip
) {
488 result
.Assign(Substring(newStart
, newEnd
));
490 result
.StripTaggedASCII(mask
);
494 nsresult
net_FilterAndEscapeURI(const nsACString
& aInput
, uint32_t aFlags
,
495 const ASCIIMaskArray
& aFilterMask
,
496 nsACString
& aResult
) {
499 const auto* start
= aInput
.BeginReading();
500 const auto* end
= aInput
.EndReading();
502 // Trim off leading and trailing invalid chars.
503 auto charFilter
= [](char c
) { return static_cast<uint8_t>(c
) > 0x20; };
504 const auto* newStart
= std::find_if(start
, end
, charFilter
);
506 std::find_if(std::reverse_iterator
<decltype(end
)>(end
),
507 std::reverse_iterator
<decltype(newStart
)>(newStart
),
511 return NS_EscapeAndFilterURL(Substring(newStart
, newEnd
), aFlags
,
512 &aFilterMask
, aResult
, fallible
);
516 bool net_NormalizeFileURL(const nsACString
& aURL
, nsCString
& aResultBuf
) {
517 bool writing
= false;
519 nsACString::const_iterator beginIter
, endIter
;
520 aURL
.BeginReading(beginIter
);
521 aURL
.EndReading(endIter
);
523 const char *s
, *begin
= beginIter
.get();
525 for (s
= begin
; s
!= endIter
.get(); ++s
) {
528 if (s
> begin
) aResultBuf
.Append(begin
, s
- begin
);
533 // Don't normalize any backslashes following the hash.
538 if (writing
&& s
> begin
) aResultBuf
.Append(begin
, s
- begin
);
544 //----------------------------------------------------------------------------
545 // miscellaneous (i.e., stuff that should really be elsewhere)
546 //----------------------------------------------------------------------------
// Lower-cases |c| in place if it is an ASCII upper-case letter ('A'..'Z').
// Every other value, including bytes >= 0x80, is left untouched.
static inline void ToLower(char& c) {
  if (c >= 'A' && c <= 'Z') {
    c = static_cast<char>(c + ('a' - 'A'));
  }
}

// ASCII-lower-cases exactly |length| bytes starting at |str|, in place.
// Embedded NUL bytes do not stop the conversion.
void net_ToLowerCase(char* str, uint32_t length) {
  for (char* const end = str + length; str < end; ++str) {
    ToLower(*str);
  }
}

// ASCII-lower-cases the NUL-terminated string |str| in place.
void net_ToLowerCase(char* str) {
  for (; *str; ++str) {
    ToLower(*str);
  }
}
// Scans [iter, stop) for the first character that appears in the
// NUL-terminated |set|.
//
// Returns a pointer to that character. If no character matches, returns the
// position where scanning ended: |stop|, or the first NUL byte encountered
// before |stop|. Callers in this file use the result directly as an end
// pointer, so a value is returned on every path.
char* net_FindCharInSet(const char* iter, const char* stop, const char* set) {
  for (; iter != stop && *iter; ++iter) {
    for (const char* s = set; *s; ++s) {
      if (*iter == *s) {
        return const_cast<char*>(iter);
      }
    }
  }
  return const_cast<char*>(iter);
}
569 char* net_FindCharNotInSet(const char* iter
, const char* stop
,
572 for (const char* s
= set
; *s
; ++s
) {
574 if (++iter
== stop
) break;
581 char* net_RFindCharNotInSet(const char* stop
, const char* iter
,
586 if (iter
== stop
) return (char*)iter
;
589 for (const char* s
= set
; *s
; ++s
) {
591 if (--iter
== stop
) break;
598 #define HTTP_LWS " \t"
600 // Return the index of the closing quote of the string, if any
601 static uint32_t net_FindStringEnd(const nsCString
& flatStr
,
602 uint32_t stringStart
, char stringDelim
) {
603 NS_ASSERTION(stringStart
< flatStr
.Length() &&
604 flatStr
.CharAt(stringStart
) == stringDelim
&&
605 (stringDelim
== '"' || stringDelim
== '\''),
606 "Invalid stringStart");
608 const char set
[] = {stringDelim
, '\\', '\0'};
610 // stringStart points to either the start quote or the last
611 // escaped char (the char following a '\\')
613 // Write to searchStart here, so that when we get back to the
614 // top of the loop right outside this one we search from the
616 uint32_t stringEnd
= flatStr
.FindCharInSet(set
, stringStart
+ 1);
617 if (stringEnd
== uint32_t(kNotFound
)) return flatStr
.Length();
619 if (flatStr
.CharAt(stringEnd
) == '\\') {
620 // Hit a backslash-escaped char. Need to skip over it.
621 stringStart
= stringEnd
+ 1;
622 if (stringStart
== flatStr
.Length()) return stringStart
;
624 // Go back to looking for the next escape or the string end
632 MOZ_ASSERT_UNREACHABLE("How did we get here?");
633 return flatStr
.Length();
636 static uint32_t net_FindMediaDelimiter(const nsCString
& flatStr
,
637 uint32_t searchStart
, char delimiter
) {
639 // searchStart points to the spot from which we should start looking
640 // for the delimiter.
641 const char delimStr
[] = {delimiter
, '"', '\0'};
642 uint32_t curDelimPos
= flatStr
.FindCharInSet(delimStr
, searchStart
);
643 if (curDelimPos
== uint32_t(kNotFound
)) return flatStr
.Length();
645 char ch
= flatStr
.CharAt(curDelimPos
);
646 if (ch
== delimiter
) {
651 // We hit the start of a quoted string. Look for its end.
652 searchStart
= net_FindStringEnd(flatStr
, curDelimPos
, ch
);
653 if (searchStart
== flatStr
.Length()) return searchStart
;
657 // searchStart now points to the first char after the end of the
658 // string, so just go back to the top of the loop and look for
659 // |delimiter| again.
662 MOZ_ASSERT_UNREACHABLE("How did we get here?");
663 return flatStr
.Length();
666 // aOffset should be added to aCharsetStart and aCharsetEnd if this
667 // function sets them.
668 static void net_ParseMediaType(const nsACString
& aMediaTypeStr
,
669 nsACString
& aContentType
,
670 nsACString
& aContentCharset
, int32_t aOffset
,
671 bool* aHadCharset
, int32_t* aCharsetStart
,
672 int32_t* aCharsetEnd
, bool aStrict
) {
673 const nsCString
& flatStr
= PromiseFlatCString(aMediaTypeStr
);
674 const char* start
= flatStr
.get();
675 const char* end
= start
+ flatStr
.Length();
677 // Trim LWS leading and trailing whitespace from type.
678 const char* type
= net_FindCharNotInSet(start
, end
, HTTP_LWS
);
679 const char* typeEnd
= net_FindCharInSet(type
, end
, HTTP_LWS
";");
681 const char* charset
= "";
682 const char* charsetEnd
= charset
;
683 int32_t charsetParamStart
= 0;
684 int32_t charsetParamEnd
= 0;
686 uint32_t consumed
= typeEnd
- type
;
688 // Iterate over parameters
689 bool typeHasCharset
= false;
690 uint32_t paramStart
= flatStr
.FindChar(';', typeEnd
- start
);
691 if (paramStart
!= uint32_t(kNotFound
)) {
692 // We have parameters. Iterate over them.
693 uint32_t curParamStart
= paramStart
+ 1;
695 uint32_t curParamEnd
=
696 net_FindMediaDelimiter(flatStr
, curParamStart
, ';');
698 const char* paramName
= net_FindCharNotInSet(
699 start
+ curParamStart
, start
+ curParamEnd
, HTTP_LWS
);
700 static const char charsetStr
[] = "charset=";
701 if (nsCRT::strncasecmp(paramName
, charsetStr
, sizeof(charsetStr
) - 1) ==
703 charset
= paramName
+ sizeof(charsetStr
) - 1;
704 charsetEnd
= start
+ curParamEnd
;
705 typeHasCharset
= true;
706 charsetParamStart
= curParamStart
- 1;
707 charsetParamEnd
= curParamEnd
;
710 consumed
= curParamEnd
;
711 curParamStart
= curParamEnd
+ 1;
712 } while (curParamStart
< flatStr
.Length());
715 bool charsetNeedsQuotedStringUnescaping
= false;
716 if (typeHasCharset
) {
717 // Trim LWS leading and trailing whitespace from charset.
718 charset
= net_FindCharNotInSet(charset
, charsetEnd
, HTTP_LWS
);
719 if (*charset
== '"') {
720 charsetNeedsQuotedStringUnescaping
= true;
722 start
+ net_FindStringEnd(flatStr
, charset
- start
, *charset
);
724 NS_ASSERTION(charsetEnd
>= charset
, "Bad charset parsing");
726 charsetEnd
= net_FindCharInSet(charset
, charsetEnd
, HTTP_LWS
";");
730 // if the server sent "*/*", it is meaningless, so do not store it.
731 // also, if type is the same as aContentType, then just update the
732 // charset. however, if charset is empty and aContentType hasn't
733 // changed, then don't wipe-out an existing aContentCharset. We
734 // also want to reject a mime-type if it does not include a slash.
735 // some servers give junk after the charset parameter, which may
736 // include a comma, so this check makes us a bit more tolerant.
738 if (type
!= typeEnd
&& memchr(type
, '/', typeEnd
- type
) != nullptr &&
739 (aStrict
? (net_FindCharNotInSet(start
+ consumed
, end
, HTTP_LWS
) == end
)
740 : (strncmp(type
, "*/*", typeEnd
- type
) != 0))) {
741 // Common case here is that aContentType is empty
742 bool eq
= !aContentType
.IsEmpty() &&
743 aContentType
.Equals(Substring(type
, typeEnd
),
744 nsCaseInsensitiveCStringComparator
);
746 aContentType
.Assign(type
, typeEnd
- type
);
747 ToLowerCase(aContentType
);
750 if ((!eq
&& *aHadCharset
) || typeHasCharset
) {
752 if (charsetNeedsQuotedStringUnescaping
) {
753 // parameters using the "quoted-string" syntax need
754 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
755 aContentCharset
.Truncate();
756 for (const char* c
= charset
; c
!= charsetEnd
; c
++) {
757 if (*c
== '\\' && c
+ 1 != charsetEnd
) {
761 aContentCharset
.Append(*c
);
764 aContentCharset
.Assign(charset
, charsetEnd
- charset
);
766 if (typeHasCharset
) {
767 *aCharsetStart
= charsetParamStart
+ aOffset
;
768 *aCharsetEnd
= charsetParamEnd
+ aOffset
;
771 // Only set a new charset position if this is a different type
772 // from the last one we had and it doesn't already have a
773 // charset param. If this is the same type, we probably want
774 // to leave the charset position on its first occurrence.
775 if (!eq
&& !typeHasCharset
) {
776 int32_t charsetStart
= int32_t(paramStart
);
777 if (charsetStart
== kNotFound
) charsetStart
= flatStr
.Length();
779 *aCharsetEnd
= *aCharsetStart
= charsetStart
+ aOffset
;
786 void net_ParseContentType(const nsACString
& aHeaderStr
,
787 nsACString
& aContentType
, nsACString
& aContentCharset
,
789 int32_t dummy1
, dummy2
;
790 net_ParseContentType(aHeaderStr
, aContentType
, aContentCharset
, aHadCharset
,
794 void net_ParseContentType(const nsACString
& aHeaderStr
,
795 nsACString
& aContentType
, nsACString
& aContentCharset
,
796 bool* aHadCharset
, int32_t* aCharsetStart
,
797 int32_t* aCharsetEnd
) {
799 // Augmented BNF (from RFC 2616 section 3.7):
801 // header-value = media-type *( LWS "," LWS media-type )
802 // media-type = type "/" subtype *( LWS ";" LWS parameter )
805 // parameter = attribute "=" value
807 // value = token | quoted-string
813 // text/html, text/html
814 // text/html,text/html; charset=ISO-8859-1
815 // text/html,text/html; charset="ISO-8859-1"
816 // text/html;charset=ISO-8859-1, text/html
817 // text/html;charset='ISO-8859-1', text/html
818 // application/octet-stream
821 *aHadCharset
= false;
822 const nsCString
& flatStr
= PromiseFlatCString(aHeaderStr
);
824 // iterate over media-types. Note that ',' characters can happen
825 // inside quoted strings, so we need to watch out for that.
826 uint32_t curTypeStart
= 0;
828 // curTypeStart points to the start of the current media-type. We want
829 // to look for its end.
830 uint32_t curTypeEnd
= net_FindMediaDelimiter(flatStr
, curTypeStart
, ',');
832 // At this point curTypeEnd points to the spot where the media-type
833 // starting at curTypeStart ends. Time to parse that!
835 Substring(flatStr
, curTypeStart
, curTypeEnd
- curTypeStart
),
836 aContentType
, aContentCharset
, curTypeStart
, aHadCharset
, aCharsetStart
,
839 // And let's move on to the next media-type
840 curTypeStart
= curTypeEnd
+ 1;
841 } while (curTypeStart
< flatStr
.Length());
844 void net_ParseRequestContentType(const nsACString
& aHeaderStr
,
845 nsACString
& aContentType
,
846 nsACString
& aContentCharset
,
849 // Augmented BNF (from RFC 7231 section 3.1.1.1):
851 // media-type = type "/" subtype *( OWS ";" OWS parameter )
854 // parameter = token "=" ( token / quoted-string )
859 // text/html; charset=ISO-8859-1
860 // text/html; charset="ISO-8859-1"
861 // application/octet-stream
864 aContentType
.Truncate();
865 aContentCharset
.Truncate();
866 *aHadCharset
= false;
867 const nsCString
& flatStr
= PromiseFlatCString(aHeaderStr
);
869 // A request Content-Type must hold exactly one media-type: find where
870 // the first media-type ends and parse it only if nothing follows it.
871 nsAutoCString contentType
, contentCharset
;
872 bool hadCharset
= false;
873 int32_t dummy1
, dummy2
;
874 uint32_t typeEnd
= net_FindMediaDelimiter(flatStr
, 0, ',');
875 if (typeEnd
!= flatStr
.Length()) {
876 // We have some stuff left at the end, so this is not a valid
877 // request Content-Type header.
880 net_ParseMediaType(flatStr
, contentType
, contentCharset
, 0, &hadCharset
,
881 &dummy1
, &dummy2
, true);
883 aContentType
= contentType
;
884 aContentCharset
= contentCharset
;
885 *aHadCharset
= hadCharset
;
888 bool net_IsValidHostName(const nsACString
& host
) {
889 // The host name is limited to 253 ascii characters.
890 if (host
.Length() > 253) {
894 const char* end
= host
.EndReading();
895 // Use explicit whitelists to select which characters we are
896 // willing to send to lower-level DNS logic. This is more
897 // self-documenting, and can also be slightly faster than the
898 // blacklist approach, since DNS names are the common case, and
899 // the commonest characters will tend to be near the start of
902 // Whitelist for DNS names (RFC 1035) with extra characters added
903 // for pragmatic reasons "$+_"
904 // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
905 if (net_FindCharNotInSet(host
.BeginReading(), end
,
906 "abcdefghijklmnopqrstuvwxyz"
908 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end
) {
912 // Might be a valid IPv6 link-local address containing a percent sign
913 return mozilla::net::HostIsIPLiteral(host
);
916 bool net_IsValidIPv4Addr(const nsACString
& aAddr
) {
917 return mozilla::net::rust_net_is_valid_ipv4_addr(&aAddr
);
920 bool net_IsValidIPv6Addr(const nsACString
& aAddr
) {
921 return mozilla::net::rust_net_is_valid_ipv6_addr(&aAddr
);
924 bool net_GetDefaultStatusTextForCode(uint16_t aCode
, nsACString
& aOutText
) {
926 // start with the most common
928 aOutText
.AssignLiteral("OK");
931 aOutText
.AssignLiteral("Not Found");
934 aOutText
.AssignLiteral("Moved Permanently");
937 aOutText
.AssignLiteral("Not Modified");
940 aOutText
.AssignLiteral("Temporary Redirect");
943 aOutText
.AssignLiteral("Internal Server Error");
948 aOutText
.AssignLiteral("Continue");
951 aOutText
.AssignLiteral("Switching Protocols");
954 aOutText
.AssignLiteral("Created");
957 aOutText
.AssignLiteral("Accepted");
960 aOutText
.AssignLiteral("Non Authoritative");
963 aOutText
.AssignLiteral("No Content");
966 aOutText
.AssignLiteral("Reset Content");
969 aOutText
.AssignLiteral("Partial Content");
972 aOutText
.AssignLiteral("Multi-Status");
975 aOutText
.AssignLiteral("Already Reported");
978 aOutText
.AssignLiteral("Multiple Choices");
981 aOutText
.AssignLiteral("Found");
984 aOutText
.AssignLiteral("See Other");
987 aOutText
.AssignLiteral("Use Proxy");
990 aOutText
.AssignLiteral("Permanent Redirect");
993 aOutText
.AssignLiteral("Bad Request");
996 aOutText
.AssignLiteral("Unauthorized");
999 aOutText
.AssignLiteral("Payment Required");
1002 aOutText
.AssignLiteral("Forbidden");
1005 aOutText
.AssignLiteral("Method Not Allowed");
1008 aOutText
.AssignLiteral("Not Acceptable");
1011 aOutText
.AssignLiteral("Proxy Authentication Required");
1014 aOutText
.AssignLiteral("Request Timeout");
1017 aOutText
.AssignLiteral("Conflict");
1020 aOutText
.AssignLiteral("Gone");
1023 aOutText
.AssignLiteral("Length Required");
1026 aOutText
.AssignLiteral("Precondition Failed");
1029 aOutText
.AssignLiteral("Request Entity Too Large");
1032 aOutText
.AssignLiteral("Request URI Too Long");
1035 aOutText
.AssignLiteral("Unsupported Media Type");
1038 aOutText
.AssignLiteral("Requested Range Not Satisfiable");
1041 aOutText
.AssignLiteral("Expectation Failed");
1044 aOutText
.AssignLiteral("I'm a teapot");
1047 aOutText
.AssignLiteral("Misdirected Request");
1050 aOutText
.AssignLiteral("Unprocessable Entity");
1053 aOutText
.AssignLiteral("Locked");
1056 aOutText
.AssignLiteral("Failed Dependency");
1059 aOutText
.AssignLiteral("Too Early");
1062 aOutText
.AssignLiteral("Upgrade Required");
1065 aOutText
.AssignLiteral("Precondition Required");
1068 aOutText
.AssignLiteral("Too Many Requests");
1071 aOutText
.AssignLiteral("Request Header Fields Too Large");
1074 aOutText
.AssignLiteral("Unavailable For Legal Reasons");
1077 aOutText
.AssignLiteral("Not Implemented");
1080 aOutText
.AssignLiteral("Bad Gateway");
1083 aOutText
.AssignLiteral("Service Unavailable");
1086 aOutText
.AssignLiteral("Gateway Timeout");
1089 aOutText
.AssignLiteral("HTTP Version Unsupported");
1092 aOutText
.AssignLiteral("Variant Also Negotiates");
1095 aOutText
.AssignLiteral("Insufficient Storage ");
1098 aOutText
.AssignLiteral("Loop Detected");
1101 aOutText
.AssignLiteral("Not Extended");
1104 aOutText
.AssignLiteral("Network Authentication Required");
1107 aOutText
.AssignLiteral("No Reason Phrase");
1113 static auto MakeNameMatcher(const nsACString
& aName
) {
1114 return [&aName
](const auto& param
) { return param
.mKey
.Equals(aName
); };
1117 static void AssignMaybeInvalidUTF8String(const nsACString
& aSource
,
1118 nsACString
& aDest
) {
1119 if (NS_FAILED(UTF_8_ENCODING
->DecodeWithoutBOMHandling(aSource
, aDest
))) {
1120 MOZ_CRASH("Out of memory when converting URL params.");
1126 bool URLParams::Has(const nsACString
& aName
) {
1127 return std::any_of(mParams
.cbegin(), mParams
.cend(), MakeNameMatcher(aName
));
1130 bool URLParams::Has(const nsACString
& aName
, const nsACString
& aValue
) {
1132 mParams
.cbegin(), mParams
.cend(), [&aName
, &aValue
](const auto& param
) {
1133 return param
.mKey
.Equals(aName
) && param
.mValue
.Equals(aValue
);
1137 void URLParams::Get(const nsACString
& aName
, nsACString
& aRetval
) {
1138 aRetval
.SetIsVoid(true);
1140 const auto end
= mParams
.cend();
1141 const auto it
= std::find_if(mParams
.cbegin(), end
, MakeNameMatcher(aName
));
1143 aRetval
.Assign(it
->mValue
);
1147 void URLParams::GetAll(const nsACString
& aName
, nsTArray
<nsCString
>& aRetval
) {
1150 for (uint32_t i
= 0, len
= mParams
.Length(); i
< len
; ++i
) {
1151 if (mParams
[i
].mKey
.Equals(aName
)) {
1152 aRetval
.AppendElement(mParams
[i
].mValue
);
1157 void URLParams::Append(const nsACString
& aName
, const nsACString
& aValue
) {
1158 Param
* param
= mParams
.AppendElement();
1159 param
->mKey
= aName
;
1160 param
->mValue
= aValue
;
1163 void URLParams::Set(const nsACString
& aName
, const nsACString
& aValue
) {
1164 Param
* param
= nullptr;
1165 for (uint32_t i
= 0, len
= mParams
.Length(); i
< len
;) {
1166 if (!mParams
[i
].mKey
.Equals(aName
)) {
1171 param
= &mParams
[i
];
1175 // Remove duplicates.
1176 mParams
.RemoveElementAt(i
);
1181 param
= mParams
.AppendElement();
1182 param
->mKey
= aName
;
1185 param
->mValue
= aValue
;
1188 void URLParams::Delete(const nsACString
& aName
) {
1189 mParams
.RemoveElementsBy(
1190 [&aName
](const auto& param
) { return param
.mKey
.Equals(aName
); });
1193 void URLParams::Delete(const nsACString
& aName
, const nsACString
& aValue
) {
1194 mParams
.RemoveElementsBy([&aName
, &aValue
](const auto& param
) {
1195 return param
.mKey
.Equals(aName
) && param
.mValue
.Equals(aValue
);
1200 void URLParams::DecodeString(const nsACString
& aInput
, nsACString
& aOutput
) {
1201 const char* const end
= aInput
.EndReading();
1202 for (const char* iter
= aInput
.BeginReading(); iter
!= end
;) {
1203 // replace '+' with U+0020
1205 aOutput
.Append(' ');
1210 // Percent decode algorithm
1212 const char* const first
= iter
+ 1;
1213 const char* const second
= first
+ 1;
1215 const auto asciiHexDigit
= [](char x
) {
1216 return (x
>= 0x41 && x
<= 0x46) || (x
>= 0x61 && x
<= 0x66) ||
1217 (x
>= 0x30 && x
<= 0x39);
1220 const auto hexDigit
= [](char x
) {
1221 return x
>= 0x30 && x
<= 0x39
1223 : (x
>= 0x41 && x
<= 0x46 ? x
- 0x37 : x
- 0x57);
1226 if (first
!= end
&& second
!= end
&& asciiHexDigit(*first
) &&
1227 asciiHexDigit(*second
)) {
1228 aOutput
.Append(hexDigit(*first
) * 16 + hexDigit(*second
));
1231 aOutput
.Append('%');
1238 aOutput
.Append(*iter
);
1241 AssignMaybeInvalidUTF8String(aOutput
, aOutput
);
1245 bool URLParams::ParseNextInternal(const char*& aStart
, const char* const aEnd
,
1246 bool aShouldDecode
, nsACString
* aOutputName
,
1247 nsACString
* aOutputValue
) {
1248 nsDependentCSubstring string
;
1250 const char* const iter
= std::find(aStart
, aEnd
, '&');
1252 string
.Rebind(aStart
, iter
);
1255 string
.Rebind(aStart
, aEnd
);
1259 if (string
.IsEmpty()) {
1263 const auto* const eqStart
= string
.BeginReading();
1264 const auto* const eqEnd
= string
.EndReading();
1265 const auto* const eqIter
= std::find(eqStart
, eqEnd
, '=');
1267 nsDependentCSubstring name
;
1268 nsDependentCSubstring value
;
1270 if (eqIter
!= eqEnd
) {
1271 name
.Rebind(eqStart
, eqIter
);
1272 value
.Rebind(eqIter
+ 1, eqEnd
);
1274 name
.Rebind(string
, 0);
1277 if (aShouldDecode
) {
1278 DecodeString(name
, *aOutputName
);
1279 DecodeString(value
, *aOutputValue
);
1283 AssignMaybeInvalidUTF8String(name
, *aOutputName
);
1284 AssignMaybeInvalidUTF8String(value
, *aOutputValue
);
1289 bool URLParams::Extract(const nsACString
& aInput
, const nsACString
& aName
,
1290 nsACString
& aValue
) {
1291 aValue
.SetIsVoid(true);
1292 return !URLParams::Parse(
1294 [&aName
, &aValue
](const nsACString
& name
, nsCString
&& value
) {
1295 if (aName
== name
) {
1296 aValue
= std::move(value
);
1303 void URLParams::ParseInput(const nsACString
& aInput
) {
1304 // Remove all the existing data before parsing a new input.
1307 URLParams::Parse(aInput
, true, [this](nsCString
&& name
, nsCString
&& value
) {
1308 mParams
.AppendElement(Param
{std::move(name
), std::move(value
)});
1313 void URLParams::SerializeString(const nsACString
& aInput
, nsACString
& aValue
) {
1314 const unsigned char* p
= (const unsigned char*)aInput
.BeginReading();
1315 const unsigned char* end
= p
+ aInput
.Length();
1320 aValue
.Append(0x2B);
1321 // Percent Encode algorithm
1322 } else if (*p
== 0x2A || *p
== 0x2D || *p
== 0x2E ||
1323 (*p
>= 0x30 && *p
<= 0x39) || (*p
>= 0x41 && *p
<= 0x5A) ||
1324 *p
== 0x5F || (*p
>= 0x61 && *p
<= 0x7A)) {
1327 aValue
.AppendPrintf("%%%.2X", *p
);
1334 void URLParams::Serialize(nsACString
& aValue
, bool aEncode
) const {
1338 for (uint32_t i
= 0, len
= mParams
.Length(); i
< len
; ++i
) {
1345 // XXX Actually, it's not necessary to build a new string object. Generally,
1346 // such cases could just convert each codepoint one-by-one.
1348 SerializeString(mParams
[i
].mKey
, aValue
);
1350 SerializeString(mParams
[i
].mValue
, aValue
);
1352 aValue
.Append(mParams
[i
].mKey
);
1354 aValue
.Append(mParams
[i
].mValue
);
1359 void URLParams::Sort() {
1360 mParams
.StableSort([](const Param
& lhs
, const Param
& rhs
) {
1361 // FIXME(emilio, bug 1888901): The URLSearchParams.sort() spec requires
1362 // comparing by utf-16 code points... That's a bit unfortunate, maybe we
1363 // can optimize the string conversions here?
1364 return Compare(NS_ConvertUTF8toUTF16(lhs
.mKey
),
1365 NS_ConvertUTF8toUTF16(rhs
.mKey
));
1369 } // namespace mozilla