1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
9 #include "mozilla/ArrayUtils.h"
10 #include "mozilla/BinarySearch.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/TextUtils.h"
15 #include "nsASCIIMask.h"
// Hex digit tables. hexCharsUpper is indexed with a nibble value (0-15) to
// obtain the matching uppercase digit. hexCharsUpperLower lists every
// character accepted as a hex digit (both cases) and is passed to strpbrk as
// the accept set when an escape sequence is validated.
17 static const char hexCharsUpper
[] = "0123456789ABCDEF";
18 static const char hexCharsUpperLower
[] = "0123456789ABCDEFabcdef";
// Per-byte flag table read through the IS_OK() macro, which ANDs the entry
// for a byte with the flags variable in scope. Rows 0x and 1x (the control
// characters) are all zero.
// NOTE(review): only rows 0x through 7x are visible in this excerpt; the
// remaining rows and the closing brace are presumably further down.
20 static const unsigned char netCharType
[256] =
22 /* Bit 0 xalpha -- the alphas
23 ** Bit 1 xpalpha -- as xalpha but
24 ** converts spaces to plus and plus to %2B
25 ** Bit 3 ... path -- as xalphas but doesn't escape '/'
26 ** Bit 4 ... NSURL-ref -- extra encoding for Apple NSURL compatibility.
27 ** This encoding set is used on encoded URL ref
28 ** components before converting a URL to an NSURL
29 ** so we don't include '%' to avoid double encoding.
31 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
32 { 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, /* 0x */
33 0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0, /* 1x */
34 /* ! " # $ % & ' ( ) * + , - . / */
35 0x0,0x8,0x0,0x0,0x8,0x8,0x8,0x8,0x8,0x8,0xf,0xc,0x8,0xf,0xf,0xc, /* 2x */
36 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */
37 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x8,0x8,0x0,0x8,0x0,0x8, /* 3x */
38 /* @ A B C D E F G H I J K L M N O */
39 0x8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, /* 4x */
40 /* bits for '@' changed from 7 to 0 so '@' can be escaped */
41 /* in usernames and passwords in publishing. */
// NOTE(review): the entry for @ in row 4x is 0x8, which matches neither the 7
// nor the 0 mentioned in the remark above - confirm whether the remark is stale.
42 /* P Q R S T U V W X Y Z [ \ ] ^ _ */
43 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x0,0x0,0x0,0x0,0xf, /* 5x */
44 /* ` a b c d e f g h i j k l m n o */
45 0x0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, /* 6x */
46 /* p q r s t u v w x y z { | } ~ DEL */
47 0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x0,0x0,0x0,0x8,0x0, /* 7x */
// Helper macros used by the escaping and unescaping routines.
// NOTE(review): the `#define UNHEX(C)` header line is not visible in this
// excerpt. The three continuation lines below map 0-9, A-F and a-f to the
// values 0-15 and every other character to 0.
51 /* decode % escaped hex codes into character values
54 ((C >= '0' && C <= '9') ? C - '0' : \
55 ((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \
56 ((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0)))
// IS_OK(C) is non-zero when the netCharType entry for C shares at least one
// bit with the aFlags variable in scope at the point of use.
59 #define IS_OK(C) (netCharType[((unsigned char)(C))] & (aFlags))
// Character that introduces an escape sequence.
60 #define HEX_ESCAPE '%'
// Upper bound on the number of code units AppendPercentHex writes for one
// character. Callers assert against it and use it to size scratch buffers.
62 static const uint32_t ENCODE_MAX_LEN
= 6; // %uABCD
// Narrow-character overload: stores the two uppercase hex digits of aChar in
// aBuffer, high nibble first, advancing the local index i after each store.
// NOTE(review): the lines that declare i, write the leading escape character
// and return the count are not visible in this excerpt - confirm.
64 static uint32_t AppendPercentHex(char* aBuffer
, unsigned char aChar
) {
67 aBuffer
[i
++] = hexCharsUpper
[aChar
>> 4]; // high nibble
68 aBuffer
[i
++] = hexCharsUpper
[aChar
& 0xF]; // low nibble
// UTF-16 overload: stores up to four uppercase hex digits of aChar in aBuffer,
// most significant nibble first (bits 15-12, 11-8, 7-4, 3-0).
// NOTE(review): the declaration of i, the prefix characters, any condition
// guarding the two high-byte digits and the return statement are not visible
// in this excerpt - confirm before relying on the exact output shape.
72 static uint32_t AppendPercentHex(char16_t
* aBuffer
, char16_t aChar
) {
// Digits for the high byte.
77 aBuffer
[i
++] = hexCharsUpper
[aChar
>> 12]; // high-byte high nibble
78 aBuffer
[i
++] = hexCharsUpper
[(aChar
>> 8) & 0xF]; // high-byte low nibble
// Digits for the low byte.
80 aBuffer
[i
++] = hexCharsUpper
[(aChar
>> 4) & 0xF]; // low-byte high nibble
81 aBuffer
[i
++] = hexCharsUpper
[aChar
& 0xF]; // low-byte low nibble
// nsEscape: builds a NUL-terminated copy of the first aLength bytes of aStr in
// a buffer obtained from moz_xmalloc, writing hex digits for escaped bytes,
// and stores the number of bytes written (excluding the terminator) through
// aOutputLength.
// NOTE(review): the trailing parameter(s), the tests that decide whether a
// byte is escaped, the early-return bodies and the final return are not
// visible in this excerpt.
85 //----------------------------------------------------------------------------------------
86 char* nsEscape(const char* aStr
, size_t aLength
, size_t* aOutputLength
,
88 //----------------------------------------------------------------------------------------
// Tally used below to size the output buffer.
94 size_t charsToEscape
= 0;
96 const unsigned char* src
= (const unsigned char*)aStr
;
// Pass 1 over the input (loop body not visible here).
97 for (size_t i
= 0; i
< aLength
; ++i
) {
103 // calculate how much memory should be allocated
104 // original length + 2 bytes for each escaped character + terminating '\0'
105 // do the sum in steps to check for overflow
106 size_t dstSize
= aLength
+ 1 + charsToEscape
;
// A result no larger than aLength means the first addition wrapped around.
107 if (dstSize
<= aLength
) {
// charsToEscape is added a second time, giving the 2 extra bytes per escape.
110 dstSize
+= charsToEscape
;
// Wrap-around check for the second addition.
111 if (dstSize
< aLength
) {
115 // fail if we need more than 4GB
116 if (dstSize
> UINT32_MAX
) {
120 char* result
= (char*)moz_xmalloc(dstSize
);
// dst is the write cursor into result.
122 unsigned char* dst
= (unsigned char*)result
;
// url_XPAlphas has its own loop because it also rewrites spaces as plus signs.
123 if (aFlags
== url_XPAlphas
) {
124 for (size_t i
= 0; i
< aLength
; ++i
) {
125 unsigned char c
= *src
++;
128 } else if (c
== ' ') {
129 *dst
++ = '+'; /* convert spaces to pluses */
132 *dst
++ = hexCharsUpper
[c
>> 4]; /* high nibble */
133 *dst
++ = hexCharsUpper
[c
& 0x0f]; /* low nibble */
// Loop used for every other flag value.
137 for (size_t i
= 0; i
< aLength
; ++i
) {
138 unsigned char c
= *src
++;
143 *dst
++ = hexCharsUpper
[c
>> 4]; /* high nibble */
144 *dst
++ = hexCharsUpper
[c
& 0x0f]; /* low nibble */
149 *dst
= '\0'; /* tack on eos */
// Output length is the distance the write cursor travelled.
151 *aOutputLength
= dst
- (unsigned char*)result
;
// nsUnescape: delegates to nsUnescapeCount(aStr), which decodes the buffer
// in place.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably aStr itself is returned - confirm.
157 //----------------------------------------------------------------------------------------
158 char* nsUnescape(char* aStr
)
159 //----------------------------------------------------------------------------------------
161 nsUnescapeCount(aStr
);
// nsUnescapeCount: decodes hex escape sequences in aStr in place and returns
// the resulting length, computed as the distance between the write cursor
// dst and the start of the buffer.
// NOTE(review): the declarations of src, dst, c1 and c2, the main loop header
// and several branches are not visible in this excerpt.
165 //----------------------------------------------------------------------------------------
166 int32_t nsUnescapeCount(char* aStr
)
167 //----------------------------------------------------------------------------------------
// pc1 and pc2 alias c1 and c2; they are handed to strpbrk below to test
// whether each holds a hex digit.
174 char* const pc1
= c1
;
175 char* const pc2
= c2
;
178 // A null string was passed in. Nothing to escape.
179 // Returns early as the string might not actually be mutable with
186 if (*(src
+ 1) == '\0') {
// Not a decodable escape when the current char is not the escape character
// or when either following character is not a hex digit.
192 if (*src
!= HEX_ESCAPE
|| strpbrk(pc1
, hexCharsUpperLower
) == nullptr ||
193 strpbrk(pc2
, hexCharsUpperLower
) == nullptr) {
196 src
++; /* walk over escape */
// First hex digit becomes the high nibble of the output byte.
198 *dst
= UNHEX(*src
) << 4;
// Second hex digit is added in as the low nibble.
202 *dst
= (*dst
+ UNHEX(*src
));
210 return (int)(dst
- aStr
);
212 } /* NET_UnEscapeCnt */
214 void nsAppendEscapedHTML(const nsACString
& aSrc
, nsACString
& aDst
) {
215 // Preparation: aDst's length will increase by at least aSrc's length. If the
216 // addition overflows, we skip this, which is fine, and we'll likely abort
217 // while (infallibly) appending due to aDst becoming too large.
218 mozilla::CheckedInt
<nsACString::size_type
> newCapacity
= aDst
.Length();
219 newCapacity
+= aSrc
.Length();
220 if (newCapacity
.isValid()) {
221 aDst
.SetCapacity(newCapacity
.value());
224 for (auto cur
= aSrc
.BeginReading(); cur
!= aSrc
.EndReading(); cur
++) {
226 aDst
.AppendLiteral("<");
227 } else if (*cur
== '>') {
228 aDst
.AppendLiteral(">");
229 } else if (*cur
== '&') {
230 aDst
.AppendLiteral("&");
231 } else if (*cur
== '"') {
232 aDst
.AppendLiteral(""");
233 } else if (*cur
== '\'') {
234 aDst
.AppendLiteral("'");
241 //----------------------------------------------------------------------------------------
243 // The following table encodes which characters need to be escaped for which
244 // parts of a URL. The bits are the "url components" in the enum EscapeMask,
// Compile-time helper: ORs aFlags into the aTable entry of every character in
// the string literal aChars. The loop stops at N - 1, so the last array
// element (the terminating NUL of a literal) is not marked.
// NOTE(review): the template header and the aFlags parameter line are not
// visible in this excerpt.
248 static constexpr void AddUnescapedChars(const char (&aChars
)[N
],
250 std::array
<uint32_t, 256>& aTable
) {
251 for (size_t i
= 0; i
< N
- 1; ++i
) {
// Cast through unsigned char so the character is a valid table index.
252 aTable
[static_cast<unsigned char>(aChars
[i
])] |= aFlags
;
// Builds the 256-entry table that records, per byte value, the escape modes
// in which that byte is left unescaped. The table starts zeroed and bits are
// added through AddUnescapedChars.
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt.
256 static constexpr std::array
<uint32_t, 256> BuildEscapeChars() {
// Union of every per-component mode bit used below.
257 constexpr uint32_t kAllModes
= esc_Scheme
| esc_Username
| esc_Password
|
258 esc_Host
| esc_Directory
| esc_FileBaseName
|
259 esc_FileExtension
| esc_Param
| esc_Query
|
260 esc_Ref
| esc_ExtHandler
;
262 std::array
<uint32_t, 256> table
{0};
264 // Alphanumerics and a few punctuation marks are left unescaped in every mode.
265 AddUnescapedChars("0123456789", kAllModes
, table
);
266 AddUnescapedChars("ABCDEFGHIJKLMNOPQRSTUVWXYZ", kAllModes
, table
);
267 AddUnescapedChars("abcdefghijklmnopqrstuvwxyz", kAllModes
, table
);
268 AddUnescapedChars("!$&()*+,-_~", kAllModes
, table
);
270 // Extra characters which aren't escaped in particular escape modes.
271 AddUnescapedChars(".", esc_Scheme
, table
);
272 // esc_Username has no additional unescaped characters.
273 AddUnescapedChars("|", esc_Password
, table
);
274 AddUnescapedChars(".", esc_Host
, table
);
275 AddUnescapedChars("'./:;=@[]|", esc_Directory
, table
);
276 AddUnescapedChars("'.:;=@[]|", esc_FileBaseName
, table
);
277 AddUnescapedChars("':;=@[]|", esc_FileExtension
, table
);
278 AddUnescapedChars(".:;=@[\\]^`{|}", esc_Param
, table
);
279 AddUnescapedChars("./:;=?@[\\]^`{|}", esc_Query
, table
);
280 AddUnescapedChars("#'./:;=?@[\\]^{|}", esc_Ref
, table
);
281 AddUnescapedChars("#'./:;=?@[]", esc_ExtHandler
, table
);
// The lookup table itself, computed at compile time by BuildEscapeChars() and
// consulted by both dontNeedEscape overloads.
286 static constexpr std::array
<uint32_t, 256> EscapeChars
= BuildEscapeChars();
// Returns true when the EscapeChars entry for aChar has at least one bit in
// common with aFlags. An unsigned char always indexes inside the 256-entry
// table, so no range check is needed in this overload.
288 static bool dontNeedEscape(unsigned char aChar
, uint32_t aFlags
) {
289 return EscapeChars
[(size_t)aChar
] & aFlags
;
// 16-bit overload: the table is only consulted when aChar is a valid index
// (below EscapeChars.size()).
// NOTE(review): the alternative of the conditional expression, used for
// larger values, is not visible in this excerpt.
291 static bool dontNeedEscape(uint16_t aChar
, uint32_t aFlags
) {
292 return aChar
< EscapeChars
.size() ? (EscapeChars
[(size_t)aChar
] & aFlags
)
// Compile-time regression guard: a lambda evaluated inside static_assert walks
// every index and compares the hand-written legacy table against the
// generated EscapeChars table.
// NOTE(review): the statements executed on a mismatch and the closing of the
// lambda are not visible in this excerpt.
296 // Temporary static assert to make sure that the rewrite to using
297 // `BuildEscapeChars` didn't change the final array in memory.
298 // It will be removed in Bug 1750945.
300 static_assert([]() constexpr {
301 constexpr uint32_t OldEscapeChars
[256] =
303 // 0 1 2 3 4 5 6 7 8 9 A B C D E F
305 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
306 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
307 0,132095, 0,131584,132095, 0,132095,131696,132095,132095,132095,132095,132095,132095,132025,131856, // 2x !"#$%&'()*+,-./
308 132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132080,132080, 0,132080, 0,131840, // 3x 0123456789:;<=>?
309 132080,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095, // 4x @ABCDEFGHIJKLMNO
310 132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132080, 896,132080, 896,132095, // 5x PQRSTUVWXYZ[\]^_
311 384,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095, // 6x `abcdefghijklmno
312 132095,132095,132095,132095,132095,132095,132095,132095,132095,132095,132095, 896, 1012, 896,132095, 0, // 7x pqrstuvwxyz{|}~ DEL
313 0 // 80 to FF are zero
// Element-by-element comparison of the legacy and generated tables.
317 for (size_t i
= 0; i
< EscapeChars
.size(); ++i
) {
318 if (OldEscapeChars
[i
] != EscapeChars
[i
]) {
325 //----------------------------------------------------------------------------------------
328 * Templated helper for URL escaping a portion of a string.
330 * @param aPart The pointer to the beginning of the portion of the string to
332 * @param aPartLen The length of the string to escape.
333 * @param aFlags Flags used to configure escaping. @see EscapeMask
334 * @param aResult String that has the URL escaped portion appended to. Only
335 * altered if the string is URL escaped or |esc_AlwaysCopy| is specified.
336 * @param aDidAppend Indicates whether or not data was appended to |aResult|.
337 * @return NS_ERROR_INVALID_ARG, NS_ERROR_OUT_OF_MEMORY on failure.
// Shared implementation behind the URL-escaping entry points, written once
// for 8-bit and 16-bit string types (the static_assert restricts the
// character size to 1 or 2 bytes). Output is staged in a 100-element stack
// buffer and flushed into aResult with fallible appends; aDidAppend receives
// the final value of the local `writing` flag.
// NOTE(review): the template header, the null check around the early return,
// and the places where `writing` is switched on are not visible in this
// excerpt.
340 static nsresult
T_EscapeURL(const typename
T::char_type
* aPart
, size_t aPartLen
,
341 uint32_t aFlags
, const ASCIIMaskArray
* aFilterMask
,
342 T
& aResult
, bool& aDidAppend
) {
343 typedef nsCharTraits
<typename
T::char_type
> traits
;
344 typedef typename
traits::unsigned_char_type unsigned_char_type
;
345 static_assert(sizeof(*aPart
) == 1 || sizeof(*aPart
) == 2,
346 "unexpected char type");
349 MOZ_ASSERT_UNREACHABLE("null pointer");
350 return NS_ERROR_INVALID_ARG
;
// Decode the behaviour switches carried in aFlags once, up front.
353 bool forced
= !!(aFlags
& esc_Forced
);
354 bool ignoreNonAscii
= !!(aFlags
& esc_OnlyASCII
);
355 bool ignoreAscii
= !!(aFlags
& esc_OnlyNonASCII
);
356 bool writing
= !!(aFlags
& esc_AlwaysCopy
);
357 bool colon
= !!(aFlags
& esc_Colon
);
358 bool spaces
= !!(aFlags
& esc_Spaces
);
// Read the input as unsigned so the comparisons below against 0x7f behave.
360 auto src
= reinterpret_cast<const unsigned_char_type
*>(aPart
);
// Staging buffer; tempBufferPos is the number of elements currently in use.
362 typename
T::char_type tempBuffer
[100];
363 unsigned int tempBufferPos
= 0;
365 for (size_t i
= 0; i
< aPartLen
; ++i
) {
366 unsigned_char_type c
= *src
++;
368 // If there is a filter, we wish to skip any characters which match it.
369 // This is needed so we don't perform an extra pass just to extract the
370 // filtered characters.
371 if (aFilterMask
&& mozilla::ASCIIMask::IsMasked(*aFilterMask
, c
)) {
// Copy the first i characters of the input, i.e. everything before this one.
373 if (!aResult
.Append(aPart
, i
, mozilla::fallible
)) {
374 return NS_ERROR_OUT_OF_MEMORY
;
381 // If the char does not have to be escaped, or whatever follows % is
382 // a valid escaped string, just copy the char.
384 // Also the % will not be escaped until forced
385 // See bugzilla bug 61269 for details why we changed this
387 // And, we will not escape non-ascii characters if requested.
388 // On special request we will also escape the colon even when
389 // not covered by the matrix.
390 // ignoreAscii is not honored for control characters (C0 and DEL)
392 // 0x20..0x7e are the valid ASCII characters.
393 if ((dontNeedEscape(c
, aFlags
) || (c
== HEX_ESCAPE
&& !forced
) ||
394 (c
> 0x7f && ignoreNonAscii
) ||
395 (c
>= 0x20 && c
< 0x7f && ignoreAscii
)) &&
396 !(c
== ':' && colon
) && !(c
== ' ' && spaces
)) {
// Pass-through: stage the character unchanged.
398 tempBuffer
[tempBufferPos
++] = c
;
400 } else { /* do the escape magic */
// Copy the first i characters of the input, i.e. everything before this one.
402 if (!aResult
.Append(aPart
, i
, mozilla::fallible
)) {
403 return NS_ERROR_OUT_OF_MEMORY
;
// Stage the escape sequence directly behind what is already buffered.
407 uint32_t len
= ::AppendPercentHex(tempBuffer
+ tempBufferPos
, c
);
408 tempBufferPos
+= len
;
409 MOZ_ASSERT(len
<= ENCODE_MAX_LEN
, "potential buffer overflow");
412 // Flush the temp buffer if it doesn't have room for another encoded char.
413 if (tempBufferPos
>= mozilla::ArrayLength(tempBuffer
) - ENCODE_MAX_LEN
) {
414 NS_ASSERTION(writing
, "should be writing");
415 if (!aResult
.Append(tempBuffer
, tempBufferPos
, mozilla::fallible
)) {
416 return NS_ERROR_OUT_OF_MEMORY
;
// Append whatever is still staged.
422 if (!aResult
.Append(tempBuffer
, tempBufferPos
, mozilla::fallible
)) {
423 return NS_ERROR_OUT_OF_MEMORY
;
426 aDidAppend
= writing
;
// Pointer-plus-length entry point. It forwards to NS_EscapeURLSpan with a
// span built from aPart and the local partLen, and returns that result.
// NOTE(review): the declaration of partLen and the condition under which
// strlen() supplies the length are not visible in this excerpt - presumably
// a negative aPartLen means "NUL-terminated"; confirm.
430 bool NS_EscapeURL(const char* aPart
, int32_t aPartLen
, uint32_t aFlags
,
431 nsACString
& aResult
) {
434 partLen
= strlen(aPart
);
439 return NS_EscapeURLSpan(mozilla::Span(aPart
, partLen
), aFlags
, aResult
);
// Span-based entry point: runs T_EscapeURL over the span with no filter mask
// (nullptr) and calls NS_ABORT_OOM on one path.
// NOTE(review): the remaining T_EscapeURL arguments, the condition guarding
// the abort and the return statement are not visible in this excerpt.
442 bool NS_EscapeURLSpan(mozilla::Span
<const char> aStr
, uint32_t aFlags
,
443 nsACString
& aResult
) {
444 bool appended
= false;
445 nsresult rv
= T_EscapeURL(aStr
.Elements(), aStr
.Length(), aFlags
, nullptr,
// The size reported to the abort handler is the current result size in bytes.
448 ::NS_ABORT_OOM(aResult
.Length() * sizeof(nsACString::char_type
));
// Fallible nsACString overload: runs T_EscapeURL over aStr with no filter
// mask (nullptr).
// NOTE(review): the rest of the call, the handling of rv and `appended`, and
// the return statement are not visible in this excerpt.
454 nsresult
NS_EscapeURL(const nsACString
& aStr
, uint32_t aFlags
,
455 nsACString
& aResult
, const mozilla::fallible_t
&) {
456 bool appended
= false;
457 nsresult rv
= T_EscapeURL(aStr
.Data(), aStr
.Length(), aFlags
, nullptr,
// Like the fallible NS_EscapeURL overload, but forwards the caller-supplied
// aFilterMask to T_EscapeURL. On one path aStr is assigned to aResult
// fallibly, and NS_ERROR_OUT_OF_MEMORY is returned if that assignment fails.
// NOTE(review): the aResult parameter line, the condition selecting the
// Assign path (presumably "nothing was appended") and the final return are
// not visible in this excerpt.
471 nsresult
NS_EscapeAndFilterURL(const nsACString
& aStr
, uint32_t aFlags
,
472 const ASCIIMaskArray
* aFilterMask
,
474 const mozilla::fallible_t
&) {
475 bool appended
= false;
476 nsresult rv
= T_EscapeURL(aStr
.Data(), aStr
.Length(), aFlags
, aFilterMask
,
484 if (!aResult
.Assign(aStr
, mozilla::fallible
)) {
485 return NS_ERROR_OUT_OF_MEMORY
;
// UTF-16 overload: runs T_EscapeURL<nsAString> over aStr with no filter mask
// and calls NS_ABORT_OOM on one path.
// NOTE(review): the declaration of `result`, the condition guarding the abort
// and the return statements are not visible in this excerpt.
492 const nsAString
& NS_EscapeURL(const nsAString
& aStr
, uint32_t aFlags
,
493 nsAString
& aResult
) {
495 nsresult rv
= T_EscapeURL
<nsAString
>(aStr
.Data(), aStr
.Length(), aFlags
,
496 nullptr, aResult
, result
);
// The size reported to the abort handler is the current result size in bytes.
499 ::NS_ABORT_OOM(aResult
.Length() * sizeof(nsAString::char_type
));
508 // Starting at aStr[aStart] find the first index in aStr that matches any
509 // character that is forbidden by aFunction. Return false if not found.
// The string length is read once into l before scanning.
// NOTE(review): the output-index parameter and the bodies that store the
// index and return are not visible in this excerpt.
510 static bool FindFirstMatchFrom(const nsString
& aStr
, size_t aStart
,
511 const std::function
<bool(char16_t
)>& aFunction
,
513 for (size_t j
= aStart
, l
= aStr
.Length(); j
< l
; ++j
) {
// aFunction returning true marks the character at j as a match.
514 if (aFunction(aStr
[j
])) {
// Predicate-driven overload: scans aStr with FindFirstMatchFrom and, for each
// match at index j, appends the unescaped run [i, j) followed by the escape
// sequence for aStr[j]. The MOZ_UNLIKELY hints mark the paths where a
// character needs escaping as the uncommon ones.
// NOTE(review): the declaration of j, the updates of i and didEscape, the
// loop exit and the return statements are not visible in this excerpt.
522 const nsAString
& NS_EscapeURL(const nsString
& aStr
,
523 const std::function
<bool(char16_t
)>& aFunction
,
524 nsAString
& aResult
) {
525 bool didEscape
= false;
// The loop header has no increment; i is advanced inside the body.
526 for (size_t i
= 0, strLen
= aStr
.Length(); i
< strLen
;) {
528 if (MOZ_UNLIKELY(FindFirstMatchFrom(aStr
, i
, aFunction
, &j
))) {
// Reserve at least the input length in the output.
532 aResult
.SetCapacity(aStr
.Length());
535 // The substring from 'i' up to 'j' that needs no escaping.
536 aResult
.Append(nsDependentSubstring(aStr
, i
, j
- i
));
// Scratch space sized for the longest possible escape sequence.
538 char16_t buffer
[ENCODE_MAX_LEN
];
539 uint32_t bufferLen
= ::AppendPercentHex(buffer
, aStr
[j
]);
540 MOZ_ASSERT(bufferLen
<= ENCODE_MAX_LEN
, "buffer overflow");
541 aResult
.Append(buffer
, bufferLen
);
544 if (MOZ_UNLIKELY(didEscape
)) {
545 // The tail of the string that needs no escaping.
546 aResult
.Append(nsDependentSubstring(aStr
, i
, strLen
- i
));
551 if (MOZ_UNLIKELY(didEscape
)) {
// Aborting wrapper: calls the fallible NS_UnescapeURL overload and hands an
// out-of-memory result to NS_ABORT_OOM.
// NOTE(review): the declaration of rv and the return statement are not
// visible in this excerpt. aLen is a signed int32_t, so the size passed to
// NS_ABORT_OOM may not be meaningful if aLen is negative - confirm.
557 bool NS_UnescapeURL(const char* aStr
, int32_t aLen
, uint32_t aFlags
,
558 nsACString
& aResult
) {
559 bool didAppend
= false;
561 NS_UnescapeURL(aStr
, aLen
, aFlags
, aResult
, didAppend
, mozilla::fallible
);
562 if (rv
== NS_ERROR_OUT_OF_MEMORY
) {
563 ::NS_ABORT_OOM(aLen
* sizeof(nsACString::char_type
));
// Fallible unescaper. Scans [aStr, aStr + len) for hex escape sequences,
// decodes the ones permitted by aFlags into aResult and copies the
// surrounding text with memcpy. aResult must be empty on entry (asserted).
// aDidAppend receives the final value of the local `writing` flag.
// NOTE(review): the null check around the early return, the declarations of
// len and destPos, the updates of last, destPos and writing, and the final
// return are not visible in this excerpt.
569 nsresult
NS_UnescapeURL(const char* aStr
, int32_t aLen
, uint32_t aFlags
,
570 nsACString
& aResult
, bool& aDidAppend
,
571 const mozilla::fallible_t
&) {
573 MOZ_ASSERT_UNREACHABLE("null pointer");
574 return NS_ERROR_INVALID_ARG
;
577 MOZ_ASSERT(aResult
.IsEmpty(),
578 "Passing a non-empty string as an out parameter!");
// Lengths of UINT32_MAX or more are rejected as out-of-memory.
582 size_t stringLength
= strlen(aStr
);
583 if (stringLength
>= UINT32_MAX
) {
584 return NS_ERROR_OUT_OF_MEMORY
;
// Decode the behaviour switches carried in aFlags once, up front.
591 bool ignoreNonAscii
= !!(aFlags
& esc_OnlyASCII
);
592 bool ignoreAscii
= !!(aFlags
& esc_OnlyNonASCII
);
593 bool writing
= !!(aFlags
& esc_AlwaysCopy
);
594 bool skipControl
= !!(aFlags
& esc_SkipControl
);
595 bool skipInvalidHostChar
= !!(aFlags
& esc_Host
);
597 unsigned char* destPtr
;
// Size the output to the input length; it is truncated to destPos later.
601 if (!aResult
.SetLength(len
, mozilla::fallible
)) {
602 return NS_ERROR_OUT_OF_MEMORY
;
605 destPtr
= reinterpret_cast<unsigned char*>(aResult
.BeginWriting());
// `last` is the start of the input run copied by the next memcpy.
608 const char* last
= aStr
;
609 const char* end
= aStr
+ len
;
611 for (const char* p
= aStr
; p
< end
; ++p
) {
// An escape needs both following characters to lie inside the input.
612 if (*p
== HEX_ESCAPE
&& p
+ 2 < end
) {
613 unsigned char c1
= *((unsigned char*)p
+ 1);
614 unsigned char c2
= *((unsigned char*)p
+ 2);
// Candidate decoded byte; only used if every test below passes.
615 unsigned char u
= (UNHEX(c1
) << 4) + UNHEX(c2
);
// Tests, in order: both characters are hex digits; with esc_Host set the
// decoded byte must either need no escaping or have c1 at or above 8; then
// the ASCII and non-ASCII filters, where c1 below 8 means the decoded byte is
// below 0x80. In the last visible test, c1 below 2 covers 0x00-0x1F and 7F
// is DEL.
// NOTE(review): the line tying that last test to skipControl is not visible.
616 if (mozilla::IsAsciiHexDigit(c1
) && mozilla::IsAsciiHexDigit(c2
) &&
617 (!skipInvalidHostChar
|| dontNeedEscape(u
, aFlags
) || c1
>= '8') &&
618 ((c1
< '8' && !ignoreAscii
) || (c1
>= '8' && !ignoreNonAscii
)) &&
620 (c1
< '2' || (c1
== '7' && (c2
== 'f' || c2
== 'F'))))) {
// Executed while `writing` is still false; MOZ_UNLIKELY marks it as the
// uncommon case.
621 if (MOZ_UNLIKELY(!writing
)) {
623 if (!aResult
.SetLength(len
, mozilla::fallible
)) {
624 return NS_ERROR_OUT_OF_MEMORY
;
627 destPtr
= reinterpret_cast<unsigned char*>(aResult
.BeginWriting());
// Copy the input between `last` and the escape at p.
630 auto toCopy
= p
- last
;
631 memcpy(destPtr
+ destPos
, last
, toCopy
);
633 MOZ_ASSERT(destPos
<= len
);
// Store the decoded byte.
636 destPtr
[destPos
] = u
;
638 MOZ_ASSERT(destPos
<= len
);
// Copy the input remaining after the last decoded escape.
644 if (writing
&& last
< end
) {
645 auto toCopy
= end
- last
;
646 memcpy(destPtr
+ destPos
, last
, toCopy
);
648 MOZ_ASSERT(destPos
<= len
);
// Shrink the result to the number of bytes actually written.
652 aResult
.Truncate(destPos
);
655 aDidAppend
= writing
;