1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
12 #include "nsReadableUtils.h"
14 const int netCharType
[256] =
15 /* Bit 0 xalpha -- the alphas
16 ** Bit 1 xpalpha -- as xalpha but
17 ** converts spaces to plus and plus to %2B
18 ** Bit 2 path -- as xalpha but doesn't escape '/'
20 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
21 { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x */
22 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 1x */
23 0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
24 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
25 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
26 /* bits for '@' changed from 7 to 0 so '@' can be escaped */
27 /* in usernames and passwords in publishing. */
28 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
29 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
30 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0, /* 7X pqrstuvwxyz{|}~ DEL */
33 /* decode % escaped hex codes into character values
36 ((C >= '0' && C <= '9') ? C - '0' : \
37 ((C >= 'A' && C <= 'F') ? C - 'A' + 10 : \
38 ((C >= 'a' && C <= 'f') ? C - 'a' + 10 : 0)))
// IS_OK(C): nonzero when the netCharType entry for character C has at least
// one of the bits in aFlags set. The expansion refers to a variable named
// aFlags, so the macro only works where such a name is in scope.
// C is cast to unsigned int and used directly as the table index.
// NOTE(review): presumably callers always pass an unsigned char so the index
// stays within the 256-entry table -- confirm at each use site.
41 #define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))
// Character that introduces an escape sequence: the escaping code in this
// file writes HEX_ESCAPE followed by the high and low hex nibbles of a byte.
42 #define HEX_ESCAPE '%'
44 //----------------------------------------------------------------------------------------
46 nsEscapeCount(const char* aStr
, nsEscapeMask aFlags
, size_t* aOutLen
)
47 //----------------------------------------------------------------------------------------
54 size_t charsToEscape
= 0;
55 static const char hexChars
[] = "0123456789ABCDEF";
57 const unsigned char* src
= (const unsigned char*)aStr
;
65 // calculate how much memory should be allocated
66 // original length + 2 bytes for each escaped character + terminating '\0'
67 // do the sum in steps to check for overflow
68 size_t dstSize
= len
+ 1 + charsToEscape
;
72 dstSize
+= charsToEscape
;
77 // fail if we need more than 4GB
78 // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
79 // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is
80 // no chance to allocate more than 4GB using nsMemory::Alloc()
81 if (dstSize
> UINT32_MAX
) {
85 char* result
= (char*)nsMemory::Alloc(dstSize
);
90 unsigned char* dst
= (unsigned char*)result
;
91 src
= (const unsigned char*)aStr
;
92 if (aFlags
== url_XPAlphas
) {
93 for (size_t i
= 0; i
< len
; ++i
) {
94 unsigned char c
= *src
++;
97 } else if (c
== ' ') {
98 *dst
++ = '+'; /* convert spaces to pluses */
101 *dst
++ = hexChars
[c
>> 4]; /* high nibble */
102 *dst
++ = hexChars
[c
& 0x0f]; /* low nibble */
106 for (size_t i
= 0; i
< len
; ++i
) {
107 unsigned char c
= *src
++;
112 *dst
++ = hexChars
[c
>> 4]; /* high nibble */
113 *dst
++ = hexChars
[c
& 0x0f]; /* low nibble */
118 *dst
= '\0'; /* tack on eos */
120 *aOutLen
= dst
- (unsigned char*)result
;
125 //----------------------------------------------------------------------------------------
127 nsEscape(const char* aStr
, nsEscapeMask aFlags
)
128 //----------------------------------------------------------------------------------------
133 return nsEscapeCount(aStr
, aFlags
, nullptr);
136 //----------------------------------------------------------------------------------------
138 nsUnescape(char* aStr
)
139 //----------------------------------------------------------------------------------------
141 nsUnescapeCount(aStr
);
145 //----------------------------------------------------------------------------------------
147 nsUnescapeCount(char* aStr
)
148 //----------------------------------------------------------------------------------------
152 static const char hexChars
[] = "0123456789ABCDEFabcdef";
156 char* const pc1
= c1
;
157 char* const pc2
= c2
;
160 // A null string was passed in. Nothing to unescape.
161 // Returns early as the string might not actually be mutable with
168 if (*(src
+ 1) == '\0') {
174 if (*src
!= HEX_ESCAPE
|| PL_strpbrk(pc1
, hexChars
) == 0 ||
175 PL_strpbrk(pc2
, hexChars
) == 0) {
178 src
++; /* walk over escape */
180 *dst
= UNHEX(*src
) << 4;
184 *dst
= (*dst
+ UNHEX(*src
));
192 return (int)(dst
- aStr
);
194 } /* nsUnescapeCount */
198 nsEscapeHTML(const char* aString
)
201 /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
202 uint32_t len
= strlen(aString
);
203 if (len
>= (UINT32_MAX
/ 6)) {
207 rv
= (char*)NS_Alloc((6 * len
) + 1);
211 for (; *aString
!= '\0'; ++aString
) {
212 if (*aString
== '<') {
217 } else if (*aString
== '>') {
222 } else if (*aString
== '&') {
228 } else if (*aString
== '"') {
235 } else if (*aString
== '\'') {
252 nsEscapeHTML2(const char16_t
* aSourceBuffer
, int32_t aSourceBufferLen
)
254 // Calculate the length, if the caller didn't.
255 if (aSourceBufferLen
< 0) {
256 aSourceBufferLen
= NS_strlen(aSourceBuffer
);
259 /* XXX Hardcoded max entity len. */
260 if (uint32_t(aSourceBufferLen
) >=
261 ((UINT32_MAX
- sizeof(char16_t
)) / (6 * sizeof(char16_t
)))) {
265 char16_t
* resultBuffer
= (char16_t
*)nsMemory::Alloc(
266 aSourceBufferLen
* 6 * sizeof(char16_t
) + sizeof(char16_t('\0')));
267 char16_t
* ptr
= resultBuffer
;
272 for (i
= 0; i
< aSourceBufferLen
; ++i
) {
273 if (aSourceBuffer
[i
] == '<') {
278 } else if (aSourceBuffer
[i
] == '>') {
283 } else if (aSourceBuffer
[i
] == '&') {
289 } else if (aSourceBuffer
[i
] == '"') {
296 } else if (aSourceBuffer
[i
] == '\'') {
303 *ptr
++ = aSourceBuffer
[i
];
312 //----------------------------------------------------------------------------------------
314 const int EscapeChars
[256] =
315 /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
317 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
318 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
319 0,1023, 0, 512,1023, 0,1023, 0,1023,1023,1023,1023,1023,1023, 953, 784, /* 2x !"#$%&'()*+,-./ */
320 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008, 0,1008, 0, 768, /* 3x 0123456789:;<=>? */
321 1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 4x @ABCDEFGHIJKLMNO */
322 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023, /* 5x PQRSTUVWXYZ[\]^_ */
323 0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, /* 6x `abcdefghijklmno */
324 1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023, 0, /* 7x pqrstuvwxyz{|}~ */
// NO_NEED_ESC(C): nonzero when the EscapeChars entry for character C shares
// at least one set bit with aFlags, i.e. C may be copied unescaped for the
// requested URL part(s). The expansion refers to a variable named aFlags, so
// the macro only works where such a name is in scope.
// The whole of aFlags is ANDed with the entry; the table rows visible in this
// file never exceed 1023, so only the low ten bits of aFlags can match there.
// C is cast to unsigned int and used directly as the table index.
328 #define NO_NEED_ESC(C) (EscapeChars[((unsigned int)(C))] & (aFlags))
330 //----------------------------------------------------------------------------------------
332 /* returns an escaped string */
334 /* use the following flags to specify which
335 part of an URL you want to escape:
342 esc_FileBaseName = 32
343 esc_FileExtension = 64
349 /* by default this function will not escape parts of a string
350 that already look escaped, which means it already includes
351 a valid hexcode. This is done to avoid multiple escapes of
352 a string. Use the following flags to force escaping of a
359 NS_EscapeURL(const char* aPart
, int32_t aPartLen
, uint32_t aFlags
,
363 NS_NOTREACHED("null pointer");
367 static const char hexChars
[] = "0123456789ABCDEF";
369 aPartLen
= strlen(aPart
);
371 bool forced
= !!(aFlags
& esc_Forced
);
372 bool ignoreNonAscii
= !!(aFlags
& esc_OnlyASCII
);
373 bool ignoreAscii
= !!(aFlags
& esc_OnlyNonASCII
);
374 bool writing
= !!(aFlags
& esc_AlwaysCopy
);
375 bool colon
= !!(aFlags
& esc_Colon
);
377 const unsigned char* src
= (const unsigned char*)aPart
;
379 char tempBuffer
[100];
380 unsigned int tempBufferPos
= 0;
382 bool previousIsNonASCII
= false;
383 for (int i
= 0; i
< aPartLen
; ++i
) {
384 unsigned char c
= *src
++;
386 // If the char does not need to be escaped, or whatever follows % is
387 // a valid escaped string, just copy the char.
389 // Also, the % will not be escaped unless forced.
390 // See bugzilla bug 61269 for details why we changed this
392 // And, we will not escape non-ascii characters if requested.
393 // On special request we will also escape the colon even when
394 // not covered by the matrix.
395 // ignoreAscii is not honored for control characters (C0 and DEL)
397 // And, we should escape the '|' character when it occurs after any
398 // non-ASCII character as it may be part of a multi-byte character.
400 // 0x20..0x7e are the valid ASCII characters. We also escape spaces
401 // (0x20) since they are not legal in URLs.
402 if ((NO_NEED_ESC(c
) || (c
== HEX_ESCAPE
&& !forced
)
403 || (c
> 0x7f && ignoreNonAscii
)
404 || (c
> 0x20 && c
< 0x7f && ignoreAscii
))
405 && !(c
== ':' && colon
)
406 && !(previousIsNonASCII
&& c
== '|' && !ignoreNonAscii
)) {
408 tempBuffer
[tempBufferPos
++] = c
;
410 } else { /* do the escape magic */
412 aResult
.Append(aPart
, i
);
415 tempBuffer
[tempBufferPos
++] = HEX_ESCAPE
;
416 tempBuffer
[tempBufferPos
++] = hexChars
[c
>> 4]; /* high nibble */
417 tempBuffer
[tempBufferPos
++] = hexChars
[c
& 0x0f]; /* low nibble */
420 if (tempBufferPos
>= sizeof(tempBuffer
) - 4) {
421 NS_ASSERTION(writing
, "should be writing");
422 tempBuffer
[tempBufferPos
] = '\0';
423 aResult
+= tempBuffer
;
427 previousIsNonASCII
= (c
> 0x7f);
430 tempBuffer
[tempBufferPos
] = '\0';
431 aResult
+= tempBuffer
;
// ISHEX(c): evaluates to a non-null pointer when c is one of the characters
// stored in hexChars, and to null otherwise. The search uses memchr over
// sizeof(hexChars)-1 bytes, which leaves out the array's trailing NUL.
// Relies on an array (not a pointer) named hexChars being in scope at the
// point of use, since the length comes from sizeof.
436 #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
439 NS_UnescapeURL(const char* aStr
, int32_t aLen
, uint32_t aFlags
,
443 NS_NOTREACHED("null pointer");
451 bool ignoreNonAscii
= !!(aFlags
& esc_OnlyASCII
);
452 bool ignoreAscii
= !!(aFlags
& esc_OnlyNonASCII
);
453 bool writing
= !!(aFlags
& esc_AlwaysCopy
);
454 bool skipControl
= !!(aFlags
& esc_SkipControl
);
456 static const char hexChars
[] = "0123456789ABCDEFabcdef";
458 const char* last
= aStr
;
459 const char* p
= aStr
;
461 for (int i
= 0; i
< aLen
; ++i
, ++p
) {
462 //printf("%c [i=%d of aLen=%d]\n", *p, i, aLen);
463 if (*p
== HEX_ESCAPE
&& i
< aLen
- 2) {
464 unsigned char* p1
= (unsigned char*)p
+ 1;
465 unsigned char* p2
= (unsigned char*)p
+ 2;
466 if (ISHEX(*p1
) && ISHEX(*p2
) &&
467 ((*p1
< '8' && !ignoreAscii
) || (*p1
>= '8' && !ignoreNonAscii
)) &&
469 (*p1
< '2' || (*p1
== '7' && (*p2
== 'f' || *p2
== 'F'))))) {
470 //printf("- p1=%c p2=%c\n", *p1, *p2);
473 //printf("- p=%p, last=%p\n", p, last);
474 aResult
.Append(last
, p
- last
);
477 char u
= (UNHEX(*p1
) << 4) + UNHEX(*p2
);
478 //printf("- u=%c\n", u);
486 if (writing
&& last
< aStr
+ aLen
) {
487 aResult
.Append(last
, aStr
+ aLen
- last
);