Bumping manifests a=b2g-bump
[gecko.git] / xpcom / io / nsEscape.cpp
blobc57191dbb9952d081291ed8891937cc25ce95c2f
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // First checked in on 98/12/03 by John R. McMullen, derived from net.h/mkparse.c.
9 #include "nsEscape.h"
10 #include "nsMemory.h"
11 #include "nsCRT.h"
12 #include "nsReadableUtils.h"
/* Escaping table for nsEscape(), indexed by byte value. Each entry is a
** bitmask that IS_OK() tests against the caller's mask; a set bit means the
** byte is copied through unescaped for that mask.
**
**      Bit 0 (value 1)   xalpha   -- the alphas
**      Bit 1 (value 2)   xpalpha  -- as xalpha but
**                                    converts spaces to plus and plus to %2B
**      Bit 2 (value 4)   path     -- as xalphas but doesn't escape '/'
**
** Only the first 129 entries are spelled out; the remaining entries
** (0x81..0xFF) are zero-initialized, i.e. always escaped.
*/
const int netCharType[256] = {
/*  0 1 2 3 4 5 6 7 8 9 A B C D E F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,   /* 0x */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,   /* 1x */
    0,0,0,0,0,0,0,0,0,0,7,4,0,7,7,4,   /* 2x   !"#$%&'()*+,-./ */
    7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,   /* 3x  0123456789:;<=>? */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,   /* 4x  @ABCDEFGHIJKLMNO */
    /* bits for '@' changed from 7 to 0 so '@' can be escaped */
    /* in usernames and passwords in publishing.              */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,   /* 5X  PQRSTUVWXYZ[\]^_ */
    0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,   /* 6x  `abcdefghijklmno */
    7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,   /* 7X  pqrstuvwxyz{|}~  DEL */
    0,                                 /* 0x80 */
};
/* Decode one hex digit of a %-escape into its numeric value (0..15).
 * Evaluates to 0 when C is not a hex digit. C is expanded several times, so
 * it must be free of side effects; every use is parenthesized so that an
 * arbitrary expression can be passed safely.
 */
#define UNHEX(C) \
    (((C) >= '0' && (C) <= '9') ? (C) - '0' : \
     (((C) >= 'A' && (C) <= 'F') ? (C) - 'A' + 10 : \
     (((C) >= 'a' && (C) <= 'f') ? (C) - 'a' + 10 : 0)))

/* Non-zero when byte C may be copied unescaped. Relies on a variable named
 * `aFlags` (the escape mask) being in scope where the macro is used.
 */
#define IS_OK(C) (netCharType[((unsigned int)(C))] & (aFlags))

/* The character that introduces a %XX escape sequence. */
#define HEX_ESCAPE '%'
44 //----------------------------------------------------------------------------------------
45 static char*
46 nsEscapeCount(const char* aStr, nsEscapeMask aFlags, size_t* aOutLen)
47 //----------------------------------------------------------------------------------------
49 if (!aStr) {
50 return 0;
53 size_t len = 0;
54 size_t charsToEscape = 0;
55 static const char hexChars[] = "0123456789ABCDEF";
57 const unsigned char* src = (const unsigned char*)aStr;
58 while (*src) {
59 len++;
60 if (!IS_OK(*src++)) {
61 charsToEscape++;
65 // calculate how much memory should be allocated
66 // original length + 2 bytes for each escaped character + terminating '\0'
67 // do the sum in steps to check for overflow
68 size_t dstSize = len + 1 + charsToEscape;
69 if (dstSize <= len) {
70 return 0;
72 dstSize += charsToEscape;
73 if (dstSize < len) {
74 return 0;
77 // fail if we need more than 4GB
78 // size_t is likely to be long unsigned int but nsMemory::Alloc(size_t)
79 // calls NS_Alloc_P(size_t) which calls PR_Malloc(uint32_t), so there is
80 // no chance to allocate more than 4GB using nsMemory::Alloc()
81 if (dstSize > UINT32_MAX) {
82 return 0;
85 char* result = (char*)nsMemory::Alloc(dstSize);
86 if (!result) {
87 return 0;
90 unsigned char* dst = (unsigned char*)result;
91 src = (const unsigned char*)aStr;
92 if (aFlags == url_XPAlphas) {
93 for (size_t i = 0; i < len; ++i) {
94 unsigned char c = *src++;
95 if (IS_OK(c)) {
96 *dst++ = c;
97 } else if (c == ' ') {
98 *dst++ = '+'; /* convert spaces to pluses */
99 } else {
100 *dst++ = HEX_ESCAPE;
101 *dst++ = hexChars[c >> 4]; /* high nibble */
102 *dst++ = hexChars[c & 0x0f]; /* low nibble */
105 } else {
106 for (size_t i = 0; i < len; ++i) {
107 unsigned char c = *src++;
108 if (IS_OK(c)) {
109 *dst++ = c;
110 } else {
111 *dst++ = HEX_ESCAPE;
112 *dst++ = hexChars[c >> 4]; /* high nibble */
113 *dst++ = hexChars[c & 0x0f]; /* low nibble */
118 *dst = '\0'; /* tack on eos */
119 if (aOutLen) {
120 *aOutLen = dst - (unsigned char*)result;
122 return result;
125 //----------------------------------------------------------------------------------------
126 char*
127 nsEscape(const char* aStr, nsEscapeMask aFlags)
128 //----------------------------------------------------------------------------------------
130 if (!aStr) {
131 return nullptr;
133 return nsEscapeCount(aStr, aFlags, nullptr);
136 //----------------------------------------------------------------------------------------
137 char*
138 nsUnescape(char* aStr)
139 //----------------------------------------------------------------------------------------
141 nsUnescapeCount(aStr);
142 return aStr;
145 //----------------------------------------------------------------------------------------
146 int32_t
147 nsUnescapeCount(char* aStr)
148 //----------------------------------------------------------------------------------------
150 char* src = aStr;
151 char* dst = aStr;
152 static const char hexChars[] = "0123456789ABCDEFabcdef";
154 char c1[] = " ";
155 char c2[] = " ";
156 char* const pc1 = c1;
157 char* const pc2 = c2;
159 if (!*src) {
160 // A null string was passed in. Nothing to escape.
161 // Returns early as the string might not actually be mutable with
162 // length 0.
163 return 0;
166 while (*src) {
167 c1[0] = *(src + 1);
168 if (*(src + 1) == '\0') {
169 c2[0] = '\0';
170 } else {
171 c2[0] = *(src + 2);
174 if (*src != HEX_ESCAPE || PL_strpbrk(pc1, hexChars) == 0 ||
175 PL_strpbrk(pc2, hexChars) == 0) {
176 *dst++ = *src++;
177 } else {
178 src++; /* walk over escape */
179 if (*src) {
180 *dst = UNHEX(*src) << 4;
181 src++;
183 if (*src) {
184 *dst = (*dst + UNHEX(*src));
185 src++;
187 dst++;
191 *dst = 0;
192 return (int)(dst - aStr);
194 } /* NET_UnEscapeCnt */
197 char*
198 nsEscapeHTML(const char* aString)
200 char* rv = nullptr;
201 /* XXX Hardcoded max entity len. The +1 is for the trailing null. */
202 uint32_t len = strlen(aString);
203 if (len >= (UINT32_MAX / 6)) {
204 return nullptr;
207 rv = (char*)NS_Alloc((6 * len) + 1);
208 char* ptr = rv;
210 if (rv) {
211 for (; *aString != '\0'; ++aString) {
212 if (*aString == '<') {
213 *ptr++ = '&';
214 *ptr++ = 'l';
215 *ptr++ = 't';
216 *ptr++ = ';';
217 } else if (*aString == '>') {
218 *ptr++ = '&';
219 *ptr++ = 'g';
220 *ptr++ = 't';
221 *ptr++ = ';';
222 } else if (*aString == '&') {
223 *ptr++ = '&';
224 *ptr++ = 'a';
225 *ptr++ = 'm';
226 *ptr++ = 'p';
227 *ptr++ = ';';
228 } else if (*aString == '"') {
229 *ptr++ = '&';
230 *ptr++ = 'q';
231 *ptr++ = 'u';
232 *ptr++ = 'o';
233 *ptr++ = 't';
234 *ptr++ = ';';
235 } else if (*aString == '\'') {
236 *ptr++ = '&';
237 *ptr++ = '#';
238 *ptr++ = '3';
239 *ptr++ = '9';
240 *ptr++ = ';';
241 } else {
242 *ptr++ = *aString;
245 *ptr = '\0';
248 return rv;
251 char16_t*
252 nsEscapeHTML2(const char16_t* aSourceBuffer, int32_t aSourceBufferLen)
254 // Calculate the length, if the caller didn't.
255 if (aSourceBufferLen < 0) {
256 aSourceBufferLen = NS_strlen(aSourceBuffer);
259 /* XXX Hardcoded max entity len. */
260 if (uint32_t(aSourceBufferLen) >=
261 ((UINT32_MAX - sizeof(char16_t)) / (6 * sizeof(char16_t)))) {
262 return nullptr;
265 char16_t* resultBuffer = (char16_t*)nsMemory::Alloc(
266 aSourceBufferLen * 6 * sizeof(char16_t) + sizeof(char16_t('\0')));
267 char16_t* ptr = resultBuffer;
269 if (resultBuffer) {
270 int32_t i;
272 for (i = 0; i < aSourceBufferLen; ++i) {
273 if (aSourceBuffer[i] == '<') {
274 *ptr++ = '&';
275 *ptr++ = 'l';
276 *ptr++ = 't';
277 *ptr++ = ';';
278 } else if (aSourceBuffer[i] == '>') {
279 *ptr++ = '&';
280 *ptr++ = 'g';
281 *ptr++ = 't';
282 *ptr++ = ';';
283 } else if (aSourceBuffer[i] == '&') {
284 *ptr++ = '&';
285 *ptr++ = 'a';
286 *ptr++ = 'm';
287 *ptr++ = 'p';
288 *ptr++ = ';';
289 } else if (aSourceBuffer[i] == '"') {
290 *ptr++ = '&';
291 *ptr++ = 'q';
292 *ptr++ = 'u';
293 *ptr++ = 'o';
294 *ptr++ = 't';
295 *ptr++ = ';';
296 } else if (aSourceBuffer[i] == '\'') {
297 *ptr++ = '&';
298 *ptr++ = '#';
299 *ptr++ = '3';
300 *ptr++ = '9';
301 *ptr++ = ';';
302 } else {
303 *ptr++ = aSourceBuffer[i];
306 *ptr = 0;
309 return resultBuffer;
//----------------------------------------------------------------------------------------

// Escaping table for NS_EscapeURL(), indexed by byte value. Each entry is a
// bitmask that NO_NEED_ESC() tests against the caller's flags; a set bit
// means the byte needs no escaping for the corresponding flag. Entries not
// spelled out here (0x81..0xFF) are zero-initialized.
const int EscapeChars[256] =
/*      0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F */
{
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  /* 0x */
        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  /* 1x */
        0,1023,   0, 512,1023,   0,1023,   0,1023,1023,1023,1023,1023,1023, 953, 784,  /* 2x   !"#$%&'()*+,-./ */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1008,1008,   0,1008,   0, 768,  /* 3x  0123456789:;<=>? */
     1008,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,  /* 4x  @ABCDEFGHIJKLMNO */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896, 896, 896, 896,1023,  /* 5x  PQRSTUVWXYZ[\]^_ */
        0,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,  /* 6x  `abcdefghijklmno */
     1023,1023,1023,1023,1023,1023,1023,1023,1023,1023,1023, 896,1012, 896,1023,   0,  /* 7x  pqrstuvwxyz{|}~ DEL */
        0                                                                              /* 0x80 */
};

// Non-zero when byte C needs no escaping. Relies on a variable named
// `aFlags` being in scope where the macro is used.
#define NO_NEED_ESC(C) (EscapeChars[((unsigned int)(C))] & (aFlags))
330 //----------------------------------------------------------------------------------------
332 /* returns an escaped string */
334 /* use the following flags to specify which
335 part of an URL you want to escape:
337 esc_Scheme = 1
338 esc_Username = 2
339 esc_Password = 4
340 esc_Host = 8
341 esc_Directory = 16
342 esc_FileBaseName = 32
343 esc_FileExtension = 64
344 esc_Param = 128
345 esc_Query = 256
346 esc_Ref = 512
349 /* by default this function will not escape parts of a string
350 that already look escaped, which means it already includes
351 a valid hexcode. This is done to avoid multiple escapes of
352 a string. Use the following flags to force escaping of a
353 string:
355 esc_Forced = 1024
358 bool
359 NS_EscapeURL(const char* aPart, int32_t aPartLen, uint32_t aFlags,
360 nsACString& aResult)
362 if (!aPart) {
363 NS_NOTREACHED("null pointer");
364 return false;
367 static const char hexChars[] = "0123456789ABCDEF";
368 if (aPartLen < 0) {
369 aPartLen = strlen(aPart);
371 bool forced = !!(aFlags & esc_Forced);
372 bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
373 bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
374 bool writing = !!(aFlags & esc_AlwaysCopy);
375 bool colon = !!(aFlags & esc_Colon);
377 const unsigned char* src = (const unsigned char*)aPart;
379 char tempBuffer[100];
380 unsigned int tempBufferPos = 0;
382 bool previousIsNonASCII = false;
383 for (int i = 0; i < aPartLen; ++i) {
384 unsigned char c = *src++;
386 // if the char has not to be escaped or whatever follows % is
387 // a valid escaped string, just copy the char.
389 // Also the % will not be escaped until forced
390 // See bugzilla bug 61269 for details why we changed this
392 // And, we will not escape non-ascii characters if requested.
393 // On special request we will also escape the colon even when
394 // not covered by the matrix.
395 // ignoreAscii is not honored for control characters (C0 and DEL)
397 // And, we should escape the '|' character when it occurs after any
398 // non-ASCII character as it may be aPart of a multi-byte character.
400 // 0x20..0x7e are the valid ASCII characters. We also escape spaces
401 // (0x20) since they are not legal in URLs.
402 if ((NO_NEED_ESC(c) || (c == HEX_ESCAPE && !forced)
403 || (c > 0x7f && ignoreNonAscii)
404 || (c > 0x20 && c < 0x7f && ignoreAscii))
405 && !(c == ':' && colon)
406 && !(previousIsNonASCII && c == '|' && !ignoreNonAscii)) {
407 if (writing) {
408 tempBuffer[tempBufferPos++] = c;
410 } else { /* do the escape magic */
411 if (!writing) {
412 aResult.Append(aPart, i);
413 writing = true;
415 tempBuffer[tempBufferPos++] = HEX_ESCAPE;
416 tempBuffer[tempBufferPos++] = hexChars[c >> 4]; /* high nibble */
417 tempBuffer[tempBufferPos++] = hexChars[c & 0x0f]; /* low nibble */
420 if (tempBufferPos >= sizeof(tempBuffer) - 4) {
421 NS_ASSERTION(writing, "should be writing");
422 tempBuffer[tempBufferPos] = '\0';
423 aResult += tempBuffer;
424 tempBufferPos = 0;
427 previousIsNonASCII = (c > 0x7f);
429 if (writing) {
430 tempBuffer[tempBufferPos] = '\0';
431 aResult += tempBuffer;
433 return writing;
436 #define ISHEX(c) memchr(hexChars, c, sizeof(hexChars)-1)
438 bool
439 NS_UnescapeURL(const char* aStr, int32_t aLen, uint32_t aFlags,
440 nsACString& aResult)
442 if (!aStr) {
443 NS_NOTREACHED("null pointer");
444 return false;
447 if (aLen < 0) {
448 aLen = strlen(aStr);
451 bool ignoreNonAscii = !!(aFlags & esc_OnlyASCII);
452 bool ignoreAscii = !!(aFlags & esc_OnlyNonASCII);
453 bool writing = !!(aFlags & esc_AlwaysCopy);
454 bool skipControl = !!(aFlags & esc_SkipControl);
456 static const char hexChars[] = "0123456789ABCDEFabcdef";
458 const char* last = aStr;
459 const char* p = aStr;
461 for (int i = 0; i < aLen; ++i, ++p) {
462 //printf("%c [i=%d of aLen=%d]\n", *p, i, aLen);
463 if (*p == HEX_ESCAPE && i < aLen - 2) {
464 unsigned char* p1 = (unsigned char*)p + 1;
465 unsigned char* p2 = (unsigned char*)p + 2;
466 if (ISHEX(*p1) && ISHEX(*p2) &&
467 ((*p1 < '8' && !ignoreAscii) || (*p1 >= '8' && !ignoreNonAscii)) &&
468 !(skipControl &&
469 (*p1 < '2' || (*p1 == '7' && (*p2 == 'f' || *p2 == 'F'))))) {
470 //printf("- p1=%c p2=%c\n", *p1, *p2);
471 writing = true;
472 if (p > last) {
473 //printf("- p=%p, last=%p\n", p, last);
474 aResult.Append(last, p - last);
475 last = p;
477 char u = (UNHEX(*p1) << 4) + UNHEX(*p2);
478 //printf("- u=%c\n", u);
479 aResult.Append(u);
480 i += 2;
481 p += 2;
482 last += 3;
486 if (writing && last < aStr + aLen) {
487 aResult.Append(last, aStr + aLen - last);
490 return writing;