Merge mozilla-central to autoland. CLOSED TREE
[gecko.git] / netwerk / base / nsURLParsers.cpp
blob7dc6567d8427b18c869bf0e8c1e5013a4db728cd
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include <string.h>
8 #include "mozilla/RangedPtr.h"
9 #include "mozilla/TextUtils.h"
11 #include "nsCRTGlue.h"
12 #include "nsURLParsers.h"
13 #include "nsURLHelper.h"
14 #include "nsString.h"
16 using namespace mozilla;
18 //----------------------------------------------------------------------------
20 static uint32_t CountConsecutiveSlashes(const char* str, int32_t len) {
21 RangedPtr<const char> p(str, len);
22 uint32_t count = 0;
23 while (len-- && *p++ == '/') ++count;
24 return count;
27 //----------------------------------------------------------------------------
28 // nsBaseURLParser implementation
29 //----------------------------------------------------------------------------
31 NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser)
32 NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser)
34 #define SET_RESULT(component, pos, len) \
35 PR_BEGIN_MACRO \
36 if (component##Pos) *component##Pos = uint32_t(pos); \
37 if (component##Len) *component##Len = int32_t(len); \
38 PR_END_MACRO
40 #define OFFSET_RESULT(component, offset) \
41 PR_BEGIN_MACRO \
42 if (component##Pos) *component##Pos += (offset); \
43 PR_END_MACRO
45 NS_IMETHODIMP
46 nsBaseURLParser::ParseURL(const char* spec, int32_t specLen,
47 uint32_t* schemePos, int32_t* schemeLen,
48 uint32_t* authorityPos, int32_t* authorityLen,
49 uint32_t* pathPos, int32_t* pathLen) {
50 if (NS_WARN_IF(!spec)) {
51 return NS_ERROR_INVALID_POINTER;
54 if (specLen < 0) specLen = strlen(spec);
56 const char* stop = nullptr;
57 const char* colon = nullptr;
58 const char* slash = nullptr;
59 const char* p = spec;
60 uint32_t offset = 0;
61 int32_t len = specLen;
63 // skip leading whitespace
64 while (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') {
65 spec++;
66 specLen--;
67 offset++;
69 p++;
70 len--;
73 for (; len && *p && !colon && !slash; ++p, --len) {
74 switch (*p) {
75 case ':':
76 if (!colon) colon = p;
77 break;
78 case '/': // start of filepath
79 case '?': // start of query
80 case '#': // start of ref
81 if (!slash) slash = p;
82 break;
83 case '@': // username@hostname
84 case '[': // start of IPv6 address literal
85 if (!stop) stop = p;
86 break;
89 // disregard the first colon if it follows an '@' or a '['
90 if (colon && stop && colon > stop) colon = nullptr;
92 // if the spec only contained whitespace ...
93 if (specLen == 0) {
94 SET_RESULT(scheme, 0, -1);
95 SET_RESULT(authority, 0, 0);
96 SET_RESULT(path, 0, 0);
97 return NS_OK;
100 // ignore trailing whitespace and control characters
101 for (p = spec + specLen - 1; ((unsigned char)*p <= ' ') && (p != spec); --p) {
105 specLen = p - spec + 1;
107 if (colon && (colon < slash || !slash)) {
109 // spec = <scheme>:/<the-rest>
111 // or
113 // spec = <scheme>:<authority>
114 // spec = <scheme>:<path-no-slashes>
116 if (!net_IsValidScheme(nsDependentCSubstring(spec, colon - spec))) {
117 return NS_ERROR_MALFORMED_URI;
119 SET_RESULT(scheme, offset, colon - spec);
120 if (authorityLen || pathLen) {
121 uint32_t schemeLen = colon + 1 - spec;
122 offset += schemeLen;
123 ParseAfterScheme(colon + 1, specLen - schemeLen, authorityPos,
124 authorityLen, pathPos, pathLen);
125 OFFSET_RESULT(authority, offset);
126 OFFSET_RESULT(path, offset);
128 } else {
130 // spec = <authority-no-port-or-password>/<path>
131 // spec = <path>
133 // or
135 // spec = <authority-no-port-or-password>/<path-with-colon>
136 // spec = <path-with-colon>
138 // or
140 // spec = <authority-no-port-or-password>
141 // spec = <path-no-slashes-or-colon>
143 SET_RESULT(scheme, 0, -1);
144 if (authorityLen || pathLen) {
145 ParseAfterScheme(spec, specLen, authorityPos, authorityLen, pathPos,
146 pathLen);
147 OFFSET_RESULT(authority, offset);
148 OFFSET_RESULT(path, offset);
151 return NS_OK;
154 NS_IMETHODIMP
155 nsBaseURLParser::ParseAuthority(const char* auth, int32_t authLen,
156 uint32_t* usernamePos, int32_t* usernameLen,
157 uint32_t* passwordPos, int32_t* passwordLen,
158 uint32_t* hostnamePos, int32_t* hostnameLen,
159 int32_t* port) {
160 if (NS_WARN_IF(!auth)) {
161 return NS_ERROR_INVALID_POINTER;
164 if (authLen < 0) authLen = strlen(auth);
166 SET_RESULT(username, 0, -1);
167 SET_RESULT(password, 0, -1);
168 SET_RESULT(hostname, 0, authLen);
169 if (port) *port = -1;
170 return NS_OK;
173 NS_IMETHODIMP
174 nsBaseURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen,
175 uint32_t* usernamePos, int32_t* usernameLen,
176 uint32_t* passwordPos, int32_t* passwordLen) {
177 SET_RESULT(username, 0, -1);
178 SET_RESULT(password, 0, -1);
179 return NS_OK;
182 NS_IMETHODIMP
183 nsBaseURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen,
184 uint32_t* hostnamePos, int32_t* hostnameLen,
185 int32_t* port) {
186 SET_RESULT(hostname, 0, -1);
187 if (port) *port = -1;
188 return NS_OK;
191 NS_IMETHODIMP
192 nsBaseURLParser::ParsePath(const char* path, int32_t pathLen,
193 uint32_t* filepathPos, int32_t* filepathLen,
194 uint32_t* queryPos, int32_t* queryLen,
195 uint32_t* refPos, int32_t* refLen) {
196 if (NS_WARN_IF(!path)) {
197 return NS_ERROR_INVALID_POINTER;
200 if (pathLen < 0) pathLen = strlen(path);
202 // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref>
204 // XXX PL_strnpbrk would be nice, but it's buggy
206 // search for first occurrence of either ? or #
207 const char *query_beg = nullptr, *query_end = nullptr;
208 const char* ref_beg = nullptr;
209 const char* p = nullptr;
210 for (p = path; p < path + pathLen; ++p) {
211 // only match the query string if it precedes the reference fragment
212 if (!ref_beg && !query_beg && *p == '?') {
213 query_beg = p + 1;
214 } else if (*p == '#') {
215 ref_beg = p + 1;
216 if (query_beg) query_end = p;
217 break;
221 if (query_beg) {
222 if (query_end) {
223 SET_RESULT(query, query_beg - path, query_end - query_beg);
224 } else {
225 SET_RESULT(query, query_beg - path, pathLen - (query_beg - path));
227 } else {
228 SET_RESULT(query, 0, -1);
231 if (ref_beg) {
232 SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path));
233 } else {
234 SET_RESULT(ref, 0, -1);
237 const char* end;
238 if (query_beg) {
239 end = query_beg - 1;
240 } else if (ref_beg) {
241 end = ref_beg - 1;
242 } else {
243 end = path + pathLen;
246 // an empty file path is no file path
247 if (end != path) {
248 SET_RESULT(filepath, 0, end - path);
249 } else {
250 SET_RESULT(filepath, 0, -1);
252 return NS_OK;
255 NS_IMETHODIMP
256 nsBaseURLParser::ParseFilePath(const char* filepath, int32_t filepathLen,
257 uint32_t* directoryPos, int32_t* directoryLen,
258 uint32_t* basenamePos, int32_t* basenameLen,
259 uint32_t* extensionPos, int32_t* extensionLen) {
260 if (NS_WARN_IF(!filepath)) {
261 return NS_ERROR_INVALID_POINTER;
264 if (filepathLen < 0) filepathLen = strlen(filepath);
266 if (filepathLen == 0) {
267 SET_RESULT(directory, 0, -1);
268 SET_RESULT(basename, 0, 0); // assume a zero length file basename
269 SET_RESULT(extension, 0, -1);
270 return NS_OK;
273 const char* p;
274 const char* end = filepath + filepathLen;
276 // search backwards for filename
277 for (p = end - 1; *p != '/' && p > filepath; --p) {
280 if (*p == '/') {
281 // catch /.. and /.
282 if ((p + 1 < end && *(p + 1) == '.') &&
283 (p + 2 == end || (*(p + 2) == '.' && p + 3 == end))) {
284 p = end - 1;
286 // filepath = <directory><filename>.<extension>
287 SET_RESULT(directory, 0, p - filepath + 1);
288 ParseFileName(p + 1, end - (p + 1), basenamePos, basenameLen, extensionPos,
289 extensionLen);
290 OFFSET_RESULT(basename, p + 1 - filepath);
291 OFFSET_RESULT(extension, p + 1 - filepath);
292 } else {
293 // filepath = <filename>.<extension>
294 SET_RESULT(directory, 0, -1);
295 ParseFileName(filepath, filepathLen, basenamePos, basenameLen, extensionPos,
296 extensionLen);
298 return NS_OK;
301 nsresult nsBaseURLParser::ParseFileName(
302 const char* filename, int32_t filenameLen, uint32_t* basenamePos,
303 int32_t* basenameLen, uint32_t* extensionPos, int32_t* extensionLen) {
304 if (NS_WARN_IF(!filename)) {
305 return NS_ERROR_INVALID_POINTER;
308 if (filenameLen < 0) filenameLen = strlen(filename);
310 // no extension if filename ends with a '.'
311 if (filename[filenameLen - 1] != '.') {
312 // ignore '.' at the beginning
313 for (const char* p = filename + filenameLen - 1; p > filename; --p) {
314 if (*p == '.') {
315 // filename = <basename.extension>
316 SET_RESULT(basename, 0, p - filename);
317 SET_RESULT(extension, p + 1 - filename,
318 filenameLen - (p - filename + 1));
319 return NS_OK;
323 // filename = <basename>
324 SET_RESULT(basename, 0, filenameLen);
325 SET_RESULT(extension, 0, -1);
326 return NS_OK;
329 //----------------------------------------------------------------------------
330 // nsNoAuthURLParser implementation
331 //----------------------------------------------------------------------------
333 NS_IMETHODIMP
334 nsNoAuthURLParser::ParseAuthority(const char* auth, int32_t authLen,
335 uint32_t* usernamePos, int32_t* usernameLen,
336 uint32_t* passwordPos, int32_t* passwordLen,
337 uint32_t* hostnamePos, int32_t* hostnameLen,
338 int32_t* port) {
339 MOZ_ASSERT_UNREACHABLE("Shouldn't parse auth in a NoAuthURL!");
340 return NS_ERROR_UNEXPECTED;
343 void nsNoAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen,
344 uint32_t* authPos, int32_t* authLen,
345 uint32_t* pathPos, int32_t* pathLen) {
346 MOZ_ASSERT(specLen >= 0, "unexpected");
348 // everything is the path
349 uint32_t pos = 0;
350 switch (CountConsecutiveSlashes(spec, specLen)) {
351 case 0:
352 case 1:
353 break;
354 case 2: {
355 const char* p = nullptr;
356 if (specLen > 2) {
357 // looks like there is an authority section
359 // if the authority looks like a drive number then we
360 // really want to treat it as part of the path
361 // [a-zA-Z][:|]{/\}
362 // i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo
363 if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') &&
364 IsAsciiAlpha(spec[2]) &&
365 ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) {
366 pos = 1;
367 break;
369 // Ignore apparent authority; path is everything after it
370 for (p = spec + 2; p < spec + specLen; ++p) {
371 if (*p == '/' || *p == '?' || *p == '#') break;
374 SET_RESULT(auth, 0, -1);
375 if (p && p != spec + specLen) {
376 SET_RESULT(path, p - spec, specLen - (p - spec));
377 } else {
378 SET_RESULT(path, 0, -1);
380 return;
382 default:
383 pos = 2;
384 break;
386 SET_RESULT(auth, pos, 0);
387 SET_RESULT(path, pos, specLen - pos);
390 #if defined(XP_WIN)
391 NS_IMETHODIMP
392 nsNoAuthURLParser::ParseFilePath(const char* filepath, int32_t filepathLen,
393 uint32_t* directoryPos, int32_t* directoryLen,
394 uint32_t* basenamePos, int32_t* basenameLen,
395 uint32_t* extensionPos,
396 int32_t* extensionLen) {
397 if (NS_WARN_IF(!filepath)) {
398 return NS_ERROR_INVALID_POINTER;
401 if (filepathLen < 0) filepathLen = strlen(filepath);
403 // look for a filepath consisting of only a drive number, which may or
404 // may not have a leading slash.
405 if (filepathLen > 1 && filepathLen < 4) {
406 const char* end = filepath + filepathLen;
407 const char* p = filepath;
408 if (*p == '/') p++;
409 if ((end - p == 2) && (p[1] == ':' || p[1] == '|') && IsAsciiAlpha(*p)) {
410 // filepath = <drive-number>:
411 SET_RESULT(directory, 0, filepathLen);
412 SET_RESULT(basename, 0, -1);
413 SET_RESULT(extension, 0, -1);
414 return NS_OK;
418 // otherwise fallback on common implementation
419 return nsBaseURLParser::ParseFilePath(filepath, filepathLen, directoryPos,
420 directoryLen, basenamePos, basenameLen,
421 extensionPos, extensionLen);
423 #endif
425 //----------------------------------------------------------------------------
426 // nsAuthURLParser implementation
427 //----------------------------------------------------------------------------
429 NS_IMETHODIMP
430 nsAuthURLParser::ParseAuthority(const char* auth, int32_t authLen,
431 uint32_t* usernamePos, int32_t* usernameLen,
432 uint32_t* passwordPos, int32_t* passwordLen,
433 uint32_t* hostnamePos, int32_t* hostnameLen,
434 int32_t* port) {
435 nsresult rv;
437 if (NS_WARN_IF(!auth)) {
438 return NS_ERROR_INVALID_POINTER;
441 if (authLen < 0) authLen = strlen(auth);
443 if (authLen == 0) {
444 SET_RESULT(username, 0, -1);
445 SET_RESULT(password, 0, -1);
446 SET_RESULT(hostname, 0, 0);
447 if (port) *port = -1;
448 return NS_OK;
451 // search backwards for @
452 const char* p = auth + authLen - 1;
453 for (; (*p != '@') && (p > auth); --p) {
455 if (*p == '@') {
456 // auth = <user-info@server-info>
457 rv = ParseUserInfo(auth, p - auth, usernamePos, usernameLen, passwordPos,
458 passwordLen);
459 if (NS_FAILED(rv)) return rv;
460 rv = ParseServerInfo(p + 1, authLen - (p - auth + 1), hostnamePos,
461 hostnameLen, port);
462 if (NS_FAILED(rv)) return rv;
463 OFFSET_RESULT(hostname, p + 1 - auth);
465 // malformed if has a username or password
466 // but no host info, such as: http://u:p@/
467 if ((usernamePos || passwordPos) && (!hostnamePos || !*hostnameLen)) {
468 return NS_ERROR_MALFORMED_URI;
470 } else {
471 // auth = <server-info>
472 SET_RESULT(username, 0, -1);
473 SET_RESULT(password, 0, -1);
474 rv = ParseServerInfo(auth, authLen, hostnamePos, hostnameLen, port);
475 if (NS_FAILED(rv)) return rv;
477 return NS_OK;
480 NS_IMETHODIMP
481 nsAuthURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen,
482 uint32_t* usernamePos, int32_t* usernameLen,
483 uint32_t* passwordPos, int32_t* passwordLen) {
484 if (NS_WARN_IF(!userinfo)) {
485 return NS_ERROR_INVALID_POINTER;
488 if (userinfoLen < 0) userinfoLen = strlen(userinfo);
490 if (userinfoLen == 0) {
491 SET_RESULT(username, 0, -1);
492 SET_RESULT(password, 0, -1);
493 return NS_OK;
496 const char* p = (const char*)memchr(userinfo, ':', userinfoLen);
497 if (p) {
498 // userinfo = <username:password>
499 SET_RESULT(username, 0, p - userinfo);
500 SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1));
501 } else {
502 // userinfo = <username>
503 SET_RESULT(username, 0, userinfoLen);
504 SET_RESULT(password, 0, -1);
506 return NS_OK;
509 NS_IMETHODIMP
510 nsAuthURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen,
511 uint32_t* hostnamePos, int32_t* hostnameLen,
512 int32_t* port) {
513 if (NS_WARN_IF(!serverinfo)) {
514 return NS_ERROR_INVALID_POINTER;
517 if (serverinfoLen < 0) serverinfoLen = strlen(serverinfo);
519 if (serverinfoLen == 0) {
520 SET_RESULT(hostname, 0, 0);
521 if (port) *port = -1;
522 return NS_OK;
525 // search backwards for a ':' but stop on ']' (IPv6 address literal
526 // delimiter). check for illegal characters in the hostname.
527 const char* p = serverinfo + serverinfoLen - 1;
528 const char *colon = nullptr, *bracket = nullptr;
529 for (; p > serverinfo; --p) {
530 switch (*p) {
531 case ']':
532 bracket = p;
533 break;
534 case ':':
535 if (bracket == nullptr) colon = p;
536 break;
537 case ' ':
538 // hostname must not contain a space
539 return NS_ERROR_MALFORMED_URI;
543 if (colon) {
544 // serverinfo = <hostname:port>
545 SET_RESULT(hostname, 0, colon - serverinfo);
546 if (port) {
547 // XXX unfortunately ToInteger is not defined for substrings
548 nsAutoCString buf(colon + 1, serverinfoLen - (colon + 1 - serverinfo));
549 if (buf.Length() == 0) {
550 *port = -1;
551 } else {
552 const char* nondigit = NS_strspnp("0123456789", buf.get());
553 if (nondigit && *nondigit) return NS_ERROR_MALFORMED_URI;
555 nsresult err;
556 *port = buf.ToInteger(&err);
557 if (NS_FAILED(err) || *port < 0 ||
558 *port > std::numeric_limits<uint16_t>::max()) {
559 return NS_ERROR_MALFORMED_URI;
563 } else {
564 // serverinfo = <hostname>
565 SET_RESULT(hostname, 0, serverinfoLen);
566 if (port) *port = -1;
569 // In case of IPv6 address check its validity
570 if (*hostnameLen > 1 && *(serverinfo + *hostnamePos) == '[' &&
571 *(serverinfo + *hostnamePos + *hostnameLen - 1) == ']' &&
572 !net_IsValidIPv6Addr(
573 Substring(serverinfo + *hostnamePos + 1, *hostnameLen - 2))) {
574 return NS_ERROR_MALFORMED_URI;
577 return NS_OK;
580 void nsAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen,
581 uint32_t* authPos, int32_t* authLen,
582 uint32_t* pathPos, int32_t* pathLen) {
583 MOZ_ASSERT(specLen >= 0, "unexpected");
585 uint32_t nslash = CountConsecutiveSlashes(spec, specLen);
587 // search for the end of the authority section
588 const char* end = spec + specLen;
589 const char* p;
590 for (p = spec + nslash; p < end; ++p) {
591 if (*p == '/' || *p == '?' || *p == '#') break;
593 if (p < end) {
594 // spec = [/]<auth><path>
595 SET_RESULT(auth, nslash, p - (spec + nslash));
596 SET_RESULT(path, p - spec, specLen - (p - spec));
597 } else {
598 // spec = [/]<auth>
599 SET_RESULT(auth, nslash, specLen - nslash);
600 SET_RESULT(path, 0, -1);
604 //----------------------------------------------------------------------------
605 // nsStdURLParser implementation
606 //----------------------------------------------------------------------------
608 void nsStdURLParser::ParseAfterScheme(const char* spec, int32_t specLen,
609 uint32_t* authPos, int32_t* authLen,
610 uint32_t* pathPos, int32_t* pathLen) {
611 MOZ_ASSERT(specLen >= 0, "unexpected");
613 uint32_t nslash = CountConsecutiveSlashes(spec, specLen);
615 // search for the end of the authority section
616 const char* end = spec + specLen;
617 const char* p;
618 for (p = spec + nslash; p < end; ++p) {
619 if (strchr("/?#;", *p)) break;
621 switch (nslash) {
622 case 0:
623 case 2:
624 if (p < end) {
625 // spec = (//)<auth><path>
626 SET_RESULT(auth, nslash, p - (spec + nslash));
627 SET_RESULT(path, p - spec, specLen - (p - spec));
628 } else {
629 // spec = (//)<auth>
630 SET_RESULT(auth, nslash, specLen - nslash);
631 SET_RESULT(path, 0, -1);
633 break;
634 case 1:
635 // spec = /<path>
636 SET_RESULT(auth, 0, -1);
637 SET_RESULT(path, 0, specLen);
638 break;
639 default:
640 // spec = ///[/]<path>
641 SET_RESULT(auth, 2, 0);
642 SET_RESULT(path, 2, specLen - 2);