1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 #include "mozilla/RangedPtr.h"
9 #include "mozilla/TextUtils.h"
11 #include "nsCRTGlue.h"
12 #include "nsURLParsers.h"
13 #include "nsURLHelper.h"
16 using namespace mozilla
;
18 //----------------------------------------------------------------------------
20 static uint32_t CountConsecutiveSlashes(const char* str
, int32_t len
) {
21 RangedPtr
<const char> p(str
, len
);
23 while (len
-- && *p
++ == '/') ++count
;
27 //----------------------------------------------------------------------------
28 // nsBaseURLParser implementation
29 //----------------------------------------------------------------------------
31 NS_IMPL_ISUPPORTS(nsAuthURLParser
, nsIURLParser
)
32 NS_IMPL_ISUPPORTS(nsNoAuthURLParser
, nsIURLParser
)
// SET_RESULT(name, pos, len): stores |pos| into *namePos and |len| into
// *nameLen. Each store is skipped when the corresponding pointer is null, so
// callers may leave either out-param unset.
#define SET_RESULT(field, pos, len)          \
  do {                                       \
    if (field##Pos) {                        \
      *field##Pos = uint32_t(pos);           \
    }                                        \
    if (field##Len) {                        \
      *field##Len = int32_t(len);            \
    }                                        \
  } while (0)

// OFFSET_RESULT(name, offset): adds |offset| to *namePos when namePos is
// non-null. The length out-param is not touched.
#define OFFSET_RESULT(field, offset)         \
  do {                                       \
    if (field##Pos) {                        \
      *field##Pos += (offset);               \
    }                                        \
  } while (0)
46 nsBaseURLParser::ParseURL(const char* spec
, int32_t specLen
,
47 uint32_t* schemePos
, int32_t* schemeLen
,
48 uint32_t* authorityPos
, int32_t* authorityLen
,
49 uint32_t* pathPos
, int32_t* pathLen
) {
50 if (NS_WARN_IF(!spec
)) {
51 return NS_ERROR_INVALID_POINTER
;
54 if (specLen
< 0) specLen
= strlen(spec
);
56 const char* stop
= nullptr;
57 const char* colon
= nullptr;
58 const char* slash
= nullptr;
61 int32_t len
= specLen
;
63 // skip leading whitespace
64 while (*p
== ' ' || *p
== '\n' || *p
== '\r' || *p
== '\t') {
73 for (; len
&& *p
&& !colon
&& !slash
; ++p
, --len
) {
76 if (!colon
) colon
= p
;
78 case '/': // start of filepath
79 case '?': // start of query
80 case '#': // start of ref
81 if (!slash
) slash
= p
;
83 case '@': // username@hostname
84 case '[': // start of IPv6 address literal
89 // disregard the first colon if it follows an '@' or a '['
90 if (colon
&& stop
&& colon
> stop
) colon
= nullptr;
92 // if the spec only contained whitespace ...
94 SET_RESULT(scheme
, 0, -1);
95 SET_RESULT(authority
, 0, 0);
96 SET_RESULT(path
, 0, 0);
100 // ignore trailing whitespace and control characters
101 for (p
= spec
+ specLen
- 1; ((unsigned char)*p
<= ' ') && (p
!= spec
); --p
) {
105 specLen
= p
- spec
+ 1;
107 if (colon
&& (colon
< slash
|| !slash
)) {
109 // spec = <scheme>:/<the-rest>
113 // spec = <scheme>:<authority>
114 // spec = <scheme>:<path-no-slashes>
116 if (!net_IsValidScheme(nsDependentCSubstring(spec
, colon
- spec
))) {
117 return NS_ERROR_MALFORMED_URI
;
119 SET_RESULT(scheme
, offset
, colon
- spec
);
120 if (authorityLen
|| pathLen
) {
121 uint32_t schemeLen
= colon
+ 1 - spec
;
123 ParseAfterScheme(colon
+ 1, specLen
- schemeLen
, authorityPos
,
124 authorityLen
, pathPos
, pathLen
);
125 OFFSET_RESULT(authority
, offset
);
126 OFFSET_RESULT(path
, offset
);
130 // spec = <authority-no-port-or-password>/<path>
135 // spec = <authority-no-port-or-password>/<path-with-colon>
136 // spec = <path-with-colon>
140 // spec = <authority-no-port-or-password>
141 // spec = <path-no-slashes-or-colon>
143 SET_RESULT(scheme
, 0, -1);
144 if (authorityLen
|| pathLen
) {
145 ParseAfterScheme(spec
, specLen
, authorityPos
, authorityLen
, pathPos
,
147 OFFSET_RESULT(authority
, offset
);
148 OFFSET_RESULT(path
, offset
);
155 nsBaseURLParser::ParseAuthority(const char* auth
, int32_t authLen
,
156 uint32_t* usernamePos
, int32_t* usernameLen
,
157 uint32_t* passwordPos
, int32_t* passwordLen
,
158 uint32_t* hostnamePos
, int32_t* hostnameLen
,
160 if (NS_WARN_IF(!auth
)) {
161 return NS_ERROR_INVALID_POINTER
;
164 if (authLen
< 0) authLen
= strlen(auth
);
166 SET_RESULT(username
, 0, -1);
167 SET_RESULT(password
, 0, -1);
168 SET_RESULT(hostname
, 0, authLen
);
169 if (port
) *port
= -1;
174 nsBaseURLParser::ParseUserInfo(const char* userinfo
, int32_t userinfoLen
,
175 uint32_t* usernamePos
, int32_t* usernameLen
,
176 uint32_t* passwordPos
, int32_t* passwordLen
) {
177 SET_RESULT(username
, 0, -1);
178 SET_RESULT(password
, 0, -1);
183 nsBaseURLParser::ParseServerInfo(const char* serverinfo
, int32_t serverinfoLen
,
184 uint32_t* hostnamePos
, int32_t* hostnameLen
,
186 SET_RESULT(hostname
, 0, -1);
187 if (port
) *port
= -1;
192 nsBaseURLParser::ParsePath(const char* path
, int32_t pathLen
,
193 uint32_t* filepathPos
, int32_t* filepathLen
,
194 uint32_t* queryPos
, int32_t* queryLen
,
195 uint32_t* refPos
, int32_t* refLen
) {
196 if (NS_WARN_IF(!path
)) {
197 return NS_ERROR_INVALID_POINTER
;
200 if (pathLen
< 0) pathLen
= strlen(path
);
202 // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref>
204 // XXX PL_strnpbrk would be nice, but it's buggy
206 // search for first occurrence of either ? or #
207 const char *query_beg
= nullptr, *query_end
= nullptr;
208 const char* ref_beg
= nullptr;
209 const char* p
= nullptr;
210 for (p
= path
; p
< path
+ pathLen
; ++p
) {
211 // only match the query string if it precedes the reference fragment
212 if (!ref_beg
&& !query_beg
&& *p
== '?') {
214 } else if (*p
== '#') {
216 if (query_beg
) query_end
= p
;
223 SET_RESULT(query
, query_beg
- path
, query_end
- query_beg
);
225 SET_RESULT(query
, query_beg
- path
, pathLen
- (query_beg
- path
));
228 SET_RESULT(query
, 0, -1);
232 SET_RESULT(ref
, ref_beg
- path
, pathLen
- (ref_beg
- path
));
234 SET_RESULT(ref
, 0, -1);
240 } else if (ref_beg
) {
243 end
= path
+ pathLen
;
246 // an empty file path is no file path
248 SET_RESULT(filepath
, 0, end
- path
);
250 SET_RESULT(filepath
, 0, -1);
256 nsBaseURLParser::ParseFilePath(const char* filepath
, int32_t filepathLen
,
257 uint32_t* directoryPos
, int32_t* directoryLen
,
258 uint32_t* basenamePos
, int32_t* basenameLen
,
259 uint32_t* extensionPos
, int32_t* extensionLen
) {
260 if (NS_WARN_IF(!filepath
)) {
261 return NS_ERROR_INVALID_POINTER
;
264 if (filepathLen
< 0) filepathLen
= strlen(filepath
);
266 if (filepathLen
== 0) {
267 SET_RESULT(directory
, 0, -1);
268 SET_RESULT(basename
, 0, 0); // assume a zero length file basename
269 SET_RESULT(extension
, 0, -1);
274 const char* end
= filepath
+ filepathLen
;
276 // search backwards for filename
277 for (p
= end
- 1; *p
!= '/' && p
> filepath
; --p
) {
282 if ((p
+ 1 < end
&& *(p
+ 1) == '.') &&
283 (p
+ 2 == end
|| (*(p
+ 2) == '.' && p
+ 3 == end
))) {
286 // filepath = <directory><filename>.<extension>
287 SET_RESULT(directory
, 0, p
- filepath
+ 1);
288 ParseFileName(p
+ 1, end
- (p
+ 1), basenamePos
, basenameLen
, extensionPos
,
290 OFFSET_RESULT(basename
, p
+ 1 - filepath
);
291 OFFSET_RESULT(extension
, p
+ 1 - filepath
);
293 // filepath = <filename>.<extension>
294 SET_RESULT(directory
, 0, -1);
295 ParseFileName(filepath
, filepathLen
, basenamePos
, basenameLen
, extensionPos
,
301 nsresult
nsBaseURLParser::ParseFileName(
302 const char* filename
, int32_t filenameLen
, uint32_t* basenamePos
,
303 int32_t* basenameLen
, uint32_t* extensionPos
, int32_t* extensionLen
) {
304 if (NS_WARN_IF(!filename
)) {
305 return NS_ERROR_INVALID_POINTER
;
308 if (filenameLen
< 0) filenameLen
= strlen(filename
);
310 // no extension if filename ends with a '.'
311 if (filename
[filenameLen
- 1] != '.') {
312 // ignore '.' at the beginning
313 for (const char* p
= filename
+ filenameLen
- 1; p
> filename
; --p
) {
315 // filename = <basename.extension>
316 SET_RESULT(basename
, 0, p
- filename
);
317 SET_RESULT(extension
, p
+ 1 - filename
,
318 filenameLen
- (p
- filename
+ 1));
323 // filename = <basename>
324 SET_RESULT(basename
, 0, filenameLen
);
325 SET_RESULT(extension
, 0, -1);
329 //----------------------------------------------------------------------------
330 // nsNoAuthURLParser implementation
331 //----------------------------------------------------------------------------
334 nsNoAuthURLParser::ParseAuthority(const char* auth
, int32_t authLen
,
335 uint32_t* usernamePos
, int32_t* usernameLen
,
336 uint32_t* passwordPos
, int32_t* passwordLen
,
337 uint32_t* hostnamePos
, int32_t* hostnameLen
,
339 MOZ_ASSERT_UNREACHABLE("Shouldn't parse auth in a NoAuthURL!");
340 return NS_ERROR_UNEXPECTED
;
343 void nsNoAuthURLParser::ParseAfterScheme(const char* spec
, int32_t specLen
,
344 uint32_t* authPos
, int32_t* authLen
,
345 uint32_t* pathPos
, int32_t* pathLen
) {
346 MOZ_ASSERT(specLen
>= 0, "unexpected");
348 // everything is the path
350 switch (CountConsecutiveSlashes(spec
, specLen
)) {
355 const char* p
= nullptr;
357 // looks like there is an authority section
359 // if the authority looks like a drive number then we
360 // really want to treat it as part of the path
362 // i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo
363 if ((specLen
> 3) && (spec
[3] == ':' || spec
[3] == '|') &&
364 IsAsciiAlpha(spec
[2]) &&
365 ((specLen
== 4) || (spec
[4] == '/') || (spec
[4] == '\\'))) {
369 // Ignore apparent authority; path is everything after it
370 for (p
= spec
+ 2; p
< spec
+ specLen
; ++p
) {
371 if (*p
== '/' || *p
== '?' || *p
== '#') break;
374 SET_RESULT(auth
, 0, -1);
375 if (p
&& p
!= spec
+ specLen
) {
376 SET_RESULT(path
, p
- spec
, specLen
- (p
- spec
));
378 SET_RESULT(path
, 0, -1);
386 SET_RESULT(auth
, pos
, 0);
387 SET_RESULT(path
, pos
, specLen
- pos
);
#if defined(XP_WIN)
// Handles a file path that consists only of a drive specifier ("c:", "c|",
// optionally preceded by one '/'): the whole text is reported as the
// directory, with no basename or extension. Every other input is forwarded
// to nsBaseURLParser::ParseFilePath.
NS_IMETHODIMP
nsNoAuthURLParser::ParseFilePath(const char* filepath, int32_t filepathLen,
                                 uint32_t* directoryPos, int32_t* directoryLen,
                                 uint32_t* basenamePos, int32_t* basenameLen,
                                 uint32_t* extensionPos,
                                 int32_t* extensionLen) {
  if (NS_WARN_IF(!filepath)) {
    return NS_ERROR_INVALID_POINTER;
  }

  if (filepathLen < 0) {
    filepathLen = strlen(filepath);
  }

  // look for a filepath consisting of only a drive number, which may or
  // may not have a leading slash.
  if (filepathLen == 2 || filepathLen == 3) {
    const char* drive = filepath;
    if (*drive == '/') {
      ++drive;
    }
    const bool exactlyTwoLeft = (filepath + filepathLen) - drive == 2;
    if (exactlyTwoLeft && (drive[1] == ':' || drive[1] == '|') &&
        IsAsciiAlpha(*drive)) {
      // filepath = <drive-number>:
      SET_RESULT(directory, 0, filepathLen);
      SET_RESULT(basename, 0, -1);
      SET_RESULT(extension, 0, -1);
      return NS_OK;
    }
  }

  // otherwise fallback on common implementation
  return nsBaseURLParser::ParseFilePath(filepath, filepathLen, directoryPos,
                                        directoryLen, basenamePos, basenameLen,
                                        extensionPos, extensionLen);
}
#endif
425 //----------------------------------------------------------------------------
426 // nsAuthURLParser implementation
427 //----------------------------------------------------------------------------
430 nsAuthURLParser::ParseAuthority(const char* auth
, int32_t authLen
,
431 uint32_t* usernamePos
, int32_t* usernameLen
,
432 uint32_t* passwordPos
, int32_t* passwordLen
,
433 uint32_t* hostnamePos
, int32_t* hostnameLen
,
437 if (NS_WARN_IF(!auth
)) {
438 return NS_ERROR_INVALID_POINTER
;
441 if (authLen
< 0) authLen
= strlen(auth
);
444 SET_RESULT(username
, 0, -1);
445 SET_RESULT(password
, 0, -1);
446 SET_RESULT(hostname
, 0, 0);
447 if (port
) *port
= -1;
451 // search backwards for @
452 const char* p
= auth
+ authLen
- 1;
453 for (; (*p
!= '@') && (p
> auth
); --p
) {
456 // auth = <user-info@server-info>
457 rv
= ParseUserInfo(auth
, p
- auth
, usernamePos
, usernameLen
, passwordPos
,
459 if (NS_FAILED(rv
)) return rv
;
460 rv
= ParseServerInfo(p
+ 1, authLen
- (p
- auth
+ 1), hostnamePos
,
462 if (NS_FAILED(rv
)) return rv
;
463 OFFSET_RESULT(hostname
, p
+ 1 - auth
);
465 // malformed if has a username or password
466 // but no host info, such as: http://u:p@/
467 if ((usernamePos
|| passwordPos
) && (!hostnamePos
|| !*hostnameLen
)) {
468 return NS_ERROR_MALFORMED_URI
;
471 // auth = <server-info>
472 SET_RESULT(username
, 0, -1);
473 SET_RESULT(password
, 0, -1);
474 rv
= ParseServerInfo(auth
, authLen
, hostnamePos
, hostnameLen
, port
);
475 if (NS_FAILED(rv
)) return rv
;
481 nsAuthURLParser::ParseUserInfo(const char* userinfo
, int32_t userinfoLen
,
482 uint32_t* usernamePos
, int32_t* usernameLen
,
483 uint32_t* passwordPos
, int32_t* passwordLen
) {
484 if (NS_WARN_IF(!userinfo
)) {
485 return NS_ERROR_INVALID_POINTER
;
488 if (userinfoLen
< 0) userinfoLen
= strlen(userinfo
);
490 if (userinfoLen
== 0) {
491 SET_RESULT(username
, 0, -1);
492 SET_RESULT(password
, 0, -1);
496 const char* p
= (const char*)memchr(userinfo
, ':', userinfoLen
);
498 // userinfo = <username:password>
499 SET_RESULT(username
, 0, p
- userinfo
);
500 SET_RESULT(password
, p
- userinfo
+ 1, userinfoLen
- (p
- userinfo
+ 1));
502 // userinfo = <username>
503 SET_RESULT(username
, 0, userinfoLen
);
504 SET_RESULT(password
, 0, -1);
510 nsAuthURLParser::ParseServerInfo(const char* serverinfo
, int32_t serverinfoLen
,
511 uint32_t* hostnamePos
, int32_t* hostnameLen
,
513 if (NS_WARN_IF(!serverinfo
)) {
514 return NS_ERROR_INVALID_POINTER
;
517 if (serverinfoLen
< 0) serverinfoLen
= strlen(serverinfo
);
519 if (serverinfoLen
== 0) {
520 SET_RESULT(hostname
, 0, 0);
521 if (port
) *port
= -1;
525 // search backwards for a ':' but stop on ']' (IPv6 address literal
526 // delimiter). check for illegal characters in the hostname.
527 const char* p
= serverinfo
+ serverinfoLen
- 1;
528 const char *colon
= nullptr, *bracket
= nullptr;
529 for (; p
> serverinfo
; --p
) {
535 if (bracket
== nullptr) colon
= p
;
538 // hostname must not contain a space
539 return NS_ERROR_MALFORMED_URI
;
544 // serverinfo = <hostname:port>
545 SET_RESULT(hostname
, 0, colon
- serverinfo
);
547 // XXX unfortunately ToInteger is not defined for substrings
548 nsAutoCString
buf(colon
+ 1, serverinfoLen
- (colon
+ 1 - serverinfo
));
549 if (buf
.Length() == 0) {
552 const char* nondigit
= NS_strspnp("0123456789", buf
.get());
553 if (nondigit
&& *nondigit
) return NS_ERROR_MALFORMED_URI
;
556 *port
= buf
.ToInteger(&err
);
557 if (NS_FAILED(err
) || *port
< 0 ||
558 *port
> std::numeric_limits
<uint16_t>::max()) {
559 return NS_ERROR_MALFORMED_URI
;
564 // serverinfo = <hostname>
565 SET_RESULT(hostname
, 0, serverinfoLen
);
566 if (port
) *port
= -1;
569 // In case of IPv6 address check its validity
570 if (*hostnameLen
> 1 && *(serverinfo
+ *hostnamePos
) == '[' &&
571 *(serverinfo
+ *hostnamePos
+ *hostnameLen
- 1) == ']' &&
572 !net_IsValidIPv6Addr(
573 Substring(serverinfo
+ *hostnamePos
+ 1, *hostnameLen
- 2))) {
574 return NS_ERROR_MALFORMED_URI
;
580 void nsAuthURLParser::ParseAfterScheme(const char* spec
, int32_t specLen
,
581 uint32_t* authPos
, int32_t* authLen
,
582 uint32_t* pathPos
, int32_t* pathLen
) {
583 MOZ_ASSERT(specLen
>= 0, "unexpected");
585 uint32_t nslash
= CountConsecutiveSlashes(spec
, specLen
);
587 // search for the end of the authority section
588 const char* end
= spec
+ specLen
;
590 for (p
= spec
+ nslash
; p
< end
; ++p
) {
591 if (*p
== '/' || *p
== '?' || *p
== '#') break;
594 // spec = [/]<auth><path>
595 SET_RESULT(auth
, nslash
, p
- (spec
+ nslash
));
596 SET_RESULT(path
, p
- spec
, specLen
- (p
- spec
));
599 SET_RESULT(auth
, nslash
, specLen
- nslash
);
600 SET_RESULT(path
, 0, -1);
604 //----------------------------------------------------------------------------
605 // nsStdURLParser implementation
606 //----------------------------------------------------------------------------
608 void nsStdURLParser::ParseAfterScheme(const char* spec
, int32_t specLen
,
609 uint32_t* authPos
, int32_t* authLen
,
610 uint32_t* pathPos
, int32_t* pathLen
) {
611 MOZ_ASSERT(specLen
>= 0, "unexpected");
613 uint32_t nslash
= CountConsecutiveSlashes(spec
, specLen
);
615 // search for the end of the authority section
616 const char* end
= spec
+ specLen
;
618 for (p
= spec
+ nslash
; p
< end
; ++p
) {
619 if (strchr("/?#;", *p
)) break;
625 // spec = (//)<auth><path>
626 SET_RESULT(auth
, nslash
, p
- (spec
+ nslash
));
627 SET_RESULT(path
, p
- spec
, specLen
- (p
- spec
));
630 SET_RESULT(auth
, nslash
, specLen
- nslash
);
631 SET_RESULT(path
, 0, -1);
636 SET_RESULT(auth
, 0, -1);
637 SET_RESULT(path
, 0, specLen
);
640 // spec = ///[/]<path>
641 SET_RESULT(auth
, 2, 0);
642 SET_RESULT(path
, 2, specLen
- 2);