1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/logging.h"
6 #include "url/third_party/mozilla/url_parse.h"
7 #include "url/url_file.h"
8 #include "url/url_parse_internal.h"
10 // Interesting IE file:isms...
13 // ========================= ==============================
14 // file:/foo/bar file:///foo/bar
15 // The result here seems totally invalid!?!? This isn't UNC.
18 // file:// or any other number of slashes
19 // IE6 doesn't do anything at all if you click on this link. No error:
20 // nothing. IE6's history system seems to always color this link, so I'm
21 // guessing that it maps internally to the empty URL.
24 // When on a file: URL source page, this link will work. When over HTTP,
25 // the file: URL will appear in the status bar but the link will not work
26 // (security restriction for all file URLs).
28 // file:foo/ file:foo/ (invalid?!?!?)
29 // file:/foo/ file:///foo/ (invalid?!?!?)
30 // file://foo/ file://foo/ (UNC to server "foo")
31 // file:///foo/ file:///foo/ (invalid, seems to be a file)
32 // file:////foo/ file://foo/ (UNC to server "foo")
33 // Any more than four slashes is also treated as UNC.
35 // file:C:/ file://C:/
36 // file:/C:/ file://C:/
37 // The number of slashes after "file:" doesn't matter if the thing following
38 // it looks like an absolute drive path. Also, slashes and backslashes are
39 // equally valid here.
45 // A subcomponent of DoParseFileURL, the input of this function should be a UNC
46 // path name, with the index of the first character after the slashes following
47 // the scheme given in |after_slashes|. This will initialize the host, path,
48 // query, and ref, and leave the other output components untouched
49 // (DoParseFileURL handles these for us).
//
// |spec| is scanned up to |spec_len|; the host is stored in |parsed| as
// offsets into |spec|, and the path, query, and ref are filled in by
// ParsePathInternal.
50 template<typename CHAR
>
51 void DoParseUNC(const CHAR
* spec
,
55 int next_slash
= FindNextSlash(spec
, after_slashes
, spec_len
);
56 if (next_slash
== spec_len
) {
57 // No additional slash found, as in "file://foo", treat the text as the
58 // host with no path (this will end up being UNC to server "foo").
59 int host_len
= spec_len
- after_slashes
;
61 parsed
->host
= Component(after_slashes
, host_len
);
69 // See if we have something that looks like a path following the first
70 // component. As in "file://localhost/c:/", we get "c:/" out. We want to
71 // treat this as having no host but the path given. Works on Windows only.
72 if (DoesBeginWindowsDriveSpec(spec
, next_slash
+ 1, spec_len
)) {
74 ParsePathInternal(spec
, MakeRange(next_slash
, spec_len
),
75 &parsed
->path
, &parsed
->query
, &parsed
->ref
);
80 // Otherwise, everything up until that first slash we found is the host name,
81 // which will end up being the UNC host. For example "file://foo/bar.txt"
82 // will get a server name of "foo" and a path of "/bar.txt". Later, on Windows,
83 // this should be treated as the filename "\\foo\bar.txt" in proper UNC notation.
85 int host_len
= next_slash
- after_slashes
;
87 parsed
->host
= MakeRange(after_slashes
, next_slash
);
90 if (next_slash
< spec_len
) {
91 ParsePathInternal(spec
, MakeRange(next_slash
, spec_len
),
92 &parsed
->path
, &parsed
->query
, &parsed
->ref
);
98 // A subcomponent of DoParseFileURL, the input should be a local file, with the
99 // beginning of the path indicated by the index in |path_begin|. This will
100 // initialize the host, path, query, and ref, and leave the other output
101 // components untouched (DoParseFileURL handles these for us).
102 template<typename CHAR
>
103 void DoParseLocalFile(const CHAR
* spec
,
// A local file has no host component, so clear it.
107 parsed
->host
.reset();
// Everything from |path_begin| to the end of the spec is handed to
// ParsePathInternal, which fills in the path, query, and ref.
108 ParsePathInternal(spec
, MakeRange(path_begin
, spec_len
),
109 &parsed
->path
, &parsed
->query
, &parsed
->ref
);
112 // Backend for the external functions that operates on either char type.
113 // Handles cases where there is a scheme, but also when handed the first
114 // character following the "file:" at the beginning of the spec. If so,
115 // this is usually a slash, but needn't be; we allow paths like "file:c:\foo".
//
// |spec| and |spec_len| give the input (trimmed below by TrimURL); the
// resulting components are written into |parsed|. The host/path/query/ref
// work is delegated to DoParseUNC or DoParseLocalFile.
116 template<typename CHAR
>
117 void DoParseFileURL(const CHAR
* spec
, int spec_len
, Parsed
* parsed
) {
118 DCHECK(spec_len
>= 0);
120 // Get the parts we never use for file URLs out of the way.
121 parsed
->username
.reset();
122 parsed
->password
.reset();
123 parsed
->port
.reset();
125 // Many of the code paths don't set these, so it's convenient to just clear
126 // them. We'll write them in those cases we need them.
127 parsed
->query
.reset();
130 // Strip leading & trailing spaces and control characters.
132 TrimURL(spec
, &begin
, &spec_len
);
134 // Find the scheme, if any.
135 int num_slashes
= CountConsecutiveSlashes(spec
, begin
, spec_len
);
139 // See how many slashes there are. We want to handle cases like UNC but also
140 // "/c:/foo". This is when there is no scheme, so we can allow pages to do
141 // links like "c:/foo/bar" or "//foo/bar". This is also called by the
142 // relative URL resolver when it determines there is an absolute URL, which
143 // may give us input like "/c:/foo".
144 after_slashes
= begin
+ num_slashes
;
145 if (DoesBeginWindowsDriveSpec(spec
, after_slashes
, spec_len
)) {
146 // Windows path, don't try to extract the scheme (for example, "c:\foo").
147 parsed
->scheme
.reset();
148 after_scheme
= after_slashes
;
149 } else if (DoesBeginUNCPath(spec
, begin
, spec_len
, false)) {
150 // Windows UNC path: don't try to extract the scheme, but keep the slashes.
151 parsed
->scheme
.reset();
152 after_scheme
= begin
;
156 // ExtractScheme doesn't understand the possibility of filenames with
157 // colons in them, in which case it returns the entire spec up to the
158 // colon as the scheme. So handle /foo.c:5 as a file but foo.c:5 as
159 // the foo.c: scheme.
161 ExtractScheme(&spec
[begin
], spec_len
- begin
, &parsed
->scheme
)) {
162 // Offset the results since we gave ExtractScheme a substring.
163 parsed
->scheme
.begin
+= begin
;
164 after_scheme
= parsed
->scheme
.end() + 1;
166 // No scheme found, remember that.
167 parsed
->scheme
.reset();
168 after_scheme
= begin
;
172 // Handle empty specs, or ones that contain only whitespace or control chars,
173 // or that are just the scheme (for example "file:").
174 if (after_scheme
== spec_len
) {
175 parsed
->host
.reset();
176 parsed
->path
.reset();
180 num_slashes
= CountConsecutiveSlashes(spec
, after_scheme
, spec_len
);
181 after_slashes
= after_scheme
+ num_slashes
;
183 // Check whether the input is a drive again. We checked above for windows
184 // drive specs, but that's only at the very beginning to see if we have a
185 // scheme at all. This test will be duplicated in that case, but will
186 // additionally handle all cases with a real scheme such as "file:///C:/".
187 if (!DoesBeginWindowsDriveSpec(spec
, after_slashes
, spec_len
) &&
189 // Anything not beginning with a drive spec ("c:\") on Windows is treated
190 // as UNC, with the exception of three slashes which always means a file.
191 // Even IE7 treats file:///foo/bar as "/foo/bar", which then fails.
192 DoParseUNC(spec
, after_slashes
, spec_len
, parsed
);
196 // file: URL with exactly 2 slashes is considered to have a host component.
197 if (num_slashes
== 2) {
198 DoParseUNC(spec
, after_slashes
, spec_len
, parsed
);
203 // Easy and common case, the full path immediately follows the scheme
204 // (modulo slashes), as in "file://c:/foo". Just treat everything from
205 // there to the end as the path. Empty hosts have 0 length instead of -1.
206 // We include the last slash as part of the path if there is one.
207 DoParseLocalFile(spec
,
208 num_slashes
> 0 ? after_scheme
+ num_slashes
- 1 : after_scheme
,
// External entry point for char input: forwards |url|, |url_len|, and
// |parsed| unchanged to the shared DoParseFileURL backend.
214 void ParseFileURL(const char* url
, int url_len
, Parsed
* parsed
) {
215 DoParseFileURL(url
, url_len
, parsed
);
// External entry point for base::char16 input: forwards |url|, |url_len|, and
// |parsed| unchanged to the shared DoParseFileURL backend.
218 void ParseFileURL(const base::char16
* url
, int url_len
, Parsed
* parsed
) {
219 DoParseFileURL(url
, url_len
, parsed
);