1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Functions for canonicalizing "path" URLs. Not to be confused with the path
6 // of a URL, these are URLs that have no authority section, only a path. For
7 // example, "javascript:" and "data:".
9 #include "url/url_canon.h"
10 #include "url/url_canon_internal.h"
16 // Canonicalize the given |component| from |source| into |output| and
17 // |new_component|. If |separator| is non-zero, it is prepended to |output|
18 // prior to the canonicalized component; e.g. for the '?' or '#' characters.
//
// CHAR is the character type of |source|; UCHAR is the type each character is
// cast to before the range test in the copy loop.
// |new_component->begin| is set to the length of |output| before the copy and
// |new_component->len| to the number of characters appended after it.
// |success| accumulates (via &=) the result of each AppendUTF8EscapedChar call.
//
// NOTE(review): this view of the function looks incomplete -- |separator|,
// |output| and |success| are used but their declarations are not visible, and
// the closing braces, any else branches and the return statement are not
// visible either. Confirm the exact control flow against the complete file.
19 template<typename CHAR
, typename UCHAR
>
20 bool DoCanonicalizePathComponent(const CHAR
* source
,
21 const Component
& component
,
24 Component
* new_component
) {
26 if (component
.is_valid()) {
28 output
->push_back(separator
);
29 // Copy the path using path URL's more lax escaping rules (think for
30 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all
31 // ASCII characters alone. This helps readability of JavaScript.
// Remember where this component starts in |output|.
32 new_component
->begin
= output
->length();
33 int end
= component
.end();
34 for (int i
= component
.begin
; i
< end
; i
++) {
35 UCHAR uch
= static_cast<UCHAR
>(source
[i
]);
// Characters below 0x20 or at/above 0x80 are handed to
// AppendUTF8EscapedChar. It receives &i, so presumably it can advance the
// loop index past multi-unit characters -- TODO confirm.
36 if (uch
< 0x20 || uch
>= 0x80)
37 success
&= AppendUTF8EscapedChar(source
, &i
, end
, output
);
// Appends the character to |output| unchanged.
39 output
->push_back(static_cast<char>(uch
));
// The component length is whatever was appended since |begin| was recorded.
41 new_component
->len
= output
->length() - new_component
->begin
;
// NOTE(review): presumably this reset belongs to the "component not valid"
// path; the branch that selects it is not visible in this view -- confirm.
44 new_component
->reset();
// Canonicalizes a "path URL" (scheme followed by path/query/ref, with no
// authority section) from |source| and |parsed| into |output|, recording the
// resulting component positions in |new_parsed|.
//
// The scheme goes through CanonicalizeScheme; the username, password, host and
// port of |new_parsed| are reset; path, query and ref each go through
// DoCanonicalizePathComponent<CHAR, UCHAR> with separators '\0', '?' and '#'
// respectively. |success| is the AND of all of those results.
//
// NOTE(review): the |parsed|, |output| and |new_parsed| parameter lines, the
// return statement and the closing brace are not visible in this view --
// confirm against the complete file.
49 template <typename CHAR
, typename UCHAR
>
50 bool DoCanonicalizePathURL(const URLComponentSource
<CHAR
>& source
,
54 // Scheme: this will append the colon.
55 bool success
= CanonicalizeScheme(source
.scheme
, parsed
.scheme
,
56 output
, &new_parsed
->scheme
);
58 // We assume there's no authority for path URLs. Note that hosts should never
// NOTE(review): the remainder of the sentence above is missing from this view.
60 new_parsed
->username
.reset();
61 new_parsed
->password
.reset();
62 new_parsed
->host
.reset();
63 new_parsed
->port
.reset();
64 // We allow path URLs to have the path, query and fragment components, but we
65 // will canonicalize each of them via the weaker path URL rules.
// Path: '\0' separator, i.e. no separator character is requested.
66 success
&= DoCanonicalizePathComponent
<CHAR
, UCHAR
>(
67 source
.path
, parsed
.path
, '\0', output
, &new_parsed
->path
);
// Query: introduced by '?'.
68 success
&= DoCanonicalizePathComponent
<CHAR
, UCHAR
>(
69 source
.query
, parsed
.query
, '?', output
, &new_parsed
->query
);
// Ref (fragment): introduced by '#'.
70 success
&= DoCanonicalizePathComponent
<CHAR
, UCHAR
>(
71 source
.ref
, parsed
.ref
, '#', output
, &new_parsed
->ref
);
// Canonicalizes a path URL whose |spec| is a char string. Wraps |spec| in a
// URLComponentSource<char> and forwards to
// DoCanonicalizePathURL<char, unsigned char>, returning its result.
// NOTE(review): the remaining parameter lines (|parsed|, |output|,
// |new_parsed|) and the closing brace are not visible in this view.
78 bool CanonicalizePathURL(const char* spec
,
83 return DoCanonicalizePathURL
<char, unsigned char>(
84 URLComponentSource
<char>(spec
), parsed
, output
, new_parsed
);
// Canonicalizes a path URL whose |spec| is a base::char16 string. Wraps |spec|
// in a URLComponentSource<base::char16> and forwards to
// DoCanonicalizePathURL<base::char16, base::char16>, returning its result.
// NOTE(review): the remaining parameter lines (|parsed|, |output|,
// |new_parsed|) and the closing brace are not visible in this view.
87 bool CanonicalizePathURL(const base::char16
* spec
,
92 return DoCanonicalizePathURL
<base::char16
, base::char16
>(
93 URLComponentSource
<base::char16
>(spec
), parsed
, output
, new_parsed
);
// Applies char-based |replacements| to the path URL |base| (parsed as
// |base_parsed|) and canonicalizes the result into |output| / |new_parsed|.
//
// A URLComponentSource<char> built from |base| and a copy of |base_parsed| are
// passed by pointer to SetupOverrideComponents together with |replacements|,
// and are then canonicalized with DoCanonicalizePathURL<char, unsigned char>.
// Returns the result of that canonicalization.
// NOTE(review): the |output| parameter line and the closing brace are not
// visible in this view.
96 bool ReplacePathURL(const char* base
,
97 const Parsed
& base_parsed
,
98 const Replacements
<char>& replacements
,
100 Parsed
* new_parsed
) {
101 URLComponentSource
<char> source(base
);
// Work on a copy so the caller's |base_parsed| is not modified here.
102 Parsed
parsed(base_parsed
);
103 SetupOverrideComponents(base
, replacements
, &source
, &parsed
);
104 return DoCanonicalizePathURL
<char, unsigned char>(
105 source
, parsed
, output
, new_parsed
);
// Applies base::char16-based |replacements| to the path URL |base| (parsed as
// |base_parsed|) and canonicalizes the result into |output| / |new_parsed|.
//
// Same flow as the char-replacements overload, except that the overrides are
// installed by SetupUTF16OverrideComponents, which additionally receives the
// local |utf8| buffer. Canonicalization is still done on char data via
// DoCanonicalizePathURL<char, unsigned char>, whose result is returned.
// NOTE(review): the |output| parameter line and the closing brace are not
// visible in this view.
108 bool ReplacePathURL(const char* base
,
109 const Parsed
& base_parsed
,
110 const Replacements
<base::char16
>& replacements
,
112 Parsed
* new_parsed
) {
// Scratch buffer handed to SetupUTF16OverrideComponents; presumably it holds
// the UTF-8 form of the replacement text that |source| then refers to -- TODO
// confirm. It stays in scope until the canonicalization call below returns.
113 RawCanonOutput
<1024> utf8
;
114 URLComponentSource
<char> source(base
);
// Work on a copy so the caller's |base_parsed| is not modified here.
115 Parsed
parsed(base_parsed
);
116 SetupUTF16OverrideComponents(base
, replacements
, &utf8
, &source
, &parsed
);
117 return DoCanonicalizePathURL
<char, unsigned char>(
118 source
, parsed
, output
, new_parsed
);