1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 #ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
5 #define EXTENSIONS_COMMON_URL_PATTERN_H_
14 // A pattern that can be used to match URLs. A URLPattern is a very restricted
15 // subset of URL syntax:
17 // <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
18 // <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
19 // 'chrome-extension' | 'filesystem'
20 // <host> := '*' | '*.' <anychar except '/' and '*'>+
21 // <port> := [':' ('*' | <port number between 0 and 65535>)]
22 // <path> := '/' <any chars>
24 // * Host is not used when the scheme is 'file'.
25 // * The path can have embedded '*' characters which act as glob wildcards.
26 // * '<all_urls>' is a special pattern that matches any URL that contains a
27 // valid scheme (as specified by valid_schemes_).
28 // * The '*' scheme pattern excludes file URLs.
30 // Examples of valid patterns:
33 // - https://*.google.com/foo*bar
35 // - http://127.0.0.1/*
37 // Examples of invalid patterns:
38 // - http://* -- path not specified
39 // - http://*foo/bar -- * not allowed as substring of host component
40 // - http://foo.*.bar/baz -- * must be first component
41 // - http:/bar -- scheme separator not found
42 // - foo://* -- invalid scheme
43 // - chrome:// -- we don't support chrome internal URLs
46 // A collection of scheme bitmasks for use with valid_schemes.
50 SCHEME_HTTPS
= 1 << 1,
53 SCHEME_CHROMEUI
= 1 << 4,
54 SCHEME_EXTENSION
= 1 << 5,
55 SCHEME_FILESYSTEM
= 1 << 6,
58 // SCHEME_ALL will match every scheme, including chrome://, chrome-
59 // extension://, about:, etc. Because this has lots of security
60 // implications, third-party extensions should usually not be able to get
61 // access to URL patterns initialized this way. If there is a reason
62 // for violating this general rule, document why this is safe.
66 // Error codes returned from Parse().
69 PARSE_ERROR_MISSING_SCHEME_SEPARATOR
,
70 PARSE_ERROR_INVALID_SCHEME
,
71 PARSE_ERROR_WRONG_SCHEME_SEPARATOR
,
72 PARSE_ERROR_EMPTY_HOST
,
73 PARSE_ERROR_INVALID_HOST_WILDCARD
,
74 PARSE_ERROR_EMPTY_PATH
,
75 PARSE_ERROR_INVALID_PORT
,
76 PARSE_ERROR_INVALID_HOST
,
80 // The <all_urls> string pattern.
// Only declared here (array of unknown bound, no initializer); the value is
// defined outside this header.
81 static const char kAllUrlsPattern
[];
83 // Returns true if the given |scheme| is considered valid for extensions.
// Static, so it can be called without a URLPattern instance.
84 static bool IsValidSchemeForExtensions(const std::string
& scheme
);
// Constructs a pattern from a scheme bitmask only; no pattern string is given.
// |valid_schemes| is a combination of the SCHEME_* bitmask values.
// Marked explicit so an int is never implicitly converted to a URLPattern.
86 explicit URLPattern(int valid_schemes
);
88 // Convenience to construct a URLPattern from a string. If the string is not
89 // known ahead of time, use Parse() instead, which returns success or failure.
// NOTE(review): how this constructor reacts to an unparsable |pattern| is not
// visible in this header -- confirm in the implementation before passing
// untrusted input.
90 URLPattern(int valid_schemes
, const std::string
& pattern
);
// Comparison operators taking another URLPattern.
// NOTE(review): which fields take part in the ordering and equality checks is
// decided by the out-of-line definitions, which this header does not show.
95 bool operator<(const URLPattern
& other
) const;
96 bool operator>(const URLPattern
& other
) const;
97 bool operator==(const URLPattern
& other
) const;
99 // Initializes this instance by parsing the provided string. Returns
100 // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
101 // failure, this instance will have some intermediate values and is in an
// invalid state.
103 ParseResult
Parse(const std::string
& pattern_str
);
105 // Gets the bitmask of valid schemes.
106 int valid_schemes() const { return valid_schemes_
; }
// Setter for the bitmask of valid schemes; defined out of line.
107 void SetValidSchemes(int valid_schemes
);
109 // Gets the host the pattern matches. This can be an empty string if the
110 // pattern matches all hosts (the input was <scheme>://*/<whatever>).
// Returns a reference to the stored host_, so it is only valid while this
// object is alive.
111 const std::string
& host() const { return host_
; }
// Setter for the host; defined out of line.
112 void SetHost(const std::string
& host
);
114 // Gets whether to match subdomains of host().
115 bool match_subdomains() const { return match_subdomains_
; }
// Setter for the subdomain-matching flag; defined out of line.
116 void SetMatchSubdomains(bool val
);
118 // Gets the path the pattern matches with the leading slash. This can have
119 // embedded asterisks which are interpreted using glob rules.
// Returns a reference to the stored path_, so it is only valid while this
// object is alive.
120 const std::string
& path() const { return path_
; }
// Setter for the path; defined out of line.
121 void SetPath(const std::string
& path
);
123 // Returns true if this pattern matches all urls.
124 bool match_all_urls() const { return match_all_urls_
; }
// Setter for the match-all-URLs flag; defined out of line.
125 void SetMatchAllURLs(bool val
);
127 // Sets the scheme for pattern matches. This can be a single '*' if the
128 // pattern matches all valid schemes (as defined by the valid_schemes_
129 // property). Returns false on failure (if the scheme is not valid).
130 bool SetScheme(const std::string
& scheme
);
131 // Note: You should use MatchesScheme() instead of this getter unless you
132 // absolutely need the exact scheme. This is exposed for testing.
// Returns a reference to the stored scheme_.
133 const std::string
& scheme() const { return scheme_
; }
135 // Returns true if the specified scheme can be used in this URL pattern, and
136 // false otherwise. Uses valid_schemes_ to determine validity.
// Unlike the static IsValidSchemeForExtensions(), this is a const member and
// depends on this instance's valid_schemes_.
137 bool IsValidScheme(const std::string
& scheme
) const;
139 // Returns true if this instance matches the specified URL.
// For "filesystem:" URLs the inner URL is examined rather than the outer
// scheme (see the note on MatchesScheme()).
140 bool MatchesURL(const GURL
& test
) const;
142 // Returns true if this instance matches the specified security origin.
// NOTE(review): presumably only the origin parts of |test| (scheme, host,
// port) are considered and its path is ignored -- confirm in the
// implementation.
143 bool MatchesSecurityOrigin(const GURL
& test
) const;
145 // Returns true if |test| matches our scheme.
146 // Note that if |test| is "filesystem", this may fail whereas MatchesURL()
147 // may succeed. MatchesURL() is smart enough to look at the inner_url instead
148 // of the outer "filesystem:" part.
149 bool MatchesScheme(const std::string
& test
) const;
151 // Returns true if |test| matches our host.
152 bool MatchesHost(const std::string
& test
) const;
// Overload taking a whole URL instead of a host string.
// NOTE(review): presumably matches against the host component of |test| --
// confirm in the implementation.
153 bool MatchesHost(const GURL
& test
) const;
155 // Returns true if |test| matches our path.
// The pattern's path may contain '*' glob wildcards (see path()).
156 bool MatchesPath(const std::string
& test
) const;
158 // Returns true if the pattern is vague enough that it implies all hosts,
// such as *://*/*.
160 // This is an expensive method, and should be used sparingly!
161 // You should probably use URLPatternSet::ShouldWarnAllHosts(), which is
// presumably cheaper (cached) -- TODO confirm against URLPatternSet.
163 bool ImpliesAllHosts() const;
165 // Returns true if the pattern only matches a single origin. The pattern may
// include a path.
167 bool MatchesSingleOrigin() const;
169 // Sets the port. Returns false if the port is invalid. Per the grammar at
// the top of this file, a valid port is '*' or a number between 0 and 65535.
170 bool SetPort(const std::string
& port
);
// Returns a reference to the stored port_ string.
171 const std::string
& port() const { return port_
; }
173 // Returns a string representing this instance.
// The result is returned by const reference, so it must not be used after
// this object is destroyed.
// NOTE(review): spec_ is declared mutable, which suggests the string is built
// on demand and cached by this const method -- confirm in the implementation.
174 const std::string
& GetAsString() const;
176 // Determines whether there is a URL that would match this instance and
177 // another instance. This method is symmetrical: Calling
178 // other.OverlapsWith(*this) would result in the same answer.
179 bool OverlapsWith(const URLPattern
& other
) const;
181 // Returns true if this pattern matches all possible URLs that |other| can
182 // match. For example, http://*.google.com/* encompasses
// http://www.google.com/*.
183 bool Contains(const URLPattern
& other
) const;
185 // Converts this URLPattern into an equivalent set of URLPatterns that don't
186 // use a wildcard in the scheme component. If this URLPattern doesn't use a
187 // wildcard scheme, then the returned set will contain one element that is
188 // equivalent to this instance.
// The result is returned by value as a new vector; this instance is not
// modified (const method).
189 std::vector
<URLPattern
> ConvertToExplicitSchemes() const;
191 static bool EffectiveHostCompare(const URLPattern
& a
, const URLPattern
& b
) {
192 if (a
.match_all_urls_
&& b
.match_all_urls_
)
194 return a
.host_
.compare(b
.host_
) < 0;
197 // Used for origin comparisons in a std::set.
198 class EffectiveHostCompareFunctor
{
200 bool operator()(const URLPattern
& a
, const URLPattern
& b
) const {
201 return EffectiveHostCompare(a
, b
);
205 // Get an error string for a ParseResult.
// Static, so it can be called without a URLPattern instance.
// NOTE(review): the lifetime of the returned C string is not visible here;
// presumably it points at static storage and must not be freed -- confirm.
206 static const char* GetParseResultString(URLPattern::ParseResult parse_result
);
209 // Returns true if any of the |schemes| items matches our scheme.
// NOTE(review): the result for an empty |schemes| list is not visible in this
// header -- confirm in the implementation.
210 bool MatchesAnyScheme(const std::vector
<std::string
>& schemes
) const;
212 // Returns true if all of the |schemes| items match our scheme.
213 bool MatchesAllSchemes(const std::vector
<std::string
>& schemes
) const;
// NOTE(review): undocumented in the original. Judging by the name this is the
// shared implementation behind MatchesSecurityOrigin() -- confirm in the
// implementation before relying on it.
215 bool MatchesSecurityOriginHelper(const GURL
& test
) const;
217 // Returns true if our port matches the |port| pattern (it may be "*").
// Per the grammar at the top of this file, a port pattern is '*' or a port
// number.
218 bool MatchesPortPattern(const std::string
& port
) const;
220 // If the URLPattern contains a wildcard scheme, returns a list of
221 // equivalent literal schemes, otherwise returns the current scheme.
// The result is returned by value as a new vector of scheme strings.
222 std::vector
<std::string
> GetExplicitSchemes() const;
224 // A bitmask containing the schemes which are considered valid for this
225 // pattern. Parse() uses this to decide whether a pattern contains a valid
// scheme.
229 // True if this is a special-case "<all_urls>" pattern.
230 bool match_all_urls_
;
232 // The scheme for the pattern.
235 // The host without any leading "*" components.
238 // Whether we should match subdomains of the host. This is true if the first
239 // component of the pattern's host was "*".
240 bool match_subdomains_
;
245 // The path to match. This is everything after the host of the URL, or
246 // everything after the scheme in the case of file:// URLs.
249 // The path with "?" and "\" characters escaped for use with the
250 // MatchPattern() function.
251 std::string path_escaped_
;
253 // A string representing this URLPattern.
254 mutable std::string spec_
;
// Stream insertion operator for URLPattern; returns a std::ostream reference
// so insertions can be chained.
// NOTE(review): presumably writes the same text as GetAsString() -- confirm in
// the implementation.
257 std::ostream
& operator<<(std::ostream
& out
, const URLPattern
& url_pattern
);
// Convenience alias for a vector of URLPattern objects.
259 typedef std::vector
<URLPattern
> URLPatternList
;
261 #endif // EXTENSIONS_COMMON_URL_PATTERN_H_