2 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Free Software
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
30 /// Uniform Resource Locator
32 /// This class is used to manage URLs.
/// Befriend the stream-insertion operator for URL so that it may access
/// this class's non-public members.
39 friend std::ostream
& operator<< (std::ostream
&o
, const URL
& u
);
41 /// Construct a URL from the given absolute URL string.
43 /// A relative URL will be considered a filesystem
44 /// path relative to the current working directory.
/// @param absolute_url
///     The URL string to build this object from.
46 /// @throws std::runtime_error on error
/// NOTE(review): this single-argument constructor is not marked `explicit`,
/// so a std::string converts to URL implicitly -- confirm callers rely on
/// that before tightening it.
48 URL(const std::string
& absolute_url
);
50 /// Construct a URL from the given relative URL string,
51 /// using the given URL for resolving it.
/// @param relative_url
///     The URL string to resolve.
/// @param baseurl
///     The URL that relative_url is resolved against.
53 /// @throws std::runtime_error on error
55 URL(const std::string
& relative_url
, const URL
& baseurl
);
57 /// Return the 'protocol' member of this URL, as a string
58 const std::string
& protocol() const { return _proto
; }
60 /// Return the 'hostname' member of this URL, as a string
62 /// NOTE: return the empty string if protocol() is "file"
64 const std::string
& hostname() const { return _host
; }
66 /// Return the 'port' member of this URL, as a string
68 /// NOTE: return the empty string if the port isn't specified,
69 /// as this is an optional field.
71 const std::string
& port() const { return _port
; }
73 /// Return the 'path' member of this URL, as a string
75 /// The returned path starts with '/'
76 const std::string
& path() const { return _path
; }
78 /// Return the 'anchor' member of this URL, as a string
80 /// The anchor is the string after the '#' character
82 const std::string
& anchor() const { return _anchor
; }
84 /// Return the 'querystring' member of this URL, as a string
86 /// The query is the string after the '?' character
88 const std::string
& querystring() const { return _querystring
; }
90 /// Set the 'querystring' member of this URL to a new value
92 void set_querystring(const std::string
& value
) { _querystring
= value
; }
94 /// Return the full absolute URL as a string.
/// The result is returned by value.
96 /// TODO: make output operator and operator+ for strings
/// NOTE(review): an operator<< taking a URL is already declared in this
/// header, so the "output operator" part of the TODO may be stale -- confirm.
97 std::string
str() const;
99 /// Parse a query string, filling the provided map.
101 /// @param query_string
102 /// the url-escaped query string
103 /// (may or may not include the leading question mark)
105 /// @param target_map
106 /// A standard map to put parsed values into.
107 /// Note: existing elements of the map will be replaced.
109 /// @todo url-unescape names and values
111 /// @todo support duplicated keys (var=value1&var=value2)
113 static void parse_querystring(const std::string
& query_string
,
114 std::map
<std::string
, std::string
>& target_map
);
117 /// Encode a string to URL-encoded format, in place,
118 /// converting all dodgy characters to %AB hex sequences.
120 /// Characters that need escaping are:
121 /// - ASCII control characters: 0-31 and 127
122 /// - Non-ASCII chars: 128-255
123 /// - URL syntax characters: $ & + , / : ; = ? @
124 /// - Unsafe characters: SPACE " < > # % { } | \ ^ ~ [ ] `
125 /// Encoding is a % followed by two hexadecimal characters, case insensitive.
126 /// See RFC1738 http://www.rfc-editor.org/rfc/rfc1738.txt,
127 /// Section 2.2 "URL Character Encoding Issues"
/// @param str
131 /// The input/output string
133 static void encode(std::string
& str
);
136 /// Encode a string to URL-encoded format,
137 /// converting all dodgy characters to %AB hex sequences. This
138 /// merely uses the void encode() function on a new string.
/// @param str
///     The string to encode; taken by const reference.
/// @return
143 /// An encoded version of the input string
144 static std::string
encode(const std::string
& str
);
147 /// Decode a string from URL-encoded format, in place,
148 /// converting all hexadecimal sequences to ASCII characters.
150 /// A sequence to convert is % followed by two case-independent hexadecimal
151 /// digits, which is replaced by the equivalent ASCII character.
152 /// See RFC1738 http://www.rfc-editor.org/rfc/rfc1738.txt,
153 /// Section 2.2 "URL Character Encoding Issues"
/// @param str
156 /// The input/output string
158 static void decode(std::string
& str
);
/// Initialization helper taking an absolute URL string.
/// NOTE(review): presumably the worker behind the single-argument
/// constructor; the definition is not visible here -- confirm.
161 void init_absolute(const std::string
& absurl
);
/// Initialization helper taking a relative URL string and a base URL.
/// NOTE(review): presumably the worker behind the two-argument
/// constructor; the definition is not visible here -- confirm.
163 void init_relative(const std::string
& relurl
, const URL
& baseurl
);
165 /// Extract anchor from path
/// (the anchor being the string after the '#' character).
166 void split_anchor_from_path();
168 /// Extract tcp/ip port from path
/// NOTE(review): the function name says the port is split from the *host*,
/// not the path -- the line above is probably a copy/paste slip; confirm
/// against the definition.
169 void split_port_from_host();
171 /// Extract and parse query string from path
/// (the query being the string after the '?' character).
172 void split_querystring_from_path();
174 /// Normalize a 'path' component of a URL
176 /// Normalization currently includes removal
177 /// of adjacent slashes, removal of "./" dir
178 /// components, and resolution of "/../" components.
/// @param path
///     The path to normalize; taken by non-const reference, so
///     presumably rewritten in place -- confirm against the definition.
180 void normalize_path(std::string
& path
);
/// The 'querystring' member: read through querystring() and
/// overwritten by set_querystring().
187 std::string _querystring
;
/// Stream-insertion operator for URL (declared a friend of the class).
/// NOTE(review): DSOEXPORT is a macro defined outside this view, presumably
/// a symbol-visibility/export marker; the exact text written to the stream
/// is determined by the definition, which is not visible here.
190 DSOEXPORT
std::ostream
& operator<< (std::ostream
&o
, const URL
& u
);
192 } // end of gnash namespace