update copyright date
[gnash.git] / libbase / URL.cpp
blob3b7d027a0435ce5060c589a69cdec07d69df7b9e
1 //
2 // Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
3 // 2011 Free Software Foundation, Inc
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software
17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 #include <iostream>
20 #include "URL.h"
22 #include <string>
23 #include <cstring>
24 #include <vector>
25 #include <cassert>
26 #include <sstream>
27 #include <algorithm>
28 #include <cerrno>
29 #include <GnashException.h>
31 // This is for getcwd(2)
33 #if !defined(_WIN32) && !defined(WIN32)
34 # include <unistd.h>
35 #else
36 # include <direct.h>
37 #endif
39 #include <boost/tokenizer.hpp>
40 #include <boost/scoped_array.hpp>
42 namespace gnash {
44 URL::URL(const std::string& relative_url, const URL& baseurl)
46 init_relative(relative_url, baseurl);
49 // only for UNIX
50 void
51 URL::normalize_path(std::string& path)
54 #if defined(_WIN32) || defined(WIN32) || defined(__OS2__) || defined(__amigaos4__)
55 return;
56 #endif
58 if (path.empty() || path[0] != '/') {
59 throw gnash::GnashException("invalid url");
62 std::vector<std::string> components;
64 std::string::iterator prev=path.begin();
65 for (std::string::iterator curr = prev + 1;
66 curr != path.end();
67 ++curr ) {
68 if ( *curr == '/') {
69 std::string comp = std::string(prev+1, curr);
70 //cerr << "comp:" << comp << endl;
71 prev = curr;
73 if ( comp == "" || comp == "." ) continue;
74 if ( comp == ".." && components.size() )
75 components.pop_back();
76 else components.push_back(comp);
79 // add last component
80 components.push_back(std::string(prev+1, path.end()));
82 path = "";
83 for (std::vector<std::string>::const_iterator i=components.begin(),
84 e=components.end();
85 i!=e; ++i) {
86 path += "/" + *i;
91 void
92 URL::init_absolute(const std::string& in)
94 // Find protocol
95 std::string::size_type pos = in.find("://");
96 if ( pos != std::string::npos ) {
97 // copy initial part to protocol
98 _proto = in.substr(0, pos);
100 // advance input pointer to past the :// part
101 pos += 3;
102 if ( pos == in.size() ) {
103 std::cerr << "protocol-only url!" << std::endl;
104 throw gnash::GnashException("protocol-only url");
107 // Find host
108 std::string::size_type pos1 = in.find('/', pos);
109 if ( pos1 == std::string::npos ) {
110 // no slashes ? all hostname, I presume
111 _host = in.substr(pos);
112 _path = "/";
114 // Extract the port number from the hostname, if any
115 split_port_from_host();
117 return;
120 // copy hostname
121 _host = in.substr(pos, pos1-pos);
123 // next come path
124 _path = in.substr(pos1);
125 } else {
126 _proto = "file";
127 _path = in;
130 // Extract anchor from path, if any
131 split_anchor_from_path();
133 // Extract the port number from the hostname, if any
134 split_port_from_host();
136 split_querystring_from_path();
138 normalize_path(_path);
142 URL::URL(const std::string& absolute_url)
144 //cerr << "URL(" << absolute_url << ")" << endl;
145 if ( ( absolute_url.size() && absolute_url[0] == '/' )
146 || absolute_url.find("://") != std::string::npos
147 || ( absolute_url.size() > 1 && absolute_url[1] == ':' ) //for win32
148 || ( absolute_url.size() > 2 && absolute_url.find(':',2) != std::string::npos ) //for aos4
150 //std::cerr << "It's absolute" << std::endl;
151 init_absolute(absolute_url);
153 } else {
154 const size_t incr = 1024;
155 // When does it get silly?
156 const size_t maxSize = 4096;
158 boost::scoped_array<char> buf;
159 char* dir = 0;
160 size_t bufSize = 0;
162 // This just assumes a failure in getcwd is a name-length error,
163 // though that perhaps isn't the case.
164 while (!dir) {
165 bufSize += incr;
166 buf.reset(new char[bufSize]);
167 dir = getcwd(buf.get(), bufSize);
168 if (bufSize >= maxSize) break;
171 if (!dir) {
172 std::stringstream err;
173 err << "getcwd failed: " << std::strerror(errno);
174 throw gnash::GnashException(err.str());
177 std::string currentDir(buf.get());
178 currentDir.append("/");
179 URL cwd(currentDir);
180 init_relative(absolute_url, cwd);
184 void
185 URL::init_relative(const std::string& relative_url, const URL& baseurl)
188 // If relative url starts with an hash, it's just
189 // an anchor change
190 if ( relative_url[0] == '#' ){
191 _proto = baseurl._proto;
192 _host = baseurl._host;
193 _port= baseurl._port;
194 _path = baseurl._path;
195 _anchor = relative_url.substr(1);
196 return;
199 // If has a protocol, call absolute_url ctor
200 if ( relative_url.find("://") != std::string::npos ) {
201 init_absolute(relative_url);
202 return;
205 // use protocol, port and host from baseurl
206 _proto = baseurl._proto;
207 _host = baseurl._host;
208 _port = baseurl._port;
210 if ( relative_url.size() && relative_url[0] == '/' ) {
211 // get path from here
212 _path = relative_url;
213 } else { // path-relative
214 std::string in = relative_url;
216 // see how many dirs we want to take
217 // off the baseurl path
218 int dirsback=0;
219 std::string::size_type pos;
220 while ( ( pos = in.find("../") ) == 0 ) {
221 ++dirsback;
222 pos+=3;
223 while (in[pos] == '/') {
224 ++pos;
226 in = in.substr(pos);
229 // find dirsback'th slash from end of
230 // baseurl path
231 std::string basedir = baseurl._path.substr(0,
232 baseurl._path.find_last_of("/")+1);
234 // for WIN32
235 if (basedir == "") {
236 basedir = baseurl._path.substr(0,
237 baseurl._path.find_last_of("\\")+1);
240 #ifndef __amigaos4__
241 assert(basedir[0] == '/'
242 || basedir[1] == ':'); // for WIN32
243 #ifndef __OS2__
244 // On OS/2 - a filepath such as x:file.swf is acceptable.......
245 assert(*(basedir.rbegin()) == '/'
246 || *(basedir.rbegin()) == '\\'); // for WIN32
247 #endif
248 #endif
249 std::string::size_type lpos = basedir.size()-1;
250 for (int i=0; i<dirsback; ++i) {
251 if ( lpos == 0 ) break;
252 std::string::size_type pos = basedir.rfind('/', lpos-1);
253 // no more slashes found, break and set at 1
254 if ( pos == std::string::npos ) lpos = 1;
255 else lpos = pos;
257 basedir.resize(lpos+1);
259 // get dirname from basurl path
260 _path = basedir + in;
264 split_anchor_from_path();
266 split_querystring_from_path();
268 normalize_path(_path);
273 std::string
274 URL::str() const
276 std::string ret = _proto + "://" + _host;
278 if (!_port.empty()) {
279 ret += ":" + _port;
282 ret += _path;
284 if (!_querystring.empty()) {
285 ret += "?" + _querystring;
288 if (!_anchor.empty()) {
289 ret += "#" + _anchor;
291 return ret;
294 void
295 URL::split_anchor_from_path()
297 assert(_anchor == "");
299 // Extract anchor from path, if any
300 std::string::size_type hashpos = _path.find('#');
301 if ( hashpos != std::string::npos ) {
302 _anchor = _path.substr(hashpos+1);
303 _path.erase(hashpos);
307 void
308 URL::split_port_from_host()
310 assert(_port == "");
312 // Extract anchor from path, if any
313 std::string::size_type hashpos = _host.find(':');
314 if ( hashpos != std::string::npos ) {
315 _port = _host.substr(hashpos+1);
316 _host.erase(hashpos);
320 void
321 URL::split_querystring_from_path()
323 assert(_querystring == "");
325 // extract the parameters from the URL
327 std::string::size_type qmpos = _path.find("?");
328 if (qmpos == std::string::npos) {
329 // no query string
330 return;
333 _querystring = _path.substr(qmpos + 1);
335 // update _path
336 _path.erase(qmpos);
340 void
341 URL::parse_querystring(const std::string& query_string,
342 std::map<std::string, std::string>& target_map)
345 if ( query_string.empty() ) return; // nothing to do
347 std::string qstring=query_string;;
349 if ( qstring[0] == '?' ) {
350 qstring=qstring.substr(1);
353 typedef boost::char_separator<char> Sep;
354 typedef boost::tokenizer< Sep > Tok;
355 Tok t1(qstring, Sep("&"));
356 for(Tok::iterator tit=t1.begin(); tit!=t1.end(); ++tit) {
357 const std::string& nameval = *tit;
359 std::string name;
360 std::string value;
362 size_t eq = nameval.find("=");
363 if ( eq == std::string::npos ) {
364 name = nameval;
365 } else {
366 name = nameval.substr(0, eq);
367 value = nameval.substr(eq+1);
370 decode(name);
371 decode(value);
373 target_map[name] = value;
378 void
379 URL::encode(std::string& input)
381 const std::string escapees = " \"#$%&+,/:;<=>?@[\\]^`{|}~_.!-(')";
382 const std::string hexdigits = "0123456789ABCDEF";
384 for (unsigned int i=0;i<input.length(); i++) {
385 unsigned c = input[i] & 0xFF; // ensure value is 0-255 not -ve
387 if (c < 32 || c > 126 || escapees.find((char)c) != std::string::npos) {
388 input[i] = '%';
389 input.insert(++i, hexdigits.substr(c >> 4, 1));
390 input.insert(++i, hexdigits.substr(c & 0xF, 1));
391 } else if ( c == ' ' ) {
392 input[i] = '+';
397 std::string
398 URL::encode(const std::string& str)
400 std::string escapestring(str);
401 encode(escapestring);
402 return escapestring;
405 void
406 URL::decode(std::string& input)
408 int hexcode;
410 for (unsigned int i=0; i<input.length(); i++) {
411 if (input[i] == '%' && (input.length() > i + 2) &&
412 isxdigit(input[i+1]) && isxdigit(input[i+2])) {
413 input[i+1] = toupper(input[i+1]);
414 input[i+2] = toupper(input[i+2]);
415 if (isdigit(input[i+1])) {
416 hexcode = (input[i+1] - '0') * 16;
417 } else {
418 hexcode = (input[i+1] - 'A' + 10) * 16;
421 if (isdigit(input[i+2])) {
422 hexcode += (input[i+2] - '0');
423 } else {
424 hexcode += (input[i+2] - 'A' + 10);
426 input[i] = (char)hexcode;
427 input.erase(i+1, 2);
428 } else if ( input[i] == '+' ) {
429 input[i] = ' ';
434 std::ostream&
435 operator<< (std::ostream& o, const URL& u)
437 return o << u.str();
440 } // end of gnash namespace
442 // local Variables:
443 // mode: C++
444 // indent-tabs-mode: nil
445 // End: