libbase/URL.cpp

   1 //
   2 //   Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
   3 //   2011 Free Software Foundation, Inc
   4 //
   5 // This program is free software; you can redistribute it and/or modify
   6 // it under the terms of the GNU General Public License as published by
   7 // the Free Software Foundation; either version 3 of the License, or
   8 // (at your option) any later version.
   9 //
  10 // This program is distributed in the hope that it will be useful,
  11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 // GNU General Public License for more details.
  14 //
  15 // You should have received a copy of the GNU General Public License
  16 // along with this program; if not, write to the Free Software
  17 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  18
  19 #include <iostream>
  20 #include "URL.h"
  21
  22 #include <string>
  23 #include <cstring>
  24 #include <vector>
  25 #include <cassert>
  26 #include <sstream>
  27 #include <algorithm>
  28 #include <cerrno>
  29 #include <GnashException.h>
  30
  31 // This is for getcwd(2)
  32
  33 #if !defined(_WIN32) && !defined(WIN32)
  34 # include <unistd.h>
  35 #else
  36 # include <direct.h>
  37 #endif
  38
  39 #include <boost/tokenizer.hpp>
  40 #include <boost/scoped_array.hpp>
  41
  42 namespace gnash {
  43
  44 URL::URL(const std::string& relative_url, const URL& baseurl)
  45 {
  46     init_relative(relative_url, baseurl);
  47 }
  48
  49 // only for UNIX
  50 void
  51 URL::normalize_path(std::string& path)
  52 {
  53
  54 #if defined(_WIN32) || defined(WIN32) || defined(__OS2__) || defined(__amigaos4__)
  55     return;
  56 #endif
  57
  58     if (path.empty() || path[0] != '/') {
  59         throw gnash::GnashException("invalid url");
  60     }
  61
  62     std::vector<std::string> components;
  63
  64     std::string::iterator prev=path.begin();
  65     for (std::string::iterator curr = prev + 1;
  66          curr != path.end();
  67          ++curr ) {
  68         if ( *curr == '/') {
  69             std::string comp = std::string(prev+1, curr);
  70             //cerr << "comp:" << comp << endl;
  71             prev = curr;
  72
  73             if ( comp == "" || comp == "." ) continue;
  74             if ( comp == ".." && components.size() )
  75                 components.pop_back();
  76             else components.push_back(comp);
  77         }
  78     }
  79     // add last component
  80     components.push_back(std::string(prev+1, path.end()));
  81
  82     path = "";
  83     for (std::vector<std::string>::const_iterator i=components.begin(),
  84              e=components.end();
  85          i!=e; ++i) {
  86         path += "/" + *i;
  87     }
  88
  89 }
  90
  91 void
  92 URL::init_absolute(const std::string& in)
  93 {
  94     // Find protocol
  95     std::string::size_type pos = in.find("://");
  96     if ( pos != std::string::npos ) {
  97         // copy initial part to protocol
  98         _proto = in.substr(0, pos);
  99
 100         // advance input pointer to past the :// part
 101         pos += 3;
 102         if ( pos == in.size() ) {
 103             std::cerr << "protocol-only url!" << std::endl;
 104             throw gnash::GnashException("protocol-only url");
 105         }
 106
 107         // Find host
 108         std::string::size_type pos1 = in.find('/', pos);
 109         if ( pos1 == std::string::npos ) {
 110             // no slashes ? all hostname, I presume
 111             _host = in.substr(pos);
 112             _path = "/";
 113
 114             // Extract the port number from the hostname, if any
 115             split_port_from_host();
 116
 117             return;
 118         }
 119
 120         // copy hostname
 121         _host = in.substr(pos, pos1-pos);
 122
 123         // next come path
 124         _path = in.substr(pos1);
 125     } else {
 126         _proto = "file";
 127         _path = in;
 128     }
 129
 130     // Extract anchor from path, if any
 131     split_anchor_from_path();
 132
 133     // Extract the port number from the hostname, if any
 134     split_port_from_host();
 135
 136     split_querystring_from_path();
 137
 138     normalize_path(_path);
 139 }
 140
 141
 142 URL::URL(const std::string& absolute_url)
 143 {
 144     //cerr << "URL(" << absolute_url << ")" << endl;
 145     if ( ( absolute_url.size() && absolute_url[0] == '/' )
 146          || absolute_url.find("://") != std::string::npos
 147          || ( absolute_url.size() > 1 && absolute_url[1] == ':' )        //for win32
 148          || ( absolute_url.size() > 2 && absolute_url.find(':',2) != std::string::npos ) //for aos4
 149         ) {
 150         //std::cerr << "It's absolute" << std::endl;
 151         init_absolute(absolute_url);
 152
 153     } else {
 154         const size_t incr = 1024;
 155         // When does it get silly?
 156         const size_t maxSize = 4096;
 157
 158         boost::scoped_array<char> buf;
 159         char* dir = 0;
 160         size_t bufSize = 0;
 161
 162         // This just assumes a failure in getcwd is a name-length error,
 163         // though that perhaps isn't the case.
 164         while (!dir) {
 165             bufSize += incr;
 166             buf.reset(new char[bufSize]);
 167             dir = getcwd(buf.get(), bufSize);
 168             if (bufSize >= maxSize) break;
 169         }
 170
 171         if (!dir) {
 172             std::stringstream err;
 173             err << "getcwd failed: " << std::strerror(errno);
 174             throw gnash::GnashException(err.str());
 175         }
 176
 177         std::string currentDir(buf.get());
 178         currentDir.append("/");
 179         URL cwd(currentDir);
 180         init_relative(absolute_url, cwd);
 181     }
 182 }
 183
 184 void
 185 URL::init_relative(const std::string& relative_url, const URL& baseurl)
 186 {
 187
 188     // If relative url starts with an hash, it's just
 189     // an anchor change
 190     if ( relative_url[0] == '#' ){
 191         _proto = baseurl._proto;
 192         _host = baseurl._host;
 193         _port= baseurl._port;
 194         _path = baseurl._path;
 195         _anchor = relative_url.substr(1);
 196         return;
 197     }
 198
 199     // If has a protocol, call absolute_url ctor
 200     if ( relative_url.find("://") != std::string::npos ) {
 201         init_absolute(relative_url);
 202         return;
 203     }
 204
 205     // use protocol, port and host from baseurl
 206     _proto = baseurl._proto;
 207     _host = baseurl._host;
 208     _port = baseurl._port;
 209
 210     if ( relative_url.size() && relative_url[0] == '/' ) {
 211         // get path from here
 212         _path = relative_url;
 213     } else { // path-relative
 214         std::string in = relative_url;
 215
 216         // see how many dirs we want to take
 217         // off the baseurl path
 218         int dirsback=0;
 219         std::string::size_type pos;
 220         while ( ( pos = in.find("../") ) == 0 ) {
 221             ++dirsback;
 222             pos+=3;
 223             while (in[pos] == '/') {
 224                 ++pos;
 225             }
 226             in = in.substr(pos);
 227         }
 228
 229         // find dirsback'th slash from end of
 230         // baseurl path
 231         std::string basedir = baseurl._path.substr(0,
 232                                       baseurl._path.find_last_of("/")+1);
 233
 234         // for WIN32
 235         if (basedir == "") {
 236             basedir = baseurl._path.substr(0,
 237                                       baseurl._path.find_last_of("\\")+1);
 238         }
 239
 240 #ifndef __amigaos4__
 241         assert(basedir[0] == '/'
 242                || basedir[1] == ':');  // for WIN32
 243 #ifndef __OS2__
 244         // On OS/2 - a filepath such as x:file.swf is acceptable.......
 245         assert(*(basedir.rbegin()) == '/'
 246                || *(basedir.rbegin()) == '\\');        // for WIN32
 247 #endif
 248 #endif
 249         std::string::size_type lpos =  basedir.size()-1;
 250         for (int i=0; i<dirsback; ++i) {
 251             if ( lpos == 0 ) break;
 252             std::string::size_type pos = basedir.rfind('/', lpos-1);
 253             // no more slashes found, break and set at 1
 254             if ( pos == std::string::npos ) lpos = 1;
 255             else lpos = pos;
 256         }
 257         basedir.resize(lpos+1);
 258
 259         // get dirname from basurl path
 260         _path = basedir + in;
 261
 262     }
 263
 264     split_anchor_from_path();
 265
 266     split_querystring_from_path();
 267
 268     normalize_path(_path);
 269
 270
 271 }
 272
 273 std::string
 274 URL::str() const
 275 {
 276     std::string ret = _proto + "://" + _host;
 277
 278     if (!_port.empty()) {
 279         ret += ":" + _port;
 280     }
 281
 282     ret += _path;
 283
 284     if (!_querystring.empty()) {
 285         ret += "?" + _querystring;
 286     }
 287
 288     if (!_anchor.empty()) {
 289         ret += "#" + _anchor;
 290     }
 291     return ret;
 292 }
 293
 294 void
 295 URL::split_anchor_from_path()
 296 {
 297     assert(_anchor == "");
 298
 299     // Extract anchor from path, if any
 300     std::string::size_type hashpos = _path.find('#');
 301     if ( hashpos != std::string::npos ) {
 302         _anchor = _path.substr(hashpos+1);
 303         _path.erase(hashpos);
 304     }
 305 }
 306
 307 void
 308 URL::split_port_from_host()
 309 {
 310     assert(_port == "");
 311
 312     // Extract anchor from path, if any
 313     std::string::size_type hashpos = _host.find(':');
 314     if ( hashpos != std::string::npos ) {
 315         _port = _host.substr(hashpos+1);
 316         _host.erase(hashpos);
 317     }
 318 }
 319
 320 void
 321 URL::split_querystring_from_path()
 322 {
 323     assert(_querystring == "");
 324
 325     // extract the parameters from the URL
 326
 327     std::string::size_type qmpos = _path.find("?");
 328     if (qmpos == std::string::npos) {
 329         // no query string
 330         return;
 331     }
 332
 333     _querystring = _path.substr(qmpos + 1);
 334
 335     // update _path
 336     _path.erase(qmpos);
 337
 338 }
 339
 340 void
 341 URL::parse_querystring(const std::string& query_string,
 342                        std::map<std::string, std::string>& target_map)
 343 {
 344
 345     if ( query_string.empty() ) return; // nothing to do
 346
 347     std::string qstring=query_string;;
 348
 349     if ( qstring[0] == '?' ) {
 350         qstring=qstring.substr(1);
 351     }
 352
 353     typedef boost::char_separator<char> Sep;
 354     typedef boost::tokenizer< Sep > Tok;
 355     Tok t1(qstring, Sep("&"));
 356     for(Tok::iterator tit=t1.begin(); tit!=t1.end(); ++tit) {
 357         const std::string& nameval = *tit;
 358
 359         std::string name;
 360         std::string value;
 361
 362         size_t eq = nameval.find("=");
 363         if ( eq == std::string::npos ) {
 364             name = nameval;
 365         } else {
 366             name = nameval.substr(0, eq);
 367             value = nameval.substr(eq+1);
 368         }
 369
 370         decode(name);
 371         decode(value);
 372
 373         target_map[name] = value;
 374     }
 375
 376 }
 377
 378 void
 379 URL::encode(std::string& input)
 380 {
 381     const std::string escapees = " \"#$%&+,/:;<=>?@[\\]^`{|}~_.!-(')";
 382     const std::string hexdigits = "0123456789ABCDEF";
 383
 384     for (unsigned int i=0;i<input.length(); i++) {
 385         unsigned c = input[i] & 0xFF;   // ensure value is 0-255 not -ve
 386
 387         if (c < 32 || c > 126 || escapees.find((char)c) != std::string::npos) {
 388             input[i] = '%';
 389             input.insert(++i, hexdigits.substr(c >> 4, 1));
 390             input.insert(++i, hexdigits.substr(c & 0xF, 1));
 391         } else if ( c == ' ' ) {
 392             input[i] = '+';
 393         }
 394     }
 395 }
 396
 397 std::string
 398 URL::encode(const std::string& str)
 399 {
 400     std::string escapestring(str);
 401     encode(escapestring);
 402     return escapestring;
 403 }
 404
 405 void
 406 URL::decode(std::string& input)
 407 {
 408     int hexcode;
 409
 410     for (unsigned int i=0; i<input.length(); i++) {
 411         if (input[i] == '%' && (input.length() > i + 2) &&
 412             isxdigit(input[i+1]) && isxdigit(input[i+2])) {
 413             input[i+1] = toupper(input[i+1]);
 414             input[i+2] = toupper(input[i+2]);
 415             if (isdigit(input[i+1])) {
 416                 hexcode = (input[i+1] - '0') * 16;
 417             } else {
 418                 hexcode = (input[i+1] - 'A' + 10) * 16;
 419             }
 420
 421             if (isdigit(input[i+2])) {
 422                 hexcode += (input[i+2] - '0');
 423             } else {
 424                 hexcode += (input[i+2] - 'A' + 10);
 425             }
 426             input[i] = (char)hexcode;
 427             input.erase(i+1, 2);
 428         } else if ( input[i] == '+' ) {
 429             input[i] = ' ';
 430         }
 431     }
 432 }
 433
 434 std::ostream&
 435 operator<< (std::ostream& o, const URL& u)
 436 {
 437     return o << u.str();
 438 }
 439
 440 } // end of gnash namespace
 441
 442 // local Variables:
 443 // mode: C++
 444 // indent-tabs-mode: nil
 445 // End: