lib/Damon/Request.cpp

   1 #include "Request.h"
   2 #include <boost/format.hpp>
   3 #include <boost/bind.hpp>
   4 #include <boost/thread/once.hpp>
   5 #include <Acari/ParsingHelpers.hpp>
   6 #include <Scorpion/Context.h>
   7 #include <Spin/Connection.h>
   8 #include <Spin/Connector.h>
   9 #include <Spin/Exceptions/HTTP.h>
  10 #include "Private/parseURL.h"
  11 #include "Session.h"
  12
  13 using namespace Acari;
  14
  15 namespace Damon
  16 {
  17         namespace
  18         {
  19                 template < typename C >
  20                 struct ValidateURL_
  21                 {
  22                         typedef C argument_type;
  23
  24                         ValidateURL_(const std::string & server, unsigned short port, std::string C::* p)
  25                                 : server_(server),
  26                                   port_(port),
  27                                   p_(p)
  28                         { /* no-op */ }
  29
  30                         bool operator()(const C & c) const
  31                         {
  32                                 using Private::parseURL;
  33
  34                                 std::string protocol;
  35                                 std::string server;
  36                                 boost::uint16_t port;
  37                                 std::string resource;
  38                                 std::string username;
  39                                 std::string password;
  40                                 boost::tie(protocol, server, port, resource, username, password) = parseURL(c.*p_);
  41
  42                                 return (server == server_) && (port_ == port);
  43                         }
  44
  45                         std::string server_;
  46                         unsigned short port_;
  47                         std::string C::* p_;
  48                 };
  49
  50                 template < typename C >
  51                 ValidateURL_< C > ValidateURL(const std::string & server, unsigned short port, std::string C::* p)
  52                 {
  53                         return ValidateURL_< C >(server, port, p);
  54                 }
  55         }
  56
  57         static unsigned long attribute_index__(0xFFFFFFFF);
  58         static boost::once_flag once_flag__(BOOST_ONCE_INIT);
  59
  60         void initAttributeIndex()
  61         {
  62                 assert(attribute_index__ == 0xFFFFFFFF);
  63                 attribute_index__ = Spin::Connection::allocateAttribute();
  64         }
  65
  66         Request::Request(const std::string & url, Method method/* = get__*/, AuthorizationMethod auth_method/* = none__*/)
  67                 : url_(url),
  68                   method_(method),
  69                   auth_method_(auth_method)
  70         { /* no-op */ }
  71
  72         Request::~Request()
  73         { /* no-op */ }
  74
  75         Response getResponse(boost::shared_ptr< Spin::Connection > connection)
  76         {
  77                 boost::call_once(initAttributeIndex, once_flag__);
  78                 bool retrying(false);
  79 retry:
  80                 /* HERE we should make the connection blocking in a scoped manner if it isn't already */
  81
  82                 std::vector< char > buffer;
  83
  84                 /* When we get here, we may have some data in the connection's
  85                  * attributes already, which we can get now */
  86                 boost::any & connection_attribute(connection->getAttribute(attribute_index__));
  87                 std::vector< char >::iterator where;
  88                 bool end_of_headers_found(false);
  89                 bool end_of_buffer_found(false);
  90                 boost::shared_ptr< Response > response;
  91                 do
  92                 {
  93                         if (connection_attribute.empty() /* no existing attribute */)
  94                         {       // parse the request, as it is new
  95                                 if (retrying || end_of_buffer_found || buffer.empty() || connection->poll())
  96                                 {
  97                                         std::vector< char > tmp_buffer(buffer);
  98                                         buffer.clear();
  99                                         std::pair< std::size_t, int > result(connection->read(buffer));
 100                                         switch (result.second /* reason */)             // this is a switch mainly for documentation purposes - it could have been an if, but I think this is clearer
 101                                         {
 102                                         case Spin::Connection::no_error__ :
 103                                         case Spin::Connection::should_retry__ :
 104                                         case Spin::Connection::should_read__ :
 105                                                 /* in any of these cases, we either have everything we came for or we
 106                                                  * should read some more. We might as well check whether we have
 107                                                  * everything we can for and retry only after that, because we will
 108                                                  * probably have either the complete response or have to allow the
 109                                                  * server a bit of time to push the rest of the data through. */
 110                                                 break;
 111                                         default :
 112                                                 throw std::runtime_error("Unexpected connection state"); // be more eloquent HERE
 113                                         }
 114                                         const std::size_t & size(result.first);
 115                                         buffer.resize(size);
 116                                         buffer.insert(buffer.begin(), tmp_buffer.begin(), tmp_buffer.end());
 117                                 }
 118                                 else
 119                                 { /* already have data to work with */ }
 120                                 boost::tie(response, where) = extractResponseHeader< Spin::Connection, Response, Spin::Exceptions::HTTP::UnsupportedProtocol >(connection, buffer.begin(), buffer.end());
 121                                 if (!response && where == buffer.begin())
 122                                 {
 123                                         /* the extraction errored out because the first line was incomplete.
 124                                          * If this is the case, we will store whatever we got in the connection
 125                                          * attributes and return. */
 126                                         break;
 127                                 }
 128                                 else
 129                                 { /* all is well */ }
 130                                 assert(response || where == buffer.end());
 131                         }
 132                         else
 133                         {
 134                                 std::vector< char > temp_buffer;
 135                                 boost::tie( response, temp_buffer, end_of_headers_found, end_of_buffer_found ) = boost::any_cast< boost::tuple< boost::shared_ptr< Response >, std::vector< char >, bool, bool > >(connection_attribute);
 136                                 buffer.insert(buffer.begin(), temp_buffer.begin(), temp_buffer.end());
 137                                 /* connection_attribute.clear(); ==> */ boost::any a; connection_attribute.swap(a);
 138                                 where = buffer.begin();
 139                         }
 140                 } while(!(response || where == buffer.end()));
 141                 /* from here on, white space is important as we need to count the number of
 142                  * carriage returns (13) or newlines (10). If we find two of them before
 143                  * finding a non-whitespace character (that is: two newlines or two carriage
 144                  * returns), the requests consists of only the header. If we find only one,
 145                  * there is a header before the end of the response, and there might be a
 146                  * body. */
 147                 end_of_buffer_found = false;
 148                 if (response) do
 149                 {
 150                         std::vector< char >::iterator whence(where);
 151                         whence = advanceThroughIgnorableWhiteSpace(whence, buffer.end());
 152                         if (moreHeaders(where, whence))
 153                         {
 154                                 where = whence;
 155                                 whence = findHeaderEnd(where, buffer.end());
 156                                 if (where != whence)
 157                                 {
 158                                         Details::Header header;
 159                                         boost::tie(header.name_, header.value_) = splitHeader< Spin::Exceptions::HTTP::InvalidHeader >(where, whence);
 160                                         response->header_fields_.push_back(header);
 161                                         where = whence;
 162                                 }
 163                                 else
 164                                 {       /* we're at the end of the current buffer, but more headers are still
 165                                          * to come. We'll break out of the loop (by setting end_of_buffer_found,
 166                                          * in some corner-cases, where will not be at buffer.end() but we will
 167                                          * still have found the end of the buffer) and store what we currently know
 168                                          * about the response in the connection attributes (the end of the headers
 169                                          * will not have been found, so this will happen automatically) */
 170                                         end_of_buffer_found = true;
 171                                 }
 172                         }
 173                         else // we have all of the header
 174                         {
 175                                 where = whence;
 176                                 end_of_headers_found = true;
 177                         }
 178                 } while (!end_of_headers_found && where != buffer.end() && !end_of_buffer_found);
 179                 if (end_of_headers_found)
 180                 {
 181                         /* When we get here, we don't know whether the response has a body.
 182                          * If it does, there is a Content-Length header among the headers
 183                          * that will contain the size of the body. */
 184                         Details::HeaderFields::const_iterator curr(response->header_fields_.begin());
 185                         Details::HeaderFields::const_iterator end(response->header_fields_.end());
 186                         while (curr != end && curr->name_ != "Content-Length") ++curr;
 187                         bool complete_body_found(false);
 188                         if (curr != end)
 189                         {
 190                                 std::size_t body_size(boost::lexical_cast< std::size_t >(curr->value_));
 191                                 std::vector< char >::iterator whence(where);
 192                                 /* When we get here, the "where" and "whence" iterators should both be
 193                                  * at the start of the body. We will advance "whence" to the end of the
 194                                  * body - which should be within the bounds of the buffer. */
 195                                 if (std::size_t(std::distance(whence, buffer.end())) < body_size)
 196                                 {
 197                                         connection_attribute = boost::make_tuple(response, std::vector< char >(where, buffer.end()), end_of_headers_found);
 198                                 }
 199                                 else
 200                                 {
 201                                         std::advance(whence, body_size);
 202                                         assert(response->body_.empty());
 203                                         response->body_.insert(response->body_.end(), where, whence);
 204                                         where = whence;
 205                                         complete_body_found = true;
 206                                 }
 207                         }
 208                         else
 209                         { /* request does not have a body */
 210                                 complete_body_found = true;
 211                         }
 212                         if (complete_body_found)
 213                         {
 214                                 /* Now, if whence is not at the end of the buffer, more requests may be
 215                                  * in the buffer. We need to handle those. */
 216                                 if (where != buffer.end())
 217                                 {
 218                                         connection_attribute = boost::make_tuple(boost::shared_ptr< Response >(), std::vector< char >(where, buffer.end()), false, end_of_buffer_found);
 219                                 }
 220                                 else
 221                                 { /* done */ }
 222                         }
 223                 }
 224                 else
 225                 {
 226                         connection_attribute = boost::make_tuple(response, std::vector< char >(where, buffer.end()), end_of_headers_found, end_of_buffer_found);
 227                         retrying = true; // force a read on the connection
 228                         goto retry; // tail recursion
 229                 }
 230
 231                 return Response(*response);
 232         }
 233
 234         /*DAMON_API */Response send(Session & session, const Request & request)
 235         {
 236                 using Private::parseURL;
 237
 238                 std::string protocol;
 239                 std::string server;
 240                 boost::uint16_t port;
 241                 std::string resource;
 242                 std::string username;
 243                 std::string password;
 244                 boost::tie(protocol, server, port, resource, username, password) = parseURL(request.url_);
 245                 bool secured(protocol == "https");
 246
 247                 if (secured)
 248                 {
 249                         if (!session.context_)
 250                         {
 251                                 session.context_ = new Scorpion::Context;
 252                         }
 253                         else
 254                         { /* already have a context */ }
 255                 }
 256                 else
 257                 { /* no security context required */ }
 258                 assert((secured && session.context_) || !secured);
 259
 260                 /* the part of the URL that's important for the connection cache is
 261                  * the server and the port. The rest of the URL (i.e. protocol and
 262                  * resource) isn't important as it doesn't affect with whom we talk
 263                  * but rather how (which should not change from one call to the same
 264                  * port to another) and with which resource (which may change, but
 265                  * we don't care about that). */
 266                 boost::format cache_key("%1%:%2%");
 267                 cache_key
 268                         % server
 269                         % port
 270                         ;
 271                 Session::ConnectionCache_::iterator cached_connection(session.connection_cache_.find(cache_key.str()));
 272                 do
 273                 {
 274                         if (cached_connection == session.connection_cache_.end())
 275                         {
 276                                 cached_connection = session.connection_cache_.insert(
 277                                         Session::ConnectionCache_::value_type(
 278                                         cache_key.str(),
 279                                                 (
 280                                                         secured
 281                                                                 ? Spin::Connector::getInstance().connect(*session.context_, server, port)
 282                                                                 : Spin::Connector::getInstance().connect(server, port)
 283                                                         )
 284                                                 )
 285                                         ).first;
 286                         }
 287                         else
 288                         { /* we've found the connection */ }
 289                         assert(cached_connection != session.connection_cache_.end());
 290                         if (!cached_connection->second->usable())
 291                         {
 292                                 session.connection_cache_.erase(cached_connection);
 293                                 cached_connection = session.connection_cache_.end();
 294                         }
 295                         else
 296                         { /* all is well */ }
 297                 } while(cached_connection == session.connection_cache_.end());
 298
 299                 cached_connection->second->write(request);
 300
 301                 return getResponse(cached_connection->second);
 302         }
 303
 304         /*DAMON_API */Response send(const Request & request)
 305         {
 306                 Session session;
 307
 308                 return send(session, request);
 309         }
 310
 311         /*DAMON_API */std::vector< Response > send(Session & session, const std::vector< Request > & requests)
 312         {
 313                 using Private::parseURL;
 314
 315                 if (requests.empty())
 316                         return std::vector< Response >();
 317                 else
 318                 { /* carry on */ }
 319
 320                 std::string protocol;
 321                 std::string server;
 322                 boost::uint16_t port;
 323                 std::string resource;
 324                 std::string username;
 325                 std::string password;
 326                 boost::tie(protocol, server, port, resource, username, password) = parseURL(requests[0].url_);
 327                 bool secured(protocol == "https");
 328
 329                 if (secured)
 330                 {
 331                         if (!session.context_)
 332                         {
 333                                 session.context_ = new Scorpion::Context;
 334                         }
 335                         else
 336                         { /* already have a context */ }
 337                 }
 338                 else
 339                 { /* no security context required */ }
 340                 assert((secured && session.context_) || !secured);
 341
 342                 /* the part of the URL that's important for the connection cache is
 343                  * the server and the port. The rest of the URL (i.e. protocol and
 344                  * resource) isn't important as it doesn't affect with whom we talk
 345                  * but rather how (which should not change from one call to the same
 346                  * port to another) and with which resource (which may change, but
 347                  * we don't care about that). */
 348                 boost::format cache_key("%1%:%2%");
 349                 cache_key
 350                         % server
 351                         % port
 352                         ;
 353                 Session::ConnectionCache_::iterator cached_connection(session.connection_cache_.find(cache_key.str()));
 354                 do
 355                 {
 356                         if (cached_connection == session.connection_cache_.end())
 357                         {
 358                                 cached_connection = session.connection_cache_.insert(
 359                                                 Session::ConnectionCache_::value_type(
 360                                                         cache_key.str(),
 361                                                         (
 362                                                                 secured
 363                                                                         ? Spin::Connector::getInstance().connect(*session.context_, server, port)
 364                                                                         : Spin::Connector::getInstance().connect(server, port)
 365                                                                 )
 366                                                         )
 367                                                 ).first;
 368                         }
 369                         else
 370                         { /* we've found the connection */ }
 371                         assert(cached_connection != session.connection_cache_.end());
 372                         if (!cached_connection->second->usable())
 373                         {
 374                                 session.connection_cache_.erase(cached_connection);
 375                                 cached_connection = session.connection_cache_.end();
 376                         }
 377                         else
 378                         { /* all is well */ }
 379                 } while(cached_connection == session.connection_cache_.end());
 380
 381                 // validate that all of the requests use the same server and port parts in the URL
 382                 assert(std::find_if(requests.begin(), requests.end(), std::not1(ValidateURL(server, port, &Request::url_))) == requests.end());
 383
 384                 std::for_each(requests.begin(), requests.end(), boost::bind(&Spin::Connection::write< Request >, cached_connection->second.get(), _1));
 385
 386                 std::vector< Response > responses;
 387                 for (std::vector< Request >::size_type i(0); i < requests.size(); ++i)
 388                         responses.push_back(getResponse(cached_connection->second));
 389                 return responses;
 390         }
 391
 392         /*DAMON_API */std::vector< Response > send(const std::vector< Request > & requests)
 393         {
 394                 Session session;
 395
 396                 return send(session, requests);
 397         }
 398
 399         namespace
 400         {
 401                 struct AppendHeader
 402                 {
 403                         AppendHeader(std::string & str)
 404                                 : str_(str)
 405                         { /* no-op */ }
 406
 407                         AppendHeader & operator()(const Details::Header & header)
 408                         {
 409                                 boost::format fmt("%1%: %2%\r\n");
 410                                 fmt % header.name_ % header.value_;
 411                                 str_ += fmt.str();
 412                                 return *this;
 413                         }
 414
 415                         std::string & str_;
 416                 };
 417         }
 418
 419         /*DAMON_API */std::string serialize(Request request)
 420         {
 421                 using Private::parseURL;
 422                 using Private::extractHost;
 423
 424                 std::string protocol;
 425                 std::string server;
 426                 boost::uint16_t port;
 427                 std::string resource;
 428                 std::string username;
 429                 std::string password;
 430                 boost::tie(protocol, server, port, resource, username, password) = parseURL(request.url_);
 431                 std::string retval;
 432                 switch (request.method_)
 433                 {
 434                 case Request::options__ :
 435                         retval = "OPTIONS ";
 436                         break;
 437                 case Request::get__ :
 438                         retval = "GET ";
 439                         break;
 440                 case Request::head__ :
 441                         retval = "HEAD ";
 442                         break;
 443                 case Request::post__ :
 444                         retval = "POST ";
 445                         break;
 446                 case Request::put__ :
 447                         retval = "PUT ";
 448                         break;
 449                 case Request::delete__ :
 450                         retval = "DELETE ";
 451                         break;
 452                 case Request::trace__ :
 453                         retval = "TRACE ";
 454                         break;
 455                 case Request::connect__ :
 456                         retval = "CONNECT ";
 457                         break;
 458                 default :
 459                         throw std::runtime_error("Unknown method");    // be more eloquent HERE
 460                 }
 461                 retval += resource;
 462                 retval += " HTTP/1.1\r\n";
 463
 464                 if (std::find_if(request.header_fields_.begin(), request.header_fields_.end(), bind(&Details::Header::name_, _1) == "Host") == request.header_fields_.end())
 465                         request.header_fields_.push_back(Details::Header("Host", extractHost(request.url_)));
 466                 else
 467                 { /* Already has a Host header */ }
 468                 if (std::find_if(request.header_fields_.begin(), request.header_fields_.end(), bind(&Details::Header::name_, _1) == "Content-Length") == request.header_fields_.end())
 469                         request.header_fields_.push_back(Details::Header("Content-Length", boost::lexical_cast< std::string >(request.body_.size())));
 470                 else
 471                 { /* Already has a Content-Length header */ }
 472                 if (request.auth_method_ == Request::basic__ && !username.empty())
 473                         request.header_fields_.push_back(Details::createBasicAuthorizationHeader(username, password));
 474                 else
 475                 { /* no authorization method, don't care about username & password */ }
 476                 std::for_each(request.header_fields_.begin(), request.header_fields_.end(), AppendHeader(retval));
 477                 retval += "\r\n";
 478                 retval.insert(retval.end(), request.body_.begin(), request.body_.end());
 479
 480                 return retval;
 481         }
 482 }