2 * HappyHTTP - a simple HTTP library
5 * Copyright (c) 2006 Ben Campbell
7 * This software is provided 'as-is', without any express or implied
8 * warranty. In no event will the authors be held liable for any damages
9 * arising from the use of this software.
11 * Permission is granted to anyone to use this software for any purpose,
12 * including commercial applications, and to alter it and redistribute it
13 * freely, subject to the following restrictions:
15 * 1. The origin of this software must not be misrepresented; you must not
16 * claim that you wrote the original software. If you use this software in a
17 * product, an acknowledgment in the product documentation would be
18 * appreciated but is not required.
20 * 2. Altered source versions must be plainly marked as such, and must not
21 * be misrepresented as being the original software.
23 * 3. This notice may not be removed or altered from any source distribution.
28 #include "happyhttp.h"
31 // #include <sys/types.h>
32 #include <sys/socket.h>
33 #include <netinet/in.h>
34 #include <arpa/inet.h>
35 #include <netdb.h> // for gethostbyname()
41 #define vsnprintf _vsnprintf
64 const char* GetWinsockErrorString( int err
);
68 //---------------------------------------------------------------------
70 //---------------------------------------------------------------------
74 void BailOnSocketError( const char* context
)
78 int e
= WSAGetLastError();
79 const char* msg
= GetWinsockErrorString( e
);
81 const char* msg
= strerror( errno
);
83 throw Wobbly( "%s: %s", context
, msg
);
89 const char* GetWinsockErrorString( int err
)
93 case 0: return "No error";
94 case WSAEINTR
: return "Interrupted system call";
95 case WSAEBADF
: return "Bad file number";
96 case WSAEACCES
: return "Permission denied";
97 case WSAEFAULT
: return "Bad address";
98 case WSAEINVAL
: return "Invalid argument";
99 case WSAEMFILE
: return "Too many open sockets";
100 case WSAEWOULDBLOCK
: return "Operation would block";
101 case WSAEINPROGRESS
: return "Operation now in progress";
102 case WSAEALREADY
: return "Operation already in progress";
103 case WSAENOTSOCK
: return "Socket operation on non-socket";
104 case WSAEDESTADDRREQ
: return "Destination address required";
105 case WSAEMSGSIZE
: return "Message too long";
106 case WSAEPROTOTYPE
: return "Protocol wrong type for socket";
107 case WSAENOPROTOOPT
: return "Bad protocol option";
108 case WSAEPROTONOSUPPORT
: return "Protocol not supported";
109 case WSAESOCKTNOSUPPORT
: return "Socket type not supported";
110 case WSAEOPNOTSUPP
: return "Operation not supported on socket";
111 case WSAEPFNOSUPPORT
: return "Protocol family not supported";
112 case WSAEAFNOSUPPORT
: return "Address family not supported";
113 case WSAEADDRINUSE
: return "Address already in use";
114 case WSAEADDRNOTAVAIL
: return "Can't assign requested address";
115 case WSAENETDOWN
: return "Network is down";
116 case WSAENETUNREACH
: return "Network is unreachable";
117 case WSAENETRESET
: return "Net connection reset";
118 case WSAECONNABORTED
: return "Software caused connection abort";
119 case WSAECONNRESET
: return "Connection reset by peer";
120 case WSAENOBUFS
: return "No buffer space available";
121 case WSAEISCONN
: return "Socket is already connected";
122 case WSAENOTCONN
: return "Socket is not connected";
123 case WSAESHUTDOWN
: return "Can't send after socket shutdown";
124 case WSAETOOMANYREFS
: return "Too many references, can't splice";
125 case WSAETIMEDOUT
: return "Connection timed out";
126 case WSAECONNREFUSED
: return "Connection refused";
127 case WSAELOOP
: return "Too many levels of symbolic links";
128 case WSAENAMETOOLONG
: return "File name too long";
129 case WSAEHOSTDOWN
: return "Host is down";
130 case WSAEHOSTUNREACH
: return "No route to host";
131 case WSAENOTEMPTY
: return "Directory not empty";
132 case WSAEPROCLIM
: return "Too many processes";
133 case WSAEUSERS
: return "Too many users";
134 case WSAEDQUOT
: return "Disc quota exceeded";
135 case WSAESTALE
: return "Stale NFS file handle";
136 case WSAEREMOTE
: return "Too many levels of remote in path";
137 case WSASYSNOTREADY
: return "Network system is unavailable";
138 case WSAVERNOTSUPPORTED
: return "Winsock version out of range";
139 case WSANOTINITIALISED
: return "WSAStartup not yet called";
140 case WSAEDISCON
: return "Graceful shutdown in progress";
141 case WSAHOST_NOT_FOUND
: return "Host not found";
142 case WSANO_DATA
: return "No host data of that type was found";
151 // return true if socket has data waiting to be read
152 bool datawaiting( int sock
)
156 FD_SET( sock
, &fds
);
162 int r
= select( sock
+1, &fds
, NULL
, NULL
, &tv
);
164 BailOnSocketError( "select" );
166 if( FD_ISSET( sock
, &fds
) )
// Try to work out an IPv4 address from a string.
//
// 'address' may be a dotted quad ("nnn.nnn.nnn.nnn") or a host name.
// Returns a pointer to a function-local static in_addr, or 0 if the string
// is null/empty or cannot be resolved. Because the result lives in static
// storage it is overwritten by the next call, and the function is not
// safe to call from several threads at once.
struct in_addr *atoaddr( const char* address )
{
	static struct in_addr saddr;

	if( !address || !*address )
		return 0;

	// First try nnn.nnn.nnn.nnn form
	saddr.s_addr = inet_addr( address );
	if( saddr.s_addr != INADDR_NONE )
		return &saddr;

	// Not a dotted quad (or the all-ones address, which inet_addr cannot
	// distinguish from failure) - fall back to a name lookup.
	struct hostent *host = gethostbyname( address );
	if( !host || host->h_addrtype != AF_INET )
		return 0;
	if( host->h_length < static_cast<int>( sizeof(saddr) ) )
		return 0;
	if( !host->h_addr_list || !host->h_addr_list[0] )
		return 0;

	// Copy the first address out of the resolver's buffer rather than
	// handing back a cast pointer into storage we do not own.
	memcpy( &saddr, host->h_addr_list[0], sizeof(saddr) );
	return &saddr;
}
198 //---------------------------------------------------------------------
202 //---------------------------------------------------------------------
205 Wobbly::Wobbly( const char* fmt
, ... )
209 int n
= vsnprintf( m_Message
, MAXLEN
, fmt
, ap
);
212 m_Message
[MAXLEN
-1] = '\0';
222 //---------------------------------------------------------------------
226 //---------------------------------------------------------------------
227 Connection::Connection( const char* host
, int port
) :
228 m_ResponseBeginCB(0),
230 m_ResponseCompleteCB(0),
240 void Connection::setcallbacks(
241 ResponseBegin_CB begincb
,
242 ResponseData_CB datacb
,
243 ResponseComplete_CB completecb
,
246 m_ResponseBeginCB
= begincb
;
247 m_ResponseDataCB
= datacb
;
248 m_ResponseCompleteCB
= completecb
;
249 m_UserData
= userdata
;
253 void Connection::connect()
255 in_addr
* addr
= atoaddr( m_Host
.c_str() );
257 throw Wobbly( "Invalid network address" );
260 memset( (char*)&address
, 0, sizeof(address
) );
261 address
.sin_family
= AF_INET
;
262 address
.sin_port
= htons( m_Port
);
263 address
.sin_addr
.s_addr
= addr
->s_addr
;
265 m_Sock
= socket( AF_INET
, SOCK_STREAM
, 0 );
267 BailOnSocketError( "socket()" );
269 // printf("Connecting to %s on port %d.\n",inet_ntoa(*addr), port);
271 if( ::connect( m_Sock
, (sockaddr
const*)&address
, sizeof(address
) ) < 0 )
272 BailOnSocketError( "connect()" );
276 void Connection::close()
280 ::closesocket( m_Sock
);
287 // discard any incomplete responses
288 while( !m_Outstanding
.empty() )
290 delete m_Outstanding
.front();
291 m_Outstanding
.pop_front();
296 Connection::~Connection()
301 void Connection::request( const char* method
,
303 const char* headers
[],
304 const unsigned char* body
,
308 bool gotcontentlength
= false; // already in headers?
310 // check headers for content-length
311 // TODO: check for "Host" and "Accept-Encoding" too
312 // and avoid adding them ourselves in putrequest()
315 const char** h
= headers
;
318 const char* name
= *h
++;
319 const char* value
= *h
++;
320 assert( value
!= 0 ); // name with no value!
322 if( 0==strcasecmp( name
, "content-length" ) )
323 gotcontentlength
= true;
327 putrequest( method
, url
);
329 if( body
&& !gotcontentlength
)
330 putheader( "Content-Length", bodysize
);
334 const char** h
= headers
;
337 const char* name
= *h
++;
338 const char* value
= *h
++;
339 putheader( name
, value
);
345 send( body
, bodysize
);
352 void Connection::putrequest( const char* method
, const char* url
)
354 if( m_State
!= IDLE
)
355 throw Wobbly( "Request already issued" );
357 m_State
= REQ_STARTED
;
360 sprintf( req
, "%s %s HTTP/1.1", method
, url
);
361 m_Buffer
.push_back( req
);
363 putheader( "Host", m_Host
.c_str() ); // required for HTTP1.1
365 // don't want any fancy encodings please
366 putheader("Accept-Encoding", "identity");
368 // Push a new response onto the queue
369 Response
*r
= new Response( method
, *this );
370 m_Outstanding
.push_back( r
);
374 void Connection::putheader( const char* header
, const char* value
)
376 if( m_State
!= REQ_STARTED
)
377 throw Wobbly( "putheader() failed" );
378 m_Buffer
.push_back( string(header
) + ": " + string( value
) );
381 void Connection::putheader( const char* header
, int numericvalue
)
384 sprintf( buf
, "%d", numericvalue
);
385 putheader( header
, buf
);
388 void Connection::endheaders()
390 if( m_State
!= REQ_STARTED
)
391 throw Wobbly( "Cannot send header" );
394 m_Buffer
.push_back( "" );
397 vector
< string
>::const_iterator it
;
398 for( it
= m_Buffer
.begin(); it
!= m_Buffer
.end(); ++it
)
399 msg
+= (*it
) + "\r\n";
403 // printf( "%s", msg.c_str() );
404 send( (const unsigned char*)msg
.c_str(), msg
.size() );
409 void Connection::send( const unsigned char* buf
, int numbytes
)
411 // fwrite( buf, 1,numbytes, stdout );
416 while( numbytes
> 0 )
419 int n
= ::send( m_Sock
, (const char*)buf
, numbytes
, 0 );
421 int n
= ::send( m_Sock
, buf
, numbytes
, 0 );
424 BailOnSocketError( "send()" );
431 void Connection::pump()
433 if( m_Outstanding
.empty() )
434 return; // no requests outstanding
436 assert( m_Sock
>0 ); // outstanding requests but no connection!
438 if( !datawaiting( m_Sock
) )
439 return; // recv will block
441 unsigned char buf
[ 2048 ];
442 int a
= recv( m_Sock
, (char*)buf
, sizeof(buf
), 0 );
444 BailOnSocketError( "recv()" );
448 // connection has closed
450 Response
* r
= m_Outstanding
.front();
451 r
->notifyconnectionclosed();
452 assert( r
->completed() );
454 m_Outstanding
.pop_front();
456 // any outstanding requests will be discarded
462 while( used
< a
&& !m_Outstanding
.empty() )
465 Response
* r
= m_Outstanding
.front();
466 int u
= r
->pump( &buf
[used
], a
-used
);
468 // delete response once completed
472 m_Outstanding
.pop_front();
477 // NOTE: will lose bytes if response queue goes empty
478 // (but server shouldn't be sending anything if we don't have
479 // anything outstanding anyway)
480 assert( used
== a
); // all bytes should be used up by here.
489 //---------------------------------------------------------------------
493 //---------------------------------------------------------------------
496 Response::Response( const char* method
, Connection
& conn
) :
497 m_Connection( conn
),
498 m_State( STATUSLINE
),
511 const char* Response::getheader( const char* name
) const
513 std::string
lname( name
);
514 std::transform( lname
.begin(), lname
.end(), lname
.begin(), (int(*)(int))tolower
);
516 std::map
< std::string
, std::string
>::const_iterator it
= m_Headers
.find( lname
);
517 if( it
== m_Headers
.end() )
520 return it
->second
.c_str();
524 int Response::getstatus() const
526 // only valid once we've got the statusline
527 assert( m_State
!= STATUSLINE
);
532 const char* Response::getreason() const
534 // only valid once we've got the statusline
535 assert( m_State
!= STATUSLINE
);
536 return m_Reason
.c_str();
541 // Connection has closed
542 void Response::notifyconnectionclosed()
544 if( m_State
== COMPLETE
)
547 // eof can be valid...
548 if( m_State
== BODY
&&
552 Finish(); // we're all done!
556 throw Wobbly( "Connection closed unexpectedly" );
562 int Response::pump( const unsigned char* data
, int datasize
)
564 assert( datasize
!= 0 );
565 int count
= datasize
;
567 while( count
> 0 && m_State
!= COMPLETE
)
569 if( m_State
== STATUSLINE
||
570 m_State
== HEADERS
||
571 m_State
== TRAILERS
||
572 m_State
== CHUNKLEN
||
573 m_State
== CHUNKEND
)
575 // we want to accumulate a line
578 char c
= (char)*data
++;
582 // now got a whole line!
586 ProcessStatusLine( m_LineBuf
);
589 ProcessHeaderLine( m_LineBuf
);
592 ProcessTrailerLine( m_LineBuf
);
595 ProcessChunkLenLine( m_LineBuf
);
598 // just soak up the crlf after body and go to next state
599 assert( m_Chunked
== true );
606 break; // break out of line accumulation!
610 if( c
!= '\r' ) // just ignore CR
615 else if( m_State
== BODY
)
619 bytesused
= ProcessDataChunked( data
, count
);
621 bytesused
= ProcessDataNonChunked( data
, count
);
627 // return number of bytes used
628 return datasize
- count
;
633 void Response::ProcessChunkLenLine( std::string
const& line
)
635 // chunklen in hex at beginning of line
636 m_ChunkLeft
= strtol( line
.c_str(), NULL
, 16 );
638 if( m_ChunkLeft
== 0 )
640 // got the whole body, now check for trailing headers
642 m_HeaderAccum
.clear();
651 // handle some body data in chunked mode
652 // returns number of bytes used.
653 int Response::ProcessDataChunked( const unsigned char* data
, int count
)
661 // invoke callback to pass out the data
662 if( m_Connection
.m_ResponseDataCB
)
663 (m_Connection
.m_ResponseDataCB
)( this, m_Connection
.m_UserData
, data
, n
);
668 assert( m_ChunkLeft
>= 0);
669 if( m_ChunkLeft
== 0 )
671 // chunk completed! now soak up the trailing CRLF before next chunk
677 // handle some body data in non-chunked mode.
678 // returns number of bytes used.
679 int Response::ProcessDataNonChunked( const unsigned char* data
, int count
)
684 // we know how many bytes to expect
685 int remaining
= m_Length
- m_BytesRead
;
690 // invoke callback to pass out the data
691 if( m_Connection
.m_ResponseDataCB
)
692 (m_Connection
.m_ResponseDataCB
)( this, m_Connection
.m_UserData
, data
, n
);
696 // Finish if we know we're done. Else we're waiting for connection close.
697 if( m_Length
!= -1 && m_BytesRead
== m_Length
)
704 void Response::Finish()
708 // invoke the callback
709 if( m_Connection
.m_ResponseCompleteCB
)
710 (m_Connection
.m_ResponseCompleteCB
)( this, m_Connection
.m_UserData
);
714 void Response::ProcessStatusLine( std::string
const& line
)
716 const char* p
= line
.c_str();
718 // skip any leading space
719 while( *p
&& *p
== ' ' )
723 while( *p
&& *p
!= ' ' )
724 m_VersionString
+= *p
++;
725 while( *p
&& *p
== ' ' )
730 while( *p
&& *p
!= ' ' )
732 while( *p
&& *p
== ' ' )
735 // rest of line is reason
739 m_Status
= atoi( status
.c_str() );
740 if( m_Status
< 100 || m_Status
> 999 )
741 throw Wobbly( "BadStatusLine (%s)", line
.c_str() );
744 printf( "version: '%s'\n", m_VersionString.c_str() );
745 printf( "status: '%d'\n", m_Status );
746 printf( "reason: '%s'\n", m_Reason.c_str() );
749 if( m_VersionString
== "HTTP:/1.0" )
751 else if( 0==m_VersionString
.compare( 0,7,"HTTP/1." ) )
754 throw Wobbly( "UnknownProtocol (%s)", m_VersionString
.c_str() );
755 // TODO: support for HTTP/0.9
758 // OK, now we expect headers!
760 m_HeaderAccum
.clear();
764 // process accumulated header data
765 void Response::FlushHeader()
767 if( m_HeaderAccum
.empty() )
768 return; // no flushing required
770 const char* p
= m_HeaderAccum
.c_str();
774 while( *p
&& *p
!= ':' )
775 header
+= tolower( *p
++ );
782 while( *p
&& (*p
==' ' || *p
=='\t') )
785 value
= p
; // rest of line is value
787 m_Headers
[ header
] = value
;
788 // printf("header: ['%s': '%s']\n", header.c_str(), value.c_str() );
790 m_HeaderAccum
.clear();
794 void Response::ProcessHeaderLine( std::string
const& line
)
796 const char* p
= line
.c_str();
802 // HTTP code 100 handling (we ignore 'em)
803 if( m_Status
== CONTINUE
)
804 m_State
= STATUSLINE
; // reset parsing, expect new status line
806 BeginBody(); // start on body now!
812 // it's a continuation line - just add it to previous data
814 while( *p
&& isspace( *p
) )
817 m_HeaderAccum
+= ' ';
822 // begin a new header
829 void Response::ProcessTrailerLine( std::string
const& line
)
831 // TODO: handle trailers?
832 // (python httplib doesn't seem to!)
836 // just ignore all the trailers...
841 // OK, we've now got all the headers read in, so we're ready to start
842 // on the body. But we need to see what info we can glean from the headers
844 void Response::BeginBody()
848 m_Length
= -1; // unknown
851 // using chunked encoding?
852 const char* trenc
= getheader( "transfer-encoding" );
853 if( trenc
&& 0==strcasecmp( trenc
, "chunked") )
856 m_ChunkLeft
= -1; // unknown
859 m_WillClose
= CheckClose();
862 const char* contentlen
= getheader( "content-length" );
863 if( contentlen
&& !m_Chunked
)
865 m_Length
= atoi( contentlen
);
868 // check for various cases where we expect zero-length body
869 if( m_Status
== NO_CONTENT
||
870 m_Status
== NOT_MODIFIED
||
871 ( m_Status
>= 100 && m_Status
< 200 ) || // 1xx codes have no body
878 // if we're not using chunked mode, and no length has been specified,
879 // assume connection will close at end.
880 if( !m_WillClose
&& !m_Chunked
&& m_Length
== -1 )
885 // Invoke the user callback, if any
886 if( m_Connection
.m_ResponseBeginCB
)
887 (m_Connection
.m_ResponseBeginCB
)( this, m_Connection
.m_UserData
);
890 printf("---------BeginBody()--------\n");
891 printf("Length: %d\n", m_Length );
892 printf("WillClose: %d\n", (int)m_WillClose );
893 printf("Chunked: %d\n", (int)m_Chunked );
894 printf("ChunkLeft: %d\n", (int)m_ChunkLeft );
895 printf("----------------------------\n");
897 // now start reading body data!
905 // return true if we think server will automatically close connection
906 bool Response::CheckClose()
908 if( m_Version
== 11 )
911 // the connection stays open unless "connection: close" is specified.
912 const char* conn
= getheader( "connection" );
913 if( conn
&& 0==strcasecmp( conn
, "close" ) )
920 // keep-alive header indicates persistant connection
921 if( getheader( "keep-alive" ) )
924 // TODO: some special case handling for Akamai and netscape maybe?
925 // (see _check_close() in python httplib.py for details)
932 } // end namespace happyhttp