Scan media entities as well, not just url entities. This should expand more
[bitlbee.git] / lib / http_client.c
blob98a99f7c8d48d2f74305443d8c43a3a73bff11d6
1 /********************************************************************\
2 * BitlBee -- An IRC to other IM-networks gateway *
3 * *
4 * Copyright 2002-2011 Wilmer van der Gaast and others *
5 \********************************************************************/
7 /* HTTP(S) module */
9 /*
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or
13 (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License with
21 the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL;
22 if not, write to the Free Software Foundation, Inc., 59 Temple Place,
23 Suite 330, Boston, MA 02111-1307 USA
26 #include <string.h>
27 #include <stdio.h>
29 #include "http_client.h"
30 #include "url.h"
31 #include "sock.h"
34 static gboolean http_connected( gpointer data, int source, b_input_condition cond );
35 static gboolean http_ssl_connected( gpointer data, int returncode, void *source, b_input_condition cond );
36 static gboolean http_incoming_data( gpointer data, int source, b_input_condition cond );
37 static void http_free( struct http_request *req );
40 struct http_request *http_dorequest( char *host, int port, int ssl, char *request, http_input_function func, gpointer data )
42 struct http_request *req;
43 int error = 0;
45 req = g_new0( struct http_request, 1 );
47 if( ssl )
49 req->ssl = ssl_connect( host, port, TRUE, http_ssl_connected, req );
50 if( req->ssl == NULL )
51 error = 1;
53 else
55 req->fd = proxy_connect( host, port, http_connected, req );
56 if( req->fd < 0 )
57 error = 1;
60 if( error )
62 http_free( req );
63 return NULL;
66 req->func = func;
67 req->data = data;
68 req->request = g_strdup( request );
69 req->request_length = strlen( request );
70 req->redir_ttl = 3;
72 if( getenv( "BITLBEE_DEBUG" ) )
73 printf( "About to send HTTP request:\n%s\n", req->request );
75 return( req );
78 struct http_request *http_dorequest_url( char *url_string, http_input_function func, gpointer data )
80 url_t *url = g_new0( url_t, 1 );
81 char *request;
82 void *ret;
84 if( !url_set( url, url_string ) )
86 g_free( url );
87 return NULL;
90 if( url->proto != PROTO_HTTP && url->proto != PROTO_HTTPS )
92 g_free( url );
93 return NULL;
96 request = g_strdup_printf( "GET %s HTTP/1.0\r\n"
97 "Host: %s\r\n"
98 "Connection: close\r\n"
99 "User-Agent: BitlBee " BITLBEE_VERSION " " ARCH "/" CPU "\r\n"
100 "\r\n", url->file, url->host );
102 ret = http_dorequest( url->host, url->port,
103 url->proto == PROTO_HTTPS, request, func, data );
105 g_free( url );
106 g_free( request );
107 return ret;
110 /* This one is actually pretty simple... Might get more calls if we can't write
111 the whole request at once. */
112 static gboolean http_connected( gpointer data, int source, b_input_condition cond )
114 struct http_request *req = data;
115 int st;
117 if( source < 0 )
118 goto error;
120 if( req->inpa > 0 )
121 b_event_remove( req->inpa );
123 sock_make_nonblocking( req->fd );
125 if( req->ssl )
127 st = ssl_write( req->ssl, req->request + req->bytes_written,
128 req->request_length - req->bytes_written );
129 if( st < 0 )
131 if( ssl_errno != SSL_AGAIN )
133 ssl_disconnect( req->ssl );
134 goto error;
138 else
140 st = write( source, req->request + req->bytes_written,
141 req->request_length - req->bytes_written );
142 if( st < 0 )
144 if( !sockerr_again() )
146 closesocket( req->fd );
147 goto error;
152 if( st > 0 )
153 req->bytes_written += st;
155 if( req->bytes_written < req->request_length )
156 req->inpa = b_input_add( source,
157 req->ssl ? ssl_getdirection( req->ssl ) : B_EV_IO_WRITE,
158 http_connected, req );
159 else
160 req->inpa = b_input_add( source, B_EV_IO_READ, http_incoming_data, req );
162 return FALSE;
164 error:
165 if( req->status_string == NULL )
166 req->status_string = g_strdup( "Error while writing HTTP request" );
168 req->func( req );
169 http_free( req );
170 return FALSE;
173 static gboolean http_ssl_connected( gpointer data, int returncode, void *source, b_input_condition cond )
175 struct http_request *req = data;
177 if( source == NULL )
179 if( returncode != 0 )
181 char *err = ssl_verify_strerror( returncode );
182 req->status_string = g_strdup_printf(
183 "Certificate verification problem 0x%x: %s",
184 returncode, err ? err : "Unknown" );
185 g_free( err );
187 return http_connected( data, -1, cond );
190 req->fd = ssl_getfd( source );
192 return http_connected( data, req->fd, cond );
195 static gboolean http_incoming_data( gpointer data, int source, b_input_condition cond )
197 struct http_request *req = data;
198 int evil_server = 0;
199 char buffer[2048];
200 char *end1, *end2;
201 int st;
203 if( req->inpa > 0 )
204 b_event_remove( req->inpa );
206 if( req->ssl )
208 st = ssl_read( req->ssl, buffer, sizeof( buffer ) );
209 if( st < 0 )
211 if( ssl_errno != SSL_AGAIN )
213 /* goto cleanup; */
215 /* YAY! We have to deal with crappy Microsoft
216 servers that LOVE to send invalid TLS
217 packets that abort connections! \o/ */
219 goto got_reply;
222 else if( st == 0 )
224 goto got_reply;
227 else
229 st = read( req->fd, buffer, sizeof( buffer ) );
230 if( st < 0 )
232 if( !sockerr_again() )
234 req->status_string = g_strdup( strerror( errno ) );
235 goto cleanup;
238 else if( st == 0 )
240 goto got_reply;
244 if( st > 0 )
246 req->reply_headers = g_realloc( req->reply_headers, req->bytes_read + st + 1 );
247 memcpy( req->reply_headers + req->bytes_read, buffer, st );
248 req->bytes_read += st;
251 /* There will be more! */
252 req->inpa = b_input_add( req->fd,
253 req->ssl ? ssl_getdirection( req->ssl ) : B_EV_IO_READ,
254 http_incoming_data, req );
256 if( ssl_pending( req->ssl ) )
257 return http_incoming_data( data, source, cond );
258 else
259 return FALSE;
261 got_reply:
262 /* Maybe if the webserver is overloaded, or when there's bad SSL
263 support... */
264 if( req->bytes_read == 0 )
266 req->status_string = g_strdup( "Empty HTTP reply" );
267 goto cleanup;
270 /* Zero termination is very convenient. */
271 req->reply_headers[req->bytes_read] = 0;
273 /* Find the separation between headers and body, and keep stupid
274 webservers in mind. */
275 end1 = strstr( req->reply_headers, "\r\n\r\n" );
276 end2 = strstr( req->reply_headers, "\n\n" );
278 if( end2 && end2 < end1 )
280 end1 = end2 + 1;
281 evil_server = 1;
283 else if( end1 )
285 end1 += 2;
287 else
289 req->status_string = g_strdup( "Malformed HTTP reply" );
290 goto cleanup;
293 *end1 = 0;
295 if( getenv( "BITLBEE_DEBUG" ) )
296 printf( "HTTP response headers:\n%s\n", req->reply_headers );
298 if( evil_server )
299 req->reply_body = end1 + 1;
300 else
301 req->reply_body = end1 + 2;
303 req->body_size = req->reply_headers + req->bytes_read - req->reply_body;
305 if( ( end1 = strchr( req->reply_headers, ' ' ) ) != NULL )
307 if( sscanf( end1 + 1, "%d", &req->status_code ) != 1 )
309 req->status_string = g_strdup( "Can't parse status code" );
310 req->status_code = -1;
312 else
314 char *eol;
316 if( evil_server )
317 eol = strchr( end1, '\n' );
318 else
319 eol = strchr( end1, '\r' );
321 req->status_string = g_strndup( end1 + 1, eol - end1 - 1 );
323 /* Just to be sure... */
324 if( ( eol = strchr( req->status_string, '\r' ) ) )
325 *eol = 0;
326 if( ( eol = strchr( req->status_string, '\n' ) ) )
327 *eol = 0;
330 else
332 req->status_string = g_strdup( "Can't locate status code" );
333 req->status_code = -1;
336 if( ( ( req->status_code >= 301 && req->status_code <= 303 ) ||
337 req->status_code == 307 ) && req->redir_ttl-- > 0 )
339 char *loc, *new_request, *new_host;
340 int error = 0, new_port, new_proto;
342 /* We might fill it again, so let's not leak any memory. */
343 g_free( req->status_string );
344 req->status_string = NULL;
346 loc = strstr( req->reply_headers, "\nLocation: " );
347 if( loc == NULL ) /* We can't handle this redirect... */
349 req->status_string = g_strdup( "Can't locate Location: header" );
350 goto cleanup;
353 loc += 11;
354 while( *loc == ' ' )
355 loc ++;
357 /* TODO/FIXME: Possibly have to handle relative redirections,
358 and rewrite Host: headers. Not necessary for now, it's
359 enough for passport authentication like this. */
361 if( *loc == '/' )
363 /* Just a different pathname... */
365 /* Since we don't cache the servername, and since we
366 don't need this yet anyway, I won't implement it. */
368 req->status_string = g_strdup( "Can't handle recursive redirects" );
370 goto cleanup;
372 else
374 /* A whole URL */
375 url_t *url;
376 char *s;
377 const char *new_method;
379 s = strstr( loc, "\r\n" );
380 if( s == NULL )
381 goto cleanup;
383 url = g_new0( url_t, 1 );
384 *s = 0;
386 if( !url_set( url, loc ) )
388 req->status_string = g_strdup( "Malformed redirect URL" );
389 g_free( url );
390 goto cleanup;
393 /* Find all headers and, if necessary, the POST request contents.
394 Skip the old Host: header though. This crappy code here means
395 anything using this http_client MUST put the Host: header at
396 the top. */
397 if( !( ( s = strstr( req->request, "\r\nHost: " ) ) &&
398 ( s = strstr( s + strlen( "\r\nHost: " ), "\r\n" ) ) ) )
400 req->status_string = g_strdup( "Error while rebuilding request string" );
401 g_free( url );
402 goto cleanup;
405 /* More or less HTTP/1.0 compliant, from my reading of RFC 2616.
406 Always perform a GET request unless we received a 301. 303 was
407 meant for this but it's HTTP/1.1-only and we're specifically
408 speaking HTTP/1.0. ...
410 Well except someone at identi.ca's didn't bother reading any
411 RFCs and just return HTTP/1.1-specific status codes to HTTP/1.0
412 requests. Fuckers. So here we are, handle 301..303,307. */
413 if( strncmp( req->request, "GET", 3 ) == 0 )
414 /* GETs never become POSTs. */
415 new_method = "GET";
416 else if( req->status_code == 302 || req->status_code == 303 )
417 /* 302 de-facto becomes GET, 303 as specified by RFC 2616#10.3.3 */
418 new_method = "GET";
419 else
420 /* 301 de-facto should stay POST, 307 specifally RFC 2616#10.3.8 */
421 new_method = "POST";
423 /* Okay, this isn't fun! We have to rebuild the request... :-( */
424 new_request = g_strdup_printf( "%s %s HTTP/1.0\r\nHost: %s%s",
425 new_method, url->file, url->host, s );
427 new_host = g_strdup( url->host );
428 new_port = url->port;
429 new_proto = url->proto;
431 /* If we went from POST to GET, truncate the request content. */
432 if( new_request[0] != req->request[0] && new_request[0] == 'G' &&
433 ( s = strstr( new_request, "\r\n\r\n" ) ) )
434 s[4] = '\0';
436 g_free( url );
439 if( req->ssl )
440 ssl_disconnect( req->ssl );
441 else
442 closesocket( req->fd );
444 req->fd = -1;
445 req->ssl = NULL;
447 if( getenv( "BITLBEE_DEBUG" ) )
448 printf( "New headers for redirected HTTP request:\n%s\n", new_request );
450 if( new_proto == PROTO_HTTPS )
452 req->ssl = ssl_connect( new_host, new_port, TRUE, http_ssl_connected, req );
453 if( req->ssl == NULL )
454 error = 1;
456 else
458 req->fd = proxy_connect( new_host, new_port, http_connected, req );
459 if( req->fd < 0 )
460 error = 1;
462 g_free( new_host );
464 if( error )
466 req->status_string = g_strdup( "Connection problem during redirect" );
467 g_free( new_request );
468 goto cleanup;
471 g_free( req->request );
472 g_free( req->reply_headers );
473 req->request = new_request;
474 req->request_length = strlen( new_request );
475 req->bytes_read = req->bytes_written = req->inpa = 0;
476 req->reply_headers = req->reply_body = NULL;
478 return FALSE;
481 /* Assume that a closed connection means we're finished, this indeed
482 breaks with keep-alive connections and faulty connections. */
483 req->finished = 1;
485 cleanup:
486 if( req->ssl )
487 ssl_disconnect( req->ssl );
488 else
489 closesocket( req->fd );
491 if( getenv( "BITLBEE_DEBUG" ) && req )
492 printf( "Finishing HTTP request with status: %s\n",
493 req->status_string ? req->status_string : "NULL" );
495 req->func( req );
496 http_free( req );
497 return FALSE;
500 static void http_free( struct http_request *req )
502 g_free( req->request );
503 g_free( req->reply_headers );
504 g_free( req->status_string );
505 g_free( req );