Scan media entities as well, not just url entities. This should expand more
[bitlbee.git] / lib / misc.c
bloba70657576df70b7a40bded9d09e6aa0f4749f6e1
1 /********************************************************************\
2 * BitlBee -- An IRC to other IM-networks gateway *
3 * *
4 * Copyright 2002-2006 Wilmer van der Gaast and others *
5 \********************************************************************/
7 /*
8 * Various utility functions. Some are copied from Gaim to support the
9 * IM-modules, most are from BitlBee.
11 * Copyright (C) 1998-1999, Mark Spencer <markster@marko.net>
12 * (and possibly other members of the Gaim team)
13 * Copyright 2002-2006 Wilmer van der Gaast <wilmer@gaast.net>
17 This program is free software; you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation; either version 2 of the License, or
20 (at your option) any later version.
22 This program is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 GNU General Public License for more details.
27 You should have received a copy of the GNU General Public License with
28 the Debian GNU/Linux distribution in /usr/share/common-licenses/GPL;
29 if not, write to the Free Software Foundation, Inc., 59 Temple Place,
30 Suite 330, Boston, MA 02111-1307 USA
33 #define BITLBEE_CORE
34 #include "nogaim.h"
35 #include "base64.h"
36 #include "md5.h"
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include <glib.h>
42 #include <time.h>
44 #ifdef HAVE_RESOLV_A
45 #include <arpa/nameser.h>
46 #include <resolv.h>
47 #endif
49 #include "md5.h"
50 #include "ssl_client.h"
52 void strip_linefeed(gchar *text)
54 int i, j;
55 gchar *text2 = g_malloc(strlen(text) + 1);
57 for (i = 0, j = 0; text[i]; i++)
58 if (text[i] != '\r')
59 text2[j++] = text[i];
60 text2[j] = '\0';
62 strcpy(text, text2);
63 g_free(text2);
/* Build a time_t from calendar fields, interpreted by mktime() (i.e. as
   local time, with tm_isdst left at 0).

   year is the full year (e.g. 2006), month is 1-12. A negative sec means
   "don't care": the seconds of the current wall clock are used instead. */
time_t get_time(int year, int month, int day, int hour, int min, int sec)
{
	struct tm fields;

	memset(&fields, 0, sizeof(fields));

	if (sec < 0)
		sec = time(NULL) % 60;

	fields.tm_sec = sec;
	fields.tm_min = min;
	fields.tm_hour = hour;
	fields.tm_mday = day;
	fields.tm_mon = month - 1;
	fields.tm_year = year - 1900;

	return mktime(&fields);
}
/* Like mktime(), but treats *tp as UTC instead of local time.

   mktime() interprets its argument as local time, so its result is off
   by the local UTC offset. Instead of messing with environment variables
   (kludgy) or using timegm() (not portable), the offset is measured: the
   first result is converted back with gmtime_r() and run through
   mktime() a second time, and the difference between the two results is
   added to the first one.

   This is a hack and it will go wrong around DST changes, but it is more
   likely to be thread-safe than changing TZ. Note that tp->tm_isdst is
   overwritten and that mktime() may normalise the other fields of *tp. */
time_t mktime_utc( struct tm *tp )
{
	struct tm as_utc;
	time_t guess, shifted;
	
	tp->tm_isdst = -1;
	guess = mktime( tp );
	
	gmtime_r( &guess, &as_utc );
	as_utc.tm_isdst = -1;
	
	/* If hour and minute survive the round trip the local zone already
	   is UTC and the first answer was right. Otherwise correct it. */
	if( as_utc.tm_hour != tp->tm_hour || as_utc.tm_min != tp->tm_min )
	{
		shifted = mktime( &as_utc );
		guess += guess - shifted;
	}
	
	return guess;
}
/* One HTML character entity: its name (without '&' and ';') and the
   bytes it expands to. The replacement is at most two bytes plus the
   terminator, which fits a two-byte UTF-8 sequence. */
typedef struct htmlentity
{
	char code[7];
	char is[3];
} htmlentity_t;

/* Entities understood by strip_html(). Replacements are spelled as UTF-8
   byte escapes so the table does not depend on the encoding of this
   source file. The list ends with an entry whose code is empty. */
static const htmlentity_t ent[] =
{
	{ "lt",     "<" },
	{ "gt",     ">" },
	{ "amp",    "&" },
	{ "apos",   "'" },
	{ "quot",   "\"" },
	{ "aacute", "\xc3\xa1" },	/* a acute */
	{ "eacute", "\xc3\xa9" },	/* e acute */
	{ "iacute", "\xc3\xad" },	/* i acute (used to expand to e acute) */
	{ "oacute", "\xc3\xb3" },	/* o acute */
	{ "uacute", "\xc3\xba" },	/* u acute */
	{ "agrave", "\xc3\xa0" },	/* a grave */
	{ "egrave", "\xc3\xa8" },	/* e grave */
	{ "igrave", "\xc3\xac" },	/* i grave */
	{ "ograve", "\xc3\xb2" },	/* o grave */
	{ "ugrave", "\xc3\xb9" },	/* u grave */
	{ "acirc",  "\xc3\xa2" },	/* a circumflex */
	{ "ecirc",  "\xc3\xaa" },	/* e circumflex */
	{ "icirc",  "\xc3\xae" },	/* i circumflex */
	{ "ocirc",  "\xc3\xb4" },	/* o circumflex */
	{ "ucirc",  "\xc3\xbb" },	/* u circumflex */
	{ "auml",   "\xc3\xa4" },	/* a diaeresis */
	{ "euml",   "\xc3\xab" },	/* e diaeresis */
	{ "iuml",   "\xc3\xaf" },	/* i diaeresis */
	{ "ouml",   "\xc3\xb6" },	/* o diaeresis */
	{ "uuml",   "\xc3\xbc" },	/* u diaeresis */
	{ "nbsp",   " " },
	{ "",       "" }
};
153 void strip_html( char *in )
155 char *start = in;
156 char out[strlen(in)+1];
157 char *s = out, *cs;
158 int i, matched;
159 int taglen;
161 memset( out, 0, sizeof( out ) );
163 while( *in )
165 if( *in == '<' && ( isalpha( *(in+1) ) || *(in+1) == '/' ) )
167 /* If in points at a < and in+1 points at a letter or a slash, this is probably
168 a HTML-tag. Try to find a closing > and continue there. If the > can't be
169 found, assume that it wasn't a HTML-tag after all. */
171 cs = in;
173 while( *in && *in != '>' )
174 in ++;
176 taglen = in - cs - 1; /* not <0 because the above loop runs at least once */
177 if( *in )
179 if( g_strncasecmp( cs+1, "b", taglen) == 0 )
180 *(s++) = '\x02';
181 else if( g_strncasecmp( cs+1, "/b", taglen) == 0 )
182 *(s++) = '\x02';
183 else if( g_strncasecmp( cs+1, "i", taglen) == 0 )
184 *(s++) = '\x1f';
185 else if( g_strncasecmp( cs+1, "/i", taglen) == 0 )
186 *(s++) = '\x1f';
187 else if( g_strncasecmp( cs+1, "br", taglen) == 0 )
188 *(s++) = '\n';
189 in ++;
191 else
193 in = cs;
194 *(s++) = *(in++);
197 else if( *in == '&' )
199 cs = ++in;
200 while( *in && isalpha( *in ) )
201 in ++;
203 if( *in == ';' ) in ++;
204 matched = 0;
206 for( i = 0; *ent[i].code; i ++ )
207 if( g_strncasecmp( ent[i].code, cs, strlen( ent[i].code ) ) == 0 )
209 int j;
211 for( j = 0; ent[i].is[j]; j ++ )
212 *(s++) = ent[i].is[j];
214 matched = 1;
215 break;
218 /* None of the entities were matched, so return the string */
219 if( !matched )
221 in = cs - 1;
222 *(s++) = *(in++);
225 else
227 *(s++) = *(in++);
231 strcpy( start, out );
234 char *escape_html( const char *html )
236 const char *c = html;
237 GString *ret;
238 char *str;
240 if( html == NULL )
241 return( NULL );
243 ret = g_string_new( "" );
245 while( *c )
247 switch( *c )
249 case '&':
250 ret = g_string_append( ret, "&amp;" );
251 break;
252 case '<':
253 ret = g_string_append( ret, "&lt;" );
254 break;
255 case '>':
256 ret = g_string_append( ret, "&gt;" );
257 break;
258 case '"':
259 ret = g_string_append( ret, "&quot;" );
260 break;
261 default:
262 ret = g_string_append_c( ret, *c );
264 c ++;
267 str = ret->str;
268 g_string_free( ret, FALSE );
269 return( str );
/* Value of one hexadecimal digit. c must satisfy isxdigit(). */
static int http_hex_value( char c )
{
	if( c >= '0' && c <= '9' )
		return c - '0';
	
	return tolower( (unsigned char) c ) - 'a' + 10;
}

/* Decode%20a%20file%20name

   Replaces every %XX escape in s by the byte it stands for. The string
   is decoded in place (the result is never longer than the input).

   An escape has to consist of exactly two hexadecimal digits. If a
   malformed or truncated escape is found the whole result is set to the
   empty string. */
void http_decode( char *s )
{
	char *in, *out;
	
	for( in = out = s; *in; in ++ )
	{
		if( *in != '%' )
		{
			*out++ = *in;
			continue;
		}
		
		/* in[2] is only looked at when in[1] is a digit, so this
		   never reads past the terminating zero. */
		if( !isxdigit( (unsigned char) in[1] ) ||
		    !isxdigit( (unsigned char) in[2] ) )
		{
			*s = '\0';
			return;
		}
		
		*out++ = (char) ( ( http_hex_value( in[1] ) << 4 ) | http_hex_value( in[2] ) );
		in += 2;
	}
	
	*out = '\0';
}
/* Is c one of the characters that are passed through unescaped:
   A-Z, a-z, 0-9 and "._-~"? Deliberately not using isalnum(), which is
   locale-aware. */
static int http_is_unreserved( unsigned char c )
{
	return ( c >= 'A' && c <= 'Z' ) ||
	       ( c >= 'a' && c <= 'z' ) ||
	       ( c >= '0' && c <= '9' ) ||
	       c == '.' || c == '_' || c == '-' || c == '~';
}

/* Warning: This one explodes the string. Worst-cases can make the string 3x its original size! */
/* This function is safe, but make sure you call it safely as well: the
   buffer behind s must have room for 3 * strlen( s ) + 1 bytes.

   Every byte that is not unreserved is replaced by %XX (upper-case hex).
   The string is encoded in place, starting at the end, so no temporary
   copy is needed. */
void http_encode( char *s )
{
	static const char hex[] = "0123456789ABCDEF";
	size_t len = strlen( s );
	size_t escapes = 0;
	size_t rd, wr;
	
	for( rd = 0; rd < len; rd ++ )
		if( !http_is_unreserved( (unsigned char) s[rd] ) )
			escapes ++;
	
	wr = len + 2 * escapes;
	s[wr] = '\0';
	
	/* Walk backwards. wr - rd is twice the number of bytes that still
	   have to be escaped, so the write position never drops below the
	   byte that is being read. */
	for( rd = len; rd > 0; rd -- )
	{
		unsigned char c = (unsigned char) s[rd-1];
		
		if( http_is_unreserved( c ) )
		{
			s[--wr] = (char) c;
		}
		else
		{
			wr -= 3;
			s[wr] = '%';
			s[wr+1] = hex[c >> 4];
			s[wr+2] = hex[c & 0x0f];
		}
	}
}
/* Replace every '\n' and '\r' in source by a space. The string is
   modified in place; the return value is source itself. */
char *strip_newlines( char *source )
{
	char *p;
	
	for( p = source; *p; p ++ )
	{
		switch( *p )
		{
		case '\n':
		case '\r':
			*p = ' ';
			break;
		default:
			break;
		}
	}
	
	return source;
}
/* Wrap an IPv4 address into IPv6 space ("1.2.3.4" -> "::ffff:1.2.3.4").

   If src contains anything but digits and dots it is returned unchanged.
   Otherwise the result lives in a static buffer that is overwritten by
   the next call, so this is not thread-safe. */
char *ipv6_wrap( char *src )
{
	static char dst[64];
	size_t ip_chars = strspn( src, "0123456789." );
	
	/* Hmm, it's not even an IP... */
	if( src[ip_chars] != '\0' )
		return src;
	
	g_snprintf( dst, sizeof( dst ), "::ffff:%s", src );
	
	return dst;
}
/* Unwrap an IPv4 address out of IPv6 space ("::ffff:1.2.3.4" ->
   "1.2.3.4"). Thread-safe, because it's very simple: the result is
   either src itself (no "::ffff:" prefix, or something other than digits
   and dots behind it) or a pointer into src, right behind the prefix. */
char *ipv6_unwrap( char *src )
{
	char *ip;
	
	if( g_strncasecmp( src, "::ffff:", 7 ) != 0 )
		return src;
	
	ip = src + 7;
	
	/* Hmm, it's not even an IP... */
	if( ip[strspn( ip, "0123456789." )] != '\0' )
		return src;
	
	return ip;
}
383 /* Convert from one charset to another.
385 from_cs, to_cs: Source and destination charsets
386 src, dst: Source and destination strings
387 size: Size if src. 0 == use strlen(). strlen() is not reliable for UNICODE/UTF16 strings though.
388 maxbuf: Maximum number of bytes to write to dst
390 Returns the number of bytes written to maxbuf or -1 on an error.
392 signed int do_iconv( char *from_cs, char *to_cs, char *src, char *dst, size_t size, size_t maxbuf )
394 GIConv cd;
395 size_t res;
396 size_t inbytesleft, outbytesleft;
397 char *inbuf = src;
398 char *outbuf = dst;
400 cd = g_iconv_open( to_cs, from_cs );
401 if( cd == (GIConv) -1 )
402 return -1;
404 inbytesleft = size ? size : strlen( src );
405 outbytesleft = maxbuf - 1;
406 res = g_iconv( cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft );
407 *outbuf = '\0';
408 g_iconv_close( cd );
410 if( res != 0 )
411 return -1;
412 else
413 return outbuf - dst;
/* A pretty reliable random number generator. Tries to use the /dev/random
   devices first, and falls back to the random number generator from libc
   when it fails. Opens randomizer devices with O_NONBLOCK to make sure a
   lack of entropy won't halt BitlBee.

   buf: Buffer to fill
   count: Number of bytes to write to buf

   Not thread-safe: the "are the devices usable" decision is kept in a
   static variable. */
void random_bytes( unsigned char *buf, int count )
{
#ifndef _WIN32
	static int use_dev = -1;
	
	/* Actually this probing code isn't really necessary, is it? */
	if( use_dev == -1 )
	{
		if( access( "/dev/random", R_OK ) == 0 || access( "/dev/urandom", R_OK ) == 0 )
			use_dev = 1;
		else
		{
			use_dev = 0;
			srand( ( getpid() << 16 ) ^ time( NULL ) );
		}
	}
	
	if( use_dev )
	{
		int fd;
		
		/* At least on Linux, /dev/random can block if there's not
		   enough entropy. We really don't want that, so if it can't
		   give anything, use /dev/urandom instead. */
		if( ( fd = open( "/dev/random", O_RDONLY | O_NONBLOCK ) ) >= 0 )
		{
			if( read( fd, buf, count ) == count )
			{
				close( fd );
				return;
			}
			/* Only close what was really opened. */
			close( fd );
		}
		
		/* urandom isn't supposed to block at all, but just to be
		   sure. If it blocks, we'll disable use_dev and use the libc
		   randomizer instead. */
		if( ( fd = open( "/dev/urandom", O_RDONLY | O_NONBLOCK ) ) >= 0 )
		{
			if( read( fd, buf, count ) == count )
			{
				close( fd );
				return;
			}
			close( fd );
		}
		
		/* If /dev/random blocks once, we'll still try to use it
		   again next time. If /dev/urandom also fails for some
		   reason, stick with libc during this session. */
		
		use_dev = 0;
		srand( ( getpid() << 16 ) ^ time( NULL ) );
	}
	
	if( !use_dev )
#endif
	{
		int i;
		
		/* Possibly the LSB of rand() isn't very random on some
		   platforms. Seems okay on at least Linux and OSX though. */
		for( i = 0; i < count; i ++ )
			buf[i] = rand() & 0xff;
	}
}
/* Check whether value is something bool2int() understands: one of
   true/yes/on/false/no/off (in any case) or a non-empty string of
   decimal digits. Returns 1 if so, 0 otherwise. */
int is_bool( char *value )
{
	if( *value == 0 )
		return 0;
	
	if( ( g_strcasecmp( value, "true" ) == 0 ) || ( g_strcasecmp( value, "yes" ) == 0 ) || ( g_strcasecmp( value, "on" ) == 0 ) )
		return 1;
	if( ( g_strcasecmp( value, "false" ) == 0 ) || ( g_strcasecmp( value, "no" ) == 0 ) || ( g_strcasecmp( value, "off" ) == 0 ) )
		return 1;
	
	/* The cast matters: passing a negative char (any byte >= 0x80 where
	   char is signed) to isdigit() is undefined. */
	for( ; *value; value ++ )
		if( !isdigit( (unsigned char) *value ) )
			return 0;
	
	return 1;
}
/* Convert a boolean-ish string to an int. true/yes/on give 1,
   false/no/off give 0 (both in any case). Anything else is parsed as a
   decimal number with sscanf(); if that fails too, the result is 0. */
int bool2int( char *value )
{
	static const char *const yes[] = { "true", "yes", "on" };
	static const char *const no[] = { "false", "no", "off" };
	int n, number;
	
	for( n = 0; n < 3; n ++ )
		if( g_strcasecmp( value, yes[n] ) == 0 )
			return 1;
	
	for( n = 0; n < 3; n ++ )
		if( g_strcasecmp( value, no[n] ) == 0 )
			return 0;
	
	return sscanf( value, "%d", &number ) == 1 ? number : 0;
}
/* Look up the SRV records for _service._protocol.domain.

   Returns a NULL-terminated array of replies (to be released with
   srv_free()), or NULL if nothing was found, if the lookup failed or if
   BitlBee was built without HAVE_RESOLV_A.

   The target name is read straight from the record data as a sequence
   of length-prefixed labels; compressed names are not handled. Parsing
   stops at the first record that doesn't look right, replies collected
   up to that point are still returned. */
struct ns_srv_reply **srv_lookup( char *service, char *protocol, char *domain )
{	
	struct ns_srv_reply **replies = NULL;
#ifdef HAVE_RESOLV_A
	struct ns_srv_reply *reply = NULL;
	char name[1024];
	unsigned char querybuf[1024];
	const unsigned char *buf;
	ns_msg nsh;
	ns_rr rr;
	int i, n, len, size;
	
	g_snprintf( name, sizeof( name ), "_%s._%s.%s", service, protocol, domain );
	
	if( ( size = res_query( name, ns_c_in, ns_t_srv, querybuf, sizeof( querybuf ) ) ) <= 0 )
		return NULL;
	
	if( ns_initparse( querybuf, size, &nsh ) != 0 )
		return NULL;
	
	n = 0;
	while( ns_parserr( &nsh, ns_s_an, n, &rr ) == 0 )
	{
		size = ns_rr_rdlen( rr );
		buf = ns_rr_rdata( rr );
		
		/* Bytes 0-5 are priority, weight and port, the labels of the
		   target start at byte 6. Add up their lengths. */
		len = 0;
		for( i = 6; i < size && buf[i]; i += buf[i] + 1 )
			len += buf[i] + 1;
		
		/* The zero byte that ends the name has to be inside the
		   record as well, so i == size is already too far. */
		if( i >= size )
			break;
		
		/* One extra byte so the name is always zero-terminated, also
		   when it is empty (len == 0). This assumes name is the
		   trailing array of struct ns_srv_reply, like the existing
		   sizeof() + len allocation did. */
		reply = g_malloc( sizeof( struct ns_srv_reply ) + len + 1 );
		memcpy( reply->name, buf + 7, len );
		reply->name[len] = '\0';
		
		/* The first length byte was skipped by the copy, turn the
		   remaining ones into dots. */
		for( i = buf[6]; i < len && buf[7+i]; i += buf[7+i] + 1 )
			reply->name[i] = '.';
		
		if( i > len )
		{
			g_free( reply );
			break;
		}
		
		reply->prio = ( buf[0] << 8 ) | buf[1];
		reply->weight = ( buf[2] << 8 ) | buf[3];
		reply->port = ( buf[4] << 8 ) | buf[5];
		
		n ++;
		replies = g_renew( struct ns_srv_reply *, replies, n + 1 );
		replies[n-1] = reply;
	}
	if( replies )
		replies[n] = NULL;
#endif
	
	return replies;
}
/* Free a NULL-terminated reply array as returned by srv_lookup(): every
   element first, then the array itself. NULL is accepted and ignored. */
void srv_free( struct ns_srv_reply **srv )
{
	struct ns_srv_reply **cur;
	
	if( srv == NULL )
		return;
	
	cur = srv;
	while( *cur != NULL )
	{
		g_free( *cur );
		cur ++;
	}
	
	g_free( srv );
}
589 /* Word wrapping. Yes, I know this isn't UTF-8 clean. I'm willing to take the risk. */
590 char *word_wrap( const char *msg, int line_len )
592 GString *ret = g_string_sized_new( strlen( msg ) + 16 );
594 while( strlen( msg ) > line_len )
596 int i;
598 /* First try to find out if there's a newline already. Don't
599 want to add more splits than necessary. */
600 for( i = line_len; i > 0 && msg[i] != '\n'; i -- );
601 if( msg[i] == '\n' )
603 g_string_append_len( ret, msg, i + 1 );
604 msg += i + 1;
605 continue;
608 for( i = line_len; i > 0; i -- )
610 if( msg[i] == '-' )
612 g_string_append_len( ret, msg, i + 1 );
613 g_string_append_c( ret, '\n' );
614 msg += i + 1;
615 break;
617 else if( msg[i] == ' ' )
619 g_string_append_len( ret, msg, i );
620 g_string_append_c( ret, '\n' );
621 msg += i + 1;
622 break;
625 if( i == 0 )
627 g_string_append_len( ret, msg, line_len );
628 g_string_append_c( ret, '\n' );
629 msg += line_len;
632 g_string_append( ret, msg );
634 return g_string_free( ret, FALSE );
637 gboolean ssl_sockerr_again( void *ssl )
639 if( ssl )
640 return ssl_errno == SSL_AGAIN;
641 else
642 return sockerr_again();
645 /* Returns values: -1 == Failure (base64-decoded to something unexpected)
646 0 == Okay
647 1 == Password doesn't match the hash. */
648 int md5_verify_password( char *password, char *hash )
650 md5_byte_t *pass_dec = NULL;
651 md5_byte_t pass_md5[16];
652 md5_state_t md5_state;
653 int ret = -1, i;
655 if( base64_decode( hash, &pass_dec ) == 21 )
657 md5_init( &md5_state );
658 md5_append( &md5_state, (md5_byte_t*) password, strlen( password ) );
659 md5_append( &md5_state, (md5_byte_t*) pass_dec + 16, 5 ); /* Hmmm, salt! */
660 md5_finish( &md5_state, pass_md5 );
662 for( i = 0; i < 16; i ++ )
664 if( pass_dec[i] != pass_md5[i] )
666 ret = 1;
667 break;
671 /* If we reached the end of the loop, it was a match! */
672 if( i == 16 )
673 ret = 0;
676 g_free( pass_dec );
678 return ret;
681 /* Split commands (root-style, *not* IRC-style). Handles "quoting of"
682 white\ space in 'various ways'. Returns a NULL-terminated static
683 char** so watch out with nested use! Definitely not thread-safe. */
684 char **split_command_parts( char *command )
686 static char *cmd[IRC_MAX_ARGS+1];
687 char *s, q = 0;
688 int k;
690 memset( cmd, 0, sizeof( cmd ) );
691 cmd[0] = command;
692 k = 1;
693 for( s = command; *s && k < IRC_MAX_ARGS; s ++ )
694 if( *s == ' ' && !q )
696 *s = 0;
697 while( *++s == ' ' );
698 if( *s == '"' || *s == '\'' )
700 q = *s;
701 s ++;
703 if( *s )
705 cmd[k++] = s;
706 s --;
708 else
710 break;
713 else if( *s == '\\' && ( ( !q && s[1] ) || ( q && q == s[1] ) ) )
715 char *cpy;
717 for( cpy = s; *cpy; cpy ++ )
718 cpy[0] = cpy[1];
720 else if( *s == q )
722 q = *s = 0;
725 /* Full zero-padding for easier argc checking. */
726 while( k <= IRC_MAX_ARGS )
727 cmd[k++] = NULL;
729 return cmd;
/* Find a header in a block of RFC822-style headers.

   text: The headers (may be NULL, which gives NULL)
   header: Name of the header to look for, compared case-insensitively
           at the start of every line
   len: Number of bytes of text to look at, 0 == use strlen()

   Returns a newly allocated copy of the value (release with g_free()):
   everything behind the name and any ':', spaces and tabs, up to the end
   of the line. Returns NULL if the header isn't found before the empty
   line that ends the headers, or if the end of text is reached before
   the end of the header's line. */
char *get_rfc822_header( char *text, char *header, int len )
{
	int name_len = strlen( header );
	int pos = 0;
	int start;
	
	if( text == NULL )
		return NULL;
	
	if( len == 0 )
		len = strlen( text );
	
	while( pos + name_len < len )
	{
		if( g_strncasecmp( text + pos, header, name_len ) == 0 )
		{
			/* Skip the name and everything in [: \t] behind it. */
			for( pos += name_len; pos < len; pos ++ )
				if( text[pos] != ':' && text[pos] != ' ' && text[pos] != '\t' )
					break;
			
			if( pos >= len )
				return NULL;
			
			/* The value runs from here to the end of the line. */
			start = pos;
			for( ; pos < len; pos ++ )
				if( text[pos] == '\r' || text[pos] == '\n' )
					break;
			
			if( pos >= len )
				return NULL;
			
			return g_strndup( text + start, pos - start );
		}
		
		/* Not the header we were looking for, move on to the start
		   of the next line. */
		for( ; pos < len; pos ++ )
			if( text[pos] == '\r' || text[pos] == '\n' )
				break;
		for( ; pos < len; pos ++ )
			if( text[pos] != '\r' && text[pos] != '\n' )
				break;
		
		/* End of headers? */
		if( pos >= 4 && strncmp( text + pos - 4, "\r\n\r\n", 4 ) == 0 )
			break;
		if( pos >= 2 && ( strncmp( text + pos - 2, "\n\n", 2 ) == 0 ||
		                  strncmp( text + pos - 2, "\r\r", 2 ) == 0 ) )
			break;
	}
	
	return NULL;
}