2 * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client
3 * Copyright (C) 2002-2012 Match Grun and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 * Functions for an E-Mail address harvester.
26 #include "claws-features.h"
34 #include "proctypes.h"
37 #include "addrharvest.h"
40 #ifdef USE_ALT_ADDRBOOK
41 #include "addressbook-dbus.h"
43 #include "file-utils.h"
45 /* Mail header names of interest */
/* Each pointer is initialised from a HEADER_* macro; addrharvest_create()
 * passes them, in this order, to addrharvest_build_entry(). */
46 static gchar
*_headerFrom_
= HEADER_FROM
;
47 static gchar
*_headerReplyTo_
= HEADER_REPLY_TO
;
48 static gchar
*_headerSender_
= HEADER_SENDER
;
49 static gchar
*_headerErrorsTo_
= HEADER_ERRORS_TO
;
50 static gchar
*_headerCC_
= HEADER_CC
;
51 static gchar
*_headerTo_
= HEADER_TO
;
/* Capacity (before the extra terminator slack) of the scratch buffer used by
 * addrharvest_parse_address() for one address. */
53 #define ADDR_BUFFSIZE 1024
/* Byte limit passed to claws_fgets() when reading one header line. */
54 #define MSG_BUFFSIZE 2048
/* NOTE(review): no use of MSGNUM_BUFFSIZE is visible in this part of the file. */
55 #define MSGNUM_BUFFSIZE 32
/* Initial folderSize assigned by addrharvest_create(). */
56 #define DFL_FOLDER_SIZE 20
58 /* Noise strings included by some other E-Mail clients */
59 #define REM_NAME_STRING "(Email)"
60 #define REM_NAME_STRING2 "(Email 2)"
62 /* Directories to ignore */
/* Tab-separated list; addrharvest_harvest_dir() searches it with strstr(). */
63 #define DIR_IGNORE ".\t.."
75 #ifdef USE_ALT_ADDRBOOK
84 * Build header table entry.
85 * Enter: harvester Harvester object.
/* Create one header-table entry and append it to harvester->headerTable.
 * NOTE(review): the statement that stores `name` on the entry is not visible
 * here - confirm it is assigned to entry->header. */
88 static void addrharvest_build_entry(
89 AddressHarvester
* harvester
, gchar
*name
)
/* g_new0() returns zero-filled storage for the new HeaderEntry. */
93 entry
= g_new0( HeaderEntry
, 1 );
95 entry
->selected
= FALSE
;
/* Appending keeps entries in the order they were built. */
98 harvester
->headerTable
= g_list_append( harvester
->headerTable
, entry
);
/* Per-entry callback that addrharvest_free_table() hands to
 * g_hash_table_foreach_remove() when emptying the duplicate table.
 * NOTE(review): the body is not visible here - confirm what it releases and
 * which value it returns. */
104 static gint
addrharvest_free_table_vis( gpointer key
, gpointer value
, gpointer data
) {
/* Tear down the harvester's header list and its duplicate-address hash table,
 * leaving both pointers NULL. */
114 static void addrharvest_free_table( AddressHarvester
* harvester
) {
118 /* Free header list */
119 node
= harvester
->headerTable
;
/* Reset the fields of the entry held by the current list node.
 * NOTE(review): the release of the entry itself is not visible here - confirm. */
121 entry
= ( HeaderEntry
* ) node
->data
;
122 entry
->header
= NULL
;
123 entry
->selected
= FALSE
;
124 entry
->folder
= NULL
;
127 node
= g_list_next( node
);
/* Release the list cells, then drop the dangling head pointer. */
129 g_list_free( harvester
->headerTable
);
130 harvester
->headerTable
= NULL
;
132 /* Free duplicate table */
/* Every entry is visited by addrharvest_free_table_vis before the table is destroyed. */
133 g_hash_table_foreach_remove( harvester
->dupTable
, addrharvest_free_table_vis
, NULL
);
134 g_hash_table_destroy( harvester
->dupTable
);
135 harvester
->dupTable
= NULL
;
/* Allocate a harvester with default settings and a header table holding the
 * six header names of interest (From, Reply-To, Sender, Errors-To, CC, To).
 * NOTE(review): the return statement is not visible here. */
142 AddressHarvester
*addrharvest_create( void ) {
143 AddressHarvester
*harvester
;
145 harvester
= g_new0( AddressHarvester
, 1 );
146 harvester
->path
= NULL
;
/* Duplicate table is keyed by string (g_str_hash / g_str_equal). */
147 harvester
->dupTable
= g_hash_table_new( g_str_hash
, g_str_equal
);
148 harvester
->folderSize
= DFL_FOLDER_SIZE
;
149 harvester
->retVal
= MGU_SUCCESS
;
151 /* Build header table */
152 harvester
->headerTable
= NULL
;
153 addrharvest_build_entry( harvester
, _headerFrom_
);
154 addrharvest_build_entry( harvester
, _headerReplyTo_
);
155 addrharvest_build_entry( harvester
, _headerSender_
);
156 addrharvest_build_entry( harvester
, _headerErrorsTo_
);
157 addrharvest_build_entry( harvester
, _headerCC_
);
158 addrharvest_build_entry( harvester
, _headerTo_
);
167 * Specify path to folder that will be harvested.
168 * Entry: harvester Harvester object.
169 * value Full directory path.
/* Replace harvester->path with `value` and strip surrounding whitespace from
 * the stored string. Does nothing when harvester is NULL. */
171 void addrharvest_set_path( AddressHarvester
* harvester
, const gchar
*value
) {
172 cm_return_if_fail( harvester
!= NULL
);
173 harvester
->path
= mgu_replace_string( harvester
->path
, value
);
/* NOTE(review): the result above is stripped in place without a NULL check -
 * confirm mgu_replace_string() cannot yield NULL for the values callers pass. */
174 g_strstrip( harvester
->path
);
178 * Specify maximum folder size.
179 * Entry: harvester Harvester object.
/* Store the maximum folder size on the harvester. Does nothing when harvester
 * is NULL.
 * NOTE(review): addrharvest_insert_cache() uses folderSize as a divisor; any
 * range check on `value` sits on a line not visible here - confirm zero is
 * rejected. */
182 void addrharvest_set_folder_size(
183 AddressHarvester
* harvester
, const gint value
)
185 cm_return_if_fail( harvester
!= NULL
);
187 harvester
->folderSize
= value
;
192 * Specify folder recursion.
193 * Entry: harvester Harvester object.
194 * value TRUE to process sub-folders, FALSE to process folder only.
/* Store the recursion flag; addrharvest_harvest_dir() consults folderRecurse
 * before descending into a sub-directory. Does nothing when harvester is NULL. */
196 void addrharvest_set_recurse(
197 AddressHarvester
* harvester
, const gboolean value
)
199 cm_return_if_fail( harvester
!= NULL
);
200 harvester
->folderRecurse
= value
;
204 * Search (case insensitive) for header entry with specified name.
205 * Enter: harvester Harvester.
207 * Return: Header, or NULL if not found.
/* Walk harvester->headerTable looking for an entry whose header matches `name`,
 * ignoring ASCII case. */
209 static HeaderEntry
*addrharvest_find(
210 AddressHarvester
* harvester
, const gchar
*name
) {
215 node
= harvester
->headerTable
;
/* Only the first strlen(entry->header) characters are compared, so a `name`
 * that merely starts with the header text also compares equal. */
220 if (g_ascii_strncasecmp(entry
->header
, name
,
221 strlen(entry
->header
)) == 0 ) {
225 node
= g_list_next( node
);
231 * Set selection for specified header.
232 * Enter: harvester Harvester.
234 * value Value to set.
/* Set the `selected` flag of the header entry found for `name`. Does nothing
 * when harvester is NULL or no entry is found. */
236 void addrharvest_set_header(
237 AddressHarvester
* harvester
, const gchar
*name
, const gboolean value
)
241 cm_return_if_fail( harvester
!= NULL
);
242 entry
= addrharvest_find( harvester
, name
);
243 if( entry
!= NULL
) {
244 entry
->selected
= value
;
250 * Enter: harvester Harvester.
252 * Return: Address count, or -1 if header not found.
/* Look up the header entry for `name` and copy its count into the result.
 * NOTE(review): the initial value of `count` and the return statement are on
 * lines not visible here. */
254 gint
addrharvest_get_count( AddressHarvester
* harvester
, const gchar
*name
) {
/* A NULL harvester yields whatever `count` currently holds. */
259 cm_return_val_if_fail( harvester
!= NULL
, count
);
260 entry
= addrharvest_find( harvester
, name
);
261 if( entry
!= NULL
) {
262 count
= entry
->count
;
268 * Free up object by releasing internal memory.
269 * Enter: harvester Harvester.
/* Release the harvester's tables and path string and reset its fields.
 * Does nothing when harvester is NULL. */
271 void addrharvest_free( AddressHarvester
*harvester
) {
272 cm_return_if_fail( harvester
!= NULL
);
274 /* Free internal stuff */
275 addrharvest_free_table( harvester
);
276 g_free( harvester
->path
);
/* Clear fields so no stale pointers or settings remain on the object. */
279 harvester
->path
= NULL
;
280 harvester
->retVal
= MGU_SUCCESS
;
281 harvester
->headerTable
= NULL
;
283 harvester
->folderSize
= 0;
/* NOTE(review): the statement that releases the object itself is not visible here. */
285 /* Now release object */
289 #ifdef USE_ALT_ADDRBOOK
/* Derive one part of a display name, selected by `namepart`, from a private
 * copy of `name`. The split point is the last space found by g_strrstr().
 * NOTE(review): the branch conditions around the three g_strdup() calls and
 * the release of `token` are not visible here - confirm before relying on them. */
290 static gchar
* get_namepart(const gchar
* name
, Namepart namepart
) {
291 gchar
*pos
, *part
= NULL
;
/* Work on a duplicate so the caller's string is never written to. */
292 gchar
*token
= g_strdup(name
);
294 pos
= g_strrstr(token
, " ");
295 if (namepart
== FIRST
) {
298 part
= g_strdup(token
);
304 part
= g_strdup(token
);
307 part
= g_strdup(pos
);
316 * Insert address into cache.
317 * Enter: harvester Harvester object.
318 * entry Header object.
319 * cache Address cache to load.
321 * address eMail address.
/* Record one harvested name/address pair. The lower-cased address is the key
 * into harvester->dupTable; a new contact is created when needed and stored
 * under that key.
 * NOTE(review): several branch lines (else / #else / #endif, the check on the
 * lookup result, count updates, release of `key`) are not visible here. */
323 static void addrharvest_insert_cache(
324 AddressHarvester
*harvester
, HeaderEntry
*entry
,
325 AddressCache
*cache
, const gchar
*name
,
326 const gchar
*address
)
328 #ifndef USE_ALT_ADDRBOOK
/* Decide whether a fresh folder is needed for this header. */
337 folder
= entry
->folder
;
338 if( folder
== NULL
) {
339 newFolder
= TRUE
; /* No folder yet */
/* folderSize is the divisor here; it must be non-zero. */
341 if( entry
->count
% harvester
->folderSize
== 0 ) {
342 newFolder
= TRUE
; /* Folder is full */
345 ContactEntry
* person
;
/* Lower-case the address so the duplicate lookup ignores case. */
350 key
= g_utf8_strdown( address
, -1 );
351 person
= g_hash_table_lookup( harvester
->dupTable
, key
);
352 #ifndef USE_ALT_ADDRBOOK
354 /* Update existing person to use longest name */
355 value
= ADDRITEM_NAME(person
);
356 if( strlen( name
) > strlen( value
) ) {
357 addritem_person_set_common_name( person
, name
);
362 /* Folder if required */
/* Folder name is "<header> (<n>)" with n = 1 + count / folderSize. */
364 cnt
= 1 + ( entry
->count
/ harvester
->folderSize
);
365 folderName
=g_strdup_printf( "%s (%d)",
366 entry
->header
, cnt
);
367 folder
= addritem_create_item_folder();
368 addritem_folder_set_name( folder
, folderName
);
369 addritem_folder_set_remarks( folder
, "" );
370 addrcache_id_folder( cache
, folder
);
371 addrcache_add_folder( cache
, folder
);
/* Remember the folder on the entry so later contacts reuse it. */
372 entry
->folder
= folder
;
373 g_free( folderName
);
/* Add the contact to the cache and register it in the duplicate table. */
377 person
= addrcache_add_contact(
378 cache
, folder
, name
, address
, "" );
379 g_hash_table_insert( harvester
->dupTable
, key
, person
);
382 addritem_parse_first_last( person
);
/* Variant that builds a ContactEntry directly, splitting the name with
 * get_namepart(). */
385 person
= g_new0(ContactEntry
, 1);
386 person
->first_name
= get_namepart(name
, FIRST
);
387 person
->last_name
= get_namepart(name
, LAST
);
388 person
->email
= g_strdup(address
);
389 g_hash_table_insert(harvester
->dupTable
, key
, person
);
396 * Remove specified string from name.
398 * str String to remove.
/* Remove every case-insensitive occurrence of `str` from `name`, in place, by
 * shifting the remaining text left over each match. */
400 static void addrharvest_del_email( gchar
*name
, gchar
*str
) {
404 lenr
= strlen( str
);
/* Search again from the start after each removal until no match remains. */
405 while((p
= strcasestr( name
, str
)) != NULL
) {
/* NOTE(review): `lenn` is assigned on a line not visible here - confirm that
 * p + lenr + lenn never reaches past the end of the caller's buffer. */
407 memmove( p
, p
+ lenr
, lenn
);
412 * Find position of at (@) character in buffer.
413 * Enter: buffer Start of buffer.
414 * Return: Position of at character, or NULL if not found.
415 * Note: This function searches for the last occurrence of an 'at' character
416 * prior to a valid delimiter character for the end of address. This enables
417 * an address to be found where it is also used as the name of the
418 * recipient. For example:
419 * "axle.rose@netscape.com" <axle.rose@netscape.com>
420 * The last occurrence of the at character is detected.
/* Locate an '@' in `buffer`, starting from the first occurrence found by
 * strchr().
 * NOTE(review): the forward search that follows is not visible here. */
422 static gchar
*addrharvest_find_at( const gchar
*buffer
) {
426 atCh
= strchr( buffer
, '@' );
428 /* Search forward for another one */
451 * Find start and end of address string.
452 * Enter: buf Start address of buffer to process (not modified).
453 * atp Pointer to email at (@) character.
454 * bp Pointer to start of email address (returned).
455 * ep Pointer to end of email address (returned).
/* Work out where the address text containing `atp` begins and ends inside
 * `buf`, reporting the result through `bp` (and an end pointer).
 * NOTE(review): the rest of the parameter list and both loop headers are not
 * visible here. */
457 static void addrharvest_find_address(
458 const gchar
*buf
, const gchar
*atp
, const gchar
**bp
,
463 /* Find first non-separator char */
/* Stop at the first character that is not a comma, semicolon, space, LF or CR. */
467 if( strchr( ",; \n\r", *p
) == NULL
) break;
472 /* Search forward for end of address */
/* A comma or semicolon ends the address. */
476 if( strchr( ",;", *p
) ) break;
483 * Extract E-Mail address from buffer. If found, address is removed from
485 * Enter: buffer Address buffer.
486 * Return: E-Mail address, or NULL if none found. Must g_free() when done.
/* Pull the e-mail address out of `buffer`: find the '@', scan backwards and
 * forwards for the address limits, return a g_strndup() copy and shift the
 * buffer contents over the extracted text.
 * NOTE(review): the scan bodies, the assignment of `len` and the return are
 * not visible here. */
488 static gchar
*addrharvest_extract_address( gchar
*buffer
) {
490 gchar
*atCh
, *p
, *bp
, *ep
;
494 atCh
= addrharvest_find_at( buffer
);
496 /* Search back for start of address */
499 while( p
>= buffer
) {
509 /* Search fwd for end */
517 else if( *p
== ' ' ) {
/* The caller owns the returned copy. */
528 addr
= g_strndup( bp
, len
+ 1 );
/* NOTE(review): confirm ep + len stays inside the caller's buffer. */
529 memmove( bp
, ep
, len
);
538 * Parse address from header buffer creating address in cache.
539 * Enter: harvester Harvester object.
540 * entry Header object.
541 * cache Address cache to load.
542 * hdrBuf Pointer to header buffer.
/* Split a header value into individual addresses. For each '@' found, the
 * surrounding address text is copied into a local buffer, the e-mail address
 * is extracted, the leftover text is cleaned into a display name, and the
 * pair is passed to addrharvest_insert_cache().
 * NOTE(review): how hdrBuf advances between iterations and how `email` and
 * `name` are released are not visible here. */
544 static void addrharvest_parse_address(
545 AddressHarvester
*harvester
, HeaderEntry
*entry
,
546 AddressCache
*cache
, const gchar
*hdrBuf
)
/* Two bytes beyond ADDR_BUFFSIZE leave room for the extra terminators below. */
548 gchar buffer
[ ADDR_BUFFSIZE
+ 2 ];
551 gchar
*atCh
, *email
, *name
;
554 /* Search for an address */
555 while((atCh
= addrharvest_find_at( hdrBuf
)) != NULL
) {
556 /* Find address string */
557 addrharvest_find_address( hdrBuf
, atCh
, &bp
, &ep
);
559 /* Copy into buffer */
560 bufLen
= ( size_t ) ( ep
- bp
);
/* Clamp to ADDR_BUFFSIZE - 1 so the writes at bufLen .. bufLen + 2 stay
 * within buffer (last valid index is ADDR_BUFFSIZE + 1). */
561 if( bufLen
> ADDR_BUFFSIZE
-1 ) {
562 bufLen
= ADDR_BUFFSIZE
- 1;
/* strncpy() does not terminate when the source is longer; the explicit
 * '\0' writes below do. */
564 strncpy( buffer
, bp
, bufLen
);
565 buffer
[ bufLen
] = '\0';
566 buffer
[ bufLen
+ 1 ] = '\0';
567 buffer
[ bufLen
+ 2 ] = '\0';
569 /* Extract address from buffer */
/* NOTE(review): the extractor is documented as returning NULL when it finds
 * nothing - confirm `email` is checked before the comparison below. */
570 email
= addrharvest_extract_address( buffer
);
572 /* Unescape characters */
573 mgu_str_unescape( buffer
);
575 /* Remove noise characters */
576 addrharvest_del_email( buffer
, REM_NAME_STRING
);
577 addrharvest_del_email( buffer
, REM_NAME_STRING2
);
579 /* Remove leading trailing quotes and spaces */
/* Double quotes are handled twice: once before and once after single quotes. */
580 mgu_str_ltc2space( buffer
, '\"', '\"' );
581 mgu_str_ltc2space( buffer
, '\'', '\'' );
582 mgu_str_ltc2space( buffer
, '\"', '\"' );
583 mgu_str_ltc2space( buffer
, '(', ')' );
584 g_strstrip( buffer
);
/* Leftover text equal to the address (ignoring ASCII case) is special-cased;
 * the statement taken on equality is not visible here. */
586 if( g_ascii_strcasecmp( buffer
, email
) == 0 )
/* Otherwise the display name is the decoded leftover text. */
589 name
= conv_unmime_header(buffer
, NULL
, TRUE
);
591 /* Insert into address book */
592 #ifndef USE_ALT_ADDRBOOK
593 addrharvest_insert_cache(
594 harvester
, entry
, cache
, name
, email
);
/* Alternate build: no cache is passed. */
596 addrharvest_insert_cache(
597 harvester
, entry
, NULL
, name
, email
);
607 * Test whether buffer contains a header that appears in header list.
608 * Enter: listHdr Header list.
610 * Return: TRUE if header in list.
/* Test whether the header name at the start of `buf` (the text before the
 * first ':') appears in `listHdr`.
 * NOTE(review): handling of a missing ':' and the release of `hdr` are not
 * visible here. */
612 static gboolean
addrharvest_check_hdr( GList
*listHdr
, gchar
*buf
) {
615 gchar
*p
, *hdr
, *nhdr
;
619 p
= strchr( buf
, ':' );
/* Copy just the header name into its own string. */
621 len
= ( size_t ) ( p
- buf
);
622 hdr
= g_strndup( buf
, len
);
/* Comparison length is strlen(nhdr), so a header that merely starts with a
 * listed name also compares equal. */
626 if (g_ascii_strncasecmp(nhdr
, hdr
, strlen(nhdr
)) == 0 ) {
630 node
= g_list_next( node
);
638 * Read header into a linked list of lines.
639 * Enter: fp File to read.
640 * listHdr List of header lines of interest.
641 * done End of headers or end of file reached.
642 * Return: Linked list of lines.
/* Read one header from `fp`, including its continuation lines, and collect
 * the lines into a list when the header is one of those named in `listHdr`.
 * NOTE(review): loop structure, the assignments to *done, the push-back call
 * and the return are not visible here. */
644 static GSList
*addrharvest_get_header( FILE *fp
, GList
*listHdr
, gboolean
*done
) {
646 gchar buf
[ MSG_BUFFSIZE
+ 2 ];
/* First line of the header; a NULL result means nothing more could be read. */
653 if( claws_fgets( buf
, MSG_BUFFSIZE
, fp
) == NULL
) {
658 /* Test for end of headers */
/* A line starting with CR or LF marks the end of the header section. */
659 if( buf
[0] == '\r' || buf
[0] == '\n' ) {
664 /* Test whether required header */
665 foundHdr
= addrharvest_check_hdr( listHdr
, buf
);
667 /* Read all header lines. Only add reqd ones to list */
673 list
= g_slist_append( list
, p
);
676 /* Read first character */
/* Leading space or tab means the next line continues this header. */
678 if( ch
== ' ' || ch
== '\t' ) {
679 /* Continuation character - read into buffer */
680 if( claws_fgets( buf
, MSG_BUFFSIZE
, fp
) == NULL
) {
689 /* Push back character for next header */
700 * Read specified file into address book.
701 * Enter: harvester Harvester object.
702 * fileName File to read.
703 * cache Address cache to load.
/* Open one message file, read its headers one at a time and, for each header
 * that is selected on the harvester, parse the addresses from its value.
 * NOTE(review): the loop over headers, the split at ':', the release of `buf`
 * and the return are not visible here. */
706 static gint
addrharvest_readfile(
707 AddressHarvester
*harvester
, const gchar
*fileName
,
708 AddressCache
*cache
, GList
*listHdr
)
712 gchar
*buf
, *addr
, *p
;
717 msgFile
= claws_fopen( fileName
, "rb" );
719 /* Cannot open file */
720 retVal
= MGU_OPEN_FILE
;
726 list
= addrharvest_get_header( msgFile
, listHdr
, &done
);
/* Join the collected lines into one string; the list and its strings are
 * released immediately afterwards. */
733 buf
= mgu_list_coalesce( list
);
734 g_slist_free_full( list
, g_free
);
736 if(( p
= strchr( buf
, ':' ) ) != NULL
) {
/* NOTE(review): `buf` is used as the header name here - confirm it has been
 * cut at the ':' on a line not visible. */
740 entry
= addrharvest_find( harvester
, buf
);
741 if( entry
&& entry
->selected
) {
742 /* Sanitize control characters */
745 if( *p
== '\r' || *p
== '\n' || *p
== '\t' )
749 addrharvest_parse_address(
750 harvester
, entry
, cache
, addr
);
756 claws_fclose( msgFile
);
761 * Read all files in specified directory into address book. Directories are
762 * traversed recursively if necessary.
763 * Enter: harvester Harvester object.
764 * cache Address cache to load.
765 * msgList List of message numbers, or NULL to process folder.
766 * dir Directory to process.
/* Harvest every message file in `dir`. Sub-directories are processed
 * recursively when harvester->folderRecurse is set; regular files are read
 * when to_number() of their name is non-negative.
 * NOTE(review): the final parameter, release of `fullname`, closing of `dp`
 * and release of `error` are on lines not visible here. */
768 static void addrharvest_harvest_dir(
769 AddressHarvester
*harvester
, AddressCache
*cache
, GList
*listHdr
,
775 GError
*error
= NULL
;
778 debug_print("Harvesting addresses from dir '%s'\n", dir
);
780 if( ( dp
= g_dir_open( dir
, 0, &error
) ) == NULL
) {
/* NOTE(review): confirm the GError is released on this failure path. */
781 debug_print("opening '%s' failed: %d (%s)\n", dir
,
782 error
->code
, error
->message
);
787 /* Process directory */
788 while( (d
= g_dir_read_name( dp
)) != NULL
) {
789 fullname
= g_strconcat(dir
, G_DIR_SEPARATOR_S
, d
, NULL
);
790 if( g_file_test(fullname
, G_FILE_TEST_IS_DIR
) ) {
791 if( harvester
->folderRecurse
) {
/* strstr() matches any `d` that is a substring of ".\t..", which is wider
 * than the two names "." and "..". */
792 if( strstr( DIR_IGNORE
, d
) != NULL
) {
797 addrharvest_harvest_dir(
798 harvester
, cache
, listHdr
, (gchar
*)fullname
);
801 if( g_file_test(fullname
, G_FILE_TEST_IS_REGULAR
) ) {
/* Only names for which to_number() returns a non-negative value are read. */
802 if( ( num
= to_number( d
) ) >= 0 ) {
803 addrharvest_readfile(
804 harvester
, fullname
, cache
, listHdr
);
813 * Read list of files in specified directory into address book.
814 * Enter: harvester Harvester object.
815 * cache Address cache to load.
816 * msgList List of message numbers, or NULL to process folder.
/* Harvest the messages whose numbers are listed by the caller. Each number is
 * turned into "<path><separator><num>" and passed to addrharvest_readfile().
 * NOTE(review): the final parameter, the loop header, the early return after
 * the guard and the release of `fullname` are on lines not visible here. */
818 static void addrharvest_harvest_list(
819 AddressHarvester
*harvester
, AddressCache
*cache
, GList
*listHdr
,
/* g_file_test() returns TRUE when ANY requested test passes, so combining
 * G_FILE_TEST_EXISTS with G_FILE_TEST_IS_DIR accepted regular files too.
 * G_FILE_TEST_IS_DIR alone is FALSE for both missing paths and non-directories,
 * which is what the message below describes. */
826 if (!g_file_test(harvester
->path
, G_FILE_TEST_IS_DIR
)) {
827 debug_print("'%s' doesn't exist or is not a dir\n", harvester
->path
);
831 /* Process message list */
/* The message number is stored directly in the list node's data pointer. */
834 num
= GPOINTER_TO_UINT( node
->data
);
835 fullname
= g_strdup_printf("%s%c%d",
836 harvester
->path
, G_DIR_SEPARATOR
, num
);
837 addrharvest_readfile( harvester
, fullname
, cache
, listHdr
);
839 node
= g_list_next( node
);
844 * ============================================================================
845 * Read all files in specified directory into address book.
846 * Enter: harvester Harvester object.
847 * cache Address cache to load.
848 * msgList List of message numbers, or NULL to process folder.
850 * ============================================================================
/* Public entry point: validate arguments, build the list of selected header
 * names, then harvest either the whole folder (msgList == NULL) or just the
 * listed messages.
 * NOTE(review): the #endif lines, the loop header over the header table, the
 * final retVal assignment and the return are not visible here. */
852 gint
addrharvest_harvest(
853 AddressHarvester
*harvester
, AddressCache
*cache
, GList
*msgList
)
/* Value handed back by the argument checks below. */
859 retVal
= MGU_BAD_ARGS
;
860 cm_return_val_if_fail( harvester
!= NULL
, retVal
);
/* The cache is only required when the alternate address book is not in use. */
861 #ifndef USE_ALT_ADDRBOOK
862 cm_return_val_if_fail( cache
!= NULL
, retVal
);
864 cm_return_val_if_fail( harvester
->path
!= NULL
, retVal
);
866 #ifndef USE_ALT_ADDRBOOK
/* Start from an empty cache and mark it as not yet read. */
868 addrcache_clear( cache
);
869 cache
->dataRead
= FALSE
;
871 /* Build list of headers of interest */
873 node
= harvester
->headerTable
;
878 if( entry
->selected
) {
/* Each selected header name is stored as a newly allocated lower-case copy. */
881 p
= g_utf8_strdown( entry
->header
, -1 );
882 listHdr
= g_list_append( listHdr
, p
);
884 node
= g_list_next( node
);
887 /* Process directory/files */
888 if( msgList
== NULL
) {
889 addrharvest_harvest_dir( harvester
, cache
, listHdr
, harvester
->path
);
892 addrharvest_harvest_list( harvester
, cache
, listHdr
, msgList
);
/* Release the list together with the lower-cased copies it owns. */
894 g_list_free_full( listHdr
, g_free
);
896 #ifndef USE_ALT_ADDRBOOK
/* Mark the cache as freshly loaded and unmodified. */
898 cache
->modified
= FALSE
;
899 cache
->dataRead
= TRUE
;
905 * ============================================================================
906 * Test whether any headers have been selected for processing.
907 * Enter: harvester Harvester object.
908 * Return: TRUE if a header was selected, FALSE if none were selected.
909 * ============================================================================
/* Report whether at least one entry in harvester->headerTable is selected.
 * NOTE(review): the initial value of retVal, the loop header and the final
 * return are not visible here. */
911 gboolean
addrharvest_check_header( AddressHarvester
*harvester
) {
916 cm_return_val_if_fail( harvester
!= NULL
, retVal
);
918 node
= harvester
->headerTable
;
922 entry
= ( HeaderEntry
* ) node
->data
;
/* Stop at the first selected entry. */
923 if( entry
->selected
) return TRUE
;
924 node
= g_list_next( node
);
930 * ============================================================================
932 * ============================================================================