New importer, Nat. Geo. Wild.
[nonametv.git] / tools / se / nonametv-parse-maildir
blobde269f29c409497e74be6babf26529aa91434a6e
1 #!/usr/bin/perl -w
3 use strict;
5 use FindBin;
6 use lib "$FindBin::Bin/../../lib";
8 use Mail::Box;
9 use Mail::Box::Manager;
11 use File::Temp qw/tempdir/;
12 use File::Copy qw/move copy/;
13 use File::Basename;
15 use Getopt::Long;
16 use Digest::MD5;
18 use Encode qw/decode/;
20 use NonameTV;
21 use NonameTV::DataStore;
22 use NonameTV::Factory qw/CreateFileStore CreateDataStore/;
23 use NonameTV::Log qw/d p w f SetVerbosity StartLogSection EndLogSection/;
25 my $opt = { manual => 0,
26 test => 0,
27 verbose => 0,
28 quiet => 0,
31 my $res = GetOptions( $opt, qw/manual test verbose/ );
33 $opt->{verbose} = 1 if $opt->{manual};
35 SetVerbosity( $opt->{verbose}, $opt->{quiet} );
37 if( not $res ) {
38 print << 'EOHELP';
39 nonametv-process-maildir [--manual] [--test] [--verbose]
41 Extract files from mails in a maildir and store them using
42 NonameTV::FileStore.
44 --manual Ask the user which Importer to use if the information
45 cannot be derived from the message.
47 --test Run the importer in test-mode.
49 EOHELP
51 exit 1;
54 my $ds = CreateDataStore();
56 my $root = "/home/mattias/Maildir/.Projects";
57 my @folders = qw(xmltv-data
58 NonameTV-Data.Disney
59 NonameTV-Data.Kanal9
60 NonameTV-Data.NationalGeographic
61 NonameTV-Data.KanalLokal
62 NonameTV-Data.Axess
65 # Flag messages AFTER they have been processed. Then I normally
66 # only have to move the message to the correct folder.
68 my $tmpdir = tempdir( CLEANUP => 1 );
70 my $mgr = Mail::Box::Manager->new;
72 StartLogSection( "nonametv-parse-maildir", 0 );
74 foreach my $dir (@folders) {
75 my $folder = $mgr->open( "$root.$dir",
76 access => 'rw',
77 extract => 'ALWAYS' );
79 foreach my $message ($folder->messages) {
80 next if $message->isDeleted();
81 next if $message->label( 'flagged' );
82 next unless $message->isMultipart;
84 # Use Encode::MIME::Header
85 my $subject = decode( 'MIME-Header', $message->get( 'Subject' ) ) || '';
86 my $from = $message->get( 'From' );
88 d "Processing $from - $subject";
90 if( process_parts( $message, $dir, $from, $subject ) ) {
91 $message->label( 'flagged' => 1 );
92 d "Processing succeeded.";
94 else {
95 d "Processing failed.";
100 $mgr->closeAllFolders;
102 EndLogSection( "nonametv-parse-maildir" );
104 sub process_parts
106 my( $part, $dir, $from, $subject ) = @_;
108 my $type = $part->get('Content-Type');
109 return 0 if $type eq "text/plain";
111 if($part->isMultipart)
113 my $count = 0;
114 foreach my $child ($part->parts)
116 $count += process_parts( $child, $dir, $from, $subject );
118 return $count;
120 else
122 my $disp = $part->body->disposition;
123 my $name = decode( 'MIME-Header',
124 $part->body->dispositionFilename ) || 'noname';
125 my $size = $part->body->size;
127 my $channel = guess_channel( $dir, $from, $subject, $name, $type, $size );
129 if( (not defined $channel) and $opt->{manual} )
131 print "$name ($size bytes): ";
132 $channel = <STDIN>;
133 $channel =~ tr/\n\r //d;
134 $channel = undef if $channel eq "";
137 if( not defined $channel ) {
138 d "$name skipped.";
139 return 0;
142 my $chd = $ds->sa->Lookup( 'channels', {xmltvid => $channel} );
144 die "Unknown channel $channel" if not defined $chd;
146 p "Adding file $name for $channel";
148 my $fs = GetFileStore( $chd->{grabber} );
150 my $content = $part->decoded->string;
151 $fs->AddFile( $channel, $name, \$content );
154 return $opt->{test} ? 0 : 1;
157 sub guess_channel
159 my( $dir, $from, $subject, $filename, $type, $size ) = @_;
161 if( $dir =~ /Expressen/ and $filename =~ /\.doc$/i ) {
162 return "sport.expressen.se";
165 if( $dir =~ /Disney/ ) {
166 # The Disney filenames are very unreliable.
167 if( $filename =~ /\bxd/i ) {
168 return "xd.disneychannel.se";
170 if( $filename =~ /\bdxd/i ) {
171 return "xd.disneychannel.se";
173 elsif( $filename =~ /playhouse/i ) {
174 return "playhouse.disneychannel.se";
176 elsif( $filename =~ /disney\s*channel/i ) {
177 return "disneychannel.se";
179 elsif( $filename eq "noname" ) {
180 return undef;
182 elsif( $subject =~ /toon\s*disney/i ) {
183 return "toon.disneychannel.se";
185 elsif( $subject =~ /playhouse/i ) {
186 return "playhouse.disneychannel.se";
188 elsif( $subject =~ /disney\s*channel/i ) {
189 return "disneychannel.se";
192 return undef;
196 if( $dir =~ /Aftonbladet/ ) {
197 if( $type eq "application/msword" or $size > 10000 ) {
198 return "tv7.aftonbladet.se";
202 if( $dir =~ /Kanal9/ and $filename =~ /\.doc$/i ) {
203 return "kanal9.se";
206 if( $dir =~ /NationalGeographic/ and $filename =~ /\.doc$/i ) {
207 return "hd.ngcsverige.com" if $filename =~ /^ng\s*swe\s*hd\b/i;
208 return "ngcsverige.com" if $filename =~ /^ng\s*swe\b/i;
211 if( $dir =~ /KanalLokal/ and $filename =~ /\.xml$/i ) {
212 if( $filename =~ /^1 STO/ ) {
213 return "stockholm.kanallokal.se";
215 elsif( $filename =~ /^2 OST/ ) {
216 return "ostergotland.kanallokal.se";
218 elsif( $filename =~ /^3 SKA/ ) {
219 return "skane.kanallokal.se";
221 elsif( $filename =~ /^4 GBG/ ) {
222 return "goteborg.kanallokal.se";
226 if( $dir =~ /Axess/ and $filename =~ /\.doc$/i ) {
227 return "axess.se";
230 return undef;
233 my $filestores = {};
235 sub GetFileStore {
236 my( $importer ) = @_;
238 if( not defined $filestores->{$importer} ) {
239 $filestores->{$importer} = CreateFileStore( $importer );
242 return $filestores->{$importer}
245 sub md5sum {
246 my( $file ) = @_;
247 open(FILE, $file) or die "Can't open '$file': $!";
248 binmode(FILE);
250 return Digest::MD5->new->addfile(*FILE)->hexdigest;