Augmenter: hook Augmenters into DataStore and enable augmenting for some Importers
[nonametv.git] / lib / NonameTV / DataStore.pm
blob18d13ea5f27c41175dd31561a6d6f3d8d40e6125
1 package NonameTV::DataStore;
3 use strict;
5 use NonameTV qw/FixProgrammeData/;
6 use NonameTV::Augmenter;
7 use NonameTV::Log qw/d p w f/;
8 use SQLAbstraction::mysql;
10 use Carp qw/confess/;
11 use DateTime::Format::Strptime;
13 use Storable qw/dclone/;
14 use Encode qw/decode_utf8/;
16 use utf8;
18 =head1 NAME
20 NonameTV::DataStore
22 =head1 DESCRIPTION
24 Interface to the datastore for NonameTV. The datastore is normally
25 an SQL database, but the interface for this class makes no
26 assumption about it.
28 =head1 METHODS
30 =over 4
32 =cut
34 =item new
36 The constructor for the object. Called with a hashref as the only parameter.
37 This is a ref to the configuration for the object from the nonametv.conf-
38 file.
40 The configuration must contain the following keys:
42 type
44 "MySQL" is currently the only allowed type.
46 dbhost, dbname, username, password
48 Specifies how to connect to the MySQL database.
50 =cut
52 sub new {
53 my $class = ref( $_[0] ) || $_[0];
55 my $self = {};
56 bless $self, $class;
58 # Copy the parameters supplied in the constructor.
59 foreach my $key ( keys( %{ $_[1] } ) ) {
60 $self->{$key} = ( $_[1] )->{$key};
63 defined( $self->{type} ) and $self->{type} eq "MySQL"
64 or die "type must be MySQL: $self->{type}";
66 defined( $self->{dbhost} ) or die "You must specify dbhost";
67 defined( $self->{dbname} ) or die "You must specify dbname";
68 defined( $self->{username} ) or die "You must specify username";
69 defined( $self->{password} ) or die "You must specify password";
71 $self->{sa} = SQLAbstraction::mysql->new(
73 dbhost => $self->{dbhost},
74 dbname => $self->{dbname},
75 dbuser => $self->{username},
76 dbpassword => $self->{password},
80 $self->{sa}->Connect();
82 $self->{SILENCE_END_START_OVERLAP} = 0;
83 $self->{SILENCE_DUPLICATE_SKIP} = 0;
85 return $self;
88 =item Creating a new batch
90 To create a new batch or replace an old batch completely,
91 do the following steps:
93 StartBatch( $batch_id );
94 AddProgramme( ... );
95 AddProgramme( ... );
96 ...
97 EndBatch( $success, $message );
99 =item StartBatch
101 Called by an importer to signal the start of a batch of updates.
102 Takes a single parameter containing a string that uniquely identifies
103 a set of programmes.
105 =cut
107 sub StartBatch {
108 my ( $self, $batchname ) = @_;
110 confess("Nested calls to StartBatch")
111 if ( defined( $self->{currbatch} ) );
113 my $id = $self->{sa}->Lookup( 'batches', { name => $batchname }, 'id' );
115 if ( defined($id) ) {
116 $self->{sa}->DoSql("START TRANSACTION");
117 $self->{sa}->Delete( 'programs', { batch_id => $id } );
119 else {
120 $id = $self->{sa}->Add( 'batches', { name => $batchname } );
121 $self->{sa}->DoSql("START TRANSACTION");
124 $self->{last_start} = "1970-01-01 00:00:00";
125 $self->{last_prog} = undef;
127 $self->SetBatch( $id, $batchname );
130 # Hidden method used internally and by DataStore::Updater.
131 sub SetBatch {
132 my $self = shift;
133 my ( $id, $batchname ) = @_;
135 $self->{currbatch} = $id;
136 $self->{currbatchname} = $batchname;
137 $self->{batcherror} = 0;
140 # Hidden method used internally and by DataStore::Updater.
141 sub ClearBatch {
142 my $self = shift;
144 delete $self->{currbatch};
145 delete $self->{currbatchname};
146 delete $self->{batcherror};
149 =item CommitPrograms
151 CommitPrograms is a dummy method in this class. It only actually does
152 something in NonameTV::DataStore::Helper.
154 =cut
156 sub CommitPrograms {
157 my $self = shift;
160 =item EndBatch
162 Called by an importer to signal the end of a batch of updates.
163 Takes two parameters:
165 An integer containing 1 if the batch was processed
166 successfully, 0 if the batch failed and the database should
167 be rolled back to the contents as they were before StartBatch was called.
168 and -1 if the batch should be rolled back because it has not changed.
170 A string containing a log-message to add to the batchrecord. If success==1,
171 then the log-message is stored in the field 'message'. If success==0, then
172 the log-message is stored in abort_message. If success==-1, the log message
173 is not stored. The log-message can be undef.
175 =cut
177 sub EndBatch {
178 my ( $self, $success, $log ) = @_;
180 confess("EndBatch called without StartBatch")
181 unless defined( $self->{currbatch} );
183 $log = "" if not defined $log;
185 $self->AddLastProgramme(undef);
187 if ( $success == 0 or $self->{batcherror} ) {
188 $self->{sa}->DoSql("Rollback");
189 d "Rolling back changes";
191 if ( defined($log) ) {
192 $self->SetBatchAbortMessage( $self->{currbatch}, $log );
195 elsif ( $success == 1 ) {
196 $self->{sa}->Update(
197 'batches',
198 { id => $self->{currbatch} },
200 last_update => time(),
201 message => $log,
202 abort_message => "",
206 if( exists( $self->{augment} ) ){
207 if( $self->{augment} == 1 ){
208 my $augmenter = NonameTV::Augmenter->new( $self );
209 $augmenter->AugmentBatch( $self->{currbatchname} );
213 $self->{sa}->DoSql("Commit");
215 elsif ( $success == -1 ) {
216 $self->{sa}->DoSql("Rollback");
218 else {
219 confess("Wrong value for success");
222 delete $self->{currbatch};
225 sub SetBatchAbortMessage {
226 my $self = shift;
227 my ( $batch, $message ) = @_;
229 $self->{sa}
230 ->Update( 'batches', { id => $batch }, { abort_message => $message } );
233 =item AddProgramme
235 Called by an importer to add a programme for the current batch.
236 Takes a single parameter contining a hashref with information
237 about the programme.
239 $ds->AddProgramme( {
240 channel_id => 1,
241 start_time => "2004-12-24 14:00:00",
242 end_time => "2004-12-24 15:00:00", # Optional
243 title => "Kalle Anka och hans vänner",
244 subtitle => "Episode title" # Optional
245 description => "Traditionsenligt julfirande",
246 episode => "0 . 12/13 . 0/3", # Season, episode and part as xmltv_ns
247 # Optional
248 category => [ "sport" ], # Optional
249 } );
251 The times must be in UTC. The strings must be properly encoded perl-strings.
253 To specify a period of no programmes, either set the end_time of the last
254 programme explicitly, or add a special program like this:
256 $ds->AddProgramme( {
257 channel_id => 1,
258 start_time => "2004-12-24 23:00:00",
259 title => "end-of-transmission",
260 } );
263 =cut
265 sub AddProgramme {
266 my ( $self, $data ) = @_;
268 confess("You must call StartBatch before AddProgramme")
269 unless exists $self->{currbatch};
271 confess(
272 "Required item channel_id missing"
273 ) if not defined( $data->{channel_id} );
275 return if $self->{batcherror};
277 if ( ( $data->{start_time} eq $self->{last_start} )
278 and ( $data->{title} = $self->{last_title} ) )
280 w "Skipping duplicate entry for $data->{start_time}"
281 unless $self->{SILENCE_DUPLICATE_SKIP};
282 return;
284 elsif ( $data->{start_time} le $self->{last_start} ) {
285 w "Starttime must be later than last starttime: "
286 . $self->{last_start} . " -> "
287 . $data->{start_time} . ": "
288 . $data->{title};
289 return;
292 my $first_programme = defined $self->{last_prog} ? 0 : 1;
294 $self->AddLastProgramme( $data->{start_time} );
296 $self->{last_start} = $data->{start_time};
297 $self->{last_title} = $data->{title};
299 if ( $data->{title} eq 'end-of-transmission' and not $first_programme ) {
301 # If this is not the first programme in the batch, then
302 # we have already added all the necessary info with the call to
303 # AddLastProgramme. Do not add an explicit entry for end-of-transmission
304 # since this might collide with the start of tomorrows shows.
305 return;
308 if ( exists( $data->{end_time} ) ) {
309 if ( $data->{start_time} ge $data->{end_time} ) {
310 w "Stoptime must be later than starttime: "
311 . $data->{start_time} . " -> "
312 . $data->{end_time} . ": "
313 . $data->{title};
314 return;
318 FixProgrammeData($data);
320 $self->{last_prog} = dclone($data);
323 sub AddLastProgramme {
324 my $self = shift;
325 my ($nextstart) = @_;
327 my $data = $self->{last_prog};
328 return if not defined $data;
330 if ( defined($nextstart) ) {
331 if ( defined( $data->{end_time} ) ) {
332 if ( $nextstart lt $data->{end_time} ) {
333 w "Starttime must be later than or equal to last endtime: "
334 . $data->{end_time} . " -> "
335 . $nextstart
336 unless $self->{SILENCE_END_START_OVERLAP};
338 $data->{end_time} = $nextstart;
341 else {
342 $data->{end_time} = $nextstart;
346 $self->AddProgrammeRaw($data);
347 $self->{last_prog} = undef;
350 =item AddProgrammeRaw
352 Same as AddProgramme but does not check for overlapping programmes or
353 require that the programmes are added in order.
355 =cut
357 sub AddProgrammeRaw {
358 my ( $self, $data ) = @_;
360 die("You must call StartBatch before AddProgramme")
361 unless exists $self->{currbatch};
363 return if $self->{batcherror};
365 if ( $data->{title} !~ /\S/ ) {
366 w "Empty title at " . $data->{start_time};
367 $data->{title} = "end-of-transmission";
370 $data->{batch_id} = $self->{currbatch};
372 if ( not defined( $data->{category} ) ) {
373 delete( $data->{category} );
376 if ( not defined( $data->{program_type} ) ) {
377 delete( $data->{program_type} );
380 if ( exists( $data->{description} ) and defined( $data->{description} ) ) {
382 # Strip leading and trailing whitespace from description.
383 $data->{description} =~ s/^\s+//;
384 $data->{description} =~ s/\s+$//;
387 if ( $self->{sa}->Add( 'programs', $data, 0 ) == -1 ) {
388 my $err = $self->{dbh_errstr};
390 # Check for common error-conditions
391 my $data_org = $self->{sa}->Lookup(
392 "programs",
394 channel_id => $data->{channel_id},
395 start_time => $data->{start_time}
399 if ( defined($data_org) ) {
400 if ( $data_org->{title} eq "end-of-transmission" ) {
401 w "Replacing end-of-transmission "
402 . "for $data->{channel_id}-$data->{start_time}";
404 $self->{sa}->Delete(
405 "programs",
407 channel_id => $data->{channel_id},
408 start_time => $data->{start_time}
412 if ( $self->{sa}->Add( 'programs', $data, 0 ) == -1 ) {
413 w $self->{dbh_errstr};
414 $self->{batcherror} = 1;
417 elsif ( $data_org->{title} eq $data->{title} ) {
418 w "Skipping duplicate entry "
419 . "for $data->{channel_id}-$data->{start_time}"
420 unless $self->{SILENCE_DUPLICATE_SKIP};
422 else {
423 w "Duplicate programs "
424 . $data->{start_time} . ": '"
425 . $data->{title} . "', '"
426 . $data_org->{title}
427 . "'";
428 $self->{batcherror} = 1;
431 else {
432 if ($err) {
433 w $err;
434 } else {
435 w "FIXME error but empty err variable! (might be field in ce that is not in the programs table)";
437 $self->{batcherror} = 1;
442 =item ClearChannel
444 Delete all programs for a channel. Takes one parameter, the channel id
445 for the channel in question.
447 Returns the number of programs that were deleted.
449 =cut
451 sub ClearChannel {
452 my $self = shift;
453 my ($chid) = @_;
455 my $deleted = $self->{sa}->Delete( 'programs', { channel_id => $chid } );
457 $self->{sa}->DoSql( "delete from batches where name like '${chid}_%'", [] );
458 $self->{sa}->Delete( 'files', { channelid => $chid } );
460 return $deleted;
463 =item FindGrabberChannels
465 Returns an array with all channels associated with a specific channel.
466 Each channel is described by a hashref with keys matching the database.
468 Takes one parameter: the name of the grabber.
469 The result is ordered by xmltvid.
471 =cut
473 sub FindGrabberChannels {
474 my $self = shift;
475 my ($grabber) = @_;
477 my @result;
479 return $self->{sa}->LookupMany( 'channels', { grabber => $grabber },
480 [ 'xmltvid' ] );
483 =item LookupCat
485 Lookup a category found in an infile and translate it to
486 a proper program_type and category for use in AddProgramme.
488 my( $pty, $cat ) = $ds->LookupCat( 'Viasat', 'MUSIK' );
489 $ds->AddProgramme( { ..., category => $cat, program_type => $pty } );
491 =cut
493 sub LookupCat {
494 my $self = shift;
495 my ( $type, $org ) = @_;
497 return ( undef, undef ) if ( not defined($org) ) or ( $org !~ /\S/ );
499 $org =~ s/^\s+//;
500 $org =~ s/\s+$//;
502 # I should be using locales, but I don't dare turn them on.
503 $org = lc($org);
504 $org =~ tr/ÅÄÖ/åäö/;
506 # The field has room for 50 characters. Unicode may occupy
507 # several bytes with one character.
508 # Treat all categories with the same X character prefix
509 # as equal.
510 $org = substr( $org, 0, 44 );
512 $self->LoadCategories()
513 if not exists( $self->{categories} );
515 if ( not exists( $self->{categories}->{"$type++$org"} ) ) {
517 # MySQL considers some characters as equal, e.g. e and é.
518 # Trying to insert both anime and animé will give an error-message
519 # from MySql. Therefore, I try to lookup the new entry before adding
520 # it to see if MySQL thinks it already exists. I should probably
521 # normalize the strings before inserting them instead...
522 my $data =
523 $self->{sa}->Lookup( "trans_cat", { type => $type, original => $org } );
524 if ( defined($data) ) {
525 $self->{categories}->{ $type . "++" . $org } =
526 [ $data->{program_type}, $data->{category} ];
528 else {
529 $self->AddCategory( $type, $org );
533 if ( defined( $self->{categories}->{"$type++$org"} ) ) {
534 return @{ ( $self->{categories}->{"$type++$org"} ) };
536 else {
537 return ( undef, undef );
542 =item Reset
544 Reset the datastore-object to its initial state. This method can be called
545 between imports to make sure that errors from one import does not affect
546 the next import.
548 =cut
550 sub Reset {
551 my $self = shift;
553 if ( defined( $self->{currbatch} ) ) {
554 $self->EndBatch(0);
558 =item StartTransaction
560 Start a new datastore transaction. Can be used to wrap a set of datastore
561 operations into a single transaction that can either be committed or
562 reverted.
564 $ds->StartTransaction();
565 # Do stuff to the datastore
566 $ds->EndTransaction(1); # Commit the changes.
568 =cut
570 sub StartTransaction {
571 my $self = shift;
573 $self->{sa}->DoSql("START TRANSACTION");
576 =item EndTransaction
578 End a datastore transaction. Takes a boolean parameter that decides if
579 the transaction shall be committed (true) or reverted (false).
581 =cut
583 sub EndTransaction {
584 my $self = shift;
585 my ($commit) = @_;
587 if ($commit) {
588 $self->{sa}->DoSql("COMMIT");
590 else {
591 $self->{sa}->DoSql("ROLLBACK");
595 sub LoadCategories {
596 my $self = shift;
598 my $d = {};
600 my $sth = $self->{sa}->Iterate( 'trans_cat', {} );
601 if ( not defined($sth) ) {
602 $self->{categories} = {};
603 w "No categories found in database.";
604 return;
607 while ( my $data = $sth->fetchrow_hashref() ) {
608 $d->{ $data->{type} . "++" . $data->{original} } =
609 [ $data->{program_type}, $data->{category} ];
611 $sth->finish();
613 $self->{categories} = $d;
616 sub AddCategory {
617 my $self = shift;
618 my ( $type, $org ) = @_;
620 $self->{sa}->Add(
621 'trans_cat',
623 type => $type,
624 original => $org
627 $self->{categories}->{"$type++$org"} = [ undef, undef ];
630 =item sa
632 Returns the SQLAbstraction object to give direct access to the database.
634 =cut
636 sub sa {
637 my $self = shift;
639 return $self->{sa};
642 =item ParsePrograms
644 Replacement for fetching our own export and parsing with ParseXmltv.
645 Returns an array of hashrefs just like ParseXmltv.
646 Parameter is a batch name. (file name of the export without .xml.gz)
648 =cut
650 sub ParsePrograms {
651 my $self;
652 my $res;
653 my $sth;
655 my $parser = DateTime::Format::Strptime->new( pattern => '%Y-%m-%d %H:%M:%S' );
657 # replacement for ParseXmltv on our own Export
658 $self = shift;
659 my $batch_id = shift;
660 my( $xmltv_id, $date ) = ($batch_id =~ m|^(.*)_([-\d]+)$|);
661 my $next_date = $parser->parse_datetime( $date . ' 00:00:00' )->add( days => 1 )->ymd('-');
663 my $channel = $self->sa->Lookup( 'channels', { xmltvid => $xmltv_id } );
664 my $def_cat;
665 my $def_pty;
666 if( $channel ){
667 $def_cat = $channel->{def_cat};
668 $def_pty = $channel->{def_pty};
671 ( $res, $sth ) = $self->sa->Sql( "
672 SELECT p.* from programs p, channels c
673 WHERE (c.xmltvid = ?)
674 and (p.channel_id = c.id)
675 and (p.start_time >= ?)
676 and (p.start_time <= ?)
677 ORDER BY start_time asc, end_time desc",
678 [$xmltv_id, $date . ' 00:00:00', $next_date . ' 23:59:59'] );
680 my @result;
682 my $done;
683 my $ce = $sth->fetchrow_hashref();
684 if( !defined( $ce ) ) {
685 return undef;
687 while( my $next_ce = $sth->fetchrow_hashref() ) {
688 # Break loop once we have got the whole day?
689 if( $ce->{start_time} gt $date . ' 23:59:59' ) {
690 $done = 1;
691 last;
693 if( $ce->{aspect} eq 'unknown' ) {
694 delete $ce->{aspect};
696 foreach my $key (keys %$ce) {
697 if( !defined( $ce->{$key} )) {
698 delete $ce->{$key};
699 } elsif( $ce->{$key} eq '' ){
700 delete $ce->{$key};
701 } elsif( $ce->{$key} eq '0000-00-00 00:00:00') {
702 delete $ce->{$key};
706 $ce->{start_dt} = $parser->parse_datetime( $ce->{start_time} );
707 delete $ce->{start_time};
709 if( exists( $ce->{end_time} )) {
710 $ce->{stop_dt} = $parser->parse_datetime( $ce->{end_time} );
711 delete $ce->{end_time};
712 } else {
713 $ce->{stop_dt} = $parser->parse_datetime( $next_ce->{start_time} );
716 if( !defined( $ce->{category} ) && $def_cat){
717 $ce->{category} = $def_cat;
719 if( !defined( $ce->{program_type} ) && $def_pty){
720 $ce->{program_type} = $def_pty;
723 push (@result, $ce);
724 $ce = $next_ce;
727 return \@result;
730 =back
732 =head1 COPYRIGHT
734 Copyright (C) 2006 Mattias Holmlund.
736 =cut
740 ### Setup coding system
741 ## Local Variables:
742 ## coding: utf-8
743 ## End: