Augmenter: hook Augmenters into DataStore and enable augmenting for some Importers
[nonametv.git] / lib / NonameTV / Augmenter.pm
blobb1904163c57730318358318cc357c417104e72d5
1 package NonameTV::Augmenter;
3 use strict;
4 use warnings;
6 use NonameTV::Factory qw/CreateAugmenter/;
7 use NonameTV::Log qw/d/;
10 # THIS IS NOT THE BASE CLASS FOR AUGMENTERS! (CONTRARY TO HOW IMPORTER.PM IS THE BASE CLASS FOR IMPORTERS)
12 # Augmenters augment programmes with additional data
14 # tools/nonametv-augment
16 # Fixups applies manually configured fixups, see tv_grab_uk_rt for use cases
17 # PreviouslyShown copies data from previous showings on the same or other channels
18 # TheTVDB replaces programme related data with community data
19 # TMDb replaces programme related data with community data
21 # The configuration is stored in one table in the database
23 # fields in the configuration
24 # channel_id - the channel id (foreign key, may be null)
25 # augmenter - name of the augmenter to run, case sensitive
26 # title - program title to match
27 # otherfield - other field to match (e.g. episodetitle, program_type, description, start_time)
28 # othervalue - value to match in otherfield
29 # remoteref - reference in foreign database, e.g. movie id in tmdb or series id in tvdb
30 # matchby - which field is matched in the remote datastore in which way
31 # e.g. match episode by episodetitle in tmdb
32 # or match episode by absolute episode number in tmdb
35 # DasErste, Fixups, Tagesschau, , , , setcategorynews
36 # DasErste, Fixups, Frauenfussballlaenderspiel, , , , settypesports
37 # Tele5 match by episodetitle for known series at TVDB
38 # Tele5 match Babylon5 by absolute number (need to add that to the backend first)
39 # Tele5 match by title for programmes with otherfield category=movie at TMDB
40 # ZDFneo, TheTVDB, Weeds, , , 74845, episodetitle
41 # ZDFneo, TheTVDB, Inspector Barnaby
42 # ZDFneo match by episodetitle for known series at TVDB
45 # Usual order of execution
46 # run Importers that really import
47 # run Augmenters
48 # run Importers to copy/mix/transform (combine, downconvert, timeshift)
49 # run Exporters
52 # Logic
53 # 1) get timestamp of last start of augmenter
54 # 2) find batches that have been updated (reset to station data) since then
55 # 3) order batches by batch id
56 # 4) for each batch
57 # 5) collect all augmenters that match by channel_id
58 # 6) for each programme ordered by start time
59 # 7) select matching augmenters
60 # 7b) skip to next programme if none matches or it is the same as last time
61 # 8) order augmenters by priority
62 # 9) apply augmenter with highest priority
66 # API for each Augmenter
68 # initialize
69 # create backend API instances etc.
71 # augment (Programme)
72 # input: programme + rule
73 # output: programme + error
# Constructor.
#   $_[0] - class name, or an existing instance whose class is reused
#   $_[1] - datastore handle; kept in $self->{datastore} for the other methods
# Returns the new blessed hash reference.
sub new( @@ ){
  my( $invocant, $datastore ) = @_;

  # allow both Class->new( ... ) and $instance->new( ... )
  my $class = ref( $invocant ) || $invocant;

  my $self = bless( { }, $class );
  $self->{datastore} = $datastore;

  return $self;
}
# Fetch the 'value' stored in table 'state' under the name
# "augmenter_last_update".
# If no value is stored yet, a row with value 0 is added and 0 is returned,
# otherwise the stored value is returned unchanged.
sub ReadLastUpdate( @ ){
  my( $self ) = @_;

  my $datastore = $self->{datastore};

  my $stored = $datastore->sa->Lookup( 'state',
                                       { name => "augmenter_last_update" },
                                       'value' );

  # the common case: a previous run already recorded a value
  return $stored if defined( $stored );

  # first run: seed the state table so later updates have a row to change
  $datastore->sa->Add( 'state', { name => "augmenter_last_update", value => 0 } );

  return 0;
}
# Store $update_started as the 'value' of the row named
# "augmenter_last_update" in table 'state'.
#   $update_started - value to record
# The result of the Update call is passed back to the caller.
sub WriteLastUpdate( @@ ){
  my( $self, $update_started ) = @_;

  my $datastore = $self->{datastore};
  my %row_selector = ( name => "augmenter_last_update" );
  my %new_fields   = ( value => $update_started );

  return $datastore->sa->Update( 'state', \%row_selector, \%new_fields );
}
# Comparison routine for sort, reads the sort variables $a and $b.
# Orders rules by descending {score}; rules without a defined score
# are placed after all rules that have one.
sub cmp_rules_by_score( ){
  my $a_scored = defined( $a->{score} );
  my $b_scored = defined( $b->{score} );

  # two unscored rules are equal
  return 0 if !$a_scored && !$b_scored;

  # an unscored rule sorts behind a scored one
  return 1 if !$a_scored;
  return -1 if !$b_scored;

  # both scored: higher score first
  return -($a->{score} <=> $b->{score});
}
# Render one augmenter rule as a single line of text for log output.
#   $_[0] - hash reference of a rule (keys used: channel_id, title,
#           otherfield, othervalue, augmenter, matchby, remoteref)
# Returns the empty string when no rule is given. matchby and remoteref
# are only printed when the rule names an augmenter.
sub sprint_rule( @ ){
  my( $rule ) = @_;

  return( '' ) if !$rule;

  my @pieces;

  if( $rule->{channel_id} ){
    push( @pieces, 'channel=' . $rule->{channel_id} . ', ' );
  }

  if( $rule->{title} ){
    push( @pieces, 'title=\'' . $rule->{title} . '\', ' );
  }

  if( $rule->{otherfield} ){
    # an undefined othervalue is printed as NULL
    my $match = defined( $rule->{othervalue} )
      ? $rule->{otherfield} . '=\'' . $rule->{othervalue} . '\', '
      : $rule->{otherfield} . '=NULL, ';
    push( @pieces, $match );
  }

  if( $rule->{augmenter} ){
    push( @pieces, $rule->{augmenter} );
    push( @pieces, '::' . $rule->{matchby} ) if $rule->{matchby};
    push( @pieces, '( ' . $rule->{remoteref} . ' )' ) if $rule->{remoteref};
  }

  return( join( '', @pieces ) );
}
# Describe, one line per attribute, what applying $augment_ref to
# $programme_ref would change. Only the attributes title, subtitle,
# episode, program_type, category and actors are looked at, and only
# when the key exists in the augment data.
#   $_[0] - hash reference of the programme as it is now
#   $_[1] - hash reference of the data returned by an augmenter
# Returns the description, or the empty string if either argument is
# missing or nothing would change.
sub sprint_augment( @@ ){
  my( $programme, $augment ) = @_;
  my $report = '';

  return( $report ) if !( $programme && $augment );

  foreach my $field ( 'title', 'subtitle', 'episode',
                      'program_type', 'category', 'actors' ) {
    # attributes the augmenter did not mention are left alone
    next if !exists( $augment->{$field} );

    my $has_old = defined( $programme->{$field} );
    my $has_new = defined( $augment->{$field} );

    if( $has_old && $has_new ) {
      # TODO add verbose mode that also reports unchanged attributes
      next if $programme->{$field} eq $augment->{$field};

      $report .= ' changing ' . $field . " to \'" .
        $augment->{$field} . "\' was \'" .
        $programme->{$field} . "\'\n";
    } elsif( $has_old ) {
      $report .= ' removing ' . $field . "\n";
    } elsif( $has_new ) {
      $report .= ' adding ' . $field . " as \'" .
        $augment->{$field} . "\'\n";
    }
  }

  return( $report );
}
188 sub AugmentBatch( @@ ) {
189 my( $self, $batchid )=@_;
192 # set up for augmenting one specific channel by batchid
194 (my $channel_xmltvid )=($batchid =~ m|^(\S+)_|);
196 my( $res, $sth ) = $self->{datastore}->sa->Sql( "
197 SELECT ar.*
198 FROM channels c, augmenterrules ar
199 WHERE c.xmltvid LIKE ?
200 AND (ar.channel_id = c.id
201 OR ar.channel_id IS NULL)",
202 [$channel_xmltvid] );
204 my $augmenter = { };
205 my @ruleset;
207 my $iter;
208 while( defined( $iter = $sth->fetchrow_hashref() ) ){
209 # set up augmenters
210 if( !defined( $augmenter->{ $iter->{'augmenter'} } ) ){
211 d( "creating augmenter '" . $iter->{'augmenter'} . "' augmenter\n" );
212 $augmenter->{ $iter->{'augmenter'} }= CreateAugmenter( $iter->{'augmenter'}, $self->{datastore} );
215 # append rule to array
216 push( @ruleset, $iter );
219 if( @ruleset == 0 ){
220 d( 'no augmenterrules for this batch' );
221 return;
225 d( "ruleset for this batch: \n" );
226 foreach my $therule ( @ruleset ) {
227 d( sprint_rule( $therule ) . "\n" );
232 # augment all programs from one batch by batchid
235 # program metadata from augmenter
236 my $newprogram;
237 # result code from augmenter
238 my $result;
240 ( $res, $sth ) = $self->{datastore}->sa->Sql( "
241 SELECT p.* from programs p, batches b
242 WHERE (p.batch_id = b.id)
243 AND (b.name LIKE ?)
244 ORDER BY start_time asc, end_time desc",
245 # name of batch to use for testing
246 [$batchid] );
248 my $ce;
249 while( defined( $ce = $sth->fetchrow_hashref() ) ) {
250 # copy ruleset to working set
251 my @rules = @ruleset;
253 if( defined( $ce->{subtitle} ) ) {
254 d( "augmenting program: " . $ce->{title} . " - \"" . $ce->{subtitle} . "\"\n" );
255 } else {
256 d( "augmenting program: " . $ce->{title} . "\n" );
259 # loop until no more rules match
260 while( 1 ){
262 # order rules by quality of match
264 foreach( @rules ){
265 my $score = 0;
267 # match by channel_id
268 if( defined( $_->{channel_id} ) ) {
269 if( $_->{channel_id} eq $ce->{channel_id} ){
270 $score += 1;
271 } else {
272 $_->{score} = undef;
273 next;
277 # match by title
278 if( defined( $_->{title} ) ) {
279 # regexp?
280 if( $_->{title} =~ m|^\^| ) {
281 if( $ce->{title} =~ m|$_->{title}| ){
282 $score += 4;
283 } else {
284 $_->{score} = undef;
285 next;
287 } else {
288 if( $_->{title} eq $ce->{title} ){
289 $score += 4;
290 } else {
291 $_->{score} = undef;
292 next;
297 # match by other field
298 if( defined( $_->{otherfield} ) ){
299 if( defined( $_->{othervalue} ) ) {
300 if( $_->{othervalue} eq $ce->{$_->{otherfield}} ){
301 $score += 2;
302 } else {
303 $_->{score} = undef;
304 next;
306 } else {
307 if( !defined( $_->{othervalue} ) ){
308 $score += 2;
309 } else {
310 $_->{score} = undef;
311 next;
316 $_->{score} = $score;
319 @rules = sort{ cmp_rules_by_score }( @rules );
320 #printf( "rules after sorting: %s\n", Dumper( \@rules ) );
322 # take the best matching rule from the array (we apply it now and don't want it to match another time)
323 my $rule = shift( @rules );
325 # end loop if the best matching rule is not a matching rule after all
326 if( !defined( $rule->{score} ) ){
327 last;
330 d( 'best matching rule: ' . sprint_rule( $rule ) . "\n" );
332 # apply the rule
333 ( $newprogram, $result ) = $augmenter->{$rule->{augmenter}}->AugmentProgram( $ce, $rule );
335 if( defined( $newprogram) ) {
336 d( "augmenting as follows:\n" . sprint_augment( $ce, $newprogram ) );
337 while( my( $key, $value )=each( %$newprogram ) ) {
338 if( $value ) {
339 $ce->{$key} = $value;
340 } else {
341 delete( $ce->{$key} );
345 # handle description as a special case. We will not remove it, only replace it.
346 if( exists( $newprogram->{description} ) ) {
347 if( !$newprogram->{description} ) {
348 delete( $newprogram->{description} );
352 # TODO collect updates, compare and only push back to database what really has been changed
353 $self->{datastore}->sa->Update( 'programs', {
354 channel_id => $ce->{channel_id},
355 start_time => $ce->{start_time}
356 }, $newprogram );
359 # go around and find the next best matching rule