Xmltv exporter modified to support channel list
[nonametv.git] / lib / NonameTV / Importer / NovaTV.pm
blob541ccc63c770b15795ce101935d47e28ffb88a60
1 package NonameTV::Importer::NovaTV;
3 use strict;
4 use warnings;
6 =pod
8 Import data from Word-files delivered via e-mail. Each day
9 is handled as a separate batch.
11 Features:
13 =cut
15 use utf8;
17 use DateTime;
18 use XML::LibXML;
19 #use Text::Capitalize qw/capitalize_title/;
21 use NonameTV qw/MyGet Wordfile2Xml Htmlfile2Xml norm AddCategory/;
22 use NonameTV::DataStore::Helper;
23 use NonameTV::Log qw/info progress error logdie
24 log_to_string log_to_string_result/;
26 use NonameTV::Importer::BaseFile;
28 use base 'NonameTV::Importer::BaseFile';
# Constructor.
#
# Delegates object creation to the parent class, re-blesses the result into
# the requested class, records the grabber name and attaches a
# NonameTV::DataStore::Helper built around the object's datastore.
#
# Arguments: whatever the parent constructor accepts (passed through as-is).
# Returns:   the new importer object.
sub new {
  my $invocant = shift;

  # Works both as a class method and when called on an existing instance.
  my $class = ref( $invocant ) || $invocant;

  my $self = bless( $class->SUPER::new( @_ ), $class );

  $self->{grabber_name} = "NovaTV";
  $self->{datastorehelper} =
    NonameTV::DataStore::Helper->new( $self->{datastore} );

  return $self;
}
# Import one schedule file for a channel.
#
# The file is converted with Wordfile2Xml and every <div> of the result is
# classified as one of:
#   - a blank line or the 'PROGRAM NOVE TV za' header,
#   - a date line        ('MONDAY 12.4.')      -> starts a new batch,
#   - a show line        ('19.30 Title, genre') -> buffered in @ces,
#   - an ALL CAPS line   -> selects the buffered show with that title as
#                           the target for the description lines that follow,
#   - anything else      -> description text for the current target show.
#
# Programmes are buffered per day because their descriptions come after the
# show lines; they are handed to the DataStore helper when the day is
# complete (next date line or end of file).
#
# Arguments:
#   $file - path of the file to import
#   $chd  - channel data hash; the keys 'xmltvid' and 'id' are read here
# Returns: nothing.
sub ImportContentFile
{
  my $self = shift;
  my( $file, $chd ) = @_;

  if( $file !~ /program/i and $file !~ /izmjena/i and $file !~ /\.doc/ ) {
    progress( "NovaTV: Skipping unknown file $file" );
    return;
  }

  progress( "NovaTV: Processing $file" );

  $self->{fileerror} = 0;

  my $xmltvid = $chd->{xmltvid};
  my $channel_id = $chd->{id};
  my $dsh = $self->{datastorehelper};
  my $ds = $self->{datastore};

  my $doc = Wordfile2Xml( $file );

  if( not defined( $doc ) ) {
    error( "NovaTV $file: Failed to parse" );
    return;
  }

  # Text styled as upper case is only marked as such in the markup;
  # upper-case the actual text so the ALL CAPS detection below can see it.
  my @nodes = $doc->findnodes( '//span[@style="text-transform:uppercase"]/text()' );
  foreach my $node (@nodes) {
    $node->setData( uc( $node->getData() ) );
  }

  # Find all paragraphs.
  my $ns = $doc->find( "//div" );

  if( $ns->size() == 0 ) {
    error( "NovaTV $file: No divs found." ) ;
    return;
  }

  my $currdate;      # date of the batch that is currently open, if any
  my $nowyear = DateTime->today->year();
  my @ces;           # programmes buffered for the current day
  my $targetshow;    # element of @ces the description lines belong to
  my( $description, $subtitle, $directors, $actors );

  # Move the buffered description fields onto the current target show.
  my $attach_details = sub {
    return unless $targetshow and $description;

    $targetshow->{description} = $description;
    $targetshow->{subtitle} = $subtitle if defined $subtitle;
    $targetshow->{directors} = $directors if defined $directors;
    $targetshow->{actors} = $actors if defined $actors;

    undef $description;
    undef $subtitle;
    undef $directors;
    undef $actors;
  };

  # Hand all buffered programmes of the current day to the helper and
  # reset the per-day state. Must run while that day's batch is still open.
  my $flush_day = sub {
    # the last description of the day has not been attached yet
    $attach_details->();

    foreach my $element (@ces) {
      progress("NovaTV: $element->{start_time} : $element->{title}");
      $dsh->AddProgramme( $element );
    }

    @ces = ();
    undef $targetshow;
    undef $description;
    undef $subtitle;
    undef $directors;
    undef $actors;
  };

  foreach my $div ($ns->get_nodelist) {

    my( $text ) = norm( $div->findvalue( '.' ) );

    if( $text eq "" ) {
      # blank line
    }
    elsif( $text =~ /^PROGRAM NOVE TV za/i ) {
      progress("NovaTV: OK, this is the file with the schedules: $file");
    }
    elsif( $text =~ /^(\S+) (\d+)\.(\d+)/ ) { # the line with the date in format 'MONDAY 12.4.'

      # The pattern above also matches text that is not a valid date;
      # do not let such a line abort the whole import.
      my $date = eval { ParseDate( $text , $nowyear ) };
      if( not defined $date ) {
        error( "NovaTV $file: Ignoring unparseable date line: $text" );
        next;
      }

      progress("NovaTV: Date $date");

      # Store the previous day's programmes in their own batch BEFORE it
      # is closed, so they are not added under the new day's date.
      if( defined $currdate ) {
        $flush_day->();
        $dsh->EndBatch( 1 );
      }

      my $batch_id = "${xmltvid}_" . $date->ymd();
      $dsh->StartBatch( $batch_id, $channel_id );
      $dsh->StartDate( $date->ymd("-") , "07:00" );
      $currdate = $date;
    }
    elsif( $text =~ /^(\d+)\.(\d+) (\S+)/ ) { # the line with the show in format '19.30 Show title, genre'

      # Without a date there is no batch to put the show in and no day
      # to compute its start time from.
      if( not defined $currdate ) {
        error( "NovaTV $file: Ignoring show line before any date: $text" );
        next;
      }

      my( $starttime, $title, $genre ) = ParseShow( $text , $currdate );

      my $ce = {
        channel_id => $chd->{id},
        start_time => $starttime->hms(":"),
        title => norm($title),
      };

      if( $genre ){
        my($program_type, $category ) = $ds->LookupCat( 'NovaTV', $genre );
        AddCategory( $ce, $program_type, $category );
      }

      # add the programme to the array
      # as we have to add description later
      push( @ces , $ce );
    }
    elsif( isCroUcase( $text ) ) { # the line with description title in format 'ALL IN CAPS'

      # if we have something in the description buffer
      # then this is for the last targetshow
      $attach_details->();

      my $utext = utf8ucase( $text );

      # find if we have the show with that name
      # NOTE(review): when no buffered show matches, the previous target
      # show is kept and will receive the following text - confirm that
      # this is intended.
      foreach my $element (@ces) {

        my $utitle = utf8ucase( $element->{title} );

        if( $utext eq $utitle ){
          $targetshow = $element;
          last;
        }
      }
    }
    else {

      # if we know the target show then this is the description
      if( $targetshow ){

        $description .= $text;

        # subtitle if the line starts with a parenthesised part
        if( $text =~ /^\(.*\)/ ){
          $subtitle = $text;
        }

        # director if present in one text line
        if( $text =~ s/^Redatelj: // ){
          $directors = $text;
        }

        # actor if present in the one text line
        if( $text =~ s/^Glume: // ){
          $actors = $text;
        }

      } else {
        #error( "Ignoring $text" );
      }
    }
  }

  # The last day of the file has no following date line to trigger the
  # flush, so store its programmes here.
  $flush_day->() if defined $currdate;

  # NOTE(review): as before, EndBatch is called even when no date line was
  # found and therefore no batch was started - confirm the helper copes.
  $dsh->EndBatch( 1 );

  return;
}
# Parse a date line of the form 'MONDAY 12.4.' (day name, day, month).
#
# Arguments:
#   $text - the line to parse
#   $year - the year to use, as the line itself carries none
# Returns: a DateTime at midnight in the Europe/Zagreb time zone, or undef
#          when the text holds no 'word day.month' pattern or the day/month
#          pair is not a valid date.
sub ParseDate {
  my( $text, $year ) = @_;

  my( $day, $month ) = ( $text =~ /\S+ (\d+)\.(\d+)/ );
  return unless defined $month;

  # DateTime->new throws on impossible values such as 31.2.; callers test
  # the result with defined(), so turn that into an undef result.
  my $dt = eval {
    DateTime->new( year      => $year,
                   month     => $month,
                   day       => $day,
                   hour      => 0,
                   minute    => 0,
                   second    => 0,
                   time_zone => 'Europe/Zagreb',
                 );
  };

  return $dt;
}
# Parse a show line of the form '19.30 Show title, genre'.
#
# Arguments:
#   $text - the line to parse
#   $date - object for midnight of the broadcast day; it is cloned, never
#           modified
# Returns: the list ( $start, $title, $genre ) where $start is the clone of
#          $date advanced by the parsed hours and minutes. $genre is undef
#          when the line has no ', ' separator. An empty list is returned
#          when the line does not look like a show line at all.
sub ParseShow {
  my( $text, $date ) = @_;

  my( $hour, $min, $string ) = ( $text =~ /(\d+)\.(\d+) (.*)/ );
  return unless defined $string;

  # Everything after the last ', ' is the genre. If there is no such
  # separator (including a comma without a following space) the whole
  # string is the title, so a title is never lost.
  my( $title, $genre ) = ( $string =~ m/^(.*), (.*)$/ );
  $title = $string unless defined $title;

  my $sdt = $date->clone()->add( hours => $hour , minutes => $min );

  return( $sdt , $title , $genre );
}
# Upper-case a string, including the Croatian letters with diacritics when
# they are present as UTF-8 encoded byte pairs.
#
# uc() is applied to the result afterwards; the byte pairs below are
# replaced explicitly first, and every occurrence is replaced, not just
# the first one.
#
# Arguments: $str - the string to convert
# Returns:   the upper-cased copy; $str itself is not modified.
sub utf8ucase {
  my( $str ) = @_;
  my $newstr = $str;

  $newstr =~ s/\xC4\x8D/\xC4\x8C/g; # tvrdo c
  $newstr =~ s/\xC4\x87/\xC4\x86/g; # meko c
  $newstr =~ s/\xC4\x91/\xC4\x90/g; # d
  $newstr =~ s/\xC5\xA1/\xC5\xA0/g; # s
  $newstr =~ s/\xC5\xBE/\xC5\xBD/g; # z

  $newstr = uc($newstr);

  return( $newstr );
}
# Tell whether a string is written entirely in capitals.
#
# The check is purely "contains no character of the [[:lower:]] class", so
# strings without any letters (digits, punctuation, the empty string) also
# count as upper case.
#
# Arguments: $str - the string to inspect
# Returns:   1 if no lower-case character is found, 0 otherwise.
sub isCroUcase {
  my( $str ) = @_;

  return ( $str =~ /[[:lower:]]/ ) ? 0 : 1;
}
290 ### Setup coding system
291 ## Local Variables:
292 ## coding: utf-8
293 ## End: