New importer - MTVAdria
[nonametv.git] / lib / NonameTV / Importer / RiTv.pm
blob7515fbdef0cdba241f42fb2c89778b9934b958db
1 package NonameTV::Importer::RiTv;
3 use strict;
4 use warnings;
6 =pod
8 Channels: RiTv
10 Import data from Word-files delivered via e-mail. Each day
11 is handled as a separate batch.
13 Features:
15 =cut
17 use utf8;
19 use POSIX;
20 use DateTime;
21 use XML::LibXML;
22 #use Text::Capitalize qw/capitalize_title/;
24 use NonameTV qw/MyGet Wordfile2Xml Htmlfile2Xml norm AddCategory/;
25 use NonameTV::DataStore::Helper;
26 use NonameTV::Log qw/info progress error logdie
27 log_to_string log_to_string_result/;
29 use NonameTV::Importer::BaseFile;
31 use base 'NonameTV::Importer::BaseFile';
# Constructor.
#
# Delegates construction to the parent class, re-blesses the result into
# the requested class, tags the importer with its grabber name and attaches
# a NonameTV::DataStore::Helper built around the importer's datastore.
#
# Accepts either a class name or an existing object as invocant; all other
# arguments are passed through to the parent constructor unchanged.
# Returns the new importer object.
sub new {
  my $invocant = shift;
  my $class    = ref( $invocant ) || $invocant;

  my $self = bless( $class->SUPER::new( @_ ), $class );

  $self->{grabber_name}    = "RiTv";
  $self->{datastorehelper} =
    NonameTV::DataStore::Helper->new( $self->{datastore} );

  return $self;
}
# Import a single delivered file for one channel.
#
# Parameters:
#   $file - path of the file to import
#   $chd  - channel data hash; the keys 'id' and 'xmltvid' are read
#
# Dispatches on the file extension (case-insensitive): '.doc' goes to
# ImportDOC, '.xml' goes to ImportXML. Files with any other extension are
# ignored silently. Always resets $self->{fileerror} to 0 first.
#
# Returns nothing.
sub ImportContentFile {
  my $self = shift;
  my( $file, $chd ) = @_;

  $self->{fileerror} = 0;

  my $channel_id = $chd->{id};
  my $xmltvid = $chd->{xmltvid};

  if( $file =~ /\.xml$/i ){

    # This module does not define ImportXML itself, so only dispatch when
    # the method is actually available (e.g. inherited); otherwise report
    # the file instead of dying on an unknown method.
    if( $self->can( 'ImportXML' ) ){
      $self->ImportXML( $file, $channel_id, $xmltvid );
    } else {
      error( "RiTv $xmltvid: $file: XML import is not supported" );
    }

  } elsif( $file =~ /\.doc$/i ){
    $self->ImportDOC( $file, $channel_id, $xmltvid );
  }

  return;
}
# Import a Word schedule document.
#
# Parameters:
#   $file       - path of the .doc file (anything else is ignored)
#   $channel_id - channel id passed to StartBatch and stored in each programme
#   $xmltvid    - channel xmltvid, used for batch ids and log messages
#
# The document is converted with Wordfile2Xml and walked one <div> at a
# time. Each div is classified as:
#   * a date header (see isDate)  - opens a new batch for that day,
#   * a show line   (see isShow)  - starts a new programme entry,
#   * anything else               - appended to the description of the
#                                   most recent programme.
# Processing stops at a div whose text is exactly 'TJEDNI PROGRAM'.
#
# Programmes are collected per day and handed to the datastore helper when
# the next day starts or the document ends. Returns nothing.
sub ImportDOC
{
  my $self = shift;
  my( $file, $channel_id, $xmltvid ) = @_;

  my $dsh = $self->{datastorehelper};
  my $ds = $self->{datastore};

  return if( $file !~ /\.doc$/i );

  progress( "RiTv: $xmltvid: Processing $file" );

  my $doc = Wordfile2Xml( $file );

  if( not defined( $doc ) ) {
    error( "RiTv $xmltvid: $file: Failed to parse" );
    return;
  }

  # Text inside spans styled 'text-transform:uppercase' is stored in its
  # original case; upper-case it in the tree so the extracted text matches
  # what the style asks for.
  my @nodes = $doc->findnodes( '//span[@style="text-transform:uppercase"]/text()' );
  foreach my $node (@nodes) {
    my $str = $node->getData();
    $node->setData( uc( $str ) );
  }

  # Find all paragraphs.
  my $ns = $doc->find( "//div" );

  if( $ns->size() == 0 ) {
    error( "RiTv $xmltvid: $file: No divs found." ) ;
    return;
  }

  # Date of the batch that is currently open; undef while no batch is open.
  my $currdate;

  # Programmes collected for the current day.
  my @ces;

  foreach my $div ($ns->get_nodelist) {

    my( $text ) = norm( $div->findvalue( '.' ) );

    # skip the bottom of the document
    # all after 'TJEDNI PROGRAM'
    last if( $text =~ /^TJEDNI PROGRAM$/ );

    #print ">$text<\n";

    if( isDate( $text ) ) { # header like 'RASPORED PROGRAMA ZA PETAK 18.07.2008.'

      my $date = ParseDate( $text );

      if( $date ) {

        progress("RiTv: $xmltvid: Date is $date");

        if( !defined( $currdate ) or $date ne $currdate ) {

          if( defined( $currdate ) ){
            # save last day if we have it in memory
            FlushDayData( $xmltvid, $dsh , @ces );
            $dsh->EndBatch( 1 );
          }

          my $batch_id = "${xmltvid}_" . $date;
          $dsh->StartBatch( $batch_id, $channel_id );
          $dsh->StartDate( $date , "00:00" );
          $currdate = $date;
        }
      }

      # empty last day array
      # NOTE(review): this also runs when the header repeats the current
      # date, dropping what was collected so far - confirm that is intended.
      @ces = ();

    } elsif( !defined( $currdate ) ) {

      # No date header seen yet, so there is no open batch these lines
      # could be stored in; ignore them.
      next;

    } elsif( isShow( $text ) ) {

      my( $time, $title, $genre ) = ParseShow( $text );

      my $ce = {
        channel_id => $channel_id,
        start_time => $time,
        title => norm($title),
      };

      if( $genre ){

        my($program_type, $category ) = $ds->LookupCat( "RiTv", $genre );
        AddCategory( $ce, $program_type, $category );

        # The genre is the initial description; following description
        # lines are appended to it.
        $ce->{description} = $genre;
      }

      # add the programme to the array
      # as we have to add description later
      push( @ces , $ce );

    } elsif( @ces ) {

      # the last element is the one to which
      # this description belongs to
      my $element = $ces[-1];

      # remove ' - ' from the start
      $text =~ s/^\s*-\s*//;
      $element->{description} .= $text;
    }
  }

  # Without any date header no batch was ever started, so there is nothing
  # to flush and no batch to end.
  if( !defined( $currdate ) ) {
    error( "RiTv $xmltvid: $file: No date header found" );
    return;
  }

  # save last day if we have it in memory
  FlushDayData( $xmltvid, $dsh , @ces );

  $dsh->EndBatch( 1 );

  return;
}
# Hand the programmes collected for one day to the datastore helper.
#
# Parameters:
#   $xmltvid - channel xmltvid, used only in the progress message
#   $dsh     - datastore helper whose AddProgramme method receives each entry
#   @data    - programme hashes, in the order they should be added
#
# Logs start time and title of every programme before adding it. An empty
# list is a no-op.
sub FlushDayData {
  my ( $xmltvid, $dsh , @data ) = @_;

  for my $programme (@data) {
    progress("RiTv: $xmltvid: $programme->{start_time} - $programme->{title}");
    $dsh->AddProgramme( $programme );
  }

  return;
}
# Tell whether a line is a day header.
#
# A header looks like 'RASPORED PROGRAMA ZA PETAK 18.07.2008.': the fixed
# phrase, one of the listed day names, then day.month.year with a trailing
# dot. Matching is case-insensitive.
#
# Returns 1 for a header, 0 otherwise.
sub isDate {
  my ( $text ) = @_;

  my $is_header = ( $text =~ /^RASPORED PROGRAMA ZA (ponedjeljak|utorak|srijedu|Četvrtak|petak|subotu|nedjelju)\s*\d+\.\d+\.\d+\.\s*$/i );

  return $is_header ? 1 : 0;
}
# Extract the date from a day header.
#
# Parameters:
#   $text - header line such as 'RASPORED PROGRAMA ZA PETAK 18.07.2008.'
#
# Matching is case-insensitive so that every line accepted by isDate can
# also be parsed here.
#
# Returns the date as 'YYYY-MM-DD' (month and day zero-padded), or nothing
# (undef in scalar context) when the line is not a recognisable header.
sub ParseDate {
  my( $text ) = @_;

  return unless defined( $text );

  # The day name itself is not needed, only day, month and year.
  my( $day, $month, $year ) = ( $text =~ /^RASPORED PROGRAMA ZA \S+\s*(\d+)\.(\d+)\.(\d+)\.\s*$/i );

  # No match: report failure instead of formatting undefined values
  # into a bogus '0-00-00'.
  return unless defined( $year );

  return sprintf( '%d-%02d-%02d', $year, $month, $day );
}
# Tell whether a line starts a programme entry.
#
# A show line begins with a time written with a dot, followed by
# whitespace and the title, e.g. '12.40 Glazbeni program'.
#
# Returns 1 for a show line, 0 otherwise.
sub isShow {
  my ( $text ) = @_;

  my $is_show = ( $text =~ /^\d+\.\d+\s+.*/i );

  return $is_show ? 1 : 0;
}
# Split a show line into start time, title and genre.
#
# Parameters:
#   $text - line such as '12.40 Glazbeni program, glazba'
#
# Everything after the first comma is taken as the genre; the rest must be
# a dotted time followed by whitespace and the title.
#
# Returns ( 'H:MM', $title, $genre ) where $genre is undef when the line
# has no comma. Returns an empty list when the line is not a show line.
sub ParseShow {
  my( $text ) = @_;

  return unless defined( $text );

  # Cut the genre off first so that it never ends up in the title.
  my $genre;
  if( $text =~ s/\,\s*(.*)// ){
    $genre = $1;
  }

  my( $hour, $min, $title ) = ( $text =~ /^(\d+)\.(\d+)\s+(.*)/ );

  # Not a show line: do not build a bogus ':' start time from undefined parts.
  return unless defined( $hour );

  return( $hour . ":" . $min , $title , $genre );
}
245 ### Setup coding system
246 ## Local Variables:
247 ## coding: utf-8
248 ## End: