lib/NonameTV.pm

   1 package NonameTV;
   2
   3 use strict;
   4 use warnings;
   5
   6 # Mark this source-file as encoded in utf-8.
   7 use utf8;
   8 use Env;
   9
  10 use LWP::UserAgent;
  11 use File::Temp qw/tempfile tempdir/;
  12 use File::Slurp;
  13
  14 use NonameTV::StringMatcher;
  15 use NonameTV::Log qw/w/;
  16 use XML::LibXML;
  17
  18 BEGIN {
  19     use Exporter   ();
  20     our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
  21
  22     # set the version for version checking
  23     $VERSION     = 0.3;
  24
  25     @ISA         = qw(Exporter);
  26     @EXPORT      = qw( );
  27     %EXPORT_TAGS = ( );     # eg: TAG => [ qw!name1 name2! ],
  28     @EXPORT_OK   = qw/MyGet expand_entities
  29                       Html2Xml Htmlfile2Xml
  30                       Word2Xml Wordfile2Xml
  31                       File2Xml Content2Xml
  32                       FindParagraphs
  33                       norm AddCategory
  34                       ParseDescCatSwe FixProgrammeData
  35                       ParseXml ParseXmltv
  36                       MonthNumber
  37                       CompareArrays
  38                      /;
  39 }
  40 our @EXPORT_OK;
  41
  42 my $wvhtml = '/usr/bin/wvHtml --charset=utf8';
  43 # my $wvhtml = '/usr/bin/wvHtml';
  44
  45 my $ua = LWP::UserAgent->new( agent => "Grabber from http://tv.swedb.se",
  46                               cookie_jar => {} );
  47
  48 # Fetch a url. Returns ($content, true) if data was fetched from server and
  49 # different from the last time the same url was fetched, ($content, false) if
  50 # it was fetched from the server and was the same as the last time it was
  51 # fetched and (undef,$error_message) if there was an error fetching the data.
  52
  53 sub MyGet
  54 {
  55   my( $url ) = @_;
  56   my $res = $ua->get( $url );
  57
  58   if( $res->is_success )
  59   {
  60     return ($res->content, not defined( $res->header( 'X-Content-Unchanged' ) ) );
  61   }
  62   else
  63   {
  64     return (undef, $res->status_line );
  65   }
  66 }
  67
  68 # åäö ÅÄÖ
  69 my %ent = (
  70            257  => 'ä',
  71            231  => 'c', # This should really be a c with a special mark on it.
  72                         # Unicode 000E7, UTF-8 195 167.
  73            337  => 'ö',
  74            8211 => '-',
  75            8212 => '--',
  76            8216 => "'",
  77            8217 => "'",
  78            8220 => '"',
  79            8221 => '"',
  80            8230 => '...',
  81            8364 => "(euro)",
  82            );
  83
  84 sub _expand
  85 {
  86   my( $num, $str ) = @_;
  87
  88   if( not defined( $ent{$num} ) )
  89   {
  90     $ent{$num} = "";
  91     print STDERR "Unknown entity $num in $str\n";
  92   }
  93
  94   return $ent{$num};
  95 }
  96
  97 sub expand_entities
  98 {
  99   my( $str ) = @_;
 100
 101   $str =~ s/\&#(\d+);/_expand($1,$str)/eg;
 102
 103   return $str;
 104 }
 105
 106 =item Html2Xml( $content )
 107
 108 Convert the HTML in $content into an XML::LibXML::Document.
 109
 110 Prints an error-message to STDERR and returns undef if the conversion
 111 fails.
 112
 113 =cut
 114
 115 sub Html2Xml {
 116   my( $html ) = @_;
 117
 118   my $xml = XML::LibXML->new;
 119   $xml->recover(1);
 120
 121   # Remove character that makes the parser stop.
 122   $html =~ s/\x00//g;
 123
 124   my $doc;
 125   eval { $doc = $xml->parse_html_string($html, {
 126     recover => 1,
 127     suppress_errors => 1,
 128     suppress_warnings => 1,
 129   }); };
 130
 131   if( $@ ne "" ) {
 132     my ($package, $filename, $line) = caller;
 133     print "parse_html_string failed: $@ when called from $filename:$line\n";
 134     return undef;
 135   }
 136
 137   return $doc;
 138 }
 139
 140 =item Htmlfile2Xml( $filename )
 141
 142 Convert the HTML in a file into an XML::LibXML::Document.
 143
 144 Prints an error-message to STDERR and returns undef if the conversion
 145 fails.
 146
 147 =cut
 148
 149 sub Htmlfile2Xml
 150 {
 151   my( $filename ) = @_;
 152
 153   my $html = read_file( $filename );
 154
 155   return Html2Xml( $html );
 156 }
 157
 158
 159 =item Word2Xml( $content )
 160
 161 Convert the Microsoft Word document in $content into html and return
 162 the html as an XML::LibXML::Document.
 163
 164 Prints an error-message to STDERR and returns undef if the conversion
 165 fails.
 166
 167 =cut
 168
 169 sub Word2Xml
 170 {
 171   my( $content ) = @_;
 172
 173   my( $fh, $filename ) = tempfile();
 174   print $fh $content;
 175   close( $fh );
 176
 177   my $doc = Wordfile2Xml( $filename );
 178   unlink( $filename );
 179   return $doc;
 180 }
 181
 182 sub Wordfile2Xml
 183 {
 184   my( $filename ) = @_;
 185
 186   my $html = qx/$wvhtml "$filename" -/;
 187   if( $? )
 188   {
 189     w "$wvhtml $filename - failed: $?";
 190     return undef;
 191   }
 192
 193   # Remove character that makes LibXML choke.
 194   $html =~ s/\&hellip;/.../g;
 195
 196   return Html2Xml( $html );
 197 }
 198
 199 sub File2Xml {
 200   my( $filename ) = @_;
 201
 202   my $data = read_file( $filename );
 203   my $doc;
 204   if( $data =~ /^\<\!DOCTYPE HTML/ )
 205   {
 206     # This is an override that has already been run through wvHtml
 207     $doc = Html2Xml( $data );
 208   }
 209   else
 210   {
 211     $doc = Word2Xml( $data );
 212   }
 213
 214   return $doc;
 215 }
 216
 217 sub Content2Xml {
 218   my( $cref ) = @_;
 219
 220   my $doc;
 221   if( $$cref =~ /^\<\!DOCTYPE HTML/ )
 222   {
 223     # This is an override that has already been run through wvHtml
 224     $doc = Html2Xml( $$cref );
 225   }
 226   else
 227   {
 228     $doc = Word2Xml( $$cref );
 229   }
 230
 231   return $doc;
 232 }
 233
 234 =pod
 235
 236 FindParagraphs( $doc, $expr )
 237
 238 Finds all paragraphs in the part of an xml-tree that matches an
 239 xpath-expression. Returns a reference to an array of strings.
 240 All paragraphs are normalized and empty strings are removed from the
 241 array.
 242
 243 Both <div> and <br> are taken into account when splitting the document
 244 into paragraphs.
 245
 246 Use the expression '//body//.' for html-documents when you want to see
 247 all paragraphs in the page.
 248
 249 =cut
 250
 251 my %paraelem = (
 252                 p => 1,
 253                 br => 1,
 254                 div => 1,
 255                 td => 1,
 256                 );
 257
 258 sub FindParagraphs {
 259   my( $doc, $elements ) = @_;
 260
 261   my $ns = $doc->find( $elements );
 262
 263   my @paragraphs;
 264   my $p = "";
 265
 266   foreach my $node ($ns->get_nodelist()) {
 267     if( $node->nodeName eq "#text" ) {
 268       $p .= $node->textContent();
 269     }
 270     elsif( defined $paraelem{ $node->nodeName } ) {
 271       $p = norm( $p );
 272       if( $p ne "" ) {
 273         push @paragraphs, $p;
 274         $p = "";
 275       }
 276     }
 277   }
 278
 279   return \@paragraphs;
 280 }
 281
 282
 283 # Remove any strange quotation marks and other syntactic marks
 284 # that we don't want to have. Remove leading and trailing space as well
 285 # multiple whitespace characters.
 286 # Returns the empty string if called with an undef string.
 287 sub norm
 288 {
 289   my( $str ) = @_;
 290
 291   return "" if not defined( $str );
 292
 293 # Uncomment the code below and change the regexp to learn which
 294 # character code perl thinks a certain character has.
 295 # These codes can be used in \x{YY} expressions as shown below.
 296 #  if( $str =~ /unique string/ ) {
 297 #    for( my $i=0; $i < length( $str ); $i++ ) {
 298 #      printf( "%2x: %s\n", ord( substr( $str, $i, 1 ) ),
 299 #               substr( $str, $i, 1 ) );
 300 #    }
 301 #  }
 302
 303   $str = expand_entities( $str );
 304
 305   $str =~ tr/\x{96}\x{93}\x{94}/-""/; #
 306   $str =~ tr/\x{201d}\x{201c}/""/;
 307   $str =~ tr/\x{2022}/*/; # Bullet
 308   $str =~ tr/\x{2013}\x{2018}\x{2019}/-''/;
 309   $str =~ tr/\x{017c}\x{0144}\x{0105}/zna/;
 310   $str =~ s/\x{85}/... /g;
 311   $str =~ s/\x{2026}/.../sg;
 312   $str =~ s/\x{2007}/ /sg;
 313
 314   $str =~ s/^\s+//;
 315   $str =~ s/\s+$//;
 316   $str =~ tr/\n\r\t /    /s;
 317
 318   return $str;
 319 }
 320
 321 =item AddCategory
 322
 323 Add program_type and category to an entry if the entry does not already
 324 have a program_type and category.
 325
 326 AddCategory( $ce, $program_type, $category );
 327
 328 =cut
 329
 330 sub AddCategory
 331 {
 332   my( $ce, $program_type, $category ) = @_;
 333
 334   if( not defined( $ce->{program_type} ) and defined( $program_type )
 335       and ( $program_type =~ /\S/ ) )
 336   {
 337     $ce->{program_type} = $program_type;
 338   }
 339
 340   if( not defined( $ce->{category} ) and defined( $category )
 341       and ( $category =~ /\S/ ) )
 342   {
 343     $ce->{category} = $category;
 344   }
 345 }
 346
 347 =item ParseDescCatSwe
 348
 349 Parse a program description in Swedish and return program_type
 350 and category that can be deduced from the description.
 351
 352   my( $pty, $cat ) = ParseDescCatSwe( "Amerikansk äventyrsserie" );
 353
 354 =cut
 355
 356 my $sm = NonameTV::StringMatcher->new();
 357 $sm->AddRegexp( qr/kriminalserie/i,      [ 'series', 'Crime/Mystery' ] );
 358 $sm->AddRegexp( qr/deckarserie/i,        [ 'series', 'Crime/Mystery' ] );
 359 $sm->AddRegexp( qr/polisserie/i,         [ 'series', 'Crime/Mystery' ] );
 360 $sm->AddRegexp( qr/familjeserie/i,       [ 'series', undef ] );
 361 $sm->AddRegexp( qr/tecknad serie/i,      [ 'series', undef ] );
 362 $sm->AddRegexp( qr/animerad serie/i,     [ 'series', undef ] );
 363 $sm->AddRegexp( qr/dramakomediserie/i,   [ 'series', 'Comedy' ] );
 364 $sm->AddRegexp( qr/dramaserie/i,         [ 'series', 'Drama' ] );
 365 $sm->AddRegexp( qr/resedokumentärserie/i,[ 'series', 'Food/Travel' ] );
 366 $sm->AddRegexp( qr/komediserie/i,        [ 'series', 'Comedy' ] );
 367 $sm->AddRegexp( qr/realityserie/i,       [ 'series', 'Reality' ] );
 368 $sm->AddRegexp( qr/realityshow/i,        [ 'series', 'Reality' ] );
 369 $sm->AddRegexp( qr/dokusåpa/i,           [ 'series', 'Reality' ] );
 370 $sm->AddRegexp( qr/actiondramaserie/i,   [ 'series', 'Action' ] );
 371 $sm->AddRegexp( qr/actionserie/i,        [ 'series', 'Action' ] );
 372 $sm->AddRegexp( qr/underhållningsserie/i,[ 'series', undef ] );
 373 $sm->AddRegexp( qr/äventyrsserie/i,      [ 'series', 'Action/Adv' ] );
 374 $sm->AddRegexp( qr/äventyrskomediserie/i,[ 'series', 'Comedy' ] );
 375 $sm->AddRegexp( qr/dokumentärserie/i,    [ 'series', 'Documentary' ] );
 376 $sm->AddRegexp( qr/dramadokumentär/i,    [ undef,    'Documentary' ] );
 377
 378 $sm->AddRegexp( qr/barnserie/i,          [ 'series', "Children's" ] );
 379 $sm->AddRegexp( qr/matlagningsserie/i,   [ 'series', 'Cooking' ] );
 380 $sm->AddRegexp( qr/motorserie/i,         [ 'series', undef ] );
 381 $sm->AddRegexp( qr/fixarserie/i,         [ 'series', "Home/How-to" ] );
 382 $sm->AddRegexp( qr/science[-\s]*fiction[-\s]*serie/i,
 383                 [ 'series', 'SciFi' ] );
 384 $sm->AddRegexp( qr/barnprogram/i,          [ undef, "Children's" ] );
 385
 386 # Movies
 387 $sm->AddRegexp( qr/\b(familje|drama|action)*komedi\b/i,  [ 'movie', "Comedy" ] );
 388
 389 $sm->AddRegexp( qr/\b(krigs|kriminal)*drama\b/i,  [ 'movie', "Drama" ] );
 390
 391 $sm->AddRegexp( qr/\baction(drama|film)*\b/i,     [ 'movie', "Action/Adv" ] );
 392
 393 $sm->AddRegexp( qr/\b.ventyrsdrama\b/i,           [ 'movie', "Action/Adv" ] );
 394
 395 $sm->AddRegexp( qr/\bv.stern(film)*\b/i,          [ 'movie', undef ] );
 396
 397 $sm->AddRegexp( qr/\b(drama)*thriller\b/i,        [ 'movie', "Crime" ] );
 398
 399 $sm->AddRegexp( qr/\bscience\s*fiction(rysare)*\b/i, [ 'movie', "SciFi" ] );
 400
 401 $sm->AddRegexp( qr/\b(l.ng)*film\b/i,             [ 'movie', undef ] );
 402
 403
 404 sub ParseDescCatSwe
 405 {
 406   my( $desc ) = @_;
 407
 408   return (undef, undef) if not defined $desc;
 409
 410   my $res = $sm->Match( $desc );
 411   if( defined( $res ) )
 412   {
 413     return @{$res};
 414   }
 415   else
 416   {
 417     return (undef,undef);
 418   }
 419 }
 420
 421 sub FixProgrammeData
 422 {
 423   my( $d ) = @_;
 424
 425   $d->{title} =~ s/^s.songs+tart\s*:*\s*//gi;
 426   $d->{title} =~ s/^seriestart\s*:*\s*//gi;
 427   $d->{title} =~ s/^reprisstart\s*:*\s*//gi;
 428   $d->{title} =~ s/^programstart\s*:*\s*//gi;
 429
 430   $d->{title} =~ s/^s.songs*avslutning\s*:*\s*//gi;
 431   $d->{title} =~ s/^sista\s+delen\s*:*\s*//gi;
 432   $d->{title} =~ s/^sista\s+avsnittet\s*:*\s*//gi;
 433
 434   if( $d->{title} =~ s/^((matin.)|(fredagsbio))\s*:\s*//gi )
 435   {
 436     $d->{program_type} = 'movie';
 437     $d->{category} = 'Movies';
 438   }
 439
 440   # Set program_type to series if the entry has an episode-number
 441   # with a defined episode (i.e. second part),
 442   # but doesn't have a program_type.
 443   if( exists( $d->{episode} ) and defined( $d->{episode} ) and
 444       ($d->{episode} !~ /^\s*\.\s*\./) and
 445       ( (not defined($d->{program_type})) or ($d->{program_type} =~ /^\s*$/) ) )
 446   {
 447     $d->{program_type} = "series";
 448   }
 449 }
 450
 451 =pod
 452
 453 my $doc = ParseXml( $strref );
 454
 455 Parse an xml-string into an XML::LibXML document. Takes a reference to a
 456 string as the only reference.
 457
 458 =cut
 459
 460 my $xml;
 461
 462 sub ParseXml {
 463   my( $cref ) = @_;
 464
 465   if( not defined $xml ) {
 466     $xml = XML::LibXML->new;
 467     $xml->load_ext_dtd(0);
 468   }
 469
 470   my $doc;
 471   eval {
 472     $doc = $xml->parse_string($$cref);
 473   };
 474   if( $@ ne "" )   {
 475     w "Failed to parse xml: $@";
 476     return undef;
 477   }
 478
 479   return $doc;
 480 }
 481
 482 =pod
 483
 484 Parse a reference to an xml-string in xmltv-format into a reference to an
 485 array of hashes with programme-info.
 486
 487 =cut
 488
 489 sub ParseXmltv {
 490   my( $cref ) = @_;
 491
 492   my $doc = ParseXml( $cref );
 493   return undef if not defined $doc;
 494
 495   my @d;
 496
 497   # Find all "programme"-entries.
 498   my $ns = $doc->find( "//programme" );
 499   if( $ns->size() == 0 ) {
 500     return;
 501   }
 502
 503   foreach my $pgm ($ns->get_nodelist) {
 504     my $start = $pgm->findvalue( '@start' );
 505     my $start_dt = create_dt( $start );
 506
 507     my $stop = $pgm->findvalue( '@stop' );
 508     my $stop_dt = create_dt( $stop );
 509
 510     my $title = $pgm->findvalue( 'title' );
 511     my $subtitle = $pgm->findvalue( 'sub-title' );
 512
 513     my $desc = $pgm->findvalue( 'desc' );
 514     my $cat1 = $pgm->findvalue( 'category[1]' );
 515     my $cat2 = $pgm->findvalue( 'category[2]' );
 516     my $episode = $pgm->findvalue( 'episode-num[@system="xmltv_ns"]' );
 517     my $production_date = $pgm->findvalue( 'date' );
 518     my $url = $pgm->findvalue( 'url' );
 519
 520     my $aspect = $pgm->findvalue( 'video/aspect' );
 521
 522     my @actors;
 523     my $ns = $pgm->find( ".//actor" );
 524
 525     foreach my $act ($ns->get_nodelist) {
 526       push @actors, $act->findvalue(".");
 527     }
 528
 529     my @directors;
 530     $ns = $pgm->find( ".//director" );
 531
 532     foreach my $dir ($ns->get_nodelist) {
 533       push @directors, $dir->findvalue(".");
 534     }
 535
 536     my %e = (
 537       start_dt => $start_dt,
 538       stop_dt => $stop_dt,
 539       title => $title,
 540       description => $desc,
 541     );
 542
 543     if( $subtitle =~ /\S/ ) {
 544       $e{subtitle} = $subtitle;
 545     }
 546
 547     if( $episode =~ /\S/ ) {
 548       $e{episode} = $episode;
 549     }
 550
 551     if( $url =~ /\S/ ) {
 552       $e{url} = $url;
 553     }
 554
 555     if( $cat1 =~ /^[a-z]/ ) {
 556       $e{program_type} = $cat1;
 557     }
 558     elsif( $cat1 =~ /^[A-Z]/ ) {
 559       $e{category} = $cat1;
 560     }
 561
 562     if( $cat2 =~ /^[a-z]/ ) {
 563       $e{program_type} = $cat2;
 564     }
 565     elsif( $cat2 =~ /^[A-Z]/ ) {
 566       $e{category} = $cat2;
 567     }
 568
 569     if( $production_date =~ /\S/ ) {
 570       $e{production_date} = "$production_date-01-01";
 571     }
 572
 573     if( $aspect =~ /\S/ ) {
 574       $e{aspect} = $aspect;
 575     }
 576
 577     if( scalar( @directors ) > 0 ) {
 578       $e{directors} = join ", ", @directors;
 579     }
 580
 581     if( scalar( @actors ) > 0 ) {
 582       $e{actors} = join ", ", @actors;
 583     }
 584
 585
 586     push @d, \%e;
 587   }
 588
 589   return \@d;
 590 }
 591
 592 sub create_dt
 593 {
 594   my( $datetime ) = @_;
 595
 596   my( $year, $month, $day, $hour, $minute, $second, $tz ) =
 597     ($datetime =~ /(\d{4})(\d{2})(\d{2})
 598                    (\d{2})(\d{2})(\d{2})\s+
 599                    (\S+)$/x);
 600
 601   my $dt = DateTime->new(
 602                           year => $year,
 603                           month => $month,
 604                           day => $day,
 605                           hour => $hour,
 606                           minute => $minute,
 607                           second => $second,
 608                           time_zone => $tz
 609                           );
 610
 611   return $dt;
 612 }
 613
 614 =pod
 615
 616 Convert month name to month number
 617
 618 =cut
 619
 620 sub MonthNumber {
 621   my( $monthname , $lang ) = @_;
 622
 623   my( @months_1, @months_2, @months_3, @months_4, @months_5 );
 624
 625   if( $lang =~ /^en$/ ){
 626     @months_1 = qw/jan feb mar apr may jun jul aug sep oct nov dec/;
 627     @months_2 = qw/janu febr marc apr may june july augu sept octo nov dec/;
 628     @months_3 = qw/january february march april may june july august september october november december/;
 629     @months_4 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 630     @months_5 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 631   } elsif( $lang =~ /^hr$/ ){
 632     @months_1 = qw/sij vel ozu tra svi lip srp kol ruj lis stu pro/;
 633     @months_2 = qw/sijecanj veljaca ozujak travanj svibanj lipanj srpanj kolovoz rujan listopad studeni prosinac/;
 634     @months_3 = qw/sijecnja veljace ozujka travnja svibnja lipnja srpnja kolovoza rujna listopada studenoga prosinca/;
 635     @months_4 = qw/sijeÃ¨a veljače ožujka travnja svibnja lipnja srpnja kolovoza rujna listopada studenog prosinca/;
 636     @months_5 = qw/siječnja veljace ozujka travnja svibnja lipnja srpnja kolovoza rujna listopada studenog prosinca/;
 637   } elsif( $lang =~ /^sr$/ ){
 638     @months_1 = qw/jan feb mar apr maj jun jul aug sep okt nov dec/;
 639     @months_2 = qw/januar februar mart april maj jun juli august septembar oktobar novembar decembar/;
 640     @months_3 = qw/januara februara marta aprila maja juna jula augusta septembra oktobra novembra decembra/;
 641     @months_4 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 642     @months_5 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 643   } elsif( $lang =~ /^it$/ ){
 644     @months_1 = qw/gen feb mar apr mag giu lug ago set ott nov dic/;
 645     @months_2 = qw/gennaio febbraio marzo aprile maggio giugno luglio agosto settembre ottobre novembre dicembre/;
 646     @months_3 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 647     @months_4 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 648     @months_5 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 649   } elsif( $lang =~ /^fr$/ ){
 650     @months_1 = qw/jan fav mar avr mai jui jul aou sep oct nov dec/;
 651     @months_2 = qw/JANVIER FÉVRIER mars avril mai juin juillet Août septembre octobre novembre DÉCEMBRE/;
 652     @months_3 = qw/janvier favrier mMARS AVRIL MAI JUIN juillet aout septembre octobre novembre DÉCEMBRE/;
 653     @months_4 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 654     @months_5 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 655   } elsif( $lang =~ /^ru$/ ){
 656     @months_1 = qw/jan fav mar aprelja maja jui jul aou sep oct nov dec/;
 657     @months_2 = qw/JANVIER FÉVRIER mars avril mai juin juillet aout septembre octobre novembre DÉCEMBRE/;
 658     @months_3 = qw/janvier favrier mars avril mai juin juillet aout septembre octobre novembre DÉCEMBRE/;
 659     @months_4 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 660     @months_5 = qw/1 2 3 4 5 6 7 8 9 10 11 12/;
 661   }
 662
 663   my %monthnames = ();
 664
 665   for( my $i = 0; $i < scalar(@months_1); $i++ ){
 666     $monthnames{$months_1[$i]} = $i+1;
 667   }
 668
 669   for( my $i = 0; $i < scalar(@months_2); $i++ ){
 670     $monthnames{$months_2[$i]} = $i+1;
 671   }
 672
 673   for( my $i = 0; $i < scalar(@months_3); $i++ ){
 674     $monthnames{$months_3[$i]} = $i+1;
 675   }
 676
 677   for( my $i = 0; $i < scalar(@months_4); $i++ ){
 678     $monthnames{$months_4[$i]} = $i+1;
 679   }
 680
 681   for( my $i = 0; $i < scalar(@months_5); $i++ ){
 682     $monthnames{$months_5[$i]} = $i+1;
 683   }
 684
 685   my $month = $monthnames{$monthname};
 686   my $lcmonth = $monthnames{lc $monthname};
 687
 688   return $month||$lcmonth;
 689 }
 690
 691 =begin nd
 692
 693 Function: CompareArrays
 694
 695 Compare two arrays (new and old) and call functions to reflect added,
 696 deleted and unchanged entries.
 697
 698 Parameters:
 699
 700   $new - A reference to the new array
 701   $old - A reference to the old array
 702   $cb - A hashref with callback functions
 703
 704 CompareArrays calls the following callback functions:
 705
 706   $cb->{cmp}( $enew, $eold ) - Compare an entry from $new with an
 707                            entry from $old.  Shall return -1 if $ea is
 708                            less than $eb, 0 if they are equal and 1
 709                            otherwise.
 710
 711   $cb->{added}($enew) - Called for all entries that are present in
 712                       $new but not in $old.
 713
 714   $cb->{deleted}($eold) - Called for all entries that are present in
 715                         $old but not in $new.
 716
 717   $cb->{equal}($enew, $eold) - Called for all entries that are present in
 718                            both $new and $old.
 719
 720 Additionally, $cb->{max} shall contain an entry that is always
 721 regarded as greater than any possible entry in $new and $old.
 722
 723 Returns:
 724
 725   nothing
 726
 727 =cut
 728
 729 sub CompareArrays #( $new, $old, $cb )
 730 {
 731   my( $new, $old, $cb ) = @_;
 732
 733   my @a = sort { $cb->{cmp}( $a, $b ) } @{$new};
 734   my @b = sort { $cb->{cmp}( $a, $b ) } @{$old};
 735
 736   push @a, $cb->{max};
 737   push @b, $cb->{max};
 738
 739   my $ia = 0;
 740   my $ib = 0;
 741
 742   while( 1 ) {
 743     my $da = $a[$ia];
 744     my $db = $b[$ib];
 745
 746     # If both arrays have reached the end, we are done.
 747     if( ($cb->{cmp}($da, $cb->{max}) == 0) and
 748         ($cb->{cmp}($db, $cb->{max}) == 0 ) ) {
 749       last;
 750     }
 751
 752     my $cmp = $cb->{cmp}($da, $db);
 753
 754     if( $cmp == 0 ) {
 755       $cb->{equal}($da, $db);
 756       $ia++;
 757       $ib++;
 758     }
 759     elsif( $cmp < 0 ) {
 760       $cb->{added}( $da );
 761       $ia++;
 762     }
 763     else {
 764       $cb->{deleted}($db);
 765       $ib++;
 766     }
 767   }
 768 }
 769
 770 1;
 771
 772 ### Setup coding system
 773 ## Local Variables:
 774 ## coding: utf-8
 775 ## End: