C4/TTParser.pm

   1 #!/usr/bin/env perl
   2
   3 # Copyright Tamil 2011
   4 #
   5 # This file is part of Koha.
   6 #
   7 # Koha is free software; you can redistribute it and/or modify it
   8 # under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 3 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # Koha is distributed in the hope that it will be useful, but
  13 # WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
  19
  20 #simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accessible via next_token and peep_token
  21 package C4::TTParser;
  22 use base qw(HTML::Parser);
  23 use C4::TmplToken;
  24 use strict;
  25 use warnings;
  26
  27 #seems to be handled post tokenizer
  28 ##hash where key is tag we are interested in and the value is a hash of the attributes we want
  29 #my %interesting_tags = (
  30 #    img => { alt => 1 },
  31 #);
  32
  33 #tokens found so far; used as a queue: the event handlers push onto the end, next_token shifts from the front and unshift_token puts a token back at the front
  34 my ( @tokens );
  35
  36 #shiftnext token or undef
  37 sub next_token{
  38     return shift @tokens;
  39 }
  40
  41 #unshift token back on @tokens
  42 sub unshift_token{
  43     my $self = shift;
  44     unshift @tokens, shift;
  45 }
  46
  47 #have a peep at next token
  48 sub peep_token{
  49     return $tokens[0];
  50 }
  51
  52 #wrapper for parse
  53 #please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
  54 #signature build_tokens( self, filename)
  55 sub build_tokens{
  56     my ($self, $filename) = @_;
  57     $self->{filename} = $filename;
  58     $self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, original text )
  59     $self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, original text, is_cdata )
  60     $self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, original text )
  61     $self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
  62     $self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
  63 #    $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
  64     $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
  65     $self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
  66     $self->parse_file($filename);
  67     return $self;
  68 }
  69
  70 #handle parsing of text
  71 sub text{
  72     my $self = shift;
  73     my $line = shift;
  74     my $work = shift; # original text
  75     my $is_cdata = shift;
  76     while($work){
  77         # if there is a template_toolkit tag
  78         if( $work =~ m/\[%.*?%\]/ ){
  79             #everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
  80             if( $` ){
  81                 my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
  82                 push @tokens, $t;
  83             }
  84
  85             #the match itself is a DIRECTIVE $&
  86             my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
  87             push @tokens, $t;
  88
  89             # put work still to do back into work
  90             $work = $' ? $' : 0;
  91         } else {
  92             # If there is some left over work, treat it as text token
  93             my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
  94
  95             push @tokens, $t;
  96             last;
  97         }
  98     }
  99 }
 100
 101 sub declaration {
 102     my $self = shift;
 103     my $line = shift;
 104     my $work = shift; #original text
 105     my $is_cdata = shift;
 106     my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
 107     push @tokens, $t;
 108 }
 109
 110 sub comment {
 111     my $self = shift;
 112     my $line = shift;
 113     my $work = shift; #original text
 114     my $is_cdata = shift;
 115     my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
 116     push @tokens, $t;
 117 }
 118
 119 sub default {
 120     my $self = shift;
 121     my $line = shift;
 122     my $work = shift; #original text
 123     my $is_cdata = shift;
 124     my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
 125     push @tokens, $t;
 126 }
 127
 128
 129 #handle opening html tags
 130 sub start{
 131     my $self = shift;
 132     my $line = shift;
 133     my $tag = shift;
 134     my $hash = shift; #hash of attr/value pairs
 135     my $text = shift; #original text
 136     my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
 137     my %attr;
 138     # tags seem to be uses in an 'interesting' way elsewhere..
 139     for my $key( %$hash ) {
 140         next unless defined $hash->{$key};
 141         if ($key eq "/"){
 142             $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
 143             }
 144         else {
 145         $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
 146             }
 147     }
 148     $t->set_attributes( \%attr );
 149     push @tokens, $t;
 150 }
 151
 152 #handle closing html tags
 153 sub end{
 154     my $self = shift;
 155     my $line = shift;
 156     my $tag = shift;
 157     my $hash = shift;
 158     my $text = shift;
 159     # what format should this be in?
 160     my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
 161     my %attr;
 162     # tags seem to be uses in an 'interesting' way elsewhere..
 163     for my $key( %$hash ) {
 164         next unless defined $hash->{$key};
 165         $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
 166     }
 167     $t->set_attributes( \%attr );
 168     push @tokens, $t;
 169 }
 170
 171 1;