5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 #simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accessible via next_token and peep_token
22 use base
qw(HTML::Parser);
27 #seems to be handled post tokenizer
28 ##hash where key is tag we are interested in and the value is a hash of the attributes we want
29 #my %interesting_tags = (
30 # img => { alt => 1 },
33 #tokens found so far (used like a stack)
36 #shift next token or undef
41 #unshift token back on @tokens
44 unshift @tokens, shift;
47 #have a peep at next token
53 #please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
54 #signature build_tokens( self, filename)
# Body of build_tokens (the enclosing sub line and its closing brace are not among the lines shown here).
# Records the filename on the parser, registers the event handlers by method name,
# enables two parser options, then opens the file and hands it to parse_file.
56 my ($self, $filename) = @_;
# kept on the object so the event handlers can stamp each token with its source file
57 $self->{filename
} = $filename;
# each handler() call maps an HTML::Parser event to a method name, plus the argspec listing what that method receives
58 $self->handler(start
=> "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, original text )
59 $self->handler(text
=> "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, original text, is_cdata )
60 $self->handler(end
=> "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tag, attr, original text ); per HTML::Parser, tag is like tagname but carries a leading "/" on end events
61 $self->handler(declaration
=> "declaration", "self, line, text, is_cdata"); # declaration
62 $self->handler(comment
=> "comment", "self, line, text, is_cdata"); # comments
63 $self->handler(process
=> "process", "self, line, text, is_cdata"); # processing statement <?...?>
64 # $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
65 $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
66 $self->unbroken_text(1); #deliver each run of text as one unbroken chunk instead of several pieces (a chunk can span multiple lines)
# the parentheses around open's arguments make || apply to open's return value, so die fires when the open fails
# NOTE(review): no close of $fh appears in the visible lines — confirm whether it is left to go out of scope
67 open(my $fh, "<:encoding(utf8)", $filename) || die "Cannot open $filename ($!)";
68 $self->parse_file($fh);
72 #handle parsing of text
# Fragment of the text handler: the sub line, the assignments of $self, $line and $is_cdata,
# the lines that close the if opened below, and whatever is done with each $t are not among the lines shown.
# Visible logic: look for a Template Toolkit [% ... %] directive in $work. The text before it becomes a
# CDATA/TEXT token and the directive itself a DIRECTIVE token; text handled by the last statement
# becomes a single CDATA/TEXT token. Every token built here carries the same $line value.
76 my $work = shift; # original text
79 # if there is a template_toolkit tag
# NOTE(review): without /s, "." does not match a newline, so a directive containing a line break is not matched by this pattern
80 if( $work =~ m/\[%.*?%\]/ ){
81 #everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
# $` is the part of $work that precedes the match; the token type follows $is_cdata
83 my $t = C4::TmplToken->new( $`, ($is_cdata? C4
::TmplTokenType
::CDATA
: C4
::TmplTokenType
::TEXT
), $line, $self->{filename
} );
87 #the match itself is a DIRECTIVE $&
88 my $t = C4
::TmplToken
->new( $&, C4
::TmplTokenType
::DIRECTIVE
, $line, $self->{filename
} );
91 # put work still to do back into work
94 # If there is some left over work, treat it as text token
95 my $t = C4
::TmplToken
->new( $work, ($is_cdata? C4
::TmplTokenType
::CDATA
: C4
::TmplTokenType
::TEXT
), $line, $self->{filename
} );
# Fragment of an event handler whose sub line is not shown (presumably "declaration", going by the
# order in which handlers are registered — confirm).
# Builds one token from the raw text: type CDATA when $is_cdata is true, TEXT otherwise, tagged with
# $line and the filename stored on the parser. $self and $line are assigned outside the visible lines,
# and what is done with $t afterwards is not visible either.
106 my $work = shift; #original text
107 my $is_cdata = shift;
108 my $t = C4
::TmplToken
->new( $work, ($is_cdata? C4
::TmplTokenType
::CDATA
: C4
::TmplTokenType
::TEXT
), $line, $self->{filename
} );
# Fragment of an event handler whose sub line is not shown (presumably "comment", going by the
# order in which handlers are registered — confirm).
# Builds one token from the raw text: type CDATA when $is_cdata is true, TEXT otherwise, tagged with
# $line and the filename stored on the parser. $self and $line are assigned outside the visible lines,
# and what is done with $t afterwards is not visible either.
115 my $work = shift; #original text
116 my $is_cdata = shift;
117 my $t = C4
::TmplToken
->new( $work, ($is_cdata? C4
::TmplTokenType
::CDATA
: C4
::TmplTokenType
::TEXT
), $line, $self->{filename
} );
# Fragment of an event handler whose sub line is not shown (presumably "process", the <?...?>
# processing-instruction handler, going by the order in which handlers are registered — confirm).
# Builds one token from the raw text: type CDATA when $is_cdata is true, TEXT otherwise, tagged with
# $line and the filename stored on the parser. $self and $line are assigned outside the visible lines,
# and what is done with $t afterwards is not visible either.
124 my $work = shift; #original text
125 my $is_cdata = shift;
126 my $t = C4
::TmplToken
->new( $work, ($is_cdata? C4
::TmplTokenType
::CDATA
: C4
::TmplTokenType
::TEXT
), $line, $self->{filename
} );
# Fragment of an event handler whose sub line is not shown (presumably "default" — confirm; note that
# the registration of a "default" handler is commented out in this file, so this code may be unused).
# Builds one token from the raw text: type CDATA when $is_cdata is true, TEXT otherwise, tagged with
# $line and the filename stored on the parser. $self and $line are assigned outside the visible lines,
# and what is done with $t afterwards is not visible either.
133 my $work = shift; #original text
134 my $is_cdata = shift;
135 my $t = C4
::TmplToken
->new( $work, ($is_cdata? C4
::TmplTokenType
::CDATA
: C4
::TmplTokenType
::TEXT
), $line, $self->{filename
} );
140 #handle opening html tags
# Fragment of the start-tag handler: the sub line, the assignments of $self and $line, the declaration
# of %attr and several control-flow lines are not among the lines shown.
# Visible logic: wrap the tag's original text in a TAG token, build %attr from the attribute hash,
# and attach it to the token with set_attributes.
145 my $hash = shift; #hash of attr/value pairs
146 my $text = shift; #original text
147 my $t = C4
::TmplToken
->new( $text, C4
::TmplTokenType
::TAG
, $line, $self->{filename
});
149 # tags seem to be used in an 'interesting' way elsewhere..
# NOTE(review): %$hash in list context yields keys AND values, so $key also takes each attribute value in turn;
# the defined-guard below skips a value only when it is not itself a key. "keys %$hash" was probably intended — confirm.
150 for my $key( %$hash ) {
151 next unless defined $hash->{$key};
# each entry is keyed by the lowercased name and holds [ name as written, value, "name=value", flag ]
# NOTE(review): the two assignments below differ only in the flag (1 vs 0); the condition that chooses
# between them is not among the visible lines.
153 $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
156 $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
159 $t->set_attributes( \
%attr );
163 #handle closing html tags
# Fragment of the end-tag handler: the sub line, the assignments of $self, $line, $hash and $text,
# and the declaration of %attr are not among the lines shown.
# Visible logic: wrap the tag's original text in a TAG token, build %attr from $hash with the flag
# element always 0, and attach it to the token with set_attributes.
170 # what format should this be in?
171 my $t = C4
::TmplToken
->new( $text, C4
::TmplTokenType
::TAG
, $line, $self->{filename
} );
173 # tags seem to be used in an 'interesting' way elsewhere..
# NOTE(review): %$hash in list context yields keys AND values; "keys %$hash" was probably intended — confirm.
# NOTE(review): HTML::Parser documents the attr argspec as undef for anything but start events, so for
# end tags this loop likely has nothing to iterate over — confirm.
174 for my $key( %$hash ) {
175 next unless defined $hash->{$key};
# entry layout: [ name as written, value, "name=value", 0 ], keyed by the lowercased name
176 $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
178 $t->set_attributes( \
%attr );