Bug 6755 follow up
[koha.git] / C4 / TTParser.pm
blobe088124684d96f12ee68bb9b4ecd4a4da3024e98
1 #!/usr/bin/env perl
2 #simple parser for HTML with Template Toolkit directives. Tokens are put into @tokens and are accesible via next_token and peep_token
3 package C4::TTParser;
4 use base qw(HTML::Parser);
5 use C4::TmplToken;
6 use strict;
7 use warnings;
9 #seems to be handled post tokenizer
10 ##hash where key is tag we are interested in and the value is a hash of the attributes we want
11 #my %interesting_tags = (
12 # img => { alt => 1 },
13 #);
15 #tokens found so far (used like a stack)
16 my ( @tokens );
18 #shiftnext token or undef
19 sub next_token{
20 return shift @tokens;
23 #unshift token back on @tokens
24 sub unshift_token{
25 my $self = shift;
26 unshift @tokens, shift;
29 #have a peep at next token
30 sub peep_token{
31 return $tokens[0];
34 #wrapper for parse
35 #please use this method INSTEAD of the HTML::Parser->parse_file method (and HTML::Parser->parse)
36 #signature build_tokens( self, filename)
37 sub build_tokens{
38 my ($self, $filename) = @_;
39 $self->{filename} = $filename;
40 $self->handler(start => "start", "self, line, tagname, attr, text"); #signature is start( self, linenumber, tagname, hash of attributes, origional text )
41 $self->handler(text => "text", "self, line, text, is_cdata"); #signature is text( self, linenumber, origional text, is_cdata )
42 $self->handler(end => "end", "self, line, tag, attr, text"); #signature is end( self, linenumber, tagename, origional text )
43 $self->handler(declaration => "declaration", "self, line, text, is_cdata"); # declaration
44 $self->handler(comment => "comment", "self, line, text, is_cdata"); # comments
45 # $self->handler(default => "default", "self, line, text, is_cdata"); # anything else
46 $self->marked_sections(1); #treat anything inside CDATA tags as text, should really make it a C4::TmplTokenType::CDATA
47 $self->unbroken_text(1); #make contiguous whitespace into a single token (can span multiple lines)
48 $self->parse_file($filename);
49 return $self;
52 #handle parsing of text
53 sub text{
54 my $self = shift;
55 my $line = shift;
56 my $work = shift; # original text
57 my $is_cdata = shift;
58 while($work){
59 # if there is a template_toolkit tag
60 if( $work =~ m/\[%.*?\]/ ){
61 #everything before this tag is text (or possibly CDATA), add a text token to tokens if $`
62 if( $` ){
63 my $t = C4::TmplToken->new( $`, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
64 push @tokens, $t;
67 #the match itself is a DIRECTIVE $&
68 my $t = C4::TmplToken->new( $&, C4::TmplTokenType::DIRECTIVE, $line, $self->{filename} );
69 push @tokens, $t;
71 # put work still to do back into work
72 $work = $' ? $' : 0;
73 } else {
74 # If there is some left over work, treat it as text token
75 my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
77 push @tokens, $t;
78 last;
83 sub declaration {
84 my $self = shift;
85 my $line = shift;
86 my $work = shift; #original text
87 my $is_cdata = shift;
88 my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
89 push @tokens, $t;
92 sub comment {
93 my $self = shift;
94 my $line = shift;
95 my $work = shift; #original text
96 my $is_cdata = shift;
97 my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
98 push @tokens, $t;
101 sub default {
102 my $self = shift;
103 my $line = shift;
104 my $work = shift; #original text
105 my $is_cdata = shift;
106 my $t = C4::TmplToken->new( $work, ($is_cdata? C4::TmplTokenType::CDATA : C4::TmplTokenType::TEXT), $line, $self->{filename} );
107 push @tokens, $t;
111 #handle opening html tags
112 sub start{
113 my $self = shift;
114 my $line = shift;
115 my $tag = shift;
116 my $hash = shift; #hash of attr/value pairs
117 my $text = shift; #origional text
118 my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename});
119 my %attr;
120 # tags seem to be uses in an 'interesting' way elsewhere..
121 for my $key( %$hash ) {
122 next unless defined $hash->{$key};
123 if ($key eq "/"){
124 $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 1 ];
126 else {
127 $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
130 $t->set_attributes( \%attr );
131 push @tokens, $t;
134 #handle closing html tags
135 sub end{
136 my $self = shift;
137 my $line = shift;
138 my $tag = shift;
139 my $hash = shift;
140 my $text = shift;
141 # what format should this be in?
142 my $t = C4::TmplToken->new( $text, C4::TmplTokenType::TAG, $line, $self->{filename} );
143 my %attr;
144 # tags seem to be uses in an 'interesting' way elsewhere..
145 for my $key( %$hash ) {
146 next unless defined $hash->{$key};
147 $attr{+lc($key)} = [ $key, $hash->{$key}, $key."=".$hash->{$key}, 0 ];
149 $t->set_attributes( \%attr );
150 push @tokens, $t;