Bug 22454: Unit tests
[koha.git] / Koha / XSLT_Handler.pm
blob0682c3ca20a5fb0c2d6c295059dbf9f988e874e4
1 package Koha::XSLT_Handler;
3 # Copyright 2014 Rijksmuseum
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 3 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 =head1 NAME
22 Koha::XSLT_Handler - Facilitate use of XSLT transformations
24 =head1 SYNOPSIS
26 use Koha::XSLT_Handler;
27 my $xslt_engine = Koha::XSLT_Handler->new;
28 my $output = $xslt_engine->transform($xml, $xsltfilename);
29 $output = $xslt_engine->transform({ xml => $xml, file => $file });
30 $output = $xslt_engine->transform({ xml => $xml, code => $code });
31 my $err= $xslt_engine->err; # error code
32 $xslt_engine->refresh($xsltfilename);
34 =head1 DESCRIPTION
36 A XSLT handler object on top of LibXML and LibXSLT, allowing you to
37 run XSLT stylesheets repeatedly without loading them again.
38 Errors occurring during loading, parsing or transforming are reported
39 via the err attribute.
40 Reloading XSLT files can be done with the refresh method.
42 =head1 METHODS
44 =head2 new
46 Create handler object (via Class::Accessor)
48 =head2 transform
50 Run transformation for specific string and stylesheet
52 =head2 refresh
54 Allow to reload stylesheets when transforming again
56 =head1 PROPERTIES
58 =head2 err
60 Error code (see list of ERROR CODES)
62 =head2 do_not_return_source
64 If true, transform returns undef on failure. By default, it returns the
65 original string passed. Errors are reported as described.
67 =head2 print_warns
69 If set, print error messages to STDERR. False by default. Looks at the
70 DEBUG environment variable too.
72 =head1 ERROR CODES
74 =head2 Error XSLTH_ERR_NO_FILE
76 No XSLT file passed
78 =head2 Error XSLTH_ERR_FILE_NOT_FOUND
80 XSLT file not found
82 =head2 Error XSLTH_ERR_LOADING
84 Error while loading stylesheet xml: [optional warnings]
86 =head2 Error XSLTH_ERR_PARSING_CODE
88 Error while parsing stylesheet: [optional warnings]
90 =head2 Error XSLTH_ERR_PARSING_DATA
92 Error while parsing input: [optional warnings]
94 =head2 Error XSLTH_ERR_TRANSFORMING
96 Error while transforming input: [optional warnings]
98 =head2 Error XSLTH_NO_STRING_PASSED
100 No string to transform
102 =head1 INTERNALS
104 For documentation purposes. You are not encouraged to access them.
106 =head2 last_xsltfile
108 Contains the last successfully executed XSLT filename
110 =head2 xslt_hash
112 Hash reference to loaded stylesheets
114 =head1 ADDITIONAL COMMENTS
116 =cut
118 use Modern::Perl;
119 use XML::LibXML;
120 use XML::LibXSLT;
122 use base qw(Class::Accessor);
124 __PACKAGE__->mk_ro_accessors(qw( err ));
125 __PACKAGE__->mk_accessors(qw( do_not_return_source print_warns ));
127 use constant XSLTH_ERR_1 => 'XSLTH_ERR_NO_FILE';
128 use constant XSLTH_ERR_2 => 'XSLTH_ERR_FILE_NOT_FOUND';
129 use constant XSLTH_ERR_3 => 'XSLTH_ERR_LOADING';
130 use constant XSLTH_ERR_4 => 'XSLTH_ERR_PARSING_CODE';
131 use constant XSLTH_ERR_5 => 'XSLTH_ERR_PARSING_DATA';
132 use constant XSLTH_ERR_6 => 'XSLTH_ERR_TRANSFORMING';
133 use constant XSLTH_ERR_7 => 'XSLTH_NO_STRING_PASSED';
135 =head2 transform
137 my $output= $xslt_engine->transform( $xml, $xsltfilename, [$format] );
138 #Alternatively:
139 #$output = $xslt_engine->transform({ xml => $xml, file => $file, [parameters => $parameters], [format => ['chars'|'bytes'|'xmldoc']] });
140 #$output = $xslt_engine->transform({ xml => $xml, code => $code, [parameters => $parameters], [format => ['chars'|'bytes'|'xmldoc']] });
141 if( $xslt_engine->err ) {
142 #decide what to do on failure..
144 my $output2= $xslt_engine->transform( $xml2 );
146 Pass a xml string and a fully qualified path of a XSLT file.
147 Instead of a filename, you may also pass a URL.
148 You may also pass the contents of a xsl file as a string like $code above.
149 If you do not pass a filename, the last file used is assumed.
150 Normally returns the transformed string; if you pass format => 'xmldoc' in
151 the hash format, it returns a xml document object.
152 Check the error number in err to know if something went wrong.
153 In that case do_not_return_source did determine the return value.
155 =cut
157 sub transform {
158 my $self = shift;
160 #check parameters
161 # old style: $xml, $filename, $format
162 # new style: $hashref
163 my ( $xml, $filename, $xsltcode, $format );
164 my $parameters = {};
165 if( ref $_[0] eq 'HASH' ) {
166 $xml = $_[0]->{xml};
167 $xsltcode = $_[0]->{code};
168 $filename = $_[0]->{file} if !$xsltcode; #xsltcode gets priority
169 $parameters = $_[0]->{parameters} if ref $_[0]->{parameters} eq 'HASH';
170 $format = $_[0]->{format} || 'chars';
171 } else {
172 ( $xml, $filename, $format ) = @_;
173 $format ||= 'chars';
176 #Initialized yet?
177 if ( !$self->{xslt_hash} ) {
178 $self->_init;
180 else {
181 $self->_set_error; #clear last error
183 my $retval = $self->{do_not_return_source} ? undef : $xml;
185 #check if no string passed
186 if ( !defined $xml ) {
187 $self->_set_error( XSLTH_ERR_7 );
188 return; #always undef
191 #load stylesheet
192 my $key = $self->_load( $filename, $xsltcode );
193 my $stsh = $key? $self->{xslt_hash}->{$key}: undef;
194 return $retval if $self->{err};
196 #parse input and transform
197 my $parser = XML::LibXML->new();
198 my $source = eval { $parser->parse_string($xml) };
199 if ($@) {
200 $self->_set_error( XSLTH_ERR_5, $@ );
201 return $retval;
203 my $result = eval {
204 #$parameters is an optional hashref that contains
205 #key-value pairs to be sent to the XSLT.
206 #Numbers may be bare but strings must be double quoted
207 #(e.g. "'string'" or '"string"'). See XML::LibXSLT for
208 #more details.
210 #NOTE: Parameters are not cached. They are provided for
211 #each different transform.
212 my $transformed = $stsh->transform($source, %$parameters);
213 $format eq 'bytes'
214 ? $stsh->output_as_bytes( $transformed )
215 : $format eq 'xmldoc'
216 ? $transformed
217 : $stsh->output_as_chars( $transformed ); # default: chars
219 if ($@) {
220 $self->_set_error( XSLTH_ERR_6, $@ );
221 return $retval;
223 $self->{last_xsltfile} = $key;
224 return $result;
227 =head2 refresh
229 $xslt_engine->refresh;
230 $xslt_engine->refresh( $xsltfilename );
232 Pass a file for an individual refresh or no file to refresh all.
233 Refresh returns the number of items affected.
234 What we actually do, is just clear the internal cache for reloading next
235 time when transform is called.
236 The return value is mainly theoretical. Since this is supposed to work
237 always(...), there is no actual need to test it.
238 Note that refresh does also clear the error information.
240 =cut
242 sub refresh {
243 my ( $self, $file ) = @_;
244 $self->_set_error;
245 return if !$self->{xslt_hash};
246 my $rv;
247 if ($file) {
248 $rv = delete $self->{xslt_hash}->{$file} ? 1 : 0;
250 else {
251 $rv = scalar keys %{ $self->{xslt_hash} };
252 $self->{xslt_hash} = {};
254 return $rv;
257 # ************** INTERNAL ROUTINES ********************************************
259 # _init
260 # Internal routine for initialization.
262 sub _init {
263 my $self = shift;
265 $self->_set_error;
266 $self->{xslt_hash} = {};
267 $self->{print_warns} = exists $self->{print_warns}
268 ? $self->{print_warns}
269 : $ENV{DEBUG} // 0;
270 $self->{do_not_return_source} = 0
271 unless exists $self->{do_not_return_source};
273 #by default we return source on a failing transformation
274 #but it could be passed at construction time already
275 return;
278 # _load
279 # Internal routine for loading a new stylesheet.
281 sub _load {
282 my ( $self, $filename, $code ) = @_;
283 my ( $digest, $codelen, $salt, $rv );
284 $salt = 'AZ'; #just a constant actually
286 #If no file or code passed, use the last file again
287 if ( !$filename && !$code ) {
288 my $last = $self->{last_xsltfile};
289 if ( !$last || !exists $self->{xslt_hash}->{$last} ) {
290 $self->_set_error( XSLTH_ERR_1 );
291 return;
293 return $last;
296 #check if it is loaded already
297 if( $code ) {
298 $codelen = length( $code );
299 $digest = eval { crypt($code, $salt) };
300 if( $digest && exists $self->{xslt_hash}->{$digest.$codelen} ) {
301 return $digest.$codelen;
303 } elsif( $filename && exists $self->{xslt_hash}->{$filename} ) {
304 return $filename;
307 #Check file existence (skipping URLs)
308 if( $filename && $filename !~ /^https?:\/\// && !-e $filename ) {
309 $self->_set_error( XSLTH_ERR_2 );
310 return;
313 #load sheet
314 my $parser = XML::LibXML->new;
315 my $style_doc = eval {
316 $parser->load_xml( $self->_load_xml_args($filename, $code) )
318 if ($@) {
319 $self->_set_error( XSLTH_ERR_3, $@ );
320 return;
323 #parse sheet
324 my $xslt = XML::LibXSLT->new;
325 $rv = $code? $digest.$codelen: $filename;
326 $self->{xslt_hash}->{$rv} = eval { $xslt->parse_stylesheet($style_doc) };
327 if ($@) {
328 $self->_set_error( XSLTH_ERR_4, $@ );
329 delete $self->{xslt_hash}->{$rv};
330 return;
332 return $rv;
335 sub _load_xml_args {
336 my $self = shift;
337 return $_[1]? { 'string' => $_[1]//'' }: { 'location' => $_[0]//'' };
340 # _set_error
341 # Internal routine for handling error information.
343 sub _set_error {
344 my ( $self, $errcode, $warn ) = @_;
346 $self->{err} = $errcode; #set or clear error
347 warn 'XSLT_Handler: '. $warn if $warn && $self->{print_warns};
350 =head1 AUTHOR
352 Marcel de Rooy, Rijksmuseum Netherlands
354 =cut