Update release notes for 19.05.09 release
[koha.git] / Koha / XSLT_Handler.pm
blob3d97f8ceae6a0b52e9ed0144cbff77583fcc06d2
1 package Koha::XSLT_Handler;
3 # Copyright 2014, 2019 Rijksmuseum, Prosentient Systems
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 3 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 =head1 NAME
22 Koha::XSLT_Handler - Facilitate use of XSLT transformations
24 =head1 SYNOPSIS
26 use Koha::XSLT_Handler;
27 my $xslt_engine = Koha::XSLT_Handler->new;
28 my $output = $xslt_engine->transform($xml, $xsltfilename);
29 $output = $xslt_engine->transform({ xml => $xml, file => $file });
30 $output = $xslt_engine->transform({ xml => $xml, code => $code });
31 my $err= $xslt_engine->err; # error code
32 $xslt_engine->refresh($xsltfilename);
34 =head1 DESCRIPTION
36 A XSLT handler object on top of LibXML and LibXSLT, allowing you to
37 run XSLT stylesheets repeatedly without loading them again.
38 Errors occurring during loading, parsing or transforming are reported
39 via the err attribute.
40 Reloading XSLT files can be done with the refresh method.
42 =head1 METHODS
44 =head2 new
46 Create handler object
48 =head2 transform
50 Run transformation for specific string and stylesheet
52 =head2 refresh
54 Allow to reload stylesheets when transforming again
56 =head1 PROPERTIES
58 =head2 err
60 Error code (see list of ERROR CODES)
62 =head2 do_not_return_source
64 If true, transform returns undef on failure. By default, it returns the
65 original string passed. Errors are reported as described.
67 =head2 print_warns
69 If set, print error messages to STDERR. False by default. Looks at the
70 DEBUG environment variable too.
72 =head1 ERROR CODES
74 =head2 Error XSLTH_ERR_NO_FILE
76 No XSLT file passed
78 =head2 Error XSLTH_ERR_FILE_NOT_FOUND
80 XSLT file not found
82 =head2 Error XSLTH_ERR_LOADING
84 Error while loading stylesheet xml: [optional warnings]
86 =head2 Error XSLTH_ERR_PARSING_CODE
88 Error while parsing stylesheet: [optional warnings]
90 =head2 Error XSLTH_ERR_PARSING_DATA
92 Error while parsing input: [optional warnings]
94 =head2 Error XSLTH_ERR_TRANSFORMING
96 Error while transforming input: [optional warnings]
98 =head2 Error XSLTH_NO_STRING_PASSED
100 No string to transform
102 =head1 INTERNALS
104 For documentation purposes. You are not encouraged to access them.
106 =head2 last_xsltfile
108 Contains the last successfully executed XSLT filename
110 =head2 xslt_hash
112 Hash reference to loaded stylesheets
114 =head1 ADDITIONAL COMMENTS
116 =cut
118 use Modern::Perl;
119 use XML::LibXML;
120 use XML::LibXSLT;
121 use Koha::XSLT::Security;
123 use base qw(Class::Accessor);
125 __PACKAGE__->mk_ro_accessors(qw( err ));
126 __PACKAGE__->mk_accessors(qw( do_not_return_source print_warns ));
128 use constant XSLTH_ERR_1 => 'XSLTH_ERR_NO_FILE';
129 use constant XSLTH_ERR_2 => 'XSLTH_ERR_FILE_NOT_FOUND';
130 use constant XSLTH_ERR_3 => 'XSLTH_ERR_LOADING';
131 use constant XSLTH_ERR_4 => 'XSLTH_ERR_PARSING_CODE';
132 use constant XSLTH_ERR_5 => 'XSLTH_ERR_PARSING_DATA';
133 use constant XSLTH_ERR_6 => 'XSLTH_ERR_TRANSFORMING';
134 use constant XSLTH_ERR_7 => 'XSLTH_NO_STRING_PASSED';
136 =head2 new
138 my $xslt_engine = Koha::XSLT_Handler->new;
140 =cut
142 sub new {
143 my ($class, $params) = @_;
144 my $self = $class->SUPER::new($params);
145 $self->{_security} = Koha::XSLT::Security->new;
146 $self->{_security}->register_callbacks;
147 return $self;
150 =head2 transform
152 my $output= $xslt_engine->transform( $xml, $xsltfilename, [$format] );
153 #Alternatively:
154 #$output = $xslt_engine->transform({ xml => $xml, file => $file, [parameters => $parameters], [format => ['chars'|'bytes'|'xmldoc']] });
155 #$output = $xslt_engine->transform({ xml => $xml, code => $code, [parameters => $parameters], [format => ['chars'|'bytes'|'xmldoc']] });
156 if( $xslt_engine->err ) {
157 #decide what to do on failure..
159 my $output2= $xslt_engine->transform( $xml2 );
161 Pass a xml string and a fully qualified path of a XSLT file.
162 Instead of a filename, you may also pass a URL.
163 You may also pass the contents of a xsl file as a string like $code above.
164 If you do not pass a filename, the last file used is assumed.
165 Normally returns the transformed string; if you pass format => 'xmldoc' in
166 the hash format, it returns a xml document object.
167 Check the error number in err to know if something went wrong.
168 In that case do_not_return_source did determine the return value.
170 =cut
172 sub transform {
173 my $self = shift;
175 #check parameters
176 # old style: $xml, $filename, $format
177 # new style: $hashref
178 my ( $xml, $filename, $xsltcode, $format );
179 my $parameters = {};
180 if( ref $_[0] eq 'HASH' ) {
181 $xml = $_[0]->{xml};
182 $xsltcode = $_[0]->{code};
183 $filename = $_[0]->{file} if !$xsltcode; #xsltcode gets priority
184 $parameters = $_[0]->{parameters} if ref $_[0]->{parameters} eq 'HASH';
185 $format = $_[0]->{format} || 'chars';
186 } else {
187 ( $xml, $filename, $format ) = @_;
188 $format ||= 'chars';
191 #Initialized yet?
192 if ( !$self->{xslt_hash} ) {
193 $self->_init;
195 else {
196 $self->_set_error; #clear last error
198 my $retval = $self->{do_not_return_source} ? undef : $xml;
200 #check if no string passed
201 if ( !defined $xml ) {
202 $self->_set_error( XSLTH_ERR_7 );
203 return; #always undef
206 #load stylesheet
207 my $key = $self->_load( $filename, $xsltcode );
208 my $stsh = $key? $self->{xslt_hash}->{$key}: undef;
209 return $retval if $self->{err};
211 #parse input and transform
212 my $parser = XML::LibXML->new();
213 $self->{_security}->set_parser_options($parser);
214 my $source = eval { $parser->parse_string($xml) };
215 if ($@) {
216 $self->_set_error( XSLTH_ERR_5, $@ );
217 return $retval;
219 my $result = eval {
220 #$parameters is an optional hashref that contains
221 #key-value pairs to be sent to the XSLT.
222 #Numbers may be bare but strings must be double quoted
223 #(e.g. "'string'" or '"string"'). See XML::LibXSLT for
224 #more details.
226 #NOTE: Parameters are not cached. They are provided for
227 #each different transform.
228 my $transformed = $stsh->transform($source, %$parameters);
229 $format eq 'bytes'
230 ? $stsh->output_as_bytes( $transformed )
231 : $format eq 'xmldoc'
232 ? $transformed
233 : $stsh->output_as_chars( $transformed ); # default: chars
235 if ($@) {
236 $self->_set_error( XSLTH_ERR_6, $@ );
237 return $retval;
239 $self->{last_xsltfile} = $key;
240 return $result;
243 =head2 refresh
245 $xslt_engine->refresh;
246 $xslt_engine->refresh( $xsltfilename );
248 Pass a file for an individual refresh or no file to refresh all.
249 Refresh returns the number of items affected.
250 What we actually do, is just clear the internal cache for reloading next
251 time when transform is called.
252 The return value is mainly theoretical. Since this is supposed to work
253 always(...), there is no actual need to test it.
254 Note that refresh does also clear the error information.
256 =cut
258 sub refresh {
259 my ( $self, $file ) = @_;
260 $self->_set_error;
261 return if !$self->{xslt_hash};
262 my $rv;
263 if ($file) {
264 $rv = delete $self->{xslt_hash}->{$file} ? 1 : 0;
266 else {
267 $rv = scalar keys %{ $self->{xslt_hash} };
268 $self->{xslt_hash} = {};
270 return $rv;
273 # ************** INTERNAL ROUTINES ********************************************
275 # _init
276 # Internal routine for initialization.
278 sub _init {
279 my $self = shift;
281 $self->_set_error;
282 $self->{xslt_hash} = {};
283 $self->{print_warns} = 1 unless exists $self->{print_warns};
284 $self->{do_not_return_source} = 0
285 unless exists $self->{do_not_return_source};
287 #by default we return source on a failing transformation
288 #but it could be passed at construction time already
289 return;
292 # _load
293 # Internal routine for loading a new stylesheet.
295 sub _load {
296 my ( $self, $filename, $code ) = @_;
297 my ( $digest, $codelen, $salt, $rv );
298 $salt = 'AZ'; #just a constant actually
300 #If no file or code passed, use the last file again
301 if ( !$filename && !$code ) {
302 my $last = $self->{last_xsltfile};
303 if ( !$last || !exists $self->{xslt_hash}->{$last} ) {
304 $self->_set_error( XSLTH_ERR_1 );
305 return;
307 return $last;
310 #check if it is loaded already
311 if( $code ) {
312 $codelen = length( $code );
313 $digest = eval { crypt($code, $salt) };
314 if( $digest && exists $self->{xslt_hash}->{$digest.$codelen} ) {
315 return $digest.$codelen;
317 } elsif( $filename && exists $self->{xslt_hash}->{$filename} ) {
318 return $filename;
321 #Check file existence (skipping URLs)
322 if( $filename && $filename !~ /^https?:\/\// && !-e $filename ) {
323 $self->_set_error( XSLTH_ERR_2 );
324 return;
327 #load sheet
328 my $parser = XML::LibXML->new;
329 $self->{_security}->set_parser_options($parser);
330 my $style_doc = eval {
331 $parser->load_xml( $self->_load_xml_args($filename, $code) )
333 if ($@) {
334 $self->_set_error( XSLTH_ERR_3, $@ );
335 return;
338 #parse sheet
339 my $xslt = XML::LibXSLT->new;
340 $self->{_security}->set_callbacks($xslt);
342 $rv = $code? $digest.$codelen: $filename;
343 $self->{xslt_hash}->{$rv} = eval { $xslt->parse_stylesheet($style_doc) };
344 if ($@) {
345 $self->_set_error( XSLTH_ERR_4, $@ );
346 delete $self->{xslt_hash}->{$rv};
347 return;
349 return $rv;
352 sub _load_xml_args {
353 my $self = shift;
354 return $_[1]? { 'string' => $_[1]//'' }: { 'location' => $_[0]//'' };
357 # _set_error
358 # Internal routine for handling error information.
360 sub _set_error {
361 my ( $self, $errcode, $warn ) = @_;
363 $self->{err} = $errcode; #set or clear error
364 warn 'XSLT_Handler: '. $warn if $warn && $self->{print_warns};
367 =head1 AUTHOR
369 Marcel de Rooy, Rijksmuseum Netherlands
370 David Cook, Prosentient Systems
372 =cut