Bug 12648: Fix conflict with bug 8096
[koha.git] / opac / oai.pl
blob137a5f65990d1dc634174068fdcc9de655a57d38
1 #!/usr/bin/perl
3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 use strict;
22 use warnings;
24 use CGI qw( :standard -oldstyle_urls -utf8 );
25 use vars qw( $GZIP );
26 use C4::Context;
29 BEGIN {
30 eval { require PerlIO::gzip };
31 $GZIP = ($@) ? 0 : 1;
34 unless ( C4::Context->preference('OAI-PMH') ) {
35 print
36 header(
37 -type => 'text/plain; charset=utf-8',
38 -charset => 'utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
42 exit;
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
47 print header(
48 -type => 'text/xml; charset=utf-8',
49 -charset => 'utf-8',
50 -Content-Encoding => 'gzip',
52 binmode( STDOUT, ":gzip" );
54 else {
55 print header(
56 -type => 'text/xml; charset=utf-8',
57 -charset => 'utf-8',
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
64 # __END__ Main Prog
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
70 # - metadataPrefix
71 # - from
72 # - until
73 # - offset
75 package C4::OAI::ResumptionToken;
77 use strict;
78 use warnings;
79 use HTTP::OAI;
81 use base ("HTTP::OAI::ResumptionToken");
84 sub new {
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
94 else {
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
98 unless ( $until) {
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 $offset = $args{ offset } || 0;
103 $set = $args{set};
106 $self->{ metadata_prefix } = $metadata_prefix;
107 $self->{ offset } = $offset;
108 $self->{ from } = $from;
109 $self->{ until } = $until;
110 $self->{ set } = $set;
112 $self->resumptionToken(
113 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
114 $self->cursor( $offset );
116 return $self;
119 # __END__ C4::OAI::ResumptionToken
123 package C4::OAI::Identify;
125 use strict;
126 use warnings;
127 use HTTP::OAI;
128 use C4::Context;
130 use base ("HTTP::OAI::Identify");
132 sub new {
133 my ($class, $repository) = @_;
135 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
136 my $self = $class->SUPER::new(
137 baseURL => $baseURL,
138 repositoryName => C4::Context->preference("LibraryName"),
139 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
140 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
141 granularity => 'YYYY-MM-DD',
142 earliestDatestamp => '0001-01-01',
143 deletedRecord => 'no',
146 # FIXME - alas, the description element is not so simple; to validate
147 # against the OAI-PMH schema, it cannot contain just a string,
148 # but one or more elements that validate against another XML schema.
149 # For now, simply omitting it.
150 # $self->description( "Koha OAI Repository" );
152 $self->compression( 'gzip' );
154 return $self;
157 # __END__ C4::OAI::Identify
161 package C4::OAI::ListMetadataFormats;
163 use strict;
164 use warnings;
165 use HTTP::OAI;
167 use base ("HTTP::OAI::ListMetadataFormats");
169 sub new {
170 my ($class, $repository) = @_;
172 my $self = $class->SUPER::new();
174 if ( $repository->{ conf } ) {
175 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
176 my $format = $repository->{ conf }->{ format }->{ $name };
177 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
178 metadataPrefix => $format->{metadataPrefix},
179 schema => $format->{schema},
180 metadataNamespace => $format->{metadataNamespace}, ) );
183 else {
184 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
185 metadataPrefix => 'oai_dc',
186 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
187 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
188 ) );
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => 'marcxml',
191 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
192 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
193 ) );
196 return $self;
199 # __END__ C4::OAI::ListMetadataFormats
203 package C4::OAI::Record;
205 use strict;
206 use warnings;
207 use HTTP::OAI;
208 use HTTP::OAI::Metadata::OAI_DC;
210 use base ("HTTP::OAI::Record");
212 sub new {
213 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
215 my $self = $class->SUPER::new(%args);
217 $timestamp =~ s/ /T/, $timestamp .= 'Z';
218 $self->header( new HTTP::OAI::Header(
219 identifier => $args{identifier},
220 datestamp => $timestamp,
221 ) );
223 foreach my $setSpec (@$setSpecs) {
224 $self->header->setSpec($setSpec);
227 my $parser = XML::LibXML->new();
228 my $record_dom = $parser->parse_string( $marcxml );
229 my $format = $args{metadataPrefix};
230 if ( $format ne 'marcxml' ) {
231 my %args = (
232 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
234 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
236 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
238 return $self;
241 # __END__ C4::OAI::Record
245 package C4::OAI::GetRecord;
247 use strict;
248 use warnings;
249 use HTTP::OAI;
250 use C4::OAI::Sets;
252 use base ("HTTP::OAI::GetRecord");
255 sub new {
256 my ($class, $repository, %args) = @_;
258 my $self = HTTP::OAI::GetRecord->new(%args);
260 my $dbh = C4::Context->dbh;
261 my $sth = $dbh->prepare("
262 SELECT marcxml, timestamp
263 FROM biblioitems
264 WHERE biblionumber=? " );
265 my $prefix = $repository->{koha_identifier} . ':';
266 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
267 $sth->execute( $biblionumber );
268 my ($marcxml, $timestamp);
269 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
270 return HTTP::OAI::Response->new(
271 requestURL => $repository->self_url(),
272 errors => [ new HTTP::OAI::Error(
273 code => 'idDoesNotExist',
274 message => "There is no biblio record with this identifier",
275 ) ] ,
279 my $oai_sets = GetOAISetsBiblio($biblionumber);
280 my @setSpecs;
281 foreach (@$oai_sets) {
282 push @setSpecs, $_->{spec};
285 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
286 $self->record( C4::OAI::Record->new(
287 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
289 return $self;
292 # __END__ C4::OAI::GetRecord
296 package C4::OAI::ListIdentifiers;
298 use strict;
299 use warnings;
300 use HTTP::OAI;
301 use C4::OAI::Sets;
303 use base ("HTTP::OAI::ListIdentifiers");
306 sub new {
307 my ($class, $repository, %args) = @_;
309 my $self = HTTP::OAI::ListIdentifiers->new(%args);
311 my $token = new C4::OAI::ResumptionToken( %args );
312 my $dbh = C4::Context->dbh;
313 my $set;
314 if(defined $token->{'set'}) {
315 $set = GetOAISetBySpec($token->{'set'});
317 my $max = $repository->{koha_max_count};
318 my $sql = "
319 SELECT biblioitems.biblionumber, biblioitems.timestamp
320 FROM biblioitems
322 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
323 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
324 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
325 $sql .= "
326 LIMIT " . ($max+1) . "
327 OFFSET $token->{offset}
329 my $sth = $dbh->prepare( $sql );
330 my @bind_params = ($token->{'from'}, $token->{'until'});
331 push @bind_params, $set->{'id'} if defined $set;
332 $sth->execute( @bind_params );
334 my $count = 0;
335 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
336 $count++;
337 if ( $count > $max ) {
338 $self->resumptionToken(
339 new C4::OAI::ResumptionToken(
340 metadataPrefix => $token->{metadata_prefix},
341 from => $token->{from},
342 until => $token->{until},
343 offset => $token->{offset} + $max,
344 set => $token->{set}
347 last;
349 $timestamp =~ s/ /T/, $timestamp .= 'Z';
350 $self->identifier( new HTTP::OAI::Header(
351 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
352 datestamp => $timestamp,
353 ) );
356 return $self;
359 # __END__ C4::OAI::ListIdentifiers
361 package C4::OAI::Description;
363 use strict;
364 use warnings;
365 use HTTP::OAI;
366 use HTTP::OAI::SAXHandler qw/ :SAX /;
368 sub new {
369 my ( $class, %args ) = @_;
371 my $self = {};
373 if(my $setDescription = $args{setDescription}) {
374 $self->{setDescription} = $setDescription;
376 if(my $handler = $args{handler}) {
377 $self->{handler} = $handler;
380 bless $self, $class;
381 return $self;
384 sub set_handler {
385 my ( $self, $handler ) = @_;
387 $self->{handler} = $handler if $handler;
389 return $self;
392 sub generate {
393 my ( $self ) = @_;
395 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
397 return $self;
400 # __END__ C4::OAI::Description
402 package C4::OAI::ListSets;
404 use strict;
405 use warnings;
406 use HTTP::OAI;
407 use C4::OAI::Sets;
409 use base ("HTTP::OAI::ListSets");
411 sub new {
412 my ( $class, $repository, %args ) = @_;
414 my $self = HTTP::OAI::ListSets->new(%args);
416 my $token = C4::OAI::ResumptionToken->new(%args);
417 my $sets = GetOAISets;
418 my $pos = 0;
419 foreach my $set (@$sets) {
420 if ($pos < $token->{offset}) {
421 $pos++;
422 next;
424 my @descriptions;
425 foreach my $desc (@{$set->{'descriptions'}}) {
426 push @descriptions, C4::OAI::Description->new(
427 setDescription => $desc,
430 $self->set(
431 HTTP::OAI::Set->new(
432 setSpec => $set->{'spec'},
433 setName => $set->{'name'},
434 setDescription => \@descriptions,
437 $pos++;
438 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
441 $self->resumptionToken(
442 new C4::OAI::ResumptionToken(
443 metadataPrefix => $token->{metadata_prefix},
444 offset => $pos
446 ) if ( $pos > $token->{offset} );
448 return $self;
451 # __END__ C4::OAI::ListSets;
453 package C4::OAI::ListRecords;
455 use strict;
456 use warnings;
457 use HTTP::OAI;
458 use C4::OAI::Sets;
460 use base ("HTTP::OAI::ListRecords");
463 sub new {
464 my ($class, $repository, %args) = @_;
466 my $self = HTTP::OAI::ListRecords->new(%args);
468 my $token = new C4::OAI::ResumptionToken( %args );
469 my $dbh = C4::Context->dbh;
470 my $set;
471 if(defined $token->{'set'}) {
472 $set = GetOAISetBySpec($token->{'set'});
474 my $max = $repository->{koha_max_count};
475 my $sql = "
476 SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
477 FROM biblioitems
479 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
480 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
481 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
482 $sql .= "
483 LIMIT " . ($max + 1) . "
484 OFFSET $token->{offset}
487 my $sth = $dbh->prepare( $sql );
488 my @bind_params = ($token->{'from'}, $token->{'until'});
489 push @bind_params, $set->{'id'} if defined $set;
490 $sth->execute( @bind_params );
492 my $count = 0;
493 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
494 $count++;
495 if ( $count > $max ) {
496 $self->resumptionToken(
497 new C4::OAI::ResumptionToken(
498 metadataPrefix => $token->{metadata_prefix},
499 from => $token->{from},
500 until => $token->{until},
501 offset => $token->{offset} + $max,
502 set => $token->{set}
505 last;
507 my $oai_sets = GetOAISetsBiblio($biblionumber);
508 my @setSpecs;
509 foreach (@$oai_sets) {
510 push @setSpecs, $_->{spec};
512 $self->record( C4::OAI::Record->new(
513 $repository, $marcxml, $timestamp, \@setSpecs,
514 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
515 metadataPrefix => $token->{metadata_prefix}
516 ) );
519 return $self;
522 # __END__ C4::OAI::ListRecords
526 package C4::OAI::Repository;
528 use base ("HTTP::OAI::Repository");
530 use strict;
531 use warnings;
533 use HTTP::OAI;
534 use HTTP::OAI::Repository qw/:validate/;
536 use XML::SAX::Writer;
537 use XML::LibXML;
538 use XML::LibXSLT;
539 use YAML::Syck qw( LoadFile );
540 use CGI qw/:standard -oldstyle_urls/;
542 use C4::Context;
543 use C4::Biblio;
546 sub new {
547 my ($class, %args) = @_;
548 my $self = $class->SUPER::new(%args);
550 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
551 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
552 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
553 $self->{ koha_stylesheet } = { }; # Build when needed
555 # Load configuration file if defined in OAI-PMH:ConfFile syspref
556 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
557 $self->{ conf } = LoadFile( $file );
558 my @formats = keys %{ $self->{conf}->{format} };
559 $self->{ koha_metadata_format } = \@formats;
562 # Check for grammatical errors in the request
563 my @errs = validate_request( CGI::Vars() );
565 # Is metadataPrefix supported by the respository?
566 my $mdp = param('metadataPrefix') || '';
567 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
568 push @errs, new HTTP::OAI::Error(
569 code => 'cannotDisseminateFormat',
570 message => "Dissemination as '$mdp' is not supported",
574 my $response;
575 if ( @errs ) {
576 $response = HTTP::OAI::Response->new(
577 requestURL => self_url(),
578 errors => \@errs,
581 else {
582 my %attr = CGI::Vars();
583 my $verb = delete( $attr{verb} );
584 if ( $verb eq 'ListSets' ) {
585 $response = C4::OAI::ListSets->new($self, %attr);
587 elsif ( $verb eq 'Identify' ) {
588 $response = C4::OAI::Identify->new( $self );
590 elsif ( $verb eq 'ListMetadataFormats' ) {
591 $response = C4::OAI::ListMetadataFormats->new( $self );
593 elsif ( $verb eq 'GetRecord' ) {
594 $response = C4::OAI::GetRecord->new( $self, %attr );
596 elsif ( $verb eq 'ListRecords' ) {
597 $response = C4::OAI::ListRecords->new( $self, %attr );
599 elsif ( $verb eq 'ListIdentifiers' ) {
600 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
604 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
605 $response->generate;
607 bless $self, $class;
608 return $self;
612 sub stylesheet {
613 my ( $self, $format ) = @_;
615 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
616 unless ( $stylesheet ) {
617 my $xsl_file = $self->{ conf }
618 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
619 : ( C4::Context->config('intrahtdocs') .
620 '/prog/en/xslt/' .
621 C4::Context->preference('marcflavour') .
622 'slim2OAIDC.xsl' );
623 my $parser = XML::LibXML->new();
624 my $xslt = XML::LibXSLT->new();
625 my $style_doc = $parser->parse_file( $xsl_file );
626 $stylesheet = $xslt->parse_stylesheet( $style_doc );
627 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
630 return $stylesheet;
635 =head1 NAME
637 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
639 =head1 SYNOPSIS
641 use C4::OAI::Repository;
643 my $repository = C4::OAI::Repository->new();
645 =head1 DESCRIPTION
647 This object extend HTTP::OAI::Repository object.
648 It accepts OAI-PMH HTTP requests and returns result.
650 This OAI-PMH server can operate in a simple mode and extended one.
652 In simple mode, repository configuration comes entirely from Koha system
653 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
654 records in marcxml or dublin core format. Dublin core records are created from
655 koha marcxml records tranformed with XSLT. Used XSL file is located in
656 koha-tmpl/intranet-tmpl/prog/en/xslt directory and choosed based on marcflavour,
657 respecively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
658 UNIMARC.
660 In extende mode, it's possible to parameter other format than marcxml or Dublin
661 Core. A new syspref OAI-PMH:ConfFile specify a YAML configuration file which
662 list available metadata formats and XSL file used to create them from marcxml
663 records. If this syspref isn't set, Koha OAI server works in simple mode. A
664 configuration file koha-oai.conf can look like that:
667 format:
669 metadataPrefix: vs
670 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
671 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
672 xsl_file: /usr/local/koha/xslt/vs.xsl
673 marcxml:
674 metadataPrefix: marxml
675 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
676 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
677 oai_dc:
678 metadataPrefix: oai_dc
679 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
680 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
681 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl
683 =cut