Bug 11944: (follow-up) Convert DB tables to utf8_unicode_ci
[koha.git] / opac / oai.pl
blobd6ef2e9539101411af23499c85b5552255cfb329
1 #!/usr/bin/perl
3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it under the
8 # terms of the GNU General Public License as published by the Free Software
9 # Foundation; either version 2 of the License, or (at your option) any later
10 # version.
12 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
14 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License along
17 # with Koha; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 use strict;
22 use warnings;
24 use CGI qw( :standard -oldstyle_urls -utf8 );
25 use vars qw( $GZIP );
26 use C4::Context;
29 BEGIN {
30 eval { require PerlIO::gzip };
31 $GZIP = ($@) ? 0 : 1;
34 unless ( C4::Context->preference('OAI-PMH') ) {
35 print
36 header(
37 -type => 'text/plain; charset=utf-8',
38 -charset => 'utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
42 exit;
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
47 print header(
48 -type => 'text/xml; charset=utf-8',
49 -charset => 'utf-8',
50 -Content-Encoding => 'gzip',
52 binmode( STDOUT, ":gzip" );
54 else {
55 print header(
56 -type => 'text/xml; charset=utf-8',
57 -charset => 'utf-8',
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
64 # __END__ Main Prog
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
70 # - metadataPrefix
71 # - from
72 # - until
73 # - offset
75 package C4::OAI::ResumptionToken;
77 use strict;
78 use warnings;
79 use HTTP::OAI;
81 use base ("HTTP::OAI::ResumptionToken");
84 sub new {
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
94 else {
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
98 unless ( $until) {
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 $offset = $args{ offset } || 0;
103 $set = $args{set};
106 $self->{ metadata_prefix } = $metadata_prefix;
107 $self->{ offset } = $offset;
108 $self->{ from } = $from;
109 $self->{ until } = $until;
110 $self->{ set } = $set;
112 $self->resumptionToken(
113 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
114 $self->cursor( $offset );
116 return $self;
119 # __END__ C4::OAI::ResumptionToken
123 package C4::OAI::Identify;
125 use strict;
126 use warnings;
127 use HTTP::OAI;
128 use C4::Context;
130 use base ("HTTP::OAI::Identify");
132 sub new {
133 my ($class, $repository) = @_;
135 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
136 my $self = $class->SUPER::new(
137 baseURL => $baseURL,
138 repositoryName => C4::Context->preference("LibraryName"),
139 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
140 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
141 granularity => 'YYYY-MM-DD',
142 earliestDatestamp => '0001-01-01',
143 deletedRecord => 'no',
146 # FIXME - alas, the description element is not so simple; to validate
147 # against the OAI-PMH schema, it cannot contain just a string,
148 # but one or more elements that validate against another XML schema.
149 # For now, simply omitting it.
150 # $self->description( "Koha OAI Repository" );
152 $self->compression( 'gzip' );
154 return $self;
157 # __END__ C4::OAI::Identify
161 package C4::OAI::ListMetadataFormats;
163 use strict;
164 use warnings;
165 use HTTP::OAI;
167 use base ("HTTP::OAI::ListMetadataFormats");
169 sub new {
170 my ($class, $repository) = @_;
172 my $self = $class->SUPER::new();
174 if ( $repository->{ conf } ) {
175 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
176 my $format = $repository->{ conf }->{ format }->{ $name };
177 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
178 metadataPrefix => $format->{metadataPrefix},
179 schema => $format->{schema},
180 metadataNamespace => $format->{metadataNamespace}, ) );
183 else {
184 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
185 metadataPrefix => 'oai_dc',
186 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
187 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
188 ) );
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => 'marcxml',
191 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
192 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
193 ) );
196 return $self;
199 # __END__ C4::OAI::ListMetadataFormats
203 package C4::OAI::Record;
205 use strict;
206 use warnings;
207 use HTTP::OAI;
208 use HTTP::OAI::Metadata::OAI_DC;
210 use base ("HTTP::OAI::Record");
212 sub new {
213 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
215 my $self = $class->SUPER::new(%args);
217 $timestamp =~ s/ /T/, $timestamp .= 'Z';
218 $self->header( new HTTP::OAI::Header(
219 identifier => $args{identifier},
220 datestamp => $timestamp,
221 ) );
223 foreach my $setSpec (@$setSpecs) {
224 $self->header->setSpec($setSpec);
227 my $parser = XML::LibXML->new();
228 my $record_dom = $parser->parse_string( $marcxml );
229 my $format = $args{metadataPrefix};
230 if ( $format ne 'marcxml' ) {
231 my %args = (
232 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
234 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
236 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
238 return $self;
241 # __END__ C4::OAI::Record
245 package C4::OAI::GetRecord;
247 use strict;
248 use warnings;
249 use HTTP::OAI;
250 use C4::OAI::Sets;
252 use base ("HTTP::OAI::GetRecord");
255 sub new {
256 my ($class, $repository, %args) = @_;
258 my $self = HTTP::OAI::GetRecord->new(%args);
260 my $dbh = C4::Context->dbh;
261 my $sth = $dbh->prepare("
262 SELECT marcxml, timestamp
263 FROM biblioitems
264 WHERE biblionumber=? " );
265 my $prefix = $repository->{koha_identifier} . ':';
266 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
267 $sth->execute( $biblionumber );
268 my ($marcxml, $timestamp);
269 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
270 return HTTP::OAI::Response->new(
271 requestURL => $repository->self_url(),
272 errors => [ new HTTP::OAI::Error(
273 code => 'idDoesNotExist',
274 message => "There is no biblio record with this identifier",
275 ) ] ,
279 my $oai_sets = GetOAISetsBiblio($biblionumber);
280 my @setSpecs;
281 foreach (@$oai_sets) {
282 push @setSpecs, $_->{spec};
285 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
286 $self->record( C4::OAI::Record->new(
287 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
289 return $self;
292 # __END__ C4::OAI::GetRecord
296 package C4::OAI::ListIdentifiers;
298 use strict;
299 use warnings;
300 use HTTP::OAI;
301 use C4::OAI::Sets;
303 use base ("HTTP::OAI::ListIdentifiers");
306 sub new {
307 my ($class, $repository, %args) = @_;
309 my $self = HTTP::OAI::ListIdentifiers->new(%args);
311 my $token = new C4::OAI::ResumptionToken( %args );
312 my $dbh = C4::Context->dbh;
313 my $set;
314 if(defined $token->{'set'}) {
315 $set = GetOAISetBySpec($token->{'set'});
317 my $sql = "
318 SELECT biblioitems.biblionumber, biblioitems.timestamp
319 FROM biblioitems
321 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
322 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
323 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
324 $sql .= "
325 LIMIT $repository->{'koha_max_count'}
326 OFFSET $token->{'offset'}
328 my $sth = $dbh->prepare( $sql );
329 my @bind_params = ($token->{'from'}, $token->{'until'});
330 push @bind_params, $set->{'id'} if defined $set;
331 $sth->execute( @bind_params );
333 my $pos = $token->{offset};
334 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
335 $timestamp =~ s/ /T/, $timestamp .= 'Z';
336 $self->identifier( new HTTP::OAI::Header(
337 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
338 datestamp => $timestamp,
339 ) );
340 $pos++;
342 $self->resumptionToken(
343 new C4::OAI::ResumptionToken(
344 metadataPrefix => $token->{metadata_prefix},
345 from => $token->{from},
346 until => $token->{until},
347 offset => $pos,
348 set => $token->{set}
350 ) if ($pos > $token->{offset});
352 return $self;
355 # __END__ C4::OAI::ListIdentifiers
357 package C4::OAI::Description;
359 use strict;
360 use warnings;
361 use HTTP::OAI;
362 use HTTP::OAI::SAXHandler qw/ :SAX /;
364 sub new {
365 my ( $class, %args ) = @_;
367 my $self = {};
369 if(my $setDescription = $args{setDescription}) {
370 $self->{setDescription} = $setDescription;
372 if(my $handler = $args{handler}) {
373 $self->{handler} = $handler;
376 bless $self, $class;
377 return $self;
380 sub set_handler {
381 my ( $self, $handler ) = @_;
383 $self->{handler} = $handler if $handler;
385 return $self;
388 sub generate {
389 my ( $self ) = @_;
391 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
393 return $self;
396 # __END__ C4::OAI::Description
398 package C4::OAI::ListSets;
400 use strict;
401 use warnings;
402 use HTTP::OAI;
403 use C4::OAI::Sets;
405 use base ("HTTP::OAI::ListSets");
407 sub new {
408 my ( $class, $repository, %args ) = @_;
410 my $self = HTTP::OAI::ListSets->new(%args);
412 my $token = C4::OAI::ResumptionToken->new(%args);
413 my $sets = GetOAISets;
414 my $pos = 0;
415 foreach my $set (@$sets) {
416 if ($pos < $token->{offset}) {
417 $pos++;
418 next;
420 my @descriptions;
421 foreach my $desc (@{$set->{'descriptions'}}) {
422 push @descriptions, C4::OAI::Description->new(
423 setDescription => $desc,
426 $self->set(
427 HTTP::OAI::Set->new(
428 setSpec => $set->{'spec'},
429 setName => $set->{'name'},
430 setDescription => \@descriptions,
433 $pos++;
434 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
437 $self->resumptionToken(
438 new C4::OAI::ResumptionToken(
439 metadataPrefix => $token->{metadata_prefix},
440 offset => $pos
442 ) if ( $pos > $token->{offset} );
444 return $self;
447 # __END__ C4::OAI::ListSets;
449 package C4::OAI::ListRecords;
451 use strict;
452 use warnings;
453 use HTTP::OAI;
454 use C4::OAI::Sets;
456 use base ("HTTP::OAI::ListRecords");
459 sub new {
460 my ($class, $repository, %args) = @_;
462 my $self = HTTP::OAI::ListRecords->new(%args);
464 my $token = new C4::OAI::ResumptionToken( %args );
465 my $dbh = C4::Context->dbh;
466 my $set;
467 if(defined $token->{'set'}) {
468 $set = GetOAISetBySpec($token->{'set'});
470 my $sql = "
471 SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
472 FROM biblioitems
474 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
475 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
476 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
477 $sql .= "
478 LIMIT $repository->{'koha_max_count'}
479 OFFSET $token->{'offset'}
482 my $sth = $dbh->prepare( $sql );
483 my @bind_params = ($token->{'from'}, $token->{'until'});
484 push @bind_params, $set->{'id'} if defined $set;
485 $sth->execute( @bind_params );
487 my $pos = $token->{offset};
488 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
489 my $oai_sets = GetOAISetsBiblio($biblionumber);
490 my @setSpecs;
491 foreach (@$oai_sets) {
492 push @setSpecs, $_->{spec};
494 $self->record( C4::OAI::Record->new(
495 $repository, $marcxml, $timestamp, \@setSpecs,
496 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
497 metadataPrefix => $token->{metadata_prefix}
498 ) );
499 $pos++;
501 $self->resumptionToken(
502 new C4::OAI::ResumptionToken(
503 metadataPrefix => $token->{metadata_prefix},
504 from => $token->{from},
505 until => $token->{until},
506 offset => $pos,
507 set => $token->{set}
509 ) if ($pos > $token->{offset});
511 return $self;
514 # __END__ C4::OAI::ListRecords
518 package C4::OAI::Repository;
520 use base ("HTTP::OAI::Repository");
522 use strict;
523 use warnings;
525 use HTTP::OAI;
526 use HTTP::OAI::Repository qw/:validate/;
528 use XML::SAX::Writer;
529 use XML::LibXML;
530 use XML::LibXSLT;
531 use YAML::Syck qw( LoadFile );
532 use CGI qw/:standard -oldstyle_urls/;
534 use C4::Context;
535 use C4::Biblio;
538 sub new {
539 my ($class, %args) = @_;
540 my $self = $class->SUPER::new(%args);
542 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
543 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
544 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
545 $self->{ koha_stylesheet } = { }; # Build when needed
547 # Load configuration file if defined in OAI-PMH:ConfFile syspref
548 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
549 $self->{ conf } = LoadFile( $file );
550 my @formats = keys %{ $self->{conf}->{format} };
551 $self->{ koha_metadata_format } = \@formats;
554 # Check for grammatical errors in the request
555 my @errs = validate_request( CGI::Vars() );
557 # Is metadataPrefix supported by the respository?
558 my $mdp = param('metadataPrefix') || '';
559 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
560 push @errs, new HTTP::OAI::Error(
561 code => 'cannotDisseminateFormat',
562 message => "Dissemination as '$mdp' is not supported",
566 my $response;
567 if ( @errs ) {
568 $response = HTTP::OAI::Response->new(
569 requestURL => self_url(),
570 errors => \@errs,
573 else {
574 my %attr = CGI::Vars();
575 my $verb = delete( $attr{verb} );
576 if ( $verb eq 'ListSets' ) {
577 $response = C4::OAI::ListSets->new($self, %attr);
579 elsif ( $verb eq 'Identify' ) {
580 $response = C4::OAI::Identify->new( $self );
582 elsif ( $verb eq 'ListMetadataFormats' ) {
583 $response = C4::OAI::ListMetadataFormats->new( $self );
585 elsif ( $verb eq 'GetRecord' ) {
586 $response = C4::OAI::GetRecord->new( $self, %attr );
588 elsif ( $verb eq 'ListRecords' ) {
589 $response = C4::OAI::ListRecords->new( $self, %attr );
591 elsif ( $verb eq 'ListIdentifiers' ) {
592 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
596 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
597 $response->generate;
599 bless $self, $class;
600 return $self;
604 sub stylesheet {
605 my ( $self, $format ) = @_;
607 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
608 unless ( $stylesheet ) {
609 my $xsl_file = $self->{ conf }
610 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
611 : ( C4::Context->config('intrahtdocs') .
612 '/prog/en/xslt/' .
613 C4::Context->preference('marcflavour') .
614 'slim2OAIDC.xsl' );
615 my $parser = XML::LibXML->new();
616 my $xslt = XML::LibXSLT->new();
617 my $style_doc = $parser->parse_file( $xsl_file );
618 $stylesheet = $xslt->parse_stylesheet( $style_doc );
619 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
622 return $stylesheet;
627 =head1 NAME
629 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
631 =head1 SYNOPSIS
633 use C4::OAI::Repository;
635 my $repository = C4::OAI::Repository->new();
637 =head1 DESCRIPTION
639 This object extend HTTP::OAI::Repository object.
640 It accepts OAI-PMH HTTP requests and returns result.
642 This OAI-PMH server can operate in a simple mode and extended one.
644 In simple mode, repository configuration comes entirely from Koha system
645 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
646 records in marcxml or dublin core format. Dublin core records are created from
647 koha marcxml records tranformed with XSLT. Used XSL file is located in
648 koha-tmpl/intranet-tmpl/prog/en/xslt directory and choosed based on marcflavour,
649 respecively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
650 UNIMARC.
652 In extende mode, it's possible to parameter other format than marcxml or Dublin
653 Core. A new syspref OAI-PMH:ConfFile specify a YAML configuration file which
654 list available metadata formats and XSL file used to create them from marcxml
655 records. If this syspref isn't set, Koha OAI server works in simple mode. A
656 configuration file koha-oai.conf can look like that:
659 format:
661 metadataPrefix: vs
662 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
663 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
664 xsl_file: /usr/local/koha/xslt/vs.xsl
665 marcxml:
666 metadataPrefix: marxml
667 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
668 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
669 oai_dc:
670 metadataPrefix: oai_dc
671 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
672 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
673 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl
675 =cut