3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI
qw( :standard -oldstyle_urls -utf8 );
# Script entry point: checks that the service is enabled, negotiates gzip
# output, emits the HTTP header and hands the request to the repository.

# Try to load gzip support; the eval keeps a missing PerlIO::gzip module from
# aborting the script.
30 eval { require PerlIO::gzip };
# When the 'OAI-PMH' system preference is false, answer with a plain-text
# 404 response carrying the message below.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# Compress the response only when $GZIP is true (presumably set from the
# eval above -- its assignment is not visible here) and the client's
# Accept-Encoding asks for gzip.
# NOTE(review): the grep only matches an element that is exactly 'gzip'; a
# combined header value such as 'gzip, deflate' would not match -- confirm
# what http('HTTP_ACCEPT_ENCODING') returns in list context.
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
# Push the gzip layer onto STDOUT.
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
# Apply the UTF-8 encoding layer to STDOUT.
61 binmode STDOUT, ':encoding(UTF-8)';
# The repository constructor validates the request and builds the response.
62 my $repository = C4::OAI::Repository->new();
71 # Extends HTTP::OAI::ResumptionToken
72 # A token is identified by:
78 package C4::OAI::ResumptionToken;
84 use base ("HTTP::OAI::ResumptionToken");
# Constructor: builds a resumption token either by decoding the
# 'resumptionToken' string sent back by a harvester, or from the individual
# request arguments.
#
# Named arguments (all optional):
#   resumptionToken - "metadataPrefix/offset/from/until/set", as produced at
#                     the end of this constructor; when present it takes
#                     precedence over the arguments below
#   metadataPrefix  - metadata format of the request
#   from            - lower date bound, defaults to '1970-01-01'
#   until           - upper date bound, defaults to today (UTC)
#   offset          - number of rows already delivered, defaults to 0
#   set             - setSpec restricting the request, defaults to ''
#
# Returns the object with these extra keys: metadata_prefix, offset, from,
# until, set, and from_arg / until_arg (from / until with the 'T' and 'Z'
# UTC designators stripped).
sub new {
    my ($class, %args) = @_;

    my $self = $class->SUPER::new(%args);

    my ($metadata_prefix, $offset, $from, $until, $set);
    if ( $args{resumptionToken} ) {
        ($metadata_prefix, $offset, $from, $until, $set)
            = split( '/', $args{resumptionToken} );
    }
    else {
        $metadata_prefix = $args{metadataPrefix};
        $from            = $args{from} || '1970-01-01';
        $until           = $args{until};
        unless ($until) {
            my ($mday, $mon, $year) = ( gmtime(time) )[ 3, 4, 5 ];
            $until = sprintf( "%.4d-%.2d-%.2d", $year + 1900, $mon + 1, $mday );
        }
        #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
        $from  .= 'T00:00:00Z' if length($from) == 10;
        $until .= 'T23:59:59Z' if length($until) == 10;
        $offset = $args{offset} || 0;
        $set    = $args{set} || '';
    }

    # A resumptionToken is client-supplied, and the offset is later
    # interpolated into a SQL OFFSET clause by the List* verbs: accept plain
    # digits only and fall back to 0 for anything else.
    $offset = ( defined $offset && $offset =~ /\A[0-9]{1,15}\z/ )
        ? $offset + 0
        : 0;

    $self->{metadata_prefix} = $metadata_prefix;
    $self->{offset}          = $offset;
    $self->{from}            = $from;
    $self->{until}           = $until;
    $self->{set}             = $set;
    $self->{from_arg}        = _strip_UTC_designators($from);
    $self->{until_arg}       = _strip_UTC_designators($until);

    # Fields missing from a short or malformed token serialise as empty
    # strings, exactly as join() would render undef, but without a warning.
    $self->resumptionToken(
        join( '/',
            map { defined $_ ? $_ : '' }
                $metadata_prefix, $offset, $from, $until, $set )
    );
    $self->cursor($offset);

    return $self;
}
# Turn an OAI-PMH UTC timestamp ('YYYY-MM-DDThh:mm:ssZ') into the
# 'YYYY-MM-DD hh:mm:ss' form used when comparing against DB timestamps:
# every 'T' becomes a space and every 'Z' is removed.
#
#   $timestamp - string to convert; undef is passed through unchanged
#
# Returns the converted copy (the caller's variable is not modified).
sub _strip_UTC_designators {
    my ($timestamp) = @_;

    return $timestamp unless defined $timestamp;

    $timestamp =~ s/T/ /g;
    $timestamp =~ s/Z//g;

    # Explicit return: without it the sub would yield the result of the last
    # substitution (a count) instead of the timestamp.
    return $timestamp;
}
134 # __END__ C4::OAI::ResumptionToken
138 package C4::OAI::Identify;
145 use base ("HTTP::OAI::Identify");
# Constructor: builds the answer to the OAI-PMH 'Identify' verb, mostly from
# Koha system preferences.
#   $repository - repository object; only its self_url() is used here.
148 my ($class, $repository) = @_;
# Base URL = everything before the last '?' of the request URL (the capture
# is greedy). If the URL contains no '?', the match fails and $baseURL is
# left undefined.
150 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
151 my $self = $class->SUPER::new(
153 repositoryName => C4::Context->preference("LibraryName"),
154 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
155 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
156 granularity => 'YYYY-MM-DD',
157 earliestDatestamp => '0001-01-01',
# Falls back to 'no' when the OAI-PMH:DeletedRecord preference is empty.
158 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
161 # FIXME - alas, the description element is not so simple; to validate
162 # against the OAI-PMH schema, it cannot contain just a string,
163 # but one or more elements that validate against another XML schema.
164 # For now, simply omitting it.
165 # $self->description( "Koha OAI Repository" );
# Declare gzip as the compression value of the Identify response.
167 $self->compression( 'gzip' );
172 # __END__ C4::OAI::Identify
176 package C4::OAI::ListMetadataFormats;
182 use base ("HTTP::OAI::ListMetadataFormats");
# Constructor: builds the answer to the OAI-PMH 'ListMetadataFormats' verb.
#
#   $repository - repository object. When it carries a configuration
#                 ({conf}), one format is announced per name listed in
#                 {koha_metadata_format}, using the metadataPrefix, schema
#                 and metadataNamespace found under {conf}{format}{<name>}.
#                 Without a configuration the two built-in formats, oai_dc
#                 and marcxml, are announced.
#
# Returns the populated response object.
sub new {
    my ($class, $repository) = @_;

    my $self = $class->SUPER::new();

    if ( $repository->{conf} ) {
        foreach my $name ( @{ $repository->{koha_metadata_format} } ) {
            my $format = $repository->{conf}->{format}->{$name};
            $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
                metadataPrefix    => $format->{metadataPrefix},
                schema            => $format->{schema},
                metadataNamespace => $format->{metadataNamespace},
            ) );
        }
    }
    else {
        $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
            metadataPrefix    => 'oai_dc',
            schema            => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
            metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/',
        ) );
        # schema is the URL of the XSD and metadataNamespace the single XML
        # namespace URI of MARCXML; neither may contain whitespace.
        $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
            metadataPrefix    => 'marcxml',
            schema            => 'http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd',
            metadataNamespace => 'http://www.loc.gov/MARC21/slim',
        ) );
    }

    return $self;
}
214 # __END__ C4::OAI::ListMetadataFormats
218 package C4::OAI::Record;
223 use HTTP::OAI::Metadata::OAI_DC;
225 use base ("HTTP::OAI::Record");
# Constructor: wraps one bibliographic record as an OAI record (header plus
# metadata).
#   $repository - supplies stylesheet($format) for non-marcxml output
#   $marcxml    - the record as an XML string; it is parsed with XML::LibXML
#   $timestamp  - datestamp source; its first space becomes 'T' and 'Z' is
#                 appended (presumably a 'YYYY-MM-DD hh:mm:ss' DB value --
#                 confirm)
#   $setSpecs   - array ref of setSpec strings added to the header
#   %args       - identifier and metadataPrefix are read here; the whole hash
#                 is also handed to the parent constructor
228 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
230 my $self = $class->SUPER::new(%args);
# Two expressions joined by the comma operator: substitute, then append.
232 $timestamp =~ s/ /T/, $timestamp .= 'Z';
233 $self->header( new HTTP::OAI::Header(
234 identifier => $args{identifier},
235 datestamp => $timestamp,
238 foreach my $setSpec (@$setSpecs) {
239 $self->header->setSpec($setSpec);
242 my $parser = XML::LibXML->new();
243 my $record_dom = $parser->parse_string( $marcxml );
244 my $format = $args{metadataPrefix};
# Any format other than 'marcxml' is produced by running the parsed record
# through the repository's stylesheet for that format.
245 if ( $format ne 'marcxml' ) {
# The preference value is wrapped in single quotes before being passed on
# (presumably so the XSLT engine reads it as a string literal -- confirm).
247 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
249 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
251 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
256 # __END__ C4::OAI::Record
258 package C4::OAI::DeletedRecord;
262 use HTTP::OAI::Metadata::OAI_DC;
264 use base ("HTTP::OAI::Record");
# Constructor: builds the OAI record for a deleted biblio. Only a header is
# set in the visible code; no metadata is attached.
#   $timestamp - datestamp source; its first space becomes 'T' and 'Z' is
#                appended
#   $setSpecs  - array ref of setSpec strings added to the header
#   %args      - identifier is read here; the whole hash is also handed to
#                the parent constructor
267 my ($class, $timestamp, $setSpecs, %args) = @_;
269 my $self = $class->SUPER::new(%args);
# Two expressions joined by the comma operator: substitute, then append.
271 $timestamp =~ s/ /T/, $timestamp .= 'Z';
272 $self->header( new HTTP::OAI::Header(
274 identifier => $args{identifier},
275 datestamp => $timestamp,
278 foreach my $setSpec (@$setSpecs) {
279 $self->header->setSpec($setSpec);
285 # __END__ C4::OAI::DeletedRecord
289 package C4::OAI::GetRecord;
298 use base ("HTTP::OAI::GetRecord");
# Constructor: answers the OAI-PMH 'GetRecord' verb for one identifier.
#   $repository - supplies koha_identifier, self_url() and get_biblio_marcxml()
#   %args       - request arguments; identifier and metadataPrefix are read
# Returns an HTTP::OAI::Response carrying an 'idDoesNotExist' error when
# neither of the two timestamp lookups below finds the biblionumber.
302 my ($class, $repository, %args) = @_;
304 my $self = HTTP::OAI::GetRecord->new(%args);
306 my $dbh = C4::Context->dbh;
307 my $sth = $dbh->prepare("
310 WHERE biblionumber=? " );
# The biblionumber is whatever follows "<koha_identifier>:" in the identifier.
# NOTE(review): $prefix is interpolated into the pattern unescaped, so regex
# metacharacters in the archive ID (a '.' for instance) act as wildcards;
# \Q$prefix\E would match it literally -- confirm intent. If the pattern does
# not match, $biblionumber is undefined.
311 my $prefix = $repository->{koha_identifier} . ':';
312 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
313 $sth->execute( $biblionumber );
314 my ($timestamp, $deleted);
# First lookup failed: try the second query before reporting an error.
315 unless ( ($timestamp) = $sth->fetchrow ) {
316 unless ( ($timestamp) = $dbh->selectrow_array(q/
319 WHERE biblionumber=? /, undef, $biblionumber ))
321 return HTTP::OAI::Response->new(
322 requestURL => $repository->self_url(),
323 errors => [ new HTTP::OAI::Error(
324 code => 'idDoesNotExist',
325 message => "There is no biblio record with this identifier",
334 # We fetch it using this method, rather than the database directly,
335 # so it'll include the item data
337 $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix})
# Collect the setSpec of every OAI set this biblio belongs to.
339 my $oai_sets = GetOAISetsBiblio($biblionumber);
341 foreach (@$oai_sets) {
342 push @setSpecs, $_->{spec};
345 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
# Choose between a DeletedRecord and a full Record for the response.
348 ? C4::OAI::DeletedRecord->new($timestamp, \@setSpecs, %args)
349 : C4::OAI::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args)
354 # __END__ C4::OAI::GetRecord
358 package C4::OAI::ListIdentifiers;
365 use base ("HTTP::OAI::ListIdentifiers");
# Constructor: answers the OAI-PMH 'ListIdentifiers' verb with one page of
# record headers taken from biblioitems and deletedbiblio.
#   $repository - supplies koha_max_count, koha_identifier and self_url()
#   %args       - request arguments, also used to build the resumption token
# Returns an HTTP::OAI::Response carrying a 'noRecordsMatch' error when
# nothing matches.
369 my ($class, $repository, %args) = @_;
371 my $self = HTTP::OAI::ListIdentifiers->new(%args);
373 my $token = new C4::OAI::ResumptionToken( %args );
374 my $dbh = C4::Context->dbh;
# Resolve the requested setSpec; the set's id is used as a bind value below.
376 if(defined $token->{'set'}) {
377 $set = GetOAISetBySpec($token->{'set'});
379 my $max = $repository->{koha_max_count};
381 (SELECT biblioitems.biblionumber, biblioitems.timestamp
# The JOIN and the set_id condition are only added when a set was resolved.
384 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
385 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
386 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
388 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
389 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
390 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
391 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
# One row more than $max is requested; the fetch loop below uses the extra
# row to decide whether a resumption token is needed.
# NOTE(review): $token->{offset} is interpolated straight into the SQL text.
# When the request carries a resumptionToken, that value comes from splitting
# the client-supplied string -- confirm it is validated as an integer.
393 $sql .= ") ORDER BY biblionumber
394 LIMIT " . ($max+1) . "
395 OFFSET $token->{offset}
397 my $sth = $dbh->prepare( $sql );
# Bind values, in placeholder order: date range (+ set id) for the first
# SELECT, then date range (+ set id) for the second.
# NOTE(review): the first SELECT gets from_arg/until_arg (UTC designators
# stripped) while the second gets the raw from/until values, compared against
# DATE(timestamp) -- confirm this difference is intended.
398 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
399 push @bind_params, $set->{'id'} if defined $set;
400 push @bind_params, ($token->{'from'}, $token->{'until'});
401 push @bind_params, $set->{'id'} if defined $set;
402 $sth->execute( @bind_params );
405 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
# More rows than fit on one page: attach a token whose offset points at the
# next page ($count is presumably incremented once per row -- the line is
# not visible here).
407 if ( $count > $max ) {
408 $self->resumptionToken(
409 new C4::OAI::ResumptionToken(
410 metadataPrefix => $token->{metadata_prefix},
411 from => $token->{from},
412 until => $token->{until},
413 offset => $token->{offset} + $max,
# Two expressions joined by the comma operator: substitute, then append.
419 $timestamp =~ s/ /T/, $timestamp .= 'Z';
420 $self->identifier( new HTTP::OAI::Header(
421 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
422 datestamp => $timestamp,
426 # Return error if no results
428 return HTTP::OAI::Response->new(
429 requestURL => $repository->self_url(),
430 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
437 # __END__ C4::OAI::ListIdentifiers
439 package C4::OAI::Description;
444 use HTTP::OAI::SAXHandler qw/ :SAX /;
# Constructor fragment: stores the optional named arguments setDescription
# and handler on the object. A false value for either is ignored.
447 my ( $class, %args ) = @_;
451 if(my $setDescription = $args{setDescription}) {
452 $self->{setDescription} = $setDescription;
454 if(my $handler = $args{handler}) {
455 $self->{handler} = $handler;
# Handler setter fragment: replaces the stored handler when a true value is
# passed.
463 my ( $self, $handler ) = @_;
465 $self->{handler} = $handler if $handler;
# Output fragment: emits the stored description through the handler as a
# 'setDescription' element in the OAI 2.0 namespace, with no attributes.
473 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
478 # __END__ C4::OAI::Description
480 package C4::OAI::ListSets;
487 use base ("HTTP::OAI::ListSets");
# Constructor: answers the OAI-PMH 'ListSets' verb with one page of sets.
#   $repository - supplies koha_max_count (page size)
#   %args       - request arguments, also used to build the resumption token
490 my ( $class, $repository, %args ) = @_;
492 my $self = HTTP::OAI::ListSets->new(%args);
494 my $token = C4::OAI::ResumptionToken->new(%args);
495 my $sets = GetOAISets;
497 foreach my $set (@$sets) {
# Sets positioned before the token's offset belong to earlier pages.
498 if ($pos < $token->{offset}) {
# Wrap each description of the set in a C4::OAI::Description object.
503 foreach my $desc (@{$set->{'descriptions'}}) {
504 push @descriptions, C4::OAI::Description->new(
505 setDescription => $desc,
510 setSpec => $set->{'spec'},
511 setName => $set->{'name'},
512 setDescription => \@descriptions,
# Leave the loop once the page holds more than koha_max_count entries.
516 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# A resumption token is attached only if $pos moved past the starting offset.
519 $self->resumptionToken(
520 new C4::OAI::ResumptionToken(
521 metadataPrefix => $token->{metadata_prefix},
524 ) if ( $pos > $token->{offset} );
529 # __END__ C4::OAI::ListSets;
531 package C4::OAI::ListRecords;
540 use base ("HTTP::OAI::ListRecords");
# Constructor: answers the OAI-PMH 'ListRecords' verb with one page of
# records taken from biblioitems and deletedbiblio.
#   $repository - supplies koha_max_count, koha_identifier, self_url() and
#                 get_biblio_marcxml()
#   %args       - request arguments, also used to build the resumption token
# Returns an HTTP::OAI::Response carrying a 'noRecordsMatch' error when
# nothing matches.
544 my ($class, $repository, %args) = @_;
546 my $self = HTTP::OAI::ListRecords->new(%args);
548 my $token = new C4::OAI::ResumptionToken( %args );
549 my $dbh = C4::Context->dbh;
# Resolve the requested setSpec; the set's id is used as a bind value below.
551 if(defined $token->{'set'}) {
552 $set = GetOAISetBySpec($token->{'set'});
554 my $max = $repository->{koha_max_count};
556 (SELECT biblioitems.biblionumber, biblioitems.timestamp, marcxml
# The JOIN and the set_id condition are only added when a set was resolved.
559 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
560 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
561 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
563 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
# NOTE(review): the first SELECT lists (biblionumber, timestamp, marcxml) but
# the second lists (biblionumber, null as marcxml, timestamp). If the two are
# combined positionally (the joining text is not visible here), the second
# column of deleted rows is the NULL marcxml, and that is what the fetch loop
# reads as $timestamp -- confirm the column order.
564 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
565 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
566 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
# One row more than $max is requested; the fetch loop below uses the extra
# row to decide whether a resumption token is needed.
# NOTE(review): $token->{offset} is interpolated straight into the SQL text.
# When the request carries a resumptionToken, that value comes from splitting
# the client-supplied string -- confirm it is validated as an integer.
568 $sql .= ") ORDER BY biblionumber
569 LIMIT " . ($max + 1) . "
570 OFFSET $token->{offset}
572 my $sth = $dbh->prepare( $sql );
# Bind values, in placeholder order: date range (+ set id) for the first
# SELECT, then date range (+ set id) for the second.
# NOTE(review): the first SELECT gets from_arg/until_arg (UTC designators
# stripped) while the second gets the raw from/until values, compared against
# DATE(timestamp) -- confirm this difference is intended.
573 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
574 push @bind_params, $set->{'id'} if defined $set;
575 push @bind_params, ($token->{'from'}, $token->{'until'});
576 push @bind_params, $set->{'id'} if defined $set;
577 $sth->execute( @bind_params );
# Format used to fetch the MARCXML: the request's metadataPrefix, or the one
# carried by the token when the request has none.
580 my $format = $args{metadataPrefix} || $token->{metadata_prefix};
# Only the first two selected columns are read from each row.
581 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
# More rows than fit on one page: attach a token whose offset points at the
# next page ($count is presumably incremented once per row -- the line is
# not visible here).
583 if ( $count > $max ) {
584 $self->resumptionToken(
585 new C4::OAI::ResumptionToken(
586 metadataPrefix => $token->{metadata_prefix},
587 from => $token->{from},
588 until => $token->{until},
589 offset => $token->{offset} + $max,
595 my $marcxml = $repository->get_biblio_marcxml($biblionumber, $format);
# Collect the setSpec of every OAI set this biblio belongs to.
596 my $oai_sets = GetOAISetsBiblio($biblionumber);
598 foreach (@$oai_sets) {
599 push @setSpecs, $_->{spec};
# Full record built from the fetched MARCXML.
602 $self->record( C4::OAI::Record->new(
603 $repository, $marcxml, $timestamp, \@setSpecs,
604 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
605 metadataPrefix => $token->{metadata_prefix}
# Header-only record for a deleted biblio.
608 $self->record( C4::OAI::DeletedRecord->new(
609 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
613 # Return error if no results
615 return HTTP::OAI::Response->new(
616 requestURL => $repository->self_url(),
617 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
624 # __END__ C4::OAI::ListRecords
628 package C4::OAI::Repository;
630 use base ("HTTP::OAI::Repository");
636 use HTTP::OAI::Repository qw/:validate/;
638 use XML::SAX::Writer;
641 use YAML::Syck qw( LoadFile );
642 use CGI qw
/:standard -oldstyle_urls/;
# Constructor: reads the repository settings, validates the current CGI
# request, dispatches on the OAI-PMH verb and attaches an XML writer on
# STDOUT to the resulting response.
#   %args - passed through to the parent constructor
649 my ($class, %args) = @_;
650 my $self = $class->SUPER::new
(%args);
# Repository settings taken from system preferences.
652 $self->{ koha_identifier
} = C4
::Context
->preference("OAI-PMH:archiveID");
653 $self->{ koha_max_count
} = C4
::Context
->preference("OAI-PMH:MaxCount");
# Default formats; replaced below when a configuration file is loaded.
654 $self->{ koha_metadata_format
} = ['oai_dc', 'marcxml'];
# Cache of compiled stylesheets, keyed by format name.
655 $self->{ koha_stylesheet
} = { }; # Build when needed
657 # Load configuration file if defined in OAI-PMH:ConfFile syspref
658 if ( my $file = C4
::Context
->preference("OAI-PMH:ConfFile") ) {
659 $self->{ conf
} = LoadFile
( $file );
# The supported formats become the keys of the configuration's 'format' map.
660 my @formats = keys %{ $self->{conf
}->{format
} };
661 $self->{ koha_metadata_format
} = \
@formats;
664 # Check for grammatical errors in the request
665 my @errs = validate_request
( CGI
::Vars
() );
667 # Is metadataPrefix supported by the repository?
668 my $mdp = param
('metadataPrefix') || '';
669 if ( $mdp && !grep { $_ eq $mdp } @
{$self->{ koha_metadata_format
}} ) {
670 push @errs, new HTTP
::OAI
::Error
(
671 code
=> 'cannotDisseminateFormat',
672 message
=> "Dissemination as '$mdp' is not supported",
# Generic response object (presumably the error path -- the surrounding
# condition is not visible here).
678 $response = HTTP
::OAI
::Response
->new(
679 requestURL
=> self_url
(),
# Dispatch on the verb; the remaining request parameters are passed to the
# verb's class where it takes arguments.
684 my %attr = CGI
::Vars
();
685 my $verb = delete( $attr{verb
} );
686 if ( $verb eq 'ListSets' ) {
687 $response = C4
::OAI
::ListSets
->new($self, %attr);
689 elsif ( $verb eq 'Identify' ) {
690 $response = C4
::OAI
::Identify
->new( $self );
692 elsif ( $verb eq 'ListMetadataFormats' ) {
693 $response = C4
::OAI
::ListMetadataFormats
->new( $self );
695 elsif ( $verb eq 'GetRecord' ) {
696 $response = C4
::OAI
::GetRecord
->new( $self, %attr );
698 elsif ( $verb eq 'ListRecords' ) {
699 $response = C4
::OAI
::ListRecords
->new( $self, %attr );
701 elsif ( $verb eq 'ListIdentifiers' ) {
702 $response = C4
::OAI
::ListIdentifiers
->new( $self, %attr );
# The response is serialised through a SAX writer whose output is STDOUT.
706 $response->set_handler( XML
::SAX
::Writer
->new( Output
=> *STDOUT
) );
# Fetch one bibliographic record and serialise it as XML.
#
#   $biblionumber - number of the biblio to fetch
#   $format       - metadata format name; only used to look up the
#                   'include_items' flag of that format in the configuration
#
# When the repository has a configuration ({conf}), the format's
# include_items value decides whether GetMarcBiblio is asked to embed item
# data; without a configuration items are not requested.
# NOTE(review): the third GetMarcBiblio argument (1) is passed through as in
# the original; its meaning is not visible in this file.
#
# Returns the record's XML string, or undef when no record was found.
sub get_biblio_marcxml {
    my ($self, $biblionumber, $format) = @_;

    my $with_items = 0;
    if ( my $conf = $self->{conf} ) {
        $with_items = $conf->{format}->{$format}->{include_items};
    }

    my $record = GetMarcBiblio($biblionumber, $with_items, 1);
    return $record ? $record->as_xml() : undef;
}
# Returns the compiled XSLT stylesheet for a metadata format, compiling and
# caching it in $self->{koha_stylesheet} on first use.
#   $format - metadata format name used as cache key and configuration key
726 my ( $self, $format ) = @_;
# Cache lookup.
728 my $stylesheet = $self->{ koha_stylesheet
}->{ $format };
729 unless ( $stylesheet ) {
# XSL file: the format's xsl_file entry when a configuration is loaded,
# otherwise a path built from the 'intrahtdocs' config value and the
# 'marcflavour' preference (the literal path pieces are not visible here).
730 my $xsl_file = $self->{ conf
}
731 ?
$self->{ conf
}->{ format
}->{ $format }->{ xsl_file
}
732 : ( C4
::Context
->config('intrahtdocs') .
734 C4
::Context
->preference('marcflavour') .
# Parse the XSL file and compile it into a stylesheet object.
736 my $parser = XML
::LibXML
->new();
737 my $xslt = XML
::LibXSLT
->new();
738 my $style_doc = $parser->parse_file( $xsl_file );
739 $stylesheet = $xslt->parse_stylesheet( $style_doc );
# Remember the compiled stylesheet for later calls.
740 $self->{ koha_stylesheet
}->{ $format } = $stylesheet;
750 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
754 use C4::OAI::Repository;
756 my $repository = C4::OAI::Repository->new();
760 This object extends the HTTP::OAI::Repository object.
761 It accepts OAI-PMH HTTP requests and returns the result.
763 This OAI-PMH server can operate in a simple mode or an extended one.
765 In simple mode, repository configuration comes entirely from Koha system
766 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
767 records in marcxml or dublin core format. Dublin core records are created from
768 koha marcxml records transformed with XSLT. The XSL file used is located in the
769 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour:
770 MARC21slim2OAIDC.xsl for MARC21 and UNIMARCslim2OAIDC.xsl for
773 In extended mode, it's possible to configure formats other than marcxml or Dublin
774 Core. A new syspref, OAI-PMH:ConfFile, specifies a YAML configuration file which
775 lists the available metadata formats and the XSL files used to create them from marcxml
776 records. If this syspref isn't set, Koha OAI server works in simple mode. A
777 configuration file koha-oai.conf can look like that:
783 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
784 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
785 xsl_file: /usr/local/koha/xslt/vs.xsl
787 metadataPrefix: marcxml
788 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
789 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
791 metadataPrefix: oai_dc
792 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
793 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
794 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl