3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI
qw( :standard -oldstyle_urls -utf8 );
# Main script body: refuse service when OAI-PMH is disabled, choose between
# gzip-compressed and plain XML output, then build the repository object.

# Load PerlIO::gzip inside eval so that a missing module does not abort the script.
30 eval { require PerlIO::gzip };
# When the 'OAI-PMH' system preference is false, the reply is a text/plain
# response with a 404 status line and the message below.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# http() comes from CGI's :standard export set; it returns the Accept-Encoding
# request header.
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
# gzip output is selected only when $GZIP is true and some element of
# @encodings is exactly the string 'gzip'.
# NOTE(review): $GZIP presumably records whether PerlIO::gzip loaded - confirm.
# NOTE(review): a header value such as 'gzip, deflate' does not compare equal
# to 'gzip' - confirm how http() splits the header.
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
# Push the :gzip layer onto STDOUT so everything printed afterwards is compressed.
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
# Encode all output as UTF-8, matching the charset announced in the header.
61 binmode STDOUT, ':encoding(UTF-8)';
# C4::OAI::Repository->new reads the CGI parameters itself and attaches an
# XML::SAX::Writer bound to STDOUT to the response it builds.
62 my $repository = C4::OAI::Repository->new();
71 # Extends HTTP::OAI::ResumptionToken
72 # A token is identified by:
# The serialized token is the '/'-joined string
#   metadataPrefix/offset/from/until/set
# and the same five values are also stored as hash entries on the object.
78 package C4::OAI::ResumptionToken;
84 use base ("HTTP::OAI::ResumptionToken");
# Constructor.
#   %args - either resumptionToken => 'prefix/offset/from/until/set', or the
#           individual keys metadataPrefix, from, until, offset and set.
88 my ($class, %args) = @_;
90 my $self = $class->SUPER::new(%args);
92 my ($metadata_prefix, $offset, $from, $until, $set);
# A token supplied by the client is split on '/' into the five fields.
# NOTE(review): the fields are used as-is; in particular the offset is not
# checked to be an integer before ListIdentifiers/ListRecords interpolate it
# into their SQL.
93 if ( $args{ resumptionToken } ) {
94 ($metadata_prefix, $offset, $from, $until, $set)
95 = split( '/', $args{resumptionToken} );
# Otherwise the fields come from the individual arguments; 'from' falls back
# to '1970-01-01'.
98 $metadata_prefix = $args{ metadataPrefix };
99 $from = $args{ from } || '1970-01-01';
100 $until = $args{ until };
# Today's date in UTC, formatted YYYY-MM-DD.
# NOTE(review): presumably only applied when no 'until' was given - confirm.
102 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
103 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
105 #Add times to the arguments, when necessary, so they correctly match against the DB timestamps
# A 10-character value is a bare YYYY-MM-DD date: extend 'from' to the start
# of that day and 'until' to its last second.
106 $from .= 'T00:00:00Z' if length($from) == 10;
107 $until .= 'T23:59:59Z' if length($until) == 10;
108 $offset = $args{ offset } || 0;
109 $set = $args{set} || '';
112 $self->{ metadata_prefix } = $metadata_prefix;
113 $self->{ offset } = $offset;
114 $self->{ from } = $from;
115 $self->{ until } = $until;
116 $self->{ set } = $set;
# from_arg / until_arg are the same timestamps with 'T' replaced by a space
# and 'Z' removed, i.e. 'YYYY-MM-DD hh:mm:ss', for use as SQL bind values.
117 $self->{ from_arg } = _strip_UTC_designators($from);
118 $self->{ until_arg } = _strip_UTC_designators($until);
# Serialize the five fields back into the token string and expose the offset
# as the cursor.
120 $self->resumptionToken(
121 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
122 $self->cursor( $offset );
# Helper: replaces every 'T' with a space and removes every 'Z' in the given
# timestamp string.
127 sub _strip_UTC_designators {
128 my ( $timestamp ) = @_;
129 $timestamp =~ s/T/ /g;
130 $timestamp =~ s/Z//g;
134 # __END__ C4::OAI::ResumptionToken
# Response to the OAI-PMH Identify verb, filled from Koha system preferences.
138 package C4::OAI::Identify;
145 use base ("HTTP::OAI::Identify");
# Constructor.
#   $repository - object providing self_url()
148 my ($class, $repository) = @_;
# Keep the part of the request URL before the query string.
# NOTE(review): the pattern requires a literal '?'; when self_url() contains
# none the match fails and $baseURL is undef.
150 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
151 my $self = $class->SUPER::new(
# Name, admin e-mail, MaxCount and the deleted-record policy come from system
# preferences; deletedRecord falls back to 'no' when the preference is false.
# Granularity and earliest datestamp are fixed literals.
153 repositoryName => C4::Context->preference("LibraryName"),
154 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
155 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
156 granularity => 'YYYY-MM-DD',
157 earliestDatestamp => '0001-01-01',
158 deletedRecord => C4::Context->preference("OAI-PMH:DeletedRecord") || 'no',
161 # FIXME - alas, the description element is not so simple; to validate
162 # against the OAI-PMH schema, it cannot contain just a string,
163 # but one or more elements that validate against another XML schema.
164 # For now, simply omitting it.
165 # $self->description( "Koha OAI Repository" );
# Advertise gzip as a supported compression.
167 $self->compression( 'gzip' );
172 # __END__ C4::OAI::Identify
# Response to the OAI-PMH ListMetadataFormats verb.
176 package C4::OAI::ListMetadataFormats;
182 use base ("HTTP::OAI::ListMetadataFormats");
# Constructor.
#   $repository - object whose {conf} and {koha_metadata_format} entries are read
185 my ($class, $repository) = @_;
187 my $self = $class->SUPER::new();
# With a configuration loaded, register one HTTP::OAI::MetadataFormat per name
# in koha_metadata_format, taking prefix, schema and namespace from
# conf->{format}->{$name}.
189 if ( $repository->{ conf } ) {
190 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
191 my $format = $repository->{ conf }->{ format }->{ $name };
192 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
193 metadataPrefix => $format->{metadataPrefix},
194 schema => $format->{schema},
195 metadataNamespace => $format->{metadataNamespace}, ) );
# Built-in formats: oai_dc and marcxml.
# NOTE(review): presumably the branch taken when no configuration is loaded - confirm.
199 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
200 metadataPrefix => 'oai_dc',
201 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
202 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
# NOTE(review): the two marcxml strings below contain a space inside
# 'http://www.loc.gov/ standards/...'; the sample configuration in this file's
# POD spells the same URLs without it. Looks like a typo - confirm before changing.
204 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
205 metadataPrefix => 'marcxml',
206 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
207 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
214 # __END__ C4::OAI::ListMetadataFormats
# One OAI record: a header (identifier, datestamp, setSpecs) plus metadata
# built from a MARCXML string.
218 package C4::OAI::Record;
223 use HTTP::OAI::Metadata::OAI_DC;
225 use base ("HTTP::OAI::Record");
# Constructor.
#   $repository - object providing stylesheet($format)
#   $marcxml    - MARCXML string for the record
#   $timestamp  - timestamp string; its first space is replaced by 'T'
#   $setSpecs   - array ref of setSpec strings
#   %args       - identifier and metadataPrefix are read here; all of %args is
#                 also passed to the parent constructor
228 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
230 my $self = $class->SUPER::new(%args);
# Turn 'date time' into 'dateTtimeZ' (two expressions joined by the comma operator).
232 $timestamp =~ s/ /T/, $timestamp .= 'Z';
233 $self->header( new HTTP::OAI::Header(
234 identifier => $args{identifier},
235 datestamp => $timestamp,
# Attach every setSpec to the header.
238 foreach my $setSpec (@$setSpecs) {
239 $self->header->setSpec($setSpec);
# Parse the MARCXML into a DOM.
242 my $parser = XML::LibXML->new();
243 my $record_dom = $parser->parse_string( $marcxml );
244 my $format = $args{metadataPrefix};
# Any format other than 'marcxml' is produced by running the DOM through the
# repository's stylesheet for that format; the OPACBaseURL preference is
# wrapped in single quotes to form a stylesheet parameter value.
245 if ( $format ne 'marcxml' ) {
247 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
249 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
251 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
256 # __END__ C4::OAI::Record
# OAI record for a deleted biblio: only a header is built here, no metadata.
258 package C4::OAI::DeletedRecord;
262 use HTTP::OAI::Metadata::OAI_DC;
264 use base ("HTTP::OAI::Record");
# Constructor.
#   $timestamp - timestamp string; its first space is replaced by 'T'
#   $setSpecs  - array ref of setSpec strings
#   %args      - identifier is read here; all of %args goes to the parent constructor
267 my ($class, $timestamp, $setSpecs, %args) = @_;
269 my $self = $class->SUPER::new(%args);
# Turn 'date time' into 'dateTtimeZ' (two expressions joined by the comma operator).
271 $timestamp =~ s/ /T/, $timestamp .= 'Z';
272 $self->header( new HTTP::OAI::Header(
274 identifier => $args{identifier},
275 datestamp => $timestamp,
# Attach every setSpec to the header.
278 foreach my $setSpec (@$setSpecs) {
279 $self->header->setSpec($setSpec);
285 # __END__ C4::OAI::DeletedRecord
# Response to the OAI-PMH GetRecord verb: looks up one biblio by identifier and
# wraps it in a C4::OAI::Record or a C4::OAI::DeletedRecord.
289 package C4::OAI::GetRecord;
298 use base ("HTTP::OAI::GetRecord");
# Constructor.
#   $repository - object providing koha_identifier, self_url() and get_biblio_marcxml()
#   %args       - request arguments; identifier and metadataPrefix are read
302 my ($class, $repository, %args) = @_;
304 my $self = HTTP::OAI::GetRecord->new(%args);
306 my $dbh = C4::Context->dbh;
# First lookup: a query selected by biblionumber, bound as a placeholder.
307 my $sth = $dbh->prepare("
310 WHERE biblionumber=? " );
# The identifier has the form '<koha_identifier>:<biblionumber>'; everything
# after the prefix is captured as the biblionumber.
# NOTE(review): $prefix is interpolated into the pattern without \Q...\E, so
# regex metacharacters in the archive ID (for example '.') act as operators.
311 my $prefix = $repository->{koha_identifier} . ':';
312 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
313 $sth->execute( $biblionumber );
314 my ($timestamp, $deleted);
# When the first query returns no row, a second query is tried with the same
# biblionumber; if that also returns nothing, an idDoesNotExist error response
# is returned instead of a GetRecord object.
315 unless ( ($timestamp) = $sth->fetchrow ) {
316 unless ( ($timestamp) = $dbh->selectrow_array(q/
319 WHERE biblionumber=? /, undef, $biblionumber ))
321 return HTTP::OAI::Response->new(
322 requestURL => $repository->self_url(),
323 errors => [ new HTTP::OAI::Error(
324 code => 'idDoesNotExist',
325 message => "There is no biblio record with this identifier",
334 # We fetch it using this method, rather than the database directly,
335 # so it'll include the item data
337 $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix})
# Collect the spec of every OAI set returned for this biblio.
339 my $oai_sets = GetOAISetsBiblio($biblionumber);
341 foreach (@$oai_sets) {
342 push @setSpecs, $_->{spec};
# The record is a DeletedRecord or a full Record, chosen by a ternary.
# NOTE(review): the condition is presumably $deleted - confirm.
345 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
348 ? C4::OAI::DeletedRecord->new($timestamp, \@setSpecs, %args)
349 : C4::OAI::Record->new($repository, $marcxml, $timestamp, \@setSpecs, %args)
354 # __END__ C4::OAI::GetRecord
# Response to the OAI-PMH ListIdentifiers verb: one header per biblio whose
# timestamp lies in the requested window, paged with a resumption token.
358 package C4::OAI::ListIdentifiers;
365 use base ("HTTP::OAI::ListIdentifiers");
# Constructor.
#   $repository - object providing koha_max_count, koha_identifier and self_url()
#   %args       - request arguments, also handed to C4::OAI::ResumptionToken
369 my ($class, $repository, %args) = @_;
371 my $self = HTTP::OAI::ListIdentifiers->new(%args);
373 my $token = new C4::OAI::ResumptionToken( %args );
374 my $dbh = C4::Context->dbh;
# Resolve the set spec carried by the token to a set via GetOAISetBySpec.
376 if(defined $token->{'set'}) {
377 $set = GetOAISetBySpec($token->{'set'});
# The statement assembled below selects (biblionumber, timestamp) from
# biblioitems and from deletedbiblio, each filtered by a timestamp range and,
# when $set is defined, joined to oai_sets_biblios and filtered by set_id.
# It asks for $max + 1 rows so that the extra row reveals whether another page exists.
# NOTE(review): the deletedbiblio part filters on DATE(timestamp) while the
# biblioitems part compares the full timestamp.
# NOTE(review): $token->{offset} is interpolated into the SQL text rather than
# bound, and it can originate from a client-supplied resumptionToken.
379 my $max = $repository->{koha_max_count};
381 (SELECT biblioitems.biblionumber, biblioitems.timestamp
384 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
385 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
386 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
388 (SELECT deletedbiblio.biblionumber, timestamp FROM deletedbiblio";
389 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
390 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
391 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
393 $sql .= ") ORDER BY biblionumber
394 LIMIT " . ($max+1) . "
395 OFFSET $token->{offset}
397 my $sth = $dbh->prepare( $sql );
# Bind values follow placeholder order: range (and set id) for the first
# subquery, then range (and set id) for the second.
# NOTE(review): the first range uses from_arg/until_arg ('YYYY-MM-DD hh:mm:ss')
# but the second uses from/until, which still carry the 'T' and 'Z' designators.
398 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
399 push @bind_params, $set->{'id'} if defined $set;
400 push @bind_params, ($token->{'from'}, $token->{'until'});
401 push @bind_params, $set->{'id'} if defined $set;
402 $sth->execute( @bind_params );
# Once $count exceeds $max, a resumption token pointing at offset + $max is
# attached to the response; otherwise the row is added as a header.
# NOTE(review): $count is presumably incremented once per row - confirm.
405 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
407 if ( $count > $max ) {
408 $self->resumptionToken(
409 new C4::OAI::ResumptionToken(
410 metadataPrefix => $token->{metadata_prefix},
411 from => $token->{from},
412 until => $token->{until},
413 offset => $token->{offset} + $max,
# Turn 'date time' into 'dateTtimeZ' (two expressions joined by the comma operator).
419 $timestamp =~ s/ /T/, $timestamp .= 'Z';
420 $self->identifier( new HTTP::OAI::Header(
421 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
422 datestamp => $timestamp,
426 # Return error if no results
428 return HTTP::OAI::Response->new(
429 requestURL => $repository->self_url(),
430 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
437 # __END__ C4::OAI::ListIdentifiers
# Small wrapper around one set description string; it emits the string as a
# setDescription element through a SAX handler.
439 package C4::OAI::Description;
444 use HTTP::OAI::SAXHandler qw/ :SAX /;
# Constructor: stores the optional setDescription and handler arguments on the object.
447 my ( $class, %args ) = @_;
451 if(my $setDescription = $args{setDescription}) {
452 $self->{setDescription} = $setDescription;
454 if(my $handler = $args{handler}) {
455 $self->{handler} = $handler;
# Handler setter: replaces the stored handler only when a true value is passed.
463 my ( $self, $handler ) = @_;
465 $self->{handler} = $handler if $handler;
# Emit a single 'setDescription' element in the OAI 2.0 namespace, with no
# attributes, whose content is the stored description.
473 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
478 # __END__ C4::OAI::Description
# Response to the OAI-PMH ListSets verb: lists the sets returned by GetOAISets,
# paged with a resumption token.
480 package C4::OAI::ListSets;
487 use base ("HTTP::OAI::ListSets");
# Constructor.
#   $repository - object providing koha_max_count
#   %args       - request arguments, also handed to C4::OAI::ResumptionToken
490 my ( $class, $repository, %args ) = @_;
492 my $self = HTTP::OAI::ListSets->new(%args);
494 my $token = C4::OAI::ResumptionToken->new(%args);
495 my $sets = GetOAISets;
# Walk all sets; positions below the token's offset get separate handling.
# NOTE(review): presumably they are skipped because earlier pages already
# returned them - confirm.
497 foreach my $set (@$sets) {
498 if ($pos < $token->{offset}) {
# Wrap each description string of the set in a C4::OAI::Description object.
503 foreach my $desc (@{$set->{'descriptions'}}) {
504 push @descriptions, C4::OAI::Description->new(
505 setDescription => $desc,
510 setSpec => $set->{'spec'},
511 setName => $set->{'name'},
512 setDescription => \@descriptions,
# Stop once the number of positions past the offset exceeds koha_max_count.
516 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# A resumption token is attached only when $pos ended up past the token's offset.
519 $self->resumptionToken(
520 new C4::OAI::ResumptionToken(
521 metadataPrefix => $token->{metadata_prefix},
524 ) if ( $pos > $token->{offset} );
529 # __END__ C4::OAI::ListSets;
# Response to the OAI-PMH ListRecords verb: full or deleted records for biblios
# whose timestamp lies in the requested window, paged with a resumption token.
531 package C4::OAI::ListRecords;
540 use base ("HTTP::OAI::ListRecords");
# Constructor.
#   $repository - object providing koha_max_count, koha_identifier, self_url()
#                 and get_biblio_marcxml()
#   %args       - request arguments, also handed to C4::OAI::ResumptionToken
544 my ($class, $repository, %args) = @_;
546 my $self = HTTP::OAI::ListRecords->new(%args);
548 my $token = new C4::OAI::ResumptionToken( %args );
549 my $dbh = C4::Context->dbh;
# Resolve the set spec carried by the token to a set via GetOAISetBySpec.
551 if(defined $token->{'set'}) {
552 $set = GetOAISetBySpec($token->{'set'});
# The statement assembled below selects from biblioitems and from
# deletedbiblio, each filtered by a timestamp range and, when $set is defined,
# joined to oai_sets_biblios and filtered by set_id. It asks for $max + 1 rows
# so that the extra row reveals whether another page exists.
# NOTE(review): the first SELECT lists (biblionumber, timestamp, marcxml) but
# the second lists (biblionumber, null as marcxml, timestamp); if the two are
# combined positionally the second and third columns are swapped - confirm.
# NOTE(review): the deletedbiblio part filters on DATE(timestamp) while the
# biblioitems part compares the full timestamp.
# NOTE(review): $token->{offset} is interpolated into the SQL text rather than
# bound, and it can originate from a client-supplied resumptionToken.
554 my $max = $repository->{koha_max_count};
556 (SELECT biblioitems.biblionumber, biblioitems.timestamp, marcxml
559 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
560 $sql .= " WHERE timestamp >= ? AND timestamp <= ? ";
561 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
563 (SELECT deletedbiblio.biblionumber, null as marcxml, timestamp FROM deletedbiblio";
564 $sql .= " JOIN oai_sets_biblios ON deletedbiblio.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
565 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
566 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
568 $sql .= ") ORDER BY biblionumber
569 LIMIT " . ($max + 1) . "
570 OFFSET $token->{offset}
572 my $sth = $dbh->prepare( $sql );
# Bind values follow placeholder order: range (and set id) for the first
# subquery, then range (and set id) for the second.
# NOTE(review): the first range uses from_arg/until_arg ('YYYY-MM-DD hh:mm:ss')
# but the second uses from/until, which still carry the 'T' and 'Z' designators.
573 my @bind_params = ($token->{'from_arg'}, $token->{'until_arg'});
574 push @bind_params, $set->{'id'} if defined $set;
575 push @bind_params, ($token->{'from'}, $token->{'until'});
576 push @bind_params, $set->{'id'} if defined $set;
577 $sth->execute( @bind_params );
# Only the first two columns of each row are read; the MARCXML is fetched
# again through get_biblio_marcxml below.
# Once $count exceeds $max, a resumption token pointing at offset + $max is
# attached to the response.
# NOTE(review): $count is presumably incremented once per row - confirm.
580 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
582 if ( $count > $max ) {
583 $self->resumptionToken(
584 new C4::OAI::ResumptionToken(
585 metadataPrefix => $token->{metadata_prefix},
586 from => $token->{from},
587 until => $token->{until},
588 offset => $token->{offset} + $max,
594 my $marcxml = $repository->get_biblio_marcxml($biblionumber, $args{metadataPrefix});
# Collect the spec of every OAI set returned for this biblio.
595 my $oai_sets = GetOAISetsBiblio($biblionumber);
597 foreach (@$oai_sets) {
598 push @setSpecs, $_->{spec};
# The row becomes either a full record (with MARCXML) or a deleted record.
# NOTE(review): presumably chosen by whether $marcxml is defined - confirm.
601 $self->record( C4::OAI::Record->new(
602 $repository, $marcxml, $timestamp, \@setSpecs,
603 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
604 metadataPrefix => $token->{metadata_prefix}
607 $self->record( C4::OAI::DeletedRecord->new(
608 $timestamp, \@setSpecs, identifier => $repository->{ koha_identifier } . ':' . $biblionumber ) );
612 # Return error if no results
614 return HTTP::OAI::Response->new(
615 requestURL => $repository->self_url(),
616 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
623 # __END__ C4::OAI::ListRecords
# Repository object: reads its settings from system preferences (and an
# optional YAML file), validates the CGI request, dispatches on the OAI verb
# and attaches an XML writer bound to STDOUT to the resulting response.
627 package C4::OAI::Repository;
629 use base ("HTTP::OAI::Repository");
635 use HTTP::OAI::Repository qw/:validate/;
637 use XML::SAX::Writer;
640 use YAML::Syck qw( LoadFile );
641 use CGI qw
/:standard -oldstyle_urls/;
# Constructor.
#   %args - passed unchanged to the parent constructor
648 my ($class, %args) = @_;
649 my $self = $class->SUPER::new
(%args);
# Defaults: archive identifier and page size from system preferences, the two
# built-in metadata formats, and an empty stylesheet cache.
651 $self->{ koha_identifier
} = C4
::Context
->preference("OAI-PMH:archiveID");
652 $self->{ koha_max_count
} = C4
::Context
->preference("OAI-PMH:MaxCount");
653 $self->{ koha_metadata_format
} = ['oai_dc', 'marcxml'];
654 $self->{ koha_stylesheet
} = { }; # Build when needed
656 # Load configuration file if defined in OAI-PMH:ConfFile syspref
# When a file is configured, the list of supported formats is replaced by the
# keys of the file's 'format' section.
657 if ( my $file = C4
::Context
->preference("OAI-PMH:ConfFile") ) {
658 $self->{ conf
} = LoadFile
( $file );
659 my @formats = keys %{ $self->{conf
}->{format
} };
660 $self->{ koha_metadata_format
} = \
@formats;
663 # Check for grammatical errors in the request
664 my @errs = validate_request
( CGI
::Vars
() );
666 # Is metadataPrefix supported by the repository?
# A non-empty prefix that is not in koha_metadata_format adds a
# cannotDisseminateFormat error to @errs.
667 my $mdp = param
('metadataPrefix') || '';
668 if ( $mdp && !grep { $_ eq $mdp } @
{$self->{ koha_metadata_format
}} ) {
669 push @errs, new HTTP
::OAI
::Error
(
670 code
=> 'cannotDisseminateFormat',
671 message
=> "Dissemination as '$mdp' is not supported",
# Generic response carrying the request URL.
# NOTE(review): presumably built only when @errs is non-empty - confirm.
677 $response = HTTP
::OAI
::Response
->new(
678 requestURL
=> self_url
(),
# Dispatch on the 'verb' parameter; it is removed from %attr so the remaining
# parameters can be handed to the verb's response class. Identify and
# ListMetadataFormats receive only the repository object.
683 my %attr = CGI
::Vars
();
684 my $verb = delete( $attr{verb
} );
685 if ( $verb eq 'ListSets' ) {
686 $response = C4
::OAI
::ListSets
->new($self, %attr);
688 elsif ( $verb eq 'Identify' ) {
689 $response = C4
::OAI
::Identify
->new( $self );
691 elsif ( $verb eq 'ListMetadataFormats' ) {
692 $response = C4
::OAI
::ListMetadataFormats
->new( $self );
694 elsif ( $verb eq 'GetRecord' ) {
695 $response = C4
::OAI
::GetRecord
->new( $self, %attr );
697 elsif ( $verb eq 'ListRecords' ) {
698 $response = C4
::OAI
::ListRecords
->new( $self, %attr );
700 elsif ( $verb eq 'ListIdentifiers' ) {
701 $response = C4
::OAI
::ListIdentifiers
->new( $self, %attr );
# The response writes its XML through an XML::SAX::Writer bound to STDOUT.
705 $response->set_handler( XML
::SAX
::Writer
->new( Output
=> *STDOUT
) );
# Build the MARCXML for one biblio through GetMarcBiblio.
#   $biblionumber - biblio to fetch
#   $format       - metadata format name, used to look up include_items
# Evaluates to $record->as_xml() when GetMarcBiblio returns a true value,
# undef otherwise.
713 sub get_biblio_marcxml
{
714 my ($self, $biblionumber, $format) = @_;
# With a configuration loaded, $with_items takes the include_items setting of
# the requested format.
716 if ( my $conf = $self->{conf
} ) {
717 $with_items = $conf->{format
}->{$format}->{include_items
};
# NOTE(review): the meaning of the third argument (1) is defined by
# GetMarcBiblio - confirm against its documentation.
719 my $record = GetMarcBiblio
($biblionumber, $with_items, 1);
720 $record ?
$record->as_xml() : undef;
# Look up, and build on first use, the compiled XSLT stylesheet for $format.
# Compiled stylesheets are cached per format in $self->{koha_stylesheet}.
725 my ( $self, $format ) = @_;
727 my $stylesheet = $self->{ koha_stylesheet
}->{ $format };
# Cache miss: pick the XSL file, parse it and compile it.
728 unless ( $stylesheet ) {
# With a configuration loaded the file is the format's xsl_file entry;
# otherwise the path is concatenated from the 'intrahtdocs' config value and
# the 'marcflavour' preference.
729 my $xsl_file = $self->{ conf
}
730 ?
$self->{ conf
}->{ format
}->{ $format }->{ xsl_file
}
731 : ( C4
::Context
->config('intrahtdocs') .
733 C4
::Context
->preference('marcflavour') .
735 my $parser = XML
::LibXML
->new();
736 my $xslt = XML
::LibXSLT
->new();
737 my $style_doc = $parser->parse_file( $xsl_file );
738 $stylesheet = $xslt->parse_stylesheet( $style_doc );
# Remember the compiled stylesheet for later calls.
739 $self->{ koha_stylesheet
}->{ $format } = $stylesheet;
749 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
753 use C4::OAI::Repository;
755 my $repository = C4::OAI::Repository->new();
759 This object extends the HTTP::OAI::Repository object.
760 It accepts OAI-PMH HTTP requests and returns the result.
762 This OAI-PMH server can operate in a simple mode or an extended one.
764 In simple mode, repository configuration comes entirely from Koha system
765 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
766 records in marcxml or Dublin Core format. Dublin Core records are created from
767 Koha marcxml records transformed with XSLT. The XSL file used is located in the
768 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour,
769 respectively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
772 In extended mode, it's possible to configure formats other than marcxml or Dublin
773 Core. The OAI-PMH:ConfFile syspref specifies a YAML configuration file which
774 lists the available metadata formats and the XSL files used to create them from marcxml
775 records. If this syspref isn't set, the Koha OAI server works in simple mode. A
776 configuration file koha-oai.conf can look like this:
782 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
783 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
784 xsl_file: /usr/local/koha/xslt/vs.xsl
786 metadataPrefix: marcxml
787 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
788 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
790 metadataPrefix: oai_dc
791 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
792 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
793 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl