3 # Copyright Biblibre 2008
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
24 use CGI qw
/:standard -oldstyle_urls/;
# Top-level CGI script (fragment). It optionally loads PerlIO::gzip, answers
# with a 404 status when the 'OAI-PMH' system preference is false, picks a
# gzip or plain XML response header, and finally builds the repository object.
# The eval traps a failure to load PerlIO::gzip, so the module is optional.
30 eval { require PerlIO::gzip };
# When the 'OAI-PMH' preference is false, a text/plain header with a
# '404 OAI-PMH service is disabled' status and the same message are produced.
34 unless ( C4::Context->preference('OAI-PMH') ) {
37 -type => 'text/plain; charset=utf-8',
39 -status => '404 OAI-PMH service is disabled',
41 "OAI-PMH service is disabled";
# gzip output is chosen only when $GZIP is true and one of the values returned
# for HTTP_ACCEPT_ENCODING is exactly 'gzip'.
# NOTE(review): $GZIP is assigned outside this fragment - presumably from the
# result of the eval above; confirm.
45 my @encodings = http('HTTP_ACCEPT_ENCODING');
46 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
48 -type => 'text/xml; charset=utf-8',
50 -Content-Encoding => 'gzip',
52 binmode( STDOUT, ":gzip" );
56 -type => 'text/xml; charset=utf-8',
# Apply the UTF-8 encoding layer to STDOUT before the repository is created.
61 binmode STDOUT, ':encoding(UTF-8)';
62 my $repository = C4::OAI::Repository->new();
68 # Extends HTTP::OAI::ResumptionToken
69 # A token is identified by:
# Resumption token whose string form is the five fields
# metadata_prefix/offset/from/until/set joined with '/'.
# The constructor takes named arguments (%args): either 'resumptionToken'
# (an existing token string to decode) or the individual fields.
75 package C4::OAI::ResumptionToken;
81 use base ("HTTP::OAI::ResumptionToken");
85 my ($class, %args) = @_;
87 my $self = $class->SUPER::new(%args);
89 my ($metadata_prefix, $offset, $from, $until, $set);
# An incoming token string is split on '/' into the five fields, in order.
# These values come straight from the request argument and are not validated
# in the lines visible here.
90 if ( $args{ resumptionToken } ) {
91 ($metadata_prefix, $offset, $from, $until, $set)
92 = split( '/', $args{resumptionToken} );
# Otherwise the fields come from the named arguments: 'from' defaults to
# '1970-01-01' and 'offset' defaults to 0.
95 $metadata_prefix = $args{ metadataPrefix };
96 $from = $args{ from } || '1970-01-01';
97 $until = $args{ until };
# $until is set to today's date in GMT, formatted YYYY-MM-DD.
# NOTE(review): the condition guarding this assignment is not visible here -
# presumably it applies only when no 'until' argument was given; confirm.
99 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
100 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
102 $offset = $args{ offset } || 0;
# Keep the decoded fields on the object; the list verbs read them back as
# $token->{offset}, $token->{from}, $token->{until} and $token->{set}.
106 $self->{ metadata_prefix } = $metadata_prefix;
107 $self->{ offset } = $offset;
108 $self->{ from } = $from;
109 $self->{ until } = $until;
110 $self->{ set } = $set;
# Rebuild the token string from the fields and use the offset as the cursor.
112 $self->resumptionToken(
113 join( '/', $metadata_prefix, $offset, $from, $until, $set ) );
114 $self->cursor( $offset );
119 # __END__ C4::OAI::ResumptionToken
# Response to the Identify verb. The constructor receives the repository
# object and fills the parent HTTP::OAI::Identify with values taken from Koha
# system preferences plus a few fixed settings.
123 package C4::OAI::Identify;
130 use base ("HTTP::OAI::Identify");
133 my ($class, $repository) = @_;
# Capture the part of the request URL before its query string; the match is
# greedy, so it stops at the last '?'.
135 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
# Repository name, admin e-mail and MaxCount come from system preferences;
# granularity is day-level, the earliest datestamp is fixed at '0001-01-01'
# and deleted records are reported as 'no'.
136 my $self = $class->SUPER::new(
138 repositoryName => C4::Context->preference("LibraryName"),
139 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
140 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
141 granularity => 'YYYY-MM-DD',
142 earliestDatestamp => '0001-01-01',
143 deletedRecord => 'no',
146 # FIXME - alas, the description element is not so simple; to validate
147 # against the OAI-PMH schema, it cannot contain just a string,
148 # but one or more elements that validate against another XML schema.
149 # For now, simply omitting it.
150 # $self->description( "Koha OAI Repository" );
# Advertise gzip as a supported compression.
152 $self->compression( 'gzip' );
157 # __END__ C4::OAI::Identify
# Response to the ListMetadataFormats verb. The constructor receives the
# repository object and registers one HTTP::OAI::MetadataFormat per format.
161 package C4::OAI::ListMetadataFormats;
167 use base ("HTTP::OAI::ListMetadataFormats");
170 my ($class, $repository) = @_;
172 my $self = $class->SUPER::new();
# With a loaded configuration, each name in koha_metadata_format is looked up
# under conf->{format} and its metadataPrefix, schema and metadataNamespace
# are published.
174 if ( $repository->{ conf } ) {
175 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
176 my $format = $repository->{ conf }->{ format }->{ $name };
177 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
178 metadataPrefix => $format->{metadataPrefix},
179 schema => $format->{schema},
180 metadataNamespace => $format->{metadataNamespace}, ) );
# Hard-coded oai_dc and marcxml formats.
# NOTE(review): presumably these are the fallback when no configuration is
# loaded; the 'else' itself is not visible in this fragment.
184 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
185 metadataPrefix => 'oai_dc',
186 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
187 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
# NOTE(review): the marcxml schema and namespace literals below contain a
# space inside the URL ('http://www.loc.gov/ standards/...'); this looks
# unintended - verify before relying on these values.
189 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
190 metadataPrefix => 'marcxml',
191 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
192 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
199 # __END__ C4::OAI::ListMetadataFormats
# A single OAI record built from a MARCXML string.
# Constructor arguments, in order: the repository object, the MARCXML text,
# the record timestamp, an array reference of setSpec strings, then named
# arguments (%args), of which 'identifier' and 'metadataPrefix' are read here.
203 package C4::OAI::Record;
208 use HTTP::OAI::Metadata::OAI_DC;
210 use base ("HTTP::OAI::Record");
213 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
215 my $self = $class->SUPER::new(%args);
# Replace the first space with 'T' and append 'Z'; the two expressions are
# joined by the comma operator, so both always run.
# NOTE(review): presumably this turns a 'date time' database value into a UTC
# datestamp - confirm the stored timestamps really are UTC.
217 $timestamp =~ s/ /T/, $timestamp .= 'Z';
218 $self->header( new HTTP::OAI::Header(
219 identifier => $args{identifier},
220 datestamp => $timestamp,
# Attach every setSpec to the header.
223 foreach my $setSpec (@$setSpecs) {
224 $self->header->setSpec($setSpec);
# Parse the MARCXML into a DOM. For any prefix other than 'marcxml' the DOM is
# transformed with the stylesheet the repository provides for that format; the
# OPACBaseURL preference is wrapped in single quotes and supplied as a
# transform parameter.
227 my $parser = XML::LibXML->new();
228 my $record_dom = $parser->parse_string( $marcxml );
229 my $format = $args{metadataPrefix};
230 if ( $format ne 'marcxml' ) {
232 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
234 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
236 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
241 # __END__ C4::OAI::Record
# Response to the GetRecord verb. The constructor receives the repository
# object and the request parameters (%args), of which 'identifier' is read
# here and the rest are forwarded to C4::OAI::Record.
245 package C4::OAI::GetRecord;
252 use base ("HTTP::OAI::GetRecord");
256 my ($class, $repository, %args) = @_;
# The object is created directly with HTTP::OAI::GetRecord->new rather than
# through $class.
258 my $self = HTTP::OAI::GetRecord->new(%args);
# Look the record up by biblionumber using a bound placeholder.
260 my $dbh = C4::Context->dbh;
261 my $sth = $dbh->prepare("
262 SELECT marcxml, timestamp
264 WHERE biblionumber=? " );
# The biblionumber is whatever follows '<koha_identifier>:' in the identifier.
# NOTE(review): $prefix is interpolated into the regex without \Q...\E, so
# regex metacharacters in the archive ID (a '.' for example) are treated as
# patterns; an identifier that does not match leaves $biblionumber undefined.
265 my $prefix = $repository->{koha_identifier} . ':';
266 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
267 $sth->execute( $biblionumber );
268 my ($marcxml, $timestamp);
# No row found: return an 'idDoesNotExist' error response instead.
269 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
270 return HTTP::OAI::Response->new(
271 requestURL => $repository->self_url(),
272 errors => [ new HTTP::OAI::Error(
273 code => 'idDoesNotExist',
274 message => "There is no biblio record with this identifier",
# Collect the spec of every OAI set the biblio belongs to.
279 my $oai_sets = GetOAISetsBiblio($biblionumber);
281 foreach (@$oai_sets) {
282 push @setSpecs, $_->{spec};
285 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
286 $self->record( C4::OAI::Record->new(
287 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
292 # __END__ C4::OAI::GetRecord
# Response to the ListIdentifiers verb. The constructor receives the
# repository object and the request parameters (%args). One header
# (identifier plus datestamp) is added per matching biblio; results are paged
# with a resumption token.
296 package C4::OAI::ListIdentifiers;
303 use base ("HTTP::OAI::ListIdentifiers");
307 my ($class, $repository, %args) = @_;
# The object is created directly with HTTP::OAI::ListIdentifiers->new rather
# than through $class.
309 my $self = HTTP::OAI::ListIdentifiers->new(%args);
# The token supplies the paging state: offset, from, until and set.
311 my $token = new C4::OAI::ResumptionToken( %args );
312 my $dbh = C4::Context->dbh;
# Resolve the set spec only when the token carries one.
314 if(defined $token->{'set'}) {
315 $set = GetOAISetBySpec($token->{'set'});
# The query asks for $max + 1 rows, so an extra row signals that another page
# exists. The from/until dates and the set id are bound as placeholders; the
# join and the set filter are added only when a set was resolved.
# NOTE(review): OFFSET is interpolated straight into the SQL text from
# $token->{offset}, which can originate from the request's resumptionToken -
# verify it is numeric before use.
317 my $max = $repository->{koha_max_count};
319 SELECT biblioitems.biblionumber, biblioitems.timestamp
322 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
323 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
324 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
326 LIMIT " . ($max+1) . "
327 OFFSET $token->{offset}
329 my $sth = $dbh->prepare( $sql );
330 my @bind_params = ($token->{'from'}, $token->{'until'});
331 push @bind_params, $set->{'id'} if defined $set;
332 $sth->execute( @bind_params );
335 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
# Past the page size: attach a token pointing at the next page (offset + max).
337 if ( $count > $max ) {
338 $self->resumptionToken(
339 new C4::OAI::ResumptionToken(
340 metadataPrefix => $token->{metadata_prefix},
341 from => $token->{from},
342 until => $token->{until},
343 offset => $token->{offset} + $max,
# Replace the first space with 'T' and append 'Z' before building the header.
349 $timestamp =~ s/ /T/, $timestamp .= 'Z';
350 $self->identifier( new HTTP::OAI::Header(
351 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
352 datestamp => $timestamp,
356 # Return error if no results
358 return HTTP::OAI::Response->new(
359 requestURL => $repository->self_url(),
360 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
367 # __END__ C4::OAI::ListIdentifiers
# Holder for one set description. It keeps the description text and a SAX
# handler, and emits the text as a 'setDescription' element in the OAI-PMH 2.0
# namespace.
369 package C4::OAI::Description;
374 use HTTP::OAI::SAXHandler qw/ :SAX /;
# Constructor: named arguments 'setDescription' and 'handler' are stored on
# the object when they are true.
377 my ( $class, %args ) = @_;
381 if(my $setDescription = $args{setDescription}) {
382 $self->{setDescription} = $setDescription;
384 if(my $handler = $args{handler}) {
385 $self->{handler} = $handler;
# Handler setter: stores a new handler only when one is passed.
393 my ( $self, $handler ) = @_;
395 $self->{handler} = $handler if $handler;
# Write the stored description through the stored handler.
403 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
408 # __END__ C4::OAI::Description
# Response to the ListSets verb. The constructor receives the repository
# object and the request parameters (%args). Sets come from GetOAISets and are
# paged by position using the token's offset.
410 package C4::OAI::ListSets;
417 use base ("HTTP::OAI::ListSets");
420 my ( $class, $repository, %args ) = @_;
# The object is created directly with HTTP::OAI::ListSets->new rather than
# through $class.
422 my $self = HTTP::OAI::ListSets->new(%args);
424 my $token = C4::OAI::ResumptionToken->new(%args);
425 my $sets = GetOAISets;
# NOTE(review): sets positioned before the token's offset are presumably
# skipped; the body of that branch is not visible in this fragment.
427 foreach my $set (@$sets) {
428 if ($pos < $token->{offset}) {
# Wrap each description string of the set in a C4::OAI::Description object.
433 foreach my $desc (@{$set->{'descriptions'}}) {
434 push @descriptions, C4::OAI::Description->new(
435 setDescription => $desc,
440 setSpec => $set->{'spec'},
441 setName => $set->{'name'},
442 setDescription => \@descriptions,
# Stop once the number of sets handled past the offset exceeds koha_max_count.
446 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
# A resumption token is attached only when $pos has moved past the offset.
449 $self->resumptionToken(
450 new C4::OAI::ResumptionToken(
451 metadataPrefix => $token->{metadata_prefix},
454 ) if ( $pos > $token->{offset} );
459 # __END__ C4::OAI::ListSets;
# Response to the ListRecords verb. The constructor receives the repository
# object and the request parameters (%args). One C4::OAI::Record is added per
# matching biblio; results are paged with a resumption token.
461 package C4::OAI::ListRecords;
468 use base ("HTTP::OAI::ListRecords");
472 my ($class, $repository, %args) = @_;
# The object is created directly with HTTP::OAI::ListRecords->new rather than
# through $class.
474 my $self = HTTP::OAI::ListRecords->new(%args);
# The token supplies the paging state: offset, from, until and set.
476 my $token = new C4::OAI::ResumptionToken( %args );
477 my $dbh = C4::Context->dbh;
# Resolve the set spec only when the token carries one.
479 if(defined $token->{'set'}) {
480 $set = GetOAISetBySpec($token->{'set'});
# The query asks for $max + 1 rows, so an extra row signals that another page
# exists. The from/until dates and the set id are bound as placeholders; the
# join and the set filter are added only when a set was resolved.
# NOTE(review): OFFSET is interpolated straight into the SQL text from
# $token->{offset}, which can originate from the request's resumptionToken -
# verify it is numeric before use.
482 my $max = $repository->{koha_max_count};
484 SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
487 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
488 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
489 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
491 LIMIT " . ($max + 1) . "
492 OFFSET $token->{offset}
495 my $sth = $dbh->prepare( $sql );
496 my @bind_params = ($token->{'from'}, $token->{'until'});
497 push @bind_params, $set->{'id'} if defined $set;
498 $sth->execute( @bind_params );
501 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
# Past the page size: attach a token pointing at the next page (offset + max).
503 if ( $count > $max ) {
504 $self->resumptionToken(
505 new C4::OAI::ResumptionToken(
506 metadataPrefix => $token->{metadata_prefix},
507 from => $token->{from},
508 until => $token->{until},
509 offset => $token->{offset} + $max,
# Collect the spec of every OAI set this biblio belongs to, then build the
# record with the identifier '<koha_identifier>:<biblionumber>' and the
# token's metadata prefix.
515 my $oai_sets = GetOAISetsBiblio($biblionumber);
517 foreach (@$oai_sets) {
518 push @setSpecs, $_->{spec};
520 $self->record( C4::OAI::Record->new(
521 $repository, $marcxml, $timestamp, \@setSpecs,
522 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
523 metadataPrefix => $token->{metadata_prefix}
527 # Return error if no results
529 return HTTP::OAI::Response->new(
530 requestURL => $repository->self_url(),
531 errors => [ new HTTP::OAI::Error( code => 'noRecordsMatch' ) ],
538 # __END__ C4::OAI::ListRecords
542 package C4::OAI::Repository;
544 use base ("HTTP::OAI::Repository");
550 use HTTP::OAI::Repository qw/:validate/;
552 use XML::SAX::Writer;
555 use YAML::Syck qw( LoadFile );
556 use CGI qw
/:standard -oldstyle_urls/;
# Repository constructor (fragment). It reads its settings from system
# preferences, optionally loads a YAML configuration file, validates the
# request, builds the response object for the requested verb and attaches an
# XML::SAX::Writer on STDOUT as the response handler.
563 my ($class, %args) = @_;
564 my $self = $class->SUPER::new
(%args);
# Defaults: archive identifier and page size from system preferences, the two
# built-in formats, and an empty stylesheet cache.
566 $self->{ koha_identifier
} = C4
::Context
->preference("OAI-PMH:archiveID");
567 $self->{ koha_max_count
} = C4
::Context
->preference("OAI-PMH:MaxCount");
568 $self->{ koha_metadata_format
} = ['oai_dc', 'marcxml'];
569 $self->{ koha_stylesheet
} = { }; # Build when needed
571 # Load configuration file if defined in OAI-PMH:ConfFile syspref
572 if ( my $file = C4
::Context
->preference("OAI-PMH:ConfFile") ) {
573 $self->{ conf
} = LoadFile
( $file );
574 my @formats = keys %{ $self->{conf
}->{format
} };
575 $self->{ koha_metadata_format
} = \
@formats;
578 # Check for grammatical errors in the request
579 my @errs = validate_request
( CGI
::Vars
() );
581 # Is metadataPrefix supported by the repository?
582 my $mdp = param
('metadataPrefix') || '';
583 if ( $mdp && !grep { $_ eq $mdp } @
{$self->{ koha_metadata_format
}} ) {
584 push @errs, new HTTP
::OAI
::Error
(
585 code
=> 'cannotDisseminateFormat',
586 message
=> "Dissemination as '$mdp' is not supported",
# NOTE(review): presumably this generic response is built only when @errs is
# non-empty; the guarding condition is not visible in this fragment.
592 $response = HTTP
::OAI
::Response
->new(
593 requestURL
=> self_url
(),
# Dispatch on the 'verb' request parameter. The remaining parameters are
# passed on as %attr; Identify and ListMetadataFormats receive only $self.
598 my %attr = CGI
::Vars
();
599 my $verb = delete( $attr{verb
} );
600 if ( $verb eq 'ListSets' ) {
601 $response = C4
::OAI
::ListSets
->new($self, %attr);
603 elsif ( $verb eq 'Identify' ) {
604 $response = C4
::OAI
::Identify
->new( $self );
606 elsif ( $verb eq 'ListMetadataFormats' ) {
607 $response = C4
::OAI
::ListMetadataFormats
->new( $self );
609 elsif ( $verb eq 'GetRecord' ) {
610 $response = C4
::OAI
::GetRecord
->new( $self, %attr );
612 elsif ( $verb eq 'ListRecords' ) {
613 $response = C4
::OAI
::ListRecords
->new( $self, %attr );
615 elsif ( $verb eq 'ListIdentifiers' ) {
616 $response = C4
::OAI
::ListIdentifiers
->new( $self, %attr );
# Send the response's SAX events to an XML writer bound to STDOUT.
620 $response->set_handler( XML
::SAX
::Writer
->new( Output
=> *STDOUT
) );
# Stylesheet lookup for a metadata format (fragment). Compiled stylesheets are
# cached per format name in $self->{koha_stylesheet}.
# NOTE(review): the return statement is not visible here; callers use the
# result as an object with a transform() method.
629 my ( $self, $format ) = @_;
631 my $stylesheet = $self->{ koha_stylesheet
}->{ $format };
# Cache miss: locate, parse and compile the XSL file.
632 unless ( $stylesheet ) {
# With a loaded configuration the path is that format's xsl_file entry;
# otherwise it is built from the 'intrahtdocs' config value and the
# 'marcflavour' preference (the literal path pieces between them are not
# visible in this fragment).
633 my $xsl_file = $self->{ conf
}
634 ?
$self->{ conf
}->{ format
}->{ $format }->{ xsl_file
}
635 : ( C4
::Context
->config('intrahtdocs') .
637 C4
::Context
->preference('marcflavour') .
# Parse the XSL file with XML::LibXML, compile it with XML::LibXSLT, and store
# the result in the cache.
639 my $parser = XML
::LibXML
->new();
640 my $xslt = XML
::LibXSLT
->new();
641 my $style_doc = $parser->parse_file( $xsl_file );
642 $stylesheet = $xslt->parse_stylesheet( $style_doc );
643 $self->{ koha_stylesheet
}->{ $format } = $stylesheet;
653 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
657 use C4::OAI::Repository;
659 my $repository = C4::OAI::Repository->new();
663 This object extends the HTTP::OAI::Repository object.
664 It accepts OAI-PMH HTTP requests and returns the result.
666 This OAI-PMH server can operate in a simple mode or an extended one.
668 In simple mode, repository configuration comes entirely from Koha system
669 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
670 records in marcxml or Dublin Core format. Dublin Core records are created from
671 Koha marcxml records transformed with XSLT. The XSL file used is located in the
672 koha-tmpl/intranet-tmpl/prog/en/xslt directory and is chosen based on marcflavour:
673 respectively MARC21slim2OAIDC.xsl for MARC21 and UNIMARCslim2OAIDC.xsl for
676 In extended mode, it's possible to configure formats other than marcxml or Dublin
677 Core. The OAI-PMH:ConfFile syspref specifies a YAML configuration file which
678 lists the available metadata formats and the XSL files used to create them from
679 marcxml records. If this syspref isn't set, the Koha OAI server works in simple
680 mode. A configuration file koha-oai.conf can look like this:
686 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
687 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
688 xsl_file: /usr/local/koha/xslt/vs.xsl
690 metadataPrefix: marcxml
691 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
692 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
694 metadataPrefix: oai_dc
695 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
696 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
697 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl