bug 7291 follow-up DBRev number
[koha.git] / opac / oai.pl
blobbad250a2f9fe5059ea2bd9f1df0072b65371aecf
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
6 use CGI qw/:standard -oldstyle_urls/;
7 use vars qw( $GZIP );
8 use C4::Context;
11 BEGIN {
12 eval { require PerlIO::gzip };
13 $GZIP = ($@) ? 0 : 1;
16 unless ( C4::Context->preference('OAI-PMH') ) {
17 print
18 header(
19 -type => 'text/plain; charset=utf-8',
20 -charset => 'utf-8',
21 -status => '404 OAI-PMH service is disabled',
23 "OAI-PMH service is disabled";
24 exit;
27 my @encodings = http('HTTP_ACCEPT_ENCODING');
28 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
29 print header(
30 -type => 'text/xml; charset=utf-8',
31 -charset => 'utf-8',
32 -Content-Encoding => 'gzip',
34 binmode( STDOUT, ":gzip" );
36 else {
37 print header(
38 -type => 'text/xml; charset=utf-8',
39 -charset => 'utf-8',
43 binmode STDOUT, ':encoding(UTF-8)';
44 my $repository = C4::OAI::Repository->new();
46 # __END__ Main Prog
50 # Extends HTTP::OAI::ResumptionToken
51 # A token is identified by:
52 # - metadataPrefix
53 # - from
54 # - until
55 # - offset
57 package C4::OAI::ResumptionToken;
59 use strict;
60 use warnings;
61 use HTTP::OAI;
63 use base ("HTTP::OAI::ResumptionToken");
66 sub new {
67 my ($class, %args) = @_;
69 my $self = $class->SUPER::new(%args);
71 my ($metadata_prefix, $offset, $from, $until, $set);
72 if ( $args{ resumptionToken } ) {
73 ($metadata_prefix, $offset, $from, $until, $set)
74 = split( ':', $args{resumptionToken} );
76 else {
77 $metadata_prefix = $args{ metadataPrefix };
78 $from = $args{ from } || '1970-01-01';
79 $until = $args{ until };
80 unless ( $until) {
81 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
82 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
84 $offset = $args{ offset } || 0;
85 $set = $args{set};
88 $self->{ metadata_prefix } = $metadata_prefix;
89 $self->{ offset } = $offset;
90 $self->{ from } = $from;
91 $self->{ until } = $until;
92 $self->{ set } = $set;
94 $self->resumptionToken(
95 join( ':', $metadata_prefix, $offset, $from, $until, $set ) );
96 $self->cursor( $offset );
98 return $self;
101 # __END__ C4::OAI::ResumptionToken
105 package C4::OAI::Identify;
107 use strict;
108 use warnings;
109 use HTTP::OAI;
110 use C4::Context;
112 use base ("HTTP::OAI::Identify");
114 sub new {
115 my ($class, $repository) = @_;
117 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
118 my $self = $class->SUPER::new(
119 baseURL => $baseURL,
120 repositoryName => C4::Context->preference("LibraryName"),
121 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
122 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
123 granularity => 'YYYY-MM-DD',
124 earliestDatestamp => '0001-01-01',
125 deletedRecord => 'no',
128 # FIXME - alas, the description element is not so simple; to validate
129 # against the OAI-PMH schema, it cannot contain just a string,
130 # but one or more elements that validate against another XML schema.
131 # For now, simply omitting it.
132 # $self->description( "Koha OAI Repository" );
134 $self->compression( 'gzip' );
136 return $self;
139 # __END__ C4::OAI::Identify
143 package C4::OAI::ListMetadataFormats;
145 use strict;
146 use warnings;
147 use HTTP::OAI;
149 use base ("HTTP::OAI::ListMetadataFormats");
151 sub new {
152 my ($class, $repository) = @_;
154 my $self = $class->SUPER::new();
156 if ( $repository->{ conf } ) {
157 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
158 my $format = $repository->{ conf }->{ format }->{ $name };
159 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
160 metadataPrefix => $format->{metadataPrefix},
161 schema => $format->{schema},
162 metadataNamespace => $format->{metadataNamespace}, ) );
165 else {
166 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
167 metadataPrefix => 'oai_dc',
168 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
169 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
170 ) );
171 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
172 metadataPrefix => 'marcxml',
173 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
174 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
175 ) );
178 return $self;
181 # __END__ C4::OAI::ListMetadataFormats
185 package C4::OAI::Record;
187 use strict;
188 use warnings;
189 use HTTP::OAI;
190 use HTTP::OAI::Metadata::OAI_DC;
192 use base ("HTTP::OAI::Record");
194 sub new {
195 my ($class, $repository, $marcxml, $timestamp, $setSpecs, %args) = @_;
197 my $self = $class->SUPER::new(%args);
199 $timestamp =~ s/ /T/, $timestamp .= 'Z';
200 $self->header( new HTTP::OAI::Header(
201 identifier => $args{identifier},
202 datestamp => $timestamp,
203 ) );
205 foreach my $setSpec (@$setSpecs) {
206 $self->header->setSpec($setSpec);
209 my $parser = XML::LibXML->new();
210 my $record_dom = $parser->parse_string( $marcxml );
211 my $format = $args{metadataPrefix};
212 if ( $format ne 'marcxml' ) {
213 my %args = (
214 OPACBaseURL => "'" . C4::Context->preference('OPACBaseURL') . "'"
216 $record_dom = $repository->stylesheet($format)->transform($record_dom, %args);
218 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
220 return $self;
223 # __END__ C4::OAI::Record
227 package C4::OAI::GetRecord;
229 use strict;
230 use warnings;
231 use HTTP::OAI;
232 use C4::OAI::Sets;
234 use base ("HTTP::OAI::GetRecord");
237 sub new {
238 my ($class, $repository, %args) = @_;
240 my $self = HTTP::OAI::GetRecord->new(%args);
242 my $dbh = C4::Context->dbh;
243 my $sth = $dbh->prepare("
244 SELECT marcxml, timestamp
245 FROM biblioitems
246 WHERE biblionumber=? " );
247 my $prefix = $repository->{koha_identifier} . ':';
248 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
249 $sth->execute( $biblionumber );
250 my ($marcxml, $timestamp);
251 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
252 return HTTP::OAI::Response->new(
253 requestURL => $repository->self_url(),
254 errors => [ new HTTP::OAI::Error(
255 code => 'idDoesNotExist',
256 message => "There is no biblio record with this identifier",
257 ) ] ,
261 my $oai_sets = GetOAISetsBiblio($biblionumber);
262 my @setSpecs;
263 foreach (@$oai_sets) {
264 push @setSpecs, $_->{spec};
267 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
268 $self->record( C4::OAI::Record->new(
269 $repository, $marcxml, $timestamp, \@setSpecs, %args ) );
271 return $self;
274 # __END__ C4::OAI::GetRecord
278 package C4::OAI::ListIdentifiers;
280 use strict;
281 use warnings;
282 use HTTP::OAI;
283 use C4::OAI::Sets;
285 use base ("HTTP::OAI::ListIdentifiers");
288 sub new {
289 my ($class, $repository, %args) = @_;
291 my $self = HTTP::OAI::ListIdentifiers->new(%args);
293 my $token = new C4::OAI::ResumptionToken( %args );
294 my $dbh = C4::Context->dbh;
295 my $set;
296 if(defined $token->{'set'}) {
297 $set = GetOAISetBySpec($token->{'set'});
299 my $sql = "
300 SELECT biblioitems.biblionumber, biblioitems.timestamp
301 FROM biblioitems
303 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
304 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
305 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
306 $sql .= "
307 LIMIT $repository->{'koha_max_count'}
308 OFFSET $token->{'offset'}
310 my $sth = $dbh->prepare( $sql );
311 my @bind_params = ($token->{'from'}, $token->{'until'});
312 push @bind_params, $set->{'id'} if defined $set;
313 $sth->execute( @bind_params );
315 my $pos = $token->{offset};
316 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
317 $timestamp =~ s/ /T/, $timestamp .= 'Z';
318 $self->identifier( new HTTP::OAI::Header(
319 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
320 datestamp => $timestamp,
321 ) );
322 $pos++;
324 $self->resumptionToken(
325 new C4::OAI::ResumptionToken(
326 metadataPrefix => $token->{metadata_prefix},
327 from => $token->{from},
328 until => $token->{until},
329 offset => $pos,
330 set => $token->{set}
332 ) if ($pos > $token->{offset});
334 return $self;
337 # __END__ C4::OAI::ListIdentifiers
339 package C4::OAI::Description;
341 use strict;
342 use warnings;
343 use HTTP::OAI;
344 use HTTP::OAI::SAXHandler qw/ :SAX /;
346 sub new {
347 my ( $class, %args ) = @_;
349 my $self = {};
351 if(my $setDescription = $args{setDescription}) {
352 $self->{setDescription} = $setDescription;
354 if(my $handler = $args{handler}) {
355 $self->{handler} = $handler;
358 bless $self, $class;
359 return $self;
362 sub set_handler {
363 my ( $self, $handler ) = @_;
365 $self->{handler} = $handler if $handler;
367 return $self;
370 sub generate {
371 my ( $self ) = @_;
373 g_data_element($self->{handler}, 'http://www.openarchives.org/OAI/2.0/', 'setDescription', {}, $self->{setDescription});
375 return $self;
378 # __END__ C4::OAI::Description
380 package C4::OAI::ListSets;
382 use strict;
383 use warnings;
384 use HTTP::OAI;
385 use C4::OAI::Sets;
387 use base ("HTTP::OAI::ListSets");
389 sub new {
390 my ( $class, $repository, %args ) = @_;
392 my $self = HTTP::OAI::ListSets->new(%args);
394 my $token = C4::OAI::ResumptionToken->new(%args);
395 my $sets = GetOAISets;
396 my $pos = 0;
397 foreach my $set (@$sets) {
398 if ($pos < $token->{offset}) {
399 $pos++;
400 next;
402 my @descriptions;
403 foreach my $desc (@{$set->{'descriptions'}}) {
404 push @descriptions, C4::OAI::Description->new(
405 setDescription => $desc,
408 $self->set(
409 HTTP::OAI::Set->new(
410 setSpec => $set->{'spec'},
411 setName => $set->{'name'},
412 setDescription => \@descriptions,
415 $pos++;
416 last if ($pos + 1 - $token->{offset}) > $repository->{koha_max_count};
419 $self->resumptionToken(
420 new C4::OAI::ResumptionToken(
421 metadataPrefix => $token->{metadata_prefix},
422 offset => $pos
424 ) if ( $pos > $token->{offset} );
426 return $self;
429 # __END__ C4::OAI::ListSets;
431 package C4::OAI::ListRecords;
433 use strict;
434 use warnings;
435 use HTTP::OAI;
436 use C4::OAI::Sets;
438 use base ("HTTP::OAI::ListRecords");
441 sub new {
442 my ($class, $repository, %args) = @_;
444 my $self = HTTP::OAI::ListRecords->new(%args);
446 my $token = new C4::OAI::ResumptionToken( %args );
447 my $dbh = C4::Context->dbh;
448 my $set;
449 if(defined $token->{'set'}) {
450 $set = GetOAISetBySpec($token->{'set'});
452 my $sql = "
453 SELECT biblioitems.biblionumber, biblioitems.marcxml, biblioitems.timestamp
454 FROM biblioitems
456 $sql .= " JOIN oai_sets_biblios ON biblioitems.biblionumber = oai_sets_biblios.biblionumber " if defined $set;
457 $sql .= " WHERE DATE(timestamp) >= ? AND DATE(timestamp) <= ? ";
458 $sql .= " AND oai_sets_biblios.set_id = ? " if defined $set;
459 $sql .= "
460 LIMIT $repository->{'koha_max_count'}
461 OFFSET $token->{'offset'}
464 my $sth = $dbh->prepare( $sql );
465 my @bind_params = ($token->{'from'}, $token->{'until'});
466 push @bind_params, $set->{'id'} if defined $set;
467 $sth->execute( @bind_params );
469 my $pos = $token->{offset};
470 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
471 my $oai_sets = GetOAISetsBiblio($biblionumber);
472 my @setSpecs;
473 foreach (@$oai_sets) {
474 push @setSpecs, $_->{spec};
476 $self->record( C4::OAI::Record->new(
477 $repository, $marcxml, $timestamp, \@setSpecs,
478 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
479 metadataPrefix => $token->{metadata_prefix}
480 ) );
481 $pos++;
483 $self->resumptionToken(
484 new C4::OAI::ResumptionToken(
485 metadataPrefix => $token->{metadata_prefix},
486 from => $token->{from},
487 until => $token->{until},
488 offset => $pos,
489 set => $token->{set}
491 ) if ($pos > $token->{offset});
493 return $self;
496 # __END__ C4::OAI::ListRecords
500 package C4::OAI::Repository;
502 use base ("HTTP::OAI::Repository");
504 use strict;
505 use warnings;
507 use HTTP::OAI;
508 use HTTP::OAI::Repository qw/:validate/;
510 use XML::SAX::Writer;
511 use XML::LibXML;
512 use XML::LibXSLT;
513 use YAML::Syck qw( LoadFile );
514 use CGI qw/:standard -oldstyle_urls/;
516 use C4::Context;
517 use C4::Biblio;
520 sub new {
521 my ($class, %args) = @_;
522 my $self = $class->SUPER::new(%args);
524 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
525 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
526 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
527 $self->{ koha_stylesheet } = { }; # Build when needed
529 # Load configuration file if defined in OAI-PMH:ConfFile syspref
530 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
531 $self->{ conf } = LoadFile( $file );
532 my @formats = keys %{ $self->{conf}->{format} };
533 $self->{ koha_metadata_format } = \@formats;
536 # Check for grammatical errors in the request
537 my @errs = validate_request( CGI::Vars() );
539 # Is metadataPrefix supported by the respository?
540 my $mdp = param('metadataPrefix') || '';
541 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
542 push @errs, new HTTP::OAI::Error(
543 code => 'cannotDisseminateFormat',
544 message => "Dissemination as '$mdp' is not supported",
548 my $response;
549 if ( @errs ) {
550 $response = HTTP::OAI::Response->new(
551 requestURL => self_url(),
552 errors => \@errs,
555 else {
556 my %attr = CGI::Vars();
557 my $verb = delete( $attr{verb} );
558 if ( $verb eq 'ListSets' ) {
559 $response = C4::OAI::ListSets->new($self, %attr);
561 elsif ( $verb eq 'Identify' ) {
562 $response = C4::OAI::Identify->new( $self );
564 elsif ( $verb eq 'ListMetadataFormats' ) {
565 $response = C4::OAI::ListMetadataFormats->new( $self );
567 elsif ( $verb eq 'GetRecord' ) {
568 $response = C4::OAI::GetRecord->new( $self, %attr );
570 elsif ( $verb eq 'ListRecords' ) {
571 $response = C4::OAI::ListRecords->new( $self, %attr );
573 elsif ( $verb eq 'ListIdentifiers' ) {
574 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
578 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
579 $response->generate;
581 bless $self, $class;
582 return $self;
586 sub stylesheet {
587 my ( $self, $format ) = @_;
589 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
590 unless ( $stylesheet ) {
591 my $xsl_file = $self->{ conf }
592 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
593 : ( C4::Context->config('intrahtdocs') .
594 '/prog/en/xslt/' .
595 C4::Context->preference('marcflavour') .
596 'slim2OAIDC.xsl' );
597 my $parser = XML::LibXML->new();
598 my $xslt = XML::LibXSLT->new();
599 my $style_doc = $parser->parse_file( $xsl_file );
600 $stylesheet = $xslt->parse_stylesheet( $style_doc );
601 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
604 return $stylesheet;
609 =head1 NAME
611 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
613 =head1 SYNOPSIS
615 use C4::OAI::Repository;
617 my $repository = C4::OAI::Repository->new();
619 =head1 DESCRIPTION
621 This object extend HTTP::OAI::Repository object.
622 It accepts OAI-PMH HTTP requests and returns result.
624 This OAI-PMH server can operate in a simple mode and extended one.
626 In simple mode, repository configuration comes entirely from Koha system
627 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
628 records in marcxml or dublin core format. Dublin core records are created from
629 koha marcxml records tranformed with XSLT. Used XSL file is located in
630 koha-tmpl/intranet-tmpl/prog/en/xslt directory and choosed based on marcflavour,
631 respecively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
632 UNIMARC.
634 In extende mode, it's possible to parameter other format than marcxml or Dublin
635 Core. A new syspref OAI-PMH:ConfFile specify a YAML configuration file which
636 list available metadata formats and XSL file used to create them from marcxml
637 records. If this syspref isn't set, Koha OAI server works in simple mode. A
638 configuration file koha-oai.conf can look like that:
641 format:
643 metadataPrefix: vs
644 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
645 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
646 xsl_file: /usr/local/koha/xslt/vs.xsl
647 marcxml:
648 metadataPrefix: marxml
649 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
650 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
651 oai_dc:
652 metadataPrefix: oai_dc
653 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
654 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
655 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl
657 =cut