Bug 6829 Follow up patch
[koha.git] / opac / oai.pl
blob038b6d17925ae264c8697c7d35172b63214cb767
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use diagnostics;
7 use CGI qw/:standard -oldstyle_urls/;
8 use vars qw( $GZIP );
9 use C4::Context;
12 BEGIN {
13 eval { require PerlIO::gzip };
14 $GZIP = ($@) ? 0 : 1;
17 unless ( C4::Context->preference('OAI-PMH') ) {
18 print
19 header(
20 -type => 'text/plain; charset=utf-8',
21 -charset => 'utf-8',
22 -status => '404 OAI-PMH service is disabled',
24 "OAI-PMH service is disabled";
25 exit;
28 my @encodings = http('HTTP_ACCEPT_ENCODING');
29 if ( $GZIP && grep { defined($_) && $_ eq 'gzip' } @encodings ) {
30 print header(
31 -type => 'text/xml; charset=utf-8',
32 -charset => 'utf-8',
33 -Content-Encoding => 'gzip',
35 binmode( STDOUT, ":gzip" );
37 else {
38 print header(
39 -type => 'text/xml; charset=utf-8',
40 -charset => 'utf-8',
44 binmode( STDOUT, ":utf8" );
45 my $repository = C4::OAI::Repository->new();
47 # __END__ Main Prog
51 # Extends HTTP::OAI::ResumptionToken
52 # A token is identified by:
53 # - metadataPrefix
54 # - from
55 # - until
56 # - offset
58 package C4::OAI::ResumptionToken;
60 use strict;
61 use warnings;
62 use diagnostics;
63 use HTTP::OAI;
65 use base ("HTTP::OAI::ResumptionToken");
68 sub new {
69 my ($class, %args) = @_;
71 my $self = $class->SUPER::new(%args);
73 my ($metadata_prefix, $offset, $from, $until);
74 if ( $args{ resumptionToken } ) {
75 ($metadata_prefix, $offset, $from, $until)
76 = split( ':', $args{resumptionToken} );
78 else {
79 $metadata_prefix = $args{ metadataPrefix };
80 $from = $args{ from } || '1970-01-01';
81 $until = $args{ until };
82 unless ( $until) {
83 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday) = gmtime( time );
84 $until = sprintf( "%.4d-%.2d-%.2d", $year+1900, $mon+1,$mday );
86 $offset = $args{ offset } || 0;
89 $self->{ metadata_prefix } = $metadata_prefix;
90 $self->{ offset } = $offset;
91 $self->{ from } = $from;
92 $self->{ until } = $until;
94 $self->resumptionToken(
95 join( ':', $metadata_prefix, $offset, $from, $until ) );
96 $self->cursor( $offset );
98 return $self;
101 # __END__ C4::OAI::ResumptionToken
105 package C4::OAI::Identify;
107 use strict;
108 use warnings;
109 use diagnostics;
110 use HTTP::OAI;
111 use C4::Context;
113 use base ("HTTP::OAI::Identify");
115 sub new {
116 my ($class, $repository) = @_;
118 my ($baseURL) = $repository->self_url() =~ /(.*)\?.*/;
119 my $self = $class->SUPER::new(
120 baseURL => $baseURL,
121 repositoryName => C4::Context->preference("LibraryName"),
122 adminEmail => C4::Context->preference("KohaAdminEmailAddress"),
123 MaxCount => C4::Context->preference("OAI-PMH:MaxCount"),
124 granularity => 'YYYY-MM-DD',
125 earliestDatestamp => '0001-01-01',
126 deletedRecord => 'no',
129 # FIXME - alas, the description element is not so simple; to validate
130 # against the OAI-PMH schema, it cannot contain just a string,
131 # but one or more elements that validate against another XML schema.
132 # For now, simply omitting it.
133 # $self->description( "Koha OAI Repository" );
135 $self->compression( 'gzip' );
137 return $self;
140 # __END__ C4::OAI::Identify
144 package C4::OAI::ListMetadataFormats;
146 use strict;
147 use warnings;
148 use diagnostics;
149 use HTTP::OAI;
151 use base ("HTTP::OAI::ListMetadataFormats");
153 sub new {
154 my ($class, $repository) = @_;
156 my $self = $class->SUPER::new();
158 if ( $repository->{ conf } ) {
159 foreach my $name ( @{ $repository->{ koha_metadata_format } } ) {
160 my $format = $repository->{ conf }->{ format }->{ $name };
161 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
162 metadataPrefix => $format->{metadataPrefix},
163 schema => $format->{schema},
164 metadataNamespace => $format->{metadataNamespace}, ) );
167 else {
168 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
169 metadataPrefix => 'oai_dc',
170 schema => 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
171 metadataNamespace => 'http://www.openarchives.org/OAI/2.0/oai_dc/'
172 ) );
173 $self->metadataFormat( HTTP::OAI::MetadataFormat->new(
174 metadataPrefix => 'marcxml',
175 schema => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd',
176 metadataNamespace => 'http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim'
177 ) );
180 return $self;
183 # __END__ C4::OAI::ListMetadataFormats
187 package C4::OAI::Record;
189 use strict;
190 use warnings;
191 use diagnostics;
192 use HTTP::OAI;
193 use HTTP::OAI::Metadata::OAI_DC;
195 use base ("HTTP::OAI::Record");
197 sub new {
198 my ($class, $repository, $marcxml, $timestamp, %args) = @_;
200 my $self = $class->SUPER::new(%args);
202 $timestamp =~ s/ /T/, $timestamp .= 'Z';
203 $self->header( new HTTP::OAI::Header(
204 identifier => $args{identifier},
205 datestamp => $timestamp,
206 ) );
208 my $parser = XML::LibXML->new();
209 my $record_dom = $parser->parse_string( $marcxml );
210 my $format = $args{metadataPrefix};
211 if ( $format ne 'marcxml' ) {
212 $record_dom = $repository->stylesheet($format)->transform( $record_dom );
214 $self->metadata( HTTP::OAI::Metadata->new( dom => $record_dom ) );
216 return $self;
219 # __END__ C4::OAI::Record
223 package C4::OAI::GetRecord;
225 use strict;
226 use warnings;
227 use diagnostics;
228 use HTTP::OAI;
230 use base ("HTTP::OAI::GetRecord");
233 sub new {
234 my ($class, $repository, %args) = @_;
236 my $self = HTTP::OAI::GetRecord->new(%args);
238 my $dbh = C4::Context->dbh;
239 my $sth = $dbh->prepare("
240 SELECT marcxml, timestamp
241 FROM biblioitems
242 WHERE biblionumber=? " );
243 my $prefix = $repository->{koha_identifier} . ':';
244 my ($biblionumber) = $args{identifier} =~ /^$prefix(.*)/;
245 $sth->execute( $biblionumber );
246 my ($marcxml, $timestamp);
247 unless ( ($marcxml, $timestamp) = $sth->fetchrow ) {
248 return HTTP::OAI::Response->new(
249 requestURL => $repository->self_url(),
250 errors => [ new HTTP::OAI::Error(
251 code => 'idDoesNotExist',
252 message => "There is no biblio record with this identifier",
253 ) ] ,
257 #$self->header( HTTP::OAI::Header->new( identifier => $args{identifier} ) );
258 $self->record( C4::OAI::Record->new(
259 $repository, $marcxml, $timestamp, %args ) );
261 return $self;
264 # __END__ C4::OAI::GetRecord
268 package C4::OAI::ListIdentifiers;
270 use strict;
271 use warnings;
272 use diagnostics;
273 use HTTP::OAI;
275 use base ("HTTP::OAI::ListIdentifiers");
278 sub new {
279 my ($class, $repository, %args) = @_;
281 my $self = HTTP::OAI::ListIdentifiers->new(%args);
283 my $token = new C4::OAI::ResumptionToken( %args );
284 my $dbh = C4::Context->dbh;
285 my $sql = "SELECT biblionumber, timestamp
286 FROM biblioitems
287 WHERE timestamp >= ? AND timestamp <= ?
288 LIMIT " . $repository->{koha_max_count} . "
289 OFFSET " . $token->{offset};
290 my $sth = $dbh->prepare( $sql );
291 $sth->execute( $token->{from}, $token->{until} );
293 my $pos = $token->{offset};
294 while ( my ($biblionumber, $timestamp) = $sth->fetchrow ) {
295 $timestamp =~ s/ /T/, $timestamp .= 'Z';
296 $self->identifier( new HTTP::OAI::Header(
297 identifier => $repository->{ koha_identifier} . ':' . $biblionumber,
298 datestamp => $timestamp,
299 ) );
300 $pos++;
302 $self->resumptionToken( new C4::OAI::ResumptionToken(
303 metadataPrefix => $token->{metadata_prefix},
304 from => $token->{from},
305 until => $token->{until},
306 offset => $pos ) ) if ($pos > $token->{offset});
308 return $self;
311 # __END__ C4::OAI::ListIdentifiers
315 package C4::OAI::ListRecords;
317 use strict;
318 use warnings;
319 use diagnostics;
320 use HTTP::OAI;
322 use base ("HTTP::OAI::ListRecords");
325 sub new {
326 my ($class, $repository, %args) = @_;
328 my $self = HTTP::OAI::ListRecords->new(%args);
330 my $token = new C4::OAI::ResumptionToken( %args );
331 my $dbh = C4::Context->dbh;
332 my $sql = "SELECT biblionumber, marcxml, timestamp
333 FROM biblioitems
334 WHERE timestamp >= ? AND timestamp <= ?
335 LIMIT " . $repository->{koha_max_count} . "
336 OFFSET " . $token->{offset};
337 my $sth = $dbh->prepare( $sql );
338 $sth->execute( $token->{from}, $token->{until} );
340 my $pos = $token->{offset};
341 while ( my ($biblionumber, $marcxml, $timestamp) = $sth->fetchrow ) {
342 $self->record( C4::OAI::Record->new(
343 $repository, $marcxml, $timestamp,
344 identifier => $repository->{ koha_identifier } . ':' . $biblionumber,
345 metadataPrefix => $token->{metadata_prefix}
346 ) );
347 $pos++;
349 $self->resumptionToken( new C4::OAI::ResumptionToken(
350 metadataPrefix => $token->{metadata_prefix},
351 from => $token->{from},
352 until => $token->{until},
353 offset => $pos ) ) if ($pos > $token->{offset});
355 return $self;
358 # __END__ C4::OAI::ListRecords
362 package C4::OAI::Repository;
364 use base ("HTTP::OAI::Repository");
366 use strict;
367 use warnings;
368 use diagnostics;
370 use HTTP::OAI;
371 use HTTP::OAI::Repository qw/:validate/;
373 use XML::SAX::Writer;
374 use XML::LibXML;
375 use XML::LibXSLT;
376 use YAML::Syck qw( LoadFile );
377 use CGI qw/:standard -oldstyle_urls/;
379 use C4::Context;
380 use C4::Biblio;
383 sub new {
384 my ($class, %args) = @_;
385 my $self = $class->SUPER::new(%args);
387 $self->{ koha_identifier } = C4::Context->preference("OAI-PMH:archiveID");
388 $self->{ koha_max_count } = C4::Context->preference("OAI-PMH:MaxCount");
389 $self->{ koha_metadata_format } = ['oai_dc', 'marcxml'];
390 $self->{ koha_stylesheet } = { }; # Build when needed
392 # Load configuration file if defined in OAI-PMH:ConfFile syspref
393 if ( my $file = C4::Context->preference("OAI-PMH:ConfFile") ) {
394 $self->{ conf } = LoadFile( $file );
395 my @formats = keys %{ $self->{conf}->{format} };
396 $self->{ koha_metadata_format } = \@formats;
399 # Check for grammatical errors in the request
400 my @errs = validate_request( CGI::Vars() );
402 # Is metadataPrefix supported by the respository?
403 my $mdp = param('metadataPrefix') || '';
404 if ( $mdp && !grep { $_ eq $mdp } @{$self->{ koha_metadata_format }} ) {
405 push @errs, new HTTP::OAI::Error(
406 code => 'cannotDisseminateFormat',
407 message => "Dissemination as '$mdp' is not supported",
411 my $response;
412 if ( @errs ) {
413 $response = HTTP::OAI::Response->new(
414 requestURL => self_url(),
415 errors => \@errs,
418 else {
419 my %attr = CGI::Vars();
420 my $verb = delete( $attr{verb} );
421 if ( grep { $_ eq $verb } qw( ListSets ) ) {
422 $response = HTTP::OAI::Response->new(
423 requestURL => $self->self_url(),
424 errors => [ new HTTP::OAI::Error(
425 code => 'noSetHierarchy',
426 message => "Koha repository doesn't have sets",
427 ) ] ,
430 elsif ( $verb eq 'Identify' ) {
431 $response = C4::OAI::Identify->new( $self );
433 elsif ( $verb eq 'ListMetadataFormats' ) {
434 $response = C4::OAI::ListMetadataFormats->new( $self );
436 elsif ( $verb eq 'GetRecord' ) {
437 $response = C4::OAI::GetRecord->new( $self, %attr );
439 elsif ( $verb eq 'ListRecords' ) {
440 $response = C4::OAI::ListRecords->new( $self, %attr );
442 elsif ( $verb eq 'ListIdentifiers' ) {
443 $response = C4::OAI::ListIdentifiers->new( $self, %attr );
447 $response->set_handler( XML::SAX::Writer->new( Output => *STDOUT ) );
448 $response->generate;
450 bless $self, $class;
451 return $self;
455 sub stylesheet {
456 my ( $self, $format ) = @_;
458 my $stylesheet = $self->{ koha_stylesheet }->{ $format };
459 unless ( $stylesheet ) {
460 my $xsl_file = $self->{ conf }
461 ? $self->{ conf }->{ format }->{ $format }->{ xsl_file }
462 : ( C4::Context->config('intrahtdocs') .
463 '/prog/en/xslt/' .
464 C4::Context->preference('marcflavour') .
465 'slim2OAIDC.xsl' );
466 my $parser = XML::LibXML->new();
467 my $xslt = XML::LibXSLT->new();
468 my $style_doc = $parser->parse_file( $xsl_file );
469 $stylesheet = $xslt->parse_stylesheet( $style_doc );
470 $self->{ koha_stylesheet }->{ $format } = $stylesheet;
473 return $stylesheet;
478 =head1 NAME
480 C4::OAI::Repository - Handles OAI-PMH requests for a Koha database.
482 =head1 SYNOPSIS
484 use C4::OAI::Repository;
486 my $repository = C4::OAI::Repository->new();
488 =head1 DESCRIPTION
490 This object extend HTTP::OAI::Repository object.
491 It accepts OAI-PMH HTTP requests and returns result.
493 This OAI-PMH server can operate in a simple mode and extended one.
495 In simple mode, repository configuration comes entirely from Koha system
496 preferences (OAI-PMH:archiveID and OAI-PMH:MaxCount) and the server returns
497 records in marcxml or dublin core format. Dublin core records are created from
498 koha marcxml records tranformed with XSLT. Used XSL file is located in
499 koha-tmpl/intranet-tmpl/prog/en/xslt directory and choosed based on marcflavour,
500 respecively MARC21slim2OAIDC.xsl for MARC21 and MARC21slim2OAIDC.xsl for
501 UNIMARC.
503 In extende mode, it's possible to parameter other format than marcxml or Dublin
504 Core. A new syspref OAI-PMH:ConfFile specify a YAML configuration file which
505 list available metadata formats and XSL file used to create them from marcxml
506 records. If this syspref isn't set, Koha OAI server works in simple mode. A
507 configuration file koha-oai.conf can look like that:
510 format:
512 metadataPrefix: vs
513 metadataNamespace: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs
514 schema: http://veryspecial.tamil.fr/vs/format-pivot/1.1/vs.xsd
515 xsl_file: /usr/local/koha/xslt/vs.xsl
516 marcxml:
517 metadataPrefix: marxml
518 metadataNamespace: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim
519 schema: http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd
520 oai_dc:
521 metadataPrefix: oai_dc
522 metadataNamespace: http://www.openarchives.org/OAI/2.0/oai_dc/
523 schema: http://www.openarchives.org/OAI/2.0/oai_dc.xsd
524 xsl_file: /usr/local/koha/koha-tmpl/intranet-tmpl/xslt/UNIMARCslim2OAIDC.xsl
526 =cut