1 # ---------------------------------------------------------------------
2 # OAI Data Provider template (OAI-PMH v2.0)
5 # ------------------+--------------------+-----------------------------
6 # Hussein Suleman | hussein@vt.edu | www.husseinsspace.com
7 # ------------------+--------------------+-+---------------------------
8 # Department of Computer Science | www.cs.vt.edu
9 # Digital Library Research Laboratory | www.dlib.vt.edu
10 # -----------------------------------------+-------------+-------------
11 # Virginia Polytechnic Institute and State University | www.vt.edu
12 # -------------------------------------------------------+-------------
18 =head1 OAI::DP OAI Data Provider
20 This module provides a full implementation of the OAI-PMH v2 protocol
21 specification (http://www.openarchives.org/OAI/openarchivesprotocol.html).
23 It is simple to use: to answer OAI-PMH requests, you must create a new OAI::DP
24 instance and call its run() method.
26 This new instance is an instance of a subclass of the OAI::DP class and the job
27 of this subclass is to manage data and to format answers according to the metadata
28 model used (see OAI::DC for an example).
30 A typical OAI service looks like:
32 my $OAI = new A_OAI_SUBCLASS(some parameters);
# Default fields of the provider object. Only part of the constructor is
# visible in this listing: the opening of the hash and several entries are
# elided.
52 xmlnsprefix
=> 'http://www.openarchives.org/OAI/2.0/',
53 protocolversion
=> '2.0',
# Placeholder identity values; the Identify handler falls back to these when
# the archive does not supply its own.
54 repositoryName
=> 'NoName Repository',
55 adminEmail
=> 'someone@somewhere.org',
# Day-level datestamps and no deleted-record support by default.
56 granularity
=> 'YYYY-MM-DD',
57 deletedRecord
=> 'no',
# Maps each supported metadataPrefix to its XML namespace.
58 metadatanamespace
=> {
59 oai_dc
=> 'http://www.openarchives.org/OAI/2.0/oai_dc/',
# NOTE(review): presumably an entry of a parallel 'metadataschema' map
# (prefix => schema location); the enclosing key is not visible here.
62 oai_dc
=> 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd',
67 metadatarootparameters
=> {
# NOTE(review): indirect object syntax; C4::OAI::Utility->new is the
# unambiguous spelling of the same call.
70 utility
=> new C4
::OAI
::Utility
,
74 bless $self, $classname;
86 # output XML HTTP header
# The visible parts assemble: the HTTP Content-type line, the XML prolog, the
# <OAI-PMH> root element, <responseDate>, a <request> element, one <error>
# element per accumulated error and, when there are no errors, the opening
# tag of the verb container. How these pieces are finally joined is elided.
91 # calculate timezone automatically
# The time is broken down with gmtime, so the stamp is expressed in UTC.
92 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=gmtime (time);
# NOTE(review): the argument feeding the trailing %s is not visible here;
# presumably it is the 'Z' UTC designator -- confirm.
94 my $datestring = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d%s",
95 $year+1900, $mon+1, $mday, $hour, $min, $sec,
# Serialise every accumulated [code, message] pair as an <error> element.
# NOTE(review): code and message are interpolated without XML escaping, and
# some messages built in the argument checker embed request parameter names.
101 foreach my $error (@
{$self->{'error'}})
103 $errors .= "<error code=\"$error->[0]\">$error->[1]</error>\n";
# The body of this test is elided; presumably it switches off $fullrequest so
# that the request attributes are omitted for these two error codes.
104 if (($error->[0] eq 'badVerb') || ($error->[0] eq 'badArgument'))
110 # add verb container if no errors
111 my $verbcontainer = '';
# $#{...} == -1 means the error array is empty.
112 if ($#{$self->{'error'}} == -1)
114 $verbcontainer = '<'.$self->{'verb'}.">\n";
117 # compute request element with its parameters included if necessary
118 my $request = '<request';
119 if ($fullrequest == 1)
# Each CGI parameter becomes an attribute of <request>.
# NOTE(review): the value is not escaped, so a value containing a double
# quote, '<' or '&' yields malformed XML; it is also evaluated in list
# context inside the concatenation -- confirm that is intended.
121 foreach my $param ($self->{'cgi'}->param)
123 $request .= " $param=\"".$self->{'cgi'}->param ($param)."\"";
# NOTE(review): reads a 'baseURL' key directly from the CGI object's hash;
# verify that the CGI class in use actually populates it.
126 $request .= '>'.$self->{'cgi'}->{'baseURL'}.'</request>';
128 "Content-type: text/xml\n\n".
129 "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n".
130 "<OAI-PMH xmlns=\"$self->{'xmlnsprefix'}\" ".
131 "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ".
132 "xsi:schemaLocation=\"$self->{'xmlnsprefix'} ".
133 "$self->{'xmlnsprefix'}OAI-PMH.xsd\">\n\n".
134 "<responseDate>$datestring</responseDate>\n".
141 # output XML HTTP footer
# Counterpart of the header: the closing tag of the verb container is
# prepared only when the error list is empty, the same condition under which
# the header opens it.
146 # add verb container if no errors
147 my $verbcontainer = '';
# $#{...} == -1 means the error array is empty.
148 if ($#{$self->{'error'}} == -1)
150 $verbcontainer = '</'.$self->{'verb'}.">\n";
158 # add an error to the running list of errors (if it's not there already)
# Parameters:
#   $errorcode   - error code string (e.g. 'badArgument')
#   $errorstring - human-readable message
161 my ($self, $errorcode, $errorstring) = @_;
# Look for an existing entry with the same code and the same message.
164 foreach my $error (@
{$self->{'error'}})
166 if (($error->[0] eq $errorcode) && ($error->[1] eq $errorstring))
# Errors are stored as two-element array references: [ code, message ].
172 push (@
{$self->{'error'}}, [ $errorcode, $errorstring ] );
177 # create an error and output response
# Records the error, then builds a complete response consisting of header and
# footer only. Callers either return or print the resulting string; nothing
# is printed by the visible lines themselves.
180 my ($self, $errorcode, $errorstring) = @_;
182 $self->AddError ($errorcode, $errorstring);
# Last expression of the visible body, so presumably the sub's return value.
183 $self->xmlheader.$self->xmlfooter;
187 # check for the validity of the date according to the OAI spec
# Parameter:
#   $date - datestamp string; the two shapes tested below are YYYY-MM-DD and
#           YYYY-MM-DDThh:mm:ssZ.
# The return statements are elided in this listing.
190 my ($self, $date) = @_;
192 my ($year, $month, $day, $hour, $minute, $second);
# Only the start of the string is anchored here; a time part, if present, is
# checked by the second pattern further down.
194 if ($date =~ /^([0-9]{4})-([0-9]{2})-([0-9]{2})/)
201 if (($month <= 0) || ($month > 12))
# Gregorian leap-year rule: divisible by 4 but not by 100, or divisible by
# 400. The remainder of this condition is elided in this listing.
206 if ((((($year % 4) == 0) && (($year % 100) != 0)) || (($year % 400) == 0))
208 { $daysinmonth = 29; }
209 elsif (($month == 4) || ($month == 6) || ($month == 9) || ($month == 11))
210 { $daysinmonth = 30; }
212 { $daysinmonth = 28; }
214 { $daysinmonth = 31; }
215 if (($day <= 0) || ($day > $daysinmonth))
# Full timestamp form: anchored at both ends and must end in a literal Z.
221 if ($date =~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}T([0-9]{2}):([0-9]{2}):([0-9]{2})Z$/)
# NOTE(review): if hour/minute/second are taken from the two-digit captures
# above, the "< 0" comparisons can never be true. A seconds value of 60 is
# rejected by the range check.
225 if (($hour < 0) || ($hour > 23) || ($minute < 0) || ($minute > 59))
229 if (($second < 0) || ($second > 59))
# Reached for strings longer than the 10-character date form that did not
# match the timestamp pattern.
232 elsif (length ($date) > 10)
239 # check that the granularity is ok
240 sub GranularityisValid
# Parameters:
#   $date1, $date2 - datestamp strings to check against the repository
#                    granularity and against each other.
# NOTE(review): the list handlers also call this with a single date, which
# leaves $date2 undefined for the length() tests below -- confirm how the
# elided lines guard against that.
242 my ($self, $date1, $date2) = @_;
244 my $granularity = $self->{'granularity'};
# A string longer than 10 characters is more than a plain YYYY-MM-DD date,
# which is only acceptable when the repository granularity is seconds.
246 if (($granularity ne 'YYYY-MM-DDThh:mm:ssZ') && (length ($date1) > 10))
252 if (($granularity ne 'YYYY-MM-DDThh:mm:ssZ') && (length ($date2) > 10))
# Equal lengths are used as the test that both dates share one granularity.
256 if (length ($date1) != length ($date2))
266 # check for bad arguments
# Validates the request parameters for the current verb in four passes:
# required parameters present, no unknown parameters, no repeated parameters,
# and resumptionToken not mixed with other parameters. Each failure returns
# the badArgument error response. Some table entries are elided here.
# Required parameters per verb.
274 'ListMetadataFormats' => [],
275 'ListIdentifiers' => [ 'metadataPrefix' ],
276 'GetRecord' => [ 'identifier', 'metadataPrefix' ],
277 'ListRecords' => [ 'metadataPrefix' ]
# Optional parameters per verb.
282 'ListMetadataFormats' => [ 'identifier' ],
283 'ListIdentifiers' => [ 'set', 'from', 'until', 'resumptionToken' ],
285 'ListRecords' => [ 'set', 'from', 'until', 'resumptionToken' ]
288 # get parameter lists
289 my $verb = $self->{'cgi'}->param ('verb');
290 my @parmsrequired = @
{$required{$verb}};
291 my @parmsoptional = @
{$optional{$verb}};
292 my @parmsall = (@parmsrequired, @parmsoptional);
# Names of all parameters present in the request.
293 my @names = $self->{'cgi'}->param;
# Build a lookup set of the supplied parameter names.
295 foreach my $name (@names)
297 $paramhash{$name} = 1;
300 # check for required parameters
301 foreach my $name (@parmsrequired)
# For the two list verbs a missing required parameter is tolerated when a
# resumptionToken is supplied instead.
303 if ((! exists $paramhash{$name}) &&
304 ((($verb ne 'ListIdentifiers') && ($verb ne 'ListRecords')) ||
305 (! exists $paramhash{'resumptionToken'})))
307 return $self->Error ('badArgument', "missing $name parameter");
311 # check for illegal parameters
312 foreach my $name (@names)
315 foreach my $name2 (@parmsall)
# 'verb' itself is always allowed.
# NOTE(review): $name comes from the request and is placed into the error
# message verbatim; the header builder does not escape error text.
320 if (($found == 0) && ($name ne 'verb'))
322 return $self->Error ('badArgument', "$name is an illegal parameter");
326 # check for duplicate parameters
327 foreach my $name (@names)
# List-context call collects every value supplied for this name.
# NOTE(review): recent CGI.pm releases warn about param() in list context and
# offer multi_param() -- confirm against the CGI version in use.
329 my @values = $self->{'cgi'}->param ($name);
332 return $self->Error ('badArgument', "multiple values are not allowed for the $name parameter");
336 # check for resumptionToken exclusivity
# The final clause of this condition is elided in this listing.
337 if ((($verb eq 'ListIdentifiers') || ($verb eq 'ListRecords')) &&
338 (exists $paramhash{'resumptionToken'}) &&
341 return $self->Error ('badArgument', 'resumptionToken cannot be combined with other parameters');
348 # convert date/timestamp into seconds for comparisons
# Parameters:
#   $date - datestamp string (date only, or full timestamp ending in Z)
#   $from - when defined and equal to 1, missing fields default to the start
#           of the period; otherwise they default to its end.
351 my ($self, $date, $from) = @_;
353 my ($month, $day, $hour, $minute, $second);
355 if ((defined $from) && ($from == 1))
# Lower-bound defaults: 1 January, 00:00:00.
357 ($month, $day, $hour, $minute, $second) = (1, 1, 0, 0, 0);
# Upper-bound defaults: 31 December, 23:59:59.
361 ($month, $day, $hour, $minute, $second) = (12, 31, 23, 59, 59);
# NOTE(review): neither pattern is anchored, so surrounding text is ignored.
364 if ($date =~ /([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z/)
# mktime takes a zero-based month and a year counted from 1900.
# NOTE(review): if this is POSIX::mktime, it interprets the fields as local
# time although the stamps are UTC (trailing Z) -- confirm; Time::Local's
# timegm would be the UTC equivalent.
366 return mktime
($6, $5, $4, $3, $2-1, $1-1900);
# Date-only form: only the time-of-day defaults chosen above are used here.
368 elsif ($date =~ /([0-9]{4})-([0-9]{2})-([0-9]{2})/)
370 return mktime
($second, $minute, $hour, $3, $2-1, $1-1900);
379 # check if the metadata format is valid
380 sub MetadataFormatisValid
# Parameter:
#   $metadataFormat - metadataPrefix to look up.
# The return statements are elided in this listing.
382 my ($self, $metadataFormat) = @_;
# The prefix is compared with each key of the metadatanamespace map.
385 foreach my $i (keys %{$self->{'metadatanamespace'}})
387 if ($metadataFormat eq $i)
398 # format the header for a record
# Parameters:
#   $identifier - record identifier
#   $datestamp  - record datestamp
#   $status     - optional; only the value 'deleted' has an effect
#   $setSpecs   - optional array reference of setSpec strings
# NOTE(review): all values are interpolated into the XML as-is; callers are
# presumably expected to pass XML-safe text -- confirm.
401 my ($self, $identifier, $datestamp, $status, $setSpecs) = @_;
403 my $statusattribute = '';
404 if ((defined $status) && ($status eq 'deleted'))
406 $statusattribute = " status=\"deleted\"";
# One <setSpec> element per entry of the optional list.
410 if (defined $setSpecs)
412 foreach my $setSpec (@
$setSpecs)
414 $setstring .= '<setSpec>'.$setSpec."</setSpec>\n";
418 "<header$statusattribute>\n".
419 "<identifier>$identifier</identifier>\n".
420 "<datestamp>$datestamp</datestamp>\n".
426 # format the record by encapsulating it in a "record" container
# Parameters:
#   $identifier, $datestamp, $status, $setSpecs - passed through unchanged to
#       the header formatter
#   $metadata - optional XML fragment for the <metadata> container
#   $about    - optional XML fragment for the <about> container
429 my ($self, $identifier, $datestamp, $status, $setSpecs, $metadata, $about) = @_;
431 my $header = $self->FormatHeader ($identifier, $datestamp, $status, $setSpecs);
# Undefined or empty fragments produce no container at all.
437 if ((defined $metadata) && ($metadata ne ''))
439 $output .= "<metadata>\n$metadata</metadata>\n";
441 if ((defined $about) && ($about ne ''))
443 $output .= "<about>\n$about</about>\n";
# Last expression of the visible body, so presumably the sub's return value.
446 $output."</record>\n";
450 # standard handler for Identify verb
# Starts from the hash reference returned by Archive_Identify and fills in
# every field the archive left out with the object's own defaults.
455 my $identity = $self->Archive_Identify;
456 if (! exists $identity->{'repositoryName'})
458 $identity->{'repositoryName'} = $self->{'repositoryName'};
460 if (! exists $identity->{'adminEmail'})
462 $identity->{'adminEmail'} = $self->{'adminEmail'};
# protocolVersion and baseURL are always overwritten, never taken from the
# archive.
464 $identity->{'protocolVersion'} = $self->{'protocolversion'};
465 $identity->{'baseURL'} = $self->{'cgi'}->{'baseURL'};
466 if (! exists $identity->{'granularity'})
468 $identity->{'granularity'} = $self->{'granularity'};
470 if (! exists $identity->{'deletedRecord'})
472 $identity->{'deletedRecord'} = $self->{'deletedRecord'};
# Fallback earliest datestamp: the Unix epoch (time 0), broken down in UTC.
474 if (! exists $identity->{'earliestDatestamp'})
476 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=gmtime (0);
# NOTE(review): the argument feeding the trailing %s is not visible here.
478 my $datestring = sprintf ("%04d-%02d-%02dT%02d:%02d:%02d%s",
479 $year+1900, $mon+1, $mday, $hour, $min, $sec,
481 $identity->{'earliestDatestamp'} = $datestring;
# 'mdorder' lists field names in sequence; presumably FormatXML uses it as
# the element output order -- confirm against the utility class.
484 $identity->{'mdorder'} = [ qw
( repositoryName baseURL protocolVersion adminEmail earliestDatestamp deletedRecord granularity compression description
) ];
486 # add in description for toolkit
487 if (! exists $identity->{'description'})
489 $identity->{'description'} = [];
# Toolkit self-description that is appended to the description list below;
# parts of this structure are elided in this listing.
494 'xmlns' => 'http://oai.dlib.vt.edu/OAI/metadata/toolkit',
495 'xsi:schemaLocation' =>
496 'http://oai.dlib.vt.edu/OAI/metadata/toolkit '.
497 'http://oai.dlib.vt.edu/OAI/metadata/toolkit.xsd'
500 'title' => 'VTOAI Perl Data Provider',
502 'name' => 'Hussein Suleman',
503 'email' => 'hussein@vt.edu',
504 'institution' => 'Virginia Tech',
505 'mdorder' => [ qw
( name email institution
) ],
508 'URL' => 'http://www.dlib.vt.edu/projects/OAI/',
509 'mdorder' => [ qw
( title author version URL
) ]
513 push (@
{$identity->{'description'}}, $desc);
# The identity structure is serialised by the utility object's FormatXML.
516 $self->{'utility'}->FormatXML ($identity).
521 # standard handler for ListMetadataFormats verb
522 sub ListMetadataFormats
# Emits one <metadataFormat> element per known metadata prefix, using the
# object's own namespace and schema maps unless the archive supplies others.
526 my $identifier = $self->{'cgi'}->param ('identifier');
527 my $metadataNamespace = $self->{'metadatanamespace'};
528 my $metadataSchema = $self->{'metadataschema'};
530 my $lmf = $self->Archive_ListMetadataFormats ($identifier);
# The archive result is used as a two-element array reference:
# [ namespace map, schema map ]. The condition guarding this override is
# elided in this listing.
533 $metadataNamespace = $$lmf[0];
534 $metadataSchema = $$lmf[1];
537 my $buffer = $self->xmlheader;
# Formats are only listed when no error has been recorded.
538 if ($#{$self->{'error'}} == -1)
# Both maps are indexed by the same prefix; iteration follows the hash key
# order of the namespace map.
540 foreach my $i (keys %{$metadataNamespace})
542 $buffer .= "<metadataFormat>\n".
543 "<metadataPrefix>$i</metadataPrefix>\n".
544 "<schema>$metadataSchema->{$i}</schema>\n".
545 "<metadataNamespace>$metadataNamespace->{$i}</metadataNamespace>\n".
546 "</metadataFormat>\n";
549 $buffer.$self->xmlfooter;
553 # standard handler for ListSets verb
# Each item returned by Archive_ListSets is used as an array reference:
# [ setSpec, setName, optional setDescription ].
558 my $setlist = $self->Archive_ListSets;
# An empty list is reported as the noSetHierarchy error.
560 if ($#$setlist == -1)
562 $self->AddError ('noSetHierarchy', 'The repository does not support sets');
565 my $buffer = $self->xmlheader;
566 if ($#{$self->{'error'}} == -1)
568 foreach my $item (@
$setlist)
# setSpec and setName pass through the utility object's lclean first.
570 $buffer .= "<set>\n".
571 " <setSpec>".$self->{'utility'}->lclean ($$item[0])."</setSpec>\n".
572 " <setName>".$self->{'utility'}->lclean ($$item[1])."</setName>\n";
# NOTE(review): unlike the two fields above, the description is inserted
# as-is; presumably it is expected to be ready-made XML -- confirm.
573 if (defined $$item[2])
575 $buffer .= '<setDescription>'.$$item[2].'</setDescription>';
577 $buffer .= "</set>\n";
580 $buffer.$self->xmlfooter;
584 # standard handler for GetRecord verb
# Fetches one record from the archive and wraps its formatted form in the
# standard response envelope.
589 my $identifier = $self->{'cgi'}->param ('identifier');
590 my $metadataPrefix = $self->{'cgi'}->param ('metadataPrefix');
592 my $recref = $self->Archive_GetRecord ($identifier, $metadataPrefix);
# The condition guarding this formatting step is elided in this listing.
596 $recbuffer = $self->Archive_FormatRecord ($recref, $metadataPrefix);
# The header is built after the archive calls, so errors they record are
# already known here; the record is only emitted when there are none.
599 my $buffer = $self->xmlheader;
600 if ($#{$self->{'error'}} == -1)
602 $buffer .= $recbuffer;
604 $buffer.$self->xmlfooter;
608 # create extended resumptionToken
609 sub createResumptionToken
# Parameters:
#   $resumptionToken      - token text for the element body
#   $resumptionParameters - optional hash reference; each key/value pair
#                           becomes an attribute of the element
611 my ($self, $resumptionToken, $resumptionParameters) = @_;
614 if (defined $resumptionParameters)
# Attribute order follows hash key order, so it is not fixed.
# NOTE(review): attribute values are not XML-escaped.
616 foreach my $key (keys %{$resumptionParameters})
618 $attrs .= " $key=\"$resumptionParameters->{$key}\"";
# The element is produced when there is a token or at least one attribute;
# what happens otherwise is elided in this listing.
622 if (($resumptionToken ne '') || ($attrs ne ''))
624 "<resumptionToken".$attrs.">$resumptionToken</resumptionToken>\n";
633 # standard handler for ListRecords verb
# Reads the selection parameters (or a resumptionToken), validates the date
# arguments, asks the archive for the matching rows and formats each one as
# a full record.
638 my ($set, $from, $until, $metadataPrefix);
639 my ($resumptionToken, $allrows, $resumptionParameters);
641 $resumptionToken = $self->{'cgi'}->param ('resumptionToken');
# The other parameters are only read when no resumptionToken was given.
# NOTE(review): for an absent parameter param() presumably yields undef
# rather than '', so this comparison would warn under "use warnings".
642 if ($resumptionToken eq '')
644 $set = $self->{'cgi'}->param ('set');
645 $from = $self->{'cgi'}->param ('from');
646 $until = $self->{'cgi'}->param ('until');
647 $metadataPrefix = $self->{'cgi'}->param ('metadataPrefix');
# Each date is checked for syntax first, then for granularity. The conditions
# guarding these two groups are elided in this listing.
# NOTE(review): GranularityisValid is called here with one date only, while
# it declares two date parameters.
651 if (!($self->DateisValid ($from)))
652 { return $self->Error ('badArgument', 'illegal from parameter'); }
653 if (!($self->GranularityisValid ($from)))
654 { return $self->Error ('badArgument', 'illegal granularity for from parameter'); }
658 if (!($self->DateisValid ($until)))
659 { return $self->Error ('badArgument', 'illegal until parameter'); }
660 if (!($self->GranularityisValid ($until)))
661 { return $self->Error ('badArgument', 'illegal granularity for until parameter'); }
# When both bounds are present they must use the same granularity.
663 if (($from ne '') && ($until ne '') && (!($self->GranularityisValid ($from, $until))))
665 return $self->Error ('badArgument', 'mismatched granularities in from/until');
# The archive returns the rows plus a possibly updated token, the metadata
# prefix to format with, and extra resumptionToken attributes.
669 ($allrows, $resumptionToken, $metadataPrefix, $resumptionParameters) =
670 $self->Archive_ListRecords ($set, $from, $until, $metadataPrefix, $resumptionToken);
673 foreach my $recref (@
$allrows)
675 $recbuffer .= $self->Archive_FormatRecord ($recref, $metadataPrefix);
# Records and resumption token are only emitted when no error was recorded.
678 my $buffer = $self->xmlheader;
679 if ($#{$self->{'error'}} == -1)
681 $buffer .= $recbuffer.$self->createResumptionToken ($resumptionToken, $resumptionParameters);
683 $buffer.$self->xmlfooter;
687 # standard handler for ListIdentifiers verb
# Same flow as the ListRecords handler, except that rows come from
# Archive_ListIdentifiers and each row is formatted as a header only.
692 my ($set, $from, $until, $metadataPrefix);
693 my ($resumptionToken, $allrows, $resumptionParameters);
695 $resumptionToken = $self->{'cgi'}->param ('resumptionToken');
# The other parameters are only read when no resumptionToken was given.
# NOTE(review): for an absent parameter param() presumably yields undef
# rather than '', so this comparison would warn under "use warnings".
696 if ($resumptionToken eq '')
698 $set = $self->{'cgi'}->param ('set');
699 $from = $self->{'cgi'}->param ('from');
700 $until = $self->{'cgi'}->param ('until');
701 $metadataPrefix = $self->{'cgi'}->param ('metadataPrefix');
# Each date is checked for syntax first, then for granularity. The conditions
# guarding these two groups are elided in this listing.
# NOTE(review): GranularityisValid is called here with one date only, while
# it declares two date parameters.
705 if (!($self->DateisValid ($from)))
706 { return $self->Error ('badArgument', 'illegal from parameter'); }
707 if (!($self->GranularityisValid ($from)))
708 { return $self->Error ('badArgument', 'illegal granularity for from parameter'); }
712 if (!($self->DateisValid ($until)))
713 { return $self->Error ('badArgument', 'illegal until parameter'); }
714 if (!($self->GranularityisValid ($until)))
715 { return $self->Error ('badArgument', 'illegal granularity for until parameter'); }
# When both bounds are present they must use the same granularity.
717 if (($from ne '') && ($until ne '') && (!($self->GranularityisValid ($from, $until))))
719 return $self->Error ('badArgument', 'mismatched granularities in from/until');
723 ($allrows, $resumptionToken, $metadataPrefix, $resumptionParameters) =
724 $self->Archive_ListIdentifiers ($set, $from, $until, $metadataPrefix, $resumptionToken);
727 foreach my $recref (@
$allrows)
729 $recbuffer .= $self->Archive_FormatHeader ($recref, $metadataPrefix);
# Headers and resumption token are only emitted when no error was recorded.
732 my $buffer = $self->xmlheader;
733 if ($#{$self->{'error'}} == -1)
735 $buffer .= $recbuffer.$self->createResumptionToken ($resumptionToken, $resumptionParameters);
737 $buffer.$self->xmlfooter;
741 # stub routines to get actual data from archives
744 sub Archive_FormatRecord
# Placeholder formatter, presumably meant to be overridden by the data-model
# subclass. Parameters:
#   $recref         - record reference as returned by the archive
#   $metadataFormat - metadataPrefix to format the record in
# Neither parameter is used by the visible lines.
746 my ($self, $recref, $metadataFormat) = @_;
# The first argument is the literal string 'identifier'; the two FormatXML
# calls serialise empty hashes. The arguments in between are elided.
748 $self->FormatRecord ('identifier',
752 $self->{'utility'}->FormatXML ({}),
753 $self->{'utility'}->FormatXML ({})
758 sub Archive_FormatHeader
# Placeholder header formatter, presumably meant to be overridden by the
# data-model subclass. Parameters:
#   $recref         - record reference as returned by the archive
#   $metadataFormat - metadataPrefix of the request
760 my ($self, $recref, $metadataFormat) = @_;
# The first argument is the literal string 'identifier'; the remaining
# arguments are elided in this listing.
762 $self->FormatHeader ('identifier',
786 sub Archive_ListMetadataFormats
# Archive hook for the ListMetadataFormats handler. Parameter:
#   $identifier - value of the 'identifier' request parameter
# The handler reads the result as an array reference holding a namespace map
# and a schema map; what this default version returns is elided here.
788 my ($self, $identifier) = @_;
794 sub Archive_GetRecord
# Archive hook for the GetRecord handler. Parameters:
#   $identifier     - value of the 'identifier' request parameter
#   $metadataPrefix - value of the 'metadataPrefix' request parameter
# The handler passes the result on to Archive_FormatRecord; what this default
# version returns is elided here.
796 my ($self, $identifier, $metadataPrefix) = @_;
804 sub Archive_ListRecords
# Archive hook for the ListRecords handler. Parameters are the selection
# criteria of the request: $set, $from, $until, $metadataPrefix and the
# incoming $resumptionToken.
806 my ($self, $set, $from, $until, $metadataPrefix, $resumptionToken) = @_;
# This default version always reports an empty token.
810 $resumptionToken = '';
# Result list, in the order the handler unpacks it: reference to the rows,
# resumption token, metadata prefix, and a hash reference of extra
# resumptionToken attributes (empty here).
812 ( \
@allrows, $resumptionToken, $metadataPrefix, {} );
816 sub Archive_ListIdentifiers
# Archive hook for the ListIdentifiers handler. Parameters are the selection
# criteria of the request: $set, $from, $until, $metadataPrefix and the
# incoming $resumptionToken.
818 my ($self, $set, $from, $until, $metadataPrefix, $resumptionToken) = @_;
# This default version always reports an empty token.
822 $resumptionToken = '';
# Result list, in the order the handler unpacks it: reference to the rows,
# resumption token, metadata prefix, and a hash reference of extra
# resumptionToken attributes (empty here).
824 ( \
@allrows, $resumptionToken, $metadataPrefix, {} );
828 # main loop to process parameters and call appropriate verb handler
# Creates the CGI object when the instance does not already carry one, checks
# the verb and the arguments, then prints the output of the matching handler.
833 if (! exists $self->{'cgi'})
836 ##$self->{'cgi'} = new Pure::EZCGI;
# NOTE(review): indirect object syntax; CGI->new is the unambiguous spelling.
837 $self->{'cgi'} = new CGI
;
839 $self->{'verb'} = $self->{'cgi'}->param ('verb');
841 # check for illegal verb
# Only the six verbs listed here are accepted.
# NOTE(review): when the request has no verb parameter the value is
# presumably undef, so these comparisons would warn under "use warnings".
842 if (($self->{'verb'} ne 'Identify') &&
843 ($self->{'verb'} ne 'ListMetadataFormats') &&
844 ($self->{'verb'} ne 'ListSets') &&
845 ($self->{'verb'} ne 'ListIdentifiers') &&
846 ($self->{'verb'} ne 'GetRecord') &&
847 ($self->{'verb'} ne 'ListRecords'))
849 print $self->Error ('badVerb', 'illegal OAI verb');
853 # check for illegal parameters
# How $aiv is evaluated is elided in this listing.
854 my $aiv = $self->ArgumentisValid;
861 # run appropriate handler procedure
# Every handler returns the complete response text, which is printed here.
862 if ($self->{'verb'} eq 'Identify')
863 { print $self->Identify; }
864 elsif ($self->{'verb'} eq 'ListMetadataFormats')
865 { print $self->ListMetadataFormats; }
866 elsif ($self->{'verb'} eq 'GetRecord')
867 { print $self->GetRecord; }
868 elsif ($self->{'verb'} eq 'ListSets')
869 { print $self->ListSets; }
870 elsif ($self->{'verb'} eq 'ListRecords')
871 { print $self->ListRecords; }
872 elsif ($self->{'verb'} eq 'ListIdentifiers')
873 { print $self->ListIdentifiers; }
885 # fixed ($identifier) error
886 # added status to FormatRecord
888 # added metadataPrefix to GetRecord handler
890 # converted to OAI2.0 alpha1
892 # converted to OAI2.0 alpha2
894 # converted to OAI2.0 alpha3
896 # converted to OAI2.0 beta1
898 # converted to OAI2.0 beta2
899 # added better argument handling
901 # polished for OAI2.0