Bug 18928: (follow-up) Make DB update idempotent
[koha.git] / misc / migration_tools / build_oai_sets.pl
bloba19cc4c025e712b53132a6d5b3a316382528cdb9
1 #!/usr/bin/perl
3 # Copyright 2011 BibLibre
5 # This file is part of Koha.
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 =head1 DESCRIPTION
22 This script builds OAI-PMH sets (to be used by opac/oai.pl) according to sets
23 and mappings defined in Koha. It reads information from oai_sets and
24 oai_sets_mappings, and then fills table oai_sets_biblios with the built information.
26 =head1 USAGE
28 build_oai_sets.pl [-h] [-v] [-r] [-i] [-l LENGTH [-o OFFSET]]
29 -h Print help message;
30 -v Be verbose
31 -r Truncate table oai_sets_biblios before inserting new rows
32 -i Embed item information; mandatory if you defined mappings
33 on item fields
34 -l LENGTH Process LENGTH biblios
35 -o OFFSET If LENGTH is defined, start processing from OFFSET
37 =cut
39 use Modern::Perl;
40 use MARC::Record;
41 use MARC::File::XML;
42 use List::MoreUtils qw/uniq/;
43 use Getopt::Std;
45 use Koha::Script;
46 use C4::Context;
47 use C4::Charset qw/StripNonXmlChars/;
48 use C4::Biblio;
49 use C4::OAI::Sets;
# Parse the command-line switches. -o and -l take a value; -v, -i, -h and -r
# are plain flags.
my %opts;
$Getopt::Std::STANDARD_HELP_VERSION = 1;
my $go = getopts('vo:l:ihr', \%opts);

# Show the usage text when help was requested or when option parsing failed.
# print_usage is called with parentheses rather than as "&print_usage;",
# which would silently forward the current @_ to it. A parsing failure is
# reported through a non-zero exit status; an explicit -h still exits with 0.
if ( !$go or $opts{h} ) {
    print_usage();
    exit( $go ? 0 : 1 );
}

my $verbose     = $opts{v};    # -v: print progress information
my $offset      = $opts{o};    # -o OFFSET: first row to process (needs -l)
my $length      = $opts{l};    # -l LENGTH: number of biblios to process
my $embed_items = $opts{i};    # -i: embed items in each record before mapping
my $reset       = $opts{r};    # -r: empty oai_sets_biblios before inserting
my $dbh = C4::Context->dbh;

# Get OAI sets mappings
my $mappings = GetOAISetsMappings();

# LENGTH and OFFSET come straight from the command line and are interpolated
# into the SQL text below, so refuse anything that is not a plain
# non-negative integer instead of sending it to the database.
if ( defined $length and $length !~ /\A\d+\z/ ) {
    die "build_oai_sets.pl: LENGTH (-l) must be a non-negative integer\n";
}
if ( defined $offset and $offset !~ /\A\d+\z/ ) {
    die "build_oai_sets.pl: OFFSET (-o) must be a non-negative integer\n";
}

# Get all biblionumbers and marcxml
print "Retrieving biblios... " if $verbose;
my $query = qq{
    SELECT biblionumber, metadata
    FROM biblio_metadata
    WHERE format='marcxml'
    AND `schema` = ?
};

# OFFSET is only honoured together with LENGTH; a LENGTH of 0 means no limit.
if ($length) {
    $query .= "LIMIT $length";
    if ($offset) {
        $query .= " OFFSET $offset";
    }
}

my $sth = $dbh->prepare($query);
$sth->execute( C4::Context->preference('marcflavour') );

# One hashref per row, keyed by column name (biblionumber, metadata).
my $results = $sth->fetchall_arrayref( {} );
print "done.\n" if $verbose;
# Build lists of parents sets
# $parentsets maps a set id to the ids of its ancestor sets. Ancestors are
# found by repeatedly cutting the last ":component" off the set spec and
# looking the remainder up; the walk stops at the first spec with no set.
my $sets = GetOAISets();
my $parentsets;
for my $oai_set ( @{$sets} ) {
    my $spec = $oai_set->{'spec'};
    while ( $spec =~ /^(.+):(.+)$/ ) {
        my $ancestor_spec = $1;
        my $ancestor      = GetOAISetBySpec($ancestor_spec);
        last unless $ancestor;

        push @{ $parentsets->{ $oai_set->{'id'} } }, $ancestor->{'id'};
        $spec = $ancestor_spec;
    }
}
# Work out, for every retrieved biblio, which OAI sets it belongs to.
# $sets_biblios maps a set id to the list of biblionumbers in that set; a
# biblio that matches a set is also added to every ancestor of that set.
my $num_biblios  = scalar @$results;
my $i            = 0;
my $sets_biblios = {};

# Look the MARC flavour up once instead of twice per record.
my $marcflavour = C4::Context->preference('marcflavour');

foreach my $res (@$results) {
    # Count every biblio up front, so that records skipped by the "next"
    # below are still reflected in the progress output.
    $i++;

    my $biblionumber = $res->{'biblionumber'};
    my $marcxml      = $res->{'metadata'};
    if ( $verbose and $i % 1000 == 0 ) {
        my $percent = sprintf( "%.2f", ( $i * 100 ) / $num_biblios );
        say "Progression: $i/$num_biblios ($percent %)";
    }

    # The following lines are copied from GetMarcBiblio
    # We don't call GetMarcBiblio to avoid a sql query to be executed each time
    $marcxml = StripNonXmlChars($marcxml);
    MARC::File::XML->default_record_format($marcflavour);
    my $record;
    eval {
        $record = MARC::Record::new_from_xml( $marcxml, "utf8", $marcflavour );
    };
    if ($@) {
        # An unparsable record is reported and skipped; the run continues.
        warn "(biblio $biblionumber) Error while creating record from marcxml: $@";
        next;
    }

    if ($embed_items) {
        C4::Biblio::EmbedItemsInMarcBiblio({
            marc_record  => $record,
            biblionumber => $biblionumber });
    }

    my @biblio_sets = CalcOAISetsBiblio( $record, $mappings );
    foreach my $set_id (@biblio_sets) {
        push @{ $sets_biblios->{$set_id} }, $biblionumber;
        foreach my $parent_set_id ( @{ $parentsets->{$set_id} } ) {
            push @{ $sets_biblios->{$parent_set_id} }, $biblionumber;
        }
    }
}
say "Progression: done." if $verbose;
# Report how many distinct biblios ended up in each set. The per-set lists
# are de-duplicated in place, because a biblio can be pushed more than once
# (directly and through a child set).
say "Summary:";
for my $id ( keys %{$sets_biblios} ) {
    my @distinct = uniq @{ $sets_biblios->{$id} };
    $sets_biblios->{$id} = \@distinct;

    my $oai_set = GetOAISet($id);
    my $spec    = $oai_set->{'spec'};
    say "Set '$spec': " . scalar(@distinct) . " biblios";
}
# Store the computed set membership. With -r, ModOAISetsBiblios is first
# called with an empty mapping (the usage text describes -r as truncating
# oai_sets_biblios before inserting new rows).
print "Updating database... ";
ModOAISetsBiblios( {} ) if $reset;
AddOAISetsBiblios($sets_biblios);
print "done.\n";
# Print a short usage summary to the currently selected output handle.
# The text lists every switch accepted by the getopts() spec, including -r,
# which the earlier version of this message left out.
sub print_usage {
    print "build_oai_sets.pl: Build OAI-PMH sets, according to mappings defined in Koha\n";
    print "Usage: build_oai_sets.pl [-h] [-v] [-r] [-i] [-l LENGTH [-o OFFSET]]\n\n";
    print "\t-h\t\tPrint this help and exit\n";
    print "\t-v\t\tBe verbose\n";
    print "\t-r\t\tTruncate table oai_sets_biblios before inserting new rows\n";
    print "\t-i\t\tEmbed items informations, mandatory if you defined mappings on item fields\n";
    print "\t-l LENGTH\tProcess LENGTH biblios\n";
    print "\t-o OFFSET\tIf LENGTH is defined, start processing from OFFSET\n\n";
    return;
}