Bug 18887: Update database
[koha.git] / misc / stage_file.pl
blob1f11b6adac3e9316c3334364773c515dcb237588
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Copyright (C) 2007 LibLime
6 # Parts Copyright BSZ 2011
7 # Parts Copyright C & P Bibliography Services 2012
9 # Koha is free software; you can redistribute it and/or modify it under the
10 # terms of the GNU General Public License as published by the Free Software
11 # Foundation; either version 2 of the License, or (at your option) any later
12 # version.
14 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with this program; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 use strict;
23 use warnings;
24 BEGIN {
25 # find Koha's Perl modules
26 # test carefully before changing this
27 use FindBin;
28 eval { require "$FindBin::Bin/kohalib.pl" };
31 use C4::Context;
32 use C4::ImportBatch;
33 use C4::Matcher;
34 use Getopt::Long;
# Autoflush STDOUT so progress messages appear as soon as they are printed.
$| = 1;

# command-line parameters
my $record_type = "biblio";
my $encoding = "UTF-8";
my $authorities = 0;
my $match = 0;
my $add_items = 0;
my $input_file = "";
my $batch_comment = "";
my $want_help = 0;
my $no_replace;
my $format = 'ISO2709';
my $no_create;
my $item_action = 'always_add';

my $result = GetOptions(
    'encoding:s'         => \$encoding,
    'file:s'             => \$input_file,
    'format:s'           => \$format,
    'match|match-bibs:s' => \$match,
    'add-items'          => \$add_items,
    'item-action:s'      => \$item_action,
    'no-replace'         => \$no_replace,
    'no-create'          => \$no_create,
    'comment:s'          => \$batch_comment,
    'authorities'        => \$authorities,
    'h|help'             => \$want_help
);

$record_type = 'auth' if ($authorities);

# An explicit request for help is a successful run.
if ($want_help) {
    print_usage();
    exit 0;
}

# Bad options or a missing --file are usage errors: still show the help text,
# but exit non-zero so calling scripts and cron jobs can detect the failure.
if ( not $result or $input_file eq "" ) {
    print_usage();
    exit 1;
}

if ( $format !~ /\A(?:MARCXML|ISO2709)\z/i ) {
    print "\n --format must be MARCXML or ISO2709\n";
    print_usage();
    exit 1;
}

# The check above is case-insensitive, but process_batch compares the format
# with 'eq', so hand it the canonical upper-case spelling.
$format = uc $format;

unless (-r $input_file) {
    die "$0: cannot open input file $input_file: $!\n";
}

# Run with AutoCommit off: print_progress_and_commit commits from the progress
# callbacks, and the final commit below picks up whatever is left.
my $dbh = C4::Context->dbh;
$dbh->{AutoCommit} = 0;
process_batch({
    format        => $format,
    input_file    => $input_file,
    record_type   => $record_type,
    match         => $match,
    add_items     => $add_items,
    batch_comment => $batch_comment,
    encoding      => $encoding,
    no_replace    => $no_replace,
    no_create     => $no_create,
    item_action   => $item_action,
});
$dbh->commit();

exit 0;
# Read the MARC records from the input file, stage them as an import batch,
# optionally look for matches against existing records, and print a report.
#
# Takes a single hashref. Keys read here:
#   format        - 'ISO2709' or 'MARCXML' (compared case-insensitively)
#   input_file    - path of the file to read
#   record_type   - defaults to 'biblio' when undefined
#   encoding      - passed through to the record readers and the stager
#   match         - matcher id; a false value skips the matching step
#   add_items     - passed to BatchStageMarcRecords; also selects the item
#                   line printed in the report
#   batch_comment - passed to BatchStageMarcRecords
#   no_replace    - true selects overlay action 'ignore' instead of 'replace'
#   no_create     - true selects no-match action 'ignore' instead of 'create_new'
#   item_action   - passed to SetImportBatchItemAction
#
# Dies if the format is neither ISO2709 nor MARCXML.
# The return value is not meaningful; the results are printed to STDOUT.
sub process_batch {
    my ( $params ) = @_; #Possible params are: format input_file record_type match add_items batch_comment encoding no_replace no_create item_action

    # Callers may validate the format case-insensitively, so normalise it
    # before the exact comparisons below.
    my $format = uc( $params->{format} // '' );
    my $record_type = $params->{record_type} // 'biblio';

    my ( $errors, $marc_records );
    if( $format eq 'ISO2709' ) {
        ( $errors, $marc_records ) = C4::ImportBatch::RecordsFromISO2709File(
            $params->{input_file}, $record_type, $params->{encoding} );
    } elsif( $format eq 'MARCXML' ) {
        ( $errors, $marc_records ) = C4::ImportBatch::RecordsFromMARCXMLFile(
            $params->{input_file}, $params->{encoding} );
    } else {
        # Without this branch $errors stays undef and the dereference below
        # dies with "Can't use an undefined value as an ARRAY reference".
        die "$0: unsupported format '$format' (expected MARCXML or ISO2709)\n";
    }
    warn ( join ',', @$errors ) if $errors && @$errors;
    my $num_input_records = ($marc_records) ? scalar(@$marc_records) : 0;

    print "... staging MARC records -- please wait\n";
    #FIXME: We should really allow the use of marc modification frameworks and to_marc plugins here if possible
    # NOTE(review): the positional arguments (undef, '', 0, 100) are defined by
    # C4::ImportBatch::BatchStageMarcRecords; 100 is presumably the progress
    # callback interval -- confirm against that module.
    my ($batch_id, $num_valid_records, $num_items, @import_errors) =
        BatchStageMarcRecords($record_type, $params->{encoding}, $marc_records, $params->{input_file}, undef, $params->{batch_comment}, '', $params->{add_items}, 0,
                              100, \&print_progress_and_commit);
    print "... finished staging MARC records\n";

    my $num_with_matches = 0;
    if ( $params->{match} ) {
        my $matcher = C4::Matcher->fetch( $params->{match} );
        if (defined $matcher) {
            SetImportBatchMatcher( $batch_id, $params->{match} );
        } elsif ($record_type eq 'biblio') {
            # No stored matcher with that id: fall back to an ad-hoc rule
            # built from an 020$a match point and a 245$a required check.
            $matcher = C4::Matcher->new($record_type);
            $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, '');
            $matcher->add_simple_required_check('245', 'a', -1, 0, '',
                                                '245', 'a', -1, 0, '');
        }
        # set default record overlay behavior
        SetImportBatchOverlayAction( $batch_id, $params->{no_replace} ? 'ignore' : 'replace' );
        SetImportBatchNoMatchAction( $batch_id, $params->{no_create} ? 'ignore' : 'create_new' );
        SetImportBatchItemAction( $batch_id, $params->{item_action} );
        print "... looking for matches with records already in database\n";
        $num_with_matches = BatchFindDuplicates($batch_id, $matcher, 10, 100, \&print_progress_and_commit);
        print "... finished looking for matches\n";
    }

    my $num_invalid_records = scalar(@import_errors);
    print <<_SUMMARY_;

MARC record staging report
------------------------------------
Input file: $params->{input_file}
Record type: $record_type
Number of input records: $num_input_records
Number of valid records: $num_valid_records
Number of invalid records: $num_invalid_records
_SUMMARY_
    if( $params->{match} ) {
        print "Number of records matched: $num_with_matches\n";
    } else {
        print "Incoming records not matched against existing records (--match option not supplied)\n";
    }
    if ($record_type eq 'biblio') {
        if ( $params->{add_items} ) {
            print "Number of items parsed: $num_items\n";
        } else {
            print "No items parsed (--add-items option not supplied)\n";
        }
    }

    print "\n";
    print "Batch number assigned: $batch_id\n";
    print "\n";
}
# Progress callback handed to BatchStageMarcRecords and BatchFindDuplicates.
# Commits the work done so far on the file-level $dbh handle (which runs with
# AutoCommit off), then reports the record count it was given.
sub print_progress_and_commit {
    my ($processed_count) = @_;

    $dbh->commit();
    print "... processed $processed_count records\n";
}
# Print the command-line help text for this script to STDOUT.
# Takes no arguments. The text is a single interpolating heredoc, so $0
# expands to the name the script was invoked as.
178 sub print_usage {
179 print <<_USAGE_;
180 $0: stage MARC file into reservoir.
182 Use this batch job to load a file of MARC bibliographic
183 (with optional item information) or authority records into
184 the Koha reservoir.
186 After running this program to stage your file, you can use
187 either the batch job commit_file.pl or the Koha
188 Tools option "Manage Staged MARC Records" to load the
189 records into the main Koha database.
191 Parameters:
192 --file <file_name> name of input MARC bib file
193 --authorities stage authority records instead of bibs
194 --encoding <encoding> encoding of MARC records, default is UTF-8.
195 Other possible options are: MARC-8,
196 ISO_5426, ISO_6937, ISO_8859-1, EUC-KR
197 --format The MARC transport format to use?
198 Defaults to ISO2709.
199 Available values, MARCXML, ISO2709.
200 --match <match_id> use this option to match records
201 in the file with records already in
202 the database for future overlay.
203 If <match_id> isn't defined, a default
204 MARC21 ISBN & title match rule will be applied
205 for bib imports.
206 --add-items use this option to specify that
207 item data is embedded in the MARC
208 bibs and should be parsed.
209 --item-action action to take if --add-items is specifed;
210 choices are 'always_add',
211 'add_only_for_matches', 'add_only_for_new',
212 'ignore', or 'replace'
213 --no-replace overlay action for record: default is to
214 replace extant with the imported record.
215 --no-create nomatch action for record: default is to
216 create new record with imported record.
217 --comment <comment> optional comment to describe
218 the record batch; if the comment
219 has spaces in it, surround the
220 comment with quotation marks.
221 --help or -h show this message.
222 _USAGE_