Bug 11192: Fix lf and ff07-02 definition in ccl.properties
[koha.git] / misc / stage_file.pl
blob2e154667588e9f9874bc3a5222e0ed8b5dc8d8c2
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Copyright (C) 2007 LibLime
6 # Parts Copyright BSZ 2011
7 # Parts Copyright C & P Bibliography Services 2012
9 # Koha is free software; you can redistribute it and/or modify it under the
10 # terms of the GNU General Public License as published by the Free Software
11 # Foundation; either version 2 of the License, or (at your option) any later
12 # version.
14 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with this program; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 use strict;
23 use warnings;
24 BEGIN {
25 # find Koha's Perl modules
26 # test carefully before changing this
27 use FindBin;
28 eval { require "$FindBin::Bin/kohalib.pl" };
31 use C4::Context;
32 use C4::ImportBatch;
33 use C4::Matcher;
34 use Getopt::Long;
# Unbuffer STDOUT so progress messages show up as soon as they are printed.
$| = 1;

# command-line parameters
my $record_type   = "biblio";
my $encoding      = "";
my $authorities   = 0;
my $match         = 0;
my $add_items     = 0;
my $input_file    = "";
my $batch_comment = "";
my $want_help     = 0;
my $no_replace;
my $item_action   = 'always_add';

my $result = GetOptions(
    'encoding:s'         => \$encoding,
    'file:s'             => \$input_file,
    'match|match-bibs:s' => \$match,
    'add-items'          => \$add_items,
    'item-action:s'      => \$item_action,
    'no-replace'         => \$no_replace,
    'comment:s'          => \$batch_comment,
    'authorities'        => \$authorities,
    'h|help'             => \$want_help
);

# --authorities switches the batch from bibliographic to authority records
$record_type = 'auth' if ($authorities);

if ($encoding eq "") {
    $encoding = "utf8";
}

# An explicit request for help is a successful run.
if ($want_help) {
    print_usage();
    exit 0;
}

# Bad options or a missing --file are usage errors: still show the usage
# text, but exit non-zero so cron jobs and wrapper scripts can detect that
# nothing was staged.
if (not $result or $input_file eq "") {
    print_usage();
    exit 1;
}

unless (-r $input_file) {
    die "$0: cannot open input file $input_file: $!\n";
}

# With AutoCommit off, work is committed explicitly: by the
# print_progress_and_commit callback and by the final commit below.
my $dbh = C4::Context->dbh;
$dbh->{AutoCommit} = 0;
process_batch($input_file, $record_type, $match, $add_items, $batch_comment);
$dbh->commit();

exit 0;
# Stage the MARC records contained in a file as an import batch, optionally
# look for matches against existing records, and print a summary report.
#
# Parameters:
#   $input_file    - path of the file of MARC records to read
#   $record_type   - 'biblio' or 'auth'
#   $match         - matcher id passed to C4::Matcher->fetch; a false value
#                    skips the matching step entirely
#   $add_items     - true to have embedded item data parsed
#   $batch_comment - comment stored with the batch
#
# Also reads the file-level variables $encoding, $no_replace and $item_action.
# Dies if the input file cannot be opened.
sub process_batch {
    my ($input_file, $record_type, $match, $add_items, $batch_comment) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode, and the handle is not a package global.
    open my $in, '<', $input_file
        or die "$0: cannot open input file $input_file: $!\n";

    my $marc_records      = "";
    my $num_input_records = 0;
    {
        # Read one MARC record at a time; "\035" is the record terminator.
        # Localised so the separator is restored for any later reads.
        local $/ = "\035";
        while (my $record = <$in>) {
            $record =~ s/^\s+//;
            $record =~ s/\s+$//;
            next unless length $record; # skip if record has only whitespace, as might occur
                                        # if file includes newlines between each MARC record
            $marc_records .= $record;   # FIXME - this sort of string concatenation
                                        # is probably rather inefficient
            $num_input_records++;
        }
    }
    close $in;

    print "... staging MARC records -- please wait\n";
    # NOTE(review): the trailing "100, \&print_progress_and_commit" looks like
    # a progress interval plus callback -- confirm against C4::ImportBatch.
    my ($batch_id, $num_valid_records, $num_items, @import_errors) =
        BatchStageMarcRecords($record_type, $encoding, $marc_records, $input_file, undef, $batch_comment, '', $add_items, 0,
                              100, \&print_progress_and_commit);
    print "... finished staging MARC records\n";

    my $num_with_matches = 0;
    if ($match) {
        my $matcher = C4::Matcher->fetch($match);
        if (defined $matcher) {
            SetImportBatchMatcher($batch_id, $match);
        } elsif ($record_type eq 'biblio') {
            # No stored matcher for that id: build an ad-hoc rule matching on
            # 020$a (ISBN) with a required check on 245$a (title).
            $matcher = C4::Matcher->new($record_type);
            $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, '');
            $matcher->add_simple_required_check('245', 'a', -1, 0, '',
                                                '245', 'a', -1, 0, '');
        }
        # NOTE(review): for authority batches with an unknown match id,
        # $matcher is still undef here -- confirm BatchFindDuplicates copes.

        # set default record overlay behavior
        SetImportBatchOverlayAction($batch_id, ($no_replace) ? 'ignore' : 'replace');
        SetImportBatchNoMatchAction($batch_id, 'create_new');
        SetImportBatchItemAction($batch_id, $item_action);
        print "... looking for matches with records already in database\n";
        $num_with_matches = BatchFindDuplicates($batch_id, $matcher, 10, 100, \&print_progress_and_commit);
        print "... finished looking for matches\n";
    }

    my $num_invalid_records = scalar(@import_errors);
    print <<_SUMMARY_;

MARC record staging report
------------------------------------
Input file: $input_file
Record type: $record_type
Number of input records: $num_input_records
Number of valid records: $num_valid_records
Number of invalid records: $num_invalid_records
_SUMMARY_
    if ($match) {
        print "Number of records matched: $num_with_matches\n";
    } else {
        print "Incoming records not matched against existing records (--match option not supplied)\n";
    }
    if ($record_type eq 'biblio') {
        if ($add_items) {
            print "Number of items parsed: $num_items\n";
        } else {
            print "No items parsed (--add-items option not supplied)\n";
        }
    }

    print "\n";
    print "Batch number assigned: $batch_id\n";
    print "\n";
}
# Callback passed to BatchStageMarcRecords and BatchFindDuplicates: commits
# the work done so far on the file-level $dbh handle, then prints how many
# records have been processed.
#
#   $processed_count - the count shown in the progress message
sub print_progress_and_commit {
    my ($processed_count) = @_;

    $dbh->commit;
    print "... processed " . $processed_count . " records\n";
}
# Print the command-line help text to STDOUT.
# Takes no arguments; $0 is interpolated as the program name.
sub print_usage {
    print <<_USAGE_;
$0: stage MARC file into reservoir.

Use this batch job to load a file of MARC bibliographic
(with optional item information) or authority records into
the Koha reservoir.

After running this program to stage your file, you can use
either the batch job commit_file.pl or the Koha
Tools option "Manage Staged MARC Records" to load the
records into the main Koha database.

Parameters:
    --file <file_name>      name of input MARC bib file
    --authorities           stage authority records instead of bibs
    --encoding <encoding>   encoding of MARC records, default is utf8.
                            Other possible options are: MARC-8,
                            ISO_5426, ISO_6937, ISO_8859-1, EUC-KR
    --match <match_id>      use this option to match records
                            in the file with records already in
                            the database for future overlay.
                            If <match_id> isn't defined, a default
                            MARC21 ISBN & title match rule will be applied
                            for bib imports.
    --add-items             use this option to specify that
                            item data is embedded in the MARC
                            bibs and should be parsed.
    --item-action <action>  action to take if --add-items is specified;
                            choices are 'always_add',
                            'add_only_for_matches', 'add_only_for_new',
                            'ignore', or 'replace'
    --no-replace            overlay action for record: default is to
                            replace extant with the imported record.
    --comment <comment>     optional comment to describe
                            the record batch; if the comment
                            has spaces in it, surround the
                            comment with quotation marks.
    --help or -h            show this message.
_USAGE_
}