Bug 16573: fix linkid value on marc_subfield_structure
[koha.git] / misc / stage_file.pl
blob5fc70b72f7dd620155dc4af45a59481697db15ac
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Copyright (C) 2007 LibLime
6 # Parts Copyright BSZ 2011
7 # Parts Copyright C & P Bibliography Services 2012
9 # Koha is free software; you can redistribute it and/or modify it under the
10 # terms of the GNU General Public License as published by the Free Software
11 # Foundation; either version 2 of the License, or (at your option) any later
12 # version.
14 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
15 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
16 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with this program; if not, write to the Free Software Foundation, Inc.,
20 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 use strict;
23 use warnings;
24 BEGIN {
25 # find Koha's Perl modules
26 # test carefully before changing this
27 use FindBin;
28 eval { require "$FindBin::Bin/kohalib.pl" };
31 use C4::Context;
32 use C4::ImportBatch;
33 use C4::Matcher;
34 use Getopt::Long;
# Unbuffered STDOUT so the progress messages printed during staging appear
# immediately rather than when the buffer fills.
$| = 1;

# command-line parameters
my $record_type   = "biblio";
my $encoding      = "";
my $authorities   = 0;
my $match         = 0;
my $add_items     = 0;
my $input_file    = "";
my $batch_comment = "";
my $want_help     = 0;
my $no_replace;
my $no_create;
my $item_action   = 'always_add';

my $result = GetOptions(
    'encoding:s'         => \$encoding,
    'file:s'             => \$input_file,
    'match|match-bibs:s' => \$match,
    'add-items'          => \$add_items,
    'item-action:s'      => \$item_action,
    'no-replace'         => \$no_replace,
    'no-create'          => \$no_create,
    'comment:s'          => \$batch_comment,
    'authorities'        => \$authorities,
    'h|help'             => \$want_help
);

# --authorities switches the whole run from bibliographic to authority records
$record_type = 'auth' if ($authorities);

# --encoding takes an optional value; treat "not given" and "given empty" alike
if ($encoding eq "") {
    $encoding = "utf8";
}

# An explicit request for help is a successful run ...
if ($want_help) {
    print_usage();
    exit 0;
}

# ... but unparseable options or a missing --file are usage errors: still show
# the help text, then exit non-zero so calling shells and cron jobs can tell
# that nothing was staged.
if (not $result or $input_file eq "") {
    print_usage();
    exit 1;
}

unless (-r $input_file) {
    die "$0: cannot open input file $input_file: $!\n";
}

# Run with AutoCommit off; commits are issued by the progress callback and
# once more here after the batch has been processed.
my $dbh = C4::Context->dbh;
$dbh->{AutoCommit} = 0;
process_batch($input_file, $record_type, $match, $add_items, $batch_comment);
$dbh->commit();

exit 0;
# Read a file of MARC records, stage them as an import batch, optionally look
# for matches against existing records, and print a staging report.
#
# Parameters:
#   $input_file    - path of the file to read; records are delimited by "\035"
#   $record_type   - 'biblio' or 'auth'
#   $match         - matcher id handed to C4::Matcher->fetch; false skips matching
#   $add_items     - passed through to BatchStageMarcRecords; also selects
#                    which item line the report prints for bib batches
#   $batch_comment - comment passed through to BatchStageMarcRecords
#
# Besides its arguments this sub reads the file-level $encoding, $no_replace,
# $no_create and $item_action option variables.
#
# Dies if the input file cannot be opened.
sub process_batch {
    my ($input_file, $record_type, $match, $add_items, $batch_comment) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode or pipe specification, and the handle cannot
    # collide with another bareword IN elsewhere in the program.
    open my $in_fh, '<', $input_file
        or die "$0: cannot open input file $input_file: $!\n";

    # Localised so the record separator is restored when this sub returns
    # instead of staying changed for the remainder of the program.
    local $/ = "\035";

    my $marc_records      = "";
    my $num_input_records = 0;
    while (my $record = <$in_fh>) {
        $record =~ s/^\s+//;
        $record =~ s/\s+$//;
        next unless $record;        # skip if record has only whitespace, as might occur
                                    # if file includes newlines between each MARC record
        $marc_records .= $record;   # FIXME - this sort of string concatenation
                                    # is probably rather inefficient
        $num_input_records++;
    }
    close $in_fh;

    print "... staging MARC records -- please wait\n";
    #FIXME: We should really allow the use of marc modification frameworks and to_marc plugins here if possible
    my ($batch_id, $num_valid_records, $num_items, @import_errors) =
        BatchStageMarcRecords($record_type, $encoding, $marc_records, $input_file, undef, undef, $batch_comment, '', $add_items, 0,
                              100, \&print_progress_and_commit);
    print "... finished staging MARC records\n";

    my $num_with_matches = 0;
    if ($match) {
        my $matcher = C4::Matcher->fetch($match);
        if (defined $matcher) {
            SetImportBatchMatcher($batch_id, $match);
        } elsif ($record_type eq 'biblio') {
            # No stored matcher with that id: fall back to an ad-hoc ISBN
            # (020$a) matchpoint with a required check on 245$a.
            $matcher = C4::Matcher->new($record_type);
            $matcher->add_simple_matchpoint('isbn', 1000, '020', 'a', -1, 0, '');
            $matcher->add_simple_required_check('245', 'a', -1, 0, '',
                                                '245', 'a', -1, 0, '');
        }
        # NOTE(review): for 'auth' batches with an unknown matcher id $matcher
        # is still undef here and is passed on as-is -- confirm that
        # BatchFindDuplicates accepts an undefined matcher.

        # set default record overlay behavior
        SetImportBatchOverlayAction($batch_id, ($no_replace) ? 'ignore' : 'replace');
        SetImportBatchNoMatchAction($batch_id, ($no_create) ? 'ignore' : 'create_new');
        SetImportBatchItemAction($batch_id, $item_action);
        print "... looking for matches with records already in database\n";
        $num_with_matches = BatchFindDuplicates($batch_id, $matcher, 10, 100, \&print_progress_and_commit);
        print "... finished looking for matches\n";
    }

    my $num_invalid_records = scalar(@import_errors);
    print <<_SUMMARY_;
MARC record staging report
------------------------------------
Input file: $input_file
Record type: $record_type
Number of input records: $num_input_records
Number of valid records: $num_valid_records
Number of invalid records: $num_invalid_records
_SUMMARY_
    if ($match) {
        print "Number of records matched: $num_with_matches\n";
    } else {
        print "Incoming records not matched against existing records (--match option not supplied)\n";
    }
    if ($record_type eq 'biblio') {
        if ($add_items) {
            print "Number of items parsed: $num_items\n";
        } else {
            print "No items parsed (--add-items option not supplied)\n";
        }
    }

    print "\n";
    print "Batch number assigned: $batch_id\n";
    print "\n";
}
# Progress callback handed to the batch staging and duplicate-finding calls:
# commits the work done so far on the file-level database handle, then
# reports how many records have been processed.
#
# Parameters:
#   first argument - number of records processed so far
sub print_progress_and_commit {
    my ($processed_count) = @_;

    $dbh->commit();
    print "... processed $processed_count records\n";
}
# Print the command-line help for this script with a single interpolating
# heredoc; $0 in the text expands to the name the script was invoked as.
# Does not read its arguments. Output goes to the currently selected
# filehandle because print is called without an explicit handle.
# NOTE(review): the --item-action entry spells "specified" as "specifed";
# correcting it means changing the emitted text, so it is left untouched here.
166 sub print_usage {
167 print <<_USAGE_;
168 $0: stage MARC file into reservoir.
170 Use this batch job to load a file of MARC bibliographic
171 (with optional item information) or authority records into
172 the Koha reservoir.
174 After running this program to stage your file, you can use
175 either the batch job commit_file.pl or the Koha
176 Tools option "Manage Staged MARC Records" to load the
177 records into the main Koha database.
179 Parameters:
180 --file <file_name> name of input MARC bib file
181 --authorities stage authority records instead of bibs
182 --encoding <encoding> encoding of MARC records, default is utf8.
183 Other possible options are: MARC-8,
184 ISO_5426, ISO_6937, ISO_8859-1, EUC-KR
185 --match <match_id> use this option to match records
186 in the file with records already in
187 the database for future overlay.
188 If <match_id> isn't defined, a default
189 MARC21 ISBN & title match rule will be applied
190 for bib imports.
191 --add-items use this option to specify that
192 item data is embedded in the MARC
193 bibs and should be parsed.
194 --item-action action to take if --add-items is specifed;
195 choices are 'always_add',
196 'add_only_for_matches', 'add_only_for_new',
197 'ignore', or 'replace'
198 --no-replace overlay action for record: default is to
199 replace extant with the imported record.
200 --no-create nomatch action for record: default is to
201 create new record with imported record.
202 --comment <comment> optional comment to describe
203 the record batch; if the comment
204 has spaces in it, surround the
205 comment with quotation marks.
206 --help or -h show this message.
207 _USAGE_