Bug 10996: Allow numeric subfields to be stripped on export
[koha.git] / tools / export.pl
bloba516a6bbbfb53aa85d453a6988ff8b0713d67d00
1 #!/usr/bin/perl
4 # This file is part of Koha.
6 # Koha is free software; you can redistribute it and/or modify it under the
7 # terms of the GNU General Public License as published by the Free Software
8 # Foundation; either version 2 of the License, or (at your option) any later
9 # version.
11 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
12 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License along with
16 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
17 # Suite 330, Boston, MA 02111-1307 USA
19 use Modern::Perl;
20 use MARC::File::XML;
21 use List::MoreUtils qw(uniq);
22 use Getopt::Long;
23 use CGI;
24 use C4::Auth;
25 use C4::AuthoritiesMarc; # GetAuthority
26 use C4::Biblio; # GetMarcBiblio
27 use C4::Branch; # GetBranches
28 use C4::Csv;
29 use C4::Koha; # GetItemTypes
30 use C4::Output;
31 use C4::Record;
# CGI request object. The script also reads parameters through it when it is
# run from the command line.
my $query = CGI->new;

# Options that are filled by GetOptions() in command-line mode and from CGI
# parameters in web mode.
my $clean;
my $output_format;
my $dont_export_items;
my $deleted_barcodes;
my $timestamp;
my $record_type;
my $help;

# Requested operation and download filename, with their web defaults.
my $op       = $query->param("op")       || '';
my $filename = $query->param("filename") || 'koha.mrc';

my $dbh         = C4::Context->dbh;
my $marcflavour = C4::Context->preference("marcflavour");
my $format      = $query->param("format") || 'iso2709';

# Checks if the script is called from commandline: a CGI environment always
# defines GATEWAY_INTERFACE.
my $commandline = not defined $ENV{GATEWAY_INTERFACE};
# Collect the run parameters: from command-line switches when run from a
# shell, from the CGI request otherwise.
if ($commandline) {

    # Getting parameters
    $op = 'export';
    GetOptions(
        'format=s'          => \$output_format,
        'date=s'            => \$timestamp,
        'dont_export_items' => \$dont_export_items,
        'deleted_barcodes'  => \$deleted_barcodes,
        'clean'             => \$clean,
        'filename=s'        => \$filename,
        'record-type=s'     => \$record_type,
        'help|?'            => \$help
    );

    if ($help) {
        print <<_USAGE_;
export.pl [--format=format] [--date=date] [--record-type=TYPE] [--dont_export_items] [--deleted_barcodes] [--clean] --filename=outputfile


 --format=FORMAT        FORMAT is either 'xml' or 'marc' (default)

 --date=DATE            DATE should be entered as the 'dateformat' syspref is
                        set (dd/mm/yyyy for metric, yyyy-mm-dd for iso,
                        mm/dd/yyyy for us) records exported are the ones that
                        have been modified since DATE

 --record-type=TYPE     TYPE is 'bibs' or 'auths'

 --deleted_barcodes     If used, a list of barcodes of items deleted since DATE
                        is produced (or from all deleted items if no date is
                        specified). Used only if TYPE is 'bibs'

 --clean                removes NSE/NSB
_USAGE_
        exit;
    }

    # Default parameters values :
    $output_format     ||= 'marc';
    $timestamp         ||= '';
    $dont_export_items ||= 0;
    $deleted_barcodes  ||= 0;
    $clean             ||= 0;
    $record_type       ||= "bibs";

    # Redirect stdout to the output file. Fail loudly rather than silently
    # dumping the export to the terminal when the file cannot be created.
    if ($filename) {
        open STDOUT, '>', $filename
          or die "Cannot open '$filename' for writing: $!\n";
    }
}
else {

    $op       = $query->param("op")       || '';
    $filename = $query->param("filename") || 'koha.mrc';

    # The filename is echoed in the Content-Disposition header: remove every
    # CR/LF (not only the first one) to prevent header injection.
    $filename =~ s/[\r\n]//g;
}
# Authenticate the caller and load the export template. Authentication is
# skipped ("authnotrequired") when the script runs from the command line.
my %auth_args = (
    template_name   => "tools/export.tmpl",
    query           => $query,
    type            => "intranet",
    authnotrequired => $commandline,
    flagsrequired   => { tools => 'export_catalog' },
    debug           => 1,
);
my ( $template, $loggedinuser, $cookie, $flags ) =
  get_template_and_user( \%auth_args );

# Branch selected in the form (may be empty).
my $branch = $query->param("branch") || '';

# Set to 1 when IndependentBranches is on, a user environment exists, bit 1
# of its flags is not set, and the environment carries a branch.
my $limit_ind_branch = 0;
{
    my $userenv = C4::Context->userenv;
    if (   C4::Context->preference('IndependentBranches')
        && $userenv
        && !( $userenv->{flags} & 1 ) )
    {
        $limit_ind_branch = 1 if $userenv->{branch};

        # Under the same restriction the requested branch is always
        # overridden by the branch of the user environment.
        $branch = $userenv->{'branch'};
    }
}

my $backupdir = C4::Context->config('backupdir');
# Main dispatch: either stream an export (MARC / MARCXML / CSV / backup file)
# or display the export form.
if ( $op eq "export" ) {
    if ( $format eq "iso2709" or $format eq "xml" ) {
        my $charset  = 'utf-8';
        my $mimetype = 'application/octet-stream';
        binmode STDOUT, ':encoding(UTF-8)';

        # Compressed backups are sent as raw bytes, without a charset.
        if ( $filename =~ m/\.gz$/ ) {
            $mimetype = 'application/x-gzip';
            $charset  = '';
            binmode STDOUT;
        }
        elsif ( $filename =~ m/\.bz2$/ ) {
            $mimetype = 'application/x-bzip2';
            binmode STDOUT;
            $charset = '';
        }
        print $query->header(
            -type       => $mimetype,
            -charset    => $charset,
            -attachment => $filename
        ) unless ($commandline);

        # In web mode the options come from the form; default the record type
        # to '' so the string comparisons below never see undef.
        $record_type = $query->param("record_type") // '' unless ($commandline);
        $output_format = $query->param("output_format") || 'marc'
          unless ($commandline);
        my $export_remove_fields = $query->param("export_remove_fields");
        my @biblionumbers        = $query->param("biblionumbers");
        my @itemnumbers          = $query->param("itemnumbers");
        my @sql_params;
        my $sql_query;
        my @recordids;

        my $StartingBiblionumber = $query->param("StartingBiblionumber");
        my $EndingBiblionumber   = $query->param("EndingBiblionumber");
        my $itemtype             = $query->param("itemtype");
        my $start_callnumber     = $query->param("start_callnumber");
        my $end_callnumber       = $query->param("end_callnumber");
        $timestamp = ($timestamp) ? C4::Dates->new($timestamp) : ''
          if ($commandline);
        my $start_accession =
          ( $query->param("start_accession") )
          ? C4::Dates->new( $query->param("start_accession") )
          : '';
        my $end_accession =
          ( $query->param("end_accession") )
          ? C4::Dates->new( $query->param("end_accession") )
          : '';
        $dont_export_items = $query->param("dont_export_item")
          unless ($commandline);

        my $strip_nonlocal_items = $query->param("strip_nonlocal_items");

        # The deleted* tables are queried only for the command-line
        # --deleted_barcodes mode.
        my $biblioitemstable =
          ( $commandline and $deleted_barcodes )
          ? 'deletedbiblioitems'
          : 'biblioitems';
        my $itemstable =
          ( $commandline and $deleted_barcodes )
          ? 'deleteditems'
          : 'items';

        my $starting_authid = $query->param('starting_authid');
        my $ending_authid   = $query->param('ending_authid');
        my $authtype        = $query->param('authtype');

        if ( $record_type eq 'bibs' and not @biblionumbers ) {
            if ($timestamp) {

                # Specific query when timestamp is used
                # Actually it's used only with CLI and so all previous filters
                # are not used.
                # If one day timestamp is used via the web interface, this part will
                # certainly have to be rewritten
                my ( $built_query, $params ) = construct_query(
                    {
                        recordtype       => $record_type,
                        timestamp        => $timestamp,
                        biblioitemstable => $biblioitemstable,
                    }
                );
                $sql_query  = $built_query;
                @sql_params = @$params;

            }
            else {
                my ( $built_query, $params ) = construct_query(
                    {
                        recordtype           => $record_type,
                        biblioitemstable     => $biblioitemstable,
                        itemstable           => $itemstable,
                        StartingBiblionumber => $StartingBiblionumber,
                        EndingBiblionumber   => $EndingBiblionumber,
                        branch               => $branch,
                        start_callnumber     => $start_callnumber,
                        end_callnumber       => $end_callnumber,
                        start_accession      => $start_accession,
                        end_accession        => $end_accession,
                        itemtype             => $itemtype,
                    }
                );
                $sql_query  = $built_query;
                @sql_params = @$params;
            }
        }
        elsif ( $record_type eq 'auths' ) {
            my ( $built_query, $params ) = construct_query(
                {
                    recordtype      => $record_type,
                    starting_authid => $starting_authid,
                    ending_authid   => $ending_authid,
                    authtype        => $authtype,
                }
            );
            $sql_query  = $built_query;
            @sql_params = @$params;

        }
        elsif ( $record_type eq 'db' ) {

            # Database dump download: superlibrarians only, and only when
            # enabled in the configuration.
            my $successful_export;
            if (   $flags->{superlibrarian}
                && C4::Context->config('backup_db_via_tools') )
            {
                $successful_export = download_backup(
                    {
                        directory => $backupdir,
                        extension => 'sql',
                        filename  => "$filename"
                    }
                );
            }
            unless ($successful_export) {
                my $remotehost = $query->remote_host() // '';

                # Strip every CR/LF so the log line cannot be forged.
                $remotehost =~ s/[\r\n]//g;
                warn
"A suspicious attempt was made to download the db at '$filename' by someone at "
                  . $remotehost . "\n";
            }
            exit;
        }
        elsif ( $record_type eq 'conf' ) {

            # Configuration archive download: same restrictions as above.
            my $successful_export;
            if (   $flags->{superlibrarian}
                && C4::Context->config('backup_conf_via_tools') )
            {
                $successful_export = download_backup(
                    {
                        directory => $backupdir,
                        extension => 'tar',
                        filename  => "$filename"
                    }
                );
            }
            unless ($successful_export) {
                my $remotehost = $query->remote_host() // '';
                $remotehost =~ s/[\r\n]//g;
                warn
"A suspicious attempt was made to download the configuration at '$filename' by someone at "
                  . $remotehost . "\n";
            }
            exit;
        }
        elsif (@biblionumbers) {
            push @recordids, (@biblionumbers);
        }
        else {

            # Someone is trying to mess us up
            exit;
        }

        # Without an explicit list of biblionumbers, the ids come from the
        # query built above (first column of each row).
        unless (@biblionumbers) {
            my $sth = $dbh->prepare($sql_query);
            $sth->execute(@sql_params);
            push @recordids, map { $_->[0] } @{ $sth->fetchall_arrayref };
        }

        my $xml_header_written = 0;
        my $barcodes_sth;    # prepared once, on first use
        for my $recordid ( uniq @recordids ) {
            if ($deleted_barcodes) {
                $barcodes_sth //= $dbh->prepare( "
                    SELECT DISTINCT barcode
                    FROM deleteditems
                    WHERE deleteditems.biblionumber = ?
                " );
                $barcodes_sth->execute($recordid);

                # Fetch in list context so that a NULL, empty or "0" barcode
                # does not end the loop early.
                while ( my ($barcode) = $barcodes_sth->fetchrow_array ) {
                    next unless defined $barcode;
                    print "$barcode\n";
                }
            }
            else {
                my $record;
                if ( $record_type eq 'bibs' ) {
                    $record = eval { GetMarcBiblio($recordid); };

                    next if $@;
                    next if not defined $record;
                    C4::Biblio::EmbedItemsInMarcBiblio( $record, $recordid,
                        \@itemnumbers )
                      unless $dont_export_items;
                    if (   $strip_nonlocal_items
                        || $limit_ind_branch
                        || $dont_export_items )
                    {
                        my ( $homebranchfield, $homebranchsubfield ) =
                          GetMarcFromKohaField( 'items.homebranch', '' );
                        for my $itemfield ( $record->field($homebranchfield) ) {

                            # if stripping nonlocal items, use loggedinuser's branch if they didn't select one
                            unless ($branch) {
                                my $userenv = C4::Context->userenv;
                                $branch = $userenv->{'branch'} if $userenv;
                            }
                            my $homebranch =
                              $itemfield->subfield($homebranchsubfield) // '';
                            $record->delete_field($itemfield)
                              if ( $dont_export_items
                                || $homebranch ne ( $branch // '' ) );
                        }
                    }
                }
                elsif ( $record_type eq 'auths' ) {
                    $record = C4::AuthoritiesMarc::GetAuthority($recordid);
                    next if not defined $record;
                }

                # Nothing could be loaded for this record type: skip instead
                # of calling methods on undef below.
                next if not defined $record;

                if ($export_remove_fields) {
                    for my $f ( split ' ', $export_remove_fields ) {

                        # Each entry is a 3-digit tag, optionally followed by
                        # a single subfield code (letter or digit).
                        if ( $f =~ m/^(\d{3})(.)?$/ ) {
                            my ( $field, $subfield ) = ( $1, $2 );

                            # skip if this record doesn't have this field
                            my @tags = $record->field($field);
                            next unless @tags;

                            if ( defined $subfield ) {
                                foreach my $t (@tags) {

                                    # control fields carry no subfields
                                    next if $t->is_control_field;
                                    $t->delete_subfields($subfield);
                                }
                            }
                            else {

                                # delete_fields() expects MARC::Field objects,
                                # not a tag string.
                                $record->delete_fields(@tags);
                            }
                        }
                    }
                }
                RemoveAllNsb($record) if ($clean);
                if ( $output_format eq "xml" ) {
                    unless ($xml_header_written) {
                        MARC::File::XML->default_record_format(
                            (
                                     $marcflavour eq 'UNIMARC'
                                  && $record_type eq 'auths'
                            ) ? 'UNIMARCAUTH' : $marcflavour
                        );
                        print MARC::File::XML::header();
                        print "\n";
                        $xml_header_written = 1;
                    }
                    print MARC::File::XML::record($record);
                    print "\n";
                }
                else {
                    print $record->as_usmarc();
                }
            }
        }

        # Close the XML collection only if it was opened.
        if ($xml_header_written) {
            print MARC::File::XML::footer();
            print "\n";
        }

        exit;
    }
    elsif ( $format eq "csv" ) {
        my @biblionumbers = uniq $query->param("biblionumbers");
        my @itemnumbers   = $query->param("itemnumbers");
        my $output        = marc2csv(
            \@biblionumbers,
            GetCsvProfileId( C4::Context->preference('ExportWithCsvProfile') ),
            \@itemnumbers,
        );
        print $query->header(
            -type                        => 'application/octet-stream',
            -'Content-Transfer-Encoding' => 'binary',
            -attachment                  => "export.csv"
        );
        print $output;
        exit;
    }
}    # if export

else {

    # No export requested: build the data needed to display the export form.
    my $itemtypes = GetItemTypes();
    my @itemtypesloop;
    foreach my $thisitemtype ( sort keys %$itemtypes ) {
        my %row = (
            value       => $thisitemtype,
            description => $itemtypes->{$thisitemtype}->{'description'},
        );
        push @itemtypesloop, \%row;
    }

    my $branches = GetBranches($limit_ind_branch);
    my @branchloop;
    for my $thisbranch (
        sort { $branches->{$a}->{branchname} cmp $branches->{$b}->{branchname} }
        keys %{$branches}
      )
    {
        push @branchloop,
          {
            value      => $thisbranch,
            selected   => $thisbranch eq $branch,
            branchname => $branches->{$thisbranch}->{'branchname'},
          };
    }

    my $authtypes = getauthtypes();
    my @authtypesloop;
    foreach my $thisauthtype ( sort keys %$authtypes ) {
        next unless $thisauthtype;
        my %row = (
            value       => $thisauthtype,
            description => $authtypes->{$thisauthtype}->{'authtypetext'},
        );
        push @authtypesloop, \%row;
    }

    # Backup downloads are offered only to superlibrarians, when enabled in
    # the configuration and when the backup directory exists.
    if (   $flags->{superlibrarian}
        && C4::Context->config('backup_db_via_tools')
        && $backupdir
        && -d $backupdir )
    {
        $template->{VARS}->{'allow_db_export'} = 1;
        $template->{VARS}->{'dbfiles'}         = getbackupfilelist(
            { directory => "$backupdir", extension => 'sql' } );
    }

    if (   $flags->{superlibrarian}
        && C4::Context->config('backup_conf_via_tools')
        && $backupdir
        && -d $backupdir )
    {
        $template->{VARS}->{'allow_conf_export'} = 1;
        $template->{VARS}->{'conffiles'}         = getbackupfilelist(
            { directory => "$backupdir", extension => 'tar' } );
    }

    $template->param(
        branchloop           => \@branchloop,
        itemtypeloop         => \@itemtypesloop,
        authtypeloop         => \@authtypesloop,
        export_remove_fields => C4::Context->preference("ExportRemoveFields"),
    );

    output_html_with_http_headers( $query, $cookie, $template->output );
}
# construct_query(\%params)
#
# Build the SQL statement that selects the ids of the records to export.
#
# Recognised keys of %params:
#   recordtype        'bibs' or 'auths'
#   -- bibs, timestamp mode --
#   timestamp         object answering ->output('iso'); when true, only the
#                     timestamp filter is applied
#   biblioitemstable  name of the biblioitems table to query
#   -- bibs, filter mode --
#   biblioitemstable, itemstable
#   StartingBiblionumber, EndingBiblionumber, branch,
#   start_callnumber, end_callnumber, itemtype
#   start_accession, end_accession  objects answering ->output('iso')
#   -- auths --
#   starting_authid, ending_authid, authtype
#
# Returns the list ( $sql_query, \@bind_values ). $sql_query is undef for an
# unknown record type and for 'auths' with a timestamp (not implemented).
sub construct_query {
    my ($params) = @_;

    my ( $sql_query, @sql_params );
    my $recordtype = $params->{recordtype} // '';

    if ( $recordtype eq "bibs" ) {
        if ( $params->{timestamp} ) {
            my $biblioitemstable = $params->{biblioitemstable};
            $sql_query = " (
                SELECT biblionumber
                FROM $biblioitemstable
                  LEFT JOIN items USING(biblionumber)
                WHERE $biblioitemstable.timestamp >= ?
                  OR items.timestamp >= ?
            ) UNION (
                SELECT biblionumber
                FROM $biblioitemstable
                  LEFT JOIN deleteditems USING(biblionumber)
                WHERE $biblioitemstable.timestamp >= ?
                  OR deleteditems.timestamp >= ?
            ) ";

            # Use the timestamp that was passed in, not a file-level variable,
            # so the function only depends on its arguments.
            my $ts = $params->{timestamp}->output('iso');
            @sql_params = ( $ts, $ts, $ts, $ts );
        }
        else {
            my $biblioitemstable     = $params->{biblioitemstable};
            my $itemstable           = $params->{itemstable};
            my $StartingBiblionumber = $params->{StartingBiblionumber};
            my $EndingBiblionumber   = $params->{EndingBiblionumber};
            my $branch               = $params->{branch};
            my $start_callnumber     = $params->{start_callnumber};
            my $end_callnumber       = $params->{end_callnumber};
            my $start_accession      = $params->{start_accession};
            my $end_accession        = $params->{end_accession};
            my $itemtype             = $params->{itemtype};

            # The preference only matters when an item type filter is given.
            my $item_level_itypes =
              $itemtype
              ? C4::Context->preference('item-level_itypes')
              : undef;

            # The items table is joined only when a filter needs one of its
            # columns.
            my $items_filter =
                 $branch
              || $start_callnumber
              || $end_callnumber
              || $start_accession
              || $end_accession
              || ( $itemtype && $item_level_itypes );
            $sql_query = $items_filter
              ? "SELECT DISTINCT $biblioitemstable.biblionumber
                FROM $biblioitemstable JOIN $itemstable
                USING (biblionumber) WHERE 1"
              : "SELECT $biblioitemstable.biblionumber FROM $biblioitemstable WHERE biblionumber >0 ";

            if ($StartingBiblionumber) {
                $sql_query .= " AND $biblioitemstable.biblionumber >= ? ";
                push @sql_params, $StartingBiblionumber;
            }

            if ($EndingBiblionumber) {
                $sql_query .= " AND $biblioitemstable.biblionumber <= ? ";
                push @sql_params, $EndingBiblionumber;
            }

            if ($branch) {
                $sql_query .= " AND homebranch = ? ";
                push @sql_params, $branch;
            }

            if ($start_callnumber) {
                $sql_query .= " AND itemcallnumber >= ? ";
                push @sql_params, $start_callnumber;
            }

            if ($end_callnumber) {
                $sql_query .= " AND itemcallnumber <= ? ";
                push @sql_params, $end_callnumber;
            }
            if ($start_accession) {
                $sql_query .= " AND dateaccessioned >= ? ";
                push @sql_params, $start_accession->output('iso');
            }

            if ($end_accession) {
                $sql_query .= " AND dateaccessioned <= ? ";
                push @sql_params, $end_accession->output('iso');
            }

            if ($itemtype) {

                # Qualify the column with the table actually used in the FROM
                # clause (it may be one of the deleted* tables).
                $sql_query .=
                  $item_level_itypes
                  ? " AND $itemstable.itype = ? "
                  : " AND $biblioitemstable.itemtype = ?";
                push @sql_params, $itemtype;
            }
        }
    }
    elsif ( $recordtype eq "auths" ) {
        if ( $params->{timestamp} ) {

            #TODO: timestamp filtering is not implemented for authorities;
            # $sql_query is left undefined.
        }
        else {
            my $starting_authid = $params->{starting_authid};
            my $ending_authid   = $params->{ending_authid};
            my $authtype        = $params->{authtype};
            $sql_query =
              "SELECT DISTINCT auth_header.authid FROM auth_header WHERE 1";

            if ($starting_authid) {
                $sql_query .= " AND auth_header.authid >= ? ";
                push @sql_params, $starting_authid;
            }

            if ($ending_authid) {
                $sql_query .= " AND auth_header.authid <= ? ";
                push @sql_params, $ending_authid;
            }

            if ($authtype) {
                $sql_query .= " AND auth_header.authtypecode = ? ";
                push @sql_params, $authtype;
            }
        }
    }
    return ( $sql_query, \@sql_params );
}
# getbackupfilelist(\%args)
#
# List the backup files available for download.
#
#   directory  directory to scan
#   extension  base extension of the wanted files (e.g. 'sql' or 'tar'),
#              optionally followed by .gz, .bz2 or .xz
#
# Returns an array reference of the names (not full paths) of the plain,
# readable files whose name ends with that extension. The reference is empty
# when the directory cannot be opened. The name test is the same one that
# download_backup() applies, so every listed file can be downloaded.
sub getbackupfilelist {
    my $args      = shift;
    my $directory = $args->{directory};
    my $extension = $args->{extension};
    my @files;

    return \@files unless defined $directory && defined $extension;

    if ( opendir( my $dir, $directory ) ) {
        while ( defined( my $file = readdir($dir) ) ) {

            # Anchor at the end of the name and quote the extension so that
            # e.g. "dump.sql.exe" or "dump.sqlite" are not listed.
            next
              unless ( $file =~ m/\.\Q$extension\E(\.(gz|bz2|xz))?\z/ );
            push @files, $file
              if ( -f "$directory/$file" && -r "$directory/$file" );
        }
        closedir($dir);
    }
    return \@files;
}
# download_backup(\%args)
#
# Print the content of a backup file to the currently selected output handle.
#
#   directory  directory holding the backups
#   extension  expected base extension ('sql', 'tar', ...), optionally
#              followed by .gz, .bz2 or .xz
#   filename   bare file name requested by the user (no path)
#
# Returns 1 once the file has been printed. Returns nothing (false) without
# printing when the directory is missing, the name does not end with the
# expected extension, the name contains a "/", or the file is not a readable
# plain file.
sub download_backup {
    my $args      = shift;
    my $directory = $args->{directory};
    my $extension = $args->{extension};
    my $filename  = $args->{filename};

    return unless ( $directory && -d $directory );
    return unless ( defined $filename && defined $extension );

    # Quote the extension and anchor with \z so that neither regex
    # metacharacters nor a trailing newline can slip through.
    return unless ( $filename =~ m/\.\Q$extension\E(\.(gz|bz2|xz))?\z/ );

    # Refuse anything that could leave the backup directory.
    return if ( $filename =~ m#/# );
    $filename = "$directory/$filename";
    return unless ( -f $filename && -r $filename );
    return unless ( open( my $dump, '<', $filename ) );
    binmode $dump;

    # Stream in 64 KiB chunks to keep memory use flat for large dumps.
    while ( read( $dump, my $data, 64 * 1024 ) ) {
        print $data;
    }
    close($dump);
    return 1;
}