Bug 11559: (followup) Fix import bugs, display/parsing issues
[koha.git] / tools / export.pl
bloba6742484dba8a98902fc6ae2c1f71d4df067583e
1 #!/usr/bin/perl
4 # This file is part of Koha.
6 # Koha is free software; you can redistribute it and/or modify it
7 # under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 3 of the License, or
9 # (at your option) any later version.
11 # Koha is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with Koha; if not, see <http://www.gnu.org/licenses>.
19 use Modern::Perl;
20 use MARC::File::XML;
21 use List::MoreUtils qw(uniq);
22 use Getopt::Long;
23 use CGI qw ( -utf8 );
24 use C4::Auth;
25 use C4::AuthoritiesMarc; # GetAuthority
26 use C4::Biblio; # GetMarcBiblio
27 use C4::Branch; # GetBranches
28 use C4::Csv;
29 use C4::Koha; # GetItemTypes
30 use C4::Output;
31 use C4::Record;
32 use Koha::DateUtils;
# Request object used to read the form parameters.
# (Direct method call instead of the ambiguous indirect-object "new CGI".)
my $query = CGI->new;

# Export options; they are filled in further down, either by GetOptions
# (command line) or from the request parameters (web interface).
my $clean;
my $dont_export_items;
my $deleted_barcodes;
my $timestamp;
my $record_type;
my $id_list_file;
my $help;

my $op            = $query->param("op")       || '';
my $filename      = $query->param("filename") || 'koha.mrc';
my $dbh           = C4::Context->dbh;
my $marcflavour   = C4::Context->preference("marcflavour");
my $output_format = $query->param("format") || $query->param("output_format") || 'iso2709';

# Checks if the script is called from commandline
my $commandline = not defined $ENV{GATEWAY_INTERFACE};

# @biblionumbers is only used for csv export from circulation.pl
my @biblionumbers = uniq $query->param("biblionumbers");
# Collect the export parameters, either from the command line options or
# from the web request, then normalise the requested output format.
if ($commandline) {

    # Getting parameters
    $op = 'export';
    GetOptions(
        'format=s'          => \$output_format,
        'date=s'            => \$timestamp,
        'dont_export_items' => \$dont_export_items,
        'deleted_barcodes'  => \$deleted_barcodes,
        'clean'             => \$clean,
        'filename=s'        => \$filename,
        'record-type=s'     => \$record_type,
        'id_list_file=s'    => \$id_list_file,
        'help|?'            => \$help
    );

    if ($help) {
        print <<"_USAGE_";
export.pl [--format=format] [--date=date] [--record-type=TYPE] [--dont_export_items] [--deleted_barcodes] [--clean] [--id_list_file=PATH] --filename=outputfile


 --format=FORMAT        FORMAT is either 'xml' or 'marc' (default)

 --date=DATE            DATE should be entered as the 'dateformat' syspref is
                        set (dd/mm/yyyy for metric, yyyy-mm-dd for iso,
                        mm/dd/yyyy for us) records exported are the ones that
                        have been modified since DATE

 --record-type=TYPE     TYPE is 'bibs' or 'auths'

 --deleted_barcodes     If used, a list of barcodes of items deleted since DATE
                        is produced (or from all deleted items if no date is
                        specified). Used only if TYPE is 'bibs'

 --clean                removes NSE/NSB

 --id_list_file=PATH    PATH is a path to a file containing a list of
                        IDs (biblionumber or authid) with one ID per line.
                        This list works as a filter; it is compatible with
                        other parameters for selecting records
_USAGE_
        exit;
    }

    # Default parameters values :
    $timestamp         ||= '';
    $dont_export_items ||= 0;
    $deleted_barcodes  ||= 0;
    $clean             ||= 0;
    $record_type       ||= "bibs";
    $id_list_file      ||= 0;

    # Redirect stdout. A failed open leaves STDOUT closed, so every later
    # print would be lost silently: stop with a clear message instead.
    if ($filename) {
        open STDOUT, '>', $filename
          or die "Cannot open '$filename' for writing: $!\n";
    }
}
else {
    $op       = $query->param("op")       || '';
    $filename = $query->param("filename") || 'koha.mrc';

    # The file name is echoed back in a response header, so remove every
    # CR and LF it contains (not just the first one).
    $filename =~ tr/\r\n//d;
}

# Default value for output_format is 'iso2709'
$output_format ||= 'iso2709';

# Retrocompatibility for the format parameter
$output_format = 'iso2709' if $output_format eq 'marc';
# Authenticate the caller (skipped when run from the command line) and load
# the template of the export form.
my ( $template, $loggedinuser, $cookie, $flags ) = get_template_and_user(
    {
        debug           => 1,
        flagsrequired   => { tools => 'export_catalog' },
        authnotrequired => $commandline,
        type            => "intranet",
        query           => $query,
        template_name   => "tools/export.tt",
    }
);

# True (1) when IndependentBranches is on and the logged-in user is not a
# superlibrarian and has a branch set.
my $limit_ind_branch = 0;
if (   C4::Context->preference('IndependentBranches')
    && C4::Context->userenv
    && !C4::Context->IsSuperLibrarian()
    && C4::Context->userenv->{branch} )
{
    $limit_ind_branch = 1;
}

# Branches to export: the user's own branch under IndependentBranches
# (unless superlibrarian), otherwise whatever the form submitted.
my @branch =
  (      C4::Context->preference("IndependentBranches")
      && C4::Context->userenv
      && !C4::Context->IsSuperLibrarian() )
  ? ( C4::Context->userenv->{'branch'} )
  : $query->param("branch");

# if stripping nonlocal items, use loggedinuser's branch
my $localbranch;
$localbranch = C4::Context->userenv->{'branch'} if C4::Context->userenv;

# for quick lookups
my %branchmap;
$branchmap{$_} = 1 for @branch;

my $backupdir = C4::Context->config('backupdir');
# Export branch: select the record ids, then stream the records to STDOUT as
# ISO 2709, MARCXML or CSV, or hand over a db/configuration backup file.
# Every path through this branch ends with exit.
if ( $op eq "export" ) {
    if (
        $output_format eq "iso2709"
        or $output_format eq "xml"
        or (    $output_format eq 'csv'
            and not @biblionumbers )
      )
    {
        my $charset  = 'utf-8';
        my $mimetype = 'application/octet-stream';

        binmode STDOUT, ':encoding(UTF-8)'
          if $filename =~ m/\.gz$/
          or $filename =~ m/\.bz2$/
          or $output_format ne 'csv';

        # Compressed targets are sent as raw bytes with no charset.
        if ( $filename =~ m/\.gz$/ ) {
            $mimetype = 'application/x-gzip';
            $charset  = '';
            binmode STDOUT;
        }
        elsif ( $filename =~ m/\.bz2$/ ) {
            $mimetype = 'application/x-bzip2';
            binmode STDOUT;
            $charset = '';
        }
        print $query->header(
            -type       => $mimetype,
            -charset    => $charset,
            -attachment => $filename,
        ) unless ($commandline);

        $record_type = $query->param("record_type") unless ($commandline);

        # A web request may omit record_type; compare against '' rather
        # than undef in the dispatch below.
        $record_type //= '';

        # The file-level @biblionumbers (already de-duplicated) is used here;
        # it does not need to be read from the request a second time.
        my $export_remove_fields = $query->param("export_remove_fields");
        my @itemnumbers          = $query->param("itemnumbers");
        my @sql_params;
        my $sql_query;
        my @recordids;

        my $StartingBiblionumber = $query->param("StartingBiblionumber");
        my $EndingBiblionumber   = $query->param("EndingBiblionumber");
        my $itemtype             = $query->param("itemtype");
        my $start_callnumber     = $query->param("start_callnumber");
        my $end_callnumber       = $query->param("end_callnumber");

        # Normalise the --date value, but only when one was actually given.
        # NOTE(review): what dt_from_string does with an empty string is not
        # visible in this file; not calling it avoids depending on that.
        if ( $commandline and $timestamp ) {
            $timestamp = eval {
                output_pref( { dt => dt_from_string($timestamp), dateonly => 1 } );
            };
            $timestamp = '' unless ($timestamp);
        }

        # Read the accession dates in scalar context first: calling
        # $query->param() directly inside an argument list would let a
        # multi-valued parameter inject extra arguments.
        my $start_accession_raw = $query->param("start_accession");
        my $end_accession_raw   = $query->param("end_accession");
        my $start_accession =
          ($start_accession_raw)
          ? eval { output_pref( { dt => dt_from_string($start_accession_raw), dateonly => 1, dateformat => 'iso' } ); }
          : '';
        my $end_accession =
          ($end_accession_raw)
          ? eval { output_pref( { dt => dt_from_string($end_accession_raw), dateonly => 1, dateformat => 'iso' } ); }
          : '';
        $dont_export_items = $query->param("dont_export_item")
          unless ($commandline);

        my $strip_nonlocal_items = $query->param("strip_nonlocal_items");

        # --deleted_barcodes (command line only) works on the deleted* tables.
        my $biblioitemstable =
          ( $commandline and $deleted_barcodes )
          ? 'deletedbiblioitems'
          : 'biblioitems';
        my $itemstable =
          ( $commandline and $deleted_barcodes )
          ? 'deleteditems'
          : 'items';

        my $starting_authid = $query->param('starting_authid');
        my $ending_authid   = $query->param('ending_authid');
        my $authtype        = $query->param('authtype');

        # Optional list of ids (one per line) used as a filter on the result.
        my $filefh;
        if ($commandline) {
            if ($id_list_file) {
                open $filefh, '<', $id_list_file
                  or die "cannot open $id_list_file: $!";
            }
        }
        else {
            $filefh = $query->upload("id_list_file");
        }
        my %id_filter;
        if ($filefh) {
            while ( my $number = <$filefh> ) {
                $number =~ s/[\r\n]*$//;
                $id_filter{$number} = 1 if $number =~ /^\d+$/;
            }
            close $filefh;
        }

        if ( $record_type eq 'bibs' and not @biblionumbers ) {
            if ($timestamp) {

                # Specific query when timestamp is used
                # Actually it's used only with CLI and so all previous filters
                # are not used.
                # If one day timestamp is used via the web interface, this part will
                # certainly have to be rewritten
                my ( $sql, $bind ) = construct_query(
                    {
                        recordtype       => $record_type,
                        timestamp        => $timestamp,
                        biblioitemstable => $biblioitemstable,
                    }
                );
                $sql_query  = $sql;
                @sql_params = @$bind;

            }
            else {
                my ( $sql, $bind ) = construct_query(
                    {
                        recordtype           => $record_type,
                        biblioitemstable     => $biblioitemstable,
                        itemstable           => $itemstable,
                        StartingBiblionumber => $StartingBiblionumber,
                        EndingBiblionumber   => $EndingBiblionumber,
                        branch               => \@branch,
                        start_callnumber     => $start_callnumber,
                        end_callnumber       => $end_callnumber,
                        start_accession      => $start_accession,
                        end_accession        => $end_accession,
                        itemtype             => $itemtype,
                    }
                );
                $sql_query  = $sql;
                @sql_params = @$bind;
            }
        }
        elsif ( $record_type eq 'auths' ) {
            my ( $sql, $bind ) = construct_query(
                {
                    recordtype      => $record_type,
                    starting_authid => $starting_authid,
                    ending_authid   => $ending_authid,
                    authtype        => $authtype,
                }
            );
            $sql_query  = $sql;
            @sql_params = @$bind;

        }
        elsif ( $record_type eq 'db' ) {
            my $successful_export;
            if (   $flags->{superlibrarian}
                && C4::Context->config('backup_db_via_tools') )
            {
                $successful_export = download_backup(
                    {
                        directory => $backupdir,
                        extension => 'sql',
                        filename  => $filename
                    }
                );
            }
            unless ($successful_export) {
                my $remotehost = $query->remote_host();

                # Remove every CR/LF so the value cannot forge log lines.
                $remotehost =~ tr/\r\n//d;
                warn
"A suspicious attempt was made to download the db at '$filename' by someone at "
                  . $remotehost . "\n";
            }
            exit;
        }
        elsif ( $record_type eq 'conf' ) {
            my $successful_export;
            if (   $flags->{superlibrarian}
                && C4::Context->config('backup_conf_via_tools') )
            {
                $successful_export = download_backup(
                    {
                        directory => $backupdir,
                        extension => 'tar',
                        filename  => $filename
                    }
                );
            }
            unless ($successful_export) {
                my $remotehost = $query->remote_host();

                # Remove every CR/LF so the value cannot forge log lines.
                $remotehost =~ tr/\r\n//d;
                warn
"A suspicious attempt was made to download the configuration at '$filename' by someone at "
                  . $remotehost . "\n";
            }
            exit;
        }
        elsif (@biblionumbers) {
            push @recordids, (@biblionumbers);
        }
        else {

            # Someone is trying to mess us up
            exit;
        }

        unless (@biblionumbers) {
            my $sth = $dbh->prepare($sql_query);
            $sth->execute(@sql_params);
            push @recordids, map { $_->[0] } @{ $sth->fetchall_arrayref };

            # The id list only filters when it holds at least one valid id.
            @recordids = grep { exists( $id_filter{$_} ) } @recordids
              if %id_filter;
        }

        my $xml_header_written = 0;
        my $barcode_sth;    # prepared on first use, reused for every record
        for my $recordid ( uniq @recordids ) {
            if ($deleted_barcodes) {
                $barcode_sth //= $dbh->prepare( "
                    SELECT DISTINCT barcode
                    FROM deleteditems
                    WHERE deleteditems.biblionumber = ?
                " );
                $barcode_sth->execute($recordid);

                # List assignment: the loop ends only when there are no more
                # rows, not on a barcode that is "0", empty or NULL.
                while ( my ($barcode) = $barcode_sth->fetchrow_array ) {
                    next unless defined $barcode;
                    print "$barcode\n";
                }
            }
            else {

                # CSV is produced in one go by marc2csv after the loop, so
                # there is nothing to load or print per record here.
                next if $output_format eq 'csv';

                my $record;
                if ( $record_type eq 'bibs' ) {
                    $record = eval { GetMarcBiblio($recordid); };

                    next if $@;
                    next if not defined $record;
                    C4::Biblio::EmbedItemsInMarcBiblio( $record, $recordid,
                        \@itemnumbers )
                      unless $dont_export_items;
                    if (   $strip_nonlocal_items
                        || $limit_ind_branch
                        || $dont_export_items )
                    {
                        my ( $homebranchfield, $homebranchsubfield ) =
                          GetMarcFromKohaField( 'items.homebranch', '' );
                        for my $itemfield ( $record->field($homebranchfield) ) {

                            # undef on either side compares as '' (same
                            # outcome as before, without the warning).
                            $record->delete_field($itemfield)
                              if ( $dont_export_items
                                || ( $localbranch // '' ) ne
                                ( $itemfield->subfield($homebranchsubfield) // '' ) );
                        }
                    }
                }
                elsif ( $record_type eq 'auths' ) {
                    $record = C4::AuthoritiesMarc::GetAuthority($recordid);
                    next if not defined $record;
                }

                # Unknown record type: nothing was loaded, nothing to export.
                next if not defined $record;

                if ($export_remove_fields) {
                    for my $f ( split / /, $export_remove_fields ) {
                        if ( $f =~ m/^(\d{3})(.)?$/ ) {
                            my ( $field, $subfield ) = ( $1, $2 );

                            # skip if this record doesn't have this field
                            if ( defined $record->field($field) ) {
                                if ( defined $subfield ) {
                                    my @tags = $record->field($field);
                                    foreach my $t (@tags) {
                                        $t->delete_subfields($subfield);
                                    }
                                }
                                else {
                                    $record->delete_fields( $record->field($field) );
                                }
                            }
                        }
                    }
                }
                RemoveAllNsb($record) if ($clean);
                if ( $output_format eq "xml" ) {
                    unless ($xml_header_written) {
                        MARC::File::XML->default_record_format(
                            (
                                     $marcflavour eq 'UNIMARC'
                                  && $record_type eq 'auths'
                            ) ? 'UNIMARCAUTH' : $marcflavour
                        );
                        print MARC::File::XML::header();
                        print "\n";
                        $xml_header_written = 1;
                    }
                    print MARC::File::XML::record($record);
                    print "\n";
                }
                elsif ( $output_format eq 'iso2709' ) {

                    # Re-decode the serialised record; any warning (or a die)
                    # means the record is not exported.
                    my $errorcount_on_decode = eval { scalar( MARC::File::USMARC->decode( $record->as_usmarc )->warnings() ) };
                    if ( $errorcount_on_decode or $@ ) {
                        warn $@ if $@;
                        warn "record (number $recordid) is invalid and therefore not exported because its reopening generates warnings above";
                        next;
                    }
                    print $record->as_usmarc();
                }
            }
        }
        if ($xml_header_written) {
            print MARC::File::XML::footer();
            print "\n";
        }
        if ( $output_format eq 'csv' ) {
            my $csv_profile_id = $query->param('csv_profile')
              || GetCsvProfileId( C4::Context->preference('ExportWithCsvProfile') );
            my $output = marc2csv( \@recordids, $csv_profile_id );

            print $output;
        }

        exit;
    }
    elsif ( $output_format eq "csv" ) {

        # CSV export of an explicit list of biblionumbers (the file-level,
        # de-duplicated @biblionumbers).
        my @itemnumbers    = $query->param("itemnumbers");
        my $csv_profile_id = $query->param('csv_profile')
          || GetCsvProfileId( C4::Context->preference('ExportWithCsvProfile') );
        my $output = marc2csv( \@biblionumbers, $csv_profile_id, \@itemnumbers, );
        print $query->header(
            -type                        => 'application/octet-stream',
            -'Content-Transfer-Encoding' => 'binary',
            -attachment                  => "export.csv"
        );
        print $output;
        exit;
    }
}    # if export
# No export requested: fill the template with the lists the export form
# needs and send the page.
else {

    # Item types, ordered by code.
    my $itemtypes     = GetItemTypes();
    my @itemtypesloop = map {
        +{
            value       => $_,
            description => $itemtypes->{$_}->{'description'},
        }
    } sort keys %$itemtypes;

    # Branches, ordered by name. When no branch has been chosen every
    # branch is flagged as selected.
    my $branches     = GetBranches($limit_ind_branch);
    my @branch_codes = sort {
        $branches->{$a}->{branchname} cmp $branches->{$b}->{branchname}
    } keys %{$branches};
    my @branchloop = map {
        +{
            value      => $_,
            selected   => %branchmap ? $branchmap{$_} : 1,
            branchname => $branches->{$_}->{'branchname'},
        }
    } @branch_codes;

    # Authority types, ordered by code; entries with a false code are left out.
    my $authtypes     = getauthtypes();
    my @authtypesloop = map {
        +{
            value       => $_,
            description => $authtypes->{$_}->{'authtypetext'},
        }
    } grep { $_ } sort keys %$authtypes;

    # Backup downloads are offered to superlibrarians only, when enabled in
    # the configuration and when the backup directory exists.
    my @backup_kinds = (
        {
            config    => 'backup_db_via_tools',
            allow     => 'allow_db_export',
            files     => 'dbfiles',
            extension => 'sql',
        },
        {
            config    => 'backup_conf_via_tools',
            allow     => 'allow_conf_export',
            files     => 'conffiles',
            extension => 'tar',
        },
    );
    for my $kind (@backup_kinds) {
        next
          unless $flags->{superlibrarian}
          && C4::Context->config( $kind->{config} )
          && $backupdir
          && -d $backupdir;

        $template->{VARS}->{ $kind->{allow} } = 1;
        $template->{VARS}->{ $kind->{files} } = getbackupfilelist(
            { directory => "$backupdir", extension => $kind->{extension} } );
    }

    $template->param(
        branchloop           => \@branchloop,
        itemtypeloop         => \@itemtypesloop,
        authtypeloop         => \@authtypesloop,
        export_remove_fields => C4::Context->preference("ExportRemoveFields"),
        csv_profiles         => C4::Csv::GetCsvProfiles('marc'),
    );

    output_html_with_http_headers $query, $cookie, $template->output;
}
# construct_query( \%params )
#
# Builds the SQL statement that selects the ids of the records to export.
#
# Parameters (hashref):
#   recordtype        'bibs' or 'auths'
#   for 'bibs' with a timestamp:
#     timestamp, biblioitemstable
#   for 'bibs' without a timestamp:
#     biblioitemstable, itemstable, StartingBiblionumber, EndingBiblionumber,
#     branch (arrayref), start_callnumber, end_callnumber, start_accession,
#     end_accession, itemtype
#   for 'auths':
#     starting_authid, ending_authid, authtype
#
# Returns a two-element list: the SQL string and an arrayref of the bind
# values matching its placeholders. The SQL string is undef for an
# unsupported request (unknown recordtype, or 'auths' with a timestamp,
# which is not implemented).
sub construct_query {
    my ($params) = @_;

    my ( $sql_query, @sql_params );
    my $recordtype = $params->{recordtype} // '';

    if ( $recordtype eq "bibs" ) {
        if ( $params->{timestamp} ) {
            my $biblioitemstable = $params->{biblioitemstable};
            $sql_query = " (
                SELECT biblionumber
                FROM $biblioitemstable
                  LEFT JOIN items USING(biblionumber)
                WHERE $biblioitemstable.timestamp >= ?
                  OR items.timestamp >= ?
            ) UNION (
                SELECT biblionumber
                FROM $biblioitemstable
                  LEFT JOIN deleteditems USING(biblionumber)
                WHERE $biblioitemstable.timestamp >= ?
                  OR deleteditems.timestamp >= ?
            ) ";

            # Use the timestamp that was passed in, not a variable of the
            # surrounding file, and convert it to an ISO date for SQL.
            my $ts = eval {
                output_pref(
                    {
                        dt         => dt_from_string( $params->{timestamp} ),
                        dateonly   => 1,
                        dateformat => 'iso'
                    }
                );
            };
            warn "construct_query: cannot parse timestamp '$params->{timestamp}'"
              unless defined $ts;
            @sql_params = ( $ts, $ts, $ts, $ts );
        }
        else {
            my $biblioitemstable     = $params->{biblioitemstable};
            my $itemstable           = $params->{itemstable};
            my $StartingBiblionumber = $params->{StartingBiblionumber};
            my $EndingBiblionumber   = $params->{EndingBiblionumber};
            my @branch               = @{ $params->{branch} || [] };
            my $start_callnumber     = $params->{start_callnumber};
            my $end_callnumber       = $params->{end_callnumber};
            my $start_accession      = $params->{start_accession};
            my $end_accession        = $params->{end_accession};
            my $itemtype             = $params->{itemtype};

            # The syspref only matters when an item type was requested.
            my $item_level_itypes =
              $itemtype
              ? C4::Context->preference('item-level_itypes')
              : 0;

            # The items table is joined only when a filter needs its columns.
            my $items_filter =
                 @branch
              || $start_callnumber
              || $end_callnumber
              || $start_accession
              || $end_accession
              || ( $itemtype && $item_level_itypes );
            $sql_query = $items_filter
              ? "SELECT DISTINCT $biblioitemstable.biblionumber
                 FROM $biblioitemstable JOIN $itemstable
                 USING (biblionumber) WHERE 1"
              : "SELECT $biblioitemstable.biblionumber FROM $biblioitemstable WHERE biblionumber >0 ";

            if ($StartingBiblionumber) {
                $sql_query .= " AND $biblioitemstable.biblionumber >= ? ";
                push @sql_params, $StartingBiblionumber;
            }

            if ($EndingBiblionumber) {
                $sql_query .= " AND $biblioitemstable.biblionumber <= ? ";
                push @sql_params, $EndingBiblionumber;
            }

            if (@branch) {
                $sql_query .= " AND homebranch IN (" . join( ',', map( {'?'} @branch ) ) . ")";
                push @sql_params, @branch;
            }

            if ($start_callnumber) {
                $sql_query .= " AND itemcallnumber >= ? ";
                push @sql_params, $start_callnumber;
            }

            if ($end_callnumber) {
                $sql_query .= " AND itemcallnumber <= ? ";
                push @sql_params, $end_callnumber;
            }

            if ($start_accession) {
                $sql_query .= " AND dateaccessioned >= ? ";
                push @sql_params, $start_accession;
            }

            if ($end_accession) {
                $sql_query .= " AND dateaccessioned <= ? ";
                push @sql_params, $end_accession;
            }

            if ($itemtype) {

                # Qualify the column with the table actually used in the
                # FROM clause; it is not always items/biblioitems.
                $sql_query .=
                  ($item_level_itypes)
                  ? " AND $itemstable.itype = ? "
                  : " AND $biblioitemstable.itemtype = ?";
                push @sql_params, $itemtype;
            }
        }
    }
    elsif ( $recordtype eq "auths" ) {
        if ( $params->{timestamp} ) {

            #TODO
        }
        else {
            my $starting_authid = $params->{starting_authid};
            my $ending_authid   = $params->{ending_authid};
            my $authtype        = $params->{authtype};
            $sql_query =
              "SELECT DISTINCT auth_header.authid FROM auth_header WHERE 1";

            if ($starting_authid) {
                $sql_query .= " AND auth_header.authid >= ? ";
                push @sql_params, $starting_authid;
            }

            if ($ending_authid) {
                $sql_query .= " AND auth_header.authid <= ? ";
                push @sql_params, $ending_authid;
            }

            if ($authtype) {
                $sql_query .= " AND auth_header.authtypecode = ? ";
                push @sql_params, $authtype;
            }
        }
    }
    return ( $sql_query, \@sql_params );
}
# getbackupfilelist( { directory => $dir, extension => $ext } )
#
# Lists the backup files found directly in $dir: readable plain files whose
# name ends in ".$ext", optionally followed by ".gz", ".bz2" or ".xz".
# The name test is anchored at the end of the name, the same rule
# download_backup applies, so only downloadable files are listed.
#
# Returns an arrayref of file names (without the directory), in the order
# readdir yields them; the arrayref is empty when the directory cannot be
# opened or no extension is given.
sub getbackupfilelist {
    my $args      = shift;
    my $directory = $args->{directory};
    my $extension = $args->{extension};
    my @files;

    return \@files
      unless defined $directory && length $directory && defined $extension;

    if ( opendir( my $dir, $directory ) ) {
        while ( my $file = readdir($dir) ) {

            # \Q..\E: the extension is matched literally, never as a pattern.
            next unless ( $file =~ m/\.\Q$extension\E(\.(gz|bz2|xz))?\z/ );
            push @files, $file
              if ( -f "$directory/$file" && -r "$directory/$file" );
        }
        closedir($dir);
    }
    return \@files;
}
# download_backup( { directory => $dir, extension => $ext, filename => $name } )
#
# Copies the backup file $name, located directly in $dir, to STDOUT.
# The request is refused when the directory does not exist, when the name
# contains a "/", when it does not end in ".$ext" (optionally followed by
# ".gz", ".bz2" or ".xz"), or when the file is not a readable plain file.
#
# Returns 1 once the file has been sent, and nothing (undef / empty list)
# when the request is refused or the file cannot be opened.
sub download_backup {
    my $args      = shift;
    my $directory = $args->{directory};
    my $extension = $args->{extension};
    my $filename  = $args->{filename};

    return unless ( $directory && -d $directory );
    return unless ( defined $filename && defined $extension );

    # \Q..\E matches the extension literally; \z (unlike $) does not accept
    # a trailing newline after the extension.
    return unless ( $filename =~ m/\.\Q$extension\E(\.(gz|bz2|xz))?\z/ );
    return if ( $filename =~ m#/# );
    $filename = "$directory/$filename";
    return unless ( -f $filename && -r $filename );
    return unless ( open( my $dump, '<', $filename ) );
    binmode $dump;

    # The file is read as raw bytes, so it must be written as raw bytes too:
    # drop any encoding layer the caller put on STDOUT, otherwise bytes
    # above 0x7F would be re-encoded and the download corrupted.
    binmode STDOUT;

    while ( read( $dump, my $data, 64 * 1024 ) ) {
        print $data;
    }
    close($dump);
    return 1;
}