Bug 13457 - Followup for CPL and S codes
[koha.git] / tools / export.pl
blob: bae69410be6004893cc827eaf30f0e4ee57e89cf
1 #!/usr/bin/perl
4 # This file is part of Koha.
6 # Koha is free software; you can redistribute it and/or modify it under the
7 # terms of the GNU General Public License as published by the Free Software
8 # Foundation; either version 2 of the License, or (at your option) any later
9 # version.
11 # Koha is distributed in the hope that it will be useful, but WITHOUT ANY
12 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License along with
16 # Koha; if not, write to the Free Software Foundation, Inc., 59 Temple Place,
17 # Suite 330, Boston, MA 02111-1307 USA
19 use Modern::Perl;
20 use MARC::File::XML;
21 use List::MoreUtils qw(uniq);
22 use Getopt::Long;
23 use CGI;
24 use C4::Auth;
25 use C4::AuthoritiesMarc; # GetAuthority
26 use C4::Biblio; # GetMarcBiblio
27 use C4::Branch; # GetBranches
28 use C4::Csv;
29 use C4::Koha; # GetItemTypes
30 use C4::Output;
31 use C4::Record;
# CGI request object; also used in command-line mode to read optional parameters.
my $query = CGI->new;

# Options that are filled in by GetOptions when run from the command line.
my $clean;
my $dont_export_items;
my $deleted_barcodes;
my $timestamp;
my $record_type;
my $id_list_file;
my $help;

# Request parameters with their defaults.
my $op            = $query->param("op")       || '';
my $filename      = $query->param("filename") || 'koha.mrc';
my $dbh           = C4::Context->dbh;
my $marcflavour   = C4::Context->preference("marcflavour");
my $output_format = $query->param("format") || $query->param("output_format") || 'iso2709';
# Checks if the script is called from commandline
# (no GATEWAY_INTERFACE in the environment means we are not running as a CGI).
my $commandline = !defined $ENV{GATEWAY_INTERFACE};

if ($commandline) {

    # Getting parameters
    $op = 'export';
    GetOptions(
        'format=s'          => \$output_format,
        'date=s'            => \$timestamp,
        'dont_export_items' => \$dont_export_items,
        'deleted_barcodes'  => \$deleted_barcodes,
        'clean'             => \$clean,
        'filename=s'        => \$filename,
        'record-type=s'     => \$record_type,
        'id_list_file=s'    => \$id_list_file,
        'help|?'            => \$help
    );

    if ($help) {
        print <<_USAGE_;
export.pl [--format=format] [--date=date] [--record-type=TYPE] [--dont_export_items] [--deleted_barcodes] [--clean] [--id_list_file=PATH] --filename=outputfile


 --format=FORMAT        FORMAT is either 'xml' or 'marc' (default)

 --date=DATE            DATE should be entered as the 'dateformat' syspref is
                        set (dd/mm/yyyy for metric, yyyy-mm-dd for iso,
                        mm/dd/yyyy for us) records exported are the ones that
                        have been modified since DATE

 --record-type=TYPE     TYPE is 'bibs' or 'auths'

 --deleted_barcodes     If used, a list of barcodes of items deleted since DATE
                        is produced (or from all deleted items if no date is
                        specified). Used only if TYPE is 'bibs'

 --clean                removes NSE/NSB

 --id_list_file=PATH    PATH is a path to a file containing a list of
                        IDs (biblionumber or authid) with one ID per line.
                        This list works as a filter; it is compatible with
                        other parameters for selecting records
_USAGE_
        exit;
    }

    # Default parameters values :
    $timestamp         ||= '';
    $dont_export_items ||= 0;
    $deleted_barcodes  ||= 0;
    $clean             ||= 0;
    $record_type       ||= "bibs";
    $id_list_file      ||= 0;

    # Redirect stdout; abort instead of silently writing nowhere when the
    # output file cannot be opened.
    if ($filename) {
        open STDOUT, '>', $filename
          or die "Cannot open '$filename' for writing: $!\n";
    }

}
else {

    $op       = $query->param("op")       || '';
    $filename = $query->param("filename") || 'koha.mrc';

    # The filename ends up in an HTTP header: remove every CR/LF, not only
    # the first one.
    $filename =~ s/[\r\n]//g;

}

# Default value for output_format is 'iso2709'
$output_format ||= 'iso2709';

# Retrocompatibility for the format parameter
$output_format = 'iso2709' if $output_format eq 'marc';
# Load the export template and check the caller's permissions.
# Authentication is not required when the script runs from the command line.
my ( $template, $loggedinuser, $cookie, $flags ) = get_template_and_user(
    {
        template_name   => "tools/export.tt",
        query           => $query,
        type            => "intranet",
        authnotrequired => $commandline,
        flagsrequired   => { tools => 'export_catalog' },
        debug           => 1,
    }
);

# 1 when IndependentBranches is on and the current (non-superlibrarian)
# user has a branch in their user environment, 0 otherwise.
my $limit_ind_branch = 0;
if (   C4::Context->preference('IndependentBranches')
    && C4::Context->userenv
    && !C4::Context->IsSuperLibrarian()
    && C4::Context->userenv->{branch} )
{
    $limit_ind_branch = 1;
}

# Branch requested through the form; overridden by the user's own branch
# under IndependentBranches for non-superlibrarians.
my $branch = $query->param("branch") || '';
$branch = C4::Context->userenv->{'branch'}
  if C4::Context->preference("IndependentBranches")
  && C4::Context->userenv
  && !C4::Context->IsSuperLibrarian();

# Directory the db/conf backup files are read from.
my $backupdir = C4::Context->config('backupdir');
# Main dispatch: either perform the export ($op eq "export") or display the
# export form.
if ( $op eq "export" ) {
    if ( $output_format eq "iso2709" or $output_format eq "xml" ) {
        my $charset  = 'utf-8';
        my $mimetype = 'application/octet-stream';
        binmode STDOUT, ':encoding(UTF-8)';

        # Compressed files are sent as raw bytes.
        if ( $filename =~ m/\.gz$/ ) {
            $mimetype = 'application/x-gzip';
            $charset  = '';
            binmode STDOUT;
        }
        elsif ( $filename =~ m/\.bz2$/ ) {
            $mimetype = 'application/x-bzip2';
            binmode STDOUT;
            $charset = '';
        }
        print $query->header(
            -type       => $mimetype,
            -charset    => $charset,
            -attachment => $filename
        ) unless ($commandline);

        $record_type = $query->param("record_type") unless ($commandline);

        # Avoid "uninitialized" warnings in the string comparisons below.
        $record_type //= '';
        my $export_remove_fields = $query->param("export_remove_fields");
        my @biblionumbers        = $query->param("biblionumbers");
        my @itemnumbers          = $query->param("itemnumbers");
        my @sql_params;
        my $sql_query;
        my @recordids;

        my $StartingBiblionumber = $query->param("StartingBiblionumber");
        my $EndingBiblionumber   = $query->param("EndingBiblionumber");
        my $itemtype             = $query->param("itemtype");
        my $start_callnumber     = $query->param("start_callnumber");
        my $end_callnumber       = $query->param("end_callnumber");
        $timestamp = ($timestamp) ? C4::Dates->new($timestamp) : ''
          if ($commandline);
        my $start_accession =
          ( $query->param("start_accession") )
          ? C4::Dates->new( $query->param("start_accession") )
          : '';
        my $end_accession =
          ( $query->param("end_accession") )
          ? C4::Dates->new( $query->param("end_accession") )
          : '';
        $dont_export_items = $query->param("dont_export_item")
          unless ($commandline);

        my $strip_nonlocal_items = $query->param("strip_nonlocal_items");

        # When listing deleted barcodes from the command line, the ids are
        # taken from the deleted* tables.
        my $biblioitemstable =
          ( $commandline and $deleted_barcodes )
          ? 'deletedbiblioitems'
          : 'biblioitems';
        my $itemstable =
          ( $commandline and $deleted_barcodes )
          ? 'deleteditems'
          : 'items';

        my $starting_authid = $query->param('starting_authid');
        my $ending_authid   = $query->param('ending_authid');
        my $authtype        = $query->param('authtype');

        # Optional list of ids (one per line) used as a filter on the result.
        my $filefh;
        if ($commandline) {
            if ($id_list_file) {
                open $filefh, "<", $id_list_file
                  or die "cannot open $id_list_file: $!";
            }
        }
        else {
            $filefh = $query->upload("id_list_file");
        }
        my %id_filter;
        if ($filefh) {
            while ( my $number = <$filefh> ) {
                $number =~ s/[\r\n]*$//;
                $id_filter{$number} = 1 if $number =~ /^\d+$/;
            }
        }

        if ( $record_type eq 'bibs' and not @biblionumbers ) {
            if ($timestamp) {

                # Specific query when timestamp is used
                # Actually it's used only with CLI and so all previous filters
                # are not used.
                # If one day timestamp is used via the web interface, this part
                # will certainly have to be rewritten
                my ( $built_sql, $bind_values ) = construct_query(
                    {
                        recordtype       => $record_type,
                        timestamp        => $timestamp,
                        biblioitemstable => $biblioitemstable,
                    }
                );
                $sql_query  = $built_sql;
                @sql_params = @$bind_values;

            }
            else {
                my ( $built_sql, $bind_values ) = construct_query(
                    {
                        recordtype           => $record_type,
                        biblioitemstable     => $biblioitemstable,
                        itemstable           => $itemstable,
                        StartingBiblionumber => $StartingBiblionumber,
                        EndingBiblionumber   => $EndingBiblionumber,
                        branch               => $branch,
                        start_callnumber     => $start_callnumber,
                        end_callnumber       => $end_callnumber,
                        start_accession      => $start_accession,
                        end_accession        => $end_accession,
                        itemtype             => $itemtype,
                    }
                );
                $sql_query  = $built_sql;
                @sql_params = @$bind_values;

            }
        }
        elsif ( $record_type eq 'auths' ) {
            my ( $built_sql, $bind_values ) = construct_query(
                {
                    recordtype      => $record_type,
                    starting_authid => $starting_authid,
                    ending_authid   => $ending_authid,
                    authtype        => $authtype,
                }
            );
            $sql_query  = $built_sql;
            @sql_params = @$bind_values;

        }
        elsif ( $record_type eq 'db' ) {
            my $successful_export;
            if ( $flags->{superlibrarian}
                && C4::Context->config('backup_db_via_tools') )
            {
                $successful_export = download_backup(
                    {
                        directory => "$backupdir",
                        extension => 'sql',
                        filename  => "$filename"
                    }
                );
            }
            unless ($successful_export) {
                my $remotehost = $query->remote_host();

                # Strip every CR/LF so the log line cannot be forged.
                $remotehost =~ s/[\r\n]//g;
                warn
"A suspicious attempt was made to download the db at '$filename' by someone at "
                  . $remotehost . "\n";
            }
            exit;
        }
        elsif ( $record_type eq 'conf' ) {
            my $successful_export;
            if ( $flags->{superlibrarian}
                && C4::Context->config('backup_conf_via_tools') )
            {
                $successful_export = download_backup(
                    {
                        directory => "$backupdir",
                        extension => 'tar',
                        filename  => "$filename"
                    }
                );
            }
            unless ($successful_export) {
                my $remotehost = $query->remote_host();

                # Strip every CR/LF so the log line cannot be forged.
                $remotehost =~ s/[\r\n]//g;
                warn
"A suspicious attempt was made to download the configuration at '$filename' by someone at "
                  . $remotehost . "\n";
            }
            exit;
        }
        elsif (@biblionumbers) {
            push @recordids, (@biblionumbers);
        }
        else {

            # Someone is trying to mess us up
            exit;
        }

        unless (@biblionumbers) {
            my $sth = $dbh->prepare($sql_query);
            $sth->execute(@sql_params);

            # Each row holds a single column: the record id.
            push @recordids, map { $_->[0] } @{ $sth->fetchall_arrayref };
            @recordids = grep { exists( $id_filter{$_} ) } @recordids
              if scalar(%id_filter);
        }

        my $xml_header_written = 0;
        for my $recordid ( uniq @recordids ) {
            if ($deleted_barcodes) {
                my $q = "
                    SELECT DISTINCT barcode
                    FROM deleteditems
                    WHERE deleteditems.biblionumber = ?
                ";
                my $sth = $dbh->prepare($q);
                $sth->execute($recordid);

                # List assignment keeps the loop going over NULL or "0"
                # barcodes; a scalar test would stop at the first false value.
                while ( my ($barcode) = $sth->fetchrow_array ) {
                    next unless defined $barcode;
                    print "$barcode\n";
                }
            }
            else {
                my $record;
                if ( $record_type eq 'bibs' ) {
                    $record = eval { GetMarcBiblio($recordid); };

                    next if $@;
                    next if not defined $record;
                    C4::Biblio::EmbedItemsInMarcBiblio( $record, $recordid,
                        \@itemnumbers )
                      unless $dont_export_items;
                    if (   $strip_nonlocal_items
                        || $limit_ind_branch
                        || $dont_export_items )
                    {
                        my ( $homebranchfield, $homebranchsubfield ) =
                          GetMarcFromKohaField( 'items.homebranch', '' );
                        for my $itemfield ( $record->field($homebranchfield) ) {

                            # if stripping nonlocal items, use loggedinuser's
                            # branch if they didn't select one
                            $branch = C4::Context->userenv->{'branch'}
                              unless $branch;
                            $record->delete_field($itemfield)
                              if ( $dont_export_items
                                || $itemfield->subfield($homebranchsubfield) ne
                                $branch );
                        }
                    }
                }
                elsif ( $record_type eq 'auths' ) {
                    $record = C4::AuthoritiesMarc::GetAuthority($recordid);
                    next if not defined $record;
                }

                # No record was loaded (unrecognised record type): there is
                # nothing to output for this id.
                next if not defined $record;

                if ($export_remove_fields) {
                    for my $f ( split / /, $export_remove_fields ) {
                        if ( $f =~ m/^(\d{3})(.)?$/ ) {
                            my ( $field, $subfield ) = ( $1, $2 );

                            # skip if this record doesn't have this field
                            my @tags = $record->field($field);
                            next unless @tags;

                            if ( defined $subfield ) {
                                foreach my $t (@tags) {
                                    $t->delete_subfields($subfield);
                                }
                            }
                            else {

                                # delete_fields() expects MARC::Field objects,
                                # not a tag.
                                $record->delete_fields(@tags);
                            }
                        }
                    }
                }
                RemoveAllNsb($record) if ($clean);
                if ( $output_format eq "xml" ) {
                    unless ($xml_header_written) {
                        MARC::File::XML->default_record_format(
                            (
                                     $marcflavour eq 'UNIMARC'
                                  && $record_type eq 'auths'
                            ) ? 'UNIMARCAUTH' : $marcflavour
                        );
                        print MARC::File::XML::header();
                        print "\n";
                        $xml_header_written = 1;
                    }
                    print MARC::File::XML::record($record);
                    print "\n";
                }
                else {

                    # Re-read the serialized record; skip it if that raises
                    # warnings or dies.
                    my $errorcount_on_decode = eval {
                        scalar(
                            MARC::File::USMARC->decode( $record->as_usmarc )
                              ->warnings() );
                    };
                    if ( $errorcount_on_decode or $@ ) {
                        warn $@ if $@;
                        warn
"record (number $recordid) is invalid and therefore not exported because its reopening generates warnings above";
                        next;
                    }
                    print $record->as_usmarc();
                }
            }
        }
        if ($xml_header_written) {
            print MARC::File::XML::footer();
            print "\n";
        }

        exit;
    }
    elsif ( $output_format eq "csv" ) {
        my @biblionumbers = uniq $query->param("biblionumbers");
        my @itemnumbers   = $query->param("itemnumbers");
        my $output =
          marc2csv( \@biblionumbers,
            GetCsvProfileId( C4::Context->preference('ExportWithCsvProfile') ),
            \@itemnumbers, );
        print $query->header(
            -type                        => 'application/octet-stream',
            -'Content-Transfer-Encoding' => 'binary',
            -attachment                  => "export.csv"
        );
        print $output;
        exit;
    }
}    # if export

else {

    # Build the data needed by the export form.
    my $itemtypes = GetItemTypes();
    my @itemtypesloop;
    foreach my $thisitemtype ( sort keys %$itemtypes ) {
        my %row = (
            value       => $thisitemtype,
            description => $itemtypes->{$thisitemtype}->{'description'},
        );
        push @itemtypesloop, \%row;
    }
    my $branches = GetBranches($limit_ind_branch);
    my @branchloop;
    for my $thisbranch (
        sort { $branches->{$a}->{branchname} cmp $branches->{$b}->{branchname} }
        keys %{$branches}
      )
    {
        push @branchloop,
          {
            value      => $thisbranch,
            selected   => $thisbranch eq $branch,
            branchname => $branches->{$thisbranch}->{'branchname'},
          };
    }

    my $authtypes = getauthtypes();
    my @authtypesloop;
    foreach my $thisauthtype ( sort keys %$authtypes ) {
        next unless $thisauthtype;
        my %row = (
            value       => $thisauthtype,
            description => $authtypes->{$thisauthtype}->{'authtypetext'},
        );
        push @authtypesloop, \%row;
    }

    # Backup downloads are offered to superlibrarians only, when enabled in
    # the configuration and when the backup directory exists.
    if (   $flags->{superlibrarian}
        && C4::Context->config('backup_db_via_tools')
        && $backupdir
        && -d $backupdir )
    {
        $template->{VARS}->{'allow_db_export'} = 1;
        $template->{VARS}->{'dbfiles'}         = getbackupfilelist(
            { directory => "$backupdir", extension => 'sql' } );
    }

    if (   $flags->{superlibrarian}
        && C4::Context->config('backup_conf_via_tools')
        && $backupdir
        && -d $backupdir )
    {
        $template->{VARS}->{'allow_conf_export'} = 1;
        $template->{VARS}->{'conffiles'}         = getbackupfilelist(
            { directory => "$backupdir", extension => 'tar' } );
    }

    $template->param(
        branchloop           => \@branchloop,
        itemtypeloop         => \@itemtypesloop,
        authtypeloop         => \@authtypesloop,
        export_remove_fields => C4::Context->preference("ExportRemoveFields"),
    );

    output_html_with_http_headers $query, $cookie, $template->output;
}
# construct_query( \%params )
#
# Builds the SQL statement selecting the ids of the records to export.
#
# Parameters (hashref):
#   recordtype           - "bibs" or "auths"
#   timestamp            - (bibs) object with an output('iso') method; when
#                          set, only the "modified since" query is built and
#                          the other bibs filters are ignored
#   biblioitemstable     - (bibs) name of the biblioitems table to query
#   itemstable           - (bibs) name of the items table to join
#   StartingBiblionumber, EndingBiblionumber, branch, start_callnumber,
#   end_callnumber, start_accession, end_accession, itemtype
#                        - (bibs) optional filters; the accession values are
#                          objects with an output('iso') method
#   starting_authid, ending_authid, authtype
#                        - (auths) optional filters
#
# Returns a two element list: the SQL string and an arrayref of bind values.
# The SQL string is undef for an unknown recordtype and for auths with a
# timestamp (not implemented).
sub construct_query {
    my ($params) = @_;

    my ( $sql_query, @sql_params );
    my $recordtype = $params->{recordtype} // '';

    if ( $recordtype eq "bibs" ) {
        if ( $params->{timestamp} ) {
            my $biblioitemstable = $params->{biblioitemstable};
            $sql_query = " (
                SELECT biblionumber
                FROM $biblioitemstable
                  LEFT JOIN items USING(biblionumber)
                WHERE $biblioitemstable.timestamp >= ?
                  OR items.timestamp >= ?
            ) UNION (
                SELECT biblionumber
                FROM $biblioitemstable
                  LEFT JOIN deleteditems USING(biblionumber)
                WHERE $biblioitemstable.timestamp >= ?
                  OR deleteditems.timestamp >= ?
            ) ";

            # Use the timestamp handed in by the caller rather than a
            # variable from the enclosing file scope.
            my $ts = $params->{timestamp}->output('iso');
            @sql_params = ( $ts, $ts, $ts, $ts );
        }
        else {
            my $biblioitemstable     = $params->{biblioitemstable};
            my $itemstable           = $params->{itemstable};
            my $StartingBiblionumber = $params->{StartingBiblionumber};
            my $EndingBiblionumber   = $params->{EndingBiblionumber};
            my $branch               = $params->{branch};
            my $start_callnumber     = $params->{start_callnumber};
            my $end_callnumber       = $params->{end_callnumber};
            my $start_accession      = $params->{start_accession};
            my $end_accession        = $params->{end_accession};
            my $itemtype             = $params->{itemtype};

            # The preference is only looked up when an item type filter was
            # actually requested.
            my $item_level_itypes = $itemtype
              && C4::Context->preference('item-level_itypes');

            # The items table is joined only when a filter needs one of its
            # columns.
            my $items_filter =
                 $branch
              || $start_callnumber
              || $end_callnumber
              || $start_accession
              || $end_accession
              || $item_level_itypes;
            $sql_query = $items_filter
              ? "SELECT DISTINCT $biblioitemstable.biblionumber
                FROM $biblioitemstable JOIN $itemstable
                USING (biblionumber) WHERE 1"
              : "SELECT $biblioitemstable.biblionumber FROM $biblioitemstable WHERE biblionumber >0 ";

            if ($StartingBiblionumber) {
                $sql_query .= " AND $biblioitemstable.biblionumber >= ? ";
                push @sql_params, $StartingBiblionumber;
            }

            if ($EndingBiblionumber) {
                $sql_query .= " AND $biblioitemstable.biblionumber <= ? ";
                push @sql_params, $EndingBiblionumber;
            }

            if ($branch) {
                $sql_query .= " AND homebranch = ? ";
                push @sql_params, $branch;
            }

            if ($start_callnumber) {
                $sql_query .= " AND itemcallnumber >= ? ";
                push @sql_params, $start_callnumber;
            }

            if ($end_callnumber) {
                $sql_query .= " AND itemcallnumber <= ? ";
                push @sql_params, $end_callnumber;
            }

            if ($start_accession) {
                $sql_query .= " AND dateaccessioned >= ? ";
                push @sql_params, $start_accession->output('iso');
            }

            if ($end_accession) {
                $sql_query .= " AND dateaccessioned <= ? ";
                push @sql_params, $end_accession->output('iso');
            }

            if ($itemtype) {

                # Qualify the column with the tables that are really part
                # of the query.
                $sql_query .=
                  $item_level_itypes
                  ? " AND $itemstable.itype = ? "
                  : " AND $biblioitemstable.itemtype = ?";
                push @sql_params, $itemtype;
            }
        }
    }
    elsif ( $recordtype eq "auths" ) {
        if ( $params->{timestamp} ) {

            #TODO
        }
        else {
            my $starting_authid = $params->{starting_authid};
            my $ending_authid   = $params->{ending_authid};
            my $authtype        = $params->{authtype};
            $sql_query =
              "SELECT DISTINCT auth_header.authid FROM auth_header WHERE 1";

            if ($starting_authid) {
                $sql_query .= " AND auth_header.authid >= ? ";
                push @sql_params, $starting_authid;
            }

            if ($ending_authid) {
                $sql_query .= " AND auth_header.authid <= ? ";
                push @sql_params, $ending_authid;
            }

            if ($authtype) {
                $sql_query .= " AND auth_header.authtypecode = ? ";
                push @sql_params, $authtype;
            }
        }
    }
    return ( $sql_query, \@sql_params );
}
# getbackupfilelist( { directory => $dir, extension => $ext } )
#
# Returns an arrayref with the names (sorted, without the directory part) of
# the readable regular files in $dir whose name ends in ".$ext", optionally
# followed by ".gz", ".bz2" or ".xz". The arrayref is empty when the
# directory cannot be opened.
sub getbackupfilelist {
    my $args      = shift;
    my $directory = $args->{directory};
    my $extension = $args->{extension};
    my @files;

    return \@files unless defined $directory && defined $extension;

    # Anchored at the end of the name and with the extension escaped, so the
    # list only contains names that download_backup() accepts.
    my $wanted = qr/\.\Q$extension\E(\.(gz|bz2|xz))?\z/;

    if ( opendir( my $dir, $directory ) ) {
        while ( defined( my $file = readdir($dir) ) ) {
            next unless ( $file =~ $wanted );
            push @files, $file
              if ( -f "$directory/$file" && -r "$directory/$file" );
        }
        closedir($dir);
    }

    # readdir order is unspecified; sort for a stable listing.
    @files = sort @files;
    return \@files;
}
# download_backup( { directory => $dir, extension => $ext, filename => $name } )
#
# Prints the content of the backup file $name, located in $dir, to STDOUT.
# The file name must end in ".$ext" (optionally followed by ".gz", ".bz2" or
# ".xz") and must not contain a "/", so nothing outside $dir can be read.
#
# Returns 1 when the file was sent, and nothing (false) when the request is
# rejected or the file cannot be read.
sub download_backup {
    my $args      = shift;
    my $directory = $args->{directory};
    my $extension = $args->{extension};
    my $filename  = $args->{filename};

    return unless ( $directory && -d $directory );
    return unless ( defined $filename && defined $extension );

    # \z instead of $ so a trailing newline is not accepted; the extension
    # is escaped so it is matched literally.
    return unless ( $filename =~ m/\.\Q$extension\E(\.(gz|bz2|xz))?\z/ );
    return if ( $filename =~ m#/# );
    $filename = "$directory/$filename";
    return unless ( -f $filename && -r $filename );
    return unless ( open( my $dump, '<', $filename ) );
    binmode $dump;

    # The file is copied byte for byte: drop any encoding layer set on
    # STDOUT so the data is not re-encoded on the way out.
    binmode STDOUT;

    while ( read( $dump, my $data, 64 * 1024 ) ) {
        print $data;
    }
    close($dump);
    return 1;
}