Bug 16214: Fix typo 'To.jon' -> 'To.json'
[koha.git] / misc / maintenance / sanitize_records.pl
blobb9caa4bfb7e8e47736091fded92e6b7728557bf3
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Copyright 2014 BibLibre
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use Modern::Perl;
21 use C4::Charset qw( SanitizeRecord );
22 use C4::Context;
23 use DBI;
24 use C4::Biblio;
25 use Getopt::Long;
26 use Pod::Usage;
# Command-line switches; each one stays undefined unless GetOptions sets it.
my (
    $help,    $verbose,  $confirm,     $biblionumbers,
    $reindex, $filename, $auto_search, $fix_ampersand
);
my $result = GetOptions(
    'h|help'          => \$help,
    'v|verbose'       => \$verbose,
    'c|confirm'       => \$confirm,
    'biblionumbers:s' => \$biblionumbers,
    'reindex'         => \$reindex,
    'f|filename:s'    => \$filename,
    'auto-search'     => \$auto_search,
    'fix-ampersand'   => \$fix_ampersand,
) || pod2usage(1);

# Fixing ampersands is the only sanitization this script performs at the
# moment, so it is always switched on, whether or not --fix-ampersand was
# passed.
$fix_ampersand = 1;

# --help wins over everything else: print the usage and exit successfully.
pod2usage(0) if $help;

# Exactly one source of record numbers (file, explicit list or auto-search)
# must be supplied; count how many were actually given.
my $source_count = grep { $_ } ( $filename, $biblionumbers, $auto_search );

if ( $source_count == 0 ) {
    pod2usage(
        -exitval => 1,
        -message =>
          qq{\n\tAt least one record number source should be provided.\n}
    );
}
elsif ( $source_count > 1 ) {
    pod2usage(
        -exitval => 1,
        -message => qq{\n\tOnly one record number source should be provided.\n}
    );
}
# Biblionumbers to process, gathered from exactly one of the three sources.
my @biblionumbers;

# The numbers come either from the database (auto-search), from a file, or
# directly from the --biblionumbers command-line option.
if ($auto_search) {
    @biblionumbers = biblios_to_sanitize();
}
elsif ($filename) {
    if ( -e $filename ) {
        open( my $fh, '<', $filename ) || die("Can't open $filename ($!)");
        while ( my $line = <$fh> ) {
            chomp $line;

            # Numbers on one line may be separated by commas or by any
            # whitespace (tabs included), as the --filename documentation
            # promises.  Empty fields are filtered out further down.
            push @biblionumbers, split( /[\s,]+/, $line );
        }
        close $fh;
    }
    else {
        pod2usage(
            -exitval => 1,
            -message =>
qq{\n\tThis filename does not exist. Please verify the path is correct.\n}
        );
    }
}
else {
    @biblionumbers = split m|,|, $biblionumbers if $biblionumbers;
}

# Strip leading and trailing whitespace from every entry
s/(^\s*|\s*$)//g for @biblionumbers;

# Drop entries that are empty after trimming
@biblionumbers = grep { !/^$/ } @biblionumbers;

# An array concatenated with a string is evaluated in scalar context, i.e.
# this prints the number of entries.
say @biblionumbers . " records to process" if $verbose;
# Biblionumbers of the records that SanitizeRecord reported as modified.
# They are collected in dry-run mode too, so the summary can say how many
# records would be cleaned.
my @changes;
for my $biblionumber (@biblionumbers) {
    print "processing record $biblionumber..." if $verbose;

    # Anything that is not a plain run of digits cannot be a biblionumber.
    unless ( $biblionumber =~ m|^\d+$| ) {
        say " skipping. ERROR: Invalid biblionumber." if $verbose;
        next;
    }
    my $record = C4::Biblio::GetMarcBiblio($biblionumber);
    unless ($record) {
        say " skipping. ERROR: Invalid record." if $verbose;
        next;
    }

    my ( $cleaned_record, $has_been_modified ) =
      C4::Charset::SanitizeRecord( $record, $biblionumber );
    if ($has_been_modified) {

        # GetFrameworkCode() looks the framework up by biblionumber.  Passing
        # the MARC record object (as this script used to) makes the lookup
        # fail, so the record would be saved without its framework code.
        my $frameworkcode = C4::Biblio::GetFrameworkCode($biblionumber);

        # The database is only written to when --confirm was given.
        C4::Biblio::ModBiblio( $cleaned_record, $biblionumber, $frameworkcode )
          if $confirm;
        push @changes, $biblionumber;
        say " Done!" if $verbose;
    }
    else {
        say " Nothing to do." if $verbose;
    }
}

# Summary line; the wording depends on whether the changes were really saved.
if ($verbose) {
    say "Total: "
      . @changes
      . " records "
      . ( $confirm ? "cleaned!" : "to clean." );
}
# Reindex the modified records, but only when they were really saved
# (--confirm) and there is at least one of them.
if ( $reindex and $confirm and @changes ) {
    say "Now, reindexing using -b -v" if $verbose;
    my $kohapath = C4::Context->config('intranetdir');

    # The command is passed to system() as a list, so no shell is involved:
    # the -where clause needs no quoting and a path containing spaces works.
    my @cmd = (
        "$kohapath/misc/migration_tools/rebuild_zebra.pl",
        '-b',
        '-v',
        '-where',
        'biblionumber IN ( ' . join( ',', @changes ) . ' )',
    );

    # Report a failed reindex instead of silently ignoring it.  $? is -1
    # when the program could not be started at all; $! then holds the reason.
    if ( system(@cmd) != 0 ) {
        warn "ERROR: rebuild_zebra.pl failed (status $?)"
          . ( $? == -1 ? ": $!" : "" ) . "\n";
    }
}
# Return, as a list, the biblionumber of every biblioitems row whose marcxml
# column matches the LIKE pattern in the query below.  Used for --auto-search.
sub biblios_to_sanitize {
    my $sql = q{
        SELECT biblionumber
        FROM biblioitems
        WHERE marcxml
        LIKE "%&amp;amp;%"
    };

    # selectcol_arrayref returns a reference to the array of first-column
    # values; dereference it so that callers receive a plain list.
    my $found =
      C4::Context->dbh->selectcol_arrayref( $sql, { Slice => {} }, );
    return @{$found};
}
160 =head1 NAME
162 sanitize_records - This script sanitizes a record.
164 =head1 SYNOPSIS
166 sanitize_records.pl [-h|--help] [-v|--verbose] [-c|--confirm] [--biblionumbers=BIBLIONUMBER_LIST] [-f|--filename=FILENAME] [--auto-search] [--reindex] [--fix-ampersand]
168 You can either give some biblionumbers or a file with biblionumbers or ask for an auto-search.
170 =head1 OPTIONS
172 =over
174 =item B<-h|--help>
176 Print a brief help message
178 =item B<-v|--verbose>
180 Verbose mode.
182 =item B<-c|--confirm>
184 This flag must be provided in order for the script to actually
185 sanitize records. If it is not supplied, the script will
186 only report on the record list to process.
188 =item B<--biblionumbers=BIBLIONUMBER_LIST>
190 Give a biblionumber list using this parameter. They must be separated by
191 commas.
193 =item B<-f|--filename=FILENAME>
195 Give a biblionumber list using a filename. Put one biblionumber per line, or separate several on a line with spaces or commas.
197 =item B<--auto-search>
199 Automatically search records containing "&amp;" in biblioitems.marcxml or in the specified fields.
201 =item B<--fix-ampersand>
203 Replace '&amp;' by '&' in the records.
204 Replace '&amp;amp;amp;etc.' with '&amp;' in the records.
206 =item B<--reindex>
208 Reindex the modified records.
210 =back
212 =head1 AUTHOR
214 Alex Arnaud <alex.arnaud@biblibre.com>
215 Christophe Croullebois <christophe.croullebois@biblibre.com>
216 Jonathan Druart <jonathan.druart@biblibre.com>
218 =head1 COPYRIGHT
220 Copyright 2014 BibLibre
222 =head1 LICENSE
224 This file is part of Koha.
226 Koha is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
227 Foundation; either version 3 of the License, or (at your option) any later version.
229 You should have received a copy of the GNU General Public License along
230 with Koha; if not, write to the Free Software Foundation, Inc.,
231 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
233 =cut