Bug 6758: [QA Follow-up] Typo popular vs populate
[koha.git] / misc / maintenance / sanitize_records.pl
blob7079ea06e0c8f4a3ca9f79a4e078794d8c0d6864
1 #!/usr/bin/perl
3 # This file is part of Koha.
5 # Copyright 2014 BibLibre
7 # Koha is free software; you can redistribute it and/or modify it
8 # under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # Koha is distributed in the hope that it will be useful, but
13 # WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with Koha; if not, see <http://www.gnu.org/licenses>.
20 use Modern::Perl;
21 use C4::Charset qw( SanitizeRecord );
22 use C4::Context;
23 use DBI;
24 use C4::Biblio;
25 use Getopt::Long;
26 use Pod::Usage;
# Command-line switches. Each variable is filled in by GetOptions below and
# several of them are read again further down in the script.
my (
    $help,    $verbose,  $confirm,     $biblionumbers,
    $reindex, $filename, $auto_search, $fix_ampersand
);

# Parse @ARGV; when GetOptions reports a failure, print the usage and exit 1.
my $result = GetOptions(
    'h|help'          => \$help,
    'v|verbose'       => \$verbose,
    'c|confirm'       => \$confirm,
    'biblionumbers:s' => \$biblionumbers,
    'reindex'         => \$reindex,
    'f|filename:s'    => \$filename,
    'auto-search'     => \$auto_search,
    'fix-ampersand'   => \$fix_ampersand,
) || pod2usage(1);

# Fixing ampersands is the only sanitization this script offers at the
# moment, so it is always switched on, whatever the command line said.
$fix_ampersand = 1;

pod2usage(0) if $help;

# Exactly one source of record numbers is accepted: a file, an explicit
# list, or the automatic search. Count how many were actually supplied.
my $source_count = grep { $_ } ( $filename, $biblionumbers, $auto_search );

if ( $source_count == 0 ) {
    pod2usage(
        -exitval => 1,
        -message =>
          qq{\n\tAt least one record number source should be provided.\n}
    );
}
elsif ( $source_count > 1 ) {
    pod2usage(
        -exitval => 1,
        -message => qq{\n\tOnly one record number source should be provided.\n}
    );
}
my @biblionumbers;

# Build the list of biblionumbers from the single source that was selected:
# the automatic search (biblios_to_sanitize), a file, or the --biblionumbers
# command-line list.
if ($auto_search) {
    @biblionumbers = biblios_to_sanitize();
}
elsif ($filename) {
    if ( -e $filename ) {
        open( my $fh, '<', $filename ) || die("Can't open $filename ($!)");
        while ( my $line = <$fh> ) {
            chomp $line;

            # The documented file format allows whitespace-separated numbers,
            # so split on any run of whitespace (tabs included) or commas,
            # not only on a single space or comma.
            push @biblionumbers, split( /[\s,]+/, $line );
        }
        close $fh;
    }
    else {
        pod2usage(
            -exitval => 1,
            -message =>
qq{\n\tThis filename does not exist. Please verify the path is correct.\n}
        );
    }
}
else {
    @biblionumbers = split m|,|, $biblionumbers if $biblionumbers;
}

# Strip leading and trailing whitespace from every entry.
s/(^\s*|\s*$)//g for @biblionumbers;

# Drop entries that are empty after trimming (blank lines, doubled
# separators, a leading separator).
@biblionumbers = grep { !/^$/ } @biblionumbers;

# An array concatenated with a string is evaluated in scalar context,
# which yields the number of entries.
say @biblionumbers . " records to process" if $verbose;
# Biblionumbers of the records that SanitizeRecord reported as modified.
my @changes;

for my $biblionumber (@biblionumbers) {
    print "processing record $biblionumber..." if $verbose;

    # Only purely numeric identifiers are looked up.
    unless ( $biblionumber =~ m|^\d+$| ) {
        say " skipping. ERROR: Invalid biblionumber." if $verbose;
        next;
    }

    my $record = C4::Biblio::GetMarcBiblio({ biblionumber => $biblionumber });
    unless ($record) {
        say " skipping. ERROR: Invalid record." if $verbose;
        next;
    }

    my ( $cleaned_record, $has_been_modified ) =
      C4::Charset::SanitizeRecord( $record, $biblionumber );

    if ($has_been_modified) {

        # GetFrameworkCode is keyed by biblionumber; handing it the record
        # object would not match any row and the framework code passed to
        # ModBiblio would be undefined.
        my $frameworkcode = C4::Biblio::GetFrameworkCode($biblionumber);

        # Without --confirm this is a dry run: the record is counted as
        # needing a change but nothing is written.
        C4::Biblio::ModBiblio( $cleaned_record, $biblionumber, $frameworkcode )
          if $confirm;
        push @changes, $biblionumber;
        say " Done!" if $verbose;
    }
    else {
        say " Nothing to do." if $verbose;
    }
}

# Summary line; the wording depends on whether changes were really saved.
if ($verbose) {
    say "Total: "
      . @changes
      . " records "
      . ( $confirm ? "cleaned!" : "to clean." );
}
# Reindex the modified records, but only when changes were actually saved
# (--confirm) and reindexing was requested (--reindex).
if ( $reindex and $confirm and @changes ) {
    say "Now, reindexing using -b -v" if $verbose;
    my $kohapath = C4::Context->config('intranetdir');

    # List-form system() bypasses the shell, so the path and the WHERE
    # clause reach rebuild_zebra.pl as-is, with no quoting or word-splitting
    # surprises. @changes only holds values that matched ^\d+$ earlier.
    my @cmd = (
        "$kohapath/misc/migration_tools/rebuild_zebra.pl",
        '-b',
        '-v',
        '-where',
        'biblionumber IN ( ' . join( ',', @changes ) . ' )',
    );

    my $status = system(@cmd);

    # Report a failed reindex instead of ignoring it; the records themselves
    # have already been saved, so this is a warning rather than a fatal error.
    if ( $status == -1 ) {
        warn "Could not run $cmd[0]: $!\n";
    }
    elsif ( $status != 0 ) {
        warn "$cmd[0] failed (wait status $status)\n";
    }
}
# biblios_to_sanitize
#
# Returns the list of biblionumbers whose MARCXML metadata matches the
# LIKE pattern below, restricted to the MARC flavour given by the
# 'marcflavour' system preference. Takes no arguments.
sub biblios_to_sanitize {
    my $dbh = C4::Context->dbh;

    # The pattern is a single-quoted SQL string literal: a double-quoted
    # token is parsed as an identifier when the server runs in ANSI_QUOTES
    # mode, which would break the query.
    my $query = q{
        SELECT biblionumber
        FROM biblio_metadata
        WHERE format = 'marcxml'
            AND marcflavour = ?
            AND metadata LIKE '%&amp;amp;%'
    };

    # selectcol_arrayref returns the first column of every row; no
    # statement attributes are needed (Slice only applies to
    # selectall_arrayref), hence the undef.
    my $marcflavour = C4::Context->preference('marcflavour');
    return @{ $dbh->selectcol_arrayref( $query, undef, $marcflavour ) };
}
161 =head1 NAME
163 sanitize_records - This script sanitizes a record.
165 =head1 SYNOPSIS
167 sanitize_records.pl [-h|--help] [-v|--verbose] [-c|--confirm] [--biblionumbers=BIBLIONUMBER_LIST] [-f|--filename=FILENAME] [--auto-search] [--reindex] [--fix-ampersand]
169 You can either give some biblionumbers or a file with biblionumbers or ask for an auto-search.
171 =head1 OPTIONS
173 =over
175 =item B<-h|--help>
177 Print a brief help message
179 =item B<-v|--verbose>
181 Verbose mode.
183 =item B<-c|--confirm>
185 This flag must be provided in order for the script to actually
186 sanitize records. If it is not supplied, the script will
187 only report on the record list to process.
189 =item B<--biblionumbers=BIBLIONUMBER_LIST>
191 Give a biblionumber list using this parameter. They must be separated by
192 commas.
194 =item B<-f|--filename=FILENAME>
196 Give a biblionumber list from a file. Put one biblionumber per line, or several on a line separated by spaces or commas.
198 =item B<--auto-search>
200 Automatically search records containing "&amp;" in biblio_metadata.metadata or in the specified fields.
202 =item B<--fix-ampersand>
204 Replace '&amp;' by '&' in the records.
205 Replace '&amp;amp;amp;etc.' with '&amp;' in the records.
207 =item B<--reindex>
209 Reindex the modified records.
211 =back
213 =head1 AUTHOR
215 Alex Arnaud <alex.arnaud@biblibre.com>
216 Christophe Croullebois <christophe.croullebois@biblibre.com>
217 Jonathan Druart <jonathan.druart@biblibre.com>
219 =head1 COPYRIGHT
221 Copyright 2014 BibLibre
223 =head1 LICENSE
225 This file is part of Koha.
227 Koha is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software
228 Foundation; either version 3 of the License, or (at your option) any later version.
230 You should have received a copy of the GNU General Public License along
231 with Koha; if not, write to the Free Software Foundation, Inc.,
232 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
234 =cut