Fewer macros in xheader.c
[tar.git] / scripts / tar-snapshot-edit
blob1e51b406a6093d36fd533a3f8c8ddd17c077d476
1 #! /usr/bin/perl -w
2 # Display and edit the 'dev' field in tar's snapshots
3 # Copyright 2007-2024 Free Software Foundation, Inc.
5 # This file is part of GNU tar.
7 # GNU tar is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3 of the License, or
10 # (at your option) any later version.
12 # GNU tar is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 # tar-snapshot-edit
23 # This script is capable of replacing values in the 'dev' field of an
24 # incremental backup 'snapshot' file. This is useful when the device
25 # used to store files in a tar archive changes, without the files
26 # themselves changing. This may happen when, for example, a device
27 # driver changes major or minor numbers.
29 # It can also run a check on all the field values found in the
30 # snapshot file, printing out a detailed message when it finds values
31 # that would cause an "Unexpected field value in snapshot file",
32 # "Numerical result out of range", or "Invalid argument" error
33 # if tar were run using that snapshot file as input. (See the
34 # comments included in the definition of the check_field_values
35 # routine for more detailed information regarding these checks.)
39 # Author: Dustin J. Mitchell <dustin@zmanda.com>
41 # Modified Aug 25, 2011 by Nathan Stratton Treadway <nathanst AT ontko.com>:
42 # * update Perl syntax to work correctly with more recent versions of
43 # Perl. (The original code worked with in the v5.8 timeframe but
44 # not with Perl v5.10.1 and later.)
45 # * added a "-c" option to check the snapshot file for invalid field values.
46 # * handle NFS indicator character ("+") in version 0 and 1 files
47 # * preserve the original header/version line when editing version 1
48 # or 2 files.
49 # * tweak output formatting
51 # Modified March 13, 2013 by Nathan Stratton Treadway <nathanst AT ontko.com>:
52 # * configure field ranges used for -c option based on the system
53 # architecture (in response to the December 2012 update to GNU tar
54 # enabling support for systems with signed dev_t values).
55 # * when printing the list of device ids found in the snapshot file
56 # (when run in the default mode), print the raw device id values
57 # instead of the hex-string version in those cases where they
58 # can't be converted successfully.
60 use Getopt::Std;
61 use Config;
63 my %snapshot_field_ranges; # used in check_field_values function
65 ## reading
67 sub read_incr_db ($) {
68 my $filename = shift;
69 open(my $file, "<$filename") || die "Could not open '$filename' for reading";
71 my $header_str = <$file>;
72 my $file_version;
73 if ($header_str =~ /^GNU tar-[^-]*-([0-9]+)\n$/) {
74 $file_version = $1+0;
75 } else {
76 $file_version = 0;
79 print "\nFile: $filename\n";
80 print " Detected snapshot file version: $file_version\n\n";
82 if ($file_version == 0) {
83 return read_incr_db_0($file, $header_str);
84 } elsif ($file_version == 1) {
85 return read_incr_db_1($file, $header_str);
86 } elsif ($file_version == 2) {
87 return read_incr_db_2($file, $header_str);
88 } else {
89 die "Unrecognized snapshot version in header '$header_str'";
93 sub read_incr_db_0 ($$) {
94 my $file = shift;
95 my $header_str = shift;
97 my $hdr_timestamp_sec = $header_str;
98 chop $hdr_timestamp_sec;
99 my $hdr_timestamp_nsec = ''; # not present in file format 0
101 my $nfs;
102 my @dirs;
104 while (<$file>) {
105 /^(\+?)([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
107 if ( $1 eq "+" ) {
108 $nfs="1";
109 } else {
110 $nfs="0";
112 push @dirs, { nfs=>$nfs,
113 dev=>$2,
114 ino=>$3,
115 name=>$4 };
118 close($file);
120 # file version, timestamp, timestamp, dir list, file header line
121 return [ 0, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, ""];
124 sub read_incr_db_1 ($$) {
125 my $file = shift;
126 my $header_str = shift;
129 my $timestamp = <$file>; # "sec nsec"
130 my ($hdr_timestamp_sec, $hdr_timestamp_nsec) = ($timestamp =~ /([0-9]*) ([0-9]*)/);
132 my $nfs;
133 my @dirs;
135 while (<$file>) {
136 /^(\+?)([0-9]*) ([0-9]*) ([0-9]*) ([0-9]*) (.*)\n$/ || die("Bad snapshot line $_");
138 if ( $1 eq "+" ) {
139 $nfs="1";
140 } else {
141 $nfs="0";
144 push @dirs, { nfs=>$nfs,
145 timestamp_sec=>$2,
146 timestamp_nsec=>$3,
147 dev=>$4,
148 ino=>$5,
149 name=>$6 };
152 close($file);
154 # file version, timestamp, timestamp, dir list, file header line
155 return [ 1, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str ];
158 sub read_incr_db_2 ($$) {
159 my $file = shift;
160 my $header_str = shift;
162 $/="\0"; # $INPUT_RECORD_SEPARATOR
163 my $hdr_timestamp_sec = <$file>;
164 chop $hdr_timestamp_sec;
165 my $hdr_timestamp_nsec = <$file>;
166 chop $hdr_timestamp_nsec;
167 my @dirs;
169 while (1) {
170 last if eof($file);
172 my $nfs = <$file>;
173 my $timestamp_sec = <$file>;
174 my $timestamp_nsec = <$file>;
175 my $dev = <$file>;
176 my $ino = <$file>;
177 my $name = <$file>;
179 # get rid of trailing NULs
180 chop $nfs;
181 chop $timestamp_sec;
182 chop $timestamp_nsec;
183 chop $dev;
184 chop $ino;
185 chop $name;
187 my @dirents;
188 while (my $dirent = <$file>) {
189 chop $dirent;
190 push @dirents, $dirent;
191 last if ($dirent eq "");
193 die "missing terminator" unless (<$file> eq "\0");
195 push @dirs, { nfs=>$nfs,
196 timestamp_sec=>$timestamp_sec,
197 timestamp_nsec=>$timestamp_nsec,
198 dev=>$dev,
199 ino=>$ino,
200 name=>$name,
201 dirents=>\@dirents };
204 close($file);
205 $/ = "\n"; # reset to normal
207 # file version, timestamp, timestamp, dir list, file header line
208 return [ 2, $hdr_timestamp_sec, $hdr_timestamp_nsec, \@dirs, $header_str];
211 ## display
213 sub show_device_counts ($) {
214 my $info = shift;
215 my %devices;
216 foreach my $dir (@{$info->[3]}) {
217 my $dev = $dir->{'dev'};
218 $devices{$dev}++;
221 my $devstr;
222 foreach $dev (sort {$a <=> $b} keys %devices) {
223 $devstr = sprintf ("0x%04x", $dev);
224 if ( $dev > 0xffffffff or $dev < 0 or hex($devstr) != $dev ) {
225 # sprintf "%x" will not return a useful value for device ids
226 # that are negative or which overflow the integer size on this
227 # instance of Perl, so we convert the hex string back to a
228 # number, and if it doesn't (numerically) equal the original
229 # device id value, we know the hex conversion hasn't worked.
231 # Unfortunately, since we're running in "-w" mode, Perl will
232 # also print a warning message if the hex() routine is called
233 # on anything larger than "0xffffffff", even in 64-bit Perl
234 # where such values are actually supported... so we have to
235 # avoid calling hex() at all if the device id is too large or
236 # negative. (If it's negative, the conversion to an unsigned
237 # integer for the "%x" specifier will mean the result will
238 # always trigger hex()'s warning on a 64-bit machine.)
240 # These situations don't seem to occur very often, so for now
241 # when they do occur, we simply print the original text value
242 # that was read from the snapshot file; it will look a bit
243 # funny next to the values that do print in hex, but that's
244 # preferable to printing values that aren't actually correct.
245 $devstr = $dev;
247 printf " Device %s occurs $devices{$dev} times.\n", $devstr;
251 ## check field values
253 # initializes the global %snapshot_field_ranges hash, based on the "-a"
254 # command-line option if given, otherwise based on the "archname" of
255 # the current system.
257 # Each value in the hash is a two-element array containing the minimum
258 # and maximum allowed values, respectively, for that field in the snapshot
259 # file. GNU tar's allowed values for each architecture are determined
260 # in the incremen.c source file, where the TYPE_MIN and TYPE_MAX
261 # pre-processor expressions are used to determine the range that can be
262 # expressed by the C data type used for each field; the values in the
263 # array defined below should match those calculations. (For tar v1.27
264 # and later, the valid ranges for a particular tar binary can easily
265 # be determined using the "tar --show-snapshot-field-ranges" command.)
267 sub choose_architecture ($) {
268 my $opt_a = shift;
270 my $arch = $opt_a ? $opt_a : $Config{'archname'};
272 # These ranges apply to Linux 2.4/2.6 on iX86 systems, but are used
273 # by default on unrecognized/unsupported systems, too.
274 %iX86_linux_field_ranges = (
275 timestamp_sec => [ -2147483648, 2147483647 ], # min/max of time_t
276 timestamp_nsec => [ 0, 999999999 ], # 0 to BILLION-1
277 nfs => [ 0, 1 ],
278 dev => [ 0, 18446744073709551615 ], # min/max of dev_t
279 ino => [ 0, 4294967295 ], # min/max of ino_t
283 if ( $arch =~ m/^i[\dxX]86-linux/i ) {
284 %snapshot_field_ranges = %iX86_linux_field_ranges;
285 print "Checking snapshot field values using \"iX86-linux\" ranges.\n\n";
286 } elsif ( $arch =~ m/^x86_64-linux/i ) {
287 %snapshot_field_ranges = (
288 timestamp_sec => [ -9223372036854775808, 9223372036854775807 ],
289 timestamp_nsec => [ 0, 999999999 ],
290 nfs => [ 0, 1 ],
291 dev => [ 0, 18446744073709551615 ],
292 ino => [ 0, 18446744073709551615 ],
294 print "Checking snapshot field values using \"x86_64-linux\" ranges.\n\n";
295 } elsif ( $arch =~ m/^IA64.ARCHREV_0/i ) {
296 # HP/UX running on Itanium/ia64 architecture
297 %snapshot_field_ranges = (
298 timestamp_sec => [ -2147483648, 2147483647 ],
299 timestamp_nsec => [ 0, 999999999 ],
300 nfs => [ 0, 1 ],
301 dev => [ -2147483648, 2147483647 ],
302 ino => [ 0, 4294967295 ],
304 print "Checking snapshot field values using \"IA64.ARCHREV_0\" (HP/UX) ranges.\n\n";
305 } else {
306 %snapshot_field_ranges = %iX86_linux_field_ranges;
307 print "Unrecognized architecture \"$arch\"; defaulting to \"iX86-linux\".\n";
308 print "(Use -a option to override.)\n" unless $opt_a;
309 print "\n";
312 if ( ref(1) ne "" ) {
313 print "(\"bignum\" mode is in effect; skipping 64-bit-integer check.)\n\n"
314 } else {
315 # find the largest max value in the current set of ranges
316 my $maxmax = 0;
317 for $v (values %snapshot_field_ranges ) {
318 $maxmax = $v->[1] if ($v->[1] > $maxmax);
321 # "~0" translates into a platform-native integer with all bits turned
322 # on -- that is, the largest value that can be represented as
323 # an integer. We print a warning if our $maxmax value is greater
324 # than that largest integer, since in that case Perl will switch
325 # to using floats for those large max values. The wording of
326 # the message assumes that the only way this situation can exist
327 # is that the platform uses 32-bit integers but some of the
328 # snapshot-file fields have 64-bit values.
329 if ( ~0 < $maxmax ) {
330 print <<EOF
331 Note: this version of Perl uses 32-bit integers, which means that it
332 will switch to using floating-point numbers when checking the ranges
333 for 64-bit snapshot-file fields. This normally will work fine, but
334 might fail to detect cases where the value in the input field value is
335 only slightly out of range. (For example, a "9223372036854775808"
336 might not be recognized as being larger than 9223372036854775807.)
337 If you suspect you are experiencing this problem, you can try running
338 the program using the "-Mbignum" option, as in
339 \$ perl $0 -Mbignum -c [FILES]
340 (but doing so will make the program run *much* slower).
349 # returns a warning message if $field_value isn't a valid string
350 # representation of an integer, or if the resulting integer is out of range
351 # defined by the two-element array retrieved using up the $field_name key in
352 # the global %snapshot_field_ranges hash.
353 sub validate_integer_field ($$) {
354 my $field_value = shift;
355 my $field_name = shift;
357 my ($min, $max) = @{$snapshot_field_ranges{$field_name}};
359 my $msg = "";
361 if ( not $field_value =~ /^-?\d+$/ ) {
362 $msg = " $field_name value contains invalid characters: \"$field_value\"\n";
363 } else {
364 if ( $field_value < $min ) {
365 $msg = " $field_name value too low: \"$field_value\" < $min \n";
366 } elsif ( $field_value > $max ) {
367 $msg = " $field_name value too high: \"$field_value\" > $max \n";
370 return $msg;
374 # This routine loops through each directory entry in the $info data
375 # structure and prints a warning message if tar would abort with an
376 # "Unexpected field value in snapshot file", "Numerical result out of
377 # range", or "Invalid argument" error upon reading this snapshot file.
379 # (Note that the "Unexpected field value in snapshot file" error message
380 # was introduced along with the change to snapshot file format "2",
381 # starting with tar v1.16 [or, more precisely, v1.15.91], while the
382 # other two were introduced in v1.27.)
384 # The checks here are intended to match those found in the incremen.c
385 # source file. See the choose_architecture() function (above) for more
386 # information on how to configure the range of values considered valid
387 # by this script.
389 # (Note: the checks here are taken from the code that processes
390 # version 2 snapshot files, but to keep things simple we apply those
391 # same checks to files having earlier versions -- but only for
392 # the fields that actually exist in those input files.)
394 sub check_field_values ($) {
395 my $info = shift;
397 my $msg;
398 my $error_found = 0;
400 print " Checking field values in snapshot file...\n";
402 $snapver = $info->[0];
404 $msg = "";
405 $msg .= validate_integer_field($info->[1], 'timestamp_sec');
406 if ($snapver >= 1) {
407 $msg .= validate_integer_field($info->[2], 'timestamp_nsec');
409 if ( $msg ne "" ) {
410 $error_found = 1;
411 print "\n snapshot file header:\n";
412 print $msg;
416 foreach my $dir (@{$info->[3]}) {
418 $msg = "";
420 $msg .= validate_integer_field($dir->{'nfs'}, 'nfs');
421 if ($snapver >= 1) {
422 $msg .= validate_integer_field($dir->{'timestamp_sec'}, 'timestamp_sec');
423 $msg .= validate_integer_field($dir->{'timestamp_nsec'}, 'timestamp_nsec');
425 $msg .= validate_integer_field($dir->{'dev'}, 'dev');
426 $msg .= validate_integer_field($dir->{'ino'}, 'ino');
428 if ( $msg ne "" ) {
429 $error_found = 1;
430 print "\n directory: $dir->{'name'}\n";
431 print $msg;
435 print "\n Snapshot field value check complete" ,
436 $error_found ? "" : ", no errors found" ,
437 ".\n";
440 ## editing
442 sub replace_device_number ($@) {
443 my $info = shift(@_);
444 my @repl = @_;
446 my $count = 0;
448 foreach my $dir (@{$info->[3]}) {
449 foreach $x (@repl) {
450 if ($dir->{'dev'} eq $$x[0]) {
451 $dir->{'dev'} = $$x[1];
452 $count++;
453 last;
457 print " Updated $count records.\n"
460 ## writing
462 sub write_incr_db ($$) {
463 my $info = shift;
464 my $filename = shift;
465 my $file_version = $$info[0];
467 open($file, ">$filename") || die "Could not open '$filename' for writing";
469 if ($file_version == 0) {
470 write_incr_db_0($info, $file);
471 } elsif ($file_version == 1) {
472 write_incr_db_1($info, $file);
473 } elsif ($file_version == 2) {
474 write_incr_db_2($info, $file);
475 } else {
476 die "Unknown file version $file_version.";
479 close($file);
482 sub write_incr_db_0 ($$) {
483 my $info = shift;
484 my $file = shift;
486 my $timestamp_sec = $info->[1];
487 print $file "$timestamp_sec\n";
489 foreach my $dir (@{$info->[3]}) {
490 if ($dir->{'nfs'}) {
491 print $file '+'
493 print $file "$dir->{'dev'} ";
494 print $file "$dir->{'ino'} ";
495 print $file "$dir->{'name'}\n";
500 sub write_incr_db_1 ($$) {
501 my $info = shift;
502 my $file = shift;
504 print $file $info->[4];
506 my $timestamp_sec = $info->[1];
507 my $timestamp_nsec = $info->[2];
508 print $file "$timestamp_sec $timestamp_nsec\n";
510 foreach my $dir (@{$info->[3]}) {
511 if ($dir->{'nfs'}) {
512 print $file '+'
514 print $file "$dir->{'timestamp_sec'} ";
515 print $file "$dir->{'timestamp_nsec'} ";
516 print $file "$dir->{'dev'} ";
517 print $file "$dir->{'ino'} ";
518 print $file "$dir->{'name'}\n";
523 sub write_incr_db_2 ($$) {
524 my $info = shift;
525 my $file = shift;
527 print $file $info->[4];
529 my $timestamp_sec = $info->[1];
530 my $timestamp_nsec = $info->[2];
531 print $file $timestamp_sec . "\0";
532 print $file $timestamp_nsec . "\0";
534 foreach my $dir (@{$info->[3]}) {
535 print $file $dir->{'nfs'} . "\0";
536 print $file $dir->{'timestamp_sec'} . "\0";
537 print $file $dir->{'timestamp_nsec'} . "\0";
538 print $file $dir->{'dev'} . "\0";
539 print $file $dir->{'ino'} . "\0";
540 print $file $dir->{'name'} . "\0";
541 foreach my $dirent (@{$dir->{'dirents'}}) {
542 print $file $dirent . "\0";
544 print $file "\0";
548 ## main
550 sub main {
551 our ($opt_b, $opt_r, $opt_h, $opt_c, $opt_a);
552 getopts('br:hca:');
553 HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) ||
554 ($opt_a && !$opt_c) || ($opt_r && $opt_c) );
556 my @repl;
557 if ($opt_r) {
558 foreach my $spec (split(/,/, $opt_r)) {
559 ($spec =~ /^([^-]+)-([^-]+)/) || die "Invalid replacement specification '$opt_r'";
560 push @repl, [interpret_dev($1), interpret_dev($2)];
564 choose_architecture($opt_a) if ($opt_c);
566 foreach my $snapfile (@ARGV) {
567 my $info = read_incr_db($snapfile);
568 if ($opt_r) {
569 if ($opt_b) {
570 rename($snapfile, $snapfile . "~") || die "Could not rename '$snapfile' to backup";
573 replace_device_number($info, @repl);
574 write_incr_db($info, $snapfile);
575 } elsif ($opt_c) {
576 check_field_values($info);
577 } else {
578 show_device_counts($info);
583 sub HELP_MESSAGE {
584 print <<EOF;
586 Usage:
587 tar-snapshot-edit SNAPFILE [SNAPFILE [...]]
588 tar-snapshot-edit -r 'DEV1-DEV2[,DEV3-DEV4...]' [-b] SNAPFILE [SNAPFILE [...]]
589 tar-snapshot-edit -c [-aARCH] SNAPFILE [SNAPFILE [...]]
591 With no options specified: print a summary of the 'device' values
592 found in each SNAPFILE.
594 With -r: replace occurrences of DEV1 with DEV2 in each SNAPFILE.
595 DEV1 and DEV2 may be specified in hex (e.g., 0xfe01), decimal (e.g.,
596 65025), or MAJ:MIN (e.g., 254:1). To replace multiple occurrences,
597 separate them with commas. If -b is also specified, backup files
598 (ending with '~') will be created.
600 With -c: Check the field values in each SNAPFILE and print warning
601 messages if any invalid values are found. (An invalid value is one
602 that would cause \"tar\" to abort with an error message such as
603 Unexpected field value in snapshot file
604 Numerical result out of range
606 Invalid argument
607 as it processed the snapshot file.)
609 Normally the program automatically chooses the valid ranges for
610 the fields based on the current system's architecture, but the
611 -a option can be used to override the selection, e.g. in order
612 to validate a snapshot file generated on a some other system.
613 (Currently only three architectures are supported, "iX86-linux",
614 "x86_64-linux", and "IA64.ARCHREV_0" [HP/UX running on Itanium/ia64],
615 and if the current system isn't recognized, then the iX86-linux
616 values are used by default.)
619 exit 1;
622 sub interpret_dev ($) {
623 my $dev = shift;
625 if ($dev =~ /^([0-9]+):([0-9]+)$/) {
626 return $1 * 256 + $2;
627 } elsif ($dev =~ /^0x[0-9a-fA-F]+$/) {
628 return oct $dev;
629 } elsif ($dev =~ /^[0-9]+$/) {
630 return $dev+0;
631 } else {
632 die "Invalid device specification '$dev'";
636 main