let fsck warn about missing recent files
[rersyncrecent.git] / bin / rrr-fsck
blob8bd978a67396d8cdfb8e9c6f1033faabce88b065
1 #!/usr/bin/perl
3 =head1 NAME
5 rrr-fsck -
7 =head1 SYNOPSIS
9 rrr-fsck [options] principalfile
11 =head1 OPTIONS
13 =over 8
15 =cut
17 my @opt = <<'=back' =~ /B<--(\S+)>/g;
19 =item B<--dry-run|n>
21 Does nothing, only prints what it would do.
23 =item B<--help|h>
25 Prints a brief message and exists.
27 =item B<--remoteroot=s>
29 If provided fsck will try to mirror missing files from this location.
30 For remote locations requiring authentication you may need to set the
31 environment variables USER and RSYNC_PASSWORD as well.
33 =item B<--verbose|v+>
35 More feedback.
37 =item B<--yes|y>
39 Consider all answers to asked questions to be I<yes>.
41 =back
43 =head1 DESCRIPTION
45 Compares disk contents with index contents and gathers files missing
46 on local disk and files missing in local index.
48 If remoteroot is given missing files are fetched from remote.
50 Files on the local disk that have no counterpart in the index are
51 considered obsolete and the user is asked for each file if the file
52 should be deleted. And if the user confirms it will be deleted.
54 =head1 BUGS
56 There is a race condition when the tree or the index is manipulated
57 while we are running. This implies that the result is only then 100%
58 correct when disk and index are not changed while we are running.
60 There should be an option to declare the files on disk authoritative
61 so that they are added to the index.
63 =cut
66 use strict;
67 use warnings;
69 use lib "/home/k/sources/rersyncrecent/lib";
71 use File::Basename qw(dirname);
72 use File::Find qw(find);
73 use ExtUtils::MakeMaker qw(prompt);
74 use File::Rsync::Mirror::Recent;
75 use File::Spec;
76 use Getopt::Long;
77 use List::Util qw(max);
78 use Pod::Usage qw(pod2usage);
79 use Time::HiRes qw(time sleep);
81 our %Opt;
82 GetOptions(\%Opt,
83 @opt,
84 ) or pod2usage(1);
86 if ($Opt{help}) {
87 pod2usage(0);
90 if (@ARGV == 1) {
91 } else {
92 pod2usage(1);
95 my($principal) = @ARGV;
96 my $recc = File::Rsync::Mirror::Recent->new
98 local => $principal,
99 localroot => dirname $principal,
101 for my $passthrough (qw(remoteroot verbose)) {
102 if (my $opt = $Opt{$passthrough}) {
103 $recc->$passthrough($opt);
106 my $root = $recc->localroot;
107 die "Alert: Root not defined, giving up" unless defined $root;
108 my $prf = $recc->principal_recentfile;
109 my $filenameroot = $prf->filenameroot;
110 my $serializer_suffix = $prf->serializer_suffix;
111 my $ignore_rx = qr((?x:
112 ^ \Q$filenameroot\E (?: - [0-9]*[smhdWMQYZ] \Q$serializer_suffix\E (?: \.lock (?: /.* )? )? | \.recent ) \z
115 my %diskfiles;
116 my $i;
117 my $last_verbosity = 0;
118 $|=1;
119 if ($Opt{verbose}) {
120 print "\n";
122 find({
123 wanted => sub {
124 my @lstat = lstat $_;
125 return unless -l _ or -f _;
126 my($reportname) = $File::Find::name =~ m{^\Q$root\E/*(.*)};
127 return if $reportname =~ $ignore_rx;
128 $i++;
129 if ($Opt{verbose} && time - $last_verbosity > 0.166666) {
130 printf "\r%8d files and symlinks checked on disk ", $i;
131 $last_verbosity = time;
133 $diskfiles{$File::Find::name} = $lstat[9];
135 no_chdir => 1,
137 $root
139 if ($Opt{verbose}) {
140 printf "\r%8d files and symlinks checked on disk\n", $i;
142 $i = 0;
144 if ($Opt{verbose}) {
145 print "\rChecking index";
147 my @newsargs = ();
148 if ($Opt{verbose}) {
149 @newsargs =
150 (callback => sub {
151 $i = scalar @{shift;};
152 if (time - $last_verbosity > 0.166666) {
153 printf "\r%8d entries read from index ", $i;
154 $last_verbosity = time;
158 my $indexfiles = $recc->news(@newsargs);
159 if ($Opt{verbose}) {
160 printf "\r%8d entries read from index\n", scalar @$indexfiles;
162 my %seen;
163 my %indexfiles = map {
164 ("$root/$_->{path}"=>$_->{epoch})
165 } grep {
166 defined $_->{path} &&
167 !$seen{$_->{path}}++
168 && $_->{type} eq "new"
169 } @$indexfiles;
170 my @missing_rfrfiles;
171 for my $rf (@{$recc->recentfiles}) {
172 my $rfrfile = $rf->rfile;
173 unless (-e $rfrfile) {
174 push @missing_rfrfiles, $rfrfile;
177 if (@missing_rfrfiles) {
178 warn "Warning: missing index files @missing_rfrfiles\n";
180 if ($Opt{verbose}) {
181 printf "\r%8d file objects found in index\n", scalar keys %indexfiles;
183 my $sprintfd = length(max scalar @$indexfiles, scalar keys %diskfiles);
184 warn sprintf(
185 "diskfiles: %*d\n".
186 "indexfiles: %*d\n",
187 $sprintfd, scalar keys %diskfiles,
188 $sprintfd, scalar keys %indexfiles,
190 my @diskmisses = sort { $indexfiles{$b} <=> $indexfiles{$a} } grep { ! exists $diskfiles{$_} } keys %indexfiles;
191 my @indexmisses = sort { $diskfiles{$a} <=> $diskfiles{$b} } grep { ! exists $indexfiles{$_} } keys %diskfiles;
192 warn sprintf(
193 "missing on disk: %*d\n".
194 "missing in index: %*d\n",
195 $sprintfd, scalar @diskmisses,
196 $sprintfd, scalar @indexmisses,
198 $DB::single++;
199 my $rf = $recc->principal_recentfile;
200 my $last_aggregate_call = time;
201 my @batch;
202 for my $dm (@diskmisses) {
203 if (0) {
204 } elsif ($Opt{"dry-run"}) {
205 if ($Opt{remoteroot}) {
206 warn "Would fetch $dm\n";
207 } else {
208 warn "Would remove from indexfile $dm\n";
210 } elsif ($Opt{remoteroot}) {
211 my $relative = substr $dm, 1 + length $root;
212 $rf->get_remotefile($relative);
213 } else {
214 warn "Removing from indexfile: $dm\n";
215 push @batch, {path => $dm, type => "delete"};
216 #$rf->update($dm,"delete");
217 #if (time > $last_aggregate_call + $rf->interval_secs) {
218 # warn "Aggregating\n";
219 # $rf->aggregate;
220 # $last_aggregate_call = time;
224 for my $im (@indexmisses) {
225 if ($Opt{"dry-run"}) {
226 if ($Opt{remoteroot}) {
227 warn "Would remove $im\n";
228 } else {
229 warn "Would add to indexfile $im\n";
231 } elsif ($Opt{remoteroot}) {
232 my $ans;
233 if ($Opt{yes}) {
234 warn "Going to unlink '$im'\n";
235 $ans = "y";
236 } else {
237 $ans = prompt "Unlink '$im'?", "y";
239 if ($ans =~ /^y/i) {
240 unlink $im or die "Could not unlink '$im': $!";
242 } else {
243 warn "Adding to indexfile: $im\n";
244 my @stat = stat $im or next;
245 push @batch, {epoch => $stat[9], path => $im, type => "new"};
246 #$rf->update($im,"new");
247 #if (time > $last_aggregate_call + $rf->interval_secs) {
248 # warn "Aggregating\n";
249 # $rf->aggregate;
250 # $last_aggregate_call = time;
254 unless ($Opt{"dry-run"}) {
255 if (@batch) {
256 $rf->batch_update(\@batch);
260 __END__
263 # Local Variables:
264 # mode: cperl
265 # coding: utf-8
266 # cperl-indent-level: 4
267 # End: