18 my $optpod = <<'=back';
26 Not a loop, just once.
28 =item B<--storefile=s>
30 Defaults to C<metabase-log.txt>.
40 It's a cronjob but it runs forever until killed. Later starting
41 cronjobs give up quickly when they do not get the lock.
45 Read the logfile from metabase.cpantesters.org, write the lines we
46 have not yet seen into file specified as C<storefile>. In the
47 following we collect a few facts we believe to know.
49 An example of a line in log.txt would be
51 [2012-10-06T16:30:00Z] [Chris Williams (BINGOS)] [pass] [RUBYKAT/Dist-Zilla-Plugin-GitFmtChanges-0.005.tar.gz] [i86pc-solaris-thread-multi-64int] [perl-v5.14.2] [12d49d6e-0fd3-11e2-bdcc-373e3b6b8117] [2012-10-06T16:30:00Z]
53 That same report in cpanstats.db would be
55 sqlite> select * from cpanstats where id=23763028;
56 23763028|12d428d4-0fd3-11e2-bdcc-373e3b6b8117|pass|201210|xxxx@xxxxxxx.xxxxxxxxx.xx.xx ((xxxxxxx x))|Dist-Zilla-Plugin-GitFmtChanges|0.005|i86pc-solaris-thread-multi-64int|5.14.2|solaris|2.11|201210061630|2
58 What is this relation between the two GUIDs? Part 2..5 are equal, part
59 1 is different, that seems to be a rule.
61 | log.txt | barbie | id |
62 | 12d49d6e-0fd3-11e2-bdcc-373e3b6b8117 | 12d428d4-0fd3-11e2-bdcc-373e3b6b8117 | 23763028 |
63 | 2012-10-06T16:30:00Z | 201210061630 | |
64 | 12519d2e-0fd3-11e2-8e23-9a24f233959e | 12518654-0fd3-11e2-8e23-9a24f233959e | 23763027 |
65 | 2012-10-06T16:29:59Z | 201210061629 | |
66 | 117831ba-0fd3-11e2-9e90-da8a1df5d9f4 | 1177fb5a-0fd3-11e2-9e90-da8a1df5d9f4 | 23763026 |
67 | 2012-10-06T16:29:59Z | 201210061629 | |
69 This means we have no common id. But:
71 | field | type | comment | useful? |
72 |----------+---------+--------------------------------+-----------------------------|
73 | id | INTEGER | invented | |
74 | guid | TEXT | similar but different | maybe but distrusted |
75 | state | TEXT | same | yes |
76 | postdate | TEXT | only month | |
77 | tester | TEXT | different via some db mapping | |
78 | dist | TEXT | cut out of fully composed name | yes, but not fully reliable |
79 | version | TEXT | " | yes, but not fully reliable |
80 | platform | TEXT | same | yes |
81 | perl | TEXT | cut out of longer variant | yes, but not fully reliable |
82 | osname | TEXT | missing in log.txt | |
83 | osvers | TEXT | missing in log.txt | |
84 | fulldate | TEXT | same but seconds cut off | yes (at least very likely) |
85 | type | INTEGER | missing in log.txt | |
87 So we must make up a compound key that is uniq most of the time but
88 sometimes not. The key is comprised of the columns that we considered
89 useful above. The new table shall just collect compound keys and
90 matching guids and a third column tells the source of the guid. Then
91 we can find 1:1 mappings and N:M mappings. A bug is a N:M mapping with
94 I believe we're better off when refill-cpanstatsdb.pl logs what it
105 use File
::Basename
();
107 use File
::ReadBackwards
();
110 use Pod
::Usage
qw(pod2usage);
111 use LWP
::UserAgent
();
113 use Time
::HiRes
qw(sleep time);
119 my @opt = $optpod =~ /B<--(\S+)>/g;
127 pod2usage
(0) if $Opt{help
};
130 my(@stat) = stat __FILE__
;
141 my $storefile = $Opt{storefile
} ||= "metabase-log.txt";
142 my $proclogfile = "$storefile-proc.log";
143 File
::Path
::mkpath File
::Basename
::dirname
$storefile;
144 my($cached_time,$cached_ts) = (0,"");
148 open my $fh, ">>", $proclogfile or die "Could not open >> '$proclogfile': $!";
149 $message =~ s/\s*\z/\n/;
150 if ( time != $cached_time ) {
155 $cached_ts = sprintf "%04d%02d%02dT%02d%02d%02d", @t[5,4,3,2,1,0];
157 print $fh "$cached_ts:$message";
160 my $url = "http://metabase.cpantesters.org/tail/log.txt";
161 my $fetchfile = "$storefile.mirrorer";
162 my $lockfile = "$fetchfile.LCK";
163 my $self_mtime = self_mtime
;
164 my $tltj_statusfile = "$ENV{HOME}/var/metabase-log/log-as-json.status";
167 # we are confident that we can read the TLTJ status file now without
168 # locking because we govern the call to TLTJ;
169 my $json = JSON
::XS
->new->pretty(1)->utf8(1)->indent(1)->space_before(1)->space_after(0);
170 my $status_content = do { open my $fh, $tltj_statusfile or mydie
"could not open: $!"; local $/; <$fh>};
171 my $status = $status_content ?
$json->decode($status_content) : {};
172 mylog
sprintf "Info[%s]: Found tltj status tell value: '%s', size storefile %s", $$, $status->{tell}, -s
$storefile;
176 use Fcntl
qw( :flock :seek O_RDONLY O_RDWR O_CREAT );
178 unless (open $lfh, "+<", $lockfile) {
179 unless ( open $lfh, ">>", $lockfile ) {
180 mylog
"ALERT: Could not open >> '$lockfile': $!";
183 unless ( open $lfh, "+<", $lockfile ) {
184 mylog
"ALERT: Could not open +< '$lockfile': $!";
188 if (flock $lfh, LOCK_EX
|LOCK_NB
) {
189 mylog
"Info[$$]: Got the lock, continuing";
191 mylog
"FATAL[$$]: lockfile '$lockfile' locked by a different process; cannot continue";
194 my $tltj = tltj_status
();
195 my $tell = $tltj->{tell};
196 my $can_continue = $tell == -s
$storefile;
197 unless ($can_continue) {
198 require File
::ReadBackwards
;
199 my $bw = File
::ReadBackwards
->new( $storefile ) or
200 mydie
"can't read '$storefile': $!" ;
201 while( defined( my $log_line = $bw->readline ) ) {
202 if ($log_line eq "...\n") {
209 $can_continue = $tell == -s
$storefile;
210 if ($tell == -s
$storefile) {
211 # we do not lock the status file because the whole system only
212 # works if neither $storefile gets altered nor the statusfile; but
213 # we do have the $lfh on the other lockfile and that should
214 # guarantee standstill; and should $tell ever not be equal to the
215 # size of the $storefile, then we should not get past this point.
217 $t->time_separator("");
218 $t->date_separator("");
219 my $ts = sprintf "%sz", $t->datetime;
220 unless (rename $storefile, "$storefile-$ts") {
221 mydie
"Could not rename $storefile to $storefile-$ts: $!";
223 mylog
("Info: Renamed $storefile to $storefile-$ts");
224 unless (0 == system "tail -n 10000 $storefile-$ts > $storefile") {
225 mydie
"Could not rewrite $storefile";
227 mylog
("Info: Left-truncated $storefile");
228 my $json = JSON
::XS
->new->pretty(1)->utf8(1)->indent(1)->space_before(1)->space_after(0);
232 tell => -s
$storefile,
235 unless (open $sfh, ">", $tltj_statusfile) {
236 mydie
"Could not open > $tltj_statusfile: $!";
238 print $sfh $json->encode($status);
240 or mydie
"Could not close $tltj_statusfile: $!";
242 mydie
sprintf "FATAL: tell from %s and size of %s must be equal, but: tell='%s', storefilesize '%s'",
243 $tltj_statusfile, $storefile, $tell, -s
$storefile;
245 my $ua = LWP
::UserAgent
->new;
248 my $bw = File
::ReadBackwards
->new($storefile);
250 mylog
("Could not read backwards '$storefile': $!");
254 while (defined(my $line = $bw->readline)) {
255 last if ++$cnt >= 2001; # arbitrary
259 my $time_to_leave = time + 21600; # 86400; # arbitrary
261 if ( self_mtime
> $self_mtime ) {
262 mylog
(sprintf "%s has been updated, good bye", __FILE__
);
264 } elsif (time > $time_to_leave) {
265 mylog
(sprintf "after running for that long, good bye");
269 my $time_to_repeat = $t+15; # arbitrary
270 #$ua->mirror($url, $fetchfile); # fyi: if this fails, leaves a $fetchfile-$$ behind
271 # XXX srt - quickfix against varnish cache
272 $ua->mirror($url."?".time(), $fetchfile); # fyi: if this fails, leaves a $fetchfile-$$ behind
273 if (open my $fh, $fetchfile) {
274 # [2010-10-29T21:05:10Z] [Chris Williams (BINGOS)] [pass] [RIZEN/Chat-Envolve-0.0100.tar.gz] [i386-dragonfly-64int] [perl-v5.10.0] [373b43ee-e3a0-11df-9e2d-9e9e6e8696e0] [2010-10-29T21:05:10Z]
275 my @qr = (qr/\[([^\]]*)\]/) x
8;
277 my %oldseen = map {($_ => 1)} @old; # @old has all lines
278 # (unsorted) that were in
279 # the previous iteration
284 LINE: while (<$fh>) {
285 my($date,$author,$result,$path,$arch,$perl,$uuid,$ts) = my @f = /@qr/ or next;
287 $seen{$_}++; # %seen contains all current ones
288 if (exists $oldseen{$_}) {
292 push @new, $_; # @new contains only lines that we did not
297 unless ( open $fh, ">>", $storefile ) {
298 mylog "Could
not open >>'$storefile': $!";
302 mylog sprintf "INFO
[%s]: Lines
read: %d, Newlines
read: %d", $$, $cnt, scalar @new;
303 my $newlines_written = 0;
306 unless ( open $fh, ">>", $storefile ) {
307 mylog "Could
not open >>'$storefile': $!";
312 # mylog "WRITTEN
: $line"; # very noisy
315 close $fh or mylog "Could
not close >>'$storefile': $!";
316 0 == system "$^X
$ENV{HOME
}/src/cpan
-testers
-matrix
/bin/tail
-log-to
-json
.pl
-o
$ENV{HOME
}/var/metabase
-log/log-as-json -logfile $ENV{HOME}/var
/metabase-log/log-as
-json
.log -statusfile
$tltj_statusfile $ENV{HOME
}/var/metabase
-log/metabase.log 2>$ENV{HOME}/var
/metabase-log/log-as
-json
.err
" or mylog("ALERT
: error running tail
-log-to
-json
");
319 mylog sprintf "INFO
[%s]: Newlines written
: %d, Keeping
in mem records
: %d", $$, $newlines_written, scalar @old;
322 if ( my @stat = stat $fetchfile ) {
323 my $keepcalm = $stat[9] + 30; # arbitrary
324 $time_to_repeat = $keepcalm if $keepcalm > $time_to_repeat;
325 if ( time - $keepcalm > 30 ) {
326 $time_to_repeat += 18; # they are late, do not try too often
329 $time_to_repeat += 18 unless $time_to_repeat > 150; # something unusual has happened
331 my $sleep = $time_to_repeat - time;
333 mylog "INFO
[$$]: (sleeping
$sleep)";