21 my @opt = <<'=back' =~ /B<--(\S+)>/g;
25 DSN. Defaults to C<dbi:SQLite:dbname=$workdir/cpanstats.db> where
26 $workdir comes from the configuration.
34 Defaults to 360000. Number of records to transfer.
40 Just the cpanstats table. We consider already written records correct
41 and do not repeat them.
43 2013-11-16 akoenig : without throttling we seem to copy 100 records
44 per second. That would mean for 36M records: more than 4 days.
46 After a first run for 360000 records, it turned out to be slower, this
47 run took 4800 seconds. The next run for 2M records did it in 21400
48 seconds, so we were faster again.
54 use lib
"$FindBin::Bin/../lib";
60 use File
::Basename
qw(dirname);
61 use File
::Path
qw(mkpath);
66 use Hash
::Util
qw(lock_keys);
# Restrict %Opt to the known option names, so that using a mistyped key
# fails instead of silently creating a new entry. The allowed names are
# taken from each entry of @opt by keeping only the leading run of
# characters before the first "=", "|" or "!".
69 lock_keys
%Opt, map { /([^=|!]+)/ } @opt;
# SIGINT and SIGTERM share one handler: it reports the signal name via
# warn and sets $SIGNAL to 1.
# NOTE(review): the code that reads $SIGNAL is not visible in this excerpt;
# presumably the copy loop checks it in order to stop cleanly -- confirm.
78 $SIG{INT
} = $SIG{TERM
} = sub { my $sig = shift; warn "Caught $sig\n"; $SIGNAL=1 };
81 use Time
::HiRes
qw(sleep time);
82 use CPAN
::Blame
::Config
::Cnntp
;
83 my($workdir,$cpan_home,$ext_src);
85 $workdir = File
::Spec
->catdir
86 ($CPAN::Blame
::Config
::Cnntp
::Config
->{solver_vardir
},
88 $cpan_home = $CPAN::Blame
::Config
::Cnntp
::Config
->{cpan_home
};
89 $ext_src = $CPAN::Blame
::Config
::Cnntp
::Config
->{ext_src
};
# Use the SQLite file below $workdir unless $Opt{db} already holds a true
# value.
93 $Opt{db
} ||= "dbi:SQLite:dbname=$workdir/cpanstats.db";
# $sldbi: handle for the DSN in $Opt{db} (SQLite by default).
95 my $sldbi = DBI
->connect ($Opt{db
}); # return a dbi handle
# $pgdbi: handle for the PostgreSQL database "analysis"; this DSN is
# hard-coded.
# NOTE(review): neither connect result is checked here and no attributes
# (e.g. RaiseError) are passed -- confirm that a failed connect is caught
# elsewhere.
96 my $pgdbi = DBI
->connect ("dbi:Pg:dbname=analysis");
98 # $pgdbi->do("create table cpanstats (
99 # id INTEGER PRIMARY KEY,
101 # state varchar(255),
102 # postdate varchar(255),
103 # tester varchar(255),
105 # version varchar(255),
106 # platform varchar(255),
108 # osname varchar(255),
109 # osvers varchar(255),
110 # fulldate varchar(255),
# Determine the highest id on each side.
# NOTE(review): fetchrow_array needs a prior execute on the handle; none
# appears between prepare and fetch in this excerpt -- confirm.
114 my $slsth = $sldbi->prepare("SELECT max(id) from cpanstats");
116 my($slmaxid) = $slsth->fetchrow_array();
117 my $pgsth = $pgdbi->prepare("SELECT max(id) from cpanstats");
119 my($pgmaxid) = $pgsth->fetchrow_array();
# NOTE(review): max(id) over an empty table is NULL, i.e. undef here. An
# undef bind value makes "id > ?" match no rows -- confirm that the empty
# destination case is handled.
121 warn "slmaxid[$slmaxid] pgmaxid[$pgmaxid] opt-max[$Opt{max}]\n";
# Stop when the destination has already reached (or passed) the source.
122 die "Nothing to copy" if $pgmaxid >= $slmaxid;
# Read the rows following the destination's highest id, in id order.
# $Opt{max} is interpolated into the statement text (not bound) and caps
# the number of rows.
124 $slsth = $sldbi->prepare("SELECT id,guid,state,postdate,tester,dist,version,platform,perl,osname,osvers,fulldate,type from cpanstats where id > ? order by id limit $Opt{max}");
125 $slsth->execute($pgmaxid);
# Insert statement for the destination: 13 placeholders in the same column
# order as the SELECT above, so a fetched row can be passed on unchanged.
126 $pgsth = $pgdbi->prepare("INSERT INTO cpanstats
127 (id,guid,state,postdate,tester,dist,version,platform,perl,osname,osvers,fulldate,type) values
128 (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
# Both timestamps start at the current time; time is the Time::HiRes
# version imported above, so the values are fractional seconds.
131 my $lastreport = my $starttime = time;
# Seconds that must pass since $lastreport before the progress message
# below is emitted.
132 my $report_every = 1800;
# Copy loop: each row fetched from the source handle is handed to the
# insert statement as-is.
133 ROW
: while (my(@row) = $slsth->fetchrow_array) {
134 unless ($pgsth->execute(@row)) {
# A failed insert is reported together with the row's id ($row[0]).
135 warn sprintf "ALERT: error inserting row/id[%s]: %s\n", $row[0], $pgdbi->errstr;
# Throttle: sleep 10 ms whenever $i is a multiple of 1000.
140 sleep 0.01 unless $i%1000;
141 if ( time - $lastreport > $report_every ) {
# Hours are measured from $^T (program start time), not from $starttime.
142 warn sprintf "Running for %.1f hours, reached i[%d]\n", (time-$^T
)/3600, $i;
# Final summary: value of $i and the seconds elapsed since $starttime.
146 my $tooktime = time - $starttime;
147 warn "records transferred[$i] tooktime[$tooktime]\n";
151 # cperl-indent-level: 4