[dfa/Cursor] Gut the monolithic lexer cache, as it is now actually a pessimization.
[pugs.git] / util / get-irc-logs.pl
bloba417fde7c1a7bdd3a3c5eb9509b9e8b4c5a0da27
1 #!/usr/bin/env perl
3 # this script downloads log files from colabti.de.
5 use strict;
6 use warnings;
8 use Getopt::Long;
9 use LWP::UserAgent;
10 #use LWP::UserAgent::Cached; # Debug purpose
12 my ($all, $help, $channel);
13 GetOptions(
14 'channel=s' => \$channel,
15 'help' => \$help,
16 'all' => \$all,
17 ) or help();
19 my $out_dir = shift || '.';
21 if ($help) {
22 help();
25 $out_dir ||= '.';
26 mkdir $out_dir if !-e $out_dir;
28 my $last_log;
29 if (!$all) {
30 my @existing_files = sort glob "$out_dir/*.log";
31 $last_log = pop @existing_files;
34 $channel ||= 'perl6';
35 $channel =~ s/^\#//;
37 my $ua = LWP::UserAgent->new;
38 #my $ua = LWP::UserAgent::Cached->new; # Debug purpose
39 $ua->env_proxy;
41 my $base_url = "http://colabti.de/irclogger/irclogger_logs";
42 warn " info: getting $base_url/$channel...\n";
43 my $res = $ua->get("$base_url/$channel");
44 #warn "Got!";
45 if ($res->is_success) {
46 my %links = extract_links($res->content);
47 while (my ($name, $url) = each %links) {
48 my $local_file = "$out_dir/$name";
49 if (!$all and -e $local_file and index($last_log, $name) == -1) {
50 warn " $local_file already exists, skipped.\n";
51 next;
52 } else {
53 warn "generating $local_file...\n";
54 $ua->mirror("http://colabti.de/$url", $local_file);
58 else {
59 die $res->status_line;
62 sub help {
63 print <<_EOC_;
64 Usage:
66 $0 <out-dir>
67 $0 --all tmp
69 Options:
70 --channel <name> Specify the IRC channel. Defaults to #perl6.
71 --all Download very log file even if there's one in out-dir.
72 --help Show this help.
73 _EOC_
74 exit(0);
77 sub extract_links {
78 my $html = shift;
79 my %links;
80 while ($html =~
81 m{"/irclogger/irclogger_log/$channel\?date=(\d+-\d+-\d+),\w+;raw=on"}g) {
82 my ($name, $url) = ("$channel-$1.log", $&);
83 $url =~ s/^"|"$//g;
84 $links{$name} = $url;
86 %links;