Introduce a 'games this month' column
[asr.git] / update.pl
blob199311e2f8a8e55d50e4eea094381e980935ffd8
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4 use Time::Local;
5 use POSIX qw/strftime/;
6 use LWP::UserAgent;
7 use lib qw(/home/pasky/WWW/asr);
8 use ASR;
10 our $ctx = ASR::Ladder->new;
12 our $delay = 5;
14 our ($lwp_response, $page_fail);
16 $|=1; # Turn off output buffering.
19 our ($csec, $cmin, $chr, $cday, $cmon, $cyear) = gmtime();
20 $cyear += 1900;
23 print "\n+++ Starting update job at $cyear-$cmon-$cday $chr:$cmin:$csec\n\n";
26 # Check games of new players one week back. We should do global updates way
27 # more often than that anyway.
28 our $deftime = time() - 7*86400;
30 # ======================
31 # Decide what to update!
32 # ======================
34 my $mode = $ARGV[0] || "all";
36 my @games;
37 our ($player, $uall) = ({}, 0);
38 if ($mode eq 'all') {
39 $delay = $delay || 10;
40 @games = allgames();
41 $uall = 1;
43 } elsif ($mode eq 'player') {
44 $player = $ctx->db_selectrow("SELECT *, UNIX_TIMESTAMP(last_game) AS last_game FROM player_last_game WHERE nick = ?", {}, $ARGV[1]);
45 if ($player) {
46 @games = playergames($player);
47 } else {
48 print $ctx->p("Invalid player nick: ".$ARGV[1]);
52 @games = sort { $a->{time} <=> $b->{time} } @games;
53 print $ctx->p("I have ".scalar(@games)." game(s) to investigate.\n");
55 # For each potentially new game, run the game parser on it.
56 foreach my $game (@games) {
57 parsegame($game);
61 # =================================================
62 # Helper routines to bulk update many KGS Usernames
63 # =================================================
65 sub allgames {
66 my @update;
67 my $q = $ctx->{dbi}->prepare("SELECT *, UNIX_TIMESTAMP(last_game) AS last_game FROM player_last_game ORDER BY last_update ASC");
68 $q->execute();
69 while (my $r = $q->fetchrow_hashref) {
70 push @update, playergames($r);
72 return @update;
75 # =================================
76 # Update an individual KGS Username
77 # =================================
79 sub playergames {
80 my ($player) = @_;
81 print $ctx->p("Updating KGS user name $player->{nick}.\n");
82 my $rank;
84 # Has the user been updated recently?
85 my $time = $player->{last_game} || $deftime;
86 #if (time() - ($player->{last_update} || 0) < 3600) {
87 # print $ctx->p("Player $player->{nick} was updated too recently; skipping.\n");
88 # return;
91 # Convert time to a format we can plug into the archives.
92 my ($sec,$min,$hr,$day,$mon,$year) = gmtime $time;
93 $year += 1900;
95 # Now get a list of all games spanning all months since the last update.
96 # We go back 5 hours before the last game in case we missed an ongoing
97 # game.
98 undef $page_fail;
99 my @games;
100 while(($year < $cyear) || ($year == $cyear && $mon <= $cmon)) {
101 push @games, getnewgames($year, $mon + 1, $player->{nick}, $time - 3600 * 5, \$rank);
102 # In case a game was playing during prev update, subtract 5 hours.
103 $mon++;
104 if ($mon>11) {
105 $mon = 0;
106 $year++;
109 # Finally, use the rank we found from the system to update the player.
110 if ($page_fail) {
111 $ctx->db_do("UPDATE player SET last_update = NOW() WHERE id = ?", {}, $player->{pid});
112 } else {
113 $ctx->db_do("UPDATE player SET rank = ?, last_update = NOW() WHERE id = ?", {}, $rank, $player->{pid});
116 return @games;
119 # ====================================================================
120 # Get a list of games from the KGS Archives page of a given user/month
121 # ====================================================================
123 # $time is the time of the oldest game to include. $rankref will be updated
124 # with the most recent rank.
126 sub getnewgames {
127 my ($year, $month, $user, $time, $rankref) = @_;
128 print $ctx->p("Getting page http://www.gokgs.com/gameArchives.jsp?user=$user&year=$year&month=$month...\n");
129 $_ = get("http://www.gokgs.com/gameArchives.jsp?user=$user&year=$year&month=$month");
130 if (!$_) {
131 print $ctx->p("Failed to get page: ".$lwp_response->status_line);
132 $page_fail = 1;
133 sleep($delay) if $delay;
134 return;
136 sleep($delay) if $delay;
137 my @games;
138 my $first = 1;
139 while (
140 m#<tr><td><a href="([^"]*)">Yes</a></td><td><a[^>]*>(\S+(?:\s+\[\S+\])?)(?:</a><br/?><a[^>]*>(\S+(?:\s+\[\S+\])?))?</a></td><td><a[^>]*>(\S+(?:\s+\[\S+\])?)(?:</a><br/?><a[^>]*>(\S+(?:\s+\[\S+\])?))?</a></td><td>19[^<]*19[^<]*</td><td>(\d+)/(\d+)/(\d+)\s+(\d+):(\d+)\s+([AP])M</td><td>(?:Free|Ranked|Simul)</td><td>([BW])\+[^<]*?</td></tr>#gs) {
141 # Order:
142 # URL
143 # W player 1
144 # (W player 2)
145 # B player 1
146 # (B player 2)
152 # AM/PM
153 # Winner
154 #print $ctx->p("Test: $1 $2 $3 $4 $5 $6 $7 $8 $9 $10\n");
155 my $ctime = timeval($6,$7,$8,$9,$10,$11);
157 my $url = $1;
158 my $quser = quotemeta $user;
159 my ($white, $black) = ($2, $4);
160 my $won = $12;
161 my ($wrank, $brank);
162 $wrank = rank($1) if $white =~ s/\s+\[(.*)\]//;
163 $brank = rank($1) if $black =~ s/\s+\[(.*)\]//;
165 $$rankref = lc $user eq lc $white ? $wrank : $brank if $first;
166 undef $first;
168 if ($ctime < $time) {
169 print $ctx->p("Discarding $url (and anything after) since it's dated before update threshold (".strftime("%H:%M %d/%m/%Y", gmtime $time).").\n");
170 last;
173 push @games, { url => $url, time => $ctime };
175 print $ctx->p("Potential new ladder game: $url - ".strftime("%H:%M %d/%m/%Y", gmtime $ctime)."\n");
177 return @games;
180 # Convert kyu / dan into decimal rank.
181 sub rank {
182 return $1 if $_[0] =~ /(\d+)k/i;
183 return 1 - $1 if $_[0] =~ /(\d+)d/i;
184 return;
187 # =========================================================================
188 # Investigate a game found in the archives to establish if it's a clan game
189 # =========================================================================
191 sub parsegame {
192 my ($game) = @_;
194 print $ctx->p("Investigating game: $game->{url}\n");
196 $ctx->db_do("START TRANSACTION");
198 # Check if the game isn't being re-parsed...
200 my $r = $ctx->db_selectone("SELECT id FROM game WHERE url = ?", {}, $game->{url});
201 if ($r) {
202 # Error message not really an error.
203 print $ctx->p("I've already seen this game...\n");
204 return;
208 my $sgf = get($game->{url});
209 sleep(1) if $delay; # Don't need to wait 10 secs for these...
212 my $zurl = $game->{url}; $zurl =~ s#/#_#g;
213 my $f;
214 if (open $f, ">/tmp/asrgames/$zurl") {
215 print $f $sgf;
216 close $f;
221 # Get all of the info we're interested in out of the SGF file.
223 my $komi = 0;
224 $komi = $1 if ($sgf =~ /KM\[(-?\d+(?:\.\d+)?)\]/);
225 my $handi = 0;
226 $handi = $1 if ($sgf =~ /HA\[(\d+)\]/);
227 $handi = 1 if $handi == 0 && $komi < 1;
229 my $white = 'unknown';
230 $white = $1 if ($sgf =~ /PW\[([^\]]+)\]/);
231 my $black = 'unknown';
232 $black = $1 if ($sgf =~ /PB\[([^\]]+)\]/);
234 my (@white_decision, @black_decision);
235 my ($result, $result_by) = (0, "Unknown");
236 if ($sgf =~ /RE\[([^\]]+)\]/) {
237 my $temp = $1;
238 if (lc $temp eq 'jigo') {
239 $result = 0;
240 } elsif ($temp =~ /([BW])+(.*)/) {
241 $result = ($1 eq 'B') ? 1 : -1;
242 $result_by = $2;
246 $ctx->db_do("INSERT INTO game SET winner = ?, url = ?", {}, $result > 0 ? 'black' : 'white', $game->{url});
247 $game->{id} = $ctx->lastid;
249 # From now on we can check if this is ladder game:
251 my $whiterec = $ctx->db_selectrow("SELECT * FROM player WHERE nick = ?", {}, $white);
252 my $blackrec = $ctx->db_selectrow("SELECT * FROM player WHERE nick = ?", {}, $black);
253 if (not defined $whiterec or not defined $blackrec) {
254 print $ctx->p("This is not a ladder game...\n");
255 return;
258 my $rules = "";
259 #if ($sgf !~ /RU\[Japanese\]/) {
260 if ($sgf =~ /RU\[([^\]]+)\]/) {
261 # print $ctx->p("Not a ladder game: Bad ruleset.\n");
262 $rules .= "RU[$1]";
264 if ($sgf !~ /SZ\[([^\]]+)\]/) {
265 # print $ctx->p("Not a ladder game: Bad board size.\n");
266 $rules .= "SZ[$1]";
268 if ($sgf !~ /TM\[(0|[1-9]|[1-5]\d|60)\]OT\[[1-5]x10 byo-yomi\]/) {
269 print $ctx->p("Not a ladder game: Bad time settings.\n");
270 return;
272 #if ($sgf =~ /TM\[([^\]]+)\]/) {
273 # $rules .= "TM[$1]";
275 #if ($sgf =~ /OT\[([^\]]+)\]/ ) {
276 # $rules .= "OT[$1]";
278 print $ctx->p("Fits ladder settings.\n");
280 my @m = ($sgf =~ /([BW]L\[\d+(?:\.\d+)?\])/g);
281 my $moves = @m;
282 if ($moves < 30) {
283 print $ctx->p("Not a ladder game: Only $moves moves (needs 30).\n");
284 return;
287 my $tagmoves = is_ladder($sgf, $black, $white);
288 if (!defined $tagmoves) {
289 print $ctx->p("Is not a ladder game: No tag.\n");
290 return;
291 } elsif ($tagmoves > 30) {
292 print $ctx->p("Is not a ladder game: Tag said too late (after move $tagmoves).\n");
293 return;
294 } else {
295 print $ctx->p("Tag OK: Said after move $tagmoves.\n");
298 # Now perform the necessary database updates.
300 my $black_score = $blackrec->{score};
301 my $white_score = $whiterec->{score};
302 if ($result > 0) {
303 $black_score = $ctx->recompute_score($black_score, $white_score);
304 $ctx->db_do("UPDATE player SET score = ? WHERE id = ?", {}, $black_score, $blackrec->{id});
305 } else {
306 $white_score = $ctx->recompute_score($white_score, $black_score);
307 $ctx->db_do("UPDATE player SET score = ? WHERE id = ?", {}, $white_score, $whiterec->{id});
310 $ctx->db_do("INSERT INTO ladder_game SET id = ?, black = ?, white = ?, black_score_old = ?, white_score_old = ?, black_score_new = ?, white_score_new = ?", {}, $game->{id}, $blackrec->{id}, $whiterec->{id}, $blackrec->{score}, $whiterec->{score}, $black_score, $white_score);
312 $ctx->db_do("COMMIT");
315 # Ensure that the ladder tag was said; returns moves before tag or undef.
317 sub is_ladder {
318 my ($sgf, $black, $white) = @_;
319 if ($sgf =~ /^(.*?)($black|$white)\s+\[[^\\]*\\\]:[^\n]*\b(?i:(?:ASR *ladd*er|add you to the ladder))\b/s) {
320 my @m = ($1 =~ /([BW]L\[\d+(?:\.\d+)?\])/g);
321 my $moves = @m;
322 return $moves;
324 return;
327 sub timeval {
328 my ($mo, $d, $y, $h, $mi, $p) = ($_[0] - 1, $_[1], $_[2]+2000, $_[3], $_[4], $_[5] eq 'P');
329 if ($p && $h < 12) {
330 $h += 12;
331 } elsif (!$p && $h == 12) {
332 $h = 0;
334 return timegm(0, $mi, $h, $d, $mo, $y);
337 sub get {
338 my $ua = LWP::UserAgent->new;
339 $ua->timeout(10);
340 $ua->env_proxy;
342 $lwp_response = $ua->get($_[0]);
344 if ($lwp_response->is_success) {
345 return $lwp_response->content; # or whatever
347 return undef;