From f89ffb40d6073d29ebbb5fb4cea4fbaa46604428 Mon Sep 17 00:00:00 2001 From: Joe Moudrik Date: Thu, 18 Jul 2013 16:52:42 +0200 Subject: [PATCH] gostyle: the basic library, intitial commit. --- PATCH_pachi.diff | 37 ++ QUESTIONARE/lukas_podpera | 1 + QUESTIONARE/vladimir_danek | 1 + README | 32 +- TEST_FILES/games/1930-01-00a.sgf | 32 ++ TEST_FILES/games/1960-00-00f.sgf | 22 + TEST_FILES/games/1976-00-00e.sgf | 33 ++ TEST_FILES/games/1984-03-11d.sgf | 24 + TEST_FILES/games/1990-00-00b.sgf | 32 ++ TEST_FILES/test_bdist.sgf | 6 + TEST_FILES/test_bdist2.sgf | 1 + TEST_FILES/test_capture.sgf | 7 + TEST_FILES/test_cont.sgf | 4 + config.py | 6 + data_about_players.py | 289 +++++++++++ game_to_vec.py | 1000 ++++++++++++++++++++++++++++++++++++++ ipython_session.py | 15 + load_questionare.py | 44 ++ pachi.py | 217 +++++++++ result_file.py | 56 +++ utils/__init__.py | 9 + utils/colors.py | 21 + utils/db_cache.py | 331 +++++++++++++ utils/godb_models.py | 486 ++++++++++++++++++ utils/godb_session.py | 173 +++++++ utils/misc.py | 249 ++++++++++ utils/rank.py | 90 ++++ utils/sgf_load.py | 70 +++ utils/timer.py | 99 ++++ utils/utils.py | 225 +++++++++ 30 files changed, 3610 insertions(+), 2 deletions(-) create mode 100644 PATCH_pachi.diff create mode 100644 QUESTIONARE/lukas_podpera create mode 100644 QUESTIONARE/vladimir_danek create mode 100644 TEST_FILES/games/1930-01-00a.sgf create mode 100644 TEST_FILES/games/1960-00-00f.sgf create mode 100644 TEST_FILES/games/1976-00-00e.sgf create mode 100644 TEST_FILES/games/1984-03-11d.sgf create mode 100644 TEST_FILES/games/1990-00-00b.sgf create mode 100644 TEST_FILES/test_bdist.sgf create mode 100644 TEST_FILES/test_bdist2.sgf create mode 100644 TEST_FILES/test_capture.sgf create mode 100644 TEST_FILES/test_cont.sgf create mode 100644 config.py create mode 100755 data_about_players.py create mode 100644 game_to_vec.py create mode 100644 ipython_session.py create mode 100644 load_questionare.py create mode 100644 pachi.py create mode 
100644 result_file.py create mode 100644 utils/__init__.py create mode 100644 utils/colors.py create mode 100644 utils/db_cache.py create mode 100644 utils/godb_models.py create mode 100644 utils/godb_session.py create mode 100644 utils/misc.py create mode 100644 utils/rank.py create mode 100644 utils/sgf_load.py create mode 100644 utils/timer.py create mode 100644 utils/utils.py diff --git a/PATCH_pachi.diff b/PATCH_pachi.diff new file mode 100644 index 0000000..042b6cb --- /dev/null +++ b/PATCH_pachi.diff @@ -0,0 +1,37 @@ +diff --git a/pattern.c b/pattern.c +index 6843be0..c4e8aed 100644 +--- a/pattern.c ++++ b/pattern.c +@@ -16,7 +16,7 @@ + + + struct pattern_config DEFAULT_PATTERN_CONFIG = { +- .bdist_max = 4, ++ .bdist_max = 10, + + .spat_min = 3, .spat_max = MAX_PATTERN_DIST, + .spat_largest = true, +@@ -30,7 +30,7 @@ pattern_spec PATTERN_SPEC_MATCH_DEFAULT = { + [FEAT_SELFATARI] = ~0, + [FEAT_ATARI] = ~0, + [FEAT_BORDER] = ~0, +- [FEAT_CONTIGUITY] = 0, ++ [FEAT_CONTIGUITY] = ~0, + [FEAT_SPATIAL] = ~0, + }; + +@@ -530,10 +530,11 @@ pattern_match(struct pattern_config *pc, pattern_spec ps, + } + } + +- if (PS_ANY(CONTIGUITY) && !is_pass(b->last_move.coord) +- && coord_is_8adjecent(m->coord, b->last_move.coord, b)) { ++ if (PS_ANY(CONTIGUITY) && !is_pass(b->last_move.coord) ){ ++ //if (coord_is_8adjecent(m->coord, b->last_move.coord, b)) { } ++ + f->id = FEAT_CONTIGUITY; +- f->payload = 1; ++ f->payload = coord_gridcular_distance(m->coord, b->last_move.coord, b); + (f++, p->n++); + } + diff --git a/QUESTIONARE/lukas_podpera b/QUESTIONARE/lukas_podpera new file mode 100644 index 0000000..f60b186 --- /dev/null +++ b/QUESTIONARE/lukas_podpera @@ -0,0 +1 @@ +{"interviewee_name":"Lukas Podpera","interviewee_key":"84jf9j","interviewee_feedback":"Lee Sedol and Yi Se-tol is the same player ;)","group_lists":[{"name":"Elementary set","list":[{"name":"Chen Yaoye","id":0,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"no"},{"name":"Cho 
Chikun","id":1,"style":{"te":"6","or":"4","ag":"9","th":"10"},"tags":"","skip":"no"},{"name":"Cho U","id":2,"style":{"te":"7","or":"9","ag":"9","th":"7"},"tags":"","skip":"no"},{"name":"Gu Li","id":3,"style":{"te":"7","or":"6","ag":"8","th":"7"},"tags":"","skip":"no"},{"name":"Ishida Yoshio","id":4,"style":{"te":"9","or":"1","ag":"3","th":"2"},"tags":"","skip":"no"},{"name":"Lee Sedol","id":5,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"no"},{"name":"Luo Xihe","id":6,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Ma Xiaochun","id":7,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"O Meien","id":8,"style":{"te":"2","or":"4","ag":"7","th":"8"},"tags":"","skip":"no"},{"name":"Otake Hideo","id":9,"style":{"te":"9","or":"6","ag":"2","th":"4"},"tags":"","skip":"no"},{"name":"Rui Naiwei","id":10,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Sakata Eio","id":11,"style":{"te":"6","or":"1","ag":"9","th":"8"},"tags":"","skip":"no"},{"name":"Takemiya Masaki","id":12,"style":{"te":"1","or":"2","ag":"7","th":"3"},"tags":"","skip":"no"},{"name":"Yi Ch'ang-ho","id":13,"style":{"te":"9","or":"7","ag":"8","th":"2"},"tags":"","skip":"no"},{"name":"Yi Se-tol","id":14,"style":{"te":"6","or":"10","ag":"10","th":"9"},"tags":"","skip":"no"},{"name":"Yoda Norimoto","id":15,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"More players (OPTIONAL)","list":[{"name":"Chen Zude","id":16,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Cho Tae-hyeon","id":17,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Fujisawa Hideyuki","id":18,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Go Seigen","id":19,"style":{"te":"2","or":"1","ag":"8","th":"6"},"tags":"","skip":"no"},{"name":"Hane Naoki","id":20,"style":{"te":"9","or":"5","ag":"4","th":"3"},"tags":"","skip":"no"},{"name":"Honinbo 
Dosaku","id":21,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Shusaku","id":22,"style":{"te":"9","or":"1","ag":"7","th":"2"},"tags":"","skip":"no"},{"name":"Honinbo Shuwa","id":23,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Huang Longshi","id":24,"style":{"te":"5","or":"1","ag":"10","th":"10"},"tags":"","skip":"no"},{"name":"Jie Li","id":25,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kato Masao","id":26,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kato Shin","id":27,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kobayashi Koichi","id":28,"style":{"te":"10","or":"2","ag":"3","th":"3"},"tags":"","skip":"no"},{"name":"Kong JieMiyazawa Goro","id":29,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Nie Weiping","id":30,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Shao Zhenzhong","id":31,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Suzuki Goro","id":32,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Takao Shinji","id":33,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Wu Songsheng","id":34,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Yamashita Keigo","id":35,"style":{"te":"2","or":"4","ag":"9","th":"6"},"tags":"","skip":"no"},{"name":"Yuki Satoshi","id":36,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"If you watch KGS games 
(OPTIONAL)","list":[{"name":"billlin","id":37,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cashewnut","id":38,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cloudnotes","id":39,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"himountain","id":40,"style":{"te":"6","or":"5","ag":"8","th":"8"},"tags":"","skip":"no"},{"name":"hjekshdf","id":41,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"kyoungnang","id":42,"style":{"te":"7","or":"8","ag":"9","th":"8"},"tags":"","skip":"no"},{"name":"MilanMilan","id":43,"style":{"te":"7","or":"8","ag":"9","th":"8"},"tags":"","skip":"no"},{"name":"pblshtwzrs","id":44,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"smartrobot","id":45,"style":{"te":"9","or":"5","ag":"7","th":"6"},"tags":"","skip":"no"},{"name":"xmianzhu","id":46,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"Additional","list":[{"name":"Iyama Yuta","id":47,"style":{"te":"8","or":"9","ag":"8","th":"6"},"tags":"","skip":"no"}]}]} \ No newline at end of file diff --git a/QUESTIONARE/vladimir_danek b/QUESTIONARE/vladimir_danek new file mode 100644 index 0000000..895c645 --- /dev/null +++ b/QUESTIONARE/vladimir_danek @@ -0,0 +1 @@ +{"interviewee_name":"","interviewee_key":"25b9fb","interviewee_feedback":"","group_lists":[{"name":"Elementary set","list":[{"name":"Chen Yaoye","id":0,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Cho Chikun","id":1,"style":{"te":"9","or":"2","ag":"7","th":"9"},"tags":"","skip":"no"},{"name":"Cho U","id":2,"style":{"te":"7","or":"3","ag":"8","th":"6"},"tags":"","skip":"no"},{"name":"Gu Li","id":3,"style":{"te":"8","or":"10","ag":"10","th":"8"},"tags":"","skip":"no"},{"name":"Ishida Yoshio","id":4,"style":{"te":"10","or":"1","ag":"2","th":"3"},"tags":"","skip":"no"},{"name":"Lee 
Sedol","id":5,"style":{"te":"8","or":"10","ag":"10","th":"8"},"tags":"","skip":"no"},{"name":"Luo Xihe","id":6,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Ma Xiaochun","id":7,"style":{"te":"9","or":"2","ag":"4","th":"3"},"tags":"","skip":"no"},{"name":"O Meien","id":8,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Otake Hideo","id":9,"style":{"te":"5","or":"1","ag":"3","th":"2"},"tags":"","skip":"no"},{"name":"Rui Naiwei","id":10,"style":{"te":"8","or":"5","ag":"9","th":"6"},"tags":"","skip":"no"},{"name":"Sakata Eio","id":11,"style":{"te":"9","or":"2","ag":"9","th":"9"},"tags":"","skip":"no"},{"name":"Takemiya Masaki","id":12,"style":{"te":"2","or":"4","ag":"8","th":"3"},"tags":"","skip":"no"},{"name":"Yi Ch'ang-ho","id":13,"style":{"te":"10","or":"8","ag":"2","th":"4"},"tags":"","skip":"no"},{"name":"Yi Se-tol","id":14,"style":{"te":"8","or":"9","ag":"9","th":"9"},"tags":"","skip":"no"},{"name":"Yoda Norimoto","id":15,"style":{"te":"9","or":"2","ag":"3","th":"3"},"tags":"","skip":"no"}]},{"name":"More players (OPTIONAL)","list":[{"name":"Chen Zude","id":16,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Cho Tae-hyeon","id":17,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Fujisawa Hideyuki","id":18,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Go Seigen","id":19,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Hane Naoki","id":20,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Dosaku","id":21,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Shusaku","id":22,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Shuwa","id":23,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Huang Longshi","id":24,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Jie 
Li","id":25,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kato Masao","id":26,"style":{"te":"4","or":"2","ag":"7","th":"9"},"tags":"","skip":"no"},{"name":"Kato Shin","id":27,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kobayashi Koichi","id":28,"style":{"te":"10","or":"1","ag":"3","th":"2"},"tags":"","skip":"no"},{"name":"Kong JieMiyazawa Goro","id":29,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Nie Weiping","id":30,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Shao Zhenzhong","id":31,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Suzuki Goro","id":32,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Takao Shinji","id":33,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Wu Songsheng","id":34,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Yamashita Keigo","id":35,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Yuki Satoshi","id":36,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"If you watch KGS games 
(OPTIONAL)","list":[{"name":"billlin","id":37,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cashewnut","id":38,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cloudnotes","id":39,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"himountain","id":40,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"hjekshdf","id":41,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"kyoungnang","id":42,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"MilanMilan","id":43,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"pblshtwzrs","id":44,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"smartrobot","id":45,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"xmianzhu","id":46,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"Additional","list":[]}]} \ No newline at end of file diff --git a/README b/README index f3b9af6..b342381 100644 --- a/README +++ b/README @@ -1,8 +1,36 @@ Hello!! -This repository includes various tools used to analyze Go-players style. +This repository includes various tools used to analyze Go-players style. Whats here: + - a tool for database of Go games, whose data structures (like a list of games) + we use all over the place, read utils/godb_models.py and utils/godb_session.py + + - a simple wrapper for pachi to work with the games and scan the raw patterns comfortably + (see pachi.py) + + - stuff to work and form pattern vectors from raw patterns (see game_to_vec.py ) + and to export the vectors as .tab files which can be then used by the Orange Datamining + toolkit to fiddle around with some machine learning stuff, http://orange.biolab.si/ + +All the major files have examples at the bottom and are pretty well commented. -The readme will be updated sometimes soon. 
+REQUIREMENTS: + - if you want to scan the games, you need pachi go engine, instructions below + + - if you want to toy with machine learning stuff, you need the Orange datamining toolkit + get it from http://orange.biolab.si/ +------ +How to prepare pachi: +run this in some dir (assuming in a parent dir of gostyle - this dir) + +$ git clone git://repo.or.cz/pachi.git +$ cd pachi +$ git checkout jmhack +# this is a ugly way to make the contiguity feature say the real distance +# instead of a proximity flag it does by def. +$ patch pattern.c ../gostyle/PATCH_pachi.diff +$ make +# this is necessary, otw pachi segfaults +$ touch patterns.spat diff --git a/TEST_FILES/games/1930-01-00a.sgf b/TEST_FILES/games/1930-01-00a.sgf new file mode 100644 index 0000000..c270abc --- /dev/null +++ b/TEST_FILES/games/1930-01-00a.sgf @@ -0,0 +1,32 @@ +(;SZ[19]FF[3] +PW[Maeda Nobuaki] +WR[4d] +PB[Hashimoto Utaro] +BR[4d] +DT[Published 1930-01] +TM[8h] +KM[0] +RE[W+2] +;B[qd];W[qp];B[ce];W[dc];B[dq];W[ci];B[cn];W[oc];B[np];W[pn];B[mc];W[oe] +;B[pf];W[cd];B[de];W[fd];B[eh];W[cl];B[jc];W[cg];B[ef];W[gc];B[pc];W[pb] +;B[qb];W[kd];B[me];W[kc];B[of];W[jq];B[gq];W[mb];B[lb];W[md];B[ld];W[lc] +;B[nd];W[nc];B[md];W[nb];B[kb];W[jd];B[ic];W[id];B[od];W[qa];B[ma];W[oa] +;B[hc];W[hd];B[la];W[na];B[ja];W[rb];B[qc];W[rc];B[rd];W[sc];B[dj];W[cj] +;B[pk];W[jo];B[nn];W[pl];B[qq];W[pq];B[pp];W[op];B[po];W[oo];B[qo];W[oq] +;B[on];W[qn];B[ro];W[om];B[rn];W[ql];B[qr];W[no];B[mo];W[mn];B[nm];W[nl] +;B[mm];W[ml];B[lm];W[mp];B[ln];W[lp];B[ok];W[ol];B[ll];W[qk];B[qm];W[pm] +;B[rm];W[pi];B[lj];W[qg];B[qf];W[rg];B[be];W[fp];B[gp];W[cp];B[cq];W[dp] +;B[ep];W[eo];B[eq];W[do];B[bp];W[bo];B[bq];W[bn];B[dk];W[dl];B[bc];W[ig] +;B[ck];W[bk];B[bl];W[bm];B[oh];W[dh];B[fj];W[hr];B[cb];W[hb];B[ib];W[db] +;B[gr];W[mj];B[di];W[li];B[kj];W[mi];B[ph];W[qi];B[or];W[nq];B[pr];W[nr] +;B[el];W[mg];B[pe];W[cc];B[ba];W[ij];B[ik];W[hk];B[il];W[gj];B[dg];W[ch] +;B[fk];W[jj];B[hl];W[fg];B[eg];W[ki];B[lf];W[bf];B[oi];W[af];B[cf];W[ah] 
+;B[bd];W[em];B[fm];W[en];B[ir];W[iq];B[hq];W[jr];B[hs];W[ha];B[ia];W[fb] +;B[rl];W[rk];B[qh];W[rh];B[pj];W[qj];B[jk];W[jm];B[ko];W[in];B[kl];W[gi] +;B[ff];W[gk];B[gl];W[gf];B[kg];W[jg];B[fh];W[gg];B[jf];W[if];B[kh];W[jh] +;B[rf];W[ns];B[ed];W[ec];B[fe];W[ge];B[kp];W[kq];B[fo];W[fn];B[go];W[gm] +;B[fl];W[js];B[gn];W[is];B[gh];W[hh];B[hr];W[sl];B[sm];W[sk];B[je];W[ke] +;B[kf];W[mf];B[le];W[nf];B[ne];W[fi];B[ei];W[ap];B[aq];W[ao];B[ie];W[he] +;B[sg];W[sh];B[sf];W[os];B[ps];W[ae];B[lo];W[ad];B[sd];W[ac];B[ab];W[da] +;B[ca];W[dd];B[pg];W[ri];B[nh];W[mh];B[ng];W[lk];B[kk];W[mk];B[ee] +) diff --git a/TEST_FILES/games/1960-00-00f.sgf b/TEST_FILES/games/1960-00-00f.sgf new file mode 100644 index 0000000..ab2e00c --- /dev/null +++ b/TEST_FILES/games/1960-00-00f.sgf @@ -0,0 +1,22 @@ +(;SZ[19]FF[3] +PW[Kikuchi Yasuro] +PB[Rin Kaiho] +BR[5d] +DT[1960] +TM[2h] +KM[4.5] +RE[B+R] +;B[cp];W[pq];B[dc];W[pc];B[fq];W[cf];B[pe];W[qe];B[qf];W[qd];B[pf];W[nc] +;B[po];W[qm];B[qq];W[qj];B[pp];W[oq];B[rp];W[lq];B[oj];W[qh];B[oh];W[ck] +;B[cm];W[iq];B[cd];W[ef];B[ci];W[dj];B[di];W[ei];B[dg];W[eg];B[df];W[de] +;B[eh];W[fh];B[dh];W[ce];B[ee];W[ej];B[dd];W[fe];B[ed];W[bh];B[bi];W[cg] +;B[ag];W[bd];B[bc];W[ah];B[ai];W[bg];B[bl];W[af];B[fi];W[gh];B[gi];W[hh] +;B[hi];W[bk];B[ak];W[dm];B[dn];W[en];B[do];W[fm];B[ih];W[gf];B[dl];W[cl] +;B[bn];W[em];B[hg];W[gg];B[gd];W[ig];B[jh];W[if];B[hl];W[gk];B[hk];W[gp] +;B[gq];W[fp];B[eq];W[in];B[jm];W[ic];B[kf];W[nf];B[ne];W[lf];B[lg];W[mf] +;B[pd];W[od];B[qc];W[rc];B[qb];W[rb];B[rf];W[pb];B[ph];W[ke];B[rm];W[rl] +;B[rn];W[ok];B[nk];W[pj];B[ol];W[oi];B[nj];W[pi];B[nh];W[ni];B[mi];W[pk] +;B[nl];W[fc];B[fd];W[eb];B[gb];W[gc];B[fb];W[ec];B[hc];W[hb];B[hd];W[ga] +;B[ib];W[fa];B[jb];W[db];B[jd];W[kg];B[kh];W[kc];B[kd];W[lc];B[jc];W[ie] +;B[gb];W[cb];B[ha];W[cc];B[jg];W[ri];B[ff];W[fg];B[ge] +) diff --git a/TEST_FILES/games/1976-00-00e.sgf b/TEST_FILES/games/1976-00-00e.sgf new file mode 100644 index 0000000..4f08d7e --- /dev/null +++ 
b/TEST_FILES/games/1976-00-00e.sgf @@ -0,0 +1,33 @@ +(;SZ[19]FF[3] +PW[Takagi Shoichi] +WR[7d] +PB[Juergen Mattern] +BR[6d ama] +DT[1976] +TM[2.5h] +OH[3] +HA[3] +RE[Jigo] +AB[pd][dp][pp] +;W[dd];B[pj];W[nq];B[kp];W[qq];B[qp];W[pq];B[op];W[np];B[oq];W[or];B[pr] +;W[qr];B[nr];W[ps];B[no];W[mo];B[mp];W[mq];B[mr];W[lq];B[lp];W[kr];B[nn] +;W[cj];B[jd];W[gd];B[qd];W[md];B[mf];W[le];B[jf];W[lf];B[mg];W[lg];B[jh] +;W[li];B[lh];W[kh];B[mh];W[kg];B[ki];W[jg];B[ji];W[ig];B[hi];W[lj];B[ik] +;W[pn];B[ro];W[pk];B[qj];W[oj];B[qk];W[oi];B[ph];W[nm];B[pl];W[mn];B[om] +;W[mm];B[pm];W[jm];B[jo];W[jq];B[im];W[jl];B[in];W[il];B[hl];W[jk];B[hk] +;W[ok];B[kj];W[kk];B[lk];W[mk];B[ll];W[ni];B[he];W[hh];B[jj];W[ml];B[ge] +;W[fd];B[gg];W[gh];B[ef];W[hd];B[id];W[eh];B[cf];W[ce];B[hb];W[gb];B[if] +;W[ch];B[hg];W[ih];B[ee];W[ed];B[be];W[bd];B[ir];W[kq];B[de];W[cd];B[cl] +;W[fl];B[mi];W[mj];B[iq];W[do];B[co];W[cn];B[eo];W[dn];B[cp];W[en];B[fo] +;W[fn];B[jp];W[ns];B[bn];W[gm];B[cm];W[hm];B[ek];W[fk];B[ej];W[fj];B[nc] +;W[og];B[pg];W[go];B[gp];W[hp];B[jn];W[km];B[gq];W[ho];B[ip];W[nd];B[of] +;W[hq];B[hr];W[bo];B[bp];W[bm];B[ao];W[gr];B[fq];W[fr];B[eq];W[oc];B[pb] +;W[pc];B[qc];W[ob];B[qb];W[dl];B[ck];W[dk];B[bj];W[bi];B[dj];W[ci];B[dm] +;W[el];B[lc];W[mc];B[hc];W[kb];B[jb];W[kc];B[kd];W[lb];B[fb];W[gc];B[ga] +;W[eb];B[ea];W[db];B[rq];W[rr];B[sr];W[bf];B[bg];W[ae];B[da];W[ca];B[fa] +;W[cb];B[cg];W[jc];B[ic];W[ja];B[ks];W[ms];B[jr];W[lr];B[ib];W[ng];B[nf] +;W[fg];B[gf];W[bk];B[bl];W[aj];B[ln];W[kn];B[lo];W[lm];B[ld];W[js];B[is] +;W[nb];B[pi];W[oe];B[pe];W[eg];B[dg];W[ag];B[al];W[bh];B[ak];W[bj];B[pa] +;W[dh];B[df];W[em];B[am];W[fc];B[ha];W[ke];B[je];W[ol];B[qn];W[oa];B[hn] +;W[gn];B[rs];W[qs];B[sq];W[ss];B[er];W[rs] +) diff --git a/TEST_FILES/games/1984-03-11d.sgf b/TEST_FILES/games/1984-03-11d.sgf new file mode 100644 index 0000000..2e418a7 --- /dev/null +++ b/TEST_FILES/games/1984-03-11d.sgf @@ -0,0 +1,24 @@ +(;SZ[19]FF[3] +PW[Chang Su-yeong] +WR[6d] +PB[Seo Pong-su] +BR[8d] +DT[1984-03-11] 
+KM[5.5] +RE[B+R] +;B[pd];W[dc];B[pq];W[cp];B[ep];W[qo];B[qm];W[oo];B[np];W[lp];B[no];W[pm] +;B[ql];W[nn];B[mn];W[nm];B[lo];W[jp];B[qp];W[qn];B[ol];W[rp];B[rq];W[pp] +;B[qq];W[pl];B[pk];W[ok];B[pj];W[nk];B[op];W[po];B[cq];W[gq];B[dp];W[qh] +;B[oj];W[rj];B[om];W[pn];B[rk];W[qj];B[qk];W[qf];B[pg];W[pf];B[qg];W[rg] +;B[ph];W[of];B[re];W[rf];B[nc];W[mm];B[qi];W[ri];B[rh];W[sh];B[mi];W[lk] +;B[co];W[qh];B[pi];W[lf];B[ki];W[ce];B[qd];W[md];B[ne];W[nd];B[oe];W[ng] +;B[mg];W[mh];B[lh];W[mf];B[nh];W[nf];B[mc];W[kc];B[ld];W[kd];B[lc];W[ke] +;B[ec];W[dd];B[db];W[cb];B[hd];W[hf];B[fb];W[fd];B[ge];W[gf];B[cc];W[cd] +;B[ic];W[jb];B[fe];W[ed];B[ff];W[fg];B[eg];W[he];B[jf];W[je];B[fh];W[gg] +;B[df];W[gd];B[cf];W[ck];B[ci];W[ek];B[bb];W[bc];B[ca];W[cc];B[hb];W[gc] +;B[gb];W[ba];B[aa];W[ac];B[da];W[ib];B[hc];W[bf];B[bg];W[be];B[gh];W[eh] +;B[ig];W[ie];B[ei];W[dh];B[di];W[ef];B[ee];W[de];B[fl];W[mr];B[nr];W[mp] +;B[mo];W[cm];B[em];W[dg];B[fk];W[jk];B[kl];W[kk];B[iq];W[hp];B[lr];W[mq] +;B[ms];W[jq];B[jr];W[kr];B[kq];W[kp];B[ks];W[nq];B[oq];W[or];B[ns];W[lq] +;B[ir];W[kr];B[ls];W[ip];B[im] +) diff --git a/TEST_FILES/games/1990-00-00b.sgf b/TEST_FILES/games/1990-00-00b.sgf new file mode 100644 index 0000000..d8e5668 --- /dev/null +++ b/TEST_FILES/games/1990-00-00b.sgf @@ -0,0 +1,32 @@ +(;SZ[19]FF[3] +PW[Maeda Ryoji] +WR[5d] +PB[Ishida Yoshio] +BR[9d] +HA[8] +RE[B+68] +AB[dd][dj][dp][jd][jp][pd][pj][pp] +;W[qf];B[of];W[qc];B[qd];W[pc];B[oc];W[ob];B[nc];W[nb];B[mc];W[rd];B[re] +;W[rc];B[qe];W[qn];B[qo];W[pn];B[pl];W[qk];B[ql];W[nq];B[pq];W[ro];B[rp] +;W[po];B[np];W[qp];B[rq];W[pk];B[ok];W[qj];B[ol];W[pi];B[oj];W[rl];B[rm] +;W[rk];B[qm];W[rf];B[mp];W[fq];B[eq];W[fp];B[hp];W[iq];B[jq];W[dn];B[eo] +;W[er];B[dr];W[ep];B[dq];W[do];B[en];W[em];B[fn];W[dl];B[gm];W[gr];B[co] +;W[cn];B[bo];W[bn];B[br];W[bq];B[cq];W[ip];B[io];W[hq];B[fk];W[cj];B[ci] +;W[ck];B[bi];W[di];B[ej];W[jo];B[in];W[dh];B[cg];W[ko];B[lp];W[gl];B[fl] +;W[fm];B[hl];W[gn];B[gk];W[hn];B[im];W[og];B[qi];W[ri];B[ng];W[nf];B[pg] 
+;W[he];B[hc];W[je];B[ie];W[oh];B[ph];W[oe];B[pf];W[qh];B[nh];W[cf];B[dg] +;W[ee];B[ed];W[id];B[if];W[hd];B[ic];W[jc];B[kd];W[hf];B[gc];W[ke];B[ig] +;W[ld];B[kc];W[lc];B[jb];W[kb];B[jc];W[me];B[oi];W[kg];B[lf];W[le];B[mb] +;W[od];B[lb];W[pe];B[jh];W[kh];B[ki];W[cd];B[mf];W[ne];B[cc];W[de];B[bc] +;W[eg];B[eh];W[ei];B[fh];W[fi];B[fg];W[gi];B[ef];W[ji];B[hg];W[bp];B[cp] +;W[bs];B[cs];W[ar];B[as];W[lr];B[kq];W[oq];B[op];W[qr];B[qq];W[pr];B[aq] +;W[ln];B[li];W[rn];B[sm];W[nn];B[mn];W[mm];B[mo];W[jl];B[nm];W[nl];B[om] +;W[ii];B[km];W[bg];B[ch];W[fe];B[be];W[bf];B[ce];W[ae];B[bd];W[il];B[hm] +;W[df];B[gg];W[bj];B[ge];W[ff];B[eg];W[gf];B[gd];W[jm];B[jn];W[kn];B[hj] +;W[ai];B[ah];W[aj];B[bh];W[kl];B[hi];W[kj];B[ml];W[ll];B[lm];W[mm];B[ih] +;W[lj];B[jj];W[mk];B[rr];W[nr];B[kr];W[os];B[qg];W[rg];B[qi];W[rh];B[ir] +;W[mi];B[ij];W[lh];B[hr];W[gq];B[mg];W[lq];B[kp];W[rs];B[ji];W[hs];B[js] +;W[sl];B[go];W[fo];B[es];W[ao];B[ap];W[an];B[mh];W[kf];B[ni];W[mj];B[on] +;W[ik];B[hk];W[fn];B[ho];W[ek];B[fj];W[fr];B[mq];W[mr];B[nk];W[ml];B[jk] +;W[kk];B[na];W[pa];B[ls];W[ms];B[ks];W[sr];B[sq];W[qs];B[pi];W[ss] +) diff --git a/TEST_FILES/test_bdist.sgf b/TEST_FILES/test_bdist.sgf new file mode 100644 index 0000000..2204d8a --- /dev/null +++ b/TEST_FILES/test_bdist.sgf @@ -0,0 +1,6 @@ +(;GM[1]FF[4]AP[qGo:1.5.4]ST[1] +SZ[19]HA[0]KM[5.5]PW[White]PB[Black] + +;B[aa];W[ss];B[bb];W[rr];B[cc];W[qq];B[dd];W[pp];B[ee];W[oo] +;B[ff];W[nn];B[gg];W[mm];B[hh];W[ll];B[ii];W[kk] +) \ No newline at end of file diff --git a/TEST_FILES/test_bdist2.sgf b/TEST_FILES/test_bdist2.sgf new file mode 100644 index 0000000..ed0466b --- /dev/null +++ b/TEST_FILES/test_bdist2.sgf @@ -0,0 +1 @@ +(;GM[1]FF[4]SZ[19]AP[Kombilo];B[aa];W[pp];B[bb];W[pj];B[cc];W[jq];B[dd];W[dp];B[ee];W[od]) diff --git a/TEST_FILES/test_capture.sgf b/TEST_FILES/test_capture.sgf new file mode 100644 index 0000000..164b210 --- /dev/null +++ b/TEST_FILES/test_capture.sgf @@ -0,0 +1,7 @@ +(;GM[1]FF[4]AP[qGo:1.5.4]ST[1] 
+SZ[19]HA[0]KM[5.5]PW[White]PB[Black] + +;B[aa];W[ba];B[ab];W[bb];B[ac];W[bc];B[sa];W[ad];B[ra];W[qa] +;B[rb];W[qb];B[sb];W[qc];B[rc];W[rd];B[sc];W[sd];B[jb];W[ib] +;B[ic];W[jc];B[kc];W[kb];B[jd];W[jj] +) \ No newline at end of file diff --git a/TEST_FILES/test_cont.sgf b/TEST_FILES/test_cont.sgf new file mode 100644 index 0000000..5c105fa --- /dev/null +++ b/TEST_FILES/test_cont.sgf @@ -0,0 +1,4 @@ +(;GM[1]FF[4]AP[qGo:1.5.4]ST[1] +SZ[19]HA[0]KM[5.5]PW[White]PB[Black] + +;B[aa];W[ba];B[da];W[ga];B[ka];W[pa];B[pg];W[pn];B[hn];W[il];B[fk];W[fn]) diff --git a/config.py b/config.py new file mode 100644 index 0000000..58f81ad --- /dev/null +++ b/config.py @@ -0,0 +1,6 @@ +PACHI_DIR='../pachi/' +DB_FILE='./GODB.db' +QUESTIONARE_DIRECTORY = './QUESTIONARE' + +OUTPUT_DIR = './OUTPUT' + diff --git a/data_about_players.py b/data_about_players.py new file mode 100755 index 0000000..fa767ac --- /dev/null +++ b/data_about_players.py @@ -0,0 +1,289 @@ +#!/usr/bin/python + +import numpy +import math + +import load_questionare + +from config import QUESTIONARE_DIRECTORY + +def questionare_average(questionare_list, silent=False, tex=False, cnt_limit=1): +# Otake Hideo & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\ + total={} + count={} + for questionare in questionare_list: + for name in questionare.keys(): + if name in count: + count[name] += 1 + else: + count[name] = 1 + total[name] = [] + total[name].append(questionare[name]) + + l=[] + for name, counter in count.items(): + l.append( (counter, name) ) + l.sort() + l.reverse() + variance=[] + result={} + + for counter, name in l: + if counter >= cnt_limit: + means=[] + if not silent: + print "%d: %20s"%(counter, name), + + a = numpy.array(total[name]).transpose() + for b in a: + means.append(b.mean()) + if not silent: + if not tex: + print u"%2.3f \u00B1 %2.3f "%(b.mean(), numpy.sqrt(b.var())), + else: + print u"& $%2.1f \pm %2.1f$"%(b.mean(), numpy.sqrt(b.var())), + variance.append(numpy.sqrt(b.var())) + if 
not silent: + if not tex: + print + else: + print "\\\\" + variance.append(numpy.sqrt(b.var())) + result[name] = means + + if not silent: + if not tex: + print "Mean standard deviation is: %2.3f"%(numpy.array(variance).mean(),) + return result + +def questionare_average_raw(questionare_list): + import numpy + total={} + count={} + for questionare in questionare_list: + for name in questionare.keys(): + if name in count: + count[name] += 1 + else: + count[name] = 1 + total[name] = [] + total[name].append(questionare[name]) + + l=[] + for name, counter in count.items(): + l.append( (counter, name) ) + l.sort() + l.reverse() + variance=[] + result={} + for counter, name in l: + if counter > 1: + means=[] + print "%s, %d,"%(name, counter), + a = numpy.array(total[name]).transpose() + for b in a: + means.append(b.mean()) + print u"%2.3f,"%(b.mean()), + print + result[name] = means + return result + +class Data: + ### Explicit list of players + + ### + ### Following code consist of expert based knowledge kindly supplied by + ### Alexander Dinerstein 3-pro, Motoki Noguchi 7-dan and Vit Brunner 4-dan) + + ### The vector at each name corresponds with + ### ( + questionare_annotations = ['territory', 'orthodox', 'aggressiveness', 'thickness'] + ## the orthodox scale corresponds to novelty, the "name" of the scale remains + # the same for historical reasons --- the scales are defined the same though + + questionare_list = [ + #questionare_vit_brun + { + "Chen Yaoye": (7, 5, 7, 6), + "Cho Chikun": (9, 7, 7, 9), + "Cho U": (4, 6, 7, 4), + "Gu Li": (5, 6, 9, 5), + "Ishida Yoshio": (6, 3, 5, 5), + "Luo Xihe": (8, 4, 7, 7), + "Ma Xiaochun": (5, 7, 7, 7), + "O Meien": (3, 9, 6, 5), + "Otake Hideo": (4, 3, 6, 5), + "Rui Naiwei": (5, 6, 8, 5), + "Sakata Eio": (6, 4, 8, 6), + "Takemiya Masaki": (1, 4, 7, 2), + #"Yi Ch'ang-ho 2004-": (7, 6, 4, 4), + #"Yi Ch'ang-ho 2005+": (7, 6, 6, 4), + "Yi Ch'ang-ho": (7, 6, 6, 4), + "Yi Se-tol": (6, 5, 9, 5), + "Yoda Norimoto": (4, 4, 7, 3) + }, + # 
questionare_motoki_noguchi + { + "Cho Chikun": (8, 9, 8, 8 ), + "Cho U": (9, 7, 6, 8), + "Gu Li": (7, 8, 10, 4 ), + "Ishida Yoshio": (9, 6, 2, 6), + "Luo Xihe": (6, 8, 9, 7 ), + "Ma Xiaochun": (9, 6, 7, 8), + "O Meien": (1, 10, 10, 2 ), + "Otake Hideo": (4, 3, 5, 3), + "Rui Naiwei": (6, 6, 10, 2), + "Sakata Eio": (10, 5, 6, 10), + "Takemiya Masaki": (2,6, 6, 1), + #"Yi Ch'ang-ho 2004-": (8, 3, 2, 3), + # P: udelal jsem to z 2004- + "Yi Ch'ang-ho": (8, 3, 2, 3), + "Yi Se-tol": (5, 10, 10, 8 ), + "Yoda Norimoto": (8, 2, 2, 5), + "Fujisawa Hideyuki": (4, 8, 7, 4 ), + "Go Seigen": (8, 10, 9, 6), + "Hane Naoki": (8, 2, 4, 6 ), + "Honinbo Dosaku": (2, 10, 8, 5 ), + "Honinbo Shusaku": (8, 3, 2, 6), + "Honinbo Shuwa": (10, 8, 2, 10), + "Kato Masao": (2,3, 9, 4), + "Kobayashi Koichi": (8, 3, 3, 6), + "Miyazawa Goro": (1, 10, 10, 3), + "Takao Shinji": (4, 3, 7, 4 ), + "Yamashita Keigo": (2, 8, 10, 4 ), + "Yuki Satoshi": (2, 8, 10, 4) + }, + #questionare_alex_dinner + { + "Chen Yaoye": (5, 3, 5, 5), + "Cho Chikun": (10, 7, 5, 10), + "Cho U": (9, 5, 3, 7), + "Gu Li": (5, 7, 8, 3), + "Ishida Yoshio": (9, 6, 3, 5), + "Luo Xihe": (8, 10, 7, 4), + "Ma Xiaochun": (10, 6, 3, 9), + "O Meien": (4, 10, 9, 4), + "Otake Hideo": (5, 3, 3, 3), + "Rui Naiwei": (3, 5, 9, 3), + "Sakata Eio": (7, 5, 8, 8), + "Takemiya Masaki": (1, 9, 8, 1), + #"Yi Ch'ang-ho 2004-": (6, 6, 2, 1), + #"Yi Ch'ang-ho 2005+": (5, 4, 5, 3), + # commented because duplicates 2005+ + "Yi Ch'ang-ho": (5, 4, 5, 3), + "Yi Se-tol": (5, 5, 9, 7), + "Yoda Norimoto": (7, 7, 4, 2), + "Chen Zude": (3, 8, 6, 5), + "Cho Tae-hyeon": (1, 4, 4, 2), + "Fujisawa Hideyuki": (3, 10, 7, 4), + "Go Seigen": (4, 8, 7, 4), + "Hane Naoki": (7, 3, 4, 3), + "Jie Li": (5, 3, 5, 4), + "Kato Masao": (3, 6, 10, 4), + "Kobayashi Koichi": (10, 2, 2, 5), + "Miyazawa Goro": (2, 10, 9, 5), + "Nie Weiping": (3, 7, 8, 4), + "Shao Zhenzhong": (4, 5, 5, 4), + "Suzuki Goro": (4, 7, 5, 5), + "Takao Shinji": (6, 4, 4, 5), + "Wu Songsheng": (2, 10, 7, 4), + 
"Yamashita Keigo": (2, 10, 9, 2), + "Yuki Satoshi": (4, 9, 8, 5), + #"breakfast": (7, 7, 3, 4), + #"rapyuta/daien": (4, 7, 6, 5), + #"MilanMilan": (5, 5, 6, 4), + #"roln111-": (6, 5, 7, 5), + #"somerville": (4, 5, 5, 6), + #"artem92-": (7, 4, 3, 2), + #"TheCaptain": (3, 8, 7, 6) + } + ## + guys from the online questionare + ] + load_questionare.scan_d(QUESTIONARE_DIRECTORY) + + questionare_total = questionare_average(questionare_list, silent=True) + +def get_all_player_names(limit=1): + pc = {} + + for q in Data.questionare_list: + for p in q.keys(): + pc[p] = pc.get(p, 0) + 1 + + ps = set( p for p in pc.keys() if pc[p] >= limit ) + + return ps + +def get_interesting_pros(style, top, bottom, without_dist=True): + style_vec = numpy.array(style) + + dist = [ + ( math.sqrt( sum(numpy.power(style_vec - numpy.array(pro_style), 2))), + pro_name) for pro_name, pro_style in Data.questionare_total.iteritems() + ] + dist.sort() + if not without_dist: + return dist[:top], dist[-bottom:] + + def second((a, b)): + return b + + return map(second, dist[:top]), map(second, dist[-bottom:]) + +if __name__ == '__main__': + def main(tex=False): + """this prints the averaged questionare data, along with number of interviewees who + answered for the particular pro""" + #print get_all_player_names(4) + + questionare_total = questionare_average(Data.questionare_list, cnt_limit=2, silent=False, tex=tex) + + pa = get_all_player_names(2) + + vals = numpy.array([ va for pn, va in questionare_total.iteritems() if pn in pa ]) + #print vals.shape + + key2vec = {} + for desc, num in zip(Data.questionare_annotations, range(4)): + sli = vals[:, num] + key2vec[desc] = sli + if not tex: + print u"%s\n mean: %2.3f \u00B1 %2.3f"%(desc, sli.mean(), sli.std()) + else: + print u"%s & %2.3f \\pm %2.3f \\"%(desc, sli.mean(), sli.std()) + + from utils.utils import pearson_coef + + qa = Data.questionare_annotations + print + print "PAIRWISE CORRELATIONS" + print '', + print " | ".join("%15s"%an for an in 
(['']+qa)) + for i in xrange(len(qa)): + print "%15s | " % qa[i], + for j in xrange(len(Data.questionare_annotations)): + if i > j: + print "%15s |" % ('' ), + else: + p = pearson_coef(key2vec[qa[i]], key2vec[qa[j]]) + print "%15s |" % ( "%.3f" % p ), + print + + main() + + ## + ## + + def test_style(style): + near, dist = get_interesting_pros(style, 3, 3) + print "similar" + for p in near: + print p + print + print "distant" + for p in dist: + print p + + #test_style([1, 2, 3, 4]) + + + diff --git a/game_to_vec.py b/game_to_vec.py new file mode 100644 index 0000000..b77b581 --- /dev/null +++ b/game_to_vec.py @@ -0,0 +1,1000 @@ +import logging +import subprocess +from subprocess import PIPE + +import os +import sys +from os import remove +from os.path import abspath, exists +import math + +import itertools +from itertools import chain +import shutil +import re +import numpy + +import utils +from utils import misc, godb_models, godb_session +from utils.db_cache import declare_pure_function, cache_result +from utils.misc import first_true_pred, partial_right, partial +from utils.utils import head +from utils.colors import * +from utils.godb_models import ProcessingError + + +from result_file import ResultFile, get_output_resultfile, get_output_resultpair +from config import OUTPUT_DIR +import pachi +from pachi import scan_raw_patterns, generate_spatial_dictionary + +""" +Contains code for conversion of a game (or list of them) into a vector, +using pachi. 
+Moreover it allows to form .tab files to be used by the Orange datamining framework +""" + + +pat_file_regexp = '^\s*(\d+)\s*(.+)$' + +def _make_interval_annotations(l, varname): + """ + >>> _make_interval_annotations([10,11,12,13], 'X') + ['X <= 10', 'X == 11', 'X == 12', 'X == 13', 'X > 13'] + >>> _make_interval_annotations([22], 'X') + ['X <= 22', 'X > 22'] + >>> _make_interval_annotations([-1, 20], 'X') + ['X <= -1', '-1 < X <= 20', 'X > 20'] + + """ + if not all( misc.is_int(x) for x in l): + raise ValueError("Interval boundaries must be a number.") + if not l: + return [ "any " + varname ] + + prev = None + annots = [] + for point in l + ['LAST']: + s = varname + # if the interval size is 1 specify the interval precisely + if point != 'LAST' and prev == point - 1: + s = "%s == %d" % (s, point) + else: + # if not first, add left boundary + if prev != None: + # when we do not have right boundary as well + if point == "LAST": + # nicer like this I guess + s = "%s > %d" % (s, prev) + else: + s = "%d < %s" % (prev, s) + # if not last, add right boundary + if point != 'LAST': + s = "%s <= %d" % (s, point) + + annots.append(s) + prev = point + + return annots + +## game -> BlackWhite( vector_black, vector_white ) +class BWBdistVectorGenerator: + def __init__(self, by_line=[2,3,4], by_moves=[26,76]): + self.by_line = by_line + self.by_moves = by_moves + + if any( x%2 for x in by_moves ): + logging.warn("BWDistVectorGenerator called with odd number of moves" + "specifying the hist size => this means that the players" + "wont have the same number of moves in the buckets!!") + + # nice annotations + line_annots = _make_interval_annotations(by_line, 'bdist') + move_annots = _make_interval_annotations(by_moves, 'move') + #line_annots = [ 'bdist <= %d'%line for line in by_line ] + [ 'bdist other'] + # move_annots = [ 'move <= %d'%move for move in by_moves ] + [ 'move other'] + + self.annotations = [ "(bdist histogram: %s, %s)"%(m,b) for m,b in 
itertools.product(move_annots, line_annots) ] + self.types = [ "continuous" ] * len(self.annotations) + + def leq_fac(val): + return lambda x : x <= val + + # predicates giving bucket coordinate + self.line_preds = [ leq_fac(line) for line in by_line ] + [ lambda line : True ] + self.move_preds = [ leq_fac(movenum) for movenum in by_moves ] + [ lambda movenum : True ] + + def __repr__(self): + return 'BWBdistVectorGenerator(by_line=%s, by_moves=%s)'%(repr(self.by_line), + repr(self.by_moves)) + + def __call__(self, game): + """ + For a game, creates histograms of moves distance from border. + The histograms' granularity is specified by @by_line and @by_moves parameters. + + The @by_moves makes different histogram for each game phase, e.g.: + by_moves=[] makes one histogram for whole game + by_moves=[50] makes two histograms, one for first 50 moves (including) + second for the rest + by_moves=[26, 76] makes three histograms, + first 26 moves (X <=26) ~ opening + first 76 moves (26 < X <= 76) ~ middle game + rest of the game (76 < X) ~ end game + NOTE: of the by moves number should be even, so that we count the same + number of moves for every player. 
+ + The @by_line specifies granularity of each histogram, that is + by_line = [3] each hist has 2 buckets, one counts moves on first three + lines, second for the rest + + by_line = [3, 4, 5] four buckets/histogram, X <= 3, X = 4, X = 5, X > 5 + """ + # scan game, ignore spatials + col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False) + + buckets = {} + for color in PLAYER_COLORS: + # histogram + buckets[color] = numpy.zeros(len(self.line_preds) * len(self.move_preds)) + + for movenum, (color, pat) in enumerate(col_pat): + try: + bdist = pat.first_payload('border') + except StopIteration: + continue + + # X and Y coordinates + line_bucket = first_true_pred(self.line_preds, bdist + 1) # line = bdist + 1 + move_bucket = first_true_pred(self.move_preds, movenum + 1) # movenum is counted from zero + + # histogram[color][X][Y] += 1 + xy = line_bucket + move_bucket * len(self.line_preds) + buckets[color][xy] += 1 + + #print movenum, color, bdist, " \t", + #print line_bucket, move_bucket, + #print color, xy + + return BlackWhite(buckets[PLAYER_COLOR_BLACK], buckets[PLAYER_COLOR_WHITE]) + +## game -> BlackWhite( vector_black, vector_white ) +class BWLocalSeqVectorGenerator: + def __init__(self, local_threshold=5): + self.local_threshold = local_threshold + self.annotations = [ '(local seq < %d: sente)'%local_threshold, + '(local seq < %d: gote)'%local_threshold, + '(local seq < %d: sente - gote)'%local_threshold, ] + self.types = [ "continuous" ] * len(self.annotations) + + def __repr__(self): + return 'BWLocalSeqVectorGenerator(local_threshold=%s)'%(repr(self.local_threshold)) + + def __call__(self, game): + """self.local_threshold gives threshold specifiing what is considered to be a local + sequence, moves closer (or equal) than self.local_threshold in gridcular matrix + to each other are considered local.""" + # scan game, ignore spatials + col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False) + + SENTE_COOR = 0 + GOTE_COOR 
= 1 + DIFF_COOR = 2 + + count = {PLAYER_COLOR_BLACK : numpy.zeros(3), + PLAYER_COLOR_WHITE : numpy.zeros(3)} + + last_local = False + seq_start = None + for movenum, (color, pat) in enumerate(col_pat): + if not pat.has_feature('cont'): + local = False + else: + local = pat.first_payload('cont') <= self.local_threshold + + # if the sequence just started + if local and not last_local: + # this color had to reply locally, so it was the other guy that + # started the sequence + seq_start = the_other_color(color) + + # if the sequence just ended + if not local and last_local: + # the player who started the sequence gets to continue elsewhere + if color == seq_start: + count[seq_start][ SENTE_COOR ] += 1 + # if he does not <=> he lost tempo with the sequence + else: + count[seq_start][ GOTE_COOR ] += 1 + + last_local = local + + for color in PLAYER_COLORS: + cnt = count[color] + cnt[DIFF_COOR] = cnt[SENTE_COOR] - cnt[GOTE_COOR] + + return BlackWhite(count[PLAYER_COLOR_BLACK], count[PLAYER_COLOR_WHITE]) + +## game -> BlackWhite( vector_black, vector_white ) +class BWCaptureVectorGenerator: + def __init__(self, by_moves=[26,76], offset=6, payload_size=4): + """The params @offset and @payload size have to be the constants from pachi/pattern.h, + corresponding to: + offset = PF_CAPTURE_COUNTSTONES + payload_size = CAPTURE_COUNTSTONES_PAYLOAD_SIZE + """ + self.offset = offset + self.payload_size = payload_size + self.by_moves = by_moves + + if any( x%2 for x in by_moves ): + logging.warn("BWCaptureVectorGenerator called with odd number of moves" + "specifying the hist size => this means that the players" + "wont have the same number of moves in the buckets!!") + + # nice annotations + capture_annots = [ 'captured', 'lost', 'difference' ] + move_annots = _make_interval_annotations(by_moves, 'move') + + self.annotations = [ "(capture histogram: %s, %s)"%(m,b) for m,b in itertools.product(move_annots, capture_annots) ] + self.types = [ "continuous" ] * len(self.annotations) + + 
def leq_fac(val): + return lambda x : x <= val + + # predicates giving bucket coordinate + self.move_preds = [ leq_fac(move) for move in by_moves ] + [ lambda movenum : True ] + + def __repr__(self): + args = map(repr, [self.by_moves, self.offset, self.payload_size]) + return 'BWCaptureVectorGenerator(by_moves=%s, offset=%s, payload_size=%s)'% tuple(args) + + def __call__(self, game): + # scan game, ignore spatials + col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False) + + buckets = {} + for color in PLAYER_COLORS: + buckets[color] = numpy.zeros(len(self.move_preds)) + + for movenum, (color, pat) in enumerate(col_pat): + if pat.has_feature('capture'): + captured = pat.first_payload('capture') >> self.offset + captured = (2 ** self.payload_size - 1 ) & captured + + move_bucket = first_true_pred(self.move_preds, movenum + 1) # counted from zero + buckets[color][move_bucket] += captured + + ret = {} + for color in PLAYER_COLORS: + ret[color] = numpy.zeros(3 * len(self.move_preds)) + + for mp in xrange(len(self.move_preds)): + for color in PLAYER_COLORS: + # I captured + ret[color][3 * mp] = buckets[color][mp] + # I lost + ret[color][3 * mp + 1] = buckets[the_other_color(color)][mp] + # diff + ret[color][3 * mp + 2] = ret[color][3 * mp] - ret[color][3 * mp + 1] + + + return BlackWhite(ret[PLAYER_COLOR_BLACK], ret[PLAYER_COLOR_WHITE]) + +## game -> BlackWhite( vector_black, vector_white ) +class BWWinStatVectorGenerator: + def __init__(self): + self.annotations = [ + '(wins by points)', + '(wins by resign)', + '(wp - wr)', + '(lost by points)', + '(lost by resign)', + '(lp - lr)' + ] + self.types = [ "continuous" ] * len(self.annotations) + + def __repr__(self): + return 'BWWinStatVectorGenerator2()' + + def __call__(self, game): + """""" + result = str(game.sgf_header.get('RE', '0')) + + if result.lower() in ['0', 'jigo', 'draw']: + raise ProcessingError(repr(self) + " Jigo") + + match = re.match(r'^([BW])\+(.*)$', result) + if not match: + 
raise ProcessingError(repr(self) + ' Could not find result sgf tag.') + + player, val = match.group(1), match.group(2) + if ( val.lower().startswith('f') or # forfeit + val.lower().startswith('t') ): # time + raise ProcessingError(repr(self) + ' Forfeit, time.') + + loses = [0, 0, 0] + # by resign + if val.lower().startswith('r'): + wins = [0, 1, -1] + else: + # by points + try: + points = float(val) + except ValueError: + raise ProcessingError(repr(self) + ' Points not float.') + wins = [1, 0, 1] + + if player == 'B': + black = numpy.array( wins + loses ) + white = numpy.array( loses + wins ) + else: + white = numpy.array( wins + loses ) + black = numpy.array( loses + wins ) + + return BlackWhite(black, white) + + + +## game -> BlackWhite( vector_black, vector_white ) +class BWWinPointsStatVectorGenerator: + def __init__(self): + self.annotations = [ + '(wins #points)', + '(loses #points)', + ] + self.types = [ "continuous" ] * len(self.annotations) + + def __repr__(self): + return 'BWWinPointsStatVectorGenerator2()' + + def __call__(self, game): + """""" + result = str(game.sgf_header.get('RE', '0')) + + if result.lower() in ['0', 'jigo', 'draw']: + raise ProcessingError(repr(self) + " Jigo") + + match = re.match(r'^([BW])\+(.*)$', result) + if not match: + raise ProcessingError(repr(self) + ' Could not find result sgf tag.') + + player, val = match.group(1), match.group(2) + if ( val.lower().startswith('f') or # forfeit + val.lower().startswith('t') or # time + val.lower().startswith('r') # resign + ): + raise ProcessingError(repr(self) + ' Forfeit, time, resign.') + + try: + points = float(val) + except ValueError: + raise ProcessingError(repr(self) + ' Points not float.') + + # if black wins + black = numpy.array( [points, 0] ) + white = numpy.array( [0, points] ) + # if white wins + if player == 'W': + black, white = white, black + + return BlackWhite(black, white) + +# - for black - transform_rawpatfile - +# / +# game -> raw_patternscan_game -- +# \ +# - for 
white ----- || ----- + +#@cache_result +@declare_pure_function +def raw_patternscan_game(game, spatial_dict, patargs=''): + assert spatial_dict.exists(warn=True) + ret = get_output_resultpair(suffix='.raw.pat') + + with open(ret.black.filename, mode='w') as fb: + with open(ret.white.filename, mode='w') as fw: + for color, pat in scan_raw_patterns(game, spatial_dict, patargs=patargs): + fd = fb if color == PLAYER_COLOR_BLACK else fw + # write output for the desired player + fd.write("%s\n"%pat) + #logging.debug(gtp + ":" + pat) + + #logging.info("Generated Raw Patternfiles for game %s, %s"%(game, ret)) + return ret + +#@cache_result +@declare_pure_function +def transform_rawpatfile(rawpat_file, ignore=set(), transform={}, ignore_empty=True): + """Transforms raw pattern file line by line, by ignoring certain features (and their payloads) + @ignore and transforming payloads with @transform. If @ignore_empty is specified, + empty patterns are ignored. + + transform_rawpatfile(file, ignore=set('s', 'cont'), transform={'border':lambda x: x - 1}) + (s:20) + (s:10 border:5 cont:10) + (s:20 cont:1) + (capture:18) + + will produce + (border:4) + (capture:18) + """ + + ret = get_output_resultfile('.raw.pat') + with open(ret.filename, mode='w') as fout: + with open(rawpat_file.filename, mode='r') as fin: + for line in fin: + pat = pachi.Pattern(line).reduce(lambda feat, _: not feat in ignore) + fpairs = [] + for f, p in pat: + p = transform.get(f, lambda x:x)(p) + fpairs.append((f, p)) + + if ignore_empty and not fpairs: + continue + + fout.write( "%s\n"%pachi.Pattern(fpairs=fpairs) ) + return ret + +#@cache_result +@declare_pure_function +def summarize_rawpat_file(rawpat_file): + """Transforms raw pattern file into summarized one: + (s:20) + (s:10 border:5) + (s:20) + (s:40) + (s:20) + ========> + 3 (s:20) + 1 (s:10 border:5) + 1 (s:40) + """ + result_file = get_output_resultfile('.pat') + + script="cat %s | sort | uniq -c | sort -rn > %s "%(rawpat_file.filename, 
result_file.filename) + + p = subprocess.Popen(script, shell=True, stderr=PIPE) + _, stderr = p.communicate() + if stderr: + logging.warn("subprocess summarize stderr:\n%s"%(stderr,)) + if p.returncode: + raise RuntimeError("Child sumarize failed, exitcode %d."%(p.returncode,)) + + return result_file + +class SummarizeMerger(godb_models.Merger): + """Used to sum Summarized Pattern files: + patfile_1: + 3 (s:20) + 1 (s:10 border:5) + 1 (s:40) + + patfile_2: + 3 (s:20) + 2 (s:15) + 1 (s:10 border:5) + + m = SummarizeMerger() + m.add(patfile_1) + m.add(patfile_2) + patres = m.finish() + + Now, patres is: + 6 (s:20) + 2 (s:15) + 2 (s:10 border:5) + 1 (s:40) + """ + def __init__(self): + self.reset() + + def start(self, bw_gen): + self.reset() + + def reset(self): + self.cd = {} + + def add(self, pat_file, color): + with open(pat_file.filename) as fin: + for line in fin: + match = re.match(pat_file_regexp, line) + if not match: + raise IOError("Wrong file format: " + pat_file) + count, pattern = int(match.group(1)), match.group(2) + self.cd[pattern] = self.cd.get(pattern, 0) + count + + def finish(self): + result_file = get_output_resultfile('.pat') + with open(result_file.filename, 'w') as fout: + firstlen = None + for pattern, count in sorted(self.cd.iteritems(), key=lambda kv : - kv[1]): + if firstlen == None: + # get number of decimal places, so that the file is nicely formatted + firstlen = 1 + int(math.log10(count)) + + # prefix the count with 2 spaces, see pat_file_regexp for format + s = "%" + str(2 + firstlen) + "d %s\n" + fout.write(s%(count, pattern)) + + self.reset() + return result_file + + +class VectorSumMerger(godb_models.Merger): + def __init__(self): + self.reset() + + def start(self, bw_gen): + assert all( tp == 'continuous' for tp in bw_gen.types ) + self.sofar = numpy.zeros(len(bw_gen.types)) + + def reset(self): + self.sofar = None + + def add(self, vector, color=None): + if self.sofar == None: + self.sofar = numpy.zeros(vector.shape) + self.sofar 
+= vector + + def finish(self): + if self.sofar == None: + self.sofar = numpy.zeros(0) + ret = self.sofar + self.reset() + return ret + +class VectorArithmeticMeanMerger(godb_models.Merger): + def __init__(self): + self.reset() + + def start(self, bw_gen): + self.reset() + self.summ.start(bw_gen) + + def reset(self): + self.count = 0 + self.summ = VectorSumMerger() + + def add(self, vector, color=None): + self.count += 1 + self.summ.add(vector) + + def finish(self): + if not self.count: + ret = self.summ.finish() + else: + ret = self.summ.finish() / self.count + + self.reset() + return ret + +# so that the fc has nice repr +@declare_pure_function +def identity(obj): + return obj + +@declare_pure_function +def linear_rescale(vec, a=-1, b=1): + """Linearly rescales elements in vector so that: + min(vec) gets mapped to a + max(vec) gets mapped to b + the intermediate values get remapped linearly between + """ + assert a <= b + MIN, MAX = vec.min(), vec.max() + if MIN == MAX: + # return average value of the set + return (float(a + b) / 2) * numpy.ones(vec.shape) + return a + (vec - MIN) * ( float(b - a) / (MAX - MIN) ) + +@declare_pure_function +def natural_rescale(vec): + return vec / numpy.sum(vec) + +@declare_pure_function +def log_rescale(vec, a=-1, b=1): + return linear_rescale(numpy.log(1 + vec), a, b) + +class VectorApply(godb_models.Merger): + def __init__(self, merger, + add_fc=identity, + finish_fc=identity ): + self.merger = merger + self.finish_fc = finish_fc + self.add_fc = add_fc + + def start(self, bw_gen): + self.merger.start(bw_gen) + + def add(self, vector, color=None): + self.merger.add(self.add_fc(vector), color) + + def finish(self): + return self.finish_fc( self.merger.finish() ) + + def __repr__(self): + return "VectorApply(%s, add_fc=%s, finish_fc=%s)" % (repr(self.merger), + repr(self.add_fc), + repr(self.finish_fc)) + +class PatternVectorMaker: + def __init__(self, all_pat, n): + self.all_pat = all_pat + self.n = n + + self.annotations = [] + 
self.pat2order = {} + + with open(self.all_pat.filename, 'r') as fin: + # take first n patterns + for num, line in enumerate(fin): + if num >= self.n: + break + match = re.match(pat_file_regexp, line) + if not match: + raise IOError("Wrong file format: " + self.all_pat) + pattern = match.group(2) + self.pat2order[pattern] = num + self.annotations.append(pattern) + + self.types = [ "continuous" ] * len(self.annotations) + + if len(self.pat2order) < self.n: + raise ValueError("Input file all_pat '%s' does not have enough lines."%(self.all_pat)) + + def __repr__(self): + return "PatternVectorMaker(all_pat=%s, n=%d)"%(self.all_pat, self.n) + + def __call__(self, sum_patfile): + vector = numpy.zeros(self.n) + added = 0 + with open(sum_patfile.filename, 'r') as fin: + for line in fin: + match = re.match(pat_file_regexp, line) + if not match: + raise IOError("Wrong file format: " + str(sum_patfile)) + + index = self.pat2order.get(match.group(2), None) + if index != None: + vector[index] += int(match.group(1)) + added += 1 + + # no need to walk through the whole files, the patterns (match.group(2)) + # are unique since the patfile is summarized + if added >= self.n: + break + + return vector + +## game -> BlackWhite( vector_black, vector_white ) +class BWPatternVectorGenerator: + def __init__(self, bw_game_summarize, pattern_vector_maker): + self.pattern_vector_maker = pattern_vector_maker + self.bw_game_summarize = bw_game_summarize + + self.annotations = pattern_vector_maker.annotations + self.types = pattern_vector_maker.types + + def __repr__(self): + return "BWPatternVectorGenerator(bw_game_summarize=%s, pattern_vector_maker=%s)"%( + repr(self.bw_game_summarize), repr(self.pattern_vector_maker)) + + def __call__(self, game): + bw = self.bw_game_summarize(game) + return bw.map_both(self.pattern_vector_maker) + +#@cache_result +@declare_pure_function +def process_game(game, init, pathway): + bw = init(game) + return bw.map_pathway(pathway) + +@cache_result 
+@declare_pure_function +def process_one_side_list(osl, merger, bw_processor): + return osl.for_one_side_list( merger, bw_processor) + +## Process One Side List +class OSLVectorGenerator: + """ + Maps one side lists to vectors, using different game vector generators (e.g. BWPatternVectorGenerator), e.g: + OSLVectorGenerator([(vg1, m1), (vg2, m2)]) + +game1 m1.add(vg1(game1)) m2.add(vg2(game1)) +game2 m1.add(vg1(game2)) m2.add(vg2(game2)) +. | | +. | | +. | | +game666 m1.add(vg1(game666)) m2.add(vg2(game666)) + m1.finish() m2.finish() + = [1,2,3,4,5] = [6,7,8,9,10] + vg1.annotations vg2.annotations + = [f1, ..., f5] =[f6, ..., f10] + ---------------------------------------------- + result = [ 1,2,3,4,5,6,7,8,9,10 ] + annotations = [ f1, ..., f10 ] + """ + def __init__(self, gen_n_merge, annotate_featurewise=True): + self.gen_n_merge = gen_n_merge + self.annotate_featurewise = annotate_featurewise + self.functions = [] + self.annotations = [] + self.types = [] + + for num, (game_vg, merger) in enumerate(gen_n_merge): + self.functions.append( + # this function maps one_side_list to a vector + # where vectors from a game in the osl are merged using the merger + partial_right(process_one_side_list, merger, game_vg )) + + anns = game_vg.annotations + if annotate_featurewise: + anns = [ 'f%d%s' % (num, an) for an in anns ] + + self.annotations.extend(anns) + self.types.extend(game_vg.types) + + def __repr__(self): + return "OSLVectorGenerator(gen_n_merge=%s, annotate_featurewise=%s)"%(repr(self.gen_n_merge), + repr(self.annotate_featurewise) ) + + def __call__(self, osl): + # stack vectors from different generators together + return numpy.hstack( [ f(osl) for f in self.functions ] ) + +def make_all_pat(osl, bw_summarize_pathway): + return process_one_side_list(osl, SummarizeMerger(), bw_summarize_pathway) + +@cache_result +@declare_pure_function +def osl_vector_gen_cached(osl_gen, osl): + """Just to emulate caching for osl_gen.__call__ method. 
+ this is a bit ugly, since this should really be handled by the caching itself to allow for + decorating class methods.""" + return osl_gen(osl) + +@declare_pure_function +def minus(a,b): + return a-b + +@cache_result +@declare_pure_function +def make_tab_file(datamap, vg_osl, osl_name_as_meta=True, osl_size_as_meta=True, image_name_as_meta=True): + """As specified in http://orange.biolab.si/doc/reference/Orange.data.formats/ + If image_name_as_meta or osl_name_as_meta parameters are present, the names of the + respective objects are added as meta columns. + """ + tab_file = get_output_resultfile('.tab') + + def tab_denoted(fout, l): + """Writes tab-denoted elements of list @l to output stream @fout""" + strings = map(str, l) + for el in strings: + if '\t' in el: + raise RuntimeError("Elements of tab-denoted list must not contain tabs.") + fout.write('\t'.join(strings) + '\n') + + def get_meta(osl_m, osl_size_m, image_m): + return list( itertools.compress((osl_m, osl_size_m, image_m), + (osl_name_as_meta, osl_size_as_meta, image_name_as_meta))) + + with open(tab_file.filename, 'w') as fout: + # annotations - column names + tab_denoted(fout, chain( vg_osl.annotations, + datamap.image_annotations, + get_meta('OSL name', 'OSL size', 'Image name'))) + + # column data types + tab_denoted(fout, chain( vg_osl.types, + datamap.image_types, + get_meta('string', 'continuous', 'string'))) + + # column info type: empty (normal columns) / class (main class attribute) / multiclass / meta + tab_denoted(fout, chain( # attributes are no class + [''] * len(vg_osl.types), + # for the first class attribute if present + [ 'class' ] * len(datamap.image_types[:1]), + # for the following class attributes if present + [ 'meta' ] * len(datamap.image_types[1:]), + #[ 'multiclass' ] * len(datamap.image_types[1:]), + # meta information if requested + get_meta('meta', 'meta', 'meta'))) + + # the data itself + for num, (osl, image) in enumerate(datamap): + logging.info('Tab file %d%% (%d / 
%d)'%(100* (num+1) / len(datamap), num+1, len(datamap))) + + tab_denoted(fout, chain( # the osl + osl_vector_gen_cached(vg_osl, osl), + # the image + map(float, image.data), + # the meta data + get_meta(osl.name, float(len(osl)), image.name))) + + return tab_file + +# +## +# +## Playground: +# +## +# + +if __name__ == '__main__': + def main(): + import logging + from logging import handlers + if False: + logger = logging.getLogger() + logger.setLevel(logging.INFO) + ch = handlers.WatchedFileHandler('LOG', mode='w') + logger.addHandler(ch) + + from utils.godb_models import Game, GameList, OneSideList, PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE + from utils.godb_session import godb_session_maker + from utils import db_cache + + def test1(): + ## import'n'init + s = godb_session_maker(filename=':memory:') + + ## Prepare data + + gl = GameList("pokus") + s.godb_scan_dir_as_gamelist('./TEST_FILES/games', gl) + s.add(gl) + + # add all the games into the all.pat file + osl = OneSideList("all.pat") + osl.batch_add(gl.games, PLAYER_COLOR_BLACK) + osl.batch_add(gl.games, PLAYER_COLOR_WHITE) + s.add(osl) + s.commit() + + ## Prepare the pattern vector game processing pathway + ## game -> BlackWhite( vector_black, vector_white ) + + spatial_dict = generate_spatial_dictionary(gl, spatmin=2) + + # the pathway: game -> bw rawpat files -> bw transformed rawpat files -> bw summarized pat files + bw_game_summarize = partial_right(process_game, + partial_right(raw_patternscan_game, spatial_dict), + [ partial_right(transform_rawpatfile, + #transform={ 'border':partial_right(minus, 1) }, + ignore=['border', 'cont']), + summarize_rawpat_file + ]) + all_pat = make_all_pat(osl, bw_game_summarize) + + vg_pat = BWPatternVectorGenerator( bw_game_summarize, + PatternVectorMaker(all_pat, 100) ) + vg_local = BWLocalSeqVectorGenerator() + vg_bdist = BWBdistVectorGenerator() + + ## Process One game + + + """ + print vg_pat(game) + print vg_local(game) + print vg_bdist(game) + """ + + ## Process One 
Side List + + gen_n_merge = [ (vg_pat, VectorApply(VectorSumMerger(), finish_fc=linear_rescale)), + (vg_local, VectorArithmeticMeanMerger()), + (vg_bdist, VectorArithmeticMeanMerger())] + + vg_osl = OSLVectorGenerator(gen_n_merge) + + generate = partial( osl_vector_gen_cached, vg_osl) + + # not cached + #vec, annotations = vg_osl(osl), vg_osl.annotations + + # cached + + ## now the pathway is ready, we can process whatewer OSL we + # feel up to, osl in the following is just an example + vec, annotations = generate(osl), vg_osl.annotations + + for i in xrange(len(annotations)): + print vec[i], '\t\t', annotations[i] + + + def test_rescale(): + import numpy + from pylab import figure, scatter, subplot, show + + vec = numpy.random.random( size=10) + print vec + print linear_rescale(vec, a=-20, b=20) + + vec = numpy.array([ 452915., 288357., 271245., 111039., 84811., 74074., + 58663., 62257., 55296., 46359., 51022., 41049., + 31297., 35259., 34467., 30918., 29869., 36875., + 29592., 28075., 25823., 27479., 26343., 26964., + 24093., 24724., 23135., 22266., 21725., 21769., + 20130., 21625., 20200., 20619., 19741., 19049., + 17434., 20167., 19830., 16458., 16513., 21720., + 20933., 20216., 18414., 17442., 12046., 16186., + 16732., 16142., 15126., 15332., 15435., 12925., + 14072., 16321., 11391., 14884., 13147., 15162., + 14247., 15578., 11826., 12009., 11533., 12349., + 12219., 12590., 10581., 14550., 10699., 12384., + 11795., 10769., 12617., 12576., 12281., 11311., + 12479., 11327., 11398., 11814., 11050., 10248., + 10506., 11541., 12401., 9580., 11201., 10704., + 9766., 10402., 9422., 12888., 9473., 9536., + 10933., 10844., 11005., 8112., 0.]) + + figure(1) + subplot(321) + scatter(range(len(vec)), vec, marker='x', c='r') + subplot(322) + scatter(range(len(vec)), linear_rescale(vec), marker='x', c='g') + subplot(323) + scatter(range(len(vec)), numpy.log(1 + vec), marker='x', c='b') + subplot(324) + scatter(range(len(vec)), log_rescale(vec), marker='x', c='y') + 
subplot(325) + scatter(range(len(vec)), vec / sum(vec), marker='x', c='b') + show() + + def test_bdist_hist(): + s = godb_session_maker(filename=':memory:')#, echo=True) + game = s.godb_sgf_to_game('./TEST_FILES/test_bdist2.sgf') + + bdg = BWBdistVectorGenerator(by_line=[2, 3, 4], by_moves=[4, 6]) + bw = bdg(game) + assert len(bdg.annotations) == len(bw[0]) == len(bw[1]) + + print "Interval \t\tBlack\tWhite" + print "-" * 40 + for ann, b, w in zip( bdg.annotations, bw[0], bw[1] ): + print "%s\t\t"%(ann), int(b), "\t", int(w) + + def test_win_stat(): + s = godb_session_maker(filename=':memory:')#, echo=True) + #gl = s.godb_add_dir_as_gamelist('./files/') + + game = s.godb_sgf_to_game('../data/go_teaching_ladder/reviews/5443-breakfast-m711-A2.sgf') + + bdg = BWWinStatVectorGenerator() + #bdg = BWWinPointsStatVectorGenerator() + bw = bdg(game) + #continue + + assert len(bdg.annotations) == len(bw[0]) == len(bw[1]) + + print "Interval \t\tBlack\tWhite" + print "-" * 40 + for ann, b, w in zip( bdg.annotations, bw[0], bw[1] ): + print "%30s\t\t" % (ann), b, "\t", w + + ## + ## + + + def header(text): + print "=" * 10, "\n"+text +"\n", "=" * 10 + + header("PROCESSING PATHWAY TEST") + test1() + + return + + header("RESCALE TEST") + test_rescale() + header("BDIST HIST TEST") + test_bdist_hist() + #test_capture_hist() + header("WINSTAT TEST") + test_win_stat() + + main() + \ No newline at end of file diff --git a/ipython_session.py b/ipython_session.py new file mode 100644 index 0000000..cf11906 --- /dev/null +++ b/ipython_session.py @@ -0,0 +1,15 @@ +import logging +from logging import handlers +import sys + +import sqlalchemy + +import utils +from utils.godb_models import * +from utils.godb_session import godb_session_maker + +import pachi +from config import DB_FILE + +if __name__ == '__main__': + s = godb_session_maker(filename=DB_FILE) diff --git a/load_questionare.py b/load_questionare.py new file mode 100644 index 0000000..e5abacb --- /dev/null +++ 
b/load_questionare.py @@ -0,0 +1,44 @@ +import sys +import os +import json +import logging + +def load_file(filename): + with open(filename) as fin: + dump = json.load(fin) + + d = {} + + for group_list in dump['group_lists']: + for player in group_list['list']: + if player['skip'] != 'yes': + style = player['style'] + vec = [ style['te'], style['or'], style['ag'], style['th'] ] + try: + vec = map(int, vec) + except: + continue + + d[player['name']] = tuple(vec) + + return d + +def scan_d(directory): + ds = [] + for dirname, dirnames, filenames in os.walk(directory): + # print path to all filenames. + for filename in filenames: + fn = os.path.join(dirname, filename) + try: + ds.append(load_file(fn)) + except: + logging.warn("Scanning of questionare file '%s' failed, skipping."%fn) + continue + + return ds + +if __name__ == '__main__': + #fn = sys.argv[1] + #print load_file(fn) + + print scan_d('./QUESTIONARE') \ No newline at end of file diff --git a/pachi.py b/pachi.py new file mode 100644 index 0000000..3d3f532 --- /dev/null +++ b/pachi.py @@ -0,0 +1,217 @@ +import logging +import subprocess +from subprocess import PIPE + +import os +from os import remove +from os.path import abspath + +import sys +import shutil +import re +from collections import namedtuple + +from utils import utils, misc, db_cache +from utils.db_cache import declare_pure_function, cache_result +from utils.colors import PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE +from utils.godb_models import ProcessingError +from result_file import ResultFile +import result_file + +from config import PACHI_DIR + +PACHI_SPATIAL_DICT = os.path.join(PACHI_DIR, 'patterns.spat') + +class Pattern: + def __init__(self, pattern=None, fpairs=None): + if pattern != None: + match = re.match('^\((.*)\) *$', pattern) + if not match: + raise RuntimeError("Pattern format wrong: '%s'"%pattern) + + # (capture:104 border:6 atari:0 atari:0 cont:1 s:2620) + pattern = match.group(1) + + self.fpairs = [] + for featpair in 
pattern.split(): + feat, payload = featpair.split(':') + self.fpairs.append((feat, int(payload))) + elif fpairs != None: + self.fpairs = fpairs + else: + raise RuntimeError("Pattern unspecified...") + + def reduce(self, filterfc): + fpairs = [ (f, p) for f, p in self if filterfc(f, p) ] + return Pattern(fpairs=fpairs) + + def iter_feature_payloads(self, feature): + for f, p in self: + if f == feature: + yield p + + def first_payload(self, feature): + return self.iter_feature_payloads(feature).next() + + def has_feature(self, feature): + for f, p in self: + if f == feature: + return True + return False + + def __iter__(self): + return iter(self.fpairs) + + def __str__(self): + return "(%s)"%( ' '.join( "%s:%s"%(feat, payload) for feat, payload in self ) ) + +class IllegalMove(Exception): + pass + +@cache_result +@declare_pure_function +def generate_spatial_dictionary(game_list, spatmin=4, patargs='', check_size=329): + """ + Generates pachi spatial dictionary from games in the @gamelist. + + @check_size specifies min spatial dict size, if the filesize is below, raise runtime err. + Set this to 0 to disable the check. (328 is the size of empty spatial dict header) + """ + logging.info("Generating spatial dictionary from %s"%(repr(game_list))) + + # pachi does not handle larger number of handicap stones than 9 + without_large_handi = filter( lambda g : int(g.sgf_header.get('HA',0)) <= 9, game_list.games ) + l_old, l_new = len(game_list.games), len(without_large_handi) + if l_old != l_new: + logging.warn("The spatial dictionary list contains %d games with # of handicap stones >= 10. 
Skipping those."%( + l_old - l_new,)) + + games = '\n'.join([ abspath(game.sgf_file) for game in without_large_handi ]) + + spatial_dict = result_file.get_output_resultfile('.spat') + assert not spatial_dict.exists() + + script=""" + cd %s + SPATMIN='%s' SPATIAL_DICT_FILE='%s' PATARGS='%s' tools/pattern_spatial_gen.sh -"""%( + PACHI_DIR, spatmin, abspath(spatial_dict.filename), patargs) + + #with open("tmp_script", 'w') as tmp: + # tmp.write(script) + + p = subprocess.Popen(script, shell=True, stdin=PIPE) + o = p.communicate(input=games.encode('utf-8')) + #if stderr: + # logging.warn("subprocess pattern_spatial_gen stderr:\n%s"%(stderr,)) + if p.returncode: + raise RuntimeError("Child process `pachi/tools/pattern_spatial_gen` failed, exitcode %d."%(p.returncode,)) + if check_size and os.stat(spatial_dict.filename).st_size <= check_size: + raise RuntimeError("Spatial dict is empty. Probably an uncaught error in subprocess.") + + logging.info("Returning spatial dictionary %s"%(repr(spatial_dict))) + return spatial_dict + + +@cache_result +@declare_pure_function +def scan_raw_patterns(game, spatial_dict=None, patargs='', skip_empty=True): + """ + For a @game, returns list of pairs (player_color, pattern) for each move. + The pachi should be compiled to output all the features. + """ + if spatial_dict == None: + if 'xspat=0' not in patargs.split(','): + raise RuntimeError("Spatial dict not specified, though the spatial features are not turned off.") + spatial_str="" + else: + assert spatial_dict.exists(warn=True) + spatial_str="spatial_dict_filename=%s"%(abspath(spatial_dict.filename)) + + ## TODO + ## pachi has to have some patterns.spat even if the xspat=0 + ## otw segfault, thought it does not use it... 
+ + gtpscript=""" + cd %s + + ./tools/sgf2gtp.py --stdout '%s' + """%(PACHI_DIR, abspath(game.sgf_file) ) + gtpstream = utils.check_output(gtpscript, shell=True) + + script = """ + cd %s + ./pachi -d 0 -e patternscan '%s' + """%( PACHI_DIR, ','.join(misc.filter_null([spatial_str, patargs])) ) + + p = subprocess.Popen(script, shell=True, stdout=PIPE, stdin=PIPE, stderr=PIPE) + + pats, stderr = p.communicate(input=gtpstream) + if stderr: + logging.warn("subprocess pachi:\n\tSCRIPT:\n%s\n\tSTDERR\n%s"%(script, stderr)) + + if p.returncode: + raise RuntimeError("Child process `pachi` failed, exitcode %d."%(p.returncode,)) + + lg = filter( lambda x : x, gtpstream.split('\n')) + lp = pats.split('\n') + + # ? illegal move + wrong = filter( lambda x: re.search('^\? ',x), lp) + if wrong: + raise ProcessingError("Illegal move") + #raise IllegalMove() #"In game %s"%game) + + # filter only lines beginning with = + lp = filter( lambda x: re.search('^= ',x), lp) + # remove '= ' from beginning + lp = map( lambda x: re.sub('^= ', '', x), lp) + + # the command list and the pattern list should be aligned + # - each gtp command emits one line of patterns from pachi + assert len(lg) == len(lp) + gtp_pat = zip(lg, lp) + + # keep pairs that contain something else than space in pattern + # - discards boardsize, handi, komi, ... that emit nothing ('= ') + gtp_pat = filter( lambda t: re.search('\S', t[1]), gtp_pat) + + # filter out other gtp commands than play + # - discards e.g. 
class ResultFile:
    """Thin wrapper around a result-file path.

    Only remembers the filename; the actual content is produced by
    external tools (see the pachi subprocess helpers).
    """
    def __init__(self, filename, create_empty=False):
        """Wrap @filename; if @create_empty, touch a new empty file
        (the path must not exist yet)."""
        self.filename = filename
        if create_empty:
            assert not self.exists()
            open(self.filename,'w').close()

    def exists(self, warn=False):
        """Return whether the wrapped file exists; optionally log a
        warning when it does not."""
        status = exists(self.filename)
        if not status and warn:
            # BUG FIX: `logging` was referenced here without ever being
            # imported in this module, so the warn path raised NameError.
            import logging
            logging.warning("File '%s' does not exist."%(self.filename,))
        return status

    def __repr__(self):
        return "ResultFile('%s')"%(self.filename,)
from collections import namedtuple

# Single-character color tags, matching the SGF convention.
PLAYER_COLOR_WHITE = 'W'
PLAYER_COLOR_BLACK = 'B'

PLAYER_COLORS = ( PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE )

class BlackWhite(namedtuple('BlackWhite', 'black white')):
    """A (black, white) pair of arbitrary per-color values."""

    def map_both(self, f):
        """Apply @f to both components, returning a new BlackWhite."""
        return BlackWhite(f(self.black), f(self.white))

    def map_pathway(self, func_list):
        """Thread the pair through each function of @func_list in order."""
        result = self
        for func in func_list:
            result = result.map_both(func)
        return result

def the_other_color(color):
    """Return the opposite color tag ('B' <-> 'W')."""
    return PLAYER_COLOR_WHITE if color == PLAYER_COLOR_BLACK else PLAYER_COLOR_BLACK
declarative_base +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +import time +import logging +import inspect +import types +import functools + +import utils + +logger = logging.getLogger(__name__) + +""" +A simple caching scheme for pure functions, supporting pure functions as +args as well. + +Changes of code do NOT make the cache invalid - so you should delete the +cache database yourself if you change any pure functions. +""" + +## hodnoty def kwargs to ovsem meni, jen kdyz se fce predava parametrem (funkci kterou taky cachujeme), +## nikoliv kdyz je volana primo +## pze kdyz se predava parametrem, tak ta vnejsi fce nevi jaky ma def param + +# By default (without running init_cache ) a dict (=> cache not persistent across runs & processes) +cache_object = {} + +Base = declarative_base() +class CacheLine(Base): + """ + Maps key -> value, saving time of creation, which is used as a criterion for time expiration. + """ + __tablename__ = 'cacheline' + id = Column(Integer, primary_key=True) + time = Column(Float) + key = Column(Text, index=True) + value = Column(PickleType) + + def __str__(self): + return "(%s, %s) -> %s" % (self.key, self.time, self.value) + + def __repr__(self): + return "CacheLine(%s)" % (str(self)) + +class DBCacheObject: + """ The cache uses the same interface as dict.""" + def __init__(self, db_session, expire): + self.session = db_session + self.expire = expire + + def delete_expired(self): + expired_before = time.time() - self.expire + self.session.query(CacheLine).filter(CacheLine.time < expired_before).delete() + self.session.commit() + + def __getitem__(self, key): + # with correct key + q = self.session.query(CacheLine).filter(CacheLine.key == key) + + # if expiration rate set + if self.expire: + expired_before = time.time() - self.expire + # not expired + q = q.filter(CacheLine.time > expired_before) + + # order by time + by_time = q.order_by(CacheLine.time).all() + + # the last one + if len(by_time): + 
return by_time[-1].value + + raise KeyError + + def __setitem__(self, key, value): + l = CacheLine(time=time.time(), key=key, value=value) + self.session.add(l) + self.session.commit() + + +def delete_expired(): + global cache_object + if not isinstance(cache_object, DBCacheObject): + logging.warn("Cannot remove expired elemets from cache - not a DBCacheObject") + return + + logging.info("Deleting expired cache rows...") + cache_object.delete_expired() + +def _print_all(): + global cache_object + + if isinstance(cache_object, DBCacheObject): + it = cache_object.session.query(CacheLine).all() + else: + it = cache_object.iteritems() + + print "CACHE:" + for a in it: + print "\t", a + + +# +# Pure function +# + +class PureFunction(object): + """PureFunction is a class that has nice function repr like + instead of the default repr + . + + By using it, the user declares, that calls to the same function with + same arguments will always (in time, accross different processes, ..) + have the same results and can be thus cached. + """ + def __init__(self, f): + self.f = f + assert isinstance(f, types.FunctionType) + functools.update_wrapper(self, f) + + def getargspec(self): + return inspect.getargspec(self.f) + + def get_default_kwargs(self): + args, varargs, varkw, defaults = self.getargspec() + if defaults: + return dict(zip(args[-len(defaults):], defaults)) + + def __call__(self, *args, **kwargs): + logger.debug("calling %s"%repr(self)) + return self.f(*args, **kwargs) + + def __repr__(self): + return ''%(utils.repr_origin(self.f)) + #return ''%(utils.repr_origin(self.f), repr( self.get_default_kwargs())) + +# to be used as a deco +declare_pure_function = PureFunction + +# +# +# + +def init_cache(filename='CACHE.db', expires=0, sqlalchemy_echo=False): + """ + Initialize cache, sets up the global cache_object. + + filename -- specifies the sqlite dbfile to store the results to + expires -- specifies expiration in seconds. 
If you set this to 0, + cached data are valid forever + echo -- whether to output sqlalchemy logs + """ + if filename == None: + # By default, the cache object is a dict + if expires: + logger.warn('Dictionary cache object does not support time expiration of cached values!') + else: + engine = create_engine('sqlite:///%s'%filename, echo=sqlalchemy_echo) + Base.metadata.create_all(engine) + Session = sessionmaker(bind=engine) + session = Session() + + global cache_object + cache_object = DBCacheObject(session, expires) + +def close_cache(): + global cache_object + cache_object.session.close() + +def make_key(f, f_args, f_kwargs): + if isinstance(f, PureFunction): + spect = f.getargspec() + elif isinstance(f, types.FunctionType): + spect = inspect.getargspec(f) + else: + raise TypeError("Unable to obtain arg specification for function : '%s'"%(repr(f))) + + args, varargs, varkw, defaults = spect + default_kwargs = {} + if defaults: + default_kwargs = dict(zip(args[-len(defaults):], defaults)) + for (key, val) in f_kwargs.iteritems(): + assert key in default_kwargs + + f_kwargs_joined = default_kwargs + f_kwargs_joined.update(f_kwargs) + + #rep = "%s(args=%s, kwargs=%s)"%(utils.function_nice_repr(f), repr(f_args), repr(f_kwargs_joined)) + + rep = "%s(%s)"%(repr(f), + ', '.join(map(repr, f_args) + + [ '%s=%s'%(key, repr( val)) for key, val in f_kwargs_joined.iteritems() ])) + + ## XXX "normal temporary" objects + if 'at 0x' in rep: + logger.warn("Object(s) specified in '%s' do not have a proper repr."%(rep)) + + return rep + +# +# The deco +# + +def cache_result(fun): + """Compute the key, look if the result of a computation is in the + cache. 
If so, return it, otw run the function, cache the result and + return it.""" + def g_factory(f): + def g(*args, **kwargs): + global cache_object + key = make_key(f, args, kwargs) + try: + cached = cache_object[key] + logger.info("Returning CACHED for: '%s'"%(key)) + return cached + except KeyError: + ret = f(*args, **kwargs) + cache_object[key] = ret + logger.info("CACHING for: '%s'"%(key)) + return ret + return g + + # if we got PureFunction, the returned function should also be pure + # please see the PureFunction.__doc__ + if isinstance(fun, PureFunction): + g = g_factory(fun) + functools.update_wrapper(g, fun.f) + return PureFunction(g) + + return functools.wraps(fun)(g_factory(fun)) + +if __name__ == "__main__": + logging.basicConfig() + l = logging.getLogger(__name__) + l.setLevel(logging.INFO) + + init_cache(filename=':memory:', expires=0.1) + + @cache_result + @declare_pure_function + def add(a, b): + return a + b + + @cache_result + @declare_pure_function + def call_10(f): + return f(10) + + @cache_result + @declare_pure_function + def multmap(l): + return ( reduce( (lambda x, y: x*y) , l), time.time() ) + + def test1(): + multmap([1,2,3]) + multmap([1,2,3]) + print "sleep 0.1" + time.sleep(0.1) + multmap([1,2,3]) + + def test2(): + """Stateless (pure) class and a pure function as arguments""" + + class Adder: + """ The Adder must be `stateless` in a sense that results + of __call__ will always produce the same results for the + same args. Moreover the Adder must have __repr__ which has + all the information to uniquely define the Adder instance - + once again, so that the statement about __call__ holds. + + The user is responsible for the statelessness! 
+ (as with @declare_pure_function) + """ + def __init__(self, offset): + self.offset = offset + def __call__(self, a, b=10): + return a + self.offset + def __repr__(self): + return "Adder(offset=%s)"%self.offset + + a = Adder(2) + + @cache_result + @declare_pure_function + def my_map(f, l): + return map(f, l) + + my_map(a, range(10)) + my_map(a, range(10)) + + @declare_pure_function + def multiplicator(x, mult=2): + return x * mult + + my_map(multiplicator, range(10)) + + from utils import partial, partial_right + + my_map(partial_right(multiplicator, 2), range(10)) + my_map(partial(a, 2), range(10)) + my_map(partial_right(multiplicator, 2), range(10)) + my_map(partial(a, 2), range(10)) + + def test3(): + """Test warning for nonpure functions as arguments""" + @cache_result + def h(x): + return 2 * x + + h(10) + print + call_10(h) + + def test4(): + """Test timeout""" + multmap([1,2,3]) + multmap([1,2,3, 4]) + multmap([1,2,3]) + _print_all() + time.sleep(0.5) + multmap([1,2,3]) + _print_all() + delete_expired() + _print_all() + + + test1() + #test2() + \ No newline at end of file diff --git a/utils/godb_models.py b/utils/godb_models.py new file mode 100644 index 0000000..69eeee7 --- /dev/null +++ b/utils/godb_models.py @@ -0,0 +1,486 @@ +import os +from itertools import chain + +import logging +import re + +from sqlalchemy import Table, Column, Integer, ForeignKey, Text, Date, Float, Enum, UniqueConstraint, PickleType +from sqlalchemy.orm import relationship, backref +from sqlalchemy.ext.declarative import declarative_base + +import utils +from rank import Rank +from colors import * + +""" +This contains the sqlalchemy ORM models which also form basic of our datastructures, have a look at it. + +""" + +Base = declarative_base() + +class ProcessingError(Exception): + pass + +class SchizophrenicPlayerError(Exception): + """Used in context of problems with games between the same players. + E.g. Anonymous vs. 
Anonymous""" + pass + + +## +## hack to workaround this bug: http://bugs.python.org/issue5876 +## > Oh ok, gotcha: repr() always returns a str string. If obj.__repr__() returns a +## > Unicode string, the string is encoded to the default encoding. By default, the +## > default encoding is ASCII. +## => unicode chars in repr cause "ordinal not in range err" +import functools +import misc +def ununicode(f): + @functools.wraps(f) + def g(*args, **kwargs): + return misc.unicode2ascii(f(*args, **kwargs)) + return g + +class Player(Base): + """Class (and ORM Table) about go player. + The name must be unique (born name, ..). This class should have one instance (db record) + for one go player. + + Player may change name, rank, .. in time, or use different nicknames, etc. + The consistency (so that all these variations are connected) is maintained + together with the PlayerInTime. + """ + __tablename__ = 'player' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, unique=True, index=True) + note = Column(Text) + # list in_times from backrefs + + #__table_args__ = ( UniqueConstraint('name'), ) + + def __init__(self, name, note=u''): + self.name = name + self.note = note + + def iter_games_as(self, color, pit_filter=lambda pit:True): + return chain.from_iterable(pit.iter_games_as(color) + for pit in self.in_times if pit_filter(pit) ) + + def iter_one_side_associations(self, + pit_filter=lambda pit:True, + **kwargs): + return chain.from_iterable( pit.iter_one_side_associations(**kwargs) + for pit in self.in_times if pit_filter(pit) ) + # Shortcuts + def iter_games_as_white(self, **kwargs): + return self.iter_games_as(PLAYER_COLOR_WHITE, **kwargs) + def iter_games_as_black(self, **kwargs): + return self.iter_games_as(PLAYER_COLOR_BLACK, **kwargs) + def iter_games(self): + return chain(self.iter_games_as_black(), self.iter_games_as_white()) + + def __str__(self): + return self.name + + @ununicode + def __repr__(self): + return u"Player(%s, 
'%s','%s')" % (self.id, + self.name, + self.note) + +import pickle + +class PlayerInTime(Base): + """Captures evolution of players in time - change of rank, name, different identities.""" + __tablename__ = 'player_in_time' + id = Column(Integer, primary_key=True, index=True) + + player_id = Column(Integer, ForeignKey('player.id'), index=True) + player = relationship("Player", backref=backref('in_times', order_by=id)) + + name = Column(Text) + + rank = Column(PickleType) # (pickler=pickle)) + note = Column(Text) + # list games_as_black from backrefs + # list games_as_white from backrefs + + def __init__(self, player, name='', rank=None, note=''): + if isinstance(rank, basestring): + rank = Rank.from_string(rank) + + self.player = player + self.name = name + self.rank = rank + self.note = note + + def get_games_as(self, color): + if color == PLAYER_COLOR_BLACK : + return self.games_as_black + if color == PLAYER_COLOR_WHITE : + return self.games_as_white + raise KeyError(color) + + def iter_games_as(self, color): + return iter(self.get_games_as(color)) + + def iter_one_side_associations(self, + color_filter=lambda color:True, + game_filter=lambda game:True ): + return ( OneSideListAssociation(game, color) + for color in PLAYER_COLORS if color_filter(color) + for game in self.iter_games_as(color) if game_filter(game) ) + + def __str__(self): + return self.name + ( " (%s)"%(self.rank) if self.rank else '') + + def str2(self): + return self.name + ( " [%s]"%(self.rank) if self.rank else '') + + @ununicode + def __repr__(self): + return u"PlayerInTime(%s, %s, '%s', '%s', '%s')" % ( + self.id, + repr(self.player), + self.name, + self.rank, + self.note ) + +class Game(Base): + """Class (and ORM Table) holding game information like + - sgf filename + - info about players - who played black, who played white + - sgf header with further info + """ + __tablename__ = 'game' + id = Column(Integer, primary_key=True, index=True) + sgf_file = Column(Text, nullable=False) + + 
black_id = Column(Integer, ForeignKey('player_in_time.id'), index=True) + white_id = Column(Integer, ForeignKey('player_in_time.id'), index=True) + + black = relationship("PlayerInTime", primaryjoin="PlayerInTime.id==Game.black_id", + backref=backref('games_as_black', order_by=id)) + white = relationship("PlayerInTime", primaryjoin="PlayerInTime.id==Game.white_id", + backref=backref('games_as_white', order_by=id)) + + sgf_header = Column(PickleType) + + # We store the whole header instead of these + + #date = Column(Text) + #komi = Column(Float) + #handicap = Column(Integer) + #size = Column(Integer) + #result = Column(Text) + #note = Column(Text) + + def __init__(self, sgf_file, black, white, sgf_header={}): + self.sgf_file = sgf_file + self.black = black + self.white = white + self.sgf_header = sgf_header + + @ununicode + def __repr__(self): + return u"Game(%s, '%s', '%s', '%s')" %( + self.id, + self.sgf_file, + repr(self.white) if self.white else '', + repr(self.black) if self.black else '') + + def abs_path(self): + return os.path.abspath(self.sgf_file) + + def iter_pit_color(self): + yield (self.black, PLAYER_COLOR_BLACK) + yield (self.white, PLAYER_COLOR_WHITE) + + def get_player_by_color(self, color): + for gpit, gcolor in self.iter_pit_color(): + if color == gcolor: + return gpit.player + raise ValueError("Wrong color '%s'."%color) + + def get_player_color(self, player): + if self.black.player == self.white.player : + # we cannot expect for this method to return different values for one player... 
+ # (so this would always return black, because it has no way of knowing if we ask for + # black or white player) + raise SchizophrenicPlayerError("Asked for color for game between identical players: %s"%(self,)) + + for gpit, gcolor in self.iter_pit_color(): + if player == gpit.player: + return gcolor + + raise ValueError("Game %s is not a game of %s."%(repr(self), repr(player))) + + def get_year(self, try_filename_prefix=True): + # Year from DT field of sgf file + dt = self.sgf_header.get('DT', 'Unknown') + year = utils.get_year(dt) + + # try to guess name from filename prefix (e.g. gogod) + if year == None and try_filename_prefix: + fn = os.path.basename(self.sgf_file)[:4] + return utils.get_year(fn) + + # return year or None if failure + return year + + def open_in_viewer(self): + utils.viewer_open(self.abs_path()) + +game_list_association = Table('game_list_association', Base.metadata, + Column('game_list_id', Integer, ForeignKey('game_list.id'), index=True), + Column('game_id', Integer, ForeignKey('game.id'), index=True) +) + +class GameList(Base): + """List of games. 
+ """ + __tablename__ = 'game_list' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, unique=True, index=True) + + games = relationship('Game', secondary=game_list_association, backref='game_lists') + + def __init__(self, name, games=None): + self.name = name + if games != None: + assert not self.games + self.games = list(games) + + def iter_players_black(self): + """Iterate players who played in a game (from this list) as black.""" + for game in self.games: + yield game.black.player + + def iter_players_white(self): + """Look at self.get_players_black and guess.""" + for game in self.games: + yield game.white.player + + def iter_players(self): + """Iterate players who played a game from this list.""" + return chain(self.iter_players_black(), self.iter_players_white()) + + def append(self, game): + self.games.append(game) + + #def __str__(self): + # ret = [ self.name ] + map(str, self.games) + # + # return '\n'.join(ret) + + def __getitem__(self, val): + return self.games[val] + + def __len__(self): + return len(self.games) + + @ununicode + def __repr__(self): + return "GameList(%s, '%s', #games = %d)" %( self.id, self.name, len(self) ) + + +class Merger: + def __init__(self): + pass + def __repr__(self): + return self.__class__.__name__ + "()" + def start(self, bw_gen): + raise NotImplementedError + def add(self, result, color): + raise NotImplementedError + def finish(self): + raise NotImplementedError + + +class OneSideListAssociation(Base): + __tablename__ = 'one_side_list_association' + id = Column(Integer, primary_key=True, index=True) + one_side_list_id = Column(Integer, ForeignKey('one_side_list.id'), index=True) + game_id = Column(Integer, ForeignKey('game.id'), index=True) + + # what is the color of the player of interest in this game? 
+ color = Column(Enum(PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE)) + game = relationship("Game", backref="one_side_lists_assoc") + + # one game ( for one side ) can be in one game list only once + __table_args__ = ( UniqueConstraint('one_side_list_id', 'game_id', 'color'), ) + + def __init__(self, game, color): + self.game = game + self.color = color + + def __iter__(self): + yield self.game + yield self.color + + @ununicode + def __repr__(self): + return u"OneSideListAssociation(%s, '%s')" %( repr(self.game), self.color ) + +class OneSideList(Base): + """List of games, for e.g. players with 10kyu, Honinbo Shusaku's games, ... + + Note that the list distinguishes between sides. That is, if you are interested + in both sides (default behaviour of the `add` method), the game will be added + twice - once for black, once for white. + """ + __tablename__ = 'one_side_list' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, unique=True, index=True) + + list_assocs = relationship('OneSideListAssociation', backref='one_side_list') + + def __init__(self, name, assocs=None): + self.name = name + if assocs != None: + assert not self.list_assocs + self.list_assocs = list(assocs) + + def __getitem__(self, val): + return self.list_assocs[val] + + def batch_add(self, games, color): + """Add games played with one color in batch.""" + self.list_assocs += [ OneSideListAssociation(game, color) for game in games ] + + def add(self, game, player=None, color=None): + """Adds game to the list. If @player (or @color) specified, adds only + one side of the game - the one that @player played (or played with @color). + Otw. 
both sides get added (game is added twice - once for black, once for white) + """ + if player != None: + pc = game.get_player_color(player) + if color and color != cp: + raise ValueError( """Provided color (%s) is different from provided player's (%s) color in the game %s."""% + ( color, player, game )) + color = pc + + if color != None: + # if color of the desired player specified + citer = ( color, ) + else: + # add both black's game and white's game + citer = PLAYER_COLORS + + for color in citer: + self.list_assocs.append(OneSideListAssociation(game, color)) + + def for_one_side_list(self, merger, bw_processor): + """ + Processes the whole OneSideList, so that @bw_processor is called on every game. And the + result of interest (black or white) is added to @merger, via @merger.add(result, color). + At the end @merger.finish() is called and this should return the desired data. + """ + #assert isinstance(merger, Merger) + + merger.start(bw_processor) + + for ga in self.list_assocs: + try: + black, white = bw_processor(ga.game) + except ProcessingError as exc: + logging.debug("Exception %s occured in processing the game %s, skipping!!"%(repr(exc), ga.game)) + continue + except Exception as exc: + logging.exception("Exception %s occured in processing the game %s!!"%(repr(exc), ga.game)) + raise + #continue + + desired = black if ga.color == PLAYER_COLOR_BLACK else white + merger.add(desired, ga.color) + + return merger.finish() + + def __len__(self): + return len(self.list_assocs) + + def __str__(self): + ret = [ self.name ] + for ga in self.list_assocs: + ret.append("%s : %s"%(ga.color, ga.game)) + + return '\n'.join(ret) + + @ununicode + def __repr__(self): + return "OneSideList(%s, '%s', #games = %d)"%( self.id, self.name, len(self) ) + +class DataMap(Base): + """ + One DataMap holds info about the mapping: + OneSideList -> ImageData + """ + __tablename__ = 'datamap' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, 
unique=True, index=True) + + # information about the image domain + image_types = Column(PickleType) + image_annotations = Column(PickleType) + + relations = relationship("DataMapRelation", backref='datamap') + + def add(self, one_side_list, image): + self.relations.append(DataMapRelation(one_side_list=one_side_list, + image=image)) + def __len__(self): + return len(self.relations) + + def __getitem__(self, val): + return self.relations[val] + + @ununicode + def __repr__(self): + return "DataMap(%d, '%s', #relations = %d )"%(self.id, self.name, len( self.relations)) + +class DataMapRelation(Base): + """ + One OneSideList gets mapped to data (usually a python vector). + """ + __tablename__ = 'datamap_relation' + id = Column(Integer, primary_key=True, index=True) + # id of the current dataset + datamap_id = Column(Integer, ForeignKey('datamap.id'), index=True) + # domain + one_side_list_id = Column(Integer, ForeignKey('one_side_list.id'), index=True) + # image + image_id = Column(Integer, ForeignKey('image_data.id'), index=True) + + one_side_list = relationship("OneSideList")#, backref='relations') + image = relationship("ImageData") + + def __iter__(self): + yield self.one_side_list + yield self.image + + def __repr__(self): + return "DataMapRelation(%s,%s)" % (repr(self.one_side_list), + repr(self.image)) + + +class ImageData(Base): + """ Class used to hold python-pickled data under unique name. Meant to be + used for holding right side of the mapping defined by DataMapRelation, + so that multiple OneSideLists may share the same image. + """ + __tablename__ = 'image_data' + id = Column(Integer, primary_key=True, index=True) + # e.g. 'style: Otake Hideo' + name = Column(Text, nullable=False, unique=True, index=True) + # e.g. 
the style vector itself + data = Column(PickleType) + + def __init__(self, name, data): + self.name = name + self.data = data + + @ununicode + def __repr__(self): + return "ImageData(%s, %s, %s)"%(self.id, self.name, self.data) + diff --git a/utils/godb_session.py b/utils/godb_session.py new file mode 100644 index 0000000..378718f --- /dev/null +++ b/utils/godb_session.py @@ -0,0 +1,173 @@ +from sqlalchemy import create_engine, and_ +from sqlalchemy.orm import sessionmaker, aliased +from sqlalchemy.orm.session import Session + +import logging +import os +import re + +from godb_models import Player, PlayerInTime, Game, GameList, OneSideList, OneSideListAssociation, DataMap, DataMapRelation, ImageData, Base +from rank import Rank +import misc +import timer + +from sgf_load import load_sgf_file_headers, ParseError + +class GodbSession(Session): + def godb_get_player(self, name, note=u''): + """Looks if the player with @name is in the DB and returns it. + Otw. creates a new player with these attributes. + This new player is NOT added into the session. + """ + pls = self.query(Player).filter(Player.name==name).all() + assert len(pls) <= 1 + if len(pls) == 1: + player = pls[0] + if player.note != note: + logging.warn("%s has different note than '%s'"%(repr(player), note)) + return player + if len(pls) == 0: + return Player(name, note) + + def godb_get_player_in_time(self, name, current_name=None, current_rank=None, current_note=''): + """ + NOT adding anything into the session. 
def godb_get_player_in_time(self, name, current_name=None, current_rank=None, current_note=''):
    """Return an existing PlayerInTime matching the given attributes,
    or create a new one.

    The returned PlayerInTime (new or existing) is NOT added into the session.
    (The underlying Player is fetched/created via self.godb_get_player.)
    """
    player = self.godb_get_player(name)
    # idiom fix: was `current_name == None`
    if current_name is None:
        current_name = name

    pits = self.query(PlayerInTime).filter( PlayerInTime.player == player )

    # NOTE(review): falsy values (e.g. the '' name of anonymous players, or a
    # rank of None) skip the filter entirely, so any stored value matches —
    # presumably intentional; confirm against callers.
    if current_name:
        pits = pits.filter( PlayerInTime.name == current_name )
    if current_rank:
        pits = pits.filter( PlayerInTime.rank == current_rank )
    if current_note:
        pits = pits.filter( PlayerInTime.note == current_note )

    pit_all = pits.all()
    if pit_all:
        # return the first match; duplicates are tolerated here
        return pit_all[0]

    return PlayerInTime(player, current_name, current_rank, current_note)
def godb_add_dir_as_gamelist(self, *args, **kwargs):
    """Deprecated alias for godb_scan_dir_as_gamelist (kept for old callers)."""
    logging.warn("deprecated call to godb_add_dir_as_gamelist")
    # BUG fix: previously called self.godb_scan_dir_as_gamelist(self, *args, ...)
    # passing `self` twice, which shifted every positional argument by one.
    return self.godb_scan_dir_as_gamelist(*args, **kwargs)

def godb_scan_dir_as_gamelist(self, directory, gamelist=None):
    """Recursively scans the @directory for sgf files.
    The valid games are added into a gamelist (either provided by @gamelist kwarg,
    or new if @gamelist is None).

    Both players in each of the games scanned are added into the session.
    (see self.godb_sgf_to_game)

    The gamelist is returned and NOT added into the session.
    """
    t = timer.Timer()
    games = []
    t.start()
    for filepath in misc.iter_files(directory):
        if re.search('sgf$', filepath):
            logging.debug("Scanning '%s'"%(filepath))

            # create a Game object from the sgf file; sgf_to_game returns
            # None for unparsable / multi-gametree files
            t.start()
            game = self.godb_sgf_to_game(filepath)
            if game:
                games.append(game)
            t.stop()

    t.stop_n_log(' Total time', 'Game')

    if gamelist is None:
        gamelist = GameList("Games from '%s'."%(directory,))

    gamelist.games += games
    logging.info("Added %d games to: %s"%(len(games), gamelist))

    return gamelist
# session factory producing GodbSession instances
_godb_session = sessionmaker(class_=GodbSession)

def godb_session_maker(filename, echo=False):
    """Return a new GodbSession backed by the sqlite database in @filename.

    All tables declared on Base.metadata are created if missing.
    """
    engine = create_engine('sqlite:///%s' % filename, echo=echo)
    Base.metadata.create_all(engine)

    session = _godb_session(bind=engine)
    # for wingide completion...
    isinstance(session, GodbSession)
    return session
def bucket_by_key(iterable, key_fc):
    """
    Throws items in @iterable into buckets given by @key_fc function.
    e.g.
    >>> bucket_by_key([1,2,-3,4,5,6,-7,8,-9], lambda num: 'neg' if num < 0 else 'nonneg')
    {'neg': [-3, -7, -9], 'nonneg': [1, 2, 4, 5, 6, 8]}
    """
    buckets = {}
    for element in iterable:
        key = key_fc(element)
        if key not in buckets:
            buckets[key] = []
        buckets[key].append(element)
    return buckets
class MyPartial:
    """
    An alternative implementation of functools.partial, allowing to specify
    args from the right as well.
    """
    def __init__(self, func, args=(), keywords=None, right=False):
        # BUG fix: `keywords={}` was a mutable default argument shared by
        # every instance constructed without keywords.
        self.func = func
        self.args = args
        self.keywords = {} if keywords is None else keywords
        self.right = right

    def _frepr(self):
        return repr(self.func)

    def __repr__(self):
        return "MyPartial(%s, %s, %s%s)"%(self._frepr(),
                repr(self.args), repr(self.keywords),
                ", right=True" if self.right else '')

    def _merge_args(self, args_new):
        # right=True appends the stored args AFTER the freshly supplied ones
        if self.right:
            return args_new + tuple(self.args)
        return tuple(self.args) + args_new

    def _merge_kwargs(self, kwargs_new):
        # new keyword args override the stored ones
        kwargs = self.keywords.copy()
        kwargs.update(kwargs_new)
        return kwargs

    def __call__(self, *args_new, **kwargs_new):
        args = self._merge_args(args_new)
        kwargs = self._merge_kwargs(kwargs_new)
        return self.func(*args, **kwargs)

def partial(f, *args, **kwargs):
    """
    def minus(a, b):
        return a - b

    partial(minus, 10) is like:

    lambda b : minus(10, b)
    """
    return MyPartial(f, args, kwargs)

def partial_right(f, *args, **kwargs):
    """
    def minus(a, b):
        return a - b

    partial_right(minus, 10) is like:

    lambda a : minus(a, 10)
    """
    return MyPartial(f, args, kwargs, right=True)
def tmp_names(base=None, first_simple=False):
    """Yield an endless stream of temporary names derived from @base.

    With first_simple=True the first name is plain "base", then "base_1",
    "base_2", ...; otherwise the stream starts at "base_0".

    BUG fix: the default @base used to be `random_hash()` evaluated once at
    import time, so every call sharing the default produced the very same
    name sequence; now a fresh hash is drawn per call when @base is omitted.
    """
    if base is None:
        base = random_hash()
    i = 0
    if first_simple:
        yield "%s"%(base)
        i += 1
    while True:
        yield "%s_%d"%(base, i)
        i += 1
class RankInitExc(Exception):
    """Raised when a Rank is constructed from an invalid number/kdp pair."""
    pass

class Rank:
    """A go rank: kyu ('k'), dan ('d') or professional dan ('p').

    Every rank maps onto a single integer scale via key():
    30k..1k -> 30..1, 1d..10d -> 0..-9, 1p..10p -> -10..-19,
    i.e. the smaller the key, the stronger the player.
    """
    KEYS={'k': lambda x: x, # 1kyu -> 1, 30kyu -> 30
          'd': lambda x: -x + 1, # 1dan -> 0, 10dan -> -9
          'p': lambda x: -x - 9} # 1pro -> -10, 10pro -> -19

    DOMAIN_MAX = { 'k' : 30,
                   'd' : 10,
                   'p' : 10 }

    @staticmethod
    def from_key(number):
        """Return the Rank whose key() is closest to @number."""
        ranks = list(Rank.iter_all())
        dists = [ abs(number - r.key()) for r in ranks ]
        return misc.argmin( zip(ranks, dists) )

    @staticmethod
    def from_string(string):
        """Parse strings like '3k', '5 kyu', '9p'; return a Rank or None."""
        mo = re.match('^([1-9][0-9]?) ?([kdp]).*', string)
        if not mo:
            return None
        try:
            return Rank(int(mo.group(1)), mo.group(2))
        except (ValueError, RankInitExc):
            return None

    @staticmethod
    def iter_all():
        """Iterate over every valid Rank."""
        # .items()/range() instead of .iteritems()/xrange(): identical in
        # Python 2, and keeps the module importable under Python 3
        for key, domain in Rank.DOMAIN_MAX.items():
            for x in range(domain):
                yield Rank( x + 1, key )

    def __init__(self, number, kdp):
        self.number, self.kdp = number, kdp

        if not self.kdp in self.KEYS:
            raise RankInitExc("kdp must be either 'k' for kyu players,"
                              " 'd' for dan players or 'p' for professionals")

        def check_domain(bottom, val, up):
            assert bottom <= up
            if not( bottom <= val <= up):
                # BUG fix: the message used to be raised with unfilled
                # %d placeholders
                raise RankInitExc("Must be %d <= %d <= %d." % (bottom, val, up))

        check_domain(1, self.number, self.DOMAIN_MAX[self.kdp])

    def as_tuple(self):
        return self.number, self.kdp

    def key(self):
        """Single-integer representation of the rank (smaller = stronger)."""
        return self.KEYS[self.kdp](self.number)

    def __str__(self):
        return "%d%s"%(self.number, self.kdp)

    def __repr__(self):
        return "Rank(%s, key=%d)"%(self, self.key())

    def __hash__(self):
        return self.key().__hash__()

    def __cmp__(self, other):
        # Python 2 ordering hook; harmless (unused) under Python 3
        if not isinstance(other, Rank):
            return -1
        return ( - self.key()).__cmp__( - other.key())

    # Rich comparisons keep the same ordering working under Python 3:
    # a stronger rank (smaller key) compares greater.
    def __eq__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return self.key() == other.key()

    def __ne__(self, other):
        eq = self.__eq__(other)
        if eq is NotImplemented:
            return eq
        return not eq

    def __lt__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() < -other.key()

    def __le__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() <= -other.key()

    def __gt__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() > -other.key()

    def __ge__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() >= -other.key()
def my_err(exc):
    """Codec error handler: try to recover a badly encoded span as UTF-8,
    otherwise replace it with '?' characters.

    Note the deliberate end+1 slice: for a single-byte decode failure it
    grabs the following byte too, which recovers 2-byte UTF-8 sequences.
    """
    bad_span = exc.object[exc.start:exc.end+1]
    try:
        recovered = bad_span.decode('utf-8')
    except:
        width = exc.end - exc.start
        if width > 4:
            logging.warn("sgf_load.py : Long chain of chars (%d) badly encoded."%width)
        return (u'?' * width, exc.end)
    return (recovered, exc.end+1)
class Timer:
    """Class for measuring lengths of nested time intervals.
    Intervals are opened either like:

    >>> t = Timer()
    >>> t.start()
    >>> do_stuff()
    >>> t.stop()

    Or using the with statement:

    >>> with t():
            do_stuff()
    """
    def __init__(self):
        # stack of open intervals: (start_time, children_durations, kwargs)
        self.times = []
        # kwargs captured by __call__ for the next interval opened
        self.next_args = {}

    def start(self):
        """Opens time interval."""
        self.times.append((time.time(), [], self.next_args))
        self.next_args = {}

    def stop(self):
        """Closes time interval. Returns tuple (duration, children_durations)."""
        now = time.time()
        my_start, children_durations, _ = self.times.pop()

        my_duration = now - my_start
        # add duration of this interval to the parent's children list
        if self.times:
            self.times[-1][1].append(my_duration)

        return (my_duration, children_durations)

    def stop_n_log(self, comment=' time elapsed', child_name='child'):
        """Like stop(), but logs total time plus child-interval statistics."""
        total, children = self.stop()
        msg = "%s: %.3f s"%(comment, total)
        if children:
            c_sum, c_len = sum(children), len(children)
            mean = c_sum / c_len
            sd = 0
            if c_len > 1:
                # sample standard deviation of the child durations
                sd = math.sqrt(sum( (val - mean)**2 for val in children ) / (c_len - 1))
            msg += """, time not spent in children (overhead) %.3f s = %.2f%%
    #%d x %s took %.3f s:
    mean: %.3f s (sd = %.3f s)"""%(
                total - c_sum, 100.0 * (total - c_sum) / total,
                c_len, child_name, c_sum, mean, sd)
        logging.info(msg + '\n')

    def stop_arg(self):
        """Close the interval using the kwargs given at __call__ time."""
        _, _, kwargs = self.times[-1]
        log = kwargs.pop('log', False)
        if log:
            return self.stop_n_log(**kwargs)
        return self.stop()

    def __call__(self, **kwargs):
        self.next_args = kwargs
        return self

    def __enter__(self):
        self.start()
        # BUG fix: previously returned None, so `with t() as timer:`
        # bound `timer` to None instead of the Timer
        return self

    def __exit__(self, *args):
        self.stop_arg()
def check_output(*args, **kwargs):
    """subprocess.check_output, with a fallback backport for interpreters
    older than Python 2.7 where it does not exist."""
    if hasattr(subprocess, 'check_output'):
        return subprocess.check_output(*args, **kwargs)

    # backport path: emulate check_output with Popen
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    proc = subprocess.Popen(stdout=subprocess.PIPE, *args, **kwargs)
    out, _ = proc.communicate()
    returncode = proc.poll()
    if returncode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = args[0]
        raise subprocess.CalledProcessError(returncode, cmd, output=out)
    return out
def iter_splits(l, parts=None, max_size=None, min_size=None):
    """Will yield consequent sublist of the @l list, trying to result
    evenly sized sublists. Exactly one of the parameters @parts or
    @max_size or @min_size must be specified.

    specifiing parts = N will yield N sublists of (almost) even size. The
    list size difference is guaranted to be at most 1.

    >>> list(iter_splits(range(5), parts=2))
    [[0, 1, 2], [3, 4]]
    >>> list(iter_splits(range(5), parts=4))
    [[0, 1], [2], [3], [4]]

    specifiing max_size = N returns the smallest possible number of
    consequent sublists so that whole list is divided and size of each
    part is <= N

    >>> list(iter_splits(range(5), max_size=3))
    [[0, 1, 2], [3, 4]]
    >>> list(iter_splits(range(5), max_size=10))
    [[0, 1, 2, 3, 4]]

    Calling iter_splits(l, max_size=N) is just a shorthand for calling
    iter_splits(l, parts=len(l) / N + bool(len(l)% N) )

    Similarly min_size = N returns the largest possible number of
    consequent sublists so that whole list is divided and size of each
    part is >= N

    Calling iter_splits(l, min_size=N) is just a shorthand for calling
    iter_splits(l, parts=len(l) / N )
    """
    if bool(parts) + bool(max_size) + bool(min_size) != 1:
        # BUG fix: the message used to name a nonexistent "exact_size" argument
        raise TypeError('Exactly one of parts, max_size or min_size arguments must be specified (and nonzero)')

    if parts:
        # BUG fix: removed leftover debug `print parts`;
        # `//` keeps integer division working under Python 3 too
        pn, rest = len(l) // parts, len(l) % parts
        if pn == 0:
            raise ValueError("Number of parts to split must not be larger than the number of elements.")

        def sizes(pn, rest):
            # the first @rest sublists get one extra element
            for i in range(parts):
                if rest:
                    yield pn + 1
                    rest -= 1
                else:
                    yield pn

        stop = 0
        for size in sizes(pn, rest):
            start, stop = stop, stop + size
            yield l[start: stop]

    if max_size:
        pn, rest = len(l) // max_size, len(l) % max_size
        if rest:
            pn += 1
        for split in iter_splits(l, parts=pn):
            yield split

    if min_size:
        for split in iter_splits(l, parts=len(l) // min_size):
            yield split
center(vec1), center(vec2) + return (vec1 * vec2).sum() / (norm(vec1) * norm(vec2)) + + +if __name__ == '__main__': + def test_split(): + l = range(20) + + for kw in ['parts', 'max_size', 'min_size']: + for val in range(10, 20): + print "iter_splits(%s, **{%s : %s}))" % (l, kw, val) + res = list(iter_splits(l, **{kw : val})) + print kw, "=", val + print " len = ", len(res), ", max(size) = ", max(map(len, res)), ", min(size) = ", min(map(len, res)) + print " ", res + + assert list(itertools.chain.from_iterable(res)) == l + if kw == 'parts': + assert len(res) == val + if kw == 'max_size': + assert max(map(len, res)) <= val + if kw == 'min_size': + assert min(map(len, res)) >= val + + #test_partial() + #test_split() + + get_random_output_base(0, 1) \ No newline at end of file -- 2.11.4.GIT