From f89ffb40d6073d29ebbb5fb4cea4fbaa46604428 Mon Sep 17 00:00:00 2001 From: Joe Moudrik Date: Thu, 18 Jul 2013 16:52:42 +0200 Subject: [PATCH] gostyle: the basic library, intitial commit. --- PATCH_pachi.diff | 37 ++ QUESTIONARE/lukas_podpera | 1 + QUESTIONARE/vladimir_danek | 1 + README | 32 +- TEST_FILES/games/1930-01-00a.sgf | 32 ++ TEST_FILES/games/1960-00-00f.sgf | 22 + TEST_FILES/games/1976-00-00e.sgf | 33 ++ TEST_FILES/games/1984-03-11d.sgf | 24 + TEST_FILES/games/1990-00-00b.sgf | 32 ++ TEST_FILES/test_bdist.sgf | 6 + TEST_FILES/test_bdist2.sgf | 1 + TEST_FILES/test_capture.sgf | 7 + TEST_FILES/test_cont.sgf | 4 + config.py | 6 + data_about_players.py | 289 +++++++++++ game_to_vec.py | 1000 ++++++++++++++++++++++++++++++++++++++ ipython_session.py | 15 + load_questionare.py | 44 ++ pachi.py | 217 +++++++++ result_file.py | 56 +++ utils/__init__.py | 9 + utils/colors.py | 21 + utils/db_cache.py | 331 +++++++++++++ utils/godb_models.py | 486 ++++++++++++++++++ utils/godb_session.py | 173 +++++++ utils/misc.py | 249 ++++++++++ utils/rank.py | 90 ++++ utils/sgf_load.py | 70 +++ utils/timer.py | 99 ++++ utils/utils.py | 225 +++++++++ 30 files changed, 3610 insertions(+), 2 deletions(-) create mode 100644 PATCH_pachi.diff create mode 100644 QUESTIONARE/lukas_podpera create mode 100644 QUESTIONARE/vladimir_danek create mode 100644 TEST_FILES/games/1930-01-00a.sgf create mode 100644 TEST_FILES/games/1960-00-00f.sgf create mode 100644 TEST_FILES/games/1976-00-00e.sgf create mode 100644 TEST_FILES/games/1984-03-11d.sgf create mode 100644 TEST_FILES/games/1990-00-00b.sgf create mode 100644 TEST_FILES/test_bdist.sgf create mode 100644 TEST_FILES/test_bdist2.sgf create mode 100644 TEST_FILES/test_capture.sgf create mode 100644 TEST_FILES/test_cont.sgf create mode 100644 config.py create mode 100755 data_about_players.py create mode 100644 game_to_vec.py create mode 100644 ipython_session.py create mode 100644 load_questionare.py create mode 100644 pachi.py create mode 
100644 result_file.py create mode 100644 utils/__init__.py create mode 100644 utils/colors.py create mode 100644 utils/db_cache.py create mode 100644 utils/godb_models.py create mode 100644 utils/godb_session.py create mode 100644 utils/misc.py create mode 100644 utils/rank.py create mode 100644 utils/sgf_load.py create mode 100644 utils/timer.py create mode 100644 utils/utils.py diff --git a/PATCH_pachi.diff b/PATCH_pachi.diff new file mode 100644 index 0000000..042b6cb --- /dev/null +++ b/PATCH_pachi.diff @@ -0,0 +1,37 @@ +diff --git a/pattern.c b/pattern.c +index 6843be0..c4e8aed 100644 +--- a/pattern.c ++++ b/pattern.c +@@ -16,7 +16,7 @@ + + + struct pattern_config DEFAULT_PATTERN_CONFIG = { +- .bdist_max = 4, ++ .bdist_max = 10, + + .spat_min = 3, .spat_max = MAX_PATTERN_DIST, + .spat_largest = true, +@@ -30,7 +30,7 @@ pattern_spec PATTERN_SPEC_MATCH_DEFAULT = { + [FEAT_SELFATARI] = ~0, + [FEAT_ATARI] = ~0, + [FEAT_BORDER] = ~0, +- [FEAT_CONTIGUITY] = 0, ++ [FEAT_CONTIGUITY] = ~0, + [FEAT_SPATIAL] = ~0, + }; + +@@ -530,10 +530,11 @@ pattern_match(struct pattern_config *pc, pattern_spec ps, + } + } + +- if (PS_ANY(CONTIGUITY) && !is_pass(b->last_move.coord) +- && coord_is_8adjecent(m->coord, b->last_move.coord, b)) { ++ if (PS_ANY(CONTIGUITY) && !is_pass(b->last_move.coord) ){ ++ //if (coord_is_8adjecent(m->coord, b->last_move.coord, b)) { } ++ + f->id = FEAT_CONTIGUITY; +- f->payload = 1; ++ f->payload = coord_gridcular_distance(m->coord, b->last_move.coord, b); + (f++, p->n++); + } + diff --git a/QUESTIONARE/lukas_podpera b/QUESTIONARE/lukas_podpera new file mode 100644 index 0000000..f60b186 --- /dev/null +++ b/QUESTIONARE/lukas_podpera @@ -0,0 +1 @@ +{"interviewee_name":"Lukas Podpera","interviewee_key":"84jf9j","interviewee_feedback":"Lee Sedol and Yi Se-tol is the same player ;)","group_lists":[{"name":"Elementary set","list":[{"name":"Chen Yaoye","id":0,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"no"},{"name":"Cho 
Chikun","id":1,"style":{"te":"6","or":"4","ag":"9","th":"10"},"tags":"","skip":"no"},{"name":"Cho U","id":2,"style":{"te":"7","or":"9","ag":"9","th":"7"},"tags":"","skip":"no"},{"name":"Gu Li","id":3,"style":{"te":"7","or":"6","ag":"8","th":"7"},"tags":"","skip":"no"},{"name":"Ishida Yoshio","id":4,"style":{"te":"9","or":"1","ag":"3","th":"2"},"tags":"","skip":"no"},{"name":"Lee Sedol","id":5,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"no"},{"name":"Luo Xihe","id":6,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Ma Xiaochun","id":7,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"O Meien","id":8,"style":{"te":"2","or":"4","ag":"7","th":"8"},"tags":"","skip":"no"},{"name":"Otake Hideo","id":9,"style":{"te":"9","or":"6","ag":"2","th":"4"},"tags":"","skip":"no"},{"name":"Rui Naiwei","id":10,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Sakata Eio","id":11,"style":{"te":"6","or":"1","ag":"9","th":"8"},"tags":"","skip":"no"},{"name":"Takemiya Masaki","id":12,"style":{"te":"1","or":"2","ag":"7","th":"3"},"tags":"","skip":"no"},{"name":"Yi Ch'ang-ho","id":13,"style":{"te":"9","or":"7","ag":"8","th":"2"},"tags":"","skip":"no"},{"name":"Yi Se-tol","id":14,"style":{"te":"6","or":"10","ag":"10","th":"9"},"tags":"","skip":"no"},{"name":"Yoda Norimoto","id":15,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"More players (OPTIONAL)","list":[{"name":"Chen Zude","id":16,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Cho Tae-hyeon","id":17,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Fujisawa Hideyuki","id":18,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Go Seigen","id":19,"style":{"te":"2","or":"1","ag":"8","th":"6"},"tags":"","skip":"no"},{"name":"Hane Naoki","id":20,"style":{"te":"9","or":"5","ag":"4","th":"3"},"tags":"","skip":"no"},{"name":"Honinbo 
Dosaku","id":21,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Shusaku","id":22,"style":{"te":"9","or":"1","ag":"7","th":"2"},"tags":"","skip":"no"},{"name":"Honinbo Shuwa","id":23,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Huang Longshi","id":24,"style":{"te":"5","or":"1","ag":"10","th":"10"},"tags":"","skip":"no"},{"name":"Jie Li","id":25,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kato Masao","id":26,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kato Shin","id":27,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kobayashi Koichi","id":28,"style":{"te":"10","or":"2","ag":"3","th":"3"},"tags":"","skip":"no"},{"name":"Kong JieMiyazawa Goro","id":29,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Nie Weiping","id":30,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Shao Zhenzhong","id":31,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Suzuki Goro","id":32,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Takao Shinji","id":33,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Wu Songsheng","id":34,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Yamashita Keigo","id":35,"style":{"te":"2","or":"4","ag":"9","th":"6"},"tags":"","skip":"no"},{"name":"Yuki Satoshi","id":36,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"If you watch KGS games 
(OPTIONAL)","list":[{"name":"billlin","id":37,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cashewnut","id":38,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cloudnotes","id":39,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"himountain","id":40,"style":{"te":"6","or":"5","ag":"8","th":"8"},"tags":"","skip":"no"},{"name":"hjekshdf","id":41,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"kyoungnang","id":42,"style":{"te":"7","or":"8","ag":"9","th":"8"},"tags":"","skip":"no"},{"name":"MilanMilan","id":43,"style":{"te":"7","or":"8","ag":"9","th":"8"},"tags":"","skip":"no"},{"name":"pblshtwzrs","id":44,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"smartrobot","id":45,"style":{"te":"9","or":"5","ag":"7","th":"6"},"tags":"","skip":"no"},{"name":"xmianzhu","id":46,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"Additional","list":[{"name":"Iyama Yuta","id":47,"style":{"te":"8","or":"9","ag":"8","th":"6"},"tags":"","skip":"no"}]}]} \ No newline at end of file diff --git a/QUESTIONARE/vladimir_danek b/QUESTIONARE/vladimir_danek new file mode 100644 index 0000000..895c645 --- /dev/null +++ b/QUESTIONARE/vladimir_danek @@ -0,0 +1 @@ +{"interviewee_name":"","interviewee_key":"25b9fb","interviewee_feedback":"","group_lists":[{"name":"Elementary set","list":[{"name":"Chen Yaoye","id":0,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Cho Chikun","id":1,"style":{"te":"9","or":"2","ag":"7","th":"9"},"tags":"","skip":"no"},{"name":"Cho U","id":2,"style":{"te":"7","or":"3","ag":"8","th":"6"},"tags":"","skip":"no"},{"name":"Gu Li","id":3,"style":{"te":"8","or":"10","ag":"10","th":"8"},"tags":"","skip":"no"},{"name":"Ishida Yoshio","id":4,"style":{"te":"10","or":"1","ag":"2","th":"3"},"tags":"","skip":"no"},{"name":"Lee 
Sedol","id":5,"style":{"te":"8","or":"10","ag":"10","th":"8"},"tags":"","skip":"no"},{"name":"Luo Xihe","id":6,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Ma Xiaochun","id":7,"style":{"te":"9","or":"2","ag":"4","th":"3"},"tags":"","skip":"no"},{"name":"O Meien","id":8,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Otake Hideo","id":9,"style":{"te":"5","or":"1","ag":"3","th":"2"},"tags":"","skip":"no"},{"name":"Rui Naiwei","id":10,"style":{"te":"8","or":"5","ag":"9","th":"6"},"tags":"","skip":"no"},{"name":"Sakata Eio","id":11,"style":{"te":"9","or":"2","ag":"9","th":"9"},"tags":"","skip":"no"},{"name":"Takemiya Masaki","id":12,"style":{"te":"2","or":"4","ag":"8","th":"3"},"tags":"","skip":"no"},{"name":"Yi Ch'ang-ho","id":13,"style":{"te":"10","or":"8","ag":"2","th":"4"},"tags":"","skip":"no"},{"name":"Yi Se-tol","id":14,"style":{"te":"8","or":"9","ag":"9","th":"9"},"tags":"","skip":"no"},{"name":"Yoda Norimoto","id":15,"style":{"te":"9","or":"2","ag":"3","th":"3"},"tags":"","skip":"no"}]},{"name":"More players (OPTIONAL)","list":[{"name":"Chen Zude","id":16,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Cho Tae-hyeon","id":17,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Fujisawa Hideyuki","id":18,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Go Seigen","id":19,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Hane Naoki","id":20,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Dosaku","id":21,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Shusaku","id":22,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Honinbo Shuwa","id":23,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Huang Longshi","id":24,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Jie 
Li","id":25,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kato Masao","id":26,"style":{"te":"4","or":"2","ag":"7","th":"9"},"tags":"","skip":"no"},{"name":"Kato Shin","id":27,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Kobayashi Koichi","id":28,"style":{"te":"10","or":"1","ag":"3","th":"2"},"tags":"","skip":"no"},{"name":"Kong JieMiyazawa Goro","id":29,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Nie Weiping","id":30,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Shao Zhenzhong","id":31,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Suzuki Goro","id":32,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Takao Shinji","id":33,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Wu Songsheng","id":34,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Yamashita Keigo","id":35,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"Yuki Satoshi","id":36,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"If you watch KGS games 
(OPTIONAL)","list":[{"name":"billlin","id":37,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cashewnut","id":38,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"cloudnotes","id":39,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"himountain","id":40,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"hjekshdf","id":41,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"kyoungnang","id":42,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"MilanMilan","id":43,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"pblshtwzrs","id":44,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"smartrobot","id":45,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"},{"name":"xmianzhu","id":46,"style":{"te":"","or":"","ag":"","th":""},"tags":"","skip":"yes"}]},{"name":"Additional","list":[]}]} \ No newline at end of file diff --git a/README b/README index f3b9af6..b342381 100644 --- a/README +++ b/README @@ -1,8 +1,36 @@ Hello!! -This repository includes various tools used to analyze Go-players style. +This repository includes various tools used to analyze Go-players style. Whats here: + - a tool for database of Go games, whose data structures (like a list of games) + we use all over the place, read utils/godb_models.py and utils/godb_session.py + + - a simple wrapper for pachi to work with the games and scan the raw patterns comfortably + (see pachi.py) + + - stuff to work and form pattern vectors from raw patterns (see game_to_vec.py ) + and to export the vectors as .tab files which can be then used by the Orange Datamining + toolkit to fiddle around with some machine learning stuff, http://orange.biolab.si/ + +All the major files have examples at the bottom and are pretty well commented. -The readme will be updated sometimes soon. 
+REQUIREMENTS: + - if you want to scan the games, you need pachi go engine, instructions below + + - if you want to toy with machine learning stuff, you need the Orange datamining toolkit + get it from http://orange.biolab.si/ +------ +How to prepare pachi: +run this in some dir (assuming in a parent dir of gostyle - this dir) + +$ git clone git://repo.or.cz/pachi.git +$ cd pachi +$ git checkout jmhack +# this is a ugly way to make the contiguity feature say the real distance +# instead of a proximity flag it does by def. +$ patch pattern.c ../gostyle/PATCH_pachi.diff +$ make +# this is necessary, otw pachi segfaults +$ touch patterns.spat diff --git a/TEST_FILES/games/1930-01-00a.sgf b/TEST_FILES/games/1930-01-00a.sgf new file mode 100644 index 0000000..c270abc --- /dev/null +++ b/TEST_FILES/games/1930-01-00a.sgf @@ -0,0 +1,32 @@ +(;SZ[19]FF[3] +PW[Maeda Nobuaki] +WR[4d] +PB[Hashimoto Utaro] +BR[4d] +DT[Published 1930-01] +TM[8h] +KM[0] +RE[W+2] +;B[qd];W[qp];B[ce];W[dc];B[dq];W[ci];B[cn];W[oc];B[np];W[pn];B[mc];W[oe] +;B[pf];W[cd];B[de];W[fd];B[eh];W[cl];B[jc];W[cg];B[ef];W[gc];B[pc];W[pb] +;B[qb];W[kd];B[me];W[kc];B[of];W[jq];B[gq];W[mb];B[lb];W[md];B[ld];W[lc] +;B[nd];W[nc];B[md];W[nb];B[kb];W[jd];B[ic];W[id];B[od];W[qa];B[ma];W[oa] +;B[hc];W[hd];B[la];W[na];B[ja];W[rb];B[qc];W[rc];B[rd];W[sc];B[dj];W[cj] +;B[pk];W[jo];B[nn];W[pl];B[qq];W[pq];B[pp];W[op];B[po];W[oo];B[qo];W[oq] +;B[on];W[qn];B[ro];W[om];B[rn];W[ql];B[qr];W[no];B[mo];W[mn];B[nm];W[nl] +;B[mm];W[ml];B[lm];W[mp];B[ln];W[lp];B[ok];W[ol];B[ll];W[qk];B[qm];W[pm] +;B[rm];W[pi];B[lj];W[qg];B[qf];W[rg];B[be];W[fp];B[gp];W[cp];B[cq];W[dp] +;B[ep];W[eo];B[eq];W[do];B[bp];W[bo];B[bq];W[bn];B[dk];W[dl];B[bc];W[ig] +;B[ck];W[bk];B[bl];W[bm];B[oh];W[dh];B[fj];W[hr];B[cb];W[hb];B[ib];W[db] +;B[gr];W[mj];B[di];W[li];B[kj];W[mi];B[ph];W[qi];B[or];W[nq];B[pr];W[nr] +;B[el];W[mg];B[pe];W[cc];B[ba];W[ij];B[ik];W[hk];B[il];W[gj];B[dg];W[ch] +;B[fk];W[jj];B[hl];W[fg];B[eg];W[ki];B[lf];W[bf];B[oi];W[af];B[cf];W[ah] 
+;B[bd];W[em];B[fm];W[en];B[ir];W[iq];B[hq];W[jr];B[hs];W[ha];B[ia];W[fb] +;B[rl];W[rk];B[qh];W[rh];B[pj];W[qj];B[jk];W[jm];B[ko];W[in];B[kl];W[gi] +;B[ff];W[gk];B[gl];W[gf];B[kg];W[jg];B[fh];W[gg];B[jf];W[if];B[kh];W[jh] +;B[rf];W[ns];B[ed];W[ec];B[fe];W[ge];B[kp];W[kq];B[fo];W[fn];B[go];W[gm] +;B[fl];W[js];B[gn];W[is];B[gh];W[hh];B[hr];W[sl];B[sm];W[sk];B[je];W[ke] +;B[kf];W[mf];B[le];W[nf];B[ne];W[fi];B[ei];W[ap];B[aq];W[ao];B[ie];W[he] +;B[sg];W[sh];B[sf];W[os];B[ps];W[ae];B[lo];W[ad];B[sd];W[ac];B[ab];W[da] +;B[ca];W[dd];B[pg];W[ri];B[nh];W[mh];B[ng];W[lk];B[kk];W[mk];B[ee] +) diff --git a/TEST_FILES/games/1960-00-00f.sgf b/TEST_FILES/games/1960-00-00f.sgf new file mode 100644 index 0000000..ab2e00c --- /dev/null +++ b/TEST_FILES/games/1960-00-00f.sgf @@ -0,0 +1,22 @@ +(;SZ[19]FF[3] +PW[Kikuchi Yasuro] +PB[Rin Kaiho] +BR[5d] +DT[1960] +TM[2h] +KM[4.5] +RE[B+R] +;B[cp];W[pq];B[dc];W[pc];B[fq];W[cf];B[pe];W[qe];B[qf];W[qd];B[pf];W[nc] +;B[po];W[qm];B[qq];W[qj];B[pp];W[oq];B[rp];W[lq];B[oj];W[qh];B[oh];W[ck] +;B[cm];W[iq];B[cd];W[ef];B[ci];W[dj];B[di];W[ei];B[dg];W[eg];B[df];W[de] +;B[eh];W[fh];B[dh];W[ce];B[ee];W[ej];B[dd];W[fe];B[ed];W[bh];B[bi];W[cg] +;B[ag];W[bd];B[bc];W[ah];B[ai];W[bg];B[bl];W[af];B[fi];W[gh];B[gi];W[hh] +;B[hi];W[bk];B[ak];W[dm];B[dn];W[en];B[do];W[fm];B[ih];W[gf];B[dl];W[cl] +;B[bn];W[em];B[hg];W[gg];B[gd];W[ig];B[jh];W[if];B[hl];W[gk];B[hk];W[gp] +;B[gq];W[fp];B[eq];W[in];B[jm];W[ic];B[kf];W[nf];B[ne];W[lf];B[lg];W[mf] +;B[pd];W[od];B[qc];W[rc];B[qb];W[rb];B[rf];W[pb];B[ph];W[ke];B[rm];W[rl] +;B[rn];W[ok];B[nk];W[pj];B[ol];W[oi];B[nj];W[pi];B[nh];W[ni];B[mi];W[pk] +;B[nl];W[fc];B[fd];W[eb];B[gb];W[gc];B[fb];W[ec];B[hc];W[hb];B[hd];W[ga] +;B[ib];W[fa];B[jb];W[db];B[jd];W[kg];B[kh];W[kc];B[kd];W[lc];B[jc];W[ie] +;B[gb];W[cb];B[ha];W[cc];B[jg];W[ri];B[ff];W[fg];B[ge] +) diff --git a/TEST_FILES/games/1976-00-00e.sgf b/TEST_FILES/games/1976-00-00e.sgf new file mode 100644 index 0000000..4f08d7e --- /dev/null +++ 
b/TEST_FILES/games/1976-00-00e.sgf @@ -0,0 +1,33 @@ +(;SZ[19]FF[3] +PW[Takagi Shoichi] +WR[7d] +PB[Juergen Mattern] +BR[6d ama] +DT[1976] +TM[2.5h] +OH[3] +HA[3] +RE[Jigo] +AB[pd][dp][pp] +;W[dd];B[pj];W[nq];B[kp];W[qq];B[qp];W[pq];B[op];W[np];B[oq];W[or];B[pr] +;W[qr];B[nr];W[ps];B[no];W[mo];B[mp];W[mq];B[mr];W[lq];B[lp];W[kr];B[nn] +;W[cj];B[jd];W[gd];B[qd];W[md];B[mf];W[le];B[jf];W[lf];B[mg];W[lg];B[jh] +;W[li];B[lh];W[kh];B[mh];W[kg];B[ki];W[jg];B[ji];W[ig];B[hi];W[lj];B[ik] +;W[pn];B[ro];W[pk];B[qj];W[oj];B[qk];W[oi];B[ph];W[nm];B[pl];W[mn];B[om] +;W[mm];B[pm];W[jm];B[jo];W[jq];B[im];W[jl];B[in];W[il];B[hl];W[jk];B[hk] +;W[ok];B[kj];W[kk];B[lk];W[mk];B[ll];W[ni];B[he];W[hh];B[jj];W[ml];B[ge] +;W[fd];B[gg];W[gh];B[ef];W[hd];B[id];W[eh];B[cf];W[ce];B[hb];W[gb];B[if] +;W[ch];B[hg];W[ih];B[ee];W[ed];B[be];W[bd];B[ir];W[kq];B[de];W[cd];B[cl] +;W[fl];B[mi];W[mj];B[iq];W[do];B[co];W[cn];B[eo];W[dn];B[cp];W[en];B[fo] +;W[fn];B[jp];W[ns];B[bn];W[gm];B[cm];W[hm];B[ek];W[fk];B[ej];W[fj];B[nc] +;W[og];B[pg];W[go];B[gp];W[hp];B[jn];W[km];B[gq];W[ho];B[ip];W[nd];B[of] +;W[hq];B[hr];W[bo];B[bp];W[bm];B[ao];W[gr];B[fq];W[fr];B[eq];W[oc];B[pb] +;W[pc];B[qc];W[ob];B[qb];W[dl];B[ck];W[dk];B[bj];W[bi];B[dj];W[ci];B[dm] +;W[el];B[lc];W[mc];B[hc];W[kb];B[jb];W[kc];B[kd];W[lb];B[fb];W[gc];B[ga] +;W[eb];B[ea];W[db];B[rq];W[rr];B[sr];W[bf];B[bg];W[ae];B[da];W[ca];B[fa] +;W[cb];B[cg];W[jc];B[ic];W[ja];B[ks];W[ms];B[jr];W[lr];B[ib];W[ng];B[nf] +;W[fg];B[gf];W[bk];B[bl];W[aj];B[ln];W[kn];B[lo];W[lm];B[ld];W[js];B[is] +;W[nb];B[pi];W[oe];B[pe];W[eg];B[dg];W[ag];B[al];W[bh];B[ak];W[bj];B[pa] +;W[dh];B[df];W[em];B[am];W[fc];B[ha];W[ke];B[je];W[ol];B[qn];W[oa];B[hn] +;W[gn];B[rs];W[qs];B[sq];W[ss];B[er];W[rs] +) diff --git a/TEST_FILES/games/1984-03-11d.sgf b/TEST_FILES/games/1984-03-11d.sgf new file mode 100644 index 0000000..2e418a7 --- /dev/null +++ b/TEST_FILES/games/1984-03-11d.sgf @@ -0,0 +1,24 @@ +(;SZ[19]FF[3] +PW[Chang Su-yeong] +WR[6d] +PB[Seo Pong-su] +BR[8d] +DT[1984-03-11] 
+KM[5.5] +RE[B+R] +;B[pd];W[dc];B[pq];W[cp];B[ep];W[qo];B[qm];W[oo];B[np];W[lp];B[no];W[pm] +;B[ql];W[nn];B[mn];W[nm];B[lo];W[jp];B[qp];W[qn];B[ol];W[rp];B[rq];W[pp] +;B[qq];W[pl];B[pk];W[ok];B[pj];W[nk];B[op];W[po];B[cq];W[gq];B[dp];W[qh] +;B[oj];W[rj];B[om];W[pn];B[rk];W[qj];B[qk];W[qf];B[pg];W[pf];B[qg];W[rg] +;B[ph];W[of];B[re];W[rf];B[nc];W[mm];B[qi];W[ri];B[rh];W[sh];B[mi];W[lk] +;B[co];W[qh];B[pi];W[lf];B[ki];W[ce];B[qd];W[md];B[ne];W[nd];B[oe];W[ng] +;B[mg];W[mh];B[lh];W[mf];B[nh];W[nf];B[mc];W[kc];B[ld];W[kd];B[lc];W[ke] +;B[ec];W[dd];B[db];W[cb];B[hd];W[hf];B[fb];W[fd];B[ge];W[gf];B[cc];W[cd] +;B[ic];W[jb];B[fe];W[ed];B[ff];W[fg];B[eg];W[he];B[jf];W[je];B[fh];W[gg] +;B[df];W[gd];B[cf];W[ck];B[ci];W[ek];B[bb];W[bc];B[ca];W[cc];B[hb];W[gc] +;B[gb];W[ba];B[aa];W[ac];B[da];W[ib];B[hc];W[bf];B[bg];W[be];B[gh];W[eh] +;B[ig];W[ie];B[ei];W[dh];B[di];W[ef];B[ee];W[de];B[fl];W[mr];B[nr];W[mp] +;B[mo];W[cm];B[em];W[dg];B[fk];W[jk];B[kl];W[kk];B[iq];W[hp];B[lr];W[mq] +;B[ms];W[jq];B[jr];W[kr];B[kq];W[kp];B[ks];W[nq];B[oq];W[or];B[ns];W[lq] +;B[ir];W[kr];B[ls];W[ip];B[im] +) diff --git a/TEST_FILES/games/1990-00-00b.sgf b/TEST_FILES/games/1990-00-00b.sgf new file mode 100644 index 0000000..d8e5668 --- /dev/null +++ b/TEST_FILES/games/1990-00-00b.sgf @@ -0,0 +1,32 @@ +(;SZ[19]FF[3] +PW[Maeda Ryoji] +WR[5d] +PB[Ishida Yoshio] +BR[9d] +HA[8] +RE[B+68] +AB[dd][dj][dp][jd][jp][pd][pj][pp] +;W[qf];B[of];W[qc];B[qd];W[pc];B[oc];W[ob];B[nc];W[nb];B[mc];W[rd];B[re] +;W[rc];B[qe];W[qn];B[qo];W[pn];B[pl];W[qk];B[ql];W[nq];B[pq];W[ro];B[rp] +;W[po];B[np];W[qp];B[rq];W[pk];B[ok];W[qj];B[ol];W[pi];B[oj];W[rl];B[rm] +;W[rk];B[qm];W[rf];B[mp];W[fq];B[eq];W[fp];B[hp];W[iq];B[jq];W[dn];B[eo] +;W[er];B[dr];W[ep];B[dq];W[do];B[en];W[em];B[fn];W[dl];B[gm];W[gr];B[co] +;W[cn];B[bo];W[bn];B[br];W[bq];B[cq];W[ip];B[io];W[hq];B[fk];W[cj];B[ci] +;W[ck];B[bi];W[di];B[ej];W[jo];B[in];W[dh];B[cg];W[ko];B[lp];W[gl];B[fl] +;W[fm];B[hl];W[gn];B[gk];W[hn];B[im];W[og];B[qi];W[ri];B[ng];W[nf];B[pg] 
+;W[he];B[hc];W[je];B[ie];W[oh];B[ph];W[oe];B[pf];W[qh];B[nh];W[cf];B[dg] +;W[ee];B[ed];W[id];B[if];W[hd];B[ic];W[jc];B[kd];W[hf];B[gc];W[ke];B[ig] +;W[ld];B[kc];W[lc];B[jb];W[kb];B[jc];W[me];B[oi];W[kg];B[lf];W[le];B[mb] +;W[od];B[lb];W[pe];B[jh];W[kh];B[ki];W[cd];B[mf];W[ne];B[cc];W[de];B[bc] +;W[eg];B[eh];W[ei];B[fh];W[fi];B[fg];W[gi];B[ef];W[ji];B[hg];W[bp];B[cp] +;W[bs];B[cs];W[ar];B[as];W[lr];B[kq];W[oq];B[op];W[qr];B[qq];W[pr];B[aq] +;W[ln];B[li];W[rn];B[sm];W[nn];B[mn];W[mm];B[mo];W[jl];B[nm];W[nl];B[om] +;W[ii];B[km];W[bg];B[ch];W[fe];B[be];W[bf];B[ce];W[ae];B[bd];W[il];B[hm] +;W[df];B[gg];W[bj];B[ge];W[ff];B[eg];W[gf];B[gd];W[jm];B[jn];W[kn];B[hj] +;W[ai];B[ah];W[aj];B[bh];W[kl];B[hi];W[kj];B[ml];W[ll];B[lm];W[mm];B[ih] +;W[lj];B[jj];W[mk];B[rr];W[nr];B[kr];W[os];B[qg];W[rg];B[qi];W[rh];B[ir] +;W[mi];B[ij];W[lh];B[hr];W[gq];B[mg];W[lq];B[kp];W[rs];B[ji];W[hs];B[js] +;W[sl];B[go];W[fo];B[es];W[ao];B[ap];W[an];B[mh];W[kf];B[ni];W[mj];B[on] +;W[ik];B[hk];W[fn];B[ho];W[ek];B[fj];W[fr];B[mq];W[mr];B[nk];W[ml];B[jk] +;W[kk];B[na];W[pa];B[ls];W[ms];B[ks];W[sr];B[sq];W[qs];B[pi];W[ss] +) diff --git a/TEST_FILES/test_bdist.sgf b/TEST_FILES/test_bdist.sgf new file mode 100644 index 0000000..2204d8a --- /dev/null +++ b/TEST_FILES/test_bdist.sgf @@ -0,0 +1,6 @@ +(;GM[1]FF[4]AP[qGo:1.5.4]ST[1] +SZ[19]HA[0]KM[5.5]PW[White]PB[Black] + +;B[aa];W[ss];B[bb];W[rr];B[cc];W[qq];B[dd];W[pp];B[ee];W[oo] +;B[ff];W[nn];B[gg];W[mm];B[hh];W[ll];B[ii];W[kk] +) \ No newline at end of file diff --git a/TEST_FILES/test_bdist2.sgf b/TEST_FILES/test_bdist2.sgf new file mode 100644 index 0000000..ed0466b --- /dev/null +++ b/TEST_FILES/test_bdist2.sgf @@ -0,0 +1 @@ +(;GM[1]FF[4]SZ[19]AP[Kombilo];B[aa];W[pp];B[bb];W[pj];B[cc];W[jq];B[dd];W[dp];B[ee];W[od]) diff --git a/TEST_FILES/test_capture.sgf b/TEST_FILES/test_capture.sgf new file mode 100644 index 0000000..164b210 --- /dev/null +++ b/TEST_FILES/test_capture.sgf @@ -0,0 +1,7 @@ +(;GM[1]FF[4]AP[qGo:1.5.4]ST[1] 
+SZ[19]HA[0]KM[5.5]PW[White]PB[Black] + +;B[aa];W[ba];B[ab];W[bb];B[ac];W[bc];B[sa];W[ad];B[ra];W[qa] +;B[rb];W[qb];B[sb];W[qc];B[rc];W[rd];B[sc];W[sd];B[jb];W[ib] +;B[ic];W[jc];B[kc];W[kb];B[jd];W[jj] +) \ No newline at end of file diff --git a/TEST_FILES/test_cont.sgf b/TEST_FILES/test_cont.sgf new file mode 100644 index 0000000..5c105fa --- /dev/null +++ b/TEST_FILES/test_cont.sgf @@ -0,0 +1,4 @@ +(;GM[1]FF[4]AP[qGo:1.5.4]ST[1] +SZ[19]HA[0]KM[5.5]PW[White]PB[Black] + +;B[aa];W[ba];B[da];W[ga];B[ka];W[pa];B[pg];W[pn];B[hn];W[il];B[fk];W[fn]) diff --git a/config.py b/config.py new file mode 100644 index 0000000..58f81ad --- /dev/null +++ b/config.py @@ -0,0 +1,6 @@ +PACHI_DIR='../pachi/' +DB_FILE='./GODB.db' +QUESTIONARE_DIRECTORY = './QUESTIONARE' + +OUTPUT_DIR = './OUTPUT' + diff --git a/data_about_players.py b/data_about_players.py new file mode 100755 index 0000000..fa767ac --- /dev/null +++ b/data_about_players.py @@ -0,0 +1,289 @@ +#!/usr/bin/python + +import numpy +import math + +import load_questionare + +from config import QUESTIONARE_DIRECTORY + +def questionare_average(questionare_list, silent=False, tex=False, cnt_limit=1): +# Otake Hideo & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\ + total={} + count={} + for questionare in questionare_list: + for name in questionare.keys(): + if name in count: + count[name] += 1 + else: + count[name] = 1 + total[name] = [] + total[name].append(questionare[name]) + + l=[] + for name, counter in count.items(): + l.append( (counter, name) ) + l.sort() + l.reverse() + variance=[] + result={} + + for counter, name in l: + if counter >= cnt_limit: + means=[] + if not silent: + print "%d: %20s"%(counter, name), + + a = numpy.array(total[name]).transpose() + for b in a: + means.append(b.mean()) + if not silent: + if not tex: + print u"%2.3f \u00B1 %2.3f "%(b.mean(), numpy.sqrt(b.var())), + else: + print u"& $%2.1f \pm %2.1f$"%(b.mean(), numpy.sqrt(b.var())), + variance.append(numpy.sqrt(b.var())) + if 
not silent: + if not tex: + print + else: + print "\\\\" + variance.append(numpy.sqrt(b.var())) + result[name] = means + + if not silent: + if not tex: + print "Mean standard deviation is: %2.3f"%(numpy.array(variance).mean(),) + return result + +def questionare_average_raw(questionare_list): + import numpy + total={} + count={} + for questionare in questionare_list: + for name in questionare.keys(): + if name in count: + count[name] += 1 + else: + count[name] = 1 + total[name] = [] + total[name].append(questionare[name]) + + l=[] + for name, counter in count.items(): + l.append( (counter, name) ) + l.sort() + l.reverse() + variance=[] + result={} + for counter, name in l: + if counter > 1: + means=[] + print "%s, %d,"%(name, counter), + a = numpy.array(total[name]).transpose() + for b in a: + means.append(b.mean()) + print u"%2.3f,"%(b.mean()), + print + result[name] = means + return result + +class Data: + ### Explicit list of players + + ### + ### Following code consist of expert based knowledge kindly supplied by + ### Alexander Dinerstein 3-pro, Motoki Noguchi 7-dan and Vit Brunner 4-dan) + + ### The vector at each name corresponds with + ### ( + questionare_annotations = ['territory', 'orthodox', 'aggressiveness', 'thickness'] + ## the orthodox scale corresponds to novelty, the "name" of the scale remains + # the same for historical reasons --- the scales are defined the same though + + questionare_list = [ + #questionare_vit_brun + { + "Chen Yaoye": (7, 5, 7, 6), + "Cho Chikun": (9, 7, 7, 9), + "Cho U": (4, 6, 7, 4), + "Gu Li": (5, 6, 9, 5), + "Ishida Yoshio": (6, 3, 5, 5), + "Luo Xihe": (8, 4, 7, 7), + "Ma Xiaochun": (5, 7, 7, 7), + "O Meien": (3, 9, 6, 5), + "Otake Hideo": (4, 3, 6, 5), + "Rui Naiwei": (5, 6, 8, 5), + "Sakata Eio": (6, 4, 8, 6), + "Takemiya Masaki": (1, 4, 7, 2), + #"Yi Ch'ang-ho 2004-": (7, 6, 4, 4), + #"Yi Ch'ang-ho 2005+": (7, 6, 6, 4), + "Yi Ch'ang-ho": (7, 6, 6, 4), + "Yi Se-tol": (6, 5, 9, 5), + "Yoda Norimoto": (4, 4, 7, 3) + }, + # 
questionare_motoki_noguchi + { + "Cho Chikun": (8, 9, 8, 8 ), + "Cho U": (9, 7, 6, 8), + "Gu Li": (7, 8, 10, 4 ), + "Ishida Yoshio": (9, 6, 2, 6), + "Luo Xihe": (6, 8, 9, 7 ), + "Ma Xiaochun": (9, 6, 7, 8), + "O Meien": (1, 10, 10, 2 ), + "Otake Hideo": (4, 3, 5, 3), + "Rui Naiwei": (6, 6, 10, 2), + "Sakata Eio": (10, 5, 6, 10), + "Takemiya Masaki": (2,6, 6, 1), + #"Yi Ch'ang-ho 2004-": (8, 3, 2, 3), + # P: udelal jsem to z 2004- + "Yi Ch'ang-ho": (8, 3, 2, 3), + "Yi Se-tol": (5, 10, 10, 8 ), + "Yoda Norimoto": (8, 2, 2, 5), + "Fujisawa Hideyuki": (4, 8, 7, 4 ), + "Go Seigen": (8, 10, 9, 6), + "Hane Naoki": (8, 2, 4, 6 ), + "Honinbo Dosaku": (2, 10, 8, 5 ), + "Honinbo Shusaku": (8, 3, 2, 6), + "Honinbo Shuwa": (10, 8, 2, 10), + "Kato Masao": (2,3, 9, 4), + "Kobayashi Koichi": (8, 3, 3, 6), + "Miyazawa Goro": (1, 10, 10, 3), + "Takao Shinji": (4, 3, 7, 4 ), + "Yamashita Keigo": (2, 8, 10, 4 ), + "Yuki Satoshi": (2, 8, 10, 4) + }, + #questionare_alex_dinner + { + "Chen Yaoye": (5, 3, 5, 5), + "Cho Chikun": (10, 7, 5, 10), + "Cho U": (9, 5, 3, 7), + "Gu Li": (5, 7, 8, 3), + "Ishida Yoshio": (9, 6, 3, 5), + "Luo Xihe": (8, 10, 7, 4), + "Ma Xiaochun": (10, 6, 3, 9), + "O Meien": (4, 10, 9, 4), + "Otake Hideo": (5, 3, 3, 3), + "Rui Naiwei": (3, 5, 9, 3), + "Sakata Eio": (7, 5, 8, 8), + "Takemiya Masaki": (1, 9, 8, 1), + #"Yi Ch'ang-ho 2004-": (6, 6, 2, 1), + #"Yi Ch'ang-ho 2005+": (5, 4, 5, 3), + # commented because duplicates 2005+ + "Yi Ch'ang-ho": (5, 4, 5, 3), + "Yi Se-tol": (5, 5, 9, 7), + "Yoda Norimoto": (7, 7, 4, 2), + "Chen Zude": (3, 8, 6, 5), + "Cho Tae-hyeon": (1, 4, 4, 2), + "Fujisawa Hideyuki": (3, 10, 7, 4), + "Go Seigen": (4, 8, 7, 4), + "Hane Naoki": (7, 3, 4, 3), + "Jie Li": (5, 3, 5, 4), + "Kato Masao": (3, 6, 10, 4), + "Kobayashi Koichi": (10, 2, 2, 5), + "Miyazawa Goro": (2, 10, 9, 5), + "Nie Weiping": (3, 7, 8, 4), + "Shao Zhenzhong": (4, 5, 5, 4), + "Suzuki Goro": (4, 7, 5, 5), + "Takao Shinji": (6, 4, 4, 5), + "Wu Songsheng": (2, 10, 7, 4), + 
"Yamashita Keigo": (2, 10, 9, 2), + "Yuki Satoshi": (4, 9, 8, 5), + #"breakfast": (7, 7, 3, 4), + #"rapyuta/daien": (4, 7, 6, 5), + #"MilanMilan": (5, 5, 6, 4), + #"roln111-": (6, 5, 7, 5), + #"somerville": (4, 5, 5, 6), + #"artem92-": (7, 4, 3, 2), + #"TheCaptain": (3, 8, 7, 6) + } + ## + guys from the online questionare + ] + load_questionare.scan_d(QUESTIONARE_DIRECTORY) + + questionare_total = questionare_average(questionare_list, silent=True) + +def get_all_player_names(limit=1): + pc = {} + + for q in Data.questionare_list: + for p in q.keys(): + pc[p] = pc.get(p, 0) + 1 + + ps = set( p for p in pc.keys() if pc[p] >= limit ) + + return ps + +def get_interesting_pros(style, top, bottom, without_dist=True): + style_vec = numpy.array(style) + + dist = [ + ( math.sqrt( sum(numpy.power(style_vec - numpy.array(pro_style), 2))), + pro_name) for pro_name, pro_style in Data.questionare_total.iteritems() + ] + dist.sort() + if not without_dist: + return dist[:top], dist[-bottom:] + + def second((a, b)): + return b + + return map(second, dist[:top]), map(second, dist[-bottom:]) + +if __name__ == '__main__': + def main(tex=False): + """this prints the averaged questionare data, along with number of interviewees who + answered for the particular pro""" + #print get_all_player_names(4) + + questionare_total = questionare_average(Data.questionare_list, cnt_limit=2, silent=False, tex=tex) + + pa = get_all_player_names(2) + + vals = numpy.array([ va for pn, va in questionare_total.iteritems() if pn in pa ]) + #print vals.shape + + key2vec = {} + for desc, num in zip(Data.questionare_annotations, range(4)): + sli = vals[:, num] + key2vec[desc] = sli + if not tex: + print u"%s\n mean: %2.3f \u00B1 %2.3f"%(desc, sli.mean(), sli.std()) + else: + print u"%s & %2.3f \\pm %2.3f \\"%(desc, sli.mean(), sli.std()) + + from utils.utils import pearson_coef + + qa = Data.questionare_annotations + print + print "PAIRWISE CORRELATIONS" + print '', + print " | ".join("%15s"%an for an in 
(['']+qa)) + for i in xrange(len(qa)): + print "%15s | " % qa[i], + for j in xrange(len(Data.questionare_annotations)): + if i > j: + print "%15s |" % ('' ), + else: + p = pearson_coef(key2vec[qa[i]], key2vec[qa[j]]) + print "%15s |" % ( "%.3f" % p ), + print + + main() + + ## + ## + + def test_style(style): + near, dist = get_interesting_pros(style, 3, 3) + print "similar" + for p in near: + print p + print + print "distant" + for p in dist: + print p + + #test_style([1, 2, 3, 4]) + + + diff --git a/game_to_vec.py b/game_to_vec.py new file mode 100644 index 0000000..b77b581 --- /dev/null +++ b/game_to_vec.py @@ -0,0 +1,1000 @@ +import logging +import subprocess +from subprocess import PIPE + +import os +import sys +from os import remove +from os.path import abspath, exists +import math + +import itertools +from itertools import chain +import shutil +import re +import numpy + +import utils +from utils import misc, godb_models, godb_session +from utils.db_cache import declare_pure_function, cache_result +from utils.misc import first_true_pred, partial_right, partial +from utils.utils import head +from utils.colors import * +from utils.godb_models import ProcessingError + + +from result_file import ResultFile, get_output_resultfile, get_output_resultpair +from config import OUTPUT_DIR +import pachi +from pachi import scan_raw_patterns, generate_spatial_dictionary + +""" +Contains code for conversion of a game (or list of them) into a vector, +using pachi. 
+Moreover it allows to form .tab files to be used by the Orange datamining framework +""" + + +pat_file_regexp = '^\s*(\d+)\s*(.+)$' + +def _make_interval_annotations(l, varname): + """ + >>> _make_interval_annotations([10,11,12,13], 'X') + ['X <= 10', 'X == 11', 'X == 12', 'X == 13', 'X > 13'] + >>> _make_interval_annotations([22], 'X') + ['X <= 22', 'X > 22'] + >>> _make_interval_annotations([-1, 20], 'X') + ['X <= -1', '-1 < X <= 20', 'X > 20'] + + """ + if not all( misc.is_int(x) for x in l): + raise ValueError("Interval boundaries must be a number.") + if not l: + return [ "any " + varname ] + + prev = None + annots = [] + for point in l + ['LAST']: + s = varname + # if the interval size is 1 specify the interval precisely + if point != 'LAST' and prev == point - 1: + s = "%s == %d" % (s, point) + else: + # if not first, add left boundary + if prev != None: + # when we do not have right boundary as well + if point == "LAST": + # nicer like this I guess + s = "%s > %d" % (s, prev) + else: + s = "%d < %s" % (prev, s) + # if not last, add right boundary + if point != 'LAST': + s = "%s <= %d" % (s, point) + + annots.append(s) + prev = point + + return annots + +## game -> BlackWhite( vector_black, vector_white ) +class BWBdistVectorGenerator: + def __init__(self, by_line=[2,3,4], by_moves=[26,76]): + self.by_line = by_line + self.by_moves = by_moves + + if any( x%2 for x in by_moves ): + logging.warn("BWDistVectorGenerator called with odd number of moves" + "specifying the hist size => this means that the players" + "wont have the same number of moves in the buckets!!") + + # nice annotations + line_annots = _make_interval_annotations(by_line, 'bdist') + move_annots = _make_interval_annotations(by_moves, 'move') + #line_annots = [ 'bdist <= %d'%line for line in by_line ] + [ 'bdist other'] + # move_annots = [ 'move <= %d'%move for move in by_moves ] + [ 'move other'] + + self.annotations = [ "(bdist histogram: %s, %s)"%(m,b) for m,b in 
itertools.product(move_annots, line_annots) ] + self.types = [ "continuous" ] * len(self.annotations) + + def leq_fac(val): + return lambda x : x <= val + + # predicates giving bucket coordinate + self.line_preds = [ leq_fac(line) for line in by_line ] + [ lambda line : True ] + self.move_preds = [ leq_fac(movenum) for movenum in by_moves ] + [ lambda movenum : True ] + + def __repr__(self): + return 'BWBdistVectorGenerator(by_line=%s, by_moves=%s)'%(repr(self.by_line), + repr(self.by_moves)) + + def __call__(self, game): + """ + For a game, creates histograms of moves distance from border. + The histograms' granularity is specified by @by_line and @by_moves parameters. + + The @by_moves makes different histogram for each game phase, e.g.: + by_moves=[] makes one histogram for whole game + by_moves=[50] makes two histograms, one for first 50 moves (including) + second for the rest + by_moves=[26, 76] makes three histograms, + first 26 moves (X <=26) ~ opening + first 76 moves (26 < X <= 76) ~ middle game + rest of the game (76 < X) ~ end game + NOTE: of the by moves number should be even, so that we count the same + number of moves for every player. 
+ + The @by_line specifies granularity of each histogram, that is + by_line = [3] each hist has 2 buckets, one counts moves on first three + lines, second for the rest + + by_line = [3, 4, 5] four buckets/histogram, X <= 3, X = 4, X = 5, X > 5 + """ + # scan game, ignore spatials + col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False) + + buckets = {} + for color in PLAYER_COLORS: + # histogram + buckets[color] = numpy.zeros(len(self.line_preds) * len(self.move_preds)) + + for movenum, (color, pat) in enumerate(col_pat): + try: + bdist = pat.first_payload('border') + except StopIteration: + continue + + # X and Y coordinates + line_bucket = first_true_pred(self.line_preds, bdist + 1) # line = bdist + 1 + move_bucket = first_true_pred(self.move_preds, movenum + 1) # movenum is counted from zero + + # histogram[color][X][Y] += 1 + xy = line_bucket + move_bucket * len(self.line_preds) + buckets[color][xy] += 1 + + #print movenum, color, bdist, " \t", + #print line_bucket, move_bucket, + #print color, xy + + return BlackWhite(buckets[PLAYER_COLOR_BLACK], buckets[PLAYER_COLOR_WHITE]) + +## game -> BlackWhite( vector_black, vector_white ) +class BWLocalSeqVectorGenerator: + def __init__(self, local_threshold=5): + self.local_threshold = local_threshold + self.annotations = [ '(local seq < %d: sente)'%local_threshold, + '(local seq < %d: gote)'%local_threshold, + '(local seq < %d: sente - gote)'%local_threshold, ] + self.types = [ "continuous" ] * len(self.annotations) + + def __repr__(self): + return 'BWLocalSeqVectorGenerator(local_threshold=%s)'%(repr(self.local_threshold)) + + def __call__(self, game): + """self.local_threshold gives threshold specifiing what is considered to be a local + sequence, moves closer (or equal) than self.local_threshold in gridcular matrix + to each other are considered local.""" + # scan game, ignore spatials + col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False) + + SENTE_COOR = 0 + GOTE_COOR 
= 1 + DIFF_COOR = 2 + + count = {PLAYER_COLOR_BLACK : numpy.zeros(3), + PLAYER_COLOR_WHITE : numpy.zeros(3)} + + last_local = False + seq_start = None + for movenum, (color, pat) in enumerate(col_pat): + if not pat.has_feature('cont'): + local = False + else: + local = pat.first_payload('cont') <= self.local_threshold + + # if the sequence just started + if local and not last_local: + # this color had to reply locally, so it was the other guy that + # started the sequence + seq_start = the_other_color(color) + + # if the sequence just ended + if not local and last_local: + # the player who started the sequence gets to continue elsewhere + if color == seq_start: + count[seq_start][ SENTE_COOR ] += 1 + # if he does not <=> he lost tempo with the sequence + else: + count[seq_start][ GOTE_COOR ] += 1 + + last_local = local + + for color in PLAYER_COLORS: + cnt = count[color] + cnt[DIFF_COOR] = cnt[SENTE_COOR] - cnt[GOTE_COOR] + + return BlackWhite(count[PLAYER_COLOR_BLACK], count[PLAYER_COLOR_WHITE]) + +## game -> BlackWhite( vector_black, vector_white ) +class BWCaptureVectorGenerator: + def __init__(self, by_moves=[26,76], offset=6, payload_size=4): + """The params @offset and @payload size have to be the constants from pachi/pattern.h, + corresponding to: + offset = PF_CAPTURE_COUNTSTONES + payload_size = CAPTURE_COUNTSTONES_PAYLOAD_SIZE + """ + self.offset = offset + self.payload_size = payload_size + self.by_moves = by_moves + + if any( x%2 for x in by_moves ): + logging.warn("BWCaptureVectorGenerator called with odd number of moves" + "specifying the hist size => this means that the players" + "wont have the same number of moves in the buckets!!") + + # nice annotations + capture_annots = [ 'captured', 'lost', 'difference' ] + move_annots = _make_interval_annotations(by_moves, 'move') + + self.annotations = [ "(capture histogram: %s, %s)"%(m,b) for m,b in itertools.product(move_annots, capture_annots) ] + self.types = [ "continuous" ] * len(self.annotations) + + 
def leq_fac(val): + return lambda x : x <= val + + # predicates giving bucket coordinate + self.move_preds = [ leq_fac(move) for move in by_moves ] + [ lambda movenum : True ] + + def __repr__(self): + args = map(repr, [self.by_moves, self.offset, self.payload_size]) + return 'BWCaptureVectorGenerator(by_moves=%s, offset=%s, payload_size=%s)'% tuple(args) + + def __call__(self, game): + # scan game, ignore spatials + col_pat = pachi.scan_raw_patterns(game, patargs='xspat=0', skip_empty=False) + + buckets = {} + for color in PLAYER_COLORS: + buckets[color] = numpy.zeros(len(self.move_preds)) + + for movenum, (color, pat) in enumerate(col_pat): + if pat.has_feature('capture'): + captured = pat.first_payload('capture') >> self.offset + captured = (2 ** self.payload_size - 1 ) & captured + + move_bucket = first_true_pred(self.move_preds, movenum + 1) # counted from zero + buckets[color][move_bucket] += captured + + ret = {} + for color in PLAYER_COLORS: + ret[color] = numpy.zeros(3 * len(self.move_preds)) + + for mp in xrange(len(self.move_preds)): + for color in PLAYER_COLORS: + # I captured + ret[color][3 * mp] = buckets[color][mp] + # I lost + ret[color][3 * mp + 1] = buckets[the_other_color(color)][mp] + # diff + ret[color][3 * mp + 2] = ret[color][3 * mp] - ret[color][3 * mp + 1] + + + return BlackWhite(ret[PLAYER_COLOR_BLACK], ret[PLAYER_COLOR_WHITE]) + +## game -> BlackWhite( vector_black, vector_white ) +class BWWinStatVectorGenerator: + def __init__(self): + self.annotations = [ + '(wins by points)', + '(wins by resign)', + '(wp - wr)', + '(lost by points)', + '(lost by resign)', + '(lp - lr)' + ] + self.types = [ "continuous" ] * len(self.annotations) + + def __repr__(self): + return 'BWWinStatVectorGenerator2()' + + def __call__(self, game): + """""" + result = str(game.sgf_header.get('RE', '0')) + + if result.lower() in ['0', 'jigo', 'draw']: + raise ProcessingError(repr(self) + " Jigo") + + match = re.match(r'^([BW])\+(.*)$', result) + if not match: + 
raise ProcessingError(repr(self) + ' Could not find result sgf tag.') + + player, val = match.group(1), match.group(2) + if ( val.lower().startswith('f') or # forfeit + val.lower().startswith('t') ): # time + raise ProcessingError(repr(self) + ' Forfeit, time.') + + loses = [0, 0, 0] + # by resign + if val.lower().startswith('r'): + wins = [0, 1, -1] + else: + # by points + try: + points = float(val) + except ValueError: + raise ProcessingError(repr(self) + ' Points not float.') + wins = [1, 0, 1] + + if player == 'B': + black = numpy.array( wins + loses ) + white = numpy.array( loses + wins ) + else: + white = numpy.array( wins + loses ) + black = numpy.array( loses + wins ) + + return BlackWhite(black, white) + + + +## game -> BlackWhite( vector_black, vector_white ) +class BWWinPointsStatVectorGenerator: + def __init__(self): + self.annotations = [ + '(wins #points)', + '(loses #points)', + ] + self.types = [ "continuous" ] * len(self.annotations) + + def __repr__(self): + return 'BWWinPointsStatVectorGenerator2()' + + def __call__(self, game): + """""" + result = str(game.sgf_header.get('RE', '0')) + + if result.lower() in ['0', 'jigo', 'draw']: + raise ProcessingError(repr(self) + " Jigo") + + match = re.match(r'^([BW])\+(.*)$', result) + if not match: + raise ProcessingError(repr(self) + ' Could not find result sgf tag.') + + player, val = match.group(1), match.group(2) + if ( val.lower().startswith('f') or # forfeit + val.lower().startswith('t') or # time + val.lower().startswith('r') # resign + ): + raise ProcessingError(repr(self) + ' Forfeit, time, resign.') + + try: + points = float(val) + except ValueError: + raise ProcessingError(repr(self) + ' Points not float.') + + # if black wins + black = numpy.array( [points, 0] ) + white = numpy.array( [0, points] ) + # if white wins + if player == 'W': + black, white = white, black + + return BlackWhite(black, white) + +# - for black - transform_rawpatfile - +# / +# game -> raw_patternscan_game -- +# \ +# - for 
white ----- || ----- + +#@cache_result +@declare_pure_function +def raw_patternscan_game(game, spatial_dict, patargs=''): + assert spatial_dict.exists(warn=True) + ret = get_output_resultpair(suffix='.raw.pat') + + with open(ret.black.filename, mode='w') as fb: + with open(ret.white.filename, mode='w') as fw: + for color, pat in scan_raw_patterns(game, spatial_dict, patargs=patargs): + fd = fb if color == PLAYER_COLOR_BLACK else fw + # write output for the desired player + fd.write("%s\n"%pat) + #logging.debug(gtp + ":" + pat) + + #logging.info("Generated Raw Patternfiles for game %s, %s"%(game, ret)) + return ret + +#@cache_result +@declare_pure_function +def transform_rawpatfile(rawpat_file, ignore=set(), transform={}, ignore_empty=True): + """Transforms raw pattern file line by line, by ignoring certain features (and their payloads) + @ignore and transforming payloads with @transform. If @ignore_empty is specified, + empty patterns are ignored. + + transform_rawpatfile(file, ignore=set('s', 'cont'), transform={'border':lambda x: x - 1}) + (s:20) + (s:10 border:5 cont:10) + (s:20 cont:1) + (capture:18) + + will produce + (border:4) + (capture:18) + """ + + ret = get_output_resultfile('.raw.pat') + with open(ret.filename, mode='w') as fout: + with open(rawpat_file.filename, mode='r') as fin: + for line in fin: + pat = pachi.Pattern(line).reduce(lambda feat, _: not feat in ignore) + fpairs = [] + for f, p in pat: + p = transform.get(f, lambda x:x)(p) + fpairs.append((f, p)) + + if ignore_empty and not fpairs: + continue + + fout.write( "%s\n"%pachi.Pattern(fpairs=fpairs) ) + return ret + +#@cache_result +@declare_pure_function +def summarize_rawpat_file(rawpat_file): + """Transforms raw pattern file into summarized one: + (s:20) + (s:10 border:5) + (s:20) + (s:40) + (s:20) + ========> + 3 (s:20) + 1 (s:10 border:5) + 1 (s:40) + """ + result_file = get_output_resultfile('.pat') + + script="cat %s | sort | uniq -c | sort -rn > %s "%(rawpat_file.filename, 
result_file.filename) + + p = subprocess.Popen(script, shell=True, stderr=PIPE) + _, stderr = p.communicate() + if stderr: + logging.warn("subprocess summarize stderr:\n%s"%(stderr,)) + if p.returncode: + raise RuntimeError("Child sumarize failed, exitcode %d."%(p.returncode,)) + + return result_file + +class SummarizeMerger(godb_models.Merger): + """Used to sum Summarized Pattern files: + patfile_1: + 3 (s:20) + 1 (s:10 border:5) + 1 (s:40) + + patfile_2: + 3 (s:20) + 2 (s:15) + 1 (s:10 border:5) + + m = SummarizeMerger() + m.add(patfile_1) + m.add(patfile_2) + patres = m.finish() + + Now, patres is: + 6 (s:20) + 2 (s:15) + 2 (s:10 border:5) + 1 (s:40) + """ + def __init__(self): + self.reset() + + def start(self, bw_gen): + self.reset() + + def reset(self): + self.cd = {} + + def add(self, pat_file, color): + with open(pat_file.filename) as fin: + for line in fin: + match = re.match(pat_file_regexp, line) + if not match: + raise IOError("Wrong file format: " + pat_file) + count, pattern = int(match.group(1)), match.group(2) + self.cd[pattern] = self.cd.get(pattern, 0) + count + + def finish(self): + result_file = get_output_resultfile('.pat') + with open(result_file.filename, 'w') as fout: + firstlen = None + for pattern, count in sorted(self.cd.iteritems(), key=lambda kv : - kv[1]): + if firstlen == None: + # get number of decimal places, so that the file is nicely formatted + firstlen = 1 + int(math.log10(count)) + + # prefix the count with 2 spaces, see pat_file_regexp for format + s = "%" + str(2 + firstlen) + "d %s\n" + fout.write(s%(count, pattern)) + + self.reset() + return result_file + + +class VectorSumMerger(godb_models.Merger): + def __init__(self): + self.reset() + + def start(self, bw_gen): + assert all( tp == 'continuous' for tp in bw_gen.types ) + self.sofar = numpy.zeros(len(bw_gen.types)) + + def reset(self): + self.sofar = None + + def add(self, vector, color=None): + if self.sofar == None: + self.sofar = numpy.zeros(vector.shape) + self.sofar 
+= vector + + def finish(self): + if self.sofar == None: + self.sofar = numpy.zeros(0) + ret = self.sofar + self.reset() + return ret + +class VectorArithmeticMeanMerger(godb_models.Merger): + def __init__(self): + self.reset() + + def start(self, bw_gen): + self.reset() + self.summ.start(bw_gen) + + def reset(self): + self.count = 0 + self.summ = VectorSumMerger() + + def add(self, vector, color=None): + self.count += 1 + self.summ.add(vector) + + def finish(self): + if not self.count: + ret = self.summ.finish() + else: + ret = self.summ.finish() / self.count + + self.reset() + return ret + +# so that the fc has nice repr +@declare_pure_function +def identity(obj): + return obj + +@declare_pure_function +def linear_rescale(vec, a=-1, b=1): + """Linearly rescales elements in vector so that: + min(vec) gets mapped to a + max(vec) gets mapped to b + the intermediate values get remapped linearly between + """ + assert a <= b + MIN, MAX = vec.min(), vec.max() + if MIN == MAX: + # return average value of the set + return (float(a + b) / 2) * numpy.ones(vec.shape) + return a + (vec - MIN) * ( float(b - a) / (MAX - MIN) ) + +@declare_pure_function +def natural_rescale(vec): + return vec / numpy.sum(vec) + +@declare_pure_function +def log_rescale(vec, a=-1, b=1): + return linear_rescale(numpy.log(1 + vec), a, b) + +class VectorApply(godb_models.Merger): + def __init__(self, merger, + add_fc=identity, + finish_fc=identity ): + self.merger = merger + self.finish_fc = finish_fc + self.add_fc = add_fc + + def start(self, bw_gen): + self.merger.start(bw_gen) + + def add(self, vector, color=None): + self.merger.add(self.add_fc(vector), color) + + def finish(self): + return self.finish_fc( self.merger.finish() ) + + def __repr__(self): + return "VectorApply(%s, add_fc=%s, finish_fc=%s)" % (repr(self.merger), + repr(self.add_fc), + repr(self.finish_fc)) + +class PatternVectorMaker: + def __init__(self, all_pat, n): + self.all_pat = all_pat + self.n = n + + self.annotations = [] + 
self.pat2order = {} + + with open(self.all_pat.filename, 'r') as fin: + # take first n patterns + for num, line in enumerate(fin): + if num >= self.n: + break + match = re.match(pat_file_regexp, line) + if not match: + raise IOError("Wrong file format: " + self.all_pat) + pattern = match.group(2) + self.pat2order[pattern] = num + self.annotations.append(pattern) + + self.types = [ "continuous" ] * len(self.annotations) + + if len(self.pat2order) < self.n: + raise ValueError("Input file all_pat '%s' does not have enough lines."%(self.all_pat)) + + def __repr__(self): + return "PatternVectorMaker(all_pat=%s, n=%d)"%(self.all_pat, self.n) + + def __call__(self, sum_patfile): + vector = numpy.zeros(self.n) + added = 0 + with open(sum_patfile.filename, 'r') as fin: + for line in fin: + match = re.match(pat_file_regexp, line) + if not match: + raise IOError("Wrong file format: " + str(sum_patfile)) + + index = self.pat2order.get(match.group(2), None) + if index != None: + vector[index] += int(match.group(1)) + added += 1 + + # no need to walk through the whole files, the patterns (match.group(2)) + # are unique since the patfile is summarized + if added >= self.n: + break + + return vector + +## game -> BlackWhite( vector_black, vector_white ) +class BWPatternVectorGenerator: + def __init__(self, bw_game_summarize, pattern_vector_maker): + self.pattern_vector_maker = pattern_vector_maker + self.bw_game_summarize = bw_game_summarize + + self.annotations = pattern_vector_maker.annotations + self.types = pattern_vector_maker.types + + def __repr__(self): + return "BWPatternVectorGenerator(bw_game_summarize=%s, pattern_vector_maker=%s)"%( + repr(self.bw_game_summarize), repr(self.pattern_vector_maker)) + + def __call__(self, game): + bw = self.bw_game_summarize(game) + return bw.map_both(self.pattern_vector_maker) + +#@cache_result +@declare_pure_function +def process_game(game, init, pathway): + bw = init(game) + return bw.map_pathway(pathway) + +@cache_result 
+@declare_pure_function +def process_one_side_list(osl, merger, bw_processor): + return osl.for_one_side_list( merger, bw_processor) + +## Process One Side List +class OSLVectorGenerator: + """ + Maps one side lists to vectors, using different game vector generators (e.g. BWPatternVectorGenerator), e.g: + OSLVectorGenerator([(vg1, m1), (vg2, m2)]) + +game1 m1.add(vg1(game1)) m2.add(vg2(game1)) +game2 m1.add(vg1(game2)) m2.add(vg2(game2)) +. | | +. | | +. | | +game666 m1.add(vg1(game666)) m2.add(vg2(game666)) + m1.finish() m2.finish() + = [1,2,3,4,5] = [6,7,8,9,10] + vg1.annotations vg2.annotations + = [f1, ..., f5] =[f6, ..., f10] + ---------------------------------------------- + result = [ 1,2,3,4,5,6,7,8,9,10 ] + annotations = [ f1, ..., f10 ] + """ + def __init__(self, gen_n_merge, annotate_featurewise=True): + self.gen_n_merge = gen_n_merge + self.annotate_featurewise = annotate_featurewise + self.functions = [] + self.annotations = [] + self.types = [] + + for num, (game_vg, merger) in enumerate(gen_n_merge): + self.functions.append( + # this function maps one_side_list to a vector + # where vectors from a game in the osl are merged using the merger + partial_right(process_one_side_list, merger, game_vg )) + + anns = game_vg.annotations + if annotate_featurewise: + anns = [ 'f%d%s' % (num, an) for an in anns ] + + self.annotations.extend(anns) + self.types.extend(game_vg.types) + + def __repr__(self): + return "OSLVectorGenerator(gen_n_merge=%s, annotate_featurewise=%s)"%(repr(self.gen_n_merge), + repr(self.annotate_featurewise) ) + + def __call__(self, osl): + # stack vectors from different generators together + return numpy.hstack( [ f(osl) for f in self.functions ] ) + +def make_all_pat(osl, bw_summarize_pathway): + return process_one_side_list(osl, SummarizeMerger(), bw_summarize_pathway) + +@cache_result +@declare_pure_function +def osl_vector_gen_cached(osl_gen, osl): + """Just to emulate caching for osl_gen.__call__ method. 
+ this is a bit ugly, since this should really be handled by the caching itself to allow for + decorating class methods.""" + return osl_gen(osl) + +@declare_pure_function +def minus(a,b): + return a-b + +@cache_result +@declare_pure_function +def make_tab_file(datamap, vg_osl, osl_name_as_meta=True, osl_size_as_meta=True, image_name_as_meta=True): + """As specified in http://orange.biolab.si/doc/reference/Orange.data.formats/ + If image_name_as_meta or osl_name_as_meta parameters are present, the names of the + respective objects are added as meta columns. + """ + tab_file = get_output_resultfile('.tab') + + def tab_denoted(fout, l): + """Writes tab-denoted elements of list @l to output stream @fout""" + strings = map(str, l) + for el in strings: + if '\t' in el: + raise RuntimeError("Elements of tab-denoted list must not contain tabs.") + fout.write('\t'.join(strings) + '\n') + + def get_meta(osl_m, osl_size_m, image_m): + return list( itertools.compress((osl_m, osl_size_m, image_m), + (osl_name_as_meta, osl_size_as_meta, image_name_as_meta))) + + with open(tab_file.filename, 'w') as fout: + # annotations - column names + tab_denoted(fout, chain( vg_osl.annotations, + datamap.image_annotations, + get_meta('OSL name', 'OSL size', 'Image name'))) + + # column data types + tab_denoted(fout, chain( vg_osl.types, + datamap.image_types, + get_meta('string', 'continuous', 'string'))) + + # column info type: empty (normal columns) / class (main class attribute) / multiclass / meta + tab_denoted(fout, chain( # attributes are no class + [''] * len(vg_osl.types), + # for the first class attribute if present + [ 'class' ] * len(datamap.image_types[:1]), + # for the following class attributes if present + [ 'meta' ] * len(datamap.image_types[1:]), + #[ 'multiclass' ] * len(datamap.image_types[1:]), + # meta information if requested + get_meta('meta', 'meta', 'meta'))) + + # the data itself + for num, (osl, image) in enumerate(datamap): + logging.info('Tab file %d%% (%d / 
%d)'%(100* (num+1) / len(datamap), num+1, len(datamap))) + + tab_denoted(fout, chain( # the osl + osl_vector_gen_cached(vg_osl, osl), + # the image + map(float, image.data), + # the meta data + get_meta(osl.name, float(len(osl)), image.name))) + + return tab_file + +# +## +# +## Playground: +# +## +# + +if __name__ == '__main__': + def main(): + import logging + from logging import handlers + if False: + logger = logging.getLogger() + logger.setLevel(logging.INFO) + ch = handlers.WatchedFileHandler('LOG', mode='w') + logger.addHandler(ch) + + from utils.godb_models import Game, GameList, OneSideList, PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE + from utils.godb_session import godb_session_maker + from utils import db_cache + + def test1(): + ## import'n'init + s = godb_session_maker(filename=':memory:') + + ## Prepare data + + gl = GameList("pokus") + s.godb_scan_dir_as_gamelist('./TEST_FILES/games', gl) + s.add(gl) + + # add all the games into the all.pat file + osl = OneSideList("all.pat") + osl.batch_add(gl.games, PLAYER_COLOR_BLACK) + osl.batch_add(gl.games, PLAYER_COLOR_WHITE) + s.add(osl) + s.commit() + + ## Prepare the pattern vector game processing pathway + ## game -> BlackWhite( vector_black, vector_white ) + + spatial_dict = generate_spatial_dictionary(gl, spatmin=2) + + # the pathway: game -> bw rawpat files -> bw transformed rawpat files -> bw summarized pat files + bw_game_summarize = partial_right(process_game, + partial_right(raw_patternscan_game, spatial_dict), + [ partial_right(transform_rawpatfile, + #transform={ 'border':partial_right(minus, 1) }, + ignore=['border', 'cont']), + summarize_rawpat_file + ]) + all_pat = make_all_pat(osl, bw_game_summarize) + + vg_pat = BWPatternVectorGenerator( bw_game_summarize, + PatternVectorMaker(all_pat, 100) ) + vg_local = BWLocalSeqVectorGenerator() + vg_bdist = BWBdistVectorGenerator() + + ## Process One game + + + """ + print vg_pat(game) + print vg_local(game) + print vg_bdist(game) + """ + + ## Process One 
Side List + + gen_n_merge = [ (vg_pat, VectorApply(VectorSumMerger(), finish_fc=linear_rescale)), + (vg_local, VectorArithmeticMeanMerger()), + (vg_bdist, VectorArithmeticMeanMerger())] + + vg_osl = OSLVectorGenerator(gen_n_merge) + + generate = partial( osl_vector_gen_cached, vg_osl) + + # not cached + #vec, annotations = vg_osl(osl), vg_osl.annotations + + # cached + + ## now the pathway is ready, we can process whatewer OSL we + # feel up to, osl in the following is just an example + vec, annotations = generate(osl), vg_osl.annotations + + for i in xrange(len(annotations)): + print vec[i], '\t\t', annotations[i] + + + def test_rescale(): + import numpy + from pylab import figure, scatter, subplot, show + + vec = numpy.random.random( size=10) + print vec + print linear_rescale(vec, a=-20, b=20) + + vec = numpy.array([ 452915., 288357., 271245., 111039., 84811., 74074., + 58663., 62257., 55296., 46359., 51022., 41049., + 31297., 35259., 34467., 30918., 29869., 36875., + 29592., 28075., 25823., 27479., 26343., 26964., + 24093., 24724., 23135., 22266., 21725., 21769., + 20130., 21625., 20200., 20619., 19741., 19049., + 17434., 20167., 19830., 16458., 16513., 21720., + 20933., 20216., 18414., 17442., 12046., 16186., + 16732., 16142., 15126., 15332., 15435., 12925., + 14072., 16321., 11391., 14884., 13147., 15162., + 14247., 15578., 11826., 12009., 11533., 12349., + 12219., 12590., 10581., 14550., 10699., 12384., + 11795., 10769., 12617., 12576., 12281., 11311., + 12479., 11327., 11398., 11814., 11050., 10248., + 10506., 11541., 12401., 9580., 11201., 10704., + 9766., 10402., 9422., 12888., 9473., 9536., + 10933., 10844., 11005., 8112., 0.]) + + figure(1) + subplot(321) + scatter(range(len(vec)), vec, marker='x', c='r') + subplot(322) + scatter(range(len(vec)), linear_rescale(vec), marker='x', c='g') + subplot(323) + scatter(range(len(vec)), numpy.log(1 + vec), marker='x', c='b') + subplot(324) + scatter(range(len(vec)), log_rescale(vec), marker='x', c='y') + 
subplot(325) + scatter(range(len(vec)), vec / sum(vec), marker='x', c='b') + show() + + def test_bdist_hist(): + s = godb_session_maker(filename=':memory:')#, echo=True) + game = s.godb_sgf_to_game('./TEST_FILES/test_bdist2.sgf') + + bdg = BWBdistVectorGenerator(by_line=[2, 3, 4], by_moves=[4, 6]) + bw = bdg(game) + assert len(bdg.annotations) == len(bw[0]) == len(bw[1]) + + print "Interval \t\tBlack\tWhite" + print "-" * 40 + for ann, b, w in zip( bdg.annotations, bw[0], bw[1] ): + print "%s\t\t"%(ann), int(b), "\t", int(w) + + def test_win_stat(): + s = godb_session_maker(filename=':memory:')#, echo=True) + #gl = s.godb_add_dir_as_gamelist('./files/') + + game = s.godb_sgf_to_game('../data/go_teaching_ladder/reviews/5443-breakfast-m711-A2.sgf') + + bdg = BWWinStatVectorGenerator() + #bdg = BWWinPointsStatVectorGenerator() + bw = bdg(game) + #continue + + assert len(bdg.annotations) == len(bw[0]) == len(bw[1]) + + print "Interval \t\tBlack\tWhite" + print "-" * 40 + for ann, b, w in zip( bdg.annotations, bw[0], bw[1] ): + print "%30s\t\t" % (ann), b, "\t", w + + ## + ## + + + def header(text): + print "=" * 10, "\n"+text +"\n", "=" * 10 + + header("PROCESSING PATHWAY TEST") + test1() + + return + + header("RESCALE TEST") + test_rescale() + header("BDIST HIST TEST") + test_bdist_hist() + #test_capture_hist() + header("WINSTAT TEST") + test_win_stat() + + main() + \ No newline at end of file diff --git a/ipython_session.py b/ipython_session.py new file mode 100644 index 0000000..cf11906 --- /dev/null +++ b/ipython_session.py @@ -0,0 +1,15 @@ +import logging +from logging import handlers +import sys + +import sqlalchemy + +import utils +from utils.godb_models import * +from utils.godb_session import godb_session_maker + +import pachi +from config import DB_FILE + +if __name__ == '__main__': + s = godb_session_maker(filename=DB_FILE) diff --git a/load_questionare.py b/load_questionare.py new file mode 100644 index 0000000..e5abacb --- /dev/null +++ 
b/load_questionare.py @@ -0,0 +1,44 @@ +import sys +import os +import json +import logging + +def load_file(filename): + with open(filename) as fin: + dump = json.load(fin) + + d = {} + + for group_list in dump['group_lists']: + for player in group_list['list']: + if player['skip'] != 'yes': + style = player['style'] + vec = [ style['te'], style['or'], style['ag'], style['th'] ] + try: + vec = map(int, vec) + except: + continue + + d[player['name']] = tuple(vec) + + return d + +def scan_d(directory): + ds = [] + for dirname, dirnames, filenames in os.walk(directory): + # print path to all filenames. + for filename in filenames: + fn = os.path.join(dirname, filename) + try: + ds.append(load_file(fn)) + except: + logging.warn("Scanning of questionare file '%s' failed, skipping."%fn) + continue + + return ds + +if __name__ == '__main__': + #fn = sys.argv[1] + #print load_file(fn) + + print scan_d('./QUESTIONARE') \ No newline at end of file diff --git a/pachi.py b/pachi.py new file mode 100644 index 0000000..3d3f532 --- /dev/null +++ b/pachi.py @@ -0,0 +1,217 @@ +import logging +import subprocess +from subprocess import PIPE + +import os +from os import remove +from os.path import abspath + +import sys +import shutil +import re +from collections import namedtuple + +from utils import utils, misc, db_cache +from utils.db_cache import declare_pure_function, cache_result +from utils.colors import PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE +from utils.godb_models import ProcessingError +from result_file import ResultFile +import result_file + +from config import PACHI_DIR + +PACHI_SPATIAL_DICT = os.path.join(PACHI_DIR, 'patterns.spat') + +class Pattern: + def __init__(self, pattern=None, fpairs=None): + if pattern != None: + match = re.match('^\((.*)\) *$', pattern) + if not match: + raise RuntimeError("Pattern format wrong: '%s'"%pattern) + + # (capture:104 border:6 atari:0 atari:0 cont:1 s:2620) + pattern = match.group(1) + + self.fpairs = [] + for featpair in 
pattern.split(): + feat, payload = featpair.split(':') + self.fpairs.append((feat, int(payload))) + elif fpairs != None: + self.fpairs = fpairs + else: + raise RuntimeError("Pattern unspecified...") + + def reduce(self, filterfc): + fpairs = [ (f, p) for f, p in self if filterfc(f, p) ] + return Pattern(fpairs=fpairs) + + def iter_feature_payloads(self, feature): + for f, p in self: + if f == feature: + yield p + + def first_payload(self, feature): + return self.iter_feature_payloads(feature).next() + + def has_feature(self, feature): + for f, p in self: + if f == feature: + return True + return False + + def __iter__(self): + return iter(self.fpairs) + + def __str__(self): + return "(%s)"%( ' '.join( "%s:%s"%(feat, payload) for feat, payload in self ) ) + +class IllegalMove(Exception): + pass + +@cache_result +@declare_pure_function +def generate_spatial_dictionary(game_list, spatmin=4, patargs='', check_size=329): + """ + Generates pachi spatial dictionary from games in the @gamelist. + + @check_size specifies min spatial dict size, if the filesize is below, raise runtime err. + Set this to 0 to disable the check. (328 is the size of empty spatial dict header) + """ + logging.info("Generating spatial dictionary from %s"%(repr(game_list))) + + # pachi does not handle larger number of handicap stones than 9 + without_large_handi = filter( lambda g : int(g.sgf_header.get('HA',0)) <= 9, game_list.games ) + l_old, l_new = len(game_list.games), len(without_large_handi) + if l_old != l_new: + logging.warn("The spatial dictionary list contains %d games with # of handicap stones >= 10. 
Skipping those."%( + l_old - l_new,)) + + games = '\n'.join([ abspath(game.sgf_file) for game in without_large_handi ]) + + spatial_dict = result_file.get_output_resultfile('.spat') + assert not spatial_dict.exists() + + script=""" + cd %s + SPATMIN='%s' SPATIAL_DICT_FILE='%s' PATARGS='%s' tools/pattern_spatial_gen.sh -"""%( + PACHI_DIR, spatmin, abspath(spatial_dict.filename), patargs) + + #with open("tmp_script", 'w') as tmp: + # tmp.write(script) + + p = subprocess.Popen(script, shell=True, stdin=PIPE) + o = p.communicate(input=games.encode('utf-8')) + #if stderr: + # logging.warn("subprocess pattern_spatial_gen stderr:\n%s"%(stderr,)) + if p.returncode: + raise RuntimeError("Child process `pachi/tools/pattern_spatial_gen` failed, exitcode %d."%(p.returncode,)) + if check_size and os.stat(spatial_dict.filename).st_size <= check_size: + raise RuntimeError("Spatial dict is empty. Probably an uncaught error in subprocess.") + + logging.info("Returning spatial dictionary %s"%(repr(spatial_dict))) + return spatial_dict + + +@cache_result +@declare_pure_function +def scan_raw_patterns(game, spatial_dict=None, patargs='', skip_empty=True): + """ + For a @game, returns list of pairs (player_color, pattern) for each move. + The pachi should be compiled to output all the features. + """ + if spatial_dict == None: + if 'xspat=0' not in patargs.split(','): + raise RuntimeError("Spatial dict not specified, though the spatial features are not turned off.") + spatial_str="" + else: + assert spatial_dict.exists(warn=True) + spatial_str="spatial_dict_filename=%s"%(abspath(spatial_dict.filename)) + + ## TODO + ## pachi has to have some patterns.spat even if the xspat=0 + ## otw segfault, thought it does not use it... 
+ + gtpscript=""" + cd %s + + ./tools/sgf2gtp.py --stdout '%s' + """%(PACHI_DIR, abspath(game.sgf_file) ) + gtpstream = utils.check_output(gtpscript, shell=True) + + script = """ + cd %s + ./pachi -d 0 -e patternscan '%s' + """%( PACHI_DIR, ','.join(misc.filter_null([spatial_str, patargs])) ) + + p = subprocess.Popen(script, shell=True, stdout=PIPE, stdin=PIPE, stderr=PIPE) + + pats, stderr = p.communicate(input=gtpstream) + if stderr: + logging.warn("subprocess pachi:\n\tSCRIPT:\n%s\n\tSTDERR\n%s"%(script, stderr)) + + if p.returncode: + raise RuntimeError("Child process `pachi` failed, exitcode %d."%(p.returncode,)) + + lg = filter( lambda x : x, gtpstream.split('\n')) + lp = pats.split('\n') + + # ? illegal move + wrong = filter( lambda x: re.search('^\? ',x), lp) + if wrong: + raise ProcessingError("Illegal move") + #raise IllegalMove() #"In game %s"%game) + + # filter only lines beginning with = + lp = filter( lambda x: re.search('^= ',x), lp) + # remove '= ' from beginning + lp = map( lambda x: re.sub('^= ', '', x), lp) + + # the command list and the pattern list should be aligned + # - each gtp command emits one line of patterns from pachi + assert len(lg) == len(lp) + gtp_pat = zip(lg, lp) + + # keep pairs that contain something else than space in pattern + # - discards boardsize, handi, komi, ... that emit nothing ('= ') + gtp_pat = filter( lambda t: re.search('\S', t[1]), gtp_pat) + + # filter out other gtp commands than play + # - discards e.g. 
class ResultFile:
    """Thin wrapper around a result-file path.

    Only remembers the filename; the actual content is produced by
    external tools (see the pachi subprocess helpers).
    """
    def __init__(self, filename, create_empty=False):
        """Wrap @filename; if @create_empty, touch a new empty file
        (the path must not exist yet)."""
        self.filename = filename
        if create_empty:
            assert not self.exists()
            open(self.filename,'w').close()

    def exists(self, warn=False):
        """Return whether the wrapped file exists; optionally log a
        warning when it does not."""
        status = exists(self.filename)
        if not status and warn:
            # BUG FIX: `logging` was referenced here without ever being
            # imported in this module, so the warn path raised NameError.
            import logging
            logging.warning("File '%s' does not exist."%(self.filename,))
        return status

    def __repr__(self):
        return "ResultFile('%s')"%(self.filename,)
from collections import namedtuple

# Single-character color tags, matching the SGF convention.
PLAYER_COLOR_WHITE = 'W'
PLAYER_COLOR_BLACK = 'B'

PLAYER_COLORS = ( PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE )

class BlackWhite(namedtuple('BlackWhite', 'black white')):
    """A (black, white) pair of arbitrary per-color values."""

    def map_both(self, f):
        """Apply @f to both components, returning a new BlackWhite."""
        return BlackWhite(f(self.black), f(self.white))

    def map_pathway(self, func_list):
        """Thread the pair through each function of @func_list in order."""
        result = self
        for func in func_list:
            result = result.map_both(func)
        return result

def the_other_color(color):
    """Return the opposite color tag ('B' <-> 'W')."""
    return PLAYER_COLOR_WHITE if color == PLAYER_COLOR_BLACK else PLAYER_COLOR_BLACK
declarative_base +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +import time +import logging +import inspect +import types +import functools + +import utils + +logger = logging.getLogger(__name__) + +""" +A simple caching scheme for pure functions, supporting pure functions as +args as well. + +Changes of code do NOT make the cache invalid - so you should delete the +cache database yourself if you change any pure functions. +""" + +## hodnoty def kwargs to ovsem meni, jen kdyz se fce predava parametrem (funkci kterou taky cachujeme), +## nikoliv kdyz je volana primo +## pze kdyz se predava parametrem, tak ta vnejsi fce nevi jaky ma def param + +# By default (without running init_cache ) a dict (=> cache not persistent across runs & processes) +cache_object = {} + +Base = declarative_base() +class CacheLine(Base): + """ + Maps key -> value, saving time of creation, which is used as a criterion for time expiration. + """ + __tablename__ = 'cacheline' + id = Column(Integer, primary_key=True) + time = Column(Float) + key = Column(Text, index=True) + value = Column(PickleType) + + def __str__(self): + return "(%s, %s) -> %s" % (self.key, self.time, self.value) + + def __repr__(self): + return "CacheLine(%s)" % (str(self)) + +class DBCacheObject: + """ The cache uses the same interface as dict.""" + def __init__(self, db_session, expire): + self.session = db_session + self.expire = expire + + def delete_expired(self): + expired_before = time.time() - self.expire + self.session.query(CacheLine).filter(CacheLine.time < expired_before).delete() + self.session.commit() + + def __getitem__(self, key): + # with correct key + q = self.session.query(CacheLine).filter(CacheLine.key == key) + + # if expiration rate set + if self.expire: + expired_before = time.time() - self.expire + # not expired + q = q.filter(CacheLine.time > expired_before) + + # order by time + by_time = q.order_by(CacheLine.time).all() + + # the last one + if len(by_time): + 
return by_time[-1].value + + raise KeyError + + def __setitem__(self, key, value): + l = CacheLine(time=time.time(), key=key, value=value) + self.session.add(l) + self.session.commit() + + +def delete_expired(): + global cache_object + if not isinstance(cache_object, DBCacheObject): + logging.warn("Cannot remove expired elemets from cache - not a DBCacheObject") + return + + logging.info("Deleting expired cache rows...") + cache_object.delete_expired() + +def _print_all(): + global cache_object + + if isinstance(cache_object, DBCacheObject): + it = cache_object.session.query(CacheLine).all() + else: + it = cache_object.iteritems() + + print "CACHE:" + for a in it: + print "\t", a + + +# +# Pure function +# + +class PureFunction(object): + """PureFunction is a class that has nice function repr like + instead of the default repr + . + + By using it, the user declares, that calls to the same function with + same arguments will always (in time, accross different processes, ..) + have the same results and can be thus cached. + """ + def __init__(self, f): + self.f = f + assert isinstance(f, types.FunctionType) + functools.update_wrapper(self, f) + + def getargspec(self): + return inspect.getargspec(self.f) + + def get_default_kwargs(self): + args, varargs, varkw, defaults = self.getargspec() + if defaults: + return dict(zip(args[-len(defaults):], defaults)) + + def __call__(self, *args, **kwargs): + logger.debug("calling %s"%repr(self)) + return self.f(*args, **kwargs) + + def __repr__(self): + return ''%(utils.repr_origin(self.f)) + #return ''%(utils.repr_origin(self.f), repr( self.get_default_kwargs())) + +# to be used as a deco +declare_pure_function = PureFunction + +# +# +# + +def init_cache(filename='CACHE.db', expires=0, sqlalchemy_echo=False): + """ + Initialize cache, sets up the global cache_object. + + filename -- specifies the sqlite dbfile to store the results to + expires -- specifies expiration in seconds. 
If you set this to 0, + cached data are valid forever + echo -- whether to output sqlalchemy logs + """ + if filename == None: + # By default, the cache object is a dict + if expires: + logger.warn('Dictionary cache object does not support time expiration of cached values!') + else: + engine = create_engine('sqlite:///%s'%filename, echo=sqlalchemy_echo) + Base.metadata.create_all(engine) + Session = sessionmaker(bind=engine) + session = Session() + + global cache_object + cache_object = DBCacheObject(session, expires) + +def close_cache(): + global cache_object + cache_object.session.close() + +def make_key(f, f_args, f_kwargs): + if isinstance(f, PureFunction): + spect = f.getargspec() + elif isinstance(f, types.FunctionType): + spect = inspect.getargspec(f) + else: + raise TypeError("Unable to obtain arg specification for function : '%s'"%(repr(f))) + + args, varargs, varkw, defaults = spect + default_kwargs = {} + if defaults: + default_kwargs = dict(zip(args[-len(defaults):], defaults)) + for (key, val) in f_kwargs.iteritems(): + assert key in default_kwargs + + f_kwargs_joined = default_kwargs + f_kwargs_joined.update(f_kwargs) + + #rep = "%s(args=%s, kwargs=%s)"%(utils.function_nice_repr(f), repr(f_args), repr(f_kwargs_joined)) + + rep = "%s(%s)"%(repr(f), + ', '.join(map(repr, f_args) + + [ '%s=%s'%(key, repr( val)) for key, val in f_kwargs_joined.iteritems() ])) + + ## XXX "normal temporary" objects + if 'at 0x' in rep: + logger.warn("Object(s) specified in '%s' do not have a proper repr."%(rep)) + + return rep + +# +# The deco +# + +def cache_result(fun): + """Compute the key, look if the result of a computation is in the + cache. 
If so, return it, otw run the function, cache the result and + return it.""" + def g_factory(f): + def g(*args, **kwargs): + global cache_object + key = make_key(f, args, kwargs) + try: + cached = cache_object[key] + logger.info("Returning CACHED for: '%s'"%(key)) + return cached + except KeyError: + ret = f(*args, **kwargs) + cache_object[key] = ret + logger.info("CACHING for: '%s'"%(key)) + return ret + return g + + # if we got PureFunction, the returned function should also be pure + # please see the PureFunction.__doc__ + if isinstance(fun, PureFunction): + g = g_factory(fun) + functools.update_wrapper(g, fun.f) + return PureFunction(g) + + return functools.wraps(fun)(g_factory(fun)) + +if __name__ == "__main__": + logging.basicConfig() + l = logging.getLogger(__name__) + l.setLevel(logging.INFO) + + init_cache(filename=':memory:', expires=0.1) + + @cache_result + @declare_pure_function + def add(a, b): + return a + b + + @cache_result + @declare_pure_function + def call_10(f): + return f(10) + + @cache_result + @declare_pure_function + def multmap(l): + return ( reduce( (lambda x, y: x*y) , l), time.time() ) + + def test1(): + multmap([1,2,3]) + multmap([1,2,3]) + print "sleep 0.1" + time.sleep(0.1) + multmap([1,2,3]) + + def test2(): + """Stateless (pure) class and a pure function as arguments""" + + class Adder: + """ The Adder must be `stateless` in a sense that results + of __call__ will always produce the same results for the + same args. Moreover the Adder must have __repr__ which has + all the information to uniquely define the Adder instance - + once again, so that the statement about __call__ holds. + + The user is responsible for the statelessness! 
+ (as with @declare_pure_function) + """ + def __init__(self, offset): + self.offset = offset + def __call__(self, a, b=10): + return a + self.offset + def __repr__(self): + return "Adder(offset=%s)"%self.offset + + a = Adder(2) + + @cache_result + @declare_pure_function + def my_map(f, l): + return map(f, l) + + my_map(a, range(10)) + my_map(a, range(10)) + + @declare_pure_function + def multiplicator(x, mult=2): + return x * mult + + my_map(multiplicator, range(10)) + + from utils import partial, partial_right + + my_map(partial_right(multiplicator, 2), range(10)) + my_map(partial(a, 2), range(10)) + my_map(partial_right(multiplicator, 2), range(10)) + my_map(partial(a, 2), range(10)) + + def test3(): + """Test warning for nonpure functions as arguments""" + @cache_result + def h(x): + return 2 * x + + h(10) + print + call_10(h) + + def test4(): + """Test timeout""" + multmap([1,2,3]) + multmap([1,2,3, 4]) + multmap([1,2,3]) + _print_all() + time.sleep(0.5) + multmap([1,2,3]) + _print_all() + delete_expired() + _print_all() + + + test1() + #test2() + \ No newline at end of file diff --git a/utils/godb_models.py b/utils/godb_models.py new file mode 100644 index 0000000..69eeee7 --- /dev/null +++ b/utils/godb_models.py @@ -0,0 +1,486 @@ +import os +from itertools import chain + +import logging +import re + +from sqlalchemy import Table, Column, Integer, ForeignKey, Text, Date, Float, Enum, UniqueConstraint, PickleType +from sqlalchemy.orm import relationship, backref +from sqlalchemy.ext.declarative import declarative_base + +import utils +from rank import Rank +from colors import * + +""" +This contains the sqlalchemy ORM models which also form basic of our datastructures, have a look at it. + +""" + +Base = declarative_base() + +class ProcessingError(Exception): + pass + +class SchizophrenicPlayerError(Exception): + """Used in context of problems with games between the same players. + E.g. Anonymous vs. 
Anonymous""" + pass + + +## +## hack to workaround this bug: http://bugs.python.org/issue5876 +## > Oh ok, gotcha: repr() always returns a str string. If obj.__repr__() returns a +## > Unicode string, the string is encoded to the default encoding. By default, the +## > default encoding is ASCII. +## => unicode chars in repr cause "ordinal not in range err" +import functools +import misc +def ununicode(f): + @functools.wraps(f) + def g(*args, **kwargs): + return misc.unicode2ascii(f(*args, **kwargs)) + return g + +class Player(Base): + """Class (and ORM Table) about go player. + The name must be unique (born name, ..). This class should have one instance (db record) + for one go player. + + Player may change name, rank, .. in time, or use different nicknames, etc. + The consistency (so that all these variations are connected) is maintained + together with the PlayerInTime. + """ + __tablename__ = 'player' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, unique=True, index=True) + note = Column(Text) + # list in_times from backrefs + + #__table_args__ = ( UniqueConstraint('name'), ) + + def __init__(self, name, note=u''): + self.name = name + self.note = note + + def iter_games_as(self, color, pit_filter=lambda pit:True): + return chain.from_iterable(pit.iter_games_as(color) + for pit in self.in_times if pit_filter(pit) ) + + def iter_one_side_associations(self, + pit_filter=lambda pit:True, + **kwargs): + return chain.from_iterable( pit.iter_one_side_associations(**kwargs) + for pit in self.in_times if pit_filter(pit) ) + # Shortcuts + def iter_games_as_white(self, **kwargs): + return self.iter_games_as(PLAYER_COLOR_WHITE, **kwargs) + def iter_games_as_black(self, **kwargs): + return self.iter_games_as(PLAYER_COLOR_BLACK, **kwargs) + def iter_games(self): + return chain(self.iter_games_as_black(), self.iter_games_as_white()) + + def __str__(self): + return self.name + + @ununicode + def __repr__(self): + return u"Player(%s, 
'%s','%s')" % (self.id, + self.name, + self.note) + +import pickle + +class PlayerInTime(Base): + """Captures evolution of players in time - change of rank, name, different identities.""" + __tablename__ = 'player_in_time' + id = Column(Integer, primary_key=True, index=True) + + player_id = Column(Integer, ForeignKey('player.id'), index=True) + player = relationship("Player", backref=backref('in_times', order_by=id)) + + name = Column(Text) + + rank = Column(PickleType) # (pickler=pickle)) + note = Column(Text) + # list games_as_black from backrefs + # list games_as_white from backrefs + + def __init__(self, player, name='', rank=None, note=''): + if isinstance(rank, basestring): + rank = Rank.from_string(rank) + + self.player = player + self.name = name + self.rank = rank + self.note = note + + def get_games_as(self, color): + if color == PLAYER_COLOR_BLACK : + return self.games_as_black + if color == PLAYER_COLOR_WHITE : + return self.games_as_white + raise KeyError(color) + + def iter_games_as(self, color): + return iter(self.get_games_as(color)) + + def iter_one_side_associations(self, + color_filter=lambda color:True, + game_filter=lambda game:True ): + return ( OneSideListAssociation(game, color) + for color in PLAYER_COLORS if color_filter(color) + for game in self.iter_games_as(color) if game_filter(game) ) + + def __str__(self): + return self.name + ( " (%s)"%(self.rank) if self.rank else '') + + def str2(self): + return self.name + ( " [%s]"%(self.rank) if self.rank else '') + + @ununicode + def __repr__(self): + return u"PlayerInTime(%s, %s, '%s', '%s', '%s')" % ( + self.id, + repr(self.player), + self.name, + self.rank, + self.note ) + +class Game(Base): + """Class (and ORM Table) holding game information like + - sgf filename + - info about players - who played black, who played white + - sgf header with further info + """ + __tablename__ = 'game' + id = Column(Integer, primary_key=True, index=True) + sgf_file = Column(Text, nullable=False) + + 
black_id = Column(Integer, ForeignKey('player_in_time.id'), index=True) + white_id = Column(Integer, ForeignKey('player_in_time.id'), index=True) + + black = relationship("PlayerInTime", primaryjoin="PlayerInTime.id==Game.black_id", + backref=backref('games_as_black', order_by=id)) + white = relationship("PlayerInTime", primaryjoin="PlayerInTime.id==Game.white_id", + backref=backref('games_as_white', order_by=id)) + + sgf_header = Column(PickleType) + + # We store the whole header instead of these + + #date = Column(Text) + #komi = Column(Float) + #handicap = Column(Integer) + #size = Column(Integer) + #result = Column(Text) + #note = Column(Text) + + def __init__(self, sgf_file, black, white, sgf_header={}): + self.sgf_file = sgf_file + self.black = black + self.white = white + self.sgf_header = sgf_header + + @ununicode + def __repr__(self): + return u"Game(%s, '%s', '%s', '%s')" %( + self.id, + self.sgf_file, + repr(self.white) if self.white else '', + repr(self.black) if self.black else '') + + def abs_path(self): + return os.path.abspath(self.sgf_file) + + def iter_pit_color(self): + yield (self.black, PLAYER_COLOR_BLACK) + yield (self.white, PLAYER_COLOR_WHITE) + + def get_player_by_color(self, color): + for gpit, gcolor in self.iter_pit_color(): + if color == gcolor: + return gpit.player + raise ValueError("Wrong color '%s'."%color) + + def get_player_color(self, player): + if self.black.player == self.white.player : + # we cannot expect for this method to return different values for one player... 
+ # (so this would always return black, because it has no way of knowing if we ask for + # black or white player) + raise SchizophrenicPlayerError("Asked for color for game between identical players: %s"%(self,)) + + for gpit, gcolor in self.iter_pit_color(): + if player == gpit.player: + return gcolor + + raise ValueError("Game %s is not a game of %s."%(repr(self), repr(player))) + + def get_year(self, try_filename_prefix=True): + # Year from DT field of sgf file + dt = self.sgf_header.get('DT', 'Unknown') + year = utils.get_year(dt) + + # try to guess name from filename prefix (e.g. gogod) + if year == None and try_filename_prefix: + fn = os.path.basename(self.sgf_file)[:4] + return utils.get_year(fn) + + # return year or None if failure + return year + + def open_in_viewer(self): + utils.viewer_open(self.abs_path()) + +game_list_association = Table('game_list_association', Base.metadata, + Column('game_list_id', Integer, ForeignKey('game_list.id'), index=True), + Column('game_id', Integer, ForeignKey('game.id'), index=True) +) + +class GameList(Base): + """List of games. 
+ """ + __tablename__ = 'game_list' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, unique=True, index=True) + + games = relationship('Game', secondary=game_list_association, backref='game_lists') + + def __init__(self, name, games=None): + self.name = name + if games != None: + assert not self.games + self.games = list(games) + + def iter_players_black(self): + """Iterate players who played in a game (from this list) as black.""" + for game in self.games: + yield game.black.player + + def iter_players_white(self): + """Look at self.get_players_black and guess.""" + for game in self.games: + yield game.white.player + + def iter_players(self): + """Iterate players who played a game from this list.""" + return chain(self.iter_players_black(), self.iter_players_white()) + + def append(self, game): + self.games.append(game) + + #def __str__(self): + # ret = [ self.name ] + map(str, self.games) + # + # return '\n'.join(ret) + + def __getitem__(self, val): + return self.games[val] + + def __len__(self): + return len(self.games) + + @ununicode + def __repr__(self): + return "GameList(%s, '%s', #games = %d)" %( self.id, self.name, len(self) ) + + +class Merger: + def __init__(self): + pass + def __repr__(self): + return self.__class__.__name__ + "()" + def start(self, bw_gen): + raise NotImplementedError + def add(self, result, color): + raise NotImplementedError + def finish(self): + raise NotImplementedError + + +class OneSideListAssociation(Base): + __tablename__ = 'one_side_list_association' + id = Column(Integer, primary_key=True, index=True) + one_side_list_id = Column(Integer, ForeignKey('one_side_list.id'), index=True) + game_id = Column(Integer, ForeignKey('game.id'), index=True) + + # what is the color of the player of interest in this game? 
+ color = Column(Enum(PLAYER_COLOR_BLACK, PLAYER_COLOR_WHITE)) + game = relationship("Game", backref="one_side_lists_assoc") + + # one game ( for one side ) can be in one game list only once + __table_args__ = ( UniqueConstraint('one_side_list_id', 'game_id', 'color'), ) + + def __init__(self, game, color): + self.game = game + self.color = color + + def __iter__(self): + yield self.game + yield self.color + + @ununicode + def __repr__(self): + return u"OneSideListAssociation(%s, '%s')" %( repr(self.game), self.color ) + +class OneSideList(Base): + """List of games, for e.g. players with 10kyu, Honinbo Shusaku's games, ... + + Note that the list distinguishes between sides. That is, if you are interested + in both sides (default behaviour of the `add` method), the game will be added + twice - once for black, once for white. + """ + __tablename__ = 'one_side_list' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, unique=True, index=True) + + list_assocs = relationship('OneSideListAssociation', backref='one_side_list') + + def __init__(self, name, assocs=None): + self.name = name + if assocs != None: + assert not self.list_assocs + self.list_assocs = list(assocs) + + def __getitem__(self, val): + return self.list_assocs[val] + + def batch_add(self, games, color): + """Add games played with one color in batch.""" + self.list_assocs += [ OneSideListAssociation(game, color) for game in games ] + + def add(self, game, player=None, color=None): + """Adds game to the list. If @player (or @color) specified, adds only + one side of the game - the one that @player played (or played with @color). + Otw. 
both sides get added (game is added twice - once for black, once for white) + """ + if player != None: + pc = game.get_player_color(player) + if color and color != cp: + raise ValueError( """Provided color (%s) is different from provided player's (%s) color in the game %s."""% + ( color, player, game )) + color = pc + + if color != None: + # if color of the desired player specified + citer = ( color, ) + else: + # add both black's game and white's game + citer = PLAYER_COLORS + + for color in citer: + self.list_assocs.append(OneSideListAssociation(game, color)) + + def for_one_side_list(self, merger, bw_processor): + """ + Processes the whole OneSideList, so that @bw_processor is called on every game. And the + result of interest (black or white) is added to @merger, via @merger.add(result, color). + At the end @merger.finish() is called and this should return the desired data. + """ + #assert isinstance(merger, Merger) + + merger.start(bw_processor) + + for ga in self.list_assocs: + try: + black, white = bw_processor(ga.game) + except ProcessingError as exc: + logging.debug("Exception %s occured in processing the game %s, skipping!!"%(repr(exc), ga.game)) + continue + except Exception as exc: + logging.exception("Exception %s occured in processing the game %s!!"%(repr(exc), ga.game)) + raise + #continue + + desired = black if ga.color == PLAYER_COLOR_BLACK else white + merger.add(desired, ga.color) + + return merger.finish() + + def __len__(self): + return len(self.list_assocs) + + def __str__(self): + ret = [ self.name ] + for ga in self.list_assocs: + ret.append("%s : %s"%(ga.color, ga.game)) + + return '\n'.join(ret) + + @ununicode + def __repr__(self): + return "OneSideList(%s, '%s', #games = %d)"%( self.id, self.name, len(self) ) + +class DataMap(Base): + """ + One DataMap holds info about the mapping: + OneSideList -> ImageData + """ + __tablename__ = 'datamap' + id = Column(Integer, primary_key=True, index=True) + name = Column(Text, nullable=False, 
unique=True, index=True) + + # information about the image domain + image_types = Column(PickleType) + image_annotations = Column(PickleType) + + relations = relationship("DataMapRelation", backref='datamap') + + def add(self, one_side_list, image): + self.relations.append(DataMapRelation(one_side_list=one_side_list, + image=image)) + def __len__(self): + return len(self.relations) + + def __getitem__(self, val): + return self.relations[val] + + @ununicode + def __repr__(self): + return "DataMap(%d, '%s', #relations = %d )"%(self.id, self.name, len( self.relations)) + +class DataMapRelation(Base): + """ + One OneSideList gets mapped to data (usually a python vector). + """ + __tablename__ = 'datamap_relation' + id = Column(Integer, primary_key=True, index=True) + # id of the current dataset + datamap_id = Column(Integer, ForeignKey('datamap.id'), index=True) + # domain + one_side_list_id = Column(Integer, ForeignKey('one_side_list.id'), index=True) + # image + image_id = Column(Integer, ForeignKey('image_data.id'), index=True) + + one_side_list = relationship("OneSideList")#, backref='relations') + image = relationship("ImageData") + + def __iter__(self): + yield self.one_side_list + yield self.image + + def __repr__(self): + return "DataMapRelation(%s,%s)" % (repr(self.one_side_list), + repr(self.image)) + + +class ImageData(Base): + """ Class used to hold python-pickled data under unique name. Meant to be + used for holding right side of the mapping defined by DataMapRelation, + so that multiple OneSideLists may share the same image. + """ + __tablename__ = 'image_data' + id = Column(Integer, primary_key=True, index=True) + # e.g. 'style: Otake Hideo' + name = Column(Text, nullable=False, unique=True, index=True) + # e.g. 
the style vector itself + data = Column(PickleType) + + def __init__(self, name, data): + self.name = name + self.data = data + + @ununicode + def __repr__(self): + return "ImageData(%s, %s, %s)"%(self.id, self.name, self.data) + diff --git a/utils/godb_session.py b/utils/godb_session.py new file mode 100644 index 0000000..378718f --- /dev/null +++ b/utils/godb_session.py @@ -0,0 +1,173 @@ +from sqlalchemy import create_engine, and_ +from sqlalchemy.orm import sessionmaker, aliased +from sqlalchemy.orm.session import Session + +import logging +import os +import re + +from godb_models import Player, PlayerInTime, Game, GameList, OneSideList, OneSideListAssociation, DataMap, DataMapRelation, ImageData, Base +from rank import Rank +import misc +import timer + +from sgf_load import load_sgf_file_headers, ParseError + +class GodbSession(Session): + def godb_get_player(self, name, note=u''): + """Looks if the player with @name is in the DB and returns it. + Otw. creates a new player with these attributes. + This new player is NOT added into the session. + """ + pls = self.query(Player).filter(Player.name==name).all() + assert len(pls) <= 1 + if len(pls) == 1: + player = pls[0] + if player.note != note: + logging.warn("%s has different note than '%s'"%(repr(player), note)) + return player + if len(pls) == 0: + return Player(name, note) + + def godb_get_player_in_time(self, name, current_name=None, current_rank=None, current_note=''): + """ + NOT adding anything into the session. 
def godb_get_player_in_time(self, name, current_name=None, current_rank=None, current_note=''):
    """Return an existing PlayerInTime matching the given attributes,
    or create a new one.

    The returned PlayerInTime (new or existing) is NOT added into the session.
    (The underlying Player is fetched/created via self.godb_get_player.)
    """
    player = self.godb_get_player(name)
    # idiom fix: was `current_name == None`
    if current_name is None:
        current_name = name

    pits = self.query(PlayerInTime).filter( PlayerInTime.player == player )

    # NOTE(review): falsy values (e.g. the '' name of anonymous players, or a
    # rank of None) skip the filter entirely, so any stored value matches —
    # presumably intentional; confirm against callers.
    if current_name:
        pits = pits.filter( PlayerInTime.name == current_name )
    if current_rank:
        pits = pits.filter( PlayerInTime.rank == current_rank )
    if current_note:
        pits = pits.filter( PlayerInTime.note == current_note )

    pit_all = pits.all()
    if pit_all:
        # return the first match; duplicates are tolerated here
        return pit_all[0]

    return PlayerInTime(player, current_name, current_rank, current_note)
def godb_add_dir_as_gamelist(self, *args, **kwargs):
    """Deprecated alias for godb_scan_dir_as_gamelist (kept for old callers)."""
    logging.warn("deprecated call to godb_add_dir_as_gamelist")
    # BUG fix: previously called self.godb_scan_dir_as_gamelist(self, *args, ...)
    # passing `self` twice, which shifted every positional argument by one.
    return self.godb_scan_dir_as_gamelist(*args, **kwargs)

def godb_scan_dir_as_gamelist(self, directory, gamelist=None):
    """Recursively scans the @directory for sgf files.
    The valid games are added into a gamelist (either provided by @gamelist kwarg,
    or new if @gamelist is None).

    Both players in each of the games scanned are added into the session.
    (see self.godb_sgf_to_game)

    The gamelist is returned and NOT added into the session.
    """
    t = timer.Timer()
    games = []
    t.start()
    for filepath in misc.iter_files(directory):
        if re.search('sgf$', filepath):
            logging.debug("Scanning '%s'"%(filepath))

            # create a Game object from the sgf file; sgf_to_game returns
            # None for unparsable / multi-gametree files
            t.start()
            game = self.godb_sgf_to_game(filepath)
            if game:
                games.append(game)
            t.stop()

    t.stop_n_log(' Total time', 'Game')

    if gamelist is None:
        gamelist = GameList("Games from '%s'."%(directory,))

    gamelist.games += games
    logging.info("Added %d games to: %s"%(len(games), gamelist))

    return gamelist
# session factory producing GodbSession instances
_godb_session = sessionmaker(class_=GodbSession)

def godb_session_maker(filename, echo=False):
    """Return a new GodbSession backed by the sqlite database in @filename.

    All tables declared on Base.metadata are created if missing.
    """
    engine = create_engine('sqlite:///%s' % filename, echo=echo)
    Base.metadata.create_all(engine)

    session = _godb_session(bind=engine)
    # for wingide completion...
    isinstance(session, GodbSession)
    return session
def bucket_by_key(iterable, key_fc):
    """
    Throws items in @iterable into buckets given by @key_fc function.
    e.g.
    >>> bucket_by_key([1,2,-3,4,5,6,-7,8,-9], lambda num: 'neg' if num < 0 else 'nonneg')
    {'neg': [-3, -7, -9], 'nonneg': [1, 2, 4, 5, 6, 8]}
    """
    buckets = {}
    for element in iterable:
        key = key_fc(element)
        if key not in buckets:
            buckets[key] = []
        buckets[key].append(element)
    return buckets
class MyPartial:
    """
    An alternative implementation of functools.partial, allowing to specify
    args from the right as well.
    """
    def __init__(self, func, args=(), keywords=None, right=False):
        # BUG fix: `keywords={}` was a mutable default argument shared by
        # every instance constructed without keywords.
        self.func = func
        self.args = args
        self.keywords = {} if keywords is None else keywords
        self.right = right

    def _frepr(self):
        return repr(self.func)

    def __repr__(self):
        return "MyPartial(%s, %s, %s%s)"%(self._frepr(),
                repr(self.args), repr(self.keywords),
                ", right=True" if self.right else '')

    def _merge_args(self, args_new):
        # right=True appends the stored args AFTER the freshly supplied ones
        if self.right:
            return args_new + tuple(self.args)
        return tuple(self.args) + args_new

    def _merge_kwargs(self, kwargs_new):
        # new keyword args override the stored ones
        kwargs = self.keywords.copy()
        kwargs.update(kwargs_new)
        return kwargs

    def __call__(self, *args_new, **kwargs_new):
        args = self._merge_args(args_new)
        kwargs = self._merge_kwargs(kwargs_new)
        return self.func(*args, **kwargs)

def partial(f, *args, **kwargs):
    """
    def minus(a, b):
        return a - b

    partial(minus, 10) is like:

    lambda b : minus(10, b)
    """
    return MyPartial(f, args, kwargs)

def partial_right(f, *args, **kwargs):
    """
    def minus(a, b):
        return a - b

    partial_right(minus, 10) is like:

    lambda a : minus(a, 10)
    """
    return MyPartial(f, args, kwargs, right=True)
def tmp_names(base=None, first_simple=False):
    """Yield an endless stream of temporary names derived from @base.

    With first_simple=True the first name is plain "base", then "base_1",
    "base_2", ...; otherwise the stream starts at "base_0".

    BUG fix: the default @base used to be `random_hash()` evaluated once at
    import time, so every call sharing the default produced the very same
    name sequence; now a fresh hash is drawn per call when @base is omitted.
    """
    if base is None:
        base = random_hash()
    i = 0
    if first_simple:
        yield "%s"%(base)
        i += 1
    while True:
        yield "%s_%d"%(base, i)
        i += 1
class RankInitExc(Exception):
    """Raised when a Rank is constructed from an invalid number/kdp pair."""
    pass

class Rank:
    """A go rank: kyu ('k'), dan ('d') or professional dan ('p').

    Every rank maps onto a single integer scale via key():
    30k..1k -> 30..1, 1d..10d -> 0..-9, 1p..10p -> -10..-19,
    i.e. the smaller the key, the stronger the player.
    """
    KEYS={'k': lambda x: x, # 1kyu -> 1, 30kyu -> 30
          'd': lambda x: -x + 1, # 1dan -> 0, 10dan -> -9
          'p': lambda x: -x - 9} # 1pro -> -10, 10pro -> -19

    DOMAIN_MAX = { 'k' : 30,
                   'd' : 10,
                   'p' : 10 }

    @staticmethod
    def from_key(number):
        """Return the Rank whose key() is closest to @number."""
        ranks = list(Rank.iter_all())
        dists = [ abs(number - r.key()) for r in ranks ]
        return misc.argmin( zip(ranks, dists) )

    @staticmethod
    def from_string(string):
        """Parse strings like '3k', '5 kyu', '9p'; return a Rank or None."""
        mo = re.match('^([1-9][0-9]?) ?([kdp]).*', string)
        if not mo:
            return None
        try:
            return Rank(int(mo.group(1)), mo.group(2))
        except (ValueError, RankInitExc):
            return None

    @staticmethod
    def iter_all():
        """Iterate over every valid Rank."""
        # .items()/range() instead of .iteritems()/xrange(): identical in
        # Python 2, and keeps the module importable under Python 3
        for key, domain in Rank.DOMAIN_MAX.items():
            for x in range(domain):
                yield Rank( x + 1, key )

    def __init__(self, number, kdp):
        self.number, self.kdp = number, kdp

        if not self.kdp in self.KEYS:
            raise RankInitExc("kdp must be either 'k' for kyu players,"
                              " 'd' for dan players or 'p' for professionals")

        def check_domain(bottom, val, up):
            assert bottom <= up
            if not( bottom <= val <= up):
                # BUG fix: the message used to be raised with unfilled
                # %d placeholders
                raise RankInitExc("Must be %d <= %d <= %d." % (bottom, val, up))

        check_domain(1, self.number, self.DOMAIN_MAX[self.kdp])

    def as_tuple(self):
        return self.number, self.kdp

    def key(self):
        """Single-integer representation of the rank (smaller = stronger)."""
        return self.KEYS[self.kdp](self.number)

    def __str__(self):
        return "%d%s"%(self.number, self.kdp)

    def __repr__(self):
        return "Rank(%s, key=%d)"%(self, self.key())

    def __hash__(self):
        return self.key().__hash__()

    def __cmp__(self, other):
        # Python 2 ordering hook; harmless (unused) under Python 3
        if not isinstance(other, Rank):
            return -1
        return ( - self.key()).__cmp__( - other.key())

    # Rich comparisons keep the same ordering working under Python 3:
    # a stronger rank (smaller key) compares greater.
    def __eq__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return self.key() == other.key()

    def __ne__(self, other):
        eq = self.__eq__(other)
        if eq is NotImplemented:
            return eq
        return not eq

    def __lt__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() < -other.key()

    def __le__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() <= -other.key()

    def __gt__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() > -other.key()

    def __ge__(self, other):
        if not isinstance(other, Rank):
            return NotImplemented
        return -self.key() >= -other.key()
def my_err(exc):
    """Codec error handler: try to recover a badly encoded span as UTF-8,
    otherwise replace it with '?' characters.

    Note the deliberate end+1 slice: for a single-byte decode failure it
    grabs the following byte too, which recovers 2-byte UTF-8 sequences.
    """
    bad_span = exc.object[exc.start:exc.end+1]
    try:
        recovered = bad_span.decode('utf-8')
    except:
        width = exc.end - exc.start
        if width > 4:
            logging.warn("sgf_load.py : Long chain of chars (%d) badly encoded."%width)
        return (u'?' * width, exc.end)
    return (recovered, exc.end+1)
class Timer:
    """Class for measuring lengths of nested time intervals.
    Intervals are opened either like:

    >>> t = Timer()
    >>> t.start()
    >>> do_stuff()
    >>> t.stop()

    Or using the with statement:

    >>> with t():
            do_stuff()
    """
    def __init__(self):
        # stack of open intervals: (start_time, children_durations, kwargs)
        self.times = []
        # kwargs captured by __call__ for the next interval opened
        self.next_args = {}

    def start(self):
        """Opens time interval."""
        self.times.append((time.time(), [], self.next_args))
        self.next_args = {}

    def stop(self):
        """Closes time interval. Returns tuple (duration, children_durations)."""
        now = time.time()
        my_start, children_durations, _ = self.times.pop()

        my_duration = now - my_start
        # add duration of this interval to the parent's children list
        if self.times:
            self.times[-1][1].append(my_duration)

        return (my_duration, children_durations)

    def stop_n_log(self, comment=' time elapsed', child_name='child'):
        """Like stop(), but logs total time plus child-interval statistics."""
        total, children = self.stop()
        msg = "%s: %.3f s"%(comment, total)
        if children:
            c_sum, c_len = sum(children), len(children)
            mean = c_sum / c_len
            sd = 0
            if c_len > 1:
                # sample standard deviation of the child durations
                sd = math.sqrt(sum( (val - mean)**2 for val in children ) / (c_len - 1))
            msg += """, time not spent in children (overhead) %.3f s = %.2f%%
    #%d x %s took %.3f s:
    mean: %.3f s (sd = %.3f s)"""%(
                total - c_sum, 100.0 * (total - c_sum) / total,
                c_len, child_name, c_sum, mean, sd)
        logging.info(msg + '\n')

    def stop_arg(self):
        """Close the interval using the kwargs given at __call__ time."""
        _, _, kwargs = self.times[-1]
        log = kwargs.pop('log', False)
        if log:
            return self.stop_n_log(**kwargs)
        return self.stop()

    def __call__(self, **kwargs):
        self.next_args = kwargs
        return self

    def __enter__(self):
        self.start()
        # BUG fix: previously returned None, so `with t() as timer:`
        # bound `timer` to None instead of the Timer
        return self

    def __exit__(self, *args):
        self.stop_arg()
def check_output(*args, **kwargs):
    """subprocess.check_output, with a fallback backport for interpreters
    older than Python 2.7 where it does not exist."""
    if hasattr(subprocess, 'check_output'):
        return subprocess.check_output(*args, **kwargs)

    # backport path: emulate check_output with Popen
    if 'stdout' in kwargs:
        raise ValueError('stdout argument not allowed, it will be overridden.')
    proc = subprocess.Popen(stdout=subprocess.PIPE, *args, **kwargs)
    out, _ = proc.communicate()
    returncode = proc.poll()
    if returncode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = args[0]
        raise subprocess.CalledProcessError(returncode, cmd, output=out)
    return out
def iter_splits(l, parts=None, max_size=None, min_size=None):
    """Will yield consequent sublist of the @l list, trying to result
    evenly sized sublists. Exactly one of the parameters @parts or
    @max_size or @min_size must be specified.

    specifiing parts = N will yield N sublists of (almost) even size. The
    list size difference is guaranted to be at most 1.

    >>> list(iter_splits(range(5), parts=2))
    [[0, 1, 2], [3, 4]]
    >>> list(iter_splits(range(5), parts=4))
    [[0, 1], [2], [3], [4]]

    specifiing max_size = N returns the smallest possible number of
    consequent sublists so that whole list is divided and size of each
    part is <= N

    >>> list(iter_splits(range(5), max_size=3))
    [[0, 1, 2], [3, 4]]
    >>> list(iter_splits(range(5), max_size=10))
    [[0, 1, 2, 3, 4]]

    Calling iter_splits(l, max_size=N) is just a shorthand for calling
    iter_splits(l, parts=len(l) / N + bool(len(l)% N) )

    Similarly min_size = N returns the largest possible number of
    consequent sublists so that whole list is divided and size of each
    part is >= N

    Calling iter_splits(l, min_size=N) is just a shorthand for calling
    iter_splits(l, parts=len(l) / N )
    """
    if bool(parts) + bool(max_size) + bool(min_size) != 1:
        # BUG fix: the message used to name a nonexistent "exact_size" argument
        raise TypeError('Exactly one of parts, max_size or min_size arguments must be specified (and nonzero)')

    if parts:
        # BUG fix: removed leftover debug `print parts`;
        # `//` keeps integer division working under Python 3 too
        pn, rest = len(l) // parts, len(l) % parts
        if pn == 0:
            raise ValueError("Number of parts to split must not be larger than the number of elements.")

        def sizes(pn, rest):
            # the first @rest sublists get one extra element
            for i in range(parts):
                if rest:
                    yield pn + 1
                    rest -= 1
                else:
                    yield pn

        stop = 0
        for size in sizes(pn, rest):
            start, stop = stop, stop + size
            yield l[start: stop]

    if max_size:
        pn, rest = len(l) // max_size, len(l) % max_size
        if rest:
            pn += 1
        for split in iter_splits(l, parts=pn):
            yield split

    if min_size:
        for split in iter_splits(l, parts=len(l) // min_size):
            yield split
center(vec1), center(vec2) + return (vec1 * vec2).sum() / (norm(vec1) * norm(vec2)) + + +if __name__ == '__main__': + def test_split(): + l = range(20) + + for kw in ['parts', 'max_size', 'min_size']: + for val in range(10, 20): + print "iter_splits(%s, **{%s : %s}))" % (l, kw, val) + res = list(iter_splits(l, **{kw : val})) + print kw, "=", val + print " len = ", len(res), ", max(size) = ", max(map(len, res)), ", min(size) = ", min(map(len, res)) + print " ", res + + assert list(itertools.chain.from_iterable(res)) == l + if kw == 'parts': + assert len(res) == val + if kw == 'max_size': + assert max(map(len, res)) <= val + if kw == 'min_size': + assert min(map(len, res)) >= val + + #test_partial() + #test_split() + + get_random_output_base(0, 1) \ No newline at end of file -- 2.11.4.GIT