#!/usr/bin/python
# gostyle: the basic library -- data_about_players.py

import numpy
import math

import load_questionare

from config import QUESTIONARE_DIRECTORY

def questionare_average(questionare_list, silent=False, tex=False, cnt_limit=1):
    """Averages the questionnaire answers for each player.

    Returns a dict mapping player name -> list of per-scale means; only players
    covered by at least cnt_limit questionnaires are included.
    """
    # example of a single line of the tex output:
    # Otake Hideo & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\
    total = {}
    count = {}
    for questionare in questionare_list:
        for name in questionare.keys():
            if name in count:
                count[name] += 1
            else:
                count[name] = 1
                total[name] = []
            total[name].append(questionare[name])

    l = []
    for name, counter in count.items():
        l.append((counter, name))
    l.sort()
    l.reverse()
    variance = []
    result = {}

    for counter, name in l:
        if counter >= cnt_limit:
            means = []
            if not silent:
                print "%d: %20s" % (counter, name),

            a = numpy.array(total[name]).transpose()
            for b in a:
                means.append(b.mean())
                if not silent:
                    if not tex:
                        print u"%2.3f \u00B1 %2.3f " % (b.mean(), numpy.sqrt(b.var())),
                    else:
                        print u"& $%2.1f \pm %2.1f$" % (b.mean(), numpy.sqrt(b.var())),
                variance.append(numpy.sqrt(b.var()))
            if not silent:
                if not tex:
                    print
                else:
                    print "\\\\"

            result[name] = means

    if not silent:
        if not tex:
            print "Mean standard deviation is: %2.3f" % (numpy.array(variance).mean(),)

    return result
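# A usage sketch (hypothetical questionnaires, not the real data below): with
# cnt_limit=1 every listed player is kept and the per-scale means are returned,
#
#   qs = [{"Otake Hideo": (4, 3, 6, 5)}, {"Otake Hideo": (5, 3, 5, 3)}]
#   questionare_average(qs, silent=True)
#   # -> {'Otake Hideo': [4.5, 3.0, 5.5, 4.0]}
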
def questionare_average_raw(questionare_list):
    """Prints the averages as comma-separated values: name, count, per-scale means."""
    import numpy
    total = {}
    count = {}
    for questionare in questionare_list:
        for name in questionare.keys():
            if name in count:
                count[name] += 1
            else:
                count[name] = 1
                total[name] = []
            total[name].append(questionare[name])

    l = []
    for name, counter in count.items():
        l.append((counter, name))
    l.sort()
    l.reverse()
    variance = []
    result = {}
    for counter, name in l:
        if counter > 1:
            means = []
            print "%s, %d," % (name, counter),
            a = numpy.array(total[name]).transpose()
            for b in a:
                means.append(b.mean())
                print u"%2.3f," % (b.mean()),
            print
            result[name] = means
    return result
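# Usage sketch: questionare_average_raw(Data.questionare_list) would print one
# CSV-like line per player seen more than once, roughly
#   Otake Hideo, 3, 4.333, 3.000, 4.667, 3.667,
# (illustrative numbers only; the real output depends on the loaded questionnaires).
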
class Data:
    ### Explicit list of players
    ###
    ### The following code consists of expert-based knowledge kindly supplied by
    ### Alexander Dinerstein (3-pro), Motoki Noguchi (7-dan) and Vit Brunner (4-dan).
    ###
    ### The vector at each name corresponds with
    ### (territory, orthodox, aggressiveness, thickness):
    questionare_annotations = ['territory', 'orthodox', 'aggressiveness', 'thickness']
    ## the orthodox scale corresponds to novelty; the "name" of the scale remains
    ## the same for historical reasons --- the scales are defined the same though

    questionare_list = [
        #questionare_vit_brun
        {
        "Chen Yaoye": (7, 5, 7, 6),
        "Cho Chikun": (9, 7, 7, 9),
        "Cho U": (4, 6, 7, 4),
        "Gu Li": (5, 6, 9, 5),
        "Ishida Yoshio": (6, 3, 5, 5),
        "Luo Xihe": (8, 4, 7, 7),
        "Ma Xiaochun": (5, 7, 7, 7),
        "O Meien": (3, 9, 6, 5),
        "Otake Hideo": (4, 3, 6, 5),
        "Rui Naiwei": (5, 6, 8, 5),
        "Sakata Eio": (6, 4, 8, 6),
        "Takemiya Masaki": (1, 4, 7, 2),
        #"Yi Ch'ang-ho 2004-": (7, 6, 4, 4),
        #"Yi Ch'ang-ho 2005+": (7, 6, 6, 4),
        "Yi Ch'ang-ho": (7, 6, 6, 4),
        "Yi Se-tol": (6, 5, 9, 5),
        "Yoda Norimoto": (4, 4, 7, 3)
        },
        # questionare_motoki_noguchi
        {
        "Cho Chikun": (8, 9, 8, 8),
        "Cho U": (9, 7, 6, 8),
        "Gu Li": (7, 8, 10, 4),
        "Ishida Yoshio": (9, 6, 2, 6),
        "Luo Xihe": (6, 8, 9, 7),
        "Ma Xiaochun": (9, 6, 7, 8),
        "O Meien": (1, 10, 10, 2),
        "Otake Hideo": (4, 3, 5, 3),
        "Rui Naiwei": (6, 6, 10, 2),
        "Sakata Eio": (10, 5, 6, 10),
        "Takemiya Masaki": (2, 6, 6, 1),
        #"Yi Ch'ang-ho 2004-": (8, 3, 2, 3),
        # P: I took this from the 2004- entry
        "Yi Ch'ang-ho": (8, 3, 2, 3),
        "Yi Se-tol": (5, 10, 10, 8),
        "Yoda Norimoto": (8, 2, 2, 5),
        "Fujisawa Hideyuki": (4, 8, 7, 4),
        "Go Seigen": (8, 10, 9, 6),
        "Hane Naoki": (8, 2, 4, 6),
        "Honinbo Dosaku": (2, 10, 8, 5),
        "Honinbo Shusaku": (8, 3, 2, 6),
        "Honinbo Shuwa": (10, 8, 2, 10),
        "Kato Masao": (2, 3, 9, 4),
        "Kobayashi Koichi": (8, 3, 3, 6),
        "Miyazawa Goro": (1, 10, 10, 3),
        "Takao Shinji": (4, 3, 7, 4),
        "Yamashita Keigo": (2, 8, 10, 4),
        "Yuki Satoshi": (2, 8, 10, 4)
        },
        #questionare_alex_dinner
        {
        "Chen Yaoye": (5, 3, 5, 5),
        "Cho Chikun": (10, 7, 5, 10),
        "Cho U": (9, 5, 3, 7),
        "Gu Li": (5, 7, 8, 3),
        "Ishida Yoshio": (9, 6, 3, 5),
        "Luo Xihe": (8, 10, 7, 4),
        "Ma Xiaochun": (10, 6, 3, 9),
        "O Meien": (4, 10, 9, 4),
        "Otake Hideo": (5, 3, 3, 3),
        "Rui Naiwei": (3, 5, 9, 3),
        "Sakata Eio": (7, 5, 8, 8),
        "Takemiya Masaki": (1, 9, 8, 1),
        #"Yi Ch'ang-ho 2004-": (6, 6, 2, 1),
        #"Yi Ch'ang-ho 2005+": (5, 4, 5, 3),
        # commented because duplicates 2005+
        "Yi Ch'ang-ho": (5, 4, 5, 3),
        "Yi Se-tol": (5, 5, 9, 7),
        "Yoda Norimoto": (7, 7, 4, 2),
        "Chen Zude": (3, 8, 6, 5),
        "Cho Tae-hyeon": (1, 4, 4, 2),
        "Fujisawa Hideyuki": (3, 10, 7, 4),
        "Go Seigen": (4, 8, 7, 4),
        "Hane Naoki": (7, 3, 4, 3),
        "Jie Li": (5, 3, 5, 4),
        "Kato Masao": (3, 6, 10, 4),
        "Kobayashi Koichi": (10, 2, 2, 5),
        "Miyazawa Goro": (2, 10, 9, 5),
        "Nie Weiping": (3, 7, 8, 4),
        "Shao Zhenzhong": (4, 5, 5, 4),
        "Suzuki Goro": (4, 7, 5, 5),
        "Takao Shinji": (6, 4, 4, 5),
        "Wu Songsheng": (2, 10, 7, 4),
        "Yamashita Keigo": (2, 10, 9, 2),
        "Yuki Satoshi": (4, 9, 8, 5),
        #"breakfast": (7, 7, 3, 4),
        #"rapyuta/daien": (4, 7, 6, 5),
        #"MilanMilan": (5, 5, 6, 4),
        #"roln111-": (6, 5, 7, 5),
        #"somerville": (4, 5, 5, 6),
        #"artem92-": (7, 4, 3, 2),
        #"TheCaptain": (3, 8, 7, 6)
        }
        ## + guys from the online questionare
        ] + load_questionare.scan_d(QUESTIONARE_DIRECTORY)

    questionare_total = questionare_average(questionare_list, silent=True)
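    # Data.questionare_total maps each player to the averaged style vector, ordered
    # as questionare_annotations.  A sketch (actual values depend on the
    # questionnaires found in QUESTIONARE_DIRECTORY):
    #
    #   Data.questionare_total["Otake Hideo"]   # e.g. [4.333, 3.0, 4.667, 3.667]
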
def get_all_player_names(limit=1):
    """Returns the set of players that appear in at least `limit` questionnaires."""
    pc = {}

    for q in Data.questionare_list:
        for p in q.keys():
            pc[p] = pc.get(p, 0) + 1

    ps = set(p for p in pc.keys() if pc[p] >= limit)

    return ps
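# Usage sketch: get_all_player_names(2) returns the set of pros covered by at
# least two questionnaires, e.g. a set containing "Cho Chikun", "Otake Hideo", ...
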
def get_interesting_pros(style, top, bottom, without_dist=True):
    """Returns the `top` nearest and `bottom` most distant pros to the given
    style vector, measured by Euclidean distance over the averaged answers."""
    style_vec = numpy.array(style)

    dist = [
        (math.sqrt(sum(numpy.power(style_vec - numpy.array(pro_style), 2))),
         pro_name) for pro_name, pro_style in Data.questionare_total.iteritems()
        ]

    dist.sort()
    if not without_dist:
        return dist[:top], dist[-bottom:]

    def second((a, b)):
        return b

    return map(second, dist[:top]), map(second, dist[-bottom:])
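# Usage sketch: for a style vector ordered as (territory, orthodox, aggressiveness,
# thickness), e.g.
#
#   near, far = get_interesting_pros([3, 9, 6, 5], top=3, bottom=3)
#
# near/far are lists of player names closest/farthest in Euclidean distance
# (or (distance, name) pairs when without_dist=False).
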
if __name__ == '__main__':
    def main(tex=False):
        """Prints the averaged questionare data, along with the number of
        interviewees who answered for the particular pro."""
        #print get_all_player_names(4)

        questionare_total = questionare_average(Data.questionare_list, cnt_limit=2, silent=False, tex=tex)

        pa = get_all_player_names(2)

        vals = numpy.array([va for pn, va in questionare_total.iteritems() if pn in pa])
        #print vals.shape

        key2vec = {}
        for desc, num in zip(Data.questionare_annotations, range(4)):
            sli = vals[:, num]
            key2vec[desc] = sli
            if not tex:
                print u"%s\n mean: %2.3f \u00B1 %2.3f" % (desc, sli.mean(), sli.std())
            else:
                print u"%s & %2.3f \\pm %2.3f \\\\" % (desc, sli.mean(), sli.std())

        from utils.utils import pearson_coef

        qa = Data.questionare_annotations
        print
        print "PAIRWISE CORRELATIONS"
        print '',
        print " | ".join("%15s" % an for an in ([''] + qa))
        for i in xrange(len(qa)):
            print "%15s | " % qa[i],
            for j in xrange(len(Data.questionare_annotations)):
                if i > j:
                    print "%15s |" % (''),
                else:
                    p = pearson_coef(key2vec[qa[i]], key2vec[qa[j]])
                    print "%15s |" % ("%.3f" % p),
            print

    main()
    def test_style(style):
        near, dist = get_interesting_pros(style, 3, 3)
        print "similar"
        for p in near:
            print p
        print
        print "distant"
        for p in dist:
            print p

    #test_style([1, 2, 3, 4])