some late comments :)
[gostyle.git] / data_about_players.py
blobf776d9db9bc5627b8f55858e03004a8bdd6e0881
1 #!/usr/bin/python
3 import numpy
4 import math
6 import load_questionare
8 from config import QUESTIONARE_DIRECTORY
10 """
This file covers extracting and averaging data from style questionnaires,
both from fixed predefined results that were entered manually (below,
see the Data class) and from the answers to the online questionnaire.
14 """
def questionare_average(questionare_list, silent=False, tex=False, cnt_limit=1):
    """Averages data from different interviewees.

    Parameters:
    questionare_list -- iterable of dicts (one per interviewee) mapping a
                        name to a sequence of numeric style estimates
    cnt_limit -- minimum number of interviewees that filled the quest. for the
                 given pro to be included
    silent -- do not print anything
    tex -- print the results as tex table lines, suitable for export

    Returns a dict mapping every included name to the list of per-scale means.
    """
    # Tex rows are meant to look like:
    # Otake Hideo & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\

    # Gather all vectors given to each name, one per interviewee.
    total = {}
    for questionare in questionare_list:
        for name, estimate in questionare.items():
            total.setdefault(name, []).append(estimate)

    # Most frequently answered names first; ties go by name, descending.
    ranked = sorted(((len(estimates), name) for name, estimates in total.items()),
                    reverse=True)

    deviations = []
    result = {}
    for counter, name in ranked:
        if counter < cnt_limit:
            continue

        means = []
        cells = ["%d: %20s" % (counter, name)]
        # Transposing turns "one row per interviewee" into "one row per scale".
        for column in numpy.array(total[name]).transpose():
            mean = column.mean()
            std = numpy.sqrt(column.var())
            means.append(mean)
            # Recorded exactly once per scale, so that no scale is weighted
            # twice in the mean standard deviation reported below.
            deviations.append(std)
            if tex:
                cells.append(u"& $%2.1f \\pm %2.1f$" % (mean, std))
            else:
                cells.append(u"%2.3f \u00B1 %2.3f " % (mean, std))
        if tex:
            cells.append("\\\\")

        if not silent:
            # The whole row goes out in one call; joining by a single space
            # matches what consecutive "print x," statements produce.
            print(" ".join(cells))
        result[name] = means

    # Nothing to summarize when no name passed cnt_limit.
    if not silent and not tex and deviations:
        print("Mean standard deviation is: %2.3f" % (numpy.array(deviations).mean(),))
    return result
def questionare_average_raw(questionare_list):
    """currently not used

    Prints one comma separated line (name, number of interviewees, per-scale
    means) for every name answered by more than one interviewee and returns
    a dict mapping those names to their lists of means.
    """
    # All vectors given to each name, one per interviewee.
    answers = {}
    for questionare in questionare_list:
        for name in questionare:
            answers.setdefault(name, []).append(questionare[name])

    # Most frequently answered names first; ties go by name, descending.
    by_count = [(len(vectors), name) for name, vectors in answers.items()]
    by_count.sort(reverse=True)

    result = {}
    for counter, name in by_count:
        if counter <= 1:
            continue
        # Transposing yields one row per scale instead of one per interviewee.
        per_scale = numpy.array(answers[name]).transpose()
        means = [column.mean() for column in per_scale]

        fields = ["%s, %d," % (name, counter)]
        fields.extend(u"%2.3f," % (mean,) for mean in means)
        print(" ".join(fields))

        result[name] = means
    return result
class Data:
    """Container for the hand-entered style estimates and their averages."""

    ### Following code consist of expert based knowledge kindly supplied by
    ### Alexander Dinerstein 3-pro, Motoki Noguchi 7-dan and Vit Brunner 4-dan

    ### The vector stored at each name lists its values in the order of the
    ### scales named here.
    questionare_annotations = ['territory', 'orthodox', 'aggressiveness', 'thickness']
    ## the orthodox scale corresponds to novelty, the "name" of the scale remains
    # the same for historical reasons --- the scales are defined the same though

    # a list of dictionaries (one dict per interviewee) that contain
    # style estimates
    questionare_list = [
        #questionare_vit_brun
        {
            "Chen Yaoye": (7, 5, 7, 6),
            "Cho Chikun": (9, 7, 7, 9),
            "Cho U": (4, 6, 7, 4),
            "Gu Li": (5, 6, 9, 5),
            "Ishida Yoshio": (6, 3, 5, 5),
            "Luo Xihe": (8, 4, 7, 7),
            "Ma Xiaochun": (5, 7, 7, 7),
            "O Meien": (3, 9, 6, 5),
            "Otake Hideo": (4, 3, 6, 5),
            "Rui Naiwei": (5, 6, 8, 5),
            "Sakata Eio": (6, 4, 8, 6),
            "Takemiya Masaki": (1, 4, 7, 2),
            #"Yi Ch'ang-ho 2004-": (7, 6, 4, 4),
            #"Yi Ch'ang-ho 2005+": (7, 6, 6, 4),
            "Yi Ch'ang-ho": (7, 6, 6, 4),
            "Yi Se-tol": (6, 5, 9, 5),
            "Yoda Norimoto": (4, 4, 7, 3)
        },
        # questionare_motoki_noguchi
        {
            "Cho Chikun": (8, 9, 8, 8 ),
            "Cho U": (9, 7, 6, 8),
            "Gu Li": (7, 8, 10, 4 ),
            "Ishida Yoshio": (9, 6, 2, 6),
            "Luo Xihe": (6, 8, 9, 7 ),
            "Ma Xiaochun": (9, 6, 7, 8),
            "O Meien": (1, 10, 10, 2 ),
            "Otake Hideo": (4, 3, 5, 3),
            "Rui Naiwei": (6, 6, 10, 2),
            "Sakata Eio": (10, 5, 6, 10),
            "Takemiya Masaki": (2,6, 6, 1),
            #"Yi Ch'ang-ho 2004-": (8, 3, 2, 3),
            # P: I made this one from the 2004- entry
            "Yi Ch'ang-ho": (8, 3, 2, 3),
            "Yi Se-tol": (5, 10, 10, 8 ),
            "Yoda Norimoto": (8, 2, 2, 5),
            "Fujisawa Hideyuki": (4, 8, 7, 4 ),
            "Go Seigen": (8, 10, 9, 6),
            "Hane Naoki": (8, 2, 4, 6 ),
            "Honinbo Dosaku": (2, 10, 8, 5 ),
            "Honinbo Shusaku": (8, 3, 2, 6),
            "Honinbo Shuwa": (10, 8, 2, 10),
            "Kato Masao": (2,3, 9, 4),
            "Kobayashi Koichi": (8, 3, 3, 6),
            "Miyazawa Goro": (1, 10, 10, 3),
            "Takao Shinji": (4, 3, 7, 4 ),
            "Yamashita Keigo": (2, 8, 10, 4 ),
            "Yuki Satoshi": (2, 8, 10, 4)
        },
        #questionare_alex_dinner
        {
            "Chen Yaoye": (5, 3, 5, 5),
            "Cho Chikun": (10, 7, 5, 10),
            "Cho U": (9, 5, 3, 7),
            "Gu Li": (5, 7, 8, 3),
            "Ishida Yoshio": (9, 6, 3, 5),
            "Luo Xihe": (8, 10, 7, 4),
            "Ma Xiaochun": (10, 6, 3, 9),
            "O Meien": (4, 10, 9, 4),
            "Otake Hideo": (5, 3, 3, 3),
            "Rui Naiwei": (3, 5, 9, 3),
            "Sakata Eio": (7, 5, 8, 8),
            "Takemiya Masaki": (1, 9, 8, 1),
            #"Yi Ch'ang-ho 2004-": (6, 6, 2, 1),
            #"Yi Ch'ang-ho 2005+": (5, 4, 5, 3),
            # commented because duplicates 2005+
            "Yi Ch'ang-ho": (5, 4, 5, 3),
            "Yi Se-tol": (5, 5, 9, 7),
            "Yoda Norimoto": (7, 7, 4, 2),
            "Chen Zude": (3, 8, 6, 5),
            "Cho Tae-hyeon": (1, 4, 4, 2),
            "Fujisawa Hideyuki": (3, 10, 7, 4),
            "Go Seigen": (4, 8, 7, 4),
            "Hane Naoki": (7, 3, 4, 3),
            "Jie Li": (5, 3, 5, 4),
            "Kato Masao": (3, 6, 10, 4),
            "Kobayashi Koichi": (10, 2, 2, 5),
            "Miyazawa Goro": (2, 10, 9, 5),
            "Nie Weiping": (3, 7, 8, 4),
            "Shao Zhenzhong": (4, 5, 5, 4),
            "Suzuki Goro": (4, 7, 5, 5),
            "Takao Shinji": (6, 4, 4, 5),
            "Wu Songsheng": (2, 10, 7, 4),
            "Yamashita Keigo": (2, 10, 9, 2),
            "Yuki Satoshi": (4, 9, 8, 5),
            #"breakfast": (7, 7, 3, 4),
            #"rapyuta/daien": (4, 7, 6, 5),
            #"MilanMilan": (5, 5, 6, 4),
            #"roln111-": (6, 5, 7, 5),
            #"somerville": (4, 5, 5, 6),
            #"artem92-": (7, 4, 3, 2),
            #"TheCaptain": (3, 8, 7, 6)
        }
    ## + guys from the online questionare
    ] + load_questionare.scan_d(QUESTIONARE_DIRECTORY)

    # Per-name means over all interviewees, computed once when the class
    # body is executed (nothing is printed).
    questionare_total = questionare_average(questionare_list, silent=True)
def get_all_player_names(limit=1):
    """Utility function returning the set of all players for whom at least
    @limit interviewees in Data.questionare_list gave an answer."""
    answer_counts = {}
    for answers in Data.questionare_list:
        for player in answers:
            if player in answer_counts:
                answer_counts[player] += 1
            else:
                answer_counts[player] = 1

    return set(player for player, seen in answer_counts.items() if seen >= limit)
def get_interesting_pros(style, top, bottom, without_dist=True):
    """Computes euclidean distance from a given @style to every pro in
    Data.questionare_total and returns a pair: the @top N closest pros and
    the @bottom N farthest pros, both ordered by increasing distance.

    If without_dist=False (default is True), each pro is returned as a
    (distance, name) tuple instead of just the name.
    """
    style_vec = numpy.array(style)

    # (distance, name) pairs sort by distance first; ties go by name.
    dist = sorted(
        (math.sqrt(numpy.sum(numpy.power(style_vec - numpy.array(pro_style), 2))),
         pro_name)
        for pro_name, pro_style in Data.questionare_total.items()
    )

    nearest = dist[:top]
    # dist[-0:] is the whole list, so asking for zero farthest pros has to
    # be handled separately to really yield nothing.
    farthest = dist[-bottom:] if bottom else []

    if not without_dist:
        return nearest, farthest

    return ([pro_name for _, pro_name in nearest],
            [pro_name for _, pro_name in farthest])
if __name__ == '__main__':
    def main(tex=False):
        """this prints the averaged questionare data, along with number of interviewees who
        answered for the particular pro; then the mean and standard deviation of
        every scale over the pros answered by at least two interviewees, and
        finally a table of pairwise correlations between the scales.

        With tex=True the first two parts are printed as tex table rows."""
        questionare_total = questionare_average(Data.questionare_list, cnt_limit=2,
                                                silent=False, tex=tex)

        pa = get_all_player_names(2)

        # One row per pro, one column per scale.
        vals = numpy.array([va for pn, va in questionare_total.items() if pn in pa])

        key2vec = {}
        for num, desc in enumerate(Data.questionare_annotations):
            sli = vals[:, num]
            key2vec[desc] = sli
            if not tex:
                print(u"%s\n mean: %2.3f \u00B1 %2.3f" % (desc, sli.mean(), sli.std()))
            else:
                # Same conventions as the rows printed by questionare_average:
                # \pm inside math mode and "\\" as the row terminator.
                print(u"%s & $%2.3f \\pm %2.3f$ \\\\" % (desc, sli.mean(), sli.std()))

        from utils.utils import pearson_coef

        qa = Data.questionare_annotations
        print("")
        print("PAIRWISE CORRELATIONS")
        print(" " + " | ".join("%15s" % an for an in ([''] + qa)))
        for i, row_key in enumerate(qa):
            cells = ["%15s | " % row_key]
            for j, col_key in enumerate(qa):
                if i > j:
                    # Only the upper triangle (and diagonal) is filled in.
                    cells.append("%15s |" % '')
                else:
                    p = pearson_coef(key2vec[row_key], key2vec[col_key])
                    cells.append("%15s |" % ("%.3f" % p))
            print(" ".join(cells))

    main()

    def test_style(style):
        """Prints the three pros closest to @style and the three farthest."""
        near, dist = get_interesting_pros(style, 3, 3)
        print("similar")
        for p in near:
            print(p)
        print("")
        print("distant")
        for p in dist:
            print(p)

    #test_style([1, 2, 3, 4])