#!/usr/bin/python
# gostyle: the basic library -- data_about_players.py

import numpy
import math

import load_questionare

from config import QUESTIONARE_DIRECTORY

def questionare_average(questionare_list, silent=False, tex=False, cnt_limit=1):
    """Averages the questionnaire answers for each player.

    Returns a dict mapping player name -> list of per-scale means; only players
    covered by at least cnt_limit questionnaires are included.
    """
    # example of a single line of the tex output:
    # Otake Hideo & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\
    total = {}
    count = {}
    for questionare in questionare_list:
        for name in questionare.keys():
            if name in count:
                count[name] += 1
            else:
                count[name] = 1
                total[name] = []
            total[name].append(questionare[name])

    l = []
    for name, counter in count.items():
        l.append((counter, name))
    l.sort()
    l.reverse()
    variance = []
    result = {}

    for counter, name in l:
        if counter >= cnt_limit:
            means = []
            if not silent:
                print "%d: %20s" % (counter, name),

            a = numpy.array(total[name]).transpose()
            for b in a:
                means.append(b.mean())
                if not silent:
                    if not tex:
                        print u"%2.3f \u00B1 %2.3f " % (b.mean(), numpy.sqrt(b.var())),
                    else:
                        print u"& $%2.1f \pm %2.1f$" % (b.mean(), numpy.sqrt(b.var())),
                variance.append(numpy.sqrt(b.var()))
            if not silent:
                if not tex:
                    print
                else:
                    print "\\\\"

            result[name] = means

    if not silent:
        if not tex:
            print "Mean standard deviation is: %2.3f" % (numpy.array(variance).mean(),)

    return result
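# A usage sketch (hypothetical questionnaires, not the real data below): with
# cnt_limit=1 every listed player is kept and the per-scale means are returned,
#
#   qs = [{"Otake Hideo": (4, 3, 6, 5)}, {"Otake Hideo": (5, 3, 5, 3)}]
#   questionare_average(qs, silent=True)
#   # -> {'Otake Hideo': [4.5, 3.0, 5.5, 4.0]}
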
def questionare_average_raw(questionare_list):
    """Prints the averages as comma-separated values: name, count, per-scale means."""
    import numpy
    total = {}
    count = {}
    for questionare in questionare_list:
        for name in questionare.keys():
            if name in count:
                count[name] += 1
            else:
                count[name] = 1
                total[name] = []
            total[name].append(questionare[name])

    l = []
    for name, counter in count.items():
        l.append((counter, name))
    l.sort()
    l.reverse()
    variance = []
    result = {}
    for counter, name in l:
        if counter > 1:
            means = []
            print "%s, %d," % (name, counter),
            a = numpy.array(total[name]).transpose()
            for b in a:
                means.append(b.mean())
                print u"%2.3f," % (b.mean()),
            print
            result[name] = means
    return result
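# Usage sketch: questionare_average_raw(Data.questionare_list) would print one
# CSV-like line per player seen more than once, roughly
#   Otake Hideo, 3, 4.333, 3.000, 4.667, 3.667,
# (illustrative numbers only; the real output depends on the loaded questionnaires).
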
class Data:
    ### Explicit list of players
    ###
    ### The following code consists of expert-based knowledge kindly supplied by
    ### Alexander Dinerstein (3-pro), Motoki Noguchi (7-dan) and Vit Brunner (4-dan).
    ###
    ### The vector at each name corresponds with
    ### (territory, orthodox, aggressiveness, thickness):
    questionare_annotations = ['territory', 'orthodox', 'aggressiveness', 'thickness']
    ## the orthodox scale corresponds to novelty; the "name" of the scale remains
    ## the same for historical reasons --- the scales are defined the same though

    questionare_list = [
        #questionare_vit_brun
        {
        "Chen Yaoye": (7, 5, 7, 6),
        "Cho Chikun": (9, 7, 7, 9),
        "Cho U": (4, 6, 7, 4),
        "Gu Li": (5, 6, 9, 5),
        "Ishida Yoshio": (6, 3, 5, 5),
        "Luo Xihe": (8, 4, 7, 7),
        "Ma Xiaochun": (5, 7, 7, 7),
        "O Meien": (3, 9, 6, 5),
        "Otake Hideo": (4, 3, 6, 5),
        "Rui Naiwei": (5, 6, 8, 5),
        "Sakata Eio": (6, 4, 8, 6),
        "Takemiya Masaki": (1, 4, 7, 2),
        #"Yi Ch'ang-ho 2004-": (7, 6, 4, 4),
        #"Yi Ch'ang-ho 2005+": (7, 6, 6, 4),
        "Yi Ch'ang-ho": (7, 6, 6, 4),
        "Yi Se-tol": (6, 5, 9, 5),
        "Yoda Norimoto": (4, 4, 7, 3)
        },
        # questionare_motoki_noguchi
        {
        "Cho Chikun": (8, 9, 8, 8),
        "Cho U": (9, 7, 6, 8),
        "Gu Li": (7, 8, 10, 4),
        "Ishida Yoshio": (9, 6, 2, 6),
        "Luo Xihe": (6, 8, 9, 7),
        "Ma Xiaochun": (9, 6, 7, 8),
        "O Meien": (1, 10, 10, 2),
        "Otake Hideo": (4, 3, 5, 3),
        "Rui Naiwei": (6, 6, 10, 2),
        "Sakata Eio": (10, 5, 6, 10),
        "Takemiya Masaki": (2, 6, 6, 1),
        #"Yi Ch'ang-ho 2004-": (8, 3, 2, 3),
        # P: I took this from the 2004- entry
        "Yi Ch'ang-ho": (8, 3, 2, 3),
        "Yi Se-tol": (5, 10, 10, 8),
        "Yoda Norimoto": (8, 2, 2, 5),
        "Fujisawa Hideyuki": (4, 8, 7, 4),
        "Go Seigen": (8, 10, 9, 6),
        "Hane Naoki": (8, 2, 4, 6),
        "Honinbo Dosaku": (2, 10, 8, 5),
        "Honinbo Shusaku": (8, 3, 2, 6),
        "Honinbo Shuwa": (10, 8, 2, 10),
        "Kato Masao": (2, 3, 9, 4),
        "Kobayashi Koichi": (8, 3, 3, 6),
        "Miyazawa Goro": (1, 10, 10, 3),
        "Takao Shinji": (4, 3, 7, 4),
        "Yamashita Keigo": (2, 8, 10, 4),
        "Yuki Satoshi": (2, 8, 10, 4)
        },
        #questionare_alex_dinner
        {
        "Chen Yaoye": (5, 3, 5, 5),
        "Cho Chikun": (10, 7, 5, 10),
        "Cho U": (9, 5, 3, 7),
        "Gu Li": (5, 7, 8, 3),
        "Ishida Yoshio": (9, 6, 3, 5),
        "Luo Xihe": (8, 10, 7, 4),
        "Ma Xiaochun": (10, 6, 3, 9),
        "O Meien": (4, 10, 9, 4),
        "Otake Hideo": (5, 3, 3, 3),
        "Rui Naiwei": (3, 5, 9, 3),
        "Sakata Eio": (7, 5, 8, 8),
        "Takemiya Masaki": (1, 9, 8, 1),
        #"Yi Ch'ang-ho 2004-": (6, 6, 2, 1),
        #"Yi Ch'ang-ho 2005+": (5, 4, 5, 3),
        # commented because duplicates 2005+
        "Yi Ch'ang-ho": (5, 4, 5, 3),
        "Yi Se-tol": (5, 5, 9, 7),
        "Yoda Norimoto": (7, 7, 4, 2),
        "Chen Zude": (3, 8, 6, 5),
        "Cho Tae-hyeon": (1, 4, 4, 2),
        "Fujisawa Hideyuki": (3, 10, 7, 4),
        "Go Seigen": (4, 8, 7, 4),
        "Hane Naoki": (7, 3, 4, 3),
        "Jie Li": (5, 3, 5, 4),
        "Kato Masao": (3, 6, 10, 4),
        "Kobayashi Koichi": (10, 2, 2, 5),
        "Miyazawa Goro": (2, 10, 9, 5),
        "Nie Weiping": (3, 7, 8, 4),
        "Shao Zhenzhong": (4, 5, 5, 4),
        "Suzuki Goro": (4, 7, 5, 5),
        "Takao Shinji": (6, 4, 4, 5),
        "Wu Songsheng": (2, 10, 7, 4),
        "Yamashita Keigo": (2, 10, 9, 2),
        "Yuki Satoshi": (4, 9, 8, 5),
        #"breakfast": (7, 7, 3, 4),
        #"rapyuta/daien": (4, 7, 6, 5),
        #"MilanMilan": (5, 5, 6, 4),
        #"roln111-": (6, 5, 7, 5),
        #"somerville": (4, 5, 5, 6),
        #"artem92-": (7, 4, 3, 2),
        #"TheCaptain": (3, 8, 7, 6)
        }
        ## + guys from the online questionare
        ] + load_questionare.scan_d(QUESTIONARE_DIRECTORY)

    questionare_total = questionare_average(questionare_list, silent=True)
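    # Data.questionare_total maps each player to the averaged style vector, ordered
    # as questionare_annotations.  A sketch (actual values depend on the
    # questionnaires found in QUESTIONARE_DIRECTORY):
    #
    #   Data.questionare_total["Otake Hideo"]   # e.g. [4.333, 3.0, 4.667, 3.667]
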
def get_all_player_names(limit=1):
    """Returns the set of players that appear in at least `limit` questionnaires."""
    pc = {}

    for q in Data.questionare_list:
        for p in q.keys():
            pc[p] = pc.get(p, 0) + 1

    ps = set(p for p in pc.keys() if pc[p] >= limit)

    return ps
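# Usage sketch: get_all_player_names(2) returns the set of pros covered by at
# least two questionnaires, e.g. a set containing "Cho Chikun", "Otake Hideo", ...
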
def get_interesting_pros(style, top, bottom, without_dist=True):
    """Returns the `top` nearest and `bottom` most distant pros to the given
    style vector, measured by Euclidean distance over the averaged answers."""
    style_vec = numpy.array(style)

    dist = [
        (math.sqrt(sum(numpy.power(style_vec - numpy.array(pro_style), 2))),
         pro_name) for pro_name, pro_style in Data.questionare_total.iteritems()
        ]

    dist.sort()
    if not without_dist:
        return dist[:top], dist[-bottom:]

    def second((a, b)):
        return b

    return map(second, dist[:top]), map(second, dist[-bottom:])
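# Usage sketch: for a style vector ordered as (territory, orthodox, aggressiveness,
# thickness), e.g.
#
#   near, far = get_interesting_pros([3, 9, 6, 5], top=3, bottom=3)
#
# near/far are lists of player names closest/farthest in Euclidean distance
# (or (distance, name) pairs when without_dist=False).
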
if __name__ == '__main__':
    def main(tex=False):
        """Prints the averaged questionare data, along with the number of
        interviewees who answered for the particular pro."""
        #print get_all_player_names(4)

        questionare_total = questionare_average(Data.questionare_list, cnt_limit=2, silent=False, tex=tex)

        pa = get_all_player_names(2)

        vals = numpy.array([va for pn, va in questionare_total.iteritems() if pn in pa])
        #print vals.shape

        key2vec = {}
        for desc, num in zip(Data.questionare_annotations, range(4)):
            sli = vals[:, num]
            key2vec[desc] = sli
            if not tex:
                print u"%s\n mean: %2.3f \u00B1 %2.3f" % (desc, sli.mean(), sli.std())
            else:
                print u"%s & %2.3f \\pm %2.3f \\\\" % (desc, sli.mean(), sli.std())

        from utils.utils import pearson_coef

        qa = Data.questionare_annotations
        print
        print "PAIRWISE CORRELATIONS"
        print '',
        print " | ".join("%15s" % an for an in ([''] + qa))
        for i in xrange(len(qa)):
            print "%15s | " % qa[i],
            for j in xrange(len(Data.questionare_annotations)):
                if i > j:
                    print "%15s |" % (''),
                else:
                    p = pearson_coef(key2vec[qa[i]], key2vec[qa[j]])
                    print "%15s |" % ("%.3f" % p),
            print

    main()
    def test_style(style):
        near, dist = get_interesting_pros(style, 3, 3)
        print "similar"
        for p in near:
            print p
        print
        print "distant"
        for p in dist:
            print p

    #test_style([1, 2, 3, 4])