data_about_players.py

   1 #!/usr/bin/python
   2
   3 import numpy
   4 import math
   5
   6 import load_questionare
   7
   8 from config import QUESTIONARE_DIRECTORY
   9
  10 """
This file covers extracting and averaging data from style questionnaires,
both from the fixed predefined results that were entered manually (below,
see the Data class) and from the online questionnaires that are loaded
from QUESTIONARE_DIRECTORY.
  14 """
  15
  16 def questionare_average(questionare_list, silent=False, tex=False, cnt_limit=1):
  17     """Averages data from different interviewees.
  18
  19     Parameters:
  20     cnt_limit -- minimum number of interviewees that filled the quest. for the
  21                  given pro to be included
  22     silent --    do not print anything
  23     tex --       print the results in the tex table linese, suitable for export"""
  24 # Otake Hideo          & $4.3 \pm 0.5$ & $3.0 \pm 0.0$ & $4.6 \pm 1.2$ & $3.6 \pm 0.9$ \\
  25     total={}
  26     count={}
  27     for questionare in questionare_list:
  28         for name in questionare.keys():
  29             if name in count:
  30                 count[name] += 1
  31             else:
  32                 count[name] = 1
  33                 total[name] = []
  34             total[name].append(questionare[name])
  35
  36     l=[]
  37     for name, counter in count.items():
  38         l.append( (counter, name) )
  39     l.sort()
  40     l.reverse()
  41     variance=[]
  42     result={}
  43
  44     for counter, name in l:
  45         if counter >= cnt_limit:
  46             means=[]
  47             if not silent:
  48                 print "%d: %20s"%(counter, name),
  49
  50             a = numpy.array(total[name]).transpose()
  51             for b in a:
  52                 means.append(b.mean())
  53                 if not silent:
  54                     if not tex:
  55                         print u"%2.3f \u00B1 %2.3f  "%(b.mean(), numpy.sqrt(b.var())),
  56                     else:
  57                         print u"& $%2.1f \pm %2.1f$"%(b.mean(), numpy.sqrt(b.var())),
  58                     variance.append(numpy.sqrt(b.var()))
  59             if not silent:
  60                 if not tex:
  61                     print
  62                 else:
  63                     print "\\\\"
  64                 variance.append(numpy.sqrt(b.var()))
  65             result[name] = means
  66
  67     if not silent:
  68         if not tex:
  69             print "Mean standard deviation is: %2.3f"%(numpy.array(variance).mean(),)
  70     return result
  71
  72 def questionare_average_raw(questionare_list):
  73     """currently not used"""
  74     import numpy
  75     total={}
  76     count={}
  77     for questionare in questionare_list:
  78         for name in questionare.keys():
  79             if name in count:
  80                 count[name] += 1
  81             else:
  82                 count[name] = 1
  83                 total[name] = []
  84             total[name].append(questionare[name])
  85
  86     l=[]
  87     for name, counter in count.items():
  88         l.append( (counter, name) )
  89     l.sort()
  90     l.reverse()
  91     variance=[]
  92     result={}
  93     for counter, name in l:
  94         if counter > 1:
  95             means=[]
  96             print "%s, %d,"%(name, counter),
  97             a = numpy.array(total[name]).transpose()
  98             for b in a:
  99                 means.append(b.mean())
 100                 print u"%2.3f,"%(b.mean()),
 101             print
 102             result[name] = means
 103     return result
 104
class Data:
    # Container for the hand-entered style estimates and their averages.
    # All attributes are class attributes, evaluated once when this class
    # body is executed.
    ###
    ### The following code consists of expert-based knowledge kindly supplied by
    ###      Alexander Dinerstein 3-pro, Motoki Noguchi 7-dan and Vit Brunner 4-dan.

    ### The vector stored at each name holds one value per scale, in the
    ### order given by this list.
    questionare_annotations =  ['territory', 'orthodox', 'aggressiveness', 'thickness']
    ## the orthodox scale corresponds to novelty, the "name" of the scale remains
    # the same for historical reasons --- the scales are defined the same though

    # a list of dictionaries (one dict per interviewee) that contain
    # style estimates

    questionare_list = [
    # questionare_vit_brun (presumably the answers of Vit Brunner)
        {
            "Chen Yaoye": (7, 5, 7, 6),
            "Cho Chikun": (9, 7, 7, 9),
            "Cho U": (4, 6, 7, 4),
            "Gu Li": (5, 6, 9, 5),
            "Ishida Yoshio": (6, 3, 5, 5),
            "Luo Xihe": (8, 4, 7, 7),
            "Ma Xiaochun": (5, 7, 7, 7),
            "O Meien": (3, 9, 6, 5),
            "Otake Hideo": (4, 3, 6, 5),
            "Rui Naiwei": (5, 6, 8, 5),
            "Sakata Eio": (6, 4, 8, 6),
            "Takemiya Masaki": (1, 4, 7, 2),
            # The era-specific rows are disabled; the active row below
            # carries the same values as the 2005+ one.
            #"Yi Ch'ang-ho 2004-": (7, 6, 4, 4),
            #"Yi Ch'ang-ho 2005+": (7, 6, 6, 4),
            "Yi Ch'ang-ho": (7, 6, 6, 4),
            "Yi Se-tol": (6, 5, 9, 5),
            "Yoda Norimoto": (4, 4, 7, 3)
        },
    # questionare_motoki_noguchi (presumably the answers of Motoki Noguchi)
        {
            "Cho Chikun": (8, 9, 8, 8 ),
            "Cho U": (9, 7, 6, 8),
            "Gu Li": (7, 8, 10, 4 ),
            "Ishida Yoshio": (9, 6, 2, 6),
            "Luo Xihe": (6, 8, 9, 7 ),
            "Ma Xiaochun": (9, 6, 7, 8),
            "O Meien": (1, 10, 10, 2 ),
            "Otake Hideo": (4, 3, 5, 3),
            "Rui Naiwei": (6, 6, 10, 2),
            "Sakata Eio": (10, 5, 6, 10),
            "Takemiya Masaki": (2,6, 6, 1),
            #"Yi Ch'ang-ho 2004-": (8, 3, 2, 3),
            # P: I made the row below from the 2004- entry
            "Yi Ch'ang-ho": (8, 3, 2, 3),
            "Yi Se-tol": (5, 10, 10, 8 ),
            "Yoda Norimoto": (8, 2, 2, 5),
            "Fujisawa Hideyuki": (4, 8, 7, 4 ),
            "Go Seigen": (8, 10, 9, 6),
            "Hane Naoki": (8, 2, 4, 6 ),
            "Honinbo Dosaku": (2, 10, 8, 5 ),
            "Honinbo Shusaku": (8, 3, 2, 6),
            "Honinbo Shuwa": (10, 8, 2, 10),
            "Kato Masao": (2,3, 9, 4),
            "Kobayashi Koichi": (8, 3, 3, 6),
            "Miyazawa Goro": (1, 10, 10, 3),
            "Takao Shinji": (4, 3, 7, 4 ),
            "Yamashita Keigo": (2, 8, 10, 4 ),
            "Yuki Satoshi": (2, 8, 10, 4)
        },
    # questionare_alex_dinner (presumably the answers of Alexander Dinerstein)
        {
            "Chen Yaoye": (5, 3, 5, 5),
            "Cho Chikun": (10, 7, 5, 10),
            "Cho U": (9, 5, 3, 7),
            "Gu Li": (5, 7, 8, 3),
            "Ishida Yoshio": (9, 6, 3, 5),
            "Luo Xihe": (8, 10, 7, 4),
            "Ma Xiaochun": (10, 6, 3, 9),
            "O Meien": (4, 10, 9, 4),
            "Otake Hideo": (5, 3, 3, 3),
            "Rui Naiwei": (3, 5, 9, 3),
            "Sakata Eio": (7, 5, 8, 8),
            "Takemiya Masaki": (1, 9, 8, 1),
            #"Yi Ch'ang-ho 2004-": (6, 6, 2, 1),
            #"Yi Ch'ang-ho 2005+": (5, 4, 5, 3),
            # the era-specific rows are commented out because the active row
            # below duplicates the 2005+ one
            "Yi Ch'ang-ho": (5, 4, 5, 3),
            "Yi Se-tol": (5, 5, 9, 7),
            "Yoda Norimoto": (7, 7, 4, 2),
            "Chen Zude": (3, 8, 6, 5),
            "Cho Tae-hyeon": (1, 4, 4, 2),
            "Fujisawa Hideyuki": (3, 10, 7, 4),
            "Go Seigen": (4, 8, 7, 4),
            "Hane Naoki": (7, 3, 4, 3),
            "Jie Li": (5, 3, 5, 4),
            "Kato Masao": (3, 6, 10, 4),
            "Kobayashi Koichi": (10, 2, 2, 5),
            "Miyazawa Goro": (2, 10, 9, 5),
            "Nie Weiping": (3, 7, 8, 4),
            "Shao Zhenzhong": (4, 5, 5, 4),
            "Suzuki Goro": (4, 7, 5, 5),
            "Takao Shinji": (6, 4, 4, 5),
            "Wu Songsheng": (2, 10, 7, 4),
            "Yamashita Keigo": (2, 10, 9, 2),
            "Yuki Satoshi": (4, 9, 8, 5),
            # The remaining rows are disabled (NOTE(review): the names look
            # like online nicknames rather than pros -- confirm).
            #"breakfast": (7, 7, 3, 4),
            #"rapyuta/daien": (4, 7, 6, 5),
            #"MilanMilan": (5, 5, 6, 4),
            #"roln111-": (6, 5, 7, 5),
            #"somerville": (4, 5, 5, 6),
            #"artem92-": (7, 4, 3, 2),
            #"TheCaptain": (3, 8, 7, 6)
        }
        ## + the answers from the online questionnaire; scan_d presumably
        ## returns a list of dicts shaped like the ones above -- TODO confirm
        ] + load_questionare.scan_d(QUESTIONARE_DIRECTORY)

    # Per-pro averages over all interviewees.  cnt_limit keeps its default of
    # 1 here, so a single answer is enough for a pro to be included.
    questionare_total = questionare_average(questionare_list, silent=True)
 219
 220 def get_all_player_names(limit=1):
 221     """Utility function returning a list of all players that are answered by
 222     at least @limit interviewees"""
 223     pc = {}
 224
 225     for q in Data.questionare_list:
 226         for p in q.keys():
 227             pc[p] = pc.get(p, 0) + 1
 228
 229     ps = set( p for p in pc.keys() if pc[p] >= limit )
 230
 231     return ps
 232
 233 def get_interesting_pros(style, top, bottom, without_dist=True):
 234     """Computes euclidean distance from a given @style and returns @top N closest
 235     pros and @bottom N farthest pros. If without_dist=False (default is True),
 236     the function returns the pros in a tuple with the distance itself."""
 237     style_vec = numpy.array(style)
 238
 239     dist = [
 240         ( math.sqrt( sum(numpy.power(style_vec - numpy.array(pro_style),  2))),
 241         pro_name) for pro_name, pro_style in Data.questionare_total.iteritems()
 242     ]
 243     dist.sort()
 244     if not without_dist:
 245         return dist[:top], dist[-bottom:]
 246
 247     def second((a, b)):
 248         return b
 249
 250     return map(second, dist[:top]), map(second, dist[-bottom:])
 251
 252 if __name__ == '__main__':
 253     def main(tex=False):
 254         """this prints the averaged questionare data, along with number of interviewees who
 255         answered for the particular pro"""
 256         #print get_all_player_names(4)
 257
 258         questionare_total = questionare_average(Data.questionare_list, cnt_limit=2, silent=False, tex=tex)
 259
 260         pa = get_all_player_names(2)
 261
 262         vals = numpy.array([ va for pn, va in questionare_total.iteritems() if pn in pa ])
 263         #print vals.shape
 264
 265         key2vec = {}
 266         for desc, num in zip(Data.questionare_annotations, range(4)):
 267             sli = vals[:, num]
 268             key2vec[desc] = sli
 269             if not tex:
 270                 print u"%s\n  mean: %2.3f \u00B1 %2.3f"%(desc, sli.mean(),  sli.std())
 271             else:
 272                 print u"%s & %2.3f \\pm %2.3f \\"%(desc, sli.mean(),  sli.std())
 273
 274         from utils.utils import pearson_coef
 275
 276         qa = Data.questionare_annotations
 277         print
 278         print "PAIRWISE CORRELATIONS"
 279         print '',
 280         print " | ".join("%15s"%an for an in (['']+qa))
 281         for i in xrange(len(qa)):
 282             print "%15s | " % qa[i],
 283             for j in xrange(len(Data.questionare_annotations)):
 284                 if i > j:
 285                     print "%15s |" % ('' ),
 286                 else:
 287                     p = pearson_coef(key2vec[qa[i]], key2vec[qa[j]])
 288                     print "%15s |" % ( "%.3f" % p ),
 289             print
 290
 291     main()
 292
 293     ##
 294     ##
 295
 296     def test_style(style):
 297         near, dist = get_interesting_pros(style,  3, 3)
 298         print "similar"
 299         for p in near:
 300             print p
 301         print
 302         print "distant"
 303         for p in dist:
 304             print p
 305
 306     #test_style([1, 2, 3, 4])
 307
 308
 309