knn_strength.py

   1 #!/usr/bin/python
   2 import sys
   3 from gostyle import *
   4 from math import sqrt
   5 from itertools import izip
   6 import numpy
   7
   8 from data_about_players import Data
   9
  10 if __name__ == '__main__':
  11         root_dir = '../pdb-gtl/'
  12         main_pat_filename = root_dir + 'all.pat'
  13         player_vector = Data.strength_linear_vector
  14         num_features = 400
  15
  16         ### Object creating input vector when called
  17         print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
  18         i = InputVectorGenerator(main_pat_filename, num_features)#, rescale=LogRescale)
  19
  20         #raw = root_dir + 'testpat_files'
  21         def list_dir(raw):
  22                 import os, random, shutil
  23                 ranks = os.listdir(raw)
  24                 tot={}
  25                 for rank in ranks:
  26                         plays = os.listdir(raw + '/'+ rank)
  27                         for play in plays:
  28                                 tot[raw + '/' + rank + '/' + play] = rank
  29                 return tot
  30
  31         train_set_dir = root_dir + 'train_set'
  32         test_set_dir = root_dir + 'rawpat_files_merged_test'
  33         train_dict = list_dir(train_set_dir)
  34         test_dict = list_dir(test_set_dir)
  35
  36         train_pl = []
  37         input_vectors_train = []
  38         for f, rank in train_dict.items():
  39                 try:
  40                         input_vectors_train += [i(f)]
  41                 except:
  42                         continue
  43                 train_pl += [rank]
  44
  45         input_vectors_test = []
  46         test_pl = []
  47         test_files = []
  48         for f, rank in test_dict.items():
  49                 try:
  50                         input_vectors_test += [i(f)]
  51                 except:
  52                         continue
  53
  54                 test_pl += [rank]
  55                 test_files += [f]
  56
  57
  58         #if len(input_vectors_train) == 0:
  59         #       print >>sys.stderr, "No reference vectors."
  60         #       sys.exit()
  61         if len(input_vectors_test) == 0:
  62                 print >>sys.stderr, "No vectors to process."
  63                 sys.exit()
  64
  65         ### PCA example usage
  66         # Change this to False, if you do not want to use PCA
  67         use_pca = True
  68         if use_pca:
  69                 # Create PCA object, trained on input_vectors
  70                 print >>sys.stderr, "Running PCA."
  71                 pca = PCA(input_vectors_train + input_vectors_test, reduce=True)
  72                 # Perform a PCA on input vectors
  73                 if input_vectors_train:
  74                         input_vectors_train = pca.process_list_of_vectors(input_vectors_train)
  75                 if input_vectors_test:
  76                         input_vectors_test = pca.process_list_of_vectors(input_vectors_test)
  77                 # Creates a Composed object that first generates an input vector
  78                 # and then performs a PCA analysis on it.
  79                 i = Compose(i, pca)
  80
  81         ### Object creating output vector when called;
  82         ref_dict = {}
  83         for name, input_vector in zip(train_pl, input_vectors_train):
  84                 ref_dict[tuple(input_vector)] = player_vector[name]
  85
  86         print "creating the knn"
  87         #print ref_dict
  88         oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.9, dist_mult=6.6)
  89
  90         #oknn = KNNOutputVectorGenerator(ref_dict, k=5, weight_param=0.2, dist_mult=10)
  91
  92         def revnorm(vec):
  93                 return [ (1-x) * 16.5 - 3.0 for x in vec ]
  94
  95         def rand_vect(k):
  96                 return list(2.0*numpy.random.random(k)-1.0)
  97
  98         print "running"
  99         # Create list of output vectors using weighted kNN algorithm approximating output_vector
 100         #output_vectors= [ revnorm(oknn(input_vector)) for input_vector in input_vectors_test ]
 101         output_vectors= [ revnorm(rand_vect(1)) for _ in input_vectors_test ]
 102         desired_vectors= [ revnorm(player_vector[rank]) for rank in test_pl ]
 103
 104         if True:
 105                 for f, out, des in zip(test_files, output_vectors, desired_vectors):
 106                         assert len(out) == 1
 107                         assert len(des) == 1
 108                         print f, "%2.3f ; %2.3f"%(out[0], des[0])
 109
 110                 print
 111                 diff = [ abs(x[0] - y[0]) for x,y in zip(output_vectors,desired_vectors) ]
 112                 zips = zip(diff, test_files)
 113                 zips.sort()
 114                 for diff,a in zips:
 115                         print a, " %2.3f"%(diff,)
 116
 117         errs =[]
 118         for o,d in zip(output_vectors, desired_vectors):
 119                 err = 0.0
 120                 for x,y in zip(o,d):
 121                         e = (1.0*x-1.0*y)**2
 122                         err += e
 123                 errs += [err]
 124
 125         mean = numpy.array(errs).mean()
 126         print "Mean square err:  " + "%2.3f ( = sd %2.3f)  "%(mean, sqrt(mean))