knn.py

   1 #!/usr/bin/python
   2 import sys
   3 from gostyle import *
   4 from math import sqrt
   5
   6 from data_about_players import Data
   7
   8 class KNNOutputVectorGenerator(VectorGenerator):
   9         """ k-NearestNeighbour output vector generator."""
  10         def __init__(self, ref_dict, k=2):
  11                 """
  12                         ref_dict is a dictionary of refence input/output vectors.
  13                         e.g. ref_dict= { (1.0,2.0):(9.0,16.0,21.0)
  14                 """
  15                 self.ref_dict = ref_dict
  16                 self.k = k
  17         def __call__(self, player_vector):
  18                 distance=[]
  19                 for ref_vec in ref_dict.keys():
  20                         distance.append((self.distance(ref_vec, player_vector), ref_vec))
  21                 distance.sort()
  22
  23                 ref_output_vecs = [ self.ref_dict[b] for a,b in distance[:self.k] ]
  24                 coefs = [ self.weight_fc(a) for a,b in distance[:self.k] ]
  25
  26                 return linear_combination(ref_output_vecs, coefs)
  27         def weight_fc(self, distance):
  28                 return 0.9 ** distance
  29         def distance(self, vec1, vec2):
  30                 if len(vec1) != len(vec2):
  31                         raise RuntimeError("Dimensions of vectors mismatch.")
  32                 return sqrt(sum([ (float(a) - float(b))**2 for a,b in zip(vec1,vec2)]))
  33
  34
  35 if __name__ == '__main__':
  36         main_pat_filename = Data.main_pat_filename
  37         filename_play_other = 'knn_other.data'
  38         filename_play_ref = 'knn_ref.data'
  39         filename_play_ref_orig = 'knn_ref_orig.data'
  40         num_features = 300
  41         k = 4
  42         players_all = Data.players_all
  43         players_ref = Data.player_vector.keys()
  44         players_other =  [ x for x in players_all if x not in players_ref ]
  45
  46         ### Object creating input vector when called
  47         print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
  48         i = InputVectorGenerator(main_pat_filename, num_features)
  49
  50         # Create list of input vectors
  51         input_vectors_ref = []
  52         for name in players_ref:
  53                 input_vectors_ref += [i(Data.pat_files_folder + name)]
  54         input_vectors_other = []
  55         for name in players_other:
  56                 input_vectors_other += [i(Data.pat_files_folder + name)]
  57
  58         if len(input_vectors_ref) == 0:
  59                 print >>sys.stderr, "No reference vectors."
  60                 sys.exit()
  61         if len(input_vectors_other) == 0:
  62                 print >>sys.stderr, "No vectors to process."
  63                 sys.exit()
  64
  65         ### PCA example usage
  66         # Change this to False, if you do not want to use PCA
  67         use_pca = False
  68         if use_pca:
  69                 # Create PCA object, trained on input_vectors
  70                 print >>sys.stderr, "Running PCA."
  71                 pca = PCA(input_vectors_ref + input_vectors_other, reduce=True)
  72                 # Perform a PCA on input vectors
  73                 input_vectors_ref = pca.process_list_of_vectors(input_vectors_ref)
  74                 input_vectors_other = pca.process_list_of_vectors(input_vectors_other)
  75                 # Creates a Composed object that first generates an input vector
  76                 # and then performs a PCA analysis on it.
  77                 i = Compose(i, pca)
  78         #print input_vectors_other[0]
  79         ### Object creating output vector when called;
  80         ref_dict = {}
  81         for name, input_vector in zip(players_ref, input_vectors_ref):
  82                 ref_dict[tuple(input_vector)] = Data.player_vector[name]
  83
  84         oknn = KNNOutputVectorGenerator(ref_dict, k=k)
  85
  86         # Create list of output vectors using weighted kNN algorithm approximating output_vector
  87         output_vectors_other = [ oknn(input_vector) for input_vector in input_vectors_other ]
  88         output_vectors_ref = [ oknn(input_vector) for input_vector in input_vectors_ref ]
  89
  90         def print_me( names, vecs, where):
  91                 if len(names) != len(vecs):
  92                         raise RuntimeError("Dimensions of vectors mismatch.")
  93
  94                 f = open(where, 'w')
  95                 print >>sys.stderr, "Saving output_vectors to file:", where
  96
  97                 for i in xrange(len(names)):
  98                         name_to_print = '_'.join(names[i].split())
  99                         print_vector([name_to_print] + list(vecs[i]), f)
 100
 101                 f.close()
 102
 103         print_me(players_ref, [Data.player_vector[name] for name in players_ref], filename_play_ref_orig)
 104         print_me(players_ref, output_vectors_ref, str(k)+filename_play_ref)
 105         print_me(players_other, output_vectors_other, str(k)+filename_play_other)
 106