gostyle_old, commit old
[gostyle.git] / knn_strength.py
bloba1eb16101b46890b1be9634845cb336e9b013b50
1 #!/usr/bin/python
2 import sys
3 from gostyle import *
4 from math import sqrt
5 from itertools import izip
6 import numpy
8 from data_about_players import Data
if __name__ == '__main__':
    # Locations of the pattern data; the data-set directories used further
    # down in this script are also built by appending to root_dir.
    root_dir = '../pdb-gtl/'
    main_pat_filename = root_dir + 'all.pat'

    # Table of target vectors; later code indexes it with the label
    # (sub-directory name) that list_dir() associates with each file.
    player_vector = Data.strength_linear_vector
    num_features = 400

    ### Callable object that creates an input vector for a given file
    sys.stderr.write(
        " ".join(("Creating input vector generator from main pat file:",
                  main_pat_filename)) + "\n")
    i = InputVectorGenerator(main_pat_filename, num_features)  # , rescale=LogRescale)

    # raw = root_dir + 'testpat_files'
def list_dir(raw):
    """Map every entry found one level below `raw` to its sub-directory name.

    `raw` is expected to contain one sub-directory per label (the callers
    name it `rank`), each holding the files that belong to that label.

    Args:
        raw: path of the directory to scan.

    Returns:
        A dict whose keys are paths of the form ``raw/<subdir>/<entry>``
        (joined with '/') and whose values are the ``<subdir>`` name.
        An empty `raw` directory yields an empty dict.

    Raises:
        OSError: if `raw` itself cannot be listed.
    """
    import os

    tot = {}
    for rank in os.listdir(raw):
        rank_dir = raw + '/' + rank
        # Only directories can hold files; a stray regular file next to the
        # label directories would otherwise make os.listdir() raise.
        if not os.path.isdir(rank_dir):
            continue
        for play in os.listdir(rank_dir):
            tot[rank_dir + '/' + play] = rank
    return tot
# Data-set directories; list_dir() returns {file path: sub-directory name}.
train_set_dir = root_dir + 'train_set'
test_set_dir = root_dir + 'rawpat_files_merged_test'
train_dict = list_dir(train_set_dir)
test_dict = list_dir(test_set_dir)

# Training set: input_vectors_train[n] belongs to label train_pl[n].
train_pl = []
input_vectors_train = []
for f, rank in train_dict.items():
    try:
        vector = i(f)
    except Exception as err:
        # Best effort: a file that cannot be turned into a vector is skipped,
        # but reported.  Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        sys.stderr.write("Skipping train file %s: %s\n" % (f, err))
        continue
    input_vectors_train.append(vector)
    train_pl.append(rank)

# Test set: the three lists below are kept index-aligned.
input_vectors_test = []
test_pl = []
test_files = []
for f, rank in test_dict.items():
    try:
        vector = i(f)
    except Exception as err:
        sys.stderr.write("Skipping test file %s: %s\n" % (f, err))
        continue
    input_vectors_test.append(vector)
    test_pl.append(rank)
    test_files.append(f)
# The equivalent check for the training vectors is commented out:
#if len(input_vectors_train) == 0:
#    print >>sys.stderr, "No reference vectors."
#    sys.exit()

# Nothing to evaluate without at least one test vector.
if not input_vectors_test:
    sys.stderr.write("No vectors to process.\n")
    sys.exit()
### Optional PCA stage
# Set use_pca to False to skip the PCA and keep the raw input vectors.
use_pca = True
if use_pca:
    sys.stderr.write("Running PCA.\n")
    # The PCA object is built from the training and the test vectors together.
    pca = PCA(input_vectors_train + input_vectors_test, reduce=True)
    # Run each non-empty list through the PCA (training list first);
    # an empty list is left exactly as it is.
    input_vectors_train, input_vectors_test = [
        pca.process_list_of_vectors(vectors) if vectors else vectors
        for vectors in (input_vectors_train, input_vectors_test)
    ]
    # Chain the input vector generator with the PCA, so that calling `i`
    # first generates an input vector and then applies the PCA to it.
    i = Compose(i, pca)
### Object creating output vector when called
# Reference data: each training input vector (as a tuple, so it can serve as
# a dict key) maps to the target vector of its label.  If two training
# vectors are equal, the later one wins.
ref_dict = dict(
    (tuple(vector), player_vector[label])
    for label, vector in zip(train_pl, input_vectors_train)
)

print("creating the knn")
# print ref_dict
oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.9, dist_mult=6.6)

# Alternative parameters:
# oknn = KNNOutputVectorGenerator(ref_dict, k=5, weight_param=0.2, dist_mult=10)
def revnorm(vec, scale=16.5, offset=3.0):
    """Map every component x of `vec` to ``(1 - x) * scale - offset``.

    With the default constants a component of 1.0 becomes -3.0 and a
    component of -1.0 becomes 30.0.

    Args:
        vec: iterable of numbers.
        scale: multiplier applied to ``1 - x`` (default 16.5).
        offset: value subtracted after scaling (default 3.0).

    Returns:
        A new list with the transformed components, in the same order.
    """
    return [(1 - x) * scale - offset for x in vec]
def rand_vect(k):
    """Return a list of `k` random numbers drawn by numpy.random.random
    and rescaled from [0, 1) to [-1, 1)."""
    samples = numpy.random.random(k)
    rescaled = 2.0 * samples - 1.0
    return list(rescaled)
print("running")
# One output vector per test vector.  The kNN approximation is currently
# disabled (line below); instead every output is a random one-element
# vector passed through revnorm().
#output_vectors= [ revnorm(oknn(input_vector)) for input_vector in input_vectors_test ]
output_vectors = []
for _ in input_vectors_test:
    output_vectors.append(revnorm(rand_vect(1)))

# Expected vectors: the target vector of each test file's label, passed
# through the same revnorm() transformation.
desired_vectors = [revnorm(player_vector[label]) for label in test_pl]
# Developer switch for the per-file report.
if True:
    # First listing: computed and desired value for every test file.
    for f, out, des in zip(test_files, output_vectors, desired_vectors):
        # Both vectors are expected to have exactly one component.
        assert len(out) == 1
        assert len(des) == 1
        print("%s %2.3f ; %2.3f" % (f, out[0], des[0]))

    print("")

    # Second listing: absolute error per file, smallest error first
    # (ties are ordered by file name).
    diff = [abs(x[0] - y[0]) for x, y in zip(output_vectors, desired_vectors)]
    # sorted() instead of zip(...).sort(): works whether zip() returns a list
    # or an iterator.  The loop variables use their own names so that the
    # `diff` list is not overwritten by a single float.
    for abs_err, a in sorted(zip(diff, test_files)):
        print("%s  %2.3f" % (a, abs_err))
# Squared error of every (output, desired) pair, summed over the components.
errs = [
    sum(((1.0 * x - 1.0 * y) ** 2 for x, y in zip(o, d)), 0.0)
    for o, d in zip(output_vectors, desired_vectors)
]

# Mean of the squared errors, printed together with its square root.
mean = numpy.array(errs).mean()
print("Mean square err: " + "%2.3f ( = sd %2.3f) " % (mean, sqrt(mean)))