# data_about_players: Info about median years of selected players
# [gostyle.git] / knn_cross.py
# blob 814ac31534b0b3ab066505366fdbf530d84f7852
1 #!/usr/bin/python
2 import sys
3 from gostyle import *
4 from math import sqrt
5 import numpy
7 from data_about_players import Data
9 from knn import KNNOutputVectorGenerator
11 if __name__ == '__main__':
12 main_pat_filename = Data.main_pat_filename
13 num_features = 400
14 #k = 5
15 player_vector = Data.questionare_total
16 players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
17 players_all = [ p for p in player_vector.keys() if p not in players_ignore ]
19 ### Object creating input vector when called
20 print "Creating input vector generator from main pat file:", main_pat_filename
21 print
22 i = InputVectorGenerator(main_pat_filename, num_features)
24 # Create list of input vectors
25 input_vectors = []
26 for name in players_all:
27 input_vectors += [i(Data.pat_files_folder + name)]
29 #print '"%s"'%(players_all[2],)
30 #print input_vectors[2]
32 if len(input_vectors) == 0:
33 print >>sys.stderr, "No reference vectors."
34 sys.exit()
36 ### PCA example usage
37 # Change this to False, if you do not want to use PCA
38 use_pca = False
39 if use_pca:
40 # Create PCA object, trained on input_vectors
41 print >>sys.stderr, "Running PCA."
42 pca = PCA(input_vectors, reduce=True)
43 # Perform a PCA on input vectors
44 input_vectors = pca.process_list_of_vectors(input_vectors)
45 # Creates a Composed object that first generates an input vector
46 # and then performs a PCA analysis on it.
47 i = Compose(i, pca)
49 ### n/4-fold cross validation
50 #bounds = random.sample(range(1,len(players_all)), len(players_all) / 10 )
51 bounds=[]
52 for x in range(1,len(players_all)/4):
53 bounds += [4*x for _ in [1] if 4*x < len(players_all)]
54 if not bounds:
55 print >>sys.stderr, "Pop too small."
56 sys.exit()
57 bounds.sort()
59 r = Rescale(-1.0,1.0)
60 errs=[]
61 es=[]
62 sentinel=len(players_all)
63 prev=0
64 for b in bounds+[sentinel]:
65 validation_set = range(prev, b)
66 reference_set = range(0,prev) + range(b,sentinel)
67 print "Reference set :",
68 for pr in range(0, prev):
69 print "R",
70 for pr in validation_set:
71 print "_",
72 for pr in range(b, sentinel):
73 print "R",
74 print
75 prev = b
77 ### Object creating output vector when called;
78 ref_dict = {}
79 for index in reference_set:
80 ref_dict[tuple(input_vectors[index])] = r(player_vector[players_all[index]])
82 #oknn = KNNOutputVectorGenerator(ref_dict, k=5, weight_param=0.799)
83 oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.8)
85 # Create list of output vectors using weighted kNN algorithm approximating output_vector
86 def rand_vect(k):
87 return list(10*numpy.random.random(k))
88 output_vectors = [ oknn(input_vectors[index]) for index in validation_set ]
89 #output_vectors = [ r(rand_vect(4)) for index in validation_set ]
90 desired_vectors = [ r(player_vector[players_all[index]]) for index in validation_set ]
92 for vec_set,text in [(output_vectors, "Output: "), (desired_vectors, "Desired:")]:
93 print text,
94 for o in vec_set:
95 for x in o:
96 print "%02.3f"%(x,),
97 print "; ",
98 print
100 for o,d in zip(output_vectors, desired_vectors):
101 err = 0.0
102 for x,y in zip(o,d):
103 e = (1.0*x-1.0*y)**2
104 es += [e]
105 err += e
106 errs += [err]
108 #for e in errs[-4:]:
109 # print "%2.3f"%(e,),
110 print
113 print "Total square err: %2.3f"%( sum(errs),)
114 mean = numpy.array(errs).mean()
115 print "Mean square err per player: " + u"%2.3f ( = sd \u00B1 %2.3f) "%(mean, sqrt(mean))
116 mean = numpy.array(es).mean()
117 print "Mean square err per style: " + u"%2.3f ( = sd \u00B1 %2.3f) "%(mean, sqrt(mean))
118 print
119 print "Players sorted by mean square error:"
120 p = zip(errs, players_all)
121 p.sort()
122 for err, name in p:
123 print "%2.3f %s"%(err,name)
124 #print "%s"%(name,)
125 sys.exit()