tex: add fourth revision macro \rvvvv and mark the changes
[gostyle.git] / cross_validation_style.py
blob918439d3c962a46e9b7438d79609694d584c8c58
1 #!/usr/bin/python
2 import sys
3 import subprocess
4 import os
5 from gostyle import *
6 from math import sqrt
7 import numpy
9 from data_about_players import Data
11 if __name__ == '__main__':
12 main_pat_filename = Data.main_pat_filename
13 num_features = 400
14 '''
16 This script is used for Style classification
17 '''
19 # Neural net
20 #typ = 'nn'
21 #typ = 'knn'
22 # random
23 #typ = 'rnd'
24 typ = 'joint_nn_knn'
26 player_vector = Data.questionare_total
27 # players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
28 players_ignore = [ "Yi Ch'ang-ho 2004-", "Yi Ch'ang-ho 2005+" ]#,"Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
29 players_all = [ p for p in player_vector.keys() if p not in players_ignore ]
31 ### Object creating input vector when called
32 print "Creating input vector generator from main pat file:", main_pat_filename
33 i = InputVectorGenerator(main_pat_filename, num_features)
35 # Create list of input vectors
36 input_vectors = []
37 for name in players_all:
38 input_vectors += [i(Data.pat_files_folder + name)]
40 #print '"%s"'%(players_all[2],)
41 #print input_vectors[2]
43 if len(input_vectors) == 0:
44 print >>sys.stderr, "No reference vectors."
45 sys.exit()
47 ### PCA example usage
48 # Change this to False, if you do not want to use PCA
49 use_pca = True
50 if use_pca:
51 # Create PCA object, trained on input_vectors
52 print >>sys.stderr, "Running PCA."
53 pca = PCA(input_vectors, reduce=True)
54 # Perform a PCA on input vectors
55 input_vectors = pca.process_list_of_vectors(input_vectors)
56 # Creates a Composed object that first generates an input vector
57 # and then performs a PCA analysis on it.
58 i = Compose(i, pca)
60 ### n/4-fold cross validation
61 #bounds = random.sample(range(1,len(players_all)), len(players_all) / 10 )
62 bounds=[]
63 for x in range(1,len(players_all)/4):
64 bounds += [4*x for _ in [1] if 4*x < len(players_all)]
65 if not bounds:
66 print >>sys.stderr, "Pop too small."
67 sys.exit()
68 bounds.sort()
def norm(vec):
    """Return a new list with every component of *vec* rescaled linearly.

    Each component x becomes (x - 1) / 4.5 - 1.0, so 1 maps to -1.0 and
    10 maps to 1.0.  The input is not modified.
    """
    scaled = []
    for value in vec:
        scaled.append((value - 1) / 4.5 - 1.0)
    return scaled
def revnorm(vec):
    """Return a new list with every component of *vec* mapped back.

    Each component x becomes (x + 1) * 4.5 + 1.0, so -1.0 maps to 1.0 and
    1.0 maps to 10.0.  The input is not modified.
    """
    restored = []
    for value in vec:
        restored.append((value + 1) * 4.5 + 1.0)
    return restored
def rand_vect(k):
    """Return a list of *k* random values drawn via numpy.random.random.

    The samples from numpy.random.random(k) are stretched with
    2.0 * s - 1.0 before being returned as a plain list.
    """
    samples = numpy.random.random(k)
    stretched = 2.0 * samples - 1.0
    return list(stretched)
78 print >>sys.stderr, "Running Cross-validation."
79 print
80 errs=[ [] for _ in xrange(len(players_all)) ]
81 es=[]
82 esps=[[],[],[],[]]
83 sentinel=len(players_all)
84 number_runs = 200
85 for _ in xrange(number_runs):
86 pairs = zip(players_all, input_vectors)
87 random.shuffle(pairs)
88 players_all = [ a for a, b in pairs ]
89 input_vectors = [ b for a, b in pairs ]
90 prev=0
91 for b in bounds+[sentinel]:
92 validation_set = range(prev, b)
93 reference_set = range(0,prev) + range(b,sentinel)
94 if False:
95 print "Reference set :",
96 for pr in range(0, prev):
97 print "R",
98 for pr in validation_set:
99 print "_",
100 for pr in range(b, sentinel):
101 print "R",
102 print
103 prev = b
104 if typ == 'nn':
105 data =[]
106 for index in reference_set:
107 data.append( (input_vectors[index], norm(player_vector[players_all[index]])) )
110 ### We can enlarge the data set by adding linear combinations of input and output vectors
111 use_lin_combinations = False
112 if use_lin_combinations:
113 data += Combinator().combine(data)
115 print_set_to_file(data,'nn_cross.data')
117 nn = NeuralNet('nn_cross.data')
118 # Create list of output vectors using weighted kNN algorithm approximating output_vector
119 output_vectors = [ nn(input_vectors[index]) for index in validation_set ]
120 nn.close()
121 elif typ == 'knn':
122 ### Object creating output vector when called;
123 ref_dict = {}
124 for index in reference_set:
125 ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]])
128 # best pro InputVectorGenerator rescale=Rescale
129 oknn = KNNOutputVectorGenerator(ref_dict, k=3, weight_param=0.8)
131 # Create list of output vectors using weighted kNN algorithm approximating output_vector
132 output_vectors = [ oknn(input_vectors[index]) for index in validation_set ]
133 elif typ == 'joint_nn_knn':
134 data =[]
135 ref_dict = {}
136 for index in reference_set:
137 data.append( (input_vectors[index], norm(player_vector[players_all[index]])) )
138 ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]])
140 print_set_to_file(data,'nn_cross.data')
141 nn = NeuralNet('nn_cross.data')
142 # Create list of output vectors using weighted kNN algorithm approximating output_vector
143 ov_3 = [ nn(input_vectors[index]) for index in validation_set ]
145 nn.close()
147 oknn = KNNOutputVectorGenerator(ref_dict, k=3, weight_param=0.8)
148 ov_1 = [ oknn(input_vectors[index]) for index in validation_set ]
150 oknn = KNNOutputVectorGenerator(ref_dict, k=1, weight_param=0.8)
151 ov_2 = [ oknn(input_vectors[index]) for index in validation_set ]
153 oknn = KNNOutputVectorGenerator(ref_dict, k=1, weight_param=0.8)
154 ov_4 = [ oknn(input_vectors[index]) for index in validation_set ]
156 output_vectors = [ [a[0],b[1],c[2],d[3]] for a,b,c,d in zip(ov_1, ov_2, ov_3, ov_4)]
157 elif typ == 'rnd':
158 output_vectors = [ rand_vect(4) for index in validation_set ]
160 output_vectors = [ revnorm(x) for x in output_vectors ]
161 desired_vectors = [ player_vector[players_all[index]] for index in validation_set ]
162 #desired_vectors = [ norm(player_vector[players_all[index]]) for index in validation_set ]
164 if False:
165 for vec_set,text in [(output_vectors, "Output: "), (desired_vectors, "Desired:")]:
166 print text,
167 for o in vec_set:
168 for x in o:
169 print "%02.3f"%(x,),
170 print "; ",
171 print
173 for num1, (o,d) in zip(validation_set, zip(output_vectors, desired_vectors)):
174 err = 0.0
175 for num,(x,y) in enumerate(zip(o,d)):
176 e = (1.0*x-1.0*y)**2
177 esps[num]+=[e]
178 es += [e]
179 err += e
180 errs[num1] += [err]
182 if typ == 'joint_nn_knn':
183 print "Joint classifier:"
184 elif typ == 'knn':
185 print "k-NN classifier:"
186 elif typ == 'nn':
187 print "Neural network classifier:"
188 elif typ == 'rnd':
189 print "Random classifier:"
190 #print "Total square err: %2.3f"%( sum(errs) / number_runs,)
191 mar = numpy.array(errs)
192 mean = mar.mean()
193 print "Mean square err per player: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean))
194 mean = numpy.array(es).mean()
195 print "Mean square err per style: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean))
196 for num, style in enumerate(esps):
197 mean = numpy.array(style).mean()
198 print "Style %1d : %2.3f ( = sd %2.3f)"%(num+1, mean, sqrt(mean))
199 #print "%2.3f &"%(mean,),
201 #mean = numpy.array(es).mean()
202 #print "%2.3f &"%(mean),
203 #print "%2.3f \\\\\\hline"%(11.776 / mean)
205 #print
206 #print "Players sorted by mean square error:"
207 #p = zip([numpy.array(errs[p]).mean() for p in xrange(len(players_all)) ], players_all)
208 #p.sort()
209 #for err, name in p:
210 # print "%2.3f %s"%(err,name)
211 # #print "%s"%(name,)
212 # sys.exit()