tex: Joe's Go skills ;)
[gostyle.git] / knn_cross.py
blob 0bfc25ac91e910a185b201b5a8f588b70ba74958
1 #!/usr/bin/python
2 import sys
3 import subprocess
4 import os
5 from gostyle import *
6 from math import sqrt
7 import numpy
9 from data_about_players import Data
11 from knn import KNNOutputVectorGenerator
class NeuralNet:
    """Wrapper around the external gnet trainer/runner binaries.

    Training (``gnet_train``) runs synchronously in the constructor and
    writes the trained network to ``gonet.net``; evaluation requests are
    then served by a long-running ``gnet_run`` subprocess communicated
    with over its stdin/stdout pipes.
    """
    def __init__( self, filename ):
        """Train a network on `filename` and start the evaluation process.

        Raises RuntimeError when the trainer exits with a nonzero status;
        previously the exit code was captured but never checked, so a
        failed training run was silently ignored and the gnet_run launch
        below would misbehave later on a stale/missing gonet.net.
        """
        self.null = open('/dev/null','w')
        # 3 layers, 30 neurons, 30 passes(?), epsilon 0.0003 -- gnet_train
        # flag meanings presumed from names; confirm against gnet docs.
        s = "./gnet/gnet_train -l 3 -n 30 -p 30 -e 0.0003 -o gonet.net ./"+filename
        args = s.split()
        ret = subprocess.call(args,stdout=self.null)
        if ret != 0:
            self.null.close()
            raise RuntimeError("gnet_train failed with exit status %d" % ret)
        s = "./gnet/gnet_run gonet.net"
        args = s.split()
        self.p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=self.null)
    def __call__(self, vector):
        """Evaluate the trained net on `vector` (iterable of numbers).

        Returns the network's output as a list of floats, read back as a
        single whitespace-separated line from gnet_run.
        """
        self.p.stdin.write(' '.join([str(a) for a in vector]) + '\n')
        # Flush explicitly so gnet_run sees the request even if the pipe
        # is ever opened in buffered mode; readline() below would
        # otherwise deadlock waiting for output.
        self.p.stdin.flush()
        a = self.p.stdout.readline()
        return [ float(num) for num in a.split()]
    def close(self):
        """Release the pipes; gnet_run exits on stdin EOF."""
        self.p.stdin.close()
        self.p.stdout.close()
        self.null.close()
if __name__ == '__main__':
    # Cross-validated evaluation of Go playing-style prediction:
    # input vectors are per-player pattern statistics, desired outputs are
    # the four questionnaire style scores from data_about_players.Data.
    main_pat_filename = Data.main_pat_filename
    num_features = 400  # number of pattern features per input vector

    # Classifier under evaluation -- exactly one `typ` is active:
    # Neural net
    #typ = 'nn'
    #typ = 'knn'
    # random
    #typ = 'rnd'
    typ = 'joint_nn_knn'

    player_vector = Data.questionare_total
    # players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
    players_ignore = [ "Yi Ch'ang-ho 2004-", "Yi Ch'ang-ho 2005+" ]#,"Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
    players_all = [ p for p in player_vector.keys() if p not in players_ignore ]

    ### Object creating input vector when called
    print "Creating input vector generator from main pat file:", main_pat_filename
    i = InputVectorGenerator(main_pat_filename, num_features)

    # Create list of input vectors, one per player (one pattern file each)
    input_vectors = []
    for name in players_all:
        input_vectors += [i(Data.pat_files_folder + name)]

    #print '"%s"'%(players_all[2],)
    #print input_vectors[2]

    if len(input_vectors) == 0:
        print >>sys.stderr, "No reference vectors."
        sys.exit()

    ### PCA example usage
    # Change this to False, if you do not want to use PCA
    use_pca = True
    if use_pca:
        # Create PCA object, trained on input_vectors
        print >>sys.stderr, "Running PCA."
        pca = PCA(input_vectors, reduce=True)
        # Perform a PCA on input vectors
        input_vectors = pca.process_list_of_vectors(input_vectors)
        # Creates a Composed object that first generates an input vector
        # and then performs a PCA analysis on it.
        i = Compose(i, pca)

    ### n/4-fold cross validation
    # Fold boundaries at every 4th player index (Python 2 integer division),
    # so each held-out validation fold contains 4 consecutive players.
    #bounds = random.sample(range(1,len(players_all)), len(players_all) / 10 )
    bounds=[]
    for x in range(1,len(players_all)/4):
        bounds += [4*x for _ in [1] if 4*x < len(players_all)]
    if not bounds:
        print >>sys.stderr, "Pop too small."
        sys.exit()
    bounds.sort()

    def norm(vec):
        # Map questionnaire scores linearly into [-1, 1]
        # (assumes a 1..10 input scale -- TODO confirm against Data)
        return [ (x - 1) / 4.5 - 1.0 for x in vec ]
    def revnorm(vec):
        # Exact inverse of norm(): map [-1, 1] back to the original scale
        return [ (x + 1) * 4.5 + 1.0 for x in vec ]

    def rand_vect(k):
        # Uniform random vector in [-1, 1)^k -- the random baseline
        return list(2.0*numpy.random.random(k)-1.0)

    print >>sys.stderr, "Running Cross-validation."
    print
    errs=[ [] for _ in xrange(len(players_all)) ]  # per-player total squared errors
    es=[]  # all per-style squared errors, pooled
    esps=[[],[],[],[]]  # squared errors split by the 4 style dimensions
    sentinel=len(players_all)
    number_runs = 1
    for _ in xrange(number_runs):
        prev=0
        for b in bounds+[sentinel]:
            # Players [prev, b) form the validation fold; the rest are references.
            validation_set = range(prev, b)
            reference_set = range(0,prev) + range(b,sentinel)
            if False:  # debug: visualize the reference/validation split
                print "Reference set :",
                for pr in range(0, prev):
                    print "R",
                for pr in validation_set:
                    print "_",
                for pr in range(b, sentinel):
                    print "R",
                print
            prev = b
            if typ == 'nn':
                data =[]
                for index in reference_set:
                    data.append( (input_vectors[index], norm(player_vector[players_all[index]])) )

                ### We can enlarge the data set by adding linear combinations of input and output vectors
                use_lin_combinations = False
                if use_lin_combinations:
                    data += Combinator().combine(data)

                print_set_to_file(data,'nn_cross.data')

                nn = NeuralNet('nn_cross.data')
                # Predict an output vector for each validation player
                # with the freshly trained neural network
                output_vectors = [ nn(input_vectors[index]) for index in validation_set ]
                nn.close()
            elif typ == 'knn':
                ### Object creating output vector when called;
                # maps reference input vector (as tuple key) -> normalized style vector
                ref_dict = {}
                for index in reference_set:
                    ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]])

                # best for InputVectorGenerator rescale=Rescale
                oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.8)

                # Create list of output vectors using weighted kNN algorithm approximating output_vector
                output_vectors = [ oknn(input_vectors[index]) for index in validation_set ]
            elif typ == 'joint_nn_knn':
                data =[]
                ref_dict = {}
                for index in reference_set:
                    data.append( (input_vectors[index], norm(player_vector[players_all[index]])) )
                    ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]])

                print_set_to_file(data,'nn_cross.data')
                nn = NeuralNet('nn_cross.data')
                # Neural-net predictions for the validation fold
                ov_3 = [ nn(input_vectors[index]) for index in validation_set ]

                nn.close()

                # Weighted k-NN predictions with three different k values
                oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.8)
                ov_1 = [ oknn(input_vectors[index]) for index in validation_set ]

                oknn = KNNOutputVectorGenerator(ref_dict, k=3, weight_param=0.8)
                ov_2 = [ oknn(input_vectors[index]) for index in validation_set ]

                oknn = KNNOutputVectorGenerator(ref_dict, k=2, weight_param=0.8)
                ov_4 = [ oknn(input_vectors[index]) for index in validation_set ]

                # Joint classifier: style 1 from 4-NN, style 2 from 3-NN,
                # style 3 from the neural net, style 4 from 2-NN
                # (presumably the per-style best models -- confirm)
                output_vectors = [ [a[0],b[1],c[2],d[3]] for a,b,c,d in zip(ov_1, ov_2, ov_3, ov_4)]
            elif typ == 'rnd':
                output_vectors = [ rand_vect(4) for index in validation_set ]

            # Map predictions back from [-1,1] to the questionnaire scale
            # before comparing against the raw desired vectors.
            output_vectors = [ revnorm(x) for x in output_vectors ]
            desired_vectors = [ player_vector[players_all[index]] for index in validation_set ]
            #desired_vectors = [ norm(player_vector[players_all[index]]) for index in validation_set ]

            if False:  # debug: dump predicted vs. desired vectors
                for vec_set,text in [(output_vectors, "Output: "), (desired_vectors, "Desired:")]:
                    print text,
                    for o in vec_set:
                        for x in o:
                            print "%02.3f"%(x,),
                        print "; ",
                    print

            # Accumulate squared errors: per style (esps), pooled (es),
            # and per player (errs, summed over the 4 styles).
            for num1, (o,d) in zip(validation_set, zip(output_vectors, desired_vectors)):
                err = 0.0
                for num,(x,y) in enumerate(zip(o,d)):
                    e = (1.0*x-1.0*y)**2
                    esps[num]+=[e]
                    es += [e]
                    err += e
                errs[num1] += [err]

    # Final report
    if typ == 'joint_nn_knn':
        print "Joint classifier:"
    elif typ == 'knn':
        print "k-NN classifier:"
    elif typ == 'nn':
        print "Neural network classifier:"
    elif typ == 'rnd':
        print "Random classifier:"
    #print "Total square err: %2.3f"%( sum(errs) / number_runs,)
    mar = numpy.array(errs)
    mean = mar.mean()
    print "Mean square err per player: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean))
    mean = numpy.array(es).mean()
    print "Mean square err per style: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean))
    for num, style in enumerate(esps):
        mean = numpy.array(style).mean()
        print "Style %1d : %2.3f ( = sd %2.3f)"%(num+1, mean, sqrt(mean))
    print
    print "Players sorted by mean square error:"
    p = zip([numpy.array(errs[p]).mean() for p in xrange(len(players_all)) ], players_all)
    p.sort()
    for err, name in p:
        print "%2.3f %s"%(err,name)
        #print "%s"%(name,)
    sys.exit()