# tex: PCA2 Patterns, extra footnotes
# gostyle.git / knn_cross.py
# blob dc6f7e6a97285d6e3c232be272eac4da1c540784
1 #!/usr/bin/python
2 import sys
3 import subprocess
4 import os
5 from gostyle import *
6 from math import sqrt
7 import numpy
9 from data_about_players import Data
11 from knn import KNNOutputVectorGenerator
class NeuralNet:
    """Wrapper around the external gnet trainer/runner binaries.

    The constructor trains a net on the given data file and then starts
    a long-lived ``gnet_run`` process; ``__call__`` sends one input
    vector down its pipe and reads back one output vector.
    """
    def __init__( self, filename ):
        # Train the net; the model is written to gonet.net.
        train_cmd = "./gnet/gnet_train -l 3 -n 30 -p 30 -e 0.0003 -o gonet.net ./"+filename
        ret = subprocess.call(train_cmd.split())
        print(ret)
        # Keep one evaluator process alive, talking over stdin/stdout pipes.
        run_cmd = "./gnet/gnet_run gonet.net"
        self.p = subprocess.Popen(run_cmd.split(), stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    def __call__(self, vector):
        # One whitespace-separated line in, one line of floats out.
        line = ' '.join([str(a) for a in vector]) + '\n'
        self.p.stdin.write(line)
        reply = self.p.stdout.readline()
        return [float(num) for num in reply.split()]
if __name__ == '__main__':
    main_pat_filename = Data.main_pat_filename
    # Number of pattern features per input vector
    num_features = 400
    #k = 5
    # Map: player name -> questionnaire style vector (scale 0..10)
    player_vector = Data.questionare_total
    # players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
    players_ignore = [ "Yi Ch'ang-ho 2004-", "Yi Ch'ang-ho 2005+" ]#,"Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
    players_all = [ p for p in player_vector.keys() if p not in players_ignore ]

    ### Object creating input vector when called
    print("Creating input vector generator from main pat file: %s" % main_pat_filename)
    print("")
    i = InputVectorGenerator(main_pat_filename, num_features)

    # Create list of input vectors, one per player
    # (idiom: comprehension instead of the original append loop)
    input_vectors = [i(Data.pat_files_folder + name) for name in players_all]

    if not input_vectors:
        sys.stderr.write("No reference vectors.\n")
        # Bug fix: exit with a nonzero status on error
        # (the original bare sys.exit() reported success, status 0)
        sys.exit(1)
### PCA example usage
# Change this to False, if you do not want to use PCA
use_pca = True
if use_pca:
    # Train a PCA transform on the reference vectors, project all of
    # them, and wrap the input generator so that any future vector is
    # projected the same way.
    sys.stderr.write("Running PCA.\n")
    pca = PCA(input_vectors, reduce=True)
    input_vectors = pca.process_list_of_vectors(input_vectors)
    i = Compose(i, pca)
### n/4-fold cross validation
#bounds = random.sample(range(1,len(players_all)), len(players_all) / 10 )
# Fold boundaries at every 4th player index.  '//' makes the integer
# division explicit (the original '/' relied on Python 2 semantics);
# the comprehension replaces the original nested append loop.
bounds = [4 * x for x in range(1, len(players_all) // 4) if 4 * x < len(players_all)]
if not bounds:
    sys.stderr.write("Pop too small.\n")
    # Bug fix: exit nonzero on error (original sys.exit() exited 0)
    sys.exit(1)
bounds.sort()
def norm(vec):
    """Map questionnaire scores from the 0..10 scale onto -1.0..1.0."""
    scaled = []
    for component in vec:
        scaled.append(component / 5.0 - 1.0)
    return scaled
def rand_vect(k):
    """Return a list of k uniform random floats in [-1.0, 1.0)."""
    sample = numpy.random.random(k)
    return [2.0 * value - 1.0 for value in sample]
### Cross-validation scoring loop.
# errs[i] collects, across all runs, the total squared error for player i;
# es collects every individual squared-error term;
# esps[j] collects the squared errors of style dimension j (4 dimensions).
errs=[ [] for _ in xrange(len(players_all)) ]
es=[]
esps=[[],[],[],[]]
sentinel=len(players_all)
number_runs = 30
for _ in xrange(number_runs):
    prev=0
    # Walk the fold boundaries: [prev, b) is the validation fold, the
    # remaining players form the reference (training) set.
    for b in bounds+[sentinel]:
        validation_set = range(prev, b)
        reference_set = range(0,prev) + range(b,sentinel)
        if False:
            # Debug: visualize which players fall in the reference set.
            print "Reference set :",
            for pr in range(0, prev):
                print "R",
            for pr in validation_set:
                print "_",
            for pr in range(b, sentinel):
                print "R",
            print
        prev = b
        # Estimator to evaluate: 'nn' (neural net), 'knn' (weighted kNN),
        # or 'rnd' (random baseline).
        typ = 'rnd'
        if typ == 'nn':
            # Pair each reference player's input vector with its
            # normalized questionnaire style vector.
            data =[]
            for index in reference_set:
                data.append( (input_vectors[index], norm(player_vector[players_all[index]])) )
            ### We can enlarge the data set by adding linear combinations of input and output vectors
            use_lin_combinations = False
            if use_lin_combinations:
                data += Combinator().combine(data)
            print_set_to_file(data,'nn_cross.data')
            nn = NeuralNet('nn_cross.data')
            # Create list of output vectors predicted by the trained net
            output_vectors = [ nn(input_vectors[index]) for index in validation_set ]
        elif typ == 'knn':
            ### Object creating output vector when called;
            # maps reference input vectors (as tuples) to style vectors.
            ref_dict = {}
            for index in reference_set:
                ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]])
            #oknn = KNNOutputVectorGenerator(ref_dict, k=5, weight_param=0.799)
            oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.8)
            # Create list of output vectors using weighted kNN algorithm approximating output_vector
            output_vectors = [ oknn(input_vectors[index]) for index in validation_set ]
        elif typ == 'rnd':
            # Random baseline: uniform vectors in [-1, 1)^4.
            output_vectors = [ rand_vect(4) for index in validation_set ]
        # Ground truth: normalized questionnaire styles of validation players.
        desired_vectors = [ norm(player_vector[players_all[index]]) for index in validation_set ]
        if False:
            # Debug: dump predicted vs. desired vectors side by side.
            for vec_set,text in [(output_vectors, "Output: "), (desired_vectors, "Desired:")]:
                print text,
                for o in vec_set:
                    for x in o:
                        print "%02.3f"%(x,),
                    print "; ",
                print
        # Accumulate squared errors: per player (errs), per individual
        # term (es) and per style dimension (esps).
        for num1, (o,d) in zip(validation_set, zip(output_vectors, desired_vectors)):
            err = 0.0
            for num,(x,y) in enumerate(zip(o,d)):
                e = (1.0*x-1.0*y)**2
                esps[num]+=[e]
                es += [e]
                err += e
            errs[num1] += [err]
#print "Total square err: %2.3f"%( sum(errs) / number_runs,)
# Report aggregate mean squared errors over all runs.
player_mean = numpy.array(errs).mean()
print("Mean square err per player: " + "%2.3f ( = sd %2.3f) " % (player_mean, sqrt(player_mean)))
style_mean = numpy.array(es).mean()
print("Mean square err per style: " + "%2.3f ( = sd %2.3f) " % (style_mean, sqrt(style_mean)))
for style_errors in esps:
    print("%2.3f " % (numpy.array(style_errors).mean(),))
print("")
print("Players sorted by mean square error:")
per_player = [numpy.array(errs[idx]).mean() for idx in range(len(players_all))]
ranking = sorted(zip(per_player, players_all))
for err, name in ranking:
    print("%2.3f %s" % (err, name))
    #print "%s"%(name,)
sys.exit()