# Random tweaks
# gostyle.git / nn_cross.py
# blob ca104fb6579933408d7ccfe51444d1d16f785569
1 #!/usr/bin/python
2 import sys
3 import os
4 import subprocess
5 from gostyle import *
6 from math import sqrt
7 import numpy
9 from data_about_players import Data
class NeuralNet:
    """Thin wrapper around the external gnet neural-network binaries.

    Constructing an instance trains a network on the given training-data
    file via ./gnet/gnet_train, then keeps a ./gnet/gnet_run process
    alive; calling the instance feeds one input vector to that process
    over a pipe and returns the network's output vector.
    """
    def __init__( self, filename ):
        # Train: 3 hidden layers, 30 neurons, 1000 passes, target error
        # 0.0005; the trained net is written to gonet.net.
        s = "./gnet/gnet_train -l 3 -n 30 -p 1000 -e 0.0005 -o gonet.net ./"+filename
        args = s.split()
        ret = subprocess.call(args)
        print(ret)  # exit status of the training run
        # Keep a long-lived evaluator process; we talk to it line-by-line.
        s = "./gnet/gnet_run gonet.net"
        args = s.split()
        self.p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    def __call__(self, vector):
        """Evaluate the trained net on `vector` (a sequence of numbers);
        return the network's output as a list of floats."""
        self.p.stdin.write(' '.join([str(a) for a in vector]) + '\n')
        # Flush, otherwise the line can sit in the pipe's userspace buffer
        # and the blocking readline() below deadlocks: we wait for the
        # child's answer while the child waits for our input.
        self.p.stdin.flush()
        a = self.p.stdout.readline()
        return [ float(num) for num in a.split()]
# n/4-fold cross-validation of the gnet neural network on the player-style
# data: players are split into consecutive chunks of 4; each chunk in turn
# is held out for validation while the rest (plus linear combinations) are
# used for training, and squared errors are accumulated per player and per
# output component ("style").
if __name__ == '__main__':
    main_pat_filename = Data.main_pat_filename
    # Number of pattern features in each input vector.
    num_features = 400
    #k = 5
    # player -> questionnaire answer vector (from data_about_players).
    player_vector = Data.questionare_total
    players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
    players_all = [ p for p in player_vector.keys() if p not in players_ignore ]

    ### Object creating input vector when called
    # InputVectorGenerator comes from gostyle (star import) — presumably it
    # maps a per-player pattern file to a num_features-long vector; verify
    # against gostyle.py.
    print "Creating input vector generator from main pat file:", main_pat_filename
    print
    i = InputVectorGenerator(main_pat_filename, num_features)

    # Create list of input vectors
    # One input vector per player, in players_all order (this ordering is
    # relied upon below when pairing errors back to names).
    input_vectors = []
    for name in players_all:
        input_vectors += [i(Data.pat_files_folder + name)]

    #print '"%s"'%(players_all[2],)
    #print input_vectors[2]

    if len(input_vectors) == 0:
        print >>sys.stderr, "No reference vectors."
        sys.exit()

    ### PCA example usage
    # Change this to False, if you do not want to use PCA
    use_pca = True
    if use_pca:
        # Create PCA object, trained on input_vectors
        print >>sys.stderr, "Running PCA."
        pca = PCA(input_vectors, reduce=True)
        # Perform a PCA on input vectors
        input_vectors = pca.process_list_of_vectors(input_vectors)
        # Creates a Composed object that first generates an input vector
        # and then performs a PCA analysis on it.
        i = Compose(i, pca)

    ### n/4-fold cross validation
    #bounds = random.sample(range(1,len(players_all)), len(players_all) / 10 )
    # Fold boundaries at every 4th index (Python 2 integer division), so each
    # validation chunk holds (up to) 4 players.
    bounds=[]
    for x in range(1,len(players_all)/4):
        bounds += [4*x for _ in [1] if 4*x < len(players_all)]
    if not bounds:
        print >>sys.stderr, "Pop too small."
        sys.exit()
    bounds.sort()

    # Rescale (from gostyle) — presumably maps questionnaire values into
    # [-1, 1] for the network; verify against gostyle.py.
    r = Rescale(-1.0,1.0)
    errs=[]       # per-player summed squared error, in players_all order
    es=[]         # per-component squared errors, across all players
    sentinel=len(players_all)
    prev=0
    for b in bounds+[sentinel]:
        # [prev, b) is held out; everything else is the training set.
        validation_set = range(prev, b)
        reference_set = range(0,prev) + range(b,sentinel)
        # Visualize the split: R = reference (training), _ = validation.
        print "Reference set :",
        for pr in range(0, prev):
            print "R",
        for pr in validation_set:
            print "_",
        for pr in range(b, sentinel):
            print "R",
        print
        prev = b

        ### Object creating output vector when called;
        # Training pairs: (input vector, rescaled desired style vector).
        data =[]
        for index in reference_set:
            data.append( (input_vectors[index], r(player_vector[players_all[index]])) )

        ### We can enlarge the data set by adding linear combinations of input and output vectors
        use_lin_combinations = True
        if use_lin_combinations:
            data += Combinator().combine(data)

        # Dump training pairs and (re)train the external net on them.
        print_set_to_file(data,'nn_cross.data')
        nn = NeuralNet('nn_cross.data')
        # Create list of output vectors using weighted kNN algorithm approximating output_vector
        # (random baseline, used only by the commented-out line below)
        def rand_vect(k):
            return list(10*numpy.random.random(k))
        output_vectors = [ nn(input_vectors[index]) for index in validation_set ]
        #output_vectors = [ r(rand_vect(4)) for index in validation_set ]
        desired_vectors = [ r(player_vector[players_all[index]]) for index in validation_set ]

        # Print predicted vs. desired vectors for this fold, side by side.
        for vec_set,text in [(output_vectors, "Output: "), (desired_vectors, "Desired:")]:
            print text,
            for o in vec_set:
                for x in o:
                    print "%02.3f"%(x,),
                print "; ",
            print

        # Accumulate squared errors: per component into es, per player into errs.
        for o,d in zip(output_vectors, desired_vectors):
            err = 0.0
            for x,y in zip(o,d):
                e = (1.0*x-1.0*y)**2
                es += [e]
                err += e
            errs += [err]

        #for e in errs[-4:]:
        #    print "%2.3f"%(e,),
        print

    # Summary over all folds. Folds cover 0..len(players_all) in order, so
    # errs lines up index-for-index with players_all.
    print "Total square err: %2.3f"%( sum(errs),)
    mean = numpy.array(errs).mean()
    print "Mean square err per player: " + u"%2.3f ( = sd \u00B1 %2.3f) "%(mean, sqrt(mean))
    mean = numpy.array(es).mean()
    print "Mean square err per style: " + u"%2.3f ( = sd \u00B1 %2.3f) "%(mean, sqrt(mean))
    print
    print "Players sorted by mean square error:"
    p = zip(errs, players_all)
    p.sort()
    for err, name in p:
        print "%2.3f %s"%(err,name)
        #print "%s"%(name,)
    sys.exit()