9 from data_about_players
import Data
10 from cross_val
import Shuffled
, CrossValidation
13 import os
, random
, shutil
14 ranks
= os
.listdir(raw
)
17 plays
= os
.listdir(raw
+ '/'+ rank
)
19 tot
[raw
+ '/' + rank
+ '/' + play
] = rank
22 def param_f_for_knn(x
):
23 r
= 0.0426735 * x
+ 6.82347
27 if __name__
== '__main__':
37 player_vector
= Data
.strength_linear_vector
38 # players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
39 #players_ignore = [ ]#,"Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
40 #players_all = [ p for p in player_vector.keys() if p not in players_ignore ]
42 root_dir
= '../pdb-gtl/'
44 input_dir
= root_dir
+ 'rawpat_files_merged_test'
45 train_dict
= list_dir(input_dir
)
47 main_pat_filename
= root_dir
+ 'all.pat'
48 ### Object creating input vector when called
49 print "Creating input vector generator from main pat file:", main_pat_filename
50 i
= InputVectorGenerator(main_pat_filename
, num_features
)
53 # Create list of input vectors
56 for f
, rank
in train_dict
.items():
58 input_vectors
+= [i(f
)]
62 assert rank
in player_vector
64 if len(input_vectors
) == 0:
65 print >>sys
.stderr
, "No vectors."
69 # Change this to False if you do not want to use PCA
72 # Create PCA object, trained on input_vectors
73 print >>sys
.stderr
, "Running PCA."
74 pca
= PCA(input_vectors
, reduce=True)
75 # Perform a PCA on input vectors
76 input_vectors
= pca
.process_list_of_vectors(input_vectors
)
77 # Creates a Composed object that first generates an input vector
78 # and then performs a PCA analysis on it.
82 return list(2.0*numpy
.random
.random(k
)-1.0)
85 return [ (1-x
) * 16.5 - 3.0 for x
in vec
]
87 print >>sys
.stderr
, "Running Cross-validation."
90 errs
=[ [] for _
in xrange(len(players_all
)) ]
94 for _
in xrange(number_runs
):
95 num_play
= len(players_all
)
96 num_fold
= num_play
/10
97 #print len(players_all)/4, "-fold validation from:", len(players_all)
98 print num_fold
, "-fold validation from population of:", num_play
100 for reference_set
, validation_set
in Shuffled(CrossValidation
)(range(num_play
), num_fold
):# len(players_all)/4):
104 print len(reference_set
), len(validation_set
)
107 for index
in reference_set
:
108 data
.append( (input_vectors
[index
], player_vector
[players_all
[index
]]) )
110 # print [x for y,x in data]
112 ### We can enlarge the data set by adding linear combinations of input and output vectors
113 use_lin_combinations
= False
114 if use_lin_combinations
:
115 data
+= Combinator().combine(data
)
117 filename
= 'nn_cross.data'+str(os
.getpid())
118 print_set_to_file(data
,filename
)
120 nn
= NeuralNet(filename
, neurons
=35, desired_error
=sys
.argv
[1] if len(sys
.argv
) ==2 else 0.0005 )
121 # Create list of output vectors using the trained neural network approximating output_vector
122 output_vectors
= [ nn(input_vectors
[index
]) for index
in validation_set
]
125 ### Object creating output vector when called;
127 for index
in reference_set
:
128 ref_dict
[tuple(input_vectors
[index
])] = player_vector
[players_all
[index
]]
131 # best for InputVectorGenerator rescale=Rescale
132 oknn
= KNNOutputVectorGenerator(ref_dict
, k
=4, weight_param
=0.9, dist_mult
=param_f_for_knn(124))
133 #oknn = KNNOutputVectorGenerator(ref_dict, k=3, weight_param=0.99, dist_mult=1400)
135 # Create list of output vectors using weighted kNN algorithm approximating output_vector
136 output_vectors
= [ oknn(input_vectors
[index
]) for index
in validation_set
]
138 output_vectors
= [ rand_vect(4) for index
in validation_set
]
140 output_vectors
= [ revnorm(ov
) for ov
in output_vectors
]
141 desired_vectors
= [ revnorm(player_vector
[players_all
[index
]]) for index
in validation_set
]
144 for vec_set
,text
in [(output_vectors
, "Output: "), (desired_vectors
, "Desired:")]:
152 for num1
, (o
,d
) in zip(validation_set
, zip(output_vectors
, desired_vectors
)):
160 if typ
== 'joint_nn_knn':
161 print "Joint classifier:"
163 print "k-NN classifier:"
165 print "Neural network classifier:"
167 print "Random classifier:"
169 #print "Total square err: %2.3f"%( sum(errs) / number_runs,)
170 # mar = numpy.array(errs)
172 # print "Mean square err per player: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean))
174 mean
= numpy
.array(es
).mean()
175 print "Mean square err: " + "%2.3f ( = sd %2.3f) "%(mean
, sqrt(mean
))
177 #mean = numpy.array(es).mean()
178 #print "%2.3f &"%(mean),
179 #print "%2.3f \\\\\\hline"%(11.776 / mean)
182 #print "Players sorted by mean square error:"
183 #p = zip([numpy.array(errs[p]).mean() for p in xrange(len(players_all)) ], players_all)
186 # print "%2.3f %s"%(err,name)
187 # #print "%s"%(name,)