7 from data_about_players
import Data
class KNNOutputVectorGenerator(VectorGenerator):
    """k-NearestNeighbour output vector generator.

    Calling an instance with a query vector picks the ``k`` reference input
    vectors with the smallest scaled Euclidean distance to the query, weights
    each by ``weight_param ** distance`` and hands the matching reference
    output vectors together with those weights to ``linear_combination``.
    """

    def __init__(self, ref_dict, k=5, weight_param=0.8, dist_mult=10):
        """Store the reference set and the tuning parameters.

        ref_dict is a dictionary of reference input/output vectors,
        e.g. ref_dict = {(1.0, 2.0): (9.0, 16.0, 21.0)}

        k            -- how many of the closest reference vectors are used
        weight_param -- base of the exponential weight (see ``weight_fc``)
        dist_mult    -- constant the Euclidean distance is multiplied by
        """
        self.ref_dict = ref_dict
        # __call__ slices the sorted neighbours with self.k, so it has to be
        # stored here.
        self.k = k
        # The attribute keeps its historical spelling ("weigth") so that any
        # code reading it from outside the class keeps working.
        self.weigth_param = weight_param
        self.dist_mult = dist_mult

    def __call__(self, player_vector):
        """Return the weighted combination of the k nearest reference outputs.

        player_vector must have the same length as every key of ``ref_dict``;
        otherwise ``distance`` raises RuntimeError.
        """
        neighbours = [(self.distance(ref_vec, player_vector), ref_vec)
                      for ref_vec in self.ref_dict]
        # Ascending order on (distance, ref_vec): the first k entries are the
        # nearest neighbours.
        neighbours.sort()
        nearest = neighbours[:self.k]

        ref_output_vecs = [self.ref_dict[ref_vec] for _, ref_vec in nearest]
        coefs = [self.weight_fc(dist) for dist, _ in nearest]

        return linear_combination(ref_output_vecs, coefs)

    def weight_fc(self, distance):
        """Return the weight ``weight_param ** distance`` of a neighbour."""
        return self.weigth_param ** (distance)

    def distance(self, vec1, vec2):
        """Return ``dist_mult`` times the Euclidean distance of two vectors.

        Raises RuntimeError when the vectors differ in length.
        """
        if len(vec1) != len(vec2):
            raise RuntimeError("Dimensions of vectors mismatch.")
        ### the multiplicative constant (dist_mult, 10 by default) is
        ### empirically determined for correct scaling
        return self.dist_mult * sqrt(sum([(float(a) - float(b)) ** 2
                                          for a, b in zip(vec1, vec2)]))
if __name__ == '__main__':
    # Script entry point: builds input vectors for a training and a test
    # directory tree, optionally runs PCA on them, predicts an output vector
    # for every test file with the weighted kNN generator and prints the
    # per-file results plus the mean square error.
    #
    # Relies on names that are not defined in this part of the file and are
    # expected to be in scope already: sys, numpy, sqrt, PCA,
    # InputVectorGenerator and num_features.
    import os
    import random
    import shutil

    def list_dir(raw):
        """Map each path ``raw/<rank>/<play>`` to its ``<rank>`` directory name."""
        tot = {}
        for rank in os.listdir(raw):
            for play in os.listdir(raw + '/' + rank):
                tot[raw + '/' + rank + '/' + play] = rank
        return tot

    def rand_vect(k):
        """Return a list of k values drawn with numpy.random.random."""
        return list(numpy.random.random(k))

    root_dir = '../pdb-gtl/'
    main_pat_filename = root_dir + 'all.pat'
    player_vector = Data.strength_linear_vector

    ### Object creating input vector when called
    sys.stderr.write("Creating input vector generator from main pat file: %s\n"
                     % (main_pat_filename,))
    # NOTE(review): num_features is not defined anywhere in the visible
    # source -- confirm where it comes from.
    i = InputVectorGenerator(main_pat_filename, num_features)  # , rescale=LogRescale)

    train_set_dir = root_dir + 'rawpat_files_merged'
    test_set_dir = root_dir + 'rawpat_files_merged_test'
    train_dict = list_dir(train_set_dir)
    test_dict = list_dir(test_set_dir)

    # Rank labels are collected in the same pass as the input vectors so the
    # two lists stay aligned for the zip() calls further down.
    train_pl = []
    input_vectors_train = []
    for f, rank in train_dict.items():
        train_pl.append(rank)
        input_vectors_train.append(i(f))

    test_pl = []
    test_files = []
    input_vectors_test = []
    for f, rank in test_dict.items():
        test_pl.append(rank)
        test_files.append(f)
        input_vectors_test.append(i(f))

    # NOTE(review): the visible source only printed these messages; stopping
    # here is assumed to be the intent because nothing below can produce a
    # result without both sets -- confirm the exit code.
    if len(input_vectors_train) == 0:
        sys.stderr.write("No reference vectors.\n")
        sys.exit(1)
    if len(input_vectors_test) == 0:
        sys.stderr.write("No vectors to process.\n")
        sys.exit(1)

    # Change this to False, if you do not want to use PCA
    # NOTE(review): the switch itself was not visible in the source; only
    # this comment was -- confirm the flag name and default.
    use_pca = True
    if use_pca:
        # Create PCA object, trained on input_vectors
        sys.stderr.write("Running PCA.\n")
        pca = PCA(input_vectors_train + input_vectors_test, reduce=True)
        # Perform a PCA on input vectors
        input_vectors_train = pca.process_list_of_vectors(input_vectors_train)
        input_vectors_test = pca.process_list_of_vectors(input_vectors_test)

    ### Object creating output vector when called;
    ref_dict = {}
    for name, input_vector in zip(train_pl, input_vectors_train):
        ref_dict[tuple(input_vector)] = player_vector[name]

    oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.9, dist_mult=10)
    # alternative setting:
    # oknn = KNNOutputVectorGenerator(ref_dict, k=5, weight_param=0.2, dist_mult=10)

    # Create list of output vectors using weighted kNN algorithm approximating output_vector
    output_vectors = [oknn(input_vector) for input_vector in input_vectors_test]
    # random alternative:
    # output_vectors = [rand_vect(1) for _ in input_vectors_test]
    desired_vectors = [player_vector[rank] for rank in test_pl]

    # Predicted vs. desired first component for every test file.
    for f, out, des in zip(test_files, output_vectors, desired_vectors):
        sys.stdout.write("%s %2.3f ; %2.3f\n" % (f, out[0], des[0]))

    # Absolute error of the first component, listed in ascending order.
    diff = [abs(x[0] - y[0]) for x, y in zip(output_vectors, desired_vectors)]
    zips = sorted(zip(diff, test_files))
    for d, a in zips:
        sys.stdout.write("%s  %2.3f\n" % (a, d))

    # NOTE(review): the per-sample error formula was not visible in the
    # source; the squared Euclidean error between output and desired vector
    # is assumed -- confirm.
    errs = []
    for o, d in zip(output_vectors, desired_vectors):
        errs.append(sum([(x - y) ** 2 for x, y in zip(o, d)]))

    mean = numpy.array(errs).mean()
    sys.stdout.write("Mean square err: " + "%2.3f ( = sd %2.3f) " % (mean, sqrt(mean)) + "\n")