5 from itertools
import izip
8 from data_about_players
import Data
class KNNOutputVectorGenerator(VectorGenerator):
    """Weighted k-nearest-neighbour output vector generator.

    Calling an instance with an input vector looks up the ``k`` reference
    input vectors closest to it and combines their associated output
    vectors with ``linear_combination``, using one coefficient per
    neighbour computed by ``weight_fc``.
    """

    def __init__(self, ref_dict, k=5, weight_param=0.8, dist_mult=10):
        """Store the reference vectors and the kNN parameters.

        ref_dict is a dictionary of reference input/output vectors,
            e.g. ref_dict = {(1.0, 2.0): (9.0, 16.0, 21.0)}
        k is the number of nearest reference vectors combined per call.
        weight_param is the base of the exponential weight (see weight_fc).
        dist_mult is the multiplicative scaling applied to the Euclidean
            distance (see distance).
        """
        self.ref_dict = ref_dict
        # __call__ slices the neighbour list with self.k, so it must be kept.
        self.k = k
        # NOTE: the attribute keeps its historical spelling ("weigth") so
        # that any code reading it from outside keeps working.
        self.weigth_param = weight_param
        self.dist_mult = dist_mult

    def __call__(self, player_vector):
        """Return the combined output vector for ``player_vector``.

        Raises RuntimeError (from ``distance``) when ``player_vector`` and a
        reference vector have different lengths.
        """
        # (scaled distance, reference input vector) for every reference.
        neighbours = [(self.distance(ref_vec, player_vector), ref_vec)
                      for ref_vec in self.ref_dict]
        # Closest first; without this the slice below would not pick the
        # *nearest* neighbours.  Sorting on the distance only avoids
        # comparing the vectors themselves on ties.
        neighbours.sort(key=lambda pair: pair[0])
        nearest = neighbours[:self.k]

        ref_output_vecs = [self.ref_dict[ref_vec] for _, ref_vec in nearest]
        coefs = [self.weight_fc(dist) for dist, _ in nearest]
        return linear_combination(ref_output_vecs, coefs)

    def weight_fc(self, distance):
        """Return the coefficient ``weight_param ** distance`` for a neighbour."""
        return self.weigth_param ** (distance)

    def distance(self, vec1, vec2):
        """Return the Euclidean distance of the vectors scaled by ``dist_mult``.

        Raises RuntimeError when the vectors differ in length.
        """
        if len(vec1) != len(vec2):
            raise RuntimeError("Dimensions of vectors mismatch.")
        ### the multiplicative constant dist_mult (10 by default) is
        ### empirically determined for correct scaling
        # The lengths are equal here, so the builtin zip pairs every component.
        squared = sum((a - b) ** 2 for a, b in zip(vec1, vec2))
        return self.dist_mult * numpy.sqrt(squared)
# Script entry point. As far as the visible statements show, it builds input
# vectors for a training and a test directory, runs them through PCA, fills a
# reference dictionary for KNNOutputVectorGenerator and prints per-file and
# aggregate errors of the kNN output against the desired vectors.
# NOTE(review): several statements below reference names (raw, num_features,
# train_pl, test_pl, test_files, errs, list_dir, revnorm) whose definitions
# are not visible in this part of the file -- confirm against the full source.
43 if __name__
== '__main__':
44 root_dir
= '../pdb-gtl/'
45 main_pat_filename
= root_dir
+ 'all.pat'
46 player_vector
= Data
.strength_linear_vector
49 ### Object creating input vector when called
# Python 2 print-chevron syntax: the progress message goes to stderr.
50 print >>sys
.stderr
, "Creating input vector generator from main pat file:", main_pat_filename
51 i
= InputVectorGenerator(main_pat_filename
, num_features
)#, rescale=LogRescale)
53 #raw = root_dir + 'testpat_files'
55 import os
, random
, shutil
# NOTE(review): `raw` is only assigned in the commented-out line above in this
# view -- confirm it is defined before os.listdir(raw) runs.
56 ranks
= os
.listdir(raw
)
59 plays
= os
.listdir(raw
+ '/'+ rank
)
# Maps the path raw/rank/play to its rank (directory name).
61 tot
[raw
+ '/' + rank
+ '/' + play
] = rank
64 train_set_dir
= root_dir
+ 'train_set'
65 test_set_dir
= root_dir
+ 'rawpat_files_merged_test'
66 train_dict
= list_dir(train_set_dir
)
67 test_dict
= list_dir(test_set_dir
)
# One input vector per key of train_dict, produced by calling the generator `i`.
70 input_vectors_train
= []
71 for f
, rank
in train_dict
.items():
73 input_vectors_train
+= [i(f
)]
# Same for the test set.
78 input_vectors_test
= []
81 for f
, rank
in test_dict
.items():
83 input_vectors_test
+= [i(f
)]
91 #if len(input_vectors_train) == 0:
92 # print >>sys.stderr, "No reference vectors."
94 if len(input_vectors_test
) == 0:
95 print >>sys
.stderr
, "No vectors to process."
99 # Change this to False, if you do not want to use PCA
102 # Create PCA object, trained on input_vectors
103 print >>sys
.stderr
, "Running PCA."
# The PCA object is constructed from the training and test vectors together.
104 pca
= PCA(input_vectors_train
+ input_vectors_test
, reduce=True)
105 # Perform a PCA on input vectors
106 if input_vectors_train
:
107 input_vectors_train
= pca
.process_list_of_vectors(input_vectors_train
)
108 if input_vectors_test
:
109 input_vectors_test
= pca
.process_list_of_vectors(input_vectors_test
)
110 # Creates a Composed object that first generates an input vector
111 # and then performs a PCA analysis on it.
114 ### Object creating output vector when called;
# Key: the processed training input vector as a hashable tuple;
# value: the entry of player_vector looked up by `name`.
116 for name
, input_vector
in zip(train_pl
, input_vectors_train
):
117 ref_dict
[tuple(input_vector
)] = player_vector
[name
]
119 print "creating the knn"
121 oknn
= KNNOutputVectorGenerator(ref_dict
, k
=4, weight_param
=0.9, dist_mult
=26.4)
123 #oknn = KNNOutputVectorGenerator(ref_dict, k=5, weight_param=0.2, dist_mult=10)
# NOTE(review): the `def` lines enclosing the two `return` statements below are
# not visible here; the calls further down suggest they belong to
# revnorm(vec) and rand_vect(k) -- TODO confirm.
126 return [ (1-x
) * 16.5 - 3.0 for x
in vec
]
129 return list(2.0*numpy
.random
.random(k
)-1.0)
132 # Create list of output vectors using weighted kNN algorithm approximating output_vector
133 output_vectors
= [ revnorm(oknn(input_vector
)) for input_vector
in input_vectors_test
]
134 #output_vectors= [ revnorm(rand_vect(1)) for _ in input_vectors_test ]
135 desired_vectors
= [ revnorm(player_vector
[rank
]) for rank
in test_pl
]
# Print the first component of the computed and the desired vector per file.
138 for f
, out
, des
in zip(test_files
, output_vectors
, desired_vectors
):
141 print f
, "%2.3f ; %2.3f"%(out
[0], des
[0])
# Absolute error on the first component of each output/desired pair.
144 diff
= [ abs(x
[0] - y
[0]) for x
,y
in zip(output_vectors
,desired_vectors
) ]
145 zips
= zip(diff
, test_files
)
148 print a
, " %2.3f"%(diff
,)
151 for o
,d
in zip(output_vectors
, desired_vectors
):
# NOTE(review): `errs` is presumably filled with squared errors in the loop
# above, whose body is not visible here -- confirm.
158 mean
= numpy
.array(errs
).mean()
159 print "Mean square err: " + "%2.3f ( = sd %2.3f) "%(mean
, sqrt(mean
))