#!/usr/bin/python
# knn.py -- k-NearestNeighbour algorithm for vector classification.
#
# Provenance: contents of knn.py as added (new file, mode 100755) by git patch
#   beca8098ad92f3b1e1a84b62fb75629685318c6f
#   From: hellboy
#   Date: Sat, 27 Feb 2010 17:06:59 +0100
#   Subject: [PATCH] knn.py ADDED: k-NearestNeighbour algorithm for vector
#            classification.
#   Patch trailer: 2.11.4.GIT
#
# NOTE: this is Python 2 code (print statement syntax).
import sys
from gostyle import *
from math import sqrt

from data_about_players import Data


class KNNOutputVectorGenerator(VectorGenerator):
    """k-NearestNeighbour output vector generator.

    Calling an instance with an input vector looks up the ``k`` reference
    input vectors closest to it (Euclidean distance) and returns
    ``linear_combination`` of their reference output vectors, weighted by
    ``weight_fc`` of each distance.
    """

    def __init__(self, ref_dict, k=2):
        """Store the reference data.

        ref_dict is a dictionary of reference input/output vectors,
        e.g. ref_dict = {(1.0, 2.0): (9.0, 16.0, 21.0)}
        k is the number of nearest reference vectors combined by __call__.
        """
        self.ref_dict = ref_dict
        self.k = k

    def __call__(self, player_vector):
        """Return the weighted combination of the k nearest reference outputs.

        player_vector must have the same length as every key of ref_dict,
        otherwise distance() raises RuntimeError.
        """
        # Use the instance's own dictionary.  The original code read a bare
        # ``ref_dict`` name here, which only resolved when a module-level
        # variable of that name happened to exist (as in the script below).
        neighbours = [(self.distance(ref_vec, player_vector), ref_vec)
                      for ref_vec in self.ref_dict]
        # Tuples sort by distance first; ties fall back to comparing the
        # reference vectors themselves.
        neighbours.sort()
        nearest = neighbours[:self.k]

        ref_output_vecs = [self.ref_dict[ref_vec] for _, ref_vec in nearest]
        coefs = [self.weight_fc(dist) for dist, _ in nearest]

        # NOTE(review): coefficients are passed on as-is (not normalised
        # here); whether linear_combination normalises them is not visible
        # from this file -- confirm in gostyle.
        return linear_combination(ref_output_vecs, coefs)

    def weight_fc(self, distance):
        """Return the weight of a neighbour at the given distance.

        The weight is 0.2 ** distance, i.e. 1 at distance 0 and decreasing
        as the distance grows.
        """
        return 0.2 ** distance

    def distance(self, vec1, vec2):
        """Return the Euclidean distance between vec1 and vec2.

        Raises RuntimeError if the two vectors differ in length.
        """
        if len(vec1) != len(vec2):
            raise RuntimeError("Dimensions of vectors mismatch.")
        return sqrt(sum((float(a) - float(b)) ** 2
                        for a, b in zip(vec1, vec2)))


if __name__ == '__main__':
    main_pat_filename = Data.main_pat_filename
    filename_play_other = 'knn_other.data'
    filename_play_ref = 'knn_ref.data'
    filename_play_ref_orig = 'knn_ref_orig.data'
    num_features = 300
    players_all = Data.players_all
    # Players with a known (reference) output vector ...
    players_ref = Data.player_vector.keys()
    # ... and the remaining players, whose output vector is to be estimated.
    players_other = [x for x in players_all if x not in players_ref]

    ### Object creating input vector when called
    print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
    i = InputVectorGenerator(main_pat_filename, num_features)

    # Create list of input vectors
    input_vectors_ref = [i(Data.pat_files_folder + name)
                         for name in players_ref]
    input_vectors_other = [i(Data.pat_files_folder + name)
                           for name in players_other]

    if not input_vectors_ref:
        print >>sys.stderr, "No reference vectors."
        sys.exit()
    if not input_vectors_other:
        print >>sys.stderr, "No vectors to process."
        sys.exit()

    ### PCA example usage
    # Change this to True, if you want to use PCA
    use_pca = False
    if use_pca:
        # Create PCA object, trained on input_vectors
        print >>sys.stderr, "Running PCA."
        pca = PCA(input_vectors_ref + input_vectors_other, reduce=True)
        # Perform a PCA on input vectors
        input_vectors_ref = pca.process_list_of_vectors(input_vectors_ref)
        input_vectors_other = pca.process_list_of_vectors(input_vectors_other)
        # Creates a Composed object that first generates an input vector
        # and then performs a PCA analysis on it.
        i = Compose(i, pca)

    ### Object creating output vector when called;
    # Map each reference input vector (as a hashable tuple) to the known
    # output vector of the same player.
    ref_dict = {}
    for name, input_vector in zip(players_ref, input_vectors_ref):
        ref_dict[tuple(input_vector)] = Data.player_vector[name]

    oknn = KNNOutputVectorGenerator(ref_dict, k=5)

    # Create list of output vectors using weighted kNN algorithm
    # approximating output_vector
    output_vectors_other = [oknn(input_vector)
                            for input_vector in input_vectors_other]
    output_vectors_ref = [oknn(input_vector)
                          for input_vector in input_vectors_ref]

    def print_me(names, vecs, where):
        """Write one line per player (name followed by its vector) to `where`.

        Whitespace inside a name is replaced by underscores.  Raises
        RuntimeError if names and vecs differ in length.
        """
        if len(names) != len(vecs):
            raise RuntimeError("Dimensions of vectors mismatch.")

        f = open(where, 'w')
        try:
            print >>sys.stderr, "Saving output_vectors to file:", where
            for name, vec in zip(names, vecs):
                name_to_print = '_'.join(name.split())
                print_vector([name_to_print] + list(vec), f)
        finally:
            # Close the file even if print_vector fails part-way through.
            f.close()

    print_me(players_ref, output_vectors_ref, filename_play_ref)
    print_me(players_other, output_vectors_other, filename_play_other)

    # Also dump the original reference output vectors, in the same format,
    # so they can be compared with the kNN estimates in filename_play_ref.
    # Going through print_me converts each vector with list(), so tuple
    # vectors work here too.
    print_me(players_ref,
             [Data.player_vector[name] for name in players_ref],
             filename_play_ref_orig)