tex: add fourth revision macro \rvvvv and mark the changes
[gostyle.git] / knn.py
blob43d0c875e4d1bb4fa47fa3ba9f423529f5b8c7ba
1 #!/usr/bin/python
2 import sys
3 from gostyle import *
4 from math import sqrt
6 from data_about_players import Data
def _log(*parts):
    """Write the space-separated parts to stderr as one line."""
    sys.stderr.write(' '.join(str(part) for part in parts) + '\n')


def print_me(names, vecs, where):
    """Save one labelled vector per name to the file `where`.

    For every (name, vector) pair, the name has each run of whitespace
    replaced by a single underscore and is prepended to the vector's
    components; the resulting list is handed to print_vector together
    with the open file.

    names -- sequence of strings
    vecs  -- sequence of iterables, same length as names
    where -- path of the file to (over)write

    Raises RuntimeError if names and vecs differ in length; in that case
    the file is not touched.
    """
    if len(names) != len(vecs):
        raise RuntimeError("Dimensions of vectors mismatch.")

    _log("Saving output_vectors to file:", where)
    # The with-block closes the file even if print_vector raises.
    with open(where, 'w') as f:
        for name, vec in zip(names, vecs):
            name_to_print = '_'.join(name.split())
            print_vector([name_to_print] + list(vec), f)


if __name__ == '__main__':
    main_pat_filename = Data.main_pat_filename
    filename_play_other = 'knn_other.data'
    filename_play_ref = 'knn_ref.data'
    filename_play_ref_orig = 'knn_ref_orig.data'
    num_features = 300
    k = 4
    player_vector = Data.questionare_total
    players_ignore = ["Yi Ch'ang-ho 2004-", "Yi Ch'ang-ho 2005+"]  # ,"Takao Shinji","Hane Naoki","Kobayashi Koichi" ]

    # Reference players are those with a known vector in player_vector;
    # every other non-ignored player gets its vector estimated below.
    players_all = [p for p in Data.players_all if p not in players_ignore]
    players_ref = [p for p in player_vector if p not in players_ignore]
    players_other = [x for x in players_all if x not in players_ref]

    ### Object creating input vector when called
    _log("Creating input vector generator from main pat file:", main_pat_filename)
    i = InputVectorGenerator(main_pat_filename, num_features)

    # Create lists of input vectors, one per player, in the same order as
    # the corresponding name lists.
    input_vectors_ref = [i(Data.pat_files_folder + name) for name in players_ref]
    input_vectors_other = [i(Data.pat_files_folder + name) for name in players_other]

    # Nothing can be computed without both sets; exit with a non-zero
    # status so that callers can tell no output files were produced.
    if not input_vectors_ref:
        _log("No reference vectors.")
        sys.exit(1)
    if not input_vectors_other:
        _log("No vectors to process.")
        sys.exit(1)

    ### PCA example usage
    # Change this to True if you want to use PCA
    use_pca = False
    if use_pca:
        # Create PCA object, trained on all input vectors
        _log("Running PCA.")
        pca = PCA(input_vectors_ref + input_vectors_other, reduce=True)
        # Perform a PCA on input vectors
        input_vectors_ref = pca.process_list_of_vectors(input_vectors_ref)
        input_vectors_other = pca.process_list_of_vectors(input_vectors_other)
        # Creates a Composed object that first generates an input vector
        # and then performs a PCA analysis on it.
        i = Compose(i, pca)

    ### Object creating output vector when called;
    # Maps each reference input vector (as a tuple, so it can be used as
    # a dict key) to the known vector of that player.
    ref_dict = {}
    for name, input_vector in zip(players_ref, input_vectors_ref):
        ref_dict[tuple(input_vector)] = player_vector[name]

    oknn = KNNOutputVectorGenerator(ref_dict, k=k)

    # Create list of output vectors using weighted kNN algorithm approximating output_vector
    output_vectors_other = [oknn(input_vector) for input_vector in input_vectors_other]
    output_vectors_ref = [oknn(input_vector) for input_vector in input_vectors_ref]

    # Known vectors of the reference players, their kNN estimates, and the
    # kNN estimates for the remaining players.
    print_me(players_ref, [player_vector[name] for name in players_ref], filename_play_ref_orig)
    print_me(players_ref, output_vectors_ref, filename_play_ref)
    print_me(players_other, output_vectors_other, filename_play_other)

    _log("\nNow plot that in Gnuplot by:")
    #_log('set xrange[0:%d] ; set yrange[0:%d]'%(size,size))
    _log('set xtics 1 ; set ytics 1')
    _log('set grid ; set size square')
    _log('plot "%s" using 2:3:1 with labels font "arial,11" point lt 10 pt 4 left, "%s" using 2:3:1 with labels font "arial,11" point lt 12 pt 4 left'%(filename_play_other, filename_play_ref))