Separated data about players (strategies, ..).
[gostyle.git] / pca.py
blobde6c74836daa60d99393ef9cbb822394b71817d7
1 #!/usr/bin/python
2 """
This code creates input vectors and performs PCA on them. Each PCA-transformed vector is then printed along with
the player name, in a format suitable e.g. for plotting with gnuplot.
5 OUTPUT FORMAT
6 player_name first_principal_component_of_player's_input_vector second_principal_component ...
7 second_player_name ...
8 ...
9 """
10 import sys
11 from gostyle import print_vector, InputVectorGenerator, PlayerStrategyIdentificator, PCA
12 from itertools import izip
13 from data_about_players import Data
15 if __name__ == '__main__':
16 main_pat_filename = Data.main_pat_filename
17 num_features = 500
18 players = PlayerStrategyIdentificator(Data.strategy_players).all_players
19 #players = Data.player_vector.keys()
21 ### Objects creating input and output vectors when called
22 print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
23 i = InputVectorGenerator(main_pat_filename, num_features)
25 # Create pairs of (input vector, player name)
26 input_vectors = []
27 for name in players:
28 input_vectors += [i( Data.pat_files_folder + name)]
30 if len(input_vectors) == 0:
31 print >>sys.stderr, "No input vectors.", main_pat_filename
32 sys.exit()
34 # Create PCA object, trained on input_vectors
35 pca = PCA(input_vectors, reduce=True)
36 #pca = PCA(input_vectors, output_dim=2)
38 # Perform a PCA on input vectors
39 input_vectors = pca.process_list_of_vectors(input_vectors)
41 # prints vectors along with player names
42 for name,vector in izip(players, input_vectors):
43 # Substitute ' ' by '_' to allow for gnuplot plotting (recognizing columns correctly)
44 print '_'.join(name.split()),
45 print_vector(vector)
47 print >>sys.stderr, "Printed %d vectors of dimension %d"%(len(input_vectors), len(input_vectors[0]))