"""
This code creates input vectors and performs PCA on them. Each PCA-transformed vector is then
printed along with the player name, in a form suitable e.g. for plotting with gnuplot.

player_name first_principal_component_of_player's_input_vector second_principal_component ...
second_player_name ...
"""
import sys
from itertools import izip, count

from gostyle import print_vector, OccurenceVectorGenerator, Rescale, PlayerStrategyIdentificator, PCA, InputVectorGenerator
from data_about_players import Data
def transpose(list_of_vectors):
    """Swap rows and columns of a list of vectors.

    The i-th item of the result collects the i-th element of every input
    vector.  This is a thin wrapper around ``zip``, so the result is cut to
    the length of the shortest vector and is whatever ``zip`` returns
    (a list of tuples on Python 2).
    """
    return zip(*list_of_vectors)


if __name__ == '__main__':
    main_pat_filename = Data.main_pat_filename
    # NOTE(review): `num_features` (passed to InputVectorGenerator below) is
    # not assigned anywhere in the visible code; it has to be defined before
    # the generator is constructed.

    # Output files: per-player PCA coordinates and the projection info.
    filename_pca = 'pca.data'
    filename_proj = 'pca.dimdata'

    # Players left out of the analysis.
    players_ignore = [
        'Cho Tae-hyeon', 'Shao Zhenzhong', 'Wu Songsheng', 'Honinbo Shusaku',
        'Kuwahara Shusaku', 'Yasuda Shusaku', 'Go Seigen', 'Suzuki Goro',
        'Jie Li',
    ]  #, 'Cho Chikun', 'Takemiya Masaki']
    players_all = Data.players_all
    players = [p for p in players_all if p not in players_ignore]
    #players = Data.player_vector.keys()

    ### Object creating input vectors when called
    print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
    ivg = InputVectorGenerator(main_pat_filename, num_features)

    # Build one input vector per player, in the same order as `players`
    # (the output loop below pairs names and vectors positionally).
    input_vectors = []
    for name in players:
        #input_vectors += [ivg( Data.pat_files_folder + name)]
        # Every occurrence value is converted to float before the PCA.
        input_vectors.append(
            [float(occ) for occ in ivg(Data.pat_files_folder + name)])

    if not input_vectors:
        print >>sys.stderr, "No input vectors.", main_pat_filename
        # There is nothing to train the PCA on; stop here with a failure
        # status instead of running the remaining steps on an empty list.
        sys.exit(1)

    # Create PCA object, trained on input_vectors
    pca = PCA(input_vectors, output_dim=10)
    #pca = PCA(input_vectors, reduce=True)

    # Perform a PCA on input vectors
    input_vectors = pca.process_list_of_vectors(input_vectors)

    ### Now we rescale vectors, so that each component fits on -1.0 to 1.0
    ### this makes a very nice plot!!
    # NOTE(review): `r` is not assigned anywhere in the visible code;
    # presumably it is a Rescale instance for the -1.0..1.0 range mentioned
    # above -- confirm and define it before the next statement.

    ### Normalize each component separately
    # input_vectors is a list of per-player vectors of PCA components.
    # Transpose it to get one vector per component (holding every player's
    # value), rescale each of those, then transpose back to per-player rows.
    input_vectors = transpose([r(vector) for vector in transpose(input_vectors)])

    print >> sys.stderr, "Writing output to file: ", filename_pca
    # One line per (player, component): name, 1-based component index, value.
    # The file is closed as soon as the block is left.
    with open(filename_pca, 'w') as f:
        for name, vector in izip(players, input_vectors):
            # Substitute ' ' by '_' to allow for gnuplot plotting
            # (recognizing columns correctly)
            name_to_print = '_'.join(name.split())
            for i, p in enumerate(vector):
                print >> f, name_to_print, i + 1, p

    print >> sys.stderr, "Writing projection info to file: ", filename_proj
    with open(filename_proj, 'w') as f:
        P = pca.get_projection_info()
        # NOTE(review): both ranges start at 1, so row 0 and column 0 of P
        # are never written -- confirm that skipping them is intended.
        for y in xrange(1, P.shape[0]):
            for x in xrange(1, P.shape[1]):
                print >> f, y, x, P[y,x], ivg.ovg.stringof(x)

    # Tell the user how to plot the first output file with gnuplot.
    print >> sys.stderr, "\nNow print that by:"
    print >> sys.stderr, 'gnuplot> set xrange[1:%d]'%(pca.pca.output_dim+1)
    print >> sys.stderr, 'plot "%s" using 2:3:1 with labels font "arial,10" left point pt 4 offset 1,0'%(filename_pca,)