3 This code creates input vectors and performs PCA on them. Each PCA-transformed vector is then printed along with
4 the player name, in a format suitable e.g. for plotting with gnuplot.
9 player_name first_principal_component_of_player's_input_vector second_principal_component ...
10 second_player_name ...
14 from gostyle
import * #print_vector, OccurenceVectorGenerator, Rescale, PlayerStrategyIdentificator, PCA, InputVectorGenerator
15 from itertools
import izip
, count
16 from data_about_players
import Data
# Script entry point: set up file names and the player list, create the
# input vector generator and collect the per-player input vectors.
19 if __name__
== '__main__':
# Pattern file handed to InputVectorGenerator below.
20 main_pat_filename
= Data
.main_pat_filename
# Output files: pickled generator+PCA object, raw vectors, PCA-transformed
# vectors and the PCA projection info.
22 pickle_filename
= 'input_gen.pickle'
23 filename_raw
= 'raw.data'
24 filename_pca
= 'pca.data'
25 filename_proj
= 'pca.dimdata'
# Players excluded from the analysis; the trailing comment holds names that
# are currently not excluded.
26 players_ignore
= ['Go Seigen', 'Ishida Yoshio', 'Yamashita Keigo']# 'Cho Tae-hyeon', 'Shao Zhenzhong', 'Wu Songsheng', 'Honinbo Shusaku', 'Kuwahara Shusaku', 'Yasuda Shusaku', 'Go Seigen', 'Suzuki Goro', 'Jie Li' ] #, 'Cho Chikun', 'Takemiya Masaki']
27 players_all
= Data
.players_all
28 #players_all = Data.strength_all
# Keep every player from players_all that is not listed in players_ignore.
29 players
= [ p
for p
in players_all
if p
not in players_ignore
]
30 #players = Data.player_vector.keys()
32 ### Objects creating input and output vectors when called
33 print >>sys
.stderr
, "Creating input vector generator from main pat file:", main_pat_filename
# NOTE(review): num_features is not defined in the visible lines - confirm
# where it is set.
34 ivg
= InputVectorGenerator(main_pat_filename
, num_features
)
36 # Create pairs of (input vector, player name)
39 #input_vectors += [ivg( Data.pat_files_folder + name)]
# Append one vector: the values returned by ivg for the path
# Data.pat_files_folder + name, each converted to float.
# NOTE(review): the initialisation of input_vectors and the loop that binds
# `name` are not visible here - presumably a loop over `players`; confirm.
40 input_vectors
+= [[float(occ
) for occ
in ivg(Data
.pat_files_folder
+ name
)]]
# Report on stderr when no input vector was collected.
# NOTE(review): what happens after the message is not visible here.
42 if len(input_vectors
) == 0:
43 print >>sys
.stderr
, "No input vectors.", main_pat_filename
# Train the PCA object, pickle it together with the input generator, write
# the raw vectors to a file and finally project the vectors with the PCA.
46 # Create PCA object, trained on input_vectors
# output_dim=10 is passed to PCA - presumably the number of components kept;
# confirm against the PCA class.
47 pca
= PCA(input_vectors
, output_dim
=10)
48 #pca = PCA(input_vectors, reduce=True)
50 # Dump the input generator to file
# Unlike the other progress messages, this one goes to stdout, not stderr.
51 print "Saving the pca input generator object to file:", pickle_filename
# The object written is Compose(ivg, pca) - presumably the generator followed
# by the PCA step; confirm against Compose.
52 dump_object_to_file(Compose(ivg
, pca
), pickle_filename
)
54 print >> sys
.stderr
, "Writing raw vectors to file: ", filename_raw
# One line per player: the quoted player name followed by the raw vector.
# NOTE(review): no close of `f` is visible before it is rebound later.
55 f
=open(filename_raw
, 'w')
56 for name
,vector
in izip(players
, input_vectors
):
57 print >> f
, '"'+name
+'"', vector
60 #print >> sys.stderr, "Eigenvalues (top 5): ",
61 #for val in pca.get_eigenvalues()[:5]:
62 # print >> sys.stderr, "%2.5f"%(val,),
65 # Perform a PCA on input vectors
# From here on input_vectors holds the result of
# pca.process_list_of_vectors, not the raw vectors.
66 input_vectors
= pca
.process_list_of_vectors(input_vectors
)
68 ### Now we rescale vectors, so that each component fits on -1.0 to 1.0
69 ### this makes a very nice plot!!
72 ### Normalize each component separately
73 # We need to transpose input_vectors - a list of per-player-vector-of-pca-component
74 # to get list of vectors of per-component-vector-of-player-data
def transpose(list_of_vectors):
    """Transpose a list of equal-length vectors.

    Turns a list of per-player vectors (one value per PCA component) into a
    list of per-component vectors (one value per player), and vice versa.

    Args:
        list_of_vectors: iterable of sequences, all of the same length.

    Returns:
        A list of tuples; the i-th tuple holds the i-th element of every
        input vector. An empty input yields an empty list.
    """
    # Materialise the result: callers iterate it more than once, and a bare
    # zip() is only a list on Python 2.
    return list(zip(*list_of_vectors))
# Rescale every PCA component across players, then write the transformed
# vectors and the projection info to files and print gnuplot hints.
# The inner transpose gives one vector per component, `r` is applied to each,
# and the outer transpose restores one vector per player.
# NOTE(review): `r` is not defined in the visible lines - presumably the
# rescaling callable described in the comments above; confirm.
77 input_vectors
= transpose([ r(vector
) for vector
in transpose(input_vectors
)])
80 print >> sys
.stderr
, "Writing output to file: ", filename_pca
81 # prints vectors along with player names
# NOTE(review): `f` is rebound here; no close of the previous file object is
# visible.
82 f
=open(filename_pca
, 'w')
83 for name
,vector
in izip(players
, input_vectors
):
84 # Substitute ' ' by '_' to allow for gnuplot plotting (recognizing columns correctly)
85 name_to_print
= '_'.join(name
.split())
# One output line per component: player name, 1-based component index, value.
86 for p
, i
in izip(vector
, count()):
87 print >> f
, name_to_print
, i
+1, p
90 print >> sys
.stderr
, "Writing projection info to file: ", filename_proj
91 f
= open(filename_proj
, 'w')
92 P
= pca
.get_projection_info()
# Both loops start at index 1, so row 0 and column 0 of P are never written.
# NOTE(review): confirm that skipping index 0 is intended.
93 for y
in xrange(1, P
.shape
[0]):
94 for x
in xrange(1, P
.shape
[1]):
# Output line: row index, column index, the entry P[y,x] and the string
# returned by ivg.ovg.stringof(x).
95 print >> f
, y
, x
, P
[y
,x
], ivg
.ovg
.stringof(x
)
# Print gnuplot commands for plotting the file written to filename_pca.
98 print >> sys
.stderr
, "\nNow print that by:"
99 print >> sys
.stderr
, 'gnuplot> set xrange[1:%d]'%(pca
.pca
.output_dim
+1)
100 print >> sys
.stderr
, 'plot "%s" using 2:3:1 with labels font "arial,10" left point pt 4 offset 1,0'%(filename_pca
,)
def pearson_coef(vec1, vec2):
    """Return the Pearson correlation coefficient of two vectors.

    Both vectors are centred on their means; the result is the dot product
    of the centred vectors divided by the product of their Euclidean norms.

    Args:
        vec1: sequence of numbers.
        vec2: sequence of numbers of the same length as vec1.

    Returns:
        A float in [-1.0, 1.0] (up to rounding), or NaN when the coefficient
        is undefined: empty input, or a vector whose values are all equal.

    Raises:
        AssertionError: if the vectors differ in length.
    """
    # Local import keeps the function independent of star-imported names.
    from math import sqrt

    assert len(vec1) == len(vec2)

    def norm(vec):
        return sqrt(sum(x * x for x in vec))

    def center(vec):
        def mean(vec):
            return sum(vec) / float(len(vec))
        c = mean(vec)
        return [x - c for x in vec]

    # No data points: the mean itself would divide by zero.
    if len(vec1) == 0:
        return float('nan')

    vec1, vec2 = center(vec1), center(vec2)
    denominator = norm(vec1) * norm(vec2)
    # Zero variance in either vector makes the correlation undefined;
    # return NaN instead of raising ZeroDivisionError.
    if denominator == 0:
        return float('nan')
    return sum(x * y for x, y in zip(vec1, vec2)) / denominator
# Print two tables of Pearson coefficients: PCA components against style
# vectors, and style vectors against each other.
# NOTE(review): several loop headers, conditions and the definitions of
# colh, coll, colstop and pad are not visible in this excerpt.
116 # [ first_component , second_component
117 # first_component = [ 1 player, 2 player, ...
# Transposing gives one vector per PCA component, holding that component's
# value for every player.
118 pca_components
= transpose(input_vectors
)
# Four empty style vectors, filled below from Data.questionare_total.
119 styles
= [[],[],[],[]]
# Append the value Data.questionare_total[p][s], converted to float, to style
# vector s. NOTE(review): the loops binding `p` and `s` are not visible here.
122 styles
[s
] += [float(Data
.questionare_total
[p
][s
])]
# Add a fifth vector: Data.player_year[p] as float for every player.
123 styles
+= [[ float(Data
.player_year
[p
]) for p
in players
]]
125 eiv
= pca
.get_eigenvalues()
# Header row of the first table; the trailing commas keep the Python 2 print
# statements on the same output line.
130 print "Eigenvalue \ Style:",
132 print "%d. "%(s
+1,) + (' ' if s
!= 3 else ''),
# One row per PCA component: eigenvalue, 1-based component number, then the
# coefficients computed below.
134 for num
, c
in enumerate(pca_components
):
135 print "%1.7f %2d.: "%(eiv
[num
],num
+1,),
# NOTE(review): the loop binding `s` for this call is not visible here.
137 pc
= pearson_coef(c
,s
)
# c1/c2 wrap the printed number; the condition choosing between colh and coll
# is not visible here - presumably colour codes, confirm.
143 c1
, c2
= colh
, colstop
145 c1
, c2
= coll
, colstop
148 print pad
+ c1
+ "%1.3f"%(pc
,) + c2
,
# Second table: every style vector against every style vector. Rows with
# num <= 3 are labelled by number; the remaining row (player_year) is 'Y'.
156 for num
, c
in enumerate(styles
):
157 print "%2d.:"%(num
+1,) if num
<= 3 else ' Y: ',
158 for num2
, s
in enumerate(styles
):
162 pc
= pearson_coef(c
,s
)
168 c1
, c2
= colh
, colstop
170 c1
, c2
= coll
, colstop
173 print pad
+ c1
+ "%1.3f"%(pc
,) + c2
,