8 from data_about_players
import Data
12 from kohonen
import DistanceMetric
14 print >>sys
.stderr
, "Could not locate kohonen.py library. Visit http://code.google.com/p/python-kohonen/ to obtain it manually."
class ComponentEuclideanMetric(DistanceMetric):
    '''Euclidean distance (L-2 norm), optionally restricted to one component.

    With ``k=None`` the metric is the ordinary euclidean distance, summed over
    the last axis of the difference ``x - y``.  With an integer ``k`` only the
    entry selected by ``k`` contributes, i.e. the result is
    ``sqrt(((x - y) ** 2)[k])``.
    '''

    def __init__(self, k=None):
        '''Remember the component selector.

        k -- index of the single component to measure, or None to use all
             components.
        '''
        # NOTE(review): this assignment and the `d = x - y` / branch lines in
        # __call__ were not visible in the reviewed listing; they are
        # reconstructed from the signature and the two return expressions.
        self.k = k

    def __call__(self, x, y):
        '''Return the distance between ``x`` and ``y`` (numpy arrays).'''
        d = x - y
        # Compare against None explicitly so that k == 0 (the first
        # component) still selects the single-component branch.
        if self.k is not None:
            # The selector lives on the instance; a bare `k` is not in scope
            # inside __call__ and would raise NameError.
            # NOTE(review): `[self.k]` indexes the FIRST axis of d * d --
            # confirm this is the intended axis when x is multi-dimensional.
            return numpy.sqrt((d * d)[self.k])
        return numpy.sqrt(numpy.sum(d * d, axis=-1))
# Script entry point (Python 2 syntax: `print >>` and `xrange`).
# NOTE(review): this listing is an extraction with gaps -- the leading
# integers are the original file's line numbers and several of those lines
# are absent.  Names such as num_features, num_linc, num_iter, size, lc,
# vecx, vecy, last, uniq and the initial input_vectors are therefore bound
# on lines that are not visible here.
28 if __name__
== '__main__':
# The input pattern file name comes from the Data module; results are
# written to `filename` further below.
29 main_pat_filename
= Data
.main_pat_filename
30 filename
= 'koh_all.data'
# Players excluded from the map; `players` keeps every other entry of
# Data.players_all in its original order.
33 players_ignore
= [ 'Honinbo Shusaku', 'Kuwahara Shusaku', 'Yasuda Shusaku', 'Go Seigen', 'Cho Tae-hyeon','Rui Naiwei']
34 players_all
= Data
.players_all
35 players
= [ p
for p
in players_all
if p
not in players_ignore
]
36 ### Object creating input vector when called
37 print >>sys
.stderr
, "Creating input vector generator from main pat file:", main_pat_filename
# `i` is called below with a pattern-file path to produce one input vector.
# NOTE: the name `i` is reused later as a loop counter.
38 i
= InputVectorGenerator(main_pat_filename
, num_features
)
40 # Create list of input vectors
# Append the vector generated for the file Data.pat_files_folder + name.
43 input_vectors
+= [i(Data
.pat_files_folder
+ name
)]
# Nothing to train on: report it on stderr.
45 if len(input_vectors
) == 0:
46 print >>sys
.stderr
, "No vectors."
50 # Change this to False, if you do not want to use PCA
53 # Create PCA object, trained on input_vectors
54 print >>sys
.stderr
, "Running PCA."
55 pca
= PCA(input_vectors
, reduce=True)
56 # Perform a PCA on input vectors
57 input_vectors
= pca
.process_list_of_vectors(input_vectors
)
# The map's input dimension is the length of the first (transformed) vector.
58 dim
= len(input_vectors
[0])
# Build the Kohonen map with input dimension `dim`.  The remaining
# Parameters arguments are on lines not visible here.
60 m
= kohonen
.Map(kohonen
.Parameters(dimension
=dim
,
62 learning_rate
=kohonen
.ExponentialTimeseries(-5e-4, 0.5, 0.2),
# Augment the training set with num_linc linear combinations of randomly
# chosen input vectors, collected in `lc`.
68 for i
in xrange(num_linc
):
# Progress report on stderr.
# NOTE(review): with integer num_linc < 100, num_linc/100 is 0 under
# Python 2 and the modulo raises ZeroDivisionError.
69 if i
% (num_linc
/100) == 0:
70 print >>sys
.stderr
, "Generating training set: %d%%\r"%((100*i
)/num_linc
),
# Combine between 2 and 20 (inclusive) input vectors, drawn with
# replacement, using coefficients from get_random_norm_coefs.
72 num
= random
.randint(2, 20)
73 coefs
= get_random_norm_coefs(num
)
74 vecs
= [ random
.choice(input_vectors
) for _
in xrange(num
) ]
75 lc
.append(linear_combination(vecs
, coefs
))
# Convert both the original and the synthetic vectors to numpy arrays;
# training samples are drawn from their concatenation `total`.
79 input_vectors
= [numpy
.array(vec
) for vec
in input_vectors
]
80 input_vectors_lc
= [numpy
.array(vec
) for vec
in lc
]
81 total
= input_vectors
+ input_vectors_lc
84 print >>sys
.stderr
, "Training Kohonen net."
# Training loop: num_iter iterations, each ending in one learning step.
86 for i
in xrange(num_iter
):
# Every num_iter/10 iterations, estimate the error as the mean, over 10
# random training samples, of the smallest value in m.distances(sample).
# NOTE(review): num_iter/10 is 0 for integer num_iter < 10 (Python 2),
# which makes the modulo raise ZeroDivisionError.
87 if i
% (num_iter
/10) == 0:
88 err
= sum( [ m
.distances(random
.choice(total
)).min() for _
in xrange(10) ] ) / 10.0
89 print >>sys
.stderr
, "%2d%% (%4d): error = %5f alpha = %5f"%(100 * i
/num_iter
,i
, err
, m
._learning
_rate
.last
)
91 print >>sys
.stderr
, "Current error is good enough."
# The trailing `and False` makes this condition always false, so the reset
# branch never runs.
93 if i
> 0 and err
> 1.5 and False:
94 print >>sys
.stderr
, "This error sucks, reset."
# One learning step on a randomly chosen training vector.
97 m
.learn( random
.choice(total
) )
99 #im = m.neuron_heatmap()
# Replace the map's metric with the single-component metric (k=0) before
# computing the distance heatmap for the first input vector.
103 m._metric = ComponentEuclideanMetric(0)
104 im2 = m.distance_heatmap(input_vectors[0])
# Winner neuron for each original (non-synthetic) input vector.
109 winner_neurons
= [ m
.winner(input_vector
) for input_vector
in input_vectors
]
110 #print winner_neurons[0]/16, winner_neurons[0]%16
111 ### Get rid of overlapping labels in the plot by merging names of players represented by the same neuron
# Split each winner index into two grid coordinates: index / size and
# index % size (presumably integer indices, so `/` floors under Python 2).
114 for neuron
in winner_neurons
:
115 vecx
.append(neuron
/size
)
116 vecy
.append(neuron
%size
)
# zip() returns a list in Python 2, which the slice trip[1:] below relies on.
117 trip
= zip( vecx
, vecy
, players
)
# An entry with the same two coordinates as `last` is merged into it by
# appending its name after a literal backslash-n (two characters).
# NOTE: `next` shadows the builtin of the same name.
121 for next
in trip
[1:]:
122 if next
[0] == last
[0] and next
[1] == last
[1]:
123 last
[2] += "\\n" + next
[2]
# Write one line per entry of `uniq`: the name with whitespace runs replaced
# by '_', followed by x and y.
# NOTE(review): the file is opened without a `with` block; the matching
# close is not visible in this listing -- confirm it exists.
128 f
= open(filename
, 'w')
129 print >>sys
.stderr
, "Saving output_vectors to file:", filename
131 for x
,y
,name
in uniq
:
132 name_to_print
= '_'.join(name
.split())
133 print >>f
, name_to_print
, x
, y
# Print the gnuplot commands that plot the saved file on a size x size grid.
137 print >> sys
.stderr
, "\nNow plot that in Gnuplot by:"
138 print >> sys
.stderr
, 'set xrange[0:%d] ; set yrange[0:%d]'%(size
,size
)
139 print >> sys
.stderr
, 'set xtics 1 ; set ytics 1'
140 print >> sys
.stderr
, 'set grid ; set size square'
141 print >> sys
.stderr
, 'plot "%s" using 2:3:1 with labels font "arial,10" point lt 10 pt 5 left'%(filename
,)