8 from data_about_players
import Data
12 from kohonen
import DistanceMetric
14 print >>sys
.stderr
, "Could not locate kohonen.py library. Visit http://code.google.com/p/python-kohonen/ to obtain it manually."
class ComponentEuclideanMetric(DistanceMetric):
    '''Implements the euclidean distance (L-2 norm), optionally for one index.

    With ``k`` left at ``None`` the metric is the plain euclidean distance,
    summed over the last axis.  With ``k`` set, the result is the square root
    of the ``k``-th entry of the element-wise squared difference.

    NOTE(review): this block was restored from a damaged listing.  The
    assignment of ``self.k``, the computation of ``d`` and the test on
    ``self.k`` were not present in the listing and are reconstructed from how
    those names are used -- confirm against the original file.
    '''

    def __init__(self, k=None):
        '''Store the optional index ``k`` (``None`` means full distance).'''
        self.k = k

    def __call__(self, x, y):
        '''Return the distance between ``x`` and ``y``.

        ``x`` and ``y`` must support subtraction and element-wise
        multiplication (presumably numpy arrays -- TODO confirm).
        '''
        d = x - y
        squared = d * d
        # Compare against None explicitly so that k == 0 still selects an
        # entry (the script constructs this metric with integer arguments).
        if self.k is not None:
            # NOTE(review): plain indexing picks along the FIRST axis of the
            # squared difference; verify that is the intended axis when x or
            # y is 2-D.
            return numpy.sqrt(squared[self.k])
        return numpy.sqrt(numpy.sum(squared, axis=-1))
# Script body: builds input vectors for a list of players, optionally runs
# PCA, trains a Kohonen map on them and writes per-player map coordinates to
# a file that can be plotted with Gnuplot.
#
# NOTE(review): this listing is a lossy extraction. Each statement is split
# over several lines, the integer that starts a statement is the line number
# in the original file, and gaps in that numbering (e.g. 32-33, 42-43, 48-50,
# 60-63, 72-74, 81, 83-84, 105-108, 124-126, 130-133) are statements that are
# not shown. num_features, num_linc, num_iter, size, lc, vecx, vecy, last and
# uniq are used below but assigned in lines that are not visible here.
29 if __name__
== '__main__':
# Pattern-file name comes from the Data module; results go to `filename`.
30 main_pat_filename
= Data
.main_pat_filename
31 filename
= 'koh_all.data2'
# Player names excluded from the run.
34 players_ignore
= [ 'Honinbo Shusaku', 'Kuwahara Shusaku', 'Yasuda Shusaku', 'Go Seigen', 'Cho Tae-hyeon','Rui Naiwei']
35 players_all
= Data
.players_all
# Keep every player from Data.players_all that is not in players_ignore.
36 players
= [ p
for p
in players_all
if p
not in players_ignore
]
37 ### Object creating input vector when called
38 print >>sys
.stderr
, "Creating input vector generator from main pat file:", main_pat_filename
# NOTE(review): `i` is later reused as a loop variable, which rebinds this
# generator object.
39 i
= InputVectorGenerator(main_pat_filename
, num_features
)
41 # Create list of input vectors
# Appends the vector produced by i() for the file Data.pat_files_folder + name.
# The initialisation of input_vectors and the loop that binds `name`
# (orig. lines 42-43) are not visible.
44 input_vectors
+= [i(Data
.pat_files_folder
+ name
)]
# Report an empty input set; what follows the message (orig. line 48) is not
# visible in this listing.
46 if len(input_vectors
) == 0:
47 print >>sys
.stderr
, "No vectors."
51 # Change this to False, if you do not want to use PCA
54 # Create PCA object, trained on input_vectors
55 print >>sys
.stderr
, "Running PCA."
56 pca
= PCA(input_vectors
, reduce=True)
57 # Perform a PCA on input vectors
58 input_vectors
= pca
.process_list_of_vectors(input_vectors
)
# Dimension of the (possibly PCA-processed) vectors, passed to the map below.
59 dim
= len(input_vectors
[0])
# Generate num_linc synthetic vectors, each a linear_combination() of 2..20
# randomly chosen input vectors (random.randint is inclusive on both ends)
# with coefficients from get_random_norm_coefs().
64 for i
in xrange(num_linc
):
# Progress message every num_linc/100 iterations.
# NOTE(review): if num_linc is an int below 100, Python 2 integer division
# makes num_linc/100 zero and the modulo raises ZeroDivisionError -- confirm
# num_linc's value.
65 if i
% (num_linc
/100) == 0:
66 print >>sys
.stderr
, "Generating training set: %d%%\r"%((100*i
)/num_linc
),
68 num
= random
.randint(2, 20)
69 coefs
= get_random_norm_coefs(num
)
70 vecs
= [ random
.choice(input_vectors
) for _
in xrange(num
) ]
71 lc
.append(linear_combination(vecs
, coefs
))
# Convert both the real and the synthetic vectors to numpy arrays; `total`
# is the combined pool that training samples are drawn from.
75 input_vectors
= [numpy
.array(vec
) for vec
in input_vectors
]
76 input_vectors_lc
= [numpy
.array(vec
) for vec
in lc
]
77 total
= input_vectors
+ input_vectors_lc
# Construct the Kohonen map. Only the dimension and learning_rate arguments
# are visible; the remaining Parameters arguments and the closing
# parentheses (orig. lines 81, 83-84) are missing from this listing.
80 m
= kohonen
.Map(kohonen
.Parameters(dimension
=dim
,
82 learning_rate
=kohonen
.ExponentialTimeseries(-5e-4, 0.5, 0.2),
# NOTE(review): this `return` belongs to a definition whose header
# (orig. line 84) is not visible.
85 return random
.choice(total
)
89 print >>sys
.stderr
, "Training Kohonen net."
# Training loop: one m.learn() call per iteration on a random vector from
# `total`.
91 for i
in xrange(num_iter
):
# Every num_iter/10 iterations, estimate the error as the mean over 10 random
# samples of the smallest value returned by m.distances(), and log it together
# with m._learning_rate.last.
92 if i
% (num_iter
/10) == 0:
93 err
= sum( [ m
.distances(random
.choice(total
)).min() for _
in xrange(10) ] ) / 10.0
94 print >>sys
.stderr
, "%2d%% (%4d): error = %5f alpha = %5f"%(100 * i
/num_iter
,i
, err
, m
._learning
_rate
.last
)
# The condition guarding this message (orig. line 95) is not visible.
96 print >>sys
.stderr
, "Current error is good enough."
# The trailing `and False` makes this condition always false, so the reset
# branch is effectively disabled.
98 if i
> 0 and err
> 1.5 and False:
99 print >>sys
.stderr
, "This error sucks, reset."
102 m
.learn( random
.choice(total
) )
104 #im = m.neuron_heatmap()
# Replace the map's (private) _metric attribute with a
# ComponentEuclideanMetric built from `i`, then take a heatmap. The statement
# that binds `i` here (orig. lines 105-108) is not visible; how im2 is used
# afterwards is not visible either.
109 m
._metric
= ComponentEuclideanMetric(i
)
110 im2
= m
.neuron_heatmap()
# Winning neuron (as returned by m.winner) for every real input vector.
115 winner_neurons
= [ m
.winner(input_vector
) for input_vector
in input_vectors
]
116 #print winner_neurons[0]/16, winner_neurons[0]%16
117 ### Get rid of overlapping labels in the plot by merging names of players represented by the same neuron
# Split each winner value into two coordinates: neuron/size and neuron%size.
120 for neuron
in winner_neurons
:
121 vecx
.append(neuron
/size
)
122 vecy
.append(neuron
%size
)
# Pair the coordinates with the player names, in the order of `players`.
123 trip
= zip( vecx
, vecy
, players
)
# Merge the name of an entry into `last` when both coordinates match.
# NOTE(review): `next` shadows the builtin; `last[2] += ...` needs `last` to
# be a mutable sequence -- its assignment (orig. lines 124-126) is not visible.
127 for next
in trip
[1:]:
128 if next
[0] == last
[0] and next
[1] == last
[1]:
129 last
[2] += "\\n" + next
[2]
# Output file is opened for writing.
# NOTE(review): no matching close() appears in the visible lines.
134 f
= open(filename
, 'w')
135 print >>sys
.stderr
, "Saving output_vectors to file:", filename
# One output row per entry of uniq: the name with whitespace runs replaced by
# underscores, followed by its x and y values.
137 for x
,y
,name
in uniq
:
138 name_to_print
= '_'.join(name
.split())
139 print >>f
, name_to_print
, x
, y
# Print Gnuplot commands for plotting the saved file to stderr.
143 print >> sys
.stderr
, "\nNow plot that in Gnuplot by:"
144 print >> sys
.stderr
, 'set xrange[0:%d] ; set yrange[0:%d]'%(size
,size
)
145 print >> sys
.stderr
, 'set xtics 1 ; set ytics 1'
146 print >> sys
.stderr
, 'set grid ; set size square'
147 print >> sys
.stderr
, 'plot "%s" using 2:3:1 with labels font "arial,10" point lt 10 pt 5 left'%(filename
,)