3 This file contains several objects we use to process pattern files for the game of Go.
4 We use it to generate input sets for a neural network (libfann), or if we want to perform a PCA analysis to data.
5 It generates input vectors from pattern files. See `Const.pat_file_regexp' for file format in regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
20 from itertools
import izip
,count
23 """Class used to hold global const variables, such as the pat file format."""
24 pat_file_regexp
= '^\s*(\d+)\s*(.+)$'
26 def print_vector(vector
, where
=sys
.stdout
):
27 """Helper method for printing vector (list of floats)."""
32 def print_set_to_file( data
, filename
):
34 Helper method for printing datasets for neural network.
36 number_of_pairs len_of_input_vector len_of_output_vector
42 def print_set( data
, where
):
43 print >> where
, len(data
), len(data
[0][0]), len(data
[0][1])
45 print_vector(i
, where
)
46 print_vector(o
, where
)
47 fout
= open(filename
, 'w')
51 def dump_object_to_file(object, filename
):
52 """Helper function to save an object to file using cPickle module."""
54 cPickle
.dump(object, f
,-1)
57 def load_object_from_file(filename
):
58 """Helper function to recover an object from file using cPickle module."""
60 object = cPickle
.load(f
)
64 class VectorGenerator(object):
65 """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
67 raise NotImplementedError
class VectorToVector(VectorGenerator):
    """Abstract base class for vector transformations.

    Instances are callables that take a vector (list of floats) and return
    a vector. Subclasses are expected to provide `input_dim` and
    `output_dim` params.
    """

    def __call__(self, vector):
        # Concrete subclasses must supply the actual transformation.
        raise NotImplementedError
76 ### Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
77 ### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
79 from numpy
import array
81 class PCA(VectorToVector
):
83 Object performing a PCA analysis on either a given vector (see `__call__' function),
84 or on a list of vectors (see `process_list_of_vectors' function).
def __init__(self, list_of_train_vectors, *args, **kwargs):
    """Create an `mdp` PCA node and train it on the given vectors.

    list_of_train_vectors -- training vectors; converted with numpy `array`.
    *args, **kwargs -- passed through unchanged to `mdp.nodes.PCANode`.
    """
    node = mdp.nodes.PCANode(*args, **kwargs)
    self.pca = node
    training_data = array(list_of_train_vectors)
    node.train(training_data)
    node.stop_training()
    # Expose the trained node's input dimension on this wrapper as well.
    self.input_dim = node.input_dim
91 def __call__(self
, vector
):
92 return list(self
.pca(array([vector
]))[0])
93 def process_list_of_vectors(self
, list_of_vectors
):
94 return [ list(vec
) for vec
in self
.pca(array(list_of_vectors
)) ]
95 def get_projection_info(self
):
96 return self
.pca
.get_recmatrix()
97 def get_eigenvalues(self
):
99 def get_eigenvectors(self
):
100 return list(self
.pca
.v
)
102 except ImportError, e
:
103 print >>sys
.stderr
, "Warning: %s. PCA will not work."%(str(e
))
105 class PCA(VectorToVector
):
106 """Default dummy class for PCA, not very useless."""
107 def __init__(self
, *args
, **kwargs
):
109 def __call__(self
, list_of_vectors
):
110 return list_of_vectors
111 def process_list_of_vectors(self
, list_of_vectors
):
112 return list_of_vectors
115 class Compose(VectorGenerator
):
117 A class used as a composer of different objects, such as InputVectorGenerator and PCA.
118 Use this if you want to e.g. generate PCA processed vectors.
120 def __init__(self
, vector_generator
, vector_to_vector
):
121 if not isinstance(vector_generator
, VectorGenerator
):
123 if not isinstance(vector_to_vector
, VectorToVector
):
125 self
.vector_generator
= vector_generator
126 self
.vector_to_vector
= vector_to_vector
127 # if vector_generator.output_dim != vector_to_vector.input_dim:
128 # raise RuntimeError("Dimensions of Composed object mismatch.")
129 def __call__(self
, *args
, **kwargs
):
130 return self
.vector_to_vector(self
.vector_generator(*args
, **kwargs
))
132 class OccurenceVectorGenerator(VectorGenerator
):
134 A class used to generate input vectors based on a relative number of occurences of some input patterns.
135 The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
137 def generate_top_pattern_dict(self
):
138 rexp
=re
.compile(Const
.pat_file_regexp
)
139 self
.top_pattern_dict
= {}
140 self
.top_pattern_str
= {}
142 input_file
= open(self
.filename
)
143 for line
in input_file
:
144 if i
>= self
.output_dim
:
146 s
= rexp
.match(line
).group(2)
147 self
.top_pattern_dict
[s
] = i
148 self
.top_pattern_str
[i
] = s
def __init__(self, main_pat_file, num_features):
    """Remember the pattern file and feature count, then build the pattern tables.

    main_pat_file -- name of the pattern file; stored as `self.filename`.
    num_features -- number of patterns to use; stored as `self.output_dim`.
    """
    self.filename = main_pat_file
    self.output_dim = num_features
    # Both attributes must be set before this call: it reads them.
    self.generate_top_pattern_dict()
157 def __call__(self
, pat_file
):
158 vector
= [0]*len(self
.top_pattern_dict
)
159 rexp
=re
.compile(Const
.pat_file_regexp
)
161 input_file
= open(pat_file
)
162 for line
in input_file
:
163 match
= rexp
.match(line
)
165 raise IOError("Wrong file format: " + pat_file
)
166 if match
.group(2) in self
.top_pattern_dict
:
167 index
=self
.top_pattern_dict
[match
.group(2)]
168 vector
[index
] += int(match
.group(1))
170 if i
>= len(self
.top_pattern_dict
):
173 if len(vector
) != self
.output_dim
:
177 def stringof(self
, i
):
178 return self
.top_pattern_str
[i
]
180 class Rescale(VectorToVector
):
181 """Class that rescales vectors to a given interval!"""
182 def __init__(self
, a
=-1.0, b
=1.0):
184 raise RuntimeError("a must be < b")
186 self
.avg
= (a
+ b
) * 0.5
192 def __call__(self
, vector
):
195 to_zero
= 0 - self
.norm(min(vector
))
196 maximum
= self
.norm(max(vector
)) + to_zero
198 return [ self
.avg
for _
in vector
]
199 return [ self
.tot
* self
.renorm(float(self
.norm(x
) + to_zero
) / maximum
) + self
.a
for x
in vector
]
202 class LogRescale(Rescale
):
206 return 1 / (1 + math
.exp(-6*(x
-0.5)));
208 class InputVectorGenerator(VectorGenerator
):
210 First we generate an occurence vector by OccurenceVectorGenerator.
211 Then, an input vector is generated as a relative number of occurences of the topmost patterns.
212 The occurences are mapped so that the most frequently used
213 one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.
215 def __init__(self
, *args
, **kwargs
):
217 rescale
= kwargs
['rescale']
218 del kwargs
['rescale']
221 self
.ovg
= OccurenceVectorGenerator(*args
, **kwargs
)
222 self
.gen
= Compose(self
.ovg
, rescale(-1.0, 1.0))
223 def __call__(self
, *args
, **kwargs
):
224 return self
.gen(*args
, **kwargs
)
228 def linear_combination(list_of_vectors
, coefs
):
229 if len(list_of_vectors
) != len(coefs
):
230 raise Exception("len(list_of_vectors) != len(coefs)")
231 if len(list_of_vectors
) == 0:
233 len_vec
=len(list_of_vectors
[0])
235 for p
in xrange(len(list_of_vectors
)):
236 for i
in xrange(len_vec
):
237 res_vec
[i
] += coefs
[p
] * list_of_vectors
[p
][i
]
240 def get_random_norm_coefs( num
):
242 rnd_nums
= [ random
.random() for i
in xrange(num
-1) ] + [1]
245 for next
in rnd_nums
:
246 coefs
.append(next
-first
)
250 class Combinator(object):
251 def __init__(self
, num_lincombs
= 1, skip_subset_len
= [0], max_len
= 2):
252 self
.num_lincombs
= num_lincombs
253 self
.skip_subset_len
= skip_subset_len
254 self
.max_len
= max_len
255 def get_subsets(self
, set):
258 sub
=self
.get_subsets(set[1:])
259 return sub
+ filter( lambda x
: ( self
.max_len
==0 or len(x
)<=self
.max_len
), [ set[:1]+subset
for subset
in sub
] )
260 def combine(self
, data
):
262 for subset
in self
.get_subsets(range(len(data
))):
263 if len(subset
) in self
.skip_subset_len
:
265 input_vectors
= [ data
[index
][0] for index
in subset
]
266 output_vectors
= [ data
[index
][1] for index
in subset
]
267 for i
in xrange(self
.num_lincombs
):
268 coefs
= get_random_norm_coefs(len(subset
))
269 combinations
+= [(linear_combination(input_vectors
, coefs
), linear_combination(output_vectors
, coefs
))]
class PlayerStrategyIdentificator(object):
    """Lookup of the default strategy for each known player.

    Built from a mapping of strategy -> iterable of player names. Calling
    the object with a player name returns that player's strategy.
    """

    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        strategy_of = {}
        known_players = []
        known_strategies = []
        self.player_strategy = strategy_of
        self.all_players = known_players
        self.all_strategies = known_strategies
        for strategy_name, members in strategy_players.items():
            known_strategies.append(strategy_name)
            for member in members:
                known_players.append(member)
                # A player listed under several strategies keeps the last one seen.
                strategy_of[member] = strategy_name

    def __call__(self, player_name):
        # Unknown players raise KeyError from the dict lookup.
        return self.player_strategy[player_name]
289 class StrategyOutputVectorGenerator(VectorGenerator
):
291 This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
292 It is initialized with a list of strategies `valid_strategies' it shall take into acount.
293 When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
294 that corresponds to the strategy like this.
296 def __init__(self
, strategy_players
, valid_strategies
=None):
297 self
.identificator
= PlayerStrategyIdentificator(strategy_players
)
298 if valid_strategies
== None:
299 valid_strategies
= self
.identificator
.all_strategies
301 self
.valid_strategy_index
={}
302 for s
in valid_strategies
:
303 self
.valid_strategy_index
[s
]= index
305 def __call__(self
, player_name
):
307 player_strat
= self
.identificator(player_name
)
308 player_strat_index
= self
.valid_strategy_index
[player_strat
]
309 return [ 1.0 if i
== player_strat_index
else -1.0 for i
in xrange(len(self
.valid_strategy_index
)) ]
313 class PlanarOutputVectorGenerator(VectorGenerator
):
314 """Class that explicitly returns predefined output vectors for given players."""
315 def __init__(self
, player_vector
):
316 self
.player_vector
= player_vector
317 self
.players
= player_vector
.keys()
318 def __call__(self
, player_name
):
320 return self
.player_vector
[player_name
]
324 class KNNOutputVectorGenerator(VectorToVector
):
325 """ k-NearestNeighbour output vector generator."""
326 def __init__(self
, ref_dict
, k
=5, weight_param
=0.8, dist_mult
=10):
328 ref_dict is a dictionary of refence input/output vectors.
329 e.g. ref_dict= { (1.0,2.0):(9.0,16.0,21.0)
331 self
.ref_dict
= ref_dict
333 self
.weigth_param
= weight_param
334 self
.dist_mult
= dist_mult
335 def __call__(self
, player_vector
):
337 for ref_vec
in self
.ref_dict
.keys():
338 distance
.append((self
.distance(ref_vec
, player_vector
), ref_vec
))
341 #for p,v in distance:
342 # print "%2.3f"%(float(p),),
344 ref_output_vecs
= [ self
.ref_dict
[b
] for a
,b
in distance
[:self
.k
] ]
345 coefs
= [ self
.weight_fc(a
) for a
,b
in distance
[:self
.k
] ]
347 return linear_combination(ref_output_vecs
, coefs
)
348 def weight_fc(self
, distance
):
349 return self
.weigth_param
** (distance
)
350 def distance(self
, vec1
, vec2
):
351 if len(vec1
) != len(vec2
):
352 raise RuntimeError("Dimensions of vectors mismatch.")
353 ### the 10* multiplicative constant is empirically determined for correct scaling
354 return self
.dist_mult
* sqrt(sum([ (float(a
) - float(b
))**2 for a
,b
in zip(vec1
,vec2
)]))
356 class NeuralNet(VectorToVector
):
357 """A class encapsulating the Neural Network as a classifier."""
358 def __init__( self
, train_set_filename
, layers
=3, neurons
=10, desired_error
=0.0001, max_epochs
=1000, activation
='sigmoid' ):
359 self
.null
= open('/dev/null','w')
360 self
.netfile
= '/tmp/gonet.net'
362 args
= ["./gnet/gnet_train", "-s", "-l", str(layers
), "-n", str(neurons
) ]
363 args
+= ["-e", str(desired_error
), "-p", str(max_epochs
) + "-a", activation
, "-o", self
.netfile
, "./"+ train_set_filename
]
364 ret
= subprocess
.call(args
,stdout
=self
.null
)
366 raise RuntimeError("Could not train the neural network.")
368 args
= [ "./gnet/gnet_run", "-s", self
.netfile
]
369 self
.p
= subprocess
.Popen(args
, stdin
=subprocess
.PIPE
, stdout
=subprocess
.PIPE
)
370 if self
.p
.poll() > 0:
371 raise RuntimeError("Could not run the neural network.")
373 def __call__(self
, vector
):
374 self
.p
.stdin
.write(' '.join([str(a
) for a
in vector
]) + '\n')
375 a
= self
.p
.stdout
.readline()
376 return [ float(num
) for num
in a
.split()]
380 self
.p
.stdout
.close()
382 os
.remove(self
.netfile
)
if __name__ == '__main__':
    # This module is meant to be imported; say so when run directly.
    sys.stderr.write("This is just a library file..." + "\n")