clanek++
[gostyle.git] / gostyle.py
blob04a39136dc3372504f6cf3b5d86b1cf99932e2dd
1 #!/usr/bin/python
2 """
This file contains several objects we use to process pattern files for the game of Go.
We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis on the data.
It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format as a regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
10 ...
11 ===== END =====
12 """
13 import re
14 import sys
15 import cPickle
16 import random
17 import subprocess
18 import os
19 from math import sqrt
20 from itertools import izip,count
class Const:
    """Class used to hold global const variables, such as the pat file format."""

    # One line of a pat file: optional leading whitespace, an integer count
    # (group 1), optional whitespace and the pattern description, i.e. the
    # rest of the line (group 2).
    # Written as a raw string so that `\s' and `\d' reach the regexp engine
    # verbatim instead of being (invalid) string escape sequences.
    pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
def print_vector(vector, where=None):
    """
    Helper method for printing vector (list of floats).

    The items are written on one line, separated by single spaces and
    terminated by a newline. An empty vector produces an empty line.

    vector -- iterable of values; each one is formatted with `str'.
    where -- file-like object with a `write' method. When omitted (or None),
             the current `sys.stdout' is used.
    """
    if where is None:
        # Look the default up at call time, so that a redirected sys.stdout
        # is honoured (a `where=sys.stdout' default is frozen at import).
        where = sys.stdout
    where.write(' '.join([str(x) for x in vector]) + '\n')
def print_set_to_file(data, filename):
    """
    Helper method for printing datasets for neural network.

    FORMAT of the file
        number_of_pairs len_of_input_vector len_of_output_vector
        first_input_vector
        first_output_vector
        second_input_vector
        ...

    data -- sequence of (input_vector, output_vector) pairs. The two vector
            lengths in the header are taken from the first pair, so an
            empty list raises IndexError.
    filename -- path of the file to write; it is created or overwritten.
    """
    # Build the header before opening the file, so that an unusable dataset
    # fails without truncating an already existing file.
    header = "%d %d %d\n" % (len(data), len(data[0][0]), len(data[0][1]))
    # The `with' block closes the file even if writing one of the vectors fails.
    with open(filename, 'w') as fout:
        fout.write(header)
        for i, o in data:
            print_vector(i, fout)
            print_vector(o, fout)
def dump_object_to_file(object, filename):
    """
    Helper function to save an object to file using cPickle module.

    object -- the object to pickle.
    filename -- path of the file to write; it is created or overwritten.
    """
    # Protocol -1 selects the highest available pickle protocol, which is a
    # binary format. The file therefore has to be opened in binary mode;
    # text mode can corrupt the stream where line endings are translated.
    with open(filename, 'wb') as f:
        cPickle.dump(object, f, -1)
def load_object_from_file(filename):
    """
    Helper function to recover an object from file using cPickle module.

    filename -- path of a file containing a pickled object.
    Returns the unpickled object.

    WARNING: unpickling can execute arbitrary code. Only load files that
    come from a trusted source.
    """
    # Pickle streams are binary data, so the file is read in binary mode.
    with open(filename, 'rb') as f:
        return cPickle.load(f)
class VectorGenerator(object):
    """
    Abstract class of callables that return a vector (list of floats).
    Has output_dim param.
    """

    def __call__(self):
        # Nothing to generate here; concrete subclasses override this.
        raise NotImplementedError()
class VectorToVector(VectorGenerator):
    """
    Abstract class of callables that take a vector (list of floats) and
    return a vector. Has input_dim and output_dim params.
    """

    def __call__(self, vector):
        # No transformation is defined here; concrete subclasses override this.
        raise NotImplementedError()
74 ###
75 ### PCA Analysis
### Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
try:
    from numpy import array
    import mdp

    class PCA(VectorToVector):
        """
        Object performing a PCA analysis on either a given vector (see `__call__' function),
        or on a list of vectors (see `process_list_of_vectors' function).
        """

        def __init__(self, list_of_train_vectors, *args, **kwargs):
            """
            Train the PCA node on `list_of_train_vectors'.
            Any further arguments are handed over to `mdp.nodes.PCANode'.
            """
            self.pca = mdp.nodes.PCANode(*args, **kwargs)
            self.pca.train(array(list_of_train_vectors))
            self.pca.stop_training()
            self.input_dim = self.pca.input_dim

        def __call__(self, vector):
            """Run one vector through the trained node, return the result as a list."""
            # The node is fed a one-row array and the single result row is taken back.
            return list(self.pca(array([vector]))[0])

        def process_list_of_vectors(self, list_of_vectors):
            """Run all the vectors through the trained node, return a list of lists."""
            return [list(vec) for vec in self.pca(array(list_of_vectors))]

        def get_projection_info(self):
            """Return whatever the node's `get_recmatrix' method returns."""
            return self.pca.get_recmatrix()

        def get_eigenvalues(self):
            """Return the `d' attribute of the trained node."""
            return self.pca.d

        def get_eigenvectors(self):
            """Return the `v' attribute of the trained node as a list."""
            return list(self.pca.v)

except ImportError as e:
    # `except ... as' is valid from Python 2.6 on, unlike the old comma form.
    sys.stderr.write("Warning: %s. PCA will not work.\n" % (str(e),))

    class PCA(VectorToVector):
        """Default dummy class for PCA, not very useful: it passes its input through."""

        def __init__(self, *args, **kwargs):
            # Accept and ignore anything the real PCA would take.
            pass

        def __call__(self, list_of_vectors):
            """Return the argument unchanged."""
            return list_of_vectors

        def process_list_of_vectors(self, list_of_vectors):
            """Return the argument unchanged."""
            return list_of_vectors
class Compose(VectorGenerator):
    """
    A class used as a composer of different objects, such as InputVectorGenerator and PCA:
    the vector produced by the generator is passed through the vector-to-vector object.
    Use this if you want to e.g. generate PCA processed vectors.
    """

    def __init__(self, vector_generator, vector_to_vector):
        """
        vector_generator -- instance of VectorGenerator, produces the vector.
        vector_to_vector -- instance of VectorToVector, post-processes it.
        Raises TypeError if either argument is of a wrong type.
        """
        required_types = (
            (vector_generator, VectorGenerator),
            (vector_to_vector, VectorToVector),
        )
        for candidate, required in required_types:
            if not isinstance(candidate, required):
                raise TypeError
        self.vector_generator = vector_generator
        self.vector_to_vector = vector_to_vector
        # NOTE: the dimension compatibility check below is disabled.
        # if vector_generator.output_dim != vector_to_vector.input_dim:
        #     raise RuntimeError("Dimensions of Composed object mismatch.")

    def __call__(self, *args, **kwargs):
        """Call the generator with the given arguments and post-process its result."""
        generated = self.vector_generator(*args, **kwargs)
        return self.vector_to_vector(generated)
class OccurenceVectorGenerator(VectorGenerator):
    """
    A class used to generate input vectors based on the number of occurences of some input patterns.
    The object is initialized with a file of patterns. It takes the topmost `num_features' patterns
    (i.e. the patterns on the first `num_features' lines of that file).
    """

    def __init__(self, main_pat_file, num_features):
        """
        main_pat_file -- path of the pat file defining the patterns of interest.
        num_features -- number of patterns to take, also the length of the
                        generated vectors (`output_dim').
        """
        self.output_dim = num_features
        self.filename = main_pat_file
        self.generate_top_pattern_dict()

    def generate_top_pattern_dict(self):
        """
        Read the first `output_dim' lines of the main pat file and fill in
        `top_pattern_dict' (pattern -> index) and `top_pattern_str'
        (index -> pattern).

        Raises IOError if a line does not have the pat file format.
        """
        rexp = re.compile(Const.pat_file_regexp)
        self.top_pattern_dict = {}
        self.top_pattern_str = {}
        # The `with' block closes the file even when a line is malformed.
        with open(self.filename) as input_file:
            for i, line in enumerate(input_file):
                if i >= self.output_dim:
                    break
                match = rexp.match(line)
                if not match:
                    # Same error as `__call__' raises for a broken pat file.
                    raise IOError("Wrong file format: " + self.filename)
                s = match.group(2)
                self.top_pattern_dict[s] = i
                self.top_pattern_str[i] = s

    def __call__(self, pat_file):
        """
        Return the occurence vector for `pat_file': item `i' is the sum of
        the counts listed in the file for the i-th top pattern.

        Raises IOError if a line does not have the pat file format and
        RuntimeError if the number of known top patterns differs from
        `output_dim' (e.g. the main pat file was too short).
        """
        num_patterns = len(self.top_pattern_dict)
        vector = [0] * num_patterns
        rexp = re.compile(Const.pat_file_regexp)
        # Number of lines that hit one of the top patterns so far.
        # NOTE(review): the counter is taken to advance on matching lines
        # only, so reading stops once as many hits as top patterns were
        # seen -- confirm against the upstream file.
        found = 0
        with open(pat_file) as input_file:
            for line in input_file:
                match = rexp.match(line)
                if not match:
                    raise IOError("Wrong file format: " + pat_file)
                pattern = match.group(2)
                if pattern in self.top_pattern_dict:
                    vector[self.top_pattern_dict[pattern]] += int(match.group(1))
                    found += 1
                if found >= num_patterns:
                    break
        if len(vector) != self.output_dim:
            raise RuntimeError("Got %d top patterns from %s, expected %d."
                               % (len(vector), self.filename, self.output_dim))
        return vector

    def stringof(self, i):
        """Return the pattern string stored under index `i'."""
        return self.top_pattern_str[i]
class Rescale(VectorToVector):
    """
    Class that rescales vectors to a given interval <a, b>.

    Subclasses may override `norm' (applied to the values before scaling)
    and `renorm' (applied to the resulting fraction from <0, 1>); both are
    the identity here.
    """

    def __init__(self, a=-1.0, b=1.0):
        """Set up the target interval. Raises RuntimeError unless a < b."""
        if a >= b:
            raise RuntimeError("a must be < b")
        self.tot = b - a            # width of the target interval
        self.avg = (a + b) * 0.5    # its middle, used for constant vectors
        self.a = a

    def norm(self, x):
        """Pre-scaling transformation of one value (identity)."""
        return x

    def renorm(self, x):
        """Transformation of the fraction computed for one value (identity)."""
        return x

    def __call__(self, vector):
        """
        Return the rescaled copy of `vector'.
        A vector whose smallest and largest items coincide after `norm' is
        mapped to the middle of the interval. Raises RuntimeError for an
        empty vector.
        """
        if len(vector) == 0:
            raise RuntimeError
        # Shift that moves the smallest (normed) value to zero.
        shift = 0 - self.norm(min(vector))
        span = self.norm(max(vector)) + shift
        if span == 0:
            return [self.avg] * len(vector)
        rescaled = []
        for x in vector:
            fraction = float(self.norm(x) + shift) / span
            rescaled.append(self.tot * self.renorm(fraction) + self.a)
        return rescaled
201 import math;
class LogRescale(Rescale):
    """
    Rescale variant: the values are log-transformed before scaling and the
    fraction computed for each value is passed through a logistic curve.
    """

    def norm(self, x):
        """Return the natural logarithm of x + 1."""
        shifted = x + 1
        return math.log(shifted)

    def renorm(self, x):
        """Return 1 / (1 + e^(-6 * (x - 0.5)))."""
        exponent = -6 * (x - 0.5)
        return 1 / (1 + math.exp(exponent))
class InputVectorGenerator(VectorGenerator):
    """
    First we generate an occurence vector by OccurenceVectorGenerator.
    Then, an input vector is generated by rescaling the occurences of the topmost patterns
    to the interval <-1.0, 1.0> using the `rescale' class. See `__call__' function.

    The underlying OccurenceVectorGenerator is available as the `ovg' attribute.
    """

    def __init__(self, *args, **kwargs):
        """
        rescale -- optional keyword argument, class used for the rescaling;
                   it is instantiated as rescale(-1.0, 1.0). Defaults to `Rescale'.
        All the other arguments are passed on to OccurenceVectorGenerator.
        """
        rescale = kwargs.pop('rescale', Rescale)
        self.ovg = OccurenceVectorGenerator(*args, **kwargs)
        self.gen = Compose(self.ovg, rescale(-1.0, 1.0))

    def __call__(self, *args, **kwargs):
        """Return the rescaled occurence vector; arguments go to the composed generator."""
        return self.gen(*args, **kwargs)

    # NOTE: there used to be an `ovg(self)' accessor method here. It could
    # never be reached through an instance, because the `ovg' attribute
    # assigned in `__init__' shadows it, so it was dropped as dead code.
def linear_combination(list_of_vectors, coefs):
    """
    Return the linear combination of the vectors with the given coefficients,
    i.e. a list whose i-th item is the sum of coefs[p] * list_of_vectors[p][i].

    The length of the result is the length of the first vector.
    Returns None for an empty list of vectors and raises Exception if the
    number of vectors differs from the number of coefficients.
    """
    if len(list_of_vectors) != len(coefs):
        raise Exception("len(list_of_vectors) != len(coefs)")
    if len(list_of_vectors) == 0:
        return
    width = len(list_of_vectors[0])
    combined = [0] * width
    # Accumulate one weighted vector at a time, in the order given.
    for weight, vector in zip(coefs, list_of_vectors):
        for position in range(width):
            combined[position] += weight * vector[position]
    return combined
def get_random_norm_coefs(num):
    """
    Return a list of random coefficients that add up to one.

    `num' - 1 random cut points plus the end point 1 are sorted and the
    coefficients are the gaps between the consecutive points (starting
    from 0). Hence there are `num' coefficients for num >= 1; for
    num < 1 the result is [1].
    """
    cut_points = [random.random() for _ in range(num - 1)]
    cut_points.append(1)
    cut_points.sort()
    # Each cut point paired with its predecessor; the first one with 0.
    lower_bounds = [0] + cut_points[:-1]
    return [upper - lower for lower, upper in zip(lower_bounds, cut_points)]
class Combinator(object):
    """
    Generates new (input, output) pairs as random linear combinations of
    subsets of the given pairs; the coefficients come from
    `get_random_norm_coefs'.
    """

    def __init__(self, num_lincombs=1, skip_subset_len=None, max_len=2):
        """
        num_lincombs -- number of random combinations made of each subset.
        skip_subset_len -- collection of subset sizes to leave out;
                           defaults to [0], i.e. the empty subset is skipped.
        max_len -- largest subset size to generate, 0 means no limit.
        """
        if skip_subset_len is None:
            # A fresh list for each instance; a `[0]' default in the
            # signature would be shared by all of them.
            skip_subset_len = [0]
        self.num_lincombs = num_lincombs
        self.skip_subset_len = skip_subset_len
        self.max_len = max_len

    def get_subsets(self, set):
        """
        Return the list of all the sub-lists of the sequence `set' that have
        at most `max_len' items (all of them if `max_len' is 0).
        """
        if len(set) == 0:
            return [[]]
        sub = self.get_subsets(set[1:])
        # Copy the head into a list, so that any sliceable sequence works.
        head = list(set[:1])
        # Subsets without the first item, then the ones with it, size permitting.
        with_head = [head + subset for subset in sub
                     if self.max_len == 0 or len(subset) + 1 <= self.max_len]
        return sub + with_head

    def combine(self, data):
        """
        data -- sequence of (input_vector, output_vector) pairs.
        Return a list of (input_vector, output_vector) pairs, `num_lincombs'
        of them for each admissible subset of `data'. Both vectors of a new
        pair are combined with the same coefficients.
        """
        combinations = []
        for subset in self.get_subsets(list(range(len(data)))):
            if len(subset) in self.skip_subset_len:
                continue
            input_vectors = [data[index][0] for index in subset]
            output_vectors = [data[index][1] for index in subset]
            for _ in range(self.num_lincombs):
                coefs = get_random_norm_coefs(len(subset))
                combinations.append((linear_combination(input_vectors, coefs),
                                     linear_combination(output_vectors, coefs)))
        return combinations
class PlayerStrategyIdentificator(object):
    """
    Object holding information about default strategies for players.

    Built from a dictionary strategy -> players, it keeps the reverse
    mapping `player_strategy' and the lists `all_players' and
    `all_strategies'.
    """

    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        strategies = []
        players_seen = []
        strategy_of = {}
        for strategy, players in self.strategy_players.items():
            strategies.append(strategy)
            for player in players:
                players_seen.append(player)
                # A player listed under more strategies keeps the last one.
                strategy_of[player] = strategy

        self.player_strategy = strategy_of
        self.all_players = players_seen
        self.all_strategies = strategies

    def __call__(self, player_name):
        """Return the strategy of the player; raises KeyError if unknown."""
        return self.player_strategy[player_name]
class StrategyOutputVectorGenerator(VectorGenerator):
    """
    This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
    It is initialized with a list of strategies `valid_strategies' it shall take into account.
    When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
    that corresponds to the strategy like this: 1.0 at the index of the player's strategy, -1.0 everywhere else.
    """

    def __init__(self, strategy_players, valid_strategies=None):
        """
        strategy_players -- dictionary strategy -> players, passed on to
                            PlayerStrategyIdentificator.
        valid_strategies -- strategies to take into account, their order
                            gives the indices in the output vector;
                            defaults to all the strategies.
        """
        self.identificator = PlayerStrategyIdentificator(strategy_players)
        if valid_strategies is None:
            valid_strategies = self.identificator.all_strategies
        self.valid_strategy_index = {}
        for index, s in enumerate(valid_strategies):
            self.valid_strategy_index[s] = index

    def __call__(self, player_name):
        """
        Return the output vector of the player, or None if the player is
        unknown or their strategy is not among the valid ones.
        """
        # Only the two lookups can raise the KeyError we want to swallow.
        try:
            player_strat = self.identificator(player_name)
            player_strat_index = self.valid_strategy_index[player_strat]
        except KeyError:
            return None
        return [1.0 if i == player_strat_index else -1.0
                for i in range(len(self.valid_strategy_index))]
class PlanarOutputVectorGenerator(VectorGenerator):
    """
    Class that explicitly returns predefined output vectors for given players.
    The vectors are given as a dictionary player name -> output vector.
    """

    def __init__(self, player_vector):
        self.player_vector = player_vector
        # Names of the players we have a vector for.
        self.players = player_vector.keys()

    def __call__(self, player_name):
        """Return the vector stored for the player, None for an unknown player."""
        try:
            vector = self.player_vector[player_name]
        except KeyError:
            vector = None
        return vector
class KNNOutputVectorGenerator(VectorToVector):
    """ k-NearestNeighbour output vector generator."""

    def __init__(self, ref_dict, k=5, weight_param=0.8, dist_mult=10):
        """
        ref_dict is a dictionary of reference input/output vectors.
            e.g. ref_dict = { (1.0,2.0):(9.0,16.0,21.0) }
        k is the number of nearest reference vectors taken into account,
        weight_param is the base of the weight function (see `weight_fc')
        and dist_mult is the factor the distances are multiplied by
        (see `distance').
        """
        self.ref_dict = ref_dict
        self.k = k
        # NOTE: the attribute name is misspelled; kept as it is, since it is
        # visible from outside of the class.
        self.weigth_param = weight_param
        self.dist_mult = dist_mult

    def __call__(self, player_vector):
        """
        Return the linear combination of the output vectors of the `k'
        reference vectors closest to `player_vector', each one weighted by
        `weight_fc' of its distance.
        """
        ranked = sorted([(self.distance(ref_vec, player_vector), ref_vec)
                         for ref_vec in self.ref_dict.keys()])
        nearest = ranked[:self.k]

        ref_output_vecs = [self.ref_dict[ref_vec] for _, ref_vec in nearest]
        coefs = [self.weight_fc(dist) for dist, _ in nearest]

        return linear_combination(ref_output_vecs, coefs)

    def weight_fc(self, distance):
        """Return the weight for a given distance: weight_param ** distance."""
        return self.weigth_param ** (distance)

    def distance(self, vec1, vec2):
        """
        Return the euclidean distance of the vectors multiplied by `dist_mult'.
        Raises RuntimeError if the vectors differ in length.
        """
        if len(vec1) != len(vec2):
            raise RuntimeError("Dimensions of vectors mismatch.")
        squares = [(float(a) - float(b)) ** 2 for a, b in zip(vec1, vec2)]
        ### the multiplicative constant (10 by default) is empirically determined for correct scaling
        return self.dist_mult * sqrt(sum(squares))
class NeuralNet(VectorToVector):
    """
    A class encapsulating the Neural Network as a classifier.

    The network is trained by the external `./gnet/gnet_train' program and
    then queried through a running `./gnet/gnet_run' process: a vector is
    written to its standard input as one line, the answer is read as one
    line from its standard output. Call `close' when done.
    """

    def __init__(self, train_set_filename, layers=3, neurons=10, desired_error=0.0001, max_epochs=1000, activation='sigmoid'):
        """
        Train the network on `train_set_filename' (a path relative to the
        current directory) and start the process answering the queries.
        The remaining parameters are passed to `gnet_train' as the values
        of its -l, -n, -e, -p and -a options respectively.

        Raises RuntimeError if the training fails or the runner exits with
        an error right away.
        """
        self.null = open(os.devnull, 'w')
        self.netfile = '/tmp/gonet.net'

        # Each option and its value must be a separate item of the list;
        # "-p" and "-a" used to be glued into a single "<max_epochs>-a" item
        # by a missing comma.
        args = ["./gnet/gnet_train", "-s",
                "-l", str(layers),
                "-n", str(neurons),
                "-e", str(desired_error),
                "-p", str(max_epochs),
                "-a", activation,
                "-o", self.netfile,
                "./" + train_set_filename]
        try:
            ret = subprocess.call(args, stdout=self.null)
        except OSError:
            # The trainer could not be started; do not leak the open file.
            self.null.close()
            raise
        if ret:
            self.null.close()
            raise RuntimeError("Could not train the neural network.")

        args = ["./gnet/gnet_run", "-s", self.netfile]
        self.p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        # poll() gives None while the process is running, which must not be
        # compared with a number.
        ret = self.p.poll()
        if ret is not None and ret > 0:
            raise RuntimeError("Could not run the neural network.")

    def __call__(self, vector):
        """Send the vector to the network, return its answer as a list of floats."""
        self.p.stdin.write(' '.join([str(a) for a in vector]) + '\n')
        # Make sure the query has left our buffers before waiting for the reply.
        self.p.stdin.flush()
        a = self.p.stdout.readline()
        return [float(num) for num in a.split()]

    def close(self):
        """Close the pipes of the runner process and remove the network file."""
        self.p.stdin.close()
        self.p.stdout.close()
        self.null.close()
        os.remove(self.netfile)
if __name__ == '__main__':
    # Nothing to run here, the module only provides definitions.
    sys.stderr.write("This is just a library file...\n")