Merge branch 'master' of ssh://repo.or.cz/srv/git/gostyle
[gostyle.git] / gostyle.py
blob8d11e78c8940c6bb65683ca06313dbd225c2dbc5
1 #!/usr/bin/python
2 """
3 This file contains several objects we use to process pattern files for the game of Go.
4 We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis on the data.
5 It generates input vectors from pattern files. See `Const.pat_file_regexp' for file format in regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
10 ...
11 ===== END =====
12 """
13 import re
14 import sys
15 import cPickle
16 import random
17 from itertools import izip,count
class Const:
    """
    Class used to hold global const variables, such as the pat file format.

    pat_file_regexp -- regular expression matched against every line of a
        pattern file.  Group 1 is the run of digits at the start of the line,
        group 2 is the rest of the line (the pattern description).
    """
    # Raw string, so that `\s' and `\d' reach the regexp engine as written
    # instead of relying on Python passing unknown string escapes through.
    pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
def print_vector(vector, where=sys.stdout):
    """
    Helper method for printing vector (list of floats).

    The elements are formatted with str(), separated by single spaces and
    written to `where' as one line.  An empty vector gives an empty line.

    vector -- iterable of values to print
    where  -- object with a write() method; the default is the sys.stdout
              that was current when this module was imported.  None also
              means sys.stdout.
    """
    if where is None:
        # `print >> None' wrote to standard output; keep accepting None.
        where = sys.stdout
    # A single write, so the result does not depend on any pending
    # `softspace' state a previous print statement left on the stream.
    where.write(" ".join([str(x) for x in vector]) + "\n")
def print_set_to_file(data, filename):
    """
    Helper method for printing datasets for neural network.

    data     -- non-empty sequence of (input_vector, output_vector) pairs;
                the vector lengths in the header are taken from the first pair
    filename -- path of the file to write; it is opened with mode 'w', so
                existing content is overwritten

    An empty `data' raises IndexError (the file has been opened by then).

    FORMAT of the file
        number_of_pairs len_of_input_vector len_of_output_vector
        first_input_vector
        first_output_vector
        second_input_vector
        ...
    """
    def print_set(data, where):
        header = (len(data), len(data[0][0]), len(data[0][1]))
        where.write("%d %d %d\n" % header)
        for i, o in data:
            print_vector(i, where)
            print_vector(o, where)

    fout = open(filename, 'w')
    try:
        print_set(data, fout)
    finally:
        # Close the file even when writing fails half way through.
        fout.close()
def dump_object_to_file(object, filename):
    """
    Helper function to save an object to file using cPickle module.

    object   -- any picklable object
    filename -- path of the file to write; existing content is overwritten
    """
    # Protocol -1 selects the highest available pickle protocol, which is a
    # binary format, so the file has to be opened in binary mode.
    f = open(filename, 'wb')
    try:
        cPickle.dump(object, f, -1)
    finally:
        f.close()
def load_object_from_file(filename):
    """
    Helper function to recover an object from file using cPickle module.

    filename -- path of a file holding a pickled object
    Returns the unpickled object.

    WARNING: unpickling can execute arbitrary code; only load files that
    come from a trusted source.
    """
    # Pickles may be in a binary protocol, hence binary mode.
    f = open(filename, 'rb')
    try:
        return cPickle.load(f)
    finally:
        f.close()
class VectorGenerator(object):
    """
    Abstract base class of objects that return a vector (list of floats)
    when called.  Implementations are expected to have an `output_dim' param.
    """
    def __call__(self):
        # Nothing to generate here; subclasses provide the implementation.
        raise NotImplementedError()
class VectorToVector(VectorGenerator):
    """
    Abstract base class of objects that take a vector (list of floats) and
    return a vector.  Implementations are expected to have `input_dim' and
    `output_dim' params.
    """
    def __call__(self, vector):
        # Nothing to transform here; subclasses provide the implementation.
        raise NotImplementedError()
71 ###
72 ### PCA Analysis
73 ### Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
74 ### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
75 try:
76 from numpy import array
77 import mdp
78 class PCA(VectorToVector):
79 """
80 Object performing a PCA analysis on either a given vector (see `__call__' function),
81 or on a list of vectors (see `process_list_of_vectors' function).
82 """
83 def __init__(self, list_of_train_vectors, *args, **kwargs):
84 self.pca = mdp.nodes.PCANode(*args, **kwargs)
85 self.pca.train(array(list_of_train_vectors))
86 self.input_dim = self.pca.input_dim
87 def __call__(self, vector):
88 return list(self.pca(array([vector]))[0])
89 def process_list_of_vectors(self, list_of_vectors):
90 return [ list(vec) for vec in self.pca(array(list_of_vectors)) ]
91 def get_projection_info(self):
92 return self.pca.get_recmatrix()
94 except ImportError, e:
95 print >>sys.stderr, "Warning: %s. PCA will not work."%(str(e))
97 class PCA(VectorToVector):
98 """Default dummy class for PCA, not very useless."""
99 def __init__(self, *args, **kwargs):
100 pass
101 def __call__(self, list_of_vectors):
102 return list_of_vectors
103 def process_list_of_vectors(self, list_of_vectors):
104 return list_of_vectors
class Compose(VectorGenerator):
    """
    Composer of two objects: a VectorGenerator and a VectorToVector
    (such as InputVectorGenerator and PCA).  Calling the composed object
    calls the generator and passes its result through the transformation.
    Use this if you want to e.g. generate PCA processed vectors.
    """
    def __init__(self, vector_generator, vector_to_vector):
        stages_ok = (isinstance(vector_generator, VectorGenerator)
                     and isinstance(vector_to_vector, VectorToVector))
        if not stages_ok:
            raise TypeError
        self.vector_generator = vector_generator
        self.vector_to_vector = vector_to_vector
        # The dimensions of the two stages are deliberately not compared;
        # the check below is disabled:
        #   if vector_generator.output_dim != vector_to_vector.input_dim:
        #       raise RuntimeError("Dimensions of Composed object mismatch.")

    def __call__(self, *args, **kwargs):
        # All arguments go to the generator; the transformation only sees
        # the generated vector.
        generated = self.vector_generator(*args, **kwargs)
        return self.vector_to_vector(generated)
class OccurenceVectorGenerator(VectorGenerator):
    """
    A class used to generate input vectors based on a number of occurences of some input patterns.
    The object is initialized with a file of patterns. It takes the topmost `num_features' patterns
    (the first `num_features' lines of the file); each of them gets one position in the vector.
    """
    def __init__(self, main_pat_file, num_features):
        self.output_dim = num_features
        self.filename = main_pat_file
        self.generate_top_pattern_dict()

    def generate_top_pattern_dict(self):
        """
        Read the first `output_dim' lines of the main pattern file and fill
        `top_pattern_dict' (pattern -> index) and `top_pattern_str' (index -> pattern).
        Raises IOError if one of those lines does not match `Const.pat_file_regexp'.
        """
        rexp = re.compile(Const.pat_file_regexp)
        self.top_pattern_dict = {}
        self.top_pattern_str = {}
        input_file = open(self.filename)
        try:
            for i, line in enumerate(input_file):
                if i >= self.output_dim:
                    break
                match = rexp.match(line)
                if not match:
                    # Same error as `__call__' gives for a malformed file.
                    raise IOError("Wrong file format: " + self.filename)
                s = match.group(2)
                self.top_pattern_dict[s] = i
                self.top_pattern_str[i] = s
        finally:
            input_file.close()

    def __call__(self, pat_file):
        """
        Return a list of `output_dim' numbers: for every top pattern the sum of the
        counts of the lines of `pat_file' holding that pattern.
        Raises IOError if a line of `pat_file' does not match `Const.pat_file_regexp',
        and RuntimeError if fewer than `output_dim' distinct top patterns were loaded.
        """
        num_top = len(self.top_pattern_dict)
        vector = [0] * num_top
        rexp = re.compile(Const.pat_file_regexp)
        found = 0
        input_file = open(pat_file)
        try:
            for line in input_file:
                match = rexp.match(line)
                if not match:
                    raise IOError("Wrong file format: " + pat_file)
                pattern = match.group(2)
                if pattern in self.top_pattern_dict:
                    vector[self.top_pattern_dict[pattern]] += int(match.group(1))
                    found += 1
                    # Stop reading once as many matching lines were seen
                    # as there are top patterns.
                    if found >= num_top:
                        break
        finally:
            input_file.close()
        if len(vector) != self.output_dim:
            # Happens when the main pattern file gave fewer distinct
            # patterns than `num_features' asked for.
            raise RuntimeError("expected %s top patterns, but only %d were loaded from %s"
                               % (self.output_dim, len(vector), self.filename))
        return vector

    def stringof(self, i):
        """Return the pattern string stored for index `i'."""
        return self.top_pattern_str[i]
class Rescale(VectorToVector):
    """
    Class that rescales vectors to a given interval!
    The smallest element of a vector is mapped to `a', the biggest one to `b' and
    the rest linearly in between.
    """
    def __init__(self, a=-1.0, b=1.0):
        """Raises RuntimeError if a > b."""
        if a > b:
            raise RuntimeError
        self.a = a
        self.tot = b - a    # length of the target interval

    def __call__(self, vector):
        """
        Return the rescaled copy of `vector' (a new list).
        Raises RuntimeError for an empty vector.
        A vector whose elements are all equal gives a vector of 0.0's,
        whatever the interval is.
        """
        if len(vector) == 0:
            raise RuntimeError
        to_zero = 0 - min(vector)           # shift that moves the minimum to 0
        maximum = max(vector) + to_zero     # the maximum after the shift
        if maximum == 0:
            return [ 0.0 for _ in vector ]
        # float() forces a true division; with an all-integer interval and
        # vector, Python 2 would otherwise truncate the result.
        return [ float(self.tot * (x + to_zero)) / maximum + self.a for x in vector ]
class InputVectorGenerator(VectorGenerator):
    """
    First we generate an occurence vector by OccurenceVectorGenerator.
    Then, an input vector is generated as a relative number of occurences of the topmost patterns.
    The occurences are rescaled by Rescale(-1.0, 1.0), so that the most frequently used
    one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.

    The underlying OccurenceVectorGenerator is available as the `ovg' attribute.
    """
    def __init__(self, *args, **kwargs):
        # All arguments are passed on to OccurenceVectorGenerator.
        # `ovg' is a plain attribute.  No accessor method of that name is
        # defined: the instance attribute would shadow it, so it could
        # never be called.
        self.ovg = OccurenceVectorGenerator(*args, **kwargs)
        self.gen = Compose(self.ovg, Rescale(-1.0, 1.0))

    def __call__(self, *args, **kwargs):
        return self.gen(*args, **kwargs)
def linear_combination(list_of_vectors, coefs):
    """
    Return the linear combination of the vectors with the given coefficients,
    i.e. a list whose i-th element is the sum of coefs[p] * list_of_vectors[p][i].

    The result is as long as the first vector.  Returns None when there are
    no vectors.  Raises Exception when the number of vectors and the number
    of coefficients differ.
    """
    if len(list_of_vectors) != len(coefs):
        raise Exception("len(list_of_vectors) != len(coefs)")
    if not len(list_of_vectors):
        return None
    width = len(list_of_vectors[0])
    combined = [0] * width
    for weight, vector in zip(coefs, list_of_vectors):
        for pos in range(width):
            combined[pos] += weight * vector[pos]
    return combined
def get_random_norm_coefs( num ):
    """
    Return a list of random non-negative coefficients that sum up to 1.

    `num' - 1 random points are drawn from [0, 1); together with 1 they are
    sorted, and the coefficients are the distances between neighbouring
    points (the first one measured from 0).  For `num' >= 1 the list has
    `num' elements; for `num' <= 1 the result is [1].
    """
    cut_points = sorted([ random.random() for _ in range(num - 1) ] + [1])
    lower_ends = [0] + cut_points[:-1]
    return [ upper - lower for lower, upper in zip(lower_ends, cut_points) ]
class Combinator(object):
    """
    Object that generates new (input, output) pairs as random linear combinations
    of subsets of given pairs.  The coefficients come from `get_random_norm_coefs'.

    num_lincombs    -- number of combinations generated for every subset
    skip_subset_len -- list of subset lengths that are skipped; defaults to [0]
    max_len         -- maximal length of a subset, 0 means no limit
    """
    def __init__(self, num_lincombs = 1, skip_subset_len = None, max_len = 2):
        self.num_lincombs = num_lincombs
        if skip_subset_len is None:
            # A fresh list for every instance; a list written in the
            # signature would be shared by all instances.
            skip_subset_len = [0]
        self.skip_subset_len = skip_subset_len
        self.max_len = max_len

    def get_subsets(self, set):
        """
        Return the list of all subsets (as lists) of the sequence `set' that have at most
        `max_len' elements; the empty subset is always included.
        """
        if len(set) == 0:
            return [[]]
        sub = self.get_subsets(set[1:])
        head = list(set[:1])
        # Every subset of the tail, once more with the first element in front.
        extended = [ head + subset for subset in sub ]
        if self.max_len != 0:
            extended = [ subset for subset in extended if len(subset) <= self.max_len ]
        return sub + extended

    def combine(self, data):
        """
        Return a list of (input_vector, output_vector) pairs: `num_lincombs' random
        combinations for every subset of `data' whose length is not in `skip_subset_len'.
        The input and the output vector of one pair are built with the same coefficients.

        data -- sequence of (input_vector, output_vector) pairs

        Note that the empty subset cannot be combined (linear_combination raises an
        Exception for it), so 0 should stay in `skip_subset_len'.
        """
        combinations = []
        for subset in self.get_subsets(list(range(len(data)))):
            if len(subset) in self.skip_subset_len:
                continue
            input_vectors = [ data[index][0] for index in subset ]
            output_vectors = [ data[index][1] for index in subset ]
            for _ in range(self.num_lincombs):
                coefs = get_random_norm_coefs(len(subset))
                combinations.append((linear_combination(input_vectors, coefs),
                                     linear_combination(output_vectors, coefs)))
        return combinations
class PlayerStrategyIdentificator(object):
    """
    Object holding information about default strategies for players.

    It is built from `strategy_players', a dictionary mapping a strategy to
    the players that have it, and provides:
      player_strategy -- dictionary player -> strategy
      all_players     -- list of all the players
      all_strategies  -- list of all the strategies
    """
    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        strategies = []
        players_seen = []
        strategy_of = {}
        for strategy, players in self.strategy_players.items():
            strategies.append(strategy)
            for player in players:
                players_seen.append(player)
                # A player listed under more strategies keeps the last one.
                strategy_of[player] = strategy
        self.player_strategy = strategy_of
        self.all_players = players_seen
        self.all_strategies = strategies

    def __call__(self, player_name):
        """Return the strategy of the player; raises KeyError for an unknown player."""
        return self.player_strategy[player_name]
class StrategyOutputVectorGenerator(VectorGenerator):
    """
    This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
    It is initialized with a list of strategies `valid_strategies' it shall take into acount
    (all the strategies of `strategy_players' if it is not given).
    When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
    that corresponds to the strategy like this: the vector has one element per valid strategy, the element
    of the player's strategy is 1.0 and the others are -1.0.
    E.g. with three valid strategies, a player of the second one gets [-1.0, 1.0, -1.0].
    """
    def __init__(self, strategy_players, valid_strategies=None):
        self.identificator = PlayerStrategyIdentificator(strategy_players)
        if valid_strategies is None:
            valid_strategies = self.identificator.all_strategies
        # The position of a strategy in `valid_strategies' is its position
        # in the output vector.
        self.valid_strategy_index = {}
        for index, s in enumerate(valid_strategies):
            self.valid_strategy_index[s] = index

    def __call__(self, player_name):
        """
        Return the output vector of the player, or None if the player is unknown
        or his strategy is not one of the valid strategies.
        """
        try:
            player_strat = self.identificator(player_name)
            player_strat_index = self.valid_strategy_index[player_strat]
        except KeyError:
            return None
        return [ 1.0 if i == player_strat_index else -1.0
                 for i in range(len(self.valid_strategy_index)) ]
class PlanarOutputVectorGenerator(VectorGenerator):
    """
    Class that explicitly returns predefined output vectors for given players.
    `player_vector' is a dictionary mapping a player name to his vector;
    its keys are also kept in `players'.
    """
    def __init__(self, player_vector):
        self.players = player_vector.keys()
        self.player_vector = player_vector

    def __call__(self, player_name):
        """Return the vector stored for the player, or None for an unknown player."""
        try:
            vector = self.player_vector[player_name]
        except KeyError:
            vector = None
        return vector
# Running the module directly does nothing useful; just say so on stderr.
if __name__ == "__main__":
    print >>sys.stderr, "This is just a library file..."