Input Vector: log-transform and rescale rel. frequency by logistic function
[gostyle.git] / gostyle.py
blob5a12c1c8dce3172b14b360108a666dd117c1c39e
1 #!/usr/bin/python
2 """
3 This file contains several objects we use to process pattern files for the game of Go.
4 We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
5 It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format as a regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
10 ...
11 ===== END =====
12 """
13 import re
14 import sys
15 import cPickle
16 import random
17 from itertools import izip,count
class Const:
    """Class used to hold global const variables, such as the pat file format."""
    # One line of a pat file: optional leading whitespace, an integer
    # occurrence count (group 1), optional whitespace, and the pattern
    # description up to the end of the line (group 2).
    # Written as a raw string so that `\s' and `\d' reach the regexp engine
    # verbatim instead of relying on Python passing unknown escapes through.
    pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
def print_vector(vector, where=sys.stdout):
    """
    Helper method for printing a vector (list of floats) on a single line.

    Every element is converted with str(); the elements are separated by a
    single space and the line is terminated by a newline. An empty vector
    produces an empty line.

    vector -- iterable of values to print
    where  -- file-like object with a write() method; defaults to the
              sys.stdout that was bound when this function was defined
    """
    # A single write() replaces the per-element `print >> where, x,' loop.
    # The output is the same, and unlike the print-chevron statement it also
    # works when the module is run under Python 3.
    where.write(" ".join(str(x) for x in vector) + "\n")
def print_set_to_file(data, filename):
    """
    Helper method for printing datasets for neural network.

    data     -- non-empty sequence of (input_vector, output_vector) pairs
    filename -- path of the file to (over)write

    FORMAT of the file
        number_of_pairs len_of_input_vector len_of_output_vector
        first_input_vector
        first_output_vector
        second_input_vector
        ...

    The vector lengths in the header are taken from the first pair only.
    Raises IndexError when `data' is empty (the header cannot be computed).
    """
    # The `with' block closes the file even if writing fails half-way,
    # e.g. on the IndexError above or on a malformed pair.
    with open(filename, 'w') as fout:
        first_input, first_output = data[0][0], data[0][1]
        fout.write("%d %d %d\n" % (len(data), len(first_input), len(first_output)))
        for input_vector, output_vector in data:
            print_vector(input_vector, fout)
            print_vector(output_vector, fout)
def dump_object_to_file(object, filename):
    """
    Helper function to save an object to file using cPickle module.

    object   -- any picklable object
    filename -- path of the file to (over)write
    """
    # Protocol -1 selects the highest available pickle protocol, which is a
    # binary format. The file therefore has to be opened in binary mode;
    # text mode may translate newline bytes and corrupt the stream.
    with open(filename, 'wb') as f:
        cPickle.dump(object, f, -1)
def load_object_from_file(filename):
    """
    Helper function to recover an object from file using cPickle module.

    filename -- path of a file holding a pickled object
    Returns the unpickled object.

    NOTE: unpickling can execute arbitrary code; only load files that come
    from a trusted source.
    """
    # Pickle data is a byte stream (binary for protocol >= 1), so the file
    # is read in binary mode to avoid any newline translation.
    with open(filename, 'rb') as f:
        return cPickle.load(f)
class VectorGenerator(object):
    """
    Abstract base class of everything that produces a vector.
    Calling an instance is expected to return a vector (list of floats);
    implementations are expected to carry an `output_dim' param.
    """
    def __call__(self):
        # Subclasses have to provide the actual generator.
        raise NotImplementedError()
class VectorToVector(VectorGenerator):
    """
    Abstract base class of vector transformations.
    Calling an instance with a vector (list of floats) is expected to return
    a vector; implementations are expected to carry `input_dim' and
    `output_dim' params.
    """
    def __call__(self, vector):
        # Subclasses have to provide the actual transformation.
        raise NotImplementedError()
###
### PCA Analysis
### Note that you must have the `numpy' and `mdp' python modules installed, otherwise
### the PCA will fall back to doing nothing at all (but doing it completely compatibly
### with the rest of the code :-).
try:
    from numpy import array
    import mdp

    class PCA(VectorToVector):
        """
        Object performing a PCA analysis on either a given vector (see `__call__' function),
        or on a list of vectors (see `process_list_of_vectors' function).
        """
        def __init__(self, list_of_train_vectors, *args, **kwargs):
            """
            Train a PCA node on `list_of_train_vectors'.
            Any further positional and keyword arguments are handed
            unchanged to `mdp.nodes.PCANode'.
            """
            self.pca = mdp.nodes.PCANode(*args, **kwargs)
            self.pca.train(array(list_of_train_vectors))
            self.pca.stop_training()
            self.input_dim = self.pca.input_dim

        def __call__(self, vector):
            """Run the trained node on a single vector, return the result as a list."""
            # The node is fed a one-row array, hence the wrapping list
            # and the [0] picking that single row back out.
            return list(self.pca(array([vector]))[0])

        def process_list_of_vectors(self, list_of_vectors):
            """Run the trained node on all the vectors at once, return a list of lists."""
            return [list(vec) for vec in self.pca(array(list_of_vectors))]

        def get_projection_info(self):
            """Return the result of the node's `get_recmatrix()'."""
            return self.pca.get_recmatrix()

        def get_eigenvalues(self):
            """Return the node's `d' attribute (presumably the eigenvalues -- see the mdp docs)."""
            return self.pca.d

        def get_eigenvectors(self):
            """Return the node's `v' attribute as a list (presumably the eigenvectors -- see the mdp docs)."""
            return list(self.pca.v)

except ImportError as e:
    # `except ... as' is understood by Python 2.6+ as well as Python 3.
    sys.stderr.write("Warning: %s. PCA will not work.\n" % (str(e)))

    class PCA(VectorToVector):
        """
        Default dummy class for PCA, not very useful: used when `numpy' or
        `mdp' cannot be imported. It ignores its constructor arguments and
        returns whatever it is given unchanged.
        """
        def __init__(self, *args, **kwargs):
            pass

        def __call__(self, list_of_vectors):
            # Identity; despite the parameter name this receives whatever
            # the caller passes (a single vector in the real PCA class).
            return list_of_vectors

        def process_list_of_vectors(self, list_of_vectors):
            return list_of_vectors
class Compose(VectorGenerator):
    """
    Glues a vector generator and a vector transformation together, e.g. an
    InputVectorGenerator followed by a PCA.
    Use this if you want to e.g. generate PCA processed vectors.
    """
    def __init__(self, vector_generator, vector_to_vector):
        # Both parts are type-checked, the generator first.
        checks = (
            (vector_generator, VectorGenerator),
            (vector_to_vector, VectorToVector),
        )
        for candidate, required_class in checks:
            if not isinstance(candidate, required_class):
                raise TypeError
        self.vector_generator = vector_generator
        self.vector_to_vector = vector_to_vector
        # A check of the dimensions is present but disabled:
        # if vector_generator.output_dim != vector_to_vector.input_dim:
        #     raise RuntimeError("Dimensions of Composed object mismatch.")

    def __call__(self, *args, **kwargs):
        # All arguments go to the generator; its result is then transformed.
        generated = self.vector_generator(*args, **kwargs)
        return self.vector_to_vector(generated)
class OccurenceVectorGenerator(VectorGenerator):
    """
    A class used to generate input vectors based on a number of occurences of some input patterns.
    The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
    """
    def generate_top_pattern_dict(self):
        """
        Read the first `self.output_dim' lines of `self.filename' and build
        `self.top_pattern_dict' (pattern string -> index) and
        `self.top_pattern_str' (index -> pattern string).

        Raises IOError if one of those lines does not have the pat file format.
        """
        rexp = re.compile(Const.pat_file_regexp)
        self.top_pattern_dict = {}
        self.top_pattern_str = {}
        # `with' closes the file even when a malformed line aborts the loop.
        with open(self.filename) as input_file:
            for i, line in enumerate(input_file):
                if i >= self.output_dim:
                    break
                match = rexp.match(line)
                if not match:
                    # Same error as in `__call__', instead of an
                    # AttributeError caused by calling .group() on None.
                    raise IOError("Wrong file format: " + self.filename)
                s = match.group(2)
                self.top_pattern_dict[s] = i
                self.top_pattern_str[i] = s

    def __init__(self, main_pat_file, num_features):
        """
        main_pat_file -- pat file whose topmost patterns define the vector components
        num_features  -- number of patterns to take, i.e. the vector length
        """
        self.output_dim = num_features
        self.filename = main_pat_file
        self.generate_top_pattern_dict()

    def __call__(self, pat_file):
        """
        Return a list with one entry per top pattern, holding the sum of the
        counts found for that pattern in `pat_file' (0 if it was not seen).

        Raises IOError on a line that does not have the pat file format and
        RuntimeError if the number of known top patterns differs from
        `self.output_dim'.
        """
        wanted = len(self.top_pattern_dict)
        vector = [0] * wanted
        rexp = re.compile(Const.pat_file_regexp)
        found = 0
        with open(pat_file) as input_file:
            for line in input_file:
                match = rexp.match(line)
                if not match:
                    raise IOError("Wrong file format: " + pat_file)
                index = self.top_pattern_dict.get(match.group(2))
                if index is None:
                    continue
                vector[index] += int(match.group(1))
                # NOTE(review): the counter is assumed to count lines that
                # matched a top pattern (stop reading once as many as there
                # are top patterns were seen) -- confirm the nesting of the
                # counter against the upstream file.
                found += 1
                if found >= wanted:
                    break
        if len(vector) != self.output_dim:
            raise RuntimeError(
                "expected %d top patterns, but %d were loaded from the main pat file"
                % (self.output_dim, len(vector)))
        return vector

    def stringof(self, i):
        """Return the pattern string that belongs to the vector index `i'."""
        return self.top_pattern_str[i]
class Rescale(VectorToVector):
    """
    Maps the values of a vector to the interval [a, b]: the smallest value
    goes to `a', the biggest one to `b'. Subclasses may override `norm'
    (applied to every value first) and `renorm' (applied to the resulting
    fraction from [0, 1]); both are identities here.
    """
    def __init__(self, a=-1.0, b=1.0):
        if b <= a:
            raise RuntimeError("a must be < b")
        self.a = a
        self.tot = b - a          # length of the target interval
        self.avg = (a + b) * 0.5  # its middle, used for constant vectors

    def norm(self, x):
        return x

    def renorm(self, x):
        return x

    def __call__(self, vector):
        """
        Return the rescaled copy of `vector' as a list.
        Raises RuntimeError for an empty vector. If all the values are the
        same (after `norm'), every component is the middle of the interval.
        """
        if len(vector) == 0:
            raise RuntimeError
        # Shift that moves the normed minimum to zero ...
        shift = 0 - self.norm(min(vector))
        # ... and the normed maximum after that shift.
        span = self.norm(max(vector)) + shift
        if span == 0:
            return [self.avg] * len(vector)
        rescaled = []
        for value in vector:
            fraction = float(self.norm(value) + shift) / span
            rescaled.append(self.tot * self.renorm(fraction) + self.a)
        return rescaled
198 import math;
class LogRescale(Rescale):
    """
    Rescale variant that takes log(x+1) of every value first and then pushes
    the resulting fraction through a logistic function centered at 0.5.
    """
    def norm(self, x):
        shifted = x + 1
        return math.log(shifted)

    def renorm(self, x):
        # Logistic curve with steepness 6; renorm(0.5) == 0.5, and the ends
        # of [0, 1] map to about 0.047 and 0.953 rather than to 0 and 1.
        exponent = -6 * (x - 0.5)
        return 1 / (1 + math.exp(exponent))
class InputVectorGenerator(VectorGenerator):
    """
    First we generate an occurence vector by OccurenceVectorGenerator.
    Then, the input vector is made of it by LogRescale(-1.0, 1.0): the
    occurences are log-transformed and mapped relatively on the scale
    (-1.0, 1.0), the most frequently used pattern getting the biggest value.
    See `__call__' function.

    The underlying OccurenceVectorGenerator is available as the `ovg'
    attribute.
    """
    def __init__(self, *args, **kwargs):
        """All the arguments are passed to OccurenceVectorGenerator."""
        # `ovg' is a plain instance attribute. The class used to define
        # a method of the same name as well, but the attribute always
        # shadowed it on instances, so the method could never be called.
        self.ovg = OccurenceVectorGenerator(*args, **kwargs)
        self.gen = Compose(self.ovg, LogRescale(-1.0, 1.0))

    def __call__(self, *args, **kwargs):
        """Generate the input vector; the arguments go to the composed generator."""
        return self.gen(*args, **kwargs)
def linear_combination(list_of_vectors, coefs):
    """
    Return the sum of the vectors weighted by the respective `coefs',
    computed component by component. The length of the result is the length
    of the first vector.
    Returns None for an empty list of vectors; raises Exception if the
    number of vectors differs from the number of coefficients.
    """
    if len(coefs) != len(list_of_vectors):
        raise Exception("len(list_of_vectors) != len(coefs)")
    if len(list_of_vectors) == 0:
        return
    width = len(list_of_vectors[0])
    combined = [0] * width
    for weight, vec in zip(coefs, list_of_vectors):
        for pos in range(width):
            combined[pos] += weight * vec[pos]
    return combined
def get_random_norm_coefs(num):
    """
    Return a list of random coefficients that sum up to 1.
    `num' - 1 random points are drawn from [0, 1) and, together with the
    point 1, sorted; the coefficients are the gaps between the consecutive
    points, starting from 0. (For `num' < 1 the result is [1].)
    """
    cut_points = sorted([random.random() for _ in range(num - 1)] + [1])
    coefs = []
    previous = 0
    for point in cut_points:
        coefs.append(point - previous)
        previous = point
    return coefs
class Combinator(object):
    """
    Extends a dataset by random linear combinations of its elements.

    num_lincombs    -- number of random combinations made of every subset
    skip_subset_len -- container of subset sizes that are to be left out;
                       defaults to [0], i.e. only the empty subset is skipped
    max_len         -- biggest subset size to consider, 0 means no limit
    """
    def __init__(self, num_lincombs=1, skip_subset_len=None, max_len=2):
        self.num_lincombs = num_lincombs
        # The default list is created per instance, so that changing it on
        # one Combinator does not leak to the others.
        self.skip_subset_len = [0] if skip_subset_len is None else skip_subset_len
        self.max_len = max_len

    def get_subsets(self, set):
        """
        Return the list of all the subsets (as lists) of the sequence `set'
        that have at most `max_len' elements (all of them if `max_len' is 0).
        The subsets without the first element come first.
        """
        if len(set) == 0:
            return [[]]
        sub = self.get_subsets(set[1:])
        # list() so that the slice may be concatenated with the subsets
        # even if `set' is not a list itself.
        head = list(set[:1])
        with_head = [head + subset for subset in sub]
        return sub + [subset for subset in with_head
                      if self.max_len == 0 or len(subset) <= self.max_len]

    def combine(self, data):
        """
        `data' is a sequence of (input_vector, output_vector) pairs.
        For every allowed subset of the pairs, `num_lincombs' tuples
        (combined_input, combined_output) are made, both vectors combined
        with the same random coefficients. Returns the list of the tuples.
        """
        combinations = []
        for subset in self.get_subsets(list(range(len(data)))):
            if len(subset) in self.skip_subset_len:
                continue
            input_vectors = [data[index][0] for index in subset]
            output_vectors = [data[index][1] for index in subset]
            for _ in range(self.num_lincombs):
                coefs = get_random_norm_coefs(len(subset))
                combinations.append((linear_combination(input_vectors, coefs),
                                     linear_combination(output_vectors, coefs)))
        return combinations
class PlayerStrategyIdentificator(object):
    """
    Object holding information about default strategies for players.
    It is built from a dictionary mapping a strategy to its players and
    provides the reverse lookup (`player_strategy', or calling the object
    with a player's name), together with the lists `all_players' and
    `all_strategies'.
    """
    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        self.player_strategy = {}
        self.all_players = []
        self.all_strategies = []
        for strategy, players in strategy_players.items():
            self.all_strategies.append(strategy)
            for player in players:
                self.all_players.append(player)
                # A player listed under more strategies keeps the last one.
                self.player_strategy[player] = strategy

    def __call__(self, player_name):
        """Return the strategy of the player; KeyError if the player is not known."""
        return self.player_strategy[player_name]
class StrategyOutputVectorGenerator(VectorGenerator):
    """
    This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
    It is initialized with a list of strategies `valid_strategies' it shall take into account
    (all the strategies of `strategy_players' if not given).
    When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
    that has one component per valid strategy: 1.0 at the position of the player's strategy and -1.0 elsewhere.
    For any other player name it returns None.
    """
    def __init__(self, strategy_players, valid_strategies=None):
        self.identificator = PlayerStrategyIdentificator(strategy_players)
        # Identity test: `== None' would invoke a custom __eq__ of whatever
        # container has been passed in.
        if valid_strategies is None:
            valid_strategies = self.identificator.all_strategies
        # Position of each strategy within the output vector.
        self.valid_strategy_index = {}
        for index, strategy in enumerate(valid_strategies):
            self.valid_strategy_index[strategy] = index

    def __call__(self, player_name):
        # Only the two lookups can legitimately raise KeyError: either the
        # player is unknown, or the player's strategy is not a valid one.
        try:
            player_strat = self.identificator(player_name)
            player_strat_index = self.valid_strategy_index[player_strat]
        except KeyError:
            return None
        return [1.0 if i == player_strat_index else -1.0
                for i in range(len(self.valid_strategy_index))]
class PlanarOutputVectorGenerator(VectorGenerator):
    """
    Output vectors given explicitly: initialized with a dictionary mapping a
    player's name to the vector, returns the stored vector when called with
    the name (None if there is no such player).
    """
    def __init__(self, player_vector):
        self.players = player_vector.keys()
        self.player_vector = player_vector

    def __call__(self, player_name):
        try:
            vector = self.player_vector[player_name]
        except KeyError:
            vector = None
        return vector
if __name__ == '__main__':
    # Nothing to run here, just tell the user on stderr.
    sys.stderr.write("This is just a library file...\n")