This file contains several objects we use to process pattern files for the game of Go.
We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format, given as a regular expression.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
17 from itertools
import izip
,count
20 """Class used to hold global const variables, such as the pat file format."""
21 pat_file_regexp
= '^\s*(\d+)\s*(.+)$'
23 def print_vector(vector
, where
=sys
.stdout
):
24 """Helper method for printing vector (list of floats)."""
29 def print_set_to_file( data
, filename
):
31 Helper method for printing datasets for neural network.
33 number_of_pairs len_of_input_vector len_of_output_vector
39 def print_set( data
, where
):
40 print >> where
, len(data
), len(data
[0][0]), len(data
[0][1])
42 print_vector(i
, where
)
43 print_vector(o
, where
)
44 fout
= open(filename
, 'w')
48 def dump_object_to_file(object, filename
):
49 """Helper function to save an object to file using cPickle module."""
51 cPickle
.dump(object, f
,-1)
54 def load_object_from_file(filename
):
55 """Helper function to recover an object from file using cPickle module."""
57 object = cPickle
.load(f
)
61 class VectorGenerator(object):
62 """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
64 raise NotImplementedError
class VectorToVector(VectorGenerator):
    """Abstract base for objects that turn one vector into another.

    Calling an instance with a vector (list of floats) returns a vector.
    Has `input_dim' and `output_dim' params.
    """

    def __call__(self, vector):
        """Transform `vector'; concrete subclasses must override this."""
        raise NotImplementedError()
### Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
76 from numpy
import array
78 class PCA(VectorToVector
):
80 Object performing a PCA analysis on either a given vector (see `__call__' function),
81 or on a list of vectors (see `process_list_of_vectors' function).
83 def __init__(self
, list_of_train_vectors
, *args
, **kwargs
):
84 self
.pca
= mdp
.nodes
.PCANode(*args
, **kwargs
)
85 self
.pca
.train(array(list_of_train_vectors
))
86 self
.pca
.stop_training()
87 self
.input_dim
= self
.pca
.input_dim
88 def __call__(self
, vector
):
89 return list(self
.pca(array([vector
]))[0])
90 def process_list_of_vectors(self
, list_of_vectors
):
91 return [ list(vec
) for vec
in self
.pca(array(list_of_vectors
)) ]
92 def get_projection_info(self
):
93 return self
.pca
.get_recmatrix()
94 def get_eigenvalues(self
):
96 def get_eigenvectors(self
):
97 return list(self
.pca
.v
)
99 except ImportError, e
:
100 print >>sys
.stderr
, "Warning: %s. PCA will not work."%(str(e
))
102 class PCA(VectorToVector
):
103 """Default dummy class for PCA, not very useless."""
104 def __init__(self
, *args
, **kwargs
):
106 def __call__(self
, list_of_vectors
):
107 return list_of_vectors
108 def process_list_of_vectors(self
, list_of_vectors
):
109 return list_of_vectors
112 class Compose(VectorGenerator
):
114 A class used as a composer of different objects, such as InputVectorGenerator and PCA.
115 Use this if you want to e.g. generate PCA processed vectors.
117 def __init__(self
, vector_generator
, vector_to_vector
):
118 if not isinstance(vector_generator
, VectorGenerator
):
120 if not isinstance(vector_to_vector
, VectorToVector
):
122 self
.vector_generator
= vector_generator
123 self
.vector_to_vector
= vector_to_vector
124 # if vector_generator.output_dim != vector_to_vector.input_dim:
125 # raise RuntimeError("Dimensions of Composed object mismatch.")
126 def __call__(self
, *args
, **kwargs
):
127 return self
.vector_to_vector(self
.vector_generator(*args
, **kwargs
))
129 class OccurenceVectorGenerator(VectorGenerator
):
A class used to generate input vectors based on the relative number of occurrences of some input patterns.
The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
134 def generate_top_pattern_dict(self
):
135 rexp
=re
.compile(Const
.pat_file_regexp
)
136 self
.top_pattern_dict
= {}
137 self
.top_pattern_str
= {}
139 input_file
= open(self
.filename
)
140 for line
in input_file
:
141 if i
>= self
.output_dim
:
143 s
= rexp
.match(line
).group(2)
144 self
.top_pattern_dict
[s
] = i
145 self
.top_pattern_str
[i
] = s
149 def __init__(self
, main_pat_file
, num_features
):
150 self
.output_dim
= num_features
151 self
.filename
= main_pat_file
152 self
.generate_top_pattern_dict()
154 def __call__(self
, pat_file
):
155 vector
= [0]*len(self
.top_pattern_dict
)
156 rexp
=re
.compile(Const
.pat_file_regexp
)
158 input_file
= open(pat_file
)
159 for line
in input_file
:
160 match
= rexp
.match(line
)
162 raise IOError("Wrong file format: " + pat_file
)
163 if match
.group(2) in self
.top_pattern_dict
:
164 index
=self
.top_pattern_dict
[match
.group(2)]
165 vector
[index
] += int(match
.group(1))
167 if i
>= len(self
.top_pattern_dict
):
170 if len(vector
) != self
.output_dim
:
174 def stringof(self
, i
):
175 return self
.top_pattern_str
[i
]
177 class Rescale(VectorToVector
):
178 """Class that rescales vectors to a given interval!"""
179 def __init__(self
, a
=-1.0, b
=1.0):
181 raise RuntimeError("a must be < b")
183 self
.avg
= (a
+ b
) * 0.5
189 def __call__(self
, vector
):
192 to_zero
= 0 - self
.norm(min(vector
))
193 maximum
= self
.norm(max(vector
)) + to_zero
195 return [ self
.avg
for _
in vector
]
196 return [ self
.tot
* self
.renorm(float(self
.norm(x
) + to_zero
) / maximum
) + self
.a
for x
in vector
]
199 class LogRescale(Rescale
):
203 return 1 / (1 + math
.exp(-6*(x
-0.5)));
205 class InputVectorGenerator(VectorGenerator
):
First we generate an occurrence vector by OccurenceVectorGenerator.
Then, an input vector is generated as a relative number of occurrences of the topmost patterns.
The occurrences are mapped so that the most frequently used
one is mapped to 1.0 and the rest are mapped relatively on the scale (1.0,-1.0). See `__call__' function.
212 def __init__(self
, *args
, **kwargs
):
214 rescale
= kwargs
['rescale']
215 del kwargs
['rescale']
218 self
.ovg
= OccurenceVectorGenerator(*args
, **kwargs
)
219 self
.gen
= Compose(self
.ovg
, rescale(-1.0, 1.0))
220 def __call__(self
, *args
, **kwargs
):
221 return self
.gen(*args
, **kwargs
)
225 def linear_combination(list_of_vectors
, coefs
):
226 if len(list_of_vectors
) != len(coefs
):
227 raise Exception("len(list_of_vectors) != len(coefs)")
228 if len(list_of_vectors
) == 0:
230 len_vec
=len(list_of_vectors
[0])
232 for p
in xrange(len(list_of_vectors
)):
233 for i
in xrange(len_vec
):
234 res_vec
[i
] += coefs
[p
] * list_of_vectors
[p
][i
]
237 def get_random_norm_coefs( num
):
239 rnd_nums
= [ random
.random() for i
in xrange(num
-1) ] + [1]
242 for next
in rnd_nums
:
243 coefs
.append(next
-first
)
247 class Combinator(object):
248 def __init__(self
, num_lincombs
= 1, skip_subset_len
= [0], max_len
= 2):
249 self
.num_lincombs
= num_lincombs
250 self
.skip_subset_len
= skip_subset_len
251 self
.max_len
= max_len
252 def get_subsets(self
, set):
255 sub
=self
.get_subsets(set[1:])
256 return sub
+ filter( lambda x
: ( self
.max_len
==0 or len(x
)<=self
.max_len
), [ set[:1]+subset
for subset
in sub
] )
257 def combine(self
, data
):
259 for subset
in self
.get_subsets(range(len(data
))):
260 if len(subset
) in self
.skip_subset_len
:
262 input_vectors
= [ data
[index
][0] for index
in subset
]
263 output_vectors
= [ data
[index
][1] for index
in subset
]
264 for i
in xrange(self
.num_lincombs
):
265 coefs
= get_random_norm_coefs(len(subset
))
266 combinations
+= [(linear_combination(input_vectors
, coefs
), linear_combination(output_vectors
, coefs
))]
class PlayerStrategyIdentificator(object):
    """Object holding information about default strategies for players.

    The constructor takes a mapping ``strategy -> iterable of player names``
    and inverts it once, so that calling the instance with a player name
    returns that player's strategy.

    Attributes set by the constructor:
      strategy_players -- the mapping passed in (stored by reference, not copied)
      player_strategy  -- dict mapping every player name to its strategy
      all_players      -- list of player names, in the order they are visited
      all_strategies   -- list of strategy keys, in the order they are visited

    If a player is listed under more than one strategy, the player appears
    once per listing in `all_players' and the strategy visited last wins in
    `player_strategy'.
    """

    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        self.player_strategy = {}
        self.all_players = []
        self.all_strategies = []
        for strategy, players in self.strategy_players.items():
            self.all_strategies.append(strategy)
            for player in players:
                self.all_players.append(player)
                self.player_strategy[player] = strategy

    def __call__(self, player_name):
        """Return the strategy of `player_name'.

        Raises KeyError when the player is not known.
        """
        return self.player_strategy[player_name]
286 class StrategyOutputVectorGenerator(VectorGenerator
):
This object generates output vectors for players with strategies specified in a `PlayerStrategyIdentificator' object.
It is initialized with a list of strategies `valid_strategies' it shall take into account.
When called (see `__call__') with the name of a player whose strategy is in `valid_strategies', it returns a vector
that corresponds to the strategy like this.
293 def __init__(self
, strategy_players
, valid_strategies
=None):
294 self
.identificator
= PlayerStrategyIdentificator(strategy_players
)
295 if valid_strategies
== None:
296 valid_strategies
= self
.identificator
.all_strategies
298 self
.valid_strategy_index
={}
299 for s
in valid_strategies
:
300 self
.valid_strategy_index
[s
]= index
302 def __call__(self
, player_name
):
304 player_strat
= self
.identificator(player_name
)
305 player_strat_index
= self
.valid_strategy_index
[player_strat
]
306 return [ 1.0 if i
== player_strat_index
else -1.0 for i
in xrange(len(self
.valid_strategy_index
)) ]
310 class PlanarOutputVectorGenerator(VectorGenerator
):
311 """Class that explicitly returns predefined output vectors for given players."""
312 def __init__(self
, player_vector
):
313 self
.player_vector
= player_vector
314 self
.players
= player_vector
.keys()
315 def __call__(self
, player_name
):
317 return self
.player_vector
[player_name
]
if __name__ == '__main__':
    # This module is only meant to be imported; say so on stderr when it is
    # executed directly.
    notice = "This is just a library file..."
    sys.stderr.write(notice + "\n")