3 This file contains several objects we use to process pattern files for the game of Go.
4 We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
5 It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format as a regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
17 from itertools
import izip
,count
class Const:
    """Class used to hold global const variables, such as the pat file format."""

    # One line of a pat file: optional leading whitespace, a run of digits
    # (group 1), optional whitespace, then the rest of the line (group 2).
    # Written as a raw string so the backslashes reach the regex engine
    # untouched; the resulting pattern text is unchanged.
    pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
def print_vector(vector, where=sys.stdout):
    """Helper method for printing vector (list of floats).

    Writes the elements of `vector' to `where' on one line, separated by
    single spaces and terminated by a newline.

    vector -- iterable of values; each is rendered with str()
    where  -- writable file-like object (defaults to standard output)
    """
    # NOTE(review): the original loop body was not visible in the reviewed
    # excerpt; a space-separated single line is assumed -- confirm.
    where.write(" ".join(str(x) for x in vector) + "\n")
def print_set_to_file(data, filename):
    """
    Helper method for printing datasets for neural network.

    The file starts with a header line

        number_of_pairs len_of_input_vector len_of_output_vector

    and then, for every (input, output) pair in `data', the input vector
    followed by the output vector, each written with `print_vector'.

    data     -- sequence of (input_vector, output_vector) pairs; the two
                vector lengths in the header are taken from the first pair
    filename -- path of the file to create or overwrite

    Raises IndexError if `data' is empty (there is no first pair to measure).
    """
    def print_set(data, where):
        # Header: number of pairs, input length, output length.
        where.write("%s %s %s\n" % (len(data), len(data[0][0]), len(data[0][1])))
        for i, o in data:
            print_vector(i, where)
            print_vector(o, where)

    # `with' guarantees the file is closed even if writing fails midway.
    with open(filename, 'w') as fout:
        print_set(data, fout)
def dump_object_to_file(object, filename):
    """Helper function to save an object to file using cPickle module.

    object   -- any picklable object (the parameter name shadows the builtin
                but is kept so that keyword callers keep working)
    filename -- path of the file to create or overwrite
    """
    # Protocol -1 selects the highest available pickle protocol, which is a
    # binary format, so the file has to be opened in binary mode.  `with'
    # closes the file even if dump() raises.
    with open(filename, 'wb') as f:
        cPickle.dump(object, f, -1)
def load_object_from_file(filename):
    """Helper function to recover an object from file using cPickle module.

    filename -- path of a file containing a pickled object

    Returns the unpickled object.

    SECURITY: unpickling can execute arbitrary code.  Only load files that
    come from a trusted source.
    """
    # Binary mode is required to read binary pickle protocols; `with' closes
    # the file even if load() raises.
    with open(filename, 'rb') as f:
        return cPickle.load(f)
class VectorGenerator(object):
    """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
    # NOTE(review): the original `def __call__' line was not visible in the
    # reviewed excerpt.  Accepting any arguments makes a call on the abstract
    # base always report NotImplementedError, whatever signature the
    # subclass was supposed to provide.
    def __call__(self, *args, **kwargs):
        raise NotImplementedError
class VectorToVector(VectorGenerator):
    """Abstract class. When called with a vector (list of floats), returns a vector. Has input_dim and output_dim params."""
    def __call__(self, vector):
        # Concrete subclasses override this with the actual transformation.
        raise NotImplementedError
### Note that you must have `numpy' and `mdp' python modules, otherwise the PCA will
### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
try:
    from numpy import array
    import mdp

    class PCA(VectorToVector):
        """
        Object performing a PCA analysis on either a given vector (see `__call__' function),
        or on a list of vectors (see `process_list_of_vectors' function).
        """
        def __init__(self, list_of_train_vectors, *args, **kwargs):
            """Train a PCA node on `list_of_train_vectors'.

            Any extra positional/keyword arguments are passed unchanged to
            mdp.nodes.PCANode.
            """
            self.pca = mdp.nodes.PCANode(*args, **kwargs)
            self.pca.train(array(list_of_train_vectors))
            self.input_dim = self.pca.input_dim

        def __call__(self, vector):
            """Return the PCA projection of a single vector as a list."""
            # The node is applied to a one-row array; row 0 is the result.
            return list(self.pca(array([vector]))[0])

        def process_list_of_vectors(self, list_of_vectors):
            """Return the PCA projections of all given vectors as a list of lists."""
            return [list(vec) for vec in self.pca(array(list_of_vectors))]

except ImportError as e:
    sys.stderr.write("Warning: %s. PCA will not work.\n" % (str(e)))

    class PCA(VectorToVector):
        """Default dummy class for PCA, not very useful."""
        def __init__(self, *args, **kwargs):
            # Accepts and ignores the same arguments as the real PCA class.
            pass

        def __call__(self, list_of_vectors):
            # Identity: the argument is returned unchanged.
            return list_of_vectors

        def process_list_of_vectors(self, list_of_vectors):
            # Identity: the argument is returned unchanged.
            return list_of_vectors
class Compose(VectorGenerator):
    """
    A class used as a composer of different objects, such as InputVectorGenerator and PCA.
    Use this if you want to e.g. generate PCA processed vectors.

    Calling the composed object calls `vector_generator' with the given
    arguments and feeds its result to `vector_to_vector'.
    """
    def __init__(self, vector_generator, vector_to_vector):
        # NOTE(review): the bodies of these two guards were not visible in
        # the reviewed excerpt; TypeError is assumed -- confirm.
        if not isinstance(vector_generator, VectorGenerator):
            raise TypeError("vector_generator must be an instance of VectorGenerator")
        if not isinstance(vector_to_vector, VectorToVector):
            raise TypeError("vector_to_vector must be an instance of VectorToVector")
        self.vector_generator = vector_generator
        self.vector_to_vector = vector_to_vector
        # if vector_generator.output_dim != vector_to_vector.input_dim:
        #     raise RuntimeError("Dimensions of Composed object mismatch.")

    def __call__(self, *args, **kwargs):
        generated = self.vector_generator(*args, **kwargs)
        return self.vector_to_vector(generated)
class OccurenceVectorGenerator(VectorGenerator):
    """
    A class used to generate input vectors based on the number of occurrences of some input patterns.
    The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
    """
    def generate_top_pattern_dict(self):
        """Build `self.top_pattern_dict' from the first `output_dim' lines of the main pat file.

        Each key is the pattern text of a line (regexp group 2); the value is
        that line's zero-based position in the file.

        Raises IOError if one of those lines does not match the pat file format.
        """
        rexp = re.compile(Const.pat_file_regexp)
        self.top_pattern_dict = {}
        # `with' closes the file even when a malformed line aborts the scan.
        with open(self.filename) as input_file:
            for i, line in enumerate(input_file):
                if i >= self.output_dim:
                    break
                match = rexp.match(line)
                if not match:
                    # Same error as __call__ raises for a malformed file,
                    # instead of an AttributeError on None.
                    raise IOError("Wrong file format: " + self.filename)
                self.top_pattern_dict[match.group(2)] = i

    def __init__(self, main_pat_file, num_features):
        """
        main_pat_file -- path of the pat file that defines the top patterns
        num_features  -- number of leading patterns to use (becomes output_dim)
        """
        self.output_dim = num_features
        self.filename = main_pat_file
        self.generate_top_pattern_dict()

    def __call__(self, pat_file):
        """Return the occurrence vector of the top patterns found in `pat_file'.

        Element k is the sum of the counts (regexp group 1) of the lines in
        `pat_file' whose pattern has index k in `self.top_pattern_dict'.

        Raises IOError if a line does not match the pat file format.
        """
        vector = [0] * len(self.top_pattern_dict)
        rexp = re.compile(Const.pat_file_regexp)
        # NOTE(review): the counter statements were not visible in the
        # reviewed excerpt; it is assumed that the counter tracks matched top
        # patterns so reading can stop once all were seen -- confirm.
        found = 0
        with open(pat_file) as input_file:
            for line in input_file:
                match = rexp.match(line)
                if not match:
                    raise IOError("Wrong file format: " + pat_file)
                if match.group(2) in self.top_pattern_dict:
                    index = self.top_pattern_dict[match.group(2)]
                    vector[index] += int(match.group(1))
                    found += 1
                if found >= len(self.top_pattern_dict):
                    break
        if len(vector) != self.output_dim:
            # Happens when the main pat file yielded fewer distinct patterns
            # than `num_features'.
            # NOTE(review): the original exception type was not visible in
            # the reviewed excerpt -- confirm.
            raise RuntimeError("Pattern vector has %d elements, expected %d."
                               % (len(vector), self.output_dim))
        return vector
class Rescale(VectorToVector):
    """Class that rescales vectors to a given interval!

    The smallest element of a vector is mapped to `a', the largest to `b',
    and everything in between linearly.
    """
    def __init__(self, a=-1.0, b=1.0):
        # NOTE(review): some original lines of this constructor were not
        # visible in the reviewed excerpt (possibly argument validation) --
        # confirm nothing else is needed here.
        self.a = a
        self.tot = b - a  # width of the target interval

    def __call__(self, vector):
        """Return a new list with the elements of `vector' rescaled to [a, b].

        An empty vector yields an empty list.  A vector whose elements are
        all equal has no spread to scale, so every element is mapped to the
        lower bound `a' (instead of failing with ZeroDivisionError).
        """
        if len(vector) == 0:
            return []
        to_zero = 0 - min(vector)            # shift that moves the minimum to 0
        maximum = max(vector) + to_zero      # spread of the shifted vector
        if maximum == 0:
            return [self.a for x in vector]
        # float() keeps the division exact under Python 2 even when both the
        # bounds and the vector elements are integers.
        spread = float(maximum)
        return [self.tot * (x + to_zero) / spread + self.a for x in vector]
class InputVectorGenerator(VectorGenerator):
    """
    First we generate an occurrence vector by OccurenceVectorGenerator.
    Then, an input vector is generated as a relative number of occurrences of the topmost patterns.
    The occurrences are mapped so that the most frequently used
    one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.
    """
    def __init__(self, *args, **kwargs):
        """All arguments are passed on to OccurenceVectorGenerator."""
        self.gen = Compose(OccurenceVectorGenerator(*args, **kwargs), Rescale(-1.0, 1.0))
        # Expose the dimension that the VectorGenerator interface documents;
        # it is the dimension of the wrapped occurrence generator.
        self.output_dim = self.gen.vector_generator.output_dim

    def __call__(self, *args, **kwargs):
        """Return the rescaled occurrence vector; arguments go to the composed generator."""
        return self.gen(*args, **kwargs)
class Combinator(object):
    """Generates extra (input, output) pairs as random linear combinations of subsets of a data set.

    num_lincombs    -- number of random combinations generated per subset
    skip_subset_len -- subset sizes to leave out (default [0], i.e. only the
                       empty subset is skipped)
    max_len         -- largest subset size to generate; 0 means no limit
    """
    # NOTE(review): several short original lines (base cases, initialisers,
    # `return's) were not visible in the reviewed excerpt and are
    # reconstructed from the surrounding code -- confirm.
    def __init__(self, num_lincombs=1, skip_subset_len=None, max_len=2):
        # A None sentinel replaces the former mutable default list, which
        # would have been shared by every instance created without the
        # argument.
        if skip_subset_len is None:
            skip_subset_len = [0]
        self.num_lincombs = num_lincombs
        self.skip_subset_len = skip_subset_len
        self.max_len = max_len

    def get_subsets(self, set):
        """Return all subsets of the sequence `set' with at most `max_len' elements, as lists.

        Subsets without the first element come first, then those with it.
        (The parameter name shadows the builtin; it is kept for callers.)
        """
        if len(set) == 0:
            return [[]]
        sub = self.get_subsets(set[1:])
        with_head = [[set[0]] + subset for subset in sub]
        return sub + [x for x in with_head
                      if self.max_len == 0 or len(x) <= self.max_len]

    def linear_combination(self, list_of_vectors, coefs):
        """Return sum(coefs[p] * list_of_vectors[p]) computed element-wise.

        Returns [] for an empty list of vectors.  Raises Exception when the
        numbers of vectors and coefficients differ.
        """
        if len(list_of_vectors) != len(coefs):
            raise Exception("len(list_of_vectors) != len(coefs)")
        if len(list_of_vectors) == 0:
            return []
        len_vec = len(list_of_vectors[0])
        res_vec = [0] * len_vec
        for coef, vec in zip(coefs, list_of_vectors):
            for i in range(len_vec):
                res_vec[i] += coef * vec[i]
        return res_vec

    def get_random_norm_coefs(self, num):
        """Return `num' random non-negative coefficients that sum to 1.

        They are the gaps between consecutive sorted cut points drawn
        uniformly from [0, 1), with 1 as the final cut point.
        """
        rnd_nums = [random.random() for i in range(num - 1)] + [1]
        rnd_nums.sort()
        coefs = []
        first = 0
        for cut in rnd_nums:
            coefs.append(cut - first)
            first = cut
        return coefs

    def combine(self, data):
        """Return a list of combined (input, output) pairs built from `data'.

        For every index subset from `get_subsets' whose size is not listed in
        `skip_subset_len', `num_lincombs' pairs are produced; the input and
        the output vectors of a pair are combined with the same coefficients.
        """
        combinations = []
        # list() so that slicing/concatenation in get_subsets also works
        # where range() is not a list.
        for subset in self.get_subsets(list(range(len(data)))):
            if len(subset) in self.skip_subset_len:
                continue
            input_vectors = [data[index][0] for index in subset]
            output_vectors = [data[index][1] for index in subset]
            for i in range(self.num_lincombs):
                coefs = self.get_random_norm_coefs(len(subset))
                combinations.append((self.linear_combination(input_vectors, coefs),
                                     self.linear_combination(output_vectors, coefs)))
        return combinations
class PlayerStrategyIdentificator(object):
    """Object holding information about default strategies for players.

    strategy_players -- dict mapping a strategy to an iterable of the
                        players that use it
    """
    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        self.player_strategy = {}   # player -> strategy
        self.all_players = []       # every listed player, in iteration order
        self.all_strategies = []    # every strategy, in dict iteration order
        for strategy, players in self.strategy_players.items():
            self.all_strategies.append(strategy)
            for player in players:
                self.all_players.append(player)
                # A player listed under several strategies keeps the one
                # seen last.
                self.player_strategy[player] = strategy

    def __call__(self, player_name):
        """Return the strategy of `player_name'; raises KeyError for an unknown player."""
        return self.player_strategy[player_name]
class StrategyOutputVectorGenerator(VectorGenerator):
    """
    This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
    It is initialized with a list of strategies `valid_strategies' it shall take into account.
    When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
    that corresponds to the strategy: 1.0 at the position of the player's strategy and -1.0 everywhere else.
    """
    def __init__(self, strategy_players, valid_strategies=None):
        """
        strategy_players -- passed to PlayerStrategyIdentificator
        valid_strategies -- strategies to encode, in vector order; defaults
                            to all strategies known to the identificator
        """
        self.identificator = PlayerStrategyIdentificator(strategy_players)
        if valid_strategies is None:
            valid_strategies = self.identificator.all_strategies

        # strategy -> position of its 1.0 in the output vector
        self.valid_strategy_index = {}
        for index, s in enumerate(valid_strategies):
            self.valid_strategy_index[s] = index
        # Length of the vectors produced by __call__, as documented by the
        # VectorGenerator interface.
        self.output_dim = len(self.valid_strategy_index)

    def __call__(self, player_name):
        """Return the output vector for `player_name'.

        Raises KeyError if the player is unknown or the player's strategy is
        not one of the valid strategies.
        """
        player_strat = self.identificator(player_name)
        player_strat_index = self.valid_strategy_index[player_strat]
        return [1.0 if i == player_strat_index else -1.0
                for i in range(len(self.valid_strategy_index))]
class PlanarOutputVectorGenerator(VectorGenerator):
    """Class that explicitly returns predefined output vectors for given players."""
    def __init__(self, player_vector):
        """
        player_vector -- dict mapping a player name to that player's output vector
        """
        self.player_vector = player_vector
        # Snapshot of the player names, taken at construction time.
        self.players = list(player_vector.keys())

    def __call__(self, player_name):
        """Return the predefined vector of `player_name'; raises KeyError for an unknown player."""
        # NOTE(review): one short original line of this method was not
        # visible in the reviewed excerpt -- confirm nothing is missing.
        return self.player_vector[player_name]
if __name__ == '__main__':
    # This module only provides definitions; running it directly just says so.
    sys.stderr.write("This is just a library file...\n")