This file contains several objects we use to process pattern files for the game of Go.
We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format, given as a regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
17 from itertools
import izip
,count
class Const(object):
    """Class used to hold global const variables, such as the pat file format."""

    # Format of one pat-file line: optional leading whitespace, an integer,
    # then the rest of the line, e.g.
    #     4632 (border:3 s:5000003 s:6000049 s:700004a)
    # group(1) is the leading integer, group(2) the remaining text.
    # A raw string keeps the backslashes of `\s' and `\d' literal instead of
    # relying on Python passing unknown escape sequences through.
    pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
23 def print_vector(vector
, where
=sys
.stdout
):
24 """Helper method for printing vector (list of floats)."""
def print_set_to_file(data, filename):
    """
    Helper method for printing datasets for neural network.

    `data' is a non-empty sequence of (input_vector, output_vector) pairs.
    The file `filename' is (over)written with a header line

        number_of_pairs len_of_input_vector len_of_output_vector

    followed, for every pair in order, by its input vector and then its
    output vector, each emitted through `print_vector'. The two vector
    lengths in the header are taken from the first pair, so an empty
    `data' raises IndexError.
    """
    # The context manager closes the file even if writing fails half-way.
    with open(filename, 'w') as fout:
        fout.write("%d %d %d\n" % (len(data), len(data[0][0]), len(data[0][1])))
        for i, o in data:
            print_vector(i, fout)
            print_vector(o, fout)
def dump_object_to_file(object, filename):
    """Helper function to save an object to file using cPickle module.

    `object' is pickled into `filename' (which is overwritten) with
    protocol -1, i.e. the highest protocol available. That protocol is
    binary, hence the file is opened in binary mode.
    """
    # NOTE: the parameter name shadows the builtin `object'; it is kept
    # because callers may pass it by keyword.
    # The context manager closes the file even if pickling raises.
    with open(filename, 'wb') as f:
        cPickle.dump(object, f, -1)
def load_object_from_file(filename):
    """Helper function to recover an object from file using cPickle module.

    Returns the object unpickled from `filename'. The file is opened in
    binary mode so that binary pickle protocols are read back intact.

    NOTE: unpickling can execute arbitrary code; only load files that come
    from a trusted source.
    """
    # The context manager closes the file even if unpickling raises.
    with open(filename, 'rb') as f:
        return cPickle.load(f)
61 class VectorGenerator(object):
62 """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
64 raise NotImplementedError
class VectorToVector(VectorGenerator):
    """
    Abstract base for vector transformations: an instance is called with a
    vector (list of floats) and gives back a vector. Such objects are
    meant to have `input_dim' and `output_dim' params.
    """

    def __call__(self, vector):
        # Abstract: concrete subclasses supply the actual transformation.
        raise NotImplementedError()
### Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
try:
    from numpy import array
    import mdp

    class PCA(VectorToVector):
        """
        Object performing a PCA analysis on either a given vector (see `__call__' function),
        or on a list of vectors (see `process_list_of_vectors' function).

        The underlying `mdp.nodes.PCANode' is created from the extra positional
        and keyword arguments and trained at construction time on
        `list_of_train_vectors'.
        """
        def __init__(self, list_of_train_vectors, *args, **kwargs):
            self.pca = mdp.nodes.PCANode(*args, **kwargs)
            self.pca.train(array(list_of_train_vectors))
            self.input_dim = self.pca.input_dim

        def __call__(self, vector):
            """Run the PCA node on a single vector and return the result as a list."""
            # The vector is passed as a one-row array; the first row of the
            # node's output is the answer.
            return list(self.pca(array([vector]))[0])

        def process_list_of_vectors(self, list_of_vectors):
            """Run the PCA node on all vectors at once; returns a list of lists."""
            return [list(vec) for vec in self.pca(array(list_of_vectors))]

        def get_projection_info(self):
            """Return the result of the node's `get_recmatrix()'."""
            return self.pca.get_recmatrix()

except ImportError as e:
    # numpy or mdp is unavailable: warn once and provide a stand-in class so
    # that code using PCA keeps working, only without any analysis.
    sys.stderr.write("Warning: %s. PCA will not work.\n" % (str(e)))

    class PCA(VectorToVector):
        """Default dummy class for PCA: returns its input unchanged."""
        def __init__(self, *args, **kwargs):
            # Accepts and ignores whatever the real PCA would be given.
            pass

        def __call__(self, list_of_vectors):
            return list_of_vectors

        def process_list_of_vectors(self, list_of_vectors):
            return list_of_vectors
107 class Compose(VectorGenerator
):
109 A class used as a composer of different objects, such as InputVectorGenerator and PCA.
110 Use this if you want to e.g. generate PCA processed vectors.
112 def __init__(self
, vector_generator
, vector_to_vector
):
113 if not isinstance(vector_generator
, VectorGenerator
):
115 if not isinstance(vector_to_vector
, VectorToVector
):
117 self
.vector_generator
= vector_generator
118 self
.vector_to_vector
= vector_to_vector
119 # if vector_generator.output_dim != vector_to_vector.input_dim:
120 # raise RuntimeError("Dimensions of Composed object mismatch.")
121 def __call__(self
, *args
, **kwargs
):
122 return self
.vector_to_vector(self
.vector_generator(*args
, **kwargs
))
124 class OccurenceVectorGenerator(VectorGenerator
):
126 A class used to generate input vectors based on a relative number of occurences of some input patterns.
127 The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
129 def generate_top_pattern_dict(self
):
130 rexp
=re
.compile(Const
.pat_file_regexp
)
131 self
.top_pattern_dict
= {}
132 self
.top_pattern_str
= {}
134 input_file
= open(self
.filename
)
135 for line
in input_file
:
136 if i
>= self
.output_dim
:
138 s
= rexp
.match(line
).group(2)
139 self
.top_pattern_dict
[s
] = i
140 self
.top_pattern_str
[i
] = s
144 def __init__(self
, main_pat_file
, num_features
):
145 self
.output_dim
= num_features
146 self
.filename
= main_pat_file
147 self
.generate_top_pattern_dict()
149 def __call__(self
, pat_file
):
150 vector
= [0]*len(self
.top_pattern_dict
)
151 rexp
=re
.compile(Const
.pat_file_regexp
)
153 input_file
= open(pat_file
)
154 for line
in input_file
:
155 match
= rexp
.match(line
)
157 raise IOError("Wrong file format: " + pat_file
)
158 if match
.group(2) in self
.top_pattern_dict
:
159 index
=self
.top_pattern_dict
[match
.group(2)]
160 vector
[index
] += int(match
.group(1))
162 if i
>= len(self
.top_pattern_dict
):
165 if len(vector
) != self
.output_dim
:
169 def stringof(self
, i
):
170 return self
.top_pattern_str
[i
]
172 class Rescale(VectorToVector
):
173 """Class that rescales vectors to a given interval!"""
174 def __init__(self
, a
=-1.0, b
=1.0):
178 self
.tot
= b
- a
#abs(a) + abs(b)
179 def __call__(self
, vector
):
182 to_zero
= 0 - min(vector
)
183 maximum
= max(vector
) + to_zero
184 return [ self
.tot
* (x
+ to_zero
) / maximum
+ self
.a
for x
in vector
]
class InputVectorGenerator(VectorGenerator):
    """
    Generates input vectors from pattern files.

    First an occurrence vector is generated by OccurenceVectorGenerator.
    Then the input vector is obtained by passing it through
    Rescale(-1.0, 1.0), i.e. as a relative number of occurrences of the
    topmost patterns: the most frequently used one is mapped to 1.0 and the
    rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.

    All constructor arguments are handed to OccurenceVectorGenerator, and
    all call arguments to the composed generator.
    """
    def __init__(self, *args, **kwargs):
        self.ovg = OccurenceVectorGenerator(*args, **kwargs)
        self.gen = Compose(self.ovg, Rescale(-1.0, 1.0))
        # Rescale maps the vector element by element, so the final vector is
        # as long as the occurrence vector; expose that as `output_dim', the
        # param every VectorGenerator is documented to have.
        self.output_dim = self.ovg.output_dim

    def __call__(self, *args, **kwargs):
        return self.gen(*args, **kwargs)
def linear_combination(list_of_vectors, coefs):
    """Return the linear combination of `list_of_vectors' weighted by `coefs'.

    Element i of the result is the sum over p of
    coefs[p] * list_of_vectors[p][i]. The result has the length of the
    first vector; an empty `list_of_vectors' gives an empty list.

    Raises ValueError (a subclass of Exception) when the number of vectors
    differs from the number of coefficients.
    """
    if len(list_of_vectors) != len(coefs):
        raise ValueError("len(list_of_vectors) != len(coefs)")
    if len(list_of_vectors) == 0:
        return []
    len_vec = len(list_of_vectors[0])
    # For every position, add up the weighted elements in vector order.
    return [sum(coef * vec[i] for coef, vec in zip(coefs, list_of_vectors))
            for i in range(len_vec)]
213 def get_random_norm_coefs( num
):
215 rnd_nums
= [ random
.random() for i
in xrange(num
-1) ] + [1]
218 for next
in rnd_nums
:
219 coefs
.append(next
-first
)
223 class Combinator(object):
224 def __init__(self
, num_lincombs
= 1, skip_subset_len
= [0], max_len
= 2):
225 self
.num_lincombs
= num_lincombs
226 self
.skip_subset_len
= skip_subset_len
227 self
.max_len
= max_len
228 def get_subsets(self
, set):
231 sub
=self
.get_subsets(set[1:])
232 return sub
+ filter( lambda x
: ( self
.max_len
==0 or len(x
)<=self
.max_len
), [ set[:1]+subset
for subset
in sub
] )
233 def combine(self
, data
):
235 for subset
in self
.get_subsets(range(len(data
))):
236 if len(subset
) in self
.skip_subset_len
:
238 input_vectors
= [ data
[index
][0] for index
in subset
]
239 output_vectors
= [ data
[index
][1] for index
in subset
]
240 for i
in xrange(self
.num_lincombs
):
241 coefs
= get_random_norm_coefs(len(subset
))
242 combinations
+= [(linear_combination(input_vectors
, coefs
), linear_combination(output_vectors
, coefs
))]
class PlayerStrategyIdentificator(object):
    """Keeps the default strategy of every known player.

    Built from `strategy_players', a mapping from a strategy to the players
    following it. Exposes `player_strategy' (player -> strategy),
    `all_players' and `all_strategies' (lists, in iteration order of the
    mapping), and the original mapping as `strategy_players'.
    """
    def __init__(self, strategy_players):
        lookup = {}
        seen_players = []
        seen_strategies = []
        for strategy_name, members in strategy_players.items():
            seen_strategies.append(strategy_name)
            for member in members:
                seen_players.append(member)
                # A player listed under several strategies keeps the last one.
                lookup[member] = strategy_name

        self.strategy_players = strategy_players
        self.player_strategy = lookup
        self.all_players = seen_players
        self.all_strategies = seen_strategies

    def __call__(self, player_name):
        """Return the strategy of `player_name'; KeyError if the player is unknown."""
        return self.player_strategy[player_name]
262 class StrategyOutputVectorGenerator(VectorGenerator
):
264 This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
265 It is initialized with a list of strategies `valid_strategies' it shall take into acount.
266 When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
267 that corresponds to the strategy like this.
269 def __init__(self
, strategy_players
, valid_strategies
=None):
270 self
.identificator
= PlayerStrategyIdentificator(strategy_players
)
271 if valid_strategies
== None:
272 valid_strategies
= self
.identificator
.all_strategies
274 self
.valid_strategy_index
={}
275 for s
in valid_strategies
:
276 self
.valid_strategy_index
[s
]= index
278 def __call__(self
, player_name
):
280 player_strat
= self
.identificator(player_name
)
281 player_strat_index
= self
.valid_strategy_index
[player_strat
]
282 return [ 1.0 if i
== player_strat_index
else -1.0 for i
in xrange(len(self
.valid_strategy_index
)) ]
286 class PlanarOutputVectorGenerator(VectorGenerator
):
287 """Class that explicitly returns predefined output vectors for given players."""
288 def __init__(self
, player_vector
):
289 self
.player_vector
= player_vector
290 self
.players
= player_vector
.keys()
291 def __call__(self
, player_name
):
293 return self
.player_vector
[player_name
]
if __name__ == '__main__':
    # Running the module directly does nothing useful; say so on stderr.
    sys.stderr.write("This is just a library file...\n")