PCA players_ignore: Ignore all other players with tiny sample sizes
[gostyle.git] / gostyle.py
blobc21f2d594aa4393cdbc50676b83aaaccb08746b6
1 #!/usr/bin/python
2 """
3 This file contains several objects we use to process pattern files for the game of Go.
4 We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
5 It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format, expressed as a regexp.
7 ===== EXAMPLE PAT FILE =====
8 4632 (border:3 s:5000003 s:6000049 s:700004a)
9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
10 ...
11 ===== END =====
12 """
13 import re
14 import sys
15 import cPickle
16 import random
17 from itertools import izip,count
class Const:
    """Class used to hold global const variables, such as the pat file format."""
    # One pattern-file line: optional leading whitespace, an integer count
    # (group 1), optional whitespace, then the pattern text up to the end of
    # the line (group 2).  Written as a raw string so that `\s' and `\d' reach
    # the regexp engine verbatim instead of relying on unrecognised string
    # escapes being passed through.
    pat_file_regexp = r'^\s*(\d+)\s*(.+)$'
def print_vector(vector, where=sys.stdout):
    """
    Helper method for printing vector (list of floats).

    The elements are written to `where' on a single line, converted with
    `str' and separated by single spaces; the line is terminated by a newline.
    An empty vector produces just the newline.

    `where' is any object with a `write' method; it defaults to the
    `sys.stdout' that was current when this module was loaded.
    """
    # Uses write() rather than the `print >>' statement so that the function
    # runs unchanged on both Python 2 and Python 3.
    where.write(" ".join(str(x) for x in vector))
    where.write("\n")
def print_set_to_file(data, filename):
    """
    Helper method for printing datasets for neural network.

    `data' is a sequence of (input_vector, output_vector) pairs; `filename'
    is overwritten with the following content.

    FORMAT of the file
        number_of_pairs len_of_input_vector len_of_output_vector
        first_input_vector
        first_output_vector
        second_input_vector
        ...

    The vector lengths in the header are taken from the first pair, so `data'
    must not be empty (an IndexError is raised otherwise, after the file has
    already been created).
    """
    # `with' guarantees that the file is closed even if writing fails halfway.
    with open(filename, 'w') as fout:
        fout.write("%d %d %d\n" % (len(data), len(data[0][0]), len(data[0][1])))
        for input_vector, output_vector in data:
            print_vector(input_vector, fout)
            print_vector(output_vector, fout)
def dump_object_to_file(object, filename):
    """
    Helper function to save an object to file using cPickle module.

    `object' is pickled with protocol -1 (the highest protocol available) and
    written to `filename', replacing any previous content.
    """
    # Protocol -1 selects a binary pickle format, so the file has to be opened
    # in binary mode; text mode corrupts the stream on platforms that
    # translate line endings.
    with open(filename, 'wb') as f:
        cPickle.dump(object, f, -1)
def load_object_from_file(filename):
    """
    Helper function to recover an object from file using cPickle module.

    Returns the first object unpickled from `filename'.

    SECURITY NOTE: unpickling can execute arbitrary code, so only load files
    that come from a trusted source.
    """
    # Pickles may be stored in a binary protocol, so the file is read in
    # binary mode.
    with open(filename, 'rb') as f:
        return cPickle.load(f)
class VectorGenerator(object):
    """
    Abstract base of everything that produces a vector (list of floats) when called.

    Concrete subclasses override `__call__' and are expected to provide
    an `output_dim' attribute.
    """

    def __call__(self):
        # Abstract: the base class has nothing to generate.
        raise NotImplementedError()
class VectorToVector(VectorGenerator):
    """
    Abstract base of vector transformations.

    When called with a vector (list of floats), a concrete subclass returns
    another vector.  Subclasses are expected to provide `input_dim' and
    `output_dim' attributes.
    """

    def __call__(self, vector):
        # Abstract: the base class defines no transformation.
        raise NotImplementedError()
71 ###
72 ### PCA Analysis
73 ### Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
74 ### fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
try:
    from numpy import array
    import mdp

    class PCA(VectorToVector):
        """
        Object performing a PCA analysis on either a given vector (see `__call__' function),
        or on a list of vectors (see `process_list_of_vectors' function).
        """

        def __init__(self, list_of_train_vectors, *args, **kwargs):
            """
            Create an `mdp.nodes.PCANode' and train it on `list_of_train_vectors'.

            Any extra positional and keyword arguments are passed on to the
            `PCANode' constructor unchanged.
            """
            self.pca = mdp.nodes.PCANode(*args, **kwargs)
            self.pca.train(array(list_of_train_vectors))
            self.input_dim = self.pca.input_dim

        def __call__(self, vector):
            """Return the single `vector' processed by the PCA node, as a list."""
            # The node is fed a one-row array; the only row of the result is
            # unwrapped again.
            return list(self.pca(array([vector]))[0])

        def process_list_of_vectors(self, list_of_vectors):
            """Return a list with every vector of `list_of_vectors' processed by the PCA node."""
            return [list(vec) for vec in self.pca(array(list_of_vectors))]

except ImportError as e:
    sys.stderr.write("Warning: %s. PCA will not work.\n" % (str(e)))

    class PCA(VectorToVector):
        """Default dummy class for PCA, not very useful."""

        def __init__(self, *args, **kwargs):
            """Accept and ignore any arguments; there is nothing to train."""
            pass

        def __call__(self, list_of_vectors):
            """Return the argument unchanged."""
            return list_of_vectors

        def process_list_of_vectors(self, list_of_vectors):
            """Return `list_of_vectors' unchanged."""
            return list_of_vectors
class Compose(VectorGenerator):
    """
    A class used as a composer of different objects, such as InputVectorGenerator and PCA.
    Use this if you want to e.g. generate PCA processed vectors.

    Calling the composed object first calls the generator with all the
    given arguments and then feeds its result to the transformation.
    """

    def __init__(self, vector_generator, vector_to_vector):
        # Both parts have to implement the matching abstract interface; the
        # generator is checked first.
        requirements = ((vector_generator, VectorGenerator),
                        (vector_to_vector, VectorToVector))
        for part, required_class in requirements:
            if not isinstance(part, required_class):
                raise TypeError
        self.vector_generator = vector_generator
        self.vector_to_vector = vector_to_vector
        # NOTE: the check that the dimensions of both parts fit is currently
        # disabled:
        # if vector_generator.output_dim != vector_to_vector.input_dim:
        #     raise RuntimeError("Dimensions of Composed object mismatch.")

    def __call__(self, *args, **kwargs):
        generated = self.vector_generator(*args, **kwargs)
        return self.vector_to_vector(generated)
class OccurenceVectorGenerator(VectorGenerator):
    """
    A class used to generate input vectors based on a relative number of occurences of some input patterns.
    The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
    """

    def __init__(self, main_pat_file, num_features):
        """
        Remember the topmost `num_features' patterns of `main_pat_file'.

        Each of these patterns is assigned one position in the vectors
        generated later by `__call__'.
        """
        self.output_dim = num_features
        self.filename = main_pat_file
        self.generate_top_pattern_dict()

    def generate_top_pattern_dict(self):
        """
        Fill `self.top_pattern_dict' from the first `self.output_dim' lines of `self.filename'.

        The dict maps the pattern text of a line to the (zero based) number
        of that line.  Raises IOError when one of the lines does not have
        the format given by `Const.pat_file_regexp'.
        """
        rexp = re.compile(Const.pat_file_regexp)
        self.top_pattern_dict = {}
        # `with' closes the file even when a malformed line aborts the loop.
        with open(self.filename) as input_file:
            for index, line in enumerate(input_file):
                if index >= self.output_dim:
                    break
                match = rexp.match(line)
                if not match:
                    # Same error as `__call__' raises for a malformed file.
                    raise IOError("Wrong file format: " + self.filename)
                self.top_pattern_dict[match.group(2)] = index

    def __call__(self, pat_file):
        """
        Return the occurence vector for the pattern file `pat_file'.

        Position k of the result holds the sum of the counts that `pat_file'
        lists for the pattern number k of the main pattern file; patterns
        that are not among the top ones are ignored.

        Raises IOError when a line of `pat_file' has a wrong format and
        RuntimeError when fewer than `output_dim' distinct top patterns
        were found in the main pattern file.
        """
        vector = [0] * len(self.top_pattern_dict)
        rexp = re.compile(Const.pat_file_regexp)
        found = 0
        with open(pat_file) as input_file:
            for line in input_file:
                match = rexp.match(line)
                if not match:
                    raise IOError("Wrong file format: " + pat_file)
                pattern = match.group(2)
                if pattern in self.top_pattern_dict:
                    vector[self.top_pattern_dict[pattern]] += int(match.group(1))
                    # NOTE(review): the counter is advanced only for lines that
                    # hit a top pattern, so that reading stops once as many hits
                    # as there are top patterns were seen -- please confirm
                    # this matches the intended behaviour.
                    found += 1
                if found >= len(self.top_pattern_dict):
                    break
        if len(vector) != self.output_dim:
            raise RuntimeError
        return vector
class Rescale(VectorToVector):
    """
    Class that rescales vectors to a given interval!

    The smallest element of a vector is mapped to `a', the largest one
    to `b' and the elements in between are mapped linearly.
    """

    def __init__(self, a=-1.0, b=1.0):
        """Set the target interval <a, b>; raises RuntimeError if a > b."""
        if a > b:
            raise RuntimeError
        self.a = a
        self.tot = b - a  # length of the target interval

    def __call__(self, vector):
        """
        Return `vector' rescaled to the target interval.

        Raises RuntimeError for an empty vector and ZeroDivisionError for
        a vector whose elements are all equal, because such a vector has no
        range that could be stretched over the interval.
        """
        if len(vector) == 0:
            raise RuntimeError
        to_zero = 0 - min(vector)
        maximum = max(vector) + to_zero  # range of the values in the vector
        if maximum == 0:
            raise ZeroDivisionError("cannot rescale a vector whose elements are all equal")
        # The explicit float() keeps the division a true division even if both
        # the interval bounds and the vector elements are integers.
        return [self.tot * (x + to_zero) / float(maximum) + self.a for x in vector]
class InputVectorGenerator(VectorGenerator):
    """
    Generates input vectors from pattern files.

    An occurence vector is produced by an `OccurenceVectorGenerator' first
    and the result is then rescaled by `Rescale' to the interval from -1.0
    to 1.0.  All the constructor arguments are handed over to
    `OccurenceVectorGenerator', all the call arguments to the composed
    generator.
    """

    def __init__(self, *args, **kwargs):
        occurences = OccurenceVectorGenerator(*args, **kwargs)
        rescaler = Rescale(-1.0, 1.0)
        self.gen = Compose(occurences, rescaler)

    def __call__(self, *args, **kwargs):
        generate = self.gen
        return generate(*args, **kwargs)
def linear_combination(list_of_vectors, coefs):
    """
    Return the linear combination of `list_of_vectors' with the coefficients `coefs'.

    Element i of the result is the sum of coefs[p] * list_of_vectors[p][i]
    over all p.  The length of the result is the length of the first vector.

    Returns None when `list_of_vectors' is empty.
    Raises Exception when the number of vectors and coefficients differ.
    """
    if len(list_of_vectors) != len(coefs):
        raise Exception("len(list_of_vectors) != len(coefs)")
    if len(list_of_vectors) == 0:
        return None
    len_vec = len(list_of_vectors[0])
    res_vec = [0] * len_vec
    # The vectors are accumulated one after another, in the given order.
    for coef, vector in zip(coefs, list_of_vectors):
        for i in range(len_vec):
            res_vec[i] += coef * vector[i]
    return res_vec
def get_random_norm_coefs(num):
    """
    Return a list of `num' random non-negative coefficients that sum up to 1.

    The interval <0, 1> is cut at `num' - 1 random points and the lengths of
    the resulting pieces are returned, in the order from left to right.
    For `num' < 1 the result is the one element list [1].
    """
    # The trailing 1 is the right end of the interval.
    cut_points = [random.random() for _ in range(num - 1)] + [1]
    cut_points.sort()
    coefs = []
    previous = 0
    for point in cut_points:
        coefs.append(point - previous)
        previous = point
    return coefs
class Combinator(object):
    """
    Object enlarging a dataset by random linear combinations of its items.

    See the `combine' function.
    """

    def __init__(self, num_lincombs=1, skip_subset_len=None, max_len=2):
        """
        num_lincombs    -- number of random combinations generated for each subset
        skip_subset_len -- collection of the subset sizes that shall be skipped;
                           when omitted (or None) it is [0], i.e. only the empty
                           subset is skipped
        max_len         -- the largest subset size taken into account; 0 means
                           that the size is not limited
        """
        # A fresh list is created for every object, instead of sharing one
        # mutable default among all of them.
        if skip_subset_len is None:
            skip_subset_len = [0]
        self.num_lincombs = num_lincombs
        self.skip_subset_len = skip_subset_len
        self.max_len = max_len

    def get_subsets(self, set):
        """
        Return a list of all the sublists of the list `set' not longer than `self.max_len'.

        The elements keep their order inside a sublist.  The sublists without
        the first element of `set' come first, followed by those with it.
        """
        if len(set) == 0:
            return [[]]
        sub = self.get_subsets(set[1:])
        with_head = [set[:1] + subset for subset in sub]
        if self.max_len != 0:
            with_head = [subset for subset in with_head if len(subset) <= self.max_len]
        return sub + with_head

    def combine(self, data):
        """
        Return a list of random combinations of the items of `data'.

        `data' is a sequence of (input_vector, output_vector) pairs.  For each
        allowed subset of the pairs, `self.num_lincombs' new pairs are
        generated; both vectors of a new pair are the linear combination of
        the corresponding vectors of the subset, using the same random
        coefficients (see `get_random_norm_coefs').
        """
        combinations = []
        # list() is necessary for the slicing and concatenation in
        # `get_subsets' to work with the Python 3 range objects as well.
        for subset in self.get_subsets(list(range(len(data)))):
            if len(subset) in self.skip_subset_len:
                continue
            input_vectors = [data[index][0] for index in subset]
            output_vectors = [data[index][1] for index in subset]
            for _ in range(self.num_lincombs):
                coefs = get_random_norm_coefs(len(subset))
                combinations.append((linear_combination(input_vectors, coefs),
                                     linear_combination(output_vectors, coefs)))
        return combinations
class PlayerStrategyIdentificator(object):
    """
    Keeps track of which strategy every known player belongs to.

    Built from `strategy_players', a dict mapping a strategy to the players
    that use it.  Calling the object with a player name gives the strategy
    of that player (KeyError for an unknown player).
    """

    def __init__(self, strategy_players):
        self.strategy_players = strategy_players

        strategies = []
        roster = []
        lookup = {}
        for strategy_name, members in strategy_players.items():
            strategies.append(strategy_name)
            for member in members:
                roster.append(member)
                # A player listed under several strategies keeps the one seen last.
                lookup[member] = strategy_name

        self.player_strategy = lookup
        self.all_players = roster
        self.all_strategies = strategies

    def __call__(self, player_name):
        return self.player_strategy[player_name]
class StrategyOutputVectorGenerator(VectorGenerator):
    """
    This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
    It is initialized with a list of strategies `valid_strategies' it shall take into acount.
    When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
    that has 1.0 on the position of the strategy of the player and -1.0 on all the other positions.
    """

    def __init__(self, strategy_players, valid_strategies=None):
        """
        strategy_players -- dict mapping a strategy to the list of its players
        valid_strategies -- strategies that get a position in the output vector,
                            in this order; all the strategies found in
                            `strategy_players' when omitted
        """
        self.identificator = PlayerStrategyIdentificator(strategy_players)
        if valid_strategies is None:
            valid_strategies = self.identificator.all_strategies
        # Position of each valid strategy in the output vector.
        self.valid_strategy_index = {}
        for index, strategy in enumerate(valid_strategies):
            self.valid_strategy_index[strategy] = index

    def __call__(self, player_name):
        """
        Return the output vector for the player, or None if either the player
        or the strategy of the player is not known.
        """
        try:
            player_strat = self.identificator(player_name)
            player_strat_index = self.valid_strategy_index[player_strat]
        except KeyError:
            return None
        return [1.0 if i == player_strat_index else -1.0
                for i in range(len(self.valid_strategy_index))]
class PlanarOutputVectorGenerator(VectorGenerator):
    """
    Hands out output vectors that were given explicitly for each player.

    `player_vector' maps a player name to the output vector of that player.
    Calling the object with a name missing in the mapping gives None.
    """

    def __init__(self, player_vector):
        self.player_vector = player_vector
        self.players = player_vector.keys()

    def __call__(self, player_name):
        try:
            predefined = self.player_vector[player_name]
        except KeyError:
            # No vector was defined for this player.
            return None
        return predefined
if __name__ == '__main__':
    # The module is meant to be imported; just say so when it is run directly.
    # write() is used instead of the `print >>' statement so that the file
    # can be parsed by both Python 2 and Python 3.
    sys.stderr.write("This is just a library file...\n")