gostyle.py

   1 #!/usr/bin/python
   2 """
This file contains several objects we use to process pattern files for the game of Go.
We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format as a regexp.
   6
   7 ===== EXAMPLE PAT FILE =====
   8 4632 (border:3 s:5000003 s:6000049 s:700004a)
   9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
  10 ...
  11 ===== END =====
  12 """
  13 import re
  14 import sys
  15 import cPickle
  16 import random
  17 from itertools import izip,count
  18
  19 class Const:
  20         """Class used to hold global const variables, such as the pat file format."""
  21         pat_file_regexp = '^\s*(\d+)\s*(.+)$'
  22
  23 def print_vector(vector, where=sys.stdout):
  24         """Helper method for printing vector (list of floats)."""
  25         for x in vector:
  26                 print >> where, x,
  27         print >> where
  28
  29 def print_set_to_file( data, filename):
  30         """
  31         Helper method for printing datasets for neural network.
  32         FORMAT of the file
  33         number_of_pairs len_of_input_vector len_of_output_vector
  34         first_input_vector
  35         first_output_vector
  36         second_input_vector
  37         ...
  38         """
  39         def print_set( data, where):
  40                 print >> where,  len(data), len(data[0][0]), len(data[0][1])
  41                 for i,o in data:
  42                         print_vector(i, where)
  43                         print_vector(o, where)
  44         fout = open(filename, 'w')
  45         print_set(data, fout)
  46         fout.close()
  47
  48 def dump_object_to_file(object, filename):
  49         """Helper function to save an object to file using cPickle module."""
  50         f=open(filename,'w')
  51         cPickle.dump(object, f,-1)
  52         f.close()
  53
  54 def load_object_from_file(filename):
  55         """Helper function to recover an object from file using cPickle module."""
  56         f=open(filename,'r')
  57         object = cPickle.load(f)
  58         f.close()
  59         return object
  60
  61 class VectorGenerator(object):
  62         """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
  63         def __call__(self):
  64                 raise NotImplementedError
  65
  66 class VectorToVector(VectorGenerator):
  67         """Abstract class. When called with a vector (list of floats), returns a vector. Has input_dim and output_dim params."""
  68         def __call__(self, vector):
  69                 raise NotImplementedError
  70
  71 ###
  72 ###             PCA Analysis
###  Note that you must have the `numpy' and `mdp' Python modules installed, otherwise the PCA will
###  fall back to doing nothing at all (but doing it in a way fully compatible with the rest of the code :-).
  75 try:
  76         from numpy import array
  77         import mdp
  78         class PCA(VectorToVector):
  79                 """
  80                 Object performing a PCA analysis on either a given vector (see `__call__' function),
  81                 or on a list of vectors (see `process_list_of_vectors' function).
  82                 """
  83                 def __init__(self, list_of_train_vectors, *args, **kwargs):
  84                         self.pca = mdp.nodes.PCANode(*args, **kwargs)
  85                         self.pca.train(array(list_of_train_vectors))
  86                         self.pca.stop_training()
  87                         self.input_dim = self.pca.input_dim
  88                 def __call__(self, vector):
  89                         return list(self.pca(array([vector]))[0])
  90                 def process_list_of_vectors(self, list_of_vectors):
  91                         return [ list(vec) for vec in self.pca(array(list_of_vectors)) ]
  92                 def get_projection_info(self):
  93                         return self.pca.get_recmatrix()
  94                 def get_eigenvalues(self):
  95                         return self.pca.d
  96                 def get_eigenvectors(self):
  97                         return list(self.pca.v)
  98
  99 except ImportError, e:
 100         print >>sys.stderr, "Warning: %s. PCA will not work."%(str(e))
 101
 102         class PCA(VectorToVector):
 103                 """Default dummy class for PCA, not very useless."""
 104                 def __init__(self, *args, **kwargs):
 105                         pass
 106                 def __call__(self, list_of_vectors):
 107                         return list_of_vectors
 108                 def process_list_of_vectors(self, list_of_vectors):
 109                         return list_of_vectors
 110
 111
 112 class Compose(VectorGenerator):
 113         """
 114         A class used as a composer of different objects, such as InputVectorGenerator and PCA.
 115         Use this if you want to e.g. generate PCA processed vectors.
 116         """
 117         def __init__(self, vector_generator, vector_to_vector):
 118                 if not isinstance(vector_generator, VectorGenerator):
 119                         raise TypeError
 120                 if not isinstance(vector_to_vector, VectorToVector):
 121                         raise TypeError
 122                 self.vector_generator = vector_generator
 123                 self.vector_to_vector = vector_to_vector
 124         #       if vector_generator.output_dim != vector_to_vector.input_dim:
 125         #               raise RuntimeError("Dimensions of Composed object mismatch.")
 126         def __call__(self, *args, **kwargs):
 127                 return self.vector_to_vector(self.vector_generator(*args, **kwargs))
 128
 129 class OccurenceVectorGenerator(VectorGenerator):
 130         """
 131         A class used to generate input vectors based on a relative number of occurences of some input patterns.
 132         The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
 133         """
 134         def generate_top_pattern_dict(self):
 135                 rexp=re.compile(Const.pat_file_regexp)
 136                 self.top_pattern_dict = {}
 137                 self.top_pattern_str = {}
 138                 i = 0
 139                 input_file = open(self.filename)
 140                 for line in input_file:
 141                         if i >= self.output_dim:
 142                                 break
 143                         s = rexp.match(line).group(2)
 144                         self.top_pattern_dict[s] = i
 145                         self.top_pattern_str[i] = s
 146                         i += 1
 147                 input_file.close()
 148
 149         def __init__(self, main_pat_file, num_features):
 150                 self.output_dim = num_features
 151                 self.filename = main_pat_file
 152                 self.generate_top_pattern_dict()
 153
 154         def __call__(self, pat_file):
 155                 vector = [0]*len(self.top_pattern_dict)
 156                 rexp=re.compile(Const.pat_file_regexp)
 157                 i = 0
 158                 input_file = open(pat_file)
 159                 for line in input_file:
 160                         match = rexp.match(line)
 161                         if not match:
 162                                 raise IOError("Wrong file format: " + pat_file)
 163                         if match.group(2) in self.top_pattern_dict:
 164                                 index=self.top_pattern_dict[match.group(2)]
 165                                 vector[index] += int(match.group(1))
 166                                 i += 1
 167                         if i >= len(self.top_pattern_dict):
 168                                 break
 169                 input_file.close()
 170                 if len(vector) != self.output_dim:
 171                         raise RuntimeError
 172                 return vector
 173
 174         def stringof(self, i):
 175                 return self.top_pattern_str[i]
 176
 177 class Rescale(VectorToVector):
 178         """Class that rescales vectors to a given interval!"""
 179         def __init__(self, a=-1.0, b=1.0):
 180                 if a >= b:
 181                         raise RuntimeError("a must be < b")
 182                 self.a = a
 183                 self.avg = (a + b) * 0.5
 184                 self.tot = b - a
 185         def norm(self, x):
 186                 return x
 187         def renorm(self, x):
 188                 return x
 189         def __call__(self, vector):
 190                 if len(vector) == 0:
 191                         raise RuntimeError
 192                 to_zero = 0 - self.norm(min(vector))
 193                 maximum = self.norm(max(vector)) + to_zero
 194                 if maximum == 0:
 195                         return [ self.avg  for _ in vector ]
 196                 return [ self.tot * self.renorm(float(self.norm(x) + to_zero) / maximum) + self.a for x in vector ]
 197
 198 import math;
 199 class LogRescale(Rescale):
 200         def norm(self, x):
 201                 return math.log(x+1)
 202         def renorm(self, x):
 203                 return 1 / (1 + math.exp(-6*(x-0.5)));
 204
 205 class InputVectorGenerator(VectorGenerator):
 206         """
 207         First we generate an occurence vector by OccurenceVectorGenerator.
 208         Then, an input vector is generated as a relative number of occurences of the topmost patterns.
 209         The occurences are mapped so that the most frequently used
 210         one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.
 211         """
 212         def __init__(self, *args, **kwargs):
 213                 try:
 214                         rescale = kwargs['rescale']
 215                         del kwargs['rescale']
 216                 except KeyError:
 217                         rescale = Rescale
 218                 self.ovg = OccurenceVectorGenerator(*args, **kwargs)
 219                 self.gen = Compose(self.ovg, rescale(-1.0, 1.0))
 220         def __call__(self, *args, **kwargs):
 221                 return self.gen(*args, **kwargs)
 222         def ovg(self):
 223                 return self.ovg
 224
 225 def linear_combination(list_of_vectors, coefs):
 226         if len(list_of_vectors) != len(coefs):
 227                 raise Exception("len(list_of_vectors) != len(coefs)")
 228         if len(list_of_vectors) == 0:
 229                 return
 230         len_vec=len(list_of_vectors[0])
 231         res_vec=[0]*len_vec
 232         for p in xrange(len(list_of_vectors)):
 233                 for i in xrange(len_vec):
 234                         res_vec[i] += coefs[p] * list_of_vectors[p][i]
 235         return res_vec
 236
 237 def get_random_norm_coefs( num ):
 238         coefs=[]
 239         rnd_nums= [ random.random() for i in xrange(num-1) ] + [1]
 240         rnd_nums.sort()
 241         first=0
 242         for next in rnd_nums:
 243                 coefs.append(next-first)
 244                 first=next
 245         return coefs
 246
 247 class Combinator(object):
 248         def __init__(self, num_lincombs = 1, skip_subset_len = [0], max_len = 2):
 249                 self.num_lincombs = num_lincombs
 250                 self.skip_subset_len = skip_subset_len
 251                 self.max_len = max_len
 252         def get_subsets(self, set):
 253                 if len(set) == 0:
 254                         return [[]]
 255                 sub=self.get_subsets(set[1:])
 256                 return sub + filter( lambda x : ( self.max_len==0 or len(x)<=self.max_len ), [ set[:1]+subset  for subset in sub] )
 257         def combine(self, data):
 258                 combinations = []
 259                 for subset in self.get_subsets(range(len(data))):
 260                         if len(subset) in self.skip_subset_len:
 261                                 continue
 262                         input_vectors = [ data[index][0] for index in subset ]
 263                         output_vectors = [ data[index][1] for index in subset ]
 264                         for i in xrange(self.num_lincombs):
 265                                 coefs = get_random_norm_coefs(len(subset))
 266                                 combinations += [(linear_combination(input_vectors, coefs), linear_combination(output_vectors, coefs))]
 267                 return combinations
 268
 269 class PlayerStrategyIdentificator(object):
 270         """Object holding information about default strategies for players."""
 271         def __init__(self, strategy_players):
 272                 self.strategy_players = strategy_players
 273
 274                 self.player_strategy={}
 275                 self.all_players = []
 276                 self.all_strategies = []
 277                 for strategy, players in self.strategy_players.items():
 278                         self.all_strategies += [strategy]
 279                         for player in players:
 280                                 self.all_players += [player]
 281                                 self.player_strategy[player] = strategy
 282
 283         def __call__(self, player_name):
 284                 return self.player_strategy[player_name]
 285
 286 class StrategyOutputVectorGenerator(VectorGenerator):
 287         """
 288         This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
 289         It is initialized with a list of strategies `valid_strategies' it shall take into acount.
 290         When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
 291         that corresponds to the strategy like this.
 292         """
 293         def __init__(self, strategy_players, valid_strategies=None):
 294                 self.identificator = PlayerStrategyIdentificator(strategy_players)
 295                 if valid_strategies == None:
 296                         valid_strategies = self.identificator.all_strategies
 297                 index=0
 298                 self.valid_strategy_index={}
 299                 for s in valid_strategies:
 300                         self.valid_strategy_index[s]= index
 301                         index += 1
 302         def __call__(self, player_name):
 303                 try:
 304                         player_strat = self.identificator(player_name)
 305                         player_strat_index = self.valid_strategy_index[player_strat]
 306                         return [ 1.0 if i == player_strat_index else -1.0 for i in xrange(len(self.valid_strategy_index)) ]
 307                 except KeyError:
 308                         return None
 309
 310 class PlanarOutputVectorGenerator(VectorGenerator):
 311         """Class that explicitly returns predefined output vectors for given players."""
 312         def __init__(self, player_vector):
 313                 self.player_vector = player_vector
 314                 self.players = player_vector.keys()
 315         def __call__(self, player_name):
 316                 try:
 317                         return self.player_vector[player_name]
 318                 except KeyError:
 319                         return None
 320
 321 if __name__ == '__main__':
 322         print >>sys.stderr, "This is just a library file..."