gostyle.py

   1 #!/usr/bin/python
   2 """
This file contains several objects we use to process pattern files for the game of Go.
We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis on the data.
It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format expressed as a regexp.
   6
   7 ===== EXAMPLE PAT FILE =====
   8 4632 (border:3 s:5000003 s:6000049 s:700004a)
   9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
  10 ...
  11 ===== END =====
  12 """
  13 import re
  14 import sys
  15 import cPickle
  16 import random
  17 from itertools import izip,count
  18
  19 class Const:
  20         """Class used to hold global const variables, such as the pat file format."""
  21         pat_file_regexp = '^\s*(\d+)\s*(.+)$'
  22
  23 def print_vector(vector, where=sys.stdout):
  24         """Helper method for printing vector (list of floats)."""
  25         for x in vector:
  26                 print >> where, x,
  27         print >> where
  28
  29 def print_set_to_file( data, filename):
  30         """
  31         Helper method for printing datasets for neural network.
  32         FORMAT of the file
  33         number_of_pairs len_of_input_vector len_of_output_vector
  34         first_input_vector
  35         first_output_vector
  36         second_input_vector
  37         ...
  38         """
  39         def print_set( data, where):
  40                 print >> where,  len(data), len(data[0][0]), len(data[0][1])
  41                 for i,o in data:
  42                         print_vector(i, where)
  43                         print_vector(o, where)
  44         fout = open(filename, 'w')
  45         print_set(data, fout)
  46         fout.close()
  47
  48 def dump_object_to_file(object, filename):
  49         """Helper function to save an object to file using cPickle module."""
  50         f=open(filename,'w')
  51         cPickle.dump(object, f,-1)
  52         f.close()
  53
  54 def load_object_from_file(filename):
  55         """Helper function to recover an object from file using cPickle module."""
  56         f=open(filename,'r')
  57         object = cPickle.load(f)
  58         f.close()
  59         return object
  60
  61 class VectorGenerator(object):
  62         """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
  63         def __call__(self):
  64                 raise NotImplementedError
  65
  66 class VectorToVector(VectorGenerator):
  67         """Abstract class. When called with a vector (list of floats), returns a vector. Has input_dim and output_dim params."""
  68         def __call__(self, vector):
  69                 raise NotImplementedError
  70
  71 ###
  72 ###             PCA Analysis
###  Note that you must have the `numpy' and `mdp' Python modules installed; otherwise the PCA will
###  fall back to doing nothing at all (but in a way that is fully compatible with the rest of the code :-).
  75 try:
  76         from numpy import array
  77         import mdp
  78         class PCA(VectorToVector):
  79                 """
  80                 Object performing a PCA analysis on either a given vector (see `__call__' function),
  81                 or on a list of vectors (see `process_list_of_vectors' function).
  82                 """
  83                 def __init__(self, list_of_train_vectors, *args, **kwargs):
  84                         self.pca = mdp.nodes.PCANode(*args, **kwargs)
  85                         self.pca.train(array(list_of_train_vectors))
  86                         self.input_dim = self.pca.input_dim
  87                 def __call__(self, vector):
  88                         return list(self.pca(array([vector]))[0])
  89                 def process_list_of_vectors(self, list_of_vectors):
  90                         return [ list(vec) for vec in self.pca(array(list_of_vectors)) ]
  91                 def get_projection_info(self):
  92                         return self.pca.get_recmatrix()
  93
  94 except ImportError, e:
  95         print >>sys.stderr, "Warning: %s. PCA will not work."%(str(e))
  96
  97         class PCA(VectorToVector):
  98                 """Default dummy class for PCA, not very useless."""
  99                 def __init__(self, *args, **kwargs):
 100                         pass
 101                 def __call__(self, list_of_vectors):
 102                         return list_of_vectors
 103                 def process_list_of_vectors(self, list_of_vectors):
 104                         return list_of_vectors
 105
 106
 107 class Compose(VectorGenerator):
 108         """
 109         A class used as a composer of different objects, such as InputVectorGenerator and PCA.
 110         Use this if you want to e.g. generate PCA processed vectors.
 111         """
 112         def __init__(self, vector_generator, vector_to_vector):
 113                 if not isinstance(vector_generator, VectorGenerator):
 114                         raise TypeError
 115                 if not isinstance(vector_to_vector, VectorToVector):
 116                         raise TypeError
 117                 self.vector_generator = vector_generator
 118                 self.vector_to_vector = vector_to_vector
 119         #       if vector_generator.output_dim != vector_to_vector.input_dim:
 120         #               raise RuntimeError("Dimensions of Composed object mismatch.")
 121         def __call__(self, *args, **kwargs):
 122                 return self.vector_to_vector(self.vector_generator(*args, **kwargs))
 123
 124 class OccurenceVectorGenerator(VectorGenerator):
 125         """
 126         A class used to generate input vectors based on a relative number of occurences of some input patterns.
 127         The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
 128         """
 129         def generate_top_pattern_dict(self):
 130                 rexp=re.compile(Const.pat_file_regexp)
 131                 self.top_pattern_dict = {}
 132                 self.top_pattern_str = {}
 133                 i = 0
 134                 input_file = open(self.filename)
 135                 for line in input_file:
 136                         if i >= self.output_dim:
 137                                 break
 138                         s = rexp.match(line).group(2)
 139                         self.top_pattern_dict[s] = i
 140                         self.top_pattern_str[i] = s
 141                         i += 1
 142                 input_file.close()
 143
 144         def __init__(self, main_pat_file, num_features):
 145                 self.output_dim = num_features
 146                 self.filename = main_pat_file
 147                 self.generate_top_pattern_dict()
 148
 149         def __call__(self, pat_file):
 150                 vector = [0]*len(self.top_pattern_dict)
 151                 rexp=re.compile(Const.pat_file_regexp)
 152                 i = 0
 153                 input_file = open(pat_file)
 154                 for line in input_file:
 155                         match = rexp.match(line)
 156                         if not match:
 157                                 raise IOError("Wrong file format: " + pat_file)
 158                         if match.group(2) in self.top_pattern_dict:
 159                                 index=self.top_pattern_dict[match.group(2)]
 160                                 vector[index] += int(match.group(1))
 161                                 i += 1
 162                         if i >= len(self.top_pattern_dict):
 163                                 break
 164                 input_file.close()
 165                 if len(vector) != self.output_dim:
 166                         raise RuntimeError
 167                 return vector
 168
 169         def stringof(self, i):
 170                 return self.top_pattern_str[i]
 171
 172 class Rescale(VectorToVector):
 173         """Class that rescales vectors to a given interval!"""
 174         def __init__(self, a=-1.0, b=1.0):
 175                 if a > b:
 176                         raise RuntimeError
 177                 self.a = a
 178                 self.tot = b - a #abs(a) + abs(b)
 179         def __call__(self, vector):
 180                 if len(vector) == 0:
 181                         raise RuntimeError
 182                 to_zero = 0 - min(vector)
 183                 maximum = max(vector) + to_zero
 184                 return [ self.tot * (x + to_zero) / maximum + self.a for x in vector ]
 185
 186 class InputVectorGenerator(VectorGenerator):
 187         """
 188         First we generate an occurence vector by OccurenceVectorGenerator.
 189         Then, an input vector is generated as a relative number of occurences of the topmost patterns.
 190         The occurences are mapped so that the most frequently used
 191         one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.
 192         """
 193         def __init__(self, *args, **kwargs):
 194                 self.ovg = OccurenceVectorGenerator(*args, **kwargs)
 195                 self.gen = Compose(self.ovg, Rescale(-1.0, 1.0))
 196         def __call__(self, *args, **kwargs):
 197                 return self.gen(*args, **kwargs)
 198         def ovg(self):
 199                 return self.ovg
 200
 201 def linear_combination(list_of_vectors, coefs):
 202         if len(list_of_vectors) != len(coefs):
 203                 raise Exception("len(list_of_vectors) != len(coefs)")
 204         if len(list_of_vectors) == 0:
 205                 return
 206         len_vec=len(list_of_vectors[0])
 207         res_vec=[0]*len_vec
 208         for p in xrange(len(list_of_vectors)):
 209                 for i in xrange(len_vec):
 210                         res_vec[i] += coefs[p] * list_of_vectors[p][i]
 211         return res_vec
 212
 213 def get_random_norm_coefs( num ):
 214         coefs=[]
 215         rnd_nums= [ random.random() for i in xrange(num-1) ] + [1]
 216         rnd_nums.sort()
 217         first=0
 218         for next in rnd_nums:
 219                 coefs.append(next-first)
 220                 first=next
 221         return coefs
 222
 223 class Combinator(object):
 224         def __init__(self, num_lincombs = 1, skip_subset_len = [0], max_len = 2):
 225                 self.num_lincombs = num_lincombs
 226                 self.skip_subset_len = skip_subset_len
 227                 self.max_len = max_len
 228         def get_subsets(self, set):
 229                 if len(set) == 0:
 230                         return [[]]
 231                 sub=self.get_subsets(set[1:])
 232                 return sub + filter( lambda x : ( self.max_len==0 or len(x)<=self.max_len ), [ set[:1]+subset  for subset in sub] )
 233         def combine(self, data):
 234                 combinations = []
 235                 for subset in self.get_subsets(range(len(data))):
 236                         if len(subset) in self.skip_subset_len:
 237                                 continue
 238                         input_vectors = [ data[index][0] for index in subset ]
 239                         output_vectors = [ data[index][1] for index in subset ]
 240                         for i in xrange(self.num_lincombs):
 241                                 coefs = get_random_norm_coefs(len(subset))
 242                                 combinations += [(linear_combination(input_vectors, coefs), linear_combination(output_vectors, coefs))]
 243                 return combinations
 244
 245 class PlayerStrategyIdentificator(object):
 246         """Object holding information about default strategies for players."""
 247         def __init__(self, strategy_players):
 248                 self.strategy_players = strategy_players
 249
 250                 self.player_strategy={}
 251                 self.all_players = []
 252                 self.all_strategies = []
 253                 for strategy, players in self.strategy_players.items():
 254                         self.all_strategies += [strategy]
 255                         for player in players:
 256                                 self.all_players += [player]
 257                                 self.player_strategy[player] = strategy
 258
 259         def __call__(self, player_name):
 260                 return self.player_strategy[player_name]
 261
 262 class StrategyOutputVectorGenerator(VectorGenerator):
 263         """
 264         This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
 265         It is initialized with a list of strategies `valid_strategies' it shall take into acount.
 266         When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
 267         that corresponds to the strategy like this.
 268         """
 269         def __init__(self, strategy_players, valid_strategies=None):
 270                 self.identificator = PlayerStrategyIdentificator(strategy_players)
 271                 if valid_strategies == None:
 272                         valid_strategies = self.identificator.all_strategies
 273                 index=0
 274                 self.valid_strategy_index={}
 275                 for s in valid_strategies:
 276                         self.valid_strategy_index[s]= index
 277                         index += 1
 278         def __call__(self, player_name):
 279                 try:
 280                         player_strat = self.identificator(player_name)
 281                         player_strat_index = self.valid_strategy_index[player_strat]
 282                         return [ 1.0 if i == player_strat_index else -1.0 for i in xrange(len(self.valid_strategy_index)) ]
 283                 except KeyError:
 284                         return None
 285
 286 class PlanarOutputVectorGenerator(VectorGenerator):
 287         """Class that explicitly returns predefined output vectors for given players."""
 288         def __init__(self, player_vector):
 289                 self.player_vector = player_vector
 290                 self.players = player_vector.keys()
 291         def __call__(self, player_name):
 292                 try:
 293                         return self.player_vector[player_name]
 294                 except KeyError:
 295                         return None
 296
 297 if __name__ == '__main__':
 298         print >>sys.stderr, "This is just a library file..."