gostyle.py

   1 #!/usr/bin/python
   2 """
This file contains several objects we use to process pattern files for the game of Go.
We use it to generate input sets for a neural network (libfann), or to perform a PCA analysis of the data.
It generates input vectors from pattern files. See `Const.pat_file_regexp' for the file format as a regexp.
   6
   7 ===== EXAMPLE PAT FILE =====
   8 4632 (border:3 s:5000003 s:6000049 s:700004a)
   9 3497 (atariescape:0 border:0 ldist:4 lldist:2 s:30011a1)
  10 ...
  11 ===== END =====
  12 """
  13 import re
  14 import sys
  15 import cPickle
  16 import random
  17 import subprocess
  18 import os
  19 from math import sqrt
  20 from itertools import izip,count
  21
  22 class Const:
  23         """Class used to hold global const variables, such as the pat file format."""
  24         pat_file_regexp = '^\s*(\d+)\s*(.+)$'
  25
  26 def print_vector(vector, where=sys.stdout):
  27         """Helper method for printing vector (list of floats)."""
  28         for x in vector:
  29                 print >> where, x,
  30         print >> where
  31
  32 def print_set_to_file( data, filename):
  33         """
  34         Helper method for printing datasets for neural network.
  35         FORMAT of the file
  36         number_of_pairs len_of_input_vector len_of_output_vector
  37         first_input_vector
  38         first_output_vector
  39         second_input_vector
  40         ...
  41         """
  42         def print_set( data, where):
  43                 print >> where,  len(data), len(data[0][0]), len(data[0][1])
  44                 for i,o in data:
  45                         print_vector(i, where)
  46                         print_vector(o, where)
  47         fout = open(filename, 'w')
  48         print_set(data, fout)
  49         fout.close()
  50
  51 def dump_object_to_file(object, filename):
  52         """Helper function to save an object to file using cPickle module."""
  53         f=open(filename,'w')
  54         cPickle.dump(object, f,-1)
  55         f.close()
  56
  57 def load_object_from_file(filename):
  58         """Helper function to recover an object from file using cPickle module."""
  59         f=open(filename,'r')
  60         object = cPickle.load(f)
  61         f.close()
  62         return object
  63
  64 class VectorGenerator(object):
  65         """Abstract class. When called, returns a vector (list of floats). Has output_dim param."""
  66         def __call__(self):
  67                 raise NotImplementedError
  68
  69 class VectorToVector(VectorGenerator):
  70         """Abstract class. When called with a vector (list of floats), returns a vector. Has input_dim and output_dim params."""
  71         def __call__(self, vector):
  72                 raise NotImplementedError
  73
  74 ###
  75 ###             PCA Analysis
###  Note that you must have the `numpy' and `mdp' python modules installed, otherwise the PCA will
###  fall back to doing nothing at all (but doing it completely compatibly with the rest of the code :-).
  78 try:
  79         from numpy import array
  80         import mdp
  81         class PCA(VectorToVector):
  82                 """
  83                 Object performing a PCA analysis on either a given vector (see `__call__' function),
  84                 or on a list of vectors (see `process_list_of_vectors' function).
  85                 """
  86                 def __init__(self, list_of_train_vectors, *args, **kwargs):
  87                         self.pca = mdp.nodes.PCANode(*args, **kwargs)
  88                         self.pca.train(array(list_of_train_vectors))
  89                         self.pca.stop_training()
  90                         self.input_dim = self.pca.input_dim
  91                 def __call__(self, vector):
  92                         return list(self.pca(array([vector]))[0])
  93                 def process_list_of_vectors(self, list_of_vectors):
  94                         return [ list(vec) for vec in self.pca(array(list_of_vectors)) ]
  95                 def get_projection_info(self):
  96                         return self.pca.get_recmatrix()
  97                 def get_eigenvalues(self):
  98                         return self.pca.d
  99                 def get_eigenvectors(self):
 100                         return list(self.pca.v)
 101
 102 except ImportError, e:
 103         print >>sys.stderr, "Warning: %s. PCA will not work."%(str(e))
 104
 105         class PCA(VectorToVector):
 106                 """Default dummy class for PCA, not very useless."""
 107                 def __init__(self, *args, **kwargs):
 108                         pass
 109                 def __call__(self, list_of_vectors):
 110                         return list_of_vectors
 111                 def process_list_of_vectors(self, list_of_vectors):
 112                         return list_of_vectors
 113
 114
 115 class Compose(VectorGenerator):
 116         """
 117         A class used as a composer of different objects, such as InputVectorGenerator and PCA.
 118         Use this if you want to e.g. generate PCA processed vectors.
 119         """
 120         def __init__(self, vector_generator, vector_to_vector):
 121                 if not isinstance(vector_generator, VectorGenerator):
 122                         raise TypeError
 123                 if not isinstance(vector_to_vector, VectorToVector):
 124                         raise TypeError
 125                 self.vector_generator = vector_generator
 126                 self.vector_to_vector = vector_to_vector
 127         #       if vector_generator.output_dim != vector_to_vector.input_dim:
 128         #               raise RuntimeError("Dimensions of Composed object mismatch.")
 129         def __call__(self, *args, **kwargs):
 130                 return self.vector_to_vector(self.vector_generator(*args, **kwargs))
 131
 132 class OccurenceVectorGenerator(VectorGenerator):
 133         """
 134         A class used to generate input vectors based on a relative number of occurences of some input patterns.
 135         The object is initialized with a file of patterns. It takes the topmost `num_features' patterns.
 136         """
 137         def generate_top_pattern_dict(self):
 138                 rexp=re.compile(Const.pat_file_regexp)
 139                 self.top_pattern_dict = {}
 140                 self.top_pattern_str = {}
 141                 i = 0
 142                 input_file = open(self.filename)
 143                 for line in input_file:
 144                         if i >= self.output_dim:
 145                                 break
 146                         s = rexp.match(line).group(2)
 147                         self.top_pattern_dict[s] = i
 148                         self.top_pattern_str[i] = s
 149                         i += 1
 150                 input_file.close()
 151
 152         def __init__(self, main_pat_file, num_features):
 153                 self.output_dim = num_features
 154                 self.filename = main_pat_file
 155                 self.generate_top_pattern_dict()
 156
 157         def __call__(self, pat_file):
 158                 vector = [0]*len(self.top_pattern_dict)
 159                 rexp=re.compile(Const.pat_file_regexp)
 160                 i = 0
 161                 input_file = open(pat_file)
 162                 for line in input_file:
 163                         match = rexp.match(line)
 164                         if not match:
 165                                 raise IOError("Wrong file format: " + pat_file)
 166                         if match.group(2) in self.top_pattern_dict:
 167                                 index=self.top_pattern_dict[match.group(2)]
 168                                 vector[index] += int(match.group(1))
 169                                 i += 1
 170                         if i >= len(self.top_pattern_dict):
 171                                 break
 172                 input_file.close()
 173                 if len(vector) != self.output_dim:
 174                         raise RuntimeError
 175                 return vector
 176
 177         def stringof(self, i):
 178                 return self.top_pattern_str[i]
 179
 180 class Rescale(VectorToVector):
 181         """Class that rescales vectors to a given interval!"""
 182         def __init__(self, a=-1.0, b=1.0):
 183                 if a >= b:
 184                         raise RuntimeError("a must be < b")
 185                 self.a = a
 186                 self.avg = (a + b) * 0.5
 187                 self.tot = b - a
 188         def norm(self, x):
 189                 return x
 190         def renorm(self, x):
 191                 return x
 192         def __call__(self, vector):
 193                 if len(vector) == 0:
 194                         raise RuntimeError
 195                 to_zero = 0 - self.norm(min(vector))
 196                 maximum = self.norm(max(vector)) + to_zero
 197                 if maximum == 0:
 198                         return [ self.avg  for _ in vector ]
 199                 return [ self.tot * self.renorm(float(self.norm(x) + to_zero) / maximum) + self.a for x in vector ]
 200
 201 import math;
 202 class LogRescale(Rescale):
 203         def norm(self, x):
 204                 return math.log(x+1)
 205         def renorm(self, x):
 206                 return 1 / (1 + math.exp(-6*(x-0.5)));
 207
 208 class InputVectorGenerator(VectorGenerator):
 209         """
 210         First we generate an occurence vector by OccurenceVectorGenerator.
 211         Then, an input vector is generated as a relative number of occurences of the topmost patterns.
 212         The occurences are mapped so that the most frequently used
 213         one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function.
 214         """
 215         def __init__(self, *args, **kwargs):
 216                 try:
 217                         rescale = kwargs['rescale']
 218                         del kwargs['rescale']
 219                 except KeyError:
 220                         rescale = Rescale
 221                 self.ovg = OccurenceVectorGenerator(*args, **kwargs)
 222                 self.gen = Compose(self.ovg, rescale(-1.0, 1.0))
 223         def __call__(self, *args, **kwargs):
 224                 return self.gen(*args, **kwargs)
 225         def ovg(self):
 226                 return self.ovg
 227
 228 def linear_combination(list_of_vectors, coefs):
 229         if len(list_of_vectors) != len(coefs):
 230                 raise Exception("len(list_of_vectors) != len(coefs)")
 231         if len(list_of_vectors) == 0:
 232                 return
 233         len_vec=len(list_of_vectors[0])
 234         res_vec=[0]*len_vec
 235         for p in xrange(len(list_of_vectors)):
 236                 for i in xrange(len_vec):
 237                         res_vec[i] += coefs[p] * list_of_vectors[p][i]
 238         return res_vec
 239
 240 def get_random_norm_coefs( num ):
 241         coefs=[]
 242         rnd_nums= [ random.random() for i in xrange(num-1) ] + [1]
 243         rnd_nums.sort()
 244         first=0
 245         for next in rnd_nums:
 246                 coefs.append(next-first)
 247                 first=next
 248         return coefs
 249
 250 class Combinator(object):
 251         def __init__(self, num_lincombs = 1, skip_subset_len = [0], max_len = 2):
 252                 self.num_lincombs = num_lincombs
 253                 self.skip_subset_len = skip_subset_len
 254                 self.max_len = max_len
 255         def get_subsets(self, set):
 256                 if len(set) == 0:
 257                         return [[]]
 258                 sub=self.get_subsets(set[1:])
 259                 return sub + filter( lambda x : ( self.max_len==0 or len(x)<=self.max_len ), [ set[:1]+subset  for subset in sub] )
 260         def combine(self, data):
 261                 combinations = []
 262                 for subset in self.get_subsets(range(len(data))):
 263                         if len(subset) in self.skip_subset_len:
 264                                 continue
 265                         input_vectors = [ data[index][0] for index in subset ]
 266                         output_vectors = [ data[index][1] for index in subset ]
 267                         for i in xrange(self.num_lincombs):
 268                                 coefs = get_random_norm_coefs(len(subset))
 269                                 combinations += [(linear_combination(input_vectors, coefs), linear_combination(output_vectors, coefs))]
 270                 return combinations
 271
 272 class PlayerStrategyIdentificator(object):
 273         """Object holding information about default strategies for players."""
 274         def __init__(self, strategy_players):
 275                 self.strategy_players = strategy_players
 276
 277                 self.player_strategy={}
 278                 self.all_players = []
 279                 self.all_strategies = []
 280                 for strategy, players in self.strategy_players.items():
 281                         self.all_strategies += [strategy]
 282                         for player in players:
 283                                 self.all_players += [player]
 284                                 self.player_strategy[player] = strategy
 285
 286         def __call__(self, player_name):
 287                 return self.player_strategy[player_name]
 288
 289 class StrategyOutputVectorGenerator(VectorGenerator):
 290         """
 291         This object generates output vectors for players with strategies specified in `PlayerStrategyIdentificator' object.
 292         It is initialized with a list of strategies `valid_strategies' it shall take into acount.
 293         When called (see `__call__') with a name of a player with a strategy from `valid_strategies' it returns a vector
 294         that corresponds to the strategy like this.
 295         """
 296         def __init__(self, strategy_players, valid_strategies=None):
 297                 self.identificator = PlayerStrategyIdentificator(strategy_players)
 298                 if valid_strategies == None:
 299                         valid_strategies = self.identificator.all_strategies
 300                 index=0
 301                 self.valid_strategy_index={}
 302                 for s in valid_strategies:
 303                         self.valid_strategy_index[s]= index
 304                         index += 1
 305         def __call__(self, player_name):
 306                 try:
 307                         player_strat = self.identificator(player_name)
 308                         player_strat_index = self.valid_strategy_index[player_strat]
 309                         return [ 1.0 if i == player_strat_index else -1.0 for i in xrange(len(self.valid_strategy_index)) ]
 310                 except KeyError:
 311                         return None
 312
 313 class PlanarOutputVectorGenerator(VectorGenerator):
 314         """Class that explicitly returns predefined output vectors for given players."""
 315         def __init__(self, player_vector):
 316                 self.player_vector = player_vector
 317                 self.players = player_vector.keys()
 318         def __call__(self, player_name):
 319                 try:
 320                         return self.player_vector[player_name]
 321                 except KeyError:
 322                         return None
 323
 324 class KNNOutputVectorGenerator(VectorToVector):
 325         """ k-NearestNeighbour output vector generator."""
 326         def __init__(self, ref_dict, k=5, weight_param=0.8, dist_mult=10):
 327                 """
 328                         ref_dict is a dictionary of refence input/output vectors.
 329                         e.g. ref_dict= { (1.0,2.0):(9.0,16.0,21.0)
 330                 """
 331                 self.ref_dict = ref_dict
 332                 self.k = k
 333                 self.weigth_param = weight_param
 334                 self.dist_mult = dist_mult
 335         def __call__(self, player_vector):
 336                 distance=[]
 337                 for ref_vec in self.ref_dict.keys():
 338                         distance.append((self.distance(ref_vec, player_vector), ref_vec))
 339                 distance.sort()
 340
 341                 #for p,v in distance:
 342                 #       print "%2.3f"%(float(p),),
 343                 #print
 344                 ref_output_vecs = [ self.ref_dict[b] for a,b in distance[:self.k] ]
 345                 coefs = [ self.weight_fc(a) for a,b in distance[:self.k] ]
 346
 347                 return linear_combination(ref_output_vecs, coefs)
 348         def weight_fc(self, distance):
 349                 return self.weigth_param ** (distance)
 350         def distance(self, vec1, vec2):
 351                 if len(vec1) != len(vec2):
 352                         raise RuntimeError("Dimensions of vectors mismatch.")
 353                 ### the 10* multiplicative constant is empirically determined for correct scaling
 354                 return self.dist_mult * sqrt(sum([ (float(a) - float(b))**2 for a,b in zip(vec1,vec2)]))
 355
 356 class NeuralNet(VectorToVector):
 357         """A class encapsulating the Neural Network as a classifier."""
 358         def __init__( self, train_set_filename, layers=3, neurons=10, desired_error=0.0001, max_epochs=1000, activation='sigmoid' ):
 359                 self.null = open('/dev/null','w')
 360                 self.netfile = '/tmp/gonet.net'
 361
 362                 args  = ["./gnet/gnet_train", "-s", "-l", str(layers), "-n", str(neurons) ]
 363                 args += ["-e", str(desired_error), "-p", str(max_epochs) + "-a", activation, "-o", self.netfile, "./"+ train_set_filename]
 364                 ret = subprocess.call(args,stdout=self.null)
 365                 if ret:
 366                         raise RuntimeError("Could not train the neural network.")
 367
 368                 args = [ "./gnet/gnet_run", "-s", self.netfile ]
 369                 self.p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
 370                 if self.p.poll() > 0:
 371                         raise RuntimeError("Could not run the neural network.")
 372
 373         def __call__(self, vector):
 374                 self.p.stdin.write(' '.join([str(a) for a in vector]) + '\n')
 375                 a = self.p.stdout.readline()
 376                 return [ float(num) for num in a.split()]
 377
 378         def close(self):
 379                 self.p.stdin.close()
 380                 self.p.stdout.close()
 381                 self.null.close()
 382                 os.remove(self.netfile)
 383
 384
 385 if __name__ == '__main__':
 386         print >>sys.stderr, "This is just a library file..."