make_train_set_strength.py

   1 #!/usr/bin/python
   2 import sys
   3 from gostyle import print_vector, print_set_to_file, dump_object_to_file, load_object_from_file
   4 from gostyle import InputVectorGenerator, PlanarOutputVectorGenerator, StrategyOutputVectorGenerator, PlayerStrategyIdentificator
   5 from gostyle import Compose, PCA, Combinator, Rescale
   6
   7 from data_about_players import Data
   8
   9
  10 if __name__ == '__main__':
  11         pickle_filename = 'input_gen.pickle'
  12         train_set_filename = 'train_set.data'
  13         root_dir = '../pdb-gtl/'
  14         main_pat_filename = root_dir + 'all.pat'
  15         player_vector = Data.strength_linear_vector
  16         num_features = 400
  17
  18         ### Objects creating input vector when called
  19         print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
  20         i = InputVectorGenerator(main_pat_filename, num_features)
  21         ### Objects creating output vector when called
  22         o = PlanarOutputVectorGenerator(player_vector)
  23
  24         def list_all():
  25                 import os, random, shutil
  26                 #raw = root_dir + 'testpat_files'
  27                 raw = root_dir + 'rawpat_files_merged'
  28                 ranks = os.listdir(raw)
  29                 for rank in ranks:
  30                         plays = os.listdir(raw + '/'+ rank)
  31                         for play in plays:
  32                                 yield (rank, raw + '/' + rank + '/' + play)
  33
  34
  35         # Create list of input vectors
  36         input_vectors = []
  37         output_vectors = []
  38         for rank,filename in list_all():
  39                 try:
  40                         ii = i(filename)
  41                         oo = o(rank)
  42                         input_vectors += [ ii ]
  43                         output_vectors += [ oo ]
  44                 except:
  45                         continue
  46
  47         print len(input_vectors)
  48
  49         ### PCA example usage
  50         # Change this to False, if you do not want to use PCA
  51         use_pca = True
  52         if use_pca:
  53                 # Create PCA object, trained on input_vectors
  54                 print >>sys.stderr, "Running PCA."
  55                 pca = PCA(input_vectors, reduce=True)
  56                 # Perform a PCA on input vectors
  57                 input_vectors = pca.process_list_of_vectors(input_vectors)
  58                 # Creates a Composed object that first generates an input vector
  59                 # and then performs a PCA analysis on it.
  60                 i = Compose(i, pca)
  61
  62         ### We now save the InputGenerator to a file for later use.
  63         # This is especially feasible when we use the PCA, since thus we may use
  64         # the once trained object again.
  65         print "Saving the input generator object to file:", pickle_filename
  66         dump_object_to_file(i, pickle_filename)
  67
  68         #r = Rescale(-1.0,1.0)
  69         # Create list of output vectors
  70         # Create list of pairs: [ (input_vec_1, output_vec_1), (input_vec_2, output_vec_2), ... ]
  71         data = zip(input_vectors, output_vectors)
  72         # And filter out players with no output vector
  73         # (since PlanarOutputVectorGenerator returns None if the player does not have defined output vector)
  74         # Note: This would not be needed if we set `players = PlanarOutputVectorGenerator.players' in the beggining
  75         data = filter(lambda x: x[1] != None, data)
  76
  77         if len(data) == 0:
  78                 print >>sys.stderr, "No data."
  79                 sys.exit()
  80
  81         ### We can enlarge the data set by adding linear combinations of input and output vectors
  82         use_lin_combinations = False
  83         if use_lin_combinations:
  84                 data += Combinator().combine(data)
  85
  86         # Save the result
  87         # Create the neural network train set
  88         print >>sys.stderr, "Saving neural network train set to file:", train_set_filename
  89         print_set_to_file(data,train_set_filename)
  90         print >>sys.stderr, "Printed %d pairs of dimensions (%d,%d)."%(len(data), len(data[0][0]), len(data[0][1]))
  91
  92         #for i,o in data:
  93         #       print_vector(i)
  94         #       print_vector(o)