make_train_set.py

   1 #!/usr/bin/python
   2 import sys
   3 from gostyle import print_vector, print_set_to_file, dump_object_to_file, load_object_from_file
   4 from gostyle import InputVectorGenerator, PlanarOutputVectorGenerator, StrategyOutputVectorGenerator, PlayerStrategyIdentificator
   5 from gostyle import Compose, PCA, Combinator, Rescale
   6
   7 from data_about_players import Data
   8
   9 if __name__ == '__main__':
  10         pickle_filename = 'input_gen.pickle'
  11         train_set_filename = 'train_set.data'
  12         main_pat_filename = Data.main_pat_filename
  13         player_vector = Data.strength_linear_vector
  14         num_features = 400
  15
  16         ### Objects creating input vector when called
  17         print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename
  18         i = InputVectorGenerator(main_pat_filename, num_features)
  19
  20         ### Objects creating output vector when called
  21         o = PlanarOutputVectorGenerator(player_vector)
  22         ### List of players
  23         players_ignore = [ ] #"Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
  24         players = [ p for p in player_vector.keys() if p not in players_ignore ]
  25
  26         #o = StrategyOutputVectorGenerator(Data.strategy_players, valid_strategies=["moyo","territorial"])
  27         #o = StrategyOutputVectorGenerator(Data.strategy_players)
  28         #players = Data.players_all
  29
  30         # Create list of input vectors
  31         input_vectors = []
  32         for name in players:
  33                 input_vectors += [i(Data.pat_files_folder + name)]
  34
  35         if len(input_vectors) == 0:
  36                 print >>sys.stderr, "No input vectors."
  37                 sys.exit()
  38
  39         ### PCA example usage
  40         # Change this to False, if you do not want to use PCA
  41         use_pca = True
  42         if use_pca:
  43                 # Create PCA object, trained on input_vectors
  44                 print >>sys.stderr, "Running PCA."
  45                 pca = PCA(input_vectors, reduce=True)
  46                 # Perform a PCA on input vectors
  47                 input_vectors = pca.process_list_of_vectors(input_vectors)
  48                 # Creates a Composed object that first generates an input vector
  49                 # and then performs a PCA analysis on it.
  50                 i = Compose(i, pca)
  51
  52         ### We now save the InputGenerator to a file for later use.
  53         # This is especially feasible when we use the PCA, since thus we may use
  54         # the once trained object again.
  55         print "Saving the input generator object to file:", pickle_filename
  56         dump_object_to_file(i, pickle_filename)
  57
  58         #r = Rescale(-1.0,1.0)
  59         # Create list of output vectors
  60         output_vectors = [ o(name) for name in players ]
  61         # Create list of pairs: [ (input_vec_1, output_vec_1), (input_vec_2, output_vec_2), ... ]
  62         data = zip(input_vectors, output_vectors)
  63         # And filter out players with no output vector
  64         # (since PlanarOutputVectorGenerator returns None if the player does not have defined output vector)
  65         # Note: This would not be needed if we set `players = PlanarOutputVectorGenerator.players' in the beggining
  66         data = filter(lambda x: x[1] != None, data)
  67
  68         if len(data) == 0:
  69                 print >>sys.stderr, "No data."
  70                 sys.exit()
  71
  72         ### We can enlarge the data set by adding linear combinations of input and output vectors
  73         use_lin_combinations = False
  74         if use_lin_combinations:
  75                 data += Combinator().combine(data)
  76
  77         # Save the result
  78         # Create the neural network train set
  79         print >>sys.stderr, "Saving neural network train set to file:", train_set_filename
  80         print_set_to_file(data,train_set_filename)
  81         print >>sys.stderr, "Printed %d pairs of dimensions (%d,%d)."%(len(data), len(data[0][0]), len(data[0][1]))
  82
  83         #for i,o in data:
  84         #       print_vector(i)
  85         #       print_vector(o)