tex: Move expert-base knowledge info to latter sections
[gostyle.git] / make_train_set.py
blobe0679d359516db372ed9a20c3757d2dea8e06f04
1 #!/usr/bin/python
2 import sys
3 from gostyle import print_vector, print_set_to_file, dump_object_to_file, load_object_from_file
4 from gostyle import InputVectorGenerator, PlanarOutputVectorGenerator, StrategyOutputVectorGenerator, PlayerStrategyIdentificator
5 from gostyle import Compose, PCA, Combinator, Rescale
7 from data_about_players import Data
if __name__ == '__main__':
    # Build a neural network train set: one (input vector, output vector)
    # pair per player, written to `train_set_filename`.  The input generator
    # (composed with the trained PCA when `use_pca` is set) is dumped to
    # `pickle_filename` so that it can be reused later.
    #
    # Diagnostics are written with sys.stderr.write / sys.stdout.write rather
    # than the print statement so the script parses under Python 2 and 3 alike;
    # the emitted text is unchanged.
    pickle_filename = 'input_gen.pickle'
    train_set_filename = 'train_set.data'
    main_pat_filename = Data.main_pat_filename
    player_vector = Data.strength_linear_vector
    num_features = 400

    ### Objects creating input vector when called
    sys.stderr.write("Creating input vector generator from main pat file: %s\n"
                     % (main_pat_filename,))
    i = InputVectorGenerator(main_pat_filename, num_features)

    ### Objects creating output vector when called
    o = PlanarOutputVectorGenerator(player_vector)
    ### List of players
    players_ignore = [ ] #"Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ]
    players = [ p for p in player_vector.keys() if p not in players_ignore ]

    #o = StrategyOutputVectorGenerator(Data.strategy_players, valid_strategies=["moyo","territorial"])
    #o = StrategyOutputVectorGenerator(Data.strategy_players)
    #players = Data.players_all

    # Create list of input vectors, one per player, in the order of `players`
    input_vectors = [ i(Data.pat_files_folder + name) for name in players ]

    if not input_vectors:
        sys.stderr.write("No input vectors.\n")
        # Non-zero status: nothing could be generated, so report failure.
        sys.exit(1)

    ### PCA example usage
    # Change this to False, if you do not want to use PCA
    use_pca = True
    if use_pca:
        # Create PCA object, trained on input_vectors
        sys.stderr.write("Running PCA.\n")
        pca = PCA(input_vectors, reduce=True)
        # Perform a PCA on input vectors
        input_vectors = pca.process_list_of_vectors(input_vectors)
        # Creates a Composed object that first generates an input vector
        # and then performs a PCA analysis on it.
        i = Compose(i, pca)

    ### We now save the InputGenerator to a file for later use.
    # This is especially feasible when we use the PCA, since thus we may use
    # the once trained object again.
    # NOTE: this message goes to stdout (all other diagnostics go to stderr),
    # exactly as before.
    sys.stdout.write("Saving the input generator object to file: %s\n"
                     % (pickle_filename,))
    dump_object_to_file(i, pickle_filename)

    #r = Rescale(-1.0,1.0)
    # Create list of output vectors
    output_vectors = [ o(name) for name in players ]
    # Create list of pairs: [ (input_vec_1, output_vec_1), (input_vec_2, output_vec_2), ... ]
    # and filter out players with no output vector
    # (since PlanarOutputVectorGenerator returns None if the player does not have defined output vector)
    # Note: This would not be needed if we set `players = PlanarOutputVectorGenerator.players' in the beginning
    #
    # The identity test (`is not None`) is deliberate: `!= None` would invoke
    # the vector's own comparison operator, which for array-like vectors need
    # not yield a plain boolean.  A list comprehension keeps `data` a real
    # list, as required by len(), += and indexing below.
    data = [ pair for pair in zip(input_vectors, output_vectors)
             if pair[1] is not None ]

    if not data:
        sys.stderr.write("No data.\n")
        # Non-zero status: an empty train set is a failure, not a success.
        sys.exit(1)

    ### We can enlarge the data set by adding linear combinations of input and output vectors
    use_lin_combinations = False
    if use_lin_combinations:
        data += Combinator().combine(data)

    # Save the result
    # Create the neural network train set
    sys.stderr.write("Saving neural network train set to file: %s\n"
                     % (train_set_filename,))
    print_set_to_file(data,train_set_filename)
    sys.stderr.write("Printed %d pairs of dimensions (%d,%d).\n"
                     % (len(data), len(data[0][0]), len(data[0][1])))

    #for i,o in data:
    #    print_vector(i)
    #    print_vector(o)