gostyle_old, commit old
[gostyle.git] / make_train_set_strength.py
blobd647f3d136e34158e1d6e020151e057d94d3d5f3
1 #!/usr/bin/python
2 import sys
3 from gostyle import print_vector, print_set_to_file, dump_object_to_file, load_object_from_file
4 from gostyle import InputVectorGenerator, PlanarOutputVectorGenerator, StrategyOutputVectorGenerator, PlayerStrategyIdentificator
5 from gostyle import Compose, PCA, Combinator, Rescale
7 from data_about_players import Data
def list_all(raw_dir):
    """Yield ``(rank, path)`` for every entry stored below *raw_dir*.

    *raw_dir* must contain one sub-directory per rank.  Every entry inside a
    rank directory is yielded together with the name of that directory.
    The order is whatever ``os.listdir`` returns, i.e. arbitrary.
    """
    # Local import: the file's top-level imports do not include ``os``.
    import os

    for rank in os.listdir(raw_dir):
        rank_dir = os.path.join(raw_dir, rank)
        for play in os.listdir(rank_dir):
            yield (rank, os.path.join(rank_dir, play))


def build_vectors(samples, input_gen, output_gen):
    """Build parallel lists of input and output vectors.

    samples    -- iterable of ``(rank, filename)`` pairs
    input_gen  -- callable, invoked as ``input_gen(filename)``
    output_gen -- callable, invoked as ``output_gen(rank)``

    Returns ``(input_vectors, output_vectors)``.  A sample for which either
    callable raises is skipped (and reported on stderr); both vectors are
    computed before anything is appended, so the two lists always stay
    aligned.
    """
    input_vectors = []
    output_vectors = []
    for rank, filename in samples:
        try:
            in_vec = input_gen(filename)
            out_vec = output_gen(rank)
        except Exception as err:
            # Best effort: one unreadable sample must not abort the whole
            # run.  ``Exception`` (rather than a bare ``except``) lets
            # KeyboardInterrupt and SystemExit propagate.
            sys.stderr.write("Skipping %s: %s\n" % (filename, err))
            continue
        input_vectors.append(in_vec)
        output_vectors.append(out_vec)
    return input_vectors, output_vectors


def pair_defined(input_vectors, output_vectors):
    """Return ``[(input_vec, output_vec), ...]`` without undefined outputs.

    Pairs whose output vector is ``None`` are dropped.  The result is always
    a real list, so ``len()`` and indexing work on it.
    """
    return [(in_vec, out_vec)
            for in_vec, out_vec in zip(input_vectors, output_vectors)
            if out_vec is not None]


def main():
    """Create the neural network train set for player strength.

    Reads the pattern files below ``root_dir``, optionally reduces the input
    vectors with PCA, pickles the resulting input generator and writes the
    (input, output) pairs to ``train_set_filename``.
    """
    pickle_filename = 'input_gen.pickle'
    train_set_filename = 'train_set.data'
    root_dir = '../pdb-gtl/'
    main_pat_filename = root_dir + 'all.pat'
    player_vector = Data.strength_linear_vector
    num_features = 400

    ### Object creating an input vector when called
    sys.stderr.write("Creating input vector generator from main pat file: %s\n"
                     % main_pat_filename)
    input_gen = InputVectorGenerator(main_pat_filename, num_features)
    ### Object creating an output vector when called
    output_gen = PlanarOutputVectorGenerator(player_vector)

    # Directory with one sub-directory per rank.
    # ('testpat_files' is an alternative directory name left over from an
    # earlier, commented-out line.)
    raw = root_dir + 'rawpat_files_merged'

    # Create the lists of input and output vectors
    input_vectors, output_vectors = build_vectors(list_all(raw),
                                                  input_gen, output_gen)
    sys.stdout.write("%d\n" % len(input_vectors))

    ### PCA example usage
    # Change this to False if you do not want to use PCA
    use_pca = True
    if use_pca:
        # Create a PCA object trained on input_vectors
        sys.stderr.write("Running PCA.\n")
        pca = PCA(input_vectors, reduce=True)
        # Perform the PCA on the input vectors
        input_vectors = pca.process_list_of_vectors(input_vectors)
        # Compose the generator with the PCA, so that the saved object first
        # generates an input vector and then applies the PCA to it.
        input_gen = Compose(input_gen, pca)

    ### Save the input generator to a file for later use.
    # This is especially useful with PCA, because the trained object can
    # then be reused.
    sys.stdout.write("Saving the input generator object to file: %s\n"
                     % pickle_filename)
    dump_object_to_file(input_gen, pickle_filename)

    # Pair the vectors and filter out samples with no output vector.
    # (Per the original note, PlanarOutputVectorGenerator returns None when
    # no output vector is defined; this would not be needed if the players
    # were taken from `PlanarOutputVectorGenerator.players' in the beginning.)
    data = pair_defined(input_vectors, output_vectors)

    if not data:
        sys.stderr.write("No data.\n")
        sys.exit()

    ### The data set can be enlarged by adding linear combinations of the
    ### input and output vectors
    use_lin_combinations = False
    if use_lin_combinations:
        data += Combinator().combine(data)

    # Save the result: the neural network train set
    sys.stderr.write("Saving neural network train set to file: %s\n"
                     % train_set_filename)
    print_set_to_file(data, train_set_filename)
    sys.stderr.write("Printed %d pairs of dimensions (%d,%d).\n"
                     % (len(data), len(data[0][0]), len(data[0][1])))


if __name__ == '__main__':
    main()