From d5df0b5fc40470cc75731572f98a8338381f8d3b Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Tue, 2 Mar 2010 09:38:00 +0100 Subject: [PATCH] PCA: Print also projection information on the output That is, mapping from PCA vectors to input vector patterns --- gostyle.py | 15 +++++++++++++-- pca.py | 13 ++++++++++--- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/gostyle.py b/gostyle.py index c21f2d5..55b102f 100755 --- a/gostyle.py +++ b/gostyle.py @@ -88,6 +88,8 @@ try: return list(self.pca(array([vector]))[0]) def process_list_of_vectors(self, list_of_vectors): return [ list(vec) for vec in self.pca(array(list_of_vectors)) ] + def get_projection_info(self): + return self.pca.get_recmatrix() except ImportError, e: print >>sys.stderr, "Warning: %s. PCA will not work."%(str(e)) @@ -127,12 +129,15 @@ class OccurenceVectorGenerator(VectorGenerator): def generate_top_pattern_dict(self): rexp=re.compile(Const.pat_file_regexp) self.top_pattern_dict = {} + self.top_pattern_str = {} i = 0 input_file = open(self.filename) for line in input_file: if i >= self.output_dim: break - self.top_pattern_dict[rexp.match(line).group(2)] = i + s = rexp.match(line).group(2) + self.top_pattern_dict[s] = i + self.top_pattern_str[i] = s i += 1 input_file.close() @@ -161,6 +166,9 @@ class OccurenceVectorGenerator(VectorGenerator): raise RuntimeError return vector + def stringof(self, i): + return self.top_pattern_str[i] + class Rescale(VectorToVector): """Class that rescales vectors to a given interval!""" def __init__(self, a=-1.0, b=1.0): @@ -183,9 +191,12 @@ class InputVectorGenerator(VectorGenerator): one is mapped to 1.0 and the rest is mapped relatively on the scale (1.0,-1.0). See `__call__' function. """ def __init__(self, *args, **kwargs): - self.gen = Compose(OccurenceVectorGenerator(*args, **kwargs), Rescale(-1.0, 1.0)) + self.ovg = OccurenceVectorGenerator(*args, **kwargs) + self.gen = Compose(self.ovg, Rescale(-1.0, 1.0)) def __call__(self, *args, **kwargs): return self.gen(*args, **kwargs) + def ovg(self): + return self.ovg def linear_combination(list_of_vectors, coefs): if len(list_of_vectors) != len(coefs): diff --git a/pca.py b/pca.py index 16105ad..189c582 100755 --- a/pca.py +++ b/pca.py @@ -25,13 +25,13 @@ if __name__ == '__main__': ### Objects creating input and output vectors when called print >>sys.stderr, "Creating input vector generator from main pat file:", main_pat_filename - i = InputVectorGenerator(main_pat_filename, num_features) + ivg = InputVectorGenerator(main_pat_filename, num_features) # Create pairs of (input vector, player name) input_vectors = [] for name in players: - #input_vectors += [i( Data.pat_files_folder + name)] - input_vectors += [[float(occ) for occ in i(Data.pat_files_folder + name)]] + #input_vectors += [ivg( Data.pat_files_folder + name)] + input_vectors += [[float(occ) for occ in ivg(Data.pat_files_folder + name)]] if len(input_vectors) == 0: print >>sys.stderr, "No input vectors.", main_pat_filename @@ -66,6 +66,13 @@ if __name__ == '__main__': #print name_to_print, #print_vector(vector) + print "\nProjection info:" + P = pca.get_projection_info() + for y in xrange(1, P.shape[0]): + for x in xrange(1, P.shape[1]): + print y, x, P[y,x], ivg.ovg.stringof(x) + #print P + print >> sys.stderr, "\nNow print that by:" print >> sys.stderr, 'gnuplot> set xrange[1:%d]'%(pca.pca.output_dim+1) print >> sys.stderr, 'gnuplot> plot "./pca.data" using 2:3:1 with labels font "arial,10" left point pt 4 offset 1,0' -- 2.11.4.GIT