From ee4ce62e5dcd1af97d71dbaf00c2ec6bef871f6b Mon Sep 17 00:00:00 2001 From: Petr Baudis Date: Sat, 13 Mar 2010 12:29:03 +0100 Subject: [PATCH] Input Vector: log-transform and rescale rel. frequency by logistic function This seems to give a much more clear-cut PCA output, preventing the most frequently occurring patterns from overshadowing everything else. The t=-6 in the logistic function has been tuned experimentally. --- gostyle.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/gostyle.py b/gostyle.py index fab0609..5a12c1c 100755 --- a/gostyle.py +++ b/gostyle.py @@ -182,14 +182,25 @@ class Rescale(VectorToVector): self.a = a self.avg = (a + b) * 0.5 self.tot = b - a + def norm(self, x): + return x + def renorm(self, x): + return x def __call__(self, vector): if len(vector) == 0: raise RuntimeError - to_zero = 0 - min(vector) - maximum = max(vector) + to_zero + to_zero = 0 - self.norm(min(vector)) + maximum = self.norm(max(vector)) + to_zero if maximum == 0: return [ self.avg for _ in vector ] - return [ self.tot * (x + to_zero) / maximum + self.a for x in vector ] + return [ self.tot * self.renorm(float(self.norm(x) + to_zero) / maximum) + self.a for x in vector ] + +import math; +class LogRescale(Rescale): + def norm(self, x): + return math.log(x+1) + def renorm(self, x): + return 1 / (1 + math.exp(-6*(x-0.5))); class InputVectorGenerator(VectorGenerator): """ @@ -200,7 +211,7 @@ class InputVectorGenerator(VectorGenerator): """ def __init__(self, *args, **kwargs): self.ovg = OccurenceVectorGenerator(*args, **kwargs) - self.gen = Compose(self.ovg, Rescale(-1.0, 1.0)) + self.gen = Compose(self.ovg, LogRescale(-1.0, 1.0)) def __call__(self, *args, **kwargs): return self.gen(*args, **kwargs) def ovg(self): -- 2.11.4.GIT