From 9ec876ce8c4da0db1184c03ce23fae5407538431 Mon Sep 17 00:00:00 2001
From: Joe Moudrik
Date: Tue, 4 Jun 2013 14:49:29 +0200
Subject: [PATCH] orange hacks module

---
 orange_hacks/__init__.py      |   0
 orange_hacks/fann_neural.py   | 846 ++++++++++++++++++++++++++++++++++++++++++
 orange_hacks/feature_wise.tab |   7 +
 orange_hacks/knn_weighted.py  | 190 ++++++++++
 orange_hacks/stacking.py      | 141 +++++++
 orange_hacks/test.tab         |  11 +
 orange_hacks/xor.tab          |   7 +
 7 files changed, 1202 insertions(+)
 create mode 100644 orange_hacks/__init__.py
 create mode 100644 orange_hacks/fann_neural.py
 create mode 100644 orange_hacks/feature_wise.tab
 create mode 100644 orange_hacks/knn_weighted.py
 create mode 100644 orange_hacks/stacking.py
 create mode 100644 orange_hacks/test.tab
 create mode 100644 orange_hacks/xor.tab

diff --git a/orange_hacks/__init__.py b/orange_hacks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/orange_hacks/fann_neural.py b/orange_hacks/fann_neural.py
new file mode 100644
index 0000000..17ea68b
--- /dev/null
+++ b/orange_hacks/fann_neural.py
@@ -0,0 +1,846 @@
+"""
+Wrapper for the Fast Artificial Neural Network (FANN) library:
+    http://leenissen.dk/fann/wp/
+
+This module mainly contains FannNeuralLearner and FannNeuralClassifier.
+The classifier supports classification (both single-label and multilabel)
+as well as regression.
+
+The domain of Continuous classes is limited by the range of the neurons'
+activation functions.
+"""
+import Orange
+import Orange.core
+
+import numpy
+import tempfile
+import itertools
+import logging
+
+from pyfann import libfann
+
+__author__ = "Josef Moudrik"
+__credits__ = ['Authors of the FANN library, http://leenissen.dk/fann/wp/']
+__license__ = "GPL"
+__version__ = "3.0"
+__maintainer__ = "Josef Moudrik"
+__email__ = "J.Moudrik@gmail.com"
+
+
+class FannNeuralNetPickable:
+    def __init__(self, filename=None):
+        self.ann = libfann.neural_net()
+        if filename is not None:
+            self.ann = libfann.neural_net.create_from_file(filename)
+
+    def __getstate__(self):
+        odict = self.__dict__.copy()
+        del odict['ann']
+        odict['fann_save'] = fake_file_call_f2s(self.ann.save)
+        return odict
+
+    def __setstate__(self, odict):
+        ann = libfann.neural_net()
+        fake_file_call_s2f(ann.create_from_file,
+                           odict.pop('fann_save'))
+        self.__dict__.update(odict)
+        self.ann = ann
+
+    def __getattr__(self, key):
+        return self.ann.__getattribute__(key)
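+
+def _sketch_pickle_roundtrip():
+    """A minimal sketch (a hypothetical helper, assuming pyfann is
+    importable) of the round-trip the wrapper above enables: a bare
+    libfann.neural_net holds a C-level handle and cannot be pickled,
+    while the wrapper serializes through FANN's own save format."""
+    import pickle
+    net = FannNeuralNetPickable()
+    net.create_standard_array((2, 3, 1))  # delegated to the wrapped ann
+    clone = pickle.loads(pickle.dumps(net))
+    assert clone.get_num_input() == 2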
+
+class FannNeuralLearner(Orange.classification.Learner):
+    """Orange learner wrapping a FANN artificial neural network."""
+
+    def __new__(cls, examples=None, name='Fann neural', **kwargs):
+        self = Orange.classification.Learner.__new__(cls, **kwargs)
+        if examples:
+            self.__init__(**kwargs)
+            return self.__call__(examples, **kwargs)
+        else:
+            return self
+
+    def __init__(self, name='Fann neural', **kwargs):
+        """
+        See:
+        http://leenissen.dk/fann/html/files/fann-h.html
+            for parameters and methods of the ANN,
+        http://leenissen.dk/fann/html/files/fann_train-h.html
+            for parameters and methods of the train data.
+        """
+        self.name = name
+        # default parameters for the learner
+        self.def_params = {
+            "nn_type": 'standard',
+            # disable the check that the data lie in <-1,1>
+            "allow_out_of_range": False,
+            "autorescale_output": False,
+            # dicts for setting properties of the ANN and the train data
+            "ann_prop": {},
+            "train_prop": {},
+            # custom postprocessing hooks for more complicated modifications,
+            # see __call__ below
+            "ann_postprocess": None,    # will be called: ann_postprocess(ann)
+            "train_postprocess": None,  # will be called: train_postprocess(train_data)
+            # parameters:
+            # CREATION
+            "hidden_layers": [],  # number of neurons in each hidden layer
+            # sparse
+            "connection_rate": 0.5,
+            # TRAINING
+            "desired_error": 0.0001,
+            # normal training
+            "max_epochs": 2000,
+            "iterations_between_reports": 0,  # 0 turns reporting off
+            # cascade training
+            "max_neurons": 20,
+            "neurons_between_reports": 0,  # 0 turns reporting off
+        }
+        self.def_params.update(kwargs)
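+
+    # A minimal sketch, assuming the libfann constants named in __call__'s
+    # docstring below, of how the defaults above can be overridden at
+    # construction time:
+    #
+    #     learner = FannNeuralLearner(
+    #         hidden_layers=[5],
+    #         ann_prop={'set_activation_function_output':
+    #                       libfann.SIGMOID_STEPWISE,
+    #                   'set_learning_rate': 0.7})
+    #     classifier = learner(data)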
+
+    def __call__(self, data, weight=None, **kwargs):
+        """
+        Learn from the given table of data instances.
+
+        The learning proceeds as follows:
+        1. The data are transformed into pairs of input and output vectors,
+           the sizes of these vectors corresponding to the numbers of neurons
+           in the input/output layers. The number of input neurons is given
+           by the number of columns in Table.to_numpy; the number of output
+           neurons is as follows:
+               1 output neuron for each Continuous class attribute (regression),
+               N output neurons for each Discrete class, where N is the number
+               of possible class values.
+
+           Because the domain of the neurons' output function is usually
+           <-1,1>, the regression task only works if the data is scaled to
+           this interval. If you want to use the NN and your output variables
+           have a different range, you should do some scaling. This wrapper
+           has one canonical scaling available: if the autorescale_output
+           option is set to True, the output is linearly scaled onto <-1,1>
+           (min of the values to -1, max to 1, values in the middle linearly
+           in between). The min and max are learned from the training set,
+           so if larger values are present later when testing, this will not
+           work optimally; though it usually works well. (If this option is
+           used, the output values from running the actual regression are
+           rescaled back, so the scaling is transparent to the user.)
+
+        2. A FANN training data struct (call it train_data) is made from
+           these input/output pairs. The train_data is then postprocessed by:
+           (a) for each pair (key, value) from params.train_prop,
+               train_data.key(value) is called. This is used to set up FANN
+               properties of the train data, as specified in
+
+               http://leenissen.dk/fann/html/files/fann_train-h.html
+
+           (b) if a params.train_postprocess function is given, then
+               params.train_postprocess(train_data) is called. This parameter
+               may be used to set up a hook for some complicated FANN
+               train_data transformations.
+
+        3. The neural network (ANN) is then created. FANN offers 3 network
+           types, 'standard', 'sparse' and 'shortcut', as described in
+
+           http://leenissen.dk/fann/html/files/fann-h.html
+
+           Along with the 'cascade' type (which I have added; discussed in
+           point 4 below), these can be specified in params.nn_type.
+
+           The network is postprocessed (similarly to the train_data):
+           (a) for each pair (key, value) from params.ann_prop,
+               ANN.key(value) is called. This is used to set up FANN
+               properties of the network, as specified in the FANN reference.
+
+               For example, setting the kw parameter
+                   ann_prop = {
+                       'set_activation_function_hidden': libfann.SIGMOID_STEPWISE,
+                       'set_activation_function_output': libfann.SIGMOID_STEPWISE,
+                       'set_training_algorithm': libfann.TRAIN_QUICKPROP
+                   }
+               will override the default activation function
+               libfann.SIGMOID_SYMMETRIC with its linear stepwise
+               approximation, and will change the default gradient learning
+               algorithm RPROP to QUICKPROP.
+
+           (b) if a params.ann_postprocess function is given, then
+               params.ann_postprocess(ANN) is called.
+
+        4. The network is then trained. There are two different approaches
+           to training in FANN:
+           (a) fixed topology training: this is the "usual" way of training;
+               the number of neurons and connections in the network is fixed,
+               and we only choose the learning algorithm which iteratively
+               changes the weights.
+
+           (b) cascade training (training with an evolving topology): this
+               approach starts with an empty network and adds promising
+               neurons into the network. See
+
+               http://leenissen.dk/fann/html/files/fann_cascade-h.html
+
+               for details. When using cascade training, the network can
+               only be of the shortcut type with no hidden layers at the
+               start. Here, in the FannNeuralLearner, you can specify that
+               you want cascade learning by setting params.nn_type to
+               'cascade'. This triggers the shortcut topology and trains
+               using the FANN cascade algorithm. Use params.nn_type =
+               'shortcut' if you want the standard fixed topology training.
+
+        5. The classifier is returned. Surprisingly, huh? See its __doc__
+           for more stuff.
+        """
+        # params for this run of __call__ are the default learner's params
+        # overridden by the __call__ kwargs
+        class Params(object):
+            pass
+        params = Params()
+        params.__dict__.update(self.def_params)
+        params.__dict__.update(kwargs)
+
+        if params.nn_type not in ['standard', 'sparse', 'shortcut', 'cascade']:
+            raise ValueError('Unknown network type "%s"' % params.nn_type)
+
+        ## Create the training input/output pairs
+        # Step 1 in the __call__.__doc__
+        X, Y = table_to_XY(data)
+
+        def wrong_range(array):
+            return not ((array >= -1.0) & (array <= 1.0)).all()
+
+        # no scaling by default
+        autoscaler = None
+        if wrong_range(Y):
+            if params.autorescale_output:
+                lower = params.__dict__.get('autorescale_lower_bound', -1.0)
+                upper = params.__dict__.get('autorescale_upper_bound', 1.0)
+
+                autoscaler = AutoScaler(Y, lower, upper)
+                Y = autoscaler(Y)
+            elif not params.allow_out_of_range:
+                raise RuntimeError("The training data for the neural net are"
+                                   " not scaled to <-1,1>. This will probably"
+                                   " result in poor regression performance."
+                                   " Set allow_out_of_range to True to disable"
+                                   " the check, set autorescale_output to True"
+                                   " to perform automatic scaling (and"
+                                   " descaling of the output), or do some"
+                                   " scaling yourself.")
+
+        ## Create and postprocess the training data
+        # Step 2 in the __doc__
+        train_data = XY_to_fann_train_data(X, Y)
+
+        # set properties
+        fann_setter(train_data, params.train_prop)
+
+        # postprocess if relevant
+        if params.train_postprocess:
+            params.train_postprocess(train_data)
+
+        ## Create the ANN
+        # Step 3 in the __doc__
+
+        ann = FannNeuralNetPickable()
+        # this could be used instead, but we use the wrapper so that the
+        # classifier is picklable:
+        #ann = libfann.neural_net()
+
+        # topology = [ number of input neurons,
+        #              number of neurons in the 1st hidden layer,
+        #              number of neurons in the 2nd hidden layer,
+        #              etc.,
+        #              number of output neurons ]
+        topology = (len(X[0]), ) + tuple(params.hidden_layers) + (len(Y[0]), )
+        if params.nn_type == 'standard':
+            ann.create_standard_array(topology)
+        elif params.nn_type == 'sparse':
+            ann.create_sparse_array(params.connection_rate, topology)
+        elif params.nn_type == 'shortcut':
+            ann.create_shortcut_array(topology)
+        elif params.nn_type == 'cascade':
+            if params.hidden_layers:
+                raise ValueError("The cascade-trained network must not have"
+                                 " any hidden layers at startup.")
+            ann.create_shortcut_array(topology)
+        else:
+            assert False
+
+        # set the properties
+        # some defaults
+        ann.set_activation_function_hidden(libfann.SIGMOID_SYMMETRIC)
+        ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC)
+
+        # override them from params
+        fann_setter(ann, params.ann_prop)
+
+        # postprocess if relevant
+        if params.ann_postprocess:
+            params.ann_postprocess(ann)
+
+        ## Train the ANN
+        # Step 4 in the __doc__
+
+        if params.nn_type == 'cascade':
+            ann.cascadetrain_on_data(train_data,
+                                     params.max_neurons,
+                                     params.neurons_between_reports,
+                                     params.desired_error)
+        else:
+            ann.train_on_data(train_data,
+                              params.max_epochs,
+                              params.iterations_between_reports,
+                              params.desired_error)
+
+        return FannNeuralClassifier(ann, data.domain, autoscaler)
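+
+def _sketch_per_call_overrides(data):
+    """A minimal sketch (a hypothetical helper): any keyword accepted by
+    __init__ can also be passed per call, overriding the stored defaults
+    for that single invocation -- here switching to cascade training, as
+    test_cascade() below does."""
+    learner = FannNeuralLearner()
+    return learner(data,
+                   nn_type='cascade',  # shortcut topology, grown by FANN
+                   max_neurons=5,
+                   desired_error=0.005)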
+
+def table_to_XY(data):
+    """Converts the Orange.data.Table data to pairs of input and output
+    vectors (represented row-wise in two numpy arrays X, Y), suitable as a
+    training/testing set for an artificial neural network.
+
+    The attributes are created by the Table.to_numpy method. The class
+    attribute(s) are transformed as follows:
+    - each Continuous class attribute (regression) is assigned one output
+      neuron (no scaling is performed in this step),
+    - each Discrete class attribute (classification) is assigned one output
+      neuron for each discrete value of this class. E.g. in the iris dataset
+      (one discrete class attribute naming the flower), we have 3 neurons.
+    """
+    if not len(data):
+        return numpy.array([]), numpy.array([])
+
+    ## prepare the training data
+    # classes
+
+    cls_descriptors = filter(lambda desc: desc,
+                             [data.domain.class_var] + list(data.domain.class_vars))
+
+    def get_unfolder(descriptor):
+        """Unfolds a class variable into a number of output neurons' outputs."""
+        if isinstance(descriptor, Orange.feature.Continuous):
+            def unfold(value):
+                return [float(value)]
+
+        elif isinstance(descriptor, Orange.feature.Discrete):
+            def unfold(value):
+                l = [-1.0] * len(descriptor.values)
+                l[int(value)] = 1.0
+                return l
+
+        else:
+            raise ValueError("Unsupported class variable type '%s'."
+                             " Must be either Discrete or Continuous."
+                             % descriptor.var_type)
+
+        return unfold
+
+    unfolders = map(get_unfolder, cls_descriptors)
+
+    def get_class_values(instance):
+        l = []
+        if data.domain.class_var:
+            l = [instance.get_class()]
+        return l + instance.get_classes()
+
+    y = []
+
+    # flatten([[0,0,0,1], [0.44], [1,0]]) =
+    #   [0, 0, 0, 1, 0.44, 1, 0]
+    flatten = lambda it: list(itertools.chain.from_iterable(it))
+
+    # multi_map([lambda x: x + 1, lambda x: x * 2], [0, 10]) =
+    #   [1, 20]
+    multi_map = lambda Fs, Args: [f(arg) for f, arg in zip(Fs, Args)]
+
+    for instance in data:
+        values = get_class_values(instance)
+        y.append(flatten(multi_map(unfolders, values)))
+
+    # attributes
+    X = data.to_numpy()[0]
+    # classes
+    Y = numpy.array(y)
+
+    return X, Y
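+
+def _sketch_unfolding():
+    """A minimal sketch of the discrete unfolding above: a Discrete class
+    with values ['a', 'b', 'c'] taking value 'b' (index 1) becomes the
+    output vector [-1.0, 1.0, -1.0]; a Continuous class value 0.44 simply
+    becomes [0.44]."""
+    values = ['a', 'b', 'c']
+    l = [-1.0] * len(values)
+    l[values.index('b')] = 1.0
+    assert l == [-1.0, 1.0, -1.0]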
+
+def XY_to_fann_train_data(X, Y):
+    if len(X) != len(Y):
+        raise ValueError("X and Y must have the same number of lines.")
+
+    train_data = libfann.training_data()
+
+    if len(X):
+        dim_X, dim_Y = len(X[0]), len(Y[0])
+
+        tmp = tempfile.NamedTemporaryFile(delete=False)
+        with tmp:
+            tmp.write("%d %d %d\n" % (len(X), dim_X, dim_Y))
+            for i in xrange(len(X)):
+                for line in [X[i], Y[i]]:
+                    tmp.write("%s\n" % ' '.join(str(float(val)) for val in line))
+
+        train_data.read_train_from_file(tmp.name)
+        tmp.unlink(tmp.name)
+
+    return train_data
+
+
+class RawScaler:
+    """Linearly maps <MIN, MAX> onto <a, b>."""
+    def __init__(self, MIN, MAX, a, b):
+        self.MIN = MIN
+        self.MAX = MAX
+        self.a = a
+        self.b = b
+
+    def __call__(self, number):
+        assert self.a <= self.b
+
+        if number < self.MIN or number > self.MAX:
+            logging.warn("The MIN and MAX estimated from the train set"
+                         " do not reflect the real MIN and MAX of the test set."
+                         " (%.2f < %.2f) or (%.2f > %.2f)" % (number, self.MIN,
+                                                              number, self.MAX))
+
+        if self.MIN == self.MAX:
+            # return the average value of the target interval
+            return float(self.a + self.b) / 2
+
+        return self.a + (number - self.MIN) * (float(self.b - self.a) / (self.MAX - self.MIN))
+
+
+class AutoScaler:
+    """Column-wise linear scaler: learns per-column min/max from a training
+    array and maps each column onto <a, b> (and back)."""
+    def __init__(self, train_array, a=-1, b=1):
+        assert a <= b
+        self.a = a
+        self.b = b
+        self.train(train_array)
+
+    def train(self, array):
+        rows, cols = array.shape
+
+        self.trans = []
+        self.trans_back = []
+
+        for col in xrange(cols):
+            column = array[:, col]
+            mi, ma = column.min(), column.max()
+            self.trans.append(RawScaler(mi, ma, self.a, self.b))
+            self.trans_back.append(RawScaler(self.a, self.b, mi, ma))
+
+    def scale(self, vector):
+        return self._scale(vector, self.trans)
+
+    def scale_back(self, vector):
+        return self._scale(vector, self.trans_back)
+
+    def _scale(self, vector, fcs):
+        vector = numpy.array(vector)
+        cols, = vector.shape
+        assert cols == len(fcs)
+        return numpy.array([fcs[i](vector[i]) for i in xrange(cols)])
+
+    def scale_array(self, array):
+        return self._scale_array(array, self.trans)
+
+    def scale_array_back(self, array):
+        return self._scale_array(array, self.trans_back)
+
+    def _scale_array(self, array, fcs):
+        by_rows = [self._scale(vector, fcs) for vector in array]
+        return numpy.hstack(by_rows).reshape(array.shape)
+
+    def __call__(self, array):
+        return self.scale_array(array)
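+
+def _sketch_autoscale_roundtrip():
+    """A minimal sketch (see also test_autoscale() below): scaling an array
+    column-wise into <-1,1> and back recovers the original values."""
+    arr = 10 * numpy.random.random((8, 3))
+    scaler = AutoScaler(arr)  # learns per-column min/max, maps onto <-1,1>
+    back = scaler.scale_array_back(scaler(arr))
+    assert equal_within_epsilon(arr, back)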
+
+## FIXME Orange.classification.Classifier (which should be the base class)
+## is commented out because if it is not, pickling does not work...
+class FannNeuralClassifier:  # (Orange.classification.Classifier):
+    """Classifier returned by FannNeuralLearner: runs instances through the
+    trained FANN network and folds the raw outputs back into Orange values."""
+    def __init__(self, ann, domain, autoscaler=None):
+        assert isinstance(ann, FannNeuralNetPickable)
+
+        self.ann = ann
+        self.domain = domain
+        self.autoscaler = autoscaler
+
+    def raw_response(self, instance):
+        instance = list(instance)
+        if self.domain.class_var:
+            instance = instance[:len(self.domain) - 1]
+
+        if len(instance) != self.ann.get_num_input():
+            raise ValueError("Instance '%s' has wrong length (%d instead of %d)."
+                             % (str(instance), len(instance),
+                                self.ann.get_num_input()))
+
+        input_vector = map(float, instance)
+
+        ## run the input through the ANN
+        output_vector = self.ann.run(input_vector)
+
+        if self.autoscaler:
+            output_vector = self.autoscaler.scale_back(output_vector)
+
+        return output_vector
+
+    def _get_responses(self, instance):
+        # basically the opposite of the unfolding in table_to_XY
+
+        output_vector = self.raw_response(instance)
+
+        cls_descriptors = filter(lambda desc: desc,
+                                 [self.domain.class_var] + list(self.domain.class_vars))
+
+        def get_folder(descriptor):
+            """Folds neurons' outputs into a target value.
+
+            Returns a tuple (F, num), where F is a function that takes a
+            list of length num (num is the number of outputs consumed)."""
+            if isinstance(descriptor, Orange.feature.Continuous):
+                def fold(outputs):
+                    value = descriptor(outputs[0])
+                    dist = Orange.statistics.distribution.Continuous(descriptor)
+                    dist[value] = 1.
+                    return value, dist
+                return fold, 1
+
+            elif isinstance(descriptor, Orange.feature.Discrete):
+                def fold(outputs):
+                    # the output neurons' range is <-1, 1>, where
+                    #   -1 says this class is not likely,
+                    #    1 says this class is likely,
+                    # so we shift it to <0, 2>, so that we do not get
+                    # "negative" probabilities after the normalization
+                    outputs = [o + 1 for o in outputs]
+                    cprob = Orange.statistics.distribution.Discrete(outputs)
+                    cprob.normalize()
+
+                    mt_prob = cprob
+                    mt_value = Orange.data.Value(descriptor,
+                                                 cprob.values().index(max(cprob)))
+                    return mt_value, mt_prob
+                return fold, len(descriptor.values)
+
+            else:
+                raise ValueError("Unsupported class variable type '%s'."
+                                 " Must be either Discrete or Continuous."
+                                 % descriptor.var_type)
+
+        responses = []
+        for folder, input_size in map(get_folder, cls_descriptors):
+            responses.append(folder(output_vector[:input_size]))
+            output_vector = output_vector[input_size:]
+
+        return responses
+
+    def __call__(self, instance,
+                 result_type=Orange.classification.Classifier.GetValue):
+        """Classify a new instance; handles the usual result_type choices.
+        See self._get_responses for how the network outputs are folded."""
+        responses = self._get_responses(instance)
+
+        values, probs = [], []
+        for value, prob in responses:
+            values.append(value)
+            probs.append(prob)
+
+        # multilabel
+        if self.domain.class_vars:
+            if result_type == Orange.classification.Classifier.GetValue:
+                return values
+            if result_type == Orange.classification.Classifier.GetProbabilities:
+                return probs
+            if result_type == Orange.classification.Classifier.GetBoth:
+                return (tuple(values), tuple(probs))
+            assert False
+
+        assert len(values) == 1
+        value, prob = values[0], probs[0]
+
+        if result_type == Orange.classification.Classifier.GetValue:
+            return value
+        if result_type == Orange.classification.Classifier.GetProbabilities:
+            return prob
+        if result_type == Orange.classification.Classifier.GetBoth:
+            return (value, prob)
+
+        assert False
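+
+def _sketch_result_types(classifier, instance):
+    """A minimal sketch of the three Orange result types handled above by
+    __call__ (single-class case)."""
+    value = classifier(instance)  # GetValue is the default
+    probs = classifier(instance,
+                       Orange.classification.Classifier.GetProbabilities)
+    value2, probs2 = classifier(instance,
+                                Orange.classification.Classifier.GetBoth)
+    return value, probs, value2, probs2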
+ """ + data = Orange.data.Table("iris.tab") + + print "\n Test: Iris Dataset\n" + test_iris.__doc__ + classifier = FannNeuralLearner( data, + hidden_layers=[5], + max_epochs=2000, + desired_error=0.005, + iterations_between_reports=200 + ) + + show_predictions(classifier, data, probs=True) + +def show_predictions(classifier, data, top=5, probs=False): + print + if probs: + print "Probability key:\n", data.domain.class_var.values + print + print "Random five classifications%s:" % (' and probabilities' if probs else '') + print + cnt = 0 + data.shuffle() + + for num, inst in enumerate(data): + pred, prob = classifier(inst, Orange.classification.Classifier.GetBoth) + cls = inst.get_class() + + if num < top: + if probs: + print prob + print "%d: Instance %s predicted as %s" % (num + 1, cls, pred) + print + + if cls != pred: + cnt += 1 + + print "\nMissed: %d out of %d examples = %.1f%%" % (cnt, len(data), 100.0 * cnt / len(data)) + +def test_cascade(): + """ + Test classification on the voting dataset and GetProbabilities output. Also, + the learning method used to train the neural net is the cascade learning: + See: http://leenissen.dk/fann/html/files/fann_cascade-h.html + """ + data = Orange.data.Table("voting.tab") + # Impute + data = Orange.data.imputation.ImputeTable(data, method=Orange.feature.imputation.AverageConstructor()) + # take half as train data + selection = Orange.data.sample.SubsetIndices2(data, 0.5) + train_data = data.select(selection, 0) + test_data = data.select(selection, 1) + + print "\n Test: Cascade Train, Voting Dataset and GetProbabilities\n" + test_cascade.__doc__ + classifier = FannNeuralLearner( train_data, + nn_type='cascade', + max_neurons=5, + neurons_between_reports=2, # 0 turns it off + desired_error=0.005, + ) + + print + print "Possible classes:", data.domain.classVar.values + print "Probabilities for democrats:" + print """ + (Note that this are not really 'probabilities'; + more like a measure of sureness of the network. + This basically are normed neurons' outputs.)""" + print + + test_data.shuffle() + show_predictions( classifier, test_data, probs=True) + +def test_compare(): + iris = Orange.data.Table("iris") + learners = [ + Orange.classification.knn.kNNLearner(), + Orange.classification.bayes.NaiveLearner(), + Orange.classification.majority.MajorityLearner(), + FannNeuralLearner() + ] + + cv = Orange.evaluation.testing.cross_validation(learners, iris, folds=5) + print ["%.4f" % score for score in Orange.evaluation.scoring.CA(cv)] + +def test_housing(): + """ + Test reggression together with automatic scaling -- when the output + domain is out of range <-1,1>. 
+ """ + data = Orange.data.Table("housing") + + # rescale the domain to -1.2, 1.2 + # default, X=1 + X = 1.2 + + #print "\n Test: Iris Dataset\n" + test_iris.__doc__ + learner = FannNeuralLearner( + hidden_layers=[50], + max_epochs=2000, + desired_error=0.005, + iterations_between_reports=0, + allow_out_of_range=False, + autorescale_output=True, + autorescale_lower_bound=-X, + autorescale_upper_bound=X, + ) + + #show_predictions(classifier, data, probs=True) + cv = Orange.evaluation.testing.cross_validation([ + learner, + Orange.regression.linear.LinearRegressionLearner() + ], data, folds=5) + + print '\n'.join("%s : %.4f" % (text, score) + for score, text in zip(Orange.evaluation.scoring.RMSE(cv), + ["ann", "linear"]) + ) + +def equal_within_epsilon(a, b, epsilon=1e-10): + if a.shape != b.shape: + return False + return ( numpy.abs(a - b) <= epsilon ).all() + +def test_autoscale(): + data = 10 * numpy.random.random((40,4)) + test, train = data[:5], data[5:] + + # we could also specify smaller domain + #at = AutoScaler(train, -0.8, 0.8 ) + at = AutoScaler(train) # (-1,1) by default + + print "train scaled" + print at(train) + print "test scaled" + print at(test) + + to_list = lambda arr : map(list,list(arr)) + + #print "test - to and fro" + #print at.scale_array_back(at(test)) + #print "test normal" + #print test + + assert equal_within_epsilon(test, at.scale_array_back(at(test)) ) + +def test_pickle(): + """ + Test pickling on the xor network + """ + ## the data + attrs = [ Orange.feature.Continuous(name) for name in ['X', 'Y', 'X^Y'] ] + insts = [ [x, y, x ^ y] for x, y in itertools.product([0, 1], [0, 1])] + data = Orange.data.Table(Orange.data.Domain(attrs), insts) + + ## the NeuralNetwork + print "\n Test: Xor Function\n" + test_xor.__doc__ + + classifier = FannNeuralLearner( data, + # one hidden layer with 2 neurons... 
+
+def test_pickle():
+    """
+    Tests pickling on the xor network.
+    """
+    ## the data
+    attrs = [Orange.feature.Continuous(name) for name in ['X', 'Y', 'X^Y']]
+    insts = [[x, y, x ^ y] for x, y in itertools.product([0, 1], [0, 1])]
+    data = Orange.data.Table(Orange.data.Domain(attrs), insts)
+
+    ## the neural network
+    print "\n Test: Xor Function\n" + test_xor.__doc__
+
+    classifier = FannNeuralLearner(data,
+                                   # one hidden layer with 3 neurons...
+                                   # XOR cannot be done without a hidden layer
+                                   hidden_layers=[3],
+                                   desired_error=0.0001,
+                                   iterations_between_reports=500,
+                                   max_epochs=5000)
+    import pickle
+    with open("OUT.pkl", 'wb') as fout:
+        pickle.dump(classifier, fout)
+
+    print 'saved'
+    with open("OUT.pkl", 'rb') as fin:
+        print pickle.load(fin)
+
+
+if __name__ == "__main__":
+    test_xor()
+    #test_iris()
+    #test_cascade()
+    #test_compare()
+    #test_housing()
+    #test_autoscale()
+    #test_pickle()
diff --git a/orange_hacks/feature_wise.tab b/orange_hacks/feature_wise.tab
new file mode 100644
index 0000000..2f424fc
--- /dev/null
+++ b/orange_hacks/feature_wise.tab
@@ -0,0 +1,7 @@
+f1(a)	f2(b)	f2(c)	f3(d)	f3(d)	f3(d)	res
+c	c	c	c	c	c	c
+						class
+10	1	97	987	8	79	87
+8	6	876	7	6	87	87
+765	765	87	686	875	465	4
+8	88	7	4	444	65	8
diff --git a/orange_hacks/knn_weighted.py b/orange_hacks/knn_weighted.py
new file mode 100644
index 0000000..b8ae8f9
--- /dev/null
+++ b/orange_hacks/knn_weighted.py
@@ -0,0 +1,190 @@
+import Orange
+import numpy
+import random
+import math
+import logging
+
+
+class KnnWeightedLearner(Orange.classification.Learner):
+    def __new__(cls, examples=None, **kwargs):
+        learner = Orange.classification.Learner.__new__(cls, **kwargs)
+        if examples:
+            # force init and return a classifier
+            learner.__init__(**kwargs)
+            return learner.__call__(examples)
+        else:
+            # invoke init
+            return learner
+
+    def __init__(self,
+                 k=0,
+                 alpha=1,
+                 distance_constructor=Orange.distance.Euclidean(),
+                 exp_weight=False,
+                 name='knn weighted'):
+        self.k = k
+        self.alpha = alpha
+        self.distance_constructor = distance_constructor
+        self.name = name
+        self.exp_weight = exp_weight
+
+    def __call__(self, data, weight=0):
+        if not data.domain.class_var:
+            raise ValueError('classless domain')
+        assert isinstance(data.domain.class_var, Orange.feature.Continuous)
+
+        fnc = Orange.classification.knn.FindNearestConstructor()
+        fnc.distance_constructor = self.distance_constructor
+        did = Orange.feature.Descriptor.new_meta_id()
+
+        fn = fnc(data, 0, did)
+
+        k = self.k
+        if k == 0:
+            # a common heuristic default: k = sqrt(|data|)
+            k = int(math.sqrt(len(data)))
+
+        return KnnWeightedClassifier(data.domain, k, fn, self.alpha, self.exp_weight)
+
+
+## FIXME Orange.classification.Classifier (which should be the base class)
+## is commented out because if it is not, pickling does not work...
+class KnnWeightedClassifier:  # (Orange.classification.Classifier):
+    def __init__(self, domain, k, find_nearest, alpha, exp_weight):
+        self.domain = domain
+        self.domain_f = Orange.data.Domain(domain.features)
+        self.k = k
+        self.find_nearest = find_nearest
+        self.alpha = alpha
+        self.exp_weight = exp_weight
+
+    def __call__(self, instance, resultType=Orange.core.GetValue):
+        if list(instance.domain.features) != list(self.domain_f.features):
+            raise ValueError("instance has wrong domain")
+
+        nbs = self.find_nearest(instance, self.k)
+
+        # distances to the k nearest neighbours
+        dsts = numpy.array([nb[self.find_nearest.distance_ID]
+                            for nb in nbs])
+        # their target variables
+        clss = numpy.array([nb.get_class()
+                            for nb in nbs])
+        if 0 in dsts:
+            # avoid division by zero: add a small epsilon
+            dsts += 1e-5
+
+        # compute the weights
+        if not self.exp_weight:
+            # inversely proportional to distance
+            w = dsts ** (-self.alpha)
+        else:
+            assert 0.0 < self.alpha < 1.0
+            # exponential decay with distance
+            w = self.alpha ** dsts
+
+        # normalize the weights to sum to 1
+        w = w / w.sum()
+        # linear combination of the neighbours' targets
+        res = (w * clss).sum()
+
+        value = self.domain.class_var(res)
+
+        dist = Orange.statistics.distribution.Continuous(self.domain.class_var)
+        dist[value] = 1.
+
+        if resultType == Orange.core.GetValue:
+            return value
+        if resultType == Orange.core.GetProbabilities:
+            return dist
+        return (value, dist)
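+
+def _sketch_weighting():
+    """A minimal sketch of the two weighting schemes above, for neighbour
+    distances d = [1, 2, 4]: the default scheme weights by d**(-alpha)
+    (inverse-distance), the exp_weight scheme by alpha**d (exponential
+    decay, requiring 0 < alpha < 1); both are then normalized to sum to 1."""
+    d = numpy.array([1.0, 2.0, 4.0])
+    w_inv = d ** (-1.0)  # alpha = 1
+    w_exp = 0.5 ** d     # alpha = 0.5, exp_weight=True
+    return w_inv / w_inv.sum(), w_exp / w_exp.sum()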
+
+##
+## tests and examples
+##
+
+def test_housing():
+    data = Orange.data.Table("housing")
+
+    from fann_neural import FannNeuralLearner
+
+    learners = [
+        KnnWeightedLearner(k=4, alpha=2),
+        KnnWeightedLearner(k=4, alpha=1),
+        KnnWeightedLearner(k=4, alpha=0),
+        Orange.classification.knn.kNNLearner(k=4, name='knn 4'),
+        Orange.classification.knn.kNNLearner(k=4, name='knn 4, False',
+                                             rank_weight=False),
+    ]
+
+    cv = Orange.evaluation.testing.cross_validation(learners, data, folds=5)
+
+    for l, score in zip(learners, Orange.evaluation.scoring.RMSE(cv)):
+        print "%s: %.8f" % (l.name, score)
+
+
+def plot_im():
+    """
+    Visualizes the weighted kNN regression; somewhat inspired by
+    http://quasiphysics.wordpress.com/2011/12/13/visualizing-k-nearest-neighbor-regression/
+    """
+    import Image, ImageDraw
+
+    attrs = [Orange.feature.Continuous(name) for name in ['X', 'Y', 'color']]
+    insts = []
+    random.seed(50)
+    for num in xrange(10):
+        color = 255 * int(2 * random.random())
+
+        def get_point():
+            return 0.25 + random.random() / 2
+
+        x, y = get_point(), get_point()
+
+        insts.append([x, y, color])
+
+    data = Orange.data.Table(Orange.data.Domain(attrs), insts)
+
+    def get_inst(a, b):
+        return Orange.data.Instance(Orange.data.Domain(data.domain.features), [a, b])
+
+    for k in xrange(1, 11):
+        for alpha in xrange(4):
+            for dist in [Orange.distance.Euclidean()]:  # , Orange.distance.Manhattan()]:
+
+                l = KnnWeightedLearner(k=k, alpha=alpha, distance_constructor=dist)
+                #l = Orange.classification.knn.kNNLearner(k=k)
+                knn = l(data)
+
+                size = 200
+
+                a = []
+                for X in xrange(size):
+                    for Y in xrange(size):
+                        val = int(knn(get_inst(float(X) / size, float(Y) / size)))
+                        a.append(val)
+
+                arr = numpy.array(a, dtype=numpy.uint8)
+                arr = arr.reshape((size, size))
+
+                im = Image.fromarray(arr).convert("RGB")
+                for inst in data:
+                    y, x = int(size * inst[0]), int(size * inst[1])
+                    color = int(inst[2])
+
+                    draw = ImageDraw.Draw(im)
+                    r = size / 50
+                    draw.ellipse((x - r, y - r, x + r, y + r),
+                                 outline=(255, 0, 0), fill=(color, color, color))
+
+                fn = "knn_w/k=%d_alpha=%d_dist=%s.ppm" % (k, alpha, dist.name)
+                print fn
+                im.save(fn)
+
+
+if __name__ == "__main__":
+    #plot_im()
+    test_housing()
diff --git a/orange_hacks/stacking.py b/orange_hacks/stacking.py
new file mode 100644
index 0000000..784f380
--- /dev/null
+++ b/orange_hacks/stacking.py
@@ -0,0 +1,141 @@
+import Orange
+
+
+class StackedClassificationLearner(Orange.classification.Learner):
+    """Stacking by inference of a meta-classifier from class probability
+    estimates on cross-validation held-out data for level-0 classifiers
+    developed on held-in data sets.
+
+    :param learners: level-0 learners.
+    :type learners: list
+
+    :param meta_learner: meta learner (default: :class:`~Orange.classification.bayes.NaiveLearner`).
+    :type meta_learner: :class:`~Orange.classification.Learner`
+
+    :param folds: number of iterations (folds) of cross-validation to assemble class probability data for the meta learner.
+
+    :param name: learner name (default: stacking).
+    :type name: string
+
+    :rtype: :class:`~Orange.ensemble.stacking.StackedClassificationLearner` or
+        :class:`~Orange.ensemble.stacking.StackedClassifier`
+    """
+    def __new__(cls, learners, data=None, weight=0, **kwds):
+        if data is None:
+            self = Orange.classification.Learner.__new__(cls)
+            return self
+        else:
+            self = cls(learners, **kwds)
+            return self(data, weight)
+
+    def __init__(self, learners,
+                 meta_learner=Orange.classification.bayes.NaiveLearner(),
+                 folds=10, name='stacking'):
+        self.learners = learners
+        self.meta_learner = meta_learner
+        self.name = name
+        self.folds = folds
+
+    def __call__(self, data, weight=0):
+        res = Orange.evaluation.testing.cross_validation(self.learners, data, self.folds)
+
+        if isinstance(data.domain.class_var, Orange.feature.Discrete):
+            # one meta-feature per learner per class value, except the last
+            # (redundant) probability
+            features = [Orange.feature.Continuous("%d" % i)
+                        for i in range(len(self.learners) *
+                                       (len(data.domain.class_var.values) - 1))]
+
+        elif isinstance(data.domain.class_var, Orange.feature.Continuous):
+            # one meta-feature per learner: its predicted value
+            features = [Orange.feature.Continuous("%d" % i)
+                        for i in range(len(self.learners))]
+
+        else:
+            raise RuntimeError("unknown class_var type")
+
+        domain = Orange.data.Domain(features + [data.domain.class_var])
+        p_data = Orange.data.Table(domain)
+
+        if isinstance(data.domain.class_var, Orange.feature.Discrete):
+            for r in res.results:
+                p_data.append([p for ps in r.probabilities
+                               for p in list(ps)[:-1]] + [r.actual_class])
+        else:
+            assert isinstance(data.domain.class_var, Orange.feature.Continuous)
+
+            for r in res.results:
+                p_data.append(r.classes + [r.actual_class])
+
+        assert len(p_data[0]) == len(domain)
+
+        meta_classifier = self.meta_learner(p_data)
+        classifiers = [l(data, weight) for l in self.learners]
+
+        return StackedClassifier(classifiers, meta_classifier, name=self.name,
+                                 meta_domain=p_data.domain)
+
+class StackedClassifier:
+    """
+    A classifier for stacking. Uses a set of level-0 classifiers to induce
+    class probabilities, which are an input to a meta-classifier that
+    predicts the class probability for a given data instance.
+
+    :param classifiers: a list of level-0 classifiers.
+    :type classifiers: list
+
+    :param meta_classifier: meta-classifier.
+    :type meta_classifier: :class:`~Orange.classification.Classifier`
+    """
+    def __init__(self, classifiers, meta_classifier, meta_domain, **kwds):
+        self.classifiers = classifiers
+        self.meta_classifier = meta_classifier
+        self.meta_domain = meta_domain
+        self.domain = Orange.data.Domain(self.meta_domain.features, False)
+        self.__dict__.update(kwds)
+
+    def __call__(self, instance, resultType=Orange.core.GetValue):
+        if isinstance(self.meta_domain.class_var, Orange.feature.Discrete):
+            ps = Orange.data.Instance(self.domain,
+                                      [p for cl in self.classifiers
+                                       for p in list(cl(instance, Orange.core.GetProbabilities))[:-1]])
+        else:
+            assert isinstance(self.meta_domain.class_var, Orange.feature.Continuous)
+            ps = Orange.data.Instance(self.domain,
+                                      [float(cl(instance, Orange.core.GetValue))
+                                       for cl in self.classifiers])
+
+        return self.meta_classifier(ps, resultType)
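+
+def _sketch_meta_features():
+    """A minimal sketch of the meta-feature layout used above: for a
+    discrete class with c values and n level-0 learners, the meta domain
+    gets n * (c - 1) continuous features (the last probability of each
+    learner is dropped as redundant, since probabilities sum to 1); for a
+    continuous class it gets n features, one predicted value per learner."""
+    n_learners, n_class_values = 4, 3
+    assert n_learners * (n_class_values - 1) == 8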
+
+##
+## tests and examples
+##
+
+def test_stack_regression():
+    base_learners = [
+        Orange.regression.linear.LinearRegressionLearner(name='linear'),
+        Orange.regression.pls.PLSRegressionLearner(name='PLS'),
+        Orange.classification.knn.kNNLearner(k=20, name='knn 20'),
+        Orange.classification.knn.kNNLearner(k=30, name='knn 30')
+        #Orange.ensemble.forest.RandomForestLearner(name='random forest')
+    ]
+
+    stack = StackedClassificationLearner(base_learners,
+        #meta_learner=Orange.ensemble.forest.RandomForestLearner(name='meta random forest'),
+        meta_learner=Orange.classification.knn.kNNLearner(k=20, name='meta knn 20'),
+        folds=10,
+        name='stacking')
+
+    learners = [stack] + base_learners
+
+    data = Orange.data.Table("housing")
+    res = Orange.evaluation.testing.cross_validation(learners, data, folds=10)
+
+    print "\n".join(["%8s: %5.3f" % (l.name, r)
+                     for r, l in zip(Orange.evaluation.scoring.RMSE(res), learners)])
+
+
+def test_stack_classification():
+    data = Orange.data.Table("promoters")
+
+    bayes = Orange.classification.bayes.NaiveLearner(name="bayes")
+    tree = Orange.classification.tree.SimpleTreeLearner(name="tree")
+    lin = Orange.classification.svm.LinearLearner(name="lr")
+    knn = Orange.classification.knn.kNNLearner(name="knn")
+
+    base_learners = [bayes, tree, lin, knn]
+    stack = StackedClassificationLearner(base_learners)
+
+    learners = [stack, bayes, tree, lin, knn]
+    res = Orange.evaluation.testing.cross_validation(learners, data, 3)
+    print "\n".join(["%8s: %5.3f" % (l.name, r)
+                     for r, l in zip(Orange.evaluation.scoring.CA(res), learners)])
+
+
+if __name__ == "__main__":
+    test_stack_regression()
+    #test_stack_classification()
diff --git a/orange_hacks/test.tab b/orange_hacks/test.tab
new file mode 100644
index 0000000..f0b8817
--- /dev/null
+++ b/orange_hacks/test.tab
@@ -0,0 +1,11 @@
+vek	bla
+
+c	c
+	class
+16	1
+17	1
+18	2
+19	2
+20	2
+21	2
+22	2
diff --git a/orange_hacks/xor.tab b/orange_hacks/xor.tab
new file mode 100644
index 0000000..81c0586
--- /dev/null
+++ b/orange_hacks/xor.tab
@@ -0,0 +1,7 @@
+X	Y	Xor
+c	c	c
+		class
+0	1	1
+1	0	1
+0	0	0
+1	1	0
-- 
2.11.4.GIT