# gostyle.git / orange_hacks / fann_neural.py
1 """
2 Wrapper for the Fast Artificial Neural Network library:
3 http://leenissen.dk/fann/wp/
5 This module mainly contains FannNeuralLearner and FannNeuralClassifier,
6 the classifier supports both classification (both normal and multilabel)
7 and reggresion.
9 The size of domains for Continuous classes is
10 limited by the range of activation functions of the neurons.
12 """

import Orange
import Orange.core

import numpy
import tempfile
import itertools
import logging

from pyfann import libfann

__author__ = "Josef Moudrik"
__credits__ = ['Authors of the Fann library, http://leenissen.dk/fann/wp/']
__license__ = "GPL"
__version__ = "3.0"
__maintainer__ = "Josef Moudrik"
__email__ = "J.Moudrik@gmail.com"

class FannNeuralNetPickable:
    """Wrapper around libfann.neural_net that can be pickled."""

    def __init__(self, filename=None):
        self.ann = libfann.neural_net()
        if filename is not None:
            self.ann.create_from_file(filename)

    def __getstate__(self):
        odict = self.__dict__.copy()
        del odict['ann']
        odict['fann_save'] = fake_file_call_f2s(self.ann.save)

        return odict

    def __setstate__(self, odict):
        ann = libfann.neural_net()
        fake_file_call_s2f(ann.create_from_file,
                           odict.pop('fann_save'))

        self.__dict__.update(odict)
        self.ann = ann

    def __getattr__(self, key):
        return self.ann.__getattribute__(key)
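
# The wrapper delegates all other attribute lookups to the underlying
# network via __getattr__, so it can be used wherever a libfann.neural_net
# is expected. A minimal round-trip sketch (the network state travels
# through FANN's own save format):
#
#   import pickle
#   net = FannNeuralNetPickable()
#   net.create_standard_array((2, 3, 1))
#   restored = pickle.loads(pickle.dumps(net))
#   assert restored.get_num_input() == 2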

class FannNeuralLearner(Orange.classification.Learner):
    """Orange learner wrapping the FANN artificial neural network library.

    See __call__ for a description of the learning procedure and the
    supported keyword parameters.
    """

    def __new__(cls, examples=None, name='Fann neural', **kwargs):
        self = Orange.classification.Learner.__new__(cls, **kwargs)
        if examples:
            self.__init__(**kwargs)
            return self.__call__(examples, **kwargs)
        else:
            return self

    def __init__(self, name='Fann neural', **kwargs):
        """
        See
        http://leenissen.dk/fann/html/files/fann-h.html
        for parameters and methods of the ANN, and
        http://leenissen.dk/fann/html/files/fann_train-h.html
        for parameters and methods of the train data.
        """
        self.name = name
        # default parameters for the learner
        self.def_params = {
            "nn_type": 'standard',
            # disable the check for data to be in <-1,1>
            "allow_out_of_range": False,
            "autorescale_output": False,
            # dicts for setting properties of the ann and the train data
            "ann_prop": {},
            "train_prop": {},
            # custom postprocessing functions for more complicated
            # modifications, see the __call__ below
            "ann_postprocess": None,    # will be called: ann_postprocess(ann)
            "train_postprocess": None,  # will be called: train_postprocess(train_data)
            # parameters:
            # CREATION
            "hidden_layers": [],  # number of neurons in each of the hidden layers
            # sparse
            "connection_rate": 0.5,
            # TRAINING
            "desired_error": 0.0001,
            # normal training
            "max_epochs": 2000,
            "iterations_between_reports": 0,  # 0 turns it off
            # cascade training
            "max_neurons": 20,
            "neurons_between_reports": 0,  # 0 turns it off
        }
        self.def_params.update(kwargs)

    def __call__(self, data,
                 weight=None,
                 **kwargs):
        """
        Learn from the given table of data instances.

        The learning proceeds as follows:

        1. The data are transformed into pairs of input and output vectors,
        the sizes of these vectors corresponding to the numbers of neurons
        in the input/output layers. The number of input neurons is
        determined by the number of columns in Table.to_numpy; the number
        of output neurons is as follows:
            1 output neuron for each Continuous class attribute (regression),
            N output neurons for each Discrete class, where N is the number
            of possible class values.

        Because the domain of the neurons' output function is usually
        <-1,1>, the regression task only works if the data is scaled to
        this interval. If you want to use the NN and your output variables
        have a different range, you should do some scaling. This wrapper
        has one canonical scaling available: if the autorescale_output
        option is set to True, the output is linearly scaled onto <-1,1>
        (min of the values to -1, max to 1, values in the middle linearly
        in between). The min and max are learned from the training set, so
        if larger values are present later when testing, this will not work
        optimally. Still, it usually works well. (If this option is used,
        the output values from running the actual regression are rescaled
        back, so this is transparent to the user.)

        2. A FANN training data struct (call it train_data) is made from
        these input/output pairs. The train_data is then postprocessed by:
            (a) for each (key, value) pair from params.train_prop,
            train_data.key(value) is called. This is used to set up FANN
            properties of the train data, as specified in

            http://leenissen.dk/fann/html/files/fann_train-h.html

            (b) if a params.train_postprocess function is given, then
            params.train_postprocess(train_data) is called. This param may
            be used to set up a hook for some complicated FANN train_data
            transformations.

        3. The Neural Network (ANN) is then created. FANN offers 3 network
        types, 'standard', 'sparse' and 'shortcut', as described in

        http://leenissen.dk/fann/html/files/fann-h.html

        Along with the 'cascade' type (which I have added; discussed in
        point 4 below), these can be specified in params.nn_type.

        The network is postprocessed (similarly to the train_data
        postprocessing):
            (a) for each (key, value) pair from params.ann_prop,
            ANN.key(value) is called. This is used to set up FANN
            properties of the network, as specified in the FANN reference.

            For example, setting the kw parameter

            ann_prop = {
                'set_activation_function_hidden': libfann.SIGMOID_STEPWISE,
                'set_activation_function_output': libfann.SIGMOID_STEPWISE,
                'set_training_algorithm': libfann.TRAIN_QUICKPROP
            }

            will override the default activation function
            libfann.SIGMOID_SYMMETRIC with its linear stepwise
            approximation, and will change the default gradient learning
            algorithm RPROP to QUICKPROP.

            (b) if a params.ann_postprocess function is given, then
            params.ann_postprocess(ANN) is called.

        4. The network is then trained. There are two different approaches
        to training in FANN:
            (a) fixed topology training: this is the "usual" way of
            training; the numbers of neurons and connections in the network
            are fixed, and we only choose the learning algorithm which
            iteratively changes the weights.

            (b) cascade training (training with evolving topology): this
            approach starts with an empty network and adds promising
            neurons into the network. See

            http://leenissen.dk/fann/html/files/fann_cascade-h.html

            for details. When using cascade training, the network type can
            only be the shortcut type with no hidden layers at the start.
            Here, in the FannNeuralLearner, you can specify that you want
            cascade learning by setting params.nn_type to 'cascade'. This
            triggers the shortcut topology and trains using the FANN
            cascade algorithm. Use params.nn_type = 'shortcut' if you want
            the standard fixed topology training.

        5. The classifier is returned. Surprisingly, huh? See its __doc__
        for more stuff.
        """
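
        # A hypothetical invocation sketch combining the options described
        # above (the parameter values are illustrative, not recommendations;
        # set_learning_rate and shuffle_train_data are standard FANN calls):
        #
        #   learner = FannNeuralLearner(
        #       nn_type='sparse', connection_rate=0.25,
        #       hidden_layers=[10, 5],
        #       ann_prop={'set_learning_rate': 0.7},
        #       train_prop={'shuffle_train_data': ()},
        #       max_epochs=1000, desired_error=0.001)
        #   classifier = learner(data)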

        # params for this run of __call__ are the default Learner's params
        # overridden by the __call__ kwargs
        class Params(object):
            pass
        params = Params()
        params.__dict__.update(self.def_params)
        params.__dict__.update(kwargs)

        if params.nn_type not in ['standard', 'sparse', 'shortcut', 'cascade']:
            raise ValueError('Unknown network type "%s"' % params.nn_type)

        ## Create the training input/output pairs
        # Step 1 in the __call__.__doc__
        X, Y = table_to_XY(data)

        def wrong_range(array):
            return not ((array >= -1.0) & (array <= 1.0)).all()

        # no scaling by default
        autoscaler = None
        if wrong_range(Y):
            if params.autorescale_output:
                lower = params.__dict__.get('autorescale_lower_bound', -1.0)
                upper = params.__dict__.get('autorescale_upper_bound', 1.0)

                autoscaler = AutoScaler(Y, lower, upper)
                Y = autoscaler(Y)
            elif not params.allow_out_of_range:
                raise RuntimeError("The training data for the neural net are not scaled"
                                   " to <-1,1>. This will probably result in poor performance"
                                   " of the regression."
                                   " Set allow_out_of_range to True to disable the check, or"
                                   " set autorescale_output to True to perform the automatic scaling"
                                   " (and descaling of output), or do some scaling yourself.")

        ## Create and postprocess the training data
        # Step 2 in the __doc__
        train_data = XY_to_fann_train_data(X, Y)

        # set properties
        fann_setter(train_data, params.train_prop)

        # postprocess if relevant
        if params.train_postprocess:
            params.train_postprocess(train_data)

        ## Create the ANN
        # Step 3 in the __doc__

        ann = FannNeuralNetPickable()
        # this could be used instead, but we use the wrapper, so that the
        # classifier is picklable
        #ann = libfann.neural_net()

        # topology = [ number of input neurons,
        #              number of neurons in 1st hidden layer,
        #              number of neurons in 2nd hidden layer,
        #              etc.,
        #              number of output neurons ]
        topology = (len(X[0]), ) + tuple(params.hidden_layers) + (len(Y[0]), )
        if params.nn_type == 'standard':
            ann.create_standard_array(topology)
        elif params.nn_type == 'sparse':
            ann.create_sparse_array(params.connection_rate, topology)
        elif params.nn_type == 'shortcut':
            ann.create_shortcut_array(topology)
        elif params.nn_type == 'cascade':
            if params.hidden_layers:
                raise ValueError("The cascade-trained network must not have any hidden layers at startup.")
            ann.create_shortcut_array(topology)
        else:
            assert False

        # set the properties
        # some defaults
        ann.set_activation_function_hidden(libfann.SIGMOID_SYMMETRIC)
        ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC)

        # override by
        fann_setter(ann, params.ann_prop)

        # postprocess if relevant
        if params.ann_postprocess:
            params.ann_postprocess(ann)

        ## Train the ANN
        # Step 4 in the __doc__

        if params.nn_type == 'cascade':
            ann.cascadetrain_on_data(train_data,
                                     params.max_neurons,
                                     params.neurons_between_reports,
                                     params.desired_error)
        else:
            ann.train_on_data(train_data,
                              params.max_epochs,
                              params.iterations_between_reports,
                              params.desired_error)

        return FannNeuralClassifier(ann, data.domain, autoscaler)

def table_to_XY(data):
    """Converts the Orange.Table data to pairs of input and output vectors
    (represented row-wise in two numpy.arrays X, Y)
    suitable to be used as a training/testing set for an artificial neural
    network.

    The attributes are created by the Table.to_numpy method. The class
    attribute(s) are transformed as follows:
    - each Continuous class attribute (regression) is assigned one output
      neuron (no scaling is performed in this step)
    - each Discrete class attribute (classification) is assigned one output
      neuron for each discrete value of this class. E.g. in the iris dataset
      (one discrete class attribute noting the name of the flower), we have
      3 neurons.
    """
    if not len(data):
        return numpy.array([]), numpy.array([])

    ## prepare the training data
    # classes

    cls_descriptors = filter(lambda desc: desc, [data.domain.class_var] + list(data.domain.class_vars))

    def get_unfolder(descriptor):
        """Unfolds a class variable into a number of output neurons' outputs."""
        if isinstance(descriptor, Orange.feature.Continuous):
            def unfold(value):
                return [float(value)]

        elif isinstance(descriptor, Orange.feature.Discrete):
            def unfold(value):
                l = [-1.0] * len(descriptor.values)
                l[int(value)] = 1.0
                return l

        else:
            raise ValueError("Unsupported class variable type '%s'. Must be either Discrete or Continuous." % descriptor.var_type)

        return unfold
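
    # For example, with the iris class variable (3 values), the value
    # 'Iris-versicolor' (index 1) unfolds to [-1.0, 1.0, -1.0]; a Continuous
    # class value 0.44 unfolds to [0.44].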

    unfolders = map(get_unfolder, cls_descriptors)

    def get_class_values(instance):
        l = []
        if data.domain.class_var:
            l = [instance.get_class()]
        return l + instance.get_classes()

    y = []

    # flatten([[0,0,0,1], [0.44], [1,0]]) =
    # [ 0, 0, 0, 1, 0.44, 1, 0 ]
    flatten = lambda it: list(itertools.chain.from_iterable(it))

    # multi_map([lambda x: x + 1, lambda x: x * 2], [0, 10]) =
    # [1, 20]
    multi_map = lambda Fs, Args: [f(arg) for f, arg in zip(Fs, Args)]

    for instance in data:
        values = get_class_values(instance)
        y.append(flatten(multi_map(unfolders, values)))

    # attributes
    X = data.to_numpy()[0]
    # classes
    Y = numpy.array(y)

    # debug output, disabled by default:
    #print "X"
    #for instance in data:
    #    print len(instance)
    #    print instance
    #print "Y"
    #print Y

    return X, Y
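
# A quick sanity sketch for table_to_XY (assuming the standard iris dataset
# shipped with Orange: 150 instances, 4 attributes, 3 class values):
#
#   data = Orange.data.Table("iris")
#   X, Y = table_to_XY(data)
#   # X.shape == (150, 4), Y.shape == (150, 3)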

def XY_to_fann_train_data(X, Y):
    if len(X) != len(Y):
        raise ValueError("X and Y must have the same number of rows.")

    train_data = libfann.training_data()

    if len(X):
        dim_X, dim_Y = len(X[0]), len(Y[0])

        tmp = tempfile.NamedTemporaryFile(delete=False)
        with tmp:
            tmp.write("%d %d %d\n" % (len(X), dim_X, dim_Y))
            for i in xrange(len(X)):
                for line in [X[i], Y[i]]:
                    tmp.write("%s\n" % ' '.join(str(float(val)) for val in line))

        train_data.read_train_from_file(tmp.name)
        tmp.unlink(tmp.name)

    return train_data
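
# The temporary file above uses FANN's plain-text training data format:
# a header line "num_pairs num_inputs num_outputs" followed by alternating
# input and output lines. E.g. for the four XOR pairs with 2 inputs and
# 1 output:
#
#   4 2 1
#   0.0 0.0
#   0.0
#   0.0 1.0
#   1.0
#   ...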

class RawScaler:
    """Linearly maps the interval <MIN, MAX> onto <a, b>."""

    def __init__(self, MIN, MAX, a, b):
        self.MIN = MIN
        self.MAX = MAX
        self.a = a
        self.b = b

    def __call__(self, number):
        assert self.a <= self.b

        if number < self.MIN or number > self.MAX:
            logging.warn("The MIN and MAX estimated from the train set"
                         " do not reflect the real MIN and MAX of the test set."
                         " (%.2f < %.2f) or (%.2f > %.2f)" % (number, self.MIN,
                                                              number, self.MAX))

        if self.MIN == self.MAX:
            # return the average value of the target interval
            return float(self.a + self.b) / 2

        return self.a + (number - self.MIN) * (float(self.b - self.a) / (self.MAX - self.MIN))
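
# Worked example: RawScaler(0, 10, -1, 1) maps x to -1 + x * 0.2,
# so 0 -> -1.0, 5 -> 0.0 and 10 -> 1.0. The inverse mapping is obtained by
# swapping the argument pairs, which is exactly what AutoScaler does below.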

class AutoScaler:
    """Column-wise linear scaler: learns per-column MIN/MAX from a training
    array and maps each column onto <a, b> (and back)."""

    def __init__(self, train_array, a=-1, b=1):
        assert a <= b
        self.a = a
        self.b = b
        self.train(train_array)

    def train(self, array):
        rows, cols = array.shape

        self.trans = []
        self.trans_back = []

        for col in xrange(cols):
            column = array[:, col]
            mi, ma = column.min(), column.max()
            self.trans.append(RawScaler(mi, ma, self.a, self.b))
            self.trans_back.append(RawScaler(self.a, self.b, mi, ma))

    def scale(self, vector):
        return self._scale(vector, self.trans)

    def scale_back(self, vector):
        return self._scale(vector, self.trans_back)

    def _scale(self, vector, fcs):
        vector = numpy.array(vector)
        cols, = vector.shape
        assert cols == len(fcs)
        return numpy.array([fcs[i](vector[i]) for i in xrange(cols)])

    def scale_array(self, array):
        return self._scale_array(array, self.trans)

    def scale_array_back(self, array):
        return self._scale_array(array, self.trans_back)

    def _scale_array(self, array, fcs):
        by_rows = [self._scale(vector, fcs) for vector in array]
        return numpy.hstack(by_rows).reshape(array.shape)

    def __call__(self, array):
        return self.scale_array(array)

## FIXME Orange.classification.Classifier (which should be the base class)
## is commented out because when it is not, pickling does not work...
class FannNeuralClassifier: #(Orange.classification.Classifier):

    def __init__(self, ann, domain, autoscaler=None):
        assert isinstance(ann, FannNeuralNetPickable)

        self.ann = ann
        self.domain = domain
        self.autoscaler = autoscaler

    def raw_response(self, instance):
        instance = list(instance)
        if self.domain.class_var:
            instance = instance[:len(self.domain) - 1]

        if len(instance) != self.ann.get_num_input():
            raise ValueError("Instance '%s' has wrong length (%d instead of %d)." % (str(instance),
                                                                                     len(instance),
                                                                                     self.ann.get_num_input()))

        input_vector = map(float, instance)

        ## run the input through the ANN
        output_vector = self.ann.run(input_vector)

        if self.autoscaler:
            output_vector = self.autoscaler.scale_back(output_vector)

        return output_vector

    def _get_responses(self, instance):
        # basically the opposite of the unfolding in table_to_XY

        output_vector = self.raw_response(instance)

        cls_descriptors = filter(lambda desc: desc, [self.domain.class_var] + list(self.domain.class_vars))

        def get_folder(descriptor):
            """Folds neurons' outputs into a target value.

            Returns a tuple (F, num), where F is a function that takes a
            list of length num (num is the number of neuron outputs
            consumed).
            """
            if isinstance(descriptor, Orange.feature.Continuous):
                def fold(outputs):
                    value = descriptor(outputs[0])
                    dist = Orange.statistics.distribution.Continuous(descriptor)
                    dist[value] = 1.
                    return value, dist
                return fold, 1

            elif isinstance(descriptor, Orange.feature.Discrete):
                def fold(outputs):
                    # the output neurons' range is <-1, 1>, where
                    #   -1 says this class is not likely
                    #    1 says this class is likely
                    # so we transform it to <0,2>, so that we do not have
                    # "negative" probabilities after the normalization
                    outputs = [o + 1 for o in outputs]
                    cprob = Orange.statistics.distribution.Discrete(outputs)
                    cprob.normalize()

                    mt_prob = cprob
                    mt_value = Orange.data.Value(descriptor, cprob.values().index(max(cprob)))
                    return mt_value, mt_prob
                return fold, len(descriptor.values)

            else:
                raise ValueError("Unsupported class variable type '%s'. Must be either Discrete or Continuous." % descriptor.var_type)

        responses = []
        for folder, input_size in map(get_folder, cls_descriptors):
            responses.append(folder(output_vector[:input_size]))
            output_vector = output_vector[input_size:]

        return responses
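
    # Folding sketch for a 3-valued discrete class: raw outputs
    # [-0.8, 0.9, -0.1] are shifted to [0.2, 1.9, 0.9] and normalized to
    # approximately [0.07, 0.63, 0.30], so the predicted value is the one
    # at index 1 (the largest "probability").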

    def __call__(self, instance,
                 result_type=Orange.classification.Classifier.GetValue):
        """Classify a new instance."""
        ## Handles the ugly result_type discussion;
        ## see self._get_responses for the real work

        responses = self._get_responses(instance)

        values, probs = [], []
        for value, prob in responses:
            values.append(value)
            probs.append(prob)

        # multilabel
        if self.domain.class_vars:
            if result_type == Orange.classification.Classifier.GetValue:
                return values
            #if any( prob == None for prob in probs):
                #raise ValueError("Wrong result_type for regression task")
            if result_type == Orange.classification.Classifier.GetProbabilities:
                return probs
            if result_type == Orange.classification.Classifier.GetBoth:
                return (tuple(values), tuple(probs))
            assert False

        assert len(values) == 1
        value, prob = values[0], probs[0]

        if result_type == Orange.classification.Classifier.GetValue:
            return value
        #if prob == None:
            #raise ValueError("Wrong result_type for regression task")
        if result_type == Orange.classification.Classifier.GetProbabilities:
            return prob
        if result_type == Orange.classification.Classifier.GetBoth:
            return (value, prob)

        assert False

## Utility functions

def fann_setter(obj, set_dict):
    """Small utility function for calling setters of FANN objects."""
    for key, val in set_dict.iteritems():
        setter = obj.__getattribute__(key)
        if not isinstance(val, tuple):
            val = (val, )

        setter(*val)
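
# Usage sketch: single values are wrapped into a 1-tuple, so both of these
# work (assuming pyfann exposes the multi-argument FANN setter
# set_activation_steepness(steepness, layer, neuron)):
#
#   fann_setter(ann, {'set_learning_rate': 0.7})
#   fann_setter(ann, {'set_activation_steepness': (0.5, 1, 0)})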

def fake_file_call_s2f(func, string):
    """Saves the string into a temporary file, calls

    func(filename)

    and deletes the file afterwards.
    """
    f = tempfile.NamedTemporaryFile(delete=False)
    f.write(string)
    f.close()

    func(f.name)
    f.unlink(f.name)

def fake_file_call_f2s(func):
    """Lets the function save something into a temporary file and then
    returns the file content:

    func(filename)
    return filecontent

    The file is deleted afterwards.
    """
    f = tempfile.NamedTemporaryFile()
    func(f.name)
    ret = f.read()
    f.close()  # and delete
    return ret

## tests and examples

def test_xor():
    """
    Test simple regression by learning the XOR function, a famous problem
    that is impossible for a 1-layer network (without hidden layers).
    """
    ## the data
    attrs = [Orange.feature.Continuous(name) for name in ['X', 'Y', 'X^Y']]
    insts = [[x, y, x ^ y] for x, y in itertools.product([0, 1], [0, 1])]
    data = Orange.data.Table(Orange.data.Domain(attrs), insts)

    ## the NeuralNetwork
    print "\n Test: Xor Function\n" + test_xor.__doc__

    classifier = FannNeuralLearner(data,
                                   # one hidden layer with 3 neurons...
                                   # XOR cannot be done without a hidden layer
                                   hidden_layers=[3],
                                   desired_error=0.0001,
                                   iterations_between_reports=500,
                                   max_epochs=5000)

    for inst in data:
        print "%d xor %d = %d, nn(%d, %d) = %.2f " % (
            inst[0], inst[1], inst[2],
            inst[0], inst[1], classifier(inst))

def test_iris():
    """
    Test simple classification by learning to classify the iris dataset.
    """
    data = Orange.data.Table("iris.tab")

    print "\n Test: Iris Dataset\n" + test_iris.__doc__
    classifier = FannNeuralLearner(data,
                                   hidden_layers=[5],
                                   max_epochs=2000,
                                   desired_error=0.005,
                                   iterations_between_reports=200)

    show_predictions(classifier, data, probs=True)

def show_predictions(classifier, data, top=5, probs=False):
    print
    if probs:
        print "Probability key:\n", data.domain.class_var.values
        print
    print "Random %d classifications%s:" % (top, ' and probabilities' if probs else '')
    print
    cnt = 0
    data.shuffle()

    for num, inst in enumerate(data):
        pred, prob = classifier(inst, Orange.classification.Classifier.GetBoth)
        cls = inst.get_class()

        if num < top:
            if probs:
                print prob
            print "%d: Instance %s predicted as %s" % (num + 1, cls, pred)
            print

        if cls != pred:
            cnt += 1

    print "\nMissed: %d out of %d examples = %.1f%%" % (cnt, len(data), 100.0 * cnt / len(data))

def test_cascade():
    """
    Test classification on the voting dataset and the GetProbabilities
    output. Also, the learning method used to train the neural net is
    cascade learning:
    See: http://leenissen.dk/fann/html/files/fann_cascade-h.html
    """
    data = Orange.data.Table("voting.tab")
    # Impute
    data = Orange.data.imputation.ImputeTable(data, method=Orange.feature.imputation.AverageConstructor())
    # take half as train data
    selection = Orange.data.sample.SubsetIndices2(data, 0.5)
    train_data = data.select(selection, 0)
    test_data = data.select(selection, 1)

    print "\n Test: Cascade Train, Voting Dataset and GetProbabilities\n" + test_cascade.__doc__
    classifier = FannNeuralLearner(train_data,
                                   nn_type='cascade',
                                   max_neurons=5,
                                   neurons_between_reports=2,  # 0 turns it off
                                   desired_error=0.005)

    print
    print "Possible classes:", data.domain.classVar.values
    print "Probabilities for democrats:"
    print """
    (Note that these are not really 'probabilities';
    they are more like a measure of the network's sureness.
    They are basically the normalized neurons' outputs.)"""
    print

    test_data.shuffle()
    show_predictions(classifier, test_data, probs=True)

def test_compare():
    iris = Orange.data.Table("iris")
    learners = [
        Orange.classification.knn.kNNLearner(),
        Orange.classification.bayes.NaiveLearner(),
        Orange.classification.majority.MajorityLearner(),
        FannNeuralLearner()
    ]

    cv = Orange.evaluation.testing.cross_validation(learners, iris, folds=5)
    print ["%.4f" % score for score in Orange.evaluation.scoring.CA(cv)]

def test_housing():
    """
    Test regression together with automatic scaling -- when the output
    domain is out of the range <-1,1>.
    """
    data = Orange.data.Table("housing")

    # rescale the output domain to <-X, X>
    # the default would be X = 1
    X = 1.2

    learner = FannNeuralLearner(
        hidden_layers=[50],
        max_epochs=2000,
        desired_error=0.005,
        iterations_between_reports=0,
        allow_out_of_range=False,
        autorescale_output=True,
        autorescale_lower_bound=-X,
        autorescale_upper_bound=X)

    cv = Orange.evaluation.testing.cross_validation([
        learner,
        Orange.regression.linear.LinearRegressionLearner()
    ], data, folds=5)

    print '\n'.join("%s : %.4f" % (text, score)
                    for score, text in zip(Orange.evaluation.scoring.RMSE(cv),
                                           ["ann", "linear"]))

def equal_within_epsilon(a, b, epsilon=1e-10):
    if a.shape != b.shape:
        return False
    return (numpy.abs(a - b) <= epsilon).all()

def test_autoscale():
    data = 10 * numpy.random.random((40, 4))
    test, train = data[:5], data[5:]

    # we could also specify a smaller domain
    #at = AutoScaler(train, -0.8, 0.8)
    at = AutoScaler(train)  # (-1,1) by default

    print "train scaled"
    print at(train)
    print "test scaled"
    print at(test)

    to_list = lambda arr: map(list, list(arr))

    #print "test - to and fro"
    #print at.scale_array_back(at(test))
    #print "test normal"
    #print test

    assert equal_within_epsilon(test, at.scale_array_back(at(test)))

def test_pickle():
    """
    Test pickling on the xor network.
    """
    ## the data
    attrs = [Orange.feature.Continuous(name) for name in ['X', 'Y', 'X^Y']]
    insts = [[x, y, x ^ y] for x, y in itertools.product([0, 1], [0, 1])]
    data = Orange.data.Table(Orange.data.Domain(attrs), insts)

    ## the NeuralNetwork
    print "\n Test: Pickle (Xor network)\n" + test_pickle.__doc__

    classifier = FannNeuralLearner(data,
                                   # one hidden layer with 3 neurons...
                                   # XOR cannot be done without a hidden layer
                                   hidden_layers=[3],
                                   desired_error=0.0001,
                                   iterations_between_reports=500,
                                   max_epochs=5000)
    import pickle
    with open("OUT.pkl", 'wb') as fout:
        pickle.dump(classifier, fout)

    print 'saved'
    with open("OUT.pkl", 'rb') as fin:
        print pickle.load(fin)

if __name__ == "__main__":
    test_xor()
    #test_iris()
    #test_cascade()
    #test_compare()
    #test_housing()
    #test_autoscale()
    #test_pickle()