From 2c4cc554f561d80e10d1cbbd7e43b8cd558afbec Mon Sep 17 00:00:00 2001 From: hellboy Date: Sun, 14 Mar 2010 18:56:56 +0100 Subject: [PATCH] knn_cross.py: rm --- knn_cross.py | 219 ----------------------------------------------------------- 1 file changed, 219 deletions(-) delete mode 100755 knn_cross.py diff --git a/knn_cross.py b/knn_cross.py deleted file mode 100755 index 0bfc25a..0000000 --- a/knn_cross.py +++ /dev/null @@ -1,219 +0,0 @@ -#!/usr/bin/python -import sys -import subprocess -import os -from gostyle import * -from math import sqrt -import numpy - -from data_about_players import Data - -from knn import KNNOutputVectorGenerator - -class NeuralNet: - def __init__( self, filename ): - self.null = open('/dev/null','w') - - s = "./gnet/gnet_train -l 3 -n 30 -p 30 -e 0.0003 -o gonet.net ./"+filename - args = s.split() - ret = subprocess.call(args,stdout=self.null) - s = "./gnet/gnet_run gonet.net" - args = s.split() - self.p = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=self.null) - def __call__(self, vector): - self.p.stdin.write(' '.join([str(a) for a in vector]) + '\n') - a = self.p.stdout.readline() - return [ float(num) for num in a.split()] - def close(self): - self.p.stdin.close() - self.p.stdout.close() - self.null.close() - -if __name__ == '__main__': - main_pat_filename = Data.main_pat_filename - num_features = 400 - - # Neural net - #typ = 'nn' - #typ = 'knn' - # random - #typ = 'rnd' - typ = 'joint_nn_knn' - - player_vector = Data.questionare_total - # players_ignore = [ "Yi Ch'ang-ho 2004-" ]#, "Fujisawa Hideyuki","Yuki Satoshi", "Otake Hideo", "Yi Ch'ang-ho 2005+","Takao Shinji","Hane Naoki","Kobayashi Koichi" ] - players_ignore = [ "Yi Ch'ang-ho 2004-", "Yi Ch'ang-ho 2005+" ]#,"Takao Shinji","Hane Naoki","Kobayashi Koichi" ] - players_all = [ p for p in player_vector.keys() if p not in players_ignore ] - - ### Object creating input vector when called - print "Creating input vector generator from main pat file:", main_pat_filename - i = InputVectorGenerator(main_pat_filename, num_features) - - # Create list of input vectors - input_vectors = [] - for name in players_all: - input_vectors += [i(Data.pat_files_folder + name)] - - #print '"%s"'%(players_all[2],) - #print input_vectors[2] - - if len(input_vectors) == 0: - print >>sys.stderr, "No reference vectors." - sys.exit() - - ### PCA example usage - # Change this to False, if you do not want to use PCA - use_pca = True - if use_pca: - # Create PCA object, trained on input_vectors - print >>sys.stderr, "Running PCA." - pca = PCA(input_vectors, reduce=True) - # Perform a PCA on input vectors - input_vectors = pca.process_list_of_vectors(input_vectors) - # Creates a Composed object that first generates an input vector - # and then performs a PCA analysis on it. - i = Compose(i, pca) - - ### n/4-fold cross validation - #bounds = random.sample(range(1,len(players_all)), len(players_all) / 10 ) - bounds=[] - for x in range(1,len(players_all)/4): - bounds += [4*x for _ in [1] if 4*x < len(players_all)] - if not bounds: - print >>sys.stderr, "Pop too small." - sys.exit() - bounds.sort() - - def norm(vec): - return [ (x - 1) / 4.5 - 1.0 for x in vec ] - def revnorm(vec): - return [ (x + 1) * 4.5 + 1.0 for x in vec ] - - def rand_vect(k): - return list(2.0*numpy.random.random(k)-1.0) - - print >>sys.stderr, "Running Cross-validation." - print - errs=[ [] for _ in xrange(len(players_all)) ] - es=[] - esps=[[],[],[],[]] - sentinel=len(players_all) - number_runs = 1 - for _ in xrange(number_runs): - prev=0 - for b in bounds+[sentinel]: - validation_set = range(prev, b) - reference_set = range(0,prev) + range(b,sentinel) - if False: - print "Reference set :", - for pr in range(0, prev): - print "R", - for pr in validation_set: - print "_", - for pr in range(b, sentinel): - print "R", - print - prev = b - if typ == 'nn': - data =[] - for index in reference_set: - data.append( (input_vectors[index], norm(player_vector[players_all[index]])) ) - - - ### We can enlarge the data set by adding linear combinations of input and output vectors - use_lin_combinations = False - if use_lin_combinations: - data += Combinator().combine(data) - - print_set_to_file(data,'nn_cross.data') - - nn = NeuralNet('nn_cross.data') - # Create list of output vectors using weighted kNN algorithm approximating output_vector - output_vectors = [ nn(input_vectors[index]) for index in validation_set ] - nn.close() - elif typ == 'knn': - ### Object creating output vector when called; - ref_dict = {} - for index in reference_set: - ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]]) - - - # best pro InputVectorGenerator rescale=Rescale - oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.8) - - # Create list of output vectors using weighted kNN algorithm approximating output_vector - output_vectors = [ oknn(input_vectors[index]) for index in validation_set ] - elif typ == 'joint_nn_knn': - data =[] - ref_dict = {} - for index in reference_set: - data.append( (input_vectors[index], norm(player_vector[players_all[index]])) ) - ref_dict[tuple(input_vectors[index])] = norm(player_vector[players_all[index]]) - - print_set_to_file(data,'nn_cross.data') - nn = NeuralNet('nn_cross.data') - # Create list of output vectors using weighted kNN algorithm approximating output_vector - ov_3 = [ nn(input_vectors[index]) for index in validation_set ] - - nn.close() - - oknn = KNNOutputVectorGenerator(ref_dict, k=4, weight_param=0.8) - ov_1 = [ oknn(input_vectors[index]) for index in validation_set ] - - oknn = KNNOutputVectorGenerator(ref_dict, k=3, weight_param=0.8) - ov_2 = [ oknn(input_vectors[index]) for index in validation_set ] - - oknn = KNNOutputVectorGenerator(ref_dict, k=2, weight_param=0.8) - ov_4 = [ oknn(input_vectors[index]) for index in validation_set ] - - output_vectors = [ [a[0],b[1],c[2],d[3]] for a,b,c,d in zip(ov_1, ov_2, ov_3, ov_4)] - elif typ == 'rnd': - output_vectors = [ rand_vect(4) for index in validation_set ] - - output_vectors = [ revnorm(x) for x in output_vectors ] - desired_vectors = [ player_vector[players_all[index]] for index in validation_set ] - #desired_vectors = [ norm(player_vector[players_all[index]]) for index in validation_set ] - - if False: - for vec_set,text in [(output_vectors, "Output: "), (desired_vectors, "Desired:")]: - print text, - for o in vec_set: - for x in o: - print "%02.3f"%(x,), - print "; ", - print - - for num1, (o,d) in zip(validation_set, zip(output_vectors, desired_vectors)): - err = 0.0 - for num,(x,y) in enumerate(zip(o,d)): - e = (1.0*x-1.0*y)**2 - esps[num]+=[e] - es += [e] - err += e - errs[num1] += [err] - - if typ == 'joint_nn_knn': - print "Joint classifier:" - elif typ == 'knn': - print "k-NN classifier:" - elif typ == 'nn': - print "Neural network classifier:" - elif typ == 'rnd': - print "Random classifier:" - #print "Total square err: %2.3f"%( sum(errs) / number_runs,) - mar = numpy.array(errs) - mean = mar.mean() - print "Mean square err per player: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean)) - mean = numpy.array(es).mean() - print "Mean square err per style: " + "%2.3f ( = sd %2.3f) "%(mean, sqrt(mean)) - for num, style in enumerate(esps): - mean = numpy.array(style).mean() - print "Style %1d : %2.3f ( = sd %2.3f)"%(num+1, mean, sqrt(mean)) - print - print "Players sorted by mean square error:" - p = zip([numpy.array(errs[p]).mean() for p in xrange(len(players_all)) ], players_all) - p.sort() - for err, name in p: - print "%2.3f %s"%(err,name) - #print "%s"%(name,) - sys.exit() -- 2.11.4.GIT