6 my $dim = $ARGV[0]; # id of characteristic; eg. 0=territory
7 my $deg = $ARGV[1]; # number of class bins
9 our @PCA=qw(PCA1 PCA2 PCA3 PCA4 PCA5 PCA6 PCA7 PCA8 PCA9 PCA10);
10 our (%input, %output);
11 open P
, "pca.data" or die "$! - run pca.py?";
14 /^(\D+)\s+(\d+)\s+(.*)$/;
15 my $k = $1; $k =~ y/_/ /;
16 $input{$k}->{$PCA[$2-1]} = $3;
19 open P
, "python -c 'import data_about_players; data_about_players.questionare_average_raw(data_about_players.Data.questionare_list)'|" or die "$!";
24 next unless $input{$_[0]};
25 $output{$_[0]} = POSIX
::floor
($deg*($n-1)/10);
30 # k-fold cross-validation
35 foreach my $f (0..$k-1) {
37 push @
{$folds[$f]}, splice(@p, rand(scalar @p), 1);
44 foreach my $fold (@folds) {
48 my $nb = AI
::NaiveBayes1
->new;
52 foreach my $p (keys %input) {
53 next if grep { $p eq $_ } @
$fold;
54 # do not add instances with singluar values within this fold
55 next if ((scalar grep { my $pp = $_; (not grep { $pp eq $_ } @
$fold) and $output{$p} eq $output{$pp} } keys %output) < 2);
56 $nb->add_instance(attributes
=>$input{$p}, label
=>'div='.$output{$p});
60 #print $nb->print_model;
62 foreach my $pl (@
$fold) {
63 my $p = $nb->predict(attributes
=>$input{$pl});
64 my @sp = sort {$p->{$b} <=> $p->{$a}} keys %$p;
65 my $top = $sp[0]; $top =~ s/div=//;
66 # my $top = rand($deg); - for comparing with random classifier
67 # rescale to [-1,1] and calculate squared error
68 my $se = (($output{$pl} - $top) * 2 / $deg) ** 2;
70 #print join ',', $pl, $se, $output{$pl}, $top, $p->{'div='.$top}, "\n";
74 print $mse/(@folds*$k),"\n";