[t/spec] A few wilder tests for nesting.
[pugs.git] / examples / naive_bayesian / naive_bayesian.pl
blobc7cd53dc3c6ebe86d24efb1ef48b75f8d2c66115
1 use v6;
3 my %words;
5 sub load_db returns Void {
6 return() unless "words.db.pl" ~~ :e;
7 my $db = open("words.db.pl") orelse die "Cannot open the words.db.pl file: $!";
8 for $db.lines -> $_line {
9 my $line = $_line;
10 my ($key, $value) = split("\t", $line);
11 %words{"$key"} = $value;
13 $db.close();
16 sub save_db returns Void {
17 my $db = open("words.db.pl", :w) orelse die "Cannot open the words.db.pl file: $!";
18 for (%words.kv) -> $key, $value {
19 $db.say($key ~ "\t" ~ $value);
21 $db.close();
24 sub parse_file (Str $file) returns Hash {
25 my %words_in_file;
26 my $fh = open("$file") orelse die "Cannot open the '$file' file: $!";
27 for $fh.lines -> $_line {
28 my $line = $_line;
29 while ($line ~~ s:P5/(\w+)[ \t\n\r]//) {
30 %words_in_file{lc($0)}++;
33 $fh.close;
34 return %words_in_file;
37 sub add_words (Str $category, %words_in_file) returns Void {
38 for (%words_in_file.kv) -> $key, $value {
39 %words{"$category-$key"} += $value;
43 sub classify (%words_in_file) returns Void {
45 my %count;
46 my $total = 0;
48 for (%words.kv) -> $key, $value {
49 $key ~~ rx:P5/^(.+)-(.+)$/;
50 %count{$0} += $value;
51 $total += $value;
54 my %score;
55 for (%words_in_file.keys) -> $word {
56 for (%count.kv) -> $category, $count {
57 if (defined(%words{"$category-$word"})) {
58 %score{$category} += log(%words{"$category-$word"} / $count);
60 else {
61 %score{$category} += log(0.01 / $count);
66 for (%count.kv) -> $category, $count {
67 %score{$category} += log($count / $total)
70 # do this weird sort block because:
71 # %score{$^a} <=> %score{$^b}
72 # does not currently work
73 for (%count.keys.sort: { %score{$^a} == %score{$^b} ?? 0 !! %score{$^a} > %score{$^b} ?? -1 !! 1 }) -> $category {
74 say("$category %score{$category}");
78 load_db();
80 if (@*ARGS[0] eq 'add' && +@*ARGS == 3) {
81 add_words(@*ARGS[1], parse_file(@*ARGS[2]));
83 elsif (@*ARGS[0] eq 'classify' && +@*ARGS == 2) {
84 classify(parse_file(@*ARGS[1]));
86 else {
87 say("USAGE:
88 add <category> <file>
89 classify <file>");
92 save_db();