# Load the persisted word counts from "words.db.pl" into the %words hash
# (declared outside this sub).
#
# Each line of the file is split on tabs; the first field becomes the key and
# the second the stored value. If the file does not exist the sub returns
# without touching %words. Dies if the file exists but cannot be opened.
sub load_db returns Void {
    return() unless "words.db.pl" ~~ :e;

    my $db = open("words.db.pl") orelse
        die "Cannot open the words.db.pl file: $!";

    for $db.lines -> $_line {
        # NOTE(review): the listing dropped the statement that defines $line.
        # Restored as a copy of the loop parameter; .chomp is a no-op when the
        # reader already strips terminators and otherwise keeps the terminator
        # out of the stored value.
        my $line = $_line.chomp;
        my ($key, $value) = split("\t", $line);
        %words{"$key"} = $value;
    }

    $db.close();
}
# Write every entry of the %words hash (declared outside this sub) to
# "words.db.pl", one "key<TAB>value" line per entry.
#
# The file is opened for writing with :w. Dies if the file cannot be opened.
sub save_db returns Void {
    my $db = open("words.db.pl", :w) orelse
        die "Cannot open the words.db.pl file: $!";

    for (%words.kv) -> $key, $value {
        $db.say($key ~ "\t" ~ $value);
    }

    # Close explicitly so the handle is released once all entries are written.
    $db.close();
}
# Count the words found in a text file.
#
# Parameters:
#   $file - path of the file to read.
# Returns:
#   A hash mapping each lower-cased word to the number of times it was found.
# Dies if the file cannot be opened.
#
# A "word" is a run of \w characters immediately followed by a space, tab,
# newline or carriage return; each match is removed from the line before the
# next one is looked for.
sub parse_file (Str $file) returns Hash {
    my %words_in_file;

    my $fh = open("$file") orelse
        die "Cannot open the '$file' file: $!";

    for $fh.lines -> $_line {
        # The substitution below edits the line in place, so it needs a
        # writable copy of the loop parameter. The pattern only accepts a word
        # that is followed by whitespace, so a newline is appended: if the
        # line reader strips terminators the last word of the line would
        # otherwise never match. When the terminator is still present the
        # extra newline changes nothing.
        my $line = $_line ~ "\n";

        while ($line ~~ s:P5/(\w+)[ \t\n\r]//) {
            %words_in_file{lc($0)}++;
        }
    }

    $fh.close();

    return %words_in_file;
}
# Fold the word counts of one file into the %words hash (declared outside this
# sub) under the given category.
#
# Parameters:
#   $category       - category label the counts are filed under.
#   %words_in_file  - word => count pairs, as returned by parse_file.
#
# Each count is added to the entry keyed "$category-$word"; an entry that does
# not exist yet is created by the addition.
sub add_words (Str $category, %words_in_file) returns Void {
    for (%words_in_file.kv) -> $key, $value {
        %words{"$category-$key"} += $value;
    }
}
# Score the words of one file against every known category and print one
# "category score" line per category, highest score first.
#
# Parameters:
#   %words_in_file - word => count pairs, as returned by parse_file; only the
#                    keys are used.
#
# Reads the %words hash (declared outside this sub), whose keys have the form
# "category-word". Scores are sums of logarithms, so they are only meaningful
# relative to one another.
sub classify (%words_in_file) returns Void {
    # NOTE(review): the listing dropped the declarations of %count, %score and
    # $total and the two accumulation statements in the first loop. They are
    # restored from how the variables are used further down (per-category
    # totals in %count, grand total in $total) -- confirm against the original.
    my %count;
    my %score;
    my $total = 0;

    for (%words.kv) -> $key, $value {
        # $0 is the category (everything before the last hyphen), $1 the word.
        $key ~~ rx:P5/^(.+)-(.+)$/;
        %count{$0} += $value;
        $total += $value;
    }

    for (%words_in_file.keys) -> $word {
        for (%count.kv) -> $category, $count {
            if (defined(%words{"$category-$word"})) {
                %score{$category} += log(%words{"$category-$word"} / $count);
            }
            else {
                # Word never seen in this category: use a small stand-in count
                # instead of zero, whose logarithm is undefined.
                %score{$category} += log(0.01 / $count);
            }
        }
    }

    # Weight each category by its share of all recorded words.
    for (%count.kv) -> $category, $count {
        %score{$category} += log($count / $total);
    }

    # do this weird sort block because:
    #    %score{$^a} <=> %score{$^b}
    # does not currently work
    for (%count.keys.sort: { %score{$^a} == %score{$^b} ?? 0 !! %score{$^a} > %score{$^b} ?? -1 !! 1 }) -> $category {
        say("$category %score{$category}");
    }
}
# Command-line dispatch:
#   add <category> <file>  - count the words of <file> and add them to <category>
#   classify <file>        - print a score per known category for <file>
#
# NOTE(review): the listing skips line numbers immediately before and after
# this span, so statements such as calls to load_db/save_db or a fallback
# branch may exist in the original but are not visible here -- confirm.
if (@*ARGS[0] eq 'add' && +@*ARGS == 3) {
    add_words(@*ARGS[1], parse_file(@*ARGS[2]));
}
elsif (@*ARGS[0] eq 'classify' && +@*ARGS == 2) {
    classify(parse_file(@*ARGS[1]));
}