# Minor fix for compatibility with Ruby versions that don't understand [...]
# [mailvisa.git] / calculate_scores.rb
# blob f5c9b29830118ae3399b9307a6818bf2f81a8fb7
1 require 'wordlist'
### Defaults

# Directory that file names without a '/' are resolved against.
confdir = ENV['HOME'] + '/settings/mailvisa'
# Default file names; each can be overridden on the command line.
scorefile = 'scores'
goodfile = 'good'
badfile = 'bad'
# Factor applied to the good-word frequency in calculate_word_score
# (set with the -m option).
$good_multiplier = 1
### Functions

# Computes the score of +word+ from the global word tables.
#
# Reads $bad_words / $good_words (word => count), $bad_messages /
# $good_messages (message totals) and $good_multiplier.  The score is
#
#   b / (b + g)
#
# where b is the word's bad count divided by $bad_messages and g is its
# good count divided by $good_messages, scaled by $good_multiplier.  A word
# missing from a table contributes 0 for that side.
#
# Returns a Float clamped to the range 0.1..0.9.  When the ratio is
# undefined (b + g is zero, e.g. the word is in neither table or the
# multiplier is 0 and the word is only in the good table) the neutral
# value 0.5 is returned instead of NaN.
def calculate_word_score word
        bad_count = $bad_words[word]
        good_count = $good_words[word]

        # Float zeros keep the division below in floating point, so an
        # all-zero case yields NaN rather than raising ZeroDivisionError.
        b = bad_count.nil? ? 0.0 : bad_count.to_f / $bad_messages
        g = good_count.nil? ? 0.0 : $good_multiplier * good_count.to_f / $good_messages

        s = b / (b + g)
        # NaN compares false against both clamp bounds and would otherwise
        # be returned as the score.
        return 0.5 if s.nan?

        if s < 0.1
                0.1
        elsif s > 0.9
                0.9
        else
                s
        end
end
# One-line synopsis printed by the -h option.
usage = 'USAGE: ' + $0 + ' [options]'

# Option summary printed after the synopsis by the -h option.
help = <<EOT
Valid options are:
-c <path>       Look for files in <path> (default: $HOME/settings/mailvisa)
-g <file>       Load good words from <file> (default: "good")
-b <file>       Load bad words from <file> (default: "bad")
-f <file>       Write scores to <file> (default: "scores")
-m <num>        Multiply number of good occurrences by <num> (default: 1.0)
EOT
### Main program

## Process command line

# Parses the command-line words in +args+ and returns a hash containing only
# the settings that were given explicitly.  Possible keys: :confdir,
# :goodfile, :badfile, :scorefile, :good_multiplier (Float) and :help.
#
# Options are handled left to right.  -h stops parsing immediately and sets
# :help.  An unknown option, or an option whose value is missing, prints a
# message on $stderr and exits with status 0x80.  For compatibility with the
# original version, up to three positional arguments are accepted as the
# good, bad and score file (in that order); a fourth raises
# 'Too many arguments'.
def parse_command_line args
        settings = {}
        i, n = 0, 0
        while i < args.length
                arg = args[i]
                case arg
                when '-c', '-g', '-b', '-f', '-m'
                        i = i + 1
                        # Without this check a trailing option would store
                        # nil (or 0.0 for -m) and fail later or silently.
                        if i >= args.length
                                $stderr.puts 'Missing argument for option: ' + arg
                                $stderr.puts 'See "' + $0 + ' -h" for valid options'
                                exit 0x80
                        end
                        value = args[i]
                        case arg
                        when '-c'
                                settings[:confdir] = value
                        when '-g'
                                settings[:goodfile] = value
                        when '-b'
                                settings[:badfile] = value
                        when '-f'
                                settings[:scorefile] = value
                        when '-m'
                                settings[:good_multiplier] = value.to_f
                        end
                when '-h'
                        settings[:help] = true
                        return settings
                when /^-/
                        $stderr.puts 'Unknown option: ' + arg
                        $stderr.puts 'See "' + $0 + ' -h" for valid options'
                        exit 0x80
                else
                        ## Compatibility with original version
                        case n
                        when 0
                                settings[:goodfile] = arg
                        when 1
                                settings[:badfile] = arg
                        when 2
                                settings[:scorefile] = arg
                        else
                                raise 'Too many arguments'
                        end
                        # Advance to the next positional slot; without this
                        # every positional argument lands in the good file.
                        n = n + 1
                end
                i = i + 1
        end
        settings
end

overrides = parse_command_line ARGV
if overrides[:help]
        puts usage
        print "\n" + help
        exit
end
# Apply only what was given; everything else keeps its default.
confdir = overrides[:confdir] if overrides.key? :confdir
goodfile = overrides[:goodfile] if overrides.key? :goodfile
badfile = overrides[:badfile] if overrides.key? :badfile
scorefile = overrides[:scorefile] if overrides.key? :scorefile
$good_multiplier = overrides[:good_multiplier] if overrides.key? :good_multiplier
## Resolve file names: a name without a '/' is looked up in confdir
goodfile = confdir + '/' + goodfile unless goodfile.index('/')
badfile = confdir + '/' + badfile unless badfile.index('/')
scorefile = confdir + '/' + scorefile unless scorefile.index('/')

## Load the word lists
# File.open is used instead of Kernel#open so that a name starting with '|'
# is treated as a path and never run as a command; the block form closes the
# file even when loading raises.
print "Loading good words from #{goodfile}..."
$stdout.flush
good = File.open(goodfile) { |io| load_wordlist io }
$good_words = good[:words]
$good_messages = good[:messages]
puts "#{$good_words.length} words loaded"

print "Loading bad words from #{badfile}..."
$stdout.flush
bad = File.open(badfile) { |io| load_wordlist io }
$bad_words = bad[:words]
$bad_messages = bad[:messages]
puts "#{$bad_words.length} words loaded"

## Score every word whose count in either list exceeds 4
print "Calculating probabilities..."
$stdout.flush
score = {}
# Bad words first, then good words; a word already scored from the bad list
# is not scored again.
[$bad_words, $good_words].each do |words|
        words.each do |word, count|
                if score[word].nil? && count > 4
                        score[word] = calculate_word_score word
                end
        end
end
puts 'done'

## Write the scores
wordlist = { :messages => ($good_messages + $bad_messages), :words => score }
# Block form closes the score file even if dump_wordlist raises.
File.open(scorefile, 'w') { |io| dump_wordlist wordlist, io }