Minor fix for compatibility with Ruby versions that don't understand
[mailvisa.git] / add_messages.rb
blob8af086f081d34fd79079884aeb05e053992d1db8
1 require 'wordlist'
2 require 'tokenize'
### Defaults

# Directory used for wordlists that are named without a path component.
# ENV.fetch raises a KeyError that names HOME when the variable is unset,
# instead of failing with a NoMethodError on nil.
confdir = ENV.fetch('HOME') + '/settings/mailvisa'

# Setting for leaving X-Spam headers out of the analysis; the help text
# describes -i as the option that includes them again.
$ignorespamheaders = true

# weed discards words that occur this many times or fewer.
$weed_threshold = 1

# add_stream weeds the wordlist when its size is a multiple of this value;
# 0 disables weeding.
$weed_count = 100_000

# Wordlist file named on the command line (nil until one is given).
filename = nil

# Message files named on the command line after the wordlist.
messages = []
### Functions

# Tokenizes every line yielded by +stream+ and counts the tokens in the
# +words+ hash (token => number of occurrences), modifying it in place.
#
# * Lines starting with 'X-Spam' are skipped while $ignorespamheaders is set.
# * Tokens longer than 40 characters are ignored.
# * Unless $weed_count is 0, the hash is weeded whenever its size is a
#   multiple of $weed_count after a token has been counted.
#
# Returns whatever stream.each returns.
def add_stream words, stream
        stream.each do |line|
                # Honour the setting instead of skipping these headers unconditionally.
                next if $ignorespamheaders && line[0,6] == 'X-Spam'
                tokenize(line).each do |word|
                        next if word.length > 40
                        words[word] = (words[word] || 0) + 1
                        # weed prunes the hash in place, so its return value is not needed.
                        if $weed_count != 0 && words.length % $weed_count == 0
                                weed words
                        end
                end
        end
end
# Removes from +words+ (in place) every entry whose count is less than or
# equal to $weed_threshold, reporting progress on standard output.
#
# When fewer than a tenth of $weed_count words were discarded, $weed_count
# is doubled so that the next weeding happens later.
#
# Returns the same +words+ hash.
def weed words
        puts 'Weeding...'
        count = words.length
        # delete_if prunes safely; deleting inside Hash#each mutates the
        # hash while it is being iterated.
        words.delete_if { |_word, occurrences| occurrences <= $weed_threshold }
        weeded = count - words.length
        puts "#{weeded} words discarded"
        $weed_count = $weed_count * 2 if weeded < $weed_count / 10
        words
end
# One-line synopsis; printed for -h and after a command-line error.
usage = 'USAGE: ' + $0 + ' [options] <wordlist> [<message> ...]'

# Option reference; printed after the synopsis for -h.
help = <<EOT
Valid options are:
-c <path>       Look for configuration files in <path>
                        (default: $HOME/settings/mailvisa)
-h              Show this help message and exit
-i              Include X-Spam headers in analysis
-w <num>        Weed wordlist every <num> words (default: 100000
                        use 0 to disable weeding)
-t <num>        Weed words that occur <num> or fewer times (default: 1)
EOT
### Main program

## Process command line

# Parses the command-line arguments in +args+ without touching any global
# state and returns a hash describing them:
#
#   :help              true when -h was given
#   :error             message for an invalid command line
#   :confdir           value of -c <path>
#   :ignorespamheaders false when -i was given
#   :weed_count        value of -w <num>
#   :weed_threshold    value of -t <num>
#   :filename          first non-option argument (the wordlist)
#   :messages          all arguments after the wordlist (default [])
#
# Keys for options that were not given are absent. Parsing stops at -h, at
# the first error, and at the wordlist name; everything after the wordlist
# is treated as a message file, even when it starts with '-'.
def parse_command_line args
        parsed = { :messages => [] }
        i = 0
        while i < args.length
                arg = args[i]
                case arg
                when '-h'
                        parsed[:help] = true
                        return parsed
                when '-i'
                        parsed[:ignorespamheaders] = false
                when '-c', '-w', '-t'
                        # These options consume the following argument.
                        i = i + 1
                        value = args[i]
                        if value == nil
                                parsed[:error] = 'Missing argument for option: ' + arg
                                return parsed
                        end
                        if arg == '-c'
                                parsed[:confdir] = value
                        elsif value =~ /\A\d+\z/
                                key = if arg == '-w' then :weed_count else :weed_threshold end
                                parsed[key] = value.to_i
                        else
                                parsed[:error] = 'Invalid number for option ' + arg + ': ' + value
                                return parsed
                        end
                when /^-/
                        parsed[:error] = 'Invalid option: ' + arg
                        return parsed
                else
                        parsed[:filename] = arg
                        parsed[:messages] = args[(i + 1)..-1]
                        return parsed
                end
                i = i + 1
        end
        parsed
end

options = parse_command_line ARGV

if options[:help]
        puts usage
        print "\n" + help
        exit
end

if options[:error]
        $stderr.puts options[:error]
        $stderr.puts usage
        exit 0x80
end

# Apply only the settings that were given, keeping the defaults otherwise.
confdir = options[:confdir] if options[:confdir]
$ignorespamheaders = options[:ignorespamheaders] if options.has_key?(:ignorespamheaders)
$weed_count = options[:weed_count] if options[:weed_count]
$weed_threshold = options[:weed_threshold] if options[:weed_threshold]
filename = options[:filename]
messages = options[:messages]
## Make sure a wordlist was named, and locate it

# Without a wordlist there is nothing to update: report and exit with 0x80.
if filename.nil?
        $stderr.puts 'No wordlist specified'
        $stderr.puts usage
        exit 0x80
end

# A name without any '/' is looked up in the configuration directory.
unless filename.include? '/'
        filename = confdir + '/' + filename
end
## Load the existing wordlist, or start with an empty one

if File.exist? filename
        $stderr.print "Loading #{filename}..."
        # The block form closes the file even when load_wordlist raises.
        wordlist = File.open(filename) { |fh| load_wordlist fh }
        words = wordlist[:words]
        message_count = wordlist[:messages]
        $stderr.puts words.length.to_s + ' words loaded'
else
        # Diagnostics go to standard error, terminated by a newline.
        $stderr.puts filename + ' not found, will create new file'
        words = {}
        message_count = 0
end
## Add the messages to the wordlist

# Each message file named on the command line counts as one message; when
# none were named, a single message is read from standard input.
if messages.empty?
        add_stream words, $stdin
        message_count = message_count + 1
else
        messages.each do |x|
                $stderr.puts "Adding #{x}"
                # The block form closes the file even when add_stream raises.
                File.open(x) { |fh| add_stream words, fh }
                message_count = message_count + 1
        end
end
## Write the updated wordlist

# Structure handed to dump_wordlist: the message count and the word counts.
wordlist = {
        :messages => message_count,
        :words => words
}

$stderr.print "Writing #{filename}..."
# The block form closes the file even when dump_wordlist raises.
File.open(filename, 'w') { |fh| dump_wordlist wordlist, fh }
$stderr.puts 'done'