# Default directory for configuration files, built from the HOME environment
# variable.
# NOTE(review): ENV['HOME'] is nil when HOME is unset, and calling + on nil
# raises NoMethodError; ENV.fetch('HOME') would fail with a clearer message.
5 confdir = ENV['HOME'] + '/settings/mailvisa'
# Global flag for X-Spam header handling, initially true.
# NOTE(review): presumably cleared by the -i option; the option-parsing lines
# are not visible here, so confirm.
6 $ignorespamheaders = true
# Updates the words table (word => occurrence count) with tokens taken from
# lines of input; the lines presumably come from iterating stream.
# NOTE(review): several lines of this method are not visible in this view
# (the loop header, the assignment of count, the closing ends), so the
# comments below describe only the statements that are shown.
13 def add_stream words, stream
# Skip any line whose first six characters are 'X-Spam'.
# NOTE(review): this visible condition does not consult $ignorespamheaders;
# confirm the flag is honoured on a line not shown.
15 next if line[0,6] == 'X-Spam'
# tokenize is defined outside this view; each element it yields is treated
# as one word.
16 tokenize(line).each do |word|
# Words longer than 40 characters are not counted.
17 next if word.length > 40
# Store 1 for a word with no previous count, otherwise increment it.
# count is assigned on a line not shown (presumably words[word]).
19 words[word] = if count == nil then 1 else count + 1 end
# Triggers whenever the table size is a multiple of $weed_count.
# NOTE(review): the help text says -w 0 disables weeding, but a modulo by
# zero raises ZeroDivisionError; confirm a guard exists on a line not shown.
21 if words.length % $weed_count == 0
# Remove every word whose count is at or below $weed_threshold.
# NOTE(review): this deletes from the hash while iterating over it;
# Hash#delete_if expresses the same removal directly.
32 words.each { |k,v| words.delete k if v <= $weed_threshold }
# Number of entries removed. count is assigned on a line not shown
# (presumably the table size before weeding; confirm).
33 weeded = count - words.length
# NOTE(review): this progress message goes to stdout, while the other status
# messages visible in this file go to $stderr.
34 puts "#{weeded} words discarded"
# Double the weeding interval when fewer than a tenth of $weed_count entries
# were discarded (integer division).
35 $weed_count = $weed_count * 2 if weeded < $weed_count / 10
# One-line usage banner; $0 is the name the script was invoked with.
# The option descriptions that follow look like string content (the
# delimiters are not visible here), so no comments are placed among them.
39 usage = 'USAGE: ' + $0 + ' [options] <wordlist> [<message> ...]'
44 -c <path> Look for configuration files in <path>
45 (default: $HOME/settings/mailvisa)
46 -i Include X-Spam headers in analysis
47 -w <num> Weed wordlist every <num> words (default: 100000
48 use 0 to disable weeding)
49 -t <num> Weed words that occur <num> or fewer times (default: 1)
# NOTE(review): only part of the main script is visible in this view; the
# surrounding loop, else and end lines are not shown.
54 ## Process command line
# Report an argument that was not recognised as an option.
66 $stderr.puts 'Invalid option: ' + ARGV[i]
# Everything from index i to the end of ARGV is kept as the message list.
72 messages = ARGV[i..-1]
79 $stderr.puts 'No wordlist specified'
# A wordlist name containing no '/' is looked up inside confdir.
84 filename = confdir + '/' + filename if filename.index('/') == nil
# NOTE(review): written to stdout with no trailing newline, unlike the
# neighbouring status messages, which go to $stderr.
89 print filename + ' not found, will create new file'
# Load the existing wordlist. load_wordlist is defined outside this view;
# its result is read through the :words and :messages keys.
94 $stderr.print "Loading #{filename}..."
95 wordlist = load_wordlist fh
97 words = wordlist[:words]
98 message_count = wordlist[:messages]
99 $stderr.puts words.length.to_s + ' words loaded'
# When message arguments were given, each one is announced and counted; x
# is bound on a line not shown (presumably the current message path).
105 if messages.length > 0
107 $stderr.puts "Adding #{x}"
111 message_count = message_count + 1
# Read a single message from standard input and count it. Presumably this
# is the branch taken when no message arguments were given; the else line
# is not visible, so confirm.
114 add_stream words, $stdin
115 message_count = message_count + 1
# Entry of a hash literal whose other lines are not shown; it carries the
# updated message count.
119 :messages => message_count,
# Write the wordlist back to filename. dump_wordlist is defined outside this
# view.
# NOTE(review): no close of fh is visible here; the block form of File.open
# would close the file even if dump_wordlist raises.
123 $stderr.print "Writing #{filename}..."
124 fh = open filename, 'w'
125 dump_wordlist wordlist, fh