Merge branch 'gem'
[fuzed.git] / helloworld / vendor / gems / chronic-0.2.2 / lib / numerizer / numerizer.rb
blob8b02e62603a7579b2290a4977d7bbc6afdf126c2
1 require 'strscan'
3 class Numerizer
5   DIRECT_NUMS = [
6                   ['eleven', '11'],
7                   ['twelve', '12'],
8                   ['thirteen', '13'],
9                   ['fourteen', '14'],
10                   ['fifteen', '15'],
11                   ['sixteen', '16'],
12                   ['seventeen', '17'],
13                   ['eighteen', '18'],
14                   ['nineteen', '19'],
15                   ['ninteen', '19'], # Common mis-spelling
16                   ['zero', '0'],
17                   ['one', '1'],
18                   ['two', '2'],
19                   ['three', '3'],
20                   ['four(\W|$)', '4\1'],  # The weird regex is so that it matches four but not fourty
21                   ['five', '5'],
22                   ['six(\W|$)', '6\1'],
23                   ['seven(\W|$)', '7\1'],
24                   ['eight(\W|$)', '8\1'],
25                   ['nine(\W|$)', '9\1'],
26                   ['ten', '10'],
27                   ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
28                 ]
30   TEN_PREFIXES = [ ['twenty', 20],
31                     ['thirty', 30],
32                     ['fourty', 40],
33                     ['fifty', 50],
34                     ['sixty', 60],
35                     ['seventy', 70],
36                     ['eighty', 80],
37                     ['ninety', 90]
38                   ]
40   BIG_PREFIXES = [ ['hundred', 100],
41                     ['thousand', 1000],
42                     ['million', 1_000_000],
43                     ['billion', 1_000_000_000],
44                     ['trillion', 1_000_000_000_000],
45                   ]
47 class << self
48   def numerize(string)
49     string = string.dup
50   
51     # preprocess
52     string.gsub!(/ +|([^\d])-([^d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
53     string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end
55     # easy/direct replacements
56   
57     DIRECT_NUMS.each do |dn|
58       string.gsub!(/#{dn[0]}/i, dn[1])
59     end
61     # ten, twenty, etc.
63     TEN_PREFIXES.each do |tp|
64       string.gsub!(/(?:#{tp[0]})( *\d(?=[^\d]|$))*/i) { (tp[1] + $1.to_i).to_s }
65     end
67     # hundreds, thousands, millions, etc.
69     BIG_PREFIXES.each do |bp|
70       string.gsub!(/(\d*) *#{bp[0]}/i) { (bp[1] * $1.to_i).to_s}
71       andition(string)
72       #combine_numbers(string) # Should to be more efficient way to do this
73     end
75     # fractional addition
76     # I'm not combining this with the previous block as using float addition complicates the strings
77     # (with extraneous .0's and such )
78     string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }
80     string
81   end
83 private
84   def andition(string)
85     sc = StringScanner.new(string)
86     while(sc.scan_until(/(\d+)( | and )(\d+)(?=[^\w]|$)/i))
87       if sc[2] =~ /and/ || sc[1].size > sc[3].size
88         string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[3].to_i).to_s
89         sc.reset
90       end
91     end
92   end
94 #  def combine_numbers(string)
95 #    sc = StringScanner.new(string)
96 #    while(sc.scan_until(/(\d+)(?: | and |-)(\d+)(?=[^\w]|$)/i))
97 #      string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[2].to_i).to_s
98 #      sc.reset
99 #    end
100 #  end