15 ['ninteen', '19'], # Common mis-spelling
20 ['four(\W|$)', '4\1'], # The weird regex is so that it matches four but not fourty
23 ['seven(\W|$)', '7\1'],
24 ['eight(\W|$)', '8\1'],
25 ['nine(\W|$)', '9\1'],
27 ['\ba[\b^$]', '1'] # doesn't make sense for an 'a' at the end to be a 1
30 TEN_PREFIXES = [ ['twenty', 20],
40 BIG_PREFIXES = [ ['hundred', 100],
42 ['million', 1_000_000],
43 ['billion', 1_000_000_000],
44 ['trillion', 1_000_000_000_000],
52 string.gsub!(/ +|([^\d])-([^d])/, '\1 \2') # will mutilate hyphenated-words but shouldn't matter for date extraction
53 string.gsub!(/a half/, 'haAlf') # take the 'a' out so it doesn't turn into a 1, save the half for the end
55 # easy/direct replacements
57 DIRECT_NUMS.each do |dn|
58 string.gsub!(/#{dn[0]}/i, dn[1])
63 TEN_PREFIXES.each do |tp|
64 string.gsub!(/(?:#{tp[0]})( *\d(?=[^\d]|$))*/i) { (tp[1] + $1.to_i).to_s }
67 # hundreds, thousands, millions, etc.
69 BIG_PREFIXES.each do |bp|
70 string.gsub!(/(\d*) *#{bp[0]}/i) { (bp[1] * $1.to_i).to_s}
72 #combine_numbers(string) # There should be a more efficient way to do this
76 # I'm not combining this with the previous block because float addition complicates the strings
77 # (with extraneous .0's and such)
78 string.gsub!(/(\d+)(?: | and |-)*haAlf/i) { ($1.to_f + 0.5).to_s }
85 sc = StringScanner.new(string)
86 while(sc.scan_until(/(\d+)( | and )(\d+)(?=[^\w]|$)/i))
87 if sc[2] =~ /and/ || sc[1].size > sc[3].size
88 string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[3].to_i).to_s
94 # def combine_numbers(string)
95 # sc = StringScanner.new(string)
96 # while(sc.scan_until(/(\d+)(?: | and |-)(\d+)(?=[^\w]|$)/i))
97 # string[(sc.pos - sc.matched_size)..(sc.pos-1)] = (sc[1].to_i + sc[2].to_i).to_s