4 # Parses a string containing a natural language date or time. If the parser
5 # can find a date or time, either a Time or Chronic::Span will be returned
6 # (depending on the value of <tt>:guess</tt>). If no date or time can be found,
7 # +nil+ will be returned.
12 # <tt>:past</tt> or <tt>:future</tt> (defaults to <tt>:future</tt>)
14 # If your string represents a birthday, you can set <tt>:context</tt> to <tt>:past</tt>
15 # and if an ambiguous string is given, it will assume it is in the
16 # past. Specify <tt>:future</tt> or omit to set a future context.
19 # Time (defaults to Time.now)
21 # By setting <tt>:now</tt> to a Time, all computations will be based off
22 # of that time instead of Time.now
25 # +true+ or +false+ (defaults to +true+)
27 # By default, the parser will guess a single point in time for the
28 # given date or time. If you'd rather have the entire time span returned,
29 # set <tt>:guess</tt> to +false+ and a Chronic::Span will be returned.
31 # [<tt>:ambiguous_time_range</tt>]
32 # Integer or <tt>:none</tt> (defaults to <tt>6</tt> (6am-6pm))
34 # If an Integer is given, ambiguous times (like 5:00) will be
35 # assumed to be within the range of that time in the AM to that time
36 # in the PM. For example, if you set it to <tt>7</tt>, then the parser will
37 # look for the time between 7am and 7pm. In the case of 5:00, it would
38 # assume that means 5:00pm. If <tt>:none</tt> is given, no assumption
39 # will be made, and the first matching instance of that time will be used.
41 def parse(text, specified_options = {})
42 # get options and set defaults if necessary
43 default_options = {:context => :future,
46 :ambiguous_time_range => 6}
# Values supplied by the caller take precedence over the defaults.
47 options = default_options.merge specified_options
49 # ensure the specified options are valid
# Only keys present in default_options are accepted; any other key raises
# InvalidArgumentException. That class is declared as a direct subclass of
# Exception, so a bare `rescue` in calling code will not catch it.
50 specified_options.keys.each do |key|
51 default_options.keys.include?(key) || raise(InvalidArgumentException, "#{key} is not a valid option key.")
# NOTE(review): :none passes this check, yet the error message (and the
# option documentation) name only :past and :future as valid values --
# confirm which of the two is intended.
53 [:past, :future, :none].include?(options[:context]) || raise(InvalidArgumentException, "Invalid value ':#{options[:context]}' for :context specified. Valid values are :past and :future.")
55 # store now for later =)
58 # put the text into a normal format to ease scanning
59 text = self.pre_normalize(text)
61 # get base tokens for each word
62 @tokens = self.base_tokenize(text)
64 # scan the tokens with each token scanner
# Repeater is the only scanner that is handed the options hash.
65 [Repeater].each do |tokenizer|
66 @tokens = tokenizer.scan(@tokens, options)
# The remaining scanners are called with the token list alone, in this order.
69 [Grabber, Pointer, Scalar, Ordinal, Separator, TimeZone].each do |tokenizer|
70 @tokens = tokenizer.scan(@tokens)
73 # strip any non-tagged tokens
74 @tokens = @tokens.select { |token| token.tagged? }
# Writes the remaining tokens to standard output between two separator lines.
# NOTE(review): as shown here this prints on every call -- confirm whether it
# is meant to be limited to a debug mode.
77 puts "+---------------------------------------------------"
78 puts "| " + @tokens.to_s
79 puts "+---------------------------------------------------"
82 # do the heavy lifting
# The tagged tokens and the merged options are handed to tokens_to_span.
84 span = self.tokens_to_span(@tokens, options)
90 # guess a time within a span if required
# guess returns nil when span is nil, so this line can return nil as well.
92 return self.guess(span)
98 # Clean up the specified input text by stripping unwanted characters,
99 # converting idioms to their canonical form, converting number words
100 # to numbers (three => 3), and converting ordinal words to numeric
101 # ordinals (third => 3rd)
def pre_normalize(text) #:nodoc:
  # Lowercase the input (nil and other non-String values go through #to_s)
  # and turn number words into digits before any pattern below is applied.
  normalized_text = numericize_numbers(text.to_s.downcase)

  # Drop quotes and periods, then pad date/time separators with spaces so
  # each separator stands alone once the text is split on spaces.
  normalized_text.gsub!(/['"\.]/, '')
  normalized_text.gsub!(/([\/\-\,\@])/, ' \1 ')

  # Rewrite idioms into canonical phrases. The order is significant:
  # "before now" must be replaced before the bare "now" and "before" rules
  # run, and "this past"/"this last" rely on "ago"/"before" already having
  # been turned into "past".
  normalized_text.gsub!(/\btoday\b/, 'this day')
  normalized_text.gsub!(/\btomm?orr?ow\b/, 'next day')
  normalized_text.gsub!(/\byesterday\b/, 'last day')
  normalized_text.gsub!(/\bnoon\b/, '12:00')
  normalized_text.gsub!(/\bmidnight\b/, '24:00')
  normalized_text.gsub!(/\bbefore now\b/, 'past')
  normalized_text.gsub!(/\bnow\b/, 'this second')
  normalized_text.gsub!(/\b(ago|before)\b/, 'past')
  normalized_text.gsub!(/\bthis past\b/, 'last')
  normalized_text.gsub!(/\bthis last\b/, 'last')
  normalized_text.gsub!(/\b(?:in|during) the (morning)\b/, '\1')
  normalized_text.gsub!(/\b(?:in the|during the|at) (afternoon|evening|night)\b/, '\1')
  normalized_text.gsub!(/\btonight\b/, 'this night')

  # Detach a meridian or "oclock" that is glued to a number ("5pm" => "5 pm").
  # The digit is matched explicitly: the former (?=\w) lookahead was always
  # satisfied by the suffix's own first letter, so ordinary words ending in
  # "am"/"pm" were split too ("exam" => "ex am").
  normalized_text.gsub!(/(\d)([ap]m|oclock)\b/, '\1 \2')
  normalized_text.gsub!(/\b(hence|after|from)\b/, 'future')

  # Ordinal words are converted last; that result is the return value.
  numericize_ordinals(normalized_text)
end
125 # Convert number words to numbers (three => 3)
126 def numericize_numbers(text) #:nodoc:
# Thin wrapper: the conversion is done entirely by Numerizer.numerize, and
# whatever that call returns for +text+ is this method's return value.
127 Numerizer.numerize(text)
130 # Convert ordinal words to numeric ordinals (third => 3rd)
# pre_normalize calls this with its partially normalized text and assigns the
# result back to normalized_text as its final visible step.
# NOTE(review): the body of this method is not visible here -- verify the
# conversion against the implementation.
131 def numericize_ordinals(text) #:nodoc:
135 # Split the text on spaces and convert each word into a Token
def base_tokenize(text) #:nodoc:
  # One Token per space-delimited word, kept in the order the words appear.
  words = text.split(' ')
  words.collect do |word|
    Token.new(word)
  end
end
141 # Guess a specific time within the given span
142 def guess(span) #:nodoc:
# A nil span yields nil.
143 return nil if span.nil?
# Midpoint of the span: its beginning plus half of its width.
# NOTE(review): presumably span is a Span whose width is an Integer number of
# seconds, in which case the division truncates -- confirm.
145 span.begin + (span.width / 2)
153 attr_accessor :word, :tags
160 # Tag this token with the specified tag
165 # Remove all tags of the given class
167 @tags = @tags.select { |m| !m.kind_of? tag_class }
170 # Return true if this token has any tags
175 # Return the Tag that matches the given class
176 def get_tag(tag_class)
# Collect every tag on this token that is kind_of? tag_class, which also
# includes instances of its subclasses.
177 matches = @tags.select { |m| m.kind_of? tag_class }
# The uniqueness check below is commented out, so more than one matching tag
# does not raise.
# NOTE(review): the statement that produces the return value is not visible
# here.
178 #matches.size < 2 || raise("Multiple identical tags found")
182 # Print this Token in a pretty way
# NOTE(review): String#<< appends in place, so (assuming @word is a String)
# this expression modifies the token's stored word: every evaluation leaves
# another "(tags) " suffix on @word. Building a new string instead, e.g.
# "#{@word}(#{@tags.join(', ')}) ", would leave @word untouched.
184 @word << '(' << @tags.join(', ') << ') '
188 # A Span represents a range of time. Since this class extends
189 # Range, you can use #begin and #end to get the beginning and
190 # ending times of the span (they will be of class Time)
192 # Returns the width of this span in seconds
194 (self.end - self.begin).to_i
197 # Add a number of seconds to this span, returning the resulting Span
200 Span.new(self.begin + seconds, self.end + seconds)
203 # Subtract a number of seconds from this span, returning the resulting Span
209 # Prints this span in a nice fashion
211 '(' << self.begin.to_s << '..' << self.end.to_s << ')'
215 # Tokens are tagged with subclassed instances of this class when
216 # they match specific criteria
# NOTE(review): this class derives from Exception rather than StandardError,
# so a bare `rescue` (which handles StandardError only) will not catch it --
# confirm that this is intended.
230 class ChronicPain < Exception #:nodoc:
234 # This exception is raised if an invalid argument is provided to
235 # any of Chronic's methods
236 class InvalidArgumentException < Exception