Prepare for python 3.0: minimize "types" module where possible (gbrandl).
[docutils.git] / docutils / statemachine.py
blob40630efe6330b2f0c7f3c8f3d68a733b2006b191
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 A finite state machine specialized for regular-expression-based text filters,
7 this module defines the following classes:
9 - `StateMachine`, a state machine
10 - `State`, a state superclass
11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12 - `StateWS`, a state superclass for use with `StateMachineWS`
13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15 - `ViewList`, extends standard Python lists.
16 - `StringList`, string-specific ViewList.
18 Exception classes:
20 - `StateMachineError`
21 - `UnknownStateError`
22 - `DuplicateStateError`
23 - `UnknownTransitionError`
24 - `DuplicateTransitionError`
25 - `TransitionPatternNotFound`
26 - `TransitionMethodNotFound`
27 - `UnexpectedIndentationError`
28 - `TransitionCorrection`: Raised to switch to another transition.
29 - `StateCorrection`: Raised to switch to another state & transition.
31 Functions:
33 - `string2lines()`: split a multi-line string into a list of one-line strings
36 How To Use This Module
37 ======================
38 (See the individual classes, methods, and attributes for details.)
40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
44 machine::
46 class MyState(statemachine.State):
48 Within the state's class definition:
50 a) Include a pattern for each transition, in `State.patterns`::
52 patterns = {'atransition': r'pattern', ...}
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
57 initial_transitions = ['atransition', ...]
59 c) Define a method for each transition, with the same name as the
60 transition pattern::
62 def atransition(self, match, context, next_state):
63 # do something
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
68 Transition methods may raise an `EOFError` to cut processing short.
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
86 3. Create a state machine object::
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
91 4. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
93 'inputfile'::
95 input_string = open('inputfile').read()
96 input_lines = statemachine.string2lines(input_string)
98 5. Run the state machine on the input text and collect the results, a list::
100 results = sm.run(input_lines)
102 6. Remove any lingering circular references::
104 sm.unlink()
107 __docformat__ = 'restructuredtext'
109 import sys
110 import re
111 import types
112 import unicodedata
115 class StateMachine:
118 A finite state machine for text filters using regular expressions.
120 The input is provided in the form of a list of one-line strings (no
121 newlines). States are subclasses of the `State` class. Transitions consist
122 of regular expression patterns and transition methods, and are defined in
123 each state.
125 The state machine is started with the `run()` method, which returns the
126 results of processing in a list.
129 def __init__(self, state_classes, initial_state, debug=0):
131 Initialize a `StateMachine` object; add state objects.
133 Parameters:
135 - `state_classes`: a list of `State` (sub)classes.
136 - `initial_state`: a string, the class name of the initial state.
137 - `debug`: a boolean; produce verbose output if true (nonzero).
140 self.input_lines = None
141 """`StringList` of input lines (without newlines).
142 Filled by `self.run()`."""
144 self.input_offset = 0
145 """Offset of `self.input_lines` from the beginning of the file."""
147 self.line = None
148 """Current input line."""
150 self.line_offset = -1
151 """Current input line offset from beginning of `self.input_lines`."""
153 self.debug = debug
154 """Debugging mode on/off."""
156 self.initial_state = initial_state
157 """The name of the initial state (key to `self.states`)."""
159 self.current_state = initial_state
160 """The name of the current state (key to `self.states`)."""
162 self.states = {}
163 """Mapping of {state_name: State_object}."""
165 self.add_states(state_classes)
167 self.observers = []
168 """List of bound methods or functions to call whenever the current
169 line changes. Observers are called with one argument, ``self``.
170 Cleared at the end of `run()`."""
172 def unlink(self):
173 """Remove circular references to objects no longer required."""
174 for state in self.states.values():
175 state.unlink()
176 self.states = None
178 def run(self, input_lines, input_offset=0, context=None,
179 input_source=None):
181 Run the state machine on `input_lines`. Return results (a list).
183 Reset `self.line_offset` and `self.current_state`. Run the
184 beginning-of-file transition. Input one line at a time and check for a
185 matching transition. If a match is found, call the transition method
186 and possibly change the state. Store the context returned by the
187 transition method to be passed on to the next transition matched.
188 Accumulate the results returned by the transition methods in a list.
189 Run the end-of-file transition. Finally, return the accumulated
190 results.
192 Parameters:
194 - `input_lines`: a list of strings without newlines, or `StringList`.
195 - `input_offset`: the line offset of `input_lines` from the beginning
196 of the file.
197 - `context`: application-specific storage.
198 - `input_source`: name or path of source of `input_lines`.
200 self.runtime_init()
201 if isinstance(input_lines, StringList):
202 self.input_lines = input_lines
203 else:
204 self.input_lines = StringList(input_lines, source=input_source)
205 self.input_offset = input_offset
206 self.line_offset = -1
207 self.current_state = self.initial_state
208 if self.debug:
209 print >>sys.stderr, (
210 '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
211 % (self.line_offset, '\n| '.join(self.input_lines)))
212 transitions = None
213 results = []
214 state = self.get_state()
215 try:
216 if self.debug:
217 print >>sys.stderr, ('\nStateMachine.run: bof transition')
218 context, result = state.bof(context)
219 results.extend(result)
220 while 1:
221 try:
222 try:
223 self.next_line()
224 if self.debug:
225 source, offset = self.input_lines.info(
226 self.line_offset)
227 print >>sys.stderr, (
228 '\nStateMachine.run: line (source=%r, '
229 'offset=%r):\n| %s'
230 % (source, offset, self.line))
231 context, next_state, result = self.check_line(
232 context, state, transitions)
233 except EOFError:
234 if self.debug:
235 print >>sys.stderr, (
236 '\nStateMachine.run: %s.eof transition'
237 % state.__class__.__name__)
238 result = state.eof(context)
239 results.extend(result)
240 break
241 else:
242 results.extend(result)
243 except TransitionCorrection, exception:
244 self.previous_line() # back up for another try
245 transitions = (exception.args[0],)
246 if self.debug:
247 print >>sys.stderr, (
248 '\nStateMachine.run: TransitionCorrection to '
249 'state "%s", transition %s.'
250 % (state.__class__.__name__, transitions[0]))
251 continue
252 except StateCorrection, exception:
253 self.previous_line() # back up for another try
254 next_state = exception.args[0]
255 if len(exception.args) == 1:
256 transitions = None
257 else:
258 transitions = (exception.args[1],)
259 if self.debug:
260 print >>sys.stderr, (
261 '\nStateMachine.run: StateCorrection to state '
262 '"%s", transition %s.'
263 % (next_state, transitions[0]))
264 else:
265 transitions = None
266 state = self.get_state(next_state)
267 except:
268 if self.debug:
269 self.error()
270 raise
271 self.observers = []
272 return results
274 def get_state(self, next_state=None):
276 Return current state object; set it first if `next_state` given.
278 Parameter `next_state`: a string, the name of the next state.
280 Exception: `UnknownStateError` raised if `next_state` unknown.
282 if next_state:
283 if self.debug and next_state != self.current_state:
284 print >>sys.stderr, \
285 ('\nStateMachine.get_state: Changing state from '
286 '"%s" to "%s" (input line %s).'
287 % (self.current_state, next_state,
288 self.abs_line_number()))
289 self.current_state = next_state
290 try:
291 return self.states[self.current_state]
292 except KeyError:
293 raise UnknownStateError(self.current_state)
295 def next_line(self, n=1):
296 """Load `self.line` with the `n`'th next line and return it."""
297 try:
298 try:
299 self.line_offset += n
300 self.line = self.input_lines[self.line_offset]
301 except IndexError:
302 self.line = None
303 raise EOFError
304 return self.line
305 finally:
306 self.notify_observers()
308 def is_next_line_blank(self):
309 """Return 1 if the next line is blank or non-existant."""
310 try:
311 return not self.input_lines[self.line_offset + 1].strip()
312 except IndexError:
313 return 1
315 def at_eof(self):
316 """Return 1 if the input is at or past end-of-file."""
317 return self.line_offset >= len(self.input_lines) - 1
319 def at_bof(self):
320 """Return 1 if the input is at or before beginning-of-file."""
321 return self.line_offset <= 0
323 def previous_line(self, n=1):
324 """Load `self.line` with the `n`'th previous line and return it."""
325 self.line_offset -= n
326 if self.line_offset < 0:
327 self.line = None
328 else:
329 self.line = self.input_lines[self.line_offset]
330 self.notify_observers()
331 return self.line
333 def goto_line(self, line_offset):
334 """Jump to absolute line offset `line_offset`, load and return it."""
335 try:
336 try:
337 self.line_offset = line_offset - self.input_offset
338 self.line = self.input_lines[self.line_offset]
339 except IndexError:
340 self.line = None
341 raise EOFError
342 return self.line
343 finally:
344 self.notify_observers()
346 def get_source(self, line_offset):
347 """Return source of line at absolute line offset `line_offset`."""
348 return self.input_lines.source(line_offset - self.input_offset)
350 def abs_line_offset(self):
351 """Return line offset of current line, from beginning of file."""
352 return self.line_offset + self.input_offset
354 def abs_line_number(self):
355 """Return line number of current line (counting from 1)."""
356 return self.line_offset + self.input_offset + 1
358 def insert_input(self, input_lines, source):
359 self.input_lines.insert(self.line_offset + 1, '',
360 source='internal padding')
361 self.input_lines.insert(self.line_offset + 1, '',
362 source='internal padding')
363 self.input_lines.insert(self.line_offset + 2,
364 StringList(input_lines, source))
366 def get_text_block(self, flush_left=0):
368 Return a contiguous block of text.
370 If `flush_left` is true, raise `UnexpectedIndentationError` if an
371 indented line is encountered before the text block ends (with a blank
372 line).
374 try:
375 block = self.input_lines.get_text_block(self.line_offset,
376 flush_left)
377 self.next_line(len(block) - 1)
378 return block
379 except UnexpectedIndentationError, error:
380 block, source, lineno = error.args
381 self.next_line(len(block) - 1) # advance to last line of block
382 raise
384 def check_line(self, context, state, transitions=None):
386 Examine one line of input for a transition match & execute its method.
388 Parameters:
390 - `context`: application-dependent storage.
391 - `state`: a `State` object, the current state.
392 - `transitions`: an optional ordered list of transition names to try,
393 instead of ``state.transition_order``.
395 Return the values returned by the transition method:
397 - context: possibly modified from the parameter `context`;
398 - next state name (`State` subclass name);
399 - the result output of the transition, a list.
401 When there is no match, ``state.no_match()`` is called and its return
402 value is returned.
404 if transitions is None:
405 transitions = state.transition_order
406 state_correction = None
407 if self.debug:
408 print >>sys.stderr, (
409 '\nStateMachine.check_line: state="%s", transitions=%r.'
410 % (state.__class__.__name__, transitions))
411 for name in transitions:
412 pattern, method, next_state = state.transitions[name]
413 match = self.match(pattern)
414 if match:
415 if self.debug:
416 print >>sys.stderr, (
417 '\nStateMachine.check_line: Matched transition '
418 '"%s" in state "%s".'
419 % (name, state.__class__.__name__))
420 return method(match, context, next_state)
421 else:
422 if self.debug:
423 print >>sys.stderr, (
424 '\nStateMachine.check_line: No match in state "%s".'
425 % state.__class__.__name__)
426 return state.no_match(context, transitions)
428 def match(self, pattern):
430 Return the result of a regular expression match.
432 Parameter `pattern`: an `re` compiled regular expression.
434 return pattern.match(self.line)
436 def add_state(self, state_class):
438 Initialize & add a `state_class` (`State` subclass) object.
440 Exception: `DuplicateStateError` raised if `state_class` was already
441 added.
443 statename = state_class.__name__
444 if statename in self.states:
445 raise DuplicateStateError(statename)
446 self.states[statename] = state_class(self, self.debug)
448 def add_states(self, state_classes):
450 Add `state_classes` (a list of `State` subclasses).
452 for state_class in state_classes:
453 self.add_state(state_class)
455 def runtime_init(self):
457 Initialize `self.states`.
459 for state in self.states.values():
460 state.runtime_init()
462 def error(self):
463 """Report error details."""
464 type, value, module, line, function = _exception_data()
465 print >>sys.stderr, '%s: %s' % (type, value)
466 print >>sys.stderr, 'input line %s' % (self.abs_line_number())
467 print >>sys.stderr, ('module %s, line %s, function %s'
468 % (module, line, function))
470 def attach_observer(self, observer):
472 The `observer` parameter is a function or bound method which takes two
473 arguments, the source and offset of the current line.
475 self.observers.append(observer)
477 def detach_observer(self, observer):
478 self.observers.remove(observer)
480 def notify_observers(self):
481 for observer in self.observers:
482 try:
483 info = self.input_lines.info(self.line_offset)
484 except IndexError:
485 info = (None, None)
486 observer(*info)
489 class State:
492 State superclass. Contains a list of transitions, and transition methods.
494 Transition methods all have the same signature. They take 3 parameters:
496 - An `re` match object. ``match.string`` contains the matched input line,
497 ``match.start()`` gives the start index of the match, and
498 ``match.end()`` gives the end index.
499 - A context object, whose meaning is application-defined (initial value
500 ``None``). It can be used to store any information required by the state
501 machine, and the retured context is passed on to the next transition
502 method unchanged.
503 - The name of the next state, a string, taken from the transitions list;
504 normally it is returned unchanged, but it may be altered by the
505 transition method if necessary.
507 Transition methods all return a 3-tuple:
509 - A context object, as (potentially) modified by the transition method.
510 - The next state name (a return value of ``None`` means no state change).
511 - The processing result, a list, which is accumulated by the state
512 machine.
514 Transition methods may raise an `EOFError` to cut processing short.
516 There are two implicit transitions, and corresponding transition methods
517 are defined: `bof()` handles the beginning-of-file, and `eof()` handles
518 the end-of-file. These methods have non-standard signatures and return
519 values. `bof()` returns the initial context and results, and may be used
520 to return a header string, or do any other processing needed. `eof()`
521 should handle any remaining context and wrap things up; it returns the
522 final processing result.
524 Typical applications need only subclass `State` (or a subclass), set the
525 `patterns` and `initial_transitions` class attributes, and provide
526 corresponding transition methods. The default object initialization will
527 take care of constructing the list of transitions.
530 patterns = None
532 {Name: pattern} mapping, used by `make_transition()`. Each pattern may
533 be a string or a compiled `re` pattern. Override in subclasses.
536 initial_transitions = None
538 A list of transitions to initialize when a `State` is instantiated.
539 Each entry is either a transition name string, or a (transition name, next
540 state name) pair. See `make_transitions()`. Override in subclasses.
543 nested_sm = None
545 The `StateMachine` class for handling nested processing.
547 If left as ``None``, `nested_sm` defaults to the class of the state's
548 controlling state machine. Override it in subclasses to avoid the default.
551 nested_sm_kwargs = None
553 Keyword arguments dictionary, passed to the `nested_sm` constructor.
555 Two keys must have entries in the dictionary:
557 - Key 'state_classes' must be set to a list of `State` classes.
558 - Key 'initial_state' must be set to the name of the initial state class.
560 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
561 class of the current state, and 'initial_state' defaults to the name of
562 the class of the current state. Override in subclasses to avoid the
563 defaults.
566 def __init__(self, state_machine, debug=0):
568 Initialize a `State` object; make & add initial transitions.
570 Parameters:
572 - `statemachine`: the controlling `StateMachine` object.
573 - `debug`: a boolean; produce verbose output if true (nonzero).
576 self.transition_order = []
577 """A list of transition names in search order."""
579 self.transitions = {}
581 A mapping of transition names to 3-tuples containing
582 (compiled_pattern, transition_method, next_state_name). Initialized as
583 an instance attribute dynamically (instead of as a class attribute)
584 because it may make forward references to patterns and methods in this
585 or other classes.
588 self.add_initial_transitions()
590 self.state_machine = state_machine
591 """A reference to the controlling `StateMachine` object."""
593 self.debug = debug
594 """Debugging mode on/off."""
596 if self.nested_sm is None:
597 self.nested_sm = self.state_machine.__class__
598 if self.nested_sm_kwargs is None:
599 self.nested_sm_kwargs = {'state_classes': [self.__class__],
600 'initial_state': self.__class__.__name__}
602 def runtime_init(self):
604 Initialize this `State` before running the state machine; called from
605 `self.state_machine.run()`.
607 pass
609 def unlink(self):
610 """Remove circular references to objects no longer required."""
611 self.state_machine = None
613 def add_initial_transitions(self):
614 """Make and add transitions listed in `self.initial_transitions`."""
615 if self.initial_transitions:
616 names, transitions = self.make_transitions(
617 self.initial_transitions)
618 self.add_transitions(names, transitions)
620 def add_transitions(self, names, transitions):
622 Add a list of transitions to the start of the transition list.
624 Parameters:
626 - `names`: a list of transition names.
627 - `transitions`: a mapping of names to transition tuples.
629 Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
631 for name in names:
632 if name in self.transitions:
633 raise DuplicateTransitionError(name)
634 if name not in transitions:
635 raise UnknownTransitionError(name)
636 self.transition_order[:0] = names
637 self.transitions.update(transitions)
639 def add_transition(self, name, transition):
641 Add a transition to the start of the transition list.
643 Parameter `transition`: a ready-made transition 3-tuple.
645 Exception: `DuplicateTransitionError`.
647 if name in self.transitions:
648 raise DuplicateTransitionError(name)
649 self.transition_order[:0] = [name]
650 self.transitions[name] = transition
652 def remove_transition(self, name):
654 Remove a transition by `name`.
656 Exception: `UnknownTransitionError`.
658 try:
659 del self.transitions[name]
660 self.transition_order.remove(name)
661 except:
662 raise UnknownTransitionError(name)
664 def make_transition(self, name, next_state=None):
666 Make & return a transition tuple based on `name`.
668 This is a convenience function to simplify transition creation.
670 Parameters:
672 - `name`: a string, the name of the transition pattern & method. This
673 `State` object must have a method called '`name`', and a dictionary
674 `self.patterns` containing a key '`name`'.
675 - `next_state`: a string, the name of the next `State` object for this
676 transition. A value of ``None`` (or absent) implies no state change
677 (i.e., continue with the same state).
679 Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
681 if next_state is None:
682 next_state = self.__class__.__name__
683 try:
684 pattern = self.patterns[name]
685 if not hasattr(pattern, 'match'):
686 pattern = re.compile(pattern)
687 except KeyError:
688 raise TransitionPatternNotFound(
689 '%s.patterns[%r]' % (self.__class__.__name__, name))
690 try:
691 method = getattr(self, name)
692 except AttributeError:
693 raise TransitionMethodNotFound(
694 '%s.%s' % (self.__class__.__name__, name))
695 return (pattern, method, next_state)
697 def make_transitions(self, name_list):
699 Return a list of transition names and a transition mapping.
701 Parameter `name_list`: a list, where each entry is either a transition
702 name string, or a 1- or 2-tuple (transition name, optional next state
703 name).
705 stringtype = type('')
706 names = []
707 transitions = {}
708 for namestate in name_list:
709 if type(namestate) is stringtype:
710 transitions[namestate] = self.make_transition(namestate)
711 names.append(namestate)
712 else:
713 transitions[namestate[0]] = self.make_transition(*namestate)
714 names.append(namestate[0])
715 return names, transitions
717 def no_match(self, context, transitions):
719 Called when there is no match from `StateMachine.check_line()`.
721 Return the same values returned by transition methods:
723 - context: unchanged;
724 - next state name: ``None``;
725 - empty result list.
727 Override in subclasses to catch this event.
729 return context, None, []
731 def bof(self, context):
733 Handle beginning-of-file. Return unchanged `context`, empty result.
735 Override in subclasses.
737 Parameter `context`: application-defined storage.
739 return context, []
741 def eof(self, context):
743 Handle end-of-file. Return empty result.
745 Override in subclasses.
747 Parameter `context`: application-defined storage.
749 return []
751 def nop(self, match, context, next_state):
753 A "do nothing" transition method.
755 Return unchanged `context` & `next_state`, empty result. Useful for
756 simple state changes (actionless transitions).
758 return context, next_state, []
761 class StateMachineWS(StateMachine):
764 `StateMachine` subclass specialized for whitespace recognition.
766 There are three methods provided for extracting indented text blocks:
768 - `get_indented()`: use when the indent is unknown.
769 - `get_known_indented()`: use when the indent is known for all lines.
770 - `get_first_known_indented()`: use when only the first line's indent is
771 known.
774 def get_indented(self, until_blank=0, strip_indent=1):
776 Return a block of indented lines of text, and info.
778 Extract an indented block where the indent is unknown for all lines.
780 :Parameters:
781 - `until_blank`: Stop collecting at the first blank line if true
782 (1).
783 - `strip_indent`: Strip common leading indent if true (1,
784 default).
786 :Return:
787 - the indented block (a list of lines of text),
788 - its indent,
789 - its first line offset from BOF, and
790 - whether or not it finished with a blank line.
792 offset = self.abs_line_offset()
793 indented, indent, blank_finish = self.input_lines.get_indented(
794 self.line_offset, until_blank, strip_indent)
795 if indented:
796 self.next_line(len(indented) - 1) # advance to last indented line
797 while indented and not indented[0].strip():
798 indented.trim_start()
799 offset += 1
800 return indented, indent, offset, blank_finish
802 def get_known_indented(self, indent, until_blank=0, strip_indent=1):
804 Return an indented block and info.
806 Extract an indented block where the indent is known for all lines.
807 Starting with the current line, extract the entire text block with at
808 least `indent` indentation (which must be whitespace, except for the
809 first line).
811 :Parameters:
812 - `indent`: The number of indent columns/characters.
813 - `until_blank`: Stop collecting at the first blank line if true
814 (1).
815 - `strip_indent`: Strip `indent` characters of indentation if true
816 (1, default).
818 :Return:
819 - the indented block,
820 - its first line offset from BOF, and
821 - whether or not it finished with a blank line.
823 offset = self.abs_line_offset()
824 indented, indent, blank_finish = self.input_lines.get_indented(
825 self.line_offset, until_blank, strip_indent,
826 block_indent=indent)
827 self.next_line(len(indented) - 1) # advance to last indented line
828 while indented and not indented[0].strip():
829 indented.trim_start()
830 offset += 1
831 return indented, offset, blank_finish
833 def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
834 strip_top=1):
836 Return an indented block and info.
838 Extract an indented block where the indent is known for the first line
839 and unknown for all other lines.
841 :Parameters:
842 - `indent`: The first line's indent (# of columns/characters).
843 - `until_blank`: Stop collecting at the first blank line if true
844 (1).
845 - `strip_indent`: Strip `indent` characters of indentation if true
846 (1, default).
847 - `strip_top`: Strip blank lines from the beginning of the block.
849 :Return:
850 - the indented block,
851 - its indent,
852 - its first line offset from BOF, and
853 - whether or not it finished with a blank line.
855 offset = self.abs_line_offset()
856 indented, indent, blank_finish = self.input_lines.get_indented(
857 self.line_offset, until_blank, strip_indent,
858 first_indent=indent)
859 self.next_line(len(indented) - 1) # advance to last indented line
860 if strip_top:
861 while indented and not indented[0].strip():
862 indented.trim_start()
863 offset += 1
864 return indented, indent, offset, blank_finish
867 class StateWS(State):
870 State superclass specialized for whitespace (blank lines & indents).
872 Use this class with `StateMachineWS`. The transitions 'blank' (for blank
873 lines) and 'indent' (for indented text blocks) are added automatically,
874 before any other transitions. The transition method `blank()` handles
875 blank lines and `indent()` handles nested indented blocks. Indented
876 blocks trigger a new state machine to be created by `indent()` and run.
877 The class of the state machine to be created is in `indent_sm`, and the
878 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
880 The methods `known_indent()` and `firstknown_indent()` are provided for
881 indented blocks where the indent (all lines' and first line's only,
882 respectively) is known to the transition method, along with the attributes
883 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
884 is triggered automatically.
887 indent_sm = None
889 The `StateMachine` class handling indented text blocks.
891 If left as ``None``, `indent_sm` defaults to the value of
892 `State.nested_sm`. Override it in subclasses to avoid the default.
895 indent_sm_kwargs = None
897 Keyword arguments dictionary, passed to the `indent_sm` constructor.
899 If left as ``None``, `indent_sm_kwargs` defaults to the value of
900 `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
903 known_indent_sm = None
905 The `StateMachine` class handling known-indented text blocks.
907 If left as ``None``, `known_indent_sm` defaults to the value of
908 `indent_sm`. Override it in subclasses to avoid the default.
911 known_indent_sm_kwargs = None
913 Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
915 If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
916 `indent_sm_kwargs`. Override it in subclasses to avoid the default.
919 ws_patterns = {'blank': ' *$',
920 'indent': ' +'}
921 """Patterns for default whitespace transitions. May be overridden in
922 subclasses."""
924 ws_initial_transitions = ('blank', 'indent')
925 """Default initial whitespace transitions, added before those listed in
926 `State.initial_transitions`. May be overridden in subclasses."""
928 def __init__(self, state_machine, debug=0):
930 Initialize a `StateSM` object; extends `State.__init__()`.
932 Check for indent state machine attributes, set defaults if not set.
934 State.__init__(self, state_machine, debug)
935 if self.indent_sm is None:
936 self.indent_sm = self.nested_sm
937 if self.indent_sm_kwargs is None:
938 self.indent_sm_kwargs = self.nested_sm_kwargs
939 if self.known_indent_sm is None:
940 self.known_indent_sm = self.indent_sm
941 if self.known_indent_sm_kwargs is None:
942 self.known_indent_sm_kwargs = self.indent_sm_kwargs
944 def add_initial_transitions(self):
946 Add whitespace-specific transitions before those defined in subclass.
948 Extends `State.add_initial_transitions()`.
950 State.add_initial_transitions(self)
951 if self.patterns is None:
952 self.patterns = {}
953 self.patterns.update(self.ws_patterns)
954 names, transitions = self.make_transitions(
955 self.ws_initial_transitions)
956 self.add_transitions(names, transitions)
958 def blank(self, match, context, next_state):
959 """Handle blank lines. Does nothing. Override in subclasses."""
960 return self.nop(match, context, next_state)
962 def indent(self, match, context, next_state):
964 Handle an indented text block. Extend or override in subclasses.
966 Recursively run the registered state machine for indented blocks
967 (`self.indent_sm`).
969 indented, indent, line_offset, blank_finish = \
970 self.state_machine.get_indented()
971 sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
972 results = sm.run(indented, input_offset=line_offset)
973 return context, next_state, results
975 def known_indent(self, match, context, next_state):
977 Handle a known-indent text block. Extend or override in subclasses.
979 Recursively run the registered state machine for known-indent indented
980 blocks (`self.known_indent_sm`). The indent is the length of the
981 match, ``match.end()``.
983 indented, line_offset, blank_finish = \
984 self.state_machine.get_known_indented(match.end())
985 sm = self.known_indent_sm(debug=self.debug,
986 **self.known_indent_sm_kwargs)
987 results = sm.run(indented, input_offset=line_offset)
988 return context, next_state, results
990 def first_known_indent(self, match, context, next_state):
992 Handle an indented text block (first line's indent known).
994 Extend or override in subclasses.
996 Recursively run the registered state machine for known-indent indented
997 blocks (`self.known_indent_sm`). The indent is the length of the
998 match, ``match.end()``.
1000 indented, line_offset, blank_finish = \
1001 self.state_machine.get_first_known_indented(match.end())
1002 sm = self.known_indent_sm(debug=self.debug,
1003 **self.known_indent_sm_kwargs)
1004 results = sm.run(indented, input_offset=line_offset)
1005 return context, next_state, results
1008 class _SearchOverride:
1011 Mix-in class to override `StateMachine` regular expression behavior.
1013 Changes regular expression matching, from the default `re.match()`
1014 (succeeds only if the pattern matches at the start of `self.line`) to
1015 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1016 When subclassing a `StateMachine`, list this class **first** in the
1017 inheritance list of the class definition.
1020 def match(self, pattern):
1022 Return the result of a regular expression search.
1024 Overrides `StateMachine.match()`.
1026 Parameter `pattern`: `re` compiled regular expression.
1028 return pattern.search(self.line)
1031 class SearchStateMachine(_SearchOverride, StateMachine):
1032 """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1033 pass
1036 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1037 """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1038 pass
1041 class ViewList:
1044 List with extended functionality: slices of ViewList objects are child
1045 lists, linked to their parents. Changes made to a child list also affect
1046 the parent list. A child list is effectively a "view" (in the SQL sense)
1047 of the parent list. Changes to parent lists, however, do *not* affect
1048 active child lists. If a parent list is changed, any active child lists
1049 should be recreated.
1051 The start and end of the slice can be trimmed using the `trim_start()` and
1052 `trim_end()` methods, without affecting the parent list. The link between
1053 child and parent lists can be broken by calling `disconnect()` on the
1054 child list.
1056 Also, ViewList objects keep track of the source & offset of each item.
1057 This information is accessible via the `source()`, `offset()`, and
1058 `info()` methods.
1061 def __init__(self, initlist=None, source=None, items=None,
1062 parent=None, parent_offset=None):
1063 self.data = []
1064 """The actual list of data, flattened from various sources."""
1066 self.items = []
1067 """A list of (source, offset) pairs, same length as `self.data`: the
1068 source of each line and the offset of each line from the beginning of
1069 its source."""
1071 self.parent = parent
1072 """The parent list."""
1074 self.parent_offset = parent_offset
1075 """Offset of this list from the beginning of the parent list."""
1077 if isinstance(initlist, ViewList):
1078 self.data = initlist.data[:]
1079 self.items = initlist.items[:]
1080 elif initlist is not None:
1081 self.data = list(initlist)
1082 if items:
1083 self.items = items
1084 else:
1085 self.items = [(source, i) for i in range(len(initlist))]
1086 assert len(self.data) == len(self.items), 'data mismatch'
1088 def __str__(self):
1089 return str(self.data)
1091 def __repr__(self):
1092 return '%s(%s, items=%s)' % (self.__class__.__name__,
1093 self.data, self.items)
1095 def __lt__(self, other): return self.data < self.__cast(other)
1096 def __le__(self, other): return self.data <= self.__cast(other)
1097 def __eq__(self, other): return self.data == self.__cast(other)
1098 def __ne__(self, other): return self.data != self.__cast(other)
1099 def __gt__(self, other): return self.data > self.__cast(other)
1100 def __ge__(self, other): return self.data >= self.__cast(other)
1101 def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1103 def __cast(self, other):
1104 if isinstance(other, ViewList):
1105 return other.data
1106 else:
1107 return other
1109 def __contains__(self, item): return item in self.data
1110 def __len__(self): return len(self.data)
1112 # The __getitem__()/__setitem__() methods check whether the index
1113 # is a slice first, since native list objects start supporting
1114 # them directly in Python 2.3 (no exception is raised when
1115 # indexing a list with a slice object; they just work).
1117 def __getitem__(self, i):
1118 if isinstance(i, types.SliceType):
1119 assert i.step in (None, 1), 'cannot handle slice with stride'
1120 return self.__class__(self.data[i.start:i.stop],
1121 items=self.items[i.start:i.stop],
1122 parent=self, parent_offset=i.start)
1123 else:
1124 return self.data[i]
1126 def __setitem__(self, i, item):
1127 if isinstance(i, types.SliceType):
1128 assert i.step in (None, 1), 'cannot handle slice with stride'
1129 if not isinstance(item, ViewList):
1130 raise TypeError('assigning non-ViewList to ViewList slice')
1131 self.data[i.start:i.stop] = item.data
1132 self.items[i.start:i.stop] = item.items
1133 assert len(self.data) == len(self.items), 'data mismatch'
1134 if self.parent:
1135 self.parent[i.start + self.parent_offset
1136 : i.stop + self.parent_offset] = item
1137 else:
1138 self.data[i] = item
1139 if self.parent:
1140 self.parent[i + self.parent_offset] = item
1142 def __delitem__(self, i):
1143 try:
1144 del self.data[i]
1145 del self.items[i]
1146 if self.parent:
1147 del self.parent[i + self.parent_offset]
1148 except TypeError:
1149 assert i.step is None, 'cannot handle slice with stride'
1150 del self.data[i.start:i.stop]
1151 del self.items[i.start:i.stop]
1152 if self.parent:
1153 del self.parent[i.start + self.parent_offset
1154 : i.stop + self.parent_offset]
1156 def __add__(self, other):
1157 if isinstance(other, ViewList):
1158 return self.__class__(self.data + other.data,
1159 items=(self.items + other.items))
1160 else:
1161 raise TypeError('adding non-ViewList to a ViewList')
1163 def __radd__(self, other):
1164 if isinstance(other, ViewList):
1165 return self.__class__(other.data + self.data,
1166 items=(other.items + self.items))
1167 else:
1168 raise TypeError('adding ViewList to a non-ViewList')
1170 def __iadd__(self, other):
1171 if isinstance(other, ViewList):
1172 self.data += other.data
1173 else:
1174 raise TypeError('argument to += must be a ViewList')
1175 return self
1177 def __mul__(self, n):
1178 return self.__class__(self.data * n, items=(self.items * n))
1180 __rmul__ = __mul__
1182 def __imul__(self, n):
1183 self.data *= n
1184 self.items *= n
1185 return self
1187 def extend(self, other):
1188 if not isinstance(other, ViewList):
1189 raise TypeError('extending a ViewList with a non-ViewList')
1190 if self.parent:
1191 self.parent.insert(len(self.data) + self.parent_offset, other)
1192 self.data.extend(other.data)
1193 self.items.extend(other.items)
1195 def append(self, item, source=None, offset=0):
1196 if source is None:
1197 self.extend(item)
1198 else:
1199 if self.parent:
1200 self.parent.insert(len(self.data) + self.parent_offset, item,
1201 source, offset)
1202 self.data.append(item)
1203 self.items.append((source, offset))
1205 def insert(self, i, item, source=None, offset=0):
1206 if source is None:
1207 if not isinstance(item, ViewList):
1208 raise TypeError('inserting non-ViewList with no source given')
1209 self.data[i:i] = item.data
1210 self.items[i:i] = item.items
1211 if self.parent:
1212 index = (len(self.data) + i) % len(self.data)
1213 self.parent.insert(index + self.parent_offset, item)
1214 else:
1215 self.data.insert(i, item)
1216 self.items.insert(i, (source, offset))
1217 if self.parent:
1218 index = (len(self.data) + i) % len(self.data)
1219 self.parent.insert(index + self.parent_offset, item,
1220 source, offset)
1222 def pop(self, i=-1):
1223 if self.parent:
1224 index = (len(self.data) + i) % len(self.data)
1225 self.parent.pop(index + self.parent_offset)
1226 self.items.pop(i)
1227 return self.data.pop(i)
1229 def trim_start(self, n=1):
1231 Remove items from the start of the list, without touching the parent.
1233 if n > len(self.data):
1234 raise IndexError("Size of trim too large; can't trim %s items "
1235 "from a list of size %s." % (n, len(self.data)))
1236 elif n < 0:
1237 raise IndexError('Trim size must be >= 0.')
1238 del self.data[:n]
1239 del self.items[:n]
1240 if self.parent:
1241 self.parent_offset += n
1243 def trim_end(self, n=1):
1245 Remove items from the end of the list, without touching the parent.
1247 if n > len(self.data):
1248 raise IndexError("Size of trim too large; can't trim %s items "
1249 "from a list of size %s." % (n, len(self.data)))
1250 elif n < 0:
1251 raise IndexError('Trim size must be >= 0.')
1252 del self.data[-n:]
1253 del self.items[-n:]
1255 def remove(self, item):
1256 index = self.index(item)
1257 del self[index]
1259 def count(self, item): return self.data.count(item)
1260 def index(self, item): return self.data.index(item)
1262 def reverse(self):
1263 self.data.reverse()
1264 self.items.reverse()
1265 self.parent = None
1267 def sort(self, *args):
1268 tmp = zip(self.data, self.items)
1269 tmp.sort(*args)
1270 self.data = [entry[0] for entry in tmp]
1271 self.items = [entry[1] for entry in tmp]
1272 self.parent = None
1274 def info(self, i):
1275 """Return source & offset for index `i`."""
1276 try:
1277 return self.items[i]
1278 except IndexError:
1279 if i == len(self.data): # Just past the end
1280 return self.items[i - 1][0], None
1281 else:
1282 raise
1284 def source(self, i):
1285 """Return source for index `i`."""
1286 return self.info(i)[0]
1288 def offset(self, i):
1289 """Return offset for index `i`."""
1290 return self.info(i)[1]
1292 def disconnect(self):
1293 """Break link between this list and parent list."""
1294 self.parent = None
1297 class StringList(ViewList):
1299 """A `ViewList` with string-specific methods."""
1301 def trim_left(self, length, start=0, end=sys.maxint):
1303 Trim `length` characters off the beginning of each item, in-place,
1304 from index `start` to `end`. No whitespace-checking is done on the
1305 trimmed text. Does not affect slice parent.
1307 self.data[start:end] = [line[length:]
1308 for line in self.data[start:end]]
1310 def get_text_block(self, start, flush_left=0):
1312 Return a contiguous block of text.
1314 If `flush_left` is true, raise `UnexpectedIndentationError` if an
1315 indented line is encountered before the text block ends (with a blank
1316 line).
1318 end = start
1319 last = len(self.data)
1320 while end < last:
1321 line = self.data[end]
1322 if not line.strip():
1323 break
1324 if flush_left and (line[0] == ' '):
1325 source, offset = self.info(end)
1326 raise UnexpectedIndentationError(self[start:end], source,
1327 offset + 1)
1328 end += 1
1329 return self[start:end]
1331 def get_indented(self, start=0, until_blank=0, strip_indent=1,
1332 block_indent=None, first_indent=None):
1334 Extract and return a StringList of indented lines of text.
1336 Collect all lines with indentation, determine the minimum indentation,
1337 remove the minimum indentation from all indented lines (unless
1338 `strip_indent` is false), and return them. All lines up to but not
1339 including the first unindented line will be returned.
1341 :Parameters:
1342 - `start`: The index of the first line to examine.
1343 - `until_blank`: Stop collecting at the first blank line if true.
1344 - `strip_indent`: Strip common leading indent if true (default).
1345 - `block_indent`: The indent of the entire block, if known.
1346 - `first_indent`: The indent of the first line, if known.
1348 :Return:
1349 - a StringList of indented lines with mininum indent removed;
1350 - the amount of the indent;
1351 - a boolean: did the indented block finish with a blank line or EOF?
1353 indent = block_indent # start with None if unknown
1354 end = start
1355 if block_indent is not None and first_indent is None:
1356 first_indent = block_indent
1357 if first_indent is not None:
1358 end += 1
1359 last = len(self.data)
1360 while end < last:
1361 line = self.data[end]
1362 if line and (line[0] != ' '
1363 or (block_indent is not None
1364 and line[:block_indent].strip())):
1365 # Line not indented or insufficiently indented.
1366 # Block finished properly iff the last indented line blank:
1367 blank_finish = ((end > start)
1368 and not self.data[end - 1].strip())
1369 break
1370 stripped = line.lstrip()
1371 if not stripped: # blank line
1372 if until_blank:
1373 blank_finish = 1
1374 break
1375 elif block_indent is None:
1376 line_indent = len(line) - len(stripped)
1377 if indent is None:
1378 indent = line_indent
1379 else:
1380 indent = min(indent, line_indent)
1381 end += 1
1382 else:
1383 blank_finish = 1 # block ends at end of lines
1384 block = self[start:end]
1385 if first_indent is not None and block:
1386 block.data[0] = block.data[0][first_indent:]
1387 if indent and strip_indent:
1388 block.trim_left(indent, start=(first_indent is not None))
1389 return block, indent or 0, blank_finish
1391 def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1392 block = self[top:bottom]
1393 indent = right
1394 for i in range(len(block.data)):
1395 block.data[i] = line = block.data[i][left:right].rstrip()
1396 if line:
1397 indent = min(indent, len(line) - len(line.lstrip()))
1398 if strip_indent and 0 < indent < right:
1399 block.data = [line[indent:] for line in block.data]
1400 return block
1402 def pad_double_width(self, pad_char):
1404 Pad all double-width characters in self by appending `pad_char` to each.
1405 For East Asian language support.
1407 if hasattr(unicodedata, 'east_asian_width'):
1408 east_asian_width = unicodedata.east_asian_width
1409 else:
1410 return # new in Python 2.4
1411 for i in range(len(self.data)):
1412 line = self.data[i]
1413 if isinstance(line, unicode):
1414 new = []
1415 for char in line:
1416 new.append(char)
1417 if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1418 new.append(pad_char)
1419 self.data[i] = ''.join(new)
1421 def replace(self, old, new):
1422 """Replace all occurrences of substring `old` with `new`."""
1423 for i in range(len(self.data)):
1424 self.data[i] = self.data[i].replace(old, new)
1427 class StateMachineError(Exception): pass
1428 class UnknownStateError(StateMachineError): pass
1429 class DuplicateStateError(StateMachineError): pass
1430 class UnknownTransitionError(StateMachineError): pass
1431 class DuplicateTransitionError(StateMachineError): pass
1432 class TransitionPatternNotFound(StateMachineError): pass
1433 class TransitionMethodNotFound(StateMachineError): pass
1434 class UnexpectedIndentationError(StateMachineError): pass
1437 class TransitionCorrection(Exception):
1440 Raise from within a transition method to switch to another transition.
1442 Raise with one argument, the new transition name.
1446 class StateCorrection(Exception):
1449 Raise from within a transition method to switch to another state.
1451 Raise with one or two arguments: new state name, and an optional new
1452 transition name.
1456 def string2lines(astring, tab_width=8, convert_whitespace=0,
1457 whitespace=re.compile('[\v\f]')):
1459 Return a list of one-line strings with tabs expanded, no newlines, and
1460 trailing whitespace stripped.
1462 Each tab is expanded with between 1 and `tab_width` spaces, so that the
1463 next character's index becomes a multiple of `tab_width` (8 by default).
1465 Parameters:
1467 - `astring`: a multi-line string.
1468 - `tab_width`: the number of columns between tab stops.
1469 - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1471 if convert_whitespace:
1472 astring = whitespace.sub(' ', astring)
1473 return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1475 def _exception_data():
1477 Return exception information:
1479 - the exception's class name;
1480 - the exception object;
1481 - the name of the file containing the offending code;
1482 - the line number of the offending code;
1483 - the function name of the offending code.
1485 type, value, traceback = sys.exc_info()
1486 while traceback.tb_next:
1487 traceback = traceback.tb_next
1488 code = traceback.tb_frame.f_code
1489 return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1490 code.co_name)