2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 A finite state machine specialized for regular-expression-based text filters,
7 this module defines the following classes:
9 - `StateMachine`, a state machine
10 - `State`, a state superclass
11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12 - `StateWS`, a state superclass for use with `StateMachineWS`
13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15 - `ViewList`, extends standard Python lists.
16 - `StringList`, string-specific ViewList.
22 - `DuplicateStateError`
23 - `UnknownTransitionError`
24 - `DuplicateTransitionError`
25 - `TransitionPatternNotFound`
26 - `TransitionMethodNotFound`
27 - `UnexpectedIndentationError`
28 - `TransitionCorrection`: Raised to switch to another transition.
29 - `StateCorrection`: Raised to switch to another state & transition.
33 - `string2lines()`: split a multi-line string into a list of one-line strings
36 How To Use This Module
37 ======================
38 (See the individual classes, methods, and attributes for details.)
40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
46 class MyState(statemachine.State):
48 Within the state's class definition:
50 a) Include a pattern for each transition, in `State.patterns`::
52 patterns = {'atransition': r'pattern', ...}
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
57 initial_transitions = ['atransition', ...]
59 c) Define a method for each transition, with the same name as the
62 def atransition(self, match, context, next_state):
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
68 Transition methods may raise an `EOFError` to cut processing short.
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
86 3. Create a state machine object::
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
91 4. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
95 input_string = open('inputfile').read()
96 input_lines = statemachine.string2lines(input_string)
98 5. Run the state machine on the input text and collect the results, a list::
100 results = sm.run(input_lines)
102 6. Remove any lingering circular references::
107 __docformat__
= 'restructuredtext'
113 from docutils
import utils
114 from docutils
.utils
.error_reporting
import ErrorOutput
120 A finite state machine for text filters using regular expressions.
122 The input is provided in the form of a list of one-line strings (no
123 newlines). States are subclasses of the `State` class. Transitions consist
124 of regular expression patterns and transition methods, and are defined in
127 The state machine is started with the `run()` method, which returns the
128 results of processing in a list.
131 def __init__(self
, state_classes
, initial_state
, debug
=False):
133 Initialize a `StateMachine` object; add state objects.
137 - `state_classes`: a list of `State` (sub)classes.
138 - `initial_state`: a string, the class name of the initial state.
139 - `debug`: a boolean; produce verbose output if true (nonzero).
142 self
.input_lines
= None
143 """`StringList` of input lines (without newlines).
144 Filled by `self.run()`."""
146 self
.input_offset
= 0
147 """Offset of `self.input_lines` from the beginning of the file."""
150 """Current input line."""
152 self
.line_offset
= -1
153 """Current input line offset from beginning of `self.input_lines`."""
156 """Debugging mode on/off."""
158 self
.initial_state
= initial_state
159 """The name of the initial state (key to `self.states`)."""
161 self
.current_state
= initial_state
162 """The name of the current state (key to `self.states`)."""
165 """Mapping of {state_name: State_object}."""
167 self
.add_states(state_classes
)
170 """List of bound methods or functions to call whenever the current
171 line changes. Observers are called with one argument, ``self``.
172 Cleared at the end of `run()`."""
174 self
._stderr
= ErrorOutput()
175 """Wrapper around sys.stderr catching en-/decoding errors"""
179 """Remove circular references to objects no longer required."""
180 for state
in self
.states
.values():
184 def run(self
, input_lines
, input_offset
=0, context
=None,
185 input_source
=None, initial_state
=None):
187 Run the state machine on `input_lines`. Return results (a list).
189 Reset `self.line_offset` and `self.current_state`. Run the
190 beginning-of-file transition. Input one line at a time and check for a
191 matching transition. If a match is found, call the transition method
192 and possibly change the state. Store the context returned by the
193 transition method to be passed on to the next transition matched.
194 Accumulate the results returned by the transition methods in a list.
195 Run the end-of-file transition. Finally, return the accumulated
200 - `input_lines`: a list of strings without newlines, or `StringList`.
201 - `input_offset`: the line offset of `input_lines` from the beginning
203 - `context`: application-specific storage.
204 - `input_source`: name or path of source of `input_lines`.
205 - `initial_state`: name of initial state.
208 if isinstance(input_lines
, StringList
):
209 self
.input_lines
= input_lines
211 self
.input_lines
= StringList(input_lines
, source
=input_source
)
212 self
.input_offset
= input_offset
213 self
.line_offset
= -1
214 self
.current_state
= initial_state
or self
.initial_state
216 print >>self
._stderr
, (
217 u
'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
218 % (self
.line_offset
, u
'\n| '.join(self
.input_lines
)))
221 state
= self
.get_state()
224 print >>self
._stderr
, '\nStateMachine.run: bof transition'
225 context
, result
= state
.bof(context
)
226 results
.extend(result
)
232 source
, offset
= self
.input_lines
.info(
234 print >>self
._stderr
, (
235 u
'\nStateMachine.run: line (source=%r, '
237 % (source
, offset
, self
.line
))
238 context
, next_state
, result
= self
.check_line(
239 context
, state
, transitions
)
242 print >>self
._stderr
, (
243 '\nStateMachine.run: %s.eof transition'
244 % state
.__class
__.__name
__)
245 result
= state
.eof(context
)
246 results
.extend(result
)
249 results
.extend(result
)
250 except TransitionCorrection
, exception
:
251 self
.previous_line() # back up for another try
252 transitions
= (exception
.args
[0],)
254 print >>self
._stderr
, (
255 '\nStateMachine.run: TransitionCorrection to '
256 'state "%s", transition %s.'
257 % (state
.__class
__.__name
__, transitions
[0]))
259 except StateCorrection
, exception
:
260 self
.previous_line() # back up for another try
261 next_state
= exception
.args
[0]
262 if len(exception
.args
) == 1:
265 transitions
= (exception
.args
[1],)
267 print >>self
._stderr
, (
268 '\nStateMachine.run: StateCorrection to state '
269 '"%s", transition %s.'
270 % (next_state
, transitions
[0]))
273 state
= self
.get_state(next_state
)
281 def get_state(self
, next_state
=None):
283 Return current state object; set it first if `next_state` given.
285 Parameter `next_state`: a string, the name of the next state.
287 Exception: `UnknownStateError` raised if `next_state` unknown.
290 if self
.debug
and next_state
!= self
.current_state
:
291 print >>self
._stderr
, (
292 '\nStateMachine.get_state: Changing state from '
293 '"%s" to "%s" (input line %s).'
294 % (self
.current_state
, next_state
,
295 self
.abs_line_number()))
296 self
.current_state
= next_state
298 return self
.states
[self
.current_state
]
300 raise UnknownStateError(self
.current_state
)
302 def next_line(self
, n
=1):
303 """Load `self.line` with the `n`'th next line and return it."""
306 self
.line_offset
+= n
307 self
.line
= self
.input_lines
[self
.line_offset
]
313 self
.notify_observers()
315 def is_next_line_blank(self
):
316 """Return 1 if the next line is blank or non-existant."""
318 return not self
.input_lines
[self
.line_offset
+ 1].strip()
323 """Return 1 if the input is at or past end-of-file."""
324 return self
.line_offset
>= len(self
.input_lines
) - 1
327 """Return 1 if the input is at or before beginning-of-file."""
328 return self
.line_offset
<= 0
330 def previous_line(self
, n
=1):
331 """Load `self.line` with the `n`'th previous line and return it."""
332 self
.line_offset
-= n
333 if self
.line_offset
< 0:
336 self
.line
= self
.input_lines
[self
.line_offset
]
337 self
.notify_observers()
340 def goto_line(self
, line_offset
):
341 """Jump to absolute line offset `line_offset`, load and return it."""
344 self
.line_offset
= line_offset
- self
.input_offset
345 self
.line
= self
.input_lines
[self
.line_offset
]
351 self
.notify_observers()
353 def get_source(self
, line_offset
):
354 """Return source of line at absolute line offset `line_offset`."""
355 return self
.input_lines
.source(line_offset
- self
.input_offset
)
357 def abs_line_offset(self
):
358 """Return line offset of current line, from beginning of file."""
359 return self
.line_offset
+ self
.input_offset
361 def abs_line_number(self
):
362 """Return line number of current line (counting from 1)."""
363 return self
.line_offset
+ self
.input_offset
+ 1
365 def get_source_and_line(self
, lineno
=None):
366 """Return (source, line) tuple for current or given line number.
368 Looks up the source and line number in the `self.input_lines`
369 StringList instance to count for included source files.
371 If the optional argument `lineno` is given, convert it from an
372 absolute line number to the corresponding (source, line) pair.
375 offset
= self
.line_offset
377 offset
= lineno
- self
.input_offset
- 1
379 src
, srcoffset
= self
.input_lines
.info(offset
)
380 srcline
= srcoffset
+ 1
382 # line is None if index is "Just past the end"
383 src
, srcline
= self
.get_source_and_line(offset
+ self
.input_offset
)
384 return src
, srcline
+ 1
385 except (IndexError): # `offset` is off the list
386 src
, srcline
= None, None
387 # raise AssertionError('cannot find line %d in %s lines' %
388 # (offset, len(self.input_lines)))
389 # # list(self.input_lines.lines())))
390 # assert offset == srcoffset, str(self.input_lines)
391 # print "get_source_and_line(%s):" % lineno,
392 # print offset + 1, '->', src, srcline
393 # print self.input_lines
394 return (src
, srcline
)
396 def insert_input(self
, input_lines
, source
):
397 self
.input_lines
.insert(self
.line_offset
+ 1, '',
398 source
='internal padding after '+source
,
399 offset
=len(input_lines
))
400 self
.input_lines
.insert(self
.line_offset
+ 1, '',
401 source
='internal padding before '+source
,
403 self
.input_lines
.insert(self
.line_offset
+ 2,
404 StringList(input_lines
, source
))
406 def get_text_block(self
, flush_left
=False):
408 Return a contiguous block of text.
410 If `flush_left` is true, raise `UnexpectedIndentationError` if an
411 indented line is encountered before the text block ends (with a blank
415 block
= self
.input_lines
.get_text_block(self
.line_offset
,
417 self
.next_line(len(block
) - 1)
419 except UnexpectedIndentationError
, err
:
421 self
.next_line(len(block
) - 1) # advance to last line of block
424 def check_line(self
, context
, state
, transitions
=None):
426 Examine one line of input for a transition match & execute its method.
430 - `context`: application-dependent storage.
431 - `state`: a `State` object, the current state.
432 - `transitions`: an optional ordered list of transition names to try,
433 instead of ``state.transition_order``.
435 Return the values returned by the transition method:
437 - context: possibly modified from the parameter `context`;
438 - next state name (`State` subclass name);
439 - the result output of the transition, a list.
441 When there is no match, ``state.no_match()`` is called and its return
444 if transitions
is None:
445 transitions
= state
.transition_order
446 state_correction
= None
448 print >>self
._stderr
, (
449 '\nStateMachine.check_line: state="%s", transitions=%r.'
450 % (state
.__class
__.__name
__, transitions
))
451 for name
in transitions
:
452 pattern
, method
, next_state
= state
.transitions
[name
]
453 match
= pattern
.match(self
.line
)
456 print >>self
._stderr
, (
457 '\nStateMachine.check_line: Matched transition '
458 '"%s" in state "%s".'
459 % (name
, state
.__class
__.__name
__))
460 return method(match
, context
, next_state
)
463 print >>self
._stderr
, (
464 '\nStateMachine.check_line: No match in state "%s".'
465 % state
.__class
__.__name
__)
466 return state
.no_match(context
, transitions
)
468 def add_state(self
, state_class
):
470 Initialize & add a `state_class` (`State` subclass) object.
472 Exception: `DuplicateStateError` raised if `state_class` was already
475 statename
= state_class
.__name
__
476 if statename
in self
.states
:
477 raise DuplicateStateError(statename
)
478 self
.states
[statename
] = state_class(self
, self
.debug
)
480 def add_states(self
, state_classes
):
482 Add `state_classes` (a list of `State` subclasses).
484 for state_class
in state_classes
:
485 self
.add_state(state_class
)
487 def runtime_init(self
):
489 Initialize `self.states`.
491 for state
in self
.states
.values():
495 """Report error details."""
496 type, value
, module
, line
, function
= _exception_data()
497 print >>self
._stderr
, u
'%s: %s' % (type, value
)
498 print >>self
._stderr
, 'input line %s' % (self
.abs_line_number())
499 print >>self
._stderr
, (u
'module %s, line %s, function %s' %
500 (module
, line
, function
))
502 def attach_observer(self
, observer
):
504 The `observer` parameter is a function or bound method which takes two
505 arguments, the source and offset of the current line.
507 self
.observers
.append(observer
)
509 def detach_observer(self
, observer
):
510 self
.observers
.remove(observer
)
512 def notify_observers(self
):
513 for observer
in self
.observers
:
515 info
= self
.input_lines
.info(self
.line_offset
)
524 State superclass. Contains a list of transitions, and transition methods.
526 Transition methods all have the same signature. They take 3 parameters:
528 - An `re` match object. ``match.string`` contains the matched input line,
529 ``match.start()`` gives the start index of the match, and
530 ``match.end()`` gives the end index.
531 - A context object, whose meaning is application-defined (initial value
532 ``None``). It can be used to store any information required by the state
533 machine, and the retured context is passed on to the next transition
535 - The name of the next state, a string, taken from the transitions list;
536 normally it is returned unchanged, but it may be altered by the
537 transition method if necessary.
539 Transition methods all return a 3-tuple:
541 - A context object, as (potentially) modified by the transition method.
542 - The next state name (a return value of ``None`` means no state change).
543 - The processing result, a list, which is accumulated by the state
546 Transition methods may raise an `EOFError` to cut processing short.
548 There are two implicit transitions, and corresponding transition methods
549 are defined: `bof()` handles the beginning-of-file, and `eof()` handles
550 the end-of-file. These methods have non-standard signatures and return
551 values. `bof()` returns the initial context and results, and may be used
552 to return a header string, or do any other processing needed. `eof()`
553 should handle any remaining context and wrap things up; it returns the
554 final processing result.
556 Typical applications need only subclass `State` (or a subclass), set the
557 `patterns` and `initial_transitions` class attributes, and provide
558 corresponding transition methods. The default object initialization will
559 take care of constructing the list of transitions.
564 {Name: pattern} mapping, used by `make_transition()`. Each pattern may
565 be a string or a compiled `re` pattern. Override in subclasses.
568 initial_transitions
= None
570 A list of transitions to initialize when a `State` is instantiated.
571 Each entry is either a transition name string, or a (transition name, next
572 state name) pair. See `make_transitions()`. Override in subclasses.
577 The `StateMachine` class for handling nested processing.
579 If left as ``None``, `nested_sm` defaults to the class of the state's
580 controlling state machine. Override it in subclasses to avoid the default.
583 nested_sm_kwargs
= None
585 Keyword arguments dictionary, passed to the `nested_sm` constructor.
587 Two keys must have entries in the dictionary:
589 - Key 'state_classes' must be set to a list of `State` classes.
590 - Key 'initial_state' must be set to the name of the initial state class.
592 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
593 class of the current state, and 'initial_state' defaults to the name of
594 the class of the current state. Override in subclasses to avoid the
598 def __init__(self
, state_machine
, debug
=False):
600 Initialize a `State` object; make & add initial transitions.
604 - `statemachine`: the controlling `StateMachine` object.
605 - `debug`: a boolean; produce verbose output if true.
608 self
.transition_order
= []
609 """A list of transition names in search order."""
611 self
.transitions
= {}
613 A mapping of transition names to 3-tuples containing
614 (compiled_pattern, transition_method, next_state_name). Initialized as
615 an instance attribute dynamically (instead of as a class attribute)
616 because it may make forward references to patterns and methods in this
620 self
.add_initial_transitions()
622 self
.state_machine
= state_machine
623 """A reference to the controlling `StateMachine` object."""
626 """Debugging mode on/off."""
628 if self
.nested_sm
is None:
629 self
.nested_sm
= self
.state_machine
.__class
__
630 if self
.nested_sm_kwargs
is None:
631 self
.nested_sm_kwargs
= {'state_classes': [self
.__class
__],
632 'initial_state': self
.__class
__.__name
__}
634 def runtime_init(self
):
636 Initialize this `State` before running the state machine; called from
637 `self.state_machine.run()`.
642 """Remove circular references to objects no longer required."""
643 self
.state_machine
= None
645 def add_initial_transitions(self
):
646 """Make and add transitions listed in `self.initial_transitions`."""
647 if self
.initial_transitions
:
648 names
, transitions
= self
.make_transitions(
649 self
.initial_transitions
)
650 self
.add_transitions(names
, transitions
)
652 def add_transitions(self
, names
, transitions
):
654 Add a list of transitions to the start of the transition list.
658 - `names`: a list of transition names.
659 - `transitions`: a mapping of names to transition tuples.
661 Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
664 if name
in self
.transitions
:
665 raise DuplicateTransitionError(name
)
666 if name
not in transitions
:
667 raise UnknownTransitionError(name
)
668 self
.transition_order
[:0] = names
669 self
.transitions
.update(transitions
)
671 def add_transition(self
, name
, transition
):
673 Add a transition to the start of the transition list.
675 Parameter `transition`: a ready-made transition 3-tuple.
677 Exception: `DuplicateTransitionError`.
679 if name
in self
.transitions
:
680 raise DuplicateTransitionError(name
)
681 self
.transition_order
[:0] = [name
]
682 self
.transitions
[name
] = transition
684 def remove_transition(self
, name
):
686 Remove a transition by `name`.
688 Exception: `UnknownTransitionError`.
691 del self
.transitions
[name
]
692 self
.transition_order
.remove(name
)
694 raise UnknownTransitionError(name
)
696 def make_transition(self
, name
, next_state
=None):
698 Make & return a transition tuple based on `name`.
700 This is a convenience function to simplify transition creation.
704 - `name`: a string, the name of the transition pattern & method. This
705 `State` object must have a method called '`name`', and a dictionary
706 `self.patterns` containing a key '`name`'.
707 - `next_state`: a string, the name of the next `State` object for this
708 transition. A value of ``None`` (or absent) implies no state change
709 (i.e., continue with the same state).
711 Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
713 if next_state
is None:
714 next_state
= self
.__class
__.__name
__
716 pattern
= self
.patterns
[name
]
717 if not hasattr(pattern
, 'match'):
718 pattern
= re
.compile(pattern
)
720 raise TransitionPatternNotFound(
721 '%s.patterns[%r]' % (self
.__class
__.__name
__, name
))
723 method
= getattr(self
, name
)
724 except AttributeError:
725 raise TransitionMethodNotFound(
726 '%s.%s' % (self
.__class
__.__name
__, name
))
727 return (pattern
, method
, next_state
)
729 def make_transitions(self
, name_list
):
731 Return a list of transition names and a transition mapping.
733 Parameter `name_list`: a list, where each entry is either a transition
734 name string, or a 1- or 2-tuple (transition name, optional next state
737 stringtype
= type('')
740 for namestate
in name_list
:
741 if type(namestate
) is stringtype
:
742 transitions
[namestate
] = self
.make_transition(namestate
)
743 names
.append(namestate
)
745 transitions
[namestate
[0]] = self
.make_transition(*namestate
)
746 names
.append(namestate
[0])
747 return names
, transitions
749 def no_match(self
, context
, transitions
):
751 Called when there is no match from `StateMachine.check_line()`.
753 Return the same values returned by transition methods:
755 - context: unchanged;
756 - next state name: ``None``;
759 Override in subclasses to catch this event.
761 return context
, None, []
763 def bof(self
, context
):
765 Handle beginning-of-file. Return unchanged `context`, empty result.
767 Override in subclasses.
769 Parameter `context`: application-defined storage.
773 def eof(self
, context
):
775 Handle end-of-file. Return empty result.
777 Override in subclasses.
779 Parameter `context`: application-defined storage.
783 def nop(self
, match
, context
, next_state
):
785 A "do nothing" transition method.
787 Return unchanged `context` & `next_state`, empty result. Useful for
788 simple state changes (actionless transitions).
790 return context
, next_state
, []
793 class StateMachineWS(StateMachine
):
796 `StateMachine` subclass specialized for whitespace recognition.
798 There are three methods provided for extracting indented text blocks:
800 - `get_indented()`: use when the indent is unknown.
801 - `get_known_indented()`: use when the indent is known for all lines.
802 - `get_first_known_indented()`: use when only the first line's indent is
806 def get_indented(self
, until_blank
=False, strip_indent
=True):
808 Return a block of indented lines of text, and info.
810 Extract an indented block where the indent is unknown for all lines.
813 - `until_blank`: Stop collecting at the first blank line if true.
814 - `strip_indent`: Strip common leading indent if true (default).
817 - the indented block (a list of lines of text),
819 - its first line offset from BOF, and
820 - whether or not it finished with a blank line.
822 offset
= self
.abs_line_offset()
823 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
824 self
.line_offset
, until_blank
, strip_indent
)
826 self
.next_line(len(indented
) - 1) # advance to last indented line
827 while indented
and not indented
[0].strip():
828 indented
.trim_start()
830 return indented
, indent
, offset
, blank_finish
832 def get_known_indented(self
, indent
, until_blank
=False, strip_indent
=True):
834 Return an indented block and info.
836 Extract an indented block where the indent is known for all lines.
837 Starting with the current line, extract the entire text block with at
838 least `indent` indentation (which must be whitespace, except for the
842 - `indent`: The number of indent columns/characters.
843 - `until_blank`: Stop collecting at the first blank line if true.
844 - `strip_indent`: Strip `indent` characters of indentation if true
848 - the indented block,
849 - its first line offset from BOF, and
850 - whether or not it finished with a blank line.
852 offset
= self
.abs_line_offset()
853 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
854 self
.line_offset
, until_blank
, strip_indent
,
856 self
.next_line(len(indented
) - 1) # advance to last indented line
857 while indented
and not indented
[0].strip():
858 indented
.trim_start()
860 return indented
, offset
, blank_finish
862 def get_first_known_indented(self
, indent
, until_blank
=False,
863 strip_indent
=True, strip_top
=True):
865 Return an indented block and info.
867 Extract an indented block where the indent is known for the first line
868 and unknown for all other lines.
871 - `indent`: The first line's indent (# of columns/characters).
872 - `until_blank`: Stop collecting at the first blank line if true
874 - `strip_indent`: Strip `indent` characters of indentation if true
876 - `strip_top`: Strip blank lines from the beginning of the block.
879 - the indented block,
881 - its first line offset from BOF, and
882 - whether or not it finished with a blank line.
884 offset
= self
.abs_line_offset()
885 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
886 self
.line_offset
, until_blank
, strip_indent
,
888 self
.next_line(len(indented
) - 1) # advance to last indented line
890 while indented
and not indented
[0].strip():
891 indented
.trim_start()
893 return indented
, indent
, offset
, blank_finish
896 class StateWS(State
):
899 State superclass specialized for whitespace (blank lines & indents).
901 Use this class with `StateMachineWS`. The transitions 'blank' (for blank
902 lines) and 'indent' (for indented text blocks) are added automatically,
903 before any other transitions. The transition method `blank()` handles
904 blank lines and `indent()` handles nested indented blocks. Indented
905 blocks trigger a new state machine to be created by `indent()` and run.
906 The class of the state machine to be created is in `indent_sm`, and the
907 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
909 The methods `known_indent()` and `firstknown_indent()` are provided for
910 indented blocks where the indent (all lines' and first line's only,
911 respectively) is known to the transition method, along with the attributes
912 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
913 is triggered automatically.
918 The `StateMachine` class handling indented text blocks.
920 If left as ``None``, `indent_sm` defaults to the value of
921 `State.nested_sm`. Override it in subclasses to avoid the default.
924 indent_sm_kwargs
= None
926 Keyword arguments dictionary, passed to the `indent_sm` constructor.
928 If left as ``None``, `indent_sm_kwargs` defaults to the value of
929 `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
932 known_indent_sm
= None
934 The `StateMachine` class handling known-indented text blocks.
936 If left as ``None``, `known_indent_sm` defaults to the value of
937 `indent_sm`. Override it in subclasses to avoid the default.
940 known_indent_sm_kwargs
= None
942 Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
944 If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
945 `indent_sm_kwargs`. Override it in subclasses to avoid the default.
948 ws_patterns
= {'blank': ' *$',
950 """Patterns for default whitespace transitions. May be overridden in
953 ws_initial_transitions
= ('blank', 'indent')
954 """Default initial whitespace transitions, added before those listed in
955 `State.initial_transitions`. May be overridden in subclasses."""
957 def __init__(self
, state_machine
, debug
=False):
959 Initialize a `StateSM` object; extends `State.__init__()`.
961 Check for indent state machine attributes, set defaults if not set.
963 State
.__init
__(self
, state_machine
, debug
)
964 if self
.indent_sm
is None:
965 self
.indent_sm
= self
.nested_sm
966 if self
.indent_sm_kwargs
is None:
967 self
.indent_sm_kwargs
= self
.nested_sm_kwargs
968 if self
.known_indent_sm
is None:
969 self
.known_indent_sm
= self
.indent_sm
970 if self
.known_indent_sm_kwargs
is None:
971 self
.known_indent_sm_kwargs
= self
.indent_sm_kwargs
973 def add_initial_transitions(self
):
975 Add whitespace-specific transitions before those defined in subclass.
977 Extends `State.add_initial_transitions()`.
979 State
.add_initial_transitions(self
)
980 if self
.patterns
is None:
982 self
.patterns
.update(self
.ws_patterns
)
983 names
, transitions
= self
.make_transitions(
984 self
.ws_initial_transitions
)
985 self
.add_transitions(names
, transitions
)
987 def blank(self
, match
, context
, next_state
):
988 """Handle blank lines. Does nothing. Override in subclasses."""
989 return self
.nop(match
, context
, next_state
)
991 def indent(self
, match
, context
, next_state
):
993 Handle an indented text block. Extend or override in subclasses.
995 Recursively run the registered state machine for indented blocks
998 indented
, indent
, line_offset
, blank_finish
= \
999 self
.state_machine
.get_indented()
1000 sm
= self
.indent_sm(debug
=self
.debug
, **self
.indent_sm_kwargs
)
1001 results
= sm
.run(indented
, input_offset
=line_offset
)
1002 return context
, next_state
, results
1004 def known_indent(self
, match
, context
, next_state
):
1006 Handle a known-indent text block. Extend or override in subclasses.
1008 Recursively run the registered state machine for known-indent indented
1009 blocks (`self.known_indent_sm`). The indent is the length of the
1010 match, ``match.end()``.
1012 indented
, line_offset
, blank_finish
= \
1013 self
.state_machine
.get_known_indented(match
.end())
1014 sm
= self
.known_indent_sm(debug
=self
.debug
,
1015 **self
.known_indent_sm_kwargs
)
1016 results
= sm
.run(indented
, input_offset
=line_offset
)
1017 return context
, next_state
, results
1019 def first_known_indent(self
, match
, context
, next_state
):
1021 Handle an indented text block (first line's indent known).
1023 Extend or override in subclasses.
1025 Recursively run the registered state machine for known-indent indented
1026 blocks (`self.known_indent_sm`). The indent is the length of the
1027 match, ``match.end()``.
1029 indented
, line_offset
, blank_finish
= \
1030 self
.state_machine
.get_first_known_indented(match
.end())
1031 sm
= self
.known_indent_sm(debug
=self
.debug
,
1032 **self
.known_indent_sm_kwargs
)
1033 results
= sm
.run(indented
, input_offset
=line_offset
)
1034 return context
, next_state
, results
1037 class _SearchOverride
:
1040 Mix-in class to override `StateMachine` regular expression behavior.
1042 Changes regular expression matching, from the default `re.match()`
1043 (succeeds only if the pattern matches at the start of `self.line`) to
1044 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1045 When subclassing a `StateMachine`, list this class **first** in the
1046 inheritance list of the class definition.
1049 def match(self
, pattern
):
1051 Return the result of a regular expression search.
1053 Overrides `StateMachine.match()`.
1055 Parameter `pattern`: `re` compiled regular expression.
1057 return pattern
.search(self
.line
)
1060 class SearchStateMachine(_SearchOverride
, StateMachine
):
1061 """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1065 class SearchStateMachineWS(_SearchOverride
, StateMachineWS
):
1066 """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1073 List with extended functionality: slices of ViewList objects are child
1074 lists, linked to their parents. Changes made to a child list also affect
1075 the parent list. A child list is effectively a "view" (in the SQL sense)
1076 of the parent list. Changes to parent lists, however, do *not* affect
1077 active child lists. If a parent list is changed, any active child lists
1078 should be recreated.
1080 The start and end of the slice can be trimmed using the `trim_start()` and
1081 `trim_end()` methods, without affecting the parent list. The link between
1082 child and parent lists can be broken by calling `disconnect()` on the
1085 Also, ViewList objects keep track of the source & offset of each item.
1086 This information is accessible via the `source()`, `offset()`, and
1090 def __init__(self
, initlist
=None, source
=None, items
=None,
1091 parent
=None, parent_offset
=None):
1093 """The actual list of data, flattened from various sources."""
1096 """A list of (source, offset) pairs, same length as `self.data`: the
1097 source of each line and the offset of each line from the beginning of
1100 self
.parent
= parent
1101 """The parent list."""
1103 self
.parent_offset
= parent_offset
1104 """Offset of this list from the beginning of the parent list."""
1106 if isinstance(initlist
, ViewList
):
1107 self
.data
= initlist
.data
[:]
1108 self
.items
= initlist
.items
[:]
1109 elif initlist
is not None:
1110 self
.data
= list(initlist
)
1114 self
.items
= [(source
, i
) for i
in range(len(initlist
))]
1115 assert len(self
.data
) == len(self
.items
), 'data mismatch'
1118 return str(self
.data
)
1121 return '%s(%s, items=%s)' % (self
.__class
__.__name
__,
1122 self
.data
, self
.items
)
1124 def __lt__(self
, other
): return self
.data
< self
.__cast
(other
)
1125 def __le__(self
, other
): return self
.data
<= self
.__cast
(other
)
1126 def __eq__(self
, other
): return self
.data
== self
.__cast
(other
)
1127 def __ne__(self
, other
): return self
.data
!= self
.__cast
(other
)
1128 def __gt__(self
, other
): return self
.data
> self
.__cast
(other
)
1129 def __ge__(self
, other
): return self
.data
>= self
.__cast
(other
)
1130 def __cmp__(self
, other
): return cmp(self
.data
, self
.__cast
(other
))
1132 def __cast(self
, other
):
1133 if isinstance(other
, ViewList
):
1138 def __contains__(self
, item
): return item
in self
.data
1139 def __len__(self
): return len(self
.data
)
1141 # The __getitem__()/__setitem__() methods check whether the index
1142 # is a slice first, since indexing a native list with a slice object
1145 def __getitem__(self
, i
):
1146 if isinstance(i
, types
.SliceType
):
1147 assert i
.step
in (None, 1), 'cannot handle slice with stride'
1148 return self
.__class
__(self
.data
[i
.start
:i
.stop
],
1149 items
=self
.items
[i
.start
:i
.stop
],
1150 parent
=self
, parent_offset
=i
.start
or 0)
1154 def __setitem__(self
, i
, item
):
1155 if isinstance(i
, types
.SliceType
):
1156 assert i
.step
in (None, 1), 'cannot handle slice with stride'
1157 if not isinstance(item
, ViewList
):
1158 raise TypeError('assigning non-ViewList to ViewList slice')
1159 self
.data
[i
.start
:i
.stop
] = item
.data
1160 self
.items
[i
.start
:i
.stop
] = item
.items
1161 assert len(self
.data
) == len(self
.items
), 'data mismatch'
1163 self
.parent
[(i
.start
or 0) + self
.parent_offset
1164 : (i
.stop
or len(self
)) + self
.parent_offset
] = item
1168 self
.parent
[i
+ self
.parent_offset
] = item
1170 def __delitem__(self
, i
):
1175 del self
.parent
[i
+ self
.parent_offset
]
1177 assert i
.step
is None, 'cannot handle slice with stride'
1178 del self
.data
[i
.start
:i
.stop
]
1179 del self
.items
[i
.start
:i
.stop
]
1181 del self
.parent
[(i
.start
or 0) + self
.parent_offset
1182 : (i
.stop
or len(self
)) + self
.parent_offset
]
1184 def __add__(self
, other
):
1185 if isinstance(other
, ViewList
):
1186 return self
.__class
__(self
.data
+ other
.data
,
1187 items
=(self
.items
+ other
.items
))
1189 raise TypeError('adding non-ViewList to a ViewList')
1191 def __radd__(self
, other
):
1192 if isinstance(other
, ViewList
):
1193 return self
.__class
__(other
.data
+ self
.data
,
1194 items
=(other
.items
+ self
.items
))
1196 raise TypeError('adding ViewList to a non-ViewList')
1198 def __iadd__(self
, other
):
1199 if isinstance(other
, ViewList
):
1200 self
.data
+= other
.data
1202 raise TypeError('argument to += must be a ViewList')
1205 def __mul__(self
, n
):
1206 return self
.__class
__(self
.data
* n
, items
=(self
.items
* n
))
1210 def __imul__(self
, n
):
1215 def extend(self
, other
):
1216 if not isinstance(other
, ViewList
):
1217 raise TypeError('extending a ViewList with a non-ViewList')
1219 self
.parent
.insert(len(self
.data
) + self
.parent_offset
, other
)
1220 self
.data
.extend(other
.data
)
1221 self
.items
.extend(other
.items
)
1223 def append(self
, item
, source
=None, offset
=0):
1228 self
.parent
.insert(len(self
.data
) + self
.parent_offset
, item
,
1230 self
.data
.append(item
)
1231 self
.items
.append((source
, offset
))
1233 def insert(self
, i
, item
, source
=None, offset
=0):
1235 if not isinstance(item
, ViewList
):
1236 raise TypeError('inserting non-ViewList with no source given')
1237 self
.data
[i
:i
] = item
.data
1238 self
.items
[i
:i
] = item
.items
1240 index
= (len(self
.data
) + i
) % len(self
.data
)
1241 self
.parent
.insert(index
+ self
.parent_offset
, item
)
1243 self
.data
.insert(i
, item
)
1244 self
.items
.insert(i
, (source
, offset
))
1246 index
= (len(self
.data
) + i
) % len(self
.data
)
1247 self
.parent
.insert(index
+ self
.parent_offset
, item
,
1250 def pop(self
, i
=-1):
1252 index
= (len(self
.data
) + i
) % len(self
.data
)
1253 self
.parent
.pop(index
+ self
.parent_offset
)
1255 return self
.data
.pop(i
)
1257 def trim_start(self
, n
=1):
1259 Remove items from the start of the list, without touching the parent.
1261 if n
> len(self
.data
):
1262 raise IndexError("Size of trim too large; can't trim %s items "
1263 "from a list of size %s." % (n
, len(self
.data
)))
1265 raise IndexError('Trim size must be >= 0.')
1269 self
.parent_offset
+= n
1271 def trim_end(self
, n
=1):
1273 Remove items from the end of the list, without touching the parent.
1275 if n
> len(self
.data
):
1276 raise IndexError("Size of trim too large; can't trim %s items "
1277 "from a list of size %s." % (n
, len(self
.data
)))
1279 raise IndexError('Trim size must be >= 0.')
1283 def remove(self
, item
):
1284 index
= self
.index(item
)
1287 def count(self
, item
): return self
.data
.count(item
)
1288 def index(self
, item
): return self
.data
.index(item
)
1292 self
.items
.reverse()
1295 def sort(self
, *args
):
1296 tmp
= zip(self
.data
, self
.items
)
1298 self
.data
= [entry
[0] for entry
in tmp
]
1299 self
.items
= [entry
[1] for entry
in tmp
]
1303 """Return source & offset for index `i`."""
1305 return self
.items
[i
]
1307 if i
== len(self
.data
): # Just past the end
1308 return self
.items
[i
- 1][0], None
1312 def source(self
, i
):
1313 """Return source for index `i`."""
1314 return self
.info(i
)[0]
1316 def offset(self
, i
):
1317 """Return offset for index `i`."""
1318 return self
.info(i
)[1]
1320 def disconnect(self
):
1321 """Break link between this list and parent list."""
1325 """Return iterator yielding (source, offset, value) tuples."""
1326 for (value
, (source
, offset
)) in zip(self
.data
, self
.items
):
1327 yield (source
, offset
, value
)
1330 """Print the list in `grep` format (`source:offset:value` lines)"""
1331 for line
in self
.xitems():
1332 print "%s:%d:%s" % line
1335 class StringList(ViewList
):
1337 """A `ViewList` with string-specific methods."""
1339 def trim_left(self
, length
, start
=0, end
=sys
.maxint
):
1341 Trim `length` characters off the beginning of each item, in-place,
1342 from index `start` to `end`. No whitespace-checking is done on the
1343 trimmed text. Does not affect slice parent.
1345 self
.data
[start
:end
] = [line
[length
:]
1346 for line
in self
.data
[start
:end
]]
1348 def get_text_block(self
, start
, flush_left
=False):
1350 Return a contiguous block of text.
1352 If `flush_left` is true, raise `UnexpectedIndentationError` if an
1353 indented line is encountered before the text block ends (with a blank
1357 last
= len(self
.data
)
1359 line
= self
.data
[end
]
1360 if not line
.strip():
1362 if flush_left
and (line
[0] == ' '):
1363 source
, offset
= self
.info(end
)
1364 raise UnexpectedIndentationError(self
[start
:end
], source
,
1367 return self
[start
:end
]
1369 def get_indented(self
, start
=0, until_blank
=False, strip_indent
=True,
1370 block_indent
=None, first_indent
=None):
1372 Extract and return a StringList of indented lines of text.
1374 Collect all lines with indentation, determine the minimum indentation,
1375 remove the minimum indentation from all indented lines (unless
1376 `strip_indent` is false), and return them. All lines up to but not
1377 including the first unindented line will be returned.
1380 - `start`: The index of the first line to examine.
1381 - `until_blank`: Stop collecting at the first blank line if true.
1382 - `strip_indent`: Strip common leading indent if true (default).
1383 - `block_indent`: The indent of the entire block, if known.
1384 - `first_indent`: The indent of the first line, if known.
1387 - a StringList of indented lines with mininum indent removed;
1388 - the amount of the indent;
1389 - a boolean: did the indented block finish with a blank line or EOF?
1391 indent
= block_indent
# start with None if unknown
1393 if block_indent
is not None and first_indent
is None:
1394 first_indent
= block_indent
1395 if first_indent
is not None:
1397 last
= len(self
.data
)
1399 line
= self
.data
[end
]
1400 if line
and (line
[0] != ' '
1401 or (block_indent
is not None
1402 and line
[:block_indent
].strip())):
1403 # Line not indented or insufficiently indented.
1404 # Block finished properly iff the last indented line blank:
1405 blank_finish
= ((end
> start
)
1406 and not self
.data
[end
- 1].strip())
1408 stripped
= line
.lstrip()
1409 if not stripped
: # blank line
1413 elif block_indent
is None:
1414 line_indent
= len(line
) - len(stripped
)
1416 indent
= line_indent
1418 indent
= min(indent
, line_indent
)
1421 blank_finish
= 1 # block ends at end of lines
1422 block
= self
[start
:end
]
1423 if first_indent
is not None and block
:
1424 block
.data
[0] = block
.data
[0][first_indent
:]
1425 if indent
and strip_indent
:
1426 block
.trim_left(indent
, start
=(first_indent
is not None))
1427 return block
, indent
or 0, blank_finish
1429 def get_2D_block(self
, top
, left
, bottom
, right
, strip_indent
=True):
1430 block
= self
[top
:bottom
]
1432 for i
in range(len(block
.data
)):
1433 # get slice from line, care for combining characters
1434 ci
= utils
.column_indices(block
.data
[i
])
1438 left
+= len(block
.data
[i
]) - len(ci
)
1442 right
+= len(block
.data
[i
]) - len(ci
)
1443 block
.data
[i
] = line
= block
.data
[i
][left
:right
].rstrip()
1445 indent
= min(indent
, len(line
) - len(line
.lstrip()))
1446 if strip_indent
and 0 < indent
< right
:
1447 block
.data
= [line
[indent
:] for line
in block
.data
]
1450 def pad_double_width(self
, pad_char
):
1452 Pad all double-width characters in self by appending `pad_char` to each.
1453 For East Asian language support.
1455 if hasattr(unicodedata
, 'east_asian_width'):
1456 east_asian_width
= unicodedata
.east_asian_width
1458 return # new in Python 2.4
1459 for i
in range(len(self
.data
)):
1461 if isinstance(line
, unicode):
1465 if east_asian_width(char
) in 'WF': # 'W'ide & 'F'ull-width
1466 new
.append(pad_char
)
1467 self
.data
[i
] = ''.join(new
)
1469 def replace(self
, old
, new
):
1470 """Replace all occurrences of substring `old` with `new`."""
1471 for i
in range(len(self
.data
)):
1472 self
.data
[i
] = self
.data
[i
].replace(old
, new
)
1475 class StateMachineError(Exception): pass
1476 class UnknownStateError(StateMachineError
): pass
1477 class DuplicateStateError(StateMachineError
): pass
1478 class UnknownTransitionError(StateMachineError
): pass
1479 class DuplicateTransitionError(StateMachineError
): pass
1480 class TransitionPatternNotFound(StateMachineError
): pass
1481 class TransitionMethodNotFound(StateMachineError
): pass
1482 class UnexpectedIndentationError(StateMachineError
): pass
1485 class TransitionCorrection(Exception):
1488 Raise from within a transition method to switch to another transition.
1490 Raise with one argument, the new transition name.
1494 class StateCorrection(Exception):
1497 Raise from within a transition method to switch to another state.
1499 Raise with one or two arguments: new state name, and an optional new
1504 def string2lines(astring
, tab_width
=8, convert_whitespace
=False,
1505 whitespace
=re
.compile('[\v\f]')):
1507 Return a list of one-line strings with tabs expanded, no newlines, and
1508 trailing whitespace stripped.
1510 Each tab is expanded with between 1 and `tab_width` spaces, so that the
1511 next character's index becomes a multiple of `tab_width` (8 by default).
1515 - `astring`: a multi-line string.
1516 - `tab_width`: the number of columns between tab stops.
1517 - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1519 if convert_whitespace
:
1520 astring
= whitespace
.sub(' ', astring
)
1521 return [s
.expandtabs(tab_width
).rstrip() for s
in astring
.splitlines()]
1523 def _exception_data():
1525 Return exception information:
1527 - the exception's class name;
1528 - the exception object;
1529 - the name of the file containing the offending code;
1530 - the line number of the offending code;
1531 - the function name of the offending code.
1533 type, value
, traceback
= sys
.exc_info()
1534 while traceback
.tb_next
:
1535 traceback
= traceback
.tb_next
1536 code
= traceback
.tb_frame
.f_code
1537 return (type.__name
__, value
, code
.co_filename
, traceback
.tb_lineno
,