2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 A finite state machine specialized for regular-expression-based text filters,
7 this module defines the following classes:
9 - `StateMachine`, a state machine
10 - `State`, a state superclass
11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12 - `StateWS`, a state superclass for use with `StateMachineWS`
13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15 - `ViewList`, extends standard Python lists.
16 - `StringList`, string-specific ViewList.
22 - `DuplicateStateError`
23 - `UnknownTransitionError`
24 - `DuplicateTransitionError`
25 - `TransitionPatternNotFound`
26 - `TransitionMethodNotFound`
27 - `UnexpectedIndentationError`
28 - `TransitionCorrection`: Raised to switch to another transition.
29 - `StateCorrection`: Raised to switch to another state & transition.
33 - `string2lines()`: split a multi-line string into a list of one-line strings
36 How To Use This Module
37 ======================
38 (See the individual classes, methods, and attributes for details.)
40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
46 class MyState(statemachine.State):
48 Within the state's class definition:
50 a) Include a pattern for each transition, in `State.patterns`::
52 patterns = {'atransition': r'pattern', ...}
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
57 initial_transitions = ['atransition', ...]
59 c) Define a method for each transition, with the same name as the
62 def atransition(self, match, context, next_state):
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
68 Transition methods may raise an `EOFError` to cut processing short.
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
86 3. Create a state machine object::
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
91 4. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
95 input_string = open('inputfile').read()
96 input_lines = statemachine.string2lines(input_string)
98 5. Run the state machine on the input text and collect the results, a list::
100 results = sm.run(input_lines)
102 6. Remove any lingering circular references::
107 __docformat__
= 'restructuredtext'
113 from docutils
import utils
114 from docutils
.error_reporting
import ErrorOutput
120 A finite state machine for text filters using regular expressions.
122 The input is provided in the form of a list of one-line strings (no
123 newlines). States are subclasses of the `State` class. Transitions consist
124 of regular expression patterns and transition methods, and are defined in
127 The state machine is started with the `run()` method, which returns the
128 results of processing in a list.
131 def __init__(self
, state_classes
, initial_state
, debug
=0):
133 Initialize a `StateMachine` object; add state objects.
137 - `state_classes`: a list of `State` (sub)classes.
138 - `initial_state`: a string, the class name of the initial state.
139 - `debug`: a boolean; produce verbose output if true (nonzero).
142 self
.input_lines
= None
143 """`StringList` of input lines (without newlines).
144 Filled by `self.run()`."""
146 self
.input_offset
= 0
147 """Offset of `self.input_lines` from the beginning of the file."""
150 """Current input line."""
152 self
.line_offset
= -1
153 """Current input line offset from beginning of `self.input_lines`."""
156 """Debugging mode on/off."""
158 self
.initial_state
= initial_state
159 """The name of the initial state (key to `self.states`)."""
161 self
.current_state
= initial_state
162 """The name of the current state (key to `self.states`)."""
165 """Mapping of {state_name: State_object}."""
167 self
.add_states(state_classes
)
170 """List of bound methods or functions to call whenever the current
171 line changes. Observers are called with one argument, ``self``.
172 Cleared at the end of `run()`."""
174 self
._stderr
= ErrorOutput()
175 """Wrapper around sys.stderr catching en-/decoding errors"""
179 """Remove circular references to objects no longer required."""
180 for state
in self
.states
.values():
184 def run(self
, input_lines
, input_offset
=0, context
=None,
185 input_source
=None, initial_state
=None):
187 Run the state machine on `input_lines`. Return results (a list).
189 Reset `self.line_offset` and `self.current_state`. Run the
190 beginning-of-file transition. Input one line at a time and check for a
191 matching transition. If a match is found, call the transition method
192 and possibly change the state. Store the context returned by the
193 transition method to be passed on to the next transition matched.
194 Accumulate the results returned by the transition methods in a list.
195 Run the end-of-file transition. Finally, return the accumulated
200 - `input_lines`: a list of strings without newlines, or `StringList`.
201 - `input_offset`: the line offset of `input_lines` from the beginning
203 - `context`: application-specific storage.
204 - `input_source`: name or path of source of `input_lines`.
205 - `initial_state`: name of initial state.
208 if isinstance(input_lines
, StringList
):
209 self
.input_lines
= input_lines
211 self
.input_lines
= StringList(input_lines
, source
=input_source
)
212 self
.input_offset
= input_offset
213 self
.line_offset
= -1
214 self
.current_state
= initial_state
or self
.initial_state
216 print >>self
._stderr
, (
217 u
'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
218 % (self
.line_offset
, u
'\n| '.join(self
.input_lines
)))
221 state
= self
.get_state()
224 print >>self
._stderr
, '\nStateMachine.run: bof transition'
225 context
, result
= state
.bof(context
)
226 results
.extend(result
)
232 source
, offset
= self
.input_lines
.info(
234 print >>self
._stderr
, (
235 u
'\nStateMachine.run: line (source=%r, '
237 % (source
, offset
, self
.line
))
238 context
, next_state
, result
= self
.check_line(
239 context
, state
, transitions
)
242 print >>self
._stderr
, (
243 '\nStateMachine.run: %s.eof transition'
244 % state
.__class
__.__name
__)
245 result
= state
.eof(context
)
246 results
.extend(result
)
249 results
.extend(result
)
250 except TransitionCorrection
, exception
:
251 self
.previous_line() # back up for another try
252 transitions
= (exception
.args
[0],)
254 print >>self
._stderr
, (
255 '\nStateMachine.run: TransitionCorrection to '
256 'state "%s", transition %s.'
257 % (state
.__class
__.__name
__, transitions
[0]))
259 except StateCorrection
, exception
:
260 self
.previous_line() # back up for another try
261 next_state
= exception
.args
[0]
262 if len(exception
.args
) == 1:
265 transitions
= (exception
.args
[1],)
267 print >>self
._stderr
, (
268 '\nStateMachine.run: StateCorrection to state '
269 '"%s", transition %s.'
270 % (next_state
, transitions
[0]))
273 state
= self
.get_state(next_state
)
281 def get_state(self
, next_state
=None):
283 Return current state object; set it first if `next_state` given.
285 Parameter `next_state`: a string, the name of the next state.
287 Exception: `UnknownStateError` raised if `next_state` unknown.
290 if self
.debug
and next_state
!= self
.current_state
:
291 print >>self
._stderr
, (
292 '\nStateMachine.get_state: Changing state from '
293 '"%s" to "%s" (input line %s).'
294 % (self
.current_state
, next_state
,
295 self
.abs_line_number()))
296 self
.current_state
= next_state
298 return self
.states
[self
.current_state
]
300 raise UnknownStateError(self
.current_state
)
302 def next_line(self
, n
=1):
303 """Load `self.line` with the `n`'th next line and return it."""
306 self
.line_offset
+= n
307 self
.line
= self
.input_lines
[self
.line_offset
]
313 self
.notify_observers()
315 def is_next_line_blank(self
):
316 """Return 1 if the next line is blank or non-existant."""
318 return not self
.input_lines
[self
.line_offset
+ 1].strip()
323 """Return 1 if the input is at or past end-of-file."""
324 return self
.line_offset
>= len(self
.input_lines
) - 1
327 """Return 1 if the input is at or before beginning-of-file."""
328 return self
.line_offset
<= 0
330 def previous_line(self
, n
=1):
331 """Load `self.line` with the `n`'th previous line and return it."""
332 self
.line_offset
-= n
333 if self
.line_offset
< 0:
336 self
.line
= self
.input_lines
[self
.line_offset
]
337 self
.notify_observers()
340 def goto_line(self
, line_offset
):
341 """Jump to absolute line offset `line_offset`, load and return it."""
344 self
.line_offset
= line_offset
- self
.input_offset
345 self
.line
= self
.input_lines
[self
.line_offset
]
351 self
.notify_observers()
353 def get_source(self
, line_offset
):
354 """Return source of line at absolute line offset `line_offset`."""
355 return self
.input_lines
.source(line_offset
- self
.input_offset
)
357 def abs_line_offset(self
):
358 """Return line offset of current line, from beginning of file."""
359 return self
.line_offset
+ self
.input_offset
361 def abs_line_number(self
):
362 """Return line number of current line (counting from 1)."""
363 return self
.line_offset
+ self
.input_offset
+ 1
365 def get_source_and_line(self
, lineno
=None):
366 """Return (source, line) tuple for current or given line number.
368 Looks up the source and line number in the `self.input_lines`
369 StringList instance to count for included source files.
371 If the optional argument `lineno` is given, convert it from an
372 absolute line number to the corresponding (source, line) pair.
375 offset
= self
.line_offset
377 offset
= lineno
- self
.input_offset
- 1
379 src
, srcoffset
= self
.input_lines
.info(offset
)
380 srcline
= srcoffset
+ 1
382 # line is None if index is "Just past the end"
383 src
, srcline
= self
.get_source_and_line(offset
+ self
.input_offset
)
384 return src
, srcline
+ 1
385 except (IndexError): # `offset` is off the list
386 src
, srcline
= None, None
387 # raise AssertionError('cannot find line %d in %s lines' %
388 # (offset, len(self.input_lines)))
389 # # list(self.input_lines.lines())))
390 # assert offset == srcoffset, str(self.input_lines)
391 # print "get_source_and_line(%s):" % lineno,
392 # print offset + 1, '->', src, srcline
393 # print self.input_lines
394 return (src
, srcline
)
396 def insert_input(self
, input_lines
, source
):
397 self
.input_lines
.insert(self
.line_offset
+ 1, '',
398 source
='internal padding after '+source
,
399 offset
=len(input_lines
))
400 self
.input_lines
.insert(self
.line_offset
+ 1, '',
401 source
='internal padding before '+source
,
403 self
.input_lines
.insert(self
.line_offset
+ 2,
404 StringList(input_lines
, source
))
406 def get_text_block(self
, flush_left
=0):
408 Return a contiguous block of text.
410 If `flush_left` is true, raise `UnexpectedIndentationError` if an
411 indented line is encountered before the text block ends (with a blank
415 block
= self
.input_lines
.get_text_block(self
.line_offset
,
417 self
.next_line(len(block
) - 1)
419 except UnexpectedIndentationError
, error
:
420 block
, source
, lineno
= error
.args
421 self
.next_line(len(block
) - 1) # advance to last line of block
424 def check_line(self
, context
, state
, transitions
=None):
426 Examine one line of input for a transition match & execute its method.
430 - `context`: application-dependent storage.
431 - `state`: a `State` object, the current state.
432 - `transitions`: an optional ordered list of transition names to try,
433 instead of ``state.transition_order``.
435 Return the values returned by the transition method:
437 - context: possibly modified from the parameter `context`;
438 - next state name (`State` subclass name);
439 - the result output of the transition, a list.
441 When there is no match, ``state.no_match()`` is called and its return
444 if transitions
is None:
445 transitions
= state
.transition_order
446 state_correction
= None
448 print >>self
._stderr
, (
449 '\nStateMachine.check_line: state="%s", transitions=%r.'
450 % (state
.__class
__.__name
__, transitions
))
451 for name
in transitions
:
452 pattern
, method
, next_state
= state
.transitions
[name
]
453 match
= pattern
.match(self
.line
)
456 print >>self
._stderr
, (
457 '\nStateMachine.check_line: Matched transition '
458 '"%s" in state "%s".'
459 % (name
, state
.__class
__.__name
__))
460 return method(match
, context
, next_state
)
463 print >>self
._stderr
, (
464 '\nStateMachine.check_line: No match in state "%s".'
465 % state
.__class
__.__name
__)
466 return state
.no_match(context
, transitions
)
468 def add_state(self
, state_class
):
470 Initialize & add a `state_class` (`State` subclass) object.
472 Exception: `DuplicateStateError` raised if `state_class` was already
475 statename
= state_class
.__name
__
476 if statename
in self
.states
:
477 raise DuplicateStateError(statename
)
478 self
.states
[statename
] = state_class(self
, self
.debug
)
480 def add_states(self
, state_classes
):
482 Add `state_classes` (a list of `State` subclasses).
484 for state_class
in state_classes
:
485 self
.add_state(state_class
)
487 def runtime_init(self
):
489 Initialize `self.states`.
491 for state
in self
.states
.values():
495 """Report error details."""
496 type, value
, module
, line
, function
= _exception_data()
497 print >>self
._stderr
, u
'%s: %s' % (type, value
)
498 print >>self
._stderr
, 'input line %s' % (self
.abs_line_number())
499 print >>self
._stderr
, (u
'module %s, line %s, function %s' %
500 (module
, line
, function
))
502 def attach_observer(self
, observer
):
504 The `observer` parameter is a function or bound method which takes two
505 arguments, the source and offset of the current line.
507 self
.observers
.append(observer
)
509 def detach_observer(self
, observer
):
510 self
.observers
.remove(observer
)
512 def notify_observers(self
):
513 for observer
in self
.observers
:
515 info
= self
.input_lines
.info(self
.line_offset
)
524 State superclass. Contains a list of transitions, and transition methods.
526 Transition methods all have the same signature. They take 3 parameters:
528 - An `re` match object. ``match.string`` contains the matched input line,
529 ``match.start()`` gives the start index of the match, and
530 ``match.end()`` gives the end index.
531 - A context object, whose meaning is application-defined (initial value
532 ``None``). It can be used to store any information required by the state
533 machine, and the retured context is passed on to the next transition
535 - The name of the next state, a string, taken from the transitions list;
536 normally it is returned unchanged, but it may be altered by the
537 transition method if necessary.
539 Transition methods all return a 3-tuple:
541 - A context object, as (potentially) modified by the transition method.
542 - The next state name (a return value of ``None`` means no state change).
543 - The processing result, a list, which is accumulated by the state
546 Transition methods may raise an `EOFError` to cut processing short.
548 There are two implicit transitions, and corresponding transition methods
549 are defined: `bof()` handles the beginning-of-file, and `eof()` handles
550 the end-of-file. These methods have non-standard signatures and return
551 values. `bof()` returns the initial context and results, and may be used
552 to return a header string, or do any other processing needed. `eof()`
553 should handle any remaining context and wrap things up; it returns the
554 final processing result.
556 Typical applications need only subclass `State` (or a subclass), set the
557 `patterns` and `initial_transitions` class attributes, and provide
558 corresponding transition methods. The default object initialization will
559 take care of constructing the list of transitions.
564 {Name: pattern} mapping, used by `make_transition()`. Each pattern may
565 be a string or a compiled `re` pattern. Override in subclasses.
568 initial_transitions
= None
570 A list of transitions to initialize when a `State` is instantiated.
571 Each entry is either a transition name string, or a (transition name, next
572 state name) pair. See `make_transitions()`. Override in subclasses.
577 The `StateMachine` class for handling nested processing.
579 If left as ``None``, `nested_sm` defaults to the class of the state's
580 controlling state machine. Override it in subclasses to avoid the default.
583 nested_sm_kwargs
= None
585 Keyword arguments dictionary, passed to the `nested_sm` constructor.
587 Two keys must have entries in the dictionary:
589 - Key 'state_classes' must be set to a list of `State` classes.
590 - Key 'initial_state' must be set to the name of the initial state class.
592 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
593 class of the current state, and 'initial_state' defaults to the name of
594 the class of the current state. Override in subclasses to avoid the
598 def __init__(self
, state_machine
, debug
=0):
600 Initialize a `State` object; make & add initial transitions.
604 - `statemachine`: the controlling `StateMachine` object.
605 - `debug`: a boolean; produce verbose output if true (nonzero).
608 self
.transition_order
= []
609 """A list of transition names in search order."""
611 self
.transitions
= {}
613 A mapping of transition names to 3-tuples containing
614 (compiled_pattern, transition_method, next_state_name). Initialized as
615 an instance attribute dynamically (instead of as a class attribute)
616 because it may make forward references to patterns and methods in this
620 self
.add_initial_transitions()
622 self
.state_machine
= state_machine
623 """A reference to the controlling `StateMachine` object."""
626 """Debugging mode on/off."""
628 if self
.nested_sm
is None:
629 self
.nested_sm
= self
.state_machine
.__class
__
630 if self
.nested_sm_kwargs
is None:
631 self
.nested_sm_kwargs
= {'state_classes': [self
.__class
__],
632 'initial_state': self
.__class
__.__name
__}
634 def runtime_init(self
):
636 Initialize this `State` before running the state machine; called from
637 `self.state_machine.run()`.
642 """Remove circular references to objects no longer required."""
643 self
.state_machine
= None
645 def add_initial_transitions(self
):
646 """Make and add transitions listed in `self.initial_transitions`."""
647 if self
.initial_transitions
:
648 names
, transitions
= self
.make_transitions(
649 self
.initial_transitions
)
650 self
.add_transitions(names
, transitions
)
652 def add_transitions(self
, names
, transitions
):
654 Add a list of transitions to the start of the transition list.
658 - `names`: a list of transition names.
659 - `transitions`: a mapping of names to transition tuples.
661 Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
664 if name
in self
.transitions
:
665 raise DuplicateTransitionError(name
)
666 if name
not in transitions
:
667 raise UnknownTransitionError(name
)
668 self
.transition_order
[:0] = names
669 self
.transitions
.update(transitions
)
671 def add_transition(self
, name
, transition
):
673 Add a transition to the start of the transition list.
675 Parameter `transition`: a ready-made transition 3-tuple.
677 Exception: `DuplicateTransitionError`.
679 if name
in self
.transitions
:
680 raise DuplicateTransitionError(name
)
681 self
.transition_order
[:0] = [name
]
682 self
.transitions
[name
] = transition
684 def remove_transition(self
, name
):
686 Remove a transition by `name`.
688 Exception: `UnknownTransitionError`.
691 del self
.transitions
[name
]
692 self
.transition_order
.remove(name
)
694 raise UnknownTransitionError(name
)
696 def make_transition(self
, name
, next_state
=None):
698 Make & return a transition tuple based on `name`.
700 This is a convenience function to simplify transition creation.
704 - `name`: a string, the name of the transition pattern & method. This
705 `State` object must have a method called '`name`', and a dictionary
706 `self.patterns` containing a key '`name`'.
707 - `next_state`: a string, the name of the next `State` object for this
708 transition. A value of ``None`` (or absent) implies no state change
709 (i.e., continue with the same state).
711 Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
713 if next_state
is None:
714 next_state
= self
.__class
__.__name
__
716 pattern
= self
.patterns
[name
]
717 if not hasattr(pattern
, 'match'):
718 pattern
= re
.compile(pattern
)
720 raise TransitionPatternNotFound(
721 '%s.patterns[%r]' % (self
.__class
__.__name
__, name
))
723 method
= getattr(self
, name
)
724 except AttributeError:
725 raise TransitionMethodNotFound(
726 '%s.%s' % (self
.__class
__.__name
__, name
))
727 return (pattern
, method
, next_state
)
729 def make_transitions(self
, name_list
):
731 Return a list of transition names and a transition mapping.
733 Parameter `name_list`: a list, where each entry is either a transition
734 name string, or a 1- or 2-tuple (transition name, optional next state
737 stringtype
= type('')
740 for namestate
in name_list
:
741 if type(namestate
) is stringtype
:
742 transitions
[namestate
] = self
.make_transition(namestate
)
743 names
.append(namestate
)
745 transitions
[namestate
[0]] = self
.make_transition(*namestate
)
746 names
.append(namestate
[0])
747 return names
, transitions
749 def no_match(self
, context
, transitions
):
751 Called when there is no match from `StateMachine.check_line()`.
753 Return the same values returned by transition methods:
755 - context: unchanged;
756 - next state name: ``None``;
759 Override in subclasses to catch this event.
761 return context
, None, []
763 def bof(self
, context
):
765 Handle beginning-of-file. Return unchanged `context`, empty result.
767 Override in subclasses.
769 Parameter `context`: application-defined storage.
773 def eof(self
, context
):
775 Handle end-of-file. Return empty result.
777 Override in subclasses.
779 Parameter `context`: application-defined storage.
783 def nop(self
, match
, context
, next_state
):
785 A "do nothing" transition method.
787 Return unchanged `context` & `next_state`, empty result. Useful for
788 simple state changes (actionless transitions).
790 return context
, next_state
, []
793 class StateMachineWS(StateMachine
):
796 `StateMachine` subclass specialized for whitespace recognition.
798 There are three methods provided for extracting indented text blocks:
800 - `get_indented()`: use when the indent is unknown.
801 - `get_known_indented()`: use when the indent is known for all lines.
802 - `get_first_known_indented()`: use when only the first line's indent is
806 def get_indented(self
, until_blank
=0, strip_indent
=1):
808 Return a block of indented lines of text, and info.
810 Extract an indented block where the indent is unknown for all lines.
813 - `until_blank`: Stop collecting at the first blank line if true
815 - `strip_indent`: Strip common leading indent if true (1,
819 - the indented block (a list of lines of text),
821 - its first line offset from BOF, and
822 - whether or not it finished with a blank line.
824 offset
= self
.abs_line_offset()
825 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
826 self
.line_offset
, until_blank
, strip_indent
)
828 self
.next_line(len(indented
) - 1) # advance to last indented line
829 while indented
and not indented
[0].strip():
830 indented
.trim_start()
832 return indented
, indent
, offset
, blank_finish
834 def get_known_indented(self
, indent
, until_blank
=0, strip_indent
=1):
836 Return an indented block and info.
838 Extract an indented block where the indent is known for all lines.
839 Starting with the current line, extract the entire text block with at
840 least `indent` indentation (which must be whitespace, except for the
844 - `indent`: The number of indent columns/characters.
845 - `until_blank`: Stop collecting at the first blank line if true
847 - `strip_indent`: Strip `indent` characters of indentation if true
851 - the indented block,
852 - its first line offset from BOF, and
853 - whether or not it finished with a blank line.
855 offset
= self
.abs_line_offset()
856 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
857 self
.line_offset
, until_blank
, strip_indent
,
859 self
.next_line(len(indented
) - 1) # advance to last indented line
860 while indented
and not indented
[0].strip():
861 indented
.trim_start()
863 return indented
, offset
, blank_finish
865 def get_first_known_indented(self
, indent
, until_blank
=0, strip_indent
=1,
868 Return an indented block and info.
870 Extract an indented block where the indent is known for the first line
871 and unknown for all other lines.
874 - `indent`: The first line's indent (# of columns/characters).
875 - `until_blank`: Stop collecting at the first blank line if true
877 - `strip_indent`: Strip `indent` characters of indentation if true
879 - `strip_top`: Strip blank lines from the beginning of the block.
882 - the indented block,
884 - its first line offset from BOF, and
885 - whether or not it finished with a blank line.
887 offset
= self
.abs_line_offset()
888 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
889 self
.line_offset
, until_blank
, strip_indent
,
891 self
.next_line(len(indented
) - 1) # advance to last indented line
893 while indented
and not indented
[0].strip():
894 indented
.trim_start()
896 return indented
, indent
, offset
, blank_finish
899 class StateWS(State
):
902 State superclass specialized for whitespace (blank lines & indents).
904 Use this class with `StateMachineWS`. The transitions 'blank' (for blank
905 lines) and 'indent' (for indented text blocks) are added automatically,
906 before any other transitions. The transition method `blank()` handles
907 blank lines and `indent()` handles nested indented blocks. Indented
908 blocks trigger a new state machine to be created by `indent()` and run.
909 The class of the state machine to be created is in `indent_sm`, and the
910 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
912 The methods `known_indent()` and `firstknown_indent()` are provided for
913 indented blocks where the indent (all lines' and first line's only,
914 respectively) is known to the transition method, along with the attributes
915 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
916 is triggered automatically.
921 The `StateMachine` class handling indented text blocks.
923 If left as ``None``, `indent_sm` defaults to the value of
924 `State.nested_sm`. Override it in subclasses to avoid the default.
927 indent_sm_kwargs
= None
929 Keyword arguments dictionary, passed to the `indent_sm` constructor.
931 If left as ``None``, `indent_sm_kwargs` defaults to the value of
932 `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
935 known_indent_sm
= None
937 The `StateMachine` class handling known-indented text blocks.
939 If left as ``None``, `known_indent_sm` defaults to the value of
940 `indent_sm`. Override it in subclasses to avoid the default.
943 known_indent_sm_kwargs
= None
945 Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
947 If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
948 `indent_sm_kwargs`. Override it in subclasses to avoid the default.
951 ws_patterns
= {'blank': ' *$',
953 """Patterns for default whitespace transitions. May be overridden in
956 ws_initial_transitions
= ('blank', 'indent')
957 """Default initial whitespace transitions, added before those listed in
958 `State.initial_transitions`. May be overridden in subclasses."""
960 def __init__(self
, state_machine
, debug
=0):
962 Initialize a `StateSM` object; extends `State.__init__()`.
964 Check for indent state machine attributes, set defaults if not set.
966 State
.__init
__(self
, state_machine
, debug
)
967 if self
.indent_sm
is None:
968 self
.indent_sm
= self
.nested_sm
969 if self
.indent_sm_kwargs
is None:
970 self
.indent_sm_kwargs
= self
.nested_sm_kwargs
971 if self
.known_indent_sm
is None:
972 self
.known_indent_sm
= self
.indent_sm
973 if self
.known_indent_sm_kwargs
is None:
974 self
.known_indent_sm_kwargs
= self
.indent_sm_kwargs
976 def add_initial_transitions(self
):
978 Add whitespace-specific transitions before those defined in subclass.
980 Extends `State.add_initial_transitions()`.
982 State
.add_initial_transitions(self
)
983 if self
.patterns
is None:
985 self
.patterns
.update(self
.ws_patterns
)
986 names
, transitions
= self
.make_transitions(
987 self
.ws_initial_transitions
)
988 self
.add_transitions(names
, transitions
)
990 def blank(self
, match
, context
, next_state
):
991 """Handle blank lines. Does nothing. Override in subclasses."""
992 return self
.nop(match
, context
, next_state
)
994 def indent(self
, match
, context
, next_state
):
996 Handle an indented text block. Extend or override in subclasses.
998 Recursively run the registered state machine for indented blocks
1001 indented
, indent
, line_offset
, blank_finish
= \
1002 self
.state_machine
.get_indented()
1003 sm
= self
.indent_sm(debug
=self
.debug
, **self
.indent_sm_kwargs
)
1004 results
= sm
.run(indented
, input_offset
=line_offset
)
1005 return context
, next_state
, results
1007 def known_indent(self
, match
, context
, next_state
):
1009 Handle a known-indent text block. Extend or override in subclasses.
1011 Recursively run the registered state machine for known-indent indented
1012 blocks (`self.known_indent_sm`). The indent is the length of the
1013 match, ``match.end()``.
1015 indented
, line_offset
, blank_finish
= \
1016 self
.state_machine
.get_known_indented(match
.end())
1017 sm
= self
.known_indent_sm(debug
=self
.debug
,
1018 **self
.known_indent_sm_kwargs
)
1019 results
= sm
.run(indented
, input_offset
=line_offset
)
1020 return context
, next_state
, results
1022 def first_known_indent(self
, match
, context
, next_state
):
1024 Handle an indented text block (first line's indent known).
1026 Extend or override in subclasses.
1028 Recursively run the registered state machine for known-indent indented
1029 blocks (`self.known_indent_sm`). The indent is the length of the
1030 match, ``match.end()``.
1032 indented
, line_offset
, blank_finish
= \
1033 self
.state_machine
.get_first_known_indented(match
.end())
1034 sm
= self
.known_indent_sm(debug
=self
.debug
,
1035 **self
.known_indent_sm_kwargs
)
1036 results
= sm
.run(indented
, input_offset
=line_offset
)
1037 return context
, next_state
, results
1040 class _SearchOverride
:
1043 Mix-in class to override `StateMachine` regular expression behavior.
1045 Changes regular expression matching, from the default `re.match()`
1046 (succeeds only if the pattern matches at the start of `self.line`) to
1047 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1048 When subclassing a `StateMachine`, list this class **first** in the
1049 inheritance list of the class definition.
1052 def match(self
, pattern
):
1054 Return the result of a regular expression search.
1056 Overrides `StateMachine.match()`.
1058 Parameter `pattern`: `re` compiled regular expression.
1060 return pattern
.search(self
.line
)
1063 class SearchStateMachine(_SearchOverride
, StateMachine
):
1064 """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1068 class SearchStateMachineWS(_SearchOverride
, StateMachineWS
):
1069 """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1076 List with extended functionality: slices of ViewList objects are child
1077 lists, linked to their parents. Changes made to a child list also affect
1078 the parent list. A child list is effectively a "view" (in the SQL sense)
1079 of the parent list. Changes to parent lists, however, do *not* affect
1080 active child lists. If a parent list is changed, any active child lists
1081 should be recreated.
1083 The start and end of the slice can be trimmed using the `trim_start()` and
1084 `trim_end()` methods, without affecting the parent list. The link between
1085 child and parent lists can be broken by calling `disconnect()` on the
1088 Also, ViewList objects keep track of the source & offset of each item.
1089 This information is accessible via the `source()`, `offset()`, and
1093 def __init__(self
, initlist
=None, source
=None, items
=None,
1094 parent
=None, parent_offset
=None):
1096 """The actual list of data, flattened from various sources."""
1099 """A list of (source, offset) pairs, same length as `self.data`: the
1100 source of each line and the offset of each line from the beginning of
1103 self
.parent
= parent
1104 """The parent list."""
1106 self
.parent_offset
= parent_offset
1107 """Offset of this list from the beginning of the parent list."""
1109 if isinstance(initlist
, ViewList
):
1110 self
.data
= initlist
.data
[:]
1111 self
.items
= initlist
.items
[:]
1112 elif initlist
is not None:
1113 self
.data
= list(initlist
)
1117 self
.items
= [(source
, i
) for i
in range(len(initlist
))]
1118 assert len(self
.data
) == len(self
.items
), 'data mismatch'
1121 return str(self
.data
)
1124 return '%s(%s, items=%s)' % (self
.__class
__.__name
__,
1125 self
.data
, self
.items
)
1127 def __lt__(self
, other
): return self
.data
< self
.__cast
(other
)
1128 def __le__(self
, other
): return self
.data
<= self
.__cast
(other
)
1129 def __eq__(self
, other
): return self
.data
== self
.__cast
(other
)
1130 def __ne__(self
, other
): return self
.data
!= self
.__cast
(other
)
1131 def __gt__(self
, other
): return self
.data
> self
.__cast
(other
)
1132 def __ge__(self
, other
): return self
.data
>= self
.__cast
(other
)
1133 def __cmp__(self
, other
): return cmp(self
.data
, self
.__cast
(other
))
1135 def __cast(self
, other
):
1136 if isinstance(other
, ViewList
):
1141 def __contains__(self
, item
): return item
in self
.data
1142 def __len__(self
): return len(self
.data
)
1144 # The __getitem__()/__setitem__() methods check whether the index
1145 # is a slice first, since indexing a native list with a slice object
1148 def __getitem__(self
, i
):
1149 if isinstance(i
, types
.SliceType
):
1150 assert i
.step
in (None, 1), 'cannot handle slice with stride'
1151 return self
.__class
__(self
.data
[i
.start
:i
.stop
],
1152 items
=self
.items
[i
.start
:i
.stop
],
1153 parent
=self
, parent_offset
=i
.start
or 0)
1157 def __setitem__(self
, i
, item
):
1158 if isinstance(i
, types
.SliceType
):
1159 assert i
.step
in (None, 1), 'cannot handle slice with stride'
1160 if not isinstance(item
, ViewList
):
1161 raise TypeError('assigning non-ViewList to ViewList slice')
1162 self
.data
[i
.start
:i
.stop
] = item
.data
1163 self
.items
[i
.start
:i
.stop
] = item
.items
1164 assert len(self
.data
) == len(self
.items
), 'data mismatch'
1166 self
.parent
[(i
.start
or 0) + self
.parent_offset
1167 : (i
.stop
or len(self
)) + self
.parent_offset
] = item
1171 self
.parent
[i
+ self
.parent_offset
] = item
1173 def __delitem__(self
, i
):
1178 del self
.parent
[i
+ self
.parent_offset
]
1180 assert i
.step
is None, 'cannot handle slice with stride'
1181 del self
.data
[i
.start
:i
.stop
]
1182 del self
.items
[i
.start
:i
.stop
]
1184 del self
.parent
[(i
.start
or 0) + self
.parent_offset
1185 : (i
.stop
or len(self
)) + self
.parent_offset
]
1187 def __add__(self
, other
):
1188 if isinstance(other
, ViewList
):
1189 return self
.__class
__(self
.data
+ other
.data
,
1190 items
=(self
.items
+ other
.items
))
1192 raise TypeError('adding non-ViewList to a ViewList')
1194 def __radd__(self
, other
):
1195 if isinstance(other
, ViewList
):
1196 return self
.__class
__(other
.data
+ self
.data
,
1197 items
=(other
.items
+ self
.items
))
1199 raise TypeError('adding ViewList to a non-ViewList')
1201 def __iadd__(self
, other
):
1202 if isinstance(other
, ViewList
):
1203 self
.data
+= other
.data
1205 raise TypeError('argument to += must be a ViewList')
1208 def __mul__(self
, n
):
1209 return self
.__class
__(self
.data
* n
, items
=(self
.items
* n
))
1213 def __imul__(self
, n
):
1218 def extend(self
, other
):
1219 if not isinstance(other
, ViewList
):
1220 raise TypeError('extending a ViewList with a non-ViewList')
1222 self
.parent
.insert(len(self
.data
) + self
.parent_offset
, other
)
1223 self
.data
.extend(other
.data
)
1224 self
.items
.extend(other
.items
)
1226 def append(self
, item
, source
=None, offset
=0):
1231 self
.parent
.insert(len(self
.data
) + self
.parent_offset
, item
,
1233 self
.data
.append(item
)
1234 self
.items
.append((source
, offset
))
1236 def insert(self
, i
, item
, source
=None, offset
=0):
1238 if not isinstance(item
, ViewList
):
1239 raise TypeError('inserting non-ViewList with no source given')
1240 self
.data
[i
:i
] = item
.data
1241 self
.items
[i
:i
] = item
.items
1243 index
= (len(self
.data
) + i
) % len(self
.data
)
1244 self
.parent
.insert(index
+ self
.parent_offset
, item
)
1246 self
.data
.insert(i
, item
)
1247 self
.items
.insert(i
, (source
, offset
))
1249 index
= (len(self
.data
) + i
) % len(self
.data
)
1250 self
.parent
.insert(index
+ self
.parent_offset
, item
,
1253 def pop(self
, i
=-1):
1255 index
= (len(self
.data
) + i
) % len(self
.data
)
1256 self
.parent
.pop(index
+ self
.parent_offset
)
1258 return self
.data
.pop(i
)
1260 def trim_start(self
, n
=1):
1262 Remove items from the start of the list, without touching the parent.
1264 if n
> len(self
.data
):
1265 raise IndexError("Size of trim too large; can't trim %s items "
1266 "from a list of size %s." % (n
, len(self
.data
)))
1268 raise IndexError('Trim size must be >= 0.')
1272 self
.parent_offset
+= n
1274 def trim_end(self
, n
=1):
1276 Remove items from the end of the list, without touching the parent.
1278 if n
> len(self
.data
):
1279 raise IndexError("Size of trim too large; can't trim %s items "
1280 "from a list of size %s." % (n
, len(self
.data
)))
1282 raise IndexError('Trim size must be >= 0.')
1286 def remove(self
, item
):
1287 index
= self
.index(item
)
1290 def count(self
, item
): return self
.data
.count(item
)
1291 def index(self
, item
): return self
.data
.index(item
)
1295 self
.items
.reverse()
1298 def sort(self
, *args
):
1299 tmp
= zip(self
.data
, self
.items
)
1301 self
.data
= [entry
[0] for entry
in tmp
]
1302 self
.items
= [entry
[1] for entry
in tmp
]
1306 """Return source & offset for index `i`."""
1308 return self
.items
[i
]
1310 if i
== len(self
.data
): # Just past the end
1311 return self
.items
[i
- 1][0], None
1315 def source(self
, i
):
1316 """Return source for index `i`."""
1317 return self
.info(i
)[0]
1319 def offset(self
, i
):
1320 """Return offset for index `i`."""
1321 return self
.info(i
)[1]
1323 def disconnect(self
):
1324 """Break link between this list and parent list."""
1328 """Return iterator yielding (source, offset, value) tuples."""
1329 for (value
, (source
, offset
)) in zip(self
.data
, self
.items
):
1330 yield (source
, offset
, value
)
1333 """Print the list in `grep` format (`source:offset:value` lines)"""
1334 for line
in self
.xitems():
1335 print "%s:%d:%s" % line
1338 class StringList(ViewList
):
1340 """A `ViewList` with string-specific methods."""
1342 def trim_left(self
, length
, start
=0, end
=sys
.maxint
):
1344 Trim `length` characters off the beginning of each item, in-place,
1345 from index `start` to `end`. No whitespace-checking is done on the
1346 trimmed text. Does not affect slice parent.
1348 self
.data
[start
:end
] = [line
[length
:]
1349 for line
in self
.data
[start
:end
]]
1351 def get_text_block(self
, start
, flush_left
=0):
1353 Return a contiguous block of text.
1355 If `flush_left` is true, raise `UnexpectedIndentationError` if an
1356 indented line is encountered before the text block ends (with a blank
1360 last
= len(self
.data
)
1362 line
= self
.data
[end
]
1363 if not line
.strip():
1365 if flush_left
and (line
[0] == ' '):
1366 source
, offset
= self
.info(end
)
1367 raise UnexpectedIndentationError(self
[start
:end
], source
,
1370 return self
[start
:end
]
1372 def get_indented(self
, start
=0, until_blank
=0, strip_indent
=1,
1373 block_indent
=None, first_indent
=None):
1375 Extract and return a StringList of indented lines of text.
1377 Collect all lines with indentation, determine the minimum indentation,
1378 remove the minimum indentation from all indented lines (unless
1379 `strip_indent` is false), and return them. All lines up to but not
1380 including the first unindented line will be returned.
1383 - `start`: The index of the first line to examine.
1384 - `until_blank`: Stop collecting at the first blank line if true.
1385 - `strip_indent`: Strip common leading indent if true (default).
1386 - `block_indent`: The indent of the entire block, if known.
1387 - `first_indent`: The indent of the first line, if known.
1390 - a StringList of indented lines with mininum indent removed;
1391 - the amount of the indent;
1392 - a boolean: did the indented block finish with a blank line or EOF?
1394 indent
= block_indent
# start with None if unknown
1396 if block_indent
is not None and first_indent
is None:
1397 first_indent
= block_indent
1398 if first_indent
is not None:
1400 last
= len(self
.data
)
1402 line
= self
.data
[end
]
1403 if line
and (line
[0] != ' '
1404 or (block_indent
is not None
1405 and line
[:block_indent
].strip())):
1406 # Line not indented or insufficiently indented.
1407 # Block finished properly iff the last indented line blank:
1408 blank_finish
= ((end
> start
)
1409 and not self
.data
[end
- 1].strip())
1411 stripped
= line
.lstrip()
1412 if not stripped
: # blank line
1416 elif block_indent
is None:
1417 line_indent
= len(line
) - len(stripped
)
1419 indent
= line_indent
1421 indent
= min(indent
, line_indent
)
1424 blank_finish
= 1 # block ends at end of lines
1425 block
= self
[start
:end
]
1426 if first_indent
is not None and block
:
1427 block
.data
[0] = block
.data
[0][first_indent
:]
1428 if indent
and strip_indent
:
1429 block
.trim_left(indent
, start
=(first_indent
is not None))
1430 return block
, indent
or 0, blank_finish
1432 def get_2D_block(self
, top
, left
, bottom
, right
, strip_indent
=1):
1433 block
= self
[top
:bottom
]
1435 for i
in range(len(block
.data
)):
1436 # get slice from line, care for combining characters
1437 ci
= utils
.column_indices(block
.data
[i
])
1441 left
+= len(block
.data
[i
]) - len(ci
)
1445 right
+= len(block
.data
[i
]) - len(ci
)
1446 block
.data
[i
] = line
= block
.data
[i
][left
:right
].rstrip()
1448 indent
= min(indent
, len(line
) - len(line
.lstrip()))
1449 if strip_indent
and 0 < indent
< right
:
1450 block
.data
= [line
[indent
:] for line
in block
.data
]
1453 def pad_double_width(self
, pad_char
):
1455 Pad all double-width characters in self by appending `pad_char` to each.
1456 For East Asian language support.
1458 if hasattr(unicodedata
, 'east_asian_width'):
1459 east_asian_width
= unicodedata
.east_asian_width
1461 return # new in Python 2.4
1462 for i
in range(len(self
.data
)):
1464 if isinstance(line
, unicode):
1468 if east_asian_width(char
) in 'WF': # 'W'ide & 'F'ull-width
1469 new
.append(pad_char
)
1470 self
.data
[i
] = ''.join(new
)
1472 def replace(self
, old
, new
):
1473 """Replace all occurrences of substring `old` with `new`."""
1474 for i
in range(len(self
.data
)):
1475 self
.data
[i
] = self
.data
[i
].replace(old
, new
)
1478 class StateMachineError(Exception): pass
1479 class UnknownStateError(StateMachineError
): pass
1480 class DuplicateStateError(StateMachineError
): pass
1481 class UnknownTransitionError(StateMachineError
): pass
1482 class DuplicateTransitionError(StateMachineError
): pass
1483 class TransitionPatternNotFound(StateMachineError
): pass
1484 class TransitionMethodNotFound(StateMachineError
): pass
1485 class UnexpectedIndentationError(StateMachineError
): pass
1488 class TransitionCorrection(Exception):
1491 Raise from within a transition method to switch to another transition.
1493 Raise with one argument, the new transition name.
1497 class StateCorrection(Exception):
1500 Raise from within a transition method to switch to another state.
1502 Raise with one or two arguments: new state name, and an optional new
1507 def string2lines(astring
, tab_width
=8, convert_whitespace
=0,
1508 whitespace
=re
.compile('[\v\f]')):
1510 Return a list of one-line strings with tabs expanded, no newlines, and
1511 trailing whitespace stripped.
1513 Each tab is expanded with between 1 and `tab_width` spaces, so that the
1514 next character's index becomes a multiple of `tab_width` (8 by default).
1518 - `astring`: a multi-line string.
1519 - `tab_width`: the number of columns between tab stops.
1520 - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1522 if convert_whitespace
:
1523 astring
= whitespace
.sub(' ', astring
)
1524 return [s
.expandtabs(tab_width
).rstrip() for s
in astring
.splitlines()]
1526 def _exception_data():
1528 Return exception information:
1530 - the exception's class name;
1531 - the exception object;
1532 - the name of the file containing the offending code;
1533 - the line number of the offending code;
1534 - the function name of the offending code.
1536 type, value
, traceback
= sys
.exc_info()
1537 while traceback
.tb_next
:
1538 traceback
= traceback
.tb_next
1539 code
= traceback
.tb_frame
.f_code
1540 return (type.__name
__, value
, code
.co_filename
, traceback
.tb_lineno
,