2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 A finite state machine specialized for regular-expression-based text filters,
7 this module defines the following classes:
9 - `StateMachine`, a state machine
10 - `State`, a state superclass
11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12 - `StateWS`, a state superclass for use with `StateMachineWS`
13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15 - `ViewList`, extends standard Python lists.
16 - `StringList`, string-specific ViewList.
22 - `DuplicateStateError`
23 - `UnknownTransitionError`
24 - `DuplicateTransitionError`
25 - `TransitionPatternNotFound`
26 - `TransitionMethodNotFound`
27 - `UnexpectedIndentationError`
28 - `TransitionCorrection`: Raised to switch to another transition.
29 - `StateCorrection`: Raised to switch to another state & transition.
33 - `string2lines()`: split a multi-line string into a list of one-line strings
36 How To Use This Module
37 ======================
38 (See the individual classes, methods, and attributes for details.)
40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
46 class MyState(statemachine.State):
48 Within the state's class definition:
50 a) Include a pattern for each transition, in `State.patterns`::
52 patterns = {'atransition': r'pattern', ...}
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
57 initial_transitions = ['atransition', ...]
59 c) Define a method for each transition, with the same name as the
62 def atransition(self, match, context, next_state):
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
68 Transition methods may raise an `EOFError` to cut processing short.
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
86 3. Create a state machine object::
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
91 4. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
95 input_string = open('inputfile').read()
96 input_lines = statemachine.string2lines(input_string)
98 5. Run the state machine on the input text and collect the results, a list::
100 results = sm.run(input_lines)
102 6. Remove any lingering circular references::
107 __docformat__
= 'restructuredtext'
113 from docutils
.error_reporting
import ErrorOutput
118 A finite state machine for text filters using regular expressions.
120 The input is provided in the form of a list of one-line strings (no
121 newlines). States are subclasses of the `State` class. Transitions consist
122 of regular expression patterns and transition methods, and are defined in
125 The state machine is started with the `run()` method, which returns the
126 results of processing in a list.
129 def __init__(self
, state_classes
, initial_state
, debug
=0):
131 Initialize a `StateMachine` object; add state objects.
135 - `state_classes`: a list of `State` (sub)classes.
136 - `initial_state`: a string, the class name of the initial state.
137 - `debug`: a boolean; produce verbose output if true (nonzero).
140 self
.input_lines
= None
141 """`StringList` of input lines (without newlines).
142 Filled by `self.run()`."""
144 self
.input_offset
= 0
145 """Offset of `self.input_lines` from the beginning of the file."""
148 """Current input line."""
150 self
.line_offset
= -1
151 """Current input line offset from beginning of `self.input_lines`."""
154 """Debugging mode on/off."""
156 self
.initial_state
= initial_state
157 """The name of the initial state (key to `self.states`)."""
159 self
.current_state
= initial_state
160 """The name of the current state (key to `self.states`)."""
163 """Mapping of {state_name: State_object}."""
165 self
.add_states(state_classes
)
168 """List of bound methods or functions to call whenever the current
169 line changes. Observers are called with one argument, ``self``.
170 Cleared at the end of `run()`."""
172 self
._stderr
= ErrorOutput()
173 """Wrapper around sys.stderr catching en-/decoding errors"""
177 """Remove circular references to objects no longer required."""
178 for state
in self
.states
.values():
182 def run(self
, input_lines
, input_offset
=0, context
=None,
183 input_source
=None, initial_state
=None):
185 Run the state machine on `input_lines`. Return results (a list).
187 Reset `self.line_offset` and `self.current_state`. Run the
188 beginning-of-file transition. Input one line at a time and check for a
189 matching transition. If a match is found, call the transition method
190 and possibly change the state. Store the context returned by the
191 transition method to be passed on to the next transition matched.
192 Accumulate the results returned by the transition methods in a list.
193 Run the end-of-file transition. Finally, return the accumulated
198 - `input_lines`: a list of strings without newlines, or `StringList`.
199 - `input_offset`: the line offset of `input_lines` from the beginning
201 - `context`: application-specific storage.
202 - `input_source`: name or path of source of `input_lines`.
203 - `initial_state`: name of initial state.
206 if isinstance(input_lines
, StringList
):
207 self
.input_lines
= input_lines
209 self
.input_lines
= StringList(input_lines
, source
=input_source
)
210 self
.input_offset
= input_offset
211 self
.line_offset
= -1
212 self
.current_state
= initial_state
or self
.initial_state
214 print >>self
._stderr
, (
215 u
'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
216 % (self
.line_offset
, u
'\n| '.join(self
.input_lines
)))
219 state
= self
.get_state()
222 print >>self
._stderr
, '\nStateMachine.run: bof transition'
223 context
, result
= state
.bof(context
)
224 results
.extend(result
)
230 source
, offset
= self
.input_lines
.info(
232 print >>self
._stderr
, (
233 u
'\nStateMachine.run: line (source=%r, '
235 % (source
, offset
, self
.line
))
236 context
, next_state
, result
= self
.check_line(
237 context
, state
, transitions
)
240 print >>self
._stderr
, (
241 '\nStateMachine.run: %s.eof transition'
242 % state
.__class
__.__name
__)
243 result
= state
.eof(context
)
244 results
.extend(result
)
247 results
.extend(result
)
248 except TransitionCorrection
, exception
:
249 self
.previous_line() # back up for another try
250 transitions
= (exception
.args
[0],)
252 print >>self
._stderr
, (
253 '\nStateMachine.run: TransitionCorrection to '
254 'state "%s", transition %s.'
255 % (state
.__class
__.__name
__, transitions
[0]))
257 except StateCorrection
, exception
:
258 self
.previous_line() # back up for another try
259 next_state
= exception
.args
[0]
260 if len(exception
.args
) == 1:
263 transitions
= (exception
.args
[1],)
265 print >>self
._stderr
, (
266 '\nStateMachine.run: StateCorrection to state '
267 '"%s", transition %s.'
268 % (next_state
, transitions
[0]))
271 state
= self
.get_state(next_state
)
279 def get_state(self
, next_state
=None):
281 Return current state object; set it first if `next_state` given.
283 Parameter `next_state`: a string, the name of the next state.
285 Exception: `UnknownStateError` raised if `next_state` unknown.
288 if self
.debug
and next_state
!= self
.current_state
:
289 print >>self
._stderr
, (
290 '\nStateMachine.get_state: Changing state from '
291 '"%s" to "%s" (input line %s).'
292 % (self
.current_state
, next_state
,
293 self
.abs_line_number()))
294 self
.current_state
= next_state
296 return self
.states
[self
.current_state
]
298 raise UnknownStateError(self
.current_state
)
300 def next_line(self
, n
=1):
301 """Load `self.line` with the `n`'th next line and return it."""
304 self
.line_offset
+= n
305 self
.line
= self
.input_lines
[self
.line_offset
]
311 self
.notify_observers()
313 def is_next_line_blank(self
):
314 """Return 1 if the next line is blank or non-existant."""
316 return not self
.input_lines
[self
.line_offset
+ 1].strip()
321 """Return 1 if the input is at or past end-of-file."""
322 return self
.line_offset
>= len(self
.input_lines
) - 1
325 """Return 1 if the input is at or before beginning-of-file."""
326 return self
.line_offset
<= 0
328 def previous_line(self
, n
=1):
329 """Load `self.line` with the `n`'th previous line and return it."""
330 self
.line_offset
-= n
331 if self
.line_offset
< 0:
334 self
.line
= self
.input_lines
[self
.line_offset
]
335 self
.notify_observers()
338 def goto_line(self
, line_offset
):
339 """Jump to absolute line offset `line_offset`, load and return it."""
342 self
.line_offset
= line_offset
- self
.input_offset
343 self
.line
= self
.input_lines
[self
.line_offset
]
349 self
.notify_observers()
351 def get_source(self
, line_offset
):
352 """Return source of line at absolute line offset `line_offset`."""
353 return self
.input_lines
.source(line_offset
- self
.input_offset
)
355 def abs_line_offset(self
):
356 """Return line offset of current line, from beginning of file."""
357 return self
.line_offset
+ self
.input_offset
359 def abs_line_number(self
):
360 """Return line number of current line (counting from 1)."""
361 return self
.line_offset
+ self
.input_offset
+ 1
363 def get_source_and_line(self
, lineno
=None):
364 """Return (source, line) tuple for current or given line number.
366 Looks up the source and line number in the `self.input_lines`
367 StringList instance to count for included source files.
369 If the optional argument `lineno` is given, convert it from an
370 absolute line number to the corresponding (source, line) pair.
373 offset
= self
.line_offset
375 offset
= lineno
- self
.input_offset
- 1
377 src
, srcoffset
= self
.input_lines
.info(offset
)
378 srcline
= srcoffset
+ 1
380 # line is None if index is "Just past the end"
381 src
, srcline
= self
.get_source_and_line(offset
+ self
.input_offset
)
382 return src
, srcline
+ 1
383 except (IndexError): # `offset` is off the list
384 src
, srcline
= None, None
385 # raise AssertionError('cannot find line %d in %s lines' %
386 # (offset, len(self.input_lines)))
387 # # list(self.input_lines.lines())))
388 # assert offset == srcoffset, str(self.input_lines)
389 # print "get_source_and_line(%s):" % lineno,
390 # print offset + 1, '->', src, srcline
391 # print self.input_lines
392 return (src
, srcline
)
394 def insert_input(self
, input_lines
, source
):
395 self
.input_lines
.insert(self
.line_offset
+ 1, '',
396 source
='internal padding after '+source
,
397 offset
=len(input_lines
))
398 self
.input_lines
.insert(self
.line_offset
+ 1, '',
399 source
='internal padding before '+source
,
401 self
.input_lines
.insert(self
.line_offset
+ 2,
402 StringList(input_lines
, source
))
404 def get_text_block(self
, flush_left
=0):
406 Return a contiguous block of text.
408 If `flush_left` is true, raise `UnexpectedIndentationError` if an
409 indented line is encountered before the text block ends (with a blank
413 block
= self
.input_lines
.get_text_block(self
.line_offset
,
415 self
.next_line(len(block
) - 1)
417 except UnexpectedIndentationError
, error
:
418 block
, source
, lineno
= error
.args
419 self
.next_line(len(block
) - 1) # advance to last line of block
422 def check_line(self
, context
, state
, transitions
=None):
424 Examine one line of input for a transition match & execute its method.
428 - `context`: application-dependent storage.
429 - `state`: a `State` object, the current state.
430 - `transitions`: an optional ordered list of transition names to try,
431 instead of ``state.transition_order``.
433 Return the values returned by the transition method:
435 - context: possibly modified from the parameter `context`;
436 - next state name (`State` subclass name);
437 - the result output of the transition, a list.
439 When there is no match, ``state.no_match()`` is called and its return
442 if transitions
is None:
443 transitions
= state
.transition_order
444 state_correction
= None
446 print >>self
._stderr
, (
447 '\nStateMachine.check_line: state="%s", transitions=%r.'
448 % (state
.__class
__.__name
__, transitions
))
449 for name
in transitions
:
450 pattern
, method
, next_state
= state
.transitions
[name
]
451 match
= pattern
.match(self
.line
)
454 print >>self
._stderr
, (
455 '\nStateMachine.check_line: Matched transition '
456 '"%s" in state "%s".'
457 % (name
, state
.__class
__.__name
__))
458 return method(match
, context
, next_state
)
461 print >>self
._stderr
, (
462 '\nStateMachine.check_line: No match in state "%s".'
463 % state
.__class
__.__name
__)
464 return state
.no_match(context
, transitions
)
466 def add_state(self
, state_class
):
468 Initialize & add a `state_class` (`State` subclass) object.
470 Exception: `DuplicateStateError` raised if `state_class` was already
473 statename
= state_class
.__name
__
474 if statename
in self
.states
:
475 raise DuplicateStateError(statename
)
476 self
.states
[statename
] = state_class(self
, self
.debug
)
478 def add_states(self
, state_classes
):
480 Add `state_classes` (a list of `State` subclasses).
482 for state_class
in state_classes
:
483 self
.add_state(state_class
)
485 def runtime_init(self
):
487 Initialize `self.states`.
489 for state
in self
.states
.values():
493 """Report error details."""
494 type, value
, module
, line
, function
= _exception_data()
495 print >>self
._stderr
, u
'%s: %s' % (type, value
)
496 print >>self
._stderr
, 'input line %s' % (self
.abs_line_number())
497 print >>self
._stderr
, (u
'module %s, line %s, function %s' %
498 (module
, line
, function
))
500 def attach_observer(self
, observer
):
502 The `observer` parameter is a function or bound method which takes two
503 arguments, the source and offset of the current line.
505 self
.observers
.append(observer
)
507 def detach_observer(self
, observer
):
508 self
.observers
.remove(observer
)
510 def notify_observers(self
):
511 for observer
in self
.observers
:
513 info
= self
.input_lines
.info(self
.line_offset
)
522 State superclass. Contains a list of transitions, and transition methods.
524 Transition methods all have the same signature. They take 3 parameters:
526 - An `re` match object. ``match.string`` contains the matched input line,
527 ``match.start()`` gives the start index of the match, and
528 ``match.end()`` gives the end index.
529 - A context object, whose meaning is application-defined (initial value
530 ``None``). It can be used to store any information required by the state
531 machine, and the retured context is passed on to the next transition
533 - The name of the next state, a string, taken from the transitions list;
534 normally it is returned unchanged, but it may be altered by the
535 transition method if necessary.
537 Transition methods all return a 3-tuple:
539 - A context object, as (potentially) modified by the transition method.
540 - The next state name (a return value of ``None`` means no state change).
541 - The processing result, a list, which is accumulated by the state
544 Transition methods may raise an `EOFError` to cut processing short.
546 There are two implicit transitions, and corresponding transition methods
547 are defined: `bof()` handles the beginning-of-file, and `eof()` handles
548 the end-of-file. These methods have non-standard signatures and return
549 values. `bof()` returns the initial context and results, and may be used
550 to return a header string, or do any other processing needed. `eof()`
551 should handle any remaining context and wrap things up; it returns the
552 final processing result.
554 Typical applications need only subclass `State` (or a subclass), set the
555 `patterns` and `initial_transitions` class attributes, and provide
556 corresponding transition methods. The default object initialization will
557 take care of constructing the list of transitions.
562 {Name: pattern} mapping, used by `make_transition()`. Each pattern may
563 be a string or a compiled `re` pattern. Override in subclasses.
566 initial_transitions
= None
568 A list of transitions to initialize when a `State` is instantiated.
569 Each entry is either a transition name string, or a (transition name, next
570 state name) pair. See `make_transitions()`. Override in subclasses.
575 The `StateMachine` class for handling nested processing.
577 If left as ``None``, `nested_sm` defaults to the class of the state's
578 controlling state machine. Override it in subclasses to avoid the default.
581 nested_sm_kwargs
= None
583 Keyword arguments dictionary, passed to the `nested_sm` constructor.
585 Two keys must have entries in the dictionary:
587 - Key 'state_classes' must be set to a list of `State` classes.
588 - Key 'initial_state' must be set to the name of the initial state class.
590 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
591 class of the current state, and 'initial_state' defaults to the name of
592 the class of the current state. Override in subclasses to avoid the
596 def __init__(self
, state_machine
, debug
=0):
598 Initialize a `State` object; make & add initial transitions.
602 - `statemachine`: the controlling `StateMachine` object.
603 - `debug`: a boolean; produce verbose output if true (nonzero).
606 self
.transition_order
= []
607 """A list of transition names in search order."""
609 self
.transitions
= {}
611 A mapping of transition names to 3-tuples containing
612 (compiled_pattern, transition_method, next_state_name). Initialized as
613 an instance attribute dynamically (instead of as a class attribute)
614 because it may make forward references to patterns and methods in this
618 self
.add_initial_transitions()
620 self
.state_machine
= state_machine
621 """A reference to the controlling `StateMachine` object."""
624 """Debugging mode on/off."""
626 if self
.nested_sm
is None:
627 self
.nested_sm
= self
.state_machine
.__class
__
628 if self
.nested_sm_kwargs
is None:
629 self
.nested_sm_kwargs
= {'state_classes': [self
.__class
__],
630 'initial_state': self
.__class
__.__name
__}
632 def runtime_init(self
):
634 Initialize this `State` before running the state machine; called from
635 `self.state_machine.run()`.
640 """Remove circular references to objects no longer required."""
641 self
.state_machine
= None
643 def add_initial_transitions(self
):
644 """Make and add transitions listed in `self.initial_transitions`."""
645 if self
.initial_transitions
:
646 names
, transitions
= self
.make_transitions(
647 self
.initial_transitions
)
648 self
.add_transitions(names
, transitions
)
650 def add_transitions(self
, names
, transitions
):
652 Add a list of transitions to the start of the transition list.
656 - `names`: a list of transition names.
657 - `transitions`: a mapping of names to transition tuples.
659 Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
662 if name
in self
.transitions
:
663 raise DuplicateTransitionError(name
)
664 if name
not in transitions
:
665 raise UnknownTransitionError(name
)
666 self
.transition_order
[:0] = names
667 self
.transitions
.update(transitions
)
669 def add_transition(self
, name
, transition
):
671 Add a transition to the start of the transition list.
673 Parameter `transition`: a ready-made transition 3-tuple.
675 Exception: `DuplicateTransitionError`.
677 if name
in self
.transitions
:
678 raise DuplicateTransitionError(name
)
679 self
.transition_order
[:0] = [name
]
680 self
.transitions
[name
] = transition
682 def remove_transition(self
, name
):
684 Remove a transition by `name`.
686 Exception: `UnknownTransitionError`.
689 del self
.transitions
[name
]
690 self
.transition_order
.remove(name
)
692 raise UnknownTransitionError(name
)
694 def make_transition(self
, name
, next_state
=None):
696 Make & return a transition tuple based on `name`.
698 This is a convenience function to simplify transition creation.
702 - `name`: a string, the name of the transition pattern & method. This
703 `State` object must have a method called '`name`', and a dictionary
704 `self.patterns` containing a key '`name`'.
705 - `next_state`: a string, the name of the next `State` object for this
706 transition. A value of ``None`` (or absent) implies no state change
707 (i.e., continue with the same state).
709 Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
711 if next_state
is None:
712 next_state
= self
.__class
__.__name
__
714 pattern
= self
.patterns
[name
]
715 if not hasattr(pattern
, 'match'):
716 pattern
= re
.compile(pattern
)
718 raise TransitionPatternNotFound(
719 '%s.patterns[%r]' % (self
.__class
__.__name
__, name
))
721 method
= getattr(self
, name
)
722 except AttributeError:
723 raise TransitionMethodNotFound(
724 '%s.%s' % (self
.__class
__.__name
__, name
))
725 return (pattern
, method
, next_state
)
727 def make_transitions(self
, name_list
):
729 Return a list of transition names and a transition mapping.
731 Parameter `name_list`: a list, where each entry is either a transition
732 name string, or a 1- or 2-tuple (transition name, optional next state
735 stringtype
= type('')
738 for namestate
in name_list
:
739 if type(namestate
) is stringtype
:
740 transitions
[namestate
] = self
.make_transition(namestate
)
741 names
.append(namestate
)
743 transitions
[namestate
[0]] = self
.make_transition(*namestate
)
744 names
.append(namestate
[0])
745 return names
, transitions
747 def no_match(self
, context
, transitions
):
749 Called when there is no match from `StateMachine.check_line()`.
751 Return the same values returned by transition methods:
753 - context: unchanged;
754 - next state name: ``None``;
757 Override in subclasses to catch this event.
759 return context
, None, []
761 def bof(self
, context
):
763 Handle beginning-of-file. Return unchanged `context`, empty result.
765 Override in subclasses.
767 Parameter `context`: application-defined storage.
771 def eof(self
, context
):
773 Handle end-of-file. Return empty result.
775 Override in subclasses.
777 Parameter `context`: application-defined storage.
781 def nop(self
, match
, context
, next_state
):
783 A "do nothing" transition method.
785 Return unchanged `context` & `next_state`, empty result. Useful for
786 simple state changes (actionless transitions).
788 return context
, next_state
, []
791 class StateMachineWS(StateMachine
):
794 `StateMachine` subclass specialized for whitespace recognition.
796 There are three methods provided for extracting indented text blocks:
798 - `get_indented()`: use when the indent is unknown.
799 - `get_known_indented()`: use when the indent is known for all lines.
800 - `get_first_known_indented()`: use when only the first line's indent is
804 def get_indented(self
, until_blank
=0, strip_indent
=1):
806 Return a block of indented lines of text, and info.
808 Extract an indented block where the indent is unknown for all lines.
811 - `until_blank`: Stop collecting at the first blank line if true
813 - `strip_indent`: Strip common leading indent if true (1,
817 - the indented block (a list of lines of text),
819 - its first line offset from BOF, and
820 - whether or not it finished with a blank line.
822 offset
= self
.abs_line_offset()
823 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
824 self
.line_offset
, until_blank
, strip_indent
)
826 self
.next_line(len(indented
) - 1) # advance to last indented line
827 while indented
and not indented
[0].strip():
828 indented
.trim_start()
830 return indented
, indent
, offset
, blank_finish
832 def get_known_indented(self
, indent
, until_blank
=0, strip_indent
=1):
834 Return an indented block and info.
836 Extract an indented block where the indent is known for all lines.
837 Starting with the current line, extract the entire text block with at
838 least `indent` indentation (which must be whitespace, except for the
842 - `indent`: The number of indent columns/characters.
843 - `until_blank`: Stop collecting at the first blank line if true
845 - `strip_indent`: Strip `indent` characters of indentation if true
849 - the indented block,
850 - its first line offset from BOF, and
851 - whether or not it finished with a blank line.
853 offset
= self
.abs_line_offset()
854 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
855 self
.line_offset
, until_blank
, strip_indent
,
857 self
.next_line(len(indented
) - 1) # advance to last indented line
858 while indented
and not indented
[0].strip():
859 indented
.trim_start()
861 return indented
, offset
, blank_finish
863 def get_first_known_indented(self
, indent
, until_blank
=0, strip_indent
=1,
866 Return an indented block and info.
868 Extract an indented block where the indent is known for the first line
869 and unknown for all other lines.
872 - `indent`: The first line's indent (# of columns/characters).
873 - `until_blank`: Stop collecting at the first blank line if true
875 - `strip_indent`: Strip `indent` characters of indentation if true
877 - `strip_top`: Strip blank lines from the beginning of the block.
880 - the indented block,
882 - its first line offset from BOF, and
883 - whether or not it finished with a blank line.
885 offset
= self
.abs_line_offset()
886 indented
, indent
, blank_finish
= self
.input_lines
.get_indented(
887 self
.line_offset
, until_blank
, strip_indent
,
889 self
.next_line(len(indented
) - 1) # advance to last indented line
891 while indented
and not indented
[0].strip():
892 indented
.trim_start()
894 return indented
, indent
, offset
, blank_finish
897 class StateWS(State
):
900 State superclass specialized for whitespace (blank lines & indents).
902 Use this class with `StateMachineWS`. The transitions 'blank' (for blank
903 lines) and 'indent' (for indented text blocks) are added automatically,
904 before any other transitions. The transition method `blank()` handles
905 blank lines and `indent()` handles nested indented blocks. Indented
906 blocks trigger a new state machine to be created by `indent()` and run.
907 The class of the state machine to be created is in `indent_sm`, and the
908 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
910 The methods `known_indent()` and `firstknown_indent()` are provided for
911 indented blocks where the indent (all lines' and first line's only,
912 respectively) is known to the transition method, along with the attributes
913 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
914 is triggered automatically.
919 The `StateMachine` class handling indented text blocks.
921 If left as ``None``, `indent_sm` defaults to the value of
922 `State.nested_sm`. Override it in subclasses to avoid the default.
925 indent_sm_kwargs
= None
927 Keyword arguments dictionary, passed to the `indent_sm` constructor.
929 If left as ``None``, `indent_sm_kwargs` defaults to the value of
930 `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
933 known_indent_sm
= None
935 The `StateMachine` class handling known-indented text blocks.
937 If left as ``None``, `known_indent_sm` defaults to the value of
938 `indent_sm`. Override it in subclasses to avoid the default.
941 known_indent_sm_kwargs
= None
943 Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
945 If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
946 `indent_sm_kwargs`. Override it in subclasses to avoid the default.
949 ws_patterns
= {'blank': ' *$',
951 """Patterns for default whitespace transitions. May be overridden in
954 ws_initial_transitions
= ('blank', 'indent')
955 """Default initial whitespace transitions, added before those listed in
956 `State.initial_transitions`. May be overridden in subclasses."""
958 def __init__(self
, state_machine
, debug
=0):
960 Initialize a `StateSM` object; extends `State.__init__()`.
962 Check for indent state machine attributes, set defaults if not set.
964 State
.__init
__(self
, state_machine
, debug
)
965 if self
.indent_sm
is None:
966 self
.indent_sm
= self
.nested_sm
967 if self
.indent_sm_kwargs
is None:
968 self
.indent_sm_kwargs
= self
.nested_sm_kwargs
969 if self
.known_indent_sm
is None:
970 self
.known_indent_sm
= self
.indent_sm
971 if self
.known_indent_sm_kwargs
is None:
972 self
.known_indent_sm_kwargs
= self
.indent_sm_kwargs
974 def add_initial_transitions(self
):
976 Add whitespace-specific transitions before those defined in subclass.
978 Extends `State.add_initial_transitions()`.
980 State
.add_initial_transitions(self
)
981 if self
.patterns
is None:
983 self
.patterns
.update(self
.ws_patterns
)
984 names
, transitions
= self
.make_transitions(
985 self
.ws_initial_transitions
)
986 self
.add_transitions(names
, transitions
)
988 def blank(self
, match
, context
, next_state
):
989 """Handle blank lines. Does nothing. Override in subclasses."""
990 return self
.nop(match
, context
, next_state
)
992 def indent(self
, match
, context
, next_state
):
994 Handle an indented text block. Extend or override in subclasses.
996 Recursively run the registered state machine for indented blocks
999 indented
, indent
, line_offset
, blank_finish
= \
1000 self
.state_machine
.get_indented()
1001 sm
= self
.indent_sm(debug
=self
.debug
, **self
.indent_sm_kwargs
)
1002 results
= sm
.run(indented
, input_offset
=line_offset
)
1003 return context
, next_state
, results
1005 def known_indent(self
, match
, context
, next_state
):
1007 Handle a known-indent text block. Extend or override in subclasses.
1009 Recursively run the registered state machine for known-indent indented
1010 blocks (`self.known_indent_sm`). The indent is the length of the
1011 match, ``match.end()``.
1013 indented
, line_offset
, blank_finish
= \
1014 self
.state_machine
.get_known_indented(match
.end())
1015 sm
= self
.known_indent_sm(debug
=self
.debug
,
1016 **self
.known_indent_sm_kwargs
)
1017 results
= sm
.run(indented
, input_offset
=line_offset
)
1018 return context
, next_state
, results
1020 def first_known_indent(self
, match
, context
, next_state
):
1022 Handle an indented text block (first line's indent known).
1024 Extend or override in subclasses.
1026 Recursively run the registered state machine for known-indent indented
1027 blocks (`self.known_indent_sm`). The indent is the length of the
1028 match, ``match.end()``.
1030 indented
, line_offset
, blank_finish
= \
1031 self
.state_machine
.get_first_known_indented(match
.end())
1032 sm
= self
.known_indent_sm(debug
=self
.debug
,
1033 **self
.known_indent_sm_kwargs
)
1034 results
= sm
.run(indented
, input_offset
=line_offset
)
1035 return context
, next_state
, results
1038 class _SearchOverride
:
1041 Mix-in class to override `StateMachine` regular expression behavior.
1043 Changes regular expression matching, from the default `re.match()`
1044 (succeeds only if the pattern matches at the start of `self.line`) to
1045 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1046 When subclassing a `StateMachine`, list this class **first** in the
1047 inheritance list of the class definition.
1050 def match(self
, pattern
):
1052 Return the result of a regular expression search.
1054 Overrides `StateMachine.match()`.
1056 Parameter `pattern`: `re` compiled regular expression.
1058 return pattern
.search(self
.line
)
1061 class SearchStateMachine(_SearchOverride
, StateMachine
):
1062 """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1066 class SearchStateMachineWS(_SearchOverride
, StateMachineWS
):
1067 """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1074 List with extended functionality: slices of ViewList objects are child
1075 lists, linked to their parents. Changes made to a child list also affect
1076 the parent list. A child list is effectively a "view" (in the SQL sense)
1077 of the parent list. Changes to parent lists, however, do *not* affect
1078 active child lists. If a parent list is changed, any active child lists
1079 should be recreated.
1081 The start and end of the slice can be trimmed using the `trim_start()` and
1082 `trim_end()` methods, without affecting the parent list. The link between
1083 child and parent lists can be broken by calling `disconnect()` on the
1086 Also, ViewList objects keep track of the source & offset of each item.
1087 This information is accessible via the `source()`, `offset()`, and
1091 def __init__(self
, initlist
=None, source
=None, items
=None,
1092 parent
=None, parent_offset
=None):
1094 """The actual list of data, flattened from various sources."""
1097 """A list of (source, offset) pairs, same length as `self.data`: the
1098 source of each line and the offset of each line from the beginning of
1101 self
.parent
= parent
1102 """The parent list."""
1104 self
.parent_offset
= parent_offset
1105 """Offset of this list from the beginning of the parent list."""
1107 if isinstance(initlist
, ViewList
):
1108 self
.data
= initlist
.data
[:]
1109 self
.items
= initlist
.items
[:]
1110 elif initlist
is not None:
1111 self
.data
= list(initlist
)
1115 self
.items
= [(source
, i
) for i
in range(len(initlist
))]
1116 assert len(self
.data
) == len(self
.items
), 'data mismatch'
1119 return str(self
.data
)
1122 return '%s(%s, items=%s)' % (self
.__class
__.__name
__,
1123 self
.data
, self
.items
)
1125 def __lt__(self
, other
): return self
.data
< self
.__cast
(other
)
1126 def __le__(self
, other
): return self
.data
<= self
.__cast
(other
)
1127 def __eq__(self
, other
): return self
.data
== self
.__cast
(other
)
1128 def __ne__(self
, other
): return self
.data
!= self
.__cast
(other
)
1129 def __gt__(self
, other
): return self
.data
> self
.__cast
(other
)
1130 def __ge__(self
, other
): return self
.data
>= self
.__cast
(other
)
1131 def __cmp__(self
, other
): return cmp(self
.data
, self
.__cast
(other
))
1133 def __cast(self
, other
):
1134 if isinstance(other
, ViewList
):
1139 def __contains__(self
, item
): return item
in self
.data
1140 def __len__(self
): return len(self
.data
)
1142 # The __getitem__()/__setitem__() methods check whether the index
1143 # is a slice first, since indexing a native list with a slice object
1146 def __getitem__(self
, i
):
1147 if isinstance(i
, types
.SliceType
):
1148 assert i
.step
in (None, 1), 'cannot handle slice with stride'
1149 return self
.__class
__(self
.data
[i
.start
:i
.stop
],
1150 items
=self
.items
[i
.start
:i
.stop
],
1151 parent
=self
, parent_offset
=i
.start
or 0)
1155 def __setitem__(self
, i
, item
):
1156 if isinstance(i
, types
.SliceType
):
1157 assert i
.step
in (None, 1), 'cannot handle slice with stride'
1158 if not isinstance(item
, ViewList
):
1159 raise TypeError('assigning non-ViewList to ViewList slice')
1160 self
.data
[i
.start
:i
.stop
] = item
.data
1161 self
.items
[i
.start
:i
.stop
] = item
.items
1162 assert len(self
.data
) == len(self
.items
), 'data mismatch'
1164 self
.parent
[(i
.start
or 0) + self
.parent_offset
1165 : (i
.stop
or len(self
)) + self
.parent_offset
] = item
1169 self
.parent
[i
+ self
.parent_offset
] = item
1171 def __delitem__(self
, i
):
1176 del self
.parent
[i
+ self
.parent_offset
]
1178 assert i
.step
is None, 'cannot handle slice with stride'
1179 del self
.data
[i
.start
:i
.stop
]
1180 del self
.items
[i
.start
:i
.stop
]
1182 del self
.parent
[(i
.start
or 0) + self
.parent_offset
1183 : (i
.stop
or len(self
)) + self
.parent_offset
]
1185 def __add__(self
, other
):
1186 if isinstance(other
, ViewList
):
1187 return self
.__class
__(self
.data
+ other
.data
,
1188 items
=(self
.items
+ other
.items
))
1190 raise TypeError('adding non-ViewList to a ViewList')
1192 def __radd__(self
, other
):
1193 if isinstance(other
, ViewList
):
1194 return self
.__class
__(other
.data
+ self
.data
,
1195 items
=(other
.items
+ self
.items
))
1197 raise TypeError('adding ViewList to a non-ViewList')
1199 def __iadd__(self
, other
):
1200 if isinstance(other
, ViewList
):
1201 self
.data
+= other
.data
1203 raise TypeError('argument to += must be a ViewList')
1206 def __mul__(self
, n
):
1207 return self
.__class
__(self
.data
* n
, items
=(self
.items
* n
))
1211 def __imul__(self
, n
):
1216 def extend(self
, other
):
1217 if not isinstance(other
, ViewList
):
1218 raise TypeError('extending a ViewList with a non-ViewList')
1220 self
.parent
.insert(len(self
.data
) + self
.parent_offset
, other
)
1221 self
.data
.extend(other
.data
)
1222 self
.items
.extend(other
.items
)
1224 def append(self
, item
, source
=None, offset
=0):
1229 self
.parent
.insert(len(self
.data
) + self
.parent_offset
, item
,
1231 self
.data
.append(item
)
1232 self
.items
.append((source
, offset
))
1234 def insert(self
, i
, item
, source
=None, offset
=0):
1236 if not isinstance(item
, ViewList
):
1237 raise TypeError('inserting non-ViewList with no source given')
1238 self
.data
[i
:i
] = item
.data
1239 self
.items
[i
:i
] = item
.items
1241 index
= (len(self
.data
) + i
) % len(self
.data
)
1242 self
.parent
.insert(index
+ self
.parent_offset
, item
)
1244 self
.data
.insert(i
, item
)
1245 self
.items
.insert(i
, (source
, offset
))
1247 index
= (len(self
.data
) + i
) % len(self
.data
)
1248 self
.parent
.insert(index
+ self
.parent_offset
, item
,
1251 def pop(self
, i
=-1):
1253 index
= (len(self
.data
) + i
) % len(self
.data
)
1254 self
.parent
.pop(index
+ self
.parent_offset
)
1256 return self
.data
.pop(i
)
1258 def trim_start(self
, n
=1):
1260 Remove items from the start of the list, without touching the parent.
1262 if n
> len(self
.data
):
1263 raise IndexError("Size of trim too large; can't trim %s items "
1264 "from a list of size %s." % (n
, len(self
.data
)))
1266 raise IndexError('Trim size must be >= 0.')
1270 self
.parent_offset
+= n
1272 def trim_end(self
, n
=1):
1274 Remove items from the end of the list, without touching the parent.
1276 if n
> len(self
.data
):
1277 raise IndexError("Size of trim too large; can't trim %s items "
1278 "from a list of size %s." % (n
, len(self
.data
)))
1280 raise IndexError('Trim size must be >= 0.')
1284 def remove(self
, item
):
1285 index
= self
.index(item
)
1288 def count(self
, item
): return self
.data
.count(item
)
1289 def index(self
, item
): return self
.data
.index(item
)
1293 self
.items
.reverse()
1296 def sort(self
, *args
):
1297 tmp
= zip(self
.data
, self
.items
)
1299 self
.data
= [entry
[0] for entry
in tmp
]
1300 self
.items
= [entry
[1] for entry
in tmp
]
1304 """Return source & offset for index `i`."""
1306 return self
.items
[i
]
1308 if i
== len(self
.data
): # Just past the end
1309 return self
.items
[i
- 1][0], None
1313 def source(self
, i
):
1314 """Return source for index `i`."""
1315 return self
.info(i
)[0]
1317 def offset(self
, i
):
1318 """Return offset for index `i`."""
1319 return self
.info(i
)[1]
1321 def disconnect(self
):
1322 """Break link between this list and parent list."""
1326 """Return iterator yielding (source, offset, value) tuples."""
1327 for (value
, (source
, offset
)) in zip(self
.data
, self
.items
):
1328 yield (source
, offset
, value
)
1331 """Print the list in `grep` format (`source:offset:value` lines)"""
1332 for line
in self
.xitems():
1333 print "%s:%d:%s" % line
1336 class StringList(ViewList
):
1338 """A `ViewList` with string-specific methods."""
1340 def trim_left(self
, length
, start
=0, end
=sys
.maxint
):
1342 Trim `length` characters off the beginning of each item, in-place,
1343 from index `start` to `end`. No whitespace-checking is done on the
1344 trimmed text. Does not affect slice parent.
1346 self
.data
[start
:end
] = [line
[length
:]
1347 for line
in self
.data
[start
:end
]]
1349 def get_text_block(self
, start
, flush_left
=0):
1351 Return a contiguous block of text.
1353 If `flush_left` is true, raise `UnexpectedIndentationError` if an
1354 indented line is encountered before the text block ends (with a blank
1358 last
= len(self
.data
)
1360 line
= self
.data
[end
]
1361 if not line
.strip():
1363 if flush_left
and (line
[0] == ' '):
1364 source
, offset
= self
.info(end
)
1365 raise UnexpectedIndentationError(self
[start
:end
], source
,
1368 return self
[start
:end
]
1370 def get_indented(self
, start
=0, until_blank
=0, strip_indent
=1,
1371 block_indent
=None, first_indent
=None):
1373 Extract and return a StringList of indented lines of text.
1375 Collect all lines with indentation, determine the minimum indentation,
1376 remove the minimum indentation from all indented lines (unless
1377 `strip_indent` is false), and return them. All lines up to but not
1378 including the first unindented line will be returned.
1381 - `start`: The index of the first line to examine.
1382 - `until_blank`: Stop collecting at the first blank line if true.
1383 - `strip_indent`: Strip common leading indent if true (default).
1384 - `block_indent`: The indent of the entire block, if known.
1385 - `first_indent`: The indent of the first line, if known.
1388 - a StringList of indented lines with mininum indent removed;
1389 - the amount of the indent;
1390 - a boolean: did the indented block finish with a blank line or EOF?
1392 indent
= block_indent
# start with None if unknown
1394 if block_indent
is not None and first_indent
is None:
1395 first_indent
= block_indent
1396 if first_indent
is not None:
1398 last
= len(self
.data
)
1400 line
= self
.data
[end
]
1401 if line
and (line
[0] != ' '
1402 or (block_indent
is not None
1403 and line
[:block_indent
].strip())):
1404 # Line not indented or insufficiently indented.
1405 # Block finished properly iff the last indented line blank:
1406 blank_finish
= ((end
> start
)
1407 and not self
.data
[end
- 1].strip())
1409 stripped
= line
.lstrip()
1410 if not stripped
: # blank line
1414 elif block_indent
is None:
1415 line_indent
= len(line
) - len(stripped
)
1417 indent
= line_indent
1419 indent
= min(indent
, line_indent
)
1422 blank_finish
= 1 # block ends at end of lines
1423 block
= self
[start
:end
]
1424 if first_indent
is not None and block
:
1425 block
.data
[0] = block
.data
[0][first_indent
:]
1426 if indent
and strip_indent
:
1427 block
.trim_left(indent
, start
=(first_indent
is not None))
1428 return block
, indent
or 0, blank_finish
1430 def get_2D_block(self
, top
, left
, bottom
, right
, strip_indent
=1):
1431 block
= self
[top
:bottom
]
1433 for i
in range(len(block
.data
)):
1434 block
.data
[i
] = line
= block
.data
[i
][left
:right
].rstrip()
1436 indent
= min(indent
, len(line
) - len(line
.lstrip()))
1437 if strip_indent
and 0 < indent
< right
:
1438 block
.data
= [line
[indent
:] for line
in block
.data
]
1441 def pad_double_width(self
, pad_char
):
1443 Pad all double-width characters in self by appending `pad_char` to each.
1444 For East Asian language support.
1446 if hasattr(unicodedata
, 'east_asian_width'):
1447 east_asian_width
= unicodedata
.east_asian_width
1449 return # new in Python 2.4
1450 for i
in range(len(self
.data
)):
1452 if isinstance(line
, unicode):
1456 if east_asian_width(char
) in 'WF': # 'W'ide & 'F'ull-width
1457 new
.append(pad_char
)
1458 self
.data
[i
] = ''.join(new
)
1460 def replace(self
, old
, new
):
1461 """Replace all occurrences of substring `old` with `new`."""
1462 for i
in range(len(self
.data
)):
1463 self
.data
[i
] = self
.data
[i
].replace(old
, new
)
1466 class StateMachineError(Exception): pass
1467 class UnknownStateError(StateMachineError
): pass
1468 class DuplicateStateError(StateMachineError
): pass
1469 class UnknownTransitionError(StateMachineError
): pass
1470 class DuplicateTransitionError(StateMachineError
): pass
1471 class TransitionPatternNotFound(StateMachineError
): pass
1472 class TransitionMethodNotFound(StateMachineError
): pass
1473 class UnexpectedIndentationError(StateMachineError
): pass
1476 class TransitionCorrection(Exception):
1479 Raise from within a transition method to switch to another transition.
1481 Raise with one argument, the new transition name.
1485 class StateCorrection(Exception):
1488 Raise from within a transition method to switch to another state.
1490 Raise with one or two arguments: new state name, and an optional new
1495 def string2lines(astring
, tab_width
=8, convert_whitespace
=0,
1496 whitespace
=re
.compile('[\v\f]')):
1498 Return a list of one-line strings with tabs expanded, no newlines, and
1499 trailing whitespace stripped.
1501 Each tab is expanded with between 1 and `tab_width` spaces, so that the
1502 next character's index becomes a multiple of `tab_width` (8 by default).
1506 - `astring`: a multi-line string.
1507 - `tab_width`: the number of columns between tab stops.
1508 - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1510 if convert_whitespace
:
1511 astring
= whitespace
.sub(' ', astring
)
1512 return [s
.expandtabs(tab_width
).rstrip() for s
in astring
.splitlines()]
1514 def _exception_data():
1516 Return exception information:
1518 - the exception's class name;
1519 - the exception object;
1520 - the name of the file containing the offending code;
1521 - the line number of the offending code;
1522 - the function name of the offending code.
1524 type, value
, traceback
= sys
.exc_info()
1525 while traceback
.tb_next
:
1526 traceback
= traceback
.tb_next
1527 code
= traceback
.tb_frame
.f_code
1528 return (type.__name
__, value
, code
.co_filename
, traceback
.tb_lineno
,