Document table_style HTML writer option.
[docutils.git] / docutils / statemachine.py
blobce05d3b74836bf02f7d6080ee103246a1cacbe41
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 A finite state machine specialized for regular-expression-based text filters,
7 this module defines the following classes:
9 - `StateMachine`, a state machine
10 - `State`, a state superclass
11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
12 - `StateWS`, a state superclass for use with `StateMachineWS`
13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
15 - `ViewList`, extends standard Python lists.
16 - `StringList`, string-specific ViewList.
18 Exception classes:
20 - `StateMachineError`
21 - `UnknownStateError`
22 - `DuplicateStateError`
23 - `UnknownTransitionError`
24 - `DuplicateTransitionError`
25 - `TransitionPatternNotFound`
26 - `TransitionMethodNotFound`
27 - `UnexpectedIndentationError`
28 - `TransitionCorrection`: Raised to switch to another transition.
29 - `StateCorrection`: Raised to switch to another state & transition.
31 Functions:
33 - `string2lines()`: split a multi-line string into a list of one-line strings
36 How To Use This Module
37 ======================
38 (See the individual classes, methods, and attributes for details.)
40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
41 You will also need to ``import re``.
43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
44 machine::
46 class MyState(statemachine.State):
48 Within the state's class definition:
50 a) Include a pattern for each transition, in `State.patterns`::
52 patterns = {'atransition': r'pattern', ...}
54 b) Include a list of initial transitions to be set up automatically, in
55 `State.initial_transitions`::
57 initial_transitions = ['atransition', ...]
59 c) Define a method for each transition, with the same name as the
60 transition pattern::
62 def atransition(self, match, context, next_state):
63 # do something
64 result = [...] # a list
65 return context, next_state, result
66 # context, next_state may be altered
68 Transition methods may raise an `EOFError` to cut processing short.
70 d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
71 transition methods, which handle the beginning- and end-of-file.
73 e) In order to handle nested processing, you may wish to override the
74 attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
76 If you are using `StateWS` as a base class, in order to handle nested
77 indented blocks, you may wish to:
79 - override the attributes `StateWS.indent_sm`,
80 `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
81 `StateWS.known_indent_sm_kwargs`;
82 - override the `StateWS.blank()` method; and/or
83 - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
84 and/or `StateWS.firstknown_indent()` methods.
86 3. Create a state machine object::
88 sm = StateMachine(state_classes=[MyState, ...],
89 initial_state='MyState')
91 4. Obtain the input text, which needs to be converted into a tab-free list of
92 one-line strings. For example, to read text from a file called
93 'inputfile'::
95 input_string = open('inputfile').read()
96 input_lines = statemachine.string2lines(input_string)
98 5. Run the state machine on the input text and collect the results, a list::
100 results = sm.run(input_lines)
102 6. Remove any lingering circular references::
104 sm.unlink()
107 __docformat__ = 'restructuredtext'
109 import sys
110 import re
111 import types
112 import unicodedata
115 class StateMachine:
118 A finite state machine for text filters using regular expressions.
120 The input is provided in the form of a list of one-line strings (no
121 newlines). States are subclasses of the `State` class. Transitions consist
122 of regular expression patterns and transition methods, and are defined in
123 each state.
125 The state machine is started with the `run()` method, which returns the
126 results of processing in a list.
129 def __init__(self, state_classes, initial_state, debug=0):
131 Initialize a `StateMachine` object; add state objects.
133 Parameters:
135 - `state_classes`: a list of `State` (sub)classes.
136 - `initial_state`: a string, the class name of the initial state.
137 - `debug`: a boolean; produce verbose output if true (nonzero).
140 self.input_lines = None
141 """`StringList` of input lines (without newlines).
142 Filled by `self.run()`."""
144 self.input_offset = 0
145 """Offset of `self.input_lines` from the beginning of the file."""
147 self.line = None
148 """Current input line."""
150 self.line_offset = -1
151 """Current input line offset from beginning of `self.input_lines`."""
153 self.debug = debug
154 """Debugging mode on/off."""
156 self.initial_state = initial_state
157 """The name of the initial state (key to `self.states`)."""
159 self.current_state = initial_state
160 """The name of the current state (key to `self.states`)."""
162 self.states = {}
163 """Mapping of {state_name: State_object}."""
165 self.add_states(state_classes)
167 self.observers = []
168 """List of bound methods or functions to call whenever the current
169 line changes. Observers are called with one argument, ``self``.
170 Cleared at the end of `run()`."""
172 def unlink(self):
173 """Remove circular references to objects no longer required."""
174 for state in self.states.values():
175 state.unlink()
176 self.states = None
178 def run(self, input_lines, input_offset=0, context=None,
179 input_source=None, initial_state=None):
181 Run the state machine on `input_lines`. Return results (a list).
183 Reset `self.line_offset` and `self.current_state`. Run the
184 beginning-of-file transition. Input one line at a time and check for a
185 matching transition. If a match is found, call the transition method
186 and possibly change the state. Store the context returned by the
187 transition method to be passed on to the next transition matched.
188 Accumulate the results returned by the transition methods in a list.
189 Run the end-of-file transition. Finally, return the accumulated
190 results.
192 Parameters:
194 - `input_lines`: a list of strings without newlines, or `StringList`.
195 - `input_offset`: the line offset of `input_lines` from the beginning
196 of the file.
197 - `context`: application-specific storage.
198 - `input_source`: name or path of source of `input_lines`.
199 - `initial_state`: name of initial state.
201 self.runtime_init()
202 if isinstance(input_lines, StringList):
203 self.input_lines = input_lines
204 else:
205 self.input_lines = StringList(input_lines, source=input_source)
206 self.input_offset = input_offset
207 self.line_offset = -1
208 self.current_state = initial_state or self.initial_state
209 if self.debug:
210 print >>sys.stderr, (
211 '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
212 % (self.line_offset, '\n| '.join(self.input_lines)))
213 transitions = None
214 results = []
215 state = self.get_state()
216 try:
217 if self.debug:
218 print >>sys.stderr, ('\nStateMachine.run: bof transition')
219 context, result = state.bof(context)
220 results.extend(result)
221 while 1:
222 try:
223 try:
224 self.next_line()
225 if self.debug:
226 source, offset = self.input_lines.info(
227 self.line_offset)
228 print >>sys.stderr, (
229 '\nStateMachine.run: line (source=%r, '
230 'offset=%r):\n| %s'
231 % (source, offset, self.line))
232 context, next_state, result = self.check_line(
233 context, state, transitions)
234 except EOFError:
235 if self.debug:
236 print >>sys.stderr, (
237 '\nStateMachine.run: %s.eof transition'
238 % state.__class__.__name__)
239 result = state.eof(context)
240 results.extend(result)
241 break
242 else:
243 results.extend(result)
244 except TransitionCorrection, exception:
245 self.previous_line() # back up for another try
246 transitions = (exception.args[0],)
247 if self.debug:
248 print >>sys.stderr, (
249 '\nStateMachine.run: TransitionCorrection to '
250 'state "%s", transition %s.'
251 % (state.__class__.__name__, transitions[0]))
252 continue
253 except StateCorrection, exception:
254 self.previous_line() # back up for another try
255 next_state = exception.args[0]
256 if len(exception.args) == 1:
257 transitions = None
258 else:
259 transitions = (exception.args[1],)
260 if self.debug:
261 print >>sys.stderr, (
262 '\nStateMachine.run: StateCorrection to state '
263 '"%s", transition %s.'
264 % (next_state, transitions[0]))
265 else:
266 transitions = None
267 state = self.get_state(next_state)
268 except:
269 if self.debug:
270 self.error()
271 raise
272 self.observers = []
273 return results
275 def get_state(self, next_state=None):
277 Return current state object; set it first if `next_state` given.
279 Parameter `next_state`: a string, the name of the next state.
281 Exception: `UnknownStateError` raised if `next_state` unknown.
283 if next_state:
284 if self.debug and next_state != self.current_state:
285 print >>sys.stderr, \
286 ('\nStateMachine.get_state: Changing state from '
287 '"%s" to "%s" (input line %s).'
288 % (self.current_state, next_state,
289 self.abs_line_number()))
290 self.current_state = next_state
291 try:
292 return self.states[self.current_state]
293 except KeyError:
294 raise UnknownStateError(self.current_state)
296 def next_line(self, n=1):
297 """Load `self.line` with the `n`'th next line and return it."""
298 try:
299 try:
300 self.line_offset += n
301 self.line = self.input_lines[self.line_offset]
302 except IndexError:
303 self.line = None
304 raise EOFError
305 return self.line
306 finally:
307 self.notify_observers()
309 def is_next_line_blank(self):
310 """Return 1 if the next line is blank or non-existant."""
311 try:
312 return not self.input_lines[self.line_offset + 1].strip()
313 except IndexError:
314 return 1
316 def at_eof(self):
317 """Return 1 if the input is at or past end-of-file."""
318 return self.line_offset >= len(self.input_lines) - 1
320 def at_bof(self):
321 """Return 1 if the input is at or before beginning-of-file."""
322 return self.line_offset <= 0
324 def previous_line(self, n=1):
325 """Load `self.line` with the `n`'th previous line and return it."""
326 self.line_offset -= n
327 if self.line_offset < 0:
328 self.line = None
329 else:
330 self.line = self.input_lines[self.line_offset]
331 self.notify_observers()
332 return self.line
334 def goto_line(self, line_offset):
335 """Jump to absolute line offset `line_offset`, load and return it."""
336 try:
337 try:
338 self.line_offset = line_offset - self.input_offset
339 self.line = self.input_lines[self.line_offset]
340 except IndexError:
341 self.line = None
342 raise EOFError
343 return self.line
344 finally:
345 self.notify_observers()
347 def get_source(self, line_offset):
348 """Return source of line at absolute line offset `line_offset`."""
349 return self.input_lines.source(line_offset - self.input_offset)
351 def get_source_spot(self, line_offset=None):
352 """Return dict with source position of current or given line"""
353 if line_offset is None:
354 line_offset = self.line_offset
355 else:
356 line_offset -= self.input_offset
357 (source, offset) = self.input_lines.info(line_offset)
358 return {'source': source, 'line': offset + 1}
360 def abs_line_offset(self):
361 """Return line offset of current line, from beginning of file."""
362 return self.line_offset + self.input_offset
364 def abs_line_number(self):
365 """Return line number of current line (counting from 1)."""
366 return self.line_offset + self.input_offset + 1
368 def insert_input(self, input_lines, source):
369 self.input_lines.insert(self.line_offset + 1, '',
370 source='internal padding after ' + source)
371 self.input_lines.insert(self.line_offset + 1, '',
372 source='internal padding before '+ source)
373 self.input_lines.insert(self.line_offset + 2,
374 StringList(input_lines, source))
376 def get_text_block(self, flush_left=0):
378 Return a contiguous block of text.
380 If `flush_left` is true, raise `UnexpectedIndentationError` if an
381 indented line is encountered before the text block ends (with a blank
382 line).
384 try:
385 block = self.input_lines.get_text_block(self.line_offset,
386 flush_left)
387 self.next_line(len(block) - 1)
388 return block
389 except UnexpectedIndentationError, error:
390 block, source, lineno = error.args
391 self.next_line(len(block) - 1) # advance to last line of block
392 raise
394 def check_line(self, context, state, transitions=None):
396 Examine one line of input for a transition match & execute its method.
398 Parameters:
400 - `context`: application-dependent storage.
401 - `state`: a `State` object, the current state.
402 - `transitions`: an optional ordered list of transition names to try,
403 instead of ``state.transition_order``.
405 Return the values returned by the transition method:
407 - context: possibly modified from the parameter `context`;
408 - next state name (`State` subclass name);
409 - the result output of the transition, a list.
411 When there is no match, ``state.no_match()`` is called and its return
412 value is returned.
414 if transitions is None:
415 transitions = state.transition_order
416 state_correction = None
417 if self.debug:
418 print >>sys.stderr, (
419 '\nStateMachine.check_line: state="%s", transitions=%r.'
420 % (state.__class__.__name__, transitions))
421 for name in transitions:
422 pattern, method, next_state = state.transitions[name]
423 match = pattern.match(self.line)
424 if match:
425 if self.debug:
426 print >>sys.stderr, (
427 '\nStateMachine.check_line: Matched transition '
428 '"%s" in state "%s".'
429 % (name, state.__class__.__name__))
430 return method(match, context, next_state)
431 else:
432 if self.debug:
433 print >>sys.stderr, (
434 '\nStateMachine.check_line: No match in state "%s".'
435 % state.__class__.__name__)
436 return state.no_match(context, transitions)
438 def add_state(self, state_class):
440 Initialize & add a `state_class` (`State` subclass) object.
442 Exception: `DuplicateStateError` raised if `state_class` was already
443 added.
445 statename = state_class.__name__
446 if statename in self.states:
447 raise DuplicateStateError(statename)
448 self.states[statename] = state_class(self, self.debug)
450 def add_states(self, state_classes):
452 Add `state_classes` (a list of `State` subclasses).
454 for state_class in state_classes:
455 self.add_state(state_class)
457 def runtime_init(self):
459 Initialize `self.states`.
461 for state in self.states.values():
462 state.runtime_init()
464 def error(self):
465 """Report error details."""
466 type, value, module, line, function = _exception_data()
467 print >>sys.stderr, '%s: %s' % (type, value)
468 print >>sys.stderr, 'input line %s' % (self.abs_line_number())
469 print >>sys.stderr, ('module %s, line %s, function %s'
470 % (module, line, function))
472 def attach_observer(self, observer):
474 The `observer` parameter is a function or bound method which takes two
475 arguments, the source and offset of the current line.
477 self.observers.append(observer)
479 def detach_observer(self, observer):
480 self.observers.remove(observer)
482 def notify_observers(self):
483 for observer in self.observers:
484 try:
485 info = self.input_lines.info(self.line_offset)
486 except IndexError:
487 info = (None, None)
488 observer(*info)
491 class State:
494 State superclass. Contains a list of transitions, and transition methods.
496 Transition methods all have the same signature. They take 3 parameters:
498 - An `re` match object. ``match.string`` contains the matched input line,
499 ``match.start()`` gives the start index of the match, and
500 ``match.end()`` gives the end index.
501 - A context object, whose meaning is application-defined (initial value
502 ``None``). It can be used to store any information required by the state
503 machine, and the retured context is passed on to the next transition
504 method unchanged.
505 - The name of the next state, a string, taken from the transitions list;
506 normally it is returned unchanged, but it may be altered by the
507 transition method if necessary.
509 Transition methods all return a 3-tuple:
511 - A context object, as (potentially) modified by the transition method.
512 - The next state name (a return value of ``None`` means no state change).
513 - The processing result, a list, which is accumulated by the state
514 machine.
516 Transition methods may raise an `EOFError` to cut processing short.
518 There are two implicit transitions, and corresponding transition methods
519 are defined: `bof()` handles the beginning-of-file, and `eof()` handles
520 the end-of-file. These methods have non-standard signatures and return
521 values. `bof()` returns the initial context and results, and may be used
522 to return a header string, or do any other processing needed. `eof()`
523 should handle any remaining context and wrap things up; it returns the
524 final processing result.
526 Typical applications need only subclass `State` (or a subclass), set the
527 `patterns` and `initial_transitions` class attributes, and provide
528 corresponding transition methods. The default object initialization will
529 take care of constructing the list of transitions.
532 patterns = None
534 {Name: pattern} mapping, used by `make_transition()`. Each pattern may
535 be a string or a compiled `re` pattern. Override in subclasses.
538 initial_transitions = None
540 A list of transitions to initialize when a `State` is instantiated.
541 Each entry is either a transition name string, or a (transition name, next
542 state name) pair. See `make_transitions()`. Override in subclasses.
545 nested_sm = None
547 The `StateMachine` class for handling nested processing.
549 If left as ``None``, `nested_sm` defaults to the class of the state's
550 controlling state machine. Override it in subclasses to avoid the default.
553 nested_sm_kwargs = None
555 Keyword arguments dictionary, passed to the `nested_sm` constructor.
557 Two keys must have entries in the dictionary:
559 - Key 'state_classes' must be set to a list of `State` classes.
560 - Key 'initial_state' must be set to the name of the initial state class.
562 If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
563 class of the current state, and 'initial_state' defaults to the name of
564 the class of the current state. Override in subclasses to avoid the
565 defaults.
568 def __init__(self, state_machine, debug=0):
570 Initialize a `State` object; make & add initial transitions.
572 Parameters:
574 - `statemachine`: the controlling `StateMachine` object.
575 - `debug`: a boolean; produce verbose output if true (nonzero).
578 self.transition_order = []
579 """A list of transition names in search order."""
581 self.transitions = {}
583 A mapping of transition names to 3-tuples containing
584 (compiled_pattern, transition_method, next_state_name). Initialized as
585 an instance attribute dynamically (instead of as a class attribute)
586 because it may make forward references to patterns and methods in this
587 or other classes.
590 self.add_initial_transitions()
592 self.state_machine = state_machine
593 """A reference to the controlling `StateMachine` object."""
595 self.debug = debug
596 """Debugging mode on/off."""
598 if self.nested_sm is None:
599 self.nested_sm = self.state_machine.__class__
600 if self.nested_sm_kwargs is None:
601 self.nested_sm_kwargs = {'state_classes': [self.__class__],
602 'initial_state': self.__class__.__name__}
604 def runtime_init(self):
606 Initialize this `State` before running the state machine; called from
607 `self.state_machine.run()`.
609 pass
611 def unlink(self):
612 """Remove circular references to objects no longer required."""
613 self.state_machine = None
615 def add_initial_transitions(self):
616 """Make and add transitions listed in `self.initial_transitions`."""
617 if self.initial_transitions:
618 names, transitions = self.make_transitions(
619 self.initial_transitions)
620 self.add_transitions(names, transitions)
622 def add_transitions(self, names, transitions):
624 Add a list of transitions to the start of the transition list.
626 Parameters:
628 - `names`: a list of transition names.
629 - `transitions`: a mapping of names to transition tuples.
631 Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
633 for name in names:
634 if name in self.transitions:
635 raise DuplicateTransitionError(name)
636 if name not in transitions:
637 raise UnknownTransitionError(name)
638 self.transition_order[:0] = names
639 self.transitions.update(transitions)
641 def add_transition(self, name, transition):
643 Add a transition to the start of the transition list.
645 Parameter `transition`: a ready-made transition 3-tuple.
647 Exception: `DuplicateTransitionError`.
649 if name in self.transitions:
650 raise DuplicateTransitionError(name)
651 self.transition_order[:0] = [name]
652 self.transitions[name] = transition
654 def remove_transition(self, name):
656 Remove a transition by `name`.
658 Exception: `UnknownTransitionError`.
660 try:
661 del self.transitions[name]
662 self.transition_order.remove(name)
663 except:
664 raise UnknownTransitionError(name)
666 def make_transition(self, name, next_state=None):
668 Make & return a transition tuple based on `name`.
670 This is a convenience function to simplify transition creation.
672 Parameters:
674 - `name`: a string, the name of the transition pattern & method. This
675 `State` object must have a method called '`name`', and a dictionary
676 `self.patterns` containing a key '`name`'.
677 - `next_state`: a string, the name of the next `State` object for this
678 transition. A value of ``None`` (or absent) implies no state change
679 (i.e., continue with the same state).
681 Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
683 if next_state is None:
684 next_state = self.__class__.__name__
685 try:
686 pattern = self.patterns[name]
687 if not hasattr(pattern, 'match'):
688 pattern = re.compile(pattern)
689 except KeyError:
690 raise TransitionPatternNotFound(
691 '%s.patterns[%r]' % (self.__class__.__name__, name))
692 try:
693 method = getattr(self, name)
694 except AttributeError:
695 raise TransitionMethodNotFound(
696 '%s.%s' % (self.__class__.__name__, name))
697 return (pattern, method, next_state)
699 def make_transitions(self, name_list):
701 Return a list of transition names and a transition mapping.
703 Parameter `name_list`: a list, where each entry is either a transition
704 name string, or a 1- or 2-tuple (transition name, optional next state
705 name).
707 stringtype = type('')
708 names = []
709 transitions = {}
710 for namestate in name_list:
711 if type(namestate) is stringtype:
712 transitions[namestate] = self.make_transition(namestate)
713 names.append(namestate)
714 else:
715 transitions[namestate[0]] = self.make_transition(*namestate)
716 names.append(namestate[0])
717 return names, transitions
719 def no_match(self, context, transitions):
721 Called when there is no match from `StateMachine.check_line()`.
723 Return the same values returned by transition methods:
725 - context: unchanged;
726 - next state name: ``None``;
727 - empty result list.
729 Override in subclasses to catch this event.
731 return context, None, []
733 def bof(self, context):
735 Handle beginning-of-file. Return unchanged `context`, empty result.
737 Override in subclasses.
739 Parameter `context`: application-defined storage.
741 return context, []
743 def eof(self, context):
745 Handle end-of-file. Return empty result.
747 Override in subclasses.
749 Parameter `context`: application-defined storage.
751 return []
753 def nop(self, match, context, next_state):
755 A "do nothing" transition method.
757 Return unchanged `context` & `next_state`, empty result. Useful for
758 simple state changes (actionless transitions).
760 return context, next_state, []
763 class StateMachineWS(StateMachine):
766 `StateMachine` subclass specialized for whitespace recognition.
768 There are three methods provided for extracting indented text blocks:
770 - `get_indented()`: use when the indent is unknown.
771 - `get_known_indented()`: use when the indent is known for all lines.
772 - `get_first_known_indented()`: use when only the first line's indent is
773 known.
776 def get_indented(self, until_blank=0, strip_indent=1):
778 Return a block of indented lines of text, and info.
780 Extract an indented block where the indent is unknown for all lines.
782 :Parameters:
783 - `until_blank`: Stop collecting at the first blank line if true
784 (1).
785 - `strip_indent`: Strip common leading indent if true (1,
786 default).
788 :Return:
789 - the indented block (a list of lines of text),
790 - its indent,
791 - its first line offset from BOF, and
792 - whether or not it finished with a blank line.
794 offset = self.abs_line_offset()
795 indented, indent, blank_finish = self.input_lines.get_indented(
796 self.line_offset, until_blank, strip_indent)
797 if indented:
798 self.next_line(len(indented) - 1) # advance to last indented line
799 while indented and not indented[0].strip():
800 indented.trim_start()
801 offset += 1
802 return indented, indent, offset, blank_finish
804 def get_known_indented(self, indent, until_blank=0, strip_indent=1):
806 Return an indented block and info.
808 Extract an indented block where the indent is known for all lines.
809 Starting with the current line, extract the entire text block with at
810 least `indent` indentation (which must be whitespace, except for the
811 first line).
813 :Parameters:
814 - `indent`: The number of indent columns/characters.
815 - `until_blank`: Stop collecting at the first blank line if true
816 (1).
817 - `strip_indent`: Strip `indent` characters of indentation if true
818 (1, default).
820 :Return:
821 - the indented block,
822 - its first line offset from BOF, and
823 - whether or not it finished with a blank line.
825 offset = self.abs_line_offset()
826 indented, indent, blank_finish = self.input_lines.get_indented(
827 self.line_offset, until_blank, strip_indent,
828 block_indent=indent)
829 self.next_line(len(indented) - 1) # advance to last indented line
830 while indented and not indented[0].strip():
831 indented.trim_start()
832 offset += 1
833 return indented, offset, blank_finish
835 def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
836 strip_top=1):
838 Return an indented block and info.
840 Extract an indented block where the indent is known for the first line
841 and unknown for all other lines.
843 :Parameters:
844 - `indent`: The first line's indent (# of columns/characters).
845 - `until_blank`: Stop collecting at the first blank line if true
846 (1).
847 - `strip_indent`: Strip `indent` characters of indentation if true
848 (1, default).
849 - `strip_top`: Strip blank lines from the beginning of the block.
851 :Return:
852 - the indented block,
853 - its indent,
854 - its first line offset from BOF, and
855 - whether or not it finished with a blank line.
857 offset = self.abs_line_offset()
858 indented, indent, blank_finish = self.input_lines.get_indented(
859 self.line_offset, until_blank, strip_indent,
860 first_indent=indent)
861 self.next_line(len(indented) - 1) # advance to last indented line
862 if strip_top:
863 while indented and not indented[0].strip():
864 indented.trim_start()
865 offset += 1
866 return indented, indent, offset, blank_finish
869 class StateWS(State):
872 State superclass specialized for whitespace (blank lines & indents).
874 Use this class with `StateMachineWS`. The transitions 'blank' (for blank
875 lines) and 'indent' (for indented text blocks) are added automatically,
876 before any other transitions. The transition method `blank()` handles
877 blank lines and `indent()` handles nested indented blocks. Indented
878 blocks trigger a new state machine to be created by `indent()` and run.
879 The class of the state machine to be created is in `indent_sm`, and the
880 constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
882 The methods `known_indent()` and `firstknown_indent()` are provided for
883 indented blocks where the indent (all lines' and first line's only,
884 respectively) is known to the transition method, along with the attributes
885 `known_indent_sm` and `known_indent_sm_kwargs`. Neither transition method
886 is triggered automatically.
889 indent_sm = None
891 The `StateMachine` class handling indented text blocks.
893 If left as ``None``, `indent_sm` defaults to the value of
894 `State.nested_sm`. Override it in subclasses to avoid the default.
897 indent_sm_kwargs = None
899 Keyword arguments dictionary, passed to the `indent_sm` constructor.
901 If left as ``None``, `indent_sm_kwargs` defaults to the value of
902 `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
905 known_indent_sm = None
907 The `StateMachine` class handling known-indented text blocks.
909 If left as ``None``, `known_indent_sm` defaults to the value of
910 `indent_sm`. Override it in subclasses to avoid the default.
913 known_indent_sm_kwargs = None
915 Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
917 If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
918 `indent_sm_kwargs`. Override it in subclasses to avoid the default.
921 ws_patterns = {'blank': ' *$',
922 'indent': ' +'}
923 """Patterns for default whitespace transitions. May be overridden in
924 subclasses."""
926 ws_initial_transitions = ('blank', 'indent')
927 """Default initial whitespace transitions, added before those listed in
928 `State.initial_transitions`. May be overridden in subclasses."""
930 def __init__(self, state_machine, debug=0):
932 Initialize a `StateSM` object; extends `State.__init__()`.
934 Check for indent state machine attributes, set defaults if not set.
936 State.__init__(self, state_machine, debug)
937 if self.indent_sm is None:
938 self.indent_sm = self.nested_sm
939 if self.indent_sm_kwargs is None:
940 self.indent_sm_kwargs = self.nested_sm_kwargs
941 if self.known_indent_sm is None:
942 self.known_indent_sm = self.indent_sm
943 if self.known_indent_sm_kwargs is None:
944 self.known_indent_sm_kwargs = self.indent_sm_kwargs
946 def add_initial_transitions(self):
948 Add whitespace-specific transitions before those defined in subclass.
950 Extends `State.add_initial_transitions()`.
952 State.add_initial_transitions(self)
953 if self.patterns is None:
954 self.patterns = {}
955 self.patterns.update(self.ws_patterns)
956 names, transitions = self.make_transitions(
957 self.ws_initial_transitions)
958 self.add_transitions(names, transitions)
960 def blank(self, match, context, next_state):
961 """Handle blank lines. Does nothing. Override in subclasses."""
962 return self.nop(match, context, next_state)
964 def indent(self, match, context, next_state):
966 Handle an indented text block. Extend or override in subclasses.
968 Recursively run the registered state machine for indented blocks
969 (`self.indent_sm`).
971 indented, indent, line_offset, blank_finish = \
972 self.state_machine.get_indented()
973 sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
974 results = sm.run(indented, input_offset=line_offset)
975 return context, next_state, results
977 def known_indent(self, match, context, next_state):
979 Handle a known-indent text block. Extend or override in subclasses.
981 Recursively run the registered state machine for known-indent indented
982 blocks (`self.known_indent_sm`). The indent is the length of the
983 match, ``match.end()``.
985 indented, line_offset, blank_finish = \
986 self.state_machine.get_known_indented(match.end())
987 sm = self.known_indent_sm(debug=self.debug,
988 **self.known_indent_sm_kwargs)
989 results = sm.run(indented, input_offset=line_offset)
990 return context, next_state, results
992 def first_known_indent(self, match, context, next_state):
994 Handle an indented text block (first line's indent known).
996 Extend or override in subclasses.
998 Recursively run the registered state machine for known-indent indented
999 blocks (`self.known_indent_sm`). The indent is the length of the
1000 match, ``match.end()``.
1002 indented, line_offset, blank_finish = \
1003 self.state_machine.get_first_known_indented(match.end())
1004 sm = self.known_indent_sm(debug=self.debug,
1005 **self.known_indent_sm_kwargs)
1006 results = sm.run(indented, input_offset=line_offset)
1007 return context, next_state, results
1010 class _SearchOverride:
1013 Mix-in class to override `StateMachine` regular expression behavior.
1015 Changes regular expression matching, from the default `re.match()`
1016 (succeeds only if the pattern matches at the start of `self.line`) to
1017 `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1018 When subclassing a `StateMachine`, list this class **first** in the
1019 inheritance list of the class definition.
1022 def match(self, pattern):
1024 Return the result of a regular expression search.
1026 Overrides `StateMachine.match()`.
1028 Parameter `pattern`: `re` compiled regular expression.
1030 return pattern.search(self.line)
1033 class SearchStateMachine(_SearchOverride, StateMachine):
1034 """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1035 pass
1038 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1039 """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1040 pass
1043 class ViewList:
1046 List with extended functionality: slices of ViewList objects are child
1047 lists, linked to their parents. Changes made to a child list also affect
1048 the parent list. A child list is effectively a "view" (in the SQL sense)
1049 of the parent list. Changes to parent lists, however, do *not* affect
1050 active child lists. If a parent list is changed, any active child lists
1051 should be recreated.
1053 The start and end of the slice can be trimmed using the `trim_start()` and
1054 `trim_end()` methods, without affecting the parent list. The link between
1055 child and parent lists can be broken by calling `disconnect()` on the
1056 child list.
1058 Also, ViewList objects keep track of the source & offset of each item.
1059 This information is accessible via the `source()`, `offset()`, and
1060 `info()` methods.
1063 def __init__(self, initlist=None, source=None, items=None,
1064 parent=None, parent_offset=None):
1065 self.data = []
1066 """The actual list of data, flattened from various sources."""
1068 self.items = []
1069 """A list of (source, offset) pairs, same length as `self.data`: the
1070 source of each line and the offset of each line from the beginning of
1071 its source."""
1073 self.parent = parent
1074 """The parent list."""
1076 self.parent_offset = parent_offset
1077 """Offset of this list from the beginning of the parent list."""
1079 if isinstance(initlist, ViewList):
1080 self.data = initlist.data[:]
1081 self.items = initlist.items[:]
1082 elif initlist is not None:
1083 self.data = list(initlist)
1084 if items:
1085 self.items = items
1086 else:
1087 self.items = [(source, i) for i in range(len(initlist))]
1088 assert len(self.data) == len(self.items), 'data mismatch'
1090 def __str__(self):
1091 return str(self.data)
1093 def __repr__(self):
1094 return '%s(%s, items=%s)' % (self.__class__.__name__,
1095 self.data, self.items)
1097 def __lt__(self, other): return self.data < self.__cast(other)
1098 def __le__(self, other): return self.data <= self.__cast(other)
1099 def __eq__(self, other): return self.data == self.__cast(other)
1100 def __ne__(self, other): return self.data != self.__cast(other)
1101 def __gt__(self, other): return self.data > self.__cast(other)
1102 def __ge__(self, other): return self.data >= self.__cast(other)
1103 def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1105 def __cast(self, other):
1106 if isinstance(other, ViewList):
1107 return other.data
1108 else:
1109 return other
1111 def __contains__(self, item): return item in self.data
1112 def __len__(self): return len(self.data)
1114 # The __getitem__()/__setitem__() methods check whether the index
1115 # is a slice first, since indexing a native list with a slice object
1116 # just works.
1118 def __getitem__(self, i):
1119 if isinstance(i, types.SliceType):
1120 assert i.step in (None, 1), 'cannot handle slice with stride'
1121 return self.__class__(self.data[i.start:i.stop],
1122 items=self.items[i.start:i.stop],
1123 parent=self, parent_offset=i.start or 0)
1124 else:
1125 return self.data[i]
1127 def __setitem__(self, i, item):
1128 if isinstance(i, types.SliceType):
1129 assert i.step in (None, 1), 'cannot handle slice with stride'
1130 if not isinstance(item, ViewList):
1131 raise TypeError('assigning non-ViewList to ViewList slice')
1132 self.data[i.start:i.stop] = item.data
1133 self.items[i.start:i.stop] = item.items
1134 assert len(self.data) == len(self.items), 'data mismatch'
1135 if self.parent:
1136 self.parent[(i.start or 0) + self.parent_offset
1137 : (i.stop or len(self)) + self.parent_offset] = item
1138 else:
1139 self.data[i] = item
1140 if self.parent:
1141 self.parent[i + self.parent_offset] = item
1143 def __delitem__(self, i):
1144 try:
1145 del self.data[i]
1146 del self.items[i]
1147 if self.parent:
1148 del self.parent[i + self.parent_offset]
1149 except TypeError:
1150 assert i.step is None, 'cannot handle slice with stride'
1151 del self.data[i.start:i.stop]
1152 del self.items[i.start:i.stop]
1153 if self.parent:
1154 del self.parent[(i.start or 0) + self.parent_offset
1155 : (i.stop or len(self)) + self.parent_offset]
1157 def __add__(self, other):
1158 if isinstance(other, ViewList):
1159 return self.__class__(self.data + other.data,
1160 items=(self.items + other.items))
1161 else:
1162 raise TypeError('adding non-ViewList to a ViewList')
1164 def __radd__(self, other):
1165 if isinstance(other, ViewList):
1166 return self.__class__(other.data + self.data,
1167 items=(other.items + self.items))
1168 else:
1169 raise TypeError('adding ViewList to a non-ViewList')
1171 def __iadd__(self, other):
1172 if isinstance(other, ViewList):
1173 self.data += other.data
1174 else:
1175 raise TypeError('argument to += must be a ViewList')
1176 return self
1178 def __mul__(self, n):
1179 return self.__class__(self.data * n, items=(self.items * n))
1181 __rmul__ = __mul__
1183 def __imul__(self, n):
1184 self.data *= n
1185 self.items *= n
1186 return self
1188 def extend(self, other):
1189 if not isinstance(other, ViewList):
1190 raise TypeError('extending a ViewList with a non-ViewList')
1191 if self.parent:
1192 self.parent.insert(len(self.data) + self.parent_offset, other)
1193 self.data.extend(other.data)
1194 self.items.extend(other.items)
1196 def append(self, item, source=None, offset=0):
1197 if source is None:
1198 self.extend(item)
1199 else:
1200 if self.parent:
1201 self.parent.insert(len(self.data) + self.parent_offset, item,
1202 source, offset)
1203 self.data.append(item)
1204 self.items.append((source, offset))
1206 def insert(self, i, item, source=None, offset=0):
1207 if source is None:
1208 if not isinstance(item, ViewList):
1209 raise TypeError('inserting non-ViewList with no source given')
1210 self.data[i:i] = item.data
1211 self.items[i:i] = item.items
1212 if self.parent:
1213 index = (len(self.data) + i) % len(self.data)
1214 self.parent.insert(index + self.parent_offset, item)
1215 else:
1216 self.data.insert(i, item)
1217 self.items.insert(i, (source, offset))
1218 if self.parent:
1219 index = (len(self.data) + i) % len(self.data)
1220 self.parent.insert(index + self.parent_offset, item,
1221 source, offset)
1223 def pop(self, i=-1):
1224 if self.parent:
1225 index = (len(self.data) + i) % len(self.data)
1226 self.parent.pop(index + self.parent_offset)
1227 self.items.pop(i)
1228 return self.data.pop(i)
1230 def trim_start(self, n=1):
1232 Remove items from the start of the list, without touching the parent.
1234 if n > len(self.data):
1235 raise IndexError("Size of trim too large; can't trim %s items "
1236 "from a list of size %s." % (n, len(self.data)))
1237 elif n < 0:
1238 raise IndexError('Trim size must be >= 0.')
1239 del self.data[:n]
1240 del self.items[:n]
1241 if self.parent:
1242 self.parent_offset += n
1244 def trim_end(self, n=1):
1246 Remove items from the end of the list, without touching the parent.
1248 if n > len(self.data):
1249 raise IndexError("Size of trim too large; can't trim %s items "
1250 "from a list of size %s." % (n, len(self.data)))
1251 elif n < 0:
1252 raise IndexError('Trim size must be >= 0.')
1253 del self.data[-n:]
1254 del self.items[-n:]
1256 def remove(self, item):
1257 index = self.index(item)
1258 del self[index]
1260 def count(self, item): return self.data.count(item)
1261 def index(self, item): return self.data.index(item)
1263 def reverse(self):
1264 self.data.reverse()
1265 self.items.reverse()
1266 self.parent = None
1268 def sort(self, *args):
1269 tmp = zip(self.data, self.items)
1270 tmp.sort(*args)
1271 self.data = [entry[0] for entry in tmp]
1272 self.items = [entry[1] for entry in tmp]
1273 self.parent = None
1275 def info(self, i):
1276 """Return source & offset for index `i`."""
1277 try:
1278 return self.items[i]
1279 except IndexError:
1280 if i == len(self.data): # Just past the end
1281 return self.items[i - 1][0], None
1282 else:
1283 raise
1285 def source(self, i):
1286 """Return source for index `i`."""
1287 return self.info(i)[0]
1289 def offset(self, i):
1290 """Return offset for index `i`."""
1291 return self.info(i)[1]
1293 def disconnect(self):
1294 """Break link between this list and parent list."""
1295 self.parent = None
1298 class StringList(ViewList):
1300 """A `ViewList` with string-specific methods."""
1302 def trim_left(self, length, start=0, end=sys.maxint):
1304 Trim `length` characters off the beginning of each item, in-place,
1305 from index `start` to `end`. No whitespace-checking is done on the
1306 trimmed text. Does not affect slice parent.
1308 self.data[start:end] = [line[length:]
1309 for line in self.data[start:end]]
1311 def get_text_block(self, start, flush_left=0):
1313 Return a contiguous block of text.
1315 If `flush_left` is true, raise `UnexpectedIndentationError` if an
1316 indented line is encountered before the text block ends (with a blank
1317 line).
1319 end = start
1320 last = len(self.data)
1321 while end < last:
1322 line = self.data[end]
1323 if not line.strip():
1324 break
1325 if flush_left and (line[0] == ' '):
1326 source, offset = self.info(end)
1327 raise UnexpectedIndentationError(self[start:end], source,
1328 offset + 1)
1329 end += 1
1330 return self[start:end]
1332 def get_indented(self, start=0, until_blank=0, strip_indent=1,
1333 block_indent=None, first_indent=None):
1335 Extract and return a StringList of indented lines of text.
1337 Collect all lines with indentation, determine the minimum indentation,
1338 remove the minimum indentation from all indented lines (unless
1339 `strip_indent` is false), and return them. All lines up to but not
1340 including the first unindented line will be returned.
1342 :Parameters:
1343 - `start`: The index of the first line to examine.
1344 - `until_blank`: Stop collecting at the first blank line if true.
1345 - `strip_indent`: Strip common leading indent if true (default).
1346 - `block_indent`: The indent of the entire block, if known.
1347 - `first_indent`: The indent of the first line, if known.
1349 :Return:
1350 - a StringList of indented lines with mininum indent removed;
1351 - the amount of the indent;
1352 - a boolean: did the indented block finish with a blank line or EOF?
1354 indent = block_indent # start with None if unknown
1355 end = start
1356 if block_indent is not None and first_indent is None:
1357 first_indent = block_indent
1358 if first_indent is not None:
1359 end += 1
1360 last = len(self.data)
1361 while end < last:
1362 line = self.data[end]
1363 if line and (line[0] != ' '
1364 or (block_indent is not None
1365 and line[:block_indent].strip())):
1366 # Line not indented or insufficiently indented.
1367 # Block finished properly iff the last indented line blank:
1368 blank_finish = ((end > start)
1369 and not self.data[end - 1].strip())
1370 break
1371 stripped = line.lstrip()
1372 if not stripped: # blank line
1373 if until_blank:
1374 blank_finish = 1
1375 break
1376 elif block_indent is None:
1377 line_indent = len(line) - len(stripped)
1378 if indent is None:
1379 indent = line_indent
1380 else:
1381 indent = min(indent, line_indent)
1382 end += 1
1383 else:
1384 blank_finish = 1 # block ends at end of lines
1385 block = self[start:end]
1386 if first_indent is not None and block:
1387 block.data[0] = block.data[0][first_indent:]
1388 if indent and strip_indent:
1389 block.trim_left(indent, start=(first_indent is not None))
1390 return block, indent or 0, blank_finish
1392 def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1393 block = self[top:bottom]
1394 indent = right
1395 for i in range(len(block.data)):
1396 block.data[i] = line = block.data[i][left:right].rstrip()
1397 if line:
1398 indent = min(indent, len(line) - len(line.lstrip()))
1399 if strip_indent and 0 < indent < right:
1400 block.data = [line[indent:] for line in block.data]
1401 return block
1403 def pad_double_width(self, pad_char):
1405 Pad all double-width characters in self by appending `pad_char` to each.
1406 For East Asian language support.
1408 if hasattr(unicodedata, 'east_asian_width'):
1409 east_asian_width = unicodedata.east_asian_width
1410 else:
1411 return # new in Python 2.4
1412 for i in range(len(self.data)):
1413 line = self.data[i]
1414 if isinstance(line, unicode):
1415 new = []
1416 for char in line:
1417 new.append(char)
1418 if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1419 new.append(pad_char)
1420 self.data[i] = ''.join(new)
1422 def replace(self, old, new):
1423 """Replace all occurrences of substring `old` with `new`."""
1424 for i in range(len(self.data)):
1425 self.data[i] = self.data[i].replace(old, new)
1428 class StateMachineError(Exception): pass
1429 class UnknownStateError(StateMachineError): pass
1430 class DuplicateStateError(StateMachineError): pass
1431 class UnknownTransitionError(StateMachineError): pass
1432 class DuplicateTransitionError(StateMachineError): pass
1433 class TransitionPatternNotFound(StateMachineError): pass
1434 class TransitionMethodNotFound(StateMachineError): pass
1435 class UnexpectedIndentationError(StateMachineError): pass
1438 class TransitionCorrection(Exception):
1441 Raise from within a transition method to switch to another transition.
1443 Raise with one argument, the new transition name.
1447 class StateCorrection(Exception):
1450 Raise from within a transition method to switch to another state.
1452 Raise with one or two arguments: new state name, and an optional new
1453 transition name.
1457 def string2lines(astring, tab_width=8, convert_whitespace=0,
1458 whitespace=re.compile('[\v\f]')):
1460 Return a list of one-line strings with tabs expanded, no newlines, and
1461 trailing whitespace stripped.
1463 Each tab is expanded with between 1 and `tab_width` spaces, so that the
1464 next character's index becomes a multiple of `tab_width` (8 by default).
1466 Parameters:
1468 - `astring`: a multi-line string.
1469 - `tab_width`: the number of columns between tab stops.
1470 - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1472 if convert_whitespace:
1473 astring = whitespace.sub(' ', astring)
1474 return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1476 def _exception_data():
1478 Return exception information:
1480 - the exception's class name;
1481 - the exception object;
1482 - the name of the file containing the offending code;
1483 - the line number of the offending code;
1484 - the function name of the offending code.
1486 type, value, traceback = sys.exc_info()
1487 while traceback.tb_next:
1488 traceback = traceback.tb_next
1489 code = traceback.tb_frame.f_code
1490 return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1491 code.co_name)