docutils/statemachine.py

   1  # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 A finite state machine specialized for regular-expression-based text filters,
   7 this module defines the following classes:
   8
   9 - `StateMachine`, a state machine
  10 - `State`, a state superclass
  11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
  12 - `StateWS`, a state superclass for use with `StateMachineWS`
  13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
  14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
  15 - `ViewList`, extends standard Python lists.
  16 - `StringList`, string-specific ViewList.
  17
  18 Exception classes:
  19
  20 - `StateMachineError`
  21 - `UnknownStateError`
  22 - `DuplicateStateError`
  23 - `UnknownTransitionError`
  24 - `DuplicateTransitionError`
  25 - `TransitionPatternNotFound`
  26 - `TransitionMethodNotFound`
  27 - `UnexpectedIndentationError`
  28 - `TransitionCorrection`: Raised to switch to another transition.
  29 - `StateCorrection`: Raised to switch to another state & transition.
  30
  31 Functions:
  32
  33 - `string2lines()`: split a multi-line string into a list of one-line strings
  34
  35
  36 How To Use This Module
  37 ======================
  38 (See the individual classes, methods, and attributes for details.)
  39
  40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
  41    You will also need to ``import re``.
  42
  43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
  44    machine::
  45
  46        class MyState(statemachine.State):
  47
  48    Within the state's class definition:
  49
  50    a) Include a pattern for each transition, in `State.patterns`::
  51
  52           patterns = {'atransition': r'pattern', ...}
  53
  54    b) Include a list of initial transitions to be set up automatically, in
  55       `State.initial_transitions`::
  56
  57           initial_transitions = ['atransition', ...]
  58
  59    c) Define a method for each transition, with the same name as the
  60       transition pattern::
  61
  62           def atransition(self, match, context, next_state):
  63               # do something
  64               result = [...]  # a list
  65               return context, next_state, result
  66               # context, next_state may be altered
  67
  68       Transition methods may raise an `EOFError` to cut processing short.
  69
  70    d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
  71       transition methods, which handle the beginning- and end-of-file.
  72
  73    e) In order to handle nested processing, you may wish to override the
  74       attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
  75
  76       If you are using `StateWS` as a base class, in order to handle nested
  77       indented blocks, you may wish to:
  78
  79       - override the attributes `StateWS.indent_sm`,
  80         `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
  81         `StateWS.known_indent_sm_kwargs`;
  82       - override the `StateWS.blank()` method; and/or
  83       - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
  84         and/or `StateWS.firstknown_indent()` methods.
  85
  86 3. Create a state machine object::
  87
  88        sm = StateMachine(state_classes=[MyState, ...],
  89                          initial_state='MyState')
  90
  91 4. Obtain the input text, which needs to be converted into a tab-free list of
  92    one-line strings. For example, to read text from a file called
  93    'inputfile'::
  94
  95        input_string = open('inputfile').read()
  96        input_lines = statemachine.string2lines(input_string)
  97
  98 5. Run the state machine on the input text and collect the results, a list::
  99
 100        results = sm.run(input_lines)
 101
 102 6. Remove any lingering circular references::
 103
 104        sm.unlink()
 105 """
 106
 107 __docformat__ = 'restructuredtext'
 108
 109 import sys
 110 import re
 111 import types
 112 import unicodedata
 113 from docutils.error_reporting import ErrorOutput
 114
 115 class StateMachine:
 116
 117     """
 118     A finite state machine for text filters using regular expressions.
 119
 120     The input is provided in the form of a list of one-line strings (no
 121     newlines). States are subclasses of the `State` class. Transitions consist
 122     of regular expression patterns and transition methods, and are defined in
 123     each state.
 124
 125     The state machine is started with the `run()` method, which returns the
 126     results of processing in a list.
 127     """
 128
 129     def __init__(self, state_classes, initial_state, debug=0):
 130         """
 131         Initialize a `StateMachine` object; add state objects.
 132
 133         Parameters:
 134
 135         - `state_classes`: a list of `State` (sub)classes.
 136         - `initial_state`: a string, the class name of the initial state.
 137         - `debug`: a boolean; produce verbose output if true (nonzero).
 138         """
 139
 140         self.input_lines = None
 141         """`StringList` of input lines (without newlines).
 142         Filled by `self.run()`."""
 143
 144         self.input_offset = 0
 145         """Offset of `self.input_lines` from the beginning of the file."""
 146
 147         self.line = None
 148         """Current input line."""
 149
 150         self.line_offset = -1
 151         """Current input line offset from beginning of `self.input_lines`."""
 152
 153         self.debug = debug
 154         """Debugging mode on/off."""
 155
 156         self.initial_state = initial_state
 157         """The name of the initial state (key to `self.states`)."""
 158
 159         self.current_state = initial_state
 160         """The name of the current state (key to `self.states`)."""
 161
 162         self.states = {}
 163         """Mapping of {state_name: State_object}."""
 164
 165         self.add_states(state_classes)
 166
 167         self.observers = []
 168         """List of bound methods or functions to call whenever the current
 169         line changes.  Observers are called with one argument, ``self``.
 170         Cleared at the end of `run()`."""
 171
 172         self._stderr = ErrorOutput()
 173         """Wrapper around sys.stderr catching en-/decoding errors"""
 174
 175
 176     def unlink(self):
 177         """Remove circular references to objects no longer required."""
 178         for state in self.states.values():
 179             state.unlink()
 180         self.states = None
 181
 182     def run(self, input_lines, input_offset=0, context=None,
 183             input_source=None, initial_state=None):
 184         """
 185         Run the state machine on `input_lines`. Return results (a list).
 186
 187         Reset `self.line_offset` and `self.current_state`. Run the
 188         beginning-of-file transition. Input one line at a time and check for a
 189         matching transition. If a match is found, call the transition method
 190         and possibly change the state. Store the context returned by the
 191         transition method to be passed on to the next transition matched.
 192         Accumulate the results returned by the transition methods in a list.
 193         Run the end-of-file transition. Finally, return the accumulated
 194         results.
 195
 196         Parameters:
 197
 198         - `input_lines`: a list of strings without newlines, or `StringList`.
 199         - `input_offset`: the line offset of `input_lines` from the beginning
 200           of the file.
 201         - `context`: application-specific storage.
 202         - `input_source`: name or path of source of `input_lines`.
 203         - `initial_state`: name of initial state.
 204         """
 205         self.runtime_init()
 206         if isinstance(input_lines, StringList):
 207             self.input_lines = input_lines
 208         else:
 209             self.input_lines = StringList(input_lines, source=input_source)
 210         self.input_offset = input_offset
 211         self.line_offset = -1
 212         self.current_state = initial_state or self.initial_state
 213         if self.debug:
 214             print >>self._stderr, (
 215                 u'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
 216                 % (self.line_offset, u'\n| '.join(self.input_lines)))
 217         transitions = None
 218         results = []
 219         state = self.get_state()
 220         try:
 221             if self.debug:
 222                 print >>self._stderr, '\nStateMachine.run: bof transition'
 223             context, result = state.bof(context)
 224             results.extend(result)
 225             while 1:
 226                 try:
 227                     try:
 228                         self.next_line()
 229                         if self.debug:
 230                             source, offset = self.input_lines.info(
 231                                 self.line_offset)
 232                             print >>self._stderr, (
 233                                 u'\nStateMachine.run: line (source=%r, '
 234                                 u'offset=%r):\n| %s'
 235                                 % (source, offset, self.line))
 236                         context, next_state, result = self.check_line(
 237                             context, state, transitions)
 238                     except EOFError:
 239                         if self.debug:
 240                             print >>self._stderr, (
 241                                 '\nStateMachine.run: %s.eof transition'
 242                                 % state.__class__.__name__)
 243                         result = state.eof(context)
 244                         results.extend(result)
 245                         break
 246                     else:
 247                         results.extend(result)
 248                 except TransitionCorrection, exception:
 249                     self.previous_line() # back up for another try
 250                     transitions = (exception.args[0],)
 251                     if self.debug:
 252                         print >>self._stderr, (
 253                               '\nStateMachine.run: TransitionCorrection to '
 254                               'state "%s", transition %s.'
 255                               % (state.__class__.__name__, transitions[0]))
 256                     continue
 257                 except StateCorrection, exception:
 258                     self.previous_line() # back up for another try
 259                     next_state = exception.args[0]
 260                     if len(exception.args) == 1:
 261                         transitions = None
 262                     else:
 263                         transitions = (exception.args[1],)
 264                     if self.debug:
 265                         print >>self._stderr, (
 266                               '\nStateMachine.run: StateCorrection to state '
 267                               '"%s", transition %s.'
 268                               % (next_state, transitions[0]))
 269                 else:
 270                     transitions = None
 271                 state = self.get_state(next_state)
 272         except:
 273             if self.debug:
 274                 self.error()
 275             raise
 276         self.observers = []
 277         return results
 278
 279     def get_state(self, next_state=None):
 280         """
 281         Return current state object; set it first if `next_state` given.
 282
 283         Parameter `next_state`: a string, the name of the next state.
 284
 285         Exception: `UnknownStateError` raised if `next_state` unknown.
 286         """
 287         if next_state:
 288             if self.debug and next_state != self.current_state:
 289                 print >>self._stderr, (
 290                     '\nStateMachine.get_state: Changing state from '
 291                     '"%s" to "%s" (input line %s).'
 292                     % (self.current_state, next_state,
 293                        self.abs_line_number()))
 294             self.current_state = next_state
 295         try:
 296             return self.states[self.current_state]
 297         except KeyError:
 298             raise UnknownStateError(self.current_state)
 299
 300     def next_line(self, n=1):
 301         """Load `self.line` with the `n`'th next line and return it."""
 302         try:
 303             try:
 304                 self.line_offset += n
 305                 self.line = self.input_lines[self.line_offset]
 306             except IndexError:
 307                 self.line = None
 308                 raise EOFError
 309             return self.line
 310         finally:
 311             self.notify_observers()
 312
 313     def is_next_line_blank(self):
 314         """Return 1 if the next line is blank or non-existant."""
 315         try:
 316             return not self.input_lines[self.line_offset + 1].strip()
 317         except IndexError:
 318             return 1
 319
 320     def at_eof(self):
 321         """Return 1 if the input is at or past end-of-file."""
 322         return self.line_offset >= len(self.input_lines) - 1
 323
 324     def at_bof(self):
 325         """Return 1 if the input is at or before beginning-of-file."""
 326         return self.line_offset <= 0
 327
 328     def previous_line(self, n=1):
 329         """Load `self.line` with the `n`'th previous line and return it."""
 330         self.line_offset -= n
 331         if self.line_offset < 0:
 332             self.line = None
 333         else:
 334             self.line = self.input_lines[self.line_offset]
 335         self.notify_observers()
 336         return self.line
 337
 338     def goto_line(self, line_offset):
 339         """Jump to absolute line offset `line_offset`, load and return it."""
 340         try:
 341             try:
 342                 self.line_offset = line_offset - self.input_offset
 343                 self.line = self.input_lines[self.line_offset]
 344             except IndexError:
 345                 self.line = None
 346                 raise EOFError
 347             return self.line
 348         finally:
 349             self.notify_observers()
 350
 351     def get_source(self, line_offset):
 352         """Return source of line at absolute line offset `line_offset`."""
 353         return self.input_lines.source(line_offset - self.input_offset)
 354
 355     def abs_line_offset(self):
 356         """Return line offset of current line, from beginning of file."""
 357         return self.line_offset + self.input_offset
 358
 359     def abs_line_number(self):
 360         """Return line number of current line (counting from 1)."""
 361         return self.line_offset + self.input_offset + 1
 362
 363     def get_source_and_line(self, lineno=None):
 364         """Return (source, line) tuple for current or given line number.
 365
 366         Looks up the source and line number in the `self.input_lines`
 367         StringList instance to count for included source files.
 368
 369         If the optional argument `lineno` is given, convert it from an
 370         absolute line number to the corresponding (source, line) pair.
 371         """
 372         if lineno is None:
 373             offset = self.line_offset
 374         else:
 375             offset = lineno - self.input_offset - 1
 376         try:
 377             src, srcoffset = self.input_lines.info(offset)
 378             srcline = srcoffset + 1
 379         except (TypeError):
 380             # line is None if index is "Just past the end"
 381             src, srcline = self.get_source_and_line(offset + self.input_offset)
 382             return src, srcline + 1
 383         except (IndexError): # `offset` is off the list
 384             src, srcline = None, None
 385             # raise AssertionError('cannot find line %d in %s lines' %
 386             #                      (offset, len(self.input_lines)))
 387             #                      # list(self.input_lines.lines())))
 388         # assert offset == srcoffset, str(self.input_lines)
 389         # print "get_source_and_line(%s):" % lineno,
 390         # print offset + 1, '->', src, srcline
 391         # print self.input_lines
 392         return (src, srcline)
 393
 394     def insert_input(self, input_lines, source):
 395         self.input_lines.insert(self.line_offset + 1, '',
 396                                 source='internal padding after '+source,
 397                                 offset=len(input_lines))
 398         self.input_lines.insert(self.line_offset + 1, '',
 399                                 source='internal padding before '+source,
 400                                 offset=-1)
 401         self.input_lines.insert(self.line_offset + 2,
 402                                 StringList(input_lines, source))
 403
 404     def get_text_block(self, flush_left=0):
 405         """
 406         Return a contiguous block of text.
 407
 408         If `flush_left` is true, raise `UnexpectedIndentationError` if an
 409         indented line is encountered before the text block ends (with a blank
 410         line).
 411         """
 412         try:
 413             block = self.input_lines.get_text_block(self.line_offset,
 414                                                     flush_left)
 415             self.next_line(len(block) - 1)
 416             return block
 417         except UnexpectedIndentationError, error:
 418             block, source, lineno = error.args
 419             self.next_line(len(block) - 1) # advance to last line of block
 420             raise
 421
 422     def check_line(self, context, state, transitions=None):
 423         """
 424         Examine one line of input for a transition match & execute its method.
 425
 426         Parameters:
 427
 428         - `context`: application-dependent storage.
 429         - `state`: a `State` object, the current state.
 430         - `transitions`: an optional ordered list of transition names to try,
 431           instead of ``state.transition_order``.
 432
 433         Return the values returned by the transition method:
 434
 435         - context: possibly modified from the parameter `context`;
 436         - next state name (`State` subclass name);
 437         - the result output of the transition, a list.
 438
 439         When there is no match, ``state.no_match()`` is called and its return
 440         value is returned.
 441         """
 442         if transitions is None:
 443             transitions =  state.transition_order
 444         state_correction = None
 445         if self.debug:
 446             print >>self._stderr, (
 447                   '\nStateMachine.check_line: state="%s", transitions=%r.'
 448                   % (state.__class__.__name__, transitions))
 449         for name in transitions:
 450             pattern, method, next_state = state.transitions[name]
 451             match = pattern.match(self.line)
 452             if match:
 453                 if self.debug:
 454                     print >>self._stderr, (
 455                           '\nStateMachine.check_line: Matched transition '
 456                           '"%s" in state "%s".'
 457                           % (name, state.__class__.__name__))
 458                 return method(match, context, next_state)
 459         else:
 460             if self.debug:
 461                 print >>self._stderr, (
 462                       '\nStateMachine.check_line: No match in state "%s".'
 463                       % state.__class__.__name__)
 464             return state.no_match(context, transitions)
 465
 466     def add_state(self, state_class):
 467         """
 468         Initialize & add a `state_class` (`State` subclass) object.
 469
 470         Exception: `DuplicateStateError` raised if `state_class` was already
 471         added.
 472         """
 473         statename = state_class.__name__
 474         if statename in self.states:
 475             raise DuplicateStateError(statename)
 476         self.states[statename] = state_class(self, self.debug)
 477
 478     def add_states(self, state_classes):
 479         """
 480         Add `state_classes` (a list of `State` subclasses).
 481         """
 482         for state_class in state_classes:
 483             self.add_state(state_class)
 484
 485     def runtime_init(self):
 486         """
 487         Initialize `self.states`.
 488         """
 489         for state in self.states.values():
 490             state.runtime_init()
 491
 492     def error(self):
 493         """Report error details."""
 494         type, value, module, line, function = _exception_data()
 495         print >>self._stderr, u'%s: %s' % (type, value)
 496         print >>self._stderr, 'input line %s' % (self.abs_line_number())
 497         print >>self._stderr, (u'module %s, line %s, function %s' %
 498                                (module, line, function))
 499
 500     def attach_observer(self, observer):
 501         """
 502         The `observer` parameter is a function or bound method which takes two
 503         arguments, the source and offset of the current line.
 504         """
 505         self.observers.append(observer)
 506
 507     def detach_observer(self, observer):
 508         self.observers.remove(observer)
 509
 510     def notify_observers(self):
 511         for observer in self.observers:
 512             try:
 513                 info = self.input_lines.info(self.line_offset)
 514             except IndexError:
 515                 info = (None, None)
 516             observer(*info)
 517
 518
 519 class State:
 520
 521     """
 522     State superclass. Contains a list of transitions, and transition methods.
 523
 524     Transition methods all have the same signature. They take 3 parameters:
 525
 526     - An `re` match object. ``match.string`` contains the matched input line,
 527       ``match.start()`` gives the start index of the match, and
 528       ``match.end()`` gives the end index.
 529     - A context object, whose meaning is application-defined (initial value
 530       ``None``). It can be used to store any information required by the state
 531       machine, and the retured context is passed on to the next transition
 532       method unchanged.
 533     - The name of the next state, a string, taken from the transitions list;
 534       normally it is returned unchanged, but it may be altered by the
 535       transition method if necessary.
 536
 537     Transition methods all return a 3-tuple:
 538
 539     - A context object, as (potentially) modified by the transition method.
 540     - The next state name (a return value of ``None`` means no state change).
 541     - The processing result, a list, which is accumulated by the state
 542       machine.
 543
 544     Transition methods may raise an `EOFError` to cut processing short.
 545
 546     There are two implicit transitions, and corresponding transition methods
 547     are defined: `bof()` handles the beginning-of-file, and `eof()` handles
 548     the end-of-file. These methods have non-standard signatures and return
 549     values. `bof()` returns the initial context and results, and may be used
 550     to return a header string, or do any other processing needed. `eof()`
 551     should handle any remaining context and wrap things up; it returns the
 552     final processing result.
 553
 554     Typical applications need only subclass `State` (or a subclass), set the
 555     `patterns` and `initial_transitions` class attributes, and provide
 556     corresponding transition methods. The default object initialization will
 557     take care of constructing the list of transitions.
 558     """
 559
 560     patterns = None
 561     """
 562     {Name: pattern} mapping, used by `make_transition()`. Each pattern may
 563     be a string or a compiled `re` pattern. Override in subclasses.
 564     """
 565
 566     initial_transitions = None
 567     """
 568     A list of transitions to initialize when a `State` is instantiated.
 569     Each entry is either a transition name string, or a (transition name, next
 570     state name) pair. See `make_transitions()`. Override in subclasses.
 571     """
 572
 573     nested_sm = None
 574     """
 575     The `StateMachine` class for handling nested processing.
 576
 577     If left as ``None``, `nested_sm` defaults to the class of the state's
 578     controlling state machine. Override it in subclasses to avoid the default.
 579     """
 580
 581     nested_sm_kwargs = None
 582     """
 583     Keyword arguments dictionary, passed to the `nested_sm` constructor.
 584
 585     Two keys must have entries in the dictionary:
 586
 587     - Key 'state_classes' must be set to a list of `State` classes.
 588     - Key 'initial_state' must be set to the name of the initial state class.
 589
 590     If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
 591     class of the current state, and 'initial_state' defaults to the name of
 592     the class of the current state. Override in subclasses to avoid the
 593     defaults.
 594     """
 595
 596     def __init__(self, state_machine, debug=0):
 597         """
 598         Initialize a `State` object; make & add initial transitions.
 599
 600         Parameters:
 601
 602         - `statemachine`: the controlling `StateMachine` object.
 603         - `debug`: a boolean; produce verbose output if true (nonzero).
 604         """
 605
 606         self.transition_order = []
 607         """A list of transition names in search order."""
 608
 609         self.transitions = {}
 610         """
 611         A mapping of transition names to 3-tuples containing
 612         (compiled_pattern, transition_method, next_state_name). Initialized as
 613         an instance attribute dynamically (instead of as a class attribute)
 614         because it may make forward references to patterns and methods in this
 615         or other classes.
 616         """
 617
 618         self.add_initial_transitions()
 619
 620         self.state_machine = state_machine
 621         """A reference to the controlling `StateMachine` object."""
 622
 623         self.debug = debug
 624         """Debugging mode on/off."""
 625
 626         if self.nested_sm is None:
 627             self.nested_sm = self.state_machine.__class__
 628         if self.nested_sm_kwargs is None:
 629             self.nested_sm_kwargs = {'state_classes': [self.__class__],
 630                                      'initial_state': self.__class__.__name__}
 631
 632     def runtime_init(self):
 633         """
 634         Initialize this `State` before running the state machine; called from
 635         `self.state_machine.run()`.
 636         """
 637         pass
 638
 639     def unlink(self):
 640         """Remove circular references to objects no longer required."""
 641         self.state_machine = None
 642
 643     def add_initial_transitions(self):
 644         """Make and add transitions listed in `self.initial_transitions`."""
 645         if self.initial_transitions:
 646             names, transitions = self.make_transitions(
 647                   self.initial_transitions)
 648             self.add_transitions(names, transitions)
 649
 650     def add_transitions(self, names, transitions):
 651         """
 652         Add a list of transitions to the start of the transition list.
 653
 654         Parameters:
 655
 656         - `names`: a list of transition names.
 657         - `transitions`: a mapping of names to transition tuples.
 658
 659         Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
 660         """
 661         for name in names:
 662             if name in self.transitions:
 663                 raise DuplicateTransitionError(name)
 664             if name not in transitions:
 665                 raise UnknownTransitionError(name)
 666         self.transition_order[:0] = names
 667         self.transitions.update(transitions)
 668
 669     def add_transition(self, name, transition):
 670         """
 671         Add a transition to the start of the transition list.
 672
 673         Parameter `transition`: a ready-made transition 3-tuple.
 674
 675         Exception: `DuplicateTransitionError`.
 676         """
 677         if name in self.transitions:
 678             raise DuplicateTransitionError(name)
 679         self.transition_order[:0] = [name]
 680         self.transitions[name] = transition
 681
 682     def remove_transition(self, name):
 683         """
 684         Remove a transition by `name`.
 685
 686         Exception: `UnknownTransitionError`.
 687         """
 688         try:
 689             del self.transitions[name]
 690             self.transition_order.remove(name)
 691         except:
 692             raise UnknownTransitionError(name)
 693
 694     def make_transition(self, name, next_state=None):
 695         """
 696         Make & return a transition tuple based on `name`.
 697
 698         This is a convenience function to simplify transition creation.
 699
 700         Parameters:
 701
 702         - `name`: a string, the name of the transition pattern & method. This
 703           `State` object must have a method called '`name`', and a dictionary
 704           `self.patterns` containing a key '`name`'.
 705         - `next_state`: a string, the name of the next `State` object for this
 706           transition. A value of ``None`` (or absent) implies no state change
 707           (i.e., continue with the same state).
 708
 709         Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
 710         """
 711         if next_state is None:
 712             next_state = self.__class__.__name__
 713         try:
 714             pattern = self.patterns[name]
 715             if not hasattr(pattern, 'match'):
 716                 pattern = re.compile(pattern)
 717         except KeyError:
 718             raise TransitionPatternNotFound(
 719                   '%s.patterns[%r]' % (self.__class__.__name__, name))
 720         try:
 721             method = getattr(self, name)
 722         except AttributeError:
 723             raise TransitionMethodNotFound(
 724                   '%s.%s' % (self.__class__.__name__, name))
 725         return (pattern, method, next_state)
 726
 727     def make_transitions(self, name_list):
 728         """
 729         Return a list of transition names and a transition mapping.
 730
 731         Parameter `name_list`: a list, where each entry is either a transition
 732         name string, or a 1- or 2-tuple (transition name, optional next state
 733         name).
 734         """
 735         stringtype = type('')
 736         names = []
 737         transitions = {}
 738         for namestate in name_list:
 739             if type(namestate) is stringtype:
 740                 transitions[namestate] = self.make_transition(namestate)
 741                 names.append(namestate)
 742             else:
 743                 transitions[namestate[0]] = self.make_transition(*namestate)
 744                 names.append(namestate[0])
 745         return names, transitions
 746
 747     def no_match(self, context, transitions):
 748         """
 749         Called when there is no match from `StateMachine.check_line()`.
 750
 751         Return the same values returned by transition methods:
 752
 753         - context: unchanged;
 754         - next state name: ``None``;
 755         - empty result list.
 756
 757         Override in subclasses to catch this event.
 758         """
 759         return context, None, []
 760
 761     def bof(self, context):
 762         """
 763         Handle beginning-of-file. Return unchanged `context`, empty result.
 764
 765         Override in subclasses.
 766
 767         Parameter `context`: application-defined storage.
 768         """
 769         return context, []
 770
 771     def eof(self, context):
 772         """
 773         Handle end-of-file. Return empty result.
 774
 775         Override in subclasses.
 776
 777         Parameter `context`: application-defined storage.
 778         """
 779         return []
 780
 781     def nop(self, match, context, next_state):
 782         """
 783         A "do nothing" transition method.
 784
 785         Return unchanged `context` & `next_state`, empty result. Useful for
 786         simple state changes (actionless transitions).
 787         """
 788         return context, next_state, []
 789
 790
 791 class StateMachineWS(StateMachine):
 792
 793     """
 794     `StateMachine` subclass specialized for whitespace recognition.
 795
 796     There are three methods provided for extracting indented text blocks:
 797
 798     - `get_indented()`: use when the indent is unknown.
 799     - `get_known_indented()`: use when the indent is known for all lines.
 800     - `get_first_known_indented()`: use when only the first line's indent is
 801       known.
 802     """
 803
 804     def get_indented(self, until_blank=0, strip_indent=1):
 805         """
 806         Return a block of indented lines of text, and info.
 807
 808         Extract an indented block where the indent is unknown for all lines.
 809
 810         :Parameters:
 811             - `until_blank`: Stop collecting at the first blank line if true
 812               (1).
 813             - `strip_indent`: Strip common leading indent if true (1,
 814               default).
 815
 816         :Return:
 817             - the indented block (a list of lines of text),
 818             - its indent,
 819             - its first line offset from BOF, and
 820             - whether or not it finished with a blank line.
 821         """
 822         offset = self.abs_line_offset()
 823         indented, indent, blank_finish = self.input_lines.get_indented(
 824               self.line_offset, until_blank, strip_indent)
 825         if indented:
 826             self.next_line(len(indented) - 1) # advance to last indented line
 827         while indented and not indented[0].strip():
 828             indented.trim_start()
 829             offset += 1
 830         return indented, indent, offset, blank_finish
 831
 832     def get_known_indented(self, indent, until_blank=0, strip_indent=1):
 833         """
 834         Return an indented block and info.
 835
 836         Extract an indented block where the indent is known for all lines.
 837         Starting with the current line, extract the entire text block with at
 838         least `indent` indentation (which must be whitespace, except for the
 839         first line).
 840
 841         :Parameters:
 842             - `indent`: The number of indent columns/characters.
 843             - `until_blank`: Stop collecting at the first blank line if true
 844               (1).
 845             - `strip_indent`: Strip `indent` characters of indentation if true
 846               (1, default).
 847
 848         :Return:
 849             - the indented block,
 850             - its first line offset from BOF, and
 851             - whether or not it finished with a blank line.
 852         """
 853         offset = self.abs_line_offset()
 854         indented, indent, blank_finish = self.input_lines.get_indented(
 855               self.line_offset, until_blank, strip_indent,
 856               block_indent=indent)
 857         self.next_line(len(indented) - 1) # advance to last indented line
 858         while indented and not indented[0].strip():
 859             indented.trim_start()
 860             offset += 1
 861         return indented, offset, blank_finish
 862
 863     def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
 864                                  strip_top=1):
 865         """
 866         Return an indented block and info.
 867
 868         Extract an indented block where the indent is known for the first line
 869         and unknown for all other lines.
 870
 871         :Parameters:
 872             - `indent`: The first line's indent (# of columns/characters).
 873             - `until_blank`: Stop collecting at the first blank line if true
 874               (1).
 875             - `strip_indent`: Strip `indent` characters of indentation if true
 876               (1, default).
 877             - `strip_top`: Strip blank lines from the beginning of the block.
 878
 879         :Return:
 880             - the indented block,
 881             - its indent,
 882             - its first line offset from BOF, and
 883             - whether or not it finished with a blank line.
 884         """
 885         offset = self.abs_line_offset()
 886         indented, indent, blank_finish = self.input_lines.get_indented(
 887               self.line_offset, until_blank, strip_indent,
 888               first_indent=indent)
 889         self.next_line(len(indented) - 1) # advance to last indented line
 890         if strip_top:
 891             while indented and not indented[0].strip():
 892                 indented.trim_start()
 893                 offset += 1
 894         return indented, indent, offset, blank_finish
 895
 896
 897 class StateWS(State):
 898
 899     """
 900     State superclass specialized for whitespace (blank lines & indents).
 901
 902     Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
 903     lines) and 'indent' (for indented text blocks) are added automatically,
 904     before any other transitions.  The transition method `blank()` handles
 905     blank lines and `indent()` handles nested indented blocks.  Indented
 906     blocks trigger a new state machine to be created by `indent()` and run.
 907     The class of the state machine to be created is in `indent_sm`, and the
 908     constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
 909
 910     The methods `known_indent()` and `firstknown_indent()` are provided for
 911     indented blocks where the indent (all lines' and first line's only,
 912     respectively) is known to the transition method, along with the attributes
 913     `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
 914     is triggered automatically.
 915     """
 916
 917     indent_sm = None
 918     """
 919     The `StateMachine` class handling indented text blocks.
 920
 921     If left as ``None``, `indent_sm` defaults to the value of
 922     `State.nested_sm`.  Override it in subclasses to avoid the default.
 923     """
 924
 925     indent_sm_kwargs = None
 926     """
 927     Keyword arguments dictionary, passed to the `indent_sm` constructor.
 928
 929     If left as ``None``, `indent_sm_kwargs` defaults to the value of
 930     `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
 931     """
 932
 933     known_indent_sm = None
 934     """
 935     The `StateMachine` class handling known-indented text blocks.
 936
 937     If left as ``None``, `known_indent_sm` defaults to the value of
 938     `indent_sm`.  Override it in subclasses to avoid the default.
 939     """
 940
 941     known_indent_sm_kwargs = None
 942     """
 943     Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
 944
 945     If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
 946     `indent_sm_kwargs`. Override it in subclasses to avoid the default.
 947     """
 948
 949     ws_patterns = {'blank': ' *$',
 950                    'indent': ' +'}
 951     """Patterns for default whitespace transitions.  May be overridden in
 952     subclasses."""
 953
 954     ws_initial_transitions = ('blank', 'indent')
 955     """Default initial whitespace transitions, added before those listed in
 956     `State.initial_transitions`.  May be overridden in subclasses."""
 957
 958     def __init__(self, state_machine, debug=0):
 959         """
 960         Initialize a `StateSM` object; extends `State.__init__()`.
 961
 962         Check for indent state machine attributes, set defaults if not set.
 963         """
 964         State.__init__(self, state_machine, debug)
 965         if self.indent_sm is None:
 966             self.indent_sm = self.nested_sm
 967         if self.indent_sm_kwargs is None:
 968             self.indent_sm_kwargs = self.nested_sm_kwargs
 969         if self.known_indent_sm is None:
 970             self.known_indent_sm = self.indent_sm
 971         if self.known_indent_sm_kwargs is None:
 972             self.known_indent_sm_kwargs = self.indent_sm_kwargs
 973
 974     def add_initial_transitions(self):
 975         """
 976         Add whitespace-specific transitions before those defined in subclass.
 977
 978         Extends `State.add_initial_transitions()`.
 979         """
 980         State.add_initial_transitions(self)
 981         if self.patterns is None:
 982             self.patterns = {}
 983         self.patterns.update(self.ws_patterns)
 984         names, transitions = self.make_transitions(
 985             self.ws_initial_transitions)
 986         self.add_transitions(names, transitions)
 987
 988     def blank(self, match, context, next_state):
 989         """Handle blank lines. Does nothing. Override in subclasses."""
 990         return self.nop(match, context, next_state)
 991
 992     def indent(self, match, context, next_state):
 993         """
 994         Handle an indented text block. Extend or override in subclasses.
 995
 996         Recursively run the registered state machine for indented blocks
 997         (`self.indent_sm`).
 998         """
 999         indented, indent, line_offset, blank_finish = \
1000               self.state_machine.get_indented()
1001         sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
1002         results = sm.run(indented, input_offset=line_offset)
1003         return context, next_state, results
1004
1005     def known_indent(self, match, context, next_state):
1006         """
1007         Handle a known-indent text block. Extend or override in subclasses.
1008
1009         Recursively run the registered state machine for known-indent indented
1010         blocks (`self.known_indent_sm`). The indent is the length of the
1011         match, ``match.end()``.
1012         """
1013         indented, line_offset, blank_finish = \
1014               self.state_machine.get_known_indented(match.end())
1015         sm = self.known_indent_sm(debug=self.debug,
1016                                  **self.known_indent_sm_kwargs)
1017         results = sm.run(indented, input_offset=line_offset)
1018         return context, next_state, results
1019
1020     def first_known_indent(self, match, context, next_state):
1021         """
1022         Handle an indented text block (first line's indent known).
1023
1024         Extend or override in subclasses.
1025
1026         Recursively run the registered state machine for known-indent indented
1027         blocks (`self.known_indent_sm`). The indent is the length of the
1028         match, ``match.end()``.
1029         """
1030         indented, line_offset, blank_finish = \
1031               self.state_machine.get_first_known_indented(match.end())
1032         sm = self.known_indent_sm(debug=self.debug,
1033                                  **self.known_indent_sm_kwargs)
1034         results = sm.run(indented, input_offset=line_offset)
1035         return context, next_state, results
1036
1037
1038 class _SearchOverride:
1039
1040     """
1041     Mix-in class to override `StateMachine` regular expression behavior.
1042
1043     Changes regular expression matching, from the default `re.match()`
1044     (succeeds only if the pattern matches at the start of `self.line`) to
1045     `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1046     When subclassing a `StateMachine`, list this class **first** in the
1047     inheritance list of the class definition.
1048     """
1049
1050     def match(self, pattern):
1051         """
1052         Return the result of a regular expression search.
1053
1054         Overrides `StateMachine.match()`.
1055
1056         Parameter `pattern`: `re` compiled regular expression.
1057         """
1058         return pattern.search(self.line)
1059
1060
1061 class SearchStateMachine(_SearchOverride, StateMachine):
1062     """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1063     pass
1064
1065
1066 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1067     """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1068     pass
1069
1070
1071 class ViewList:
1072
1073     """
1074     List with extended functionality: slices of ViewList objects are child
1075     lists, linked to their parents. Changes made to a child list also affect
1076     the parent list.  A child list is effectively a "view" (in the SQL sense)
1077     of the parent list.  Changes to parent lists, however, do *not* affect
1078     active child lists.  If a parent list is changed, any active child lists
1079     should be recreated.
1080
1081     The start and end of the slice can be trimmed using the `trim_start()` and
1082     `trim_end()` methods, without affecting the parent list.  The link between
1083     child and parent lists can be broken by calling `disconnect()` on the
1084     child list.
1085
1086     Also, ViewList objects keep track of the source & offset of each item.
1087     This information is accessible via the `source()`, `offset()`, and
1088     `info()` methods.
1089     """
1090
1091     def __init__(self, initlist=None, source=None, items=None,
1092                  parent=None, parent_offset=None):
1093         self.data = []
1094         """The actual list of data, flattened from various sources."""
1095
1096         self.items = []
1097         """A list of (source, offset) pairs, same length as `self.data`: the
1098         source of each line and the offset of each line from the beginning of
1099         its source."""
1100
1101         self.parent = parent
1102         """The parent list."""
1103
1104         self.parent_offset = parent_offset
1105         """Offset of this list from the beginning of the parent list."""
1106
1107         if isinstance(initlist, ViewList):
1108             self.data = initlist.data[:]
1109             self.items = initlist.items[:]
1110         elif initlist is not None:
1111             self.data = list(initlist)
1112             if items:
1113                 self.items = items
1114             else:
1115                 self.items = [(source, i) for i in range(len(initlist))]
1116         assert len(self.data) == len(self.items), 'data mismatch'
1117
1118     def __str__(self):
1119         return str(self.data)
1120
1121     def __repr__(self):
1122         return '%s(%s, items=%s)' % (self.__class__.__name__,
1123                                      self.data, self.items)
1124
1125     def __lt__(self, other): return self.data <  self.__cast(other)
1126     def __le__(self, other): return self.data <= self.__cast(other)
1127     def __eq__(self, other): return self.data == self.__cast(other)
1128     def __ne__(self, other): return self.data != self.__cast(other)
1129     def __gt__(self, other): return self.data >  self.__cast(other)
1130     def __ge__(self, other): return self.data >= self.__cast(other)
1131     def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1132
1133     def __cast(self, other):
1134         if isinstance(other, ViewList):
1135             return other.data
1136         else:
1137             return other
1138
1139     def __contains__(self, item): return item in self.data
1140     def __len__(self): return len(self.data)
1141
1142     # The __getitem__()/__setitem__() methods check whether the index
1143     # is a slice first, since indexing a native list with a slice object
1144     # just works.
1145
1146     def __getitem__(self, i):
1147         if isinstance(i, types.SliceType):
1148             assert i.step in (None, 1),  'cannot handle slice with stride'
1149             return self.__class__(self.data[i.start:i.stop],
1150                                   items=self.items[i.start:i.stop],
1151                                   parent=self, parent_offset=i.start or 0)
1152         else:
1153             return self.data[i]
1154
1155     def __setitem__(self, i, item):
1156         if isinstance(i, types.SliceType):
1157             assert i.step in (None, 1), 'cannot handle slice with stride'
1158             if not isinstance(item, ViewList):
1159                 raise TypeError('assigning non-ViewList to ViewList slice')
1160             self.data[i.start:i.stop] = item.data
1161             self.items[i.start:i.stop] = item.items
1162             assert len(self.data) == len(self.items), 'data mismatch'
1163             if self.parent:
1164                 self.parent[(i.start or 0) + self.parent_offset
1165                             : (i.stop or len(self)) + self.parent_offset] = item
1166         else:
1167             self.data[i] = item
1168             if self.parent:
1169                 self.parent[i + self.parent_offset] = item
1170
1171     def __delitem__(self, i):
1172         try:
1173             del self.data[i]
1174             del self.items[i]
1175             if self.parent:
1176                 del self.parent[i + self.parent_offset]
1177         except TypeError:
1178             assert i.step is None, 'cannot handle slice with stride'
1179             del self.data[i.start:i.stop]
1180             del self.items[i.start:i.stop]
1181             if self.parent:
1182                 del self.parent[(i.start or 0) + self.parent_offset
1183                                 : (i.stop or len(self)) + self.parent_offset]
1184
1185     def __add__(self, other):
1186         if isinstance(other, ViewList):
1187             return self.__class__(self.data + other.data,
1188                                   items=(self.items + other.items))
1189         else:
1190             raise TypeError('adding non-ViewList to a ViewList')
1191
1192     def __radd__(self, other):
1193         if isinstance(other, ViewList):
1194             return self.__class__(other.data + self.data,
1195                                   items=(other.items + self.items))
1196         else:
1197             raise TypeError('adding ViewList to a non-ViewList')
1198
1199     def __iadd__(self, other):
1200         if isinstance(other, ViewList):
1201             self.data += other.data
1202         else:
1203             raise TypeError('argument to += must be a ViewList')
1204         return self
1205
1206     def __mul__(self, n):
1207         return self.__class__(self.data * n, items=(self.items * n))
1208
1209     __rmul__ = __mul__
1210
1211     def __imul__(self, n):
1212         self.data *= n
1213         self.items *= n
1214         return self
1215
1216     def extend(self, other):
1217         if not isinstance(other, ViewList):
1218             raise TypeError('extending a ViewList with a non-ViewList')
1219         if self.parent:
1220             self.parent.insert(len(self.data) + self.parent_offset, other)
1221         self.data.extend(other.data)
1222         self.items.extend(other.items)
1223
1224     def append(self, item, source=None, offset=0):
1225         if source is None:
1226             self.extend(item)
1227         else:
1228             if self.parent:
1229                 self.parent.insert(len(self.data) + self.parent_offset, item,
1230                                    source, offset)
1231             self.data.append(item)
1232             self.items.append((source, offset))
1233
1234     def insert(self, i, item, source=None, offset=0):
1235         if source is None:
1236             if not isinstance(item, ViewList):
1237                 raise TypeError('inserting non-ViewList with no source given')
1238             self.data[i:i] = item.data
1239             self.items[i:i] = item.items
1240             if self.parent:
1241                 index = (len(self.data) + i) % len(self.data)
1242                 self.parent.insert(index + self.parent_offset, item)
1243         else:
1244             self.data.insert(i, item)
1245             self.items.insert(i, (source, offset))
1246             if self.parent:
1247                 index = (len(self.data) + i) % len(self.data)
1248                 self.parent.insert(index + self.parent_offset, item,
1249                                    source, offset)
1250
1251     def pop(self, i=-1):
1252         if self.parent:
1253             index = (len(self.data) + i) % len(self.data)
1254             self.parent.pop(index + self.parent_offset)
1255         self.items.pop(i)
1256         return self.data.pop(i)
1257
1258     def trim_start(self, n=1):
1259         """
1260         Remove items from the start of the list, without touching the parent.
1261         """
1262         if n > len(self.data):
1263             raise IndexError("Size of trim too large; can't trim %s items "
1264                              "from a list of size %s." % (n, len(self.data)))
1265         elif n < 0:
1266             raise IndexError('Trim size must be >= 0.')
1267         del self.data[:n]
1268         del self.items[:n]
1269         if self.parent:
1270             self.parent_offset += n
1271
1272     def trim_end(self, n=1):
1273         """
1274         Remove items from the end of the list, without touching the parent.
1275         """
1276         if n > len(self.data):
1277             raise IndexError("Size of trim too large; can't trim %s items "
1278                              "from a list of size %s." % (n, len(self.data)))
1279         elif n < 0:
1280             raise IndexError('Trim size must be >= 0.')
1281         del self.data[-n:]
1282         del self.items[-n:]
1283
1284     def remove(self, item):
1285         index = self.index(item)
1286         del self[index]
1287
1288     def count(self, item): return self.data.count(item)
1289     def index(self, item): return self.data.index(item)
1290
1291     def reverse(self):
1292         self.data.reverse()
1293         self.items.reverse()
1294         self.parent = None
1295
1296     def sort(self, *args):
1297         tmp = zip(self.data, self.items)
1298         tmp.sort(*args)
1299         self.data = [entry[0] for entry in tmp]
1300         self.items = [entry[1] for entry in tmp]
1301         self.parent = None
1302
1303     def info(self, i):
1304         """Return source & offset for index `i`."""
1305         try:
1306             return self.items[i]
1307         except IndexError:
1308             if i == len(self.data):     # Just past the end
1309                 return self.items[i - 1][0], None
1310             else:
1311                 raise
1312
1313     def source(self, i):
1314         """Return source for index `i`."""
1315         return self.info(i)[0]
1316
1317     def offset(self, i):
1318         """Return offset for index `i`."""
1319         return self.info(i)[1]
1320
1321     def disconnect(self):
1322         """Break link between this list and parent list."""
1323         self.parent = None
1324
1325     def xitems(self):
1326         """Return iterator yielding (source, offset, value) tuples."""
1327         for (value, (source, offset)) in zip(self.data, self.items):
1328             yield (source, offset, value)
1329
1330     def pprint(self):
1331         """Print the list in `grep` format (`source:offset:value` lines)"""
1332         for line in self.xitems():
1333             print "%s:%d:%s" % line
1334
1335
1336 class StringList(ViewList):
1337
1338     """A `ViewList` with string-specific methods."""
1339
1340     def trim_left(self, length, start=0, end=sys.maxint):
1341         """
1342         Trim `length` characters off the beginning of each item, in-place,
1343         from index `start` to `end`.  No whitespace-checking is done on the
1344         trimmed text.  Does not affect slice parent.
1345         """
1346         self.data[start:end] = [line[length:]
1347                                 for line in self.data[start:end]]
1348
1349     def get_text_block(self, start, flush_left=0):
1350         """
1351         Return a contiguous block of text.
1352
1353         If `flush_left` is true, raise `UnexpectedIndentationError` if an
1354         indented line is encountered before the text block ends (with a blank
1355         line).
1356         """
1357         end = start
1358         last = len(self.data)
1359         while end < last:
1360             line = self.data[end]
1361             if not line.strip():
1362                 break
1363             if flush_left and (line[0] == ' '):
1364                 source, offset = self.info(end)
1365                 raise UnexpectedIndentationError(self[start:end], source,
1366                                                  offset + 1)
1367             end += 1
1368         return self[start:end]
1369
1370     def get_indented(self, start=0, until_blank=0, strip_indent=1,
1371                      block_indent=None, first_indent=None):
1372         """
1373         Extract and return a StringList of indented lines of text.
1374
1375         Collect all lines with indentation, determine the minimum indentation,
1376         remove the minimum indentation from all indented lines (unless
1377         `strip_indent` is false), and return them. All lines up to but not
1378         including the first unindented line will be returned.
1379
1380         :Parameters:
1381           - `start`: The index of the first line to examine.
1382           - `until_blank`: Stop collecting at the first blank line if true.
1383           - `strip_indent`: Strip common leading indent if true (default).
1384           - `block_indent`: The indent of the entire block, if known.
1385           - `first_indent`: The indent of the first line, if known.
1386
1387         :Return:
1388           - a StringList of indented lines with mininum indent removed;
1389           - the amount of the indent;
1390           - a boolean: did the indented block finish with a blank line or EOF?
1391         """
1392         indent = block_indent           # start with None if unknown
1393         end = start
1394         if block_indent is not None and first_indent is None:
1395             first_indent = block_indent
1396         if first_indent is not None:
1397             end += 1
1398         last = len(self.data)
1399         while end < last:
1400             line = self.data[end]
1401             if line and (line[0] != ' '
1402                          or (block_indent is not None
1403                              and line[:block_indent].strip())):
1404                 # Line not indented or insufficiently indented.
1405                 # Block finished properly iff the last indented line blank:
1406                 blank_finish = ((end > start)
1407                                 and not self.data[end - 1].strip())
1408                 break
1409             stripped = line.lstrip()
1410             if not stripped:            # blank line
1411                 if until_blank:
1412                     blank_finish = 1
1413                     break
1414             elif block_indent is None:
1415                 line_indent = len(line) - len(stripped)
1416                 if indent is None:
1417                     indent = line_indent
1418                 else:
1419                     indent = min(indent, line_indent)
1420             end += 1
1421         else:
1422             blank_finish = 1            # block ends at end of lines
1423         block = self[start:end]
1424         if first_indent is not None and block:
1425             block.data[0] = block.data[0][first_indent:]
1426         if indent and strip_indent:
1427             block.trim_left(indent, start=(first_indent is not None))
1428         return block, indent or 0, blank_finish
1429
1430     def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1431         block = self[top:bottom]
1432         indent = right
1433         for i in range(len(block.data)):
1434             block.data[i] = line = block.data[i][left:right].rstrip()
1435             if line:
1436                 indent = min(indent, len(line) - len(line.lstrip()))
1437         if strip_indent and 0 < indent < right:
1438             block.data = [line[indent:] for line in block.data]
1439         return block
1440
1441     def pad_double_width(self, pad_char):
1442         """
1443         Pad all double-width characters in self by appending `pad_char` to each.
1444         For East Asian language support.
1445         """
1446         if hasattr(unicodedata, 'east_asian_width'):
1447             east_asian_width = unicodedata.east_asian_width
1448         else:
1449             return                      # new in Python 2.4
1450         for i in range(len(self.data)):
1451             line = self.data[i]
1452             if isinstance(line, unicode):
1453                 new = []
1454                 for char in line:
1455                     new.append(char)
1456                     if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1457                         new.append(pad_char)
1458                 self.data[i] = ''.join(new)
1459
1460     def replace(self, old, new):
1461         """Replace all occurrences of substring `old` with `new`."""
1462         for i in range(len(self.data)):
1463             self.data[i] = self.data[i].replace(old, new)
1464
1465
1466 class StateMachineError(Exception): pass
1467 class UnknownStateError(StateMachineError): pass
1468 class DuplicateStateError(StateMachineError): pass
1469 class UnknownTransitionError(StateMachineError): pass
1470 class DuplicateTransitionError(StateMachineError): pass
1471 class TransitionPatternNotFound(StateMachineError): pass
1472 class TransitionMethodNotFound(StateMachineError): pass
1473 class UnexpectedIndentationError(StateMachineError): pass
1474
1475
1476 class TransitionCorrection(Exception):
1477
1478     """
1479     Raise from within a transition method to switch to another transition.
1480
1481     Raise with one argument, the new transition name.
1482     """
1483
1484
1485 class StateCorrection(Exception):
1486
1487     """
1488     Raise from within a transition method to switch to another state.
1489
1490     Raise with one or two arguments: new state name, and an optional new
1491     transition name.
1492     """
1493
1494
1495 def string2lines(astring, tab_width=8, convert_whitespace=0,
1496                  whitespace=re.compile('[\v\f]')):
1497     """
1498     Return a list of one-line strings with tabs expanded, no newlines, and
1499     trailing whitespace stripped.
1500
1501     Each tab is expanded with between 1 and `tab_width` spaces, so that the
1502     next character's index becomes a multiple of `tab_width` (8 by default).
1503
1504     Parameters:
1505
1506     - `astring`: a multi-line string.
1507     - `tab_width`: the number of columns between tab stops.
1508     - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1509     """
1510     if convert_whitespace:
1511         astring = whitespace.sub(' ', astring)
1512     return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1513
1514 def _exception_data():
1515     """
1516     Return exception information:
1517
1518     - the exception's class name;
1519     - the exception object;
1520     - the name of the file containing the offending code;
1521     - the line number of the offending code;
1522     - the function name of the offending code.
1523     """
1524     type, value, traceback = sys.exc_info()
1525     while traceback.tb_next:
1526         traceback = traceback.tb_next
1527     code = traceback.tb_frame.f_code
1528     return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1529             code.co_name)