docutils/statemachine.py

   1 # Author: David Goodger
   2 # Contact: goodger@users.sourceforge.net
   3 # Revision: $Revision$
   4 # Date: $Date$
   5 # Copyright: This module has been placed in the public domain.
   6
   7 """
   8 A finite state machine specialized for regular-expression-based text filters,
   9 this module defines the following classes:
  10
  11 - `StateMachine`, a state machine
  12 - `State`, a state superclass
  13 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
  14 - `StateWS`, a state superclass for use with `StateMachineWS`
  15 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
  16 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
  17 - `ViewList`, extends standard Python lists.
  18 - `StringList`, string-specific ViewList.
  19
  20 Exception classes:
  21
  22 - `StateMachineError`
  23 - `UnknownStateError`
  24 - `DuplicateStateError`
  25 - `UnknownTransitionError`
  26 - `DuplicateTransitionError`
  27 - `TransitionPatternNotFound`
  28 - `TransitionMethodNotFound`
  29 - `UnexpectedIndentationError`
  30 - `TransitionCorrection`: Raised to switch to another transition.
  31 - `StateCorrection`: Raised to switch to another state & transition.
  32
  33 Functions:
  34
  35 - `string2lines()`: split a multi-line string into a list of one-line strings
  36
  37
  38 How To Use This Module
  39 ======================
  40 (See the individual classes, methods, and attributes for details.)
  41
  42 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
  43    You will also need to ``import re``.
  44
  45 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
  46    machine::
  47
  48        class MyState(statemachine.State):
  49
  50    Within the state's class definition:
  51
  52    a) Include a pattern for each transition, in `State.patterns`::
  53
  54           patterns = {'atransition': r'pattern', ...}
  55
  56    b) Include a list of initial transitions to be set up automatically, in
  57       `State.initial_transitions`::
  58
  59           initial_transitions = ['atransition', ...]
  60
  61    c) Define a method for each transition, with the same name as the
  62       transition pattern::
  63
  64           def atransition(self, match, context, next_state):
  65               # do something
  66               result = [...]  # a list
  67               return context, next_state, result
  68               # context, next_state may be altered
  69
  70       Transition methods may raise an `EOFError` to cut processing short.
  71
  72    d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
  73       transition methods, which handle the beginning- and end-of-file.
  74
  75    e) In order to handle nested processing, you may wish to override the
  76       attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
  77
  78       If you are using `StateWS` as a base class, in order to handle nested
  79       indented blocks, you may wish to:
  80
  81       - override the attributes `StateWS.indent_sm`,
  82         `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
  83         `StateWS.known_indent_sm_kwargs`;
  84       - override the `StateWS.blank()` method; and/or
  85       - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
  86         and/or `StateWS.firstknown_indent()` methods.
  87
  88 3. Create a state machine object::
  89
  90        sm = StateMachine(state_classes=[MyState, ...],
  91                          initial_state='MyState')
  92
  93 4. Obtain the input text, which needs to be converted into a tab-free list of
  94    one-line strings. For example, to read text from a file called
  95    'inputfile'::
  96
  97        input_string = open('inputfile').read()
  98        input_lines = statemachine.string2lines(input_string)
  99
 100 5. Run the state machine on the input text and collect the results, a list::
 101
 102        results = sm.run(input_lines)
 103
 104 6. Remove any lingering circular references::
 105
 106        sm.unlink()
 107 """
 108
 109 __docformat__ = 'restructuredtext'
 110
 111 import sys
 112 import re
 113 import types
 114 import unicodedata
 115
 116
 117 class StateMachine:
 118
 119     """
 120     A finite state machine for text filters using regular expressions.
 121
 122     The input is provided in the form of a list of one-line strings (no
 123     newlines). States are subclasses of the `State` class. Transitions consist
 124     of regular expression patterns and transition methods, and are defined in
 125     each state.
 126
 127     The state machine is started with the `run()` method, which returns the
 128     results of processing in a list.
 129     """
 130
 131     def __init__(self, state_classes, initial_state, debug=0):
 132         """
 133         Initialize a `StateMachine` object; add state objects.
 134
 135         Parameters:
 136
 137         - `state_classes`: a list of `State` (sub)classes.
 138         - `initial_state`: a string, the class name of the initial state.
 139         - `debug`: a boolean; produce verbose output if true (nonzero).
 140         """
 141
 142         self.input_lines = None
 143         """`StringList` of input lines (without newlines).
 144         Filled by `self.run()`."""
 145
 146         self.input_offset = 0
 147         """Offset of `self.input_lines` from the beginning of the file."""
 148
 149         self.line = None
 150         """Current input line."""
 151
 152         self.line_offset = -1
 153         """Current input line offset from beginning of `self.input_lines`."""
 154
 155         self.debug = debug
 156         """Debugging mode on/off."""
 157
 158         self.initial_state = initial_state
 159         """The name of the initial state (key to `self.states`)."""
 160
 161         self.current_state = initial_state
 162         """The name of the current state (key to `self.states`)."""
 163
 164         self.states = {}
 165         """Mapping of {state_name: State_object}."""
 166
 167         self.add_states(state_classes)
 168
 169         self.observers = []
 170         """List of bound methods or functions to call whenever the current
 171         line changes.  Observers are called with one argument, ``self``.
 172         Cleared at the end of `run()`."""
 173
 174     def unlink(self):
 175         """Remove circular references to objects no longer required."""
 176         for state in self.states.values():
 177             state.unlink()
 178         self.states = None
 179
 180     def run(self, input_lines, input_offset=0, context=None,
 181             input_source=None):
 182         """
 183         Run the state machine on `input_lines`. Return results (a list).
 184
 185         Reset `self.line_offset` and `self.current_state`. Run the
 186         beginning-of-file transition. Input one line at a time and check for a
 187         matching transition. If a match is found, call the transition method
 188         and possibly change the state. Store the context returned by the
 189         transition method to be passed on to the next transition matched.
 190         Accumulate the results returned by the transition methods in a list.
 191         Run the end-of-file transition. Finally, return the accumulated
 192         results.
 193
 194         Parameters:
 195
 196         - `input_lines`: a list of strings without newlines, or `StringList`.
 197         - `input_offset`: the line offset of `input_lines` from the beginning
 198           of the file.
 199         - `context`: application-specific storage.
 200         - `input_source`: name or path of source of `input_lines`.
 201         """
 202         self.runtime_init()
 203         if isinstance(input_lines, StringList):
 204             self.input_lines = input_lines
 205         else:
 206             self.input_lines = StringList(input_lines, source=input_source)
 207         self.input_offset = input_offset
 208         self.line_offset = -1
 209         self.current_state = self.initial_state
 210         if self.debug:
 211             print >>sys.stderr, (
 212                 '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
 213                 % (self.line_offset, '\n| '.join(self.input_lines)))
 214         transitions = None
 215         results = []
 216         state = self.get_state()
 217         try:
 218             if self.debug:
 219                 print >>sys.stderr, ('\nStateMachine.run: bof transition')
 220             context, result = state.bof(context)
 221             results.extend(result)
 222             while 1:
 223                 try:
 224                     try:
 225                         self.next_line()
 226                         if self.debug:
 227                             source, offset = self.input_lines.info(
 228                                 self.line_offset)
 229                             print >>sys.stderr, (
 230                                 '\nStateMachine.run: line (source=%r, '
 231                                 'offset=%r):\n| %s'
 232                                 % (source, offset, self.line))
 233                         context, next_state, result = self.check_line(
 234                             context, state, transitions)
 235                     except EOFError:
 236                         if self.debug:
 237                             print >>sys.stderr, (
 238                                 '\nStateMachine.run: %s.eof transition'
 239                                 % state.__class__.__name__)
 240                         result = state.eof(context)
 241                         results.extend(result)
 242                         break
 243                     else:
 244                         results.extend(result)
 245                 except TransitionCorrection, exception:
 246                     self.previous_line() # back up for another try
 247                     transitions = (exception.args[0],)
 248                     if self.debug:
 249                         print >>sys.stderr, (
 250                               '\nStateMachine.run: TransitionCorrection to '
 251                               'state "%s", transition %s.'
 252                               % (state.__class__.__name__, transitions[0]))
 253                     continue
 254                 except StateCorrection, exception:
 255                     self.previous_line() # back up for another try
 256                     next_state = exception.args[0]
 257                     if len(exception.args) == 1:
 258                         transitions = None
 259                     else:
 260                         transitions = (exception.args[1],)
 261                     if self.debug:
 262                         print >>sys.stderr, (
 263                               '\nStateMachine.run: StateCorrection to state '
 264                               '"%s", transition %s.'
 265                               % (next_state, transitions[0]))
 266                 else:
 267                     transitions = None
 268                 state = self.get_state(next_state)
 269         except:
 270             if self.debug:
 271                 self.error()
 272             raise
 273         self.observers = []
 274         return results
 275
 276     def get_state(self, next_state=None):
 277         """
 278         Return current state object; set it first if `next_state` given.
 279
 280         Parameter `next_state`: a string, the name of the next state.
 281
 282         Exception: `UnknownStateError` raised if `next_state` unknown.
 283         """
 284         if next_state:
 285             if self.debug and next_state != self.current_state:
 286                 print >>sys.stderr, \
 287                       ('\nStateMachine.get_state: Changing state from '
 288                        '"%s" to "%s" (input line %s).'
 289                        % (self.current_state, next_state,
 290                           self.abs_line_number()))
 291             self.current_state = next_state
 292         try:
 293             return self.states[self.current_state]
 294         except KeyError:
 295             raise UnknownStateError(self.current_state)
 296
 297     def next_line(self, n=1):
 298         """Load `self.line` with the `n`'th next line and return it."""
 299         try:
 300             try:
 301                 self.line_offset += n
 302                 self.line = self.input_lines[self.line_offset]
 303             except IndexError:
 304                 self.line = None
 305                 raise EOFError
 306             return self.line
 307         finally:
 308             self.notify_observers()
 309
 310     def is_next_line_blank(self):
 311         """Return 1 if the next line is blank or non-existant."""
 312         try:
 313             return not self.input_lines[self.line_offset + 1].strip()
 314         except IndexError:
 315             return 1
 316
 317     def at_eof(self):
 318         """Return 1 if the input is at or past end-of-file."""
 319         return self.line_offset >= len(self.input_lines) - 1
 320
 321     def at_bof(self):
 322         """Return 1 if the input is at or before beginning-of-file."""
 323         return self.line_offset <= 0
 324
 325     def previous_line(self, n=1):
 326         """Load `self.line` with the `n`'th previous line and return it."""
 327         self.line_offset -= n
 328         if self.line_offset < 0:
 329             self.line = None
 330         else:
 331             self.line = self.input_lines[self.line_offset]
 332         self.notify_observers()
 333         return self.line
 334
 335     def goto_line(self, line_offset):
 336         """Jump to absolute line offset `line_offset`, load and return it."""
 337         try:
 338             try:
 339                 self.line_offset = line_offset - self.input_offset
 340                 self.line = self.input_lines[self.line_offset]
 341             except IndexError:
 342                 self.line = None
 343                 raise EOFError
 344             return self.line
 345         finally:
 346             self.notify_observers()
 347
 348     def get_source(self, line_offset):
 349         """Return source of line at absolute line offset `line_offset`."""
 350         return self.input_lines.source(line_offset - self.input_offset)
 351
 352     def abs_line_offset(self):
 353         """Return line offset of current line, from beginning of file."""
 354         return self.line_offset + self.input_offset
 355
 356     def abs_line_number(self):
 357         """Return line number of current line (counting from 1)."""
 358         return self.line_offset + self.input_offset + 1
 359
 360     def insert_input(self, input_lines, source):
 361         self.input_lines.insert(self.line_offset + 1, '',
 362                                 source='internal padding')
 363         self.input_lines.insert(self.line_offset + 1, '',
 364                                 source='internal padding')
 365         self.input_lines.insert(self.line_offset + 2,
 366                                 StringList(input_lines, source))
 367
 368     def get_text_block(self, flush_left=0):
 369         """
 370         Return a contiguous block of text.
 371
 372         If `flush_left` is true, raise `UnexpectedIndentationError` if an
 373         indented line is encountered before the text block ends (with a blank
 374         line).
 375         """
 376         try:
 377             block = self.input_lines.get_text_block(self.line_offset,
 378                                                     flush_left)
 379             self.next_line(len(block) - 1)
 380             return block
 381         except UnexpectedIndentationError, error:
 382             block, source, lineno = error
 383             self.next_line(len(block) - 1) # advance to last line of block
 384             raise
 385
 386     def check_line(self, context, state, transitions=None):
 387         """
 388         Examine one line of input for a transition match & execute its method.
 389
 390         Parameters:
 391
 392         - `context`: application-dependent storage.
 393         - `state`: a `State` object, the current state.
 394         - `transitions`: an optional ordered list of transition names to try,
 395           instead of ``state.transition_order``.
 396
 397         Return the values returned by the transition method:
 398
 399         - context: possibly modified from the parameter `context`;
 400         - next state name (`State` subclass name);
 401         - the result output of the transition, a list.
 402
 403         When there is no match, ``state.no_match()`` is called and its return
 404         value is returned.
 405         """
 406         if transitions is None:
 407             transitions =  state.transition_order
 408         state_correction = None
 409         if self.debug:
 410             print >>sys.stderr, (
 411                   '\nStateMachine.check_line: state="%s", transitions=%r.'
 412                   % (state.__class__.__name__, transitions))
 413         for name in transitions:
 414             pattern, method, next_state = state.transitions[name]
 415             match = self.match(pattern)
 416             if match:
 417                 if self.debug:
 418                     print >>sys.stderr, (
 419                           '\nStateMachine.check_line: Matched transition '
 420                           '"%s" in state "%s".'
 421                           % (name, state.__class__.__name__))
 422                 return method(match, context, next_state)
 423         else:
 424             if self.debug:
 425                 print >>sys.stderr, (
 426                       '\nStateMachine.check_line: No match in state "%s".'
 427                       % state.__class__.__name__)
 428             return state.no_match(context, transitions)
 429
 430     def match(self, pattern):
 431         """
 432         Return the result of a regular expression match.
 433
 434         Parameter `pattern`: an `re` compiled regular expression.
 435         """
 436         return pattern.match(self.line)
 437
 438     def add_state(self, state_class):
 439         """
 440         Initialize & add a `state_class` (`State` subclass) object.
 441
 442         Exception: `DuplicateStateError` raised if `state_class` was already
 443         added.
 444         """
 445         statename = state_class.__name__
 446         if self.states.has_key(statename):
 447             raise DuplicateStateError(statename)
 448         self.states[statename] = state_class(self, self.debug)
 449
 450     def add_states(self, state_classes):
 451         """
 452         Add `state_classes` (a list of `State` subclasses).
 453         """
 454         for state_class in state_classes:
 455             self.add_state(state_class)
 456
 457     def runtime_init(self):
 458         """
 459         Initialize `self.states`.
 460         """
 461         for state in self.states.values():
 462             state.runtime_init()
 463
 464     def error(self):
 465         """Report error details."""
 466         type, value, module, line, function = _exception_data()
 467         print >>sys.stderr, '%s: %s' % (type, value)
 468         print >>sys.stderr, 'input line %s' % (self.abs_line_number())
 469         print >>sys.stderr, ('module %s, line %s, function %s'
 470                              % (module, line, function))
 471
 472     def attach_observer(self, observer):
 473         """
 474         The `observer` parameter is a function or bound method which takes two
 475         arguments, the source and offset of the current line.
 476         """
 477         self.observers.append(observer)
 478
 479     def detach_observer(self, observer):
 480         self.observers.remove(observer)
 481
 482     def notify_observers(self):
 483         for observer in self.observers:
 484             try:
 485                 info = self.input_lines.info(self.line_offset)
 486             except IndexError:
 487                 info = (None, None)
 488             observer(*info)
 489
 490
 491 class State:
 492
 493     """
 494     State superclass. Contains a list of transitions, and transition methods.
 495
 496     Transition methods all have the same signature. They take 3 parameters:
 497
 498     - An `re` match object. ``match.string`` contains the matched input line,
 499       ``match.start()`` gives the start index of the match, and
 500       ``match.end()`` gives the end index.
 501     - A context object, whose meaning is application-defined (initial value
 502       ``None``). It can be used to store any information required by the state
 503       machine, and the retured context is passed on to the next transition
 504       method unchanged.
 505     - The name of the next state, a string, taken from the transitions list;
 506       normally it is returned unchanged, but it may be altered by the
 507       transition method if necessary.
 508
 509     Transition methods all return a 3-tuple:
 510
 511     - A context object, as (potentially) modified by the transition method.
 512     - The next state name (a return value of ``None`` means no state change).
 513     - The processing result, a list, which is accumulated by the state
 514       machine.
 515
 516     Transition methods may raise an `EOFError` to cut processing short.
 517
 518     There are two implicit transitions, and corresponding transition methods
 519     are defined: `bof()` handles the beginning-of-file, and `eof()` handles
 520     the end-of-file. These methods have non-standard signatures and return
 521     values. `bof()` returns the initial context and results, and may be used
 522     to return a header string, or do any other processing needed. `eof()`
 523     should handle any remaining context and wrap things up; it returns the
 524     final processing result.
 525
 526     Typical applications need only subclass `State` (or a subclass), set the
 527     `patterns` and `initial_transitions` class attributes, and provide
 528     corresponding transition methods. The default object initialization will
 529     take care of constructing the list of transitions.
 530     """
 531
 532     patterns = None
 533     """
 534     {Name: pattern} mapping, used by `make_transition()`. Each pattern may
 535     be a string or a compiled `re` pattern. Override in subclasses.
 536     """
 537
 538     initial_transitions = None
 539     """
 540     A list of transitions to initialize when a `State` is instantiated.
 541     Each entry is either a transition name string, or a (transition name, next
 542     state name) pair. See `make_transitions()`. Override in subclasses.
 543     """
 544
 545     nested_sm = None
 546     """
 547     The `StateMachine` class for handling nested processing.
 548
 549     If left as ``None``, `nested_sm` defaults to the class of the state's
 550     controlling state machine. Override it in subclasses to avoid the default.
 551     """
 552
 553     nested_sm_kwargs = None
 554     """
 555     Keyword arguments dictionary, passed to the `nested_sm` constructor.
 556
 557     Two keys must have entries in the dictionary:
 558
 559     - Key 'state_classes' must be set to a list of `State` classes.
 560     - Key 'initial_state' must be set to the name of the initial state class.
 561
 562     If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
 563     class of the current state, and 'initial_state' defaults to the name of
 564     the class of the current state. Override in subclasses to avoid the
 565     defaults.
 566     """
 567
 568     def __init__(self, state_machine, debug=0):
 569         """
 570         Initialize a `State` object; make & add initial transitions.
 571
 572         Parameters:
 573
 574         - `statemachine`: the controlling `StateMachine` object.
 575         - `debug`: a boolean; produce verbose output if true (nonzero).
 576         """
 577
 578         self.transition_order = []
 579         """A list of transition names in search order."""
 580
 581         self.transitions = {}
 582         """
 583         A mapping of transition names to 3-tuples containing
 584         (compiled_pattern, transition_method, next_state_name). Initialized as
 585         an instance attribute dynamically (instead of as a class attribute)
 586         because it may make forward references to patterns and methods in this
 587         or other classes.
 588         """
 589
 590         self.add_initial_transitions()
 591
 592         self.state_machine = state_machine
 593         """A reference to the controlling `StateMachine` object."""
 594
 595         self.debug = debug
 596         """Debugging mode on/off."""
 597
 598         if self.nested_sm is None:
 599             self.nested_sm = self.state_machine.__class__
 600         if self.nested_sm_kwargs is None:
 601             self.nested_sm_kwargs = {'state_classes': [self.__class__],
 602                                      'initial_state': self.__class__.__name__}
 603
 604     def runtime_init(self):
 605         """
 606         Initialize this `State` before running the state machine; called from
 607         `self.state_machine.run()`.
 608         """
 609         pass
 610
 611     def unlink(self):
 612         """Remove circular references to objects no longer required."""
 613         self.state_machine = None
 614
 615     def add_initial_transitions(self):
 616         """Make and add transitions listed in `self.initial_transitions`."""
 617         if self.initial_transitions:
 618             names, transitions = self.make_transitions(
 619                   self.initial_transitions)
 620             self.add_transitions(names, transitions)
 621
 622     def add_transitions(self, names, transitions):
 623         """
 624         Add a list of transitions to the start of the transition list.
 625
 626         Parameters:
 627
 628         - `names`: a list of transition names.
 629         - `transitions`: a mapping of names to transition tuples.
 630
 631         Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
 632         """
 633         for name in names:
 634             if self.transitions.has_key(name):
 635                 raise DuplicateTransitionError(name)
 636             if not transitions.has_key(name):
 637                 raise UnknownTransitionError(name)
 638         self.transition_order[:0] = names
 639         self.transitions.update(transitions)
 640
 641     def add_transition(self, name, transition):
 642         """
 643         Add a transition to the start of the transition list.
 644
 645         Parameter `transition`: a ready-made transition 3-tuple.
 646
 647         Exception: `DuplicateTransitionError`.
 648         """
 649         if self.transitions.has_key(name):
 650             raise DuplicateTransitionError(name)
 651         self.transition_order[:0] = [name]
 652         self.transitions[name] = transition
 653
 654     def remove_transition(self, name):
 655         """
 656         Remove a transition by `name`.
 657
 658         Exception: `UnknownTransitionError`.
 659         """
 660         try:
 661             del self.transitions[name]
 662             self.transition_order.remove(name)
 663         except:
 664             raise UnknownTransitionError(name)
 665
 666     def make_transition(self, name, next_state=None):
 667         """
 668         Make & return a transition tuple based on `name`.
 669
 670         This is a convenience function to simplify transition creation.
 671
 672         Parameters:
 673
 674         - `name`: a string, the name of the transition pattern & method. This
 675           `State` object must have a method called '`name`', and a dictionary
 676           `self.patterns` containing a key '`name`'.
 677         - `next_state`: a string, the name of the next `State` object for this
 678           transition. A value of ``None`` (or absent) implies no state change
 679           (i.e., continue with the same state).
 680
 681         Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
 682         """
 683         if next_state is None:
 684             next_state = self.__class__.__name__
 685         try:
 686             pattern = self.patterns[name]
 687             if not hasattr(pattern, 'match'):
 688                 pattern = re.compile(pattern)
 689         except KeyError:
 690             raise TransitionPatternNotFound(
 691                   '%s.patterns[%r]' % (self.__class__.__name__, name))
 692         try:
 693             method = getattr(self, name)
 694         except AttributeError:
 695             raise TransitionMethodNotFound(
 696                   '%s.%s' % (self.__class__.__name__, name))
 697         return (pattern, method, next_state)
 698
 699     def make_transitions(self, name_list):
 700         """
 701         Return a list of transition names and a transition mapping.
 702
 703         Parameter `name_list`: a list, where each entry is either a transition
 704         name string, or a 1- or 2-tuple (transition name, optional next state
 705         name).
 706         """
 707         stringtype = type('')
 708         names = []
 709         transitions = {}
 710         for namestate in name_list:
 711             if type(namestate) is stringtype:
 712                 transitions[namestate] = self.make_transition(namestate)
 713                 names.append(namestate)
 714             else:
 715                 transitions[namestate[0]] = self.make_transition(*namestate)
 716                 names.append(namestate[0])
 717         return names, transitions
 718
 719     def no_match(self, context, transitions):
 720         """
 721         Called when there is no match from `StateMachine.check_line()`.
 722
 723         Return the same values returned by transition methods:
 724
 725         - context: unchanged;
 726         - next state name: ``None``;
 727         - empty result list.
 728
 729         Override in subclasses to catch this event.
 730         """
 731         return context, None, []
 732
 733     def bof(self, context):
 734         """
 735         Handle beginning-of-file. Return unchanged `context`, empty result.
 736
 737         Override in subclasses.
 738
 739         Parameter `context`: application-defined storage.
 740         """
 741         return context, []
 742
 743     def eof(self, context):
 744         """
 745         Handle end-of-file. Return empty result.
 746
 747         Override in subclasses.
 748
 749         Parameter `context`: application-defined storage.
 750         """
 751         return []
 752
 753     def nop(self, match, context, next_state):
 754         """
 755         A "do nothing" transition method.
 756
 757         Return unchanged `context` & `next_state`, empty result. Useful for
 758         simple state changes (actionless transitions).
 759         """
 760         return context, next_state, []
 761
 762
 763 class StateMachineWS(StateMachine):
 764
 765     """
 766     `StateMachine` subclass specialized for whitespace recognition.
 767
 768     There are three methods provided for extracting indented text blocks:
 769
 770     - `get_indented()`: use when the indent is unknown.
 771     - `get_known_indented()`: use when the indent is known for all lines.
 772     - `get_first_known_indented()`: use when only the first line's indent is
 773       known.
 774     """
 775
 776     def get_indented(self, until_blank=0, strip_indent=1):
 777         """
 778         Return a block of indented lines of text, and info.
 779
 780         Extract an indented block where the indent is unknown for all lines.
 781
 782         :Parameters:
 783             - `until_blank`: Stop collecting at the first blank line if true
 784               (1).
 785             - `strip_indent`: Strip common leading indent if true (1,
 786               default).
 787
 788         :Return:
 789             - the indented block (a list of lines of text),
 790             - its indent,
 791             - its first line offset from BOF, and
 792             - whether or not it finished with a blank line.
 793         """
 794         offset = self.abs_line_offset()
 795         indented, indent, blank_finish = self.input_lines.get_indented(
 796               self.line_offset, until_blank, strip_indent)
 797         if indented:
 798             self.next_line(len(indented) - 1) # advance to last indented line
 799         while indented and not indented[0].strip():
 800             indented.trim_start()
 801             offset += 1
 802         return indented, indent, offset, blank_finish
 803
 804     def get_known_indented(self, indent, until_blank=0, strip_indent=1):
 805         """
 806         Return an indented block and info.
 807
 808         Extract an indented block where the indent is known for all lines.
 809         Starting with the current line, extract the entire text block with at
 810         least `indent` indentation (which must be whitespace, except for the
 811         first line).
 812
 813         :Parameters:
 814             - `indent`: The number of indent columns/characters.
 815             - `until_blank`: Stop collecting at the first blank line if true
 816               (1).
 817             - `strip_indent`: Strip `indent` characters of indentation if true
 818               (1, default).
 819
 820         :Return:
 821             - the indented block,
 822             - its first line offset from BOF, and
 823             - whether or not it finished with a blank line.
 824         """
 825         offset = self.abs_line_offset()
 826         indented, indent, blank_finish = self.input_lines.get_indented(
 827               self.line_offset, until_blank, strip_indent,
 828               block_indent=indent)
 829         self.next_line(len(indented) - 1) # advance to last indented line
 830         while indented and not indented[0].strip():
 831             indented.trim_start()
 832             offset += 1
 833         return indented, offset, blank_finish
 834
 835     def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
 836                                  strip_top=1):
 837         """
 838         Return an indented block and info.
 839
 840         Extract an indented block where the indent is known for the first line
 841         and unknown for all other lines.
 842
 843         :Parameters:
 844             - `indent`: The first line's indent (# of columns/characters).
 845             - `until_blank`: Stop collecting at the first blank line if true
 846               (1).
 847             - `strip_indent`: Strip `indent` characters of indentation if true
 848               (1, default).
 849             - `strip_top`: Strip blank lines from the beginning of the block.
 850
 851         :Return:
 852             - the indented block,
 853             - its indent,
 854             - its first line offset from BOF, and
 855             - whether or not it finished with a blank line.
 856         """
 857         offset = self.abs_line_offset()
 858         indented, indent, blank_finish = self.input_lines.get_indented(
 859               self.line_offset, until_blank, strip_indent,
 860               first_indent=indent)
 861         self.next_line(len(indented) - 1) # advance to last indented line
 862         if strip_top:
 863             while indented and not indented[0].strip():
 864                 indented.trim_start()
 865                 offset += 1
 866         return indented, indent, offset, blank_finish
 867
 868
 869 class StateWS(State):
 870
 871     """
 872     State superclass specialized for whitespace (blank lines & indents).
 873
 874     Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
 875     lines) and 'indent' (for indented text blocks) are added automatically,
 876     before any other transitions.  The transition method `blank()` handles
 877     blank lines and `indent()` handles nested indented blocks.  Indented
 878     blocks trigger a new state machine to be created by `indent()` and run.
 879     The class of the state machine to be created is in `indent_sm`, and the
 880     constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
 881
 882     The methods `known_indent()` and `firstknown_indent()` are provided for
 883     indented blocks where the indent (all lines' and first line's only,
 884     respectively) is known to the transition method, along with the attributes
 885     `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
 886     is triggered automatically.
 887     """
 888
 889     indent_sm = None
 890     """
 891     The `StateMachine` class handling indented text blocks.
 892
 893     If left as ``None``, `indent_sm` defaults to the value of
 894     `State.nested_sm`.  Override it in subclasses to avoid the default.
 895     """
 896
 897     indent_sm_kwargs = None
 898     """
 899     Keyword arguments dictionary, passed to the `indent_sm` constructor.
 900
 901     If left as ``None``, `indent_sm_kwargs` defaults to the value of
 902     `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
 903     """
 904
 905     known_indent_sm = None
 906     """
 907     The `StateMachine` class handling known-indented text blocks.
 908
 909     If left as ``None``, `known_indent_sm` defaults to the value of
 910     `indent_sm`.  Override it in subclasses to avoid the default.
 911     """
 912
 913     known_indent_sm_kwargs = None
 914     """
 915     Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
 916
 917     If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
 918     `indent_sm_kwargs`. Override it in subclasses to avoid the default.
 919     """
 920
 921     ws_patterns = {'blank': ' *$',
 922                    'indent': ' +'}
 923     """Patterns for default whitespace transitions.  May be overridden in
 924     subclasses."""
 925
 926     ws_initial_transitions = ('blank', 'indent')
 927     """Default initial whitespace transitions, added before those listed in
 928     `State.initial_transitions`.  May be overridden in subclasses."""
 929
 930     def __init__(self, state_machine, debug=0):
 931         """
 932         Initialize a `StateSM` object; extends `State.__init__()`.
 933
 934         Check for indent state machine attributes, set defaults if not set.
 935         """
 936         State.__init__(self, state_machine, debug)
 937         if self.indent_sm is None:
 938             self.indent_sm = self.nested_sm
 939         if self.indent_sm_kwargs is None:
 940             self.indent_sm_kwargs = self.nested_sm_kwargs
 941         if self.known_indent_sm is None:
 942             self.known_indent_sm = self.indent_sm
 943         if self.known_indent_sm_kwargs is None:
 944             self.known_indent_sm_kwargs = self.indent_sm_kwargs
 945
 946     def add_initial_transitions(self):
 947         """
 948         Add whitespace-specific transitions before those defined in subclass.
 949
 950         Extends `State.add_initial_transitions()`.
 951         """
 952         State.add_initial_transitions(self)
 953         if self.patterns is None:
 954             self.patterns = {}
 955         self.patterns.update(self.ws_patterns)
 956         names, transitions = self.make_transitions(
 957             self.ws_initial_transitions)
 958         self.add_transitions(names, transitions)
 959
 960     def blank(self, match, context, next_state):
 961         """Handle blank lines. Does nothing. Override in subclasses."""
 962         return self.nop(match, context, next_state)
 963
 964     def indent(self, match, context, next_state):
 965         """
 966         Handle an indented text block. Extend or override in subclasses.
 967
 968         Recursively run the registered state machine for indented blocks
 969         (`self.indent_sm`).
 970         """
 971         indented, indent, line_offset, blank_finish = \
 972               self.state_machine.get_indented()
 973         sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
 974         results = sm.run(indented, input_offset=line_offset)
 975         return context, next_state, results
 976
 977     def known_indent(self, match, context, next_state):
 978         """
 979         Handle a known-indent text block. Extend or override in subclasses.
 980
 981         Recursively run the registered state machine for known-indent indented
 982         blocks (`self.known_indent_sm`). The indent is the length of the
 983         match, ``match.end()``.
 984         """
 985         indented, line_offset, blank_finish = \
 986               self.state_machine.get_known_indented(match.end())
 987         sm = self.known_indent_sm(debug=self.debug,
 988                                  **self.known_indent_sm_kwargs)
 989         results = sm.run(indented, input_offset=line_offset)
 990         return context, next_state, results
 991
 992     def first_known_indent(self, match, context, next_state):
 993         """
 994         Handle an indented text block (first line's indent known).
 995
 996         Extend or override in subclasses.
 997
 998         Recursively run the registered state machine for known-indent indented
 999         blocks (`self.known_indent_sm`). The indent is the length of the
1000         match, ``match.end()``.
1001         """
1002         indented, line_offset, blank_finish = \
1003               self.state_machine.get_first_known_indented(match.end())
1004         sm = self.known_indent_sm(debug=self.debug,
1005                                  **self.known_indent_sm_kwargs)
1006         results = sm.run(indented, input_offset=line_offset)
1007         return context, next_state, results
1008
1009
class _SearchOverride:

    """
    Mix-in that switches a `StateMachine` from matching to searching.

    The default `StateMachine.match()` uses `re.match()`, which succeeds only
    when the pattern matches at the start of `self.line`.  This mix-in uses
    `re.search()` instead, which succeeds when the pattern matches anywhere
    in `self.line`.  When subclassing a `StateMachine`, list this class
    **first** in the inheritance list of the class definition.
    """

    def match(self, pattern):
        """
        Search the current line for `pattern` and return the result.

        Overrides `StateMachine.match()`.

        Parameter `pattern`: `re` compiled regular expression.
        """
        current_line = self.line
        return pattern.search(current_line)
1031
1032
class SearchStateMachine(_SearchOverride, StateMachine):

    """
    A `StateMachine` whose `match()` uses `re.search()` instead of
    `re.match()`; the behavior comes from the `_SearchOverride` mix-in.
    """
1036
1037
class SearchStateMachineWS(_SearchOverride, StateMachineWS):

    """
    A `StateMachineWS` whose `match()` uses `re.search()` instead of
    `re.match()`; the behavior comes from the `_SearchOverride` mix-in.
    """
1041
1042
class ViewList:

    """
    List with extended functionality: slices of ViewList objects are child
    lists, linked to their parents. Changes made to a child list also affect
    the parent list.  A child list is effectively a "view" (in the SQL sense)
    of the parent list.  Changes to parent lists, however, do *not* affect
    active child lists.  If a parent list is changed, any active child lists
    should be recreated.

    The start and end of the slice can be trimmed using the `trim_start()` and
    `trim_end()` methods, without affecting the parent list.  The link between
    child and parent lists can be broken by calling `disconnect()` on the
    child list.

    Also, ViewList objects keep track of the source & offset of each item.
    This information is accessible via the `source()`, `offset()`, and
    `info()` methods.

    Invariant: `self.data` and `self.items` always have the same length.
    """

    def __init__(self, initlist=None, source=None, items=None,
                 parent=None, parent_offset=None):
        """
        Parameters:

        - `initlist`: initial contents; another `ViewList` (its data and
          items are copied) or any iterable of items.
        - `source`: source recorded for every item when `items` is not given.
        - `items`: ready-made list of (source, offset) pairs matching
          `initlist`; used as-is (not copied) when non-empty.
        - `parent`, `parent_offset`: link to the parent list; set by slicing.
        """
        self.data = []
        """The actual list of data, flattened from various sources."""

        self.items = []
        """A list of (source, offset) pairs, same length as `self.data`: the
        source of each line and the offset of each line from the beginning of
        its source."""

        self.parent = parent
        """The parent list."""

        self.parent_offset = parent_offset
        """Offset of this list from the beginning of the parent list."""

        if isinstance(initlist, ViewList):
            self.data = initlist.data[:]
            self.items = initlist.items[:]
        elif initlist is not None:
            self.data = list(initlist)
            if items:
                self.items = items
            else:
                self.items = [(source, i) for i in range(len(self.data))]
        assert len(self.data) == len(self.items), 'data mismatch'

    def __str__(self):
        return str(self.data)

    def __repr__(self):
        return '%s(%s, items=%s)' % (self.__class__.__name__,
                                     self.data, self.items)

    # Comparisons look at the data only; sources and offsets are ignored.
    def __lt__(self, other): return self.data <  self.__cast(other)
    def __le__(self, other): return self.data <= self.__cast(other)
    def __eq__(self, other): return self.data == self.__cast(other)
    def __ne__(self, other): return self.data != self.__cast(other)
    def __gt__(self, other): return self.data >  self.__cast(other)
    def __ge__(self, other): return self.data >= self.__cast(other)
    def __cmp__(self, other): return cmp(self.data, self.__cast(other))

    def __cast(self, other):
        """Return the plain data of `other` if it is a ViewList."""
        if isinstance(other, ViewList):
            return other.data
        else:
            return other

    def __contains__(self, item): return item in self.data
    def __len__(self): return len(self.data)

    # Links to the parent are tested with ``is not None``: `__len__()` makes
    # an empty parent list false, which must not be mistaken for "no parent".

    def _slice_bounds(self, i):
        """
        Return concrete (start, stop) bounds for slice object `i`.

        Omitted, negative, and out-of-range bounds are resolved against the
        current length, so the result can be shifted by `parent_offset`.
        Must be called *before* the list is modified.
        """
        start, stop, step = i.indices(len(self.data))
        if stop < start:
            stop = start                # empty slice at `start`
        return start, stop

    def _parent_index(self, i):
        """
        Return the parent-list index for local index `i` (may be negative).

        Must be called *before* the list's length changes.
        """
        if i < 0:
            i += len(self.data)
        return i + self.parent_offset

    def _insert_index(self, i):
        """
        Return the position at which ``list.insert(i, ...)`` inserts.

        Mirrors list semantics: negative indexes count from the end and
        out-of-range indexes are clamped.  Must be called *before* the
        insertion.
        """
        length = len(self.data)
        if i < 0:
            return max(0, length + i)
        return min(i, length)

    # The __getitem__()/__setitem__()/__delitem__() methods check whether the
    # index is a slice first and handle slices explicitly, so that the
    # source info and the parent list are kept in step with the data.

    def __getitem__(self, i):
        """Return item `i`, or a child ViewList linked to self for a slice."""
        if isinstance(i, slice):
            assert i.step in (None, 1),  'cannot handle slice with stride'
            start, stop = self._slice_bounds(i)
            return self.__class__(self.data[start:stop],
                                  items=self.items[start:stop],
                                  parent=self, parent_offset=start)
        else:
            return self.data[i]

    def __setitem__(self, i, item):
        """
        Replace item `i` (or a slice, with the contents of ViewList `item`)
        here and in the parent list, if any.
        """
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            if not isinstance(item, ViewList):
                raise TypeError('assigning non-ViewList to ViewList slice')
            # Resolve the bounds first: the assignment may change the length.
            start, stop = self._slice_bounds(i)
            self.data[start:stop] = item.data
            self.items[start:stop] = item.items
            assert len(self.data) == len(self.items), 'data mismatch'
            if self.parent is not None:
                self.parent[start + self.parent_offset
                            : stop + self.parent_offset] = item
        else:
            self.data[i] = item
            if self.parent is not None:
                self.parent[self._parent_index(i)] = item

    def __delitem__(self, i):
        """Delete item or slice `i` here and in the parent list, if any."""
        if isinstance(i, slice):
            assert i.step in (None, 1), 'cannot handle slice with stride'
            start, stop = self._slice_bounds(i)
            del self.data[start:stop]
            del self.items[start:stop]
            if self.parent is not None:
                del self.parent[start + self.parent_offset
                                : stop + self.parent_offset]
        else:
            if self.parent is not None:
                parent_index = self._parent_index(i)
            del self.data[i]            # raises IndexError for a bad index
            del self.items[i]
            if self.parent is not None:
                del self.parent[parent_index]

    def __add__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(self.data + other.data,
                                  items=(self.items + other.items))
        else:
            raise TypeError('adding non-ViewList to a ViewList')

    def __radd__(self, other):
        if isinstance(other, ViewList):
            return self.__class__(other.data + self.data,
                                  items=(other.items + self.items))
        else:
            raise TypeError('adding ViewList to a non-ViewList')

    def __iadd__(self, other):
        """
        Append the contents of ViewList `other` in place.

        Like `__imul__()`, this does not modify the parent list.
        """
        if isinstance(other, ViewList):
            self.data += other.data
            # Keep the source info in step with the data.
            self.items += other.items
        else:
            raise TypeError('argument to += must be a ViewList')
        return self

    def __mul__(self, n):
        return self.__class__(self.data * n, items=(self.items * n))

    __rmul__ = __mul__

    def __imul__(self, n):
        self.data *= n
        self.items *= n
        return self

    def extend(self, other):
        """Append the contents of ViewList `other`, here and in the parent."""
        if not isinstance(other, ViewList):
            raise TypeError('extending a ViewList with a non-ViewList')
        if self.parent is not None:
            self.parent.insert(len(self.data) + self.parent_offset, other)
        self.data.extend(other.data)
        self.items.extend(other.items)

    def append(self, item, source=None, offset=0):
        """
        Append `item`, here and in the parent list.

        With `source` given, `item` is a single item recorded with that
        source and `offset`.  Without it, `item` must be a ViewList, whose
        contents are appended (see `extend()`).
        """
        if source is None:
            self.extend(item)
        else:
            if self.parent is not None:
                self.parent.insert(len(self.data) + self.parent_offset, item,
                                   source, offset)
            self.data.append(item)
            self.items.append((source, offset))

    def insert(self, i, item, source=None, offset=0):
        """
        Insert `item` before index `i`, here and in the parent list.

        With `source` given, `item` is a single item recorded with that
        source and `offset`.  Without it, `item` must be a ViewList, whose
        contents are inserted; otherwise `TypeError` is raised.
        """
        if source is None and not isinstance(item, ViewList):
            raise TypeError('inserting non-ViewList with no source given')
        # Resolve the effective position before the length changes.
        index = self._insert_index(i)
        if source is None:
            self.data[i:i] = item.data
            self.items[i:i] = item.items
            if self.parent is not None:
                self.parent.insert(index + self.parent_offset, item)
        else:
            self.data.insert(i, item)
            self.items.insert(i, (source, offset))
            if self.parent is not None:
                self.parent.insert(index + self.parent_offset, item,
                                   source, offset)

    def pop(self, i=-1):
        """Remove and return item `i`, here and in the parent list."""
        if self.parent is not None:
            parent_index = self._parent_index(i)
        # Pop locally first: a bad index raises IndexError before the parent
        # list is touched.
        item = self.data.pop(i)
        self.items.pop(i)
        if self.parent is not None:
            self.parent.pop(parent_index)
        return item

    def trim_start(self, n=1):
        """
        Remove items from the start of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        del self.data[:n]
        del self.items[:n]
        if self.parent is not None:
            self.parent_offset += n

    def trim_end(self, n=1):
        """
        Remove items from the end of the list, without touching the parent.
        """
        if n > len(self.data):
            raise IndexError("Size of trim too large; can't trim %s items "
                             "from a list of size %s." % (n, len(self.data)))
        elif n < 0:
            raise IndexError('Trim size must be >= 0.')
        if n:
            # Guarded: ``del x[-0:]`` would delete the whole list.
            del self.data[-n:]
            del self.items[-n:]

    def remove(self, item):
        """Delete the first occurrence of `item` (see `__delitem__()`)."""
        index = self.index(item)
        del self[index]

    def count(self, item): return self.data.count(item)
    def index(self, item): return self.data.index(item)

    def reverse(self):
        """Reverse in place; breaks the link to the parent list."""
        self.data.reverse()
        self.items.reverse()
        self.parent = None

    def sort(self, *args):
        """
        Sort data and source info together, as (data, info) pairs; `args`
        are passed to ``list.sort()``.  Breaks the link to the parent list.
        """
        tmp = list(zip(self.data, self.items))
        tmp.sort(*args)
        self.data = [entry[0] for entry in tmp]
        self.items = [entry[1] for entry in tmp]
        self.parent = None

    def info(self, i):
        """Return source & offset for index `i`."""
        try:
            return self.items[i]
        except IndexError:
            if i == len(self.data):     # Just past the end
                return self.items[i - 1][0], None
            else:
                raise

    def source(self, i):
        """Return source for index `i`."""
        return self.info(i)[0]

    def offset(self, i):
        """Return offset for index `i`."""
        return self.info(i)[1]

    def disconnect(self):
        """Break link between this list and parent list."""
        self.parent = None
1297
1298
class StringList(ViewList):

    """A `ViewList` with string-specific methods."""

    def trim_left(self, length, start=0, end=sys.maxint):
        """
        Remove the first `length` characters of each item, in-place, for the
        items from index `start` up to (not including) `end`.  The removed
        text is not checked for being whitespace.  The slice parent is not
        affected.
        """
        trimmed = []
        for line in self.data[start:end]:
            trimmed.append(line[length:])
        self.data[start:end] = trimmed

    def get_text_block(self, start, flush_left=0):
        """
        Return a contiguous block of text, as a child list.

        The block begins at index `start` and runs up to (not including) the
        first blank line, or to the end of the list.

        If `flush_left` is true, raise `UnexpectedIndentationError` if an
        indented line is encountered before the text block ends (with a blank
        line).  The exception's arguments are the block collected so far, the
        source of the offending line, and its offset + 1.
        """
        total = len(self.data)
        end = start
        while end < total:
            line = self.data[end]
            if not line.strip():
                break                   # blank line: end of block
            if flush_left and line.startswith(' '):
                source, offset = self.info(end)
                raise UnexpectedIndentationError(self[start:end], source,
                                                 offset + 1)
            end += 1
        return self[start:end]

    def get_indented(self, start=0, until_blank=0, strip_indent=1,
                     block_indent=None, first_indent=None):
        """
        Extract and return a StringList of indented lines of text.

        Collect all lines with indentation, determine the minimum indentation,
        remove the minimum indentation from all indented lines (unless
        `strip_indent` is false), and return them. All lines up to but not
        including the first unindented line will be returned.

        :Parameters:
          - `start`: The index of the first line to examine.
          - `until_blank`: Stop collecting at the first blank line if true.
          - `strip_indent`: Strip common leading indent if true (default).
          - `block_indent`: The indent of the entire block, if known.
          - `first_indent`: The indent of the first line, if known.

        :Return:
          - a StringList of indented lines with minimum indent removed;
          - the amount of the indent;
          - a boolean: did the indented block finish with a blank line or EOF?
        """
        # A known block indent also applies to the first line, unless the
        # first line's indent was given separately.
        if first_indent is None and block_indent is not None:
            first_indent = block_indent
        first_known = first_indent is not None
        indent = block_indent           # stays None until determined
        end = start
        if first_known:
            end += 1                    # the first line is not examined
        total = len(self.data)
        while end < total:
            line = self.data[end]
            if line:
                if line[0] != ' ':
                    too_shallow = 1     # not indented at all
                elif block_indent is not None:
                    # text inside the known indent: insufficiently indented
                    too_shallow = line[:block_indent].strip()
                else:
                    too_shallow = 0
                if too_shallow:
                    # Block finished properly iff the last indented line is
                    # blank:
                    blank_finish = ((end > start)
                                    and not self.data[end - 1].strip())
                    break
            text = line.lstrip()
            if text:
                if block_indent is None:
                    # Track the minimum indent seen so far.
                    width = len(line) - len(text)
                    if indent is None or width < indent:
                        indent = width
            elif until_blank:           # blank line ends the block
                blank_finish = 1
                break
            end += 1
        else:
            blank_finish = 1            # block ends at end of lines
        block = self[start:end]
        if first_known and block:
            block.data[0] = block.data[0][first_indent:]
        if indent and strip_indent:
            # The first line was already stripped above if its indent was
            # known; then trimming starts at the second line.
            block.trim_left(indent, start=first_known)
        return block, indent or 0, blank_finish

    def get_2D_block(self, top, left, bottom, right, strip_indent=1):
        """
        Return a rectangular region of text, as a child list.

        The region covers the lines from index `top` up to (not including)
        `bottom`, each cut to the columns from `left` up to (not including)
        `right`, with trailing whitespace removed.  If `strip_indent` is
        true, the leading indent common to all non-empty lines of the region
        is removed as well.
        """
        block = self[top:bottom]
        indent = right
        cells = []
        for row in block.data:
            cell = row[left:right].rstrip()
            cells.append(cell)
            if cell:
                indent = min(indent, len(cell) - len(cell.lstrip()))
        if strip_indent and 0 < indent < right:
            cells = [cell[indent:] for cell in cells]
        block.data = cells
        return block

    def pad_double_width(self, pad_char):
        """
        Append `pad_char` after every double-width character in self.
        For East Asian language support.

        Only Unicode strings are processed.  Does nothing if
        `unicodedata.east_asian_width` is unavailable.
        """
        width_of = getattr(unicodedata, 'east_asian_width', None)
        if width_of is None:
            return                      # new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, types.UnicodeType):
                continue
            padded = []
            for char in line:
                padded.append(char)
                if width_of(char) in 'WF': # 'W'ide & 'F'ull-width
                    padded.append(pad_char)
            self.data[index] = ''.join(padded)

    def replace(self, old, new):
        """Replace all occurrences of substring `old` with `new`, in-place."""
        for index, line in enumerate(self.data):
            self.data[index] = line.replace(old, new)
1427
1428
class StateMachineError(Exception):
    """Base class of the error exceptions defined in this module."""


# The concrete error types below add a distinct name only; none of them
# defines any behavior of its own.

class UnknownStateError(StateMachineError):
    pass


class DuplicateStateError(StateMachineError):
    pass


class UnknownTransitionError(StateMachineError):
    pass


class DuplicateTransitionError(StateMachineError):
    pass


class TransitionPatternNotFound(StateMachineError):
    pass


class TransitionMethodNotFound(StateMachineError):
    pass


class UnexpectedIndentationError(StateMachineError):
    """
    Raised by `StringList.get_text_block()` when `flush_left` is true and an
    indented line is found inside the text block.  Arguments: the block
    collected so far, the source of the offending line, and its offset + 1.
    """
1437
1438
class TransitionCorrection(Exception):

    """
    Exception used to switch to another transition.

    Raise it from within a transition method, with a single argument: the
    name of the new transition.
    """
1446
1447
class StateCorrection(Exception):

    """
    Exception used to switch to another state.

    Raise it from within a transition method, with one or two arguments: the
    name of the new state and, optionally, the name of a new transition.
    """
1456
1457
def string2lines(astring, tab_width=8, convert_whitespace=0,
                 whitespace=re.compile('[\v\f]')):
    """
    Split `astring` into a list of one-line strings, with tabs expanded,
    newlines removed, and trailing whitespace stripped.

    Each tab is replaced by between 1 and `tab_width` spaces, so that the
    next character's index becomes a multiple of `tab_width` (8 by default).

    Parameters:

    - `astring`: a multi-line string.
    - `tab_width`: the number of columns between tab stops.
    - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
    - `whitespace`: compiled pattern of the characters replaced by a space
      when `convert_whitespace` is true.
    """
    text = astring
    if convert_whitespace:
        text = whitespace.sub(' ', text)
    lines = []
    for raw_line in text.splitlines():
        lines.append(raw_line.expandtabs(tab_width).rstrip())
    return lines
1476
def _exception_data():
    """
    Return information about the exception currently being handled, taken
    from `sys.exc_info()`, as a 5-tuple:

    - the exception's class name;
    - the exception object;
    - the name of the file containing the offending code;
    - the line number of the offending code;
    - the function name of the offending code.

    The location refers to the innermost frame of the traceback.
    """
    exc_type, exc_value, tb = sys.exc_info()
    # Walk to the last traceback entry: the frame where the exception arose.
    while tb.tb_next:
        tb = tb.tb_next
    code = tb.tb_frame.f_code
    return (exc_type.__name__, exc_value, code.co_filename, tb.tb_lineno,
            code.co_name)