docutils/statemachine.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 A finite state machine specialized for regular-expression-based text filters,
   7 this module defines the following classes:
   8
   9 - `StateMachine`, a state machine
  10 - `State`, a state superclass
  11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
  12 - `StateWS`, a state superclass for use with `StateMachineWS`
  13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
  14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
  15 - `ViewList`, extends standard Python lists.
  16 - `StringList`, string-specific ViewList.
  17
  18 Exception classes:
  19
  20 - `StateMachineError`
  21 - `UnknownStateError`
  22 - `DuplicateStateError`
  23 - `UnknownTransitionError`
  24 - `DuplicateTransitionError`
  25 - `TransitionPatternNotFound`
  26 - `TransitionMethodNotFound`
  27 - `UnexpectedIndentationError`
  28 - `TransitionCorrection`: Raised to switch to another transition.
  29 - `StateCorrection`: Raised to switch to another state & transition.
  30
  31 Functions:
  32
  33 - `string2lines()`: split a multi-line string into a list of one-line strings
  34
  35
  36 How To Use This Module
  37 ======================
  38 (See the individual classes, methods, and attributes for details.)
  39
  40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
  41    You will also need to ``import re``.
  42
  43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
  44    machine::
  45
  46        class MyState(statemachine.State):
  47
  48    Within the state's class definition:
  49
  50    a) Include a pattern for each transition, in `State.patterns`::
  51
  52           patterns = {'atransition': r'pattern', ...}
  53
  54    b) Include a list of initial transitions to be set up automatically, in
  55       `State.initial_transitions`::
  56
  57           initial_transitions = ['atransition', ...]
  58
  59    c) Define a method for each transition, with the same name as the
  60       transition pattern::
  61
  62           def atransition(self, match, context, next_state):
  63               # do something
  64               result = [...]  # a list
  65               return context, next_state, result
  66               # context, next_state may be altered
  67
  68       Transition methods may raise an `EOFError` to cut processing short.
  69
  70    d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
  71       transition methods, which handle the beginning- and end-of-file.
  72
  73    e) In order to handle nested processing, you may wish to override the
  74       attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
  75
  76       If you are using `StateWS` as a base class, in order to handle nested
  77       indented blocks, you may wish to:
  78
  79       - override the attributes `StateWS.indent_sm`,
  80         `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
  81         `StateWS.known_indent_sm_kwargs`;
  82       - override the `StateWS.blank()` method; and/or
  83       - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
  84         and/or `StateWS.firstknown_indent()` methods.
  85
  86 3. Create a state machine object::
  87
  88        sm = StateMachine(state_classes=[MyState, ...],
  89                          initial_state='MyState')
  90
  91 4. Obtain the input text, which needs to be converted into a tab-free list of
  92    one-line strings. For example, to read text from a file called
  93    'inputfile'::
  94
  95        input_string = open('inputfile').read()
  96        input_lines = statemachine.string2lines(input_string)
  97
  98 5. Run the state machine on the input text and collect the results, a list::
  99
 100        results = sm.run(input_lines)
 101
 102 6. Remove any lingering circular references::
 103
 104        sm.unlink()
 105 """
 106
 107 __docformat__ = 'restructuredtext'
 108
 109 import sys
 110 import re
 111 import types
 112 import unicodedata
 113
 114
 115 class StateMachine:
 116
 117     """
 118     A finite state machine for text filters using regular expressions.
 119
 120     The input is provided in the form of a list of one-line strings (no
 121     newlines). States are subclasses of the `State` class. Transitions consist
 122     of regular expression patterns and transition methods, and are defined in
 123     each state.
 124
 125     The state machine is started with the `run()` method, which returns the
 126     results of processing in a list.
 127     """
 128
 129     def __init__(self, state_classes, initial_state, debug=0):
 130         """
 131         Initialize a `StateMachine` object; add state objects.
 132
 133         Parameters:
 134
 135         - `state_classes`: a list of `State` (sub)classes.
 136         - `initial_state`: a string, the class name of the initial state.
 137         - `debug`: a boolean; produce verbose output if true (nonzero).
 138         """
 139
 140         self.input_lines = None
 141         """`StringList` of input lines (without newlines).
 142         Filled by `self.run()`."""
 143
 144         self.input_offset = 0
 145         """Offset of `self.input_lines` from the beginning of the file."""
 146
 147         self.line = None
 148         """Current input line."""
 149
 150         self.line_offset = -1
 151         """Current input line offset from beginning of `self.input_lines`."""
 152
 153         self.debug = debug
 154         """Debugging mode on/off."""
 155
 156         self.initial_state = initial_state
 157         """The name of the initial state (key to `self.states`)."""
 158
 159         self.current_state = initial_state
 160         """The name of the current state (key to `self.states`)."""
 161
 162         self.states = {}
 163         """Mapping of {state_name: State_object}."""
 164
 165         self.add_states(state_classes)
 166
 167         self.observers = []
 168         """List of bound methods or functions to call whenever the current
 169         line changes.  Observers are called with one argument, ``self``.
 170         Cleared at the end of `run()`."""
 171
 172     def unlink(self):
 173         """Remove circular references to objects no longer required."""
 174         for state in self.states.values():
 175             state.unlink()
 176         self.states = None
 177
 178     def run(self, input_lines, input_offset=0, context=None,
 179             input_source=None):
 180         """
 181         Run the state machine on `input_lines`. Return results (a list).
 182
 183         Reset `self.line_offset` and `self.current_state`. Run the
 184         beginning-of-file transition. Input one line at a time and check for a
 185         matching transition. If a match is found, call the transition method
 186         and possibly change the state. Store the context returned by the
 187         transition method to be passed on to the next transition matched.
 188         Accumulate the results returned by the transition methods in a list.
 189         Run the end-of-file transition. Finally, return the accumulated
 190         results.
 191
 192         Parameters:
 193
 194         - `input_lines`: a list of strings without newlines, or `StringList`.
 195         - `input_offset`: the line offset of `input_lines` from the beginning
 196           of the file.
 197         - `context`: application-specific storage.
 198         - `input_source`: name or path of source of `input_lines`.
 199         """
 200         self.runtime_init()
 201         if isinstance(input_lines, StringList):
 202             self.input_lines = input_lines
 203         else:
 204             self.input_lines = StringList(input_lines, source=input_source)
 205         self.input_offset = input_offset
 206         self.line_offset = -1
 207         self.current_state = self.initial_state
 208         if self.debug:
 209             print >>sys.stderr, (
 210                 '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
 211                 % (self.line_offset, '\n| '.join(self.input_lines)))
 212         transitions = None
 213         results = []
 214         state = self.get_state()
 215         try:
 216             if self.debug:
 217                 print >>sys.stderr, ('\nStateMachine.run: bof transition')
 218             context, result = state.bof(context)
 219             results.extend(result)
 220             while 1:
 221                 try:
 222                     try:
 223                         self.next_line()
 224                         if self.debug:
 225                             source, offset = self.input_lines.info(
 226                                 self.line_offset)
 227                             print >>sys.stderr, (
 228                                 '\nStateMachine.run: line (source=%r, '
 229                                 'offset=%r):\n| %s'
 230                                 % (source, offset, self.line))
 231                         context, next_state, result = self.check_line(
 232                             context, state, transitions)
 233                     except EOFError:
 234                         if self.debug:
 235                             print >>sys.stderr, (
 236                                 '\nStateMachine.run: %s.eof transition'
 237                                 % state.__class__.__name__)
 238                         result = state.eof(context)
 239                         results.extend(result)
 240                         break
 241                     else:
 242                         results.extend(result)
 243                 except TransitionCorrection, exception:
 244                     self.previous_line() # back up for another try
 245                     transitions = (exception.args[0],)
 246                     if self.debug:
 247                         print >>sys.stderr, (
 248                               '\nStateMachine.run: TransitionCorrection to '
 249                               'state "%s", transition %s.'
 250                               % (state.__class__.__name__, transitions[0]))
 251                     continue
 252                 except StateCorrection, exception:
 253                     self.previous_line() # back up for another try
 254                     next_state = exception.args[0]
 255                     if len(exception.args) == 1:
 256                         transitions = None
 257                     else:
 258                         transitions = (exception.args[1],)
 259                     if self.debug:
 260                         print >>sys.stderr, (
 261                               '\nStateMachine.run: StateCorrection to state '
 262                               '"%s", transition %s.'
 263                               % (next_state, transitions[0]))
 264                 else:
 265                     transitions = None
 266                 state = self.get_state(next_state)
 267         except:
 268             if self.debug:
 269                 self.error()
 270             raise
 271         self.observers = []
 272         return results
 273
 274     def get_state(self, next_state=None):
 275         """
 276         Return current state object; set it first if `next_state` given.
 277
 278         Parameter `next_state`: a string, the name of the next state.
 279
 280         Exception: `UnknownStateError` raised if `next_state` unknown.
 281         """
 282         if next_state:
 283             if self.debug and next_state != self.current_state:
 284                 print >>sys.stderr, \
 285                       ('\nStateMachine.get_state: Changing state from '
 286                        '"%s" to "%s" (input line %s).'
 287                        % (self.current_state, next_state,
 288                           self.abs_line_number()))
 289             self.current_state = next_state
 290         try:
 291             return self.states[self.current_state]
 292         except KeyError:
 293             raise UnknownStateError(self.current_state)
 294
 295     def next_line(self, n=1):
 296         """Load `self.line` with the `n`'th next line and return it."""
 297         try:
 298             try:
 299                 self.line_offset += n
 300                 self.line = self.input_lines[self.line_offset]
 301             except IndexError:
 302                 self.line = None
 303                 raise EOFError
 304             return self.line
 305         finally:
 306             self.notify_observers()
 307
 308     def is_next_line_blank(self):
 309         """Return 1 if the next line is blank or non-existant."""
 310         try:
 311             return not self.input_lines[self.line_offset + 1].strip()
 312         except IndexError:
 313             return 1
 314
 315     def at_eof(self):
 316         """Return 1 if the input is at or past end-of-file."""
 317         return self.line_offset >= len(self.input_lines) - 1
 318
 319     def at_bof(self):
 320         """Return 1 if the input is at or before beginning-of-file."""
 321         return self.line_offset <= 0
 322
 323     def previous_line(self, n=1):
 324         """Load `self.line` with the `n`'th previous line and return it."""
 325         self.line_offset -= n
 326         if self.line_offset < 0:
 327             self.line = None
 328         else:
 329             self.line = self.input_lines[self.line_offset]
 330         self.notify_observers()
 331         return self.line
 332
 333     def goto_line(self, line_offset):
 334         """Jump to absolute line offset `line_offset`, load and return it."""
 335         try:
 336             try:
 337                 self.line_offset = line_offset - self.input_offset
 338                 self.line = self.input_lines[self.line_offset]
 339             except IndexError:
 340                 self.line = None
 341                 raise EOFError
 342             return self.line
 343         finally:
 344             self.notify_observers()
 345
 346     def get_source(self, line_offset):
 347         """Return source of line at absolute line offset `line_offset`."""
 348         return self.input_lines.source(line_offset - self.input_offset)
 349
 350     def abs_line_offset(self):
 351         """Return line offset of current line, from beginning of file."""
 352         return self.line_offset + self.input_offset
 353
 354     def abs_line_number(self):
 355         """Return line number of current line (counting from 1)."""
 356         return self.line_offset + self.input_offset + 1
 357
 358     def insert_input(self, input_lines, source):
 359         self.input_lines.insert(self.line_offset + 1, '',
 360                                 source='internal padding')
 361         self.input_lines.insert(self.line_offset + 1, '',
 362                                 source='internal padding')
 363         self.input_lines.insert(self.line_offset + 2,
 364                                 StringList(input_lines, source))
 365
 366     def get_text_block(self, flush_left=0):
 367         """
 368         Return a contiguous block of text.
 369
 370         If `flush_left` is true, raise `UnexpectedIndentationError` if an
 371         indented line is encountered before the text block ends (with a blank
 372         line).
 373         """
 374         try:
 375             block = self.input_lines.get_text_block(self.line_offset,
 376                                                     flush_left)
 377             self.next_line(len(block) - 1)
 378             return block
 379         except UnexpectedIndentationError, error:
 380             block, source, lineno = error.args
 381             self.next_line(len(block) - 1) # advance to last line of block
 382             raise
 383
 384     def check_line(self, context, state, transitions=None):
 385         """
 386         Examine one line of input for a transition match & execute its method.
 387
 388         Parameters:
 389
 390         - `context`: application-dependent storage.
 391         - `state`: a `State` object, the current state.
 392         - `transitions`: an optional ordered list of transition names to try,
 393           instead of ``state.transition_order``.
 394
 395         Return the values returned by the transition method:
 396
 397         - context: possibly modified from the parameter `context`;
 398         - next state name (`State` subclass name);
 399         - the result output of the transition, a list.
 400
 401         When there is no match, ``state.no_match()`` is called and its return
 402         value is returned.
 403         """
 404         if transitions is None:
 405             transitions =  state.transition_order
 406         state_correction = None
 407         if self.debug:
 408             print >>sys.stderr, (
 409                   '\nStateMachine.check_line: state="%s", transitions=%r.'
 410                   % (state.__class__.__name__, transitions))
 411         for name in transitions:
 412             pattern, method, next_state = state.transitions[name]
 413             match = self.match(pattern)
 414             if match:
 415                 if self.debug:
 416                     print >>sys.stderr, (
 417                           '\nStateMachine.check_line: Matched transition '
 418                           '"%s" in state "%s".'
 419                           % (name, state.__class__.__name__))
 420                 return method(match, context, next_state)
 421         else:
 422             if self.debug:
 423                 print >>sys.stderr, (
 424                       '\nStateMachine.check_line: No match in state "%s".'
 425                       % state.__class__.__name__)
 426             return state.no_match(context, transitions)
 427
 428     def match(self, pattern):
 429         """
 430         Return the result of a regular expression match.
 431
 432         Parameter `pattern`: an `re` compiled regular expression.
 433         """
 434         return pattern.match(self.line)
 435
 436     def add_state(self, state_class):
 437         """
 438         Initialize & add a `state_class` (`State` subclass) object.
 439
 440         Exception: `DuplicateStateError` raised if `state_class` was already
 441         added.
 442         """
 443         statename = state_class.__name__
 444         if statename in self.states:
 445             raise DuplicateStateError(statename)
 446         self.states[statename] = state_class(self, self.debug)
 447
 448     def add_states(self, state_classes):
 449         """
 450         Add `state_classes` (a list of `State` subclasses).
 451         """
 452         for state_class in state_classes:
 453             self.add_state(state_class)
 454
 455     def runtime_init(self):
 456         """
 457         Initialize `self.states`.
 458         """
 459         for state in self.states.values():
 460             state.runtime_init()
 461
 462     def error(self):
 463         """Report error details."""
 464         type, value, module, line, function = _exception_data()
 465         print >>sys.stderr, '%s: %s' % (type, value)
 466         print >>sys.stderr, 'input line %s' % (self.abs_line_number())
 467         print >>sys.stderr, ('module %s, line %s, function %s'
 468                              % (module, line, function))
 469
 470     def attach_observer(self, observer):
 471         """
 472         The `observer` parameter is a function or bound method which takes two
 473         arguments, the source and offset of the current line.
 474         """
 475         self.observers.append(observer)
 476
 477     def detach_observer(self, observer):
 478         self.observers.remove(observer)
 479
 480     def notify_observers(self):
 481         for observer in self.observers:
 482             try:
 483                 info = self.input_lines.info(self.line_offset)
 484             except IndexError:
 485                 info = (None, None)
 486             observer(*info)
 487
 488
 489 class State:
 490
 491     """
 492     State superclass. Contains a list of transitions, and transition methods.
 493
 494     Transition methods all have the same signature. They take 3 parameters:
 495
 496     - An `re` match object. ``match.string`` contains the matched input line,
 497       ``match.start()`` gives the start index of the match, and
 498       ``match.end()`` gives the end index.
 499     - A context object, whose meaning is application-defined (initial value
 500       ``None``). It can be used to store any information required by the state
 501       machine, and the retured context is passed on to the next transition
 502       method unchanged.
 503     - The name of the next state, a string, taken from the transitions list;
 504       normally it is returned unchanged, but it may be altered by the
 505       transition method if necessary.
 506
 507     Transition methods all return a 3-tuple:
 508
 509     - A context object, as (potentially) modified by the transition method.
 510     - The next state name (a return value of ``None`` means no state change).
 511     - The processing result, a list, which is accumulated by the state
 512       machine.
 513
 514     Transition methods may raise an `EOFError` to cut processing short.
 515
 516     There are two implicit transitions, and corresponding transition methods
 517     are defined: `bof()` handles the beginning-of-file, and `eof()` handles
 518     the end-of-file. These methods have non-standard signatures and return
 519     values. `bof()` returns the initial context and results, and may be used
 520     to return a header string, or do any other processing needed. `eof()`
 521     should handle any remaining context and wrap things up; it returns the
 522     final processing result.
 523
 524     Typical applications need only subclass `State` (or a subclass), set the
 525     `patterns` and `initial_transitions` class attributes, and provide
 526     corresponding transition methods. The default object initialization will
 527     take care of constructing the list of transitions.
 528     """
 529
 530     patterns = None
 531     """
 532     {Name: pattern} mapping, used by `make_transition()`. Each pattern may
 533     be a string or a compiled `re` pattern. Override in subclasses.
 534     """
 535
 536     initial_transitions = None
 537     """
 538     A list of transitions to initialize when a `State` is instantiated.
 539     Each entry is either a transition name string, or a (transition name, next
 540     state name) pair. See `make_transitions()`. Override in subclasses.
 541     """
 542
 543     nested_sm = None
 544     """
 545     The `StateMachine` class for handling nested processing.
 546
 547     If left as ``None``, `nested_sm` defaults to the class of the state's
 548     controlling state machine. Override it in subclasses to avoid the default.
 549     """
 550
 551     nested_sm_kwargs = None
 552     """
 553     Keyword arguments dictionary, passed to the `nested_sm` constructor.
 554
 555     Two keys must have entries in the dictionary:
 556
 557     - Key 'state_classes' must be set to a list of `State` classes.
 558     - Key 'initial_state' must be set to the name of the initial state class.
 559
 560     If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
 561     class of the current state, and 'initial_state' defaults to the name of
 562     the class of the current state. Override in subclasses to avoid the
 563     defaults.
 564     """
 565
 566     def __init__(self, state_machine, debug=0):
 567         """
 568         Initialize a `State` object; make & add initial transitions.
 569
 570         Parameters:
 571
 572         - `statemachine`: the controlling `StateMachine` object.
 573         - `debug`: a boolean; produce verbose output if true (nonzero).
 574         """
 575
 576         self.transition_order = []
 577         """A list of transition names in search order."""
 578
 579         self.transitions = {}
 580         """
 581         A mapping of transition names to 3-tuples containing
 582         (compiled_pattern, transition_method, next_state_name). Initialized as
 583         an instance attribute dynamically (instead of as a class attribute)
 584         because it may make forward references to patterns and methods in this
 585         or other classes.
 586         """
 587
 588         self.add_initial_transitions()
 589
 590         self.state_machine = state_machine
 591         """A reference to the controlling `StateMachine` object."""
 592
 593         self.debug = debug
 594         """Debugging mode on/off."""
 595
 596         if self.nested_sm is None:
 597             self.nested_sm = self.state_machine.__class__
 598         if self.nested_sm_kwargs is None:
 599             self.nested_sm_kwargs = {'state_classes': [self.__class__],
 600                                      'initial_state': self.__class__.__name__}
 601
 602     def runtime_init(self):
 603         """
 604         Initialize this `State` before running the state machine; called from
 605         `self.state_machine.run()`.
 606         """
 607         pass
 608
 609     def unlink(self):
 610         """Remove circular references to objects no longer required."""
 611         self.state_machine = None
 612
 613     def add_initial_transitions(self):
 614         """Make and add transitions listed in `self.initial_transitions`."""
 615         if self.initial_transitions:
 616             names, transitions = self.make_transitions(
 617                   self.initial_transitions)
 618             self.add_transitions(names, transitions)
 619
 620     def add_transitions(self, names, transitions):
 621         """
 622         Add a list of transitions to the start of the transition list.
 623
 624         Parameters:
 625
 626         - `names`: a list of transition names.
 627         - `transitions`: a mapping of names to transition tuples.
 628
 629         Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
 630         """
 631         for name in names:
 632             if name in self.transitions:
 633                 raise DuplicateTransitionError(name)
 634             if name not in transitions:
 635                 raise UnknownTransitionError(name)
 636         self.transition_order[:0] = names
 637         self.transitions.update(transitions)
 638
 639     def add_transition(self, name, transition):
 640         """
 641         Add a transition to the start of the transition list.
 642
 643         Parameter `transition`: a ready-made transition 3-tuple.
 644
 645         Exception: `DuplicateTransitionError`.
 646         """
 647         if name in self.transitions:
 648             raise DuplicateTransitionError(name)
 649         self.transition_order[:0] = [name]
 650         self.transitions[name] = transition
 651
 652     def remove_transition(self, name):
 653         """
 654         Remove a transition by `name`.
 655
 656         Exception: `UnknownTransitionError`.
 657         """
 658         try:
 659             del self.transitions[name]
 660             self.transition_order.remove(name)
 661         except:
 662             raise UnknownTransitionError(name)
 663
 664     def make_transition(self, name, next_state=None):
 665         """
 666         Make & return a transition tuple based on `name`.
 667
 668         This is a convenience function to simplify transition creation.
 669
 670         Parameters:
 671
 672         - `name`: a string, the name of the transition pattern & method. This
 673           `State` object must have a method called '`name`', and a dictionary
 674           `self.patterns` containing a key '`name`'.
 675         - `next_state`: a string, the name of the next `State` object for this
 676           transition. A value of ``None`` (or absent) implies no state change
 677           (i.e., continue with the same state).
 678
 679         Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
 680         """
 681         if next_state is None:
 682             next_state = self.__class__.__name__
 683         try:
 684             pattern = self.patterns[name]
 685             if not hasattr(pattern, 'match'):
 686                 pattern = re.compile(pattern)
 687         except KeyError:
 688             raise TransitionPatternNotFound(
 689                   '%s.patterns[%r]' % (self.__class__.__name__, name))
 690         try:
 691             method = getattr(self, name)
 692         except AttributeError:
 693             raise TransitionMethodNotFound(
 694                   '%s.%s' % (self.__class__.__name__, name))
 695         return (pattern, method, next_state)
 696
 697     def make_transitions(self, name_list):
 698         """
 699         Return a list of transition names and a transition mapping.
 700
 701         Parameter `name_list`: a list, where each entry is either a transition
 702         name string, or a 1- or 2-tuple (transition name, optional next state
 703         name).
 704         """
 705         stringtype = type('')
 706         names = []
 707         transitions = {}
 708         for namestate in name_list:
 709             if type(namestate) is stringtype:
 710                 transitions[namestate] = self.make_transition(namestate)
 711                 names.append(namestate)
 712             else:
 713                 transitions[namestate[0]] = self.make_transition(*namestate)
 714                 names.append(namestate[0])
 715         return names, transitions
 716
 717     def no_match(self, context, transitions):
 718         """
 719         Called when there is no match from `StateMachine.check_line()`.
 720
 721         Return the same values returned by transition methods:
 722
 723         - context: unchanged;
 724         - next state name: ``None``;
 725         - empty result list.
 726
 727         Override in subclasses to catch this event.
 728         """
 729         return context, None, []
 730
 731     def bof(self, context):
 732         """
 733         Handle beginning-of-file. Return unchanged `context`, empty result.
 734
 735         Override in subclasses.
 736
 737         Parameter `context`: application-defined storage.
 738         """
 739         return context, []
 740
 741     def eof(self, context):
 742         """
 743         Handle end-of-file. Return empty result.
 744
 745         Override in subclasses.
 746
 747         Parameter `context`: application-defined storage.
 748         """
 749         return []
 750
 751     def nop(self, match, context, next_state):
 752         """
 753         A "do nothing" transition method.
 754
 755         Return unchanged `context` & `next_state`, empty result. Useful for
 756         simple state changes (actionless transitions).
 757         """
 758         return context, next_state, []
 759
 760
 761 class StateMachineWS(StateMachine):
 762
 763     """
 764     `StateMachine` subclass specialized for whitespace recognition.
 765
 766     There are three methods provided for extracting indented text blocks:
 767
 768     - `get_indented()`: use when the indent is unknown.
 769     - `get_known_indented()`: use when the indent is known for all lines.
 770     - `get_first_known_indented()`: use when only the first line's indent is
 771       known.
 772     """
 773
 774     def get_indented(self, until_blank=0, strip_indent=1):
 775         """
 776         Return a block of indented lines of text, and info.
 777
 778         Extract an indented block where the indent is unknown for all lines.
 779
 780         :Parameters:
 781             - `until_blank`: Stop collecting at the first blank line if true
 782               (1).
 783             - `strip_indent`: Strip common leading indent if true (1,
 784               default).
 785
 786         :Return:
 787             - the indented block (a list of lines of text),
 788             - its indent,
 789             - its first line offset from BOF, and
 790             - whether or not it finished with a blank line.
 791         """
 792         offset = self.abs_line_offset()
 793         indented, indent, blank_finish = self.input_lines.get_indented(
 794               self.line_offset, until_blank, strip_indent)
 795         if indented:
 796             self.next_line(len(indented) - 1) # advance to last indented line
 797         while indented and not indented[0].strip():
 798             indented.trim_start()
 799             offset += 1
 800         return indented, indent, offset, blank_finish
 801
 802     def get_known_indented(self, indent, until_blank=0, strip_indent=1):
 803         """
 804         Return an indented block and info.
 805
 806         Extract an indented block where the indent is known for all lines.
 807         Starting with the current line, extract the entire text block with at
 808         least `indent` indentation (which must be whitespace, except for the
 809         first line).
 810
 811         :Parameters:
 812             - `indent`: The number of indent columns/characters.
 813             - `until_blank`: Stop collecting at the first blank line if true
 814               (1).
 815             - `strip_indent`: Strip `indent` characters of indentation if true
 816               (1, default).
 817
 818         :Return:
 819             - the indented block,
 820             - its first line offset from BOF, and
 821             - whether or not it finished with a blank line.
 822         """
 823         offset = self.abs_line_offset()
 824         indented, indent, blank_finish = self.input_lines.get_indented(
 825               self.line_offset, until_blank, strip_indent,
 826               block_indent=indent)
 827         self.next_line(len(indented) - 1) # advance to last indented line
 828         while indented and not indented[0].strip():
 829             indented.trim_start()
 830             offset += 1
 831         return indented, offset, blank_finish
 832
 833     def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
 834                                  strip_top=1):
 835         """
 836         Return an indented block and info.
 837
 838         Extract an indented block where the indent is known for the first line
 839         and unknown for all other lines.
 840
 841         :Parameters:
 842             - `indent`: The first line's indent (# of columns/characters).
 843             - `until_blank`: Stop collecting at the first blank line if true
 844               (1).
 845             - `strip_indent`: Strip `indent` characters of indentation if true
 846               (1, default).
 847             - `strip_top`: Strip blank lines from the beginning of the block.
 848
 849         :Return:
 850             - the indented block,
 851             - its indent,
 852             - its first line offset from BOF, and
 853             - whether or not it finished with a blank line.
 854         """
 855         offset = self.abs_line_offset()
 856         indented, indent, blank_finish = self.input_lines.get_indented(
 857               self.line_offset, until_blank, strip_indent,
 858               first_indent=indent)
 859         self.next_line(len(indented) - 1) # advance to last indented line
 860         if strip_top:
 861             while indented and not indented[0].strip():
 862                 indented.trim_start()
 863                 offset += 1
 864         return indented, indent, offset, blank_finish
 865
 866
 867 class StateWS(State):
 868
 869     """
 870     State superclass specialized for whitespace (blank lines & indents).
 871
 872     Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
 873     lines) and 'indent' (for indented text blocks) are added automatically,
 874     before any other transitions.  The transition method `blank()` handles
 875     blank lines and `indent()` handles nested indented blocks.  Indented
 876     blocks trigger a new state machine to be created by `indent()` and run.
 877     The class of the state machine to be created is in `indent_sm`, and the
 878     constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
 879
 880     The methods `known_indent()` and `firstknown_indent()` are provided for
 881     indented blocks where the indent (all lines' and first line's only,
 882     respectively) is known to the transition method, along with the attributes
 883     `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
 884     is triggered automatically.
 885     """
 886
 887     indent_sm = None
 888     """
 889     The `StateMachine` class handling indented text blocks.
 890
 891     If left as ``None``, `indent_sm` defaults to the value of
 892     `State.nested_sm`.  Override it in subclasses to avoid the default.
 893     """
 894
 895     indent_sm_kwargs = None
 896     """
 897     Keyword arguments dictionary, passed to the `indent_sm` constructor.
 898
 899     If left as ``None``, `indent_sm_kwargs` defaults to the value of
 900     `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
 901     """
 902
 903     known_indent_sm = None
 904     """
 905     The `StateMachine` class handling known-indented text blocks.
 906
 907     If left as ``None``, `known_indent_sm` defaults to the value of
 908     `indent_sm`.  Override it in subclasses to avoid the default.
 909     """
 910
 911     known_indent_sm_kwargs = None
 912     """
 913     Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
 914
 915     If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
 916     `indent_sm_kwargs`. Override it in subclasses to avoid the default.
 917     """
 918
 919     ws_patterns = {'blank': ' *$',
 920                    'indent': ' +'}
 921     """Patterns for default whitespace transitions.  May be overridden in
 922     subclasses."""
 923
 924     ws_initial_transitions = ('blank', 'indent')
 925     """Default initial whitespace transitions, added before those listed in
 926     `State.initial_transitions`.  May be overridden in subclasses."""
 927
 928     def __init__(self, state_machine, debug=0):
 929         """
 930         Initialize a `StateSM` object; extends `State.__init__()`.
 931
 932         Check for indent state machine attributes, set defaults if not set.
 933         """
 934         State.__init__(self, state_machine, debug)
 935         if self.indent_sm is None:
 936             self.indent_sm = self.nested_sm
 937         if self.indent_sm_kwargs is None:
 938             self.indent_sm_kwargs = self.nested_sm_kwargs
 939         if self.known_indent_sm is None:
 940             self.known_indent_sm = self.indent_sm
 941         if self.known_indent_sm_kwargs is None:
 942             self.known_indent_sm_kwargs = self.indent_sm_kwargs
 943
 944     def add_initial_transitions(self):
 945         """
 946         Add whitespace-specific transitions before those defined in subclass.
 947
 948         Extends `State.add_initial_transitions()`.
 949         """
 950         State.add_initial_transitions(self)
 951         if self.patterns is None:
 952             self.patterns = {}
 953         self.patterns.update(self.ws_patterns)
 954         names, transitions = self.make_transitions(
 955             self.ws_initial_transitions)
 956         self.add_transitions(names, transitions)
 957
 958     def blank(self, match, context, next_state):
 959         """Handle blank lines. Does nothing. Override in subclasses."""
 960         return self.nop(match, context, next_state)
 961
 962     def indent(self, match, context, next_state):
 963         """
 964         Handle an indented text block. Extend or override in subclasses.
 965
 966         Recursively run the registered state machine for indented blocks
 967         (`self.indent_sm`).
 968         """
 969         indented, indent, line_offset, blank_finish = \
 970               self.state_machine.get_indented()
 971         sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
 972         results = sm.run(indented, input_offset=line_offset)
 973         return context, next_state, results
 974
 975     def known_indent(self, match, context, next_state):
 976         """
 977         Handle a known-indent text block. Extend or override in subclasses.
 978
 979         Recursively run the registered state machine for known-indent indented
 980         blocks (`self.known_indent_sm`). The indent is the length of the
 981         match, ``match.end()``.
 982         """
 983         indented, line_offset, blank_finish = \
 984               self.state_machine.get_known_indented(match.end())
 985         sm = self.known_indent_sm(debug=self.debug,
 986                                  **self.known_indent_sm_kwargs)
 987         results = sm.run(indented, input_offset=line_offset)
 988         return context, next_state, results
 989
 990     def first_known_indent(self, match, context, next_state):
 991         """
 992         Handle an indented text block (first line's indent known).
 993
 994         Extend or override in subclasses.
 995
 996         Recursively run the registered state machine for known-indent indented
 997         blocks (`self.known_indent_sm`). The indent is the length of the
 998         match, ``match.end()``.
 999         """
1000         indented, line_offset, blank_finish = \
1001               self.state_machine.get_first_known_indented(match.end())
1002         sm = self.known_indent_sm(debug=self.debug,
1003                                  **self.known_indent_sm_kwargs)
1004         results = sm.run(indented, input_offset=line_offset)
1005         return context, next_state, results
1006
1007
1008 class _SearchOverride:
1009
1010     """
1011     Mix-in class to override `StateMachine` regular expression behavior.
1012
1013     Changes regular expression matching, from the default `re.match()`
1014     (succeeds only if the pattern matches at the start of `self.line`) to
1015     `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1016     When subclassing a `StateMachine`, list this class **first** in the
1017     inheritance list of the class definition.
1018     """
1019
1020     def match(self, pattern):
1021         """
1022         Return the result of a regular expression search.
1023
1024         Overrides `StateMachine.match()`.
1025
1026         Parameter `pattern`: `re` compiled regular expression.
1027         """
1028         return pattern.search(self.line)
1029
1030
1031 class SearchStateMachine(_SearchOverride, StateMachine):
1032     """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1033     pass
1034
1035
1036 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1037     """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1038     pass
1039
1040
1041 class ViewList:
1042
1043     """
1044     List with extended functionality: slices of ViewList objects are child
1045     lists, linked to their parents. Changes made to a child list also affect
1046     the parent list.  A child list is effectively a "view" (in the SQL sense)
1047     of the parent list.  Changes to parent lists, however, do *not* affect
1048     active child lists.  If a parent list is changed, any active child lists
1049     should be recreated.
1050
1051     The start and end of the slice can be trimmed using the `trim_start()` and
1052     `trim_end()` methods, without affecting the parent list.  The link between
1053     child and parent lists can be broken by calling `disconnect()` on the
1054     child list.
1055
1056     Also, ViewList objects keep track of the source & offset of each item.
1057     This information is accessible via the `source()`, `offset()`, and
1058     `info()` methods.
1059     """
1060
1061     def __init__(self, initlist=None, source=None, items=None,
1062                  parent=None, parent_offset=None):
1063         self.data = []
1064         """The actual list of data, flattened from various sources."""
1065
1066         self.items = []
1067         """A list of (source, offset) pairs, same length as `self.data`: the
1068         source of each line and the offset of each line from the beginning of
1069         its source."""
1070
1071         self.parent = parent
1072         """The parent list."""
1073
1074         self.parent_offset = parent_offset
1075         """Offset of this list from the beginning of the parent list."""
1076
1077         if isinstance(initlist, ViewList):
1078             self.data = initlist.data[:]
1079             self.items = initlist.items[:]
1080         elif initlist is not None:
1081             self.data = list(initlist)
1082             if items:
1083                 self.items = items
1084             else:
1085                 self.items = [(source, i) for i in range(len(initlist))]
1086         assert len(self.data) == len(self.items), 'data mismatch'
1087
1088     def __str__(self):
1089         return str(self.data)
1090
1091     def __repr__(self):
1092         return '%s(%s, items=%s)' % (self.__class__.__name__,
1093                                      self.data, self.items)
1094
1095     def __lt__(self, other): return self.data <  self.__cast(other)
1096     def __le__(self, other): return self.data <= self.__cast(other)
1097     def __eq__(self, other): return self.data == self.__cast(other)
1098     def __ne__(self, other): return self.data != self.__cast(other)
1099     def __gt__(self, other): return self.data >  self.__cast(other)
1100     def __ge__(self, other): return self.data >= self.__cast(other)
1101     def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1102
1103     def __cast(self, other):
1104         if isinstance(other, ViewList):
1105             return other.data
1106         else:
1107             return other
1108
1109     def __contains__(self, item): return item in self.data
1110     def __len__(self): return len(self.data)
1111
1112     # The __getitem__()/__setitem__() methods check whether the index
1113     # is a slice first, since native list objects start supporting
1114     # them directly in Python 2.3 (no exception is raised when
1115     # indexing a list with a slice object; they just work).
1116
1117     def __getitem__(self, i):
1118         if isinstance(i, types.SliceType):
1119             assert i.step in (None, 1),  'cannot handle slice with stride'
1120             return self.__class__(self.data[i.start:i.stop],
1121                                   items=self.items[i.start:i.stop],
1122                                   parent=self, parent_offset=i.start)
1123         else:
1124             return self.data[i]
1125
1126     def __setitem__(self, i, item):
1127         if isinstance(i, types.SliceType):
1128             assert i.step in (None, 1), 'cannot handle slice with stride'
1129             if not isinstance(item, ViewList):
1130                 raise TypeError('assigning non-ViewList to ViewList slice')
1131             self.data[i.start:i.stop] = item.data
1132             self.items[i.start:i.stop] = item.items
1133             assert len(self.data) == len(self.items), 'data mismatch'
1134             if self.parent:
1135                 self.parent[i.start + self.parent_offset
1136                             : i.stop + self.parent_offset] = item
1137         else:
1138             self.data[i] = item
1139             if self.parent:
1140                 self.parent[i + self.parent_offset] = item
1141
1142     def __delitem__(self, i):
1143         try:
1144             del self.data[i]
1145             del self.items[i]
1146             if self.parent:
1147                 del self.parent[i + self.parent_offset]
1148         except TypeError:
1149             assert i.step is None, 'cannot handle slice with stride'
1150             del self.data[i.start:i.stop]
1151             del self.items[i.start:i.stop]
1152             if self.parent:
1153                 del self.parent[i.start + self.parent_offset
1154                                 : i.stop + self.parent_offset]
1155
1156     def __add__(self, other):
1157         if isinstance(other, ViewList):
1158             return self.__class__(self.data + other.data,
1159                                   items=(self.items + other.items))
1160         else:
1161             raise TypeError('adding non-ViewList to a ViewList')
1162
1163     def __radd__(self, other):
1164         if isinstance(other, ViewList):
1165             return self.__class__(other.data + self.data,
1166                                   items=(other.items + self.items))
1167         else:
1168             raise TypeError('adding ViewList to a non-ViewList')
1169
1170     def __iadd__(self, other):
1171         if isinstance(other, ViewList):
1172             self.data += other.data
1173         else:
1174             raise TypeError('argument to += must be a ViewList')
1175         return self
1176
1177     def __mul__(self, n):
1178         return self.__class__(self.data * n, items=(self.items * n))
1179
1180     __rmul__ = __mul__
1181
1182     def __imul__(self, n):
1183         self.data *= n
1184         self.items *= n
1185         return self
1186
1187     def extend(self, other):
1188         if not isinstance(other, ViewList):
1189             raise TypeError('extending a ViewList with a non-ViewList')
1190         if self.parent:
1191             self.parent.insert(len(self.data) + self.parent_offset, other)
1192         self.data.extend(other.data)
1193         self.items.extend(other.items)
1194
1195     def append(self, item, source=None, offset=0):
1196         if source is None:
1197             self.extend(item)
1198         else:
1199             if self.parent:
1200                 self.parent.insert(len(self.data) + self.parent_offset, item,
1201                                    source, offset)
1202             self.data.append(item)
1203             self.items.append((source, offset))
1204
1205     def insert(self, i, item, source=None, offset=0):
1206         if source is None:
1207             if not isinstance(item, ViewList):
1208                 raise TypeError('inserting non-ViewList with no source given')
1209             self.data[i:i] = item.data
1210             self.items[i:i] = item.items
1211             if self.parent:
1212                 index = (len(self.data) + i) % len(self.data)
1213                 self.parent.insert(index + self.parent_offset, item)
1214         else:
1215             self.data.insert(i, item)
1216             self.items.insert(i, (source, offset))
1217             if self.parent:
1218                 index = (len(self.data) + i) % len(self.data)
1219                 self.parent.insert(index + self.parent_offset, item,
1220                                    source, offset)
1221
1222     def pop(self, i=-1):
1223         if self.parent:
1224             index = (len(self.data) + i) % len(self.data)
1225             self.parent.pop(index + self.parent_offset)
1226         self.items.pop(i)
1227         return self.data.pop(i)
1228
1229     def trim_start(self, n=1):
1230         """
1231         Remove items from the start of the list, without touching the parent.
1232         """
1233         if n > len(self.data):
1234             raise IndexError("Size of trim too large; can't trim %s items "
1235                              "from a list of size %s." % (n, len(self.data)))
1236         elif n < 0:
1237             raise IndexError('Trim size must be >= 0.')
1238         del self.data[:n]
1239         del self.items[:n]
1240         if self.parent:
1241             self.parent_offset += n
1242
1243     def trim_end(self, n=1):
1244         """
1245         Remove items from the end of the list, without touching the parent.
1246         """
1247         if n > len(self.data):
1248             raise IndexError("Size of trim too large; can't trim %s items "
1249                              "from a list of size %s." % (n, len(self.data)))
1250         elif n < 0:
1251             raise IndexError('Trim size must be >= 0.')
1252         del self.data[-n:]
1253         del self.items[-n:]
1254
1255     def remove(self, item):
1256         index = self.index(item)
1257         del self[index]
1258
1259     def count(self, item): return self.data.count(item)
1260     def index(self, item): return self.data.index(item)
1261
1262     def reverse(self):
1263         self.data.reverse()
1264         self.items.reverse()
1265         self.parent = None
1266
1267     def sort(self, *args):
1268         tmp = zip(self.data, self.items)
1269         tmp.sort(*args)
1270         self.data = [entry[0] for entry in tmp]
1271         self.items = [entry[1] for entry in tmp]
1272         self.parent = None
1273
1274     def info(self, i):
1275         """Return source & offset for index `i`."""
1276         try:
1277             return self.items[i]
1278         except IndexError:
1279             if i == len(self.data):     # Just past the end
1280                 return self.items[i - 1][0], None
1281             else:
1282                 raise
1283
1284     def source(self, i):
1285         """Return source for index `i`."""
1286         return self.info(i)[0]
1287
1288     def offset(self, i):
1289         """Return offset for index `i`."""
1290         return self.info(i)[1]
1291
1292     def disconnect(self):
1293         """Break link between this list and parent list."""
1294         self.parent = None
1295
1296
1297 class StringList(ViewList):
1298
1299     """A `ViewList` with string-specific methods."""
1300
1301     def trim_left(self, length, start=0, end=sys.maxint):
1302         """
1303         Trim `length` characters off the beginning of each item, in-place,
1304         from index `start` to `end`.  No whitespace-checking is done on the
1305         trimmed text.  Does not affect slice parent.
1306         """
1307         self.data[start:end] = [line[length:]
1308                                 for line in self.data[start:end]]
1309
1310     def get_text_block(self, start, flush_left=0):
1311         """
1312         Return a contiguous block of text.
1313
1314         If `flush_left` is true, raise `UnexpectedIndentationError` if an
1315         indented line is encountered before the text block ends (with a blank
1316         line).
1317         """
1318         end = start
1319         last = len(self.data)
1320         while end < last:
1321             line = self.data[end]
1322             if not line.strip():
1323                 break
1324             if flush_left and (line[0] == ' '):
1325                 source, offset = self.info(end)
1326                 raise UnexpectedIndentationError(self[start:end], source,
1327                                                  offset + 1)
1328             end += 1
1329         return self[start:end]
1330
1331     def get_indented(self, start=0, until_blank=0, strip_indent=1,
1332                      block_indent=None, first_indent=None):
1333         """
1334         Extract and return a StringList of indented lines of text.
1335
1336         Collect all lines with indentation, determine the minimum indentation,
1337         remove the minimum indentation from all indented lines (unless
1338         `strip_indent` is false), and return them. All lines up to but not
1339         including the first unindented line will be returned.
1340
1341         :Parameters:
1342           - `start`: The index of the first line to examine.
1343           - `until_blank`: Stop collecting at the first blank line if true.
1344           - `strip_indent`: Strip common leading indent if true (default).
1345           - `block_indent`: The indent of the entire block, if known.
1346           - `first_indent`: The indent of the first line, if known.
1347
1348         :Return:
1349           - a StringList of indented lines with mininum indent removed;
1350           - the amount of the indent;
1351           - a boolean: did the indented block finish with a blank line or EOF?
1352         """
1353         indent = block_indent           # start with None if unknown
1354         end = start
1355         if block_indent is not None and first_indent is None:
1356             first_indent = block_indent
1357         if first_indent is not None:
1358             end += 1
1359         last = len(self.data)
1360         while end < last:
1361             line = self.data[end]
1362             if line and (line[0] != ' '
1363                          or (block_indent is not None
1364                              and line[:block_indent].strip())):
1365                 # Line not indented or insufficiently indented.
1366                 # Block finished properly iff the last indented line blank:
1367                 blank_finish = ((end > start)
1368                                 and not self.data[end - 1].strip())
1369                 break
1370             stripped = line.lstrip()
1371             if not stripped:            # blank line
1372                 if until_blank:
1373                     blank_finish = 1
1374                     break
1375             elif block_indent is None:
1376                 line_indent = len(line) - len(stripped)
1377                 if indent is None:
1378                     indent = line_indent
1379                 else:
1380                     indent = min(indent, line_indent)
1381             end += 1
1382         else:
1383             blank_finish = 1            # block ends at end of lines
1384         block = self[start:end]
1385         if first_indent is not None and block:
1386             block.data[0] = block.data[0][first_indent:]
1387         if indent and strip_indent:
1388             block.trim_left(indent, start=(first_indent is not None))
1389         return block, indent or 0, blank_finish
1390
1391     def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1392         block = self[top:bottom]
1393         indent = right
1394         for i in range(len(block.data)):
1395             block.data[i] = line = block.data[i][left:right].rstrip()
1396             if line:
1397                 indent = min(indent, len(line) - len(line.lstrip()))
1398         if strip_indent and 0 < indent < right:
1399             block.data = [line[indent:] for line in block.data]
1400         return block
1401
1402     def pad_double_width(self, pad_char):
1403         """
1404         Pad all double-width characters in self by appending `pad_char` to each.
1405         For East Asian language support.
1406         """
1407         if hasattr(unicodedata, 'east_asian_width'):
1408             east_asian_width = unicodedata.east_asian_width
1409         else:
1410             return                      # new in Python 2.4
1411         for i in range(len(self.data)):
1412             line = self.data[i]
1413             if isinstance(line, unicode):
1414                 new = []
1415                 for char in line:
1416                     new.append(char)
1417                     if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1418                         new.append(pad_char)
1419                 self.data[i] = ''.join(new)
1420
1421     def replace(self, old, new):
1422         """Replace all occurrences of substring `old` with `new`."""
1423         for i in range(len(self.data)):
1424             self.data[i] = self.data[i].replace(old, new)
1425
1426
1427 class StateMachineError(Exception): pass
1428 class UnknownStateError(StateMachineError): pass
1429 class DuplicateStateError(StateMachineError): pass
1430 class UnknownTransitionError(StateMachineError): pass
1431 class DuplicateTransitionError(StateMachineError): pass
1432 class TransitionPatternNotFound(StateMachineError): pass
1433 class TransitionMethodNotFound(StateMachineError): pass
1434 class UnexpectedIndentationError(StateMachineError): pass
1435
1436
1437 class TransitionCorrection(Exception):
1438
1439     """
1440     Raise from within a transition method to switch to another transition.
1441
1442     Raise with one argument, the new transition name.
1443     """
1444
1445
1446 class StateCorrection(Exception):
1447
1448     """
1449     Raise from within a transition method to switch to another state.
1450
1451     Raise with one or two arguments: new state name, and an optional new
1452     transition name.
1453     """
1454
1455
1456 def string2lines(astring, tab_width=8, convert_whitespace=0,
1457                  whitespace=re.compile('[\v\f]')):
1458     """
1459     Return a list of one-line strings with tabs expanded, no newlines, and
1460     trailing whitespace stripped.
1461
1462     Each tab is expanded with between 1 and `tab_width` spaces, so that the
1463     next character's index becomes a multiple of `tab_width` (8 by default).
1464
1465     Parameters:
1466
1467     - `astring`: a multi-line string.
1468     - `tab_width`: the number of columns between tab stops.
1469     - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1470     """
1471     if convert_whitespace:
1472         astring = whitespace.sub(' ', astring)
1473     return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1474
1475 def _exception_data():
1476     """
1477     Return exception information:
1478
1479     - the exception's class name;
1480     - the exception object;
1481     - the name of the file containing the offending code;
1482     - the line number of the offending code;
1483     - the function name of the offending code.
1484     """
1485     type, value, traceback = sys.exc_info()
1486     while traceback.tb_next:
1487         traceback = traceback.tb_next
1488     code = traceback.tb_frame.f_code
1489     return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1490             code.co_name)