docutils/statemachine.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 A finite state machine specialized for regular-expression-based text filters,
   7 this module defines the following classes:
   8
   9 - `StateMachine`, a state machine
  10 - `State`, a state superclass
  11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
  12 - `StateWS`, a state superclass for use with `StateMachineWS`
  13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
  14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
  15 - `ViewList`, extends standard Python lists.
  16 - `StringList`, string-specific ViewList.
  17
  18 Exception classes:
  19
  20 - `StateMachineError`
  21 - `UnknownStateError`
  22 - `DuplicateStateError`
  23 - `UnknownTransitionError`
  24 - `DuplicateTransitionError`
  25 - `TransitionPatternNotFound`
  26 - `TransitionMethodNotFound`
  27 - `UnexpectedIndentationError`
  28 - `TransitionCorrection`: Raised to switch to another transition.
  29 - `StateCorrection`: Raised to switch to another state & transition.
  30
  31 Functions:
  32
  33 - `string2lines()`: split a multi-line string into a list of one-line strings
  34
  35
  36 How To Use This Module
  37 ======================
  38 (See the individual classes, methods, and attributes for details.)
  39
  40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
  41    You will also need to ``import re``.
  42
  43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
  44    machine::
  45
  46        class MyState(statemachine.State):
  47
  48    Within the state's class definition:
  49
  50    a) Include a pattern for each transition, in `State.patterns`::
  51
  52           patterns = {'atransition': r'pattern', ...}
  53
  54    b) Include a list of initial transitions to be set up automatically, in
  55       `State.initial_transitions`::
  56
  57           initial_transitions = ['atransition', ...]
  58
  59    c) Define a method for each transition, with the same name as the
  60       transition pattern::
  61
  62           def atransition(self, match, context, next_state):
  63               # do something
  64               result = [...]  # a list
  65               return context, next_state, result
  66               # context, next_state may be altered
  67
  68       Transition methods may raise an `EOFError` to cut processing short.
  69
  70    d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
  71       transition methods, which handle the beginning- and end-of-file.
  72
  73    e) In order to handle nested processing, you may wish to override the
  74       attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
  75
  76       If you are using `StateWS` as a base class, in order to handle nested
  77       indented blocks, you may wish to:
  78
  79       - override the attributes `StateWS.indent_sm`,
  80         `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
  81         `StateWS.known_indent_sm_kwargs`;
  82       - override the `StateWS.blank()` method; and/or
  83       - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
  84         and/or `StateWS.firstknown_indent()` methods.
  85
  86 3. Create a state machine object::
  87
  88        sm = StateMachine(state_classes=[MyState, ...],
  89                          initial_state='MyState')
  90
  91 4. Obtain the input text, which needs to be converted into a tab-free list of
  92    one-line strings. For example, to read text from a file called
  93    'inputfile'::
  94
  95        input_string = open('inputfile').read()
  96        input_lines = statemachine.string2lines(input_string)
  97
  98 5. Run the state machine on the input text and collect the results, a list::
  99
 100        results = sm.run(input_lines)
 101
 102 6. Remove any lingering circular references::
 103
 104        sm.unlink()
 105 """
 106
 107 __docformat__ = 'restructuredtext'
 108
 109 import sys
 110 import re
 111 import types
 112 import unicodedata
 113
 114
 115 class StateMachine:
 116
 117     """
 118     A finite state machine for text filters using regular expressions.
 119
 120     The input is provided in the form of a list of one-line strings (no
 121     newlines). States are subclasses of the `State` class. Transitions consist
 122     of regular expression patterns and transition methods, and are defined in
 123     each state.
 124
 125     The state machine is started with the `run()` method, which returns the
 126     results of processing in a list.
 127     """
 128
 129     def __init__(self, state_classes, initial_state, debug=0):
 130         """
 131         Initialize a `StateMachine` object; add state objects.
 132
 133         Parameters:
 134
 135         - `state_classes`: a list of `State` (sub)classes.
 136         - `initial_state`: a string, the class name of the initial state.
 137         - `debug`: a boolean; produce verbose output if true (nonzero).
 138         """
 139
 140         self.input_lines = None
 141         """`StringList` of input lines (without newlines).
 142         Filled by `self.run()`."""
 143
 144         self.input_offset = 0
 145         """Offset of `self.input_lines` from the beginning of the file."""
 146
 147         self.line = None
 148         """Current input line."""
 149
 150         self.line_offset = -1
 151         """Current input line offset from beginning of `self.input_lines`."""
 152
 153         self.debug = debug
 154         """Debugging mode on/off."""
 155
 156         self.initial_state = initial_state
 157         """The name of the initial state (key to `self.states`)."""
 158
 159         self.current_state = initial_state
 160         """The name of the current state (key to `self.states`)."""
 161
 162         self.states = {}
 163         """Mapping of {state_name: State_object}."""
 164
 165         self.add_states(state_classes)
 166
 167         self.observers = []
 168         """List of bound methods or functions to call whenever the current
 169         line changes.  Observers are called with one argument, ``self``.
 170         Cleared at the end of `run()`."""
 171
 172     def unlink(self):
 173         """Remove circular references to objects no longer required."""
 174         for state in self.states.values():
 175             state.unlink()
 176         self.states = None
 177
 178     def run(self, input_lines, input_offset=0, context=None,
 179             input_source=None, initial_state=None):
 180         """
 181         Run the state machine on `input_lines`. Return results (a list).
 182
 183         Reset `self.line_offset` and `self.current_state`. Run the
 184         beginning-of-file transition. Input one line at a time and check for a
 185         matching transition. If a match is found, call the transition method
 186         and possibly change the state. Store the context returned by the
 187         transition method to be passed on to the next transition matched.
 188         Accumulate the results returned by the transition methods in a list.
 189         Run the end-of-file transition. Finally, return the accumulated
 190         results.
 191
 192         Parameters:
 193
 194         - `input_lines`: a list of strings without newlines, or `StringList`.
 195         - `input_offset`: the line offset of `input_lines` from the beginning
 196           of the file.
 197         - `context`: application-specific storage.
 198         - `input_source`: name or path of source of `input_lines`.
 199         - `initial_state`: name of initial state.
 200         """
 201         self.runtime_init()
 202         if isinstance(input_lines, StringList):
 203             self.input_lines = input_lines
 204         else:
 205             self.input_lines = StringList(input_lines, source=input_source)
 206         self.input_offset = input_offset
 207         self.line_offset = -1
 208         self.current_state = initial_state or self.initial_state
 209         if self.debug:
 210             print >>sys.stderr, (
 211                 '\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
 212                 % (self.line_offset, '\n| '.join(self.input_lines)))
 213         transitions = None
 214         results = []
 215         state = self.get_state()
 216         try:
 217             if self.debug:
 218                 print >>sys.stderr, ('\nStateMachine.run: bof transition')
 219             context, result = state.bof(context)
 220             results.extend(result)
 221             while 1:
 222                 try:
 223                     try:
 224                         self.next_line()
 225                         if self.debug:
 226                             source, offset = self.input_lines.info(
 227                                 self.line_offset)
 228                             print >>sys.stderr, (
 229                                 '\nStateMachine.run: line (source=%r, '
 230                                 'offset=%r):\n| %s'
 231                                 % (source, offset, self.line))
 232                         context, next_state, result = self.check_line(
 233                             context, state, transitions)
 234                     except EOFError:
 235                         if self.debug:
 236                             print >>sys.stderr, (
 237                                 '\nStateMachine.run: %s.eof transition'
 238                                 % state.__class__.__name__)
 239                         result = state.eof(context)
 240                         results.extend(result)
 241                         break
 242                     else:
 243                         results.extend(result)
 244                 except TransitionCorrection, exception:
 245                     self.previous_line() # back up for another try
 246                     transitions = (exception.args[0],)
 247                     if self.debug:
 248                         print >>sys.stderr, (
 249                               '\nStateMachine.run: TransitionCorrection to '
 250                               'state "%s", transition %s.'
 251                               % (state.__class__.__name__, transitions[0]))
 252                     continue
 253                 except StateCorrection, exception:
 254                     self.previous_line() # back up for another try
 255                     next_state = exception.args[0]
 256                     if len(exception.args) == 1:
 257                         transitions = None
 258                     else:
 259                         transitions = (exception.args[1],)
 260                     if self.debug:
 261                         print >>sys.stderr, (
 262                               '\nStateMachine.run: StateCorrection to state '
 263                               '"%s", transition %s.'
 264                               % (next_state, transitions[0]))
 265                 else:
 266                     transitions = None
 267                 state = self.get_state(next_state)
 268         except:
 269             if self.debug:
 270                 self.error()
 271             raise
 272         self.observers = []
 273         return results
 274
 275     def get_state(self, next_state=None):
 276         """
 277         Return current state object; set it first if `next_state` given.
 278
 279         Parameter `next_state`: a string, the name of the next state.
 280
 281         Exception: `UnknownStateError` raised if `next_state` unknown.
 282         """
 283         if next_state:
 284             if self.debug and next_state != self.current_state:
 285                 print >>sys.stderr, \
 286                       ('\nStateMachine.get_state: Changing state from '
 287                        '"%s" to "%s" (input line %s).'
 288                        % (self.current_state, next_state,
 289                           self.abs_line_number()))
 290             self.current_state = next_state
 291         try:
 292             return self.states[self.current_state]
 293         except KeyError:
 294             raise UnknownStateError(self.current_state)
 295
 296     def next_line(self, n=1):
 297         """Load `self.line` with the `n`'th next line and return it."""
 298         try:
 299             try:
 300                 self.line_offset += n
 301                 self.line = self.input_lines[self.line_offset]
 302             except IndexError:
 303                 self.line = None
 304                 raise EOFError
 305             return self.line
 306         finally:
 307             self.notify_observers()
 308
 309     def is_next_line_blank(self):
 310         """Return 1 if the next line is blank or non-existant."""
 311         try:
 312             return not self.input_lines[self.line_offset + 1].strip()
 313         except IndexError:
 314             return 1
 315
 316     def at_eof(self):
 317         """Return 1 if the input is at or past end-of-file."""
 318         return self.line_offset >= len(self.input_lines) - 1
 319
 320     def at_bof(self):
 321         """Return 1 if the input is at or before beginning-of-file."""
 322         return self.line_offset <= 0
 323
 324     def previous_line(self, n=1):
 325         """Load `self.line` with the `n`'th previous line and return it."""
 326         self.line_offset -= n
 327         if self.line_offset < 0:
 328             self.line = None
 329         else:
 330             self.line = self.input_lines[self.line_offset]
 331         self.notify_observers()
 332         return self.line
 333
 334     def goto_line(self, line_offset):
 335         """Jump to absolute line offset `line_offset`, load and return it."""
 336         try:
 337             try:
 338                 self.line_offset = line_offset - self.input_offset
 339                 self.line = self.input_lines[self.line_offset]
 340             except IndexError:
 341                 self.line = None
 342                 raise EOFError
 343             return self.line
 344         finally:
 345             self.notify_observers()
 346
 347     def get_source(self, line_offset):
 348         """Return source of line at absolute line offset `line_offset`."""
 349         return self.input_lines.source(line_offset - self.input_offset)
 350
 351     def get_source_spot(self, line_offset=None):
 352         """Return dict with source position of current or given line"""
 353         if line_offset is None:
 354             line_offset = self.line_offset
 355         else:
 356             line_offset -= self.input_offset
 357         (source, offset) = self.input_lines.info(line_offset)
 358         return {'source': source, 'line': offset + 1}
 359
 360     def abs_line_offset(self):
 361         """Return line offset of current line, from beginning of file."""
 362         return self.line_offset + self.input_offset
 363
 364     def abs_line_number(self):
 365         """Return line number of current line (counting from 1)."""
 366         return self.line_offset + self.input_offset + 1
 367
 368     def insert_input(self, input_lines, source):
 369         self.input_lines.insert(self.line_offset + 1, '',
 370                                 source='internal padding after ' + source)
 371         self.input_lines.insert(self.line_offset + 1, '',
 372                                 source='internal padding before '+ source)
 373         self.input_lines.insert(self.line_offset + 2,
 374                                 StringList(input_lines, source))
 375
 376     def get_text_block(self, flush_left=0):
 377         """
 378         Return a contiguous block of text.
 379
 380         If `flush_left` is true, raise `UnexpectedIndentationError` if an
 381         indented line is encountered before the text block ends (with a blank
 382         line).
 383         """
 384         try:
 385             block = self.input_lines.get_text_block(self.line_offset,
 386                                                     flush_left)
 387             self.next_line(len(block) - 1)
 388             return block
 389         except UnexpectedIndentationError, error:
 390             block, source, lineno = error.args
 391             self.next_line(len(block) - 1) # advance to last line of block
 392             raise
 393
 394     def check_line(self, context, state, transitions=None):
 395         """
 396         Examine one line of input for a transition match & execute its method.
 397
 398         Parameters:
 399
 400         - `context`: application-dependent storage.
 401         - `state`: a `State` object, the current state.
 402         - `transitions`: an optional ordered list of transition names to try,
 403           instead of ``state.transition_order``.
 404
 405         Return the values returned by the transition method:
 406
 407         - context: possibly modified from the parameter `context`;
 408         - next state name (`State` subclass name);
 409         - the result output of the transition, a list.
 410
 411         When there is no match, ``state.no_match()`` is called and its return
 412         value is returned.
 413         """
 414         if transitions is None:
 415             transitions =  state.transition_order
 416         state_correction = None
 417         if self.debug:
 418             print >>sys.stderr, (
 419                   '\nStateMachine.check_line: state="%s", transitions=%r.'
 420                   % (state.__class__.__name__, transitions))
 421         for name in transitions:
 422             pattern, method, next_state = state.transitions[name]
 423             match = pattern.match(self.line)
 424             if match:
 425                 if self.debug:
 426                     print >>sys.stderr, (
 427                           '\nStateMachine.check_line: Matched transition '
 428                           '"%s" in state "%s".'
 429                           % (name, state.__class__.__name__))
 430                 return method(match, context, next_state)
 431         else:
 432             if self.debug:
 433                 print >>sys.stderr, (
 434                       '\nStateMachine.check_line: No match in state "%s".'
 435                       % state.__class__.__name__)
 436             return state.no_match(context, transitions)
 437
 438     def add_state(self, state_class):
 439         """
 440         Initialize & add a `state_class` (`State` subclass) object.
 441
 442         Exception: `DuplicateStateError` raised if `state_class` was already
 443         added.
 444         """
 445         statename = state_class.__name__
 446         if statename in self.states:
 447             raise DuplicateStateError(statename)
 448         self.states[statename] = state_class(self, self.debug)
 449
 450     def add_states(self, state_classes):
 451         """
 452         Add `state_classes` (a list of `State` subclasses).
 453         """
 454         for state_class in state_classes:
 455             self.add_state(state_class)
 456
 457     def runtime_init(self):
 458         """
 459         Initialize `self.states`.
 460         """
 461         for state in self.states.values():
 462             state.runtime_init()
 463
 464     def error(self):
 465         """Report error details."""
 466         type, value, module, line, function = _exception_data()
 467         print >>sys.stderr, '%s: %s' % (type, value)
 468         print >>sys.stderr, 'input line %s' % (self.abs_line_number())
 469         print >>sys.stderr, ('module %s, line %s, function %s'
 470                              % (module, line, function))
 471
 472     def attach_observer(self, observer):
 473         """
 474         The `observer` parameter is a function or bound method which takes two
 475         arguments, the source and offset of the current line.
 476         """
 477         self.observers.append(observer)
 478
 479     def detach_observer(self, observer):
 480         self.observers.remove(observer)
 481
 482     def notify_observers(self):
 483         for observer in self.observers:
 484             try:
 485                 info = self.input_lines.info(self.line_offset)
 486             except IndexError:
 487                 info = (None, None)
 488             observer(*info)
 489
 490
 491 class State:
 492
 493     """
 494     State superclass. Contains a list of transitions, and transition methods.
 495
 496     Transition methods all have the same signature. They take 3 parameters:
 497
 498     - An `re` match object. ``match.string`` contains the matched input line,
 499       ``match.start()`` gives the start index of the match, and
 500       ``match.end()`` gives the end index.
 501     - A context object, whose meaning is application-defined (initial value
 502       ``None``). It can be used to store any information required by the state
 503       machine, and the retured context is passed on to the next transition
 504       method unchanged.
 505     - The name of the next state, a string, taken from the transitions list;
 506       normally it is returned unchanged, but it may be altered by the
 507       transition method if necessary.
 508
 509     Transition methods all return a 3-tuple:
 510
 511     - A context object, as (potentially) modified by the transition method.
 512     - The next state name (a return value of ``None`` means no state change).
 513     - The processing result, a list, which is accumulated by the state
 514       machine.
 515
 516     Transition methods may raise an `EOFError` to cut processing short.
 517
 518     There are two implicit transitions, and corresponding transition methods
 519     are defined: `bof()` handles the beginning-of-file, and `eof()` handles
 520     the end-of-file. These methods have non-standard signatures and return
 521     values. `bof()` returns the initial context and results, and may be used
 522     to return a header string, or do any other processing needed. `eof()`
 523     should handle any remaining context and wrap things up; it returns the
 524     final processing result.
 525
 526     Typical applications need only subclass `State` (or a subclass), set the
 527     `patterns` and `initial_transitions` class attributes, and provide
 528     corresponding transition methods. The default object initialization will
 529     take care of constructing the list of transitions.
 530     """
 531
 532     patterns = None
 533     """
 534     {Name: pattern} mapping, used by `make_transition()`. Each pattern may
 535     be a string or a compiled `re` pattern. Override in subclasses.
 536     """
 537
 538     initial_transitions = None
 539     """
 540     A list of transitions to initialize when a `State` is instantiated.
 541     Each entry is either a transition name string, or a (transition name, next
 542     state name) pair. See `make_transitions()`. Override in subclasses.
 543     """
 544
 545     nested_sm = None
 546     """
 547     The `StateMachine` class for handling nested processing.
 548
 549     If left as ``None``, `nested_sm` defaults to the class of the state's
 550     controlling state machine. Override it in subclasses to avoid the default.
 551     """
 552
 553     nested_sm_kwargs = None
 554     """
 555     Keyword arguments dictionary, passed to the `nested_sm` constructor.
 556
 557     Two keys must have entries in the dictionary:
 558
 559     - Key 'state_classes' must be set to a list of `State` classes.
 560     - Key 'initial_state' must be set to the name of the initial state class.
 561
 562     If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
 563     class of the current state, and 'initial_state' defaults to the name of
 564     the class of the current state. Override in subclasses to avoid the
 565     defaults.
 566     """
 567
 568     def __init__(self, state_machine, debug=0):
 569         """
 570         Initialize a `State` object; make & add initial transitions.
 571
 572         Parameters:
 573
 574         - `statemachine`: the controlling `StateMachine` object.
 575         - `debug`: a boolean; produce verbose output if true (nonzero).
 576         """
 577
 578         self.transition_order = []
 579         """A list of transition names in search order."""
 580
 581         self.transitions = {}
 582         """
 583         A mapping of transition names to 3-tuples containing
 584         (compiled_pattern, transition_method, next_state_name). Initialized as
 585         an instance attribute dynamically (instead of as a class attribute)
 586         because it may make forward references to patterns and methods in this
 587         or other classes.
 588         """
 589
 590         self.add_initial_transitions()
 591
 592         self.state_machine = state_machine
 593         """A reference to the controlling `StateMachine` object."""
 594
 595         self.debug = debug
 596         """Debugging mode on/off."""
 597
 598         if self.nested_sm is None:
 599             self.nested_sm = self.state_machine.__class__
 600         if self.nested_sm_kwargs is None:
 601             self.nested_sm_kwargs = {'state_classes': [self.__class__],
 602                                      'initial_state': self.__class__.__name__}
 603
 604     def runtime_init(self):
 605         """
 606         Initialize this `State` before running the state machine; called from
 607         `self.state_machine.run()`.
 608         """
 609         pass
 610
 611     def unlink(self):
 612         """Remove circular references to objects no longer required."""
 613         self.state_machine = None
 614
 615     def add_initial_transitions(self):
 616         """Make and add transitions listed in `self.initial_transitions`."""
 617         if self.initial_transitions:
 618             names, transitions = self.make_transitions(
 619                   self.initial_transitions)
 620             self.add_transitions(names, transitions)
 621
 622     def add_transitions(self, names, transitions):
 623         """
 624         Add a list of transitions to the start of the transition list.
 625
 626         Parameters:
 627
 628         - `names`: a list of transition names.
 629         - `transitions`: a mapping of names to transition tuples.
 630
 631         Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
 632         """
 633         for name in names:
 634             if name in self.transitions:
 635                 raise DuplicateTransitionError(name)
 636             if name not in transitions:
 637                 raise UnknownTransitionError(name)
 638         self.transition_order[:0] = names
 639         self.transitions.update(transitions)
 640
 641     def add_transition(self, name, transition):
 642         """
 643         Add a transition to the start of the transition list.
 644
 645         Parameter `transition`: a ready-made transition 3-tuple.
 646
 647         Exception: `DuplicateTransitionError`.
 648         """
 649         if name in self.transitions:
 650             raise DuplicateTransitionError(name)
 651         self.transition_order[:0] = [name]
 652         self.transitions[name] = transition
 653
 654     def remove_transition(self, name):
 655         """
 656         Remove a transition by `name`.
 657
 658         Exception: `UnknownTransitionError`.
 659         """
 660         try:
 661             del self.transitions[name]
 662             self.transition_order.remove(name)
 663         except:
 664             raise UnknownTransitionError(name)
 665
 666     def make_transition(self, name, next_state=None):
 667         """
 668         Make & return a transition tuple based on `name`.
 669
 670         This is a convenience function to simplify transition creation.
 671
 672         Parameters:
 673
 674         - `name`: a string, the name of the transition pattern & method. This
 675           `State` object must have a method called '`name`', and a dictionary
 676           `self.patterns` containing a key '`name`'.
 677         - `next_state`: a string, the name of the next `State` object for this
 678           transition. A value of ``None`` (or absent) implies no state change
 679           (i.e., continue with the same state).
 680
 681         Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
 682         """
 683         if next_state is None:
 684             next_state = self.__class__.__name__
 685         try:
 686             pattern = self.patterns[name]
 687             if not hasattr(pattern, 'match'):
 688                 pattern = re.compile(pattern)
 689         except KeyError:
 690             raise TransitionPatternNotFound(
 691                   '%s.patterns[%r]' % (self.__class__.__name__, name))
 692         try:
 693             method = getattr(self, name)
 694         except AttributeError:
 695             raise TransitionMethodNotFound(
 696                   '%s.%s' % (self.__class__.__name__, name))
 697         return (pattern, method, next_state)
 698
 699     def make_transitions(self, name_list):
 700         """
 701         Return a list of transition names and a transition mapping.
 702
 703         Parameter `name_list`: a list, where each entry is either a transition
 704         name string, or a 1- or 2-tuple (transition name, optional next state
 705         name).
 706         """
 707         stringtype = type('')
 708         names = []
 709         transitions = {}
 710         for namestate in name_list:
 711             if type(namestate) is stringtype:
 712                 transitions[namestate] = self.make_transition(namestate)
 713                 names.append(namestate)
 714             else:
 715                 transitions[namestate[0]] = self.make_transition(*namestate)
 716                 names.append(namestate[0])
 717         return names, transitions
 718
 719     def no_match(self, context, transitions):
 720         """
 721         Called when there is no match from `StateMachine.check_line()`.
 722
 723         Return the same values returned by transition methods:
 724
 725         - context: unchanged;
 726         - next state name: ``None``;
 727         - empty result list.
 728
 729         Override in subclasses to catch this event.
 730         """
 731         return context, None, []
 732
 733     def bof(self, context):
 734         """
 735         Handle beginning-of-file. Return unchanged `context`, empty result.
 736
 737         Override in subclasses.
 738
 739         Parameter `context`: application-defined storage.
 740         """
 741         return context, []
 742
 743     def eof(self, context):
 744         """
 745         Handle end-of-file. Return empty result.
 746
 747         Override in subclasses.
 748
 749         Parameter `context`: application-defined storage.
 750         """
 751         return []
 752
 753     def nop(self, match, context, next_state):
 754         """
 755         A "do nothing" transition method.
 756
 757         Return unchanged `context` & `next_state`, empty result. Useful for
 758         simple state changes (actionless transitions).
 759         """
 760         return context, next_state, []
 761
 762
 763 class StateMachineWS(StateMachine):
 764
 765     """
 766     `StateMachine` subclass specialized for whitespace recognition.
 767
 768     There are three methods provided for extracting indented text blocks:
 769
 770     - `get_indented()`: use when the indent is unknown.
 771     - `get_known_indented()`: use when the indent is known for all lines.
 772     - `get_first_known_indented()`: use when only the first line's indent is
 773       known.
 774     """
 775
 776     def get_indented(self, until_blank=0, strip_indent=1):
 777         """
 778         Return a block of indented lines of text, and info.
 779
 780         Extract an indented block where the indent is unknown for all lines.
 781
 782         :Parameters:
 783             - `until_blank`: Stop collecting at the first blank line if true
 784               (1).
 785             - `strip_indent`: Strip common leading indent if true (1,
 786               default).
 787
 788         :Return:
 789             - the indented block (a list of lines of text),
 790             - its indent,
 791             - its first line offset from BOF, and
 792             - whether or not it finished with a blank line.
 793         """
 794         offset = self.abs_line_offset()
 795         indented, indent, blank_finish = self.input_lines.get_indented(
 796               self.line_offset, until_blank, strip_indent)
 797         if indented:
 798             self.next_line(len(indented) - 1) # advance to last indented line
 799         while indented and not indented[0].strip():
 800             indented.trim_start()
 801             offset += 1
 802         return indented, indent, offset, blank_finish
 803
 804     def get_known_indented(self, indent, until_blank=0, strip_indent=1):
 805         """
 806         Return an indented block and info.
 807
 808         Extract an indented block where the indent is known for all lines.
 809         Starting with the current line, extract the entire text block with at
 810         least `indent` indentation (which must be whitespace, except for the
 811         first line).
 812
 813         :Parameters:
 814             - `indent`: The number of indent columns/characters.
 815             - `until_blank`: Stop collecting at the first blank line if true
 816               (1).
 817             - `strip_indent`: Strip `indent` characters of indentation if true
 818               (1, default).
 819
 820         :Return:
 821             - the indented block,
 822             - its first line offset from BOF, and
 823             - whether or not it finished with a blank line.
 824         """
 825         offset = self.abs_line_offset()
 826         indented, indent, blank_finish = self.input_lines.get_indented(
 827               self.line_offset, until_blank, strip_indent,
 828               block_indent=indent)
 829         self.next_line(len(indented) - 1) # advance to last indented line
 830         while indented and not indented[0].strip():
 831             indented.trim_start()
 832             offset += 1
 833         return indented, offset, blank_finish
 834
 835     def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
 836                                  strip_top=1):
 837         """
 838         Return an indented block and info.
 839
 840         Extract an indented block where the indent is known for the first line
 841         and unknown for all other lines.
 842
 843         :Parameters:
 844             - `indent`: The first line's indent (# of columns/characters).
 845             - `until_blank`: Stop collecting at the first blank line if true
 846               (1).
 847             - `strip_indent`: Strip `indent` characters of indentation if true
 848               (1, default).
 849             - `strip_top`: Strip blank lines from the beginning of the block.
 850
 851         :Return:
 852             - the indented block,
 853             - its indent,
 854             - its first line offset from BOF, and
 855             - whether or not it finished with a blank line.
 856         """
 857         offset = self.abs_line_offset()
 858         indented, indent, blank_finish = self.input_lines.get_indented(
 859               self.line_offset, until_blank, strip_indent,
 860               first_indent=indent)
 861         self.next_line(len(indented) - 1) # advance to last indented line
 862         if strip_top:
 863             while indented and not indented[0].strip():
 864                 indented.trim_start()
 865                 offset += 1
 866         return indented, indent, offset, blank_finish
 867
 868
 869 class StateWS(State):
 870
 871     """
 872     State superclass specialized for whitespace (blank lines & indents).
 873
 874     Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
 875     lines) and 'indent' (for indented text blocks) are added automatically,
 876     before any other transitions.  The transition method `blank()` handles
 877     blank lines and `indent()` handles nested indented blocks.  Indented
 878     blocks trigger a new state machine to be created by `indent()` and run.
 879     The class of the state machine to be created is in `indent_sm`, and the
 880     constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
 881
 882     The methods `known_indent()` and `firstknown_indent()` are provided for
 883     indented blocks where the indent (all lines' and first line's only,
 884     respectively) is known to the transition method, along with the attributes
 885     `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
 886     is triggered automatically.
 887     """
 888
 889     indent_sm = None
 890     """
 891     The `StateMachine` class handling indented text blocks.
 892
 893     If left as ``None``, `indent_sm` defaults to the value of
 894     `State.nested_sm`.  Override it in subclasses to avoid the default.
 895     """
 896
 897     indent_sm_kwargs = None
 898     """
 899     Keyword arguments dictionary, passed to the `indent_sm` constructor.
 900
 901     If left as ``None``, `indent_sm_kwargs` defaults to the value of
 902     `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
 903     """
 904
 905     known_indent_sm = None
 906     """
 907     The `StateMachine` class handling known-indented text blocks.
 908
 909     If left as ``None``, `known_indent_sm` defaults to the value of
 910     `indent_sm`.  Override it in subclasses to avoid the default.
 911     """
 912
 913     known_indent_sm_kwargs = None
 914     """
 915     Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
 916
 917     If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
 918     `indent_sm_kwargs`. Override it in subclasses to avoid the default.
 919     """
 920
 921     ws_patterns = {'blank': ' *$',
 922                    'indent': ' +'}
 923     """Patterns for default whitespace transitions.  May be overridden in
 924     subclasses."""
 925
 926     ws_initial_transitions = ('blank', 'indent')
 927     """Default initial whitespace transitions, added before those listed in
 928     `State.initial_transitions`.  May be overridden in subclasses."""
 929
 930     def __init__(self, state_machine, debug=0):
 931         """
 932         Initialize a `StateSM` object; extends `State.__init__()`.
 933
 934         Check for indent state machine attributes, set defaults if not set.
 935         """
 936         State.__init__(self, state_machine, debug)
 937         if self.indent_sm is None:
 938             self.indent_sm = self.nested_sm
 939         if self.indent_sm_kwargs is None:
 940             self.indent_sm_kwargs = self.nested_sm_kwargs
 941         if self.known_indent_sm is None:
 942             self.known_indent_sm = self.indent_sm
 943         if self.known_indent_sm_kwargs is None:
 944             self.known_indent_sm_kwargs = self.indent_sm_kwargs
 945
 946     def add_initial_transitions(self):
 947         """
 948         Add whitespace-specific transitions before those defined in subclass.
 949
 950         Extends `State.add_initial_transitions()`.
 951         """
 952         State.add_initial_transitions(self)
 953         if self.patterns is None:
 954             self.patterns = {}
 955         self.patterns.update(self.ws_patterns)
 956         names, transitions = self.make_transitions(
 957             self.ws_initial_transitions)
 958         self.add_transitions(names, transitions)
 959
 960     def blank(self, match, context, next_state):
 961         """Handle blank lines. Does nothing. Override in subclasses."""
 962         return self.nop(match, context, next_state)
 963
 964     def indent(self, match, context, next_state):
 965         """
 966         Handle an indented text block. Extend or override in subclasses.
 967
 968         Recursively run the registered state machine for indented blocks
 969         (`self.indent_sm`).
 970         """
 971         indented, indent, line_offset, blank_finish = \
 972               self.state_machine.get_indented()
 973         sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
 974         results = sm.run(indented, input_offset=line_offset)
 975         return context, next_state, results
 976
 977     def known_indent(self, match, context, next_state):
 978         """
 979         Handle a known-indent text block. Extend or override in subclasses.
 980
 981         Recursively run the registered state machine for known-indent indented
 982         blocks (`self.known_indent_sm`). The indent is the length of the
 983         match, ``match.end()``.
 984         """
 985         indented, line_offset, blank_finish = \
 986               self.state_machine.get_known_indented(match.end())
 987         sm = self.known_indent_sm(debug=self.debug,
 988                                  **self.known_indent_sm_kwargs)
 989         results = sm.run(indented, input_offset=line_offset)
 990         return context, next_state, results
 991
 992     def first_known_indent(self, match, context, next_state):
 993         """
 994         Handle an indented text block (first line's indent known).
 995
 996         Extend or override in subclasses.
 997
 998         Recursively run the registered state machine for known-indent indented
 999         blocks (`self.known_indent_sm`). The indent is the length of the
1000         match, ``match.end()``.
1001         """
1002         indented, line_offset, blank_finish = \
1003               self.state_machine.get_first_known_indented(match.end())
1004         sm = self.known_indent_sm(debug=self.debug,
1005                                  **self.known_indent_sm_kwargs)
1006         results = sm.run(indented, input_offset=line_offset)
1007         return context, next_state, results
1008
1009
1010 class _SearchOverride:
1011
1012     """
1013     Mix-in class to override `StateMachine` regular expression behavior.
1014
1015     Changes regular expression matching, from the default `re.match()`
1016     (succeeds only if the pattern matches at the start of `self.line`) to
1017     `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1018     When subclassing a `StateMachine`, list this class **first** in the
1019     inheritance list of the class definition.
1020     """
1021
1022     def match(self, pattern):
1023         """
1024         Return the result of a regular expression search.
1025
1026         Overrides `StateMachine.match()`.
1027
1028         Parameter `pattern`: `re` compiled regular expression.
1029         """
1030         return pattern.search(self.line)
1031
1032
1033 class SearchStateMachine(_SearchOverride, StateMachine):
1034     """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1035     pass
1036
1037
1038 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1039     """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1040     pass
1041
1042
1043 class ViewList:
1044
1045     """
1046     List with extended functionality: slices of ViewList objects are child
1047     lists, linked to their parents. Changes made to a child list also affect
1048     the parent list.  A child list is effectively a "view" (in the SQL sense)
1049     of the parent list.  Changes to parent lists, however, do *not* affect
1050     active child lists.  If a parent list is changed, any active child lists
1051     should be recreated.
1052
1053     The start and end of the slice can be trimmed using the `trim_start()` and
1054     `trim_end()` methods, without affecting the parent list.  The link between
1055     child and parent lists can be broken by calling `disconnect()` on the
1056     child list.
1057
1058     Also, ViewList objects keep track of the source & offset of each item.
1059     This information is accessible via the `source()`, `offset()`, and
1060     `info()` methods.
1061     """
1062
1063     def __init__(self, initlist=None, source=None, items=None,
1064                  parent=None, parent_offset=None):
1065         self.data = []
1066         """The actual list of data, flattened from various sources."""
1067
1068         self.items = []
1069         """A list of (source, offset) pairs, same length as `self.data`: the
1070         source of each line and the offset of each line from the beginning of
1071         its source."""
1072
1073         self.parent = parent
1074         """The parent list."""
1075
1076         self.parent_offset = parent_offset
1077         """Offset of this list from the beginning of the parent list."""
1078
1079         if isinstance(initlist, ViewList):
1080             self.data = initlist.data[:]
1081             self.items = initlist.items[:]
1082         elif initlist is not None:
1083             self.data = list(initlist)
1084             if items:
1085                 self.items = items
1086             else:
1087                 self.items = [(source, i) for i in range(len(initlist))]
1088         assert len(self.data) == len(self.items), 'data mismatch'
1089
1090     def __str__(self):
1091         return str(self.data)
1092
1093     def __repr__(self):
1094         return '%s(%s, items=%s)' % (self.__class__.__name__,
1095                                      self.data, self.items)
1096
1097     def __lt__(self, other): return self.data <  self.__cast(other)
1098     def __le__(self, other): return self.data <= self.__cast(other)
1099     def __eq__(self, other): return self.data == self.__cast(other)
1100     def __ne__(self, other): return self.data != self.__cast(other)
1101     def __gt__(self, other): return self.data >  self.__cast(other)
1102     def __ge__(self, other): return self.data >= self.__cast(other)
1103     def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1104
1105     def __cast(self, other):
1106         if isinstance(other, ViewList):
1107             return other.data
1108         else:
1109             return other
1110
1111     def __contains__(self, item): return item in self.data
1112     def __len__(self): return len(self.data)
1113
1114     # The __getitem__()/__setitem__() methods check whether the index
1115     # is a slice first, since indexing a native list with a slice object
1116     # just works.
1117
1118     def __getitem__(self, i):
1119         if isinstance(i, types.SliceType):
1120             assert i.step in (None, 1),  'cannot handle slice with stride'
1121             return self.__class__(self.data[i.start:i.stop],
1122                                   items=self.items[i.start:i.stop],
1123                                   parent=self, parent_offset=i.start or 0)
1124         else:
1125             return self.data[i]
1126
1127     def __setitem__(self, i, item):
1128         if isinstance(i, types.SliceType):
1129             assert i.step in (None, 1), 'cannot handle slice with stride'
1130             if not isinstance(item, ViewList):
1131                 raise TypeError('assigning non-ViewList to ViewList slice')
1132             self.data[i.start:i.stop] = item.data
1133             self.items[i.start:i.stop] = item.items
1134             assert len(self.data) == len(self.items), 'data mismatch'
1135             if self.parent:
1136                 self.parent[(i.start or 0) + self.parent_offset
1137                             : (i.stop or len(self)) + self.parent_offset] = item
1138         else:
1139             self.data[i] = item
1140             if self.parent:
1141                 self.parent[i + self.parent_offset] = item
1142
1143     def __delitem__(self, i):
1144         try:
1145             del self.data[i]
1146             del self.items[i]
1147             if self.parent:
1148                 del self.parent[i + self.parent_offset]
1149         except TypeError:
1150             assert i.step is None, 'cannot handle slice with stride'
1151             del self.data[i.start:i.stop]
1152             del self.items[i.start:i.stop]
1153             if self.parent:
1154                 del self.parent[(i.start or 0) + self.parent_offset
1155                                 : (i.stop or len(self)) + self.parent_offset]
1156
1157     def __add__(self, other):
1158         if isinstance(other, ViewList):
1159             return self.__class__(self.data + other.data,
1160                                   items=(self.items + other.items))
1161         else:
1162             raise TypeError('adding non-ViewList to a ViewList')
1163
1164     def __radd__(self, other):
1165         if isinstance(other, ViewList):
1166             return self.__class__(other.data + self.data,
1167                                   items=(other.items + self.items))
1168         else:
1169             raise TypeError('adding ViewList to a non-ViewList')
1170
1171     def __iadd__(self, other):
1172         if isinstance(other, ViewList):
1173             self.data += other.data
1174         else:
1175             raise TypeError('argument to += must be a ViewList')
1176         return self
1177
1178     def __mul__(self, n):
1179         return self.__class__(self.data * n, items=(self.items * n))
1180
1181     __rmul__ = __mul__
1182
1183     def __imul__(self, n):
1184         self.data *= n
1185         self.items *= n
1186         return self
1187
1188     def extend(self, other):
1189         if not isinstance(other, ViewList):
1190             raise TypeError('extending a ViewList with a non-ViewList')
1191         if self.parent:
1192             self.parent.insert(len(self.data) + self.parent_offset, other)
1193         self.data.extend(other.data)
1194         self.items.extend(other.items)
1195
1196     def append(self, item, source=None, offset=0):
1197         if source is None:
1198             self.extend(item)
1199         else:
1200             if self.parent:
1201                 self.parent.insert(len(self.data) + self.parent_offset, item,
1202                                    source, offset)
1203             self.data.append(item)
1204             self.items.append((source, offset))
1205
1206     def insert(self, i, item, source=None, offset=0):
1207         if source is None:
1208             if not isinstance(item, ViewList):
1209                 raise TypeError('inserting non-ViewList with no source given')
1210             self.data[i:i] = item.data
1211             self.items[i:i] = item.items
1212             if self.parent:
1213                 index = (len(self.data) + i) % len(self.data)
1214                 self.parent.insert(index + self.parent_offset, item)
1215         else:
1216             self.data.insert(i, item)
1217             self.items.insert(i, (source, offset))
1218             if self.parent:
1219                 index = (len(self.data) + i) % len(self.data)
1220                 self.parent.insert(index + self.parent_offset, item,
1221                                    source, offset)
1222
1223     def pop(self, i=-1):
1224         if self.parent:
1225             index = (len(self.data) + i) % len(self.data)
1226             self.parent.pop(index + self.parent_offset)
1227         self.items.pop(i)
1228         return self.data.pop(i)
1229
1230     def trim_start(self, n=1):
1231         """
1232         Remove items from the start of the list, without touching the parent.
1233         """
1234         if n > len(self.data):
1235             raise IndexError("Size of trim too large; can't trim %s items "
1236                              "from a list of size %s." % (n, len(self.data)))
1237         elif n < 0:
1238             raise IndexError('Trim size must be >= 0.')
1239         del self.data[:n]
1240         del self.items[:n]
1241         if self.parent:
1242             self.parent_offset += n
1243
1244     def trim_end(self, n=1):
1245         """
1246         Remove items from the end of the list, without touching the parent.
1247         """
1248         if n > len(self.data):
1249             raise IndexError("Size of trim too large; can't trim %s items "
1250                              "from a list of size %s." % (n, len(self.data)))
1251         elif n < 0:
1252             raise IndexError('Trim size must be >= 0.')
1253         del self.data[-n:]
1254         del self.items[-n:]
1255
1256     def remove(self, item):
1257         index = self.index(item)
1258         del self[index]
1259
1260     def count(self, item): return self.data.count(item)
1261     def index(self, item): return self.data.index(item)
1262
1263     def reverse(self):
1264         self.data.reverse()
1265         self.items.reverse()
1266         self.parent = None
1267
1268     def sort(self, *args):
1269         tmp = zip(self.data, self.items)
1270         tmp.sort(*args)
1271         self.data = [entry[0] for entry in tmp]
1272         self.items = [entry[1] for entry in tmp]
1273         self.parent = None
1274
1275     def info(self, i):
1276         """Return source & offset for index `i`."""
1277         try:
1278             return self.items[i]
1279         except IndexError:
1280             if i == len(self.data):     # Just past the end
1281                 return self.items[i - 1][0], None
1282             else:
1283                 raise
1284
1285     def source(self, i):
1286         """Return source for index `i`."""
1287         return self.info(i)[0]
1288
1289     def offset(self, i):
1290         """Return offset for index `i`."""
1291         return self.info(i)[1]
1292
1293     def disconnect(self):
1294         """Break link between this list and parent list."""
1295         self.parent = None
1296
1297
1298 class StringList(ViewList):
1299
1300     """A `ViewList` with string-specific methods."""
1301
1302     def trim_left(self, length, start=0, end=sys.maxint):
1303         """
1304         Trim `length` characters off the beginning of each item, in-place,
1305         from index `start` to `end`.  No whitespace-checking is done on the
1306         trimmed text.  Does not affect slice parent.
1307         """
1308         self.data[start:end] = [line[length:]
1309                                 for line in self.data[start:end]]
1310
1311     def get_text_block(self, start, flush_left=0):
1312         """
1313         Return a contiguous block of text.
1314
1315         If `flush_left` is true, raise `UnexpectedIndentationError` if an
1316         indented line is encountered before the text block ends (with a blank
1317         line).
1318         """
1319         end = start
1320         last = len(self.data)
1321         while end < last:
1322             line = self.data[end]
1323             if not line.strip():
1324                 break
1325             if flush_left and (line[0] == ' '):
1326                 source, offset = self.info(end)
1327                 raise UnexpectedIndentationError(self[start:end], source,
1328                                                  offset + 1)
1329             end += 1
1330         return self[start:end]
1331
1332     def get_indented(self, start=0, until_blank=0, strip_indent=1,
1333                      block_indent=None, first_indent=None):
1334         """
1335         Extract and return a StringList of indented lines of text.
1336
1337         Collect all lines with indentation, determine the minimum indentation,
1338         remove the minimum indentation from all indented lines (unless
1339         `strip_indent` is false), and return them. All lines up to but not
1340         including the first unindented line will be returned.
1341
1342         :Parameters:
1343           - `start`: The index of the first line to examine.
1344           - `until_blank`: Stop collecting at the first blank line if true.
1345           - `strip_indent`: Strip common leading indent if true (default).
1346           - `block_indent`: The indent of the entire block, if known.
1347           - `first_indent`: The indent of the first line, if known.
1348
1349         :Return:
1350           - a StringList of indented lines with mininum indent removed;
1351           - the amount of the indent;
1352           - a boolean: did the indented block finish with a blank line or EOF?
1353         """
1354         indent = block_indent           # start with None if unknown
1355         end = start
1356         if block_indent is not None and first_indent is None:
1357             first_indent = block_indent
1358         if first_indent is not None:
1359             end += 1
1360         last = len(self.data)
1361         while end < last:
1362             line = self.data[end]
1363             if line and (line[0] != ' '
1364                          or (block_indent is not None
1365                              and line[:block_indent].strip())):
1366                 # Line not indented or insufficiently indented.
1367                 # Block finished properly iff the last indented line blank:
1368                 blank_finish = ((end > start)
1369                                 and not self.data[end - 1].strip())
1370                 break
1371             stripped = line.lstrip()
1372             if not stripped:            # blank line
1373                 if until_blank:
1374                     blank_finish = 1
1375                     break
1376             elif block_indent is None:
1377                 line_indent = len(line) - len(stripped)
1378                 if indent is None:
1379                     indent = line_indent
1380                 else:
1381                     indent = min(indent, line_indent)
1382             end += 1
1383         else:
1384             blank_finish = 1            # block ends at end of lines
1385         block = self[start:end]
1386         if first_indent is not None and block:
1387             block.data[0] = block.data[0][first_indent:]
1388         if indent and strip_indent:
1389             block.trim_left(indent, start=(first_indent is not None))
1390         return block, indent or 0, blank_finish
1391
1392     def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1393         block = self[top:bottom]
1394         indent = right
1395         for i in range(len(block.data)):
1396             block.data[i] = line = block.data[i][left:right].rstrip()
1397             if line:
1398                 indent = min(indent, len(line) - len(line.lstrip()))
1399         if strip_indent and 0 < indent < right:
1400             block.data = [line[indent:] for line in block.data]
1401         return block
1402
1403     def pad_double_width(self, pad_char):
1404         """
1405         Pad all double-width characters in self by appending `pad_char` to each.
1406         For East Asian language support.
1407         """
1408         if hasattr(unicodedata, 'east_asian_width'):
1409             east_asian_width = unicodedata.east_asian_width
1410         else:
1411             return                      # new in Python 2.4
1412         for i in range(len(self.data)):
1413             line = self.data[i]
1414             if isinstance(line, unicode):
1415                 new = []
1416                 for char in line:
1417                     new.append(char)
1418                     if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1419                         new.append(pad_char)
1420                 self.data[i] = ''.join(new)
1421
1422     def replace(self, old, new):
1423         """Replace all occurrences of substring `old` with `new`."""
1424         for i in range(len(self.data)):
1425             self.data[i] = self.data[i].replace(old, new)
1426
1427
1428 class StateMachineError(Exception): pass
1429 class UnknownStateError(StateMachineError): pass
1430 class DuplicateStateError(StateMachineError): pass
1431 class UnknownTransitionError(StateMachineError): pass
1432 class DuplicateTransitionError(StateMachineError): pass
1433 class TransitionPatternNotFound(StateMachineError): pass
1434 class TransitionMethodNotFound(StateMachineError): pass
1435 class UnexpectedIndentationError(StateMachineError): pass
1436
1437
1438 class TransitionCorrection(Exception):
1439
1440     """
1441     Raise from within a transition method to switch to another transition.
1442
1443     Raise with one argument, the new transition name.
1444     """
1445
1446
1447 class StateCorrection(Exception):
1448
1449     """
1450     Raise from within a transition method to switch to another state.
1451
1452     Raise with one or two arguments: new state name, and an optional new
1453     transition name.
1454     """
1455
1456
1457 def string2lines(astring, tab_width=8, convert_whitespace=0,
1458                  whitespace=re.compile('[\v\f]')):
1459     """
1460     Return a list of one-line strings with tabs expanded, no newlines, and
1461     trailing whitespace stripped.
1462
1463     Each tab is expanded with between 1 and `tab_width` spaces, so that the
1464     next character's index becomes a multiple of `tab_width` (8 by default).
1465
1466     Parameters:
1467
1468     - `astring`: a multi-line string.
1469     - `tab_width`: the number of columns between tab stops.
1470     - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1471     """
1472     if convert_whitespace:
1473         astring = whitespace.sub(' ', astring)
1474     return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1475
1476 def _exception_data():
1477     """
1478     Return exception information:
1479
1480     - the exception's class name;
1481     - the exception object;
1482     - the name of the file containing the offending code;
1483     - the line number of the offending code;
1484     - the function name of the offending code.
1485     """
1486     type, value, traceback = sys.exc_info()
1487     while traceback.tb_next:
1488         traceback = traceback.tb_next
1489     code = traceback.tb_frame.f_code
1490     return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1491             code.co_name)