docutils/statemachine.py

   1  # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 A finite state machine specialized for regular-expression-based text filters,
   7 this module defines the following classes:
   8
   9 - `StateMachine`, a state machine
  10 - `State`, a state superclass
  11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
  12 - `StateWS`, a state superclass for use with `StateMachineWS`
  13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
  14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
  15 - `ViewList`, extends standard Python lists.
  16 - `StringList`, string-specific ViewList.
  17
  18 Exception classes:
  19
  20 - `StateMachineError`
  21 - `UnknownStateError`
  22 - `DuplicateStateError`
  23 - `UnknownTransitionError`
  24 - `DuplicateTransitionError`
  25 - `TransitionPatternNotFound`
  26 - `TransitionMethodNotFound`
  27 - `UnexpectedIndentationError`
  28 - `TransitionCorrection`: Raised to switch to another transition.
  29 - `StateCorrection`: Raised to switch to another state & transition.
  30
  31 Functions:
  32
  33 - `string2lines()`: split a multi-line string into a list of one-line strings
  34
  35
  36 How To Use This Module
  37 ======================
  38 (See the individual classes, methods, and attributes for details.)
  39
  40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
  41    You will also need to ``import re``.
  42
  43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
  44    machine::
  45
  46        class MyState(statemachine.State):
  47
  48    Within the state's class definition:
  49
  50    a) Include a pattern for each transition, in `State.patterns`::
  51
  52           patterns = {'atransition': r'pattern', ...}
  53
  54    b) Include a list of initial transitions to be set up automatically, in
  55       `State.initial_transitions`::
  56
  57           initial_transitions = ['atransition', ...]
  58
  59    c) Define a method for each transition, with the same name as the
  60       transition pattern::
  61
  62           def atransition(self, match, context, next_state):
  63               # do something
  64               result = [...]  # a list
  65               return context, next_state, result
  66               # context, next_state may be altered
  67
  68       Transition methods may raise an `EOFError` to cut processing short.
  69
  70    d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
  71       transition methods, which handle the beginning- and end-of-file.
  72
  73    e) In order to handle nested processing, you may wish to override the
  74       attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
  75
  76       If you are using `StateWS` as a base class, in order to handle nested
  77       indented blocks, you may wish to:
  78
  79       - override the attributes `StateWS.indent_sm`,
  80         `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
  81         `StateWS.known_indent_sm_kwargs`;
  82       - override the `StateWS.blank()` method; and/or
  83       - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
  84         and/or `StateWS.firstknown_indent()` methods.
  85
  86 3. Create a state machine object::
  87
  88        sm = StateMachine(state_classes=[MyState, ...],
  89                          initial_state='MyState')
  90
  91 4. Obtain the input text, which needs to be converted into a tab-free list of
  92    one-line strings. For example, to read text from a file called
  93    'inputfile'::
  94
  95        input_string = open('inputfile').read()
  96        input_lines = statemachine.string2lines(input_string)
  97
  98 5. Run the state machine on the input text and collect the results, a list::
  99
 100        results = sm.run(input_lines)
 101
 102 6. Remove any lingering circular references::
 103
 104        sm.unlink()
 105 """
 106
 107 __docformat__ = 'restructuredtext'
 108
 109 import sys
 110 import re
 111 import types
 112 import unicodedata
 113 from docutils import utils
 114 from docutils.utils.error_reporting import ErrorOutput
 115
 116
 117 class StateMachine:
 118
 119     """
 120     A finite state machine for text filters using regular expressions.
 121
 122     The input is provided in the form of a list of one-line strings (no
 123     newlines). States are subclasses of the `State` class. Transitions consist
 124     of regular expression patterns and transition methods, and are defined in
 125     each state.
 126
 127     The state machine is started with the `run()` method, which returns the
 128     results of processing in a list.
 129     """
 130
 131     def __init__(self, state_classes, initial_state, debug=False):
 132         """
 133         Initialize a `StateMachine` object; add state objects.
 134
 135         Parameters:
 136
 137         - `state_classes`: a list of `State` (sub)classes.
 138         - `initial_state`: a string, the class name of the initial state.
 139         - `debug`: a boolean; produce verbose output if true (nonzero).
 140         """
 141
 142         self.input_lines = None
 143         """`StringList` of input lines (without newlines).
 144         Filled by `self.run()`."""
 145
 146         self.input_offset = 0
 147         """Offset of `self.input_lines` from the beginning of the file."""
 148
 149         self.line = None
 150         """Current input line."""
 151
 152         self.line_offset = -1
 153         """Current input line offset from beginning of `self.input_lines`."""
 154
 155         self.debug = debug
 156         """Debugging mode on/off."""
 157
 158         self.initial_state = initial_state
 159         """The name of the initial state (key to `self.states`)."""
 160
 161         self.current_state = initial_state
 162         """The name of the current state (key to `self.states`)."""
 163
 164         self.states = {}
 165         """Mapping of {state_name: State_object}."""
 166
 167         self.add_states(state_classes)
 168
 169         self.observers = []
 170         """List of bound methods or functions to call whenever the current
 171         line changes.  Observers are called with one argument, ``self``.
 172         Cleared at the end of `run()`."""
 173
 174         self._stderr = ErrorOutput()
 175         """Wrapper around sys.stderr catching en-/decoding errors"""
 176
 177
 178     def unlink(self):
 179         """Remove circular references to objects no longer required."""
 180         for state in self.states.values():
 181             state.unlink()
 182         self.states = None
 183
 184     def run(self, input_lines, input_offset=0, context=None,
 185             input_source=None, initial_state=None):
 186         """
 187         Run the state machine on `input_lines`. Return results (a list).
 188
 189         Reset `self.line_offset` and `self.current_state`. Run the
 190         beginning-of-file transition. Input one line at a time and check for a
 191         matching transition. If a match is found, call the transition method
 192         and possibly change the state. Store the context returned by the
 193         transition method to be passed on to the next transition matched.
 194         Accumulate the results returned by the transition methods in a list.
 195         Run the end-of-file transition. Finally, return the accumulated
 196         results.
 197
 198         Parameters:
 199
 200         - `input_lines`: a list of strings without newlines, or `StringList`.
 201         - `input_offset`: the line offset of `input_lines` from the beginning
 202           of the file.
 203         - `context`: application-specific storage.
 204         - `input_source`: name or path of source of `input_lines`.
 205         - `initial_state`: name of initial state.
 206         """
 207         self.runtime_init()
 208         if isinstance(input_lines, StringList):
 209             self.input_lines = input_lines
 210         else:
 211             self.input_lines = StringList(input_lines, source=input_source)
 212         self.input_offset = input_offset
 213         self.line_offset = -1
 214         self.current_state = initial_state or self.initial_state
 215         if self.debug:
 216             print >>self._stderr, (
 217                 u'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
 218                 % (self.line_offset, u'\n| '.join(self.input_lines)))
 219         transitions = None
 220         results = []
 221         state = self.get_state()
 222         try:
 223             if self.debug:
 224                 print >>self._stderr, '\nStateMachine.run: bof transition'
 225             context, result = state.bof(context)
 226             results.extend(result)
 227             while True:
 228                 try:
 229                     try:
 230                         self.next_line()
 231                         if self.debug:
 232                             source, offset = self.input_lines.info(
 233                                 self.line_offset)
 234                             print >>self._stderr, (
 235                                 u'\nStateMachine.run: line (source=%r, '
 236                                 u'offset=%r):\n| %s'
 237                                 % (source, offset, self.line))
 238                         context, next_state, result = self.check_line(
 239                             context, state, transitions)
 240                     except EOFError:
 241                         if self.debug:
 242                             print >>self._stderr, (
 243                                 '\nStateMachine.run: %s.eof transition'
 244                                 % state.__class__.__name__)
 245                         result = state.eof(context)
 246                         results.extend(result)
 247                         break
 248                     else:
 249                         results.extend(result)
 250                 except TransitionCorrection, exception:
 251                     self.previous_line() # back up for another try
 252                     transitions = (exception.args[0],)
 253                     if self.debug:
 254                         print >>self._stderr, (
 255                               '\nStateMachine.run: TransitionCorrection to '
 256                               'state "%s", transition %s.'
 257                               % (state.__class__.__name__, transitions[0]))
 258                     continue
 259                 except StateCorrection, exception:
 260                     self.previous_line() # back up for another try
 261                     next_state = exception.args[0]
 262                     if len(exception.args) == 1:
 263                         transitions = None
 264                     else:
 265                         transitions = (exception.args[1],)
 266                     if self.debug:
 267                         print >>self._stderr, (
 268                               '\nStateMachine.run: StateCorrection to state '
 269                               '"%s", transition %s.'
 270                               % (next_state, transitions[0]))
 271                 else:
 272                     transitions = None
 273                 state = self.get_state(next_state)
 274         except:
 275             if self.debug:
 276                 self.error()
 277             raise
 278         self.observers = []
 279         return results
 280
 281     def get_state(self, next_state=None):
 282         """
 283         Return current state object; set it first if `next_state` given.
 284
 285         Parameter `next_state`: a string, the name of the next state.
 286
 287         Exception: `UnknownStateError` raised if `next_state` unknown.
 288         """
 289         if next_state:
 290             if self.debug and next_state != self.current_state:
 291                 print >>self._stderr, (
 292                     '\nStateMachine.get_state: Changing state from '
 293                     '"%s" to "%s" (input line %s).'
 294                     % (self.current_state, next_state,
 295                        self.abs_line_number()))
 296             self.current_state = next_state
 297         try:
 298             return self.states[self.current_state]
 299         except KeyError:
 300             raise UnknownStateError(self.current_state)
 301
 302     def next_line(self, n=1):
 303         """Load `self.line` with the `n`'th next line and return it."""
 304         try:
 305             try:
 306                 self.line_offset += n
 307                 self.line = self.input_lines[self.line_offset]
 308             except IndexError:
 309                 self.line = None
 310                 raise EOFError
 311             return self.line
 312         finally:
 313             self.notify_observers()
 314
 315     def is_next_line_blank(self):
 316         """Return 1 if the next line is blank or non-existent."""
 317         try:
 318             return not self.input_lines[self.line_offset + 1].strip()
 319         except IndexError:
 320             return 1
 321
 322     def at_eof(self):
 323         """Return 1 if the input is at or past end-of-file."""
 324         return self.line_offset >= len(self.input_lines) - 1
 325
 326     def at_bof(self):
 327         """Return 1 if the input is at or before beginning-of-file."""
 328         return self.line_offset <= 0
 329
 330     def previous_line(self, n=1):
 331         """Load `self.line` with the `n`'th previous line and return it."""
 332         self.line_offset -= n
 333         if self.line_offset < 0:
 334             self.line = None
 335         else:
 336             self.line = self.input_lines[self.line_offset]
 337         self.notify_observers()
 338         return self.line
 339
 340     def goto_line(self, line_offset):
 341         """Jump to absolute line offset `line_offset`, load and return it."""
 342         try:
 343             try:
 344                 self.line_offset = line_offset - self.input_offset
 345                 self.line = self.input_lines[self.line_offset]
 346             except IndexError:
 347                 self.line = None
 348                 raise EOFError
 349             return self.line
 350         finally:
 351             self.notify_observers()
 352
 353     def get_source(self, line_offset):
 354         """Return source of line at absolute line offset `line_offset`."""
 355         return self.input_lines.source(line_offset - self.input_offset)
 356
 357     def abs_line_offset(self):
 358         """Return line offset of current line, from beginning of file."""
 359         return self.line_offset + self.input_offset
 360
 361     def abs_line_number(self):
 362         """Return line number of current line (counting from 1)."""
 363         return self.line_offset + self.input_offset + 1
 364
 365     def get_source_and_line(self, lineno=None):
 366         """Return (source, line) tuple for current or given line number.
 367
 368         Looks up the source and line number in the `self.input_lines`
 369         StringList instance to count for included source files.
 370
 371         If the optional argument `lineno` is given, convert it from an
 372         absolute line number to the corresponding (source, line) pair.
 373         """
 374         if lineno is None:
 375             offset = self.line_offset
 376         else:
 377             offset = lineno - self.input_offset - 1
 378         try:
 379             src, srcoffset = self.input_lines.info(offset)
 380             srcline = srcoffset + 1
 381         except (TypeError):
 382             # line is None if index is "Just past the end"
 383             src, srcline = self.get_source_and_line(offset + self.input_offset)
 384             return src, srcline + 1
 385         except (IndexError): # `offset` is off the list
 386             src, srcline = None, None
 387             # raise AssertionError('cannot find line %d in %s lines' %
 388             #                      (offset, len(self.input_lines)))
 389             #                      # list(self.input_lines.lines())))
 390         # assert offset == srcoffset, str(self.input_lines)
 391         # print "get_source_and_line(%s):" % lineno,
 392         # print offset + 1, '->', src, srcline
 393         # print self.input_lines
 394         return (src, srcline)
 395
 396     def insert_input(self, input_lines, source):
 397         self.input_lines.insert(self.line_offset + 1, '',
 398                                 source='internal padding after '+source,
 399                                 offset=len(input_lines))
 400         self.input_lines.insert(self.line_offset + 1, '',
 401                                 source='internal padding before '+source,
 402                                 offset=-1)
 403         self.input_lines.insert(self.line_offset + 2,
 404                                 StringList(input_lines, source))
 405
 406     def get_text_block(self, flush_left=False):
 407         """
 408         Return a contiguous block of text.
 409
 410         If `flush_left` is true, raise `UnexpectedIndentationError` if an
 411         indented line is encountered before the text block ends (with a blank
 412         line).
 413         """
 414         try:
 415             block = self.input_lines.get_text_block(self.line_offset,
 416                                                     flush_left)
 417             self.next_line(len(block) - 1)
 418             return block
 419         except UnexpectedIndentationError, err:
 420             block = err.args[0]
 421             self.next_line(len(block) - 1) # advance to last line of block
 422             raise
 423
 424     def check_line(self, context, state, transitions=None):
 425         """
 426         Examine one line of input for a transition match & execute its method.
 427
 428         Parameters:
 429
 430         - `context`: application-dependent storage.
 431         - `state`: a `State` object, the current state.
 432         - `transitions`: an optional ordered list of transition names to try,
 433           instead of ``state.transition_order``.
 434
 435         Return the values returned by the transition method:
 436
 437         - context: possibly modified from the parameter `context`;
 438         - next state name (`State` subclass name);
 439         - the result output of the transition, a list.
 440
 441         When there is no match, ``state.no_match()`` is called and its return
 442         value is returned.
 443         """
 444         if transitions is None:
 445             transitions =  state.transition_order
 446         state_correction = None
 447         if self.debug:
 448             print >>self._stderr, (
 449                   '\nStateMachine.check_line: state="%s", transitions=%r.'
 450                   % (state.__class__.__name__, transitions))
 451         for name in transitions:
 452             pattern, method, next_state = state.transitions[name]
 453             match = pattern.match(self.line)
 454             if match:
 455                 if self.debug:
 456                     print >>self._stderr, (
 457                           '\nStateMachine.check_line: Matched transition '
 458                           '"%s" in state "%s".'
 459                           % (name, state.__class__.__name__))
 460                 return method(match, context, next_state)
 461         else:
 462             if self.debug:
 463                 print >>self._stderr, (
 464                       '\nStateMachine.check_line: No match in state "%s".'
 465                       % state.__class__.__name__)
 466             return state.no_match(context, transitions)
 467
 468     def add_state(self, state_class):
 469         """
 470         Initialize & add a `state_class` (`State` subclass) object.
 471
 472         Exception: `DuplicateStateError` raised if `state_class` was already
 473         added.
 474         """
 475         statename = state_class.__name__
 476         if statename in self.states:
 477             raise DuplicateStateError(statename)
 478         self.states[statename] = state_class(self, self.debug)
 479
 480     def add_states(self, state_classes):
 481         """
 482         Add `state_classes` (a list of `State` subclasses).
 483         """
 484         for state_class in state_classes:
 485             self.add_state(state_class)
 486
 487     def runtime_init(self):
 488         """
 489         Initialize `self.states`.
 490         """
 491         for state in self.states.values():
 492             state.runtime_init()
 493
 494     def error(self):
 495         """Report error details."""
 496         type, value, module, line, function = _exception_data()
 497         print >>self._stderr, u'%s: %s' % (type, value)
 498         print >>self._stderr, 'input line %s' % (self.abs_line_number())
 499         print >>self._stderr, (u'module %s, line %s, function %s' %
 500                                (module, line, function))
 501
 502     def attach_observer(self, observer):
 503         """
 504         The `observer` parameter is a function or bound method which takes two
 505         arguments, the source and offset of the current line.
 506         """
 507         self.observers.append(observer)
 508
 509     def detach_observer(self, observer):
 510         self.observers.remove(observer)
 511
 512     def notify_observers(self):
 513         for observer in self.observers:
 514             try:
 515                 info = self.input_lines.info(self.line_offset)
 516             except IndexError:
 517                 info = (None, None)
 518             observer(*info)
 519
 520
 521 class State:
 522
 523     """
 524     State superclass. Contains a list of transitions, and transition methods.
 525
 526     Transition methods all have the same signature. They take 3 parameters:
 527
 528     - An `re` match object. ``match.string`` contains the matched input line,
 529       ``match.start()`` gives the start index of the match, and
 530       ``match.end()`` gives the end index.
 531     - A context object, whose meaning is application-defined (initial value
 532       ``None``). It can be used to store any information required by the state
 533       machine, and the retured context is passed on to the next transition
 534       method unchanged.
 535     - The name of the next state, a string, taken from the transitions list;
 536       normally it is returned unchanged, but it may be altered by the
 537       transition method if necessary.
 538
 539     Transition methods all return a 3-tuple:
 540
 541     - A context object, as (potentially) modified by the transition method.
 542     - The next state name (a return value of ``None`` means no state change).
 543     - The processing result, a list, which is accumulated by the state
 544       machine.
 545
 546     Transition methods may raise an `EOFError` to cut processing short.
 547
 548     There are two implicit transitions, and corresponding transition methods
 549     are defined: `bof()` handles the beginning-of-file, and `eof()` handles
 550     the end-of-file. These methods have non-standard signatures and return
 551     values. `bof()` returns the initial context and results, and may be used
 552     to return a header string, or do any other processing needed. `eof()`
 553     should handle any remaining context and wrap things up; it returns the
 554     final processing result.
 555
 556     Typical applications need only subclass `State` (or a subclass), set the
 557     `patterns` and `initial_transitions` class attributes, and provide
 558     corresponding transition methods. The default object initialization will
 559     take care of constructing the list of transitions.
 560     """
 561
 562     patterns = None
 563     """
 564     {Name: pattern} mapping, used by `make_transition()`. Each pattern may
 565     be a string or a compiled `re` pattern. Override in subclasses.
 566     """
 567
 568     initial_transitions = None
 569     """
 570     A list of transitions to initialize when a `State` is instantiated.
 571     Each entry is either a transition name string, or a (transition name, next
 572     state name) pair. See `make_transitions()`. Override in subclasses.
 573     """
 574
 575     nested_sm = None
 576     """
 577     The `StateMachine` class for handling nested processing.
 578
 579     If left as ``None``, `nested_sm` defaults to the class of the state's
 580     controlling state machine. Override it in subclasses to avoid the default.
 581     """
 582
 583     nested_sm_kwargs = None
 584     """
 585     Keyword arguments dictionary, passed to the `nested_sm` constructor.
 586
 587     Two keys must have entries in the dictionary:
 588
 589     - Key 'state_classes' must be set to a list of `State` classes.
 590     - Key 'initial_state' must be set to the name of the initial state class.
 591
 592     If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
 593     class of the current state, and 'initial_state' defaults to the name of
 594     the class of the current state. Override in subclasses to avoid the
 595     defaults.
 596     """
 597
 598     def __init__(self, state_machine, debug=False):
 599         """
 600         Initialize a `State` object; make & add initial transitions.
 601
 602         Parameters:
 603
 604         - `statemachine`: the controlling `StateMachine` object.
 605         - `debug`: a boolean; produce verbose output if true.
 606         """
 607
 608         self.transition_order = []
 609         """A list of transition names in search order."""
 610
 611         self.transitions = {}
 612         """
 613         A mapping of transition names to 3-tuples containing
 614         (compiled_pattern, transition_method, next_state_name). Initialized as
 615         an instance attribute dynamically (instead of as a class attribute)
 616         because it may make forward references to patterns and methods in this
 617         or other classes.
 618         """
 619
 620         self.add_initial_transitions()
 621
 622         self.state_machine = state_machine
 623         """A reference to the controlling `StateMachine` object."""
 624
 625         self.debug = debug
 626         """Debugging mode on/off."""
 627
 628         if self.nested_sm is None:
 629             self.nested_sm = self.state_machine.__class__
 630         if self.nested_sm_kwargs is None:
 631             self.nested_sm_kwargs = {'state_classes': [self.__class__],
 632                                      'initial_state': self.__class__.__name__}
 633
 634     def runtime_init(self):
 635         """
 636         Initialize this `State` before running the state machine; called from
 637         `self.state_machine.run()`.
 638         """
 639         pass
 640
 641     def unlink(self):
 642         """Remove circular references to objects no longer required."""
 643         self.state_machine = None
 644
 645     def add_initial_transitions(self):
 646         """Make and add transitions listed in `self.initial_transitions`."""
 647         if self.initial_transitions:
 648             names, transitions = self.make_transitions(
 649                   self.initial_transitions)
 650             self.add_transitions(names, transitions)
 651
 652     def add_transitions(self, names, transitions):
 653         """
 654         Add a list of transitions to the start of the transition list.
 655
 656         Parameters:
 657
 658         - `names`: a list of transition names.
 659         - `transitions`: a mapping of names to transition tuples.
 660
 661         Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
 662         """
 663         for name in names:
 664             if name in self.transitions:
 665                 raise DuplicateTransitionError(name)
 666             if name not in transitions:
 667                 raise UnknownTransitionError(name)
 668         self.transition_order[:0] = names
 669         self.transitions.update(transitions)
 670
 671     def add_transition(self, name, transition):
 672         """
 673         Add a transition to the start of the transition list.
 674
 675         Parameter `transition`: a ready-made transition 3-tuple.
 676
 677         Exception: `DuplicateTransitionError`.
 678         """
 679         if name in self.transitions:
 680             raise DuplicateTransitionError(name)
 681         self.transition_order[:0] = [name]
 682         self.transitions[name] = transition
 683
 684     def remove_transition(self, name):
 685         """
 686         Remove a transition by `name`.
 687
 688         Exception: `UnknownTransitionError`.
 689         """
 690         try:
 691             del self.transitions[name]
 692             self.transition_order.remove(name)
 693         except:
 694             raise UnknownTransitionError(name)
 695
 696     def make_transition(self, name, next_state=None):
 697         """
 698         Make & return a transition tuple based on `name`.
 699
 700         This is a convenience function to simplify transition creation.
 701
 702         Parameters:
 703
 704         - `name`: a string, the name of the transition pattern & method. This
 705           `State` object must have a method called '`name`', and a dictionary
 706           `self.patterns` containing a key '`name`'.
 707         - `next_state`: a string, the name of the next `State` object for this
 708           transition. A value of ``None`` (or absent) implies no state change
 709           (i.e., continue with the same state).
 710
 711         Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
 712         """
 713         if next_state is None:
 714             next_state = self.__class__.__name__
 715         try:
 716             pattern = self.patterns[name]
 717             if not hasattr(pattern, 'match'):
 718                 pattern = re.compile(pattern)
 719         except KeyError:
 720             raise TransitionPatternNotFound(
 721                   '%s.patterns[%r]' % (self.__class__.__name__, name))
 722         try:
 723             method = getattr(self, name)
 724         except AttributeError:
 725             raise TransitionMethodNotFound(
 726                   '%s.%s' % (self.__class__.__name__, name))
 727         return (pattern, method, next_state)
 728
 729     def make_transitions(self, name_list):
 730         """
 731         Return a list of transition names and a transition mapping.
 732
 733         Parameter `name_list`: a list, where each entry is either a transition
 734         name string, or a 1- or 2-tuple (transition name, optional next state
 735         name).
 736         """
 737         stringtype = type('')
 738         names = []
 739         transitions = {}
 740         for namestate in name_list:
 741             if type(namestate) is stringtype:
 742                 transitions[namestate] = self.make_transition(namestate)
 743                 names.append(namestate)
 744             else:
 745                 transitions[namestate[0]] = self.make_transition(*namestate)
 746                 names.append(namestate[0])
 747         return names, transitions
 748
 749     def no_match(self, context, transitions):
 750         """
 751         Called when there is no match from `StateMachine.check_line()`.
 752
 753         Return the same values returned by transition methods:
 754
 755         - context: unchanged;
 756         - next state name: ``None``;
 757         - empty result list.
 758
 759         Override in subclasses to catch this event.
 760         """
 761         return context, None, []
 762
 763     def bof(self, context):
 764         """
 765         Handle beginning-of-file. Return unchanged `context`, empty result.
 766
 767         Override in subclasses.
 768
 769         Parameter `context`: application-defined storage.
 770         """
 771         return context, []
 772
 773     def eof(self, context):
 774         """
 775         Handle end-of-file. Return empty result.
 776
 777         Override in subclasses.
 778
 779         Parameter `context`: application-defined storage.
 780         """
 781         return []
 782
 783     def nop(self, match, context, next_state):
 784         """
 785         A "do nothing" transition method.
 786
 787         Return unchanged `context` & `next_state`, empty result. Useful for
 788         simple state changes (actionless transitions).
 789         """
 790         return context, next_state, []
 791
 792
 793 class StateMachineWS(StateMachine):
 794
 795     """
 796     `StateMachine` subclass specialized for whitespace recognition.
 797
 798     There are three methods provided for extracting indented text blocks:
 799
 800     - `get_indented()`: use when the indent is unknown.
 801     - `get_known_indented()`: use when the indent is known for all lines.
 802     - `get_first_known_indented()`: use when only the first line's indent is
 803       known.
 804     """
 805
 806     def get_indented(self, until_blank=False, strip_indent=True):
 807         """
 808         Return a block of indented lines of text, and info.
 809
 810         Extract an indented block where the indent is unknown for all lines.
 811
 812         :Parameters:
 813             - `until_blank`: Stop collecting at the first blank line if true.
 814             - `strip_indent`: Strip common leading indent if true (default).
 815
 816         :Return:
 817             - the indented block (a list of lines of text),
 818             - its indent,
 819             - its first line offset from BOF, and
 820             - whether or not it finished with a blank line.
 821         """
 822         offset = self.abs_line_offset()
 823         indented, indent, blank_finish = self.input_lines.get_indented(
 824               self.line_offset, until_blank, strip_indent)
 825         if indented:
 826             self.next_line(len(indented) - 1) # advance to last indented line
 827         while indented and not indented[0].strip():
 828             indented.trim_start()
 829             offset += 1
 830         return indented, indent, offset, blank_finish
 831
 832     def get_known_indented(self, indent, until_blank=False, strip_indent=True):
 833         """
 834         Return an indented block and info.
 835
 836         Extract an indented block where the indent is known for all lines.
 837         Starting with the current line, extract the entire text block with at
 838         least `indent` indentation (which must be whitespace, except for the
 839         first line).
 840
 841         :Parameters:
 842             - `indent`: The number of indent columns/characters.
 843             - `until_blank`: Stop collecting at the first blank line if true.
 844             - `strip_indent`: Strip `indent` characters of indentation if true
 845               (default).
 846
 847         :Return:
 848             - the indented block,
 849             - its first line offset from BOF, and
 850             - whether or not it finished with a blank line.
 851         """
 852         offset = self.abs_line_offset()
 853         indented, indent, blank_finish = self.input_lines.get_indented(
 854               self.line_offset, until_blank, strip_indent,
 855               block_indent=indent)
 856         self.next_line(len(indented) - 1) # advance to last indented line
 857         while indented and not indented[0].strip():
 858             indented.trim_start()
 859             offset += 1
 860         return indented, offset, blank_finish
 861
 862     def get_first_known_indented(self, indent, until_blank=False,
 863                                  strip_indent=True, strip_top=True):
 864         """
 865         Return an indented block and info.
 866
 867         Extract an indented block where the indent is known for the first line
 868         and unknown for all other lines.
 869
 870         :Parameters:
 871             - `indent`: The first line's indent (# of columns/characters).
 872             - `until_blank`: Stop collecting at the first blank line if true
 873               (1).
 874             - `strip_indent`: Strip `indent` characters of indentation if true
 875               (1, default).
 876             - `strip_top`: Strip blank lines from the beginning of the block.
 877
 878         :Return:
 879             - the indented block,
 880             - its indent,
 881             - its first line offset from BOF, and
 882             - whether or not it finished with a blank line.
 883         """
 884         offset = self.abs_line_offset()
 885         indented, indent, blank_finish = self.input_lines.get_indented(
 886               self.line_offset, until_blank, strip_indent,
 887               first_indent=indent)
 888         self.next_line(len(indented) - 1) # advance to last indented line
 889         if strip_top:
 890             while indented and not indented[0].strip():
 891                 indented.trim_start()
 892                 offset += 1
 893         return indented, indent, offset, blank_finish
 894
 895
 896 class StateWS(State):
 897
 898     """
 899     State superclass specialized for whitespace (blank lines & indents).
 900
 901     Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
 902     lines) and 'indent' (for indented text blocks) are added automatically,
 903     before any other transitions.  The transition method `blank()` handles
 904     blank lines and `indent()` handles nested indented blocks.  Indented
 905     blocks trigger a new state machine to be created by `indent()` and run.
 906     The class of the state machine to be created is in `indent_sm`, and the
 907     constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
 908
 909     The methods `known_indent()` and `firstknown_indent()` are provided for
 910     indented blocks where the indent (all lines' and first line's only,
 911     respectively) is known to the transition method, along with the attributes
 912     `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
 913     is triggered automatically.
 914     """
 915
 916     indent_sm = None
 917     """
 918     The `StateMachine` class handling indented text blocks.
 919
 920     If left as ``None``, `indent_sm` defaults to the value of
 921     `State.nested_sm`.  Override it in subclasses to avoid the default.
 922     """
 923
 924     indent_sm_kwargs = None
 925     """
 926     Keyword arguments dictionary, passed to the `indent_sm` constructor.
 927
 928     If left as ``None``, `indent_sm_kwargs` defaults to the value of
 929     `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
 930     """
 931
 932     known_indent_sm = None
 933     """
 934     The `StateMachine` class handling known-indented text blocks.
 935
 936     If left as ``None``, `known_indent_sm` defaults to the value of
 937     `indent_sm`.  Override it in subclasses to avoid the default.
 938     """
 939
 940     known_indent_sm_kwargs = None
 941     """
 942     Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
 943
 944     If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
 945     `indent_sm_kwargs`. Override it in subclasses to avoid the default.
 946     """
 947
 948     ws_patterns = {'blank': ' *$',
 949                    'indent': ' +'}
 950     """Patterns for default whitespace transitions.  May be overridden in
 951     subclasses."""
 952
 953     ws_initial_transitions = ('blank', 'indent')
 954     """Default initial whitespace transitions, added before those listed in
 955     `State.initial_transitions`.  May be overridden in subclasses."""
 956
 957     def __init__(self, state_machine, debug=False):
 958         """
 959         Initialize a `StateSM` object; extends `State.__init__()`.
 960
 961         Check for indent state machine attributes, set defaults if not set.
 962         """
 963         State.__init__(self, state_machine, debug)
 964         if self.indent_sm is None:
 965             self.indent_sm = self.nested_sm
 966         if self.indent_sm_kwargs is None:
 967             self.indent_sm_kwargs = self.nested_sm_kwargs
 968         if self.known_indent_sm is None:
 969             self.known_indent_sm = self.indent_sm
 970         if self.known_indent_sm_kwargs is None:
 971             self.known_indent_sm_kwargs = self.indent_sm_kwargs
 972
 973     def add_initial_transitions(self):
 974         """
 975         Add whitespace-specific transitions before those defined in subclass.
 976
 977         Extends `State.add_initial_transitions()`.
 978         """
 979         State.add_initial_transitions(self)
 980         if self.patterns is None:
 981             self.patterns = {}
 982         self.patterns.update(self.ws_patterns)
 983         names, transitions = self.make_transitions(
 984             self.ws_initial_transitions)
 985         self.add_transitions(names, transitions)
 986
 987     def blank(self, match, context, next_state):
 988         """Handle blank lines. Does nothing. Override in subclasses."""
 989         return self.nop(match, context, next_state)
 990
 991     def indent(self, match, context, next_state):
 992         """
 993         Handle an indented text block. Extend or override in subclasses.
 994
 995         Recursively run the registered state machine for indented blocks
 996         (`self.indent_sm`).
 997         """
 998         indented, indent, line_offset, blank_finish = \
 999               self.state_machine.get_indented()
1000         sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
1001         results = sm.run(indented, input_offset=line_offset)
1002         return context, next_state, results
1003
1004     def known_indent(self, match, context, next_state):
1005         """
1006         Handle a known-indent text block. Extend or override in subclasses.
1007
1008         Recursively run the registered state machine for known-indent indented
1009         blocks (`self.known_indent_sm`). The indent is the length of the
1010         match, ``match.end()``.
1011         """
1012         indented, line_offset, blank_finish = \
1013               self.state_machine.get_known_indented(match.end())
1014         sm = self.known_indent_sm(debug=self.debug,
1015                                  **self.known_indent_sm_kwargs)
1016         results = sm.run(indented, input_offset=line_offset)
1017         return context, next_state, results
1018
1019     def first_known_indent(self, match, context, next_state):
1020         """
1021         Handle an indented text block (first line's indent known).
1022
1023         Extend or override in subclasses.
1024
1025         Recursively run the registered state machine for known-indent indented
1026         blocks (`self.known_indent_sm`). The indent is the length of the
1027         match, ``match.end()``.
1028         """
1029         indented, line_offset, blank_finish = \
1030               self.state_machine.get_first_known_indented(match.end())
1031         sm = self.known_indent_sm(debug=self.debug,
1032                                  **self.known_indent_sm_kwargs)
1033         results = sm.run(indented, input_offset=line_offset)
1034         return context, next_state, results
1035
1036
1037 class _SearchOverride:
1038
1039     """
1040     Mix-in class to override `StateMachine` regular expression behavior.
1041
1042     Changes regular expression matching, from the default `re.match()`
1043     (succeeds only if the pattern matches at the start of `self.line`) to
1044     `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1045     When subclassing a `StateMachine`, list this class **first** in the
1046     inheritance list of the class definition.
1047     """
1048
1049     def match(self, pattern):
1050         """
1051         Return the result of a regular expression search.
1052
1053         Overrides `StateMachine.match()`.
1054
1055         Parameter `pattern`: `re` compiled regular expression.
1056         """
1057         return pattern.search(self.line)
1058
1059
1060 class SearchStateMachine(_SearchOverride, StateMachine):
1061     """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1062     pass
1063
1064
1065 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1066     """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1067     pass
1068
1069
1070 class ViewList:
1071
1072     """
1073     List with extended functionality: slices of ViewList objects are child
1074     lists, linked to their parents. Changes made to a child list also affect
1075     the parent list.  A child list is effectively a "view" (in the SQL sense)
1076     of the parent list.  Changes to parent lists, however, do *not* affect
1077     active child lists.  If a parent list is changed, any active child lists
1078     should be recreated.
1079
1080     The start and end of the slice can be trimmed using the `trim_start()` and
1081     `trim_end()` methods, without affecting the parent list.  The link between
1082     child and parent lists can be broken by calling `disconnect()` on the
1083     child list.
1084
1085     Also, ViewList objects keep track of the source & offset of each item.
1086     This information is accessible via the `source()`, `offset()`, and
1087     `info()` methods.
1088     """
1089
1090     def __init__(self, initlist=None, source=None, items=None,
1091                  parent=None, parent_offset=None):
1092         self.data = []
1093         """The actual list of data, flattened from various sources."""
1094
1095         self.items = []
1096         """A list of (source, offset) pairs, same length as `self.data`: the
1097         source of each line and the offset of each line from the beginning of
1098         its source."""
1099
1100         self.parent = parent
1101         """The parent list."""
1102
1103         self.parent_offset = parent_offset
1104         """Offset of this list from the beginning of the parent list."""
1105
1106         if isinstance(initlist, ViewList):
1107             self.data = initlist.data[:]
1108             self.items = initlist.items[:]
1109         elif initlist is not None:
1110             self.data = list(initlist)
1111             if items:
1112                 self.items = items
1113             else:
1114                 self.items = [(source, i) for i in range(len(initlist))]
1115         assert len(self.data) == len(self.items), 'data mismatch'
1116
1117     def __str__(self):
1118         return str(self.data)
1119
1120     def __repr__(self):
1121         return '%s(%s, items=%s)' % (self.__class__.__name__,
1122                                      self.data, self.items)
1123
1124     def __lt__(self, other): return self.data <  self.__cast(other)
1125     def __le__(self, other): return self.data <= self.__cast(other)
1126     def __eq__(self, other): return self.data == self.__cast(other)
1127     def __ne__(self, other): return self.data != self.__cast(other)
1128     def __gt__(self, other): return self.data >  self.__cast(other)
1129     def __ge__(self, other): return self.data >= self.__cast(other)
1130     def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1131
1132     def __cast(self, other):
1133         if isinstance(other, ViewList):
1134             return other.data
1135         else:
1136             return other
1137
1138     def __contains__(self, item): return item in self.data
1139     def __len__(self): return len(self.data)
1140
1141     # The __getitem__()/__setitem__() methods check whether the index
1142     # is a slice first, since indexing a native list with a slice object
1143     # just works.
1144
1145     def __getitem__(self, i):
1146         if isinstance(i, types.SliceType):
1147             assert i.step in (None, 1),  'cannot handle slice with stride'
1148             return self.__class__(self.data[i.start:i.stop],
1149                                   items=self.items[i.start:i.stop],
1150                                   parent=self, parent_offset=i.start or 0)
1151         else:
1152             return self.data[i]
1153
1154     def __setitem__(self, i, item):
1155         if isinstance(i, types.SliceType):
1156             assert i.step in (None, 1), 'cannot handle slice with stride'
1157             if not isinstance(item, ViewList):
1158                 raise TypeError('assigning non-ViewList to ViewList slice')
1159             self.data[i.start:i.stop] = item.data
1160             self.items[i.start:i.stop] = item.items
1161             assert len(self.data) == len(self.items), 'data mismatch'
1162             if self.parent:
1163                 self.parent[(i.start or 0) + self.parent_offset
1164                             : (i.stop or len(self)) + self.parent_offset] = item
1165         else:
1166             self.data[i] = item
1167             if self.parent:
1168                 self.parent[i + self.parent_offset] = item
1169
1170     def __delitem__(self, i):
1171         try:
1172             del self.data[i]
1173             del self.items[i]
1174             if self.parent:
1175                 del self.parent[i + self.parent_offset]
1176         except TypeError:
1177             assert i.step is None, 'cannot handle slice with stride'
1178             del self.data[i.start:i.stop]
1179             del self.items[i.start:i.stop]
1180             if self.parent:
1181                 del self.parent[(i.start or 0) + self.parent_offset
1182                                 : (i.stop or len(self)) + self.parent_offset]
1183
1184     def __add__(self, other):
1185         if isinstance(other, ViewList):
1186             return self.__class__(self.data + other.data,
1187                                   items=(self.items + other.items))
1188         else:
1189             raise TypeError('adding non-ViewList to a ViewList')
1190
1191     def __radd__(self, other):
1192         if isinstance(other, ViewList):
1193             return self.__class__(other.data + self.data,
1194                                   items=(other.items + self.items))
1195         else:
1196             raise TypeError('adding ViewList to a non-ViewList')
1197
1198     def __iadd__(self, other):
1199         if isinstance(other, ViewList):
1200             self.data += other.data
1201         else:
1202             raise TypeError('argument to += must be a ViewList')
1203         return self
1204
1205     def __mul__(self, n):
1206         return self.__class__(self.data * n, items=(self.items * n))
1207
1208     __rmul__ = __mul__
1209
1210     def __imul__(self, n):
1211         self.data *= n
1212         self.items *= n
1213         return self
1214
1215     def extend(self, other):
1216         if not isinstance(other, ViewList):
1217             raise TypeError('extending a ViewList with a non-ViewList')
1218         if self.parent:
1219             self.parent.insert(len(self.data) + self.parent_offset, other)
1220         self.data.extend(other.data)
1221         self.items.extend(other.items)
1222
1223     def append(self, item, source=None, offset=0):
1224         if source is None:
1225             self.extend(item)
1226         else:
1227             if self.parent:
1228                 self.parent.insert(len(self.data) + self.parent_offset, item,
1229                                    source, offset)
1230             self.data.append(item)
1231             self.items.append((source, offset))
1232
1233     def insert(self, i, item, source=None, offset=0):
1234         if source is None:
1235             if not isinstance(item, ViewList):
1236                 raise TypeError('inserting non-ViewList with no source given')
1237             self.data[i:i] = item.data
1238             self.items[i:i] = item.items
1239             if self.parent:
1240                 index = (len(self.data) + i) % len(self.data)
1241                 self.parent.insert(index + self.parent_offset, item)
1242         else:
1243             self.data.insert(i, item)
1244             self.items.insert(i, (source, offset))
1245             if self.parent:
1246                 index = (len(self.data) + i) % len(self.data)
1247                 self.parent.insert(index + self.parent_offset, item,
1248                                    source, offset)
1249
1250     def pop(self, i=-1):
1251         if self.parent:
1252             index = (len(self.data) + i) % len(self.data)
1253             self.parent.pop(index + self.parent_offset)
1254         self.items.pop(i)
1255         return self.data.pop(i)
1256
1257     def trim_start(self, n=1):
1258         """
1259         Remove items from the start of the list, without touching the parent.
1260         """
1261         if n > len(self.data):
1262             raise IndexError("Size of trim too large; can't trim %s items "
1263                              "from a list of size %s." % (n, len(self.data)))
1264         elif n < 0:
1265             raise IndexError('Trim size must be >= 0.')
1266         del self.data[:n]
1267         del self.items[:n]
1268         if self.parent:
1269             self.parent_offset += n
1270
1271     def trim_end(self, n=1):
1272         """
1273         Remove items from the end of the list, without touching the parent.
1274         """
1275         if n > len(self.data):
1276             raise IndexError("Size of trim too large; can't trim %s items "
1277                              "from a list of size %s." % (n, len(self.data)))
1278         elif n < 0:
1279             raise IndexError('Trim size must be >= 0.')
1280         del self.data[-n:]
1281         del self.items[-n:]
1282
1283     def remove(self, item):
1284         index = self.index(item)
1285         del self[index]
1286
1287     def count(self, item): return self.data.count(item)
1288     def index(self, item): return self.data.index(item)
1289
1290     def reverse(self):
1291         self.data.reverse()
1292         self.items.reverse()
1293         self.parent = None
1294
1295     def sort(self, *args):
1296         tmp = zip(self.data, self.items)
1297         tmp.sort(*args)
1298         self.data = [entry[0] for entry in tmp]
1299         self.items = [entry[1] for entry in tmp]
1300         self.parent = None
1301
1302     def info(self, i):
1303         """Return source & offset for index `i`."""
1304         try:
1305             return self.items[i]
1306         except IndexError:
1307             if i == len(self.data):     # Just past the end
1308                 return self.items[i - 1][0], None
1309             else:
1310                 raise
1311
1312     def source(self, i):
1313         """Return source for index `i`."""
1314         return self.info(i)[0]
1315
1316     def offset(self, i):
1317         """Return offset for index `i`."""
1318         return self.info(i)[1]
1319
1320     def disconnect(self):
1321         """Break link between this list and parent list."""
1322         self.parent = None
1323
1324     def xitems(self):
1325         """Return iterator yielding (source, offset, value) tuples."""
1326         for (value, (source, offset)) in zip(self.data, self.items):
1327             yield (source, offset, value)
1328
1329     def pprint(self):
1330         """Print the list in `grep` format (`source:offset:value` lines)"""
1331         for line in self.xitems():
1332             print "%s:%d:%s" % line
1333
1334
1335 class StringList(ViewList):
1336
1337     """A `ViewList` with string-specific methods."""
1338
1339     def trim_left(self, length, start=0, end=sys.maxint):
1340         """
1341         Trim `length` characters off the beginning of each item, in-place,
1342         from index `start` to `end`.  No whitespace-checking is done on the
1343         trimmed text.  Does not affect slice parent.
1344         """
1345         self.data[start:end] = [line[length:]
1346                                 for line in self.data[start:end]]
1347
1348     def get_text_block(self, start, flush_left=False):
1349         """
1350         Return a contiguous block of text.
1351
1352         If `flush_left` is true, raise `UnexpectedIndentationError` if an
1353         indented line is encountered before the text block ends (with a blank
1354         line).
1355         """
1356         end = start
1357         last = len(self.data)
1358         while end < last:
1359             line = self.data[end]
1360             if not line.strip():
1361                 break
1362             if flush_left and (line[0] == ' '):
1363                 source, offset = self.info(end)
1364                 raise UnexpectedIndentationError(self[start:end], source,
1365                                                  offset + 1)
1366             end += 1
1367         return self[start:end]
1368
1369     def get_indented(self, start=0, until_blank=False, strip_indent=True,
1370                      block_indent=None, first_indent=None):
1371         """
1372         Extract and return a StringList of indented lines of text.
1373
1374         Collect all lines with indentation, determine the minimum indentation,
1375         remove the minimum indentation from all indented lines (unless
1376         `strip_indent` is false), and return them. All lines up to but not
1377         including the first unindented line will be returned.
1378
1379         :Parameters:
1380           - `start`: The index of the first line to examine.
1381           - `until_blank`: Stop collecting at the first blank line if true.
1382           - `strip_indent`: Strip common leading indent if true (default).
1383           - `block_indent`: The indent of the entire block, if known.
1384           - `first_indent`: The indent of the first line, if known.
1385
1386         :Return:
1387           - a StringList of indented lines with mininum indent removed;
1388           - the amount of the indent;
1389           - a boolean: did the indented block finish with a blank line or EOF?
1390         """
1391         indent = block_indent           # start with None if unknown
1392         end = start
1393         if block_indent is not None and first_indent is None:
1394             first_indent = block_indent
1395         if first_indent is not None:
1396             end += 1
1397         last = len(self.data)
1398         while end < last:
1399             line = self.data[end]
1400             if line and (line[0] != ' '
1401                          or (block_indent is not None
1402                              and line[:block_indent].strip())):
1403                 # Line not indented or insufficiently indented.
1404                 # Block finished properly iff the last indented line blank:
1405                 blank_finish = ((end > start)
1406                                 and not self.data[end - 1].strip())
1407                 break
1408             stripped = line.lstrip()
1409             if not stripped:            # blank line
1410                 if until_blank:
1411                     blank_finish = 1
1412                     break
1413             elif block_indent is None:
1414                 line_indent = len(line) - len(stripped)
1415                 if indent is None:
1416                     indent = line_indent
1417                 else:
1418                     indent = min(indent, line_indent)
1419             end += 1
1420         else:
1421             blank_finish = 1            # block ends at end of lines
1422         block = self[start:end]
1423         if first_indent is not None and block:
1424             block.data[0] = block.data[0][first_indent:]
1425         if indent and strip_indent:
1426             block.trim_left(indent, start=(first_indent is not None))
1427         return block, indent or 0, blank_finish
1428
1429     def get_2D_block(self, top, left, bottom, right, strip_indent=True):
1430         block = self[top:bottom]
1431         indent = right
1432         for i in range(len(block.data)):
1433             # get slice from line, care for combining characters
1434             ci = utils.column_indices(block.data[i])
1435             try:
1436                 left = ci[left]
1437             except IndexError:
1438                 left += len(block.data[i]) - len(ci)
1439             try:
1440                 right = ci[right]
1441             except IndexError:
1442                 right += len(block.data[i]) - len(ci)
1443             block.data[i] = line = block.data[i][left:right].rstrip()
1444             if line:
1445                 indent = min(indent, len(line) - len(line.lstrip()))
1446         if strip_indent and 0 < indent < right:
1447             block.data = [line[indent:] for line in block.data]
1448         return block
1449
1450     def pad_double_width(self, pad_char):
1451         """
1452         Pad all double-width characters in self by appending `pad_char` to each.
1453         For East Asian language support.
1454         """
1455         if hasattr(unicodedata, 'east_asian_width'):
1456             east_asian_width = unicodedata.east_asian_width
1457         else:
1458             return                      # new in Python 2.4
1459         for i in range(len(self.data)):
1460             line = self.data[i]
1461             if isinstance(line, unicode):
1462                 new = []
1463                 for char in line:
1464                     new.append(char)
1465                     if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1466                         new.append(pad_char)
1467                 self.data[i] = ''.join(new)
1468
1469     def replace(self, old, new):
1470         """Replace all occurrences of substring `old` with `new`."""
1471         for i in range(len(self.data)):
1472             self.data[i] = self.data[i].replace(old, new)
1473
1474
1475 class StateMachineError(Exception): pass
1476 class UnknownStateError(StateMachineError): pass
1477 class DuplicateStateError(StateMachineError): pass
1478 class UnknownTransitionError(StateMachineError): pass
1479 class DuplicateTransitionError(StateMachineError): pass
1480 class TransitionPatternNotFound(StateMachineError): pass
1481 class TransitionMethodNotFound(StateMachineError): pass
1482 class UnexpectedIndentationError(StateMachineError): pass
1483
1484
1485 class TransitionCorrection(Exception):
1486
1487     """
1488     Raise from within a transition method to switch to another transition.
1489
1490     Raise with one argument, the new transition name.
1491     """
1492
1493
1494 class StateCorrection(Exception):
1495
1496     """
1497     Raise from within a transition method to switch to another state.
1498
1499     Raise with one or two arguments: new state name, and an optional new
1500     transition name.
1501     """
1502
1503
1504 def string2lines(astring, tab_width=8, convert_whitespace=False,
1505                  whitespace=re.compile('[\v\f]')):
1506     """
1507     Return a list of one-line strings with tabs expanded, no newlines, and
1508     trailing whitespace stripped.
1509
1510     Each tab is expanded with between 1 and `tab_width` spaces, so that the
1511     next character's index becomes a multiple of `tab_width` (8 by default).
1512
1513     Parameters:
1514
1515     - `astring`: a multi-line string.
1516     - `tab_width`: the number of columns between tab stops.
1517     - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1518     """
1519     if convert_whitespace:
1520         astring = whitespace.sub(' ', astring)
1521     return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1522
1523 def _exception_data():
1524     """
1525     Return exception information:
1526
1527     - the exception's class name;
1528     - the exception object;
1529     - the name of the file containing the offending code;
1530     - the line number of the offending code;
1531     - the function name of the offending code.
1532     """
1533     type, value, traceback = sys.exc_info()
1534     while traceback.tb_next:
1535         traceback = traceback.tb_next
1536     code = traceback.tb_frame.f_code
1537     return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1538             code.co_name)