docutils/docutils/statemachine.py

   1  # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 A finite state machine specialized for regular-expression-based text filters,
   7 this module defines the following classes:
   8
   9 - `StateMachine`, a state machine
  10 - `State`, a state superclass
  11 - `StateMachineWS`, a whitespace-sensitive version of `StateMachine`
  12 - `StateWS`, a state superclass for use with `StateMachineWS`
  13 - `SearchStateMachine`, uses `re.search()` instead of `re.match()`
  14 - `SearchStateMachineWS`, uses `re.search()` instead of `re.match()`
  15 - `ViewList`, extends standard Python lists.
  16 - `StringList`, string-specific ViewList.
  17
  18 Exception classes:
  19
  20 - `StateMachineError`
  21 - `UnknownStateError`
  22 - `DuplicateStateError`
  23 - `UnknownTransitionError`
  24 - `DuplicateTransitionError`
  25 - `TransitionPatternNotFound`
  26 - `TransitionMethodNotFound`
  27 - `UnexpectedIndentationError`
  28 - `TransitionCorrection`: Raised to switch to another transition.
  29 - `StateCorrection`: Raised to switch to another state & transition.
  30
  31 Functions:
  32
  33 - `string2lines()`: split a multi-line string into a list of one-line strings
  34
  35
  36 How To Use This Module
  37 ======================
  38 (See the individual classes, methods, and attributes for details.)
  39
  40 1. Import it: ``import statemachine`` or ``from statemachine import ...``.
  41    You will also need to ``import re``.
  42
  43 2. Derive a subclass of `State` (or `StateWS`) for each state in your state
  44    machine::
  45
  46        class MyState(statemachine.State):
  47
  48    Within the state's class definition:
  49
  50    a) Include a pattern for each transition, in `State.patterns`::
  51
  52           patterns = {'atransition': r'pattern', ...}
  53
  54    b) Include a list of initial transitions to be set up automatically, in
  55       `State.initial_transitions`::
  56
  57           initial_transitions = ['atransition', ...]
  58
  59    c) Define a method for each transition, with the same name as the
  60       transition pattern::
  61
  62           def atransition(self, match, context, next_state):
  63               # do something
  64               result = [...]  # a list
  65               return context, next_state, result
  66               # context, next_state may be altered
  67
  68       Transition methods may raise an `EOFError` to cut processing short.
  69
  70    d) You may wish to override the `State.bof()` and/or `State.eof()` implicit
  71       transition methods, which handle the beginning- and end-of-file.
  72
  73    e) In order to handle nested processing, you may wish to override the
  74       attributes `State.nested_sm` and/or `State.nested_sm_kwargs`.
  75
  76       If you are using `StateWS` as a base class, in order to handle nested
  77       indented blocks, you may wish to:
  78
  79       - override the attributes `StateWS.indent_sm`,
  80         `StateWS.indent_sm_kwargs`, `StateWS.known_indent_sm`, and/or
  81         `StateWS.known_indent_sm_kwargs`;
  82       - override the `StateWS.blank()` method; and/or
  83       - override or extend the `StateWS.indent()`, `StateWS.known_indent()`,
  84         and/or `StateWS.firstknown_indent()` methods.
  85
  86 3. Create a state machine object::
  87
  88        sm = StateMachine(state_classes=[MyState, ...],
  89                          initial_state='MyState')
  90
  91 4. Obtain the input text, which needs to be converted into a tab-free list of
  92    one-line strings. For example, to read text from a file called
  93    'inputfile'::
  94
  95        input_string = open('inputfile').read()
  96        input_lines = statemachine.string2lines(input_string)
  97
  98 5. Run the state machine on the input text and collect the results, a list::
  99
 100        results = sm.run(input_lines)
 101
 102 6. Remove any lingering circular references::
 103
 104        sm.unlink()
 105 """
 106
 107 __docformat__ = 'restructuredtext'
 108
 109 import sys
 110 import re
 111 import types
 112 import unicodedata
 113 from docutils import utils
 114 from docutils.error_reporting import ErrorOutput
 115
 116
 117 class StateMachine:
 118
 119     """
 120     A finite state machine for text filters using regular expressions.
 121
 122     The input is provided in the form of a list of one-line strings (no
 123     newlines). States are subclasses of the `State` class. Transitions consist
 124     of regular expression patterns and transition methods, and are defined in
 125     each state.
 126
 127     The state machine is started with the `run()` method, which returns the
 128     results of processing in a list.
 129     """
 130
 131     def __init__(self, state_classes, initial_state, debug=0):
 132         """
 133         Initialize a `StateMachine` object; add state objects.
 134
 135         Parameters:
 136
 137         - `state_classes`: a list of `State` (sub)classes.
 138         - `initial_state`: a string, the class name of the initial state.
 139         - `debug`: a boolean; produce verbose output if true (nonzero).
 140         """
 141
 142         self.input_lines = None
 143         """`StringList` of input lines (without newlines).
 144         Filled by `self.run()`."""
 145
 146         self.input_offset = 0
 147         """Offset of `self.input_lines` from the beginning of the file."""
 148
 149         self.line = None
 150         """Current input line."""
 151
 152         self.line_offset = -1
 153         """Current input line offset from beginning of `self.input_lines`."""
 154
 155         self.debug = debug
 156         """Debugging mode on/off."""
 157
 158         self.initial_state = initial_state
 159         """The name of the initial state (key to `self.states`)."""
 160
 161         self.current_state = initial_state
 162         """The name of the current state (key to `self.states`)."""
 163
 164         self.states = {}
 165         """Mapping of {state_name: State_object}."""
 166
 167         self.add_states(state_classes)
 168
 169         self.observers = []
 170         """List of bound methods or functions to call whenever the current
 171         line changes.  Observers are called with one argument, ``self``.
 172         Cleared at the end of `run()`."""
 173
 174         self._stderr = ErrorOutput()
 175         """Wrapper around sys.stderr catching en-/decoding errors"""
 176
 177
 178     def unlink(self):
 179         """Remove circular references to objects no longer required."""
 180         for state in self.states.values():
 181             state.unlink()
 182         self.states = None
 183
 184     def run(self, input_lines, input_offset=0, context=None,
 185             input_source=None, initial_state=None):
 186         """
 187         Run the state machine on `input_lines`. Return results (a list).
 188
 189         Reset `self.line_offset` and `self.current_state`. Run the
 190         beginning-of-file transition. Input one line at a time and check for a
 191         matching transition. If a match is found, call the transition method
 192         and possibly change the state. Store the context returned by the
 193         transition method to be passed on to the next transition matched.
 194         Accumulate the results returned by the transition methods in a list.
 195         Run the end-of-file transition. Finally, return the accumulated
 196         results.
 197
 198         Parameters:
 199
 200         - `input_lines`: a list of strings without newlines, or `StringList`.
 201         - `input_offset`: the line offset of `input_lines` from the beginning
 202           of the file.
 203         - `context`: application-specific storage.
 204         - `input_source`: name or path of source of `input_lines`.
 205         - `initial_state`: name of initial state.
 206         """
 207         self.runtime_init()
 208         if isinstance(input_lines, StringList):
 209             self.input_lines = input_lines
 210         else:
 211             self.input_lines = StringList(input_lines, source=input_source)
 212         self.input_offset = input_offset
 213         self.line_offset = -1
 214         self.current_state = initial_state or self.initial_state
 215         if self.debug:
 216             print >>self._stderr, (
 217                 u'\nStateMachine.run: input_lines (line_offset=%s):\n| %s'
 218                 % (self.line_offset, u'\n| '.join(self.input_lines)))
 219         transitions = None
 220         results = []
 221         state = self.get_state()
 222         try:
 223             if self.debug:
 224                 print >>self._stderr, '\nStateMachine.run: bof transition'
 225             context, result = state.bof(context)
 226             results.extend(result)
 227             while 1:
 228                 try:
 229                     try:
 230                         self.next_line()
 231                         if self.debug:
 232                             source, offset = self.input_lines.info(
 233                                 self.line_offset)
 234                             print >>self._stderr, (
 235                                 u'\nStateMachine.run: line (source=%r, '
 236                                 u'offset=%r):\n| %s'
 237                                 % (source, offset, self.line))
 238                         context, next_state, result = self.check_line(
 239                             context, state, transitions)
 240                     except EOFError:
 241                         if self.debug:
 242                             print >>self._stderr, (
 243                                 '\nStateMachine.run: %s.eof transition'
 244                                 % state.__class__.__name__)
 245                         result = state.eof(context)
 246                         results.extend(result)
 247                         break
 248                     else:
 249                         results.extend(result)
 250                 except TransitionCorrection, exception:
 251                     self.previous_line() # back up for another try
 252                     transitions = (exception.args[0],)
 253                     if self.debug:
 254                         print >>self._stderr, (
 255                               '\nStateMachine.run: TransitionCorrection to '
 256                               'state "%s", transition %s.'
 257                               % (state.__class__.__name__, transitions[0]))
 258                     continue
 259                 except StateCorrection, exception:
 260                     self.previous_line() # back up for another try
 261                     next_state = exception.args[0]
 262                     if len(exception.args) == 1:
 263                         transitions = None
 264                     else:
 265                         transitions = (exception.args[1],)
 266                     if self.debug:
 267                         print >>self._stderr, (
 268                               '\nStateMachine.run: StateCorrection to state '
 269                               '"%s", transition %s.'
 270                               % (next_state, transitions[0]))
 271                 else:
 272                     transitions = None
 273                 state = self.get_state(next_state)
 274         except:
 275             if self.debug:
 276                 self.error()
 277             raise
 278         self.observers = []
 279         return results
 280
 281     def get_state(self, next_state=None):
 282         """
 283         Return current state object; set it first if `next_state` given.
 284
 285         Parameter `next_state`: a string, the name of the next state.
 286
 287         Exception: `UnknownStateError` raised if `next_state` unknown.
 288         """
 289         if next_state:
 290             if self.debug and next_state != self.current_state:
 291                 print >>self._stderr, (
 292                     '\nStateMachine.get_state: Changing state from '
 293                     '"%s" to "%s" (input line %s).'
 294                     % (self.current_state, next_state,
 295                        self.abs_line_number()))
 296             self.current_state = next_state
 297         try:
 298             return self.states[self.current_state]
 299         except KeyError:
 300             raise UnknownStateError(self.current_state)
 301
 302     def next_line(self, n=1):
 303         """Load `self.line` with the `n`'th next line and return it."""
 304         try:
 305             try:
 306                 self.line_offset += n
 307                 self.line = self.input_lines[self.line_offset]
 308             except IndexError:
 309                 self.line = None
 310                 raise EOFError
 311             return self.line
 312         finally:
 313             self.notify_observers()
 314
 315     def is_next_line_blank(self):
 316         """Return 1 if the next line is blank or non-existant."""
 317         try:
 318             return not self.input_lines[self.line_offset + 1].strip()
 319         except IndexError:
 320             return 1
 321
 322     def at_eof(self):
 323         """Return 1 if the input is at or past end-of-file."""
 324         return self.line_offset >= len(self.input_lines) - 1
 325
 326     def at_bof(self):
 327         """Return 1 if the input is at or before beginning-of-file."""
 328         return self.line_offset <= 0
 329
 330     def previous_line(self, n=1):
 331         """Load `self.line` with the `n`'th previous line and return it."""
 332         self.line_offset -= n
 333         if self.line_offset < 0:
 334             self.line = None
 335         else:
 336             self.line = self.input_lines[self.line_offset]
 337         self.notify_observers()
 338         return self.line
 339
 340     def goto_line(self, line_offset):
 341         """Jump to absolute line offset `line_offset`, load and return it."""
 342         try:
 343             try:
 344                 self.line_offset = line_offset - self.input_offset
 345                 self.line = self.input_lines[self.line_offset]
 346             except IndexError:
 347                 self.line = None
 348                 raise EOFError
 349             return self.line
 350         finally:
 351             self.notify_observers()
 352
 353     def get_source(self, line_offset):
 354         """Return source of line at absolute line offset `line_offset`."""
 355         return self.input_lines.source(line_offset - self.input_offset)
 356
 357     def abs_line_offset(self):
 358         """Return line offset of current line, from beginning of file."""
 359         return self.line_offset + self.input_offset
 360
 361     def abs_line_number(self):
 362         """Return line number of current line (counting from 1)."""
 363         return self.line_offset + self.input_offset + 1
 364
 365     def get_source_and_line(self, lineno=None):
 366         """Return (source, line) tuple for current or given line number.
 367
 368         Looks up the source and line number in the `self.input_lines`
 369         StringList instance to count for included source files.
 370
 371         If the optional argument `lineno` is given, convert it from an
 372         absolute line number to the corresponding (source, line) pair.
 373         """
 374         if lineno is None:
 375             offset = self.line_offset
 376         else:
 377             offset = lineno - self.input_offset - 1
 378         try:
 379             src, srcoffset = self.input_lines.info(offset)
 380             srcline = srcoffset + 1
 381         except (TypeError):
 382             # line is None if index is "Just past the end"
 383             src, srcline = self.get_source_and_line(offset + self.input_offset)
 384             return src, srcline + 1
 385         except (IndexError): # `offset` is off the list
 386             src, srcline = None, None
 387             # raise AssertionError('cannot find line %d in %s lines' %
 388             #                      (offset, len(self.input_lines)))
 389             #                      # list(self.input_lines.lines())))
 390         # assert offset == srcoffset, str(self.input_lines)
 391         # print "get_source_and_line(%s):" % lineno,
 392         # print offset + 1, '->', src, srcline
 393         # print self.input_lines
 394         return (src, srcline)
 395
 396     def insert_input(self, input_lines, source):
 397         self.input_lines.insert(self.line_offset + 1, '',
 398                                 source='internal padding after '+source,
 399                                 offset=len(input_lines))
 400         self.input_lines.insert(self.line_offset + 1, '',
 401                                 source='internal padding before '+source,
 402                                 offset=-1)
 403         self.input_lines.insert(self.line_offset + 2,
 404                                 StringList(input_lines, source))
 405
 406     def get_text_block(self, flush_left=0):
 407         """
 408         Return a contiguous block of text.
 409
 410         If `flush_left` is true, raise `UnexpectedIndentationError` if an
 411         indented line is encountered before the text block ends (with a blank
 412         line).
 413         """
 414         try:
 415             block = self.input_lines.get_text_block(self.line_offset,
 416                                                     flush_left)
 417             self.next_line(len(block) - 1)
 418             return block
 419         except UnexpectedIndentationError, error:
 420             block, source, lineno = error.args
 421             self.next_line(len(block) - 1) # advance to last line of block
 422             raise
 423
 424     def check_line(self, context, state, transitions=None):
 425         """
 426         Examine one line of input for a transition match & execute its method.
 427
 428         Parameters:
 429
 430         - `context`: application-dependent storage.
 431         - `state`: a `State` object, the current state.
 432         - `transitions`: an optional ordered list of transition names to try,
 433           instead of ``state.transition_order``.
 434
 435         Return the values returned by the transition method:
 436
 437         - context: possibly modified from the parameter `context`;
 438         - next state name (`State` subclass name);
 439         - the result output of the transition, a list.
 440
 441         When there is no match, ``state.no_match()`` is called and its return
 442         value is returned.
 443         """
 444         if transitions is None:
 445             transitions =  state.transition_order
 446         state_correction = None
 447         if self.debug:
 448             print >>self._stderr, (
 449                   '\nStateMachine.check_line: state="%s", transitions=%r.'
 450                   % (state.__class__.__name__, transitions))
 451         for name in transitions:
 452             pattern, method, next_state = state.transitions[name]
 453             match = pattern.match(self.line)
 454             if match:
 455                 if self.debug:
 456                     print >>self._stderr, (
 457                           '\nStateMachine.check_line: Matched transition '
 458                           '"%s" in state "%s".'
 459                           % (name, state.__class__.__name__))
 460                 return method(match, context, next_state)
 461         else:
 462             if self.debug:
 463                 print >>self._stderr, (
 464                       '\nStateMachine.check_line: No match in state "%s".'
 465                       % state.__class__.__name__)
 466             return state.no_match(context, transitions)
 467
 468     def add_state(self, state_class):
 469         """
 470         Initialize & add a `state_class` (`State` subclass) object.
 471
 472         Exception: `DuplicateStateError` raised if `state_class` was already
 473         added.
 474         """
 475         statename = state_class.__name__
 476         if statename in self.states:
 477             raise DuplicateStateError(statename)
 478         self.states[statename] = state_class(self, self.debug)
 479
 480     def add_states(self, state_classes):
 481         """
 482         Add `state_classes` (a list of `State` subclasses).
 483         """
 484         for state_class in state_classes:
 485             self.add_state(state_class)
 486
 487     def runtime_init(self):
 488         """
 489         Initialize `self.states`.
 490         """
 491         for state in self.states.values():
 492             state.runtime_init()
 493
 494     def error(self):
 495         """Report error details."""
 496         type, value, module, line, function = _exception_data()
 497         print >>self._stderr, u'%s: %s' % (type, value)
 498         print >>self._stderr, 'input line %s' % (self.abs_line_number())
 499         print >>self._stderr, (u'module %s, line %s, function %s' %
 500                                (module, line, function))
 501
 502     def attach_observer(self, observer):
 503         """
 504         The `observer` parameter is a function or bound method which takes two
 505         arguments, the source and offset of the current line.
 506         """
 507         self.observers.append(observer)
 508
 509     def detach_observer(self, observer):
 510         self.observers.remove(observer)
 511
 512     def notify_observers(self):
 513         for observer in self.observers:
 514             try:
 515                 info = self.input_lines.info(self.line_offset)
 516             except IndexError:
 517                 info = (None, None)
 518             observer(*info)
 519
 520
 521 class State:
 522
 523     """
 524     State superclass. Contains a list of transitions, and transition methods.
 525
 526     Transition methods all have the same signature. They take 3 parameters:
 527
 528     - An `re` match object. ``match.string`` contains the matched input line,
 529       ``match.start()`` gives the start index of the match, and
 530       ``match.end()`` gives the end index.
 531     - A context object, whose meaning is application-defined (initial value
 532       ``None``). It can be used to store any information required by the state
 533       machine, and the retured context is passed on to the next transition
 534       method unchanged.
 535     - The name of the next state, a string, taken from the transitions list;
 536       normally it is returned unchanged, but it may be altered by the
 537       transition method if necessary.
 538
 539     Transition methods all return a 3-tuple:
 540
 541     - A context object, as (potentially) modified by the transition method.
 542     - The next state name (a return value of ``None`` means no state change).
 543     - The processing result, a list, which is accumulated by the state
 544       machine.
 545
 546     Transition methods may raise an `EOFError` to cut processing short.
 547
 548     There are two implicit transitions, and corresponding transition methods
 549     are defined: `bof()` handles the beginning-of-file, and `eof()` handles
 550     the end-of-file. These methods have non-standard signatures and return
 551     values. `bof()` returns the initial context and results, and may be used
 552     to return a header string, or do any other processing needed. `eof()`
 553     should handle any remaining context and wrap things up; it returns the
 554     final processing result.
 555
 556     Typical applications need only subclass `State` (or a subclass), set the
 557     `patterns` and `initial_transitions` class attributes, and provide
 558     corresponding transition methods. The default object initialization will
 559     take care of constructing the list of transitions.
 560     """
 561
 562     patterns = None
 563     """
 564     {Name: pattern} mapping, used by `make_transition()`. Each pattern may
 565     be a string or a compiled `re` pattern. Override in subclasses.
 566     """
 567
 568     initial_transitions = None
 569     """
 570     A list of transitions to initialize when a `State` is instantiated.
 571     Each entry is either a transition name string, or a (transition name, next
 572     state name) pair. See `make_transitions()`. Override in subclasses.
 573     """
 574
 575     nested_sm = None
 576     """
 577     The `StateMachine` class for handling nested processing.
 578
 579     If left as ``None``, `nested_sm` defaults to the class of the state's
 580     controlling state machine. Override it in subclasses to avoid the default.
 581     """
 582
 583     nested_sm_kwargs = None
 584     """
 585     Keyword arguments dictionary, passed to the `nested_sm` constructor.
 586
 587     Two keys must have entries in the dictionary:
 588
 589     - Key 'state_classes' must be set to a list of `State` classes.
 590     - Key 'initial_state' must be set to the name of the initial state class.
 591
 592     If `nested_sm_kwargs` is left as ``None``, 'state_classes' defaults to the
 593     class of the current state, and 'initial_state' defaults to the name of
 594     the class of the current state. Override in subclasses to avoid the
 595     defaults.
 596     """
 597
 598     def __init__(self, state_machine, debug=0):
 599         """
 600         Initialize a `State` object; make & add initial transitions.
 601
 602         Parameters:
 603
 604         - `statemachine`: the controlling `StateMachine` object.
 605         - `debug`: a boolean; produce verbose output if true (nonzero).
 606         """
 607
 608         self.transition_order = []
 609         """A list of transition names in search order."""
 610
 611         self.transitions = {}
 612         """
 613         A mapping of transition names to 3-tuples containing
 614         (compiled_pattern, transition_method, next_state_name). Initialized as
 615         an instance attribute dynamically (instead of as a class attribute)
 616         because it may make forward references to patterns and methods in this
 617         or other classes.
 618         """
 619
 620         self.add_initial_transitions()
 621
 622         self.state_machine = state_machine
 623         """A reference to the controlling `StateMachine` object."""
 624
 625         self.debug = debug
 626         """Debugging mode on/off."""
 627
 628         if self.nested_sm is None:
 629             self.nested_sm = self.state_machine.__class__
 630         if self.nested_sm_kwargs is None:
 631             self.nested_sm_kwargs = {'state_classes': [self.__class__],
 632                                      'initial_state': self.__class__.__name__}
 633
 634     def runtime_init(self):
 635         """
 636         Initialize this `State` before running the state machine; called from
 637         `self.state_machine.run()`.
 638         """
 639         pass
 640
 641     def unlink(self):
 642         """Remove circular references to objects no longer required."""
 643         self.state_machine = None
 644
 645     def add_initial_transitions(self):
 646         """Make and add transitions listed in `self.initial_transitions`."""
 647         if self.initial_transitions:
 648             names, transitions = self.make_transitions(
 649                   self.initial_transitions)
 650             self.add_transitions(names, transitions)
 651
 652     def add_transitions(self, names, transitions):
 653         """
 654         Add a list of transitions to the start of the transition list.
 655
 656         Parameters:
 657
 658         - `names`: a list of transition names.
 659         - `transitions`: a mapping of names to transition tuples.
 660
 661         Exceptions: `DuplicateTransitionError`, `UnknownTransitionError`.
 662         """
 663         for name in names:
 664             if name in self.transitions:
 665                 raise DuplicateTransitionError(name)
 666             if name not in transitions:
 667                 raise UnknownTransitionError(name)
 668         self.transition_order[:0] = names
 669         self.transitions.update(transitions)
 670
 671     def add_transition(self, name, transition):
 672         """
 673         Add a transition to the start of the transition list.
 674
 675         Parameter `transition`: a ready-made transition 3-tuple.
 676
 677         Exception: `DuplicateTransitionError`.
 678         """
 679         if name in self.transitions:
 680             raise DuplicateTransitionError(name)
 681         self.transition_order[:0] = [name]
 682         self.transitions[name] = transition
 683
 684     def remove_transition(self, name):
 685         """
 686         Remove a transition by `name`.
 687
 688         Exception: `UnknownTransitionError`.
 689         """
 690         try:
 691             del self.transitions[name]
 692             self.transition_order.remove(name)
 693         except:
 694             raise UnknownTransitionError(name)
 695
 696     def make_transition(self, name, next_state=None):
 697         """
 698         Make & return a transition tuple based on `name`.
 699
 700         This is a convenience function to simplify transition creation.
 701
 702         Parameters:
 703
 704         - `name`: a string, the name of the transition pattern & method. This
 705           `State` object must have a method called '`name`', and a dictionary
 706           `self.patterns` containing a key '`name`'.
 707         - `next_state`: a string, the name of the next `State` object for this
 708           transition. A value of ``None`` (or absent) implies no state change
 709           (i.e., continue with the same state).
 710
 711         Exceptions: `TransitionPatternNotFound`, `TransitionMethodNotFound`.
 712         """
 713         if next_state is None:
 714             next_state = self.__class__.__name__
 715         try:
 716             pattern = self.patterns[name]
 717             if not hasattr(pattern, 'match'):
 718                 pattern = re.compile(pattern)
 719         except KeyError:
 720             raise TransitionPatternNotFound(
 721                   '%s.patterns[%r]' % (self.__class__.__name__, name))
 722         try:
 723             method = getattr(self, name)
 724         except AttributeError:
 725             raise TransitionMethodNotFound(
 726                   '%s.%s' % (self.__class__.__name__, name))
 727         return (pattern, method, next_state)
 728
 729     def make_transitions(self, name_list):
 730         """
 731         Return a list of transition names and a transition mapping.
 732
 733         Parameter `name_list`: a list, where each entry is either a transition
 734         name string, or a 1- or 2-tuple (transition name, optional next state
 735         name).
 736         """
 737         stringtype = type('')
 738         names = []
 739         transitions = {}
 740         for namestate in name_list:
 741             if type(namestate) is stringtype:
 742                 transitions[namestate] = self.make_transition(namestate)
 743                 names.append(namestate)
 744             else:
 745                 transitions[namestate[0]] = self.make_transition(*namestate)
 746                 names.append(namestate[0])
 747         return names, transitions
 748
 749     def no_match(self, context, transitions):
 750         """
 751         Called when there is no match from `StateMachine.check_line()`.
 752
 753         Return the same values returned by transition methods:
 754
 755         - context: unchanged;
 756         - next state name: ``None``;
 757         - empty result list.
 758
 759         Override in subclasses to catch this event.
 760         """
 761         return context, None, []
 762
 763     def bof(self, context):
 764         """
 765         Handle beginning-of-file. Return unchanged `context`, empty result.
 766
 767         Override in subclasses.
 768
 769         Parameter `context`: application-defined storage.
 770         """
 771         return context, []
 772
 773     def eof(self, context):
 774         """
 775         Handle end-of-file. Return empty result.
 776
 777         Override in subclasses.
 778
 779         Parameter `context`: application-defined storage.
 780         """
 781         return []
 782
 783     def nop(self, match, context, next_state):
 784         """
 785         A "do nothing" transition method.
 786
 787         Return unchanged `context` & `next_state`, empty result. Useful for
 788         simple state changes (actionless transitions).
 789         """
 790         return context, next_state, []
 791
 792
 793 class StateMachineWS(StateMachine):
 794
 795     """
 796     `StateMachine` subclass specialized for whitespace recognition.
 797
 798     There are three methods provided for extracting indented text blocks:
 799
 800     - `get_indented()`: use when the indent is unknown.
 801     - `get_known_indented()`: use when the indent is known for all lines.
 802     - `get_first_known_indented()`: use when only the first line's indent is
 803       known.
 804     """
 805
 806     def get_indented(self, until_blank=0, strip_indent=1):
 807         """
 808         Return a block of indented lines of text, and info.
 809
 810         Extract an indented block where the indent is unknown for all lines.
 811
 812         :Parameters:
 813             - `until_blank`: Stop collecting at the first blank line if true
 814               (1).
 815             - `strip_indent`: Strip common leading indent if true (1,
 816               default).
 817
 818         :Return:
 819             - the indented block (a list of lines of text),
 820             - its indent,
 821             - its first line offset from BOF, and
 822             - whether or not it finished with a blank line.
 823         """
 824         offset = self.abs_line_offset()
 825         indented, indent, blank_finish = self.input_lines.get_indented(
 826               self.line_offset, until_blank, strip_indent)
 827         if indented:
 828             self.next_line(len(indented) - 1) # advance to last indented line
 829         while indented and not indented[0].strip():
 830             indented.trim_start()
 831             offset += 1
 832         return indented, indent, offset, blank_finish
 833
 834     def get_known_indented(self, indent, until_blank=0, strip_indent=1):
 835         """
 836         Return an indented block and info.
 837
 838         Extract an indented block where the indent is known for all lines.
 839         Starting with the current line, extract the entire text block with at
 840         least `indent` indentation (which must be whitespace, except for the
 841         first line).
 842
 843         :Parameters:
 844             - `indent`: The number of indent columns/characters.
 845             - `until_blank`: Stop collecting at the first blank line if true
 846               (1).
 847             - `strip_indent`: Strip `indent` characters of indentation if true
 848               (1, default).
 849
 850         :Return:
 851             - the indented block,
 852             - its first line offset from BOF, and
 853             - whether or not it finished with a blank line.
 854         """
 855         offset = self.abs_line_offset()
 856         indented, indent, blank_finish = self.input_lines.get_indented(
 857               self.line_offset, until_blank, strip_indent,
 858               block_indent=indent)
 859         self.next_line(len(indented) - 1) # advance to last indented line
 860         while indented and not indented[0].strip():
 861             indented.trim_start()
 862             offset += 1
 863         return indented, offset, blank_finish
 864
 865     def get_first_known_indented(self, indent, until_blank=0, strip_indent=1,
 866                                  strip_top=1):
 867         """
 868         Return an indented block and info.
 869
 870         Extract an indented block where the indent is known for the first line
 871         and unknown for all other lines.
 872
 873         :Parameters:
 874             - `indent`: The first line's indent (# of columns/characters).
 875             - `until_blank`: Stop collecting at the first blank line if true
 876               (1).
 877             - `strip_indent`: Strip `indent` characters of indentation if true
 878               (1, default).
 879             - `strip_top`: Strip blank lines from the beginning of the block.
 880
 881         :Return:
 882             - the indented block,
 883             - its indent,
 884             - its first line offset from BOF, and
 885             - whether or not it finished with a blank line.
 886         """
 887         offset = self.abs_line_offset()
 888         indented, indent, blank_finish = self.input_lines.get_indented(
 889               self.line_offset, until_blank, strip_indent,
 890               first_indent=indent)
 891         self.next_line(len(indented) - 1) # advance to last indented line
 892         if strip_top:
 893             while indented and not indented[0].strip():
 894                 indented.trim_start()
 895                 offset += 1
 896         return indented, indent, offset, blank_finish
 897
 898
 899 class StateWS(State):
 900
 901     """
 902     State superclass specialized for whitespace (blank lines & indents).
 903
 904     Use this class with `StateMachineWS`.  The transitions 'blank' (for blank
 905     lines) and 'indent' (for indented text blocks) are added automatically,
 906     before any other transitions.  The transition method `blank()` handles
 907     blank lines and `indent()` handles nested indented blocks.  Indented
 908     blocks trigger a new state machine to be created by `indent()` and run.
 909     The class of the state machine to be created is in `indent_sm`, and the
 910     constructor keyword arguments are in the dictionary `indent_sm_kwargs`.
 911
 912     The methods `known_indent()` and `firstknown_indent()` are provided for
 913     indented blocks where the indent (all lines' and first line's only,
 914     respectively) is known to the transition method, along with the attributes
 915     `known_indent_sm` and `known_indent_sm_kwargs`.  Neither transition method
 916     is triggered automatically.
 917     """
 918
 919     indent_sm = None
 920     """
 921     The `StateMachine` class handling indented text blocks.
 922
 923     If left as ``None``, `indent_sm` defaults to the value of
 924     `State.nested_sm`.  Override it in subclasses to avoid the default.
 925     """
 926
 927     indent_sm_kwargs = None
 928     """
 929     Keyword arguments dictionary, passed to the `indent_sm` constructor.
 930
 931     If left as ``None``, `indent_sm_kwargs` defaults to the value of
 932     `State.nested_sm_kwargs`. Override it in subclasses to avoid the default.
 933     """
 934
 935     known_indent_sm = None
 936     """
 937     The `StateMachine` class handling known-indented text blocks.
 938
 939     If left as ``None``, `known_indent_sm` defaults to the value of
 940     `indent_sm`.  Override it in subclasses to avoid the default.
 941     """
 942
 943     known_indent_sm_kwargs = None
 944     """
 945     Keyword arguments dictionary, passed to the `known_indent_sm` constructor.
 946
 947     If left as ``None``, `known_indent_sm_kwargs` defaults to the value of
 948     `indent_sm_kwargs`. Override it in subclasses to avoid the default.
 949     """
 950
 951     ws_patterns = {'blank': ' *$',
 952                    'indent': ' +'}
 953     """Patterns for default whitespace transitions.  May be overridden in
 954     subclasses."""
 955
 956     ws_initial_transitions = ('blank', 'indent')
 957     """Default initial whitespace transitions, added before those listed in
 958     `State.initial_transitions`.  May be overridden in subclasses."""
 959
 960     def __init__(self, state_machine, debug=0):
 961         """
 962         Initialize a `StateSM` object; extends `State.__init__()`.
 963
 964         Check for indent state machine attributes, set defaults if not set.
 965         """
 966         State.__init__(self, state_machine, debug)
 967         if self.indent_sm is None:
 968             self.indent_sm = self.nested_sm
 969         if self.indent_sm_kwargs is None:
 970             self.indent_sm_kwargs = self.nested_sm_kwargs
 971         if self.known_indent_sm is None:
 972             self.known_indent_sm = self.indent_sm
 973         if self.known_indent_sm_kwargs is None:
 974             self.known_indent_sm_kwargs = self.indent_sm_kwargs
 975
 976     def add_initial_transitions(self):
 977         """
 978         Add whitespace-specific transitions before those defined in subclass.
 979
 980         Extends `State.add_initial_transitions()`.
 981         """
 982         State.add_initial_transitions(self)
 983         if self.patterns is None:
 984             self.patterns = {}
 985         self.patterns.update(self.ws_patterns)
 986         names, transitions = self.make_transitions(
 987             self.ws_initial_transitions)
 988         self.add_transitions(names, transitions)
 989
 990     def blank(self, match, context, next_state):
 991         """Handle blank lines. Does nothing. Override in subclasses."""
 992         return self.nop(match, context, next_state)
 993
 994     def indent(self, match, context, next_state):
 995         """
 996         Handle an indented text block. Extend or override in subclasses.
 997
 998         Recursively run the registered state machine for indented blocks
 999         (`self.indent_sm`).
1000         """
1001         indented, indent, line_offset, blank_finish = \
1002               self.state_machine.get_indented()
1003         sm = self.indent_sm(debug=self.debug, **self.indent_sm_kwargs)
1004         results = sm.run(indented, input_offset=line_offset)
1005         return context, next_state, results
1006
1007     def known_indent(self, match, context, next_state):
1008         """
1009         Handle a known-indent text block. Extend or override in subclasses.
1010
1011         Recursively run the registered state machine for known-indent indented
1012         blocks (`self.known_indent_sm`). The indent is the length of the
1013         match, ``match.end()``.
1014         """
1015         indented, line_offset, blank_finish = \
1016               self.state_machine.get_known_indented(match.end())
1017         sm = self.known_indent_sm(debug=self.debug,
1018                                  **self.known_indent_sm_kwargs)
1019         results = sm.run(indented, input_offset=line_offset)
1020         return context, next_state, results
1021
1022     def first_known_indent(self, match, context, next_state):
1023         """
1024         Handle an indented text block (first line's indent known).
1025
1026         Extend or override in subclasses.
1027
1028         Recursively run the registered state machine for known-indent indented
1029         blocks (`self.known_indent_sm`). The indent is the length of the
1030         match, ``match.end()``.
1031         """
1032         indented, line_offset, blank_finish = \
1033               self.state_machine.get_first_known_indented(match.end())
1034         sm = self.known_indent_sm(debug=self.debug,
1035                                  **self.known_indent_sm_kwargs)
1036         results = sm.run(indented, input_offset=line_offset)
1037         return context, next_state, results
1038
1039
1040 class _SearchOverride:
1041
1042     """
1043     Mix-in class to override `StateMachine` regular expression behavior.
1044
1045     Changes regular expression matching, from the default `re.match()`
1046     (succeeds only if the pattern matches at the start of `self.line`) to
1047     `re.search()` (succeeds if the pattern matches anywhere in `self.line`).
1048     When subclassing a `StateMachine`, list this class **first** in the
1049     inheritance list of the class definition.
1050     """
1051
1052     def match(self, pattern):
1053         """
1054         Return the result of a regular expression search.
1055
1056         Overrides `StateMachine.match()`.
1057
1058         Parameter `pattern`: `re` compiled regular expression.
1059         """
1060         return pattern.search(self.line)
1061
1062
1063 class SearchStateMachine(_SearchOverride, StateMachine):
1064     """`StateMachine` which uses `re.search()` instead of `re.match()`."""
1065     pass
1066
1067
1068 class SearchStateMachineWS(_SearchOverride, StateMachineWS):
1069     """`StateMachineWS` which uses `re.search()` instead of `re.match()`."""
1070     pass
1071
1072
1073 class ViewList:
1074
1075     """
1076     List with extended functionality: slices of ViewList objects are child
1077     lists, linked to their parents. Changes made to a child list also affect
1078     the parent list.  A child list is effectively a "view" (in the SQL sense)
1079     of the parent list.  Changes to parent lists, however, do *not* affect
1080     active child lists.  If a parent list is changed, any active child lists
1081     should be recreated.
1082
1083     The start and end of the slice can be trimmed using the `trim_start()` and
1084     `trim_end()` methods, without affecting the parent list.  The link between
1085     child and parent lists can be broken by calling `disconnect()` on the
1086     child list.
1087
1088     Also, ViewList objects keep track of the source & offset of each item.
1089     This information is accessible via the `source()`, `offset()`, and
1090     `info()` methods.
1091     """
1092
1093     def __init__(self, initlist=None, source=None, items=None,
1094                  parent=None, parent_offset=None):
1095         self.data = []
1096         """The actual list of data, flattened from various sources."""
1097
1098         self.items = []
1099         """A list of (source, offset) pairs, same length as `self.data`: the
1100         source of each line and the offset of each line from the beginning of
1101         its source."""
1102
1103         self.parent = parent
1104         """The parent list."""
1105
1106         self.parent_offset = parent_offset
1107         """Offset of this list from the beginning of the parent list."""
1108
1109         if isinstance(initlist, ViewList):
1110             self.data = initlist.data[:]
1111             self.items = initlist.items[:]
1112         elif initlist is not None:
1113             self.data = list(initlist)
1114             if items:
1115                 self.items = items
1116             else:
1117                 self.items = [(source, i) for i in range(len(initlist))]
1118         assert len(self.data) == len(self.items), 'data mismatch'
1119
1120     def __str__(self):
1121         return str(self.data)
1122
1123     def __repr__(self):
1124         return '%s(%s, items=%s)' % (self.__class__.__name__,
1125                                      self.data, self.items)
1126
1127     def __lt__(self, other): return self.data <  self.__cast(other)
1128     def __le__(self, other): return self.data <= self.__cast(other)
1129     def __eq__(self, other): return self.data == self.__cast(other)
1130     def __ne__(self, other): return self.data != self.__cast(other)
1131     def __gt__(self, other): return self.data >  self.__cast(other)
1132     def __ge__(self, other): return self.data >= self.__cast(other)
1133     def __cmp__(self, other): return cmp(self.data, self.__cast(other))
1134
1135     def __cast(self, other):
1136         if isinstance(other, ViewList):
1137             return other.data
1138         else:
1139             return other
1140
1141     def __contains__(self, item): return item in self.data
1142     def __len__(self): return len(self.data)
1143
1144     # The __getitem__()/__setitem__() methods check whether the index
1145     # is a slice first, since indexing a native list with a slice object
1146     # just works.
1147
1148     def __getitem__(self, i):
1149         if isinstance(i, types.SliceType):
1150             assert i.step in (None, 1),  'cannot handle slice with stride'
1151             return self.__class__(self.data[i.start:i.stop],
1152                                   items=self.items[i.start:i.stop],
1153                                   parent=self, parent_offset=i.start or 0)
1154         else:
1155             return self.data[i]
1156
1157     def __setitem__(self, i, item):
1158         if isinstance(i, types.SliceType):
1159             assert i.step in (None, 1), 'cannot handle slice with stride'
1160             if not isinstance(item, ViewList):
1161                 raise TypeError('assigning non-ViewList to ViewList slice')
1162             self.data[i.start:i.stop] = item.data
1163             self.items[i.start:i.stop] = item.items
1164             assert len(self.data) == len(self.items), 'data mismatch'
1165             if self.parent:
1166                 self.parent[(i.start or 0) + self.parent_offset
1167                             : (i.stop or len(self)) + self.parent_offset] = item
1168         else:
1169             self.data[i] = item
1170             if self.parent:
1171                 self.parent[i + self.parent_offset] = item
1172
1173     def __delitem__(self, i):
1174         try:
1175             del self.data[i]
1176             del self.items[i]
1177             if self.parent:
1178                 del self.parent[i + self.parent_offset]
1179         except TypeError:
1180             assert i.step is None, 'cannot handle slice with stride'
1181             del self.data[i.start:i.stop]
1182             del self.items[i.start:i.stop]
1183             if self.parent:
1184                 del self.parent[(i.start or 0) + self.parent_offset
1185                                 : (i.stop or len(self)) + self.parent_offset]
1186
1187     def __add__(self, other):
1188         if isinstance(other, ViewList):
1189             return self.__class__(self.data + other.data,
1190                                   items=(self.items + other.items))
1191         else:
1192             raise TypeError('adding non-ViewList to a ViewList')
1193
1194     def __radd__(self, other):
1195         if isinstance(other, ViewList):
1196             return self.__class__(other.data + self.data,
1197                                   items=(other.items + self.items))
1198         else:
1199             raise TypeError('adding ViewList to a non-ViewList')
1200
1201     def __iadd__(self, other):
1202         if isinstance(other, ViewList):
1203             self.data += other.data
1204         else:
1205             raise TypeError('argument to += must be a ViewList')
1206         return self
1207
1208     def __mul__(self, n):
1209         return self.__class__(self.data * n, items=(self.items * n))
1210
1211     __rmul__ = __mul__
1212
1213     def __imul__(self, n):
1214         self.data *= n
1215         self.items *= n
1216         return self
1217
1218     def extend(self, other):
1219         if not isinstance(other, ViewList):
1220             raise TypeError('extending a ViewList with a non-ViewList')
1221         if self.parent:
1222             self.parent.insert(len(self.data) + self.parent_offset, other)
1223         self.data.extend(other.data)
1224         self.items.extend(other.items)
1225
1226     def append(self, item, source=None, offset=0):
1227         if source is None:
1228             self.extend(item)
1229         else:
1230             if self.parent:
1231                 self.parent.insert(len(self.data) + self.parent_offset, item,
1232                                    source, offset)
1233             self.data.append(item)
1234             self.items.append((source, offset))
1235
1236     def insert(self, i, item, source=None, offset=0):
1237         if source is None:
1238             if not isinstance(item, ViewList):
1239                 raise TypeError('inserting non-ViewList with no source given')
1240             self.data[i:i] = item.data
1241             self.items[i:i] = item.items
1242             if self.parent:
1243                 index = (len(self.data) + i) % len(self.data)
1244                 self.parent.insert(index + self.parent_offset, item)
1245         else:
1246             self.data.insert(i, item)
1247             self.items.insert(i, (source, offset))
1248             if self.parent:
1249                 index = (len(self.data) + i) % len(self.data)
1250                 self.parent.insert(index + self.parent_offset, item,
1251                                    source, offset)
1252
1253     def pop(self, i=-1):
1254         if self.parent:
1255             index = (len(self.data) + i) % len(self.data)
1256             self.parent.pop(index + self.parent_offset)
1257         self.items.pop(i)
1258         return self.data.pop(i)
1259
1260     def trim_start(self, n=1):
1261         """
1262         Remove items from the start of the list, without touching the parent.
1263         """
1264         if n > len(self.data):
1265             raise IndexError("Size of trim too large; can't trim %s items "
1266                              "from a list of size %s." % (n, len(self.data)))
1267         elif n < 0:
1268             raise IndexError('Trim size must be >= 0.')
1269         del self.data[:n]
1270         del self.items[:n]
1271         if self.parent:
1272             self.parent_offset += n
1273
1274     def trim_end(self, n=1):
1275         """
1276         Remove items from the end of the list, without touching the parent.
1277         """
1278         if n > len(self.data):
1279             raise IndexError("Size of trim too large; can't trim %s items "
1280                              "from a list of size %s." % (n, len(self.data)))
1281         elif n < 0:
1282             raise IndexError('Trim size must be >= 0.')
1283         del self.data[-n:]
1284         del self.items[-n:]
1285
1286     def remove(self, item):
1287         index = self.index(item)
1288         del self[index]
1289
1290     def count(self, item): return self.data.count(item)
1291     def index(self, item): return self.data.index(item)
1292
1293     def reverse(self):
1294         self.data.reverse()
1295         self.items.reverse()
1296         self.parent = None
1297
1298     def sort(self, *args):
1299         tmp = zip(self.data, self.items)
1300         tmp.sort(*args)
1301         self.data = [entry[0] for entry in tmp]
1302         self.items = [entry[1] for entry in tmp]
1303         self.parent = None
1304
1305     def info(self, i):
1306         """Return source & offset for index `i`."""
1307         try:
1308             return self.items[i]
1309         except IndexError:
1310             if i == len(self.data):     # Just past the end
1311                 return self.items[i - 1][0], None
1312             else:
1313                 raise
1314
1315     def source(self, i):
1316         """Return source for index `i`."""
1317         return self.info(i)[0]
1318
1319     def offset(self, i):
1320         """Return offset for index `i`."""
1321         return self.info(i)[1]
1322
1323     def disconnect(self):
1324         """Break link between this list and parent list."""
1325         self.parent = None
1326
1327     def xitems(self):
1328         """Return iterator yielding (source, offset, value) tuples."""
1329         for (value, (source, offset)) in zip(self.data, self.items):
1330             yield (source, offset, value)
1331
1332     def pprint(self):
1333         """Print the list in `grep` format (`source:offset:value` lines)"""
1334         for line in self.xitems():
1335             print "%s:%d:%s" % line
1336
1337
1338 class StringList(ViewList):
1339
1340     """A `ViewList` with string-specific methods."""
1341
1342     def trim_left(self, length, start=0, end=sys.maxint):
1343         """
1344         Trim `length` characters off the beginning of each item, in-place,
1345         from index `start` to `end`.  No whitespace-checking is done on the
1346         trimmed text.  Does not affect slice parent.
1347         """
1348         self.data[start:end] = [line[length:]
1349                                 for line in self.data[start:end]]
1350
1351     def get_text_block(self, start, flush_left=0):
1352         """
1353         Return a contiguous block of text.
1354
1355         If `flush_left` is true, raise `UnexpectedIndentationError` if an
1356         indented line is encountered before the text block ends (with a blank
1357         line).
1358         """
1359         end = start
1360         last = len(self.data)
1361         while end < last:
1362             line = self.data[end]
1363             if not line.strip():
1364                 break
1365             if flush_left and (line[0] == ' '):
1366                 source, offset = self.info(end)
1367                 raise UnexpectedIndentationError(self[start:end], source,
1368                                                  offset + 1)
1369             end += 1
1370         return self[start:end]
1371
1372     def get_indented(self, start=0, until_blank=0, strip_indent=1,
1373                      block_indent=None, first_indent=None):
1374         """
1375         Extract and return a StringList of indented lines of text.
1376
1377         Collect all lines with indentation, determine the minimum indentation,
1378         remove the minimum indentation from all indented lines (unless
1379         `strip_indent` is false), and return them. All lines up to but not
1380         including the first unindented line will be returned.
1381
1382         :Parameters:
1383           - `start`: The index of the first line to examine.
1384           - `until_blank`: Stop collecting at the first blank line if true.
1385           - `strip_indent`: Strip common leading indent if true (default).
1386           - `block_indent`: The indent of the entire block, if known.
1387           - `first_indent`: The indent of the first line, if known.
1388
1389         :Return:
1390           - a StringList of indented lines with mininum indent removed;
1391           - the amount of the indent;
1392           - a boolean: did the indented block finish with a blank line or EOF?
1393         """
1394         indent = block_indent           # start with None if unknown
1395         end = start
1396         if block_indent is not None and first_indent is None:
1397             first_indent = block_indent
1398         if first_indent is not None:
1399             end += 1
1400         last = len(self.data)
1401         while end < last:
1402             line = self.data[end]
1403             if line and (line[0] != ' '
1404                          or (block_indent is not None
1405                              and line[:block_indent].strip())):
1406                 # Line not indented or insufficiently indented.
1407                 # Block finished properly iff the last indented line blank:
1408                 blank_finish = ((end > start)
1409                                 and not self.data[end - 1].strip())
1410                 break
1411             stripped = line.lstrip()
1412             if not stripped:            # blank line
1413                 if until_blank:
1414                     blank_finish = 1
1415                     break
1416             elif block_indent is None:
1417                 line_indent = len(line) - len(stripped)
1418                 if indent is None:
1419                     indent = line_indent
1420                 else:
1421                     indent = min(indent, line_indent)
1422             end += 1
1423         else:
1424             blank_finish = 1            # block ends at end of lines
1425         block = self[start:end]
1426         if first_indent is not None and block:
1427             block.data[0] = block.data[0][first_indent:]
1428         if indent and strip_indent:
1429             block.trim_left(indent, start=(first_indent is not None))
1430         return block, indent or 0, blank_finish
1431
1432     def get_2D_block(self, top, left, bottom, right, strip_indent=1):
1433         block = self[top:bottom]
1434         indent = right
1435         for i in range(len(block.data)):
1436             # get slice from line, care for combining characters
1437             ci = utils.column_indices(block.data[i])
1438             try:
1439                 left = ci[left]
1440             except IndexError:
1441                 left += len(block.data[i]) - len(ci)
1442             try:
1443                 right = ci[right]
1444             except IndexError:
1445                 right += len(block.data[i]) - len(ci)
1446             block.data[i] = line = block.data[i][left:right].rstrip()
1447             if line:
1448                 indent = min(indent, len(line) - len(line.lstrip()))
1449         if strip_indent and 0 < indent < right:
1450             block.data = [line[indent:] for line in block.data]
1451         return block
1452
1453     def pad_double_width(self, pad_char):
1454         """
1455         Pad all double-width characters in self by appending `pad_char` to each.
1456         For East Asian language support.
1457         """
1458         if hasattr(unicodedata, 'east_asian_width'):
1459             east_asian_width = unicodedata.east_asian_width
1460         else:
1461             return                      # new in Python 2.4
1462         for i in range(len(self.data)):
1463             line = self.data[i]
1464             if isinstance(line, unicode):
1465                 new = []
1466                 for char in line:
1467                     new.append(char)
1468                     if east_asian_width(char) in 'WF': # 'W'ide & 'F'ull-width
1469                         new.append(pad_char)
1470                 self.data[i] = ''.join(new)
1471
1472     def replace(self, old, new):
1473         """Replace all occurrences of substring `old` with `new`."""
1474         for i in range(len(self.data)):
1475             self.data[i] = self.data[i].replace(old, new)
1476
1477
1478 class StateMachineError(Exception): pass
1479 class UnknownStateError(StateMachineError): pass
1480 class DuplicateStateError(StateMachineError): pass
1481 class UnknownTransitionError(StateMachineError): pass
1482 class DuplicateTransitionError(StateMachineError): pass
1483 class TransitionPatternNotFound(StateMachineError): pass
1484 class TransitionMethodNotFound(StateMachineError): pass
1485 class UnexpectedIndentationError(StateMachineError): pass
1486
1487
1488 class TransitionCorrection(Exception):
1489
1490     """
1491     Raise from within a transition method to switch to another transition.
1492
1493     Raise with one argument, the new transition name.
1494     """
1495
1496
1497 class StateCorrection(Exception):
1498
1499     """
1500     Raise from within a transition method to switch to another state.
1501
1502     Raise with one or two arguments: new state name, and an optional new
1503     transition name.
1504     """
1505
1506
1507 def string2lines(astring, tab_width=8, convert_whitespace=0,
1508                  whitespace=re.compile('[\v\f]')):
1509     """
1510     Return a list of one-line strings with tabs expanded, no newlines, and
1511     trailing whitespace stripped.
1512
1513     Each tab is expanded with between 1 and `tab_width` spaces, so that the
1514     next character's index becomes a multiple of `tab_width` (8 by default).
1515
1516     Parameters:
1517
1518     - `astring`: a multi-line string.
1519     - `tab_width`: the number of columns between tab stops.
1520     - `convert_whitespace`: convert form feeds and vertical tabs to spaces?
1521     """
1522     if convert_whitespace:
1523         astring = whitespace.sub(' ', astring)
1524     return [s.expandtabs(tab_width).rstrip() for s in astring.splitlines()]
1525
1526 def _exception_data():
1527     """
1528     Return exception information:
1529
1530     - the exception's class name;
1531     - the exception object;
1532     - the name of the file containing the offending code;
1533     - the line number of the offending code;
1534     - the function name of the offending code.
1535     """
1536     type, value, traceback = sys.exc_info()
1537     while traceback.tb_next:
1538         traceback = traceback.tb_next
1539     code = traceback.tb_frame.f_code
1540     return (type.__name__, value, code.co_filename, traceback.tb_lineno,
1541             code.co_name)