lib/yaml/emitter.py

   1
   2 # Emitter expects events obeying the following grammar:
   3 # stream ::= STREAM-START document* STREAM-END
   4 # document ::= DOCUMENT-START node DOCUMENT-END
   5 # node ::= SCALAR | sequence | mapping
   6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
   7 # mapping ::= MAPPING-START (node node)* MAPPING-END
   8
   9 __all__ = ['Emitter', 'EmitterError']
  10
  11 from error import YAMLError
  12 from events import *
  13
  14 import re
  15
  16 class EmitterError(YAMLError):
  17     pass
  18
  19 class ScalarAnalysis:
  20     def __init__(self, scalar, empty, multiline,
  21             allow_flow_plain, allow_block_plain,
  22             allow_single_quoted, allow_double_quoted,
  23             allow_block):
  24         self.scalar = scalar
  25         self.empty = empty
  26         self.multiline = multiline
  27         self.allow_flow_plain = allow_flow_plain
  28         self.allow_block_plain = allow_block_plain
  29         self.allow_single_quoted = allow_single_quoted
  30         self.allow_double_quoted = allow_double_quoted
  31         self.allow_block = allow_block
  32
  33 class Emitter:
  34
  35     DEFAULT_TAG_PREFIXES = {
  36         u'!' : u'!',
  37         u'tag:yaml.org,2002:' : u'!!',
  38     }
  39
  40     def __init__(self, stream, canonical=None, indent=None, width=None,
  41             allow_unicode=None, line_break=None):
  42
  43         # The stream should have the methods `write` and possibly `flush`.
  44         self.stream = stream
  45
  46         # Encoding can be overriden by STREAM-START.
  47         self.encoding = None
  48
  49         # Emitter is a state machine with a stack of states to handle nested
  50         # structures.
  51         self.states = []
  52         self.state = self.expect_stream_start
  53
  54         # Current event and the event queue.
  55         self.events = []
  56         self.event = None
  57
  58         # The current indentation level and the stack of previous indents.
  59         self.indents = []
  60         self.indent = None
  61
  62         # Flow level.
  63         self.flow_level = 0
  64
  65         # Contexts.
  66         self.root_context = False
  67         self.sequence_context = False
  68         self.mapping_context = False
  69         self.simple_key_context = False
  70
  71         # Characteristics of the last emitted character:
  72         #  - current position.
  73         #  - is it a whitespace?
  74         #  - is it an indention character
  75         #    (indentation space, '-', '?', or ':')?
  76         self.line = 0
  77         self.column = 0
  78         self.whitespace = True
  79         self.indention = True
  80
  81         # Formatting details.
  82         self.canonical = canonical
  83         self.allow_unicode = allow_unicode
  84         self.best_indent = 2
  85         if indent and 1 < indent < 10:
  86             self.best_indent = indent
  87         self.best_width = 80
  88         if width and width > self.best_indent*2:
  89             self.best_width = width
  90         self.best_line_break = u'\n'
  91         if line_break in [u'\r', u'\n', u'\r\n']:
  92             self.best_line_break = line_break
  93
  94         # Tag prefixes.
  95         self.tag_prefixes = None
  96
  97         # Prepared anchor and tag.
  98         self.prepared_anchor = None
  99         self.prepared_tag = None
 100
 101         # Scalar analysis and style.
 102         self.analysis = None
 103         self.style = None
 104
 105     def emit(self, event):
 106         self.events.append(event)
 107         while not self.need_more_events():
 108             self.event = self.events.pop(0)
 109             self.state()
 110             self.event = None
 111
    # In some cases, we wait for the next few events before emitting.
 113
 114     def need_more_events(self):
 115         if not self.events:
 116             return True
 117         event = self.events[0]
 118         if isinstance(event, DocumentStartEvent):
 119             return self.need_events(1)
 120         elif isinstance(event, SequenceStartEvent):
 121             return self.need_events(2)
 122         elif isinstance(event, MappingStartEvent):
 123             return self.need_events(3)
 124         else:
 125             return False
 126
 127     def need_events(self, count):
 128         level = 0
 129         for event in self.events[1:]:
 130             if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
 131                 level += 1
 132             elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
 133                 level -= 1
 134             elif isinstance(event, StreamEndEvent):
 135                 level = -1
 136             if level < 0:
 137                 return False
 138         return (len(self.events) < count+1)
 139
 140     def increase_indent(self, flow=False, indentless=False):
 141         self.indents.append(self.indent)
 142         if self.indent is None:
 143             if flow:
 144                 self.indent = self.best_indent
 145             else:
 146                 self.indent = 0
 147         elif not indentless:
 148             self.indent += self.best_indent
 149
 150     # States.
 151
 152     # Stream handlers.
 153
 154     def expect_stream_start(self):
 155         if isinstance(self.event, StreamStartEvent):
 156             if self.event.encoding:
 157                 self.encoding = self.event.encoding
 158             self.write_stream_start()
 159             self.state = self.expect_first_document_start
 160         else:
 161             raise EmitterError("expected StreamStartEvent, but got %s"
 162                     % self.event)
 163
 164     def expect_nothing(self):
 165         raise EmitterError("expected nothing, but got %s" % self.event)
 166
 167     # Document handlers.
 168
 169     def expect_first_document_start(self):
 170         return self.expect_document_start(first=True)
 171
 172     def expect_document_start(self, first=False):
 173         if isinstance(self.event, DocumentStartEvent):
 174             if self.event.version:
 175                 version_text = self.prepare_version(self.event.version)
 176                 self.write_version_directive(version_text)
 177             self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
 178             if self.event.tags:
 179                 handles = self.event.tags.keys()
 180                 handles.sort()
 181                 for handle in handles:
 182                     prefix = self.event.tags[handle]
 183                     self.tag_prefixes[prefix] = handle
 184                     handle_text = self.prepare_tag_handle(handle)
 185                     prefix_text = self.prepare_tag_prefix(prefix)
 186                     self.write_tag_directive(handle_text, prefix_text)
 187             implicit = (first and not self.event.explicit and not self.canonical
 188                     and not self.event.version and not self.event.tags
 189                     and not self.check_empty_document())
 190             if not implicit:
 191                 self.write_indent()
 192                 self.write_indicator(u'---', True)
 193                 if self.canonical:
 194                     self.write_indent()
 195             self.state = self.expect_document_root
 196         elif isinstance(self.event, StreamEndEvent):
 197             self.write_stream_end()
 198             self.state = self.expect_nothing
 199         else:
 200             raise EmitterError("expected DocumentStartEvent, but got %s"
 201                     % self.event)
 202
 203     def expect_document_end(self):
 204         if isinstance(self.event, DocumentEndEvent):
 205             self.write_indent()
 206             if self.event.explicit:
 207                 self.write_indicator(u'...', True)
 208                 self.write_indent()
 209             self.flush_stream()
 210             self.state = self.expect_document_start
 211         else:
 212             raise EmitterError("expected DocumentEndEvent, but got %s"
 213                     % self.event)
 214
 215     def expect_document_root(self):
 216         self.states.append(self.expect_document_end)
 217         self.expect_node(root=True)
 218
 219     # Node handlers.
 220
 221     def expect_node(self, root=False, sequence=False, mapping=False,
 222             simple_key=False):
 223         self.root_context = root
 224         self.sequence_context = sequence
 225         self.mapping_context = mapping
 226         self.simple_key_context = simple_key
 227         if isinstance(self.event, AliasEvent):
 228             self.expect_alias()
 229         elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
 230             self.process_anchor(u'&')
 231             self.process_tag()
 232             if isinstance(self.event, ScalarEvent):
 233                 self.expect_scalar()
 234             elif isinstance(self.event, SequenceStartEvent):
 235                 if self.flow_level or self.canonical or self.event.flow_style   \
 236                         or self.check_empty_sequence():
 237                     self.expect_flow_sequence()
 238                 else:
 239                     self.expect_block_sequence()
 240             elif isinstance(self.event, MappingStartEvent):
 241                 if self.flow_level or self.canonical or self.event.flow_style   \
 242                         or self.check_empty_mapping():
 243                     self.expect_flow_mapping()
 244                 else:
 245                     self.expect_block_mapping()
 246         else:
 247             raise EmitterError("expected NodeEvent, but got %s" % self.event)
 248
 249     def expect_alias(self):
 250         if self.event.anchor is None:
 251             raise EmitterError("anchor is not specified for alias")
 252         self.process_anchor(u'*')
 253         self.state = self.states.pop()
 254
 255     def expect_scalar(self):
 256         self.increase_indent(flow=True)
 257         self.process_scalar()
 258         self.indent = self.indents.pop()
 259         self.state = self.states.pop()
 260
 261     # Flow sequence handlers.
 262
 263     def expect_flow_sequence(self):
 264         self.write_indicator(u'[', True, whitespace=True)
 265         self.flow_level += 1
 266         self.increase_indent(flow=True)
 267         self.state = self.expect_first_flow_sequence_item
 268
 269     def expect_first_flow_sequence_item(self):
 270         if isinstance(self.event, SequenceEndEvent):
 271             self.indent = self.indents.pop()
 272             self.flow_level -= 1
 273             self.write_indicator(u']', False)
 274             self.state = self.states.pop()
 275         else:
 276             if self.canonical or self.column > self.best_width:
 277                 self.write_indent()
 278             self.states.append(self.expect_flow_sequence_item)
 279             self.expect_node(sequence=True)
 280
 281     def expect_flow_sequence_item(self):
 282         if isinstance(self.event, SequenceEndEvent):
 283             self.indent = self.indents.pop()
 284             self.flow_level -= 1
 285             if self.canonical:
 286                 self.write_indicator(u',', False)
 287                 self.write_indent()
 288             self.write_indicator(u']', False)
 289             self.state = self.states.pop()
 290         else:
 291             self.write_indicator(u',', False)
 292             if self.canonical or self.column > self.best_width:
 293                 self.write_indent()
 294             self.states.append(self.expect_flow_sequence_item)
 295             self.expect_node(sequence=True)
 296
 297     # Flow mapping handlers.
 298
 299     def expect_flow_mapping(self):
 300         self.write_indicator(u'{', True, whitespace=True)
 301         self.flow_level += 1
 302         self.increase_indent(flow=True)
 303         self.state = self.expect_first_flow_mapping_key
 304
 305     def expect_first_flow_mapping_key(self):
 306         if isinstance(self.event, MappingEndEvent):
 307             self.indent = self.indents.pop()
 308             self.flow_level -= 1
 309             self.write_indicator(u'}', False)
 310             self.state = self.states.pop()
 311         else:
 312             if self.canonical or self.column > self.best_width:
 313                 self.write_indent()
 314             if not self.canonical and self.check_simple_key():
 315                 self.states.append(self.expect_flow_mapping_simple_value)
 316                 self.expect_node(mapping=True, simple_key=True)
 317             else:
 318                 self.write_indicator(u'?', True)
 319                 self.states.append(self.expect_flow_mapping_value)
 320                 self.expect_node(mapping=True)
 321
 322     def expect_flow_mapping_key(self):
 323         if isinstance(self.event, MappingEndEvent):
 324             self.indent = self.indents.pop()
 325             self.flow_level -= 1
 326             if self.canonical:
 327                 self.write_indicator(u',', False)
 328                 self.write_indent()
 329             self.write_indicator(u'}', False)
 330             self.state = self.states.pop()
 331         else:
 332             self.write_indicator(u',', False)
 333             if self.canonical or self.column > self.best_width:
 334                 self.write_indent()
 335             if not self.canonical and self.check_simple_key():
 336                 self.states.append(self.expect_flow_mapping_simple_value)
 337                 self.expect_node(mapping=True, simple_key=True)
 338             else:
 339                 self.write_indicator(u'?', True)
 340                 self.states.append(self.expect_flow_mapping_value)
 341                 self.expect_node(mapping=True)
 342
 343     def expect_flow_mapping_simple_value(self):
 344         self.write_indicator(u':', False)
 345         self.states.append(self.expect_flow_mapping_key)
 346         self.expect_node(mapping=True)
 347
 348     def expect_flow_mapping_value(self):
 349         if self.canonical or self.column > self.best_width:
 350             self.write_indent()
 351         self.write_indicator(u':', True)
 352         self.states.append(self.expect_flow_mapping_key)
 353         self.expect_node(mapping=True)
 354
 355     # Block sequence handlers.
 356
 357     def expect_block_sequence(self):
 358         indentless = (self.mapping_context and not self.indention)
 359         self.increase_indent(flow=False, indentless=indentless)
 360         self.state = self.expect_first_block_sequence_item
 361
 362     def expect_first_block_sequence_item(self):
 363         return self.expect_block_sequence_item(first=True)
 364
 365     def expect_block_sequence_item(self, first=False):
 366         if not first and isinstance(self.event, SequenceEndEvent):
 367             self.indent = self.indents.pop()
 368             self.state = self.states.pop()
 369         else:
 370             self.write_indent()
 371             self.write_indicator(u'-', True, indention=True)
 372             self.states.append(self.expect_block_sequence_item)
 373             self.expect_node(sequence=True)
 374
 375     # Block mapping handlers.
 376
 377     def expect_block_mapping(self):
 378         self.increase_indent(flow=False)
 379         self.state = self.expect_first_block_mapping_key
 380
 381     def expect_first_block_mapping_key(self):
 382         return self.expect_block_mapping_key(first=True)
 383
 384     def expect_block_mapping_key(self, first=False):
 385         if not first and isinstance(self.event, MappingEndEvent):
 386             self.indent = self.indents.pop()
 387             self.state = self.states.pop()
 388         else:
 389             self.write_indent()
 390             if self.check_simple_key():
 391                 self.states.append(self.expect_block_mapping_simple_value)
 392                 self.expect_node(mapping=True, simple_key=True)
 393             else:
 394                 self.write_indicator(u'?', True, indention=True)
 395                 self.states.append(self.expect_block_mapping_value)
 396                 self.expect_node(mapping=True)
 397
 398     def expect_block_mapping_simple_value(self):
 399         self.write_indicator(u':', False)
 400         self.states.append(self.expect_block_mapping_key)
 401         self.expect_node(mapping=True)
 402
 403     def expect_block_mapping_value(self):
 404         self.write_indent()
 405         self.write_indicator(u':', True, indention=True)
 406         self.states.append(self.expect_block_mapping_key)
 407         self.expect_node(mapping=True)
 408
 409     # Checkers.
 410
 411     def check_empty_sequence(self):
 412         return (isinstance(self.event, SequenceStartEvent) and self.events
 413                 and isinstance(self.events[0], SequenceEndEvent))
 414
 415     def check_empty_mapping(self):
 416         return (isinstance(self.event, MappingStartEvent) and self.events
 417                 and isinstance(self.events[0], MappingEndEvent))
 418
 419     def check_empty_document(self):
 420         if not isinstance(self.event, DocumentStartEvent) or not self.events:
 421             return False
 422         event = self.events[0]
 423         return (isinstance(event, ScalarEvent) and event.anchor is None
 424                 and event.tag is None and event.implicit and event.value == u'')
 425
 426     def check_simple_key(self):
 427         length = 0
 428         if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
 429             if self.prepared_anchor is None:
 430                 self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 431             length += len(self.prepared_anchor)
 432         if isinstance(self.event, (ScalarEvent, CollectionStartEvent))  \
 433                 and self.event.tag is not None:
 434             if self.prepared_tag is None:
 435                 self.prepared_tag = self.prepare_tag(self.event.tag)
 436             length += len(self.prepared_tag)
 437         if isinstance(self.event, ScalarEvent):
 438             if self.analysis is None:
 439                 self.analysis = self.analyze_scalar(self.event.value)
 440             length += len(self.analysis.scalar)
 441         return (length < 128 and (isinstance(self.event, AliasEvent)
 442             or (isinstance(self.event, ScalarEvent)
 443                     and not self.analysis.empty and not self.analysis.multiline)
 444             or self.check_empty_sequence() or self.check_empty_mapping()))
 445
 446     # Anchor, Tag, and Scalar processors.
 447
 448     def process_anchor(self, indicator):
 449         if self.event.anchor is None:
 450             self.prepared_anchor = None
 451             return
 452         if self.prepared_anchor is None:
 453             self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 454         if self.prepared_anchor:
 455             self.write_indicator(indicator+self.prepared_anchor, True)
 456         self.prepared_anchor = None
 457
 458     def process_tag(self):
 459         tag = self.event.tag
 460         if isinstance(self.event, ScalarEvent):
 461             if self.style is None:
 462                 self.style = self.choose_scalar_style()
 463             if self.style == '':
 464                 self.prepared_tag = None
 465                 return
 466             if self.event.implicit and not tag:
 467                 tag = u'!'
 468                 self.prepared_tag = None
 469         if not tag:
 470             self.prepared_tag = None
 471             return
 472         if self.prepared_tag is None:
 473             self.prepared_tag = self.prepare_tag(tag)
 474         if self.prepared_tag:
 475             self.write_indicator(self.prepared_tag, True)
 476         self.prepared_tag = None
 477
 478     def choose_scalar_style(self):
 479         if self.analysis is None:
 480             self.analysis = self.analyze_scalar(self.event.value)
 481         if self.event.style == '"' or self.canonical:
 482             return '"'
 483         if not self.event.style and self.event.implicit:
 484             if (not (self.simple_key_context and
 485                     (self.analysis.empty or self.analysis.multiline))
 486                 and (self.flow_level and self.analysis.allow_flow_plain
 487                     or (not self.flow_level and self.analysis.allow_block_plain))):
 488                 return ''
 489         if self.event.style and self.event.style in '|>':
 490             if not self.flow_level and self.analysis.allow_block:
 491                 return self.event.style
 492         if not self.event.style or self.event.style == '\'':
 493             if (self.analysis.allow_single_quoted and
 494                     not (self.simple_key_context and self.analysis.multiline)):
 495                 return '\''
 496         return '"'
 497
 498     def process_scalar(self):
 499         if self.analysis is None:
 500             self.analysis = self.analyze_scalar(self.event.value)
 501         if self.style is None:
 502             self.style = self.choose_scalar_style()
 503         split = (not self.simple_key_context)
 504         #if self.analysis.multiline and split    \
 505         #        and (not self.style or self.style in '\'\"'):
 506         #    self.write_indent()
 507         if self.style == '"':
 508             self.write_double_quoted(self.analysis.scalar, split)
 509         elif self.style == '\'':
 510             self.write_single_quoted(self.analysis.scalar, split)
 511         elif self.style == '>':
 512             self.write_folded(self.analysis.scalar)
 513         elif self.style == '|':
 514             self.write_literal(self.analysis.scalar)
 515         else:
 516             self.write_plain(self.analysis.scalar, split)
 517         self.analysis = None
 518         self.style = None
 519
 520     # Analyzers.
 521
 522     def prepare_version(self, version):
 523         major, minor = version
 524         if major != 1:
 525             raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
 526         return u'%d.%d' % (major, minor)
 527
 528     def prepare_tag_handle(self, handle):
 529         if not handle:
 530             raise EmitterError("tag handle must not be empty")
 531         if handle[0] != u'!' or handle[-1] != u'!':
 532             raise EmitterError("tag handle must start and end with '!': %r"
 533                     % (handle.encode('utf-8')))
 534         for ch in handle[1:-1]:
 535             if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'    \
 536                     or ch in u'-_'):
 537                 raise EmitterError("invalid character %r in the tag handle: %r"
 538                         % (ch.encode('utf-8'), handle.encode('utf-8')))
 539         return handle
 540
 541     def prepare_tag_prefix(self, prefix):
 542         if not prefix:
 543             raise EmitterError("tag prefix must not be empty")
 544         chunks = []
 545         start = end = 0
 546         if prefix[0] == u'!':
 547             end = 1
 548         while end < len(prefix):
 549             ch = prefix[end]
 550             if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
 551                     or ch in u'-;/?!:@&=+$,_.~*\'()[]':
 552                 end += 1
 553             else:
 554                 if start < end:
 555                     chunks.append(prefix[start:end])
 556                 start = end = end+1
 557                 data = ch.encode('utf-8')
 558                 for ch in data:
 559                     chunks.append(u'%%%02X' % ord(ch))
 560         if start < end:
 561             chunks.append(prefix[start:end])
 562         return u''.join(chunks)
 563
 564     def prepare_tag(self, tag):
 565         if not tag:
 566             raise EmitterError("tag must not be empty")
 567         if tag == u'!':
 568             return tag
 569         handle = None
 570         suffix = tag
 571         for prefix in self.tag_prefixes:
 572             if tag.startswith(prefix)   \
 573                     and (prefix == u'!' or len(prefix) < len(tag)):
 574                 handle = self.tag_prefixes[prefix]
 575                 suffix = tag[len(prefix):]
 576         chunks = []
 577         start = end = 0
 578         while end < len(suffix):
 579             ch = suffix[end]
 580             if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
 581                     or ch in u'-;/?:@&=+$,_.~*\'()[]'   \
 582                     or (ch == u'!' and handle != u'!'):
 583                 end += 1
 584             else:
 585                 if start < end:
 586                     chunks.append(suffix[start:end])
 587                 start = end = end+1
 588                 data = ch.encode('utf-8')
 589                 for ch in data:
 590                     chunks.append(u'%%%02X' % ord(ch))
 591         if start < end:
 592             chunks.append(suffix[start:end])
 593         suffix_text = u''.join(chunks)
 594         if handle:
 595             return u'%s%s' % (handle, suffix_text)
 596         else:
 597             return u'!<%s>' % suffix_text
 598
 599     def prepare_anchor(self, anchor):
 600         if not anchor:
 601             raise EmitterError("anchor must not be empty")
 602         for ch in anchor:
 603             if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'    \
 604                     or ch in u'-_'):
 605                 raise EmitterError("invalid character %r in the anchor: %r"
 606                         % (ch.encode('utf-8'), text.encode('utf-8')))
 607         return anchor
 608
 609     def analyze_scalar(self, scalar):
 610
 611         # Empty scalar is a special case.
 612         if not scalar:
 613             return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
 614                     allow_flow_plain=False, allow_block_plain=True,
 615                     allow_single_quoted=True, allow_double_quoted=True,
 616                     allow_block=False)
 617
 618         # Indicators and special characters.
 619         block_indicators = False
 620         flow_indicators = False
 621         line_breaks = False
 622         special_characters = False
 623
 624         # Whitespaces.
 625         inline_spaces = False          # non-space space+ non-space
 626         inline_breaks = False          # non-space break+ non-space
 627         leading_spaces = False         # ^ space+ (non-space | $)
 628         leading_breaks = False         # ^ break+ (non-space | $)
 629         trailing_spaces = False        # (^ | non-space) space+ $
 630         trailing_breaks = False        # (^ | non-space) break+ $
 631         inline_breaks_spaces = False   # non-space break+ space+ non-space
 632         mixed_breaks_spaces = False    # anything else
 633
 634         # Check document indicators.
 635         if scalar.startswith(u'---') or scalar.startswith(u'...'):
 636             block_indicators = True
 637             flow_indicators = True
 638
 639         # First character or preceded by a whitespace.
 640         preceeded_by_space = True
 641
 642         # Last character or followed by a whitespace.
 643         followed_by_space = (len(scalar) == 1 or
 644                 scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
 645
 646         # The current series of whitespaces contain plain spaces.
 647         spaces = False
 648
 649         # The current series of whitespaces contain line breaks.
 650         breaks = False
 651
 652         # The current series of whitespaces contain a space followed by a
 653         # break.
 654         mixed = False
 655
 656         # The current series of whitespaces start at the beginning of the
 657         # scalar.
 658         leading = False
 659
 660         index = 0
 661         while index < len(scalar):
 662             ch = scalar[index]
 663
 664             # Check for indicators.
 665
 666             if index == 0:
 667                 # Leading indicators are special characters.
 668                 if ch in u'#,[]{}#&*!|>\'\"%@`':
 669                     flow_indicators = True
 670                     block_indicators = True
 671                 if ch in u'?:':
 672                     flow_indicators = True
 673                     if followed_by_space:
 674                         block_indicators = True
 675                 if ch == u'-' and followed_by_space:
 676                     flow_indicators = True
 677                     block_indicators = True
 678             else:
 679                 # Some indicators cannot appear within a scalar as well.
 680                 if ch in u',?[]{}':
 681                     flow_indicators = True
 682                 if ch == u':':
 683                     flow_indicators = True
 684                     if followed_by_space:
 685                         block_indicators = True
 686                 if ch == u'#' and preceeded_by_space:
 687                     flow_indicators = True
 688                     block_indicators = True
 689
 690             # Check for line breaks, special, and unicode characters.
 691
 692             if ch in u'\n\x85\u2028\u2029':
 693                 line_breaks = True
 694             if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
 695                 if ch < u'\x80' or ch == u'\uFEFF': # '\uFEFF' is BOM.
 696                     special_characters = True
 697                 else:
 698                     unicode_characters = True
 699                     if not self.allow_unicode:
 700                         special_characters = True
 701
 702             # Spaces, line breaks, and how they are mixed. State machine.
 703
 704             # Start or continue series of whitespaces.
 705             if ch in u' \n\x85\u2028\u2029':
 706                 if spaces and breaks:
 707                     if ch != u' ':      # break+ (space+ break+)    => mixed
 708                         mixed = True
 709                 elif spaces:
 710                     if ch != u' ':      # (space+ break+)   => mixed
 711                         breaks = True
 712                         mixed = True
 713                 elif breaks:
 714                     if ch == u' ':      # break+ space+
 715                         spaces = True
 716                 else:
 717                     leading = (index == 0)
 718                     if ch == u' ':      # space+
 719                         spaces = True
 720                     else:               # break+
 721                         breaks = True
 722
 723             # Series of whitespaces ended with a non-space.
 724             elif spaces or breaks:
 725                 if leading:
 726                     if spaces and breaks:
 727                         mixed_breaks_spaces = True
 728                     elif spaces:
 729                         leading_spaces = True
 730                     elif breaks:
 731                         leading_breaks = True
 732                 else:
 733                     if mixed:
 734                         mixed_breaks_spaces = True
 735                     elif spaces and breaks:
 736                         inline_breaks_spaces = True
 737                     elif spaces:
 738                         inline_spaces = True
 739                     elif breaks:
 740                         inline_breaks = True
 741                 spaces = breaks = mixed = leading = False
 742
 743             # Series of whitespaces reach the end.
 744             if (spaces or breaks) and (index == len(scalar)-1):
 745                 if spaces and breaks:
 746                     mixed_breaks_spaces = True
 747                 elif spaces:
 748                     trailing_spaces = True
 749                     if leading:
 750                         leading_spaces = True
 751                 elif breaks:
 752                     trailing_breaks = True
 753                     if leading:
 754                         leading_breaks = True
 755                 spaces = breaks = mixed = leading = False
 756
 757             # Prepare for the next character.
 758             index += 1
 759             preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
 760             followed_by_space = (index+1 >= len(scalar) or
 761                     scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
 762
 763         # Let's decide what styles are allowed.
 764         allow_flow_plain = True
 765         allow_block_plain = True
 766         allow_single_quoted = True
 767         allow_double_quoted = True
 768         allow_block = True
 769
 770         # Leading and trailing whitespace are bad for plain scalars. We also
 771         # do not want to mess with leading whitespaces for block scalars.
 772         if leading_spaces or leading_breaks or trailing_spaces:
 773             allow_flow_plain = allow_block_plain = allow_block = False
 774
 775         # Trailing breaks are fine for block scalars, but unacceptable for
 776         # plain scalars.
 777         if trailing_breaks:
 778             allow_flow_plain = allow_block_plain = False
 779
 780         # The combination of (space+ break+) is only acceptable for block
 781         # scalars.
 782         if inline_breaks_spaces:
 783             allow_flow_plain = allow_block_plain = allow_single_quoted = False
 784
 785         # Mixed spaces and breaks, as well as special character are only
 786         # allowed for double quoted scalars.
 787         if mixed_breaks_spaces or special_characters:
 788             allow_flow_plain = allow_block_plain =  \
 789             allow_single_quoted = allow_block = False
 790
 791         # We don't emit multiline plain scalars.
 792         if line_breaks:
 793             allow_flow_plain = allow_block_plain = False
 794
 795         # Flow indicators are forbidden for flow plain scalars.
 796         if flow_indicators:
 797             allow_flow_plain = False
 798
 799         # Block indicators are forbidden for block plain scalars.
 800         if block_indicators:
 801             allow_block_plain = False
 802
 803         return ScalarAnalysis(scalar=scalar,
 804                 empty=False, multiline=line_breaks,
 805                 allow_flow_plain=allow_flow_plain,
 806                 allow_block_plain=allow_block_plain,
 807                 allow_single_quoted=allow_single_quoted,
 808                 allow_double_quoted=allow_double_quoted,
 809                 allow_block=allow_block)
 810
 811     # Writers.
 812
 813     def flush_stream(self):
 814         if hasattr(self.stream, 'flush'):
 815             self.stream.flush()
 816
 817     def write_stream_start(self):
 818         # Write BOM if needed.
 819         if self.encoding and self.encoding.startswith('utf-16'):
 820             self.stream.write(u'\xFF\xFE'.encode(self.encoding))
 821
 822     def write_stream_end(self):
 823         self.flush_stream()
 824
 825     def write_indicator(self, indicator, need_whitespace,
 826             whitespace=False, indention=False):
 827         if self.whitespace or not need_whitespace:
 828             data = indicator
 829         else:
 830             data = u' '+indicator
 831         self.whitespace = whitespace
 832         self.indention = self.indention and indention
 833         self.column += len(data)
 834         if self.encoding:
 835             data = data.encode(self.encoding)
 836         self.stream.write(data)
 837
 838     def write_indent(self):
 839         indent = self.indent or 0
 840         if not self.indention or self.column > indent   \
 841                 or (self.column == indent and not self.whitespace):
 842             self.write_line_break()
 843         if self.column < indent:
 844             self.whitespace = True
 845             data = u' '*(indent-self.column)
 846             self.column = indent
 847             if self.encoding:
 848                 data = data.encode(self.encoding)
 849             self.stream.write(data)
 850
 851     def write_line_break(self, data=None):
 852         if data is None:
 853             data = self.best_line_break
 854         self.whitespace = True
 855         self.indention = True
 856         self.line += 1
 857         self.column = 0
 858         if self.encoding:
 859             data = data.encode(self.encoding)
 860         self.stream.write(data)
 861
 862     def write_version_directive(self, version_text):
 863         data = u'%%YAML %s' % version_text
 864         if self.encoding:
 865             data = data.encode(self.encoding)
 866         self.stream.write(data)
 867         self.write_line_break()
 868
 869     def write_tag_directive(self, handle_text, prefix_text):
 870         data = u'%%TAG %s %s' % (handle_text, prefix_text)
 871         if self.encoding:
 872             data = data.encode(self.encoding)
 873         self.stream.write(data)
 874         self.write_line_break()
 875
 876     # Scalar streams.
 877
 878     def write_single_quoted(self, text, split=True):
 879         self.write_indicator(u'\'', True)
 880         spaces = False
 881         breaks = False
 882         start = end = 0
 883         while end <= len(text):
 884             ch = None
 885             if end < len(text):
 886                 ch = text[end]
 887             if spaces:
 888                 if ch is None or ch != u' ':
 889                     if start+1 == end and self.column > self.best_width and split   \
 890                             and start != 0 and end != len(text):
 891                         self.write_indent()
 892                     else:
 893                         data = text[start:end]
 894                         self.column += len(data)
 895                         if self.encoding:
 896                             data = data.encode(self.encoding)
 897                         self.stream.write(data)
 898                     start = end
 899             elif breaks:
 900                 if ch is None or ch not in u'\n\x85\u2028\u2029':
 901                     if text[start] == u'\n':
 902                         self.write_line_break()
 903                     for br in text[start:end]:
 904                         if br == u'\n':
 905                             self.write_line_break()
 906                         else:
 907                             self.write_line_break(br)
 908                     self.write_indent()
 909                     start = end
 910             else:
 911                 if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
 912                     if start < end:
 913                         data = text[start:end]
 914                         self.column += len(data)
 915                         if self.encoding:
 916                             data = data.encode(self.encoding)
 917                         self.stream.write(data)
 918                         start = end
 919                     if ch == u'\'':
 920                         data = u'\'\''
 921                         self.column += 2
 922                         if self.encoding:
 923                             data = data.encode(self.encoding)
 924                         self.stream.write(data)
 925                         start = end + 1
 926             if ch is not None:
 927                 spaces = (ch == u' ')
 928                 breaks = (ch in u'\n\x85\u2028\u2029')
 929             end += 1
 930         self.write_indicator(u'\'', False)
 931
    # Characters that have a short escape in double-quoted scalars, mapped to
    # the character written after the backslash.  `write_double_quoted` looks
    # characters up here first and falls back to a numeric \x, \u or \U
    # escape for anything that is not listed.
    ESCAPE_REPLACEMENTS = {
        u'\0':      u'0',
        u'\x07':    u'a',
        u'\x08':    u'b',
        u'\x09':    u't',
        u'\x0A':    u'n',
        u'\x0B':    u'v',
        u'\x0C':    u'f',
        u'\x0D':    u'r',
        u'\x1B':    u'e',
        u'\"':      u'\"',
        u'\\':      u'\\',
        u'\x85':    u'N',
        u'\xA0':    u'_',
        u'\u2028':  u'L',
        u'\u2029':  u'P',
    }
 949
 950     def write_double_quoted(self, text, split=True):
 951         self.write_indicator(u'"', True)
 952         start = end = 0
 953         while end <= len(text):
 954             ch = None
 955             if end < len(text):
 956                 ch = text[end]
 957             if ch is None or ch in u'"\\'   \
 958                     or not (u'\x20' <= ch <= u'\x7E'
 959                             or (self.allow_unicode and ch > u'\x7F'
 960                                 and ch not in u'\x85\u2028\u2029')):
 961                 if start < end:
 962                     data = text[start:end]
 963                     self.column += len(data)
 964                     if self.encoding:
 965                         data = data.encode(self.encoding)
 966                     self.stream.write(data)
 967                     start = end
 968                 if ch is not None:
 969                     if ch in self.ESCAPE_REPLACEMENTS:
 970                         data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
 971                     elif ch <= u'\xFF':
 972                         data = u'\\x%02X' % ord(ch)
 973                     elif ch <= u'\uFFFF':
 974                         data = u'\\u%04X' % ord(ch)
 975                     else:
 976                         data = u'\\U%08X' % ord(ch)
 977                     self.column += len(data)
 978                     if self.encoding:
 979                         data = data.encode(self.encoding)
 980                     self.stream.write(data)
 981                     start = end+1
 982             if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
 983                     and self.column+(end-start) > self.best_width and split:
 984                 data = text[start:end]+u'\\'
 985                 if start < end:
 986                     start = end
 987                 self.column += len(data)
 988                 if self.encoding:
 989                     data = data.encode(self.encoding)
 990                 self.stream.write(data)
 991                 self.write_indent()
 992                 self.whitespace = False
 993                 self.indention = False
 994                 if ch == u' ':
 995                     data = u'\\'
 996                     self.column += len(data)
 997                     if self.encoding:
 998                         data = data.encode(self.encoding)
 999                     self.stream.write(data)
1000             end += 1
1001         self.write_indicator(u'"', False)
1002
1003     def determine_chomp(self, text):
1004         tail = text[-2:]
1005         while len(tail) < 2:
1006             tail = u' '+tail
1007         if tail[-1] in u'\n\x85\u2028\u2029':
1008             if tail[-2] in u'\n\x85\u2028\u2029':
1009                 return u'+'
1010             else:
1011                 return u''
1012         else:
1013             return u'-'
1014
1015     def write_folded(self, text):
1016         chomp = self.determine_chomp(text)
1017         self.write_indicator(u'>'+chomp, True)
1018         self.write_indent()
1019         leading_space = False
1020         spaces = False
1021         breaks = False
1022         start = end = 0
1023         while end <= len(text):
1024             ch = None
1025             if end < len(text):
1026                 ch = text[end]
1027             if breaks:
1028                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1029                     if not leading_space and ch is not None and ch != u' '  \
1030                             and text[start] == u'\n':
1031                         self.write_line_break()
1032                     leading_space = (ch == u' ')
1033                     for br in text[start:end]:
1034                         if br == u'\n':
1035                             self.write_line_break()
1036                         else:
1037                             self.write_line_break(br)
1038                     if ch is not None:
1039                         self.write_indent()
1040                     start = end
1041             elif spaces:
1042                 if ch != u' ':
1043                     if start+1 == end and self.column > self.best_width:
1044                         self.write_indent()
1045                     else:
1046                         data = text[start:end]
1047                         self.column += len(data)
1048                         if self.encoding:
1049                             data = data.encode(self.encoding)
1050                         self.stream.write(data)
1051                     start = end
1052             else:
1053                 if ch is None or ch in u' \n\x85\u2028\u2029':
1054                     data = text[start:end]
1055                     if self.encoding:
1056                         data = data.encode(self.encoding)
1057                     self.stream.write(data)
1058                     if ch is None:
1059                         self.write_line_break()
1060                     start = end
1061             if ch is not None:
1062                 breaks = (ch in u'\n\x85\u2028\u2029')
1063                 spaces = (ch == u' ')
1064             end += 1
1065
1066     def write_literal(self, text):
1067         chomp = self.determine_chomp(text)
1068         self.write_indicator(u'|'+chomp, True)
1069         self.write_indent()
1070         breaks = False
1071         start = end = 0
1072         while end <= len(text):
1073             ch = None
1074             if end < len(text):
1075                 ch = text[end]
1076             if breaks:
1077                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1078                     for br in text[start:end]:
1079                         if br == u'\n':
1080                             self.write_line_break()
1081                         else:
1082                             self.write_line_break(br)
1083                     if ch is not None:
1084                         self.write_indent()
1085                     start = end
1086             else:
1087                 if ch is None or ch in u'\n\x85\u2028\u2029':
1088                     data = text[start:end]
1089                     if self.encoding:
1090                         data = data.encode(self.encoding)
1091                     self.stream.write(data)
1092                     if ch is None:
1093                         self.write_line_break()
1094                     start = end
1095             if ch is not None:
1096                 breaks = (ch in u'\n\x85\u2028\u2029')
1097             end += 1
1098
1099     def write_plain(self, text, split=True):
1100         if not text:
1101             return
1102         if not self.whitespace:
1103             data = u' '
1104             self.column += len(data)
1105             if self.encoding:
1106                 data = data.encode(self.encoding)
1107             self.stream.write(data)
1108         self.writespace = False
1109         self.indention = False
1110         spaces = False
1111         breaks = False
1112         start = end = 0
1113         while end <= len(text):
1114             ch = None
1115             if end < len(text):
1116                 ch = text[end]
1117             if spaces:
1118                 if ch != u' ':
1119                     if start+1 == end and self.column > self.best_width and split:
1120                         self.write_indent()
1121                         self.writespace = False
1122                         self.indention = False
1123                     else:
1124                         data = text[start:end]
1125                         self.column += len(data)
1126                         if self.encoding:
1127                             data = data.encode(self.encoding)
1128                         self.stream.write(data)
1129                     start = end
1130             elif breaks:
1131                 if ch not in u'\n\x85\u2028\u2029':
1132                     if text[start] == u'\n':
1133                         self.write_line_break()
1134                     for br in text[start:end]:
1135                         if br == u'\n':
1136                             self.write_line_break()
1137                         else:
1138                             self.write_line_break(br)
1139                     self.write_indent()
1140                     self.whitespace = False
1141                     self.indention = False
1142                     start = end
1143             else:
1144                 if ch is None or ch in u' \n\x85\u2028\u2029':
1145                     data = text[start:end]
1146                     self.column += len(data)
1147                     if self.encoding:
1148                         data = data.encode(self.encoding)
1149                     self.stream.write(data)
1150                     start = end
1151             if ch is not None:
1152                 spaces = (ch == u' ')
1153                 breaks = (ch in u'\n\x85\u2028\u2029')
1154             end += 1
1155