lib/yaml/emitter.py

   1
   2 # Emitter expects events obeying the following grammar:
   3 # stream ::= STREAM-START document* STREAM-END
   4 # document ::= DOCUMENT-START node DOCUMENT-END
   5 # node ::= SCALAR | sequence | mapping
   6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
   7 # mapping ::= MAPPING-START (node node)* MAPPING-END
   8
   9 __all__ = ['Emitter', 'EmitterError']
  10
  11 from error import YAMLError
  12 from events import *
  13
  14 import re
  15
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid
    anchors, tags, tag handles and versions."""
    pass
  18
  19 class ScalarAnalysis:
  20     def __init__(self, scalar, empty, multiline,
  21             allow_flow_plain, allow_block_plain,
  22             allow_single_quoted, allow_double_quoted,
  23             allow_block):
  24         self.scalar = scalar
  25         self.empty = empty
  26         self.multiline = multiline
  27         self.allow_flow_plain = allow_flow_plain
  28         self.allow_block_plain = allow_block_plain
  29         self.allow_single_quoted = allow_single_quoted
  30         self.allow_double_quoted = allow_double_quoted
  31         self.allow_block = allow_block
  32
  33 class Emitter:
  34
    # Tag prefixes available in every document, mapped to the handle that
    # abbreviates them. A copy of this mapping becomes `tag_prefixes` each
    # time a DOCUMENT-START event is processed.
    DEFAULT_TAG_PREFIXES = {
        u'!' : u'!',
        u'tag:yaml.org,2002:' : u'!!',
    }
  39
  40     def __init__(self, stream, canonical=None, indent=None, width=None,
  41             allow_unicode=None, line_break=None):
  42
  43         # The stream should have the methods `write` and possibly `flush`.
  44         self.stream = stream
  45
  46         # Encoding can be overriden by STREAM-START.
  47         self.encoding = None
  48
  49         # Emitter is a state machine with a stack of states to handle nested
  50         # structures.
  51         self.states = []
  52         self.state = self.expect_stream_start
  53
  54         # Current event and the event queue.
  55         self.events = []
  56         self.event = None
  57
  58         # The current indentation level and the stack of previous indents.
  59         self.indents = []
  60         self.indent = None
  61
  62         # Flow level.
  63         self.flow_level = 0
  64
  65         # Contexts.
  66         self.root_context = False
  67         self.sequence_context = False
  68         self.mapping_context = False
  69         self.simple_key_context = False
  70
  71         # Characteristics of the last emitted character:
  72         #  - current position.
  73         #  - is it a whitespace?
  74         #  - is it an indention character
  75         #    (indentation space, '-', '?', or ':')?
  76         self.line = 0
  77         self.column = 0
  78         self.whitespace = True
  79         self.indention = True
  80
  81         # Formatting details.
  82         self.canonical = canonical
  83         self.allow_unicode = allow_unicode
  84         self.best_indent = 2
  85         if indent and 1 < indent < 10:
  86             self.best_indent = indent
  87         self.best_width = 80
  88         if width and width > self.best_indent*2:
  89             self.best_width = width
  90         self.best_line_break = u'\n'
  91         if line_break in [u'\r', u'\n', u'\r\n']:
  92             self.best_line_break = line_break
  93
  94         # Tag prefixes.
  95         self.tag_prefixes = None
  96
  97         # Prepared anchor and tag.
  98         self.prepared_anchor = None
  99         self.prepared_tag = None
 100
 101         # Scalar analysis and style.
 102         self.analysis = None
 103         self.style = None
 104
 105     def emit(self, event):
 106         self.events.append(event)
 107         while not self.need_more_events():
 108             self.event = self.events.pop(0)
 109             self.state()
 110             self.event = None
 111
 112     # In some cases, we wait for a few next events before emitting.
 113
 114     def need_more_events(self):
 115         if not self.events:
 116             return True
 117         event = self.events[0]
 118         if isinstance(event, DocumentStartEvent):
 119             return self.need_events(1)
 120         elif isinstance(event, SequenceStartEvent):
 121             return self.need_events(2)
 122         elif isinstance(event, MappingStartEvent):
 123             return self.need_events(3)
 124         else:
 125             return False
 126
 127     def need_events(self, count):
 128         level = 0
 129         for event in self.events[1:]:
 130             if isinstance(event, (DocumentStartEvent, CollectionStartEvent)):
 131                 level += 1
 132             elif isinstance(event, (DocumentEndEvent, CollectionEndEvent)):
 133                 level -= 1
 134             elif isinstance(event, StreamEndEvent):
 135                 level = -1
 136             if level < 0:
 137                 return False
 138         return (len(self.events) < count+1)
 139
 140     def increase_indent(self, flow=False, indentless=False):
 141         self.indents.append(self.indent)
 142         if self.indent is None:
 143             if flow:
 144                 self.indent = self.best_indent
 145             else:
 146                 self.indent = 0
 147         elif not indentless:
 148             self.indent += self.best_indent
 149
 150     # States.
 151
 152     # Stream handlers.
 153
 154     def expect_stream_start(self):
 155         if isinstance(self.event, StreamStartEvent):
 156             if self.event.encoding:
 157                 self.encoding = self.event.encoding
 158             self.write_stream_start()
 159             self.state = self.expect_first_document_start
 160         else:
 161             raise EmitterError("expected StreamStartEvent, but got %s"
 162                     % self.event)
 163
 164     def expect_nothing(self):
 165         raise EmitterError("expected nothing, but got %s" % self.event)
 166
 167     # Document handlers.
 168
 169     def expect_first_document_start(self):
 170         return self.expect_document_start(first=True)
 171
 172     def expect_document_start(self, first=False):
 173         if isinstance(self.event, DocumentStartEvent):
 174             if self.event.version:
 175                 version_text = self.prepare_version(self.event.version)
 176                 self.write_version_directive(version_text)
 177             self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
 178             if self.event.tags:
 179                 handles = self.event.tags.keys()
 180                 handles.sort()
 181                 for handle in handles:
 182                     prefix = self.event.tags[handle]
 183                     self.tag_prefixes[prefix] = handle
 184                     handle_text = self.prepare_tag_handle(handle)
 185                     prefix_text = self.prepare_tag_prefix(prefix)
 186                     self.write_tag_directive(handle_text, prefix_text)
 187             implicit = (first and not self.event.explicit and not self.canonical
 188                     and not self.event.version and not self.event.tags
 189                     and not self.check_empty_document())
 190             if not implicit:
 191                 self.write_indent()
 192                 self.write_indicator(u'---', True)
 193                 if self.canonical:
 194                     self.write_indent()
 195             self.state = self.expect_document_root
 196         elif isinstance(self.event, StreamEndEvent):
 197             self.write_stream_end()
 198             self.state = self.expect_nothing
 199         else:
 200             raise EmitterError("expected DocumentStartEvent, but got %s"
 201                     % self.event)
 202
 203     def expect_document_end(self):
 204         if isinstance(self.event, DocumentEndEvent):
 205             self.write_indent()
 206             if self.event.explicit:
 207                 self.write_indicator(u'...', True)
 208                 self.write_indent()
 209             self.flush_stream()
 210             self.state = self.expect_document_start
 211         else:
 212             raise EmitterError("expected DocumentEndEvent, but got %s"
 213                     % self.event)
 214
 215     def expect_document_root(self):
 216         self.states.append(self.expect_document_end)
 217         self.expect_node(root=True)
 218
 219     # Node handlers.
 220
 221     def expect_node(self, root=False, sequence=False, mapping=False,
 222             simple_key=False):
 223         self.root_context = root
 224         self.sequence_context = sequence
 225         self.mapping_context = mapping
 226         self.simple_key_context = simple_key
 227         if isinstance(self.event, AliasEvent):
 228             self.expect_alias()
 229         elif isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
 230             self.process_anchor(u'&')
 231             self.process_tag()
 232             if isinstance(self.event, ScalarEvent):
 233                 self.expect_scalar()
 234             elif isinstance(self.event, SequenceStartEvent):
 235                 if self.flow_level or self.canonical or self.event.flow_style   \
 236                         or self.check_empty_sequence():
 237                     self.expect_flow_sequence()
 238                 else:
 239                     self.expect_block_sequence()
 240             elif isinstance(self.event, MappingStartEvent):
 241                 if self.flow_level or self.canonical or self.event.flow_style   \
 242                         or self.check_empty_mapping():
 243                     self.expect_flow_mapping()
 244                 else:
 245                     self.expect_block_mapping()
 246         else:
 247             raise EmitterError("expected NodeEvent, but got %s" % self.event)
 248
 249     def expect_alias(self):
 250         if self.event.anchor is None:
 251             raise EmitterError("anchor is not specified for alias")
 252         self.process_anchor(u'*')
 253         self.state = self.states.pop()
 254
 255     def expect_scalar(self):
 256         self.increase_indent(flow=True)
 257         self.process_scalar()
 258         self.indent = self.indents.pop()
 259         self.state = self.states.pop()
 260
 261     # Flow sequence handlers.
 262
 263     def expect_flow_sequence(self):
 264         self.write_indicator(u'[', True, whitespace=True)
 265         self.flow_level += 1
 266         self.increase_indent(flow=True)
 267         self.state = self.expect_first_flow_sequence_item
 268
 269     def expect_first_flow_sequence_item(self):
 270         if isinstance(self.event, SequenceEndEvent):
 271             self.indent = self.indents.pop()
 272             self.flow_level -= 1
 273             self.write_indicator(u']', False)
 274             self.state = self.states.pop()
 275         else:
 276             if self.canonical or self.column > self.best_width:
 277                 self.write_indent()
 278             self.states.append(self.expect_flow_sequence_item)
 279             self.expect_node(sequence=True)
 280
 281     def expect_flow_sequence_item(self):
 282         if isinstance(self.event, SequenceEndEvent):
 283             self.indent = self.indents.pop()
 284             self.flow_level -= 1
 285             if self.canonical:
 286                 self.write_indicator(u',', False)
 287                 self.write_indent()
 288             self.write_indicator(u']', False)
 289             self.state = self.states.pop()
 290         else:
 291             self.write_indicator(u',', False)
 292             if self.canonical or self.column > self.best_width:
 293                 self.write_indent()
 294             self.states.append(self.expect_flow_sequence_item)
 295             self.expect_node(sequence=True)
 296
 297     # Flow mapping handlers.
 298
 299     def expect_flow_mapping(self):
 300         self.write_indicator(u'{', True, whitespace=True)
 301         self.flow_level += 1
 302         self.increase_indent(flow=True)
 303         self.state = self.expect_first_flow_mapping_key
 304
 305     def expect_first_flow_mapping_key(self):
 306         if isinstance(self.event, MappingEndEvent):
 307             self.indent = self.indents.pop()
 308             self.flow_level -= 1
 309             self.write_indicator(u'}', False)
 310             self.state = self.states.pop()
 311         else:
 312             if self.canonical or self.column > self.best_width:
 313                 self.write_indent()
 314             if not self.canonical and self.check_simple_key():
 315                 self.states.append(self.expect_flow_mapping_simple_value)
 316                 self.expect_node(mapping=True, simple_key=True)
 317             else:
 318                 self.write_indicator(u'?', True)
 319                 self.states.append(self.expect_flow_mapping_value)
 320                 self.expect_node(mapping=True)
 321
 322     def expect_flow_mapping_key(self):
 323         if isinstance(self.event, MappingEndEvent):
 324             self.indent = self.indents.pop()
 325             self.flow_level -= 1
 326             if self.canonical:
 327                 self.write_indicator(u',', False)
 328                 self.write_indent()
 329             self.write_indicator(u'}', False)
 330             self.state = self.states.pop()
 331         else:
 332             self.write_indicator(u',', False)
 333             if self.canonical or self.column > self.best_width:
 334                 self.write_indent()
 335             if not self.canonical and self.check_simple_key():
 336                 self.states.append(self.expect_flow_mapping_simple_value)
 337                 self.expect_node(mapping=True, simple_key=True)
 338             else:
 339                 self.write_indicator(u'?', True)
 340                 self.states.append(self.expect_flow_mapping_value)
 341                 self.expect_node(mapping=True)
 342
 343     def expect_flow_mapping_simple_value(self):
 344         self.write_indicator(u':', False)
 345         self.states.append(self.expect_flow_mapping_key)
 346         self.expect_node(mapping=True)
 347
 348     def expect_flow_mapping_value(self):
 349         if self.canonical or self.column > self.best_width:
 350             self.write_indent()
 351         self.write_indicator(u':', True)
 352         self.states.append(self.expect_flow_mapping_key)
 353         self.expect_node(mapping=True)
 354
 355     # Block sequence handlers.
 356
 357     def expect_block_sequence(self):
 358         indentless = (self.mapping_context and not self.indention)
 359         self.increase_indent(flow=False, indentless=indentless)
 360         self.state = self.expect_first_block_sequence_item
 361
 362     def expect_first_block_sequence_item(self):
 363         return self.expect_block_sequence_item(first=True)
 364
 365     def expect_block_sequence_item(self, first=False):
 366         if not first and isinstance(self.event, SequenceEndEvent):
 367             self.indent = self.indents.pop()
 368             self.state = self.states.pop()
 369         else:
 370             self.write_indent()
 371             self.write_indicator(u'-', True, indention=True)
 372             self.states.append(self.expect_block_sequence_item)
 373             self.expect_node(sequence=True)
 374
 375     # Block mapping handlers.
 376
 377     def expect_block_mapping(self):
 378         self.increase_indent(flow=False)
 379         self.state = self.expect_first_block_mapping_key
 380
 381     def expect_first_block_mapping_key(self):
 382         return self.expect_block_mapping_key(first=True)
 383
 384     def expect_block_mapping_key(self, first=False):
 385         if not first and isinstance(self.event, MappingEndEvent):
 386             self.indent = self.indents.pop()
 387             self.state = self.states.pop()
 388         else:
 389             self.write_indent()
 390             if self.check_simple_key():
 391                 self.states.append(self.expect_block_mapping_simple_value)
 392                 self.expect_node(mapping=True, simple_key=True)
 393             else:
 394                 self.write_indicator(u'?', True, indention=True)
 395                 self.states.append(self.expect_block_mapping_value)
 396                 self.expect_node(mapping=True)
 397
 398     def expect_block_mapping_simple_value(self):
 399         self.write_indicator(u':', False)
 400         self.states.append(self.expect_block_mapping_key)
 401         self.expect_node(mapping=True)
 402
 403     def expect_block_mapping_value(self):
 404         self.write_indent()
 405         self.write_indicator(u':', True, indention=True)
 406         self.states.append(self.expect_block_mapping_key)
 407         self.expect_node(mapping=True)
 408
 409     # Checkers.
 410
 411     def check_empty_sequence(self):
 412         return (isinstance(self.event, SequenceStartEvent) and self.events
 413                 and isinstance(self.events[0], SequenceEndEvent))
 414
 415     def check_empty_mapping(self):
 416         return (isinstance(self.event, MappingStartEvent) and self.events
 417                 and isinstance(self.events[0], MappingEndEvent))
 418
 419     def check_empty_document(self):
 420         if not isinstance(self.event, DocumentStartEvent) or not self.events:
 421             return False
 422         event = self.events[0]
 423         return (isinstance(event, ScalarEvent) and event.anchor is None
 424                 and event.tag is None and event.implicit and event.value == u'')
 425
 426     def check_simple_key(self):
 427         length = 0
 428         if isinstance(self.event, NodeEvent) and self.event.anchor is not None:
 429             if self.prepared_anchor is None:
 430                 self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 431             length += len(self.prepared_anchor)
 432         if isinstance(self.event, (ScalarEvent, CollectionStartEvent))  \
 433                 and self.event.tag is not None:
 434             if self.prepared_tag is None:
 435                 self.prepared_tag = self.prepare_tag(self.event.tag)
 436             length += len(self.prepared_tag)
 437         if isinstance(self.event, ScalarEvent):
 438             if self.analysis is None:
 439                 self.analysis = self.analyze_scalar(self.event.value)
 440             length += len(self.analysis.scalar)
 441         return (length < 128 and (isinstance(self.event, AliasEvent)
 442             or (isinstance(self.event, ScalarEvent)
 443                     and not self.analysis.empty and not self.analysis.multiline)
 444             or self.check_empty_sequence() or self.check_empty_mapping()))
 445
 446     # Anchor, Tag, and Scalar processors.
 447
 448     def process_anchor(self, indicator):
 449         if self.event.anchor is None:
 450             self.prepared_anchor = None
 451             return
 452         if self.prepared_anchor is None:
 453             self.prepared_anchor = self.prepare_anchor(self.event.anchor)
 454         if self.prepared_anchor:
 455             self.write_indicator(indicator+self.prepared_anchor, True)
 456         self.prepared_anchor = None
 457
 458     def process_tag(self):
 459         tag = self.event.tag
 460         if isinstance(self.event, ScalarEvent):
 461             if self.style is None:
 462                 self.style = self.choose_scalar_style()
 463             if ((not self.canonical or tag is None) and
 464                 ((self.style == '' and self.event.implicit[0])
 465                         or (self.style != '' and self.event.implicit[1]))):
 466                 self.prepared_tag = None
 467                 return
 468             if self.event.implicit[0] and tag is None:
 469                 tag = u'!'
 470                 self.prepared_tag = None
 471         else:
 472             if (not self.canonical or tag is None) and self.event.implicit:
 473                 self.prepared_tag = None
 474                 return
 475         if tag is None:
 476             raise EmitterError("tag is not specified")
 477         if self.prepared_tag is None:
 478             self.prepared_tag = self.prepare_tag(tag)
 479         if self.prepared_tag:
 480             self.write_indicator(self.prepared_tag, True)
 481         self.prepared_tag = None
 482
 483     def choose_scalar_style(self):
 484         if self.analysis is None:
 485             self.analysis = self.analyze_scalar(self.event.value)
 486         if self.event.style == '"' or self.canonical:
 487             return '"'
 488         if not self.event.style and self.event.implicit[0]:
 489             if (not (self.simple_key_context and
 490                     (self.analysis.empty or self.analysis.multiline))
 491                 and (self.flow_level and self.analysis.allow_flow_plain
 492                     or (not self.flow_level and self.analysis.allow_block_plain))):
 493                 return ''
 494         if self.event.style and self.event.style in '|>':
 495             if not self.flow_level and self.analysis.allow_block:
 496                 return self.event.style
 497         if not self.event.style or self.event.style == '\'':
 498             if (self.analysis.allow_single_quoted and
 499                     not (self.simple_key_context and self.analysis.multiline)):
 500                 return '\''
 501         return '"'
 502
 503     def process_scalar(self):
 504         if self.analysis is None:
 505             self.analysis = self.analyze_scalar(self.event.value)
 506         if self.style is None:
 507             self.style = self.choose_scalar_style()
 508         split = (not self.simple_key_context)
 509         #if self.analysis.multiline and split    \
 510         #        and (not self.style or self.style in '\'\"'):
 511         #    self.write_indent()
 512         if self.style == '"':
 513             self.write_double_quoted(self.analysis.scalar, split)
 514         elif self.style == '\'':
 515             self.write_single_quoted(self.analysis.scalar, split)
 516         elif self.style == '>':
 517             self.write_folded(self.analysis.scalar)
 518         elif self.style == '|':
 519             self.write_literal(self.analysis.scalar)
 520         else:
 521             self.write_plain(self.analysis.scalar, split)
 522         self.analysis = None
 523         self.style = None
 524
 525     # Analyzers.
 526
 527     def prepare_version(self, version):
 528         major, minor = version
 529         if major != 1:
 530             raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
 531         return u'%d.%d' % (major, minor)
 532
 533     def prepare_tag_handle(self, handle):
 534         if not handle:
 535             raise EmitterError("tag handle must not be empty")
 536         if handle[0] != u'!' or handle[-1] != u'!':
 537             raise EmitterError("tag handle must start and end with '!': %r"
 538                     % (handle.encode('utf-8')))
 539         for ch in handle[1:-1]:
 540             if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'    \
 541                     or ch in u'-_'):
 542                 raise EmitterError("invalid character %r in the tag handle: %r"
 543                         % (ch.encode('utf-8'), handle.encode('utf-8')))
 544         return handle
 545
 546     def prepare_tag_prefix(self, prefix):
 547         if not prefix:
 548             raise EmitterError("tag prefix must not be empty")
 549         chunks = []
 550         start = end = 0
 551         if prefix[0] == u'!':
 552             end = 1
 553         while end < len(prefix):
 554             ch = prefix[end]
 555             if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
 556                     or ch in u'-;/?!:@&=+$,_.~*\'()[]':
 557                 end += 1
 558             else:
 559                 if start < end:
 560                     chunks.append(prefix[start:end])
 561                 start = end = end+1
 562                 data = ch.encode('utf-8')
 563                 for ch in data:
 564                     chunks.append(u'%%%02X' % ord(ch))
 565         if start < end:
 566             chunks.append(prefix[start:end])
 567         return u''.join(chunks)
 568
 569     def prepare_tag(self, tag):
 570         if not tag:
 571             raise EmitterError("tag must not be empty")
 572         if tag == u'!':
 573             return tag
 574         handle = None
 575         suffix = tag
 576         for prefix in self.tag_prefixes:
 577             if tag.startswith(prefix)   \
 578                     and (prefix == u'!' or len(prefix) < len(tag)):
 579                 handle = self.tag_prefixes[prefix]
 580                 suffix = tag[len(prefix):]
 581         chunks = []
 582         start = end = 0
 583         while end < len(suffix):
 584             ch = suffix[end]
 585             if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'  \
 586                     or ch in u'-;/?:@&=+$,_.~*\'()[]'   \
 587                     or (ch == u'!' and handle != u'!'):
 588                 end += 1
 589             else:
 590                 if start < end:
 591                     chunks.append(suffix[start:end])
 592                 start = end = end+1
 593                 data = ch.encode('utf-8')
 594                 for ch in data:
 595                     chunks.append(u'%%%02X' % ord(ch))
 596         if start < end:
 597             chunks.append(suffix[start:end])
 598         suffix_text = u''.join(chunks)
 599         if handle:
 600             return u'%s%s' % (handle, suffix_text)
 601         else:
 602             return u'!<%s>' % suffix_text
 603
 604     def prepare_anchor(self, anchor):
 605         if not anchor:
 606             raise EmitterError("anchor must not be empty")
 607         for ch in anchor:
 608             if not (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z'    \
 609                     or ch in u'-_'):
 610                 raise EmitterError("invalid character %r in the anchor: %r"
 611                         % (ch.encode('utf-8'), anchor.encode('utf-8')))
 612         return anchor
 613
 614     def analyze_scalar(self, scalar):
 615
 616         # Empty scalar is a special case.
 617         if not scalar:
 618             return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
 619                     allow_flow_plain=False, allow_block_plain=True,
 620                     allow_single_quoted=True, allow_double_quoted=True,
 621                     allow_block=False)
 622
 623         # Indicators and special characters.
 624         block_indicators = False
 625         flow_indicators = False
 626         line_breaks = False
 627         special_characters = False
 628
 629         # Whitespaces.
 630         inline_spaces = False          # non-space space+ non-space
 631         inline_breaks = False          # non-space break+ non-space
 632         leading_spaces = False         # ^ space+ (non-space | $)
 633         leading_breaks = False         # ^ break+ (non-space | $)
 634         trailing_spaces = False        # (^ | non-space) space+ $
 635         trailing_breaks = False        # (^ | non-space) break+ $
 636         inline_breaks_spaces = False   # non-space break+ space+ non-space
 637         mixed_breaks_spaces = False    # anything else
 638
 639         # Check document indicators.
 640         if scalar.startswith(u'---') or scalar.startswith(u'...'):
 641             block_indicators = True
 642             flow_indicators = True
 643
 644         # First character or preceded by a whitespace.
 645         preceeded_by_space = True
 646
 647         # Last character or followed by a whitespace.
 648         followed_by_space = (len(scalar) == 1 or
 649                 scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')
 650
 651         # The current series of whitespaces contain plain spaces.
 652         spaces = False
 653
 654         # The current series of whitespaces contain line breaks.
 655         breaks = False
 656
 657         # The current series of whitespaces contain a space followed by a
 658         # break.
 659         mixed = False
 660
 661         # The current series of whitespaces start at the beginning of the
 662         # scalar.
 663         leading = False
 664
 665         index = 0
 666         while index < len(scalar):
 667             ch = scalar[index]
 668
 669             # Check for indicators.
 670
 671             if index == 0:
 672                 # Leading indicators are special characters.
 673                 if ch in u'#,[]{}#&*!|>\'\"%@`':
 674                     flow_indicators = True
 675                     block_indicators = True
 676                 if ch in u'?:':
 677                     flow_indicators = True
 678                     if followed_by_space:
 679                         block_indicators = True
 680                 if ch == u'-' and followed_by_space:
 681                     flow_indicators = True
 682                     block_indicators = True
 683             else:
 684                 # Some indicators cannot appear within a scalar as well.
 685                 if ch in u',?[]{}':
 686                     flow_indicators = True
 687                 if ch == u':':
 688                     flow_indicators = True
 689                     if followed_by_space:
 690                         block_indicators = True
 691                 if ch == u'#' and preceeded_by_space:
 692                     flow_indicators = True
 693                     block_indicators = True
 694
 695             # Check for line breaks, special, and unicode characters.
 696
 697             if ch in u'\n\x85\u2028\u2029':
 698                 line_breaks = True
 699             if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
 700                 if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
 701                         or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
 702                     unicode_characters = True
 703                     if not self.allow_unicode:
 704                         special_characters = True
 705                 else:
 706                     special_characters = True
 707
 708             # Spaces, line breaks, and how they are mixed. State machine.
 709
 710             # Start or continue series of whitespaces.
 711             if ch in u' \n\x85\u2028\u2029':
 712                 if spaces and breaks:
 713                     if ch != u' ':      # break+ (space+ break+)    => mixed
 714                         mixed = True
 715                 elif spaces:
 716                     if ch != u' ':      # (space+ break+)   => mixed
 717                         breaks = True
 718                         mixed = True
 719                 elif breaks:
 720                     if ch == u' ':      # break+ space+
 721                         spaces = True
 722                 else:
 723                     leading = (index == 0)
 724                     if ch == u' ':      # space+
 725                         spaces = True
 726                     else:               # break+
 727                         breaks = True
 728
 729             # Series of whitespaces ended with a non-space.
 730             elif spaces or breaks:
 731                 if leading:
 732                     if spaces and breaks:
 733                         mixed_breaks_spaces = True
 734                     elif spaces:
 735                         leading_spaces = True
 736                     elif breaks:
 737                         leading_breaks = True
 738                 else:
 739                     if mixed:
 740                         mixed_breaks_spaces = True
 741                     elif spaces and breaks:
 742                         inline_breaks_spaces = True
 743                     elif spaces:
 744                         inline_spaces = True
 745                     elif breaks:
 746                         inline_breaks = True
 747                 spaces = breaks = mixed = leading = False
 748
 749             # Series of whitespaces reach the end.
 750             if (spaces or breaks) and (index == len(scalar)-1):
 751                 if spaces and breaks:
 752                     mixed_breaks_spaces = True
 753                 elif spaces:
 754                     trailing_spaces = True
 755                     if leading:
 756                         leading_spaces = True
 757                 elif breaks:
 758                     trailing_breaks = True
 759                     if leading:
 760                         leading_breaks = True
 761                 spaces = breaks = mixed = leading = False
 762
 763             # Prepare for the next character.
 764             index += 1
 765             preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
 766             followed_by_space = (index+1 >= len(scalar) or
 767                     scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')
 768
 769         # Let's decide what styles are allowed.
 770         allow_flow_plain = True
 771         allow_block_plain = True
 772         allow_single_quoted = True
 773         allow_double_quoted = True
 774         allow_block = True
 775
 776         # Leading and trailing whitespace are bad for plain scalars. We also
 777         # do not want to mess with leading whitespaces for block scalars.
 778         if leading_spaces or leading_breaks or trailing_spaces:
 779             allow_flow_plain = allow_block_plain = allow_block = False
 780
 781         # Trailing breaks are fine for block scalars, but unacceptable for
 782         # plain scalars.
 783         if trailing_breaks:
 784             allow_flow_plain = allow_block_plain = False
 785
 786         # The combination of (space+ break+) is only acceptable for block
 787         # scalars.
 788         if inline_breaks_spaces:
 789             allow_flow_plain = allow_block_plain = allow_single_quoted = False
 790
 791         # Mixed spaces and breaks, as well as special character are only
 792         # allowed for double quoted scalars.
 793         if mixed_breaks_spaces or special_characters:
 794             allow_flow_plain = allow_block_plain =  \
 795             allow_single_quoted = allow_block = False
 796
 797         # We don't emit multiline plain scalars.
 798         if line_breaks:
 799             allow_flow_plain = allow_block_plain = False
 800
 801         # Flow indicators are forbidden for flow plain scalars.
 802         if flow_indicators:
 803             allow_flow_plain = False
 804
 805         # Block indicators are forbidden for block plain scalars.
 806         if block_indicators:
 807             allow_block_plain = False
 808
 809         return ScalarAnalysis(scalar=scalar,
 810                 empty=False, multiline=line_breaks,
 811                 allow_flow_plain=allow_flow_plain,
 812                 allow_block_plain=allow_block_plain,
 813                 allow_single_quoted=allow_single_quoted,
 814                 allow_double_quoted=allow_double_quoted,
 815                 allow_block=allow_block)
 816
 817     # Writers.
 818
 819     def flush_stream(self):
 820         if hasattr(self.stream, 'flush'):
 821             self.stream.flush()
 822
 823     def write_stream_start(self):
 824         # Write BOM if needed.
 825         if self.encoding and self.encoding.startswith('utf-16'):
 826             self.stream.write(u'\xFF\xFE'.encode(self.encoding))
 827
 828     def write_stream_end(self):
 829         self.flush_stream()
 830
 831     def write_indicator(self, indicator, need_whitespace,
 832             whitespace=False, indention=False):
 833         if self.whitespace or not need_whitespace:
 834             data = indicator
 835         else:
 836             data = u' '+indicator
 837         self.whitespace = whitespace
 838         self.indention = self.indention and indention
 839         self.column += len(data)
 840         if self.encoding:
 841             data = data.encode(self.encoding)
 842         self.stream.write(data)
 843
 844     def write_indent(self):
 845         indent = self.indent or 0
 846         if not self.indention or self.column > indent   \
 847                 or (self.column == indent and not self.whitespace):
 848             self.write_line_break()
 849         if self.column < indent:
 850             self.whitespace = True
 851             data = u' '*(indent-self.column)
 852             self.column = indent
 853             if self.encoding:
 854                 data = data.encode(self.encoding)
 855             self.stream.write(data)
 856
 857     def write_line_break(self, data=None):
 858         if data is None:
 859             data = self.best_line_break
 860         self.whitespace = True
 861         self.indention = True
 862         self.line += 1
 863         self.column = 0
 864         if self.encoding:
 865             data = data.encode(self.encoding)
 866         self.stream.write(data)
 867
 868     def write_version_directive(self, version_text):
 869         data = u'%%YAML %s' % version_text
 870         if self.encoding:
 871             data = data.encode(self.encoding)
 872         self.stream.write(data)
 873         self.write_line_break()
 874
 875     def write_tag_directive(self, handle_text, prefix_text):
 876         data = u'%%TAG %s %s' % (handle_text, prefix_text)
 877         if self.encoding:
 878             data = data.encode(self.encoding)
 879         self.stream.write(data)
 880         self.write_line_break()
 881
 882     # Scalar streams.
 883
    def write_single_quoted(self, text, split=True):
        """Write `text` as a single-quoted scalar.

        text  -- the scalar value to write.
        split -- if true, a lone space may be replaced by a line fold once
                 the current column exceeds `self.best_width`.

        The text is scanned as alternating runs of spaces, line breaks and
        other characters; each run is written when it ends.  Every single
        quote in the text is written doubled.
        """
        self.write_indicator(u'\'', True)
        # `spaces` / `breaks` record the kind of the previous character;
        # text[start:end] is the run that has not been written yet.
        spaces = False
        breaks = False
        start = end = 0
        # One extra iteration with ch = None flushes the final run.
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            if spaces:
                # A run of spaces has just ended.
                if ch is None or ch != u' ':
                    # Fold only at a single space that is neither the first
                    # nor the last character of the text; longer runs and
                    # edge spaces are written verbatim.
                    if start+1 == end and self.column > self.best_width and split   \
                            and start != 0 and end != len(text):
                        self.write_indent()
                    else:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                    start = end
            elif breaks:
                # A run of line breaks has just ended.
                if ch is None or ch not in u'\n\x85\u2028\u2029':
                    # An extra break is written when the run starts with a
                    # line feed -- presumably because a reader folds a single
                    # line break inside a quoted scalar into a space.
                    if text[start] == u'\n':
                        self.write_line_break()
                    for br in text[start:end]:
                        if br == u'\n':
                            self.write_line_break()
                        else:
                            self.write_line_break(br)
                    self.write_indent()
                    start = end
            else:
                # Inside ordinary text: write it out when a space, a line
                # break, a quote or the end of the text is reached.
                if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                    if start < end:
                        data = text[start:end]
                        self.column += len(data)
                        if self.encoding:
                            data = data.encode(self.encoding)
                        self.stream.write(data)
                        start = end
            if ch == u'\'':
                # A quote is escaped by doubling it; skip past the original.
                data = u'\'\''
                self.column += 2
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end + 1
            if ch is not None:
                spaces = (ch == u' ')
                breaks = (ch in u'\n\x85\u2028\u2029')
            end += 1
        self.write_indicator(u'\'', False)
 937
    # Maps a character to the letter written after the backslash in its
    # short escape form.  write_double_quoted() looks a character up here
    # first and only falls back to numeric \x, \u or \U escapes when it is
    # not listed.
    ESCAPE_REPLACEMENTS = {
        u'\0':      u'0',       # null
        u'\x07':    u'a',       # bell
        u'\x08':    u'b',       # backspace
        u'\x09':    u't',       # horizontal tab
        u'\x0A':    u'n',       # line feed
        u'\x0B':    u'v',       # vertical tab
        u'\x0C':    u'f',       # form feed
        u'\x0D':    u'r',       # carriage return
        u'\x1B':    u'e',       # escape
        u'\"':      u'\"',      # double quote
        u'\\':      u'\\',      # backslash
        u'\x85':    u'N',       # next line (NEL)
        u'\xA0':    u'_',       # no-break space
        u'\u2028':  u'L',       # line separator
        u'\u2029':  u'P',       # paragraph separator
    }
 955
    def write_double_quoted(self, text, split=True):
        """Write `text` as a double-quoted scalar, escaping where needed.

        text  -- the scalar value to write.
        split -- if true, long output is continued on the next line with an
                 escaped line break once it would exceed `self.best_width`.

        Printable ASCII (and, when `self.allow_unicode` is set, characters
        in the ranges A0-D7FF and E000-FFFD) is written verbatim, except for
        the double quote, backslash, NEL, line/paragraph separators and
        U+FEFF, which are always escaped along with everything else.
        """
        self.write_indicator(u'"', True)
        # text[start:end] is the pending run of characters that can be
        # written verbatim.
        start = end = 0
        # One extra iteration with ch = None flushes the final run.
        while end <= len(text):
            ch = None
            if end < len(text):
                ch = text[end]
            # True at the end of the text and for any character that must
            # be escaped.
            if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                    or not (u'\x20' <= ch <= u'\x7E'
                        or (self.allow_unicode
                            and (u'\xA0' <= ch <= u'\uD7FF'
                                or u'\uE000' <= ch <= u'\uFFFD'))):
                # Flush the verbatim run collected so far.
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
                if ch is not None:
                    # Prefer the short escape; otherwise use the narrowest
                    # numeric escape that fits the code point.
                    if ch in self.ESCAPE_REPLACEMENTS:
                        data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                    elif ch <= u'\xFF':
                        data = u'\\x%02X' % ord(ch)
                    elif ch <= u'\uFFFF':
                        data = u'\\u%04X' % ord(ch)
                    else:
                        data = u'\\U%08X' % ord(ch)
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end+1
            # Split the line at a space, or right after an escape (nothing
            # pending), but never at the first or the last two positions.
            if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
                    and self.column+(end-start) > self.best_width and split:
                # The trailing backslash escapes the line break that
                # write_indent() emits.
                data = text[start:end]+u'\\'
                if start < end:
                    start = end
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                # A continuation that begins with a space gets a backslash
                # in front of it -- presumably so a reader does not strip it
                # as indentation of the continuation line.
                if text[start] == u' ':
                    data = u'\\'
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
            end += 1
        self.write_indicator(u'"', False)
1009
1010     def determine_chomp(self, text):
1011         tail = text[-2:]
1012         while len(tail) < 2:
1013             tail = u' '+tail
1014         if tail[-1] in u'\n\x85\u2028\u2029':
1015             if tail[-2] in u'\n\x85\u2028\u2029':
1016                 return u'+'
1017             else:
1018                 return u''
1019         else:
1020             return u'-'
1021
1022     def write_folded(self, text):
1023         chomp = self.determine_chomp(text)
1024         self.write_indicator(u'>'+chomp, True)
1025         self.write_indent()
1026         leading_space = False
1027         spaces = False
1028         breaks = False
1029         start = end = 0
1030         while end <= len(text):
1031             ch = None
1032             if end < len(text):
1033                 ch = text[end]
1034             if breaks:
1035                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1036                     if not leading_space and ch is not None and ch != u' '  \
1037                             and text[start] == u'\n':
1038                         self.write_line_break()
1039                     leading_space = (ch == u' ')
1040                     for br in text[start:end]:
1041                         if br == u'\n':
1042                             self.write_line_break()
1043                         else:
1044                             self.write_line_break(br)
1045                     if ch is not None:
1046                         self.write_indent()
1047                     start = end
1048             elif spaces:
1049                 if ch != u' ':
1050                     if start+1 == end and self.column > self.best_width:
1051                         self.write_indent()
1052                     else:
1053                         data = text[start:end]
1054                         self.column += len(data)
1055                         if self.encoding:
1056                             data = data.encode(self.encoding)
1057                         self.stream.write(data)
1058                     start = end
1059             else:
1060                 if ch is None or ch in u' \n\x85\u2028\u2029':
1061                     data = text[start:end]
1062                     if self.encoding:
1063                         data = data.encode(self.encoding)
1064                     self.stream.write(data)
1065                     if ch is None:
1066                         self.write_line_break()
1067                     start = end
1068             if ch is not None:
1069                 breaks = (ch in u'\n\x85\u2028\u2029')
1070                 spaces = (ch == u' ')
1071             end += 1
1072
1073     def write_literal(self, text):
1074         chomp = self.determine_chomp(text)
1075         self.write_indicator(u'|'+chomp, True)
1076         self.write_indent()
1077         breaks = False
1078         start = end = 0
1079         while end <= len(text):
1080             ch = None
1081             if end < len(text):
1082                 ch = text[end]
1083             if breaks:
1084                 if ch is None or ch not in u'\n\x85\u2028\u2029':
1085                     for br in text[start:end]:
1086                         if br == u'\n':
1087                             self.write_line_break()
1088                         else:
1089                             self.write_line_break(br)
1090                     if ch is not None:
1091                         self.write_indent()
1092                     start = end
1093             else:
1094                 if ch is None or ch in u'\n\x85\u2028\u2029':
1095                     data = text[start:end]
1096                     if self.encoding:
1097                         data = data.encode(self.encoding)
1098                     self.stream.write(data)
1099                     if ch is None:
1100                         self.write_line_break()
1101                     start = end
1102             if ch is not None:
1103                 breaks = (ch in u'\n\x85\u2028\u2029')
1104             end += 1
1105
1106     def write_plain(self, text, split=True):
1107         if not text:
1108             return
1109         if not self.whitespace:
1110             data = u' '
1111             self.column += len(data)
1112             if self.encoding:
1113                 data = data.encode(self.encoding)
1114             self.stream.write(data)
1115         self.writespace = False
1116         self.indention = False
1117         spaces = False
1118         breaks = False
1119         start = end = 0
1120         while end <= len(text):
1121             ch = None
1122             if end < len(text):
1123                 ch = text[end]
1124             if spaces:
1125                 if ch != u' ':
1126                     if start+1 == end and self.column > self.best_width and split:
1127                         self.write_indent()
1128                         self.writespace = False
1129                         self.indention = False
1130                     else:
1131                         data = text[start:end]
1132                         self.column += len(data)
1133                         if self.encoding:
1134                             data = data.encode(self.encoding)
1135                         self.stream.write(data)
1136                     start = end
1137             elif breaks:
1138                 if ch not in u'\n\x85\u2028\u2029':
1139                     if text[start] == u'\n':
1140                         self.write_line_break()
1141                     for br in text[start:end]:
1142                         if br == u'\n':
1143                             self.write_line_break()
1144                         else:
1145                             self.write_line_break(br)
1146                     self.write_indent()
1147                     self.whitespace = False
1148                     self.indention = False
1149                     start = end
1150             else:
1151                 if ch is None or ch in u' \n\x85\u2028\u2029':
1152                     data = text[start:end]
1153                     self.column += len(data)
1154                     if self.encoding:
1155                         data = data.encode(self.encoding)
1156                     self.stream.write(data)
1157                     start = end
1158             if ch is not None:
1159                 spaces = (ch == u' ')
1160                 breaks = (ch in u'\n\x85\u2028\u2029')
1161             end += 1
1162