Fix invalid output of single-quoted scalars in cases when a single
[pyyaml/python3.git] / lib / yaml / emitter.py
blob95f59db38e4f8eae3c214b3cae379deb378bb2ab
2 # Emitter expects events obeying the following grammar:
3 # stream ::= STREAM-START document* STREAM-END
4 # document ::= DOCUMENT-START node DOCUMENT-END
5 # node ::= SCALAR | sequence | mapping
6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
7 # mapping ::= MAPPING-START (node node)* MAPPING-END
9 __all__ = ['Emitter', 'EmitterError']
11 from error import YAMLError
12 from events import *
14 import re
class EmitterError(YAMLError):
    """Error raised by the emitter for unexpected events or invalid
    versions, tag handles, tag prefixes, tags and anchors."""
class ScalarAnalysis:
    """Plain record describing a scalar value and the styles it may use.

    Instances are built by Emitter.analyze_scalar; every constructor
    argument is stored unchanged under an attribute of the same name.
    """

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The value itself and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Plain style permissions (flow and block contexts).
        self.allow_flow_plain, self.allow_block_plain = \
                allow_flow_plain, allow_block_plain
        # Quoted style permissions.
        self.allow_single_quoted, self.allow_double_quoted = \
                allow_single_quoted, allow_double_quoted
        # Literal/folded block style permission.
        self.allow_block = allow_block
33 class Emitter:
# Tag prefixes available in every document, mapped to their handles.
# (The closing brace of this literal was missing.)
DEFAULT_TAG_PREFIXES = {
    u'!' : u'!',
    u'tag:yaml.org,2002:' : u'!!',
}
def __init__(self, stream, canonical=None, indent=None, width=None,
        allow_unicode=None, line_break=None):
    """Set up the emitter state machine and its formatting options.

    `stream` must provide `write` and may provide `flush`.  `indent` is
    honoured only when 1 < indent < 10, `width` only when it exceeds twice
    the chosen indent, and `line_break` only when it is one of
    '\\r', '\\n' or '\\r\\n'; otherwise 2, 80 and '\\n' are used.
    """

    # The stream should have the methods `write` and possibly `flush`.
    self.stream = stream

    # Encoding can be overridden by STREAM-START.
    self.encoding = None

    # Emitter is a state machine with a stack of states to handle nested
    # structures.
    self.states = []
    self.state = self.expect_stream_start

    # Current event and the event queue.
    self.events = []
    self.event = None

    # The current indentation level and the stack of previous indents.
    self.indents = []
    self.indent = None

    # Flow level.
    self.flow_level = 0

    # Contexts.
    self.root_context = False
    self.sequence_context = False
    self.mapping_context = False
    self.simple_key_context = False

    # Characteristics of the last emitted character:
    #  - current position.
    #  - is it a whitespace?
    #  - is it an indention character
    #    (indentation space, '-', '?', or ':')?
    self.line = 0
    self.column = 0
    self.whitespace = True
    self.indention = True

    # Formatting details; out-of-range requests fall back to defaults.
    self.canonical = canonical
    self.allow_unicode = allow_unicode
    if not (indent and 1 < indent < 10):
        indent = 2
    self.best_indent = indent
    if not (width and width > self.best_indent*2):
        width = 80
    self.best_width = width
    if line_break not in [u'\r', u'\n', u'\r\n']:
        line_break = u'\n'
    self.best_line_break = line_break

    # Tag prefixes.
    self.tag_prefixes = None

    # Prepared anchor and tag.
    self.prepared_anchor = None
    self.prepared_tag = None

    # Scalar analysis and style.
    self.analysis = None
    self.style = None
def emit(self, event):
    """Queue `event` and run the state machine for as long as enough
    events are buffered to decide how to emit the next one."""
    self.events.append(event)
    while True:
        if self.need_more_events():
            break
        self.event = self.events.pop(0)
        # The current state handler consumes self.event.
        self.state()
        self.event = None
112 # In some cases, we wait for a few next events before emitting.
def need_more_events(self):
    """Return True when the queue is empty or the event at its head
    needs more look-ahead than is currently buffered."""
    if not self.events:
        return True
    head = self.events[0]
    # Look-ahead required after each kind of start event.
    lookahead_by_kind = (
        (DocumentStartEvent, 1),
        (SequenceStartEvent, 2),
        (MappingStartEvent, 3),
    )
    for kind, lookahead in lookahead_by_kind:
        if isinstance(head, kind):
            return self.need_events(lookahead)
    return False
def need_events(self, count):
    """Return True if fewer than `count` events follow the head of the
    queue and the structure opened by the head is not yet closed."""
    depth = 0
    for queued in self.events[1:]:
        if isinstance(queued, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(queued, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(queued, StreamEndEvent):
            depth = -1
        # The head structure is complete: nothing more to wait for.
        if depth < 0:
            return False
    return (count+1 > len(self.events))
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent on the stack and compute the next one.

    With no indent yet, the new indent is `best_indent` for flow context
    and 0 otherwise; afterwards it grows by `best_indent` unless
    `indentless` is set.
    """
    current = self.indent
    self.indents.append(current)
    if current is not None:
        if not indentless:
            self.indent = current + self.best_indent
        return
    if flow:
        self.indent = self.best_indent
    else:
        self.indent = 0
150 # States.
152 # Stream handlers.
def expect_stream_start(self):
    """Initial state: accept only STREAM-START and pick up its encoding."""
    event = self.event
    if not isinstance(event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    if event.encoding:
        self.encoding = event.encoding
    self.write_stream_start()
    self.state = self.expect_first_document_start
def expect_nothing(self):
    """Terminal state (entered after STREAM-END): any event is an error."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
167 # Document handlers.
def expect_first_document_start(self):
    """State for the first document: delegate with `first=True`."""
    is_first = True
    return self.expect_document_start(first=is_first)
def expect_document_start(self, first=False):
    """Handle DOCUMENT-START (directives and the '---' marker) or
    STREAM-END; anything else raises EmitterError."""
    event = self.event
    if not isinstance(event, DocumentStartEvent):
        if isinstance(event, StreamEndEvent):
            self.write_stream_end()
            self.state = self.expect_nothing
            return
        raise EmitterError("expected DocumentStartEvent, but got %s"
                % self.event)

    if event.version:
        self.write_version_directive(self.prepare_version(event.version))

    # Start from the default prefixes, then register the document's own
    # %TAG directives in sorted handle order.
    self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
    if event.tags:
        for handle in sorted(event.tags.keys()):
            prefix = event.tags[handle]
            self.tag_prefixes[prefix] = handle
            handle_text = self.prepare_tag_handle(handle)
            prefix_text = self.prepare_tag_prefix(prefix)
            self.write_tag_directive(handle_text, prefix_text)

    # The '---' marker may be omitted only for a plain first document.
    implicit = (first and not event.explicit and not self.canonical
            and not event.version and not event.tags
            and not self.check_empty_document())
    if not implicit:
        self.write_indent()
        self.write_indicator(u'---', True)
        if self.canonical:
            self.write_indent()
    self.state = self.expect_document_root
def expect_document_end(self):
    """Handle DOCUMENT-END: write '...' if explicit, then flush."""
    event = self.event
    if not isinstance(event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if event.explicit:
        self.write_indicator(u'...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
def expect_document_root(self):
    """Emit the root node; DOCUMENT-END is expected once it is done."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
219 # Node handlers.
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the context flags and dispatch on the current node event.

    Aliases, scalars, sequences and mappings are handled; any other event
    raises EmitterError.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    event = self.event
    if isinstance(event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor(u'&')
    self.process_tag()
    if isinstance(event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(event, SequenceStartEvent):
        # Flow style is used inside flow context, in canonical mode, on
        # request, or for an empty collection.
        use_flow = (self.flow_level or self.canonical
                or event.flow_style or self.check_empty_sequence())
        if use_flow:
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(event, MappingStartEvent):
        use_flow = (self.flow_level or self.canonical
                or event.flow_style or self.check_empty_mapping())
        if use_flow:
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
def expect_alias(self):
    """Emit an alias ('*anchor') and return to the previous state."""
    anchor = self.event.anchor
    if anchor is None:
        raise EmitterError("anchor is not specified for alias")
    self.process_anchor(u'*')
    self.state = self.states.pop()
def expect_scalar(self):
    """Emit a scalar with a temporarily increased (flow-style) indent."""
    self.increase_indent(flow=True)
    self.process_scalar()
    # Restore the indent and resume the enclosing state.
    previous_indent = self.indents.pop()
    self.indent = previous_indent
    self.state = self.states.pop()
261 # Flow sequence handlers.
def expect_flow_sequence(self):
    """Open a flow sequence: write '[' and enter one more flow level."""
    self.write_indicator(u'[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
def expect_first_flow_sequence_item(self):
    """Handle the first item of a flow sequence, or close an empty one."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(u']', False)
        self.state = self.states.pop()
        return
    # Break the line in canonical mode or once past the preferred width.
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
def expect_flow_sequence_item(self):
    """Handle a subsequent flow sequence item, or close the sequence."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        # Canonical output carries a trailing comma and a line break.
        if self.canonical:
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u']', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
297 # Flow mapping handlers.
def expect_flow_mapping(self):
    """Open a flow mapping: write '{' and enter one more flow level."""
    self.write_indicator(u'{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
def expect_first_flow_mapping_key(self):
    """Handle the first key of a flow mapping, or close an empty one."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if self.canonical or not self.check_simple_key():
        # Complex key: introduced by an explicit '?'.
        self.write_indicator(u'?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_flow_mapping_key(self):
    """Handle a subsequent flow mapping key, or close the mapping."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        # Canonical output carries a trailing comma and a line break.
        if self.canonical:
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if self.canonical or not self.check_simple_key():
        # Complex key: introduced by an explicit '?'.
        self.write_indicator(u'?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_flow_mapping_simple_value(self):
    """Emit the value that follows a simple flow mapping key."""
    # ':' is attached directly to the simple key.
    self.write_indicator(u':', False)
    next_state = self.expect_flow_mapping_key
    self.states.append(next_state)
    self.expect_node(mapping=True)
def expect_flow_mapping_value(self):
    """Emit the value that follows a complex ('?') flow mapping key."""
    needs_break = self.canonical or self.column > self.best_width
    if needs_break:
        self.write_indent()
    self.write_indicator(u':', True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
355 # Block sequence handlers.
def expect_block_sequence(self):
    """Open a block sequence; inside a mapping, when the last output was
    not an indention character, the indent is not increased."""
    self.increase_indent(flow=False,
            indentless=(self.mapping_context and not self.indention))
    self.state = self.expect_first_block_sequence_item
def expect_first_block_sequence_item(self):
    """State for the first block sequence item: delegate with first=True."""
    is_first = True
    return self.expect_block_sequence_item(first=is_first)
def expect_block_sequence_item(self, first=False):
    """Emit a '- ' entry, or close the sequence on SEQUENCE-END (the end
    event is not checked for the first item)."""
    if not first and isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    self.write_indicator(u'-', True, indention=True)
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)
375 # Block mapping handlers.
def expect_block_mapping(self):
    """Open a block mapping with an increased block indent."""
    in_flow = False
    self.increase_indent(flow=in_flow)
    self.state = self.expect_first_block_mapping_key
def expect_first_block_mapping_key(self):
    """State for the first block mapping key: delegate with first=True."""
    is_first = True
    return self.expect_block_mapping_key(first=is_first)
def expect_block_mapping_key(self, first=False):
    """Emit a block mapping key, or close the mapping on MAPPING-END (the
    end event is not checked for the first key)."""
    if not first and isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if not self.check_simple_key():
        # Complex key: introduced by an explicit '?'.
        self.write_indicator(u'?', True, indention=True)
        self.states.append(self.expect_block_mapping_value)
        self.expect_node(mapping=True)
    else:
        self.states.append(self.expect_block_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
def expect_block_mapping_simple_value(self):
    """Emit the value that follows a simple block mapping key."""
    # ':' is attached directly to the simple key.
    self.write_indicator(u':', False)
    next_state = self.expect_block_mapping_key
    self.states.append(next_state)
    self.expect_node(mapping=True)
def expect_block_mapping_value(self):
    """Emit the value that follows a complex ('?') block mapping key."""
    # The ':' indicator goes on its own indented line.
    self.write_indent()
    self.write_indicator(u':', True, indention=True)
    next_state = self.expect_block_mapping_key
    self.states.append(next_state)
    self.expect_node(mapping=True)
409 # Checkers.
def check_empty_sequence(self):
    """Truthy when the current event starts a sequence and the next
    queued event immediately ends it."""
    queued = self.events
    return (isinstance(self.event, SequenceStartEvent) and queued
            and isinstance(queued[0], SequenceEndEvent))
def check_empty_mapping(self):
    """Truthy when the current event starts a mapping and the next
    queued event immediately ends it."""
    queued = self.events
    return (isinstance(self.event, MappingStartEvent) and queued
            and isinstance(queued[0], MappingEndEvent))
def check_empty_document(self):
    """Truthy when the document being started consists of a single
    untagged, unanchored, implicit scalar with an empty value."""
    if not (isinstance(self.event, DocumentStartEvent) and self.events):
        return False
    root = self.events[0]
    return (isinstance(root, ScalarEvent) and root.anchor is None
            and root.tag is None and root.implicit and root.value == u'')
def check_simple_key(self):
    """Decide whether the current event can be emitted as a simple key.

    The prepared anchor, prepared tag and scalar analysis are computed
    (and cached on self) as needed; their combined length must stay
    below 128 characters.
    """
    event = self.event
    length = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        length += len(self.prepared_anchor)
    has_tag = (isinstance(event, (ScalarEvent, CollectionStartEvent))
            and event.tag is not None)
    if has_tag:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        length += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        length += len(self.analysis.scalar)
    # Aliases, non-empty single-line scalars and empty collections
    # qualify.
    return (length < 128 and (isinstance(event, AliasEvent)
        or (isinstance(event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping()))
446 # Anchor, Tag, and Scalar processors.
def process_anchor(self, indicator):
    """Write the event's anchor prefixed by `indicator` ('&' or '*'),
    if it has one, and clear the cached prepared anchor."""
    anchor = self.event.anchor
    if anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(anchor)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
    self.prepared_anchor = None
def process_tag(self):
    """Write the tag of the current node unless it may be omitted.

    For scalars the chosen style selects which `implicit` flag applies:
    index 0 for plain style, index 1 otherwise.  Raises EmitterError when
    a tag is required but the event has none.
    """
    event = self.event
    tag = event.tag
    may_omit = (not self.canonical or tag is None)
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if may_omit:
            if self.style == '':
                implied = event.implicit[0]
            else:
                implied = event.implicit[1]
            if implied:
                self.prepared_tag = None
                return
        # A non-plain scalar that would resolve implicitly as plain gets
        # the non-specific '!' tag.
        if event.implicit[0] and tag is None:
            tag = u'!'
            self.prepared_tag = None
    elif may_omit and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
def choose_scalar_style(self):
    """Pick the style for the current scalar event.

    Returns '' (plain), '\\'' (single-quoted), '"' (double-quoted), or the
    requested '|' / '>' block style.  Double-quoted is the fallback when
    the requested or preferred style is not allowed by the analysis.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.event.style == '"' or self.canonical:
        return '"'
    if not self.event.style and self.event.implicit[0]:
        if (not (self.simple_key_context and
                (self.analysis.empty or self.analysis.multiline))
            and (self.flow_level and self.analysis.allow_flow_plain
                or (not self.flow_level and self.analysis.allow_block_plain))):
            return ''
    if self.event.style and self.event.style in '|>':
        # A block scalar cannot serve as a simple (implicit) key, so the
        # block styles are refused in simple-key context as well as in
        # flow context.
        if (not self.flow_level and not self.simple_key_context
                and self.analysis.allow_block):
            return self.event.style
    if not self.event.style or self.event.style == '\'':
        if (self.analysis.allow_single_quoted and
                not (self.simple_key_context and self.analysis.multiline)):
            return '\''
    return '"'
def process_scalar(self):
    """Write the current scalar in the chosen style, then reset the
    cached analysis and style."""
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    # Simple keys must stay on one line, so they are never split.
    split = (not self.simple_key_context)
    style = self.style
    scalar = self.analysis.scalar
    if style == '"':
        self.write_double_quoted(scalar, split)
    elif style == '\'':
        self.write_single_quoted(scalar, split)
    elif style == '>':
        self.write_folded(scalar)
    elif style == '|':
        self.write_literal(scalar)
    else:
        self.write_plain(scalar, split)
    self.analysis = None
    self.style = None
525 # Analyzers.
def prepare_version(self, version):
    """Return the 'major.minor' text for a (major, minor) pair; only
    major version 1 is accepted, otherwise EmitterError is raised."""
    major, minor = version
    if major == 1:
        return u'%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain
    only ASCII letters, digits, '-' or '_' in between; otherwise
    EmitterError is raised.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if handle[0] != u'!' or handle[-1] != u'!':
        raise EmitterError("tag handle must start and end with '!': %r"
                % (handle.encode('utf-8')))
    for ch in handle[1:-1]:
        is_alnum = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                or u'a' <= ch <= 'z')
        if is_alnum or ch in u'-_':
            continue
        raise EmitterError("invalid character %r in the tag handle: %r"
                % (ch.encode('utf-8'), handle.encode('utf-8')))
    return handle
def prepare_tag_prefix(self, prefix):
    """Return `prefix` with disallowed characters percent-encoded.

    A leading '!' is always kept.  Every other character outside the
    allowed URI set is replaced by the %XX escapes of its UTF-8 bytes.
    Raises EmitterError for an empty prefix.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    chunks = []
    start = end = 0
    if prefix[0] == u'!':
        end = 1
    while end < len(prefix):
        ch = prefix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
                or ch in u'-;/?!:@&=+$,_.~*\'()[]':
            end += 1
        else:
            # Flush the pending run of allowed characters.
            if start < end:
                chunks.append(prefix[start:end])
            start = end = end+1
            # bytearray yields integers on both Python 2 and Python 3,
            # unlike iterating the encoded string and calling ord().
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(prefix[start:end])
    return u''.join(chunks)
def prepare_tag(self, tag):
    """Return the text to write for `tag`.

    The tag is shortened to `handle + suffix` when it starts with a
    registered prefix in `self.tag_prefixes`; otherwise the verbatim
    form '!<...>' is used.  Disallowed characters of the suffix are
    percent-encoded as UTF-8.  Raises EmitterError for an empty tag.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == u'!':
        return tag
    handle = None
    suffix = tag
    # Iterate in sorted order so that the result does not depend on dict
    # ordering; the last match wins, and a longer prefix sorts after any
    # prefix it extends, so the most specific prefix is chosen.
    for prefix in sorted(self.tag_prefixes):
        if tag.startswith(prefix) \
                and (prefix == u'!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    chunks = []
    start = end = 0
    while end < len(suffix):
        ch = suffix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
                or ch in u'-;/?:@&=+$,_.~*\'()[]' \
                or (ch == u'!' and handle != u'!'):
            end += 1
        else:
            # Flush the pending run of allowed characters.
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end+1
            # bytearray yields integers on both Python 2 and Python 3.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = u''.join(chunks)
    if handle:
        return u'%s%s' % (handle, suffix_text)
    else:
        return u'!<%s>' % suffix_text
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    Only ASCII letters, digits, '-' and '_' are accepted; an empty or
    otherwise invalid anchor raises EmitterError.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        is_alnum = (u'0' <= ch <= u'9' or u'A' <= ch <= 'Z'
                or u'a' <= ch <= 'z')
        if is_alnum or ch in u'-_':
            continue
        raise EmitterError("invalid character %r in the anchor: %r"
                % (ch.encode('utf-8'), anchor.encode('utf-8')))
    return anchor
def analyze_scalar(self, scalar):
    # Scan `scalar` once and return a ScalarAnalysis telling which output
    # styles (flow plain, block plain, single-quoted, double-quoted,
    # block) may represent it.  Reads self.allow_unicode to decide
    # whether non-ASCII printable characters count as special.

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Whitespaces.
    inline_spaces = False          # non-space space+ non-space
    inline_breaks = False          # non-space break+ non-space
    leading_spaces = False         # ^ space+ (non-space | $)
    leading_breaks = False         # ^ break+ (non-space | $)
    trailing_spaces = False        # (^ | non-space) space+ $
    trailing_breaks = False        # (^ | non-space) break+ $
    inline_breaks_spaces = False   # non-space break+ space+ non-space
    mixed_breaks_spaces = False    # anything else

    # Check document indicators.
    if scalar.startswith(u'---') or scalar.startswith(u'...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceeded_by_space = True

    # Last character or followed by a whitespace.
    followed_by_space = (len(scalar) == 1 or
            scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')

    # The current series of whitespaces contain plain spaces.
    spaces = False

    # The current series of whitespaces contain line breaks.
    breaks = False

    # The current series of whitespaces contain a space followed by a
    # break.
    mixed = False

    # The current series of whitespaces start at the beginning of the
    # scalar.
    leading = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.

        if index == 0:
            # Leading indicators are special characters.
            if ch in u'#,[]{}#&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in u'?:':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'-' and followed_by_space:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in u',?[]{}':
                flow_indicators = True
            if ch == u':':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'#' and preceeded_by_space:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.

        if ch in u'\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
            if (ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
                    or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF':
                # NOTE(review): unicode_characters is assigned but never
                # read in this function.
                unicode_characters = True
                if not self.allow_unicode:
                    special_characters = True
            else:
                special_characters = True

        # Spaces, line breaks, and how they are mixed. State machine.

        # Start or continue series of whitespaces.
        if ch in u' \n\x85\u2028\u2029':
            if spaces and breaks:
                if ch != u' ':      # break+ (space+ break+)    => mixed
                    mixed = True
            elif spaces:
                if ch != u' ':      # (space+ break+)   => mixed
                    breaks = True
                    mixed = True
            elif breaks:
                if ch == u' ':      # break+ space+
                    spaces = True
            else:
                leading = (index == 0)
                if ch == u' ':      # space+
                    spaces = True
                else:               # break+
                    breaks = True

        # Series of whitespaces ended with a non-space.
        elif spaces or breaks:
            if leading:
                if spaces and breaks:
                    mixed_breaks_spaces = True
                elif spaces:
                    leading_spaces = True
                elif breaks:
                    leading_breaks = True
            else:
                if mixed:
                    mixed_breaks_spaces = True
                elif spaces and breaks:
                    inline_breaks_spaces = True
                elif spaces:
                    inline_spaces = True
                elif breaks:
                    inline_breaks = True
            spaces = breaks = mixed = leading = False

        # Series of whitespaces reach the end.
        if (spaces or breaks) and (index == len(scalar)-1):
            if spaces and breaks:
                mixed_breaks_spaces = True
            elif spaces:
                trailing_spaces = True
                if leading:
                    leading_spaces = True
            elif breaks:
                trailing_breaks = True
                if leading:
                    leading_breaks = True
            spaces = breaks = mixed = leading = False

        # Prepare for the next character.
        index += 1
        preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
        followed_by_space = (index+1 >= len(scalar) or
                scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespace are bad for plain scalars. We also
    # do not want to mess with leading whitespaces for block scalars.
    if leading_spaces or leading_breaks or trailing_spaces:
        allow_flow_plain = allow_block_plain = allow_block = False

    # Trailing breaks are fine for block scalars, but unacceptable for
    # plain scalars.
    if trailing_breaks:
        allow_flow_plain = allow_block_plain = False

    # The combination of (space+ break+) is only acceptable for block
    # scalars.
    if inline_breaks_spaces:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Mixed spaces and breaks, as well as special character are only
    # allowed for double quoted scalars.
    if mixed_breaks_spaces or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # We don't emit multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
817 # Writers.
def flush_stream(self):
    """Flush the output stream when it offers a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
def write_stream_start(self):
    """Write a byte order mark when the stream encoding is a UTF-16 one.

    The mark is the single character U+FEFF encoded with the stream
    encoding, so 'utf-16-le' yields FF FE and 'utf-16-be' yields FE FF.
    (The two characters U+00FF U+00FE were previously encoded instead,
    which does not produce a BOM.)
    """
    # Write BOM if needed.
    if self.encoding and self.encoding.startswith('utf-16'):
        self.stream.write(u'\uFEFF'.encode(self.encoding))
def write_stream_end(self):
    """Finish the stream; nothing is written, the output is only flushed."""
    self.flush_stream()
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write `indicator`, preceded by one space when `need_whitespace`
    is set and the last output was not whitespace.

    Updates the column, sets `self.whitespace` to `whitespace`, and keeps
    `self.indention` only if it was set and `indention` is true.
    """
    data = indicator
    if need_whitespace and not self.whitespace:
        data = u' '+indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(data)
    if self.encoding:
        data = data.encode(self.encoding)
    self.stream.write(data)
def write_indent(self):
    """Move to the current indent, starting a new line first unless the
    cursor is still within the indentation of the present line."""
    target = self.indent or 0
    must_break = (not self.indention or self.column > target
            or (self.column == target and not self.whitespace))
    if must_break:
        self.write_line_break()
    if self.column < target:
        self.whitespace = True
        padding = u' '*(target-self.column)
        self.column = target
        if self.encoding:
            padding = padding.encode(self.encoding)
        self.stream.write(padding)
def write_line_break(self, data=None):
    """Write a line break (`best_line_break` unless `data` is given) and
    reset the position to the start of a new line."""
    line_break = data
    if line_break is None:
        line_break = self.best_line_break
    self.whitespace = True
    self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        line_break = line_break.encode(self.encoding)
    self.stream.write(line_break)
def write_version_directive(self, version_text):
    """Write a '%YAML <version>' directive followed by a line break."""
    directive = u'%%YAML %s' % version_text
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
    """Write a '%TAG <handle> <prefix>' directive followed by a line
    break."""
    directive = u'%%TAG %s %s' % (handle_text, prefix_text)
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
882 # Scalar streams.
def write_single_quoted(self, text, split=True):
    """Write `text` as a single-quoted scalar.

    Runs of spaces, runs of line breaks and runs of other characters are
    written separately.  A lone space may be replaced by a line fold when
    the line is too long and `split` is true.  Every single quote is
    doubled, whatever precedes it.
    """
    self.write_indicator(u'\'', True)
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch is None once the end of the text is reached.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch is None or ch != u' ':
                # Fold only on a single inner space past the best width.
                if start+1 == end and self.column > self.best_width and split \
                        and start != 0 and end != len(text):
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with '\n'.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
        # Escape the quote at loop level, not only in the branch above:
        # a quote that directly follows spaces or line breaks is handled
        # by one of the first two branches and must still be doubled.
        if ch == u'\'':
            data = u'\'\''
            self.column += 2
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            start = end + 1
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
    self.write_indicator(u'\'', False)
# Characters that have a short backslash escape in double-quoted scalars,
# mapped to the character that follows the backslash.
# (The closing brace of this literal was missing.)
ESCAPE_REPLACEMENTS = {
    u'\0':      u'0',
    u'\x07':    u'a',
    u'\x08':    u'b',
    u'\x09':    u't',
    u'\x0A':    u'n',
    u'\x0B':    u'v',
    u'\x0C':    u'f',
    u'\x0D':    u'r',
    u'\x1B':    u'e',
    u'\"':      u'\"',
    u'\\':      u'\\',
    u'\x85':    u'N',
    u'\xA0':    u'_',
    u'\u2028':  u'L',
    u'\u2029':  u'P',
}
def write_double_quoted(self, text, split=True):
    # Write `text` as a double-quoted scalar.  Characters that must be
    # escaped are written via ESCAPE_REPLACEMENTS or as \xXX, \uXXXX or
    # \UXXXXXXXX; with `split` true, long lines are continued with a
    # trailing backslash.
    self.write_indicator(u'"', True)
    start = end = 0
    while end <= len(text):
        # ch is None once the end of the text is reached.
        ch = None
        if end < len(text):
            ch = text[end]
        # Stop at the end of the text and at any character that cannot be
        # written verbatim: quote, backslash, special breaks, BOM, and
        # anything outside printable ASCII (or, when allow_unicode is
        # set, outside the listed Unicode ranges).
        if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF' \
                or not (u'\x20' <= ch <= u'\x7E'
                    or (self.allow_unicode
                        and (u'\xA0' <= ch <= u'\uD7FF'
                            or u'\uE000' <= ch <= u'\uFFFD'))):
            # Flush the verbatim run collected so far.
            if start < end:
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
            if ch is not None:
                # Choose the shortest escape form available.
                if ch in self.ESCAPE_REPLACEMENTS:
                    data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= u'\xFF':
                    data = u'\\x%02X' % ord(ch)
                elif ch <= u'\uFFFF':
                    data = u'\\u%04X' % ord(ch)
                else:
                    data = u'\\U%08X' % ord(ch)
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end+1
        # Split the line at a space, or right after an escape, once the
        # pending text would exceed the best width.
        if 0 < end < len(text)-1 and (ch == u' ' or start >= end)   \
                and self.column+(end-start) > self.best_width and split:
            data = text[start:end]+u'\\'
            if start < end:
                start = end
            self.column += len(data)
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            # A space opening the continuation line is preceded by a
            # backslash.
            if text[start] == u' ':
                data = u'\\'
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
        end += 1
    self.write_indicator(u'"', False)
def determine_chomp(self, text):
    """Return the block chomping indicator for `text`.

    '+' when the text ends with two line breaks, '' when it ends with
    exactly one, and '-' when it does not end with a line break.
    """
    line_breaks = u'\n\x85\u2028\u2029'
    # Left-pad so that the last two characters always exist.
    tail = text[-2:]
    while len(tail) < 2:
        tail = u' '+tail
    if tail[-1] not in line_breaks:
        return u'-'
    if tail[-2] in line_breaks:
        return u'+'
    return u''
def write_folded(self, text):
    """Write `text` as a folded ('>') block scalar.

    The header carries the chomping indicator from determine_chomp.  A
    single space between words becomes a line fold once the current
    column exceeds `best_width`.
    """
    chomp = self.determine_chomp(text)
    self.write_indicator(u'>'+chomp, True)
    self.write_indent()
    leading_space = False
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch is None once the end of the text is reached.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written between two lines that do not
                # start with a space, when the run starts with '\n'.
                if not leading_space and ch is not None and ch != u' '   \
                        and text[start] == u'\n':
                    self.write_line_break()
                leading_space = (ch == u' ')
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width:
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                # Track the width of written words; without this the
                # fold test above only ever sees indentation and spaces.
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
            spaces = (ch == u' ')
        end += 1
def write_literal(self, text):
    # Write `text` as a literal ('|') block scalar: the header carries
    # the chomping indicator from determine_chomp, and the content is
    # copied line by line at the current indent.
    chomp = self.determine_chomp(text)
    self.write_indicator(u'|'+chomp, True)
    self.write_indent()
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch is None once the end of the text is reached.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            # A run of line breaks has ended: reproduce each break, then
            # indent for the next content line (if any).
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        else:
            # A run of non-break characters has ended: write it as is.
            if ch is None or ch in u'\n\x85\u2028\u2029':
                data = text[start:end]
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                # Text without a final break still gets one.
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
def write_plain(self, text, split=True):
    """Write `text` as a plain (unquoted) scalar.

    Nothing is written for empty text.  A separating space is written
    first when the last output was not whitespace.  With `split` true, a
    single space may become a line fold once the column exceeds
    `best_width`.
    """
    if not text:
        return
    if not self.whitespace:
        data = u' '
        self.column += len(data)
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)
    # The attribute is `whitespace`; it was misspelled `writespace`, which
    # left the real flag untouched.
    self.whitespace = False
    self.indention = False
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # ch is None once the end of the text is reached.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width and split:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # Guard against ch being None (text ending in a line break),
            # as the other writers do, instead of testing None against a
            # string.
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with '\n'.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1