Major refactoring.
[pyyaml/python3.git] / lib / yaml / emitter.py
blob985ce634e788588c47a5074ccc26307ec275be2e
2 # Emitter expects events obeying the following grammar:
3 # stream ::= STREAM-START document* STREAM-END
4 # document ::= DOCUMENT-START node DOCUMENT-END
5 # node ::= SCALAR | sequence | mapping
6 # sequence ::= SEQUENCE-START node* SEQUENCE-END
7 # mapping ::= MAPPING-START (node node)* MAPPING-END
9 __all__ = ['Emitter', 'EmitterError']
11 from error import YAMLError
12 from events import *
14 import re
class EmitterError(YAMLError):
    """Raised by the emitter when it receives an event it cannot emit."""
class ScalarAnalysis:
    """Plain record holding a scalar together with the style flags computed
    for it (which output styles may be used to write the scalar)."""

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The analysed text and its basic shape.
        self.scalar = scalar
        self.empty = empty
        self.multiline = multiline
        # Plain styles.
        self.allow_flow_plain = allow_flow_plain
        self.allow_block_plain = allow_block_plain
        # Quoted styles.
        self.allow_single_quoted = allow_single_quoted
        self.allow_double_quoted = allow_double_quoted
        # Block (literal / folded) styles.
        self.allow_block = allow_block
33 class Emitter:
# Maps a tag prefix to the handle used to abbreviate it.  The table is copied
# into `self.tag_prefixes` at the start of every document.
DEFAULT_TAG_PREFIXES = {
    u'!' : u'!',
    u'tag:yaml.org,2002:' : u'!!',
}
40 def __init__(self, stream, canonical=None, indent=None, width=None,
41 allow_unicode=None, line_break=None):
43 # The stream should have the methods `write` and possibly `flush`.
44 self.stream = stream
46 # Encoding can be overriden by STREAM-START.
47 self.encoding = None
49 # Emitter is a state machine with a stack of states to handle nested
50 # structures.
51 self.states = []
52 self.state = self.expect_stream_start
54 # Current event and the event queue.
55 self.events = []
56 self.event = None
58 # The current indentation level and the stack of previous indents.
59 self.indents = []
60 self.indent = None
62 # Flow level.
63 self.flow_level = 0
65 # Contexts.
66 self.root_context = False
67 self.sequence_context = False
68 self.mapping_context = False
69 self.simple_key_context = False
71 # Characteristics of the last emitted character:
72 # - current position.
73 # - is it a whitespace?
74 # - is it an indention character
75 # (indentation space, '-', '?', or ':')?
76 self.line = 0
77 self.column = 0
78 self.whitespace = True
79 self.indention = True
81 # Formatting details.
82 self.canonical = canonical
83 self.allow_unicode = allow_unicode
84 self.best_indent = 2
85 if indent and 1 < indent < 10:
86 self.best_indent = indent
87 self.best_width = 80
88 if width and width > self.best_indent*2:
89 self.best_width = width
90 self.best_line_break = u'\n'
91 if line_break in [u'\r', u'\n', u'\r\n']:
92 self.best_line_break = line_break
94 # Tag prefixes.
95 self.tag_prefixes = None
97 # Prepared anchor and tag.
98 self.prepared_anchor = None
99 self.prepared_tag = None
101 # Scalar analysis and style.
102 self.analysis = None
103 self.style = None
def emit(self, event):
    """Queue `event` and run the state machine for as long as enough
    events are buffered to process the one at the head of the queue."""
    self.events.append(event)
    while True:
        if self.need_more_events():
            break
        self.event = self.events.pop(0)
        self.state()
        self.event = None
# In some cases, the emitter waits for a few upcoming events before emitting the current one.
def need_more_events(self):
    """Return true when the head of the queue cannot be processed yet.

    An empty queue always needs more events.  Document, sequence and
    mapping starts ask `need_events` for 1, 2 and 3 following events
    respectively; any other event can be processed immediately.
    """
    if not self.events:
        return True
    head = self.events[0]
    lookahead_table = (
        (DocumentStartEvent, 1),
        (SequenceStartEvent, 2),
        (MappingStartEvent, 3),
    )
    for event_type, lookahead in lookahead_table:
        if isinstance(head, event_type):
            return self.need_events(lookahead)
    return False
def need_events(self, count):
    """Tell whether fewer than `count` events follow the head event.

    Returns False early as soon as the structure opened by the head
    event is closed within the queued events (or the stream ends).
    """
    depth = 0
    for pending in self.events[1:]:
        if isinstance(pending, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(pending, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(pending, StreamEndEvent):
            depth = -1
        if depth < 0:
            return False
    return len(self.events) <= count
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent on the stack and compute the new one.

    When there is no indent yet, the first level is `best_indent` for
    flow context and 0 otherwise.  Otherwise the indent grows by
    `best_indent` unless `indentless` is set.
    """
    self.indents.append(self.indent)
    if self.indent is not None:
        if not indentless:
            self.indent += self.best_indent
        return
    if flow:
        self.indent = self.best_indent
    else:
        self.indent = 0
150 # States.
152 # Stream handlers.
def expect_stream_start(self):
    """Handle STREAM-START: adopt its encoding (if any), write the stream
    prologue and move on to the first document.  Any other event raises
    EmitterError."""
    if not isinstance(self.event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    if self.event.encoding:
        self.encoding = self.event.encoding
    self.write_stream_start()
    self.state = self.expect_first_document_start
def expect_nothing(self):
    """State installed after STREAM-END: every further event is an error."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
167 # Document handlers.
def expect_first_document_start(self):
    """Handle the first DOCUMENT-START of the stream by delegating to
    `expect_document_start` with `first=True`."""
    result = self.expect_document_start(first=True)
    return result
def expect_document_start(self, first=False):
    """Handle DOCUMENT-START (or STREAM-END in its place).

    For a document start: write the %YAML and %TAG directives requested
    by the event, reset `self.tag_prefixes` from DEFAULT_TAG_PREFIXES plus
    the event's tags, and write the '---' marker unless the document can
    be started implicitly.  For a stream end: finish the stream and
    switch to `expect_nothing`.  Any other event raises EmitterError.
    """
    if isinstance(self.event, DocumentStartEvent):
        if self.event.version:
            version_text = self.prepare_version(self.event.version)
            self.write_version_directive(version_text)
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if self.event.tags:
            # sorted() gives the same ordering as keys()+sort() and also
            # works when keys() returns a view instead of a list.
            for handle in sorted(self.event.tags.keys()):
                prefix = self.event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        # The marker may be omitted only for the first document, and only
        # when nothing forces it to be explicit.
        implicit = (first and not self.event.explicit and not self.canonical
                and not self.event.version and not self.event.tags
                and not self.check_empty_document())
        if not implicit:
            self.write_indent()
            self.write_indicator(u'---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
    elif isinstance(self.event, StreamEndEvent):
        self.write_stream_end()
        self.state = self.expect_nothing
    else:
        raise EmitterError("expected DocumentStartEvent, but got %s"
                % self.event)
def expect_document_end(self):
    """Handle DOCUMENT-END: write the optional '...' marker, flush the
    stream and wait for the next document.  Any other event raises
    EmitterError."""
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        self.write_indicator(u'...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
def expect_document_root(self):
    """Emit the root node of a document; once the node is complete the
    state machine continues with `expect_document_end`."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
219 # Node handlers.
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the context flags and dispatch on the kind of node event.

    Aliases go to `expect_alias`.  Scalars and collection starts first get
    their anchor and tag written; collections are then emitted in flow
    style when already inside a flow collection, in canonical mode, when
    the event asks for it, or when the collection is empty.  Any other
    event raises EmitterError.
    """
    self.root_context = root
    self.sequence_context = sequence
    self.mapping_context = mapping
    self.simple_key_context = simple_key

    if isinstance(self.event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor(u'&')
    self.process_tag()
    if isinstance(self.event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(self.event, SequenceStartEvent):
        use_flow = (self.flow_level or self.canonical
                or self.event.flow_style or self.check_empty_sequence())
        if use_flow:
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(self.event, MappingStartEvent):
        use_flow = (self.flow_level or self.canonical
                or self.event.flow_style or self.check_empty_mapping())
        if use_flow:
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
def expect_alias(self):
    """Write an alias ('*anchor') and return to the previous state.
    Raises EmitterError when the event carries no anchor."""
    anchor = self.event.anchor
    if anchor is None:
        raise EmitterError("anchor is not specified for alias")
    self.process_anchor(u'*')
    self.state = self.states.pop()
def expect_scalar(self):
    """Write a scalar one flow indent level deeper, then restore the
    previous indent and return to the previous state."""
    self.increase_indent(flow=True)
    self.process_scalar()
    previous_indent = self.indents.pop()
    self.indent = previous_indent
    next_state = self.states.pop()
    self.state = next_state
261 # Flow sequence handlers.
def expect_flow_sequence(self):
    """Open a flow sequence: write '[', enter one more flow level and
    indent level, and wait for the first item."""
    self.write_indicator(u'[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
def expect_first_flow_sequence_item(self):
    """Handle the event following '[': either close the (empty) sequence
    right away or emit its first item."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(u']', False)
        self.state = self.states.pop()
        return
    # Break the line in canonical mode or when it is already too long.
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
def expect_flow_sequence_item(self):
    """Handle an event after at least one flow sequence item: either
    close the sequence with ']' or write ',' and the next item."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        if self.canonical:
            # Canonical output ends the item list with a trailing comma
            # and puts the closing bracket on its own line.
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u']', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
297 # Flow mapping handlers.
def expect_flow_mapping(self):
    """Open a flow mapping: write '{', enter one more flow level and
    indent level, and wait for the first key."""
    self.write_indicator(u'{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
def expect_first_flow_mapping_key(self):
    """Handle the event following '{': either close the (empty) mapping
    right away or emit its first key, as a simple key when possible and
    with an explicit '?' otherwise."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if not self.canonical and self.check_simple_key():
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True)
    self.states.append(self.expect_flow_mapping_value)
    self.expect_node(mapping=True)
def expect_flow_mapping_key(self):
    """Handle an event after at least one flow mapping pair: either
    close the mapping with '}' or write ',' and the next key."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level -= 1
        if self.canonical:
            # Canonical output ends the pair list with a trailing comma
            # and puts the closing brace on its own line.
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    if not self.canonical and self.check_simple_key():
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True)
    self.states.append(self.expect_flow_mapping_value)
    self.expect_node(mapping=True)
def expect_flow_mapping_simple_value(self):
    """Emit the value that follows a simple key in a flow mapping: ':'
    is written directly after the key, then the value node."""
    self.write_indicator(u':', False)
    next_key_state = self.expect_flow_mapping_key
    self.states.append(next_key_state)
    self.expect_node(mapping=True)
def expect_flow_mapping_value(self):
    """Emit the value that follows an explicit ('?') key in a flow
    mapping, breaking the line first in canonical mode or when the line
    is already too long."""
    needs_break = self.canonical or self.column > self.best_width
    if needs_break:
        self.write_indent()
    self.write_indicator(u':', True)
    self.states.append(self.expect_flow_mapping_key)
    self.expect_node(mapping=True)
355 # Block sequence handlers.
def expect_block_sequence(self):
    """Start a block sequence.  The indent is not increased when the
    sequence sits in mapping context and the last written character was
    not an indention character."""
    keep_indent = (self.mapping_context and not self.indention)
    self.increase_indent(flow=False, indentless=keep_indent)
    self.state = self.expect_first_block_sequence_item
def expect_first_block_sequence_item(self):
    """Handle the first item of a block sequence by delegating to
    `expect_block_sequence_item` with `first=True`."""
    result = self.expect_block_sequence_item(first=True)
    return result
def expect_block_sequence_item(self, first=False):
    """Emit one '- item' line of a block sequence, or close the sequence
    on SEQUENCE-END.  The end event is not checked for the first item."""
    if not first and isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    self.write_indicator(u'-', True, indention=True)
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)
375 # Block mapping handlers.
def expect_block_mapping(self):
    """Start a block mapping: increase the indent and wait for the first
    key."""
    self.increase_indent(flow=False)
    first_key_state = self.expect_first_block_mapping_key
    self.state = first_key_state
def expect_first_block_mapping_key(self):
    """Handle the first key of a block mapping by delegating to
    `expect_block_mapping_key` with `first=True`."""
    result = self.expect_block_mapping_key(first=True)
    return result
def expect_block_mapping_key(self, first=False):
    """Emit one key of a block mapping, or close the mapping on
    MAPPING-END.  The end event is not checked for the first key.  Keys
    that pass `check_simple_key` are written inline; others get an
    explicit '?' indicator."""
    if not first and isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if self.check_simple_key():
        self.states.append(self.expect_block_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
        return
    self.write_indicator(u'?', True, indention=True)
    self.states.append(self.expect_block_mapping_value)
    self.expect_node(mapping=True)
def expect_block_mapping_simple_value(self):
    """Emit the value that follows a simple key in a block mapping: ':'
    is written directly after the key, then the value node."""
    self.write_indicator(u':', False)
    next_key_state = self.expect_block_mapping_key
    self.states.append(next_key_state)
    self.expect_node(mapping=True)
def expect_block_mapping_value(self):
    """Emit the value that follows an explicit ('?') key in a block
    mapping: ':' goes on a fresh indented line, then the value node."""
    self.write_indent()
    self.write_indicator(u':', True, indention=True)
    next_key_state = self.expect_block_mapping_key
    self.states.append(next_key_state)
    self.expect_node(mapping=True)
409 # Checkers.
def check_empty_sequence(self):
    """Tell whether the current event starts a sequence whose end is the
    very next queued event.  The result is meant to be used as a truth
    value."""
    if not isinstance(self.event, SequenceStartEvent):
        return False
    if not self.events:
        # Same (falsy) value the original `and` chain produced.
        return self.events
    return isinstance(self.events[0], SequenceEndEvent)
def check_empty_mapping(self):
    """Tell whether the current event starts a mapping whose end is the
    very next queued event.  The result is meant to be used as a truth
    value."""
    if not isinstance(self.event, MappingStartEvent):
        return False
    if not self.events:
        # Same (falsy) value the original `and` chain produced.
        return self.events
    return isinstance(self.events[0], MappingEndEvent)
def check_empty_document(self):
    """Tell whether the current DOCUMENT-START is followed by an implicit
    scalar with no anchor, no tag and an empty value."""
    if not isinstance(self.event, DocumentStartEvent):
        return False
    if not self.events:
        return False
    following = self.events[0]
    return (isinstance(following, ScalarEvent) and following.anchor is None
            and following.tag is None and following.implicit
            and following.value == u'')
def check_simple_key(self):
    """Tell whether the current event can be written as a simple key.

    The combined length of the prepared anchor, prepared tag and scalar
    text must stay below 128, and the event must be an alias, a
    non-empty single-line scalar, or an empty collection.  The prepared
    anchor/tag and the scalar analysis are cached on the emitter as a
    side effect.
    """
    event = self.event
    length = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        length += len(self.prepared_anchor)
    if isinstance(event, (ScalarEvent, CollectionStartEvent)) \
            and event.tag is not None:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        length += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        length += len(self.analysis.scalar)
    if not length < 128:
        return False
    return (isinstance(self.event, AliasEvent)
        or (isinstance(self.event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping())
446 # Anchor, Tag, and Scalar processors.
def process_anchor(self, indicator):
    """Write the current event's anchor prefixed with `indicator`
    (u'&' for anchors, u'*' for aliases) and clear the cached prepared
    anchor.  Nothing is written when the event has no anchor."""
    anchor = self.event.anchor
    if anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(anchor)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
    self.prepared_anchor = None
def process_tag(self):
    """Write the tag of the current event, if one has to be written.

    For scalars the style is chosen first: plain scalars (style '') get
    no tag at all, and an implicit scalar without a tag that is written
    in any other style gets the non-specific tag u'!'.  The cached
    prepared tag is always cleared on return.
    """
    tag = self.event.tag
    if isinstance(self.event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if self.style == '':
            self.prepared_tag = None
            return
        if self.event.implicit and not tag:
            tag = u'!'
            self.prepared_tag = None
    if tag:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(tag)
        if self.prepared_tag:
            self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
def choose_scalar_style(self):
    """Pick the style for the current scalar event.

    Returns '"' (double quoted), '' (plain), '|' or '>' (block) or
    '\\'' (single quoted).  The requested style is honoured only when the
    scalar analysis and the current context allow it; double quoted is
    the fallback.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    requested = self.event.style
    analysis = self.analysis

    if requested == '"' or self.canonical:
        return '"'

    # Plain: only for implicit scalars with no requested style.
    if not requested and self.event.implicit:
        bad_simple_key = (self.simple_key_context and
                (analysis.empty or analysis.multiline))
        if self.flow_level:
            plain_allowed = analysis.allow_flow_plain
        else:
            plain_allowed = analysis.allow_block_plain
        if not bad_simple_key and plain_allowed:
            return ''

    # Block styles: only outside flow collections.
    if requested and requested in '|>':
        if not self.flow_level and analysis.allow_block:
            return requested

    # Single quoted: default for anything not requested otherwise.
    if not requested or requested == '\'':
        multiline_key = self.simple_key_context and analysis.multiline
        if analysis.allow_single_quoted and not multiline_key:
            return '\''

    return '"'
def process_scalar(self):
    """Write the current scalar in the chosen style and reset the cached
    analysis and style.  Long lines may be split unless the scalar is a
    simple key."""
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    may_split = (not self.simple_key_context)
    chosen = self.style
    if chosen == '"':
        self.write_double_quoted(self.analysis.scalar, may_split)
    elif chosen == '\'':
        self.write_single_quoted(self.analysis.scalar, may_split)
    elif chosen == '>':
        self.write_folded(self.analysis.scalar)
    elif chosen == '|':
        self.write_literal(self.analysis.scalar)
    else:
        self.write_plain(self.analysis.scalar, may_split)
    self.analysis = None
    self.style = None
520 # Analyzers.
def prepare_version(self, version):
    """Format a (major, minor) version pair as u'major.minor'.
    Raises EmitterError unless the major version is 1."""
    major, minor = version
    if major == 1:
        return u'%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain
    only ASCII letters, digits, '-' and '_' in between; otherwise
    EmitterError is raised.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if handle[0] != u'!' or handle[-1] != u'!':
        raise EmitterError("tag handle must start and end with '!': %r"
                % (handle.encode('utf-8')))
    for ch in handle[1:-1]:
        if u'0' <= ch <= u'9' or u'A' <= ch <= 'Z' or u'a' <= ch <= 'z' \
                or ch in u'-_':
            continue
        raise EmitterError("invalid character %r in the tag handle: %r"
                % (ch.encode('utf-8'), handle.encode('utf-8')))
    return handle
def prepare_tag_prefix(self, prefix):
    """Return `prefix` with every disallowed character percent-encoded.

    A leading '!' is kept as is.  Characters outside the allowed set are
    replaced by the %XX escapes of their UTF-8 bytes.  Raises
    EmitterError when `prefix` is empty.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    chunks = []
    start = end = 0
    if prefix[0] == u'!':
        end = 1
    while end < len(prefix):
        ch = prefix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
                or ch in u'-;/?!:@&=+$,_.~*\'()[]':
            end += 1
        else:
            # Flush the pending run of allowed characters, then escape.
            if start < end:
                chunks.append(prefix[start:end])
            start = end = end+1
            # bytearray yields integers whether the encoded value is a
            # byte string of characters or of ints, so no ord() is needed.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(prefix[start:end])
    return u''.join(chunks)
def prepare_tag(self, tag):
    """Return the textual form of `tag` for output.

    The tag is shortened with a handle from `self.tag_prefixes` when one
    of the prefixes matches, giving u'<handle><suffix>'; otherwise the
    verbatim form u'!<...>' is used.  Disallowed characters of the
    suffix are replaced by the %XX escapes of their UTF-8 bytes.  The
    non-specific tag u'!' is returned unchanged.  Raises EmitterError
    when `tag` is empty.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == u'!':
        return tag
    handle = None
    suffix = tag
    # Iterate in sorted order so that the result does not depend on the
    # dictionary's iteration order: when several prefixes match, the one
    # that sorts last (the longer of two nested prefixes) wins.
    for prefix in sorted(self.tag_prefixes.keys()):
        if tag.startswith(prefix) \
                and (prefix == u'!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    chunks = []
    start = end = 0
    while end < len(suffix):
        ch = suffix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
                or ch in u'-;/?:@&=+$,_.~*\'()[]' \
                or (ch == u'!' and handle != u'!'):
            end += 1
        else:
            # Flush the pending run of allowed characters, then escape.
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end+1
            # bytearray yields integers whether the encoded value is a
            # byte string of characters or of ints, so no ord() is needed.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = u''.join(chunks)
    if handle:
        return u'%s%s' % (handle, suffix_text)
    else:
        return u'!<%s>' % suffix_text
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    The anchor must be non-empty and consist only of ASCII letters,
    digits, '-' and '_'; otherwise EmitterError is raised.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for ch in anchor:
        if not (u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z' \
                or ch in u'-_'):
            # Report the offending anchor itself (the message previously
            # referenced an undefined name and failed with NameError).
            raise EmitterError("invalid character %r in the anchor: %r"
                    % (ch.encode('utf-8'), anchor.encode('utf-8')))
    return anchor
def analyze_scalar(self, scalar):
    # Inspect the text of a scalar and return a ScalarAnalysis describing
    # which output styles may be used for it.  The text is scanned once,
    # character by character, collecting indicator, special-character and
    # whitespace-pattern flags; the flags are turned into `allow_*`
    # decisions at the end.

    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    # Indicators and special characters.
    block_indicators = False
    flow_indicators = False
    line_breaks = False
    special_characters = False

    # Whitespaces.
    inline_spaces = False           # non-space space+ non-space
    inline_breaks = False           # non-space break+ non-space
    leading_spaces = False          # ^ space+ (non-space | $)
    leading_breaks = False          # ^ break+ (non-space | $)
    trailing_spaces = False         # (^ | non-space) space+ $
    trailing_breaks = False         # (^ | non-space) break+ $
    inline_breaks_spaces = False    # non-space break+ space+ non-space
    mixed_breaks_spaces = False     # anything else

    # Check document indicators.
    if scalar.startswith(u'---') or scalar.startswith(u'...'):
        block_indicators = True
        flow_indicators = True

    # First character or preceded by a whitespace.
    preceeded_by_space = True

    # Last character or followed by a whitespace.
    followed_by_space = (len(scalar) == 1 or
            scalar[1] in u'\0 \t\r\n\x85\u2028\u2029')

    # The current series of whitespaces contain plain spaces.
    spaces = False

    # The current series of whitespaces contain line breaks.
    breaks = False

    # The current series of whitespaces contain a space followed by a
    # break.
    mixed = False

    # The current series of whitespaces start at the beginning of the
    # scalar.
    leading = False

    index = 0
    while index < len(scalar):
        ch = scalar[index]

        # Check for indicators.

        if index == 0:
            # Leading indicators are special characters.
            if ch in u'#,[]{}#&*!|>\'\"%@`':
                flow_indicators = True
                block_indicators = True
            if ch in u'?:':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'-' and followed_by_space:
                flow_indicators = True
                block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in u',?[]{}':
                flow_indicators = True
            if ch == u':':
                flow_indicators = True
                if followed_by_space:
                    block_indicators = True
            if ch == u'#' and preceeded_by_space:
                flow_indicators = True
                block_indicators = True

        # Check for line breaks, special, and unicode characters.

        if ch in u'\n\x85\u2028\u2029':
            line_breaks = True
        if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
            if ch < u'\x80' or ch == u'\uFEFF': # '\uFEFF' is BOM.
                special_characters = True
            else:
                # NOTE(review): `unicode_characters` is assigned here but
                # never read anywhere else in this method.
                unicode_characters = True
                if not self.allow_unicode:
                    special_characters = True

        # Spaces, line breaks, and how they are mixed. State machine.

        # Start or continue series of whitespaces.
        if ch in u' \n\x85\u2028\u2029':
            if spaces and breaks:
                if ch != u' ':      # break+ (space+ break+)    => mixed
                    mixed = True
            elif spaces:
                if ch != u' ':      # (space+ break+)   => mixed
                    breaks = True
                    mixed = True
            elif breaks:
                if ch == u' ':      # break+ space+
                    spaces = True
            else:
                leading = (index == 0)
                if ch == u' ':      # space+
                    spaces = True
                else:               # break+
                    breaks = True

        # Series of whitespaces ended with a non-space.
        elif spaces or breaks:
            if leading:
                if spaces and breaks:
                    mixed_breaks_spaces = True
                elif spaces:
                    leading_spaces = True
                elif breaks:
                    leading_breaks = True
            else:
                if mixed:
                    mixed_breaks_spaces = True
                elif spaces and breaks:
                    inline_breaks_spaces = True
                elif spaces:
                    inline_spaces = True
                elif breaks:
                    inline_breaks = True
            spaces = breaks = mixed = leading = False

        # Series of whitespaces reach the end.
        if (spaces or breaks) and (index == len(scalar)-1):
            if spaces and breaks:
                mixed_breaks_spaces = True
            elif spaces:
                trailing_spaces = True
                if leading:
                    leading_spaces = True
            elif breaks:
                trailing_breaks = True
                if leading:
                    leading_breaks = True
            spaces = breaks = mixed = leading = False

        # Prepare for the next character.  `index` already points at the
        # next character here, so `followed_by_space` looks one past it.
        index += 1
        preceeded_by_space = (ch in u'\0 \t\r\n\x85\u2028\u2029')
        followed_by_space = (index+1 >= len(scalar) or
                scalar[index+1] in u'\0 \t\r\n\x85\u2028\u2029')

    # Let's decide what styles are allowed.
    allow_flow_plain = True
    allow_block_plain = True
    allow_single_quoted = True
    allow_double_quoted = True
    allow_block = True

    # Leading and trailing whitespace are bad for plain scalars. We also
    # do not want to mess with leading whitespaces for block scalars.
    if leading_spaces or leading_breaks or trailing_spaces:
        allow_flow_plain = allow_block_plain = allow_block = False

    # Trailing breaks are fine for block scalars, but unacceptable for
    # plain scalars.
    if trailing_breaks:
        allow_flow_plain = allow_block_plain = False

    # The combination of (space+ break+) is only acceptable for block
    # scalars.
    if inline_breaks_spaces:
        allow_flow_plain = allow_block_plain = allow_single_quoted = False

    # Mixed spaces and breaks, as well as special character are only
    # allowed for double quoted scalars.
    if mixed_breaks_spaces or special_characters:
        allow_flow_plain = allow_block_plain =  \
        allow_single_quoted = allow_block = False

    # We don't emit multiline plain scalars.
    if line_breaks:
        allow_flow_plain = allow_block_plain = False

    # Flow indicators are forbidden for flow plain scalars.
    if flow_indicators:
        allow_flow_plain = False

    # Block indicators are forbidden for block plain scalars.
    if block_indicators:
        allow_block_plain = False

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
811 # Writers.
def flush_stream(self):
    """Flush the output stream when it provides a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
def write_stream_start(self):
    """Write the stream prologue: a byte order mark for UTF-16 output,
    nothing for any other (or no) encoding."""
    # Write BOM if needed.  The BOM is the single code point U+FEFF;
    # encoding it with the selected codec produces the bytes in the
    # matching byte order.  (Encoding the two characters u'\xFF\xFE'
    # instead produced four unrelated bytes.)
    if self.encoding and self.encoding.startswith('utf-16'):
        self.stream.write(u'\uFEFF'.encode(self.encoding))
def write_stream_end(self):
    """Finish the stream; the only action taken is flushing it."""
    self.flush_stream()
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write an indicator string, preceded by a single space when
    `need_whitespace` is set and the last written character was not
    whitespace.  `whitespace` and `indention` describe the indicator
    itself and update the emitter's last-character flags."""
    separator_needed = need_whitespace and not self.whitespace
    text = indicator
    if separator_needed:
        text = u' '+indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(text)
    if self.encoding:
        text = text.encode(self.encoding)
    self.stream.write(text)
def write_indent(self):
    """Move to the current indentation column, starting a new line first
    when the current line cannot be used for it."""
    target = self.indent or 0
    needs_break = (not self.indention or self.column > target
            or (self.column == target and not self.whitespace))
    if needs_break:
        self.write_line_break()
    if self.column >= target:
        return
    self.whitespace = True
    padding = u' '*(target-self.column)
    self.column = target
    if self.encoding:
        padding = padding.encode(self.encoding)
    self.stream.write(padding)
def write_line_break(self, data=None):
    """Write a line break (`data`, or the preferred break when omitted)
    and reset the position flags for the start of a new line."""
    text = data
    if text is None:
        text = self.best_line_break
    self.whitespace = True
    self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        text = text.encode(self.encoding)
    self.stream.write(text)
def write_version_directive(self, version_text):
    """Write a '%YAML <version>' directive followed by a line break."""
    directive = u'%%YAML %s' % version_text
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
def write_tag_directive(self, handle_text, prefix_text):
    """Write a '%TAG <handle> <prefix>' directive followed by a line
    break."""
    directive = u'%%TAG %s %s' % (handle_text, prefix_text)
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
876 # Scalar streams.
def write_single_quoted(self, text, split=True):
    """Write `text` as a single-quoted scalar.

    The text is scanned as alternating runs of spaces, line breaks and
    other characters.  A single space may be replaced by a line fold when
    the line is too long and `split` is set; line breaks are written out
    followed by an indent; every apostrophe is doubled.
    """
    self.write_indicator(u'\'', True)
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # `ch` is None one step past the end, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch is None or ch != u' ':
                # Fold only at a lone space that is neither the first nor
                # the last character of the scalar.
                if start+1 == end and self.column > self.best_width and split \
                        and start != 0 and end != len(text):
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with '\n'.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029' or ch == u'\'':
                if start < end:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                    start = end
        # Escape the apostrophe whatever run precedes it, so that an
        # apostrophe directly after spaces or line breaks is doubled too.
        if ch == u'\'':
            data = u'\'\''
            self.column += 2
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            start = end + 1
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
    self.write_indicator(u'\'', False)
# Maps a character to the letter that follows the backslash in its
# double-quoted escape sequence.
ESCAPE_REPLACEMENTS = {
    u'\0':      u'0',
    u'\x07':    u'a',
    u'\x08':    u'b',
    u'\x09':    u't',
    u'\x0A':    u'n',
    u'\x0B':    u'v',
    u'\x0C':    u'f',
    u'\x0D':    u'r',
    u'\x1B':    u'e',
    u'\"':      u'\"',
    u'\\':      u'\\',
    u'\x85':    u'N',
    u'\xA0':    u'_',
    u'\u2028':  u'L',
    u'\u2029':  u'P',
}
def write_double_quoted(self, text, split=True):
    # Write `text` as a double-quoted scalar.  Runs of characters that can
    # be written as they are get copied through; every other character is
    # written as an escape sequence.  When `split` is set, lines longer
    # than `best_width` are folded with a trailing backslash.
    self.write_indicator(u'"', True)
    start = end = 0
    while end <= len(text):
        # `ch` is None one step past the end, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        # A character needs escaping when it is a quote or backslash, or
        # when it is neither printable ASCII nor (with allow_unicode) a
        # non-ASCII character other than the unicode line breaks.
        if ch is None or ch in u'"\\' \
                or not (u'\x20' <= ch <= u'\x7E'
                        or (self.allow_unicode and ch > u'\x7F'
                            and ch not in u'\x85\u2028\u2029')):
            if start < end:
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
            if ch is not None:
                # Prefer the short named escape, else the narrowest
                # numeric escape that fits the code point.
                if ch in self.ESCAPE_REPLACEMENTS:
                    data = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= u'\xFF':
                    data = u'\\x%02X' % ord(ch)
                elif ch <= u'\uFFFF':
                    data = u'\\u%04X' % ord(ch)
                else:
                    data = u'\\U%08X' % ord(ch)
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end+1
        # Fold the line at a space, or right after an escape, once the
        # pending text would exceed the preferred width.  Never at the
        # first or last character.
        if 0 < end < len(text)-1 and (ch == u' ' or start >= end) \
                and self.column+(end-start) > self.best_width and split:
            data = text[start:end]+u'\\'
            if start < end:
                start = end
            self.column += len(data)
            if self.encoding:
                data = data.encode(self.encoding)
            self.stream.write(data)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            # A backslash is written before a space that begins the
            # continuation line.
            if ch == u' ':
                data = u'\\'
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
        end += 1
    self.write_indicator(u'"', False)
def determine_chomp(self, text):
    """Return the block scalar chomping indicator for `text`: u'-' when
    it does not end with a line break, u'' when it ends with exactly one
    and u'+' when it ends with two or more."""
    line_breaks = u'\n\x85\u2028\u2029'
    # Left-pad with spaces so that both of the last two positions exist.
    tail = text[-2:].rjust(2)
    if tail[-1] not in line_breaks:
        return u'-'
    if tail[-2] in line_breaks:
        return u'+'
    return u''
def write_folded(self, text):
    """Write `text` as a folded ('>') block scalar.

    The header carries the chomping indicator from `determine_chomp`.
    The text is scanned as alternating runs of line breaks, spaces and
    other characters; a lone space is turned into a line fold once the
    line is wider than `best_width`.
    """
    chomp = self.determine_chomp(text)
    self.write_indicator(u'>'+chomp, True)
    self.write_indent()
    leading_space = False
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # `ch` is None one step past the end, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with '\n'
                # and neither the previous nor the next line starts with
                # a space.
                if not leading_space and ch is not None and ch != u' ' \
                        and text[start] == u'\n':
                    self.write_line_break()
                leading_space = (ch == u' ')
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                start = end
        elif spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width:
                    self.write_indent()
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                # Count the written text; the fold check above compares
                # `self.column` with `best_width` and would otherwise
                # see only the spaces.
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
            spaces = (ch == u' ')
        end += 1
def write_literal(self, text):
    # Write `text` as a literal ('|') block scalar.  The header carries
    # the chomping indicator from `determine_chomp`.  The text is scanned
    # as alternating runs of line breaks and other characters; runs of
    # other characters are copied through unchanged.
    chomp = self.determine_chomp(text)
    self.write_indicator(u'|'+chomp, True)
    self.write_indent()
    breaks = False
    start = end = 0
    while end <= len(text):
        # `ch` is None one step past the end, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if breaks:
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # '\n' is written as the preferred line break; other
                # break characters are written as they are.
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                # Indent only when more text follows.
                if ch is not None:
                    self.write_indent()
                start = end
        else:
            if ch is None or ch in u'\n\x85\u2028\u2029':
                # NOTE: `self.column` is not advanced for this text.
                data = text[start:end]
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                # Text without a final line break still ends the line.
                if ch is None:
                    self.write_line_break()
                start = end
        if ch is not None:
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1
def write_plain(self, text, split=True):
    """Write `text` as a plain (unquoted) scalar.

    Nothing is written for empty text.  A separating space is written
    first when the last written character was not whitespace.  The text
    is scanned as alternating runs of spaces, line breaks and other
    characters; a lone space is turned into a line fold when the line is
    wider than `best_width` and `split` is set.
    """
    if not text:
        return
    if not self.whitespace:
        data = u' '
        self.column += len(data)
        if self.encoding:
            data = data.encode(self.encoding)
        self.stream.write(data)
    # The scalar text is neither whitespace nor indentation.  (These flags
    # were previously stored under the misspelled name `writespace`, so
    # `self.whitespace` was never cleared.)
    self.whitespace = False
    self.indention = False
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        # `ch` is None one step past the end, which flushes the last run.
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            if ch != u' ':
                if start+1 == end and self.column > self.best_width and split:
                    self.write_indent()
                    self.whitespace = False
                    self.indention = False
                else:
                    data = text[start:end]
                    self.column += len(data)
                    if self.encoding:
                        data = data.encode(self.encoding)
                    self.stream.write(data)
                start = end
        elif breaks:
            # `ch is None` is checked first: `None not in <string>` raises
            # TypeError when the text ends with a line break.
            if ch is None or ch not in u'\n\x85\u2028\u2029':
                # An extra break is written when the run starts with '\n'.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                start = end
        else:
            if ch is None or ch in u' \n\x85\u2028\u2029':
                data = text[start:end]
                self.column += len(data)
                if self.encoding:
                    data = data.encode(self.encoding)
                self.stream.write(data)
                start = end
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in u'\n\x85\u2028\u2029')
        end += 1