2 # The following YAML grammar is LL(1) and is parsed by a recursive descent
5 # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
6 # implicit_document ::= block_node DOCUMENT-END*
7 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
8 # block_node_or_indentless_sequence ::=
10 # | properties (block_content | indentless_block_sequence)?
12 # | indentless_block_sequence
13 # block_node ::= ALIAS
14 # | properties block_content?
17 # | properties flow_content?
19 # properties ::= TAG ANCHOR? | ANCHOR TAG?
20 # block_content ::= block_collection | flow_collection | SCALAR
21 # flow_content ::= flow_collection | SCALAR
22 # block_collection ::= block_sequence | block_mapping
23 # flow_collection ::= flow_sequence | flow_mapping
24 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
25 # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
26 # block_mapping ::= BLOCK-MAPPING_START
27 # ((KEY block_node_or_indentless_sequence?)?
28 # (VALUE block_node_or_indentless_sequence?)?)*
30 # flow_sequence ::= FLOW-SEQUENCE-START
31 # (flow_sequence_entry FLOW-ENTRY)*
32 # flow_sequence_entry?
34 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
35 # flow_mapping ::= FLOW-MAPPING-START
36 # (flow_mapping_entry FLOW-ENTRY)*
39 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
43 # stream: { STREAM-START }
44 # explicit_document: { DIRECTIVE DOCUMENT-START }
45 # implicit_document: FIRST(block_node)
46 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
47 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
48 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
49 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
50 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
51 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
52 # block_sequence: { BLOCK-SEQUENCE-START }
53 # block_mapping: { BLOCK-MAPPING-START }
54 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
# indentless_sequence: { BLOCK-ENTRY }
56 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
57 # flow_sequence: { FLOW-SEQUENCE-START }
58 # flow_mapping: { FLOW-MAPPING-START }
59 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
60 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
# Public API of this module.
__all__ = ['Parser', 'ParserError']
64 from error
import MarkedYAMLError
class ParserError(MarkedYAMLError):
    """Raised when the token stream cannot be parsed into events."""
    pass
73 # Since writing a recursive-descendant parser is a straightforward task, we
74 # do not give many comments here.
75 # Note that we use Python generators. If you rewrite the parser in another
76 # language, you may replace all 'yield'-s with event handler calls.
# Default tag handles, used when a document declares no %TAG directives.
DEFAULT_TAGS = {
    u'!':   u'!',
    u'!!':  u'tag:yaml.org,2002:',
    }
def __init__(self):
    """Initialize the parser state machine.

    `state` holds the bound method to run next; `states` is the stack of
    pending states; `marks` tracks start marks of open collections.
    """
    self.current_event = None
    self.yaml_version = None
    self.tag_handles = {}
    self.states = []
    self.marks = []
    self.state = self.parse_stream_start
def check_event(self, *choices):
    """Return True if the next event matches one of `choices`.

    With no arguments, return True if any event is available.
    """
    # Check the type of the next event.
    if self.current_event is None:
        if self.state:
            self.current_event = self.state()
    if self.current_event is not None:
        if not choices:
            return True
        for choice in choices:
            if isinstance(self.current_event, choice):
                return True
    return False
def peek_event(self):
    """Return the next event without consuming it (None at end)."""
    # Get the next event.
    if self.current_event is None:
        if self.state:
            self.current_event = self.state()
    return self.current_event
def get_event(self):
    """Return the next event and advance past it."""
    # Get the next event and proceed further.
    if self.current_event is None:
        if self.state:
            self.current_event = self.state()
    value = self.current_event
    self.current_event = None
    return value
120 # stream ::= STREAM-START implicit_document? explicit_document* STREAM-END
121 # implicit_document ::= block_node DOCUMENT-END*
122 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END*
def parse_stream_start(self):
    """Produce the STREAM-START event and move to document parsing."""
    # Parse the stream start.
    token = self.get_token()
    event = StreamStartEvent(token.start_mark, token.end_mark,
            encoding=token.encoding)
    # Prepare the next state.
    self.state = self.parse_implicit_document_start
    return event
def parse_implicit_document_start(self):
    """Produce DOCUMENT-START for a document with no '---' marker."""
    # Parse an implicit document.
    if not self.check_token(DirectiveToken, DocumentStartToken,
            StreamEndToken):
        self.tag_handles = self.DEFAULT_TAGS
        token = self.peek_token()
        # An implicit document has zero width: start and end coincide.
        start_mark = end_mark = token.start_mark
        event = DocumentStartEvent(start_mark, end_mark,
                explicit=False)
        # Prepare the next state.
        self.states.append(self.parse_document_end)
        self.state = self.parse_block_node
        return event
    else:
        return self.parse_document_start()
def parse_document_start(self):
    """Produce DOCUMENT-START for an explicit document, or STREAM-END.

    Raises ParserError if directives are not followed by '---'.
    """
    # Parse any extra document end indicators.
    while self.check_token(DocumentEndToken):
        self.get_token()
    # Parse an explicit document.
    if not self.check_token(StreamEndToken):
        token = self.peek_token()
        start_mark = token.start_mark
        version, tags = self.process_directives()
        if not self.check_token(DocumentStartToken):
            raise ParserError(None, None,
                    "expected '<document start>', but found %r"
                    % self.peek_token().id,
                    self.peek_token().start_mark)
        token = self.get_token()
        end_mark = token.end_mark
        event = DocumentStartEvent(start_mark, end_mark,
                explicit=True, version=version, tags=tags)
        self.states.append(self.parse_document_end)
        self.state = self.parse_document_content
    else:
        # Parse the end of the stream.
        token = self.get_token()
        event = StreamEndEvent(token.start_mark, token.end_mark)
        # All open states and marks must have been consumed by now.
        assert not self.states
        assert not self.marks
        self.state = None
    return event
def parse_document_end(self):
    """Produce DOCUMENT-END; explicit only if '...' is present."""
    # Parse the document end.
    token = self.peek_token()
    start_mark = end_mark = token.start_mark
    explicit = False
    if self.check_token(DocumentEndToken):
        token = self.get_token()
        end_mark = token.end_mark
        explicit = True
    event = DocumentEndEvent(start_mark, end_mark,
            explicit=explicit)
    # Prepare the next state.
    self.state = self.parse_document_start
    return event
def parse_document_content(self):
    """Parse the root node of a document; empty documents yield an
    empty scalar event."""
    if self.check_token(DirectiveToken,
            DocumentStartToken, DocumentEndToken, StreamEndToken):
        # The document body is empty.
        event = self.process_empty_scalar(self.peek_token().start_mark)
        self.state = self.states.pop()
        return event
    else:
        return self.parse_block_node()
def process_directives(self):
    """Consume %YAML and %TAG directives.

    Returns (yaml_version, tag_handles_copy_or_None) and leaves
    `self.tag_handles` populated with any missing default handles.
    Raises ParserError on duplicate directives or an unsupported
    YAML major version.
    """
    self.yaml_version = None
    self.tag_handles = {}
    while self.check_token(DirectiveToken):
        token = self.get_token()
        if token.name == u'YAML':
            if self.yaml_version is not None:
                raise ParserError(None, None,
                        "found duplicate YAML directive", token.start_mark)
            major, minor = token.value
            if major != 1:
                raise ParserError(None, None,
                        "found incompatible YAML document (version 1.* is required)",
                        token.start_mark)
            self.yaml_version = token.value
        elif token.name == u'TAG':
            handle, prefix = token.value
            if handle in self.tag_handles:
                raise ParserError(None, None,
                        "duplicate tag handle %r" % handle.encode('utf-8'),
                        token.start_mark)
            self.tag_handles[handle] = prefix
    if self.tag_handles:
        value = self.yaml_version, self.tag_handles.copy()
    else:
        value = self.yaml_version, None
    # Fill in the default handles without clobbering explicit ones.
    for key in self.DEFAULT_TAGS:
        if key not in self.tag_handles:
            self.tag_handles[key] = self.DEFAULT_TAGS[key]
    return value
245 # block_node_or_indentless_sequence ::= ALIAS
246 # | properties (block_content | indentless_block_sequence)?
248 # | indentless_block_sequence
249 # block_node ::= ALIAS
250 # | properties block_content?
252 # flow_node ::= ALIAS
253 # | properties flow_content?
255 # properties ::= TAG ANCHOR? | ANCHOR TAG?
256 # block_content ::= block_collection | flow_collection | SCALAR
257 # flow_content ::= flow_collection | SCALAR
258 # block_collection ::= block_sequence | block_mapping
259 # flow_collection ::= flow_sequence | flow_mapping
def parse_block_node(self):
    """Parse a node in a block context."""
    return self.parse_node(block=True)
def parse_flow_node(self):
    """Parse a node in a flow context."""
    return self.parse_node()
def parse_block_node_or_indentless_sequence(self):
    """Parse a block node, allowing an indentless '-' sequence."""
    return self.parse_node(block=True, indentless_sequence=True)
def parse_node(self, block=False, indentless_sequence=False):
    """Parse a single node and produce its event.

    `block` permits block collections; `indentless_sequence`
    additionally permits a sequence of '-' entries with no extra
    indentation. Raises ParserError on undefined tag handles or
    missing node content.
    """
    if self.check_token(AliasToken):
        token = self.get_token()
        event = AliasEvent(token.value, token.start_mark, token.end_mark)
        self.state = self.states.pop()
    else:
        anchor = None
        tag = None
        start_mark = end_mark = tag_mark = None
        # properties ::= TAG ANCHOR? | ANCHOR TAG?
        if self.check_token(AnchorToken):
            token = self.get_token()
            start_mark = token.start_mark
            end_mark = token.end_mark
            anchor = token.value
            if self.check_token(TagToken):
                token = self.get_token()
                tag_mark = token.start_mark
                end_mark = token.end_mark
                tag = token.value
        elif self.check_token(TagToken):
            token = self.get_token()
            start_mark = tag_mark = token.start_mark
            end_mark = token.end_mark
            tag = token.value
            if self.check_token(AnchorToken):
                token = self.get_token()
                end_mark = token.end_mark
                anchor = token.value
        if tag is not None:
            # Resolve the (handle, suffix) pair against declared handles.
            handle, suffix = tag
            if handle is not None:
                if handle not in self.tag_handles:
                    raise ParserError("while parsing a node", start_mark,
                            "found undefined tag handle %r" % handle.encode('utf-8'),
                            tag_mark)
                tag = self.tag_handles[handle]+suffix
            else:
                tag = suffix
        #if tag == u'!':
        #    raise ParserError("while parsing a node", start_mark,
        #            "found non-specific tag '!'", tag_mark,
        #            "Please check 'http://pyyaml.org/wiki/YAMLNonSpecificTag' and share your opinion.")
        if start_mark is None:
            start_mark = end_mark = self.peek_token().start_mark
        event = None
        implicit = (tag is None or tag == u'!')
        if indentless_sequence and self.check_token(BlockEntryToken):
            end_mark = self.peek_token().end_mark
            event = SequenceStartEvent(anchor, tag, implicit,
                    start_mark, end_mark)
            self.state = self.parse_indentless_sequence_entry
        else:
            if self.check_token(ScalarToken):
                token = self.get_token()
                end_mark = token.end_mark
                # implicit is a pair: (plain-resolvable, quoted-resolvable).
                if (token.plain and tag is None) or tag == u'!':
                    implicit = (True, False)
                elif tag is None:
                    implicit = (False, True)
                else:
                    implicit = (False, False)
                event = ScalarEvent(anchor, tag, implicit, token.value,
                        start_mark, end_mark, style=token.style)
                self.state = self.states.pop()
            elif self.check_token(FlowSequenceStartToken):
                end_mark = self.peek_token().end_mark
                event = SequenceStartEvent(anchor, tag, implicit,
                        start_mark, end_mark, flow_style=True)
                self.state = self.parse_flow_sequence_first_entry
            elif self.check_token(FlowMappingStartToken):
                end_mark = self.peek_token().end_mark
                event = MappingStartEvent(anchor, tag, implicit,
                        start_mark, end_mark, flow_style=True)
                self.state = self.parse_flow_mapping_first_key
            elif block and self.check_token(BlockSequenceStartToken):
                end_mark = self.peek_token().start_mark
                event = SequenceStartEvent(anchor, tag, implicit,
                        start_mark, end_mark, flow_style=False)
                self.state = self.parse_block_sequence_first_entry
            elif block and self.check_token(BlockMappingStartToken):
                end_mark = self.peek_token().start_mark
                event = MappingStartEvent(anchor, tag, implicit,
                        start_mark, end_mark, flow_style=False)
                self.state = self.parse_block_mapping_first_key
            elif anchor is not None or tag is not None:
                # Empty scalars are allowed even if a tag or an anchor is
                # specified.
                event = ScalarEvent(anchor, tag, (implicit, False), u'',
                        start_mark, end_mark)
                self.state = self.states.pop()
            else:
                if block:
                    node = 'block'
                else:
                    node = 'flow'
                token = self.peek_token()
                raise ParserError("while parsing a %s node" % node, start_mark,
                        "expected the node content, but found %r" % token.id,
                        token.start_mark)
    return event
371 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
def parse_block_sequence_first_entry(self):
    """Consume BLOCK-SEQUENCE-START, remember its mark, parse entries."""
    token = self.get_token()
    self.marks.append(token.start_mark)
    return self.parse_block_sequence_entry()
def parse_block_sequence_entry(self):
    """Parse one '-' entry of a block sequence, or its BLOCK-END."""
    if self.check_token(BlockEntryToken):
        token = self.get_token()
        if not self.check_token(BlockEntryToken, BlockEndToken):
            self.states.append(self.parse_block_sequence_entry)
            return self.parse_block_node()
        else:
            # '-' with no node: emit an empty scalar.
            self.state = self.parse_block_sequence_entry
            return self.process_empty_scalar(token.end_mark)
    if not self.check_token(BlockEndToken):
        token = self.peek_token()
        raise ParserError("while parsing a block collection", self.marks[-1],
                "expected <block end>, but found %r" % token.id, token.start_mark)
    token = self.get_token()
    event = SequenceEndEvent(token.start_mark, token.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
397 # indentless_sequence ::= (BLOCK-ENTRY block_node?)+
def parse_indentless_sequence_entry(self):
    """Parse one '-' entry of an indentless sequence.

    The sequence ends at any token that is not BLOCK-ENTRY; no
    BLOCK-END token exists for indentless sequences, so the end event
    is zero-width at the next token's start mark.
    """
    if self.check_token(BlockEntryToken):
        token = self.get_token()
        if not self.check_token(BlockEntryToken,
                KeyToken, ValueToken, BlockEndToken):
            self.states.append(self.parse_indentless_sequence_entry)
            return self.parse_block_node()
        else:
            self.state = self.parse_indentless_sequence_entry
            return self.process_empty_scalar(token.end_mark)
    token = self.peek_token()
    event = SequenceEndEvent(token.start_mark, token.start_mark)
    self.state = self.states.pop()
    return event
414 # block_mapping ::= BLOCK-MAPPING_START
415 # ((KEY block_node_or_indentless_sequence?)?
416 # (VALUE block_node_or_indentless_sequence?)?)*
def parse_block_mapping_first_key(self):
    """Consume BLOCK-MAPPING-START, remember its mark, parse keys."""
    token = self.get_token()
    self.marks.append(token.start_mark)
    return self.parse_block_mapping_key()
def parse_block_mapping_key(self):
    """Parse one key of a block mapping, or its BLOCK-END."""
    if self.check_token(KeyToken):
        token = self.get_token()
        if not self.check_token(KeyToken, ValueToken, BlockEndToken):
            self.states.append(self.parse_block_mapping_value)
            return self.parse_block_node_or_indentless_sequence()
        else:
            # '?' with no key node: emit an empty scalar.
            self.state = self.parse_block_mapping_value
            return self.process_empty_scalar(token.end_mark)
    if not self.check_token(BlockEndToken):
        token = self.peek_token()
        raise ParserError("while parsing a block mapping", self.marks[-1],
                "expected <block end>, but found %r" % token.id, token.start_mark)
    token = self.get_token()
    event = MappingEndEvent(token.start_mark, token.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
def parse_block_mapping_value(self):
    """Parse the value of a block mapping entry; missing values
    become empty scalars."""
    if self.check_token(ValueToken):
        token = self.get_token()
        if not self.check_token(KeyToken, ValueToken, BlockEndToken):
            self.states.append(self.parse_block_mapping_key)
            return self.parse_block_node_or_indentless_sequence()
        else:
            self.state = self.parse_block_mapping_key
            return self.process_empty_scalar(token.end_mark)
    else:
        # No ':' at all: the value is an empty scalar.
        self.state = self.parse_block_mapping_key
        token = self.peek_token()
        return self.process_empty_scalar(token.start_mark)
457 # flow_sequence ::= FLOW-SEQUENCE-START
458 # (flow_sequence_entry FLOW-ENTRY)*
459 # flow_sequence_entry?
461 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
463 # Note that while production rules for both flow_sequence_entry and
464 # flow_mapping_entry are equal, their interpretations are different.
465 # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
466 # generate an inline mapping (set syntax).
def parse_flow_sequence_first_entry(self):
    """Consume FLOW-SEQUENCE-START, remember its mark, parse entries."""
    token = self.get_token()
    self.marks.append(token.start_mark)
    return self.parse_flow_sequence_entry(first=True)
def parse_flow_sequence_entry(self, first=False):
    """Parse one entry of a flow sequence, or its closing ']'.

    A KEY token starts an inline single-pair mapping (set syntax).
    `first` suppresses the mandatory ',' separator check.
    """
    if not self.check_token(FlowSequenceEndToken):
        if not first:
            if self.check_token(FlowEntryToken):
                self.get_token()
            else:
                token = self.peek_token()
                raise ParserError("while parsing a flow sequence", self.marks[-1],
                        "expected ',' or ']', but got %r" % token.id, token.start_mark)
        if self.check_token(KeyToken):
            token = self.peek_token()
            event = MappingStartEvent(None, None, True,
                    token.start_mark, token.end_mark,
                    flow_style=True)
            self.state = self.parse_flow_sequence_entry_mapping_key
            return event
        elif not self.check_token(FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry)
            return self.parse_flow_node()
    token = self.get_token()
    event = SequenceEndEvent(token.start_mark, token.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
def parse_flow_sequence_entry_mapping_key(self):
    """Parse the key of an inline mapping inside a flow sequence."""
    token = self.get_token()
    if not self.check_token(ValueToken,
            FlowEntryToken, FlowSequenceEndToken):
        self.states.append(self.parse_flow_sequence_entry_mapping_value)
        return self.parse_flow_node()
    else:
        self.state = self.parse_flow_sequence_entry_mapping_value
        return self.process_empty_scalar(token.end_mark)
def parse_flow_sequence_entry_mapping_value(self):
    """Parse the value of an inline mapping inside a flow sequence;
    missing values become empty scalars."""
    if self.check_token(ValueToken):
        token = self.get_token()
        if not self.check_token(FlowEntryToken, FlowSequenceEndToken):
            self.states.append(self.parse_flow_sequence_entry_mapping_end)
            return self.parse_flow_node()
        else:
            self.state = self.parse_flow_sequence_entry_mapping_end
            return self.process_empty_scalar(token.end_mark)
    else:
        self.state = self.parse_flow_sequence_entry_mapping_end
        token = self.peek_token()
        return self.process_empty_scalar(token.start_mark)
def parse_flow_sequence_entry_mapping_end(self):
    """Close the inline single-pair mapping (zero-width end event)."""
    self.state = self.parse_flow_sequence_entry
    token = self.peek_token()
    return MappingEndEvent(token.start_mark, token.start_mark)
528 # flow_mapping ::= FLOW-MAPPING-START
529 # (flow_mapping_entry FLOW-ENTRY)*
530 # flow_mapping_entry?
532 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
def parse_flow_mapping_first_key(self):
    """Consume FLOW-MAPPING-START, remember its mark, parse keys."""
    token = self.get_token()
    self.marks.append(token.start_mark)
    return self.parse_flow_mapping_key(first=True)
def parse_flow_mapping_key(self, first=False):
    """Parse one key of a flow mapping, or its closing '}'.

    `first` suppresses the mandatory ',' separator check. A bare node
    with no '?' is a key with an empty value.
    """
    if not self.check_token(FlowMappingEndToken):
        if not first:
            if self.check_token(FlowEntryToken):
                self.get_token()
            else:
                token = self.peek_token()
                raise ParserError("while parsing a flow mapping", self.marks[-1],
                        "expected ',' or '}', but got %r" % token.id, token.start_mark)
        if self.check_token(KeyToken):
            token = self.get_token()
            if not self.check_token(ValueToken,
                    FlowEntryToken, FlowMappingEndToken):
                self.states.append(self.parse_flow_mapping_value)
                return self.parse_flow_node()
            else:
                self.state = self.parse_flow_mapping_value
                return self.process_empty_scalar(token.end_mark)
        elif not self.check_token(FlowMappingEndToken):
            self.states.append(self.parse_flow_mapping_empty_value)
            return self.parse_flow_node()
    token = self.get_token()
    event = MappingEndEvent(token.start_mark, token.end_mark)
    self.state = self.states.pop()
    self.marks.pop()
    return event
def parse_flow_mapping_value(self):
    """Parse the value of a flow mapping entry; missing values
    become empty scalars."""
    if self.check_token(ValueToken):
        token = self.get_token()
        if not self.check_token(FlowEntryToken, FlowMappingEndToken):
            self.states.append(self.parse_flow_mapping_key)
            return self.parse_flow_node()
        else:
            self.state = self.parse_flow_mapping_key
            return self.process_empty_scalar(token.end_mark)
    else:
        self.state = self.parse_flow_mapping_key
        token = self.peek_token()
        return self.process_empty_scalar(token.start_mark)
def parse_flow_mapping_empty_value(self):
    """Emit an empty scalar for a key that has no '?' and no value."""
    self.state = self.parse_flow_mapping_key
    return self.process_empty_scalar(self.peek_token().start_mark)
def process_empty_scalar(self, mark):
    """Return a plain, zero-width empty ScalarEvent located at `mark`."""
    return ScalarEvent(None, None, (True, False), u'', mark, mark)