2 # YAML can be parsed by an LL(1) parser!
4 # We use the following production rules:
5 # stream ::= implicit_document? explicit_document* STREAM-END
6 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
7 # implicit_document ::= block_node DOCUMENT-END?
8 # block_node ::= ALIAS | properties? block_content
9 # flow_node ::= ALIAS | properties? flow_content
10 # properties ::= TAG ANCHOR? | ANCHOR TAG?
11 # block_content ::= block_collection | flow_collection | SCALAR
12 # flow_content ::= flow_collection | SCALAR
13 # block_collection ::= block_sequence | block_mapping
14 # block_sequence ::= BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
15 # block_mapping ::= BLOCK-MAPPING_START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
16 # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
17 # indentless_block_sequence ::= (BLOCK-ENTRY block_node?)+
18 # flow_collection ::= flow_sequence | flow_mapping
19 # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry FLOW-ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
20 # flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry FLOW-ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
21 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
22 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
24 # Note that there is a slight deviation from the specification. We require a
25 # non-empty node content if ANCHOR or TAG is specified. This disallows such
28 # key: !!str # empty value
30 # This is done to prevent ambiguity in parsing tags and aliases:
32 # { !!perl/YAML::Parser: value }
34 # What is it? Should it be interpreted as
35 #     { ? !<tag:yaml.org,2002:perl/YAML::Parser> '' : value }
36 # or
37 #     { ? !<tag:yaml.org,2002:perl/YAML::Parser:> value : '' }
38 # Since we disallow empty node content, tags are always followed by spaces
42 # stream: FIRST(block_node) + { DIRECTIVE DOCUMENT-START }
43 # explicit_document: { DIRECTIVE DOCUMENT-START }
44 # implicit_document: FIRST(block_node)
45 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
46 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
47 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
48 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
49 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
50 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
51 # block_sequence: { BLOCK-SEQUENCE-START }
52 # block_mapping: { BLOCK-MAPPING-START }
53 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START BLOCK-ENTRY }
54 # indentless_sequence: { BLOCK-ENTRY }
55 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
56 # flow_sequence: { FLOW-SEQUENCE-START }
57 # flow_mapping: { FLOW-MAPPING-START }
58 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
59 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
61 from error
import YAMLError
class ParserError(YAMLError):
    # Error raised by the parser.  Carries up to two (message, marker)
    # pairs: the enclosing context (e.g. "while scanning a flow mapping")
    # and the specific problem found.

    def __init__(self, context=None, context_marker=None,
            problem=None, problem_marker=None):
        # context: description of the surrounding construct, or None.
        # context_marker: position marker for the context, or None.
        # problem: description of the actual error, or None.
        # problem_marker: position marker for the problem, or None.
        self.context = context
        self.context_marker = context_marker
        self.problem = problem
        self.problem_marker = problem_marker

    def __str__(self):
        # Render each non-empty message followed by its marker (if any),
        # one item per line.
        lines = []
        for (place, marker) in [(self.context, self.context_marker),
                (self.problem, self.problem_marker)]:
            if place is not None:
                lines.append(place)
            if marker is not None:
                lines.append(str(marker))
        return '\n'.join(lines)
85 # Since writing an LL(1) parser is a straightforward task, we do not give
87 # Note that we use Python generators. If you rewrite the parser to another
88 # language, you may replace all 'yield'-s with event handler calls.
92 u
'!!': u
'tag:yaml.org,2002:',
95 def __init__(self
, scanner
):
96 self
.scanner
= scanner
97 self
.current_event
= None
98 self
.yaml_version
= None
100 self
.event_generator
= self
.parse_stream()
102 def check(self
, *choices
):
103 # Check the type of the next event.
104 if self
.current_event
is None:
106 self
.current_event
= self
.event_generator
.next()
107 except StopIteration:
109 if self
.current_event
is not None:
110 for choice
in choices
:
111 if isinstance(self
.current_event
, choice
):
116 # Get the next event.
117 if self
.current_event
is None:
119 self
.current_event
= self
.event_generator
.next()
120 except StopIteration:
122 value
= self
.current_event
123 self
.current_event
= None
128 return self
.event_generator
130 def parse_stream(self
):
131 # implicit_document? explicit_document* STREAM-END
133 # Parse implicit document.
134 if not self
.scanner
.check(DirectiveToken
, DocumentStartToken
,
136 self
.tag_handles
= self
.DEFAULT_TAGS
137 for event
in self
.parse_block_node():
140 # Parse explicit documents.
141 while not self
.scanner
.check(StreamEndToken
):
142 self
.process_directives()
143 if not self
.scanner
.check(DocumentStartToken
):
144 raise ParserError(None, None,
145 "expected '<document start>', but found %r"
146 % self
.scanner
.peek().id,
147 self
.scanner
.peek().start_marker
)
148 token
= self
.scanner
.get()
149 if self
.scanner
.check(DirectiveToken
,
150 DocumentStartToken
, DocumentEndToken
, StreamEndToken
):
151 yield self
.process_empty_scalar(token
.end_marker
)
153 for event
in self
.parse_block_node():
155 while self
.scanner
.check(DocumentEndToken
):
158 # Parse end of stream.
159 token
= self
.scanner
.get()
160 yield StreamEndEvent(token
.start_marker
, token
.end_marker
)
162 def process_directives(self
):
164 self
.yaml_version
= None
165 self
.tag_handles
= {}
166 while self
.scanner
.check(DirectiveToken
):
167 token
= self
.scanner
.get()
168 if token
.name
== u
'YAML':
169 if self
.yaml_version
is not None:
170 raise ParserError(None, None,
171 "found duplicate YAML directive", token
.start_marker())
172 major
, minor
= token
.value
174 raise ParserError(None, None,
175 "found incompatible YAML document (version 1.* is required)",
176 token
.start_marker())
177 self
.yaml_version
= token
.value
178 elif token
.name
== u
'TAG':
179 handle
, prefix
= token
.value
180 if handle
in self
.tag_handles
:
181 raise ParserError(None, None,
182 "duplicate tag handle %r" % handle
.encode('utf-8'),
183 token
.start_marker())
184 self
.tag_handles
[handle
] = prefix
185 for key
in self
.DEFAULT_TAGS
:
186 if key
not in self
.tag_handles
:
187 self
.tag_handles
[key
] = self
.DEFAULT_TAGS
[key
]
189 def parse_block_node(self
):
190 return self
.parse_node(block
=True)
192 def parse_flow_node(self
):
193 return self
.parse_node()
195 def parse_block_node_or_indentless_sequence(self
):
196 return self
.parse_node(block
=True, indentless_sequence
=True)
198 def parse_node(self
, block
=False, indentless_sequence
=False):
199 # block_node ::= ALIAS | properties? block_content
200 # flow_node ::= ALIAS | properties? flow_content
201 # properties ::= TAG ANCHOR? | ANCHOR TAG?
202 # block_content ::= block_collection | flow_collection | SCALAR
203 # flow_content ::= flow_collection | SCALAR
204 # block_collection ::= block_sequence | block_mapping
205 # block_node_or_indentless_sequence ::= ALIAS | properties?
206 # (block_content | indentless_block_sequence)
207 if self
.scanner
.check(AliasToken
):
208 token
= self
.scanner
.get()
209 yield AliasEvent(token
.value
, token
.start_marker
, token
.end_marker
)
213 start_marker
= end_marker
= tag_marker
= None
214 if self
.scanner
.check(AnchorToken
):
215 token
= self
.scanner
.get()
216 start_marker
= end_marker
= token
.start_marker
218 if self
.scanner
.check(TagToken
):
219 token
= self
.scanner
.get()
220 end_marker
= tag_marker
= token
.start_marker
222 elif self
.scanner
.check(TagToken
):
223 token
= self
.scanner
.get()
224 start_marker
= end_marker
= tag_marker
= token
.start_marker
226 if self
.scanner
.check(AnchorToken
):
227 token
= self
.scanner
.get()
228 end_marker
= token
.start_marker
232 if handle
is not None:
233 if handle
not in self
.tag_handles
:
234 raise ParserError("while parsing a node", start_marker
,
235 "found undefined tag handle %r" % handle
.encode('utf-8'),
237 tag
= self
.tag_handles
[handle
]+suffix
241 if not (self
.scanner
.check(ScalarToken
) and
242 self
.scanner
.peek().plain
):
244 if start_marker
is None:
245 start_marker
= self
.scanner
.peek().start_marker
247 collection_events
= None
248 if indentless_sequence
and self
.scanner
.check(BlockEntryToken
):
249 end_marker
= self
.scanner
.peek().end_marker
250 event
= SequenceEvent(anchor
, tag
, start_marker
, end_marker
)
251 collection_events
= self
.parse_indentless_sequence()
253 if self
.scanner
.check(ScalarToken
):
254 token
= self
.scanner
.get()
255 end_marker
= token
.end_marker
256 event
= ScalarEvent(anchor
, tag
, token
.value
,
257 start_marker
, end_marker
)
258 elif self
.scanner
.check(FlowSequenceStartToken
):
259 end_marker
= self
.scanner
.peek().end_marker
260 event
= SequenceEvent(anchor
, tag
, start_marker
, end_marker
)
261 collection_events
= self
.parse_flow_sequence()
262 elif self
.scanner
.check(FlowMappingStartToken
):
263 end_marker
= self
.scanner
.peek().end_marker
264 event
= MappingEvent(anchor
, tag
, start_marker
, end_marker
)
265 collection_events
= self
.parse_flow_mapping()
266 elif block
and self
.scanner
.check(BlockSequenceStartToken
):
267 end_marker
= self
.scanner
.peek().start_marker
268 event
= SequenceEvent(anchor
, tag
, start_marker
, end_marker
)
269 collection_events
= self
.parse_block_sequence()
270 elif block
and self
.scanner
.check(BlockMappingStartToken
):
271 end_marker
= self
.scanner
.peek().start_marker
272 event
= MappingEvent(anchor
, tag
, start_marker
, end_marker
)
273 collection_events
= self
.parse_block_mapping()
279 token
= self
.scanner
.peek()
280 raise ParserError("while scanning a %s node" % node
, start_marker
,
281 "expected the node content, but found %r" % token
.id,
284 if collection_events
is not None:
285 for event
in collection_events
:
288 def parse_block_sequence(self
):
289 # BLOCK-SEQUENCE-START (BLOCK-ENTRY block_node?)* BLOCK-END
290 token
= self
.scanner
.get()
291 start_marker
= token
.start_marker
292 while self
.scanner
.check(BlockEntryToken
):
293 token
= self
.scanner
.get()
294 if not self
.scanner
.check(BlockEntryToken
, BlockEndToken
):
295 for event
in self
.parse_block_node():
298 yield self
.process_empty_scalar(token
.end_marker
)
299 if not self
.scanner
.check(BlockEndToken
):
300 token
= self
.scanner
.peek()
301 raise ParserError("while scanning a block collection", start_marker
,
302 "expected <block end>, but found %r" % token
.id, token
.start_marker
)
303 token
= self
.scanner
.get()
304 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
306 def parse_indentless_sequence(self
):
307 # (BLOCK-ENTRY block_node?)+
308 while self
.scanner
.check(BlockEntryToken
):
309 token
= self
.scanner
.get()
310 if not self
.scanner
.check(BlockEntryToken
,
311 KeyToken
, ValueToken
, BlockEndToken
):
312 for event
in self
.parse_block_node():
315 yield self
.process_empty_scalar(token
.end_marker
)
316 token
= self
.scanner
.peek()
317 yield CollectionEndEvent(token
.start_marker
, token
.start_marker
)
319 def parse_block_mapping(self
):
320 # BLOCK-MAPPING_START
321 # ((KEY block_node_or_indentless_sequence?)?
322 # (VALUE block_node_or_indentless_sequence?)?)*
324 token
= self
.scanner
.get()
325 start_marker
= token
.start_marker
326 while self
.scanner
.check(KeyToken
, ValueToken
):
327 if self
.scanner
.check(KeyToken
):
328 token
= self
.scanner
.get()
329 if not self
.scanner
.check(KeyToken
, ValueToken
, BlockEndToken
):
330 for event
in self
.parse_block_node_or_indentless_sequence():
333 yield self
.process_empty_scalar(token
.end_marker
)
334 if self
.scanner
.check(ValueToken
):
335 token
= self
.scanner
.get()
336 if not self
.scanner
.check(KeyToken
, ValueToken
, BlockEndToken
):
337 for event
in self
.parse_block_node_or_indentless_sequence():
340 yield self
.process_empty_scalar(token
.end_marker
)
342 token
= self
.scanner
.peek()
343 yield self
.process_empty_scalar(token
.start_marker
)
344 if not self
.scanner
.check(BlockEndToken
):
345 token
= self
.scanner
.peek()
346 raise ParserError("while scanning a block mapping", start_marker
,
347 "expected <block end>, but found %r" % token
.id, token
.start_marker
)
348 token
= self
.scanner
.get()
349 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
351 def parse_flow_sequence(self
):
352 # flow_sequence ::= FLOW-SEQUENCE-START
353 # (flow_sequence_entry FLOW-ENTRY)*
354 # flow_sequence_entry?
356 # flow_sequence_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
358 # Note that while production rules for both flow_sequence_entry and
359 # flow_mapping_entry are equal, their interpretations are different.
360 # For `flow_sequence_entry`, the part `KEY flow_node? (VALUE flow_node?)?`
361 # generate an inline mapping (set syntax).
362 token
= self
.scanner
.get()
363 start_marker
= token
.start_marker
364 while not self
.scanner
.check(FlowSequenceEndToken
):
365 if self
.scanner
.check(KeyToken
):
366 token
= self
.scanner
.get()
367 yield MappingEvent(None, u
'!',
368 token
.start_marker
, token
.end_marker
)
369 if not self
.scanner
.check(ValueToken
,
370 FlowEntryToken
, FlowSequenceEndToken
):
371 for event
in self
.parse_flow_node():
374 yield self
.process_empty_scalar(token
.end_marker
)
375 if self
.scanner
.check(ValueToken
):
376 token
= self
.scanner
.get()
377 if not self
.scanner
.check(FlowEntryToken
, FlowSequenceEndToken
):
378 for event
in self
.parse_flow_node():
381 yield self
.process_empty_scalar(token
.end_marker
)
383 token
= self
.scanner
.peek()
384 yield self
.process_empty_scalar(token
.start_marker
)
385 token
= self
.scanner
.peek()
386 yield CollectionEndEvent(token
.start_marker
, token
.start_marker
)
388 for event
in self
.parse_flow_node():
390 if not self
.scanner
.check(FlowEntryToken
, FlowSequenceEndToken
):
391 token
= self
.scanner
.peek()
392 raise ParserError("while scanning a flow sequence", start_marker
,
393 "expected ',' or ']', but got %r" % token
.id, token
.start_marker
)
394 if self
.scanner
.check(FlowEntryToken
):
396 if not self
.scanner
.check(FlowSequenceEndToken
):
397 token
= self
.scanner
.peek()
398 raise ParserError("while scanning a flow sequence", start_marker
,
399 "expected ']', but found %r" % token
.id, token
.start_marker
)
400 token
= self
.scanner
.get()
401 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
403 def parse_flow_mapping(self
):
404 # flow_mapping ::= FLOW-MAPPING-START
405 # (flow_mapping_entry FLOW-ENTRY)*
406 # flow_mapping_entry?
408 # flow_mapping_entry ::= flow_node | KEY flow_node? (VALUE flow_node?)?
409 token
= self
.scanner
.get()
410 start_marker
= token
.start_marker
411 while not self
.scanner
.check(FlowMappingEndToken
):
412 if self
.scanner
.check(KeyToken
):
413 token
= self
.scanner
.get()
414 if not self
.scanner
.check(ValueToken
,
415 FlowEntryToken
, FlowMappingEndToken
):
416 for event
in self
.parse_flow_node():
419 yield self
.process_empty_scalar(token
.end_marker
)
420 if self
.scanner
.check(ValueToken
):
421 token
= self
.scanner
.get()
422 if not self
.scanner
.check(FlowEntryToken
, FlowMappingEndToken
):
423 for event
in self
.parse_flow_node():
426 yield self
.process_empty_scalar(token
.end_marker
)
428 token
= self
.scanner
.peek()
429 yield self
.process_empty_scalar(token
.start_marker
)
431 for event
in self
.parse_flow_node():
433 yield self
.process_empty_scalar(self
.scanner
.peek().start_marker
)
434 if not self
.scanner
.check(FlowEntryToken
, FlowMappingEndToken
):
435 token
= self
.scanner
.peek()
436 raise ParserError("while scanning a flow mapping", start_marker
,
437 "expected ',' or '}', but got %r" % token
.id, token
.start_marker
)
438 if self
.scanner
.check(FlowEntryToken
):
440 if not self
.scanner
.check(FlowMappingEndToken
):
441 token
= self
.scanner
.peek()
442 raise ParserError("while scanning a flow mapping", start_marker
,
443 "expected '}', but found %r" % token
.id, token
.start_marker
)
444 token
= self
.scanner
.get()
445 yield CollectionEndEvent(token
.start_marker
, token
.end_marker
)
447 def process_empty_scalar(self
, marker
):
448 return ScalarEvent(None, None, u
'', marker
, marker
)