All tests passed! Scanner and Parser seem to be correct.
[pyyaml/python3.git] / lib / yaml / parser.py
blob71616b0c0ea27e3ecfbea2f5aa849f1c9b990aaa
2 # Production rules:
3 # stream ::= implicit_document? explicit_document* END
4 # explicit_document ::= DIRECTIVE* DOCUMENT-START block_node? DOCUMENT-END?
5 # implicit_document ::= block_node DOCUMENT-END?
6 # block_node ::= ALIAS | properties? block_content
7 # flow_node ::= ALIAS | properties? flow_content
8 # properties ::= TAG ANCHOR? | ANCHOR TAG?
9 # block_content ::= block_collection | flow_collection | SCALAR
10 # flow_content ::= flow_collection | SCALAR
11 # block_collection ::= block_sequence | block_mapping
12 # block_sequence ::= BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END
13 # block_mapping ::= BLOCK-MAPPING-START ((KEY block_node_or_indentless_sequence?)? (VALUE block_node_or_indentless_sequence?)?)* BLOCK-END
14 # block_node_or_indentless_sequence ::= ALIAS | properties? (block_content | indentless_block_sequence)
15 # indentless_block_sequence ::= (ENTRY block_node?)+
16 # flow_collection ::= flow_sequence | flow_mapping
17 # flow_sequence ::= FLOW-SEQUENCE-START (flow_sequence_entry ENTRY)* flow_sequence_entry? FLOW-SEQUENCE-END
18 # flow_mapping ::= FLOW-MAPPING-START (flow_mapping_entry ENTRY)* flow_mapping_entry? FLOW-MAPPING-END
19 # flow_sequence_entry ::= flow_node | KEY flow_node (VALUE flow_node?)?
20 # flow_mapping_entry ::= flow_node | KEY flow_node (VALUE flow_node?)?
22 # FIRST(rule) sets:
23 # stream: {}
24 # explicit_document: { DIRECTIVE DOCUMENT-START }
25 # implicit_document: block_node
26 # block_node: { ALIAS TAG ANCHOR SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START }
27 # flow_node: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START }
28 # block_content: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
29 # flow_content: { FLOW-SEQUENCE-START FLOW-MAPPING-START SCALAR }
30 # block_collection: { BLOCK-SEQUENCE-START BLOCK-MAPPING-START }
31 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
32 # block_sequence: { BLOCK-SEQUENCE-START }
33 # block_mapping: { BLOCK-MAPPING-START }
34 # block_node_or_indentless_sequence: { ALIAS ANCHOR TAG SCALAR BLOCK-SEQUENCE-START BLOCK-MAPPING-START FLOW-SEQUENCE-START FLOW-MAPPING-START ENTRY }
35 # indentless_block_sequence: { ENTRY }
36 # flow_collection: { FLOW-SEQUENCE-START FLOW-MAPPING-START }
37 # flow_sequence: { FLOW-SEQUENCE-START }
38 # flow_mapping: { FLOW-MAPPING-START }
39 # flow_sequence_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
40 # flow_mapping_entry: { ALIAS ANCHOR TAG SCALAR FLOW-SEQUENCE-START FLOW-MAPPING-START KEY }
42 from scanner import *
class Error(Exception):
    """Exception raised when the token stream does not match the grammar."""
class Node:
    """Common base of the node classes; supplies a shared ``__repr__``."""

    def __repr__(self):
        """Return ``ClassName(...)`` listing the attributes that are set.

        Only ``anchor``, ``tag`` and ``value`` are considered, in that
        order; an attribute the instance does not have is skipped.
        """
        shown = [
            repr(getattr(self, name))
            for name in ('anchor', 'tag', 'value')
            if hasattr(self, name)
        ]
        return self.__class__.__name__ + "(" + ', '.join(shown) + ")"
class AliasNode(Node):
    """Node produced for an alias token; it carries only an anchor."""

    def __init__(self, anchor):
        """Store *anchor*, the value of the alias token."""
        self.anchor = anchor
class ScalarNode(Node):
    """Node holding a scalar value together with its anchor and tag."""

    def __init__(self, anchor, tag, value):
        """Store the node properties.

        *anchor* and *tag* are None when the node has no such property;
        the parser passes the scalar token's value as *value*.
        """
        self.anchor, self.tag, self.value = anchor, tag, value
class SequenceNode(Node):
    """Node holding a sequence together with its anchor and tag."""

    def __init__(self, anchor, tag, value):
        """Store the node properties.

        *anchor* and *tag* are None when the node has no such property;
        the parser passes a list of entries as *value*.
        """
        self.anchor, self.tag, self.value = anchor, tag, value
class MappingNode(Node):
    """Node holding a mapping together with its anchor and tag."""

    def __init__(self, anchor, tag, value):
        """Store the node properties.

        *anchor* and *tag* are None when the node has no such property;
        the parser passes a list of ``(key, value)`` pairs as *value*.
        """
        self.anchor, self.tag, self.value = anchor, tag, value
class Parser:
    """Recursive-descent parser that builds nodes from the scanner's tokens.

    Each ``parse_*`` method corresponds to one production rule from the
    grammar listed in the comments at the top of this file.  Tokens are
    read from ``self.scanner`` through ``is_token`` (look-ahead without
    consuming) and ``get_token`` (consume).
    """

    def __init__(self, source, data):
        """Create the ``Scanner`` that supplies tokens.

        Both arguments are passed to ``Scanner`` unchanged.
        """
        self.scanner = Scanner(source, data)

    def is_token(self, *choices):
        """Return True if the next token is an instance of any of *choices*.

        The token is only peeked at, not consumed.  With no *choices*
        the result is always False.
        """
        # isinstance() takes the whole tuple of classes at once, so no
        # explicit loop over the choices is needed.
        return isinstance(self.scanner.peek_token(), choices)

    def get_token(self):
        """Consume the next token from the scanner and return it."""
        return self.scanner.get_token()

    def _expect(self, token_class, message):
        """Consume and return the next token, which must be a *token_class*.

        Raises ``Error`` (through ``fail``) with *message* otherwise.
        """
        if not self.is_token(token_class):
            self.fail(message)
        return self.get_token()

    def parse(self):
        """Parse the whole stream; see ``parse_stream``."""
        return self.parse_stream()

    def parse_stream(self):
        """Parse ``implicit_document? explicit_document* END``.

        Returns a list with one item per document: the document's root
        node, or None for an explicit document without content.  The
        END token itself is not consumed.
        """
        documents = []
        if not self.is_token(DirectiveToken, DocumentStartToken, EndToken):
            documents.append(self.parse_block_node())
            # implicit_document ::= block_node DOCUMENT-END?
            # Skip the optional end marker(s) the same way as for
            # explicit documents; otherwise "DOCUMENT-START is expected"
            # would be raised for a valid implicit document.
            while self.is_token(DocumentEndToken):
                self.get_token()
        while not self.is_token(EndToken):
            # Directives are skipped without being interpreted.
            while self.is_token(DirectiveToken):
                self.get_token()
            self._expect(DocumentStartToken, 'DOCUMENT-START is expected')
            if self.is_token(DirectiveToken,
                    DocumentStartToken, DocumentEndToken, EndToken):
                # Explicit document without a root node.
                documents.append(None)
            else:
                documents.append(self.parse_block_node())
            while self.is_token(DocumentEndToken):
                self.get_token()
        # The loop above only exits when the next token is END.
        return documents

    def parse_block_node(self):
        """Parse ``block_node``."""
        return self.parse_node(block=True)

    def parse_flow_node(self):
        """Parse ``flow_node``."""
        return self.parse_node()

    def parse_block_node_or_indentless_sequence(self):
        """Parse ``block_node_or_indentless_sequence``."""
        return self.parse_node(block=True, indentless_sequence=True)

    def parse_node(self, block=False, indentless_sequence=False):
        """Parse an alias, or optional properties followed by content.

        *block* selects block content instead of flow content.  With
        *indentless_sequence* set, an ENTRY token after the properties
        starts an indentless sequence.

        Returns an ``AliasNode``, ``ScalarNode``, ``SequenceNode`` or
        ``MappingNode``.  Raises ``Error`` if no content can be parsed.
        """
        if self.is_token(AliasToken):
            token = self.get_token()
            return AliasNode(token.value)
        # properties ::= TAG ANCHOR? | ANCHOR TAG?
        anchor = None
        tag = None
        if self.is_token(AnchorToken):
            anchor = self.get_token().value
            if self.is_token(TagToken):
                tag = self.get_token().value
        elif self.is_token(TagToken):
            tag = self.get_token().value
            if self.is_token(AnchorToken):
                anchor = self.get_token().value
        if indentless_sequence and self.is_token(EntryToken):
            NodeClass = SequenceNode
            value = self.parse_indentless_sequence()
        else:
            # Pick the node class from the look-ahead token.  If nothing
            # matches, the content parser below raises Error before
            # NodeClass is used.
            if self.is_token(ScalarToken):
                NodeClass = ScalarNode
            elif self.is_token(BlockSequenceStartToken,
                    FlowSequenceStartToken):
                NodeClass = SequenceNode
            elif self.is_token(BlockMappingStartToken,
                    FlowMappingStartToken):
                NodeClass = MappingNode
            if block:
                value = self.parse_block_content()
            else:
                value = self.parse_flow_content()
        return NodeClass(anchor, tag, value)

    def parse_block_content(self):
        """Parse ``block_content`` and return the raw value.

        The result is a scalar token's value, a list of entries, or a
        list of ``(key, value)`` pairs.  Raises ``Error`` if the next
        token cannot start block content.
        """
        if self.is_token(ScalarToken):
            return self.get_token().value
        elif self.is_token(BlockSequenceStartToken):
            return self.parse_block_sequence()
        elif self.is_token(BlockMappingStartToken):
            return self.parse_block_mapping()
        elif self.is_token(FlowSequenceStartToken):
            return self.parse_flow_sequence()
        elif self.is_token(FlowMappingStartToken):
            return self.parse_flow_mapping()
        else:
            self.fail('block content is expected')

    def parse_flow_content(self):
        """Parse ``flow_content`` and return the raw value.

        Same as ``parse_block_content`` except that block collections
        are not accepted.
        """
        if self.is_token(ScalarToken):
            return self.get_token().value
        elif self.is_token(FlowSequenceStartToken):
            return self.parse_flow_sequence()
        elif self.is_token(FlowMappingStartToken):
            return self.parse_flow_mapping()
        else:
            self.fail('flow content is expected')

    def parse_block_sequence(self):
        """Parse ``BLOCK-SEQUENCE-START (ENTRY block_node?)* BLOCK-END``.

        Returns the list of entry nodes; an empty entry is None.
        """
        sequence = []
        self._expect(BlockSequenceStartToken,
                'BLOCK-SEQUENCE-START is expected')
        while self.is_token(EntryToken):
            self.get_token()
            if self.is_token(EntryToken, BlockEndToken):
                sequence.append(None)
            else:
                sequence.append(self.parse_block_node())
        self._expect(BlockEndToken, 'BLOCK-END is expected')
        return sequence

    def parse_indentless_sequence(self):
        """Parse ``(ENTRY block_node?)+`` and return the list of entries.

        An indentless sequence has no start or end token of its own; it
        stops at the first token that is not ENTRY.  An empty entry is
        None.
        """
        sequence = []
        while self.is_token(EntryToken):
            self.get_token()
            # The entry is empty when it is followed by another entry or
            # by a token that continues or closes the enclosing block
            # mapping (KEY, VALUE, BLOCK-END); none of them can start a
            # block node.
            if self.is_token(EntryToken, KeyToken, ValueToken,
                    BlockEndToken):
                sequence.append(None)
            else:
                sequence.append(self.parse_block_node())
        return sequence

    def parse_block_mapping(self):
        """Parse a block mapping and return a list of ``(key, value)`` pairs.

        Either half of a pair is None when it is missing or empty.
        """
        mapping = []
        self._expect(BlockMappingStartToken,
                'BLOCK-MAPPING-START is expected')
        while self.is_token(KeyToken, ValueToken):
            key = None
            value = None
            if self.is_token(KeyToken):
                self.get_token()
                if not self.is_token(KeyToken, ValueToken, BlockEndToken):
                    key = self.parse_block_node_or_indentless_sequence()
            if self.is_token(ValueToken):
                self.get_token()
                if not self.is_token(KeyToken, ValueToken, BlockEndToken):
                    value = self.parse_block_node_or_indentless_sequence()
            mapping.append((key, value))
        self._expect(BlockEndToken, 'BLOCK-END is expected')
        return mapping

    def _parse_flow_pair(self, end_token):
        """Parse ``KEY flow_node? (VALUE flow_node?)?`` inside a flow collection.

        The next token must be KEY.  *end_token* is the token class that
        closes the enclosing collection; together with ENTRY it marks an
        empty value.  Returns a ``(key, value)`` tuple whose halves are
        None when absent.
        """
        self.get_token()    # the KEY token
        key = None
        value = None
        if not self.is_token(ValueToken):
            key = self.parse_flow_node()
        if self.is_token(ValueToken):
            self.get_token()
            if not self.is_token(EntryToken, end_token):
                value = self.parse_flow_node()
        return key, value

    def parse_flow_sequence(self):
        """Parse a flow sequence and return the list of entry nodes.

        A ``KEY ...`` entry becomes a ``MappingNode`` (without anchor or
        tag) that holds the single ``(key, value)`` pair.
        """
        sequence = []
        self._expect(FlowSequenceStartToken,
                'FLOW-SEQUENCE-START is expected')
        while not self.is_token(FlowSequenceEndToken):
            if self.is_token(KeyToken):
                pair = self._parse_flow_pair(FlowSequenceEndToken)
                sequence.append(MappingNode(None, None, [pair]))
            else:
                sequence.append(self.parse_flow_node())
            if not self.is_token(EntryToken, FlowSequenceEndToken):
                self.fail("ENTRY or FLOW-SEQUENCE-END are expected")
            if self.is_token(EntryToken):
                self.get_token()
        # The loop only exits on FLOW-SEQUENCE-END; consume it.
        self.get_token()
        return sequence

    def parse_flow_mapping(self):
        """Parse a flow mapping and return a list of ``(key, value)`` pairs.

        An entry without KEY is stored as ``(node, None)``.
        """
        mapping = []
        self._expect(FlowMappingStartToken,
                'FLOW-MAPPING-START is expected')
        while not self.is_token(FlowMappingEndToken):
            if self.is_token(KeyToken):
                mapping.append(self._parse_flow_pair(FlowMappingEndToken))
            else:
                mapping.append((self.parse_flow_node(), None))
            if not self.is_token(EntryToken, FlowMappingEndToken):
                self.fail("ENTRY or FLOW-MAPPING-END are expected")
            if self.is_token(EntryToken):
                self.get_token()
        # The loop only exits on FLOW-MAPPING-END; consume it.
        self.get_token()
        return mapping

    def fail(self, message):
        """Raise ``Error`` with *message* and a snippet of the input.

        The snippet comes from the start marker of the next token.
        """
        marker = self.scanner.peek_token().start_marker
        raise Error(message+':\n'+marker.get_snippet())