qapi/parser: Drop two bad type hints for now
[qemu/armbru.git] / scripts / qapi / parser.py
blob1ff334e6a81a0552bf196f8bc79e9beea1a192ca
1 # -*- coding: utf-8 -*-
3 # QAPI schema parser
5 # Copyright IBM, Corp. 2011
6 # Copyright (c) 2013-2019 Red Hat Inc.
8 # Authors:
9 # Anthony Liguori <aliguori@us.ibm.com>
10 # Markus Armbruster <armbru@redhat.com>
11 # Marc-André Lureau <marcandre.lureau@redhat.com>
12 # Kevin Wolf <kwolf@redhat.com>
14 # This work is licensed under the terms of the GNU GPL, version 2.
15 # See the COPYING file in the top-level directory.
17 from collections import OrderedDict
18 import os
19 import re
20 from typing import (
21 TYPE_CHECKING,
22 Dict,
23 List,
24 Mapping,
25 Optional,
26 Set,
27 Union,
30 from .common import must_match
31 from .error import QAPISemError, QAPISourceError
32 from .source import QAPISourceInfo
35 if TYPE_CHECKING:
36 # pylint: disable=cyclic-import
37 # TODO: Remove cycle. [schema -> expr -> parser -> schema]
38 from .schema import QAPISchemaFeature, QAPISchemaMember
41 # Return value alias for get_expr().
42 _ExprValue = Union[List[object], Dict[str, object], str, bool]
class QAPIExpression(Dict[str, object]):
    """
    A top-level schema expression.

    Behaves as a dict initialized from ``data``, and additionally
    carries the expression's source location and its (optional)
    documentation block.

    :param data: The expression's members.
    :param info: Source location of the expression.
    :param doc: Documentation block attached to the expression, if any.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self,
                 data: Mapping[str, object],
                 info: QAPISourceInfo,
                 doc: Optional['QAPIDoc'] = None):
        super().__init__(data)
        self.doc: Optional['QAPIDoc'] = doc
        self.info = info
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    The error column is derived from the parser's lexer state: it is
    the 1-based display column of ``parser.pos`` within the current
    line (which starts at ``parser.line_pos``), with tabs expanded to
    8-column tab stops.

    :param parser: The parser whose current position is reported.
    :param msg: The error message.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop.  Tab stops sit at
                # columns 1, 9, 17, ...; a modulo here would wrap the
                # column back into 1..8 instead of advancing.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
class QAPISchemaParser:
    """
    Parse QAPI schema source.

    Parse a JSON-esque schema file and process directives.  See
    qapi-code-gen.txt section "Schema Syntax" for the exact syntax.
    Grammatical validation is handled later by `expr.check_exprs()`.

    :param fname: Source file name.
    :param previously_included:
        The absolute names of previously included source files,
        if being invoked from another parser.  The set is shared with
        (and updated by) this parser.
    :param incl_info:
       `QAPISourceInfo` belonging to the parent module.
       ``None`` implies this is the root module.

    :ivar exprs: Resulting parsed expressions.
    :ivar docs: Resulting parsed documentation blocks.

    :raise OSError: For problems reading the root schema document.
    :raise QAPIError: For errors in the schema source.
    """
    def __init__(self,
                 fname: str,
                 previously_included: Optional[Set[str]] = None,
                 incl_info: Optional[QAPISourceInfo] = None):
        self._fname = fname
        # Test for None rather than truthiness: a caller-supplied set
        # that happens to be empty must still be the one we update,
        # or the include bookkeeping is no longer shared.
        if previously_included is None:
            previously_included = set()
        self._included = previously_included
        self._included.add(os.path.abspath(self._fname))
        self.src = ''

        # Lexer state (see `accept` for details):
        self.info = QAPISourceInfo(self._fname, incl_info)
        self.tok: Union[None, str] = None
        self.pos = 0
        self.cursor = 0
        self.val: Optional[Union[bool, str]] = None
        self.line_pos = 0

        # Parser output:
        self.exprs: List[QAPIExpression] = []
        self.docs: List[QAPIDoc] = []

        # Showtime!
        self._parse()

    def _parse(self) -> None:
        """
        Parse the QAPI schema document.

        :return: None.  Results are stored in ``.exprs`` and ``.docs``.
        """
        cur_doc = None

        # May raise OSError; allow the caller to handle it.
        with open(self._fname, 'r', encoding='utf-8') as fp:
            self.src = fp.read()
        # The lexer relies on the source ending with a newline.
        if self.src == '' or self.src[-1] != '\n':
            self.src += '\n'

        # Prime the lexer:
        self.accept()

        # Parse until done:
        while self.tok is not None:
            info = self.info
            if self.tok == '#':
                self.reject_expr_doc(cur_doc)
                for cur_doc in self.get_doc(info):
                    self.docs.append(cur_doc)
                continue

            expr = self.get_expr()
            if not isinstance(expr, dict):
                raise QAPISemError(
                    info, "top-level expression must be an object")

            if 'include' in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'include' directive")
                include = expr['include']
                if not isinstance(include, str):
                    raise QAPISemError(info,
                                       "value of 'include' must be a string")
                # Include file names are relative to the including file.
                incl_fname = os.path.join(os.path.dirname(self._fname),
                                          include)
                self._add_expr(OrderedDict({'include': incl_fname}), info)
                exprs_include = self._include(include, info, incl_fname,
                                              self._included)
                if exprs_include:
                    self.exprs.extend(exprs_include.exprs)
                    self.docs.extend(exprs_include.docs)
            elif "pragma" in expr:
                self.reject_expr_doc(cur_doc)
                if len(expr) != 1:
                    raise QAPISemError(info, "invalid 'pragma' directive")
                pragma = expr['pragma']
                if not isinstance(pragma, dict):
                    raise QAPISemError(
                        info, "value of 'pragma' must be an object")
                for name, value in pragma.items():
                    self._pragma(name, value, info)
            else:
                if cur_doc and not cur_doc.symbol:
                    raise QAPISemError(
                        cur_doc.info, "definition documentation required")
                self._add_expr(expr, info, cur_doc)
            cur_doc = None
        self.reject_expr_doc(cur_doc)

    def _add_expr(self, expr: Mapping[str, object],
                  info: QAPISourceInfo,
                  doc: Optional['QAPIDoc'] = None) -> None:
        """Wrap ``expr`` in a `QAPIExpression` and add it to ``.exprs``."""
        self.exprs.append(QAPIExpression(expr, info, doc))

    @staticmethod
    def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
        """
        Reject a pending definition documentation block.

        :param doc: The pending documentation block, if any.
        :raise QAPISemError: When ``doc`` documents a symbol.
        """
        if doc and doc.symbol:
            raise QAPISemError(
                doc.info,
                "documentation for '%s' is not followed by the definition"
                % doc.symbol)

    @staticmethod
    def _include(include: str,
                 info: QAPISourceInfo,
                 incl_fname: str,
                 previously_included: Set[str]
                 ) -> Optional['QAPISchemaParser']:
        """
        Parse an included file.

        :param include: The file name as written in the directive.
        :param info: Source location of the 'include' directive.
        :param incl_fname: The name of the file to read.
        :param previously_included:
            Absolute names of the files included so far.

        :return: The parser for the included file, or None when the
                 file was included before.
        :raise QAPISemError: On an inclusion loop, or when the file
                             can't be read.
        """
        incl_abs_fname = os.path.abspath(incl_fname)
        # catch inclusion cycle
        inf: Optional[QAPISourceInfo] = info
        while inf:
            if incl_abs_fname == os.path.abspath(inf.fname):
                raise QAPISemError(info, "inclusion loop for %s" % include)
            inf = inf.parent

        # skip multiple include of the same file
        if incl_abs_fname in previously_included:
            return None

        try:
            return QAPISchemaParser(incl_fname, previously_included, info)
        except OSError as err:
            raise QAPISemError(
                info,
                f"can't read include file '{incl_fname}': {err.strerror}"
            ) from err

    @staticmethod
    def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
        """
        Validate one pragma and store its value in ``info.pragma``.

        :param name: The pragma's name.
        :param value: The pragma's value.
        :param info: Source location of the 'pragma' directive.
        :raise QAPISemError: For an unknown pragma, or a value of the
                             wrong type.
        """

        def check_list_str(name: str, value: object) -> List[str]:
            if (not isinstance(value, list) or
                    any(not isinstance(elt, str) for elt in value)):
                raise QAPISemError(
                    info,
                    "pragma %s must be a list of strings" % name)
            return value

        pragma = info.pragma

        if name == 'doc-required':
            if not isinstance(value, bool):
                raise QAPISemError(info,
                                   "pragma 'doc-required' must be boolean")
            pragma.doc_required = value
        elif name == 'command-name-exceptions':
            pragma.command_name_exceptions = check_list_str(name, value)
        elif name == 'command-returns-exceptions':
            pragma.command_returns_exceptions = check_list_str(name, value)
        elif name == 'member-name-exceptions':
            pragma.member_name_exceptions = check_list_str(name, value)
        else:
            raise QAPISemError(info, "unknown pragma '%s'" % name)

    def accept(self, skip_comment: bool = True) -> None:
        """
        Read and store the next token.

        :param skip_comment:
            When false, return COMMENT tokens ("#").
            This is used when reading documentation blocks.

        :return:
            None.  Several instance attributes are updated instead:

            - ``.tok`` represents the token type.  See below for values.
            - ``.info`` describes the token's source location.
            - ``.val`` is the token's value, if any.  See below.
            - ``.pos`` is the buffer index of the first character of
              the token.

        * Single-character tokens:

            These are "{", "}", ":", ",", "[", and "]".
            ``.tok`` holds the single character and ``.val`` is None.

        * Multi-character tokens:

          * COMMENT:

            This token is not normally returned by the lexer, but it can
            be when ``skip_comment`` is False.  ``.tok`` is "#", and
            ``.val`` is a string including all chars until end-of-line,
            including the "#" itself.

          * STRING:

            ``.tok`` is "'", the single quote.  ``.val`` contains the
            string, excluding the surrounding quotes.

          * TRUE and FALSE:

            ``.tok`` is either "t" or "f", ``.val`` will be the
            corresponding bool value.

          * EOF:

            ``.tok`` and ``.val`` will both be None at EOF.
        """
        while True:
            self.tok = self.src[self.cursor]
            self.pos = self.cursor
            self.cursor += 1
            self.val = None

            if self.tok == '#':
                if self.src[self.cursor] == '#':
                    # Start of doc comment
                    skip_comment = False
                # Leave the cursor on the newline ending the comment.
                self.cursor = self.src.find('\n', self.cursor)
                if not skip_comment:
                    self.val = self.src[self.pos:self.cursor]
                    return
            elif self.tok in '{}:,[]':
                return
            elif self.tok == "'":
                # Note: we accept only printable ASCII
                chars: List[str] = []
                esc = False
                while True:
                    ch = self.src[self.cursor]
                    self.cursor += 1
                    if ch == '\n':
                        raise QAPIParseError(self, "missing terminating \"'\"")
                    if esc:
                        # Note: we recognize only \\ because we have
                        # no use for funny characters in strings
                        if ch != '\\':
                            raise QAPIParseError(self,
                                                 "unknown escape \\%s" % ch)
                        esc = False
                    elif ch == '\\':
                        esc = True
                        continue
                    elif ch == "'":
                        self.val = ''.join(chars)
                        return
                    if ord(ch) < 32 or ord(ch) >= 127:
                        raise QAPIParseError(
                            self, "funny character in string")
                    chars.append(ch)
            elif self.src.startswith('true', self.pos):
                self.val = True
                # The first character was consumed above already.
                self.cursor += 3
                return
            elif self.src.startswith('false', self.pos):
                self.val = False
                self.cursor += 4
                return
            elif self.tok == '\n':
                if self.cursor == len(self.src):
                    self.tok = None
                    return
                self.info = self.info.next_line()
                self.line_pos = self.cursor
            elif not self.tok.isspace():
                # Show up to next structural, whitespace or quote
                # character
                match = must_match('[^[\\]{}:,\\s\']+',
                                   self.src[self.cursor-1:])
                raise QAPIParseError(self, "stray '%s'" % match.group(0))

    def get_members(self) -> Dict[str, object]:
        """
        Parse the members of an object, up to and including the "}".

        The opening "{" must have been accepted already.

        :return: The members, in source order.
        :raise QAPIParseError: On a syntax error or a duplicate key.
        """
        expr: Dict[str, object] = OrderedDict()
        if self.tok == '}':
            self.accept()
            return expr
        if self.tok != "'":
            raise QAPIParseError(self, "expected string or '}'")
        while True:
            key = self.val
            assert isinstance(key, str)  # Guaranteed by tok == "'"

            self.accept()
            if self.tok != ':':
                raise QAPIParseError(self, "expected ':'")
            self.accept()
            if key in expr:
                raise QAPIParseError(self, "duplicate key '%s'" % key)
            expr[key] = self.get_expr()
            if self.tok == '}':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or '}'")
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")

    def get_values(self) -> List[object]:
        """
        Parse the elements of an array, up to and including the "]".

        The opening "[" must have been accepted already.

        :return: The elements, in source order.
        :raise QAPIParseError: On a syntax error.
        """
        expr: List[object] = []
        if self.tok == ']':
            self.accept()
            return expr
        if self.tok not in tuple("{['tf"):
            raise QAPIParseError(
                self, "expected '{', '[', ']', string, or boolean")
        while True:
            expr.append(self.get_expr())
            if self.tok == ']':
                self.accept()
                return expr
            if self.tok != ',':
                raise QAPIParseError(self, "expected ',' or ']'")
            self.accept()

    def get_expr(self) -> _ExprValue:
        """
        Parse one value starting at the current token.

        :return: A dict for an object, a list for an array, or the
                 string or boolean value itself.
        :raise QAPIParseError: When the current token can't start a value.
        """
        expr: _ExprValue
        if self.tok == '{':
            self.accept()
            expr = self.get_members()
        elif self.tok == '[':
            self.accept()
            expr = self.get_values()
        elif self.tok in tuple("'tf"):
            assert isinstance(self.val, (str, bool))
            expr = self.val
            self.accept()
        else:
            raise QAPIParseError(
                self, "expected '{', '[', string, or boolean")
        return expr

    def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
        """
        Parse a documentation comment starting at the current token.

        A line starting with '# =' in a free-form comment whose body
        already has text ends the current block and begins a new one,
        so a single comment can yield several blocks.

        :param info: Source location of the start of the comment.
        :return: The documentation blocks parsed from the comment.
        :raise QAPIParseError: When the comment is malformed.
        """
        if self.val != '##':
            raise QAPIParseError(
                self, "junk after '##' at start of documentation comment")

        docs = []
        cur_doc = QAPIDoc(self, info)
        self.accept(False)
        while self.tok == '#':
            assert isinstance(self.val, str)
            if self.val.startswith('##'):
                # End of doc comment
                if self.val != '##':
                    raise QAPIParseError(
                        self,
                        "junk after '##' at end of documentation comment")
                cur_doc.end_comment()
                docs.append(cur_doc)
                self.accept()
                return docs
            if self.val.startswith('# ='):
                if cur_doc.symbol:
                    raise QAPIParseError(
                        self,
                        "unexpected '=' markup in definition documentation")
                if cur_doc.body.text:
                    cur_doc.end_comment()
                    docs.append(cur_doc)
                    cur_doc = QAPIDoc(self, info)
            cur_doc.append(self.val)
            self.accept(False)

        raise QAPIParseError(self, "documentation comment must end with '##'")
class QAPIDoc:
    """
    A documentation comment block, either definition or free-form

    Definition documentation blocks consist of

    * a body section: one line naming the definition, followed by an
      overview (any number of lines)

    * argument sections: a description of each argument (for commands
      and events) or member (for structs, unions and alternates)

    * features sections: a description of each feature flag

    * additional (non-argument) sections, possibly tagged

    Free-form documentation blocks consist only of a body section.
    """

    class Section:
        """A section of a documentation block: optional name plus text."""
        # pylint: disable=too-few-public-methods
        def __init__(self, parser: QAPISchemaParser,
                     name: Optional[str] = None):
            # parser, for error messages about indentation
            self._parser = parser
            # optional section name (argument/member or section name)
            self.name = name
            # section text without section name
            self.text = ''
            # indentation to strip (None means indeterminate)
            self._indent = None if self.name else 0

        def append(self, line: str) -> None:
            """
            Add a line of text, stripping the section's indentation.

            :raise QAPIParseError: When a non-blank line is indented
                less than the section's established indentation.
            """
            stripped = line.rstrip()

            if stripped:
                leading = must_match(r'\s*', stripped).end()
                if self._indent is not None:
                    if leading < self._indent:
                        raise QAPIParseError(
                            self._parser,
                            "unexpected de-indent (expected at least %d spaces)" %
                            self._indent)
                elif self.text != '':
                    # Indentation was indeterminate so far; a
                    # non-blank, non-first line determines it.
                    self._indent = leading
                # While the indentation is still None, this slice
                # keeps the whole line.
                stripped = stripped[self._indent:]

            self.text += stripped + '\n'

    class ArgSection(Section):
        """A named section that can be connected to a schema member."""
        def __init__(self, parser: QAPISchemaParser,
                     name: str):
            super().__init__(parser, name)
            self.member: Optional['QAPISchemaMember'] = None

        def connect(self, member: 'QAPISchemaMember') -> None:
            """Record ``member`` as the one this section documents."""
            self.member = member

    class NullSection(Section):
        """
        Immutable dummy section for use at the end of a doc block.
        """
        # pylint: disable=too-few-public-methods
        def append(self, line: str) -> None:
            assert False, "Text appended after end_comment() called."

    def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
        # self._parser is used to report errors with QAPIParseError.  The
        # resulting error position depends on the state of the parser.
        # It happens to be the beginning of the comment.  More or less
        # serviceable, but action at a distance.
        self._parser = parser
        self.info = info
        self.symbol: Optional[str] = None
        self.body = QAPIDoc.Section(parser)
        # dicts mapping parameter/feature names to their ArgSection
        self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
        self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
        self.sections: List[QAPIDoc.Section] = []
        # the current section
        self._section = self.body
        self._append_line = self._append_body_line

    def has_section(self, name: str) -> bool:
        """Return True if we have a section with this name."""
        return any(section.name == name for section in self.sections)

    def append(self, line: str) -> None:
        """
        Parse a comment line and add it to the documentation.

        The way that the line is dealt with depends on which part of
        the documentation we're parsing right now:
        * The body section: ._append_line is ._append_body_line
        * An argument section: ._append_line is ._append_args_line
        * A features section: ._append_line is ._append_features_line
        * An additional section: ._append_line is ._append_various_line
        """
        # Drop the leading '#'.
        text = line[1:]
        if text:
            if text[0] != ' ':
                raise QAPIParseError(self._parser, "missing space after #")
            self._append_line(text[1:])
        else:
            # A bare '#' is a blank line.
            self._append_freeform(text)

    def end_comment(self) -> None:
        """Finish the block by switching to a `NullSection`."""
        self._switch_section(QAPIDoc.NullSection(self._parser))

    @staticmethod
    def _match_at_name_colon(string: str):
        """Match a leading '@NAME:'; group 1 is NAME."""
        return re.match(r'@([^:]*): *', string)

    @staticmethod
    def _match_section_tag(string: str):
        """Match a leading section tag such as 'Returns:'; group 1 is the tag."""
        return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *', string)

    def _append_body_line(self, line: str) -> None:
        """
        Process a line of documentation text in the body section.

        If this is a symbol line and it is the section's first line,
        this is a definition documentation block for that symbol.

        If it's a definition documentation block, another symbol line
        begins the argument section for the argument named by it, and
        a section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
        # recognized, and get silently treated as ordinary text
        if not self.symbol and not self.body.text and line.startswith('@'):
            if not line.endswith(':'):
                raise QAPIParseError(self._parser, "line should end with ':'")
            self.symbol = line[1:-1]
            # Invalid names are not checked here, but the name provided MUST
            # match the following definition, which *is* validated in expr.py.
            if not self.symbol:
                raise QAPIParseError(
                    self._parser, "name required after '@'")
            return

        if not self.symbol:
            # This is a free-form documentation block
            self._append_freeform(line)
            return

        # This is a definition documentation block
        if self._match_at_name_colon(line):
            self._append_line = self._append_args_line
            self._append_args_line(line)
        elif line == 'Features:':
            self._append_line = self._append_features_line
        elif self._match_section_tag(line):
            self._append_line = self._append_various_line
            self._append_various_line(line)
        else:
            self._append_freeform(line)

    def _append_args_line(self, line: str) -> None:
        """
        Process a line of documentation text in an argument section.

        A symbol line begins the next argument section, a section tag
        section or a non-indented line after a blank line begins an
        additional section.  Start that section and append the line to
        it.

        Else, append the line to the current section.
        """
        match = self._match_at_name_colon(line)
        if match:
            self._start_args_section(match.group(1))
            self._append_freeform(line[match.end():])
            return

        if self._match_section_tag(line):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        after_blank = self._section.text.endswith('\n\n')
        if after_blank and line and not line[0].isspace():
            if line == 'Features:':
                self._append_line = self._append_features_line
            else:
                self._start_section()
                self._append_line = self._append_various_line
                self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_features_line(self, line: str) -> None:
        """
        Process a line of documentation text in a features section.

        A symbol line begins the next feature section; a section tag,
        or a non-indented line after a blank line, begins an
        additional section.

        Else, append the line to the current section.
        """
        match = self._match_at_name_colon(line)
        if match:
            self._start_features_section(match.group(1))
            self._append_freeform(line[match.end():])
            return

        if self._match_section_tag(line):
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        after_blank = self._section.text.endswith('\n\n')
        if after_blank and line and not line[0].isspace():
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
            return

        self._append_freeform(line)

    def _append_various_line(self, line: str) -> None:
        """
        Process a line of documentation text in an additional section.

        A symbol line is an error.

        A section tag begins an additional section.  Start that
        section and append the line to it.

        Else, append the line to the current section.
        """
        symbol_match = self._match_at_name_colon(line)
        if symbol_match:
            raise QAPIParseError(self._parser,
                                 "'@%s:' can't follow '%s' section"
                                 % (symbol_match.group(1),
                                    self.sections[0].name))
        tag_match = self._match_section_tag(line)
        if tag_match:
            line = line[tag_match.end():]
            self._start_section(tag_match.group(1))

        self._append_freeform(line)

    def _start_symbol_section(
            self,
            symbols_dict: Dict[str, 'QAPIDoc.ArgSection'],
            name: str) -> None:
        """
        Begin a new `ArgSection` named ``name``, recorded in
        ``symbols_dict``.

        :raise QAPIParseError: When ``name`` is empty or already
            present in ``symbols_dict``.
        """
        # FIXME invalid names other than the empty string aren't flagged
        if not name:
            raise QAPIParseError(self._parser, "invalid parameter name")
        if name in symbols_dict:
            raise QAPIParseError(self._parser,
                                 "'%s' parameter name duplicated" % name)
        assert not self.sections
        section = QAPIDoc.ArgSection(self._parser, name)
        self._switch_section(section)
        symbols_dict[name] = section

    def _start_args_section(self, name: str) -> None:
        """Begin the argument section for ``name``."""
        self._start_symbol_section(self.args, name)

    def _start_features_section(self, name: str) -> None:
        """Begin the feature section for ``name``."""
        self._start_symbol_section(self.features, name)

    def _start_section(self, name: Optional[str] = None) -> None:
        """
        Begin an additional section, optionally tagged ``name``.

        :raise QAPIParseError: On a second 'Returns' or 'Since' section.
        """
        if name in ('Returns', 'Since') and self.has_section(name):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % name)
        section = QAPIDoc.Section(self._parser, name)
        self._switch_section(section)
        self.sections.append(section)

    def _switch_section(self, new_section: 'QAPIDoc.Section') -> None:
        """
        Finish the current section and make ``new_section`` current.

        Leading and trailing newlines are stripped from the finished
        section's text.

        :raise QAPIParseError: When the finished section is empty and
            is not the body section.
        """
        finished = self._section
        finished.text = finished.text.strip('\n')

        # Only the 'body' section is allowed to have an empty body.
        # All other sections, including anonymous ones, must have text.
        if finished is not self.body and not finished.text:
            # We do not create anonymous sections unless there is
            # something to put in them; this is a parser bug.
            assert finished.name
            raise QAPIParseError(
                self._parser,
                "empty doc section '%s'" % finished.name)

        self._section = new_section

    def _append_freeform(self, line: str) -> None:
        """
        Append ``line`` to the current section as ordinary text.

        :raise QAPIParseError: When the line starts with '@NAME:'.
        """
        match = re.match(r'(@\S+:)', line)
        if match:
            raise QAPIParseError(self._parser,
                                 "'%s' not allowed in free-form documentation"
                                 % match.group(1))
        self._section.append(line)

    def connect_member(self, member: 'QAPISchemaMember') -> None:
        """
        Connect ``member`` to its argument section, creating an
        empty section when the member is undocumented.
        """
        name = member.name
        if name not in self.args:
            # Undocumented TODO outlaw
            self.args[name] = QAPIDoc.ArgSection(self._parser, name)
        self.args[name].connect(member)

    def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
        """
        Connect ``feature`` to its feature section.

        :raise QAPISemError: When the feature is undocumented.
        """
        if feature.name not in self.features:
            raise QAPISemError(feature.info,
                               "feature '%s' lacks documentation"
                               % feature.name)
        self.features[feature.name].connect(feature)

    def check_expr(self, expr: QAPIExpression) -> None:
        """
        Check this block against the expression it documents.

        :raise QAPISemError: When there is a 'Returns' section but
            ``expr`` is not a command.
        """
        if self.has_section('Returns') and 'command' not in expr:
            raise QAPISemError(self.info,
                               "'Returns:' is only valid for commands")

    def check(self) -> None:
        """
        Check that every documented member and feature was connected.

        :raise QAPISemError: When a documented member or feature has
            nothing connected to it.
        """

        def check_args_section(
                args: Dict[str, QAPIDoc.ArgSection], what: str
        ) -> None:
            bogus = [name for name, section in args.items()
                     if not section.member]
            if not bogus:
                return
            several = len(bogus) > 1
            raise QAPISemError(
                self.info,
                "documented %s%s '%s' %s not exist" % (
                    what,
                    "s" if several else "",
                    "', '".join(bogus),
                    "do" if several else "does"
                ))

        check_args_section(self.args, 'member')
        check_args_section(self.features, 'feature')