qapi: Rename QAPIDoc.Section.name to .tag
[qemu/armbru.git] / scripts / qapi / parser.py
blobcc69f4f7703f213f707fa2f09e8055b3ade2a8ee
1 # -*- coding: utf-8 -*-
3 # QAPI schema parser
5 # Copyright IBM, Corp. 2011
6 # Copyright (c) 2013-2019 Red Hat Inc.
8 # Authors:
9 # Anthony Liguori <aliguori@us.ibm.com>
10 # Markus Armbruster <armbru@redhat.com>
11 # Marc-André Lureau <marcandre.lureau@redhat.com>
12 # Kevin Wolf <kwolf@redhat.com>
14 # This work is licensed under the terms of the GNU GPL, version 2.
15 # See the COPYING file in the top-level directory.
17 from collections import OrderedDict
18 import os
19 import re
20 from typing import (
21 TYPE_CHECKING,
22 Dict,
23 List,
24 Mapping,
25 Match,
26 Optional,
27 Set,
28 Union,
31 from .common import must_match
32 from .error import QAPISemError, QAPISourceError
33 from .source import QAPISourceInfo
36 if TYPE_CHECKING:
37 # pylint: disable=cyclic-import
38 # TODO: Remove cycle. [schema -> expr -> parser -> schema]
39 from .schema import QAPISchemaFeature, QAPISchemaMember
# Return value alias for get_expr(): a parsed expression is a list, an
# object (dict keyed by string), a string, or a boolean.
_ExprValue = Union[List[object], Dict[str, object], str, bool]
class QAPIExpression(Dict[str, object]):
    """
    A parsed schema expression together with its origin.

    Behaves as a dict holding the expression's members, and carries two
    extra attributes: ``info`` (the source location) and ``doc`` (the
    documentation block attached to the expression, if any).

    :param data: Members used to initialize the dict.
    :param info: Source location of the expression.
    :param doc: Documentation block for the expression, or None.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self,
                 data: Mapping[str, object],
                 info: QAPISourceInfo,
                 doc: Optional['QAPIDoc'] = None):
        # The metadata attributes do not depend on the dict payload,
        # so they can be attached before the dict is filled in.
        self.doc: Optional['QAPIDoc'] = doc
        self.info = info
        super().__init__(data)
class QAPIParseError(QAPISourceError):
    """
    Error class for all QAPI schema parsing errors.

    :param parser: The parser reporting the error; its ``src``,
        ``line_pos``, ``pos`` and ``info`` locate the error.
    :param msg: The error message.
    """
    def __init__(self, parser: 'QAPISchemaParser', msg: str):
        # Compute the 1-based display column of the current token by
        # walking the text between the start of the line and the token.
        col = 1
        for ch in parser.src[parser.line_pos:parser.pos]:
            if ch == '\t':
                # Advance to the next tab stop (columns 9, 17, 25, ...).
                # Taking the remainder modulo 8 here would wrap the
                # column back into 1..8 instead of moving forward.
                col = (col + 7) // 8 * 8 + 1
            else:
                col += 1
        super().__init__(parser.info, msg, col)
69 class QAPISchemaParser:
70 """
71 Parse QAPI schema source.
73 Parse a JSON-esque schema file and process directives. See
74 qapi-code-gen.rst section "Schema Syntax" for the exact syntax.
75 Grammatical validation is handled later by `expr.check_exprs()`.
77 :param fname: Source file name.
78 :param previously_included:
79 The absolute names of previously included source files,
80 if being invoked from another parser.
81 :param incl_info:
82 `QAPISourceInfo` belonging to the parent module.
83 ``None`` implies this is the root module.
85 :ivar exprs: Resulting parsed expressions.
86 :ivar docs: Resulting parsed documentation blocks.
88 :raise OSError: For problems reading the root schema document.
89 :raise QAPIError: For errors in the schema source.
90 """
def __init__(self,
             fname: str,
             previously_included: Optional[Set[str]] = None,
             incl_info: Optional[QAPISourceInfo] = None):
    """
    Set up the parser state for ``fname`` and parse it right away.

    :param fname: Source file name.
    :param previously_included:
        Absolute names of source files already included.  The
        absolute name of ``fname`` is added to this set.
    :param incl_info:
        Source info of the including module, handed on to
        `QAPISourceInfo`.
    """
    self._fname = fname
    # Reuse the caller's set when a non-empty one is given, else start
    # a fresh one.
    included = previously_included if previously_included else set()
    included.add(os.path.abspath(fname))
    self._included = included
    self.src = ''

    # Lexer state (see `accept` for details):
    self.info = QAPISourceInfo(fname, incl_info)
    self.tok: Union[None, str] = None
    self.val: Optional[Union[bool, str]] = None
    self.pos = 0
    self.cursor = 0
    self.line_pos = 0

    # Parser output:
    self.docs: List[QAPIDoc] = []
    self.exprs: List[QAPIExpression] = []

    # Showtime!
    self._parse()
def _parse(self) -> None:
    """
    Parse the QAPI schema document.

    Reads the whole file into ``.src``, then alternates between
    documentation comments and top-level expressions until the lexer
    reports end of input.  'include' and 'pragma' directives are
    processed on the spot; any other object is recorded as an
    expression, paired with the documentation block preceding it.

    :return: None. Results are stored in ``.exprs`` and ``.docs``.
    """
    # The most recent documentation block not yet claimed by an
    # expression, if any.
    cur_doc = None

    # May raise OSError; allow the caller to handle it.
    with open(self._fname, 'r', encoding='utf-8') as fp:
        self.src = fp.read()
    # Guarantee a trailing newline.
    if self.src == '' or self.src[-1] != '\n':
        self.src += '\n'

    # Prime the lexer:
    self.accept()

    # Parse until done:
    while self.tok is not None:
        # Source location of the construct that starts here.
        info = self.info
        if self.tok == '#':
            # A new doc comment: the previous one must not be a
            # definition doc still waiting for its definition.
            self.reject_expr_doc(cur_doc)
            # cur_doc ends up as the last block returned by get_doc().
            for cur_doc in self.get_doc(info):
                self.docs.append(cur_doc)
            continue

        expr = self.get_expr()
        if not isinstance(expr, dict):
            raise QAPISemError(
                info, "top-level expression must be an object")

        if 'include' in expr:
            self.reject_expr_doc(cur_doc)
            if len(expr) != 1:
                raise QAPISemError(info, "invalid 'include' directive")
            include = expr['include']
            if not isinstance(include, str):
                raise QAPISemError(info,
                                   "value of 'include' must be a string")
            # The include path is taken relative to this file's directory.
            incl_fname = os.path.join(os.path.dirname(self._fname),
                                      include)
            self._add_expr(OrderedDict({'include': incl_fname}), info)
            exprs_include = self._include(include, info, incl_fname,
                                          self._included)
            # _include() returns None for a file that was already
            # included; then there is nothing to merge.
            if exprs_include:
                self.exprs.extend(exprs_include.exprs)
                self.docs.extend(exprs_include.docs)
        elif "pragma" in expr:
            self.reject_expr_doc(cur_doc)
            if len(expr) != 1:
                raise QAPISemError(info, "invalid 'pragma' directive")
            pragma = expr['pragma']
            if not isinstance(pragma, dict):
                raise QAPISemError(
                    info, "value of 'pragma' must be an object")
            for name, value in pragma.items():
                self._pragma(name, value, info)
        else:
            # A definition: a doc block directly before it must name
            # a symbol, i.e. be definition documentation.
            if cur_doc and not cur_doc.symbol:
                raise QAPISemError(
                    cur_doc.info, "definition documentation required")
            self._add_expr(expr, info, cur_doc)
        # The doc block, if any, has been consumed or rejected.
        cur_doc = None
    # A definition doc block at end of file has no definition.
    self.reject_expr_doc(cur_doc)
def _add_expr(self, expr: Mapping[str, object],
              info: QAPISourceInfo,
              doc: Optional['QAPIDoc'] = None) -> None:
    """
    Record a parsed expression in ``.exprs``.

    :param expr: The expression's members.
    :param info: Source location of the expression.
    :param doc: Documentation block for the expression, if any.
    """
    wrapped = QAPIExpression(expr, info, doc)
    self.exprs.append(wrapped)
185 @staticmethod
186 def reject_expr_doc(doc: Optional['QAPIDoc']) -> None:
187 if doc and doc.symbol:
188 raise QAPISemError(
189 doc.info,
190 "documentation for '%s' is not followed by the definition"
191 % doc.symbol)
@staticmethod
def _include(include: str,
             info: QAPISourceInfo,
             incl_fname: str,
             previously_included: Set[str]
             ) -> Optional['QAPISchemaParser']:
    """
    Parse an included file unless it was included before.

    :param include: The include path as written in the schema.
    :param info: Source location of the 'include' directive.
    :param incl_fname: File name of the included file.
    :param previously_included:
        Absolute names of the files included so far.
    :return: A parser for the included file, or None if the file is
        already in ``previously_included``.
    :raise QAPISemError:
        On an inclusion loop, or when the file cannot be read.
    """
    target = os.path.abspath(incl_fname)

    # catch inclusion cycle: walk up the chain of including files
    ancestor: Optional[QAPISourceInfo] = info
    while ancestor:
        if os.path.abspath(ancestor.fname) == target:
            raise QAPISemError(info, "inclusion loop for %s" % include)
        ancestor = ancestor.parent

    # skip multiple include of the same file
    if target in previously_included:
        return None

    try:
        nested = QAPISchemaParser(incl_fname, previously_included, info)
    except OSError as err:
        raise QAPISemError(
            info,
            f"can't read include file '{incl_fname}': {err.strerror}"
        ) from err
    return nested
@staticmethod
def _pragma(name: str, value: object, info: QAPISourceInfo) -> None:
    """
    Apply one pragma setting to ``info.pragma``.

    :param name: The pragma's name.
    :param value: The pragma's value.
    :param info: Source location of the pragma directive.
    :raise QAPISemError:
        If the pragma is unknown or its value has the wrong type.
    """

    def check_list_str(name: str, value: object) -> List[str]:
        is_str_list = (isinstance(value, list)
                       and all(isinstance(elt, str) for elt in value))
        if not is_str_list:
            raise QAPISemError(
                info,
                "pragma %s must be a list of strings" % name)
        return value

    pragma = info.pragma

    # Pragmas taking a list of strings, mapped to the attribute of
    # info.pragma that stores them.
    list_pragmas = {
        'command-name-exceptions': 'command_name_exceptions',
        'command-returns-exceptions': 'command_returns_exceptions',
        'documentation-exceptions': 'documentation_exceptions',
        'member-name-exceptions': 'member_name_exceptions',
    }

    if name == 'doc-required':
        if not isinstance(value, bool):
            raise QAPISemError(info,
                               "pragma 'doc-required' must be boolean")
        pragma.doc_required = value
    elif name in list_pragmas:
        setattr(pragma, list_pragmas[name], check_list_str(name, value))
    else:
        raise QAPISemError(info, "unknown pragma '%s'" % name)
def accept(self, skip_comment: bool = True) -> None:
    """
    Read and store the next token.

    :param skip_comment:
        When false, return COMMENT tokens ("#").
        This is used when reading documentation blocks.

    :return:
        None. Several instance attributes are updated instead:

        - ``.tok`` represents the token type. See below for values.
        - ``.info`` describes the token's source location.
        - ``.val`` is the token's value, if any. See below.
        - ``.pos`` is the buffer index of the first character of
          the token.

    * Single-character tokens:

        These are "{", "}", ":", ",", "[", and "]".
        ``.tok`` holds the single character and ``.val`` is None.

    * Multi-character tokens:

      * COMMENT:

        This token is not normally returned by the lexer, but it can
        be when ``skip_comment`` is False. ``.tok`` is "#", and
        ``.val`` is a string including all chars until end-of-line,
        including the "#" itself.

      * STRING:

        ``.tok`` is "'", the single quote. ``.val`` contains the
        string, excluding the surrounding quotes.

      * TRUE and FALSE:

        ``.tok`` is either "t" or "f", ``.val`` will be the
        corresponding bool value.

      * EOF:

        ``.tok`` and ``.val`` will both be None at EOF.

    :raise QAPIParseError:
        On an unterminated string, an unknown escape, a character
        outside printable ASCII in a string, or a stray character.
    """
    while True:
        # Tentatively take the next character as the token; the
        # branches below either return it, extend it, or skip it.
        self.tok = self.src[self.cursor]
        self.pos = self.cursor
        self.cursor += 1
        self.val = None

        if self.tok == '#':
            if self.src[self.cursor] == '#':
                # Start of doc comment
                skip_comment = False
            # Move to the newline ending the comment; the newline
            # itself is lexed by the next iteration.
            self.cursor = self.src.find('\n', self.cursor)
            if not skip_comment:
                self.val = self.src[self.pos:self.cursor]
                return
        elif self.tok in '{}:,[]':
            return
        elif self.tok == "'":
            # Note: we accept only printable ASCII
            string = ''
            esc = False
            while True:
                ch = self.src[self.cursor]
                self.cursor += 1
                if ch == '\n':
                    raise QAPIParseError(self, "missing terminating \"'\"")
                if esc:
                    # Note: we recognize only \\ because we have
                    # no use for funny characters in strings
                    if ch != '\\':
                        raise QAPIParseError(self,
                                             "unknown escape \\%s" % ch)
                    esc = False
                elif ch == '\\':
                    esc = True
                    continue
                elif ch == "'":
                    self.val = string
                    return
                if ord(ch) < 32 or ord(ch) >= 127:
                    raise QAPIParseError(
                        self, "funny character in string")
                string += ch
        elif self.src.startswith('true', self.pos):
            self.val = True
            # 't' is already consumed; skip the remaining 'rue'.
            self.cursor += 3
            return
        elif self.src.startswith('false', self.pos):
            self.val = False
            # 'f' is already consumed; skip the remaining 'alse'.
            self.cursor += 4
            return
        elif self.tok == '\n':
            # A newline that is the last character of the buffer
            # marks EOF.
            if self.cursor == len(self.src):
                self.tok = None
                return
            # Otherwise start a new line: advance the source location
            # and remember where the line begins.
            self.info = self.info.next_line()
            self.line_pos = self.cursor
        elif not self.tok.isspace():
            # Show up to next structural, whitespace or quote
            # character
            match = must_match('[^[\\]{}:,\\s\']+',
                               self.src[self.cursor-1:])
            raise QAPIParseError(self, "stray '%s'" % match.group(0))
def get_members(self) -> Dict[str, object]:
    """
    Parse the members of an object, after its opening '{'.

    :return: The members, in source order.  The closing '}' has been
        consumed on return.
    :raise QAPIParseError: On a syntax error or a duplicate key.
    """
    members: Dict[str, object] = OrderedDict()
    if self.tok == '}':
        # Empty object
        self.accept()
        return members
    if self.tok != "'":
        raise QAPIParseError(self, "expected string or '}'")
    while True:
        key = self.val
        assert isinstance(key, str)     # Guaranteed by tok == "'"

        self.accept()
        if self.tok != ':':
            raise QAPIParseError(self, "expected ':'")
        self.accept()
        if key in members:
            raise QAPIParseError(self, "duplicate key '%s'" % key)
        members[key] = self.get_expr()

        if self.tok == ',':
            # Another member follows; it must start with its key.
            self.accept()
            if self.tok != "'":
                raise QAPIParseError(self, "expected string")
            continue
        if self.tok != '}':
            raise QAPIParseError(self, "expected ',' or '}'")
        self.accept()
        return members
def get_values(self) -> List[object]:
    """
    Parse the elements of an array, after its opening '['.

    :return: The elements, in source order.  The closing ']' has been
        consumed on return.
    :raise QAPIParseError: On a syntax error.
    """
    values: List[object] = []
    if self.tok == ']':
        # Empty array
        self.accept()
        return values
    # Token types that can start an expression.
    if self.tok not in ('{', '[', "'", 't', 'f'):
        raise QAPIParseError(
            self, "expected '{', '[', ']', string, or boolean")
    while True:
        values.append(self.get_expr())
        if self.tok == ',':
            self.accept()
            continue
        if self.tok != ']':
            raise QAPIParseError(self, "expected ',' or ']'")
        self.accept()
        return values
def get_expr(self) -> _ExprValue:
    """
    Parse one expression starting at the current token.

    :return: A dict for an object, a list for an array, or the value
        of a string or boolean token.
    :raise QAPIParseError:
        If the current token cannot start an expression.
    """
    if self.tok == '{':
        self.accept()
        return self.get_members()
    if self.tok == '[':
        self.accept()
        return self.get_values()
    if self.tok in ("'", 't', 'f'):
        # String or boolean: the lexer has stored its value.
        assert isinstance(self.val, (str, bool))
        value = self.val
        self.accept()
        return value
    raise QAPIParseError(
        self, "expected '{', '[', string, or boolean")
def get_doc(self, info: QAPISourceInfo) -> List['QAPIDoc']:
    """
    Parse a documentation comment starting at the current '##' line.

    A line starting with '# =' begins a new documentation block when
    the current block's body already has text, so one comment can
    yield several blocks.

    :param info: Source location of the start of the comment.
    :return: The documentation blocks found, in source order.
    :raise QAPIParseError: On malformed comment structure.
    """
    if self.val != '##':
        raise QAPIParseError(
            self, "junk after '##' at start of documentation comment")

    finished = []
    doc = QAPIDoc(self, info)
    self.accept(False)
    while self.tok == '#':
        line = self.val
        assert isinstance(line, str)
        if line.startswith('##'):
            # End of doc comment
            if line != '##':
                raise QAPIParseError(
                    self,
                    "junk after '##' at end of documentation comment")
            doc.end_comment()
            finished.append(doc)
            self.accept()
            return finished
        if line.startswith('# ='):
            if doc.symbol:
                raise QAPIParseError(
                    self,
                    "unexpected '=' markup in definition documentation")
            if doc.body.text:
                # Close the block collected so far and start a new one.
                doc.end_comment()
                finished.append(doc)
                doc = QAPIDoc(self, info)
        doc.append(line)
        self.accept(False)

    # The comment lines ran out before a closing '##' line.
    raise QAPIParseError(self, "documentation comment must end with '##'")
452 class QAPIDoc:
454 A documentation comment block, either definition or free-form
456 Definition documentation blocks consist of
458 * a body section: one line naming the definition, followed by an
459 overview (any number of lines)
461 * argument sections: a description of each argument (for commands
462 and events) or member (for structs, unions and alternates)
464 * features sections: a description of each feature flag
466 * additional (non-argument) sections, possibly tagged
468 Free-form documentation blocks consist only of a body section.
class Section:
    """
    One section of a documentation block: an optional tag plus text.

    :param parser: The parser; supplies the section's source location
        and is used when reporting indentation errors.
    :param tag: The section's tag, if any.
    """
    # pylint: disable=too-few-public-methods
    def __init__(self, parser: QAPISchemaParser,
                 tag: Optional[str] = None):
        # parser, for error messages about indentation
        self._parser = parser
        # section source info, i.e. where it begins
        self.info = parser.info
        # section tag, if any ('Returns', '@name', ...)
        self.tag = tag
        # section text without tag
        self.text = ''
        # indentation to strip (None means indeterminate)
        self._indent = None if self.tag else 0

    def append(self, line: str) -> None:
        """
        Append a line of text, stripping the section's indentation.

        :param line: The line; trailing whitespace is dropped.
        :raise QAPIParseError:
            If the line is indented less than the section.
        """
        stripped = line.rstrip()
        if not stripped:
            # Blank lines are kept, and never affect indentation.
            self.text += '\n'
            return

        indent = must_match(r'\s*', stripped).end()
        if self._indent is not None:
            if indent < self._indent:
                raise QAPIParseError(
                    self._parser,
                    "unexpected de-indent (expected at least %d spaces)" %
                    self._indent)
        elif self.text != '':
            # indeterminate indentation: a non-blank, non-first line
            # determines it
            self._indent = indent

        # While the indentation is still None, the slice keeps the
        # whole line.
        self.text += stripped[self._indent:] + '\n'
class ArgSection(Section):
    """
    A section describing one named argument, member or feature.

    ``member`` starts out as None and is set by `connect`.
    """
    def __init__(self, parser: QAPISchemaParser, tag: str):
        super().__init__(parser, tag)
        # the schema member this section documents, once connected
        self.member: Optional['QAPISchemaMember'] = None

    def connect(self, member: 'QAPISchemaMember') -> None:
        """Associate this section with ``member``."""
        self.member = member
class NullSection(Section):
    """
    Immutable dummy section for use at the end of a doc block.

    Appending to it trips an assertion.
    """
    # pylint: disable=too-few-public-methods
    def append(self, line: str) -> None:
        # Reaching this means text arrived after the block was closed.
        assert False, "Text appended after end_comment() called."
def __init__(self, parser: QAPISchemaParser, info: QAPISourceInfo):
    """
    Start an empty documentation block.

    :param parser: The parser reading the comment.
    :param info: Source location of the documentation block.
    """
    # self._parser is used to report errors with QAPIParseError.  The
    # resulting error position depends on the state of the parser.
    # It happens to be the beginning of the comment.  More or less
    # serviceable, but action at a distance.
    self._parser = parser
    self.info = info
    # name of the documented definition; None until a symbol line
    # has been seen
    self.symbol: Optional[str] = None
    # dicts mapping parameter/feature names to their ArgSection
    self.args: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
    self.features: Dict[str, QAPIDoc.ArgSection] = OrderedDict()
    # additional sections, in source order
    self.sections: List[QAPIDoc.Section] = []
    # the body section is also the initial current section
    body = QAPIDoc.Section(parser)
    self.body = body
    self._section = body
    self._append_line = self._append_body_line
539 def has_section(self, tag: str) -> bool:
540 """Return True if we have a section with this tag."""
541 for i in self.sections:
542 if i.tag == tag:
543 return True
544 return False
546 def append(self, line: str) -> None:
548 Parse a comment line and add it to the documentation.
550 The way that the line is dealt with depends on which part of
551 the documentation we're parsing right now:
552 * The body section: ._append_line is ._append_body_line
553 * An argument section: ._append_line is ._append_args_line
554 * A features section: ._append_line is ._append_features_line
555 * An additional section: ._append_line is ._append_various_line
557 line = line[1:]
558 if not line:
559 self._append_freeform(line)
560 return
562 if line[0] != ' ':
563 raise QAPIParseError(self._parser, "missing space after #")
564 line = line[1:]
565 self._append_line(line)
def end_comment(self) -> None:
    """Close the current section by switching to a `NullSection`."""
    terminator = QAPIDoc.NullSection(self._parser)
    self._switch_section(terminator)
570 @staticmethod
571 def _match_at_name_colon(string: str) -> Optional[Match[str]]:
572 return re.match(r'@([^:]*): *', string)
574 @staticmethod
575 def _match_section_tag(string: str) -> Optional[Match[str]]:
576 return re.match(r'(Returns|Since|Notes?|Examples?|TODO): *', string)
def _append_body_line(self, line: str) -> None:
    """
    Process a line of documentation text in the body section.

    If this is a symbol line and it is the section's first line, this
    is a definition documentation block for that symbol.

    If it's a definition documentation block, another symbol line
    begins the argument section for the argument named by it, and
    a section tag begins an additional section.  Start that
    section and append the line to it.

    Else, append the line to the current section.
    """
    # FIXME not nice: things like '#  @foo:' and '# @foo: ' aren't
    # recognized, and get silently treated as ordinary text
    if not self.symbol:
        if self.body.text or not line.startswith('@'):
            # This is a free-form documentation block
            self._append_freeform(line)
            return
        if not line.endswith(':'):
            raise QAPIParseError(self._parser, "line should end with ':'")
        self.symbol = line[1:-1]
        # Invalid names are not checked here, but the name provided MUST
        # match the following definition, which *is* validated in expr.py.
        if not self.symbol:
            raise QAPIParseError(
                self._parser, "name required after '@'")
        return

    # This is a definition documentation block
    if line == 'Features:':
        # The 'Features:' line only switches mode; it adds no text.
        self._append_line = self._append_features_line
        return
    if self._match_at_name_colon(line):
        handler = self._append_args_line
    elif self._match_section_tag(line):
        handler = self._append_various_line
    else:
        self._append_freeform(line)
        return
    # Leave the body: later lines go to the handler for the new part.
    self._append_line = handler
    handler(line)
def _append_args_line(self, line: str) -> None:
    """
    Process a line of documentation text in an argument section.

    A symbol line begins the next argument section, a section tag
    section or a non-indented line after a blank line begins an
    additional section.  Start that section and append the line to
    it.

    Else, append the line to the current section.
    """
    name_match = self._match_at_name_colon(line)
    if name_match:
        # Next argument: its text starts after the '@name:' prefix.
        self._start_args_section(name_match.group(1))
        self._append_freeform(line[name_match.end():])
        return

    if self._match_section_tag(line):
        self._append_line = self._append_various_line
        self._append_various_line(line)
        return

    after_blank = self._section.text.endswith('\n\n')
    if after_blank and line and not line[0].isspace():
        if line == 'Features:':
            self._append_line = self._append_features_line
        else:
            # Untagged text ends the arguments and starts an
            # additional section.
            self._start_section()
            self._append_line = self._append_various_line
            self._append_various_line(line)
        return

    self._append_freeform(line)
def _append_features_line(self, line: str) -> None:
    """
    Process a line of documentation text in a features section.

    A symbol line begins the next feature section.  A section tag, or
    a non-indented line after a blank line, begins an additional
    section.  Else, append the line to the current section.
    """
    name_match = self._match_at_name_colon(line)
    if name_match:
        # Next feature: its text starts after the '@name:' prefix.
        self._start_features_section(name_match.group(1))
        self._append_freeform(line[name_match.end():])
        return

    if self._match_section_tag(line):
        self._append_line = self._append_various_line
        self._append_various_line(line)
        return

    after_blank = self._section.text.endswith('\n\n')
    if after_blank and line and not line[0].isspace():
        self._start_section()
        self._append_line = self._append_various_line
        self._append_various_line(line)
        return

    self._append_freeform(line)
def _append_various_line(self, line: str) -> None:
    """
    Process a line of documentation text in an additional section.

    A symbol line is an error.

    A section tag begins an additional section.  Start that
    section and append the line to it.

    Else, append the line to the current section.
    """
    name_match = self._match_at_name_colon(line)
    if name_match:
        raise QAPIParseError(self._parser,
                             "description of '@%s:' follows a section"
                             % name_match.group(1))

    tag_match = self._match_section_tag(line)
    if tag_match:
        # The new section's text starts after the 'Tag:' prefix.
        self._start_section(tag_match.group(1))
        line = line[tag_match.end():]

    self._append_freeform(line)
def _start_symbol_section(
        self,
        symbols_dict: Dict[str, 'QAPIDoc.ArgSection'],
        name: str) -> None:
    """
    Start the section for ``name`` and register it in ``symbols_dict``.

    :param symbols_dict: The dict to register the new section in.
    :param name: The documented name, without the leading '@'.
    :raise QAPIParseError: If ``name`` is empty or already registered.
    """
    # FIXME invalid names other than the empty string aren't flagged
    if not name:
        raise QAPIParseError(self._parser, "invalid parameter name")
    if name in symbols_dict:
        raise QAPIParseError(self._parser,
                             "'%s' parameter name duplicated" % name)
    # No additional section may have been started yet.
    assert not self.sections
    section = QAPIDoc.ArgSection(self._parser, f'@{name}')
    self._switch_section(section)
    symbols_dict[name] = section
def _start_args_section(self, name: str) -> None:
    """Start the argument section for ``name``."""
    self._start_symbol_section(symbols_dict=self.args, name=name)
def _start_features_section(self, name: str) -> None:
    """Start the feature section for ``name``."""
    self._start_symbol_section(symbols_dict=self.features, name=name)
def _start_section(self, tag: Optional[str] = None) -> None:
    """
    Start an additional section and make it the current one.

    :param tag: The section's tag, or None for an untagged section.
    :raise QAPIParseError:
        If a 'Returns' or 'Since' section already exists and another
        one with the same tag is started.
    """
    # Tags that may appear at most once per documentation block.
    once_only = ('Returns', 'Since')
    if tag in once_only:
        if self.has_section(tag):
            raise QAPIParseError(self._parser,
                                 "duplicated '%s' section" % tag)
    section = QAPIDoc.Section(self._parser, tag)
    self._switch_section(section)
    self.sections.append(section)
721 def _switch_section(self, new_section: 'QAPIDoc.Section') -> None:
722 text = self._section.text = self._section.text.strip('\n')
724 # Only the 'body' section is allowed to have an empty body.
725 # All other sections, including anonymous ones, must have text.
726 if self._section != self.body and not text:
727 # We do not create anonymous sections unless there is
728 # something to put in them; this is a parser bug.
729 assert self._section.tag
730 raise QAPISemError(
731 self._section.info,
732 "text required after '%s:'" % self._section.tag)
734 self._section = new_section
736 def _append_freeform(self, line: str) -> None:
737 match = re.match(r'(@\S+:)', line)
738 if match:
739 raise QAPIParseError(self._parser,
740 "'%s' not allowed in free-form documentation"
741 % match.group(1))
742 self._section.append(line)
def connect_member(self, member: 'QAPISchemaMember') -> None:
    """
    Connect ``member`` to the argument section documenting it.

    An undocumented member gets an empty section when this block's
    symbol is listed in the 'documentation-exceptions' pragma.

    :raise QAPISemError:
        If the member is undocumented and the symbol is not listed.
    """
    name = member.name
    if name not in self.args:
        exempt = member.info.pragma.documentation_exceptions
        if self.symbol not in exempt:
            raise QAPISemError(member.info,
                               "%s '%s' lacks documentation"
                               % (member.role, name))
        self.args[name] = QAPIDoc.ArgSection(self._parser, '@' + name)
    self.args[name].connect(member)
def connect_feature(self, feature: 'QAPISchemaFeature') -> None:
    """
    Connect ``feature`` to the feature section documenting it.

    :raise QAPISemError: If the feature is undocumented.
    """
    name = feature.name
    if name not in self.features:
        raise QAPISemError(feature.info,
                           "feature '%s' lacks documentation"
                           % name)
    section = self.features[name]
    section.connect(feature)
def check_expr(self, expr: QAPIExpression) -> None:
    """
    Check this block's sections against the expression it documents.

    :raise QAPISemError:
        If there is a 'Returns' section and ``expr`` has no
        'command' key.
    """
    if 'command' in expr:
        return
    # Report the first 'Returns' section, if any.
    for section in self.sections:
        if section.tag == 'Returns':
            raise QAPISemError(section.info,
                               "'Returns:' is only valid for commands")
770 def check(self) -> None:
772 def check_args_section(
773 args: Dict[str, QAPIDoc.ArgSection], what: str
774 ) -> None:
775 bogus = [name for name, section in args.items()
776 if not section.member]
777 if bogus:
778 raise QAPISemError(
779 args[bogus[0]].info,
780 "documented %s%s '%s' %s not exist" % (
781 what,
782 "s" if len(bogus) > 1 else "",
783 "', '".join(bogus),
784 "do" if len(bogus) > 1 else "does"
787 check_args_section(self.args, 'member')
788 check_args_section(self.features, 'feature')