#!/usr/bin/python
#
# This file is part of the GROMACS molecular simulation package.
#
# Copyright (c) 2014,2015, by the GROMACS development team, led by
# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
# and including many others, as listed in the AUTHORS file in the
# top-level source directory and at http://www.gromacs.org.
#
# GROMACS is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public License
# as published by the Free Software Foundation; either version 2.1
# of the License, or (at your option) any later version.
#
# GROMACS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with GROMACS; if not, see
# http://www.gnu.org/licenses, or write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# If you want to redistribute modifications to GROMACS, please
# consider that scientific software is very special. Version
# control is crucial - bugs must be traceable. We will be happy to
# consider code for inclusion in the official distribution, but
# derived work must not be called official GROMACS. Details are found
# in the README & COPYING files - if they are missing, get the
# official version at http://www.gromacs.org.
#
# To help us fund GROMACS development, we humbly ask that you cite
# the research papers on the package. Check out http://www.gromacs.org.
"""Check source code and Doxygen documentation for issues

This script checks for some issues in the Doxygen documentation, as well as
general issues in the source code, mainly using Doxygen XML output and #include
dependencies parsed from source files. Some of the checks are generic, like
checking that all documented entities have brief descriptions. Others are
specific to GROMACS, like checking that only installed headers contribute to
the public API documentation.

The checks should be self-evident from the source code of the script
(they are also described in docs/dev-manual/gmxtree.rst).
The logic for parsing the Doxygen XML output and for creating a GROMACS-specific
representation of the source tree is kept in separate Python modules
(doxygenxml.py and gmxtree.py, respectively). Similarly, the logic for handling
the output messages is in reporter.py. This leaves only the actual checks and
the script command-line interface in this file.

The script can be run using the 'check-source' target generated by CMake.
This target takes care of generating all the necessary input files and passing
them to the script.
"""
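
# For reference, a manual run could look roughly like the sketch below. The
# paths and the name of the ignore file are only illustrative; the
# 'check-source' CMake target normally generates the required inputs and
# passes the correct options automatically:
#
#   python check-source.py -S /path/to/gromacs -B /path/to/gromacs-build \
#       --ignore ignore-patterns.txt --exitcode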

import sys
from optparse import OptionParser

import gmxtree
from gmxtree import GromacsTree, DocType
from includesorter import IncludeSorter
from reporter import Reporter

def check_file(fileobj, tree, reporter):
    """Check file-level issues."""
    if not fileobj.is_external() and fileobj.get_relpath().startswith('src/'):
        includes = fileobj.get_includes()
        if fileobj.is_source_file():
            if includes:
                firstinclude = includes[0].get_file()
                if not firstinclude or firstinclude.get_name() != "gmxpre.h":
                    reporter.code_issue(includes[0],
                            "does not include \"gmxpre.h\" first")
            else:
                reporter.code_issue(fileobj, "does not include \"gmxpre.h\"")
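        # Verify that headers which only provide preprocessor defines
        # (config.h and similar, as returned by tree.get_checked_define_files())
        # are included exactly in the files that actually use macros from them.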
        used_define_files = fileobj.get_used_define_files()
        for define_file in tree.get_checked_define_files():
            includes_file = False
            for include in includes:
                if include.get_file() == define_file:
                    includes_file = True
                    break
            if includes_file:
                if define_file not in used_define_files:
                    reporter.code_issue(fileobj,
                            "includes \"{0}\" unnecessarily".format(define_file.get_name()))
            else:
                if define_file in used_define_files:
                    reporter.code_issue(fileobj,
                            "should include \"{0}\"".format(define_file.get_name()))

    if not fileobj.is_documented():
        # TODO: Add rules for required documentation
        return

    if fileobj.is_source_file():
        # TODO: Add rule to exclude examples from this check
        if fileobj.is_installed():
            reporter.file_error(fileobj, "source file is installed")
        if fileobj.get_doc_type() != DocType.internal:
            reporter.file_error(fileobj,
                    "source file documentation appears outside full documentation")
        elif fileobj.get_api_type() != DocType.internal:
            reporter.file_error(fileobj, "source file marked as non-internal")
    elif fileobj.is_test_file() and fileobj.is_installed():
        reporter.file_error(fileobj, "test file is installed")
    elif fileobj.is_installed():
        if fileobj.get_doc_type() != DocType.public:
            reporter.file_error(fileobj,
                    "public header has non-public documentation")
    elif fileobj.get_doc_type() == DocType.public:
        reporter.file_error(fileobj,
                "non-installed header has public documentation")
    elif fileobj.get_api_type() == DocType.public:
        reporter.file_error(fileobj,
                "non-installed header specified as part of public API")
    elif fileobj.get_doc_type() < fileobj.get_api_type():
        reporter.file_error(fileobj,
                "API type ({0}) conflicts with documentation visibility ({1})"
                .format(fileobj.get_api_type(), fileobj.get_doc_type()))

    if not fileobj.has_brief_description():
        reporter.file_error(fileobj,
                "is documented, but does not have brief description")

    expectedmod = fileobj.get_expected_module()
    if expectedmod:
        docmodules = fileobj.get_doc_modules()
        if docmodules:
            for module in docmodules:
                if module != expectedmod:
                    reporter.file_error(fileobj,
                            "is documented in incorrect module: {0}"
                            .format(module.get_name()))
        elif expectedmod.is_documented():
            reporter.file_error(fileobj,
                    "is not documented in any module, but {0} exists"
                    .format(expectedmod.get_name()))

def check_include(fileobj, includedfile, reporter):
    """Check an #include directive."""
    otherfile = includedfile.get_file()
    if includedfile.is_system():
        if not otherfile:
            return
        reporter.code_issue(includedfile,
                "includes local file as {0}".format(includedfile))
    if not otherfile:
        reporter.code_issue(includedfile,
                "includes non-local file as {0}".format(includedfile))
    if not otherfile:
        return
    if fileobj.is_installed() and not otherfile.is_installed():
        reporter.code_issue(includedfile,
                "installed header includes non-installed {0}"
                .format(includedfile))
    filemodule = fileobj.get_module()
    othermodule = otherfile.get_module()
    if fileobj.is_documented() and otherfile.is_documented():
        filetype = fileobj.get_doc_type()
        othertype = otherfile.get_doc_type()
        if filetype > othertype:
            reporter.code_issue(includedfile,
                    "{0} file includes {1} file {2}"
                    .format(filetype, othertype, includedfile))
    check_api = (otherfile.api_type_is_reliable() and filemodule != othermodule)
    if check_api and otherfile.get_api_type() < DocType.library:
        reporter.code_issue(includedfile,
                "included file {0} is not documented as exposed outside its module"
                .format(includedfile))

def check_entity(entity, reporter):
    """Check documentation for a code construct."""
    if entity.is_documented():
        if not entity.has_brief_description():
            reporter.doc_error(entity,
                    "is documented, but does not have brief description")

def check_class(classobj, reporter):
    """Check documentation for a class/struct/union."""
    check_entity(classobj, reporter)
    if classobj.is_documented():
        classtype = classobj.get_doc_type()
        filetype = classobj.get_file_doc_type()
        if classtype == DocType.public and not classobj.is_in_installed_file():
            reporter.doc_error(classobj,
                    "has public documentation, but is not in installed header")
        elif filetype is not DocType.none and classtype > filetype:
            reporter.doc_error(classobj,
                    "is in {0} file(s), but appears in {1} documentation"
                    .format(filetype, classtype))

def check_member(member, reporter, check_ignored):
    """Check documentation for a generic member."""
    check_entity(member, reporter)
    if member.is_documented():
        if check_ignored and not member.is_visible():
            reporter.doc_note(member,
                    "is documented, but is ignored by Doxygen, because its scope is not documented")
        if member.has_inbody_description():
            reporter.doc_note(member, "has in-body comments, which are ignored")

def check_cycles(graph, reporter):
    """Check cyclic dependencies in a dependency graph.

    The graph parameter provides the graph to check. It should be an object
    that has three methods:
      iternodes():
        Return the list of nodes in the graph.
      iteredges(node):
        Return the list of edges from a given node.
        The list should contain (node, edge) pairs, where node is an object
        returned by iternodes() and edge is any object.
      report_cycle(cycle, reporter):
        Process a found cycle. cycle contains a list of (node, edge) pairs
        that describe the cycle. edge is the edge object that leads _to_
        the node in the cycle.

    This is implemented using an extended DFS-based strongly connected
    component (SCC) search, written using a stack instead of recursion.
    The base algorithm is Tarjan's SCC search:
      http://en.wikipedia.org/wiki/Tarjan's_strongly_connected_components_algorithm

    Each back edge that is encountered during the search is reported as a
    cycle. Additionally, if a cross edge is encountered that is within the
    current SCC, the target node and all its children in the current SCC will
    be visited again to find all cycles. All steps except cycle detection are
    omitted for such re-traversal.

    To avoid duplicates from cycles that do not include all nodes in an SCC,
    a cycle is only reported if the target of the back edge is still active
    in the search, i.e., not all edges from it have been traversed yet.
    """
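    # As an illustration only (not used by this script), a minimal object
    # satisfying the interface described above could look like this; the
    # class name and edge labels are hypothetical:
    #
    #   class TwoNodeCycle(object):
    #       def iternodes(self):
    #           return ['a', 'b']
    #       def iteredges(self, node):
    #           return [('b', 'a->b')] if node == 'a' else [('a', 'b->a')]
    #       def report_cycle(self, cycle, reporter):
    #           reporter.cyclic_issue(' -> '.join(node for node, edge in cycle))
    #
    # check_cycles(TwoNodeCycle(), reporter) would then report the a/b cycle.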
    # The DFS stack; next node is always popped from the end.
    # Stores (node, edge) pairs.
    # edge is None for start nodes and for post-order processing.
    dfsstack = []
    for node in graph.iternodes():
        dfsstack.append((node, None))
    # Stack of visited nodes that have not yet been assigned to a strongly
    # connected component.
    visitstack = []
    # List of nodes in the DFS recursion stack.
    currlist = []
    # Set of nodes in currlist for more efficient searching.
    currset = set()
    # Counter for initializing preorder.
    visit_count = 0
    # DFS pre-order for nodes: initialized when a node is first encountered
    # in the search.
    preorder = dict()
    # Lowest pre-order index reachable from this node.
    # Initialized to pre-order, and updated during post-order processing.
    linkorder = dict()
    # Set to True for a node when first encountered, and set to False when
    # a strongly connected component has been processed.
    in_progress = dict()
    # The DFS search
    while dfsstack:
        currnode, curredge = dfsstack.pop()
        # curredge is None if this is a start node or post-order traversal.
        # currlist is empty if this is a start node.
        if curredge is None and currlist:
            # All children visited: post-order processing.
            done = currlist.pop()[0]
            assert done == currnode
            currset.remove(currnode)
            # If this is the first time this node is encountered, fill
            # linkorder and check for strongly connected components.
            if linkorder[currnode] == preorder[currnode]:
                children = [x for x, dummy in graph.iteredges(currnode) if in_progress[x]]
                if children:
                    linkorder[currnode] = min([linkorder[x] for x in children])
                if preorder[currnode] <= linkorder[currnode]:
                    # This is a root of a strongly connected component.
                    while visitstack:
                        node = visitstack.pop()
                        in_progress[node] = False
                        if node == currnode:
                            break
                    else:
                        assert False
            continue
        if currnode not in preorder:
            # First encounter of this node: pre-order processing.
            preorder[currnode] = visit_count
            linkorder[currnode] = visit_count
            visitstack.append(currnode)
            visit_count += 1
            in_progress[currnode] = True
        elif not in_progress[currnode]:
            # Do not enter processed components again.
            continue
        currlist.append((currnode, curredge))
        currset.add(currnode)
        # add entry for post-order traversal
        dfsstack.append((currnode, None))
        for nextnode, edge in graph.iteredges(currnode):
            if nextnode not in preorder:
                # Not seen previously: push
                dfsstack.append((nextnode, edge))
            else:
                # If an already visited node is in the same component, it is
                # either part of a cycle, or we need to traverse it again to
                # find all cycles.
                if in_progress[nextnode]:
                    if nextnode not in currset:
                        dfsstack.append((nextnode, edge))
                    # Only report cycles to nodes that haven't been processed
                    # yet to avoid duplicates.
                    elif linkorder[nextnode] == preorder[nextnode]:
                        for index in xrange(len(currlist)):
                            if currlist[index][0] == nextnode:
                                cycle = [(nextnode, edge)]
                                cycle.extend(currlist[index+1:])
                                graph.report_cycle(cycle, reporter)
                                break
                        else:
                            assert False

class ModuleDependencyGraph(object):

    """Module dependency graph representation for check_cycles().

    In the reported graph, the nodes are gmxtree.Module objects and the edges
    are gmxtree.ModuleDependency objects.
    """
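
    # In this script, instances are consumed as
    # check_cycles(ModuleDependencyGraph(tree), reporter); see check_all() below.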

    def __init__(self, tree):
        self._tree = tree

    def iternodes(self):
        return self._tree.get_modules()

    def iteredges(self, module):
        for dependency in module.get_dependencies():
            if not dependency.is_test_only_dependency():
                yield (dependency.get_other_module(), dependency)

    def report_cycle(self, cycle, reporter):
        if any([x[1].is_cycle_suppressed() for x in cycle]):
            # TODO: Report unused suppressions.
            return
        modulelist = ' -> '.join([x[0].get_name()[7:] for x in cycle])
        summary = 'module-level cyclic dependency: ' + modulelist
        reporter.cyclic_issue(summary)

def check_all(tree, reporter, check_ignored):
    """Do all checks for the GROMACS tree."""
    includesorter = IncludeSorter()
    for fileobj in tree.get_files():
        if isinstance(fileobj, gmxtree.GeneratorSourceFile):
            continue
        check_file(fileobj, tree, reporter)
        for includedfile in fileobj.get_includes():
            check_include(fileobj, includedfile, reporter)
        if fileobj.should_includes_be_sorted():
            is_sorted, details = includesorter.check_sorted(fileobj)
            if not is_sorted:
                details.append("You can use includesorter.py to do the sorting automatically; see docs/dev-manual/gmxtree.rst")
                reporter.code_issue(fileobj,
                        "include style/order is not consistent; see docs/dev-manual/includestyle.rst", details)

    for classobj in tree.get_classes():
        check_class(classobj, reporter)

    for memberobj in tree.get_members():
        check_member(memberobj, reporter, check_ignored)

    check_cycles(ModuleDependencyGraph(tree), reporter)
    tree.report_unused_cycle_suppressions(reporter)

def main():
    """Run the checking script."""
    parser = OptionParser()
    parser.add_option('-S', '--source-root',
                      help='Source tree root directory')
    parser.add_option('-B', '--build-root',
                      help='Build tree root directory')
    parser.add_option('-l', '--log',
                      help='Write issues into a given log file in addition to stderr')
    parser.add_option('--ignore',
                      help='Set file with patterns for messages to ignore')
    parser.add_option('--ignore-cycles',
                      help='Set file with module dependencies to ignore in cycles')
    parser.add_option('--check-ignored', action='store_true',
                      help='Issue notes for comments ignored by Doxygen')
    parser.add_option('-q', '--quiet', action='store_true',
                      help='Do not write status messages')
    parser.add_option('--exitcode', action='store_true',
                      help='Return non-zero exit code if there are warnings')
    options, args = parser.parse_args()

    reporter = Reporter(options.log)
    if options.ignore:
        reporter.load_filters(options.ignore)

    if not options.quiet:
        sys.stderr.write('Scanning source tree...\n')
    tree = GromacsTree(options.source_root, options.build_root, reporter)
    tree.load_git_attributes()
    tree.load_installed_file_list()
    if not options.quiet:
        sys.stderr.write('Reading source files...\n')
    # TODO: The checking should be possible without storing everything in memory
    tree.scan_files(keep_contents=True)
    if not options.quiet:
        sys.stderr.write('Finding config.h and other preprocessor macro uses...\n')
    tree.find_define_file_uses()
    if options.ignore_cycles:
        tree.load_cycle_suppression_list(options.ignore_cycles)
    if not options.quiet:
        sys.stderr.write('Reading Doxygen XML files...\n')
    tree.load_xml()

    reporter.write_pending()

    if not options.quiet:
        sys.stderr.write('Checking...\n')

    check_all(tree, reporter, options.check_ignored)

    reporter.write_pending()
    reporter.report_unused_filters()
    reporter.close_log()

    if options.exitcode and reporter.had_warnings():
        sys.exit(1)

main()