cachegrind/cg_annotate.in

   1 #! /usr/bin/env python3
   2 # pyright: strict
   3
   4 # --------------------------------------------------------------------
   5 # --- Cachegrind's annotator.                       cg_annotate.in ---
   6 # --------------------------------------------------------------------
   7
   8 # This file is part of Cachegrind, a high-precision tracing profiler
   9 # built with Valgrind.
  10 #
  11 # Copyright (C) 2002-2023 Nicholas Nethercote
  12 #    njn@valgrind.org
  13 #
  14 # This program is free software; you can redistribute it and/or
  15 # modify it under the terms of the GNU General Public License as
  16 # published by the Free Software Foundation; either version 2 of the
  17 # License, or (at your option) any later version.
  18 #
  19 # This program is distributed in the hope that it will be useful, but
  20 # WITHOUT ANY WARRANTY; without even the implied warranty of
  21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22 # General Public License for more details.
  23 #
  24 # You should have received a copy of the GNU General Public License
  25 # along with this program; if not, see <http://www.gnu.org/licenses/>.
  26 #
  27 # The GNU General Public License is contained in the file COPYING.
  28
  29 # This script reads Cachegrind output files and produces human-readable output.
  30 #
  31 # Use `make pyann` to "build" this script with `auxprogs/pybuild.rs` every time
  32 # it is changed. This runs the formatters, type-checkers, and linters on
  33 # `cg_annotate.in` and then generates `cg_annotate`.
  34
  35 from __future__ import annotations
  36
  37 import filecmp
  38 import os
  39 import re
  40 import sys
  41 from argparse import ArgumentParser, BooleanOptionalAction, Namespace
  42 from collections import defaultdict
  43 from typing import Callable, DefaultDict, NoReturn, TextIO
  44
  45
  46 def die(msg: str) -> NoReturn:
  47     print("cg_annotate: error:", msg, file=sys.stderr)
  48     sys.exit(1)
  49
  50
  51 SearchAndReplace = Callable[[str], str]
  52
  53
  54 # A typed wrapper for parsed args.
  55 class Args(Namespace):
  56     # None of these fields are modified after arg parsing finishes.
  57     diff: bool
  58     mod_filename: SearchAndReplace
  59     mod_funcname: SearchAndReplace
  60     show: list[str]
  61     sort: list[str]
  62     threshold: float  # a percentage
  63     show_percs: bool
  64     annotate: bool
  65     context: int
  66     cgout_filename: list[str]
  67
  68     @staticmethod
  69     def parse() -> Args:
  70         # We support Perl-style `s/old/new/flags` search-and-replace
  71         # expressions, because that's how this option was implemented in the
  72         # old Perl version of `cg_diff`. This requires conversion from
  73         # `s/old/new/` style to `re.sub`. The conversion isn't a perfect
  74         # emulation of Perl regexps (e.g. Python uses `\1` rather than `$1` for
  75         # using captures in the `new` part), but it should be close enough. The
  76         # only supported flags are `g` (global) and `i` (ignore case).
  77         def search_and_replace(regex: str | None) -> SearchAndReplace:
  78             if regex is None:
  79                 return lambda s: s
  80
  81             # Extract the parts of an `s/old/new/tail` regex. `(?<!\\)/` is an
  82             # example of negative lookbehind. It means "match a forward slash
  83             # unless preceded by a backslash".
  84             m = re.match(r"s/(.*)(?<!\\)/(.*)(?<!\\)/(g|i|gi|ig|)$", regex)
  85             if m is None:
  86                 raise ValueError
  87
  88             # Forward slashes must be escaped in an `s/old/new/` expression,
  89             # but we then must unescape them before using them with `re.sub`.
  90             pat = m.group(1).replace(r"\/", r"/")
  91             repl = m.group(2).replace(r"\/", r"/")
  92             tail = m.group(3)
  93
  94             if "g" in tail:
  95                 count = 0  # unlimited
  96             else:
  97                 count = 1
  98
  99             if "i" in tail:
 100                 flags = re.IGNORECASE
 101             else:
 102                 flags = re.RegexFlag(0)
 103
 104             return lambda s: re.sub(re.compile(pat, flags=flags), repl, s, count=count)
 105
 106         def comma_separated_list(values: str) -> list[str]:
 107             return values.split(",")
 108
 109         def threshold(n: str) -> float:
 110             f = float(n)
 111             if 0 <= f <= 20:
 112                 return f
 113             raise ValueError
 114
 115         # Add a bool argument that defaults to true.
 116         #
 117         # Supports these forms: `--foo`, `--no-foo`, `--foo=yes`, `--foo=no`.
 118         # The latter two were the forms supported by the old Perl version of
 119         # `cg_annotate`, and are now deprecated.
 120         def add_bool_argument(
 121             p: ArgumentParser, new_name: str, old_name: str, help_: str
 122         ) -> None:
 123             new_flag = "--" + new_name
 124             old_flag = "--" + old_name
 125             dest = new_name.replace("-", "_")
 126
 127             # Note: the default value is always printed with `BooleanOptionalAction`,
 128             # due to an argparse bug: https://github.com/python/cpython/issues/83137.
 129             p.add_argument(
 130                 new_flag,
 131                 default=True,
 132                 action=BooleanOptionalAction,
 133                 help=help_,
 134             )
 135             p.add_argument(
 136                 f"{old_flag}=yes",
 137                 dest=dest,
 138                 action="store_true",
 139                 help=f"(deprecated) same as --{new_name}",
 140             )
 141             p.add_argument(
 142                 f"{old_flag}=no",
 143                 dest=dest,
 144                 action="store_false",
 145                 help=f"(deprecated) same as --no-{new_name}",
 146             )
 147
 148         p = ArgumentParser(description="Process one or more Cachegrind output files.")
 149
 150         p.add_argument("--version", action="version", version="%(prog)s-@VERSION@")
 151         p.add_argument(
 152             "--diff",
 153             default=False,
 154             action="store_true",
 155             help="perform a diff between two Cachegrind output files",
 156         )
 157         p.add_argument(
 158             "--mod-filename",
 159             type=search_and_replace,
 160             metavar="REGEX",
 161             default=search_and_replace(None),
 162             help="a search-and-replace regex applied to filenames, e.g. "
 163             "`s/prog[0-9]/progN/`",
 164         )
 165         p.add_argument(
 166             "--mod-funcname",
 167             type=search_and_replace,
 168             metavar="REGEX",
 169             default=search_and_replace(None),
 170             help="like --mod-filename, but for function names",
 171         )
 172         p.add_argument(
 173             "--show",
 174             type=comma_separated_list,
 175             metavar="A,B,C",
 176             help="only show figures for events A,B,C (default: all events)",
 177         )
 178         p.add_argument(
 179             "--sort",
 180             type=comma_separated_list,
 181             metavar="A,B,C",
 182             help="sort functions by events A,B,C (default: event column order)",
 183         )
 184         p.add_argument(
 185             "--threshold",
 186             type=threshold,
 187             default=0.1,
 188             metavar="N:[0,20]",
 189             help="only show file:function/function:file pairs with more than "
 190             "N%% of primary sort event counts (default: %(default)s)",
 191         )
 192         add_bool_argument(
 193             p,
 194             "show-percs",
 195             "show-percs",
 196             "show a percentage for each non-zero count",
 197         )
 198         add_bool_argument(
 199             p,
 200             "annotate",
 201             "auto",
 202             "annotate all source files containing functions that reached the "
 203             "event count threshold",
 204         )
 205         p.add_argument(
 206             "--context",
 207             type=int,
 208             default=8,
 209             metavar="N",
 210             help="print N lines of context before and after annotated lines "
 211             "(default: %(default)s)",
 212         )
 213         p.add_argument(
 214             "cgout_filename",
 215             nargs="+",
 216             metavar="cachegrind-out-file",
 217             help="file produced by Cachegrind",
 218         )
 219
 220         # `args0` name used to avoid shadowing the global `args`, which pylint
 221         # doesn't like.
 222         args0 = p.parse_args(namespace=Args())
 223         if args0.diff and len(args0.cgout_filename) != 2:
 224             p.print_usage(file=sys.stderr)
 225             die("argument --diff: requires exactly two Cachegrind output files")
 226
 227         return args0  # type: ignore [return-value]
 228
 229
 230 # Args are stored in a global for easy access.
 231 args = Args.parse()
 232
 233
 234 # A single instance of this class is constructed, from `args` and the `events:`
 235 # line in the cgout file.
 236 class Events:
 237     # The event names.
 238     events: list[str]
 239
 240     # Equal to `len(self.events)`.
 241     num_events: int
 242
 243     # The order in which we must traverse events for --show. Can be shorter
 244     # than `events`.
 245     show_events: list[str]
 246
 247     # Like `show_events`, but indices into `events`, rather than names.
 248     show_indices: list[int]
 249
 250     # The order in which we must traverse events for --sort. Can be shorter
 251     # than `events`.
 252     sort_events: list[str]
 253
 254     # Like `sort_events`, but indices into `events`, rather than names.
 255     sort_indices: list[int]
 256
 257     def __init__(self) -> None:
 258         # All fields are left uninitialized here, and set instead in `init`.
 259         pass
 260
 261     def init(self, text: str) -> None:
 262         self.events = text.split()
 263         self.num_events = len(self.events)
 264
 265         # A temporary dict mapping events to indices, [0, n-1].
 266         event_indices = {event: n for n, event in enumerate(self.events)}
 267
 268         # If --show is given, check it is valid. If --show is not given,
 269         # default to all events in the standard order.
 270         if args.show:
 271             for event in args.show:
 272                 if event not in event_indices:
 273                     die(f"--show event `{event}` did not appear in `events:` line")
 274             self.show_events = args.show
 275         else:
 276             self.show_events = self.events
 277
 278         self.show_indices = [event_indices[event] for event in self.show_events]
 279
 280         # Likewise for --sort.
 281         if args.sort:
 282             for event in args.sort:
 283                 if event not in event_indices:
 284                     die(f"--sort event `{event}` did not appear in `events:` line")
 285             self.sort_events = args.sort
 286         else:
 287             self.sort_events = self.events
 288
 289         self.sort_indices = [event_indices[event] for event in self.sort_events]
 290
 291     # Raises a `ValueError` exception on syntax error.
 292     def mk_cc(self, str_counts: list[str]) -> Cc:
 293         # This is slightly faster than a list comprehension.
 294         counts = list(map(int, str_counts))
 295
 296         if len(counts) == self.num_events:
 297             pass
 298         elif len(counts) < self.num_events:
 299             # Add zeroes at the end for any missing numbers.
 300             counts.extend([0] * (self.num_events - len(counts)))
 301         else:
 302             raise ValueError
 303
 304         return counts
 305
 306     def mk_empty_cc(self) -> Cc:
 307         # This is much faster than a list comprehension.
 308         return [0] * self.num_events
 309
 310     def mk_empty_dcc(self) -> Dcc:
 311         return Dcc(self.mk_empty_cc(), defaultdict(self.mk_empty_cc))
 312
 313
 314 # A "cost centre", which is a dumb container for counts. Always the same length
 315 # as `Events.events`, but it doesn't even know event names. `Events.mk_cc` and
 316 # `Events.mk_empty_cc` are used for construction.
 317 #
 318 # This used to be a class with a single field `counts: list[int]`, but this
 319 # type is very hot and just using a type alias is much faster.
 320 Cc = list[int]
 321
 322
 323 # Add the counts in `a_cc` to `b_cc`.
 324 def add_cc_to_cc(a_cc: Cc, b_cc: Cc) -> None:
 325     for i, a_count in enumerate(a_cc):
 326         b_cc[i] += a_count
 327
 328
 329 # Subtract the counts in `a_cc` from `b_cc`.
 330 def sub_cc_from_cc(a_cc: Cc, b_cc: Cc) -> None:
 331     for i, a_count in enumerate(a_cc):
 332         b_cc[i] -= a_count
 333
 334
 335 # Unrolled version of `add_cc_to_cc`, for speed.
 336 def add_cc_to_ccs(
 337     a_cc: Cc, b_cc1: Cc, b_cc2: Cc, b_cc3: Cc, b_cc4: Cc, b_cc5: Cc, total_cc: Cc
 338 ) -> None:
 339     for i, a_count in enumerate(a_cc):
 340         b_cc1[i] += a_count
 341         b_cc2[i] += a_count
 342         b_cc3[i] += a_count
 343         b_cc4[i] += a_count
 344         b_cc5[i] += a_count
 345         total_cc[i] += a_count
 346
 347
 348 # Unrolled version of `sub_cc_from_cc`, for speed. Note that the last one,
 349 # `total_cc`, is added.
 350 def sub_cc_from_ccs(
 351     a_cc: Cc, b_cc1: Cc, b_cc2: Cc, b_cc3: Cc, b_cc4: Cc, b_cc5: Cc, total_cc: Cc
 352 ) -> None:
 353     for i, a_count in enumerate(a_cc):
 354         b_cc1[i] -= a_count
 355         b_cc2[i] -= a_count
 356         b_cc3[i] -= a_count
 357         b_cc4[i] -= a_count
 358         b_cc5[i] -= a_count
 359         total_cc[i] += a_count
 360
 361
 362 # Update `min_cc` and `max_cc` with `self`.
 363 def update_cc_extremes(self: Cc, min_cc: Cc, max_cc: Cc) -> None:
 364     for i, count in enumerate(self):
 365         if count > max_cc[i]:
 366             max_cc[i] = count
 367         elif count < min_cc[i]:
 368             min_cc[i] = count
 369
 370
 371 # Note: some abbrevations used below:
 372 # - Ofl/ofl: original filename, as mentioned in a cgout file.
 373 # - Ofn/ofn: original function name, as mentioned in a cgout file.
 374 # - Mfl/mfl: modified filename, the result of passing an Ofl through
 375 #   `--mod-filename`.
 376 # - Mfn/mfn: modified function name, the result of passing an Ofn through
 377 #   `--mod-funcname`.
 378 # - Mname/mname: modified name, used for what could be an Mfl or an Mfn.
 379
 380
 381 # A deep cost centre with a dict for the inner mnames and CCs.
 382 class Dcc:
 383     outer_cc: Cc
 384     inner_dict_mname_cc: DictMnameCc
 385
 386     def __init__(self, outer_cc: Cc, inner_dict_mname_cc: DictMnameCc) -> None:
 387         self.outer_cc = outer_cc
 388         self.inner_dict_mname_cc = inner_dict_mname_cc
 389
 390
 391 # A deep cost centre with a list for the inner mnames and CCs. Used during
 392 # filtering and sorting.
 393 class Lcc:
 394     outer_cc: Cc
 395     inner_list_mname_cc: ListMnameCc
 396
 397     def __init__(self, outer_cc: Cc, inner_list_mname_cc: ListMnameCc) -> None:
 398         self.outer_cc = outer_cc
 399         self.inner_list_mname_cc = inner_list_mname_cc
 400
 401
 402 # Per-Mfl/Mfn CCs. The list version is used during filtering and sorting.
 403 DictMnameCc = DefaultDict[str, Cc]
 404 ListMnameCc = list[tuple[str, Cc]]
 405
 406 # Per-Mfl/Mfn DCCs. The outer Mnames are Mfls and the inner Mnames are Mfns, or
 407 # vice versa. The list version is used during filtering and sorting.
 408 DictMnameDcc = DefaultDict[str, Dcc]
 409 ListMnameLcc = list[tuple[str, Lcc]]
 410
 411 # Per-line CCs, organised by Mfl and line number.
 412 DictLineCc = DefaultDict[int, Cc]
 413 DictMflDictLineCc = DefaultDict[str, DictLineCc]
 414
 415 # A dictionary tracking how Ofls get mapped to Mfls by `--mod-filename`. If
 416 # `--mod-filename` isn't used, each entry will be the identity mapping: ("foo"
 417 # -> set(["foo"])).
 418 DictMflOfls = DefaultDict[str, set[str]]
 419
 420
 421 def read_cgout_file(
 422     cgout_filename: str,
 423     is_first_file: bool,
 424     descs: list[str],
 425     cmds: list[str],
 426     events: Events,
 427     dict_mfl_ofls: DictMflOfls,
 428     dict_mfl_dcc: DictMnameDcc,
 429     dict_mfn_dcc: DictMnameDcc,
 430     dict_mfl_dict_line_cc: DictMflDictLineCc,
 431     summary_cc: Cc,
 432 ) -> None:
 433     # The file format is described in Cachegrind's manual.
 434     try:
 435         cgout_file = open(cgout_filename, "r", encoding="utf-8")
 436     except OSError as err:
 437         die(f"{err}")
 438
 439     with cgout_file:
 440         cgout_line_num = 0
 441
 442         def parse_die(msg: str) -> NoReturn:
 443             die(f"{cgout_file.name}:{cgout_line_num}: {msg}")
 444
 445         def readline() -> str:
 446             nonlocal cgout_line_num
 447             cgout_line_num += 1
 448             return cgout_file.readline()
 449
 450         # Read "desc:" lines.
 451         desc = ""
 452         while line := readline():
 453             if m := re.match(r"desc:\s+(.*)", line):
 454                 desc += m.group(1) + "\n"
 455             else:
 456                 break
 457         descs.append(desc)
 458
 459         # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
 460         if m := re.match(r"cmd:\s+(.*)", line):
 461             cmds.append(m.group(1))
 462         else:
 463             parse_die("missing a `command:` line")
 464
 465         # Read "events:" line.
 466         line = readline()
 467         if m := re.match(r"events:\s+(.*)", line):
 468             if is_first_file:
 469                 events.init(m.group(1))
 470             else:
 471                 events2 = Events()
 472                 events2.init(m.group(1))
 473                 if events.events != events2.events:
 474                     die("events in data files don't match")
 475         else:
 476             parse_die("missing an `events:` line")
 477
 478         def mk_empty_dict_line_cc() -> DictLineCc:
 479             return defaultdict(events.mk_empty_cc)
 480
 481         # The current Mfl and Mfn.
 482         mfl = ""
 483         mfn = ""
 484
 485         # These values are passed in by reference and are modified by this
 486         # function. But they can't be properly initialized until the `events:`
 487         # line of the first file is read and the number of events is known. So
 488         # we initialize them in an invalid state in `main`, and then
 489         # reinitialize them properly here, before their first use.
 490         if is_first_file:
 491             dict_mfl_dcc.default_factory = events.mk_empty_dcc
 492             dict_mfn_dcc.default_factory = events.mk_empty_dcc
 493             dict_mfl_dict_line_cc.default_factory = mk_empty_dict_line_cc
 494             summary_cc.extend(events.mk_empty_cc())
 495
 496         # These are refs into the dicts above, used to avoid repeated lookups.
 497         # They are all overwritten before first use.
 498         mfl_dcc = events.mk_empty_dcc()
 499         mfn_dcc = events.mk_empty_dcc()
 500         mfl_dcc_inner_mfn_cc = events.mk_empty_cc()
 501         mfn_dcc_inner_mfl_cc = events.mk_empty_cc()
 502         dict_line_cc = mk_empty_dict_line_cc()
 503         total_cc = events.mk_empty_cc()
 504
 505         # When diffing, we negate the first cgout file's counts to effectively
 506         # achieve `cgout2 - cgout1`.
 507         if args.diff and is_first_file:
 508             combine_cc_with_cc = sub_cc_from_cc
 509             combine_cc_with_ccs = sub_cc_from_ccs
 510         else:
 511             combine_cc_with_cc = add_cc_to_cc
 512             combine_cc_with_ccs = add_cc_to_ccs
 513
 514         summary_cc_present = False
 515
 516         # Line matching is done in order of pattern frequency, for speed.
 517         while line := readline():
 518             if line[0].isdigit():
 519                 split_line = line.split()
 520                 try:
 521                     line_num = int(split_line[0])
 522                     cc = events.mk_cc(split_line[1:])
 523                 except ValueError:
 524                     parse_die("malformed or too many event counts")
 525
 526                 # Record this CC at various levels.
 527                 combine_cc_with_ccs(
 528                     cc,
 529                     mfl_dcc.outer_cc,
 530                     mfn_dcc.outer_cc,
 531                     mfl_dcc_inner_mfn_cc,
 532                     mfn_dcc_inner_mfl_cc,
 533                     dict_line_cc[line_num],
 534                     total_cc,
 535                 )
 536
 537             elif line.startswith("fn="):
 538                 ofn = line[3:-1]
 539                 mfn = args.mod_funcname(ofn)
 540                 # `mfl_dcc` is unchanged.
 541                 mfn_dcc = dict_mfn_dcc[mfn]
 542                 mfl_dcc_inner_mfn_cc = mfl_dcc.inner_dict_mname_cc[mfn]
 543                 mfn_dcc_inner_mfl_cc = mfn_dcc.inner_dict_mname_cc[mfl]
 544
 545             elif line.startswith("fl="):
 546                 ofl = line[3:-1]
 547                 mfl = args.mod_filename(ofl)
 548                 dict_mfl_ofls[mfl].add(ofl)
 549                 # A `fn=` line should follow, overwriting the function name.
 550                 mfn = "<unspecified>"
 551                 mfl_dcc = dict_mfl_dcc[mfl]
 552                 mfn_dcc = dict_mfn_dcc[mfn]
 553                 mfl_dcc_inner_mfn_cc = mfl_dcc.inner_dict_mname_cc[mfn]
 554                 mfn_dcc_inner_mfl_cc = mfn_dcc.inner_dict_mname_cc[mfl]
 555                 dict_line_cc = dict_mfl_dict_line_cc[mfl]
 556
 557             elif m := re.match(r"summary:\s+(.*)", line):
 558                 summary_cc_present = True
 559                 try:
 560                     this_summary_cc = events.mk_cc(m.group(1).split())
 561                     combine_cc_with_cc(this_summary_cc, summary_cc)
 562
 563                     # Check summary is correct. Note that `total_cc` doesn't
 564                     # get negated for the first file in a diff, unlike the
 565                     # other CCs, because it's only used here as a sanity check.
 566                     if this_summary_cc != total_cc:
 567                         msg = (
 568                             "`summary:` line doesn't match computed total\n"
 569                             f"- summary:  {this_summary_cc}\n"
 570                             f"- computed: {total_cc}"
 571                         )
 572                         parse_die(msg)
 573
 574                 except ValueError:
 575                     parse_die("malformed or too many event counts")
 576
 577             elif line == "\n" or line.startswith("#"):
 578                 # Skip empty lines and comment lines.
 579                 pass
 580
 581             else:
 582                 parse_die(f"malformed line: {line[:-1]}")
 583
 584     # Check if summary line was present.
 585     if not summary_cc_present:
 586         parse_die("missing `summary:` line, aborting")
 587
 588
 589 # The width of a column, in three parts.
 590 class Width:
 591     # Width of the widest commified event count.
 592     count: int
 593
 594     # Width of the widest first percentage, of the form ` (n.n%)` or ` (n.n%,`.
 595     perc1: int
 596
 597     # Width of the widest second percentage, of the form ` n.n%)`.
 598     perc2: int
 599
 600     def __init__(self, count: int, perc1: int, perc2: int) -> None:
 601         self.count = count
 602         self.perc1 = perc1
 603         self.perc2 = perc2
 604
 605
 606 class CcPrinter:
 607     # Note: every `CcPrinter` gets the same `Events` object.
 608     events: Events
 609
 610     # Note: every `CcPrinter` gets the same summary CC.
 611     summary_cc: Cc
 612
 613     # String to print before the event names.
 614     events_prefix: str
 615
 616     # The widths of each event column. For simplicity, its length matches
 617     # `events.events`, even though not all events are necessarily shown.
 618     widths: list[Width]
 619
 620     # Text of a missing CC, which can be computed in advance.
 621     missing_cc_str: str
 622
 623     # Must call `init_ccs` or `init_list_mname_lcc` after this.
 624     def __init__(self, events: Events, summary_cc: Cc) -> None:
 625         self.events = events
 626         self.summary_cc = summary_cc
 627         # Other fields initialized in `init_*`.
 628
 629     def init_ccs(self, ccs: list[Cc]) -> None:
 630         self.events_prefix = ""
 631
 632         # Find min and max count for each event. One of them will be the widest
 633         # value.
 634         min_cc = self.events.mk_empty_cc()
 635         max_cc = self.events.mk_empty_cc()
 636         for cc in ccs:
 637             update_cc_extremes(cc, min_cc, max_cc)
 638
 639         self.init_widths(min_cc, max_cc, None, None)
 640
 641     def init_list_mname_lcc(self, list_mname_lcc: ListMnameLcc) -> None:
 642         self.events_prefix = "  "
 643
 644         cumul_cc = self.events.mk_empty_cc()
 645
 646         # Find min and max value for each event. One of them will be the widest
 647         # value. Likewise for the cumulative counts.
 648         min_cc = self.events.mk_empty_cc()
 649         max_cc = self.events.mk_empty_cc()
 650         min_cumul_cc = self.events.mk_empty_cc()
 651         max_cumul_cc = self.events.mk_empty_cc()
 652         for _, lcc in list_mname_lcc:
 653             # Consider both outer and inner CCs for `count` and `perc1`.
 654             update_cc_extremes(lcc.outer_cc, min_cc, max_cc)
 655             for _, inner_cc in lcc.inner_list_mname_cc:
 656                 update_cc_extremes(inner_cc, min_cc, max_cc)
 657
 658             # Consider only outer CCs for `perc2`.
 659             add_cc_to_cc(lcc.outer_cc, cumul_cc)
 660             update_cc_extremes(cumul_cc, min_cumul_cc, max_cumul_cc)
 661
 662         self.init_widths(min_cc, max_cc, min_cumul_cc, max_cumul_cc)
 663
 664     def init_widths(
 665         self, min_cc1: Cc, max_cc1: Cc, min_cc2: Cc | None, max_cc2: Cc | None
 666     ) -> None:
 667         self.widths = [Width(0, 0, 0)] * self.events.num_events
 668         for i in range(len(self.events.events)):
 669             # Get count and percs widths of the min and max CCs.
 670             (min_count, min_perc1, min_perc2) = self.count_and_percs_strs(
 671                 min_cc1, min_cc2, i
 672             )
 673             (max_count, max_perc1, max_perc2) = self.count_and_percs_strs(
 674                 max_cc1, max_cc2, i
 675             )
 676             self.widths[i] = Width(
 677                 max(len(min_count), len(max_count)),
 678                 max(len(min_perc1), len(max_perc1)),
 679                 max(len(min_perc2), len(max_perc2)),
 680             )
 681
 682         self.missing_cc_str = ""
 683         for i in self.events.show_indices:
 684             self.missing_cc_str += self.count_and_percs_str(i, ".", "", "")
 685
 686     # Get the count and perc string for `cc1[i]` and the perc string for
 687     # `cc2[i]`. (Unless `cc2` is `None`, in which case `perc2` will be "".)
 688     def count_and_percs_strs(
 689         self, cc1: Cc, cc2: Cc | None, i: int
 690     ) -> tuple[str, str, str]:
 691         count = f"{cc1[i]:,d}"  # commify
 692         if args.show_percs:
 693             summary_count = self.summary_cc[i]
 694             if cc2 is None:
 695                 # A plain or inner CC, with a single percentage.
 696                 if cc1[i] == 0:
 697                     # Don't show percentages for "0" entries, it's just clutter.
 698                     perc1 = ""
 699                 elif summary_count == 0:
 700                     # Avoid dividing by zero.
 701                     perc1 = " (n/a)"
 702                 else:
 703                     perc1 = f" ({cc1[i] * 100 / summary_count:.1f}%)"
 704                 perc2 = ""
 705             else:
 706                 # An outer CC, with two percentages.
 707                 if summary_count == 0:
 708                     # Avoid dividing by zero.
 709                     perc1 = " (n/a,"
 710                     perc2 = " n/a)"
 711                 else:
 712                     perc1 = f" ({cc1[i] * 100 / summary_count:.1f}%,"
 713                     perc2 = f" {cc2[i] * 100 / summary_count:.1f}%)"
 714         else:
 715             perc1 = ""
 716             perc2 = ""
 717
 718         return (count, perc1, perc2)
 719
 720     def count_and_percs_str(self, i: int, count: str, perc1: str, perc2: str) -> str:
 721         event_w = len(self.events.events[i])
 722         count_w = self.widths[i].count
 723         perc1_w = self.widths[i].perc1
 724         perc2_w = self.widths[i].perc2
 725         pre_w = max(0, event_w - count_w - perc1_w - perc2_w)
 726         return f"{'':>{pre_w}}{count:>{count_w}}{perc1:>{perc1_w}}{perc2:>{perc2_w}} "
 727
 728     def print_events(self, suffix: str) -> None:
 729         print(self.events_prefix, end="")
 730         for i in self.events.show_indices:
 731             event = self.events.events[i]
 732             event_w = len(event)
 733             count_w = self.widths[i].count
 734             perc1_w = self.widths[i].perc1
 735             perc2_w = self.widths[i].perc2
 736             print(f"{event:_<{max(event_w, count_w + perc1_w + perc2_w)}} ", end="")
 737
 738         print(suffix)
 739
 740     def print_lcc(self, indent: str, lcc: Lcc, outer_mname: str, cumul_cc: Cc) -> None:
 741         print(indent, end="")
 742         if (
 743             len(lcc.inner_list_mname_cc) == 1
 744             and lcc.outer_cc == lcc.inner_list_mname_cc[0][1]
 745         ):
 746             # There is only one inner CC, it met the threshold, and it is equal
 747             # to the outer CC. Print the inner CC and outer CC in a single
 748             # line, because they are the same.
 749             inner_mname = lcc.inner_list_mname_cc[0][0]
 750             self.print_cc(lcc.outer_cc, cumul_cc, f"{outer_mname}:{inner_mname}")
 751         else:
 752             # There are multiple inner CCs, and at least one met the threshold.
 753             # Print the outer CC and then the inner CCs, indented.
 754             self.print_cc(lcc.outer_cc, cumul_cc, f"{outer_mname}:")
 755             for inner_mname, inner_cc in lcc.inner_list_mname_cc:
 756                 print("  ", end="")
 757                 self.print_cc(inner_cc, None, f"  {inner_mname}")
 758         print()
 759
 760     # If `cc2` is `None`, it's a vanilla CC or inner CC. Otherwise, it's an
 761     # outer CC.
 762     def print_cc(self, cc: Cc, cc2: Cc | None, suffix: str) -> None:
 763         for i in self.events.show_indices:
 764             (count, perc1, perc2) = self.count_and_percs_strs(cc, cc2, i)
 765             print(self.count_and_percs_str(i, count, perc1, perc2), end="")
 766
 767         print("", suffix)
 768
 769     def print_missing_cc(self, suffix: str) -> None:
 770         print(self.missing_cc_str, suffix)
 771
 772
 773 # Used in various places in the output.
 774 def print_fancy(text: str) -> None:
 775     fancy = "-" * 80
 776     print(fancy)
 777     print("--", text)
 778     print(fancy)
 779
 780
 781 def print_metadata(descs: list[str], cmds: list[str], events: Events) -> None:
 782     print_fancy("Metadata")
 783
 784     def all_the_same(strs: list[str]) -> bool:
 785         for i in range(len(strs) - 1):
 786             if strs[i] != strs[i + 1]:
 787                 return False
 788
 789         return True
 790
 791     print("Invocation:      ", *sys.argv)
 792
 793     # When there are multiple descriptions, they are usually all the same. Only
 794     # print the description once in that case.
 795     if all_the_same(descs):
 796         print(descs[0], end="")
 797     else:
 798         for i, desc in enumerate(descs):
 799             print(f"Description {i+1}:")
 800             print(desc, end="")
 801
 802     # Commands are sometimes the same, sometimes not. Always print them
 803     # individually, but refer to the previous one when appropriate.
 804     if len(cmds) == 1:
 805         print("Command:         ", cmds[0])
 806     else:
 807         for i, cmd in enumerate(cmds):
 808             if i > 0 and cmds[i - 1] == cmd:
 809                 print(f"Command {i+1}:        (same as Command {i})")
 810             else:
 811                 print(f"Command {i+1}:       ", cmd)
 812
 813     print("Events recorded: ", *events.events)
 814     print("Events shown:    ", *events.show_events)
 815     print("Event sort order:", *events.sort_events)
 816     print("Threshold:        ", args.threshold, "%", sep="")
 817     print("Annotation:      ", "on" if args.annotate else "off")
 818     print()
 819
 820
 821 def print_summary(events: Events, summary_cc: Cc) -> None:
 822     printer = CcPrinter(events, summary_cc)
 823     printer.init_ccs([summary_cc])
 824     print_fancy("Summary")
 825     printer.print_events("")
 826     print()
 827     printer.print_cc(summary_cc, None, "PROGRAM TOTALS")
 828     print()
 829
 830
 831 def print_mname_summary(
 832     kind: str, indent: str, events: Events, dict_mname_dcc: DictMnameDcc, summary_cc: Cc
 833 ) -> set[str]:
 834     # The primary sort event is used for the threshold.
 835     threshold_index = events.sort_indices[0]
 836
 837     # Convert the threshold from a percentage to an event count.
 838     threshold = args.threshold * abs(summary_cc[threshold_index]) / 100
 839
 840     def meets_threshold(mname_and_cc: tuple[str, Cc]) -> bool:
 841         cc = mname_and_cc[1]
 842         return abs(cc[threshold_index]) >= threshold
 843
 844     # Create a list with the outer CC counts in sort order, so that
 845     # left-to-right list comparison does the right thing. Plus the outer name
 846     # at the end for deterministic output when all the event counts are
 847     # identical in two CCs.
 848     def key_mname_and_lcc(mname_and_lcc: tuple[str, Lcc]) -> tuple[list[int], str]:
 849         (outer_mname, lcc) = mname_and_lcc
 850         return (
 851             [abs(lcc.outer_cc[i]) for i in events.sort_indices],
 852             outer_mname,
 853         )
 854
 855     # Similar to `key_mname_and_lcc`.
 856     def key_mname_and_cc(mname_and_cc: tuple[str, Cc]) -> tuple[list[int], str]:
 857         (mname, cc) = mname_and_cc
 858         return ([abs(cc[i]) for i in events.sort_indices], mname)
 859
 860     # This is a `filter_map` operation, which Python doesn't directly support.
 861     list_mname_lcc: ListMnameLcc = []
 862     for outer_mname, dcc in dict_mname_dcc.items():
 863         # Filter out inner CCs for which the primary sort event count is below the
 864         # threshold, and sort the remainder.
 865         inner_list_mname_cc = sorted(
 866             filter(meets_threshold, dcc.inner_dict_mname_cc.items()),
 867             key=key_mname_and_cc,
 868             reverse=True,
 869         )
 870
 871         # If no inner CCs meet the threshold, ignore the entire DCC, even if
 872         # the outer CC meets the threshold.
 873         if len(inner_list_mname_cc) == 0:
 874             continue
 875
 876         list_mname_lcc.append((outer_mname, Lcc(dcc.outer_cc, inner_list_mname_cc)))
 877
 878     list_mname_lcc = sorted(list_mname_lcc, key=key_mname_and_lcc, reverse=True)
 879
 880     printer = CcPrinter(events, summary_cc)
 881     printer.init_list_mname_lcc(list_mname_lcc)
 882     print_fancy(kind + " summary")
 883     printer.print_events(" " + kind.lower())
 884     print()
 885
 886     # Print LCCs.
 887     threshold_mnames: set[str] = set([])
 888     cumul_cc = events.mk_empty_cc()
 889     for mname, lcc in list_mname_lcc:
 890         add_cc_to_cc(lcc.outer_cc, cumul_cc)
 891         printer.print_lcc(indent, lcc, mname, cumul_cc)
 892         threshold_mnames.add(mname)
 893
 894     return threshold_mnames
 895
 896
 897 class AnnotatedCcs:
 898     line_nums_known_cc: Cc
 899     line_nums_unknown_cc: Cc
 900     non_identical_cc: Cc
 901     unreadable_cc: Cc
 902     below_threshold_cc: Cc
 903     files_unknown_cc: Cc
 904
 905     labels = [
 906         "  annotated: files known & above threshold & readable, line numbers known",
 907         "  annotated: files known & above threshold & readable, line numbers unknown",
 908         "unannotated: files known & above threshold & two or more non-identical",
 909         "unannotated: files known & above threshold & unreadable ",
 910         "unannotated: files known & below threshold",
 911         "unannotated: files unknown",
 912     ]
 913
 914     def __init__(self, events: Events) -> None:
 915         self.line_nums_known_cc = events.mk_empty_cc()
 916         self.line_nums_unknown_cc = events.mk_empty_cc()
 917         self.non_identical_cc = events.mk_empty_cc()
 918         self.unreadable_cc = events.mk_empty_cc()
 919         self.below_threshold_cc = events.mk_empty_cc()
 920         self.files_unknown_cc = events.mk_empty_cc()
 921
 922     def ccs(self) -> list[Cc]:
 923         return [
 924             self.line_nums_known_cc,
 925             self.line_nums_unknown_cc,
 926             self.non_identical_cc,
 927             self.unreadable_cc,
 928             self.below_threshold_cc,
 929             self.files_unknown_cc,
 930         ]
 931
 932
 933 def mk_warning(msg: str) -> str:
 934     return f"""\
 935 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 936 @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
 937 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 938 {msg}\
 939 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 940 """
 941
 942
 943 def warn_ofls_are_all_newer(ofls: list[str], cgout_filename: str) -> None:
 944     s = "".join([f"@ - {ofl}\n" for ofl in ofls])
 945     msg = f"""\
 946 @ Original source files are all newer than data file '{cgout_filename}':
 947 {s}@ Annotations may not be correct.
 948 """
 949     print(mk_warning(msg))
 950
 951
 952 def warn_bogus_lines(src_filename: str) -> None:
 953     msg = f"""\
 954 @@ Information recorded about lines past the end of '{src_filename}'.
 955 """
 956     print(mk_warning(msg), end="")
 957
 958
 959 def print_annotated_src_file(
 960     events: Events,
 961     dict_line_cc: DictLineCc,
 962     src_file: TextIO,
 963     annotated_ccs: AnnotatedCcs,
 964     summary_cc: Cc,
 965 ) -> None:
 966     printer = CcPrinter(events, summary_cc)
 967     printer.init_ccs(list(dict_line_cc.values()))
 968     # The starting fancy has already been printed by the caller.
 969     printer.print_events("")
 970     print()
 971
 972     # The CC for line 0 is special, holding counts attributed to the source
 973     # file but not to any particular line (due to incomplete debug info).
 974     # Annotate the start of the file with this info, if present.
 975     line0_cc = dict_line_cc.pop(0, None)
 976     if line0_cc:
 977         suffix = "<unknown (line 0)>"
 978         printer.print_cc(line0_cc, None, suffix)
 979         add_cc_to_cc(line0_cc, annotated_ccs.line_nums_unknown_cc)
 980         print()
 981
 982     # Find interesting line ranges: all lines with a CC, and all lines within
 983     # `args.context` lines of a line with a CC.
 984     line_nums = list(sorted(dict_line_cc.keys()))
 985     pairs: list[tuple[int, int]] = []
 986     n = len(line_nums)
 987     i = 0
 988     context = args.context
 989     while i < n:
 990         lo = max(line_nums[i] - context, 1)  # `max` to prevent negatives
 991         while i < n - 1 and line_nums[i] + 2 * context >= line_nums[i + 1]:
 992             i += 1
 993         hi = line_nums[i] + context
 994         pairs.append((lo, hi))
 995         i += 1
 996
 997     def print_lines(pairs: list[tuple[int, int]]) -> None:
 998         line_num = 0
 999         while pairs:
1000             src_line = ""
1001             (lo, hi) = pairs.pop(0)
1002             while line_num < lo - 1:
1003                 src_line = src_file.readline()
1004                 line_num += 1
1005                 if not src_line:
1006                     return  # EOF
1007
1008             # Print line number, unless start of file.
1009             if lo != 1:
1010                 print("-- line", lo, "-" * 40)
1011
1012             while line_num < hi:
1013                 src_line = src_file.readline()
1014                 line_num += 1
1015                 if not src_line:
1016                     return  # EOF
1017                 if line_nums and line_num == line_nums[0]:
1018                     printer.print_cc(dict_line_cc[line_num], None, src_line[:-1])
1019                     add_cc_to_cc(
1020                         dict_line_cc[line_num], annotated_ccs.line_nums_known_cc
1021                     )
1022                     del line_nums[0]
1023                 else:
1024                     printer.print_missing_cc(src_line[:-1])
1025
1026             # Print line number.
1027             print("-- line", hi, "-" * 40)
1028
1029     # Annotate chosen lines, tracking total annotated counts.
1030     if pairs:
1031         print_lines(pairs)
1032
1033         # If there was info on lines past the end of the file, warn.
1034         if line_nums:
1035             print()
1036             for line_num in line_nums:
1037                 printer.print_cc(
1038                     dict_line_cc[line_num], None, f"<bogus line {line_num}>"
1039                 )
1040                 add_cc_to_cc(dict_line_cc[line_num], annotated_ccs.line_nums_known_cc)
1041
1042             print()
1043             warn_bogus_lines(src_file.name)
1044
1045         print()
1046
1047
1048 # This partially consumes `dict_mfl_dict_line_cc`, and fully consumes
1049 # `dict_mfl_olfs`.
1050 def print_annotated_src_files(
1051     ann_mfls: set[str],
1052     events: Events,
1053     dict_mfl_ofls: DictMflOfls,
1054     dict_mfl_dict_line_cc: DictMflDictLineCc,
1055     summary_cc: Cc,
1056 ) -> AnnotatedCcs:
1057     annotated_ccs = AnnotatedCcs(events)
1058
1059     def add_dict_line_cc_to_cc(dict_line_cc: DictLineCc, accum_cc: Cc) -> None:
1060         for line_cc in dict_line_cc.values():
1061             add_cc_to_cc(line_cc, accum_cc)
1062
1063     # Exclude the unknown ("???") file, which is unannotatable.
1064     ann_mfls.discard("???")
1065     if "???" in dict_mfl_dict_line_cc:
1066         dict_line_cc = dict_mfl_dict_line_cc.pop("???")
1067         add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.files_unknown_cc)
1068
1069     def print_ann_fancy(mfl: str) -> None:
1070         print_fancy(f"Annotated source file: {mfl}")
1071
1072     # This can raise an `OSError`.
1073     def all_ofl_contents_identical(ofls: list[str]) -> bool:
1074         for i in range(len(ofls) - 1):
1075             if not filecmp.cmp(ofls[i], ofls[i + 1], shallow=False):
1076                 return False
1077
1078         return True
1079
1080     for mfl in sorted(ann_mfls):
1081         ofls = sorted(dict_mfl_ofls.pop(mfl))
1082         first_ofl = ofls[0]
1083
1084         try:
1085             if all_ofl_contents_identical(ofls):
1086                 # All the Ofls that map to this Mfl are identical, which means we
1087                 # can annotate, and it doesn't matter which Ofl we use.
1088                 with open(first_ofl, "r", encoding="utf-8") as src_file:
1089                     dict_line_cc = dict_mfl_dict_line_cc.pop(mfl)
1090                     print_ann_fancy(mfl)
1091
1092                     # Because all the Ofls are identical, we can treat their
1093                     # mtimes as if they are all as early as the earliest one.
1094                     # Therefore, we warn only if the earliest source file is
1095                     # more recent than the cgout file.
1096                     min_ofl_st_mtime_ns = min(os.stat(ofl).st_mtime_ns for ofl in ofls)
1097
1098                     for cgout_filename in args.cgout_filename:
1099                         if min_ofl_st_mtime_ns > os.stat(cgout_filename).st_mtime_ns:
1100                             warn_ofls_are_all_newer(ofls, cgout_filename)
1101
1102                     print_annotated_src_file(
1103                         events,
1104                         dict_line_cc,
1105                         src_file,
1106                         annotated_ccs,
1107                         summary_cc,
1108                     )
1109             else:
1110                 dict_line_cc = dict_mfl_dict_line_cc.pop(mfl)
1111                 add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.non_identical_cc)
1112
1113                 # We could potentially do better here.
1114                 # - Annotate until the first line where the src files diverge.
1115                 # - Also, heuristic resyncing, e.g. by looking for matching
1116                 #   lines (of sufficient complexity) after a divergence.
1117                 print_ann_fancy(mfl)
1118                 print(
1119                     "Unannotated because two or more of these original files are not "
1120                     "identical:",
1121                     *ofls,
1122                     sep="\n- ",
1123                 )
1124                 print()
1125         except OSError:
1126             dict_line_cc = dict_mfl_dict_line_cc.pop(mfl)
1127             add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.unreadable_cc)
1128
1129             print_ann_fancy(mfl)
1130             print(
1131                 "Unannotated because one or more of these original files are "
1132                 "unreadable:",
1133                 *ofls,
1134                 sep="\n- ",
1135             )
1136             print()
1137
1138     # Sum the CCs remaining in `dict_mfl_dict_line_cc`, which are all in files
1139     # below the threshold.
1140     for dict_line_cc in dict_mfl_dict_line_cc.values():
1141         add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.below_threshold_cc)
1142
1143     return annotated_ccs
1144
1145
1146 def print_annotation_summary(
1147     events: Events,
1148     annotated_ccs: AnnotatedCcs,
1149     summary_cc: Cc,
1150 ) -> None:
1151     # Show how many events were covered by annotated lines above.
1152     printer = CcPrinter(events, summary_cc)
1153     printer.init_ccs(annotated_ccs.ccs())
1154     print_fancy("Annotation summary")
1155     printer.print_events("")
1156     print()
1157
1158     total_cc = events.mk_empty_cc()
1159     for cc, label in zip(annotated_ccs.ccs(), AnnotatedCcs.labels):
1160         printer.print_cc(cc, None, label)
1161         add_cc_to_cc(cc, total_cc)
1162
1163     print()
1164
1165     # Internal sanity check.
1166     if summary_cc != total_cc:
1167         msg = (
1168             "`summary:` line doesn't match computed annotated counts\n"
1169             f"- summary:   {summary_cc}\n"
1170             f"- annotated: {total_cc}"
1171         )
1172         die(msg)
1173
1174
1175 def main() -> None:
1176     # Metadata, initialized to empty states.
1177     descs: list[str] = []
1178     cmds: list[str] = []
1179     events = Events()
1180
1181     # For tracking original filenames to modified filenames.
1182     dict_mfl_ofls: DictMflOfls = defaultdict(set)
1183
1184     # Different places where we accumulate CC data. Initialized to invalid
1185     # states prior to the number of events being known.
1186     dict_mfl_dcc: DictMnameDcc = defaultdict(None)
1187     dict_mfn_dcc: DictMnameDcc = defaultdict(None)
1188     dict_mfl_dict_line_cc: DictMflDictLineCc = defaultdict(None)
1189     summary_cc: Cc = []
1190
1191     for n, filename in enumerate(args.cgout_filename):
1192         is_first_file = n == 0
1193         read_cgout_file(
1194             filename,
1195             is_first_file,
1196             descs,
1197             cmds,
1198             events,
1199             dict_mfl_ofls,
1200             dict_mfl_dcc,
1201             dict_mfn_dcc,
1202             dict_mfl_dict_line_cc,
1203             summary_cc,
1204         )
1205
1206     # Each of the following calls prints a section of the output.
1207     print_metadata(descs, cmds, events)
1208     print_summary(events, summary_cc)
1209     ann_mfls = print_mname_summary(
1210         "File:function", "< ", events, dict_mfl_dcc, summary_cc
1211     )
1212     print_mname_summary("Function:file", "> ", events, dict_mfn_dcc, summary_cc)
1213     if args.annotate:
1214         annotated_ccs = print_annotated_src_files(
1215             ann_mfls, events, dict_mfl_ofls, dict_mfl_dict_line_cc, summary_cc
1216         )
1217
1218         print_annotation_summary(events, annotated_ccs, summary_cc)
1219
1220
1221 if __name__ == "__main__":
1222     main()