cachegrind/cg_annotate.in

   1 #! /usr/bin/env python3
   2 # pyright: strict
   3
   4 # --------------------------------------------------------------------
   5 # --- Cachegrind's annotator.                       cg_annotate.in ---
   6 # --------------------------------------------------------------------
   7
   8 # This file is part of Cachegrind, a Valgrind tool for cache
   9 # profiling programs.
  10 #
  11 # Copyright (C) 2002-2023 Nicholas Nethercote
  12 #    njn@valgrind.org
  13 #
  14 # This program is free software; you can redistribute it and/or
  15 # modify it under the terms of the GNU General Public License as
  16 # published by the Free Software Foundation; either version 2 of the
  17 # License, or (at your option) any later version.
  18 #
  19 # This program is distributed in the hope that it will be useful, but
  20 # WITHOUT ANY WARRANTY; without even the implied warranty of
  21 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22 # General Public License for more details.
  23 #
  24 # You should have received a copy of the GNU General Public License
  25 # along with this program; if not, see <http://www.gnu.org/licenses/>.
  26 #
  27 # The GNU General Public License is contained in the file COPYING.
  28
  29 # This script reads Cachegrind output files and produces human-readable output.
  30 #
  31 # Use `make pyann` to "build" this script with `auxprogs/pybuild.rs` every time
  32 # it is changed. This runs the formatters, type-checkers, and linters on
  33 # `cg_annotate.in` and then generates `cg_annotate`.
  34
  35 from __future__ import annotations
  36
  37 import filecmp
  38 import os
  39 import re
  40 import sys
  41 from argparse import ArgumentParser, BooleanOptionalAction, Namespace
  42 from collections import defaultdict
  43 from typing import Callable, DefaultDict, NoReturn, TextIO
  44
  45
  46 def die(msg: str) -> NoReturn:
  47     print("cg_annotate: error:", msg, file=sys.stderr)
  48     sys.exit(1)
  49
  50
  51 SearchAndReplace = Callable[[str], str]
  52
  53 # A typed wrapper for parsed args.
  54 class Args(Namespace):
  55     # None of these fields are modified after arg parsing finishes.
  56     diff: bool
  57     mod_filename: SearchAndReplace
  58     mod_funcname: SearchAndReplace
  59     show: list[str]
  60     sort: list[str]
  61     threshold: float  # a percentage
  62     show_percs: bool
  63     annotate: bool
  64     context: int
  65     cgout_filename: list[str]
  66
  67     @staticmethod
  68     def parse() -> Args:
  69         # We support Perl-style `s/old/new/flags` search-and-replace
  70         # expressions, because that's how this option was implemented in the
  71         # old Perl version of `cg_diff`. This requires conversion from
  72         # `s/old/new/` style to `re.sub`. The conversion isn't a perfect
  73         # emulation of Perl regexps (e.g. Python uses `\1` rather than `$1` for
  74         # using captures in the `new` part), but it should be close enough. The
  75         # only supported flags are `g` (global) and `i` (ignore case).
  76         def search_and_replace(regex: str | None) -> SearchAndReplace:
  77             if regex is None:
  78                 return lambda s: s
  79
  80             # Extract the parts of an `s/old/new/tail` regex. `(?<!\\)/` is an
  81             # example of negative lookbehind. It means "match a forward slash
  82             # unless preceded by a backslash".
  83             m = re.match(r"s/(.*)(?<!\\)/(.*)(?<!\\)/(g|i|gi|ig|)$", regex)
  84             if m is None:
  85                 raise ValueError
  86
  87             # Forward slashes must be escaped in an `s/old/new/` expression,
  88             # but we then must unescape them before using them with `re.sub`.
  89             pat = m.group(1).replace(r"\/", r"/")
  90             repl = m.group(2).replace(r"\/", r"/")
  91             tail = m.group(3)
  92
  93             if "g" in tail:
  94                 count = 0  # unlimited
  95             else:
  96                 count = 1
  97
  98             if "i" in tail:
  99                 flags = re.IGNORECASE
 100             else:
 101                 flags = re.RegexFlag(0)
 102
 103             return lambda s: re.sub(re.compile(pat, flags=flags), repl, s, count=count)
 104
 105         def comma_separated_list(values: str) -> list[str]:
 106             return values.split(",")
 107
 108         def threshold(n: str) -> float:
 109             f = float(n)
 110             if 0 <= f <= 20:
 111                 return f
 112             raise ValueError
 113
 114         # Add a bool argument that defaults to true.
 115         #
 116         # Supports these forms: `--foo`, `--no-foo`, `--foo=yes`, `--foo=no`.
 117         # The latter two were the forms supported by the old Perl version of
 118         # `cg_annotate`, and are now deprecated.
 119         def add_bool_argument(
 120             p: ArgumentParser, new_name: str, old_name: str, help_: str
 121         ) -> None:
 122             new_flag = "--" + new_name
 123             old_flag = "--" + old_name
 124             dest = new_name.replace("-", "_")
 125
 126             # Note: the default value is always printed with `BooleanOptionalAction`,
 127             # due to an argparse bug: https://github.com/python/cpython/issues/83137.
 128             p.add_argument(
 129                 new_flag,
 130                 default=True,
 131                 action=BooleanOptionalAction,
 132                 help=help_,
 133             )
 134             p.add_argument(
 135                 f"{old_flag}=yes",
 136                 dest=dest,
 137                 action="store_true",
 138                 help=f"(deprecated) same as --{new_name}",
 139             )
 140             p.add_argument(
 141                 f"{old_flag}=no",
 142                 dest=dest,
 143                 action="store_false",
 144                 help=f"(deprecated) same as --no-{new_name}",
 145             )
 146
 147         p = ArgumentParser(description="Process one or more Cachegrind output files.")
 148
 149         p.add_argument("--version", action="version", version="%(prog)s-@VERSION@")
 150         p.add_argument(
 151             "--diff",
 152             default=False,
 153             action="store_true",
 154             help="perform a diff between two Cachegrind output files",
 155         )
 156         p.add_argument(
 157             "--mod-filename",
 158             type=search_and_replace,
 159             metavar="REGEX",
 160             default=search_and_replace(None),
 161             help="a search-and-replace regex applied to filenames, e.g. "
 162             "`s/prog[0-9]/progN/`",
 163         )
 164         p.add_argument(
 165             "--mod-funcname",
 166             type=search_and_replace,
 167             metavar="REGEX",
 168             default=search_and_replace(None),
 169             help="like --mod-filename, but for function names",
 170         )
 171         p.add_argument(
 172             "--show",
 173             type=comma_separated_list,
 174             metavar="A,B,C",
 175             help="only show figures for events A,B,C (default: all events)",
 176         )
 177         p.add_argument(
 178             "--sort",
 179             type=comma_separated_list,
 180             metavar="A,B,C",
 181             help="sort functions by events A,B,C (default: event column order)",
 182         )
 183         p.add_argument(
 184             "--threshold",
 185             type=threshold,
 186             default=0.1,
 187             metavar="N:[0,20]",
 188             help="only show file:function/function:file pairs with more than "
 189             "N%% of primary sort event counts (default: %(default)s)",
 190         )
 191         add_bool_argument(
 192             p,
 193             "show-percs",
 194             "show-percs",
 195             "show a percentage for each non-zero count",
 196         )
 197         add_bool_argument(
 198             p,
 199             "annotate",
 200             "auto",
 201             "annotate all source files containing functions that reached the "
 202             "event count threshold",
 203         )
 204         p.add_argument(
 205             "--context",
 206             type=int,
 207             default=8,
 208             metavar="N",
 209             help="print N lines of context before and after annotated lines "
 210             "(default: %(default)s)",
 211         )
 212         p.add_argument(
 213             "cgout_filename",
 214             nargs="+",
 215             metavar="cachegrind-out-file",
 216             help="file produced by Cachegrind",
 217         )
 218
 219         # `args0` name used to avoid shadowing the global `args`, which pylint
 220         # doesn't like.
 221         args0 = p.parse_args(namespace=Args())
 222         if args0.diff and len(args0.cgout_filename) != 2:
 223             p.print_usage(file=sys.stderr)
 224             die("argument --diff: requires exactly two Cachegrind output files")
 225
 226         return args0
 227
 228
 229 # Args are stored in a global for easy access.
 230 args = Args.parse()
 231
 232
 233 # A single instance of this class is constructed, from `args` and the `events:`
 234 # line in the cgout file.
 235 class Events:
 236     # The event names.
 237     events: list[str]
 238
 239     # Equal to `len(self.events)`.
 240     num_events: int
 241
 242     # The order in which we must traverse events for --show. Can be shorter
 243     # than `events`.
 244     show_events: list[str]
 245
 246     # Like `show_events`, but indices into `events`, rather than names.
 247     show_indices: list[int]
 248
 249     # The order in which we must traverse events for --sort. Can be shorter
 250     # than `events`.
 251     sort_events: list[str]
 252
 253     # Like `sort_events`, but indices into `events`, rather than names.
 254     sort_indices: list[int]
 255
 256     def __init__(self) -> None:
 257         # All fields are left uninitialized here, and set instead in `init`.
 258         pass
 259
 260     def init(self, text: str) -> None:
 261         self.events = text.split()
 262         self.num_events = len(self.events)
 263
 264         # A temporary dict mapping events to indices, [0, n-1].
 265         event_indices = {event: n for n, event in enumerate(self.events)}
 266
 267         # If --show is given, check it is valid. If --show is not given,
 268         # default to all events in the standard order.
 269         if args.show:
 270             for event in args.show:
 271                 if event not in event_indices:
 272                     die(f"--show event `{event}` did not appear in `events:` line")
 273             self.show_events = args.show
 274         else:
 275             self.show_events = self.events
 276
 277         self.show_indices = [event_indices[event] for event in self.show_events]
 278
 279         # Likewise for --sort.
 280         if args.sort:
 281             for event in args.sort:
 282                 if event not in event_indices:
 283                     die(f"--sort event `{event}` did not appear in `events:` line")
 284             self.sort_events = args.sort
 285         else:
 286             self.sort_events = self.events
 287
 288         self.sort_indices = [event_indices[event] for event in self.sort_events]
 289
 290     # Raises a `ValueError` exception on syntax error.
 291     def mk_cc(self, str_counts: list[str]) -> Cc:
 292         # This is slightly faster than a list comprehension.
 293         counts = list(map(int, str_counts))
 294
 295         if len(counts) == self.num_events:
 296             pass
 297         elif len(counts) < self.num_events:
 298             # Add zeroes at the end for any missing numbers.
 299             counts.extend([0] * (self.num_events - len(counts)))
 300         else:
 301             raise ValueError
 302
 303         return counts
 304
 305     def mk_empty_cc(self) -> Cc:
 306         # This is much faster than a list comprehension.
 307         return [0] * self.num_events
 308
 309     def mk_empty_dcc(self) -> Dcc:
 310         return Dcc(self.mk_empty_cc(), defaultdict(self.mk_empty_cc))
 311
 312
 313 # A "cost centre", which is a dumb container for counts. Always the same length
 314 # as `Events.events`, but it doesn't even know event names. `Events.mk_cc` and
 315 # `Events.mk_empty_cc` are used for construction.
 316 #
 317 # This used to be a class with a single field `counts: list[int]`, but this
 318 # type is very hot and just using a type alias is much faster.
 319 Cc = list[int]
 320
 321 # Add the counts in `a_cc` to `b_cc`.
 322 def add_cc_to_cc(a_cc: Cc, b_cc: Cc) -> None:
 323     for i, a_count in enumerate(a_cc):
 324         b_cc[i] += a_count
 325
 326
 327 # Subtract the counts in `a_cc` from `b_cc`.
 328 def sub_cc_from_cc(a_cc: Cc, b_cc: Cc) -> None:
 329     for i, a_count in enumerate(a_cc):
 330         b_cc[i] -= a_count
 331
 332
 333 # Unrolled version of `add_cc_to_cc`, for speed.
 334 def add_cc_to_ccs(
 335     a_cc: Cc, b_cc1: Cc, b_cc2: Cc, b_cc3: Cc, b_cc4: Cc, b_cc5: Cc, total_cc: Cc
 336 ) -> None:
 337     for i, a_count in enumerate(a_cc):
 338         b_cc1[i] += a_count
 339         b_cc2[i] += a_count
 340         b_cc3[i] += a_count
 341         b_cc4[i] += a_count
 342         b_cc5[i] += a_count
 343         total_cc[i] += a_count
 344
 345
 346 # Unrolled version of `sub_cc_from_cc`, for speed. Note that the last one,
 347 # `total_cc`, is added.
 348 def sub_cc_from_ccs(
 349     a_cc: Cc, b_cc1: Cc, b_cc2: Cc, b_cc3: Cc, b_cc4: Cc, b_cc5: Cc, total_cc: Cc
 350 ) -> None:
 351     for i, a_count in enumerate(a_cc):
 352         b_cc1[i] -= a_count
 353         b_cc2[i] -= a_count
 354         b_cc3[i] -= a_count
 355         b_cc4[i] -= a_count
 356         b_cc5[i] -= a_count
 357         total_cc[i] += a_count
 358
 359
 360 # Update `min_cc` and `max_cc` with `self`.
 361 def update_cc_extremes(self: Cc, min_cc: Cc, max_cc: Cc) -> None:
 362     for i, count in enumerate(self):
 363         if count > max_cc[i]:
 364             max_cc[i] = count
 365         elif count < min_cc[i]:
 366             min_cc[i] = count
 367
 368
 369 # Note: some abbrevations used below:
 370 # - Ofl/ofl: original filename, as mentioned in a cgout file.
 371 # - Ofn/ofn: original function name, as mentioned in a cgout file.
 372 # - Mfl/mfl: modified filename, the result of passing an Ofl through
 373 #   `--mod-filename`.
 374 # - Mfn/mfn: modified function name, the result of passing an Ofn through
 375 #   `--mod-funcname`.
 376 # - Mname/mname: modified name, used for what could be an Mfl or an Mfn.
 377
 378 # A deep cost centre with a dict for the inner mnames and CCs.
 379 class Dcc:
 380     outer_cc: Cc
 381     inner_dict_mname_cc: DictMnameCc
 382
 383     def __init__(self, outer_cc: Cc, inner_dict_mname_cc: DictMnameCc) -> None:
 384         self.outer_cc = outer_cc
 385         self.inner_dict_mname_cc = inner_dict_mname_cc
 386
 387
 388 # A deep cost centre with a list for the inner mnames and CCs. Used during
 389 # filtering and sorting.
 390 class Lcc:
 391     outer_cc: Cc
 392     inner_list_mname_cc: ListMnameCc
 393
 394     def __init__(self, outer_cc: Cc, inner_list_mname_cc: ListMnameCc) -> None:
 395         self.outer_cc = outer_cc
 396         self.inner_list_mname_cc = inner_list_mname_cc
 397
 398
 399 # Per-Mfl/Mfn CCs. The list version is used during filtering and sorting.
 400 DictMnameCc = DefaultDict[str, Cc]
 401 ListMnameCc = list[tuple[str, Cc]]
 402
 403 # Per-Mfl/Mfn DCCs. The outer Mnames are Mfls and the inner Mnames are Mfns, or
 404 # vice versa. The list version is used during filtering and sorting.
 405 DictMnameDcc = DefaultDict[str, Dcc]
 406 ListMnameLcc = list[tuple[str, Lcc]]
 407
 408 # Per-line CCs, organised by Mfl and line number.
 409 DictLineCc = DefaultDict[int, Cc]
 410 DictMflDictLineCc = DefaultDict[str, DictLineCc]
 411
 412 # A dictionary tracking how Ofls get mapped to Mfls by `--mod-filename`. If
 413 # `--mod-filename` isn't used, each entry will be the identity mapping: ("foo"
 414 # -> set(["foo"])).
 415 DictMflOfls = DefaultDict[str, set[str]]
 416
 417
 418 def read_cgout_file(
 419     cgout_filename: str,
 420     is_first_file: bool,
 421     descs: list[str],
 422     cmds: list[str],
 423     events: Events,
 424     dict_mfl_ofls: DictMflOfls,
 425     dict_mfl_dcc: DictMnameDcc,
 426     dict_mfn_dcc: DictMnameDcc,
 427     dict_mfl_dict_line_cc: DictMflDictLineCc,
 428     summary_cc: Cc,
 429 ) -> None:
 430     # The file format is described in Cachegrind's manual.
 431     try:
 432         cgout_file = open(cgout_filename, "r", encoding="utf-8")
 433     except OSError as err:
 434         die(f"{err}")
 435
 436     with cgout_file:
 437         cgout_line_num = 0
 438
 439         def parse_die(msg: str) -> NoReturn:
 440             die(f"{cgout_file.name}:{cgout_line_num}: {msg}")
 441
 442         def readline() -> str:
 443             nonlocal cgout_line_num
 444             cgout_line_num += 1
 445             return cgout_file.readline()
 446
 447         # Read "desc:" lines.
 448         desc = ""
 449         while line := readline():
 450             if m := re.match(r"desc:\s+(.*)", line):
 451                 desc += m.group(1) + "\n"
 452             else:
 453                 break
 454         descs.append(desc)
 455
 456         # Read "cmd:" line. (`line` is already set from the "desc:" loop.)
 457         if m := re.match(r"cmd:\s+(.*)", line):
 458             cmds.append(m.group(1))
 459         else:
 460             parse_die("missing a `command:` line")
 461
 462         # Read "events:" line.
 463         line = readline()
 464         if m := re.match(r"events:\s+(.*)", line):
 465             if is_first_file:
 466                 events.init(m.group(1))
 467             else:
 468                 events2 = Events()
 469                 events2.init(m.group(1))
 470                 if events.events != events2.events:
 471                     die("events in data files don't match")
 472         else:
 473             parse_die("missing an `events:` line")
 474
 475         def mk_empty_dict_line_cc() -> DictLineCc:
 476             return defaultdict(events.mk_empty_cc)
 477
 478         # The current Mfl and Mfn.
 479         mfl = ""
 480         mfn = ""
 481
 482         # These values are passed in by reference and are modified by this
 483         # function. But they can't be properly initialized until the `events:`
 484         # line of the first file is read and the number of events is known. So
 485         # we initialize them in an invalid state in `main`, and then
 486         # reinitialize them properly here, before their first use.
 487         if is_first_file:
 488             dict_mfl_dcc.default_factory = events.mk_empty_dcc
 489             dict_mfn_dcc.default_factory = events.mk_empty_dcc
 490             dict_mfl_dict_line_cc.default_factory = mk_empty_dict_line_cc
 491             summary_cc.extend(events.mk_empty_cc())
 492
 493         # These are refs into the dicts above, used to avoid repeated lookups.
 494         # They are all overwritten before first use.
 495         mfl_dcc = events.mk_empty_dcc()
 496         mfn_dcc = events.mk_empty_dcc()
 497         mfl_dcc_inner_mfn_cc = events.mk_empty_cc()
 498         mfn_dcc_inner_mfl_cc = events.mk_empty_cc()
 499         dict_line_cc = mk_empty_dict_line_cc()
 500         total_cc = events.mk_empty_cc()
 501
 502         # When diffing, we negate the first cgout file's counts to effectively
 503         # achieve `cgout2 - cgout1`.
 504         if args.diff and is_first_file:
 505             combine_cc_with_cc = sub_cc_from_cc
 506             combine_cc_with_ccs = sub_cc_from_ccs
 507         else:
 508             combine_cc_with_cc = add_cc_to_cc
 509             combine_cc_with_ccs = add_cc_to_ccs
 510
 511         summary_cc_present = False
 512
 513         # Line matching is done in order of pattern frequency, for speed.
 514         while line := readline():
 515             if line[0].isdigit():
 516                 split_line = line.split()
 517                 try:
 518                     line_num = int(split_line[0])
 519                     cc = events.mk_cc(split_line[1:])
 520                 except ValueError:
 521                     parse_die("malformed or too many event counts")
 522
 523                 # Record this CC at various levels.
 524                 combine_cc_with_ccs(
 525                     cc,
 526                     mfl_dcc.outer_cc,
 527                     mfn_dcc.outer_cc,
 528                     mfl_dcc_inner_mfn_cc,
 529                     mfn_dcc_inner_mfl_cc,
 530                     dict_line_cc[line_num],
 531                     total_cc,
 532                 )
 533
 534             elif line.startswith("fn="):
 535                 ofn = line[3:-1]
 536                 mfn = args.mod_funcname(ofn)
 537                 # `mfl_dcc` is unchanged.
 538                 mfn_dcc = dict_mfn_dcc[mfn]
 539                 mfl_dcc_inner_mfn_cc = mfl_dcc.inner_dict_mname_cc[mfn]
 540                 mfn_dcc_inner_mfl_cc = mfn_dcc.inner_dict_mname_cc[mfl]
 541
 542             elif line.startswith("fl="):
 543                 ofl = line[3:-1]
 544                 mfl = args.mod_filename(ofl)
 545                 dict_mfl_ofls[mfl].add(ofl)
 546                 # A `fn=` line should follow, overwriting the function name.
 547                 mfn = "<unspecified>"
 548                 mfl_dcc = dict_mfl_dcc[mfl]
 549                 mfn_dcc = dict_mfn_dcc[mfn]
 550                 mfl_dcc_inner_mfn_cc = mfl_dcc.inner_dict_mname_cc[mfn]
 551                 mfn_dcc_inner_mfl_cc = mfn_dcc.inner_dict_mname_cc[mfl]
 552                 dict_line_cc = dict_mfl_dict_line_cc[mfl]
 553
 554             elif m := re.match(r"summary:\s+(.*)", line):
 555                 summary_cc_present = True
 556                 try:
 557                     this_summary_cc = events.mk_cc(m.group(1).split())
 558                     combine_cc_with_cc(this_summary_cc, summary_cc)
 559
 560                     # Check summary is correct. Note that `total_cc` doesn't
 561                     # get negated for the first file in a diff, unlike the
 562                     # other CCs, because it's only used here as a sanity check.
 563                     if this_summary_cc != total_cc:
 564                         msg = (
 565                             "`summary:` line doesn't match computed total\n"
 566                             f"- summary:  {this_summary_cc}\n"
 567                             f"- computed: {total_cc}"
 568                         )
 569                         parse_die(msg)
 570
 571                 except ValueError:
 572                     parse_die("malformed or too many event counts")
 573
 574             elif line == "\n" or line.startswith("#"):
 575                 # Skip empty lines and comment lines.
 576                 pass
 577
 578             else:
 579                 parse_die(f"malformed line: {line[:-1]}")
 580
 581     # Check if summary line was present.
 582     if not summary_cc_present:
 583         parse_die("missing `summary:` line, aborting")
 584
 585
 586 # The width of a column, in three parts.
 587 class Width:
 588     # Width of the widest commified event count.
 589     count: int
 590
 591     # Width of the widest first percentage, of the form ` (n.n%)` or ` (n.n%,`.
 592     perc1: int
 593
 594     # Width of the widest second percentage, of the form ` n.n%)`.
 595     perc2: int
 596
 597     def __init__(self, count: int, perc1: int, perc2: int) -> None:
 598         self.count = count
 599         self.perc1 = perc1
 600         self.perc2 = perc2
 601
 602
 603 class CcPrinter:
 604     # Note: every `CcPrinter` gets the same `Events` object.
 605     events: Events
 606
 607     # Note: every `CcPrinter` gets the same summary CC.
 608     summary_cc: Cc
 609
 610     # String to print before the event names.
 611     events_prefix: str
 612
 613     # The widths of each event column. For simplicity, its length matches
 614     # `events.events`, even though not all events are necessarily shown.
 615     widths: list[Width]
 616
 617     # Text of a missing CC, which can be computed in advance.
 618     missing_cc_str: str
 619
 620     # Must call `init_ccs` or `init_list_mname_lcc` after this.
 621     def __init__(self, events: Events, summary_cc: Cc) -> None:
 622         self.events = events
 623         self.summary_cc = summary_cc
 624         # Other fields initialized in `init_*`.
 625
 626     def init_ccs(self, ccs: list[Cc]) -> None:
 627         self.events_prefix = ""
 628
 629         # Find min and max count for each event. One of them will be the widest
 630         # value.
 631         min_cc = self.events.mk_empty_cc()
 632         max_cc = self.events.mk_empty_cc()
 633         for cc in ccs:
 634             update_cc_extremes(cc, min_cc, max_cc)
 635
 636         self.init_widths(min_cc, max_cc, None, None)
 637
 638     def init_list_mname_lcc(self, list_mname_lcc: ListMnameLcc) -> None:
 639         self.events_prefix = "  "
 640
 641         cumul_cc = self.events.mk_empty_cc()
 642
 643         # Find min and max value for each event. One of them will be the widest
 644         # value. Likewise for the cumulative counts.
 645         min_cc = self.events.mk_empty_cc()
 646         max_cc = self.events.mk_empty_cc()
 647         min_cumul_cc = self.events.mk_empty_cc()
 648         max_cumul_cc = self.events.mk_empty_cc()
 649         for _, lcc in list_mname_lcc:
 650             # Consider both outer and inner CCs for `count` and `perc1`.
 651             update_cc_extremes(lcc.outer_cc, min_cc, max_cc)
 652             for _, inner_cc in lcc.inner_list_mname_cc:
 653                 update_cc_extremes(inner_cc, min_cc, max_cc)
 654
 655             # Consider only outer CCs for `perc2`.
 656             add_cc_to_cc(lcc.outer_cc, cumul_cc)
 657             update_cc_extremes(cumul_cc, min_cumul_cc, max_cumul_cc)
 658
 659         self.init_widths(min_cc, max_cc, min_cumul_cc, max_cumul_cc)
 660
 661     def init_widths(
 662         self, min_cc1: Cc, max_cc1: Cc, min_cc2: Cc | None, max_cc2: Cc | None
 663     ) -> None:
 664         self.widths = [Width(0, 0, 0)] * self.events.num_events
 665         for i in range(len(self.events.events)):
 666             # Get count and percs widths of the min and max CCs.
 667             (min_count, min_perc1, min_perc2) = self.count_and_percs_strs(
 668                 min_cc1, min_cc2, i
 669             )
 670             (max_count, max_perc1, max_perc2) = self.count_and_percs_strs(
 671                 max_cc1, max_cc2, i
 672             )
 673             self.widths[i] = Width(
 674                 max(len(min_count), len(max_count)),
 675                 max(len(min_perc1), len(max_perc1)),
 676                 max(len(min_perc2), len(max_perc2)),
 677             )
 678
 679         self.missing_cc_str = ""
 680         for i in self.events.show_indices:
 681             self.missing_cc_str += self.count_and_percs_str(i, ".", "", "")
 682
 683     # Get the count and perc string for `cc1[i]` and the perc string for
 684     # `cc2[i]`. (Unless `cc2` is `None`, in which case `perc2` will be "".)
 685     def count_and_percs_strs(
 686         self, cc1: Cc, cc2: Cc | None, i: int
 687     ) -> tuple[str, str, str]:
 688         count = f"{cc1[i]:,d}"  # commify
 689         if args.show_percs:
 690             summary_count = self.summary_cc[i]
 691             if cc2 is None:
 692                 # A plain or inner CC, with a single percentage.
 693                 if cc1[i] == 0:
 694                     # Don't show percentages for "0" entries, it's just clutter.
 695                     perc1 = ""
 696                 elif summary_count == 0:
 697                     # Avoid dividing by zero.
 698                     perc1 = " (n/a)"
 699                 else:
 700                     perc1 = f" ({cc1[i] * 100 / summary_count:.1f}%)"
 701                 perc2 = ""
 702             else:
 703                 # An outer CC, with two percentages.
 704                 if summary_count == 0:
 705                     # Avoid dividing by zero.
 706                     perc1 = " (n/a,"
 707                     perc2 = " n/a)"
 708                 else:
 709                     perc1 = f" ({cc1[i] * 100 / summary_count:.1f}%,"
 710                     perc2 = f" {cc2[i] * 100 / summary_count:.1f}%)"
 711         else:
 712             perc1 = ""
 713             perc2 = ""
 714
 715         return (count, perc1, perc2)
 716
 717     def count_and_percs_str(self, i: int, count: str, perc1: str, perc2: str) -> str:
 718         event_w = len(self.events.events[i])
 719         count_w = self.widths[i].count
 720         perc1_w = self.widths[i].perc1
 721         perc2_w = self.widths[i].perc2
 722         pre_w = max(0, event_w - count_w - perc1_w - perc2_w)
 723         return f"{'':>{pre_w}}{count:>{count_w}}{perc1:>{perc1_w}}{perc2:>{perc2_w}} "
 724
 725     def print_events(self, suffix: str) -> None:
 726         print(self.events_prefix, end="")
 727         for i in self.events.show_indices:
 728             event = self.events.events[i]
 729             event_w = len(event)
 730             count_w = self.widths[i].count
 731             perc1_w = self.widths[i].perc1
 732             perc2_w = self.widths[i].perc2
 733             print(f"{event:_<{max(event_w, count_w + perc1_w + perc2_w)}} ", end="")
 734
 735         print(suffix)
 736
 737     def print_lcc(self, indent: str, lcc: Lcc, outer_mname: str, cumul_cc: Cc) -> None:
 738         print(indent, end="")
 739         if (
 740             len(lcc.inner_list_mname_cc) == 1
 741             and lcc.outer_cc == lcc.inner_list_mname_cc[0][1]
 742         ):
 743             # There is only one inner CC, it met the threshold, and it is equal
 744             # to the outer CC. Print the inner CC and outer CC in a single
 745             # line, because they are the same.
 746             inner_mname = lcc.inner_list_mname_cc[0][0]
 747             self.print_cc(lcc.outer_cc, cumul_cc, f"{outer_mname}:{inner_mname}")
 748         else:
 749             # There are multiple inner CCs, and at least one met the threshold.
 750             # Print the outer CC and then the inner CCs, indented.
 751             self.print_cc(lcc.outer_cc, cumul_cc, f"{outer_mname}:")
 752             for inner_mname, inner_cc in lcc.inner_list_mname_cc:
 753                 print("  ", end="")
 754                 self.print_cc(inner_cc, None, f"  {inner_mname}")
 755         print()
 756
 757     # If `cc2` is `None`, it's a vanilla CC or inner CC. Otherwise, it's an
 758     # outer CC.
 759     def print_cc(self, cc: Cc, cc2: Cc | None, suffix: str) -> None:
 760         for i in self.events.show_indices:
 761             (count, perc1, perc2) = self.count_and_percs_strs(cc, cc2, i)
 762             print(self.count_and_percs_str(i, count, perc1, perc2), end="")
 763
 764         print("", suffix)
 765
 766     def print_missing_cc(self, suffix: str) -> None:
 767         print(self.missing_cc_str, suffix)
 768
 769
 770 # Used in various places in the output.
 771 def print_fancy(text: str) -> None:
 772     fancy = "-" * 80
 773     print(fancy)
 774     print("--", text)
 775     print(fancy)
 776
 777
 778 def print_metadata(descs: list[str], cmds: list[str], events: Events) -> None:
 779     print_fancy("Metadata")
 780
 781     def all_the_same(strs: list[str]) -> bool:
 782         for i in range(len(strs) - 1):
 783             if strs[i] != strs[i + 1]:
 784                 return False
 785
 786         return True
 787
 788     print("Invocation:      ", *sys.argv)
 789
 790     # When there are multiple descriptions, they are usually all the same. Only
 791     # print the description once in that case.
 792     if all_the_same(descs):
 793         print(descs[0], end="")
 794     else:
 795         for i, desc in enumerate(descs):
 796             print(f"Description {i+1}:")
 797             print(desc, end="")
 798
 799     # Commands are sometimes the same, sometimes not. Always print them
 800     # individually, but refer to the previous one when appropriate.
 801     if len(cmds) == 1:
 802         print("Command:         ", cmds[0])
 803     else:
 804         for i, cmd in enumerate(cmds):
 805             if i > 0 and cmds[i - 1] == cmd:
 806                 print(f"Command {i+1}:        (same as Command {i})")
 807             else:
 808                 print(f"Command {i+1}:       ", cmd)
 809
 810     print("Events recorded: ", *events.events)
 811     print("Events shown:    ", *events.show_events)
 812     print("Event sort order:", *events.sort_events)
 813     print("Threshold:        ", args.threshold, "%", sep="")
 814     print("Annotation:      ", "on" if args.annotate else "off")
 815     print()
 816
 817
 818 def print_summary(events: Events, summary_cc: Cc) -> None:
 819     printer = CcPrinter(events, summary_cc)
 820     printer.init_ccs([summary_cc])
 821     print_fancy("Summary")
 822     printer.print_events("")
 823     print()
 824     printer.print_cc(summary_cc, None, "PROGRAM TOTALS")
 825     print()
 826
 827
 828 def print_mname_summary(
 829     kind: str, indent: str, events: Events, dict_mname_dcc: DictMnameDcc, summary_cc: Cc
 830 ) -> set[str]:
 831     # The primary sort event is used for the threshold.
 832     threshold_index = events.sort_indices[0]
 833
 834     # Convert the threshold from a percentage to an event count.
 835     threshold = args.threshold * abs(summary_cc[threshold_index]) / 100
 836
 837     def meets_threshold(mname_and_cc: tuple[str, Cc]) -> bool:
 838         cc = mname_and_cc[1]
 839         return abs(cc[threshold_index]) >= threshold
 840
 841     # Create a list with the outer CC counts in sort order, so that
 842     # left-to-right list comparison does the right thing. Plus the outer name
 843     # at the end for deterministic output when all the event counts are
 844     # identical in two CCs.
 845     def key_mname_and_lcc(mname_and_lcc: tuple[str, Lcc]) -> tuple[list[int], str]:
 846         (outer_mname, lcc) = mname_and_lcc
 847         return (
 848             [abs(lcc.outer_cc[i]) for i in events.sort_indices],
 849             outer_mname,
 850         )
 851
 852     # Similar to `key_mname_and_lcc`.
 853     def key_mname_and_cc(mname_and_cc: tuple[str, Cc]) -> tuple[list[int], str]:
 854         (mname, cc) = mname_and_cc
 855         return ([abs(cc[i]) for i in events.sort_indices], mname)
 856
 857     # This is a `filter_map` operation, which Python doesn't directly support.
 858     list_mname_lcc: ListMnameLcc = []
 859     for outer_mname, dcc in dict_mname_dcc.items():
 860         # Filter out inner CCs for which the primary sort event count is below the
 861         # threshold, and sort the remainder.
 862         inner_list_mname_cc = sorted(
 863             filter(meets_threshold, dcc.inner_dict_mname_cc.items()),
 864             key=key_mname_and_cc,
 865             reverse=True,
 866         )
 867
 868         # If no inner CCs meet the threshold, ignore the entire DCC, even if
 869         # the outer CC meets the threshold.
 870         if len(inner_list_mname_cc) == 0:
 871             continue
 872
 873         list_mname_lcc.append((outer_mname, Lcc(dcc.outer_cc, inner_list_mname_cc)))
 874
 875     list_mname_lcc = sorted(list_mname_lcc, key=key_mname_and_lcc, reverse=True)
 876
 877     printer = CcPrinter(events, summary_cc)
 878     printer.init_list_mname_lcc(list_mname_lcc)
 879     print_fancy(kind + " summary")
 880     printer.print_events(" " + kind.lower())
 881     print()
 882
 883     # Print LCCs.
 884     threshold_mnames = set([])
 885     cumul_cc = events.mk_empty_cc()
 886     for mname, lcc in list_mname_lcc:
 887         add_cc_to_cc(lcc.outer_cc, cumul_cc)
 888         printer.print_lcc(indent, lcc, mname, cumul_cc)
 889         threshold_mnames.add(mname)
 890
 891     return threshold_mnames
 892
 893
 894 class AnnotatedCcs:
 895     line_nums_known_cc: Cc
 896     line_nums_unknown_cc: Cc
 897     non_identical_cc: Cc
 898     unreadable_cc: Cc
 899     below_threshold_cc: Cc
 900     files_unknown_cc: Cc
 901
 902     labels = [
 903         "  annotated: files known & above threshold & readable, line numbers known",
 904         "  annotated: files known & above threshold & readable, line numbers unknown",
 905         "unannotated: files known & above threshold & two or more non-identical",
 906         "unannotated: files known & above threshold & unreadable ",
 907         "unannotated: files known & below threshold",
 908         "unannotated: files unknown",
 909     ]
 910
 911     def __init__(self, events: Events) -> None:
 912         self.line_nums_known_cc = events.mk_empty_cc()
 913         self.line_nums_unknown_cc = events.mk_empty_cc()
 914         self.non_identical_cc = events.mk_empty_cc()
 915         self.unreadable_cc = events.mk_empty_cc()
 916         self.below_threshold_cc = events.mk_empty_cc()
 917         self.files_unknown_cc = events.mk_empty_cc()
 918
 919     def ccs(self) -> list[Cc]:
 920         return [
 921             self.line_nums_known_cc,
 922             self.line_nums_unknown_cc,
 923             self.non_identical_cc,
 924             self.unreadable_cc,
 925             self.below_threshold_cc,
 926             self.files_unknown_cc,
 927         ]
 928
 929
 930 def mk_warning(msg: str) -> str:
 931     return f"""\
 932 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 933 @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@ WARNING @@
 934 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 935 {msg}\
 936 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 937 """
 938
 939
 940 def warn_ofls_are_all_newer(ofls: list[str], cgout_filename: str) -> None:
 941     s = "".join([f"@ - {ofl}\n" for ofl in ofls])
 942     msg = f"""\
 943 @ Original source files are all newer than data file '{cgout_filename}':
 944 {s}@ Annotations may not be correct.
 945 """
 946     print(mk_warning(msg))
 947
 948
 949 def warn_bogus_lines(src_filename: str) -> None:
 950     msg = f"""\
 951 @@ Information recorded about lines past the end of '{src_filename}'.
 952 """
 953     print(mk_warning(msg), end="")
 954
 955
 956 def print_annotated_src_file(
 957     events: Events,
 958     dict_line_cc: DictLineCc,
 959     src_file: TextIO,
 960     annotated_ccs: AnnotatedCcs,
 961     summary_cc: Cc,
 962 ) -> None:
 963     printer = CcPrinter(events, summary_cc)
 964     printer.init_ccs(list(dict_line_cc.values()))
 965     # The starting fancy has already been printed by the caller.
 966     printer.print_events("")
 967     print()
 968
 969     # The CC for line 0 is special, holding counts attributed to the source
 970     # file but not to any particular line (due to incomplete debug info).
 971     # Annotate the start of the file with this info, if present.
 972     line0_cc = dict_line_cc.pop(0, None)
 973     if line0_cc:
 974         suffix = "<unknown (line 0)>"
 975         printer.print_cc(line0_cc, None, suffix)
 976         add_cc_to_cc(line0_cc, annotated_ccs.line_nums_unknown_cc)
 977         print()
 978
 979     # Find interesting line ranges: all lines with a CC, and all lines within
 980     # `args.context` lines of a line with a CC.
 981     line_nums = list(sorted(dict_line_cc.keys()))
 982     pairs: list[tuple[int, int]] = []
 983     n = len(line_nums)
 984     i = 0
 985     context = args.context
 986     while i < n:
 987         lo = max(line_nums[i] - context, 1)  # `max` to prevent negatives
 988         while i < n - 1 and line_nums[i] + 2 * context >= line_nums[i + 1]:
 989             i += 1
 990         hi = line_nums[i] + context
 991         pairs.append((lo, hi))
 992         i += 1
 993
 994     def print_lines(pairs: list[tuple[int, int]]) -> None:
 995         line_num = 0
 996         while pairs:
 997             src_line = ""
 998             (lo, hi) = pairs.pop(0)
 999             while line_num < lo - 1:
1000                 src_line = src_file.readline()
1001                 line_num += 1
1002                 if not src_line:
1003                     return  # EOF
1004
1005             # Print line number, unless start of file.
1006             if lo != 1:
1007                 print("-- line", lo, "-" * 40)
1008
1009             while line_num < hi:
1010                 src_line = src_file.readline()
1011                 line_num += 1
1012                 if not src_line:
1013                     return  # EOF
1014                 if line_nums and line_num == line_nums[0]:
1015                     printer.print_cc(dict_line_cc[line_num], None, src_line[:-1])
1016                     add_cc_to_cc(
1017                         dict_line_cc[line_num], annotated_ccs.line_nums_known_cc
1018                     )
1019                     del line_nums[0]
1020                 else:
1021                     printer.print_missing_cc(src_line[:-1])
1022
1023             # Print line number.
1024             print("-- line", hi, "-" * 40)
1025
1026     # Annotate chosen lines, tracking total annotated counts.
1027     if pairs:
1028         print_lines(pairs)
1029
1030         # If there was info on lines past the end of the file, warn.
1031         if line_nums:
1032             print()
1033             for line_num in line_nums:
1034                 printer.print_cc(
1035                     dict_line_cc[line_num], None, f"<bogus line {line_num}>"
1036                 )
1037                 add_cc_to_cc(dict_line_cc[line_num], annotated_ccs.line_nums_known_cc)
1038
1039             print()
1040             warn_bogus_lines(src_file.name)
1041
1042         print()
1043
1044
1045 # This partially consumes `dict_mfl_dict_line_cc`, and fully consumes
1046 # `dict_mfl_olfs`.
1047 def print_annotated_src_files(
1048     ann_mfls: set[str],
1049     events: Events,
1050     dict_mfl_ofls: DictMflOfls,
1051     dict_mfl_dict_line_cc: DictMflDictLineCc,
1052     summary_cc: Cc,
1053 ) -> AnnotatedCcs:
1054     annotated_ccs = AnnotatedCcs(events)
1055
1056     def add_dict_line_cc_to_cc(dict_line_cc: DictLineCc, accum_cc: Cc) -> None:
1057         for line_cc in dict_line_cc.values():
1058             add_cc_to_cc(line_cc, accum_cc)
1059
1060     # Exclude the unknown ("???") file, which is unannotatable.
1061     ann_mfls.discard("???")
1062     if "???" in dict_mfl_dict_line_cc:
1063         dict_line_cc = dict_mfl_dict_line_cc.pop("???")
1064         add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.files_unknown_cc)
1065
1066     def print_ann_fancy(mfl: str) -> None:
1067         print_fancy(f"Annotated source file: {mfl}")
1068
1069     # This can raise an `OSError`.
1070     def all_ofl_contents_identical(ofls: list[str]) -> bool:
1071         for i in range(len(ofls) - 1):
1072             if not filecmp.cmp(ofls[i], ofls[i + 1], shallow=False):
1073                 return False
1074
1075         return True
1076
1077     for mfl in sorted(ann_mfls):
1078         ofls = sorted(dict_mfl_ofls.pop(mfl))
1079         first_ofl = ofls[0]
1080
1081         try:
1082             if all_ofl_contents_identical(ofls):
1083                 # All the Ofls that map to this Mfl are identical, which means we
1084                 # can annotate, and it doesn't matter which Ofl we use.
1085                 with open(first_ofl, "r", encoding="utf-8") as src_file:
1086                     dict_line_cc = dict_mfl_dict_line_cc.pop(mfl)
1087                     print_ann_fancy(mfl)
1088
1089                     # Because all the Ofls are identical, we can treat their
1090                     # mtimes as if they are all as early as the earliest one.
1091                     # Therefore, we warn only if the earliest source file is
1092                     # more recent than the cgout file.
1093                     min_ofl_st_mtime_ns = min(
1094                         [os.stat(ofl).st_mtime_ns for ofl in ofls]
1095                     )
1096
1097                     for cgout_filename in args.cgout_filename:
1098                         if min_ofl_st_mtime_ns > os.stat(cgout_filename).st_mtime_ns:
1099                             warn_ofls_are_all_newer(ofls, cgout_filename)
1100
1101                     print_annotated_src_file(
1102                         events,
1103                         dict_line_cc,
1104                         src_file,
1105                         annotated_ccs,
1106                         summary_cc,
1107                     )
1108             else:
1109                 dict_line_cc = dict_mfl_dict_line_cc.pop(mfl)
1110                 add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.non_identical_cc)
1111
1112                 # We could potentially do better here.
1113                 # - Annotate until the first line where the src files diverge.
1114                 # - Also, heuristic resyncing, e.g. by looking for matching
1115                 #   lines (of sufficient complexity) after a divergence.
1116                 print_ann_fancy(mfl)
1117                 print(
1118                     "Unannotated because two or more of these original files are not "
1119                     "identical:",
1120                     *ofls,
1121                     sep="\n- ",
1122                 )
1123                 print()
1124         except OSError:
1125             dict_line_cc = dict_mfl_dict_line_cc.pop(mfl)
1126             add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.unreadable_cc)
1127
1128             print_ann_fancy(mfl)
1129             print(
1130                 "Unannotated because one or more of these original files are "
1131                 "unreadable:",
1132                 *ofls,
1133                 sep="\n- ",
1134             )
1135             print()
1136
1137     # Sum the CCs remaining in `dict_mfl_dict_line_cc`, which are all in files
1138     # below the threshold.
1139     for dict_line_cc in dict_mfl_dict_line_cc.values():
1140         add_dict_line_cc_to_cc(dict_line_cc, annotated_ccs.below_threshold_cc)
1141
1142     return annotated_ccs
1143
1144
1145 def print_annotation_summary(
1146     events: Events,
1147     annotated_ccs: AnnotatedCcs,
1148     summary_cc: Cc,
1149 ) -> None:
1150     # Show how many events were covered by annotated lines above.
1151     printer = CcPrinter(events, summary_cc)
1152     printer.init_ccs(annotated_ccs.ccs())
1153     print_fancy("Annotation summary")
1154     printer.print_events("")
1155     print()
1156
1157     total_cc = events.mk_empty_cc()
1158     for (cc, label) in zip(annotated_ccs.ccs(), AnnotatedCcs.labels):
1159         printer.print_cc(cc, None, label)
1160         add_cc_to_cc(cc, total_cc)
1161
1162     print()
1163
1164     # Internal sanity check.
1165     if summary_cc != total_cc:
1166         msg = (
1167             "`summary:` line doesn't match computed annotated counts\n"
1168             f"- summary:   {summary_cc}\n"
1169             f"- annotated: {total_cc}"
1170         )
1171         die(msg)
1172
1173
1174 def main() -> None:
1175     # Metadata, initialized to empty states.
1176     descs: list[str] = []
1177     cmds: list[str] = []
1178     events = Events()
1179
1180     # For tracking original filenames to modified filenames.
1181     dict_mfl_ofls: DictMflOfls = defaultdict(set)
1182
1183     # Different places where we accumulate CC data. Initialized to invalid
1184     # states prior to the number of events being known.
1185     dict_mfl_dcc: DictMnameDcc = defaultdict(None)
1186     dict_mfn_dcc: DictMnameDcc = defaultdict(None)
1187     dict_mfl_dict_line_cc: DictMflDictLineCc = defaultdict(None)
1188     summary_cc: Cc = []
1189
1190     for n, filename in enumerate(args.cgout_filename):
1191         is_first_file = n == 0
1192         read_cgout_file(
1193             filename,
1194             is_first_file,
1195             descs,
1196             cmds,
1197             events,
1198             dict_mfl_ofls,
1199             dict_mfl_dcc,
1200             dict_mfn_dcc,
1201             dict_mfl_dict_line_cc,
1202             summary_cc,
1203         )
1204
1205     # Each of the following calls prints a section of the output.
1206     print_metadata(descs, cmds, events)
1207     print_summary(events, summary_cc)
1208     ann_mfls = print_mname_summary(
1209         "File:function", "< ", events, dict_mfl_dcc, summary_cc
1210     )
1211     print_mname_summary("Function:file", "> ", events, dict_mfn_dcc, summary_cc)
1212     if args.annotate:
1213         annotated_ccs = print_annotated_src_files(
1214             ann_mfls, events, dict_mfl_ofls, dict_mfl_dict_line_cc, summary_cc
1215         )
1216
1217         print_annotation_summary(events, annotated_ccs, summary_cc)
1218
1219
1220 if __name__ == "__main__":
1221     main()