contrib/analyze_brprob

   1 #!/usr/bin/awk -f
   2 # Script to analyze experimental results of our branch prediction heuristics
   3 # Contributed by Jan Hubicka, SuSE Inc.
   4 # Copyright (C) 2001, 2003 Free Software Foundation, Inc.
   5 #
   6 # This file is part of GCC.
   7 #
   8 # GCC is free software; you can redistribute it and/or modify
   9 # it under the terms of the GNU General Public License as published by
  10 # the Free Software Foundation; either version 2, or (at your option)
  11 # any later version.
  12 #
  13 # GCC is distributed in the hope that it will be useful,
  14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 # GNU General Public License for more details.
  17 #
  18 # You should have received a copy of the GNU General Public License
  19 # along with GCC; see the file COPYING.  If not, write to
  20 # the Free Software Foundation, 59 Temple Place - Suite 330,
  21 # Boston, MA 02111-1307, USA.
  22 #
  23 #
  24 # This script is used to calculate two basic properties of the branch prediction
  25 # heuristics - coverage and hitrate.  Coverage is number of executions of a given
  26 # branch matched by the heuristics and hitrate is probability that once branch is
  27 # predicted as taken it is really taken.
  28 #
  29 # These values are useful to determine the quality of given heuristics.  Hitrate
  30 # may be directly used in predict.c.
  31 #
  32 # Usage:
  33 #  Step 1: Compile and profile your program.  You need to use -fprofile-arcs
  34 #    flag to get the profiles
  35 #  Step 2: Generate log files.  The information about given heuristics are
  36 #    saved into *.bp dumps.  You need to pass the -db switch to the compiler as well
  37 #    as -fbranch-probabilities to get the results of profiling noted in the dumps.
  38 #    Ensure that there are no "Arc profiling: some edge counts were bad." warnings.
#  Step 3: Run this script over all of the *.bp dump files:
#    analyze_brprob `find . -name '*.bp'`
#    The information is collected and printed once all files are parsed.  This
  42 #    may take a while.
  43 #    Note that the script does use bc to perform long arithmetic.
  44 #  Step 4: Read the results.  Basically the following table is printed:
  45 #  (this is just an example from a very early stage of branch prediction pass
  46 #   development, so please don't take these numbers seriously)
  47 #
  48 #HEURISTICS                  BRANCHES  (REL)  HITRATE             COVERAGE  (REL)
  49 #opcode                          2889  83.7%  94.96%/ 97.62%      7516383  75.3%
  50 #pointer                          246   7.1%  99.69%/ 99.86%       118791   1.2%
  51 #loop header                      449  13.0%  98.32%/ 99.07%        43553   0.4%
  52 #first match                     3450 100.0%  89.92%/ 97.27%      9979782 100.0%
  53 #loop exit                        924  26.8%  88.95%/ 95.58%      9026266  90.4%
  54 #error return                     150   4.3%  64.48%/ 86.81%       453542   4.5%
  55 #call                             803  23.3%  51.66%/ 98.61%      3614037  36.2%
  56 #loop branch                       51   1.5%  99.26%/ 99.27%        26854   0.3%
  57 #noreturn call                    951  27.6% 100.00%/100.00%      1759809  17.6%
  58 #
  59 #  The heuristic called "first match" is a heuristic used by GCC branch
  60 #  prediction pass and it predicts 89.92% branches correctly.
  61 #
  62 #  The quality of heuristics can be rated using both, coverage and hitrate
  63 #  parameters.  For example "loop branch" heuristics (predicting loopback edge
  64 #  as taken) have both very high hitrate and coverage, so it is very useful.
  65 #  On the other hand, "exit block" heuristics (predicting exit edges as not
  66 #  taken) have good hitrate, but poor coverage, so only 3 branches have been
  67 #  predicted.  The "loop header" heuristic has problems, since it tends to
#  mispredict.
  69 #
  70 #  The implementation of this script is somewhat brute force.  My awk skills
  71 #  are limited.
  72
  73 function longeval(e)
  74 {
  75   e = "echo \"scale = 2 ;"e"\" | bc"
  76   e | getline res
  77   close (e)
  78   return res
  79 }
  80
  81 BEGIN {nnames = 0}
  82
  83 /^  .* heuristics: .*.$/ {
  84     name=$0
  85     sub (/^  /,"",name)
  86     sub (/ heuristics: .*.$/,"",name)
  87     if (!(name in branches))
  88       {
  89         names[nnames] = name
  90         branches[name]=0
  91         counts[name]=0
  92         hits[name]=0
  93         phits[name]=0
  94         nnames++
  95       }
  96     branches[name]+=1
  97   }
  98
  99 /^  .* heuristics: .*. exec [0-9]* hit [0-9]* (.*.)$/ {
 100     name=$0
 101     sub (/^  /,"",name)
 102     sub (/ heuristics: .*. exec [0-9]* hit [0-9]* (.*.)$/,"",name)
 103     pred=$0
 104     sub (/^  .* heuristics: /,"",pred)
 105     sub (/. exec [0-9]* hit [0-9]* (.*.)$/,"",pred)
 106     count=$0
 107     sub (/^  .* heuristics: .*. exec /,"",count)
 108     sub (/ hit [0-9]* (.*.)$/,"",count)
 109     hit=$0
 110     sub (/^  .* heuristics: .*. exec [0-9]* hit /,"",hit)
 111     sub (/ (.*.)$/,"",hit)
 112
 113     if (int(pred) < 50.0)
 114       {
 115         hit = count"-"hit;
 116       }
 117     counts[name]=counts[name] "+" count
 118     hits[name]=hits[name] "+" hit
 119     phits[name]=phits[name] "+(("hit")<"count"/2)*("count"-("hit"))+(("hit")>="count"/2)*("hit")"
 120
 121     #BC crashes on long strings.  Irritating.
 122     if (length(counts[name]) > 2000)
 123       counts[name] = longeval(counts[name])
 124     if (length(hits[name]) > 2000)
 125       hits[name] = longeval(hits[name])
 126     if (length(phits[name]) > 2000)
 127       phits[name] = longeval(phits[name])
 128   }
 129 END {
 130   # Heuristics called combined predicts just everything.
 131   maxcounts = longeval(counts["combined"])
 132   maxbranches = branches["combined"]
 133   max = names["combined"]
 134   printf("HEURISTICS                 BRANCHES  (REL)  HITRATE              COVERAGE  (REL)\n")
 135   for (i = 0; i < nnames ; i++)
 136    {
 137      name = names[i]
 138      counts[name] = longeval(counts[name])
 139      printf ("%-26s %8i %5.1f%% %6s%% / %6s%% %12s %5.1f%%\n",
 140              name,
 141              branches[name], branches[name] * 100 / maxbranches,
 142              longeval("("hits[name]") * 100 /(" counts[name]"-0.00001)"),
 143              longeval("("phits[name]") * 100 /(" counts[name]"-0.00001)"),
 144              counts[name], longeval(counts[name]" * 100 / ("maxcounts"-0.00001)"))
 145    }
 146 }