usr.bin/gprof/gprof.c

   1 /*
   2  * Copyright (c) 1983, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. All advertising materials mentioning features or use of this software
  14  *    must display the following acknowledgement:
  15  *      This product includes software developed by the University of
  16  *      California, Berkeley and its contributors.
  17  * 4. Neither the name of the University nor the names of its contributors
  18  *    may be used to endorse or promote products derived from this software
  19  *    without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  31  * SUCH DAMAGE.
  32  *
  33  * @(#) Copyright (c) 1983, 1993 The Regents of the University of California.  All rights reserved.
  34  * @(#)gprof.c  8.1 (Berkeley) 6/6/93
  35  * $FreeBSD: src/usr.bin/gprof/gprof.c,v 1.11 1999/08/28 01:01:55 peter Exp $
  36  * $DragonFly: src/usr.bin/gprof/gprof.c,v 1.5 2006/01/22 03:43:37 swildner Exp $
  37  */
  38
  39 #include <err.h>
  40 #include "gprof.h"
  41
  42 static int valcmp(const void *, const void *);
  43
  44
  45 static struct gmonhdr   gmonhdr;
  46 static int lflag;
  47 static int Lflag;
  48
  49 main(int argc, char **argv)
  50 {
  51     char        **sp;
  52     nltype      **timesortnlp;
  53     char        **defaultEs;
  54
  55     --argc;
  56     argv++;
  57     debug = 0;
  58     bflag = TRUE;
  59     while ( *argv != 0 && **argv == '-' ) {
  60         (*argv)++;
  61         switch ( **argv ) {
  62         case 'a':
  63             aflag = TRUE;
  64             break;
  65         case 'b':
  66             bflag = FALSE;
  67             break;
  68         case 'C':
  69             Cflag = TRUE;
  70             cyclethreshold = atoi( *++argv );
  71             break;
  72         case 'c':
  73 #if defined(vax) || defined(tahoe)
  74             cflag = TRUE;
  75 #else
  76             errx(1, "-c isn't supported on this architecture yet");
  77 #endif
  78             break;
  79         case 'd':
  80             dflag = TRUE;
  81             setlinebuf(stdout);
  82             debug |= atoi( *++argv );
  83             debug |= ANYDEBUG;
  84 #           ifdef DEBUG
  85                 printf("[main] debug = %d\n", debug);
  86 #           else /* not DEBUG */
  87                 printf("gprof: -d ignored\n");
  88 #           endif /* DEBUG */
  89             break;
  90         case 'E':
  91             ++argv;
  92             addlist( Elist , *argv );
  93             Eflag = TRUE;
  94             addlist( elist , *argv );
  95             eflag = TRUE;
  96             break;
  97         case 'e':
  98             addlist( elist , *++argv );
  99             eflag = TRUE;
 100             break;
 101         case 'F':
 102             ++argv;
 103             addlist( Flist , *argv );
 104             Fflag = TRUE;
 105             addlist( flist , *argv );
 106             fflag = TRUE;
 107             break;
 108         case 'f':
 109             addlist( flist , *++argv );
 110             fflag = TRUE;
 111             break;
 112         case 'k':
 113             addlist( kfromlist , *++argv );
 114             addlist( ktolist , *++argv );
 115             kflag = TRUE;
 116             break;
 117     case 'l':
 118             lflag = 1;
 119             Lflag = 0;
 120             break;
 121     case 'L':
 122             Lflag = 1;
 123             lflag = 0;
 124             break;
 125     case 's':
 126             sflag = TRUE;
 127             break;
 128         case 'u':
 129             uflag = TRUE;
 130             break;
 131         case 'z':
 132             zflag = TRUE;
 133             break;
 134         }
 135         argv++;
 136     }
 137     if ( *argv != 0 ) {
 138         a_outname  = *argv;
 139         argv++;
 140     } else {
 141         a_outname  = A_OUTNAME;
 142     }
 143     if ( *argv != 0 ) {
 144         gmonname = *argv;
 145         argv++;
 146     } else {
 147         gmonname = (char *) malloc(strlen(a_outname)+6);
 148         strcpy(gmonname, a_outname);
 149         strcat(gmonname, ".gmon");
 150     }
 151         /*
 152          *      get information from the executable file.
 153          */
 154     if (elf_getnfile(a_outname, &defaultEs) == -1 &&
 155       aout_getnfile(a_outname, &defaultEs) == -1)
 156         errx(1, "%s: bad format", a_outname);
 157         /*
 158          *      sort symbol table.
 159          */
 160     qsort(nl, nname, sizeof(nltype), valcmp);
 161         /*
 162          *      turn off default functions
 163          */
 164     for ( sp = defaultEs ; *sp ; sp++ ) {
 165         Eflag = TRUE;
 166         addlist( Elist , *sp );
 167         eflag = TRUE;
 168         addlist( elist , *sp );
 169     }
 170         /*
 171          *      get information about mon.out file(s).
 172          */
 173     do  {
 174         getpfile( gmonname );
 175         if ( *argv != 0 ) {
 176             gmonname = *argv;
 177         }
 178     } while ( *argv++ != 0 );
 179         /*
 180          *      how many ticks per second?
 181          *      if we can't tell, report time in ticks.
 182          */
 183     if (hz == 0) {
 184         hz = 1;
 185         fprintf(stderr, "time is in ticks, not seconds\n");
 186     }
 187         /*
 188          *      dump out a gmon.sum file if requested
 189          */
 190     if ( sflag ) {
 191         dumpsum( GMONSUM );
 192     }
 193         /*
 194          *      assign samples to procedures
 195          */
 196     asgnsamples();
 197         /*
 198          *      assemble the dynamic profile
 199          */
 200     timesortnlp = doarcs();
 201         /*
 202          *      print the dynamic profile
 203          */
 204     if(!lflag) {
 205             printgprof( timesortnlp );
 206     }
 207         /*
 208          *      print the flat profile
 209          */
 210     if(!Lflag) {
 211             printprof();
 212     }
 213         /*
 214          *      print the index
 215          */
 216     printindex();
 217     done();
 218 }
 219
 220     /*
 221      *  information from a gmon.out file is in two parts:
 222      *  an array of sampling hits within pc ranges,
 223      *  and the arcs.
 224      */
 225 getpfile(char *filename)
 226 {
 227     FILE                *pfile;
 228     FILE                *openpfile();
 229     struct rawarc       arc;
 230
 231     pfile = openpfile(filename);
 232     readsamples(pfile);
 233         /*
 234          *      the rest of the file consists of
 235          *      a bunch of <from,self,count> tuples.
 236          */
 237     while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) {
 238 #       ifdef DEBUG
 239             if ( debug & SAMPLEDEBUG ) {
 240                 printf( "[getpfile] frompc 0x%x selfpc 0x%x count %d\n" ,
 241                         arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
 242             }
 243 #       endif /* DEBUG */
 244             /*
 245              *  add this arc
 246              */
 247         tally( &arc );
 248     }
 249     fclose(pfile);
 250 }
 251
 252 FILE *
 253 openpfile(char *filename)
 254 {
 255     struct gmonhdr      tmp;
 256     FILE                *pfile;
 257     int                 size;
 258     int                 rate;
 259
 260     if((pfile = fopen(filename, "r")) == NULL) {
 261         perror(filename);
 262         done();
 263     }
 264     fread(&tmp, sizeof(struct gmonhdr), 1, pfile);
 265     if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc ||
 266          tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt ) ) {
 267         warnx("%s: incompatible with first gmon file", filename);
 268         done();
 269     }
 270     gmonhdr = tmp;
 271     if ( gmonhdr.version == GMONVERSION ) {
 272         rate = gmonhdr.profrate;
 273         size = sizeof(struct gmonhdr);
 274     } else {
 275         fseek(pfile, sizeof(struct ophdr), SEEK_SET);
 276         size = sizeof(struct ophdr);
 277         gmonhdr.profrate = rate = hertz();
 278         gmonhdr.version = GMONVERSION;
 279     }
 280     if (hz == 0) {
 281         hz = rate;
 282     } else if (hz != rate) {
 283         fprintf(stderr,
 284             "%s: profile clock rate (%d) %s (%d) in first gmon file\n",
 285             filename, rate, "incompatible with clock rate", hz);
 286         done();
 287     }
 288     s_lowpc = (unsigned long) gmonhdr.lpc;
 289     s_highpc = (unsigned long) gmonhdr.hpc;
 290     lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT);
 291     highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT);
 292     sampbytes = gmonhdr.ncnt - size;
 293     nsamples = sampbytes / sizeof (UNIT);
 294 #   ifdef DEBUG
 295         if ( debug & SAMPLEDEBUG ) {
 296             printf( "[openpfile] hdr.lpc 0x%x hdr.hpc 0x%x hdr.ncnt %d\n",
 297                 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt );
 298             printf( "[openpfile]   s_lowpc 0x%x   s_highpc 0x%x\n" ,
 299                 s_lowpc , s_highpc );
 300             printf( "[openpfile]     lowpc 0x%x     highpc 0x%x\n" ,
 301                 lowpc , highpc );
 302             printf( "[openpfile] sampbytes %d nsamples %d\n" ,
 303                 sampbytes , nsamples );
 304             printf( "[openpfile] sample rate %d\n" , hz );
 305         }
 306 #   endif /* DEBUG */
 307     return(pfile);
 308 }
 309
 310 tally(struct rawarc *rawp)
 311 {
 312     nltype              *parentp;
 313     nltype              *childp;
 314
 315     parentp = nllookup( rawp -> raw_frompc );
 316     childp = nllookup( rawp -> raw_selfpc );
 317     if ( parentp == 0 || childp == 0 )
 318         return;
 319     if ( kflag
 320          && onlist( kfromlist , parentp -> name )
 321          && onlist( ktolist , childp -> name ) ) {
 322         return;
 323     }
 324     childp -> ncall += rawp -> raw_count;
 325 #   ifdef DEBUG
 326         if ( debug & TALLYDEBUG ) {
 327             printf( "[tally] arc from %s to %s traversed %d times\n" ,
 328                     parentp -> name , childp -> name , rawp -> raw_count );
 329         }
 330 #   endif /* DEBUG */
 331     addarc( parentp , childp , rawp -> raw_count );
 332 }
 333
 334 /*
 335  * dump out the gmon.sum file
 336  */
 337 dumpsum(char *sumfile)
 338 {
 339     nltype *nlp;
 340     arctype *arcp;
 341     struct rawarc arc;
 342     FILE *sfile;
 343
 344     if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) {
 345         perror( sumfile );
 346         done();
 347     }
 348     /*
 349      * dump the header; use the last header read in
 350      */
 351     if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) {
 352         perror( sumfile );
 353         done();
 354     }
 355     /*
 356      * dump the samples
 357      */
 358     if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) {
 359         perror( sumfile );
 360         done();
 361     }
 362     /*
 363      * dump the normalized raw arc information
 364      */
 365     for ( nlp = nl ; nlp < npe ; nlp++ ) {
 366         for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) {
 367             arc.raw_frompc = arcp -> arc_parentp -> value;
 368             arc.raw_selfpc = arcp -> arc_childp -> value;
 369             arc.raw_count = arcp -> arc_count;
 370             if ( fwrite ( &arc , sizeof arc , 1 , sfile ) != 1 ) {
 371                 perror( sumfile );
 372                 done();
 373             }
 374 #           ifdef DEBUG
 375                 if ( debug & SAMPLEDEBUG ) {
 376                     printf( "[dumpsum] frompc 0x%x selfpc 0x%x count %d\n" ,
 377                             arc.raw_frompc , arc.raw_selfpc , arc.raw_count );
 378                 }
 379 #           endif /* DEBUG */
 380         }
 381     }
 382     fclose( sfile );
 383 }
 384
 385 static int
 386 valcmp(const void *v1, const void *v2)
 387 {
 388     const nltype *p1 = (const nltype *)v1;
 389     const nltype *p2 = (const nltype *)v2;
 390
 391     if ( p1 -> value < p2 -> value ) {
 392         return LESSTHAN;
 393     }
 394     if ( p1 -> value > p2 -> value ) {
 395         return GREATERTHAN;
 396     }
 397     return EQUALTO;
 398 }
 399
 400 readsamples(FILE *pfile)
 401 {
 402     register i;
 403     UNIT        sample;
 404
 405     if (samples == 0) {
 406         samples = (UNIT *) calloc(sampbytes, sizeof (UNIT));
 407         if (samples == 0) {
 408             warnx("no room for %d sample pc's", sampbytes / sizeof (UNIT));
 409             done();
 410         }
 411     }
 412     for (i = 0; i < nsamples; i++) {
 413         fread(&sample, sizeof (UNIT), 1, pfile);
 414         if (feof(pfile))
 415                 break;
 416         samples[i] += sample;
 417     }
 418     if (i != nsamples) {
 419         warnx("unexpected EOF after reading %d/%d samples", --i , nsamples );
 420         done();
 421     }
 422 }
 423
 424 /*
 425  *      Assign samples to the procedures to which they belong.
 426  *
 427  *      There are three cases as to where pcl and pch can be
 428  *      with respect to the routine entry addresses svalue0 and svalue1
 429  *      as shown in the following diagram.  overlap computes the
 430  *      distance between the arrows, the fraction of the sample
 431  *      that is to be credited to the routine which starts at svalue0.
 432  *
 433  *          svalue0                                         svalue1
 434  *             |                                               |
 435  *             v                                               v
 436  *
 437  *             +-----------------------------------------------+
 438  *             |                                               |
 439  *        |  ->|    |<-         ->|         |<-         ->|    |<-  |
 440  *        |         |             |         |             |         |
 441  *        +---------+             +---------+             +---------+
 442  *
 443  *        ^         ^             ^         ^             ^         ^
 444  *        |         |             |         |             |         |
 445  *       pcl       pch           pcl       pch           pcl       pch
 446  *
 447  *      For the vax we assert that samples will never fall in the first
 448  *      two bytes of any routine, since that is the entry mask,
 449  *      thus we give call alignentries() to adjust the entry points if
 450  *      the entry mask falls in one bucket but the code for the routine
 451  *      doesn't start until the next bucket.  In conjunction with the
 452  *      alignment of routine addresses, this should allow us to have
 453  *      only one sample for every four bytes of text space and never
 454  *      have any overlap (the two end cases, above).
 455  */
 456 asgnsamples(void)
 457 {
 458     int j;
 459     UNIT                ccnt;
 460     double              time;
 461     unsigned long       pcl, pch;
 462     int i;
 463     unsigned long       overlap;
 464     unsigned long       svalue0, svalue1;
 465
 466     /* read samples and assign to namelist symbols */
 467     scale = highpc - lowpc;
 468     scale /= nsamples;
 469     alignentries();
 470     for (i = 0, j = 1; i < nsamples; i++) {
 471         ccnt = samples[i];
 472         if (ccnt == 0)
 473                 continue;
 474         pcl = lowpc + (unsigned long)(scale * i);
 475         pch = lowpc + (unsigned long)(scale * (i + 1));
 476         time = ccnt;
 477 #       ifdef DEBUG
 478             if ( debug & SAMPLEDEBUG ) {
 479                 printf( "[asgnsamples] pcl 0x%x pch 0x%x ccnt %d\n" ,
 480                         pcl , pch , ccnt );
 481             }
 482 #       endif /* DEBUG */
 483         totime += time;
 484         for (j = j - 1; j < nname; j++) {
 485             svalue0 = nl[j].svalue;
 486             svalue1 = nl[j+1].svalue;
 487                 /*
 488                  *      if high end of tick is below entry address,
 489                  *      go for next tick.
 490                  */
 491             if (pch < svalue0)
 492                     break;
 493                 /*
 494                  *      if low end of tick into next routine,
 495                  *      go for next routine.
 496                  */
 497             if (pcl >= svalue1)
 498                     continue;
 499             overlap = min(pch, svalue1) - max(pcl, svalue0);
 500             if (overlap > 0) {
 501 #               ifdef DEBUG
 502                     if (debug & SAMPLEDEBUG) {
 503                         printf("[asgnsamples] (0x%x->0x%x-0x%x) %s gets %f ticks %d overlap\n",
 504                                 nl[j].value/sizeof(UNIT), svalue0, svalue1,
 505                                 nl[j].name,
 506                                 overlap * time / scale, overlap);
 507                     }
 508 #               endif /* DEBUG */
 509                 nl[j].time += overlap * time / scale;
 510             }
 511         }
 512     }
 513 #   ifdef DEBUG
 514         if (debug & SAMPLEDEBUG) {
 515             printf("[asgnsamples] totime %f\n", totime);
 516         }
 517 #   endif /* DEBUG */
 518 }
 519
 520
 521 unsigned long
 522 min(unsigned long a, unsigned long b)
 523 {
 524     if (a<b)
 525         return(a);
 526     return(b);
 527 }
 528
 529 unsigned long
 530 max(unsigned long a, unsigned long b)
 531 {
 532     if (a>b)
 533         return(a);
 534     return(b);
 535 }
 536
 537     /*
 538      *  calculate scaled entry point addresses (to save time in asgnsamples),
 539      *  and possibly push the scaled entry points over the entry mask,
 540      *  if it turns out that the entry point is in one bucket and the code
 541      *  for a routine is in the next bucket.
 542      */
 543 alignentries(void)
 544 {
 545     struct nl   *nlp;
 546     unsigned long       bucket_of_entry;
 547     unsigned long       bucket_of_code;
 548
 549     for (nlp = nl; nlp < npe; nlp++) {
 550         nlp -> svalue = nlp -> value / sizeof(UNIT);
 551         bucket_of_entry = (nlp->svalue - lowpc) / scale;
 552         bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale;
 553         if (bucket_of_entry < bucket_of_code) {
 554 #           ifdef DEBUG
 555                 if (debug & SAMPLEDEBUG) {
 556                     printf("[alignentries] pushing svalue 0x%x to 0x%x\n",
 557                             nlp->svalue, nlp->svalue + UNITS_TO_CODE);
 558                 }
 559 #           endif /* DEBUG */
 560             nlp->svalue += UNITS_TO_CODE;
 561         }
 562     }
 563 }
 564
 565 done(void)
 566 {
 567
 568     exit(0);
 569 }