1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 A program that computes a function ordering for an executable based
8 on runtime profile information.
10 This program is directly based on work done by Roger Chickering
11 <rogc@netscape.com> in
13 <http://bugzilla.mozilla.org/show_bug.cgi?id=65845>
15 to implement Nat Friedman's <nat@nat.org> `grope',
17 _GNU Rope - A Subroutine Position Optimizer_
18 <http://www.hungry.com/~shaver/grope/grope.ps>
20 Specifically, it implements the procedure re-ordering algorithm
23 K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In
24 _Proceedings of the Int. Conference on Programming Language Design
25 and Implementation_, pages 16-27, June 1990.
41 #include "elf_symbol_table.h"
46 elf_symbol_table symtab
;
48 // Straight outta plhash.c!
49 #define GOLDEN_RATIO 0x9E3779B9U
51 struct hash
<const Elf32_Sym
*>
53 size_t operator()(const Elf32_Sym
*sym
) const {
54 return (reinterpret_cast<size_t>(sym
) >> 4) * GOLDEN_RATIO
; }
57 typedef unsigned int call_count_t
;
61 const Elf32_Sym
*m_from
;
62 const Elf32_Sym
*m_to
;
65 call_graph_arc(const Elf32_Sym
*left
, const Elf32_Sym
*right
, call_count_t count
= 0)
68 assert(left
!= right
);
81 operator==(const call_graph_arc
&lhs
, const call_graph_arc
&rhs
)
83 return (lhs
.m_from
== rhs
.m_from
) && (lhs
.m_to
== rhs
.m_to
);
87 operator<<(ostream
&out
, const call_graph_arc
&arc
)
90 out
.form("<(%s %s) %d>",
91 symtab
.get_symbol_name(arc
.m_from
),
92 symtab
.get_symbol_name(arc
.m_to
),
99 struct hash
<call_graph_arc
*>
102 operator()(const call_graph_arc
* arc
) const
105 result
= reinterpret_cast<unsigned long>(arc
->m_from
);
106 result
^= reinterpret_cast<unsigned long>(arc
->m_to
) >> 16;
107 result
^= reinterpret_cast<unsigned long>(arc
->m_to
) << 16;
108 result
*= GOLDEN_RATIO
;
113 struct equal_to
<call_graph_arc
*>
116 operator()(const call_graph_arc
* lhs
, const call_graph_arc
*rhs
) const
122 typedef hash_set
<call_graph_arc
*> arc_container_t
;
123 arc_container_t arcs
;
125 typedef multimap
<const Elf32_Sym
*, call_graph_arc
*> arc_map_t
;
129 struct less_call_graph_arc_count
132 operator()(const call_graph_arc
*lhs
, const call_graph_arc
*rhs
) const
134 if (lhs
->m_count
== rhs
->m_count
) {
135 if (lhs
->m_from
== rhs
->m_from
)
136 return lhs
->m_to
> rhs
->m_to
;
138 return lhs
->m_from
> rhs
->m_from
;
140 return lhs
->m_count
> rhs
->m_count
;
144 typedef set
<call_graph_arc
*, less_call_graph_arc_count
> arc_count_index_t
;
146 bool opt_debug
= false;
147 const char *opt_out
= "order.out";
149 bool opt_verbose
= false;
152 static struct option long_options
[] = {
153 { "debug", no_argument
, 0, 'd' },
154 { "exe", required_argument
, 0, 'e' },
155 { "help", no_argument
, 0, '?' },
156 { "out", required_argument
, 0, 'o' },
157 { "tick", optional_argument
, 0, 't' },
158 { "verbose", no_argument
, 0, 'v' },
159 { "window", required_argument
, 0, 'w' },
163 //----------------------------------------------------------------------
166 usage(const char *name
)
168 cerr
<< "usage: " << name
<< " [options] [<file> ...]" << endl
;
169 cerr
<< " Options:" << endl
;
170 cerr
<< " --debug, -d" << endl
;
171 cerr
<< " Print lots of verbose debugging cruft." << endl
;
172 cerr
<< " --exe=<image>, -e <image> (required)" << endl
;
173 cerr
<< " Specify the executable image from which to read symbol information." << endl
;
174 cerr
<< " --help, -?" << endl
;
175 cerr
<< " Print this message and exit." << endl
;
176 cerr
<< " --out=<file>, -o <file>" << endl
;
177 cerr
<< " Specify the output file to which to dump the symbol ordering of the" << endl
;
178 cerr
<< " best individual (default is `order.out')." << endl
;
179 cerr
<< " --tick[=<num>], -t [<num>]" << endl
;
180 cerr
<< " When reading address data, print a dot to stderr every <num>th" << endl
;
181 cerr
<< " address processed from the call trace. If specified with no argument," << endl
;
182 cerr
<< " a dot will be printed for every million addresses processed." << endl
;
183 cerr
<< " --verbose, -v" << endl
;
184 cerr
<< " Issue progress messages to stderr." << endl
;
185 cerr
<< " --window=<num>, -w <num>" << endl
;
186 cerr
<< " Use a sliding window instead of pagination to score orderings." << endl
;
190 * Using the symbol table, map a stream of address references into a
196 long long total_calls
= 0;
197 typedef list
<const Elf32_Sym
*> window_t
;
201 unsigned int buf
[128];
204 while ((cb
= read(fd
, buf
, sizeof buf
)) > 0) {
205 if (cb
% sizeof buf
[0])
206 fprintf(stderr
, "unaligned read\n");
208 unsigned int *addr
= buf
;
209 unsigned int *limit
= buf
+ (cb
/ 4);
211 for (; addr
< limit
; ++addr
) {
212 const Elf32_Sym
*sym
= symtab
.lookup(*addr
);
218 window
.push_front(sym
);
220 if (window_size
>= opt_window
)
225 window_t::const_iterator i
= window
.begin();
226 window_t::const_iterator end
= window
.end();
227 for (; i
!= end
; ++i
) {
230 call_graph_arc
key(sym
, *i
);
231 arc_container_t::iterator iter
= arcs
.find(&key
);
232 if (iter
== arcs
.end()) {
233 arc
= new call_graph_arc(sym
, *i
);
235 from_map
.insert(arc_map_t::value_type(arc
->m_from
, arc
));
236 to_map
.insert(arc_map_t::value_type(arc
->m_to
, arc
));
239 arc
= const_cast<call_graph_arc
*>(*iter
);
245 if (opt_verbose
&& opt_tick
&& (total_calls
% opt_tick
== 0)) {
256 cerr
<< "Total calls: " << total_calls
<< endl
;
261 remove_from(arc_map_t
& map
, const Elf32_Sym
*sym
, const call_graph_arc
*arc
)
263 pair
<arc_map_t::iterator
, arc_map_t::iterator
> p
264 = map
.equal_range(sym
);
266 for (arc_map_t::iterator i
= p
.first
; i
!= p
.second
; ++i
) {
267 if (i
->second
== arc
) {
278 main(int argc
, char *argv
[])
280 const char *opt_exe
= 0;
284 int option_index
= 0;
285 c
= getopt_long(argc
, argv
, "?de:o:t:vw:", long_options
, &option_index
);
308 opt_tick
= optarg
? atoi(optarg
) : 1000000;
316 opt_window
= atoi(optarg
);
317 if (opt_window
<= 0) {
318 cerr
<< "invalid window size: " << opt_window
<< endl
;
329 // Make sure an image was specified
335 // Read the sym table.
336 symtab
.init(opt_exe
);
338 // Process addresses to construct the weighted call graph.
339 if (optind
>= argc
) {
340 map_addrs(STDIN_FILENO
);
344 int fd
= open(argv
[optind
], O_RDONLY
);
346 perror(argv
[optind
]);
352 } while (++optind
< argc
);
355 // Emit the ordering.
356 ofstream
out(opt_out
);
358 // Collect all of the arcs into a sorted container, with arcs
359 // having the largest weight at the front.
360 arc_count_index_t
sorted_arcs(arcs
.begin(), arcs
.end());
362 while (sorted_arcs
.size()) {
364 cerr
<< "==========================================" << endl
<< endl
;
366 cerr
<< "Sorted Arcs:" << endl
;
367 for (arc_count_index_t::const_iterator iter
= sorted_arcs
.begin();
368 iter
!= sorted_arcs
.end();
370 cerr
<< **iter
<< endl
;
373 cerr
<< endl
<< "Arc Container:" << endl
;
374 for (arc_container_t::const_iterator iter
= arcs
.begin();
377 cerr
<< **iter
<< endl
;
380 cerr
<< endl
<< "From Map:" << endl
;
381 for (arc_map_t::const_iterator iter
= from_map
.begin();
382 iter
!= from_map
.end();
384 cerr
<< symtab
.get_symbol_name(iter
->first
) << ": " << *(iter
->second
) << endl
;
387 cerr
<< endl
<< "To Map:" << endl
;
388 for (arc_map_t::const_iterator iter
= to_map
.begin();
389 iter
!= to_map
.end();
391 cerr
<< symtab
.get_symbol_name(iter
->first
) << ": " << *(iter
->second
) << endl
;
397 // The first arc in the sorted set will have the largest
398 // weight. Pull it out, and emit its sink.
399 arc_count_index_t::iterator max
= sorted_arcs
.begin();
400 call_graph_arc
*arc
= const_cast<call_graph_arc
*>(*max
);
402 sorted_arcs
.erase(max
);
405 cerr
<< "pulling " << *arc
<< endl
;
408 remove_from(from_map
, arc
->m_from
, arc
);
409 remove_from(to_map
, arc
->m_to
, arc
);
411 out
<< symtab
.get_symbol_name(arc
->m_to
) << endl
;
413 // Merge arc->m_to into arc->m_from. First, modify anything
414 // that used to point to arc->m_to to point to arc->m_from.
415 typedef list
<call_graph_arc
*> arc_list_t
;
416 arc_list_t map_add_queue
;
418 pair
<arc_map_t::iterator
, arc_map_t::iterator
> p
;
420 // Find all the arcs that point to arc->m_to.
421 p
= to_map
.equal_range(arc
->m_to
);
422 for (arc_map_t::iterator i
= p
.first
; i
!= p
.second
; ++i
) {
423 // Remove the arc that points to arc->m_to (`doomed') from
424 // all of our indicies.
425 call_graph_arc
*doomed
= i
->second
;
426 const Elf32_Sym
*source
= doomed
->m_from
;
428 sorted_arcs
.erase(doomed
);
430 remove_from(from_map
, source
, doomed
);
431 // N.B. that `doomed' will be removed from the `to_map'
432 // after the loop completes.
434 // Create a new (or locate an existing) arc whose source
435 // was the doomed arc's source, and whose sink is
436 // arc->m_from (i.e., the node that subsumed arc->m_to).
437 call_graph_arc
*merge
;
438 call_graph_arc key
= call_graph_arc(source
, arc
->m_from
);
439 arc_container_t::iterator iter
= arcs
.find(&key
);
441 if (iter
== arcs
.end()) {
442 merge
= new call_graph_arc(source
, arc
->m_from
);
445 from_map
.insert(arc_map_t::value_type(merge
->m_from
, merge
));
446 map_add_queue
.push_back(merge
);
449 // We found an existing arc: temporarily remove it
450 // from the sorted index.
451 merge
= const_cast<call_graph_arc
*>(*iter
);
452 sorted_arcs
.erase(merge
);
455 // Include the doomed arc's weight in the merged arc, and
456 // (re-)insert it into the sorted index.
457 merge
->m_count
+= doomed
->m_count
;
458 sorted_arcs
.insert(merge
);
463 to_map
.erase(p
.first
, p
.second
);
465 for (arc_list_t::iterator merge
= map_add_queue
.begin();
466 merge
!= map_add_queue
.end();
467 map_add_queue
.erase(merge
++)) {
468 to_map
.insert(arc_map_t::value_type((*merge
)->m_to
, *merge
));
471 // Now, roll anything that arc->m_to used to point at into
472 // what arc->m_from now points at.
474 // Collect all of the arcs that originate at arc->m_to.
475 p
= from_map
.equal_range(arc
->m_to
);
476 for (arc_map_t::iterator i
= p
.first
; i
!= p
.second
; ++i
) {
477 // Remove the arc originating from arc->m_to (`doomed')
478 // from all of our indicies.
479 call_graph_arc
*doomed
= i
->second
;
480 const Elf32_Sym
*sink
= doomed
->m_to
;
482 // There shouldn't be any self-referential arcs.
483 assert(sink
!= arc
->m_to
);
485 sorted_arcs
.erase(doomed
);
487 remove_from(to_map
, sink
, doomed
);
488 // N.B. that `doomed' will be removed from the `from_map'
489 // once the loop completes.
491 // Create a new (or locate an existing) arc whose source
492 // is arc->m_from and whose sink was the removed arc's
494 call_graph_arc
*merge
;
495 call_graph_arc key
= call_graph_arc(arc
->m_from
, sink
);
496 arc_container_t::iterator iter
= arcs
.find(&key
);
498 if (iter
== arcs
.end()) {
499 merge
= new call_graph_arc(arc
->m_from
, sink
);
503 map_add_queue
.push_back(merge
);
504 to_map
.insert(arc_map_t::value_type(merge
->m_to
, merge
));
507 // We found an existing arc: temporarily remove it
508 // from the sorted index.
509 merge
= const_cast<call_graph_arc
*>(*iter
);
510 sorted_arcs
.erase(merge
);
513 // Include the doomed arc's weight in the merged arc, and
514 // (re-)insert it into the sorted index.
515 merge
->m_count
+= doomed
->m_count
;
516 sorted_arcs
.insert(merge
);
521 from_map
.erase(p
.first
, p
.second
);
523 for (arc_list_t::iterator merge
= map_add_queue
.begin();
524 merge
!= map_add_queue
.end();
525 map_add_queue
.erase(merge
++)) {
526 from_map
.insert(arc_map_t::value_type((*merge
)->m_from
, *merge
));