gcc/tree-profile.cc
1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
6 Converted to use trees by Dale Johannesen, Apple Computer.
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 /* Generate basic block profile instrumentation and auxiliary files.
25 Tree-based version. See profile.cc for overview. */
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "memmodel.h"
31 #include "backend.h"
32 #include "target.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "tree-pass.h"
37 #include "ssa.h"
38 #include "cgraph.h"
39 #include "coverage.h"
40 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "varasm.h"
43 #include "tree-nested.h"
44 #include "gimplify.h"
45 #include "gimple-iterator.h"
46 #include "gimplify-me.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "value-prof.h"
50 #include "profile.h"
51 #include "tree-cfgcleanup.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "tree-pretty-print.h"
55 #include "langhooks.h"
56 #include "stor-layout.h"
57 #include "xregex.h"
58 #include "alloc-pool.h"
59 #include "symbol-summary.h"
60 #include "symtab-thunks.h"
61 #include "cfganal.h"
63 static GTY(()) tree gcov_type_node;
64 static GTY(()) tree tree_interval_profiler_fn;
65 static GTY(()) tree tree_pow2_profiler_fn;
66 static GTY(()) tree tree_topn_values_profiler_fn;
67 static GTY(()) tree tree_indirect_call_profiler_fn;
68 static GTY(()) tree tree_average_profiler_fn;
69 static GTY(()) tree tree_ior_profiler_fn;
70 static GTY(()) tree tree_time_profiler_counter;
73 static GTY(()) tree ic_tuple_var;
74 static GTY(()) tree ic_tuple_counters_field;
75 static GTY(()) tree ic_tuple_callee_field;
77 /* Types of counter update methods.
79 By default, the counter updates are done for a single threaded system
80 (COUNTER_UPDATE_SINGLE_THREAD).
82 If the user selected atomic profile counter updates
83 (-fprofile-update=atomic), then the counter updates will be done atomically
84 on a best-effort basis. One of three methods to do the counter updates is
85 selected according to the target capabilities.
87 Ideally, the counter updates are done through atomic operations in hardware
88 (COUNTER_UPDATE_ATOMIC_BUILTIN).
90 If the target supports only 32-bit atomic increments and gcov_type_node is a
91 64-bit integer type, then for the profile edge counters the increment is
92 performed through two separate 32-bit atomic increments
93 (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the
94 target supports libatomic (targetm.have_libatomic), then other counter
95 updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT).
96 If the target does not support libatomic, then the other counter updates are
97 not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is
98 issued.
100 If the target does not support atomic operations in hardware but does
101 support libatomic, then all updates are carried out by libatomic calls
102 (COUNTER_UPDATE_ATOMIC_BUILTIN). */
103 enum counter_update_method {
104 COUNTER_UPDATE_SINGLE_THREAD,
105 COUNTER_UPDATE_ATOMIC_BUILTIN,
106 COUNTER_UPDATE_ATOMIC_SPLIT,
107 COUNTER_UPDATE_ATOMIC_PARTIAL
110 static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD;
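/* For illustration only (not part of the pass): with a 64-bit gcov type and
   only 32-bit atomics, the split update (COUNTER_UPDATE_ATOMIC_SPLIT and
   COUNTER_UPDATE_ATOMIC_PARTIAL) increments a counter with two 32-bit atomic
   operations, roughly equivalent to the C sketch below.  The names addr_low
   and addr_high are hypothetical and stand for the two halves of the 64-bit
   counter; on big-endian targets the halves are swapped:

     uint32_t low = __atomic_add_fetch (addr_low, 1, __ATOMIC_RELAXED);
     uint32_t carry = (low == 0);   // the low word wrapped around
     __atomic_add_fetch (addr_high, carry, __ATOMIC_RELAXED);  */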
112 /* These functions support measuring modified condition/decision coverage
113 (MC/DC). MC/DC requires all of the below during testing:
115 - Each entry and exit point is invoked
116 - Each decision takes every possible outcome
117 - Each condition in a decision takes every possible outcome
118 - Each condition in a decision is shown to independently affect the outcome
119 of the decision
121 Independence of a condition is shown by recording it being evaluated to a
122 value (true/false) and not being made irrelevant ("masked") by a later term.
123 This feature adds some instrumentation code, a few bitwise operators, that
124 records the branches taken in conditions and applies a filter for the
125 masking effect. Masking is essentially short-circuiting in reverse: a
126 condition does not contribute to the outcome if it would have short-circuited
127 the (sub)expression had it been evaluated right-to-left, as in (_ && false)
128 and (_ || true).
130 The program is essentially rewritten this way:
132 - if (a || b) { fn () }
133 + if (a) { _t |= 0x1; goto _then; }
134 + else { _f |= 0x1;
135 + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; }
136 + else { _f |= 0x2; goto _else; }
137 + _then:
138 + _gcov_t |= (_t & ~_mask);
139 + _gcov_f |= (_f & ~_mask);
140 + fn (); goto _end;
141 + _else:
142 + _gcov_t |= (_t & ~_mask);
143 + _gcov_f |= (_f & ~_mask);
144 + fn ();
145 + _end:
147 It is assumed the front end will provide discriminators so that conditional
148 basic blocks (basic blocks with a conditional jump and outgoing true/false
149 edges) that belong to the same Boolean expression have the same
150 discriminator. Masking is determined by analyzing these expressions as a
151 reduced order binary decision diagram. */
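/* A worked example (illustrative only, not from the pass) of the rewritten
   a || b above, where bit 0 tracks a and bit 1 tracks b, and the mask is
   cleared from the accumulators before they are OR'ed into the global
   counters:

     run a=1:      _t = 0x1, _f = 0x0, _mask = 0x0  =>  _gcov_t |= 0x1
     run a=0, b=0: _t = 0x0, _f = 0x3, _mask = 0x0  =>  _gcov_f |= 0x3
     run a=0, b=1: _t = 0x2, _f = 0x1, _mask = 0x1  =>  _gcov_t |= 0x2
                   (a's false outcome is masked by b being true, so it never
                    reaches _gcov_f)

   gcov later reads the (_gcov_t, _gcov_f) pair and reports which conditions
   have independently taken both outcomes.  */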
152 namespace
154 /* Some context and reused instances between function calls. Large embedded
155 buffers are used to request enough memory up front for most programs and
156 merge the allocations into one, at the cost of using more memory in the
157 average case. Some numbers from linux v5.13, which is assumed to be a
158 reasonably diverse code base: 75% of the functions in linux have less than
159 16 nodes in the CFG and approx 2.5% have more than 64 nodes. The functions
160 that go beyond a few dozen nodes tend to be very large (>100) and so 64
161 seems like a good balance.
163 This is really just a trade-off between the cost of allocation and
164 wasted memory. */
165 struct conds_ctx
167 /* This is a reusable shared allocation which is also used to return
168 single expressions, which means that for most code it should only hold a
169 couple of elements. */
170 auto_vec<basic_block, 64> blocks;
172 /* Maps basic_block->index to a topological ordering, so that for the
173 expression (a || b && c) we get top_index[a] < top_index[b]
174 < top_index[c]. */
175 auto_vec<int, 256> top_index;
177 /* Pre-allocate bitmaps and vectors for per-function bookkeeping. This is
178 pure instance reuse and the bitmaps carry no data between function
179 calls. */
180 auto_vec<basic_block, 64> B1;
181 auto_vec<basic_block, 64> B2;
182 auto_sbitmap G1;
183 auto_sbitmap G2;
184 auto_sbitmap G3;
186 explicit conds_ctx (unsigned size) noexcept (true) : G1 (size), G2 (size),
187 G3 (size)
192 /* Only instrument expressions with fewer terms than the number of bits in a
193 (wide) gcov integer, which is probably 64. The algorithm itself does not
194 impose this limitation, but it makes for a simpler implementation:
196 * Allocating the output data structure (coverage_counter_alloc ()) can
197 assume pairs of gcov_type_unsigned and not use a separate length field.
198 * A pair gcov_type_unsigned can be used as accumulators.
199 * Updating accumulators can use the bitwise operations |=, &= rather than
200 custom operators that work for arbitrary-sized bit-sets.
202 Most real-world code should be unaffected by this, but it is possible
203 (especially for generated code) to exceed this limit. */
204 #define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node))
205 #define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
207 /* Compare two basic blocks by their order in the expression, i.e. for (a || b)
208 topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS
209 belong to different expressions. The TOP_INDEX argument should be the
210 top_index vector from ctx. */
212 topological_cmp (const void *lhs, const void *rhs, void *top_index)
214 const_basic_block l = *(const basic_block*) lhs;
215 const_basic_block r = *(const basic_block*) rhs;
216 const vec<int>* im = (const vec<int>*) top_index;
217 return (*im)[l->index] - (*im)[r->index];
220 /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two
221 uses, sometimes for the index and sometimes for set member checks. Sets are
222 typically very small (number of conditions, >8 is uncommon) so linear search
223 should be very fast. */
225 index_of (const basic_block needle, array_slice<basic_block> blocks)
227 for (size_t i = 0; i < blocks.size (); i++)
228 if (blocks[i] == needle)
229 return int (i);
230 return -1;
233 /* Special cases of the single_*_p and single_*_edge functions in basic-block.h
234 that don't consider exception handling or other complex edges. This helps
235 create a view of the CFG with only normal edges - if a basic block has both
236 an outgoing fallthrough and an exceptional edge, it should be considered a
237 single-successor block. */
238 bool
239 single_p (const vec<edge, va_gc> *edges)
241 int n = EDGE_COUNT (edges);
242 if (n == 0)
243 return false;
245 for (edge e : edges)
246 if (e->flags & EDGE_COMPLEX)
247 n -= 1;
249 return n == 1;
252 /* Get the single, non-complex edge. Behavior is undefined if EDGES has more
253 than one non-complex edge. */
254 edge
255 single_edge (const vec<edge, va_gc> *edges)
257 gcc_checking_assert (single_p (edges));
258 for (edge e : edges)
260 if (e->flags & EDGE_COMPLEX)
261 continue;
262 return e;
264 return NULL;
267 /* Sometimes, for example with function calls, goto labels, and C++
268 destructors, the CFG gets extra nodes that are essentially single-entry
269 single-exit in the middle of boolean expressions. For example:
271 x || can_throw (y)

   [CFG diagram elided]

283 Without the extra node inserted by the function + exception it becomes a
284 proper 2-term graph, not 2 single-term graphs.

   [CFG diagram elided]
292 This function finds the source edge of these paths. This is often the
293 identity function. */
294 edge
295 contract_edge_up (edge e)
297 while (true)
299 basic_block src = e->src;
300 if (!single_p (src->preds))
301 return e;
302 if (!single_p (src->succs))
303 return e;
304 e = single_edge (src->preds);
308 /* A simple struct for storing/returning outcome block pairs. Either both
309 blocks are set or both are NULL. */
310 struct outcomes
312 basic_block t = NULL;
313 basic_block f = NULL;
315 operator bool () const noexcept (true)
317 return t && f;
321 /* Get the true/false successors of a basic block. If B is not a conditional
322 block, both blocks are NULL. */
323 outcomes
324 conditional_succs (const basic_block b)
326 outcomes c;
327 for (edge e : b->succs)
329 if (e->flags & EDGE_TRUE_VALUE)
330 c.t = e->dest;
331 if (e->flags & EDGE_FALSE_VALUE)
332 c.f = e->dest;
335 gcc_assert ((c.t && c.f) || (!c.t && !c.f));
336 return c;
339 /* Get the index or offset of a conditional flag, 0 for true and 1 for false.
340 These indices carry no semantics but must be consistent as they are used to
341 index into data structures in code generation and gcov. */
342 unsigned
343 condition_index (unsigned flag)
345 return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1;
348 /* Returns the condition identifier for the basic block if set, otherwise 0.
349 This is only meaningful in GIMPLE and is used for condition coverage.
351 There may be conditions created that did not get an uid, such as those
352 implicitly created by destructors. We could include them in the condition
353 coverage for completeness (i.e. condition coverage implies (implicit) branch
354 coverage), but they have no natural buckets and should all be single-term.
355 For now these are ignored and given uid = 0, and branch coverage is left to
356 -fprofile-arcs.
358 Under optimization, COND_EXPRs may be folded, replaced with switches,
359 min-max, etc., which leaves ghost identifiers in basic blocks that do not
360 end with a conditional jump. They are not really meaningful for condition
361 coverage anymore, but since coverage is unreliable under optimization anyway
362 this is not a big problem.
364 The cond_uids map in FN cannot be expected to exist. It will only be
365 created if it is needed, and a function may have gconds even though there
366 are none in source. This can be seen in PR gcov-profile/114601, when
367 -finstrument-functions-once is used and the function has no conditions. */
368 unsigned
369 condition_uid (struct function *fn, basic_block b)
371 gimple *stmt = gsi_stmt (gsi_last_bb (b));
372 if (!safe_is_a <gcond*> (stmt) || !fn->cond_uids)
373 return 0;
375 unsigned *v = fn->cond_uids->get (as_a <gcond*> (stmt));
376 return v ? *v : 0;
379 /* Compute the masking table.
381 Masking and short circuiting are deeply connected - masking occurs when
382 control flow reaches a state that is also reachable with short circuiting.
383 In fact, masking corresponds to short circuiting for the reversed
384 expression. This means we can find the limits, the last term in preceeding
385 subexpressions, by following the edges that short circuit to the same
386 outcome. The algorithm treats the CFG as a reduced order binary decision
387 diagram (see Randall E. Bryant's Graph Based Algorithms for Boolean
388 Function Manipulation (1987)).
390 In the simplest case a || b:

   [CFG diagram elided]
398 T has multiple incoming edges and is the outcome of a short circuit,
399 with top = a, bot = b. The top node (a) is masked when the edge (b, T) is
400 taken.
402 The names "top" and "bot" refer to a pair of nodes with a shared
403 successor. The top is always the node corresponding to the left-most
404 operand of the two, and it holds that top < bot in a topological ordering.
406 Now consider (a && b) || (c && d) and its masking table:
   [CFG diagram elided]
418 a[0] = {}
419 a[1] = {}
420 b[0] = {a}
421 b[1] = {}
422 c[0] = {}
423 c[1] = {}
424 d[0] = {c}
425 d[1] = {a,b}
427 Note that 0 and 1 are indices and not boolean values - a[0] is the index in
428 the masking vector when a takes the true edge.
430 b[0] and d[0] are identical to the a || b example, and d[1] is the bot in
431 the triangle [d, b] -> T. b is the top node in the [d, b] relationship and
432 last term in (a && b). To find the other masked terms we use the fact that
433 all paths in an expression go through either of the outcomes, found by
434 collecting all non-complex edges that go out of the expression (the
435 neighborhood). In some cases the outgoing edges go through intermediate (or
436 bypass) nodes, and we collect these paths too (see contract_edge_up).
438 We find the terms by marking the outcomes (in this case c, T), walking the
439 predecessors starting at top (in this case b), and masking nodes when both
440 successors are marked.
442 The masking table is represented as two bitfields per term in the expression
443 with the index corresponding to the term in the Boolean expression.
444 a || b && c becomes the term vector [a b c] and the masking table [a[0]
445 a[1] b[0] ...]. The kth bit of a masking vector is set if the kth term
446 is masked by taking the edge.
448 The output masks are uint64_t (the practical maximum for gcov_type_node on
449 any target) as they have to be big enough to store the target-sized gcov
450 types independently of the host. */
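/* As a concrete illustration (not part of the pass) of this layout for the
   a || b example above: with term indices a=0 and b=1 the flattened MASKS
   array is [a[0] a[1] b[0] b[1]], and only

     masks[2] = 0x1     (b taking the true edge masks a)

   is non-zero, since no other edge masks anything.  */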
451 void
452 masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks,
453 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
455 gcc_assert (blocks.is_valid ());
456 gcc_assert (!blocks.empty ());
457 gcc_assert (maps.is_valid ());
458 gcc_assert (masks.is_valid ());
459 gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS);
461 if (bitmap_count_bits (maps[0]) == 1)
462 return;
464 sbitmap marks = ctx.G1;
465 const sbitmap core = maps[0];
466 const sbitmap allg = maps[1];
467 vec<basic_block>& queue = ctx.B1;
468 vec<basic_block>& body = ctx.B2;
469 const vec<int>& top_index = ctx.top_index;
471 /* Set up for the iteration - include the outcome nodes in the traversal.
472 The algorithm compares pairs of nodes and is not really sensitive to
473 traversal order, but needs to maintain topological order because the
474 index of masking nodes maps to the index in the accumulators. We must
475 also check the incoming-to-outcome pairs. These edges may in turn be
476 split (this happens with labels on top of then/else blocks) so we must
477 follow any single-in single-out path. The non-condition blocks do not
478 have to be in order, as they will not be considered for the set-bit
479 index. */
480 body.truncate (0);
481 body.reserve (blocks.size () + 2);
482 for (const basic_block b : blocks)
483 if (bitmap_bit_p (core, b->index))
484 body.quick_push (b);
486 for (basic_block b : blocks)
488 if (!bitmap_bit_p (core, b->index))
489 continue;
491 for (edge e : b->succs)
493 if (e->flags & EDGE_COMPLEX)
494 continue;
495 if (bitmap_bit_p (allg, e->dest->index))
496 continue;
497 body.safe_push (e->dest);
499 /* There may be multiple nodes between the condition edge and the
500 actual outcome, and we need to know when these paths join to
501 determine if there is short circuit/masking. This is
502 effectively creating a virtual edge from the condition node to
503 the real outcome. */
504 while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
506 e = single_edge (e->dest->succs);
507 body.safe_push (e->dest);
512 /* Find the masking. The leftmost element cannot mask anything, so
513 start at 1. */
514 for (size_t i = 1; i != body.length (); i++)
516 const basic_block b = body[i];
517 for (edge e1 : b->preds)
518 for (edge e2 : b->preds)
520 if (e1 == e2)
521 continue;
522 if ((e1->flags | e2->flags) & EDGE_COMPLEX)
523 continue;
525 edge etop = contract_edge_up (e1);
526 edge ebot = contract_edge_up (e2);
527 gcc_assert (etop != ebot);
529 const basic_block top = etop->src;
530 const basic_block bot = ebot->src;
531 const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION;
532 if (!cond)
533 continue;
534 if (top_index[top->index] > top_index[bot->index])
535 continue;
536 if (!bitmap_bit_p (core, top->index))
537 continue;
538 if (!bitmap_bit_p (core, bot->index))
539 continue;
541 outcomes out = conditional_succs (top);
542 gcc_assert (out);
543 bitmap_clear (marks);
544 bitmap_set_bit (marks, out.t->index);
545 bitmap_set_bit (marks, out.f->index);
546 queue.truncate (0);
547 queue.safe_push (top);
549 // The edge bot -> outcome triggers the masking
550 const int m = 2*index_of (bot, body) + condition_index (cond);
551 gcc_assert (m >= 0);
552 while (!queue.is_empty ())
554 basic_block q = queue.pop ();
555 /* q may have been processed & completed by being added to the
556 queue multiple times, so check that there is still work to
557 do before continuing. */
558 if (bitmap_bit_p (marks, q->index))
559 continue;
561 outcomes succs = conditional_succs (q);
562 if (!bitmap_bit_p (marks, succs.t->index))
563 continue;
564 if (!bitmap_bit_p (marks, succs.f->index))
565 continue;
567 const int index = index_of (q, body);
568 gcc_assert (index != -1);
569 masks[m] |= uint64_t (1) << index;
570 bitmap_set_bit (marks, q->index);
572 for (edge e : q->preds)
574 e = contract_edge_up (e);
575 if (e->flags & EDGE_DFS_BACK)
576 continue;
577 if (bitmap_bit_p (marks, e->src->index))
578 continue;
579 if (!bitmap_bit_p (core, e->src->index))
580 continue;
581 queue.safe_push (e->src);
588 /* Emit LHS = RHS on edges. This is just shorthand for building the
589 assignment and immediately putting it on the edge, which otherwise becomes
590 noisy. */
591 tree
592 emit_assign (edge e, tree lhs, tree rhs)
594 gassign *w = gimple_build_assign (lhs, rhs);
595 gsi_insert_on_edge (e, w);
596 return lhs;
599 /* Emit lhs = RHS on edges. The lhs (a new SSA name) is created. */
600 tree
601 emit_assign (edge e, tree rhs)
603 return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
606 /* Emit LHS = OP1 <OP> OP2 on edges. */
607 tree
608 emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE)
610 tree lhs = make_ssa_name (gcov_type_node);
611 gassign *w = gimple_build_assign (lhs, op, op1, op2);
612 gsi_insert_on_edge (e, w);
613 return lhs;
616 /* Visitor for make_top_index. */
617 void
618 make_top_index_visit (basic_block b, vec<basic_block>& L, vec<int>& marks)
620 if (marks[b->index])
621 return;
623 /* Follow the false edge first, if it exists, so that true paths are given
624 the lower index in the ordering. Any iteration order
625 would yield a valid and useful topological ordering, but making sure the
626 true branch has the lower index first makes reporting work better for
627 expressions with ternaries. Walk the false branch first because the
628 array will be reversed to finalize the topological order.
630 With the wrong ordering (a ? b : c) && d could become [a c b d], but the
631 (expected) order is really [a b c d]. */
633 const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE;
634 for (edge e : b->succs)
635 if ((e->flags & false_fwd) == EDGE_FALSE_VALUE)
636 make_top_index_visit (e->dest, L, marks);
638 for (edge e : b->succs)
639 if (!(e->flags & false_fwd))
640 make_top_index_visit (e->dest, L, marks);
642 marks[b->index] = 1;
643 L.quick_push (b);
646 /* Find a topological sorting of the blocks in a function so that left operands
647 are before right operands including subexpressions. Sorting on block index
648 does not guarantee this property and the syntactical order of terms is very
649 important to the condition coverage. The sorting algorithm is from Cormen
650 et al (2001) but with back-edges ignored and thus there is no need for
651 temporary marks (for cycle detection). The L argument is a buffer/working
652 memory, and the output will be written to TOP_INDEX.
654 For the expression (a || (b && c) || d) the blocks should be [a b c d]. */
655 void
656 make_top_index (array_slice<basic_block> blocks, vec<basic_block>& L,
657 vec<int>& top_index)
659 L.truncate (0);
660 L.reserve (blocks.size ());
662 /* Use of the output map as a temporary for tracking visited status. */
663 top_index.truncate (0);
664 top_index.safe_grow_cleared (blocks.size ());
665 for (const basic_block b : blocks)
666 make_top_index_visit (b, L, top_index);
668 /* Insert canaries - if there are unreachable nodes (for example infinite
669 loops) then the unreachable nodes should never be needed for comparison,
670 and L.length () < max_index. An index mapping should also never be
671 recorded twice. */
672 for (unsigned i = 0; i != top_index.length (); i++)
673 top_index[i] = -1;
675 gcc_assert (blocks.size () == L.length ());
676 L.reverse ();
677 const unsigned nblocks = L.length ();
678 for (unsigned i = 0; i != nblocks; i++)
680 gcc_assert (L[i]->index != -1);
681 top_index[L[i]->index] = int (i);
685 /* Find all nodes including non-conditions in a Boolean expression. We need to
686 know the paths through the expression so that the masking and
687 instrumentation phases can limit searches and know what subgraphs must be
688 threaded through, but not counted, such as the (b || c) in
689 a && fn (b || c) && d.
691 It is essentially the intersection of downwards paths from the expression
692 nodes EXPR to their common post-dominator and upwards from EXPR to their common dominator.
693 Finding the dominator is slightly more involved than picking the first/last,
694 particularly under optimization, because both incoming and outgoing paths
695 may have multiple entries/exits.
697 It is assumed GRAPH is an array_slice of the basic blocks of this function
698 sorted by the basic block index. */
699 vec<basic_block>&
700 paths_between (conds_ctx &ctx, array_slice<basic_block> graph,
701 const vec<basic_block>& expr)
703 if (expr.length () == 1)
705 ctx.blocks.truncate (0);
706 ctx.blocks.safe_push (expr[0]);
707 return ctx.blocks;
710 basic_block dom;
711 sbitmap up = ctx.G1;
712 sbitmap down = ctx.G2;
713 sbitmap paths = ctx.G3;
714 vec<basic_block>& queue = ctx.B1;
716 queue.truncate (0);
717 bitmap_clear (down);
718 dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]);
719 for (basic_block b : expr)
720 if (dom != b)
721 dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b);
722 queue.safe_splice (expr);
723 while (!queue.is_empty ())
725 basic_block b = queue.pop ();
726 if (!bitmap_set_bit (down, b->index))
727 continue;
728 if (b == dom)
729 continue;
730 for (edge e : b->succs)
731 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
732 queue.safe_push (e->dest);
735 queue.truncate (0);
736 bitmap_clear (up);
737 dom = expr[0];
738 for (basic_block b : expr)
739 if (dom != b)
740 dom = nearest_common_dominator (CDI_DOMINATORS, dom, b);
741 queue.safe_splice (expr);
742 while (!queue.is_empty ())
744 basic_block b = queue.pop ();
745 if (!bitmap_set_bit (up, b->index))
746 continue;
747 if (b == dom)
748 continue;
749 for (edge e : b->preds)
750 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
751 queue.safe_push (e->src);
754 bitmap_and (paths, up, down);
755 vec<basic_block>& blocks = ctx.blocks;
756 blocks.truncate (0);
757 blocks.reserve (graph.size ());
758 sbitmap_iterator itr;
759 unsigned index;
760 EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr)
761 blocks.quick_push (graph[index]);
762 return blocks;
767 /* Context object for the condition coverage. This stores conds_ctx (the
768 buffers reused when analyzing the cfg) and the output arrays. This is
769 designed to be heap allocated and aggressively preallocates large buffers to
770 avoid having to reallocate for most programs. */
771 struct condcov
773 explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks),
774 m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks))
776 bitmap_vector_clear (m_maps, 2 * nblocks);
778 auto_vec<size_t, 128> m_index;
779 auto_vec<basic_block, 256> m_blocks;
780 auto_vec<uint64_t, 512> m_masks;
781 conds_ctx ctx;
782 sbitmap *m_maps;
785 /* Get the length, that is the number of Boolean expressions found. cov_length
786 is the one-past index for cov_{blocks,masks,maps}. */
787 size_t
788 cov_length (const struct condcov* cov)
790 if (cov->m_index.is_empty ())
791 return 0;
792 return cov->m_index.length () - 1;
795 /* The subgraph, including intermediates, for the nth Boolean expression. */
796 array_slice<basic_block>
797 cov_blocks (struct condcov* cov, size_t n)
799 if (n >= cov->m_index.length ())
800 return array_slice<basic_block>::invalid ();
802 basic_block *begin = cov->m_blocks.begin () + cov->m_index[n];
803 basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1];
804 return array_slice<basic_block> (begin, end - begin);
807 /* The masks for the nth Boolean expression. */
808 array_slice<uint64_t>
809 cov_masks (struct condcov* cov, size_t n)
811 if (n >= cov->m_index.length ())
812 return array_slice<uint64_t>::invalid ();
814 uint64_t *begin = cov->m_masks.begin () + 2*cov->m_index[n];
815 uint64_t *end = cov->m_masks.begin () + 2*cov->m_index[n + 1];
816 return array_slice<uint64_t> (begin, end - begin);
819 /* The maps for the nth Boolean expression. */
820 array_slice<sbitmap>
821 cov_maps (struct condcov* cov, size_t n)
823 if (n >= cov->m_index.length ())
824 return array_slice<sbitmap>::invalid ();
826 sbitmap *begin = cov->m_maps + 2*n;
827 sbitmap *end = begin + 2;
828 return array_slice<sbitmap> (begin, end - begin);
831 /* Deleter for condcov. */
832 void
833 cov_free (struct condcov* cov)
835 sbitmap_vector_free (cov->m_maps);
836 delete cov;
839 /* Condition coverage (MC/DC)
841 Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for
842 MC/DC" describe an algorithm for modified condition/decision coverage based
843 on AST analysis. This implementation instead analyzes the control flow graph
844 (interpreted as a binary decision diagram) to determine the masking vectors.
845 The individual phases are described in more detail closer to the
846 implementation.
848 The coverage only considers the positions, not the symbols, in a
849 conditional, e.g. !A || (!B && A) is a 3-term conditional even though A
850 appears twice. Subexpressions have no effect on term ordering:
851 (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose
852 arguments are Boolean expressions are treated as separate expressions, that
853 is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d].
855 The output for gcov is a vector of pairs of unsigned integers, interpreted
856 as bit-sets, where the bit index corresponds to the index of the condition
857 in the expression.
859 The returned condcov should be released by the caller with cov_free. */
860 struct condcov*
861 find_conditions (struct function *fn)
863 mark_dfs_back_edges (fn);
864 const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS);
865 const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS);
866 if (!have_dom)
867 calculate_dominance_info (CDI_DOMINATORS);
868 if (!have_post_dom)
869 calculate_dominance_info (CDI_POST_DOMINATORS);
871 const unsigned nblocks = n_basic_blocks_for_fn (fn);
872 basic_block *fnblocksp = basic_block_info_for_fn (fn)->address ();
873 condcov *cov = new condcov (nblocks);
874 conds_ctx& ctx = cov->ctx;
875 array_slice<basic_block> fnblocks (fnblocksp, nblocks);
876 make_top_index (fnblocks, ctx.B1, ctx.top_index);
878 /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...]. */
879 hash_map<int_hash<unsigned, 0>, vec<basic_block>> exprs;
880 for (basic_block b : fnblocks)
882 const unsigned uid = condition_uid (fn, b);
883 if (uid == 0)
884 continue;
885 exprs.get_or_insert (uid).safe_push (b);
888 /* Visit all reachable nodes and collect conditions. Topological order is
889 important so the first node of a boolean expression is visited first
890 (it will mark subsequent terms). */
891 cov->m_index.safe_push (0);
892 for (auto expr : exprs)
894 vec<basic_block>& conds = expr.second;
895 if (conds.length () > CONDITIONS_MAX_TERMS)
897 location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
898 warning_at (loc, OPT_Wcoverage_too_many_conditions,
899 "Too many conditions (found %u); giving up coverage",
900 conds.length ());
901 continue;
903 conds.sort (topological_cmp, &ctx.top_index);
904 vec<basic_block>& subgraph = paths_between (ctx, fnblocks, conds);
905 subgraph.sort (topological_cmp, &ctx.top_index);
906 const unsigned index = cov->m_index.length () - 1;
907 sbitmap condm = cov->m_maps[0 + 2*index];
908 sbitmap subgm = cov->m_maps[1 + 2*index];
909 for (basic_block b : conds)
910 bitmap_set_bit (condm, b->index);
911 for (basic_block b : subgraph)
912 bitmap_set_bit (subgm, b->index);
913 cov->m_blocks.safe_splice (subgraph);
914 cov->m_index.safe_push (cov->m_blocks.length ());
917 if (!have_dom)
918 free_dominance_info (fn, CDI_DOMINATORS);
919 if (!have_post_dom)
920 free_dominance_info (fn, CDI_POST_DOMINATORS);
922 cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
923 const size_t length = cov_length (cov);
924 for (size_t i = 0; i != length; i++)
925 masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
926 cov_masks (cov, i));
928 return cov;
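/* A usage sketch (illustrative only, inferred from the interface above): a
   caller is expected to do roughly

     struct condcov *cov = find_conditions (fn);
     size_t nconds = cov_length (cov);
     // allocate 2 * nconds counters in the GCOV_COUNTER_CONDS section, then:
     for (size_t i = 0; i != nconds; i++)
       instrument_decisions (cov_blocks (cov, i), i,
			     cov_maps (cov, i), cov_masks (cov, i));
     cov_free (cov);

   so each Boolean expression i gets one pair of accumulators.  */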
931 namespace
934 /* Stores the incoming edge and previous counters (in SSA form) on that edge
935 for the node e->dest. The counters record
936 the seen-true (0), seen-false (1), and current-mask (2). They are stored in
937 an array rather than proper members for access-by-index as the code paths
938 tend to be identical for the different counters. */
939 struct counters
941 edge e;
942 tree counter[3];
943 tree& operator [] (size_t i) { return counter[i]; }
946 /* Find the counters for the incoming edge e, or NULL if the edge has not been
947 recorded (could be for complex incoming edges). */
948 counters*
949 find_counters (vec<counters>& candidates, edge e)
951 for (counters& candidate : candidates)
952 if (candidate.e == e)
953 return &candidate;
954 return NULL;
957 /* Resolve the SSA for a specific counter KIND. If it is not modified by any
958 incoming edges, simply forward it, otherwise create a phi node of all the
959 candidate counters and return it. */
960 tree
961 resolve_counter (vec<counters>& cands, size_t kind)
963 gcc_assert (!cands.is_empty ());
964 gcc_assert (kind < 3);
966 counters& fst = cands[0];
968 if (!fst.e || fst.e->dest->preds->length () == 1)
970 gcc_assert (cands.length () == 1);
971 return fst[kind];
974 tree zero0 = build_int_cst (gcov_type_node, 0);
975 tree ssa = make_ssa_name (gcov_type_node);
976 gphi *phi = create_phi_node (ssa, fst.e->dest);
977 for (edge e : fst.e->dest->preds)
979 counters *prev = find_counters (cands, e);
980 if (prev)
981 add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION);
982 else
984 tree zero = make_ssa_name (gcov_type_node);
985 gimple_stmt_iterator gsi = gsi_after_labels (e->src);
986 gassign *set = gimple_build_assign (zero, zero0);
987 gsi_insert_before (&gsi, set, GSI_NEW_STMT);
988 add_phi_arg (phi, zero, e, UNKNOWN_LOCATION);
991 return ssa;
994 /* Resolve all the counters for a node. Note that the edge is undefined, as
995 the counters are intended to form the base to push to the successors, and
996 because the edge is only meaningful for nodes with a single predecessor. */
997 counters
998 resolve_counters (vec<counters>& cands)
1000 counters next;
1001 next[0] = resolve_counter (cands, 0);
1002 next[1] = resolve_counter (cands, 1);
1003 next[2] = resolve_counter (cands, 2);
1004 return next;
1009 /* Add instrumentation to a decision subgraph. EXPR should be the
1010 (topologically sorted) block of nodes returned by cov_blocks, MAPS the
1011 bitmaps returned by cov_maps, and MASKS the block of bitsets returned by
1012 cov_masks. CONDNO should be the index of this condition in the function,
1013 i.e. the same argument given to cov_{blocks,masks,maps}. EXPR may contain nodes
1014 in-between the conditions, e.g. when an operand contains a function call,
1015 or there is a setjmp and the cfg is filled with complex edges.
1017 Every node is annotated with three counters: the true, false, and mask
1018 value. First, walk the graph and determine if there are multiple
1019 possible values for either accumulator depending on the path taken, in which
1020 case a phi node is created and registered as the accumulator. Then, those
1021 values are pushed as accumulators to the immediate successors. For some
1022 very particular programs there may be multiple paths into the expression
1023 (e.g. when prior terms are determined by a surrounding conditional) in which
1024 case the default zero-counter is pushed, otherwise all predecessors will
1025 have been considered before the successor because of topologically ordered
1026 traversal. Finally, expr is traversed again to look for edges to the
1027 outcomes, that is, edges with a destination outside of expr, and the local
1028 accumulators are flushed to the global gcov counters on these edges. In
1029 some cases there are edge splits that cause 3+ edges to the two outcome
1030 nodes.
1032 If a complex edge is taken (e.g. on a longjmp) we attempt to poison the
1033 accumulators so that there is no change to the global counters,
1034 but this has proven unreliable in the presence of undefined behavior, see
1035 the setjmp003 test.
1037 It is important that the flushes happen on the basic condition outgoing
1038 edge, otherwise flushes could be lost to exception handling or other
1039 abnormal control flow. */
1040 size_t
1041 instrument_decisions (array_slice<basic_block> expr, size_t condno,
1042 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
1044 tree zero = build_int_cst (gcov_type_node, 0);
1045 tree poison = build_int_cst (gcov_type_node, ~0ULL);
1046 const sbitmap core = maps[0];
1047 const sbitmap allg = maps[1];
1049 hash_map<basic_block, vec<counters>> table;
1050 counters zerocounter;
1051 zerocounter.e = NULL;
1052 zerocounter[0] = zero;
1053 zerocounter[1] = zero;
1054 zerocounter[2] = zero;
1056 unsigned xi = 0;
1057 bool increment = false;
1058 tree rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1059 for (basic_block current : expr)
1061 vec<counters>& candidates = table.get_or_insert (current);
1062 if (candidates.is_empty ())
1063 candidates.safe_push (zerocounter);
1064 counters prev = resolve_counters (candidates);
1066 if (increment)
1068 xi += 1;
1069 gcc_checking_assert (xi < sizeof (uint64_t) * BITS_PER_UNIT);
1070 rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1071 increment = false;
1074 for (edge e : current->succs)
1076 counters next = prev;
1077 next.e = e;
1079 if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
1081 const int k = condition_index (e->flags);
1082 next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
1083 if (masks[2*xi + k])
1085 tree m = build_int_cst (gcov_type_node, masks[2*xi + k]);
1086 next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
1088 increment = true;
1090 else if (e->flags & EDGE_COMPLEX)
1092 /* A complex edge has been taken - wipe the accumulators and
1093 poison the mask so that this path does not contribute to
1094 coverage. */
1095 next[0] = poison;
1096 next[1] = poison;
1097 next[2] = poison;
1099 table.get_or_insert (e->dest).safe_push (next);
1103 /* Since xi is also the return value (the number of conditions), make sure
1104 to include the increment for the last basic block. */
1105 if (increment)
1106 xi += 1;
1108 gcc_assert (xi == bitmap_count_bits (core));
1110 const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1111 const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC;
1112 const tree atomic_ior = builtin_decl_explicit
1113 (TYPE_PRECISION (gcov_type_node) > 32
1114 ? BUILT_IN_ATOMIC_FETCH_OR_8
1115 : BUILT_IN_ATOMIC_FETCH_OR_4);
1117 /* Flush to the gcov accumulators. */
1118 for (const basic_block b : expr)
1120 if (!bitmap_bit_p (core, b->index))
1121 continue;
1123 for (edge e : b->succs)
1125 /* Flush the accumulators on leaving the Boolean expression. The
1126 destination may be inside the expression only when it returns to
1127 the loop header, such as do { ... } while (x); */
1128 if (bitmap_bit_p (allg, e->dest->index)) {
1129 if (!(e->flags & EDGE_DFS_BACK))
1130 continue;
1131 if (e->dest != expr[0])
1132 continue;
1135 vec<counters> *cands = table.get (e->dest);
1136 gcc_assert (cands);
1137 counters *prevp = find_counters (*cands, e);
1138 gcc_assert (prevp);
1139 counters prev = *prevp;
1141 /* _true &= ~mask, _false &= ~mask */
1142 counters next;
1143 next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
1144 next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
1145 next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);
1147 /* _global_true |= _true, _global_false |= _false */
1148 for (size_t k = 0; k != 2; ++k)
1150 tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS,
1151 2*condno + k);
1152 if (atomic)
1154 ref = unshare_expr (ref);
1155 gcall *flush = gimple_build_call (atomic_ior, 3,
1156 build_addr (ref),
1157 next[k], relaxed);
1158 gsi_insert_on_edge (e, flush);
1160 else
1162 tree get = emit_assign (e, ref);
1163 tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
1164 emit_assign (e, unshare_expr (ref), put);
1170 return xi;
1173 #undef CONDITIONS_MAX_TERMS
1174 #undef EDGE_CONDITION
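/* Illustration (not emitted verbatim) of what instrument_decisions adds for
   term number i of expression CONDNO; the names _t, _f, _mask and
   __gcov_conds are placeholders for the SSA accumulators and the
   GCOV_COUNTER_CONDS counters:

     on the true edge of term i:
       _t |= (1 << i);  _mask |= MASKS[2*i];      // only when the mask is non-zero
     on the false edge of term i:
       _f |= (1 << i);  _mask |= MASKS[2*i + 1];

     on every edge leaving the expression:
       __gcov_conds[2*CONDNO]     |= _t & ~_mask;  // plain or atomic IOR,
       __gcov_conds[2*CONDNO + 1] |= _f & ~_mask;  // per -fprofile-update  */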
1176 /* Do initialization work for the edge profiler. */
1178 /* Add code:
1179 __thread gcov* __gcov_indirect_call.counters; // pointer to actual counter
1180 __thread void* __gcov_indirect_call.callee; // actual callee address
1181 __thread int __gcov_function_counter; // time profiler function counter */
1183 static void
1184 init_ic_make_global_vars (void)
1186 tree gcov_type_ptr;
1188 gcov_type_ptr = build_pointer_type (get_gcov_type ());
1190 tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
1192 /* callee */
1193 ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
1194 ptr_type_node);
1196 /* counters */
1197 ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
1198 NULL_TREE, gcov_type_ptr);
1199 DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field;
1201 finish_builtin_struct (tuple_type, "indirect_call_tuple",
1202 ic_tuple_counters_field, NULL_TREE);
1204 ic_tuple_var
1205 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1206 get_identifier ("__gcov_indirect_call"), tuple_type);
1207 TREE_PUBLIC (ic_tuple_var) = 1;
1208 DECL_ARTIFICIAL (ic_tuple_var) = 1;
1209 DECL_INITIAL (ic_tuple_var) = NULL;
1210 DECL_EXTERNAL (ic_tuple_var) = 1;
1211 if (targetm.have_tls)
1212 set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var));
1215 /* Create the type and function decls for the interface with gcov. */
1217 void
1218 gimple_init_gcov_profiler (void)
1220 tree interval_profiler_fn_type;
1221 tree pow2_profiler_fn_type;
1222 tree topn_values_profiler_fn_type;
1223 tree gcov_type_ptr;
1224 tree ic_profiler_fn_type;
1225 tree average_profiler_fn_type;
1226 const char *fn_name;
1228 if (!gcov_type_node)
1230 const char *fn_suffix
1231 = flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1233 gcov_type_node = get_gcov_type ();
1234 gcov_type_ptr = build_pointer_type (gcov_type_node);
1236 /* void (*) (gcov_type *, gcov_type, int, unsigned) */
1237 interval_profiler_fn_type
1238 = build_function_type_list (void_type_node,
1239 gcov_type_ptr, gcov_type_node,
1240 integer_type_node,
1241 unsigned_type_node, NULL_TREE);
1242 fn_name = concat ("__gcov_interval_profiler", fn_suffix, NULL);
1243 tree_interval_profiler_fn = build_fn_decl (fn_name,
1244 interval_profiler_fn_type);
1245 free (CONST_CAST (char *, fn_name));
1246 TREE_NOTHROW (tree_interval_profiler_fn) = 1;
1247 DECL_ATTRIBUTES (tree_interval_profiler_fn)
1248 = tree_cons (get_identifier ("leaf"), NULL,
1249 DECL_ATTRIBUTES (tree_interval_profiler_fn));
1251 /* void (*) (gcov_type *, gcov_type) */
1252 pow2_profiler_fn_type
1253 = build_function_type_list (void_type_node,
1254 gcov_type_ptr, gcov_type_node,
1255 NULL_TREE);
1256 fn_name = concat ("__gcov_pow2_profiler", fn_suffix, NULL);
1257 tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type);
1258 free (CONST_CAST (char *, fn_name));
1259 TREE_NOTHROW (tree_pow2_profiler_fn) = 1;
1260 DECL_ATTRIBUTES (tree_pow2_profiler_fn)
1261 = tree_cons (get_identifier ("leaf"), NULL,
1262 DECL_ATTRIBUTES (tree_pow2_profiler_fn));
1264 /* void (*) (gcov_type *, gcov_type) */
1265 topn_values_profiler_fn_type
1266 = build_function_type_list (void_type_node,
1267 gcov_type_ptr, gcov_type_node,
1268 NULL_TREE);
1269 fn_name = concat ("__gcov_topn_values_profiler", fn_suffix, NULL);
1270 tree_topn_values_profiler_fn
1271 = build_fn_decl (fn_name, topn_values_profiler_fn_type);
1272 free (CONST_CAST (char *, fn_name));
1274 TREE_NOTHROW (tree_topn_values_profiler_fn) = 1;
1275 DECL_ATTRIBUTES (tree_topn_values_profiler_fn)
1276 = tree_cons (get_identifier ("leaf"), NULL,
1277 DECL_ATTRIBUTES (tree_topn_values_profiler_fn));
1279 init_ic_make_global_vars ();
1281 /* void (*) (gcov_type, void *) */
1282 ic_profiler_fn_type
1283 = build_function_type_list (void_type_node,
1284 gcov_type_node,
1285 ptr_type_node,
1286 NULL_TREE);
1287 fn_name = concat ("__gcov_indirect_call_profiler_v4", fn_suffix, NULL);
1288 tree_indirect_call_profiler_fn
1289 = build_fn_decl (fn_name, ic_profiler_fn_type);
1290 free (CONST_CAST (char *, fn_name));
1292 TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
1293 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
1294 = tree_cons (get_identifier ("leaf"), NULL,
1295 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
1297 tree_time_profiler_counter
1298 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1299 get_identifier ("__gcov_time_profiler_counter"),
1300 get_gcov_type ());
1301 TREE_PUBLIC (tree_time_profiler_counter) = 1;
1302 DECL_EXTERNAL (tree_time_profiler_counter) = 1;
1303 TREE_STATIC (tree_time_profiler_counter) = 1;
1304 DECL_ARTIFICIAL (tree_time_profiler_counter) = 1;
1305 DECL_INITIAL (tree_time_profiler_counter) = NULL;
1307 /* void (*) (gcov_type *, gcov_type) */
1308 average_profiler_fn_type
1309 = build_function_type_list (void_type_node,
1310 gcov_type_ptr, gcov_type_node, NULL_TREE);
1311 fn_name = concat ("__gcov_average_profiler", fn_suffix, NULL);
1312 tree_average_profiler_fn = build_fn_decl (fn_name,
1313 average_profiler_fn_type);
1314 free (CONST_CAST (char *, fn_name));
1315 TREE_NOTHROW (tree_average_profiler_fn) = 1;
1316 DECL_ATTRIBUTES (tree_average_profiler_fn)
1317 = tree_cons (get_identifier ("leaf"), NULL,
1318 DECL_ATTRIBUTES (tree_average_profiler_fn));
1319 fn_name = concat ("__gcov_ior_profiler", fn_suffix, NULL);
1320 tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type);
1321 free (CONST_CAST (char *, fn_name));
1322 TREE_NOTHROW (tree_ior_profiler_fn) = 1;
1323 DECL_ATTRIBUTES (tree_ior_profiler_fn)
1324 = tree_cons (get_identifier ("leaf"), NULL,
1325 DECL_ATTRIBUTES (tree_ior_profiler_fn));
1327 /* LTO streamer needs assembler names. Because we create these decls
1328 late, we need to initialize them by hand. */
1329 DECL_ASSEMBLER_NAME (tree_interval_profiler_fn);
1330 DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
1331 DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn);
1332 DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
1333 DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
1334 DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
1338 /* If RESULT is not null, then output instructions as GIMPLE trees to assign
1339 the updated counter from CALL of FUNC to RESULT. Insert the CALL and the
1340 optional assignment instructions to GSI. Use NAME for temporary values. */
1342 static inline void
1343 gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func,
1344 tree result, const char *name)
1346 if (result)
1348 tree result_type = TREE_TYPE (TREE_TYPE (func));
1349 tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1350 gimple_set_lhs (call, tmp1);
1351 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1352 tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name);
1353 gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1);
1354 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1355 assign = gimple_build_assign (result, tmp2);
1356 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1358 else
1359 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1362 /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is
1363 not null, then assign the updated counter value to RESULT. Insert the
1364 instructions to GSI. Use NAME for temporary values. */
1366 static inline void
1367 gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result,
1368 const char *name)
1370 tree type = gcov_type_node;
1371 tree addr = build_fold_addr_expr (counter);
1372 tree one = build_int_cst (type, 1);
1373 tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1375 if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN
1376 || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT))
1378 /* __atomic_add_fetch (&counter, 1, MEMMODEL_RELAXED); */
1379 tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32
1380 ? BUILT_IN_ATOMIC_ADD_FETCH_8
1381 : BUILT_IN_ATOMIC_ADD_FETCH_4);
1382 gcall *call = gimple_build_call (f, 3, addr, one, relaxed);
1383 gen_assign_counter_update (gsi, call, f, result, name);
1385 else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT
1386 || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL))
1388 /* low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED);
1389 high_inc = low == 0 ? 1 : 0;
1390 __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */
1391 tree zero32 = build_zero_cst (uint32_type_node);
1392 tree one32 = build_one_cst (uint32_type_node);
1393 tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name);
1394 tree four = build_int_cst (size_type_node, 4);
1395 gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR,
1396 addr, four);
1397 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1398 if (WORDS_BIG_ENDIAN)
1399 std::swap (addr, addr_high);
1400 tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
1401 gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
1402 tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
1403 gimple_call_set_lhs (call1, low);
1404 gsi_insert_after (gsi, call1, GSI_NEW_STMT);
1405 tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name);
1406 gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low,
1407 zero32);
1408 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1409 tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name);
1410 gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR,
1411 is_zero, one32, zero32);
1412 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1413 gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc,
1414 relaxed);
1415 gsi_insert_after (gsi, call2, GSI_NEW_STMT);
1417 else
1419 tree tmp1 = make_temp_ssa_name (type, NULL, name);
1420 gassign *assign1 = gimple_build_assign (tmp1, counter);
1421 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1422 tree tmp2 = make_temp_ssa_name (type, NULL, name);
1423 gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one);
1424 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1425 gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2);
1426 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1427 if (result)
1429 gassign *assign4 = gimple_build_assign (result, tmp2);
1430 gsi_insert_after (gsi, assign4, GSI_NEW_STMT);
1435 /* Output instructions as GIMPLE trees to increment the edge
1436 execution count, and insert them on E. */
1438 void
1439 gimple_gen_edge_profiler (int edgeno, edge e)
1441 gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e));
1442 tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1443 gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1446 /* Emits code to get VALUE to instrument at GSI, and returns the
1447 variable containing the value. */
1449 static tree
1450 prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value)
1452 tree val = value->hvalue.value;
1453 if (POINTER_TYPE_P (TREE_TYPE (val)))
1454 val = fold_convert (build_nonstandard_integer_type
1455 (TYPE_PRECISION (TREE_TYPE (val)), 1), val);
1456 return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val),
1457 true, NULL_TREE, true, GSI_SAME_STMT);
1460 /* Output instructions as GIMPLE trees to increment the interval histogram
1461 counter. VALUE is the expression whose value is profiled. TAG is the
1462 tag of the section for counters, BASE is offset of the counter position. */
1464 void
1465 gimple_gen_interval_profiler (histogram_value value, unsigned tag)
1467 gimple *stmt = value->hvalue.stmt;
1468 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1469 tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr;
1470 gcall *call;
1471 tree val;
1472 tree start = build_int_cst_type (integer_type_node,
1473 value->hdata.intvl.int_start);
1474 tree steps = build_int_cst_type (unsigned_type_node,
1475 value->hdata.intvl.steps);
1477 ref_ptr = force_gimple_operand_gsi (&gsi,
1478 build_addr (ref),
1479 true, NULL_TREE, true, GSI_SAME_STMT);
1480 val = prepare_instrumented_value (&gsi, value);
1481 call = gimple_build_call (tree_interval_profiler_fn, 4,
1482 ref_ptr, val, start, steps);
1483 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1486 /* Output instructions as GIMPLE trees to increment the power of two histogram
1487 counter. VALUE is the expression whose value is profiled. TAG is the tag
1488 of the section for counters. */
1490 void
1491 gimple_gen_pow2_profiler (histogram_value value, unsigned tag)
1493 gimple *stmt = value->hvalue.stmt;
1494 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1495 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1496 gcall *call;
1497 tree val;
1499 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1500 true, NULL_TREE, true, GSI_SAME_STMT);
1501 val = prepare_instrumented_value (&gsi, value);
1502 call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val);
1503 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1506 /* Output instructions as GIMPLE trees for code to find the N most common
1507 values. VALUE is the expression whose value is profiled. TAG is the tag
1508 of the section for counters. */
1510 void
1511 gimple_gen_topn_values_profiler (histogram_value value, unsigned tag)
1513 gimple *stmt = value->hvalue.stmt;
1514 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1515 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1516 gcall *call;
1517 tree val;
1519 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1520 true, NULL_TREE, true, GSI_SAME_STMT);
1521 val = prepare_instrumented_value (&gsi, value);
1522 call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val);
1523 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1527 /* Output instructions as GIMPLE trees for code to find the most
1528 common called function in an indirect call.
1529 VALUE is the call expression whose indirect callee is profiled.
1530 TAG is the tag of the section for counters. */
1532 void
1533 gimple_gen_ic_profiler (histogram_value value, unsigned tag)
1535 tree tmp1;
1536 gassign *stmt1, *stmt2, *stmt3;
1537 gimple *stmt = value->hvalue.stmt;
1538 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1539 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1541 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1542 true, NULL_TREE, true, GSI_SAME_STMT);
1544 /* Insert code:
1546 stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr ();
1547 stmt2: tmp1 = (void *) (indirect call argument value)
1548 stmt3: __gcov_indirect_call.callee = tmp1;
1550 Example:
1551 f_1 = foo;
1552 __gcov_indirect_call.counters = &__gcov4.main[0];
1553 PROF_fn_9 = f_1;
1554 __gcov_indirect_call.callee = PROF_fn_9;
1555 _4 = f_1 (); */
1558 tree gcov_type_ptr = build_pointer_type (get_gcov_type ());
1560 tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr,
1561 ic_tuple_var, ic_tuple_counters_field, NULL_TREE);
1563 stmt1 = gimple_build_assign (counter_ref, ref_ptr);
1564 tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
1565 stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
1566 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1567 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1568 stmt3 = gimple_build_assign (callee_ref, tmp1);
1570 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1571 gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1572 gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT);
1576 /* Output instructions as GIMPLE trees for code to find the most
1577 common called function in an indirect call. Insert instructions at the
1578 beginning of every possible called function. */
1581 void
1582 gimple_gen_ic_func_profiler (void)
1584 struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1585 gcall *stmt1;
1586 tree tree_uid, cur_func, void0;
1588 /* Disable indirect call profiling for an IFUNC resolver and its
1589 callees since it requires TLS which hasn't been set up yet when
1590 the dynamic linker is resolving IFUNC symbols. See
1591 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115 */
1593 if (c_node->only_called_directly_p ()
1594 || c_node->called_by_ifunc_resolver)
1595 return;
1597 gimple_init_gcov_profiler ();
1599 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1600 basic_block cond_bb = split_edge (single_succ_edge (entry));
1601 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1603 /* We need to do an extra split in order to not create an input
1604 for a possible PHI node. */
1605 split_edge (single_succ_edge (update_bb));
1607 edge true_edge = single_succ_edge (cond_bb);
1608 true_edge->flags = EDGE_TRUE_VALUE;
1610 profile_probability probability;
1611 if (DECL_VIRTUAL_P (current_function_decl))
1612 probability = profile_probability::very_likely ();
1613 else
1614 probability = profile_probability::unlikely ();
1616 true_edge->probability = probability;
1617 edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
1618 EDGE_FALSE_VALUE);
1619 e->probability = true_edge->probability.invert ();
1621 /* Insert code:
1623 if (__gcov_indirect_call.callee != NULL)
1624 __gcov_indirect_call_profiler_v3 (profile_id, &current_function_decl);
1626 The function __gcov_indirect_call_profiler_v3 is responsible for
1627 resetting __gcov_indirect_call.callee to NULL. */
1629 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1630 void0 = build_int_cst (ptr_type_node, 0);
1632 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1633 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1635 tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE,
1636 true, GSI_SAME_STMT);
1638 gcond *cond = gimple_build_cond (NE_EXPR, ref,
1639 void0, NULL, NULL);
1640 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1642 gsi = gsi_after_labels (update_bb);
1644 cur_func = force_gimple_operand_gsi (&gsi,
1645 build_addr (current_function_decl),
1646 true, NULL_TREE,
1647 true, GSI_SAME_STMT);
1648 tree_uid = build_int_cst
1649 (gcov_type_node,
1650 cgraph_node::get (current_function_decl)->profile_id);
1651 stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
1652 tree_uid, cur_func);
1653 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1656 /* Output instructions as GIMPLE trees at the beginning of each function
1657 to update the time-profiler counter. TAG is the tag of the section
1658 for counters. */
1660 void
1661 gimple_gen_time_profiler (unsigned tag)
1663 tree type = get_gcov_type ();
1664 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1665 basic_block cond_bb = split_edge (single_succ_edge (entry));
1666 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1668 /* We need to do an extra split in order to not create an input
1669 for a possible PHI node. */
1670 split_edge (single_succ_edge (update_bb));
1672 edge true_edge = single_succ_edge (cond_bb);
1673 true_edge->flags = EDGE_TRUE_VALUE;
1674 true_edge->probability = profile_probability::unlikely ();
1675 edge e
1676 = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1677 e->probability = true_edge->probability.invert ();
1679 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1680 tree original_ref = tree_coverage_counter_ref (tag, 0);
1681 tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE,
1682 true, GSI_SAME_STMT);
1684 /* Emit: if (counters[0] == 0) to update the counter only on the first execution. */
1685 gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0),
1686 NULL, NULL);
1687 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1689 /* Emit: counters[0] = ++__gcov_time_profiler_counter. */
1690 gsi = gsi_start_bb (update_bb);
1691 gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
1692 "PROF_time_profile");
1695 /* Output instructions as GIMPLE trees to increment the average histogram
1696 counter. VALUE is the expression whose value is profiled. TAG is the
1697 tag of the section for counters. */
1699 void
1700 gimple_gen_average_profiler (histogram_value value, unsigned tag)
1702 gimple *stmt = value->hvalue.stmt;
1703 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1704 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1705 gcall *call;
1706 tree val;
1708 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1709 true, NULL_TREE,
1710 true, GSI_SAME_STMT);
1711 val = prepare_instrumented_value (&gsi, value);
1712 call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val);
1713 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1716 /* Output instructions as GIMPLE trees to increment the ior histogram
1717 counter. VALUE is the expression whose value is profiled. TAG is the
1718 tag of the section for counters. */
1720 void
1721 gimple_gen_ior_profiler (histogram_value value, unsigned tag)
1723 gimple *stmt = value->hvalue.stmt;
1724 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1725 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1726 gcall *call;
1727 tree val;
1729 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1730 true, NULL_TREE, true, GSI_SAME_STMT);
1731 val = prepare_instrumented_value (&gsi, value);
1732 call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val);
1733 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
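/* Both gimple_gen_average_profiler and gimple_gen_ior_profiler above follow
   the same shape: gimplify the address of counter 0 for TAG, materialize the
   profiled value with prepare_instrumented_value, and insert a call to the
   hook bound to tree_average_profiler_fn resp. tree_ior_profiler_fn right
   before the profiled statement.  */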
1736 static vec<regex_t> profile_filter_files;
1737 static vec<regex_t> profile_exclude_files;
1739 /* Parse the list of regular expressions in REGEX (separated by
1740 semicolons), compile each into a regex_t and save them into the V
1741 vector. If a regular expression fails to compile, an error message is
1742 printed for FLAG_NAME. */
1744 static void
1745 parse_profile_filter (const char *regex, vec<regex_t> *v,
1746 const char *flag_name)
1748 v->create (4);
1749 if (regex != NULL)
1751 char *str = xstrdup (regex);
1752 for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
1754 regex_t r;
1755 if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
1757 error ("invalid regular expression %qs in %qs",
1758 p, flag_name);
1759 return;
1762 v->safe_push (r);
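/* For example, -fprofile-filter-files="\.c$;include/.*" (an illustrative
   value) is split on ';' and compiled into two regex_t entries.  */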
1767 /* Parse values of -fprofile-filter-files and -fprofile-exclude-files
1768 options. */
1770 static void
1771 parse_profile_file_filtering ()
1773 parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
1774 "-fprofile-filter-files");
1775 parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
1776 "-fprofile-exclude-files");
1779 /* Release the vectors of compiled regular expressions. */
1781 static void
1782 release_profile_file_filtering ()
1784 profile_filter_files.release ();
1785 profile_exclude_files.release ();
1788 /* Return true when FILENAME should be instrumented based on
1789 -fprofile-filter-files and -fprofile-exclude-files options. */
1791 static bool
1792 include_source_file_for_profile (const char *filename)
1794 /* First check whether the file matches one of the flag_profile_exclude_files regexes. */
1795 for (unsigned i = 0; i < profile_exclude_files.length (); i++)
1796 if (regexec (&profile_exclude_files[i],
1797 filename, 0, NULL, 0) == REG_NOERROR)
1798 return false;
1800 /* For non-empty flag_profile_filter_files include only files matching a
1801 regex in the flag. */
1802 if (profile_filter_files.is_empty ())
1803 return true;
1805 for (unsigned i = 0; i < profile_filter_files.length (); i++)
1806 if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1807 return true;
1809 return false;
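/* Illustrative behaviour of the checks above: with
   -fprofile-filter-files="\.c$" and -fprofile-exclude-files="gen",
   "parse.c" is instrumented, "foogen.c" is rejected by the exclude list,
   and "bar.cc" fails the filter; the exclude list is consulted first and
   always wins.  */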
1812 #ifndef HAVE_sync_compare_and_swapsi
1813 #define HAVE_sync_compare_and_swapsi 0
1814 #endif
1815 #ifndef HAVE_atomic_compare_and_swapsi
1816 #define HAVE_atomic_compare_and_swapsi 0
1817 #endif
1819 #ifndef HAVE_sync_compare_and_swapdi
1820 #define HAVE_sync_compare_and_swapdi 0
1821 #endif
1822 #ifndef HAVE_atomic_compare_and_swapdi
1823 #define HAVE_atomic_compare_and_swapdi 0
1824 #endif
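/* The defaults above make the capability checks in tree_profiling below
   compile on targets that do not provide the corresponding
   compare-and-swap patterns.  */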
1826 /* Profile all functions in the callgraph. */
1828 static unsigned int
1829 tree_profiling (void)
1831 struct cgraph_node *node;
1833 /* Verify whether we can utilize atomic update operations. */
1834 bool can_support_atomic = targetm.have_libatomic;
1835 unsigned HOST_WIDE_INT gcov_type_size
1836 = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ()));
1837 bool have_atomic_4
1838 = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi;
1839 bool have_atomic_8
1840 = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi;
1841 bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4;
1842 if (!can_support_atomic)
1844 if (gcov_type_size == 4)
1845 can_support_atomic = have_atomic_4;
1846 else if (gcov_type_size == 8)
1847 can_support_atomic = have_atomic_8;
1850 if (flag_profile_update != PROFILE_UPDATE_SINGLE && needs_split)
1851 counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1853 if (flag_profile_update == PROFILE_UPDATE_ATOMIC
1854 && !can_support_atomic)
1856 warning (0, "target does not support atomic profile update, "
1857 "single mode is selected");
1858 flag_profile_update = PROFILE_UPDATE_SINGLE;
1860 else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC)
1861 flag_profile_update
1862 = can_support_atomic ? PROFILE_UPDATE_ATOMIC : PROFILE_UPDATE_SINGLE;
1864 if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
1866 if (needs_split)
1867 counter_update = COUNTER_UPDATE_ATOMIC_SPLIT;
1868 else
1869 counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN;
1872 /* This is a small-ipa pass that gets called only once, from
1873 cgraphunit.cc:ipa_passes(). */
1874 gcc_assert (symtab->state == IPA_SSA);
1876 init_node_map (true);
1877 parse_profile_file_filtering ();
1879 FOR_EACH_DEFINED_FUNCTION (node)
1881 bool thunk = false;
1882 if (!gimple_has_body_p (node->decl) && !node->thunk)
1883 continue;
1885 /* Don't profile functions produced for builtin stuff. */
1886 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1887 continue;
1889 if (lookup_attribute ("no_profile_instrument_function",
1890 DECL_ATTRIBUTES (node->decl)))
1891 continue;
1892 /* Do not instrument extern inline functions when testing coverage.
1893 While this is not perfectly consistent (early inlined extern inlines
1894 will still get accounted), the testsuite expects that. */
1895 if (DECL_EXTERNAL (node->decl)
1896 && flag_test_coverage)
1897 continue;
1899 const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl));
1900 if (!include_source_file_for_profile (file))
1901 continue;
1903 if (node->thunk)
1905 /* We cannot expand variadic thunks to GIMPLE. */
1906 if (stdarg_p (TREE_TYPE (node->decl)))
1907 continue;
1908 thunk = true;
1909 /* When generating a profile, expand the thunk to GIMPLE so it can be
1910 instrumented the same way as other functions. */
1911 if (profile_arc_flag || condition_coverage_flag)
1912 expand_thunk (node, false, true);
1913 /* Read the cgraph profile but keep the function as a thunk at
1914 profile-use time. */
1915 else
1917 read_thunk_profile (node);
1918 continue;
1922 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1924 if (dump_file)
1925 dump_function_header (dump_file, cfun->decl, dump_flags);
1927 /* Local pure-const may imply a need to fix up the CFG. */
1928 if (gimple_has_body_p (node->decl)
1929 && (execute_fixup_cfg () & TODO_cleanup_cfg))
1930 cleanup_tree_cfg ();
1932 branch_prob (thunk);
1934 if (! flag_branch_probabilities
1935 && flag_profile_values)
1936 gimple_gen_ic_func_profiler ();
1938 if (flag_branch_probabilities
1939 && !thunk
1940 && flag_profile_values
1941 && flag_value_profile_transformations
1942 && profile_status_for_fn (cfun) == PROFILE_READ)
1943 gimple_value_profile_transformations ();
1945 /* The above could hose dominator info. Currently there is
1946 none coming in, so this is a safety valve. It should be
1947 easy to adjust this if and when there is some. */
1948 free_dominance_info (CDI_DOMINATORS);
1949 free_dominance_info (CDI_POST_DOMINATORS);
1950 pop_cfun ();
1953 release_profile_file_filtering ();
1955 /* Drop pure/const flags from instrumented functions. */
1956 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1957 FOR_EACH_DEFINED_FUNCTION (node)
1959 if (!gimple_has_body_p (node->decl)
1960 || !(!node->clone_of
1961 || node->decl != node->clone_of->decl))
1962 continue;
1964 /* Don't profile functions produced for builtin stuff. */
1965 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1966 continue;
1968 node->set_const_flag (false, false);
1969 node->set_pure_flag (false, false);
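/* The const/pure flags are dropped above because the inserted counter
   updates write to global memory, which those attributes promise not to do;
   the loop below then updates the affected call statements.  */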
1972 /* Update call statements and rebuild the cgraph. */
1973 FOR_EACH_DEFINED_FUNCTION (node)
1975 basic_block bb;
1977 if (!gimple_has_body_p (node->decl)
1978 || !(!node->clone_of
1979 || node->decl != node->clone_of->decl))
1980 continue;
1982 /* Don't profile functions produced for builtin stuff. */
1983 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1984 continue;
1986 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1988 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1989 FOR_EACH_BB_FN (bb, cfun)
1991 gimple_stmt_iterator gsi;
1992 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1994 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
1995 if (!call || gimple_call_internal_p (call))
1996 continue;
1998 /* We do not clear pure/const on decls without body. */
1999 tree fndecl = gimple_call_fndecl (call);
2000 cgraph_node *callee;
2001 if (fndecl
2002 && (callee = cgraph_node::get (fndecl))
2003 && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
2004 continue;
2006 /* Drop the const attribute from the call type (the pure
2007 attribute is not available on types). */
2008 tree fntype = gimple_call_fntype (call);
2009 if (fntype && TYPE_READONLY (fntype))
2011 int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
2012 fntype = build_qualified_type (fntype, quals);
2013 gimple_call_set_fntype (call, fntype);
2016 /* Update virtual operands of calls to functions that are no longer
2017 const/pure. */
2018 update_stmt (call);
2022 /* Re-merge split blocks. */
2023 cleanup_tree_cfg ();
2024 update_ssa (TODO_update_ssa);
2026 cgraph_edge::rebuild_edges ();
2028 pop_cfun ();
2031 handle_missing_profiles ();
2033 del_node_map ();
2034 return 0;
2037 namespace {
2039 const pass_data pass_data_ipa_tree_profile =
2041 SIMPLE_IPA_PASS, /* type */
2042 "profile", /* name */
2043 OPTGROUP_NONE, /* optinfo_flags */
2044 TV_IPA_PROFILE, /* tv_id */
2045 0, /* properties_required */
2046 0, /* properties_provided */
2047 0, /* properties_destroyed */
2048 0, /* todo_flags_start */
2049 TODO_dump_symtab, /* todo_flags_finish */
2052 class pass_ipa_tree_profile : public simple_ipa_opt_pass
2054 public:
2055 pass_ipa_tree_profile (gcc::context *ctxt)
2056 : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt)
2059 /* opt_pass methods: */
2060 bool gate (function *) final override;
2061 unsigned int execute (function *) final override { return tree_profiling (); }
2063 }; // class pass_ipa_tree_profile
2065 bool
2066 pass_ipa_tree_profile::gate (function *)
2068 /* Run when profile instrumentation, profile use or test coverage is to
2069 be performed. For AutoFDO there is no instrumentation, so this pass
2070 is disabled. */
2071 return (!in_lto_p && !flag_auto_profile
2072 && (flag_branch_probabilities || flag_test_coverage
2073 || profile_arc_flag || condition_coverage_flag));
2076 } // anon namespace
2078 simple_ipa_opt_pass *
2079 make_pass_ipa_tree_profile (gcc::context *ctxt)
2081 return new pass_ipa_tree_profile (ctxt);
2084 #include "gt-tree-profile.h"