/*--------------------------------------------------------------------*/
/*--- Callgrind                                                    ---*/
/*---                                                       bbcc.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Callgrind, a Valgrind tool for call tracing.

   Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "global.h"
#include "costs.h"

#include "pub_tool_threadstate.h"
/*------------------------------------------------------------*/
/*--- BBCC operations                                      ---*/
/*------------------------------------------------------------*/

#define N_BBCC_INITIAL_ENTRIES  10437

/* BBCC table (key is BB/Context), per thread, resizable */
bbcc_hash current_bbccs;
void CLG_(init_bbcc_hash)(bbcc_hash* bbccs)
{
   Int i;

   CLG_ASSERT(bbccs != 0);

   bbccs->size    = N_BBCC_INITIAL_ENTRIES;
   bbccs->entries = 0;
   bbccs->table = (BBCC**) CLG_MALLOC("cl.bbcc.ibh.1",
                                      bbccs->size * sizeof(BBCC*));

   for (i = 0; i < bbccs->size; i++) bbccs->table[i] = NULL;
}
void CLG_(copy_current_bbcc_hash)(bbcc_hash* dst)
{
  CLG_ASSERT(dst != 0);

  dst->size    = current_bbccs.size;
  dst->entries = current_bbccs.entries;
  dst->table   = current_bbccs.table;
}
bbcc_hash* CLG_(get_current_bbcc_hash)()
{
  return &current_bbccs;
}
void CLG_(set_current_bbcc_hash)(bbcc_hash* h)
{
  CLG_ASSERT(h != 0);

  current_bbccs.size    = h->size;
  current_bbccs.entries = h->entries;
  current_bbccs.table   = h->table;
}
/*
 * Zero all costs of a BBCC
 */
void CLG_(zero_bbcc)(BBCC* bbcc)
{
  Int i;
  jCC* jcc;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUG(1, "  zero_bbcc: BB %#lx, Cxt %u "
           "(fn '%s', rec %u)\n",
           bb_addr(bbcc->bb),
           bbcc->cxt->base_number + bbcc->rec_index,
           bbcc->cxt->fn[0]->name,
           bbcc->rec_index);

  if ((bbcc->ecounter_sum == 0) &&
      (bbcc->ret_counter == 0)) return;

  for(i=0; i<bbcc->bb->cost_count; i++)
    bbcc->cost[i] = 0;
  for(i=0; i <= bbcc->bb->cjmp_count; i++) {
    bbcc->jmp[i].ecounter = 0;
    for(jcc=bbcc->jmp[i].jcc_list; jcc; jcc=jcc->next_from) {
      CLG_(init_cost)( CLG_(sets).full, jcc->cost );
      jcc->call_counter = 0;
    }
  }
  bbcc->ecounter_sum = 0;
  bbcc->ret_counter = 0;
}
void CLG_(forall_bbccs)(void (*func)(BBCC*))
{
  BBCC *bbcc, *bbcc2;
  int i, j;

  for (i = 0; i < current_bbccs.size; i++) {
    if ((bbcc = current_bbccs.table[i]) == NULL) continue;
    while (bbcc) {
      /* every bbcc should have a rec_array */
      CLG_ASSERT(bbcc->rec_array != 0);

      for(j=0; j<bbcc->cxt->fn[0]->separate_recursions; j++) {
        if ((bbcc2 = bbcc->rec_array[j]) == 0) continue;

        (*func)(bbcc2);
      }
      bbcc = bbcc->next;
    }
  }
}
/* All BBCCs for recursion level 0 are inserted into a
 * thread specific hash table with key
 * - address of BB structure (unique, as never freed)
 * - current context (includes caller chain)
 * BBCCs for other recursion levels are in bbcc->rec_array.
 *
 * The hash is used in setup_bbcc(), i.e. to find the cost
 * counters to be changed in the execution of a BB.
 */
static __inline__
UInt bbcc_hash_idx(BB* bb, Context* cxt, UInt size)
{
   CLG_ASSERT(bb != 0);
   CLG_ASSERT(cxt != 0);

   return ((Addr)bb + (Addr)cxt) % size;
}
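
/* Example: with the initial table size of 10437, a BB struct at
 * 0x4000 and a Context at 0x5000 (addresses made up for illustration)
 * map to index (0x4000 + 0x5000) % 10437 = 36864 % 10437 = 5553.
 * The key is stable because BB structures are never freed (see the
 * comment above).
 */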
/* Lookup for a BBCC in hash.
 */
static
BBCC* lookup_bbcc(BB* bb, Context* cxt)
{
   BBCC* bbcc = bb->last_bbcc;
   UInt  idx;

   /* check LRU */
   if (bbcc->cxt == cxt) {
      if (!CLG_(clo).separate_threads) {
         /* if we don't dump threads separately, tid doesn't have to match */
         return bbcc;
      }
      if (bbcc->tid == CLG_(current_tid)) return bbcc;
   }

   CLG_(stat).bbcc_lru_misses++;

   idx = bbcc_hash_idx(bb, cxt, current_bbccs.size);
   bbcc = current_bbccs.table[idx];
   while (bbcc &&
          (bb  != bbcc->bb ||
           cxt != bbcc->cxt)) {
      bbcc = bbcc->next;
   }

   CLG_DEBUG(2,"  lookup_bbcc(BB %#lx, Cxt %u, fn '%s'): %p (tid %u)\n",
             bb_addr(bb), cxt->base_number, cxt->fn[0]->name,
             bbcc, bbcc ? bbcc->tid : 0);

   CLG_DEBUGIF(2)
      if (bbcc) CLG_(print_bbcc)(-2,bbcc);

   return bbcc;
}
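
/* Note: bb->last_bbcc acts as a one-entry LRU cache per BB. Code that
 * repeatedly executes in the same context only pays the pointer
 * compares above; the hash chain walk is reserved for misses, which
 * are counted in CLG_(stat).bbcc_lru_misses.
 */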
/* double size of hash table 1 (addr->BBCC) */
static void resize_bbcc_hash(void)
{
    Int i, new_size, conflicts1 = 0, conflicts2 = 0;
    BBCC** new_table;
    UInt new_idx;
    BBCC *curr_BBCC, *next_BBCC;

    new_size  = 2* current_bbccs.size +3;
    new_table = (BBCC**) CLG_MALLOC("cl.bbcc.rbh.1",
                                    new_size * sizeof(BBCC*));

    for (i = 0; i < new_size; i++)
      new_table[i] = NULL;

    for (i = 0; i < current_bbccs.size; i++) {
        if (current_bbccs.table[i] == NULL) continue;

        curr_BBCC = current_bbccs.table[i];
        while (NULL != curr_BBCC) {
            next_BBCC = curr_BBCC->next;

            new_idx = bbcc_hash_idx(curr_BBCC->bb,
                                    curr_BBCC->cxt,
                                    new_size);

            curr_BBCC->next = new_table[new_idx];
            new_table[new_idx] = curr_BBCC;
            if (curr_BBCC->next) {
                conflicts1++;
                if (curr_BBCC->next->next)
                    conflicts2++;
            }

            curr_BBCC = next_BBCC;
        }
    }

    VG_(free)(current_bbccs.table);

    CLG_DEBUG(0,"Resize BBCC Hash: %u => %d (entries %u, conflicts %d/%d)\n",
              current_bbccs.size, new_size,
              current_bbccs.entries, conflicts1, conflicts2);

    current_bbccs.size  = new_size;
    current_bbccs.table = new_table;
    CLG_(stat).bbcc_hash_resizes++;
}
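
/* The growth policy above yields an odd table size each time:
 * 10437 -> 20877 -> 41757 -> ... (new_size = 2*size + 3). An odd
 * modulus helps keep aligned pointer keys (multiples of 8 or 16) from
 * clustering on a fraction of the buckets, since
 * gcd(alignment, size) == 1.
 */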
static __inline
BBCC** new_recursion(int size)
{
    BBCC** bbccs;
    int i;

    bbccs = (BBCC**) CLG_MALLOC("cl.bbcc.nr.1", sizeof(BBCC*) * size);
    for(i=0; i<size; i++)
        bbccs[i] = 0;

    CLG_DEBUG(3,"  new_recursion(size %d): %p\n", size, bbccs);

    return bbccs;
}
/*
 * Allocate a new BBCC
 *
 * Uninitialized: cxt, rec_index, rec_array, next_bbcc, next1, next2
 */
static __inline__
BBCC* new_bbcc(BB* bb)
{
   BBCC* bbcc;
   Int i;

   /* We need cjmp_count+1 JmpData structs:
    * the last is for the unconditional jump/call/ret at end of BB
    */
   bbcc = (BBCC*)CLG_MALLOC("cl.bbcc.nb.1",
                            sizeof(BBCC) +
                            (bb->cjmp_count+1) * sizeof(JmpData));
   bbcc->bb  = bb;
   bbcc->tid = CLG_(current_tid);

   bbcc->ret_counter = 0;
   bbcc->skipped = 0;
   bbcc->cost = CLG_(get_costarray)(bb->cost_count);
   for(i=0; i<bb->cost_count; i++)
      bbcc->cost[i] = 0;
   for(i=0; i<=bb->cjmp_count; i++) {
      bbcc->jmp[i].ecounter = 0;
      bbcc->jmp[i].jcc_list = 0;
   }
   bbcc->ecounter_sum = 0;

   /* Init pointer caches (LRU) */
   bbcc->lru_next_bbcc = 0;
   bbcc->lru_from_jcc  = 0;
   bbcc->lru_to_jcc    = 0;

   CLG_(stat).distinct_bbccs++;

   CLG_DEBUG(3, "  new_bbcc(BB %#lx): %p (now %d)\n",
             bb_addr(bb), bbcc, CLG_(stat).distinct_bbccs);

   return bbcc;
}
/*
 * Inserts a new BBCC into hashes.
 * BBCC-specific items used for the hash key must already be set:
 *  fn     : current function
 *  tid    : current thread ID
 *  from   : position where current function is called from
 *
 * The recursion level doesn't need to be set, as it is not part of
 * the hash key: only BBCCs with recursion level 0 are in hashes.
 */
static
void insert_bbcc_into_hash(BBCC* bbcc)
{
    UInt idx;

    CLG_ASSERT(bbcc->cxt != 0);

    CLG_DEBUG(3,"+ insert_bbcc_into_hash(BB %#lx, fn '%s')\n",
              bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name);

    /* check fill degree of hash and resize if needed (>90%) */
    current_bbccs.entries++;
    if (100 * current_bbccs.entries / current_bbccs.size > 90)
        resize_bbcc_hash();

    idx = bbcc_hash_idx(bbcc->bb, bbcc->cxt, current_bbccs.size);
    bbcc->next = current_bbccs.table[idx];
    current_bbccs.table[idx] = bbcc;

    CLG_DEBUG(3,"- insert_bbcc_into_hash: %u entries\n",
              current_bbccs.entries);
}
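
/* Example: with the initial size of 10437, integer division makes the
 * fill-degree check in insert_bbcc_into_hash() fire once entries
 * reaches 9498 (100*9498/10437 == 91 > 90), i.e. at roughly 91% fill.
 */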
/* String is returned in a dynamically allocated buffer. Caller is
   responsible for free'ing it. */
static HChar* mangled_cxt(const Context* cxt, Int rec_index)
{
   Int i, p;

   if (!cxt) return VG_(strdup)("cl.bbcc.mcxt", "(no context)");

   /* Overestimate the number of bytes we need to hold the string. */
   SizeT need = 20;   // rec_index + nul-terminator
   for (i = 0; i < cxt->size; ++i)
      need += VG_(strlen)(cxt->fn[i]->name) + 1;   // 1 for leading '

   HChar *mangled = CLG_MALLOC("cl.bbcc.mcxt", need);
   p = VG_(sprintf)(mangled, "%s", cxt->fn[0]->name);
   if (rec_index > 0)
      p += VG_(sprintf)(mangled+p, "'%d", rec_index + 1);
   for (i = 1; i < cxt->size; i++)
      p += VG_(sprintf)(mangled+p, "'%s", cxt->fn[i]->name);

   return mangled;
}
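
/* Example outputs for a context with the fn chain [foo, main]
 * (function names are placeholders):
 *   rec_index 0:  "foo'main"
 *   rec_index 1:  "foo'2'main"
 * The apostrophe separates the recursion number and the caller names.
 */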
/* Create a new BBCC as a copy of an existing one,
 * but with costs set to 0 and jcc chains empty.
 *
 * This is needed when a BB is executed in another context than
 * the one it was instrumented in.
 *
 * Use cases:
 *  rec_index == 0: clone from a BBCC with differing tid/cxt
 *                  and insert into hashes
 *  rec_index > 0 : clone from a BBCC with same tid/cxt and rec_index 0
 *                  (don't insert into hashes)
 */
static BBCC* clone_bbcc(BBCC* orig, Context* cxt, Int rec_index)
{
    BBCC* bbcc;

    CLG_DEBUG(3,"+ clone_bbcc(BB %#lx, rec %d, fn %s)\n",
              bb_addr(orig->bb), rec_index, cxt->fn[0]->name);

    bbcc = new_bbcc(orig->bb);

    if (rec_index == 0) {

      /* hash insertion is only allowed if tid or cxt is different */
      CLG_ASSERT((orig->tid != CLG_(current_tid)) ||
                 (orig->cxt != cxt));

      bbcc->rec_index = 0;
      bbcc->cxt = cxt;
      bbcc->rec_array = new_recursion(cxt->fn[0]->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
    }
    else {
      if (CLG_(clo).separate_threads)
        CLG_ASSERT(orig->tid == CLG_(current_tid));

      CLG_ASSERT(orig->cxt == cxt);
      CLG_ASSERT(orig->rec_array);
      CLG_ASSERT(cxt->fn[0]->separate_recursions > rec_index);
      CLG_ASSERT(orig->rec_array[rec_index] == 0);

      /* new BBCC will only have differing recursion level */
      bbcc->rec_index = rec_index;
      bbcc->cxt = cxt;
      bbcc->rec_array = orig->rec_array;
      bbcc->rec_array[rec_index] = bbcc;
    }

    /* update list of BBCCs for same BB */
    bbcc->next_bbcc = orig->bb->bbcc_list;
    orig->bb->bbcc_list = bbcc;

    CLG_DEBUGIF(3)
      CLG_(print_bbcc)(-2, bbcc);

    HChar *mangled_orig = mangled_cxt(orig->cxt, orig->rec_index);
    HChar *mangled_bbcc = mangled_cxt(bbcc->cxt, bbcc->rec_index);
    CLG_DEBUG(2,"- clone_BBCC(%p, %d) for BB %#lx\n"
                "   orig %s\n"
                "   new  %s\n",
              orig, rec_index, bb_addr(orig->bb),
              mangled_orig,
              mangled_bbcc);
    CLG_FREE(mangled_orig);
    CLG_FREE(mangled_bbcc);

    CLG_(stat).bbcc_clones++;

    return bbcc;
}
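
/* Example for the rec_index == 0 case: with --separate-threads=yes,
 * the first time thread 2 executes a BB that was instrumented while
 * thread 1 ran it, lookup_bbcc() finds nothing for (bb, cxt) and
 * setup_bbcc() calls clone_bbcc(bb->bbcc_list, cxt, 0), creating a
 * zero-cost copy that is then inserted into the hash.
 */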
/* Get a pointer to the cost centre structure for given basic block
 * address. If created, the BBCC is inserted into the list of BBCCs
 * for this BB; context assignment and hash insertion happen later,
 * in setup_bbcc().
 */
BBCC* CLG_(get_bbcc)(BB* bb)
{
   BBCC* bbcc;

   CLG_DEBUG(3, "+ get_bbcc(BB %#lx)\n", bb_addr(bb));

   bbcc = bb->bbcc_list;

   if (!bbcc) {
      bbcc = new_bbcc(bb);

      /* initialize BBCC */
      bbcc->cxt       = 0;
      bbcc->rec_array = 0;
      bbcc->rec_index = 0;

      bbcc->next_bbcc = bb->bbcc_list;
      bb->bbcc_list = bbcc;
      bb->last_bbcc = bbcc;

      CLG_DEBUGIF(3)
         CLG_(print_bbcc)(-2, bbcc);
   }

   CLG_DEBUG(3, "- get_bbcc(BB %#lx): BBCC %p\n",
             bb_addr(bb), bbcc);

   return bbcc;
}
/* Callgrind manages its own call stack for each thread.
 * When leaving a function, an underflow can happen when
 * Callgrind's tracing was switched on in the middle of
 * a run, i.e. when Callgrind was not able to trace the
 * call instruction.
 * This function tries to reconstruct the original call.
 * As we know the return address (the address following
 * the CALL instruction), we can detect the function
 * we return to, but the original call site is unknown.
 * We suppose a call site at return address - 1.
 * (TODO: other heuristic: lookup info of instrumented BBs).
 */
static void handleUnderflow(BB* bb)
{
  /* RET at top of call stack */
  BBCC* source_bbcc;
  BB* source_bb;
  Bool seen_before;
  fn_node* caller;
  int fn_number;
  unsigned *pactive;
  call_entry* call_entry_up;

  CLG_DEBUG(1,"  Callstack underflow !\n");

  /* we emulate an old call from the function we return to
   * by using (<return address> -1) */
  source_bb = CLG_(get_bb)(bb_addr(bb)-1, 0, &seen_before);
  source_bbcc = CLG_(get_bbcc)(source_bb);

  /* seen_before can be true for a RET from a signal handler */
  if (!seen_before) {
    source_bbcc->ecounter_sum = CLG_(current_state).collect ? 1 : 0;
  }
  else if (CLG_(current_state).collect)
    source_bbcc->ecounter_sum++;

  /* Force a new top context, will be set active by push_cxt() */
  CLG_(current_fn_stack).top--;
  CLG_(current_state).cxt = 0;
  caller = CLG_(get_fn_node)(bb);
  CLG_(push_cxt)( caller );

  if (!seen_before) {
    /* set rec array for source BBCC: this is at rec level 1 */
    source_bbcc->rec_array = new_recursion(caller->separate_recursions);
    source_bbcc->rec_array[0] = source_bbcc;

    CLG_ASSERT(source_bbcc->cxt == 0);
    source_bbcc->cxt = CLG_(current_state).cxt;
    insert_bbcc_into_hash(source_bbcc);
  }
  CLG_ASSERT(CLG_(current_state).bbcc);

  /* correct active counts */
  fn_number = CLG_(current_state).bbcc->cxt->fn[0]->number;
  pactive = CLG_(get_fn_entry)(fn_number);
  (*pactive)--;

  /* This assertion is not correct for reentrant
   * signal handlers */
  /* CLG_ASSERT(*pactive == 0); */

  CLG_(current_state).nonskipped = 0; /* we didn't skip this function */
  /* back to current context */
  CLG_(push_cxt)( CLG_(current_state).bbcc->cxt->fn[0] );
  CLG_(push_call_stack)(source_bbcc, 0, CLG_(current_state).bbcc,
                        (Addr)-1, False);
  call_entry_up =
    &(CLG_(current_call_stack).entry[CLG_(current_call_stack).sp -1]);
  /* assume this call has been lasting since the last dump, or,
   * for a signal handler, since its invocation */
  if (CLG_(current_state).sig == 0)
    CLG_(copy_cost)( CLG_(sets).full, call_entry_up->enter_cost,
                     CLG_(get_current_thread)()->lastdump_cost );
  else
    CLG_(zero_cost)( CLG_(sets).full, call_entry_up->enter_cost );
}
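
/* Example scenario: if instrumentation is enabled mid-run (e.g. with
 * --instr-atstart=no and a later "callgrind_control -i on") while the
 * program is inside some function, the eventual RET has no matching
 * CALL on Callgrind's stack; handleUnderflow() then fabricates the
 * missing call with call site <return address> - 1, so costs still
 * get attributed to a caller.
 */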
/*
 * Helper function called at start of each instrumented BB to set up
 * the pointer to costs for the current thread/context/recursion level.
 */
VG_REGPARM(1)
void CLG_(setup_bbcc)(BB* bb)
{
  BBCC *bbcc, *last_bbcc;
  Bool  call_emulation = False, delayed_push = False, skip = False;
  Addr sp;
  BB* last_bb;
  ThreadId tid;
  ClgJumpKind jmpkind;
  Bool isConditionalJump;
  Int passed = 0, csp;
  Bool ret_without_call = False;
  Int popcount_on_return = 1;

  CLG_DEBUG(3,"+ setup_bbcc(BB %#lx)\n", bb_addr(bb));

  /* This is needed because thread switches cannot reliably be tracked
   * with callback CLG_(run_thread) only: we otherwise have no way to get
   * the thread ID after a signal handler returns.
   * This could be removed again if that bug is fixed in Valgrind.
   * This is in the hot path, but hopefully not too costly.
   */
  tid = VG_(get_running_tid)();
#if 1
  /* CLG_(switch_thread) is a no-op when tid is equal to CLG_(current_tid).
   * As this is on the hot path, we only call CLG_(switch_thread)(tid)
   * if tid differs from CLG_(current_tid).
   */
  if (UNLIKELY(tid != CLG_(current_tid)))
     CLG_(switch_thread)(tid);
#else
  CLG_ASSERT(VG_(get_running_tid)() == CLG_(current_tid));
#endif
  sp = VG_(get_SP)(tid);
  last_bbcc = CLG_(current_state).bbcc;
  last_bb = last_bbcc ? last_bbcc->bb : 0;

  if (last_bb) {
      passed = CLG_(current_state).jmps_passed;
      CLG_ASSERT(passed <= last_bb->cjmp_count);
      jmpkind = last_bb->jmp[passed].jmpkind;
      isConditionalJump = (passed < last_bb->cjmp_count);

      if (CLG_(current_state).collect) {
        if (!CLG_(current_state).nonskipped) {
          last_bbcc->ecounter_sum++;
          last_bbcc->jmp[passed].ecounter++;
          if (!CLG_(clo).simulate_cache) {
              /* update Ir cost */
              UInt instr_count = last_bb->jmp[passed].instr+1;
              CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
          }
        }
        else {
          /* do not increment exe counter of BBs in skipped functions, as it
           * would fool dumping code */
          if (!CLG_(clo).simulate_cache) {
              /* update Ir cost */
              UInt instr_count = last_bb->jmp[passed].instr+1;
              CLG_(current_state).cost[ fullOffset(EG_IR) ] += instr_count;
              CLG_(current_state).nonskipped->skipped[ fullOffset(EG_IR) ]
                += instr_count;
          }
        }
      }

      CLG_DEBUGIF(4) {
          CLG_(print_execstate)(-2, &CLG_(current_state) );
          CLG_(print_bbcc_cost)(-2, last_bbcc);
      }
  }
  else {
      jmpkind = jk_None;
      isConditionalJump = False;
  }
  /* Manipulate JmpKind if needed, only using BB specific info */

  csp = CLG_(current_call_stack).sp;

  /* A return not matching the top call in our callstack is a jump */
  if ( (jmpkind == jk_Return) && (csp > 0)) {
      Int csp_up = csp-1;
      call_entry* top_ce = &(CLG_(current_call_stack).entry[csp_up]);

      /* We have a real return if
       * - the stack pointer (SP) left the current stack frame, or
       * - SP has the same value as when reaching the current function
       *   and the address of this BB is the return address of the last call
       *   (we even allow leaving multiple frames if the SP stays the
       *    same and we find a matching return address)
       * The latter condition is needed because on PPC, SP can stay
       * the same over CALL=b(c)l / RET=b(c)lr boundaries.
       */
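      /* Example for the multi-frame case: on PPC, nested calls
       * A -> B -> C may all run with the same SP. If C returns
       * straight to A, the loop below walks down the call stack until
       * it finds the entry whose return address lies in A;
       * popcount_on_return ends up as 2, so both frames get popped.
       */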
      if (sp < top_ce->sp) popcount_on_return = 0;
      else if (top_ce->sp == sp) {
          while(1) {
              if (top_ce->ret_addr == bb_addr(bb)) break;
              if (csp_up > 0) {
                  csp_up--;
                  top_ce = &(CLG_(current_call_stack).entry[csp_up]);
                  if (top_ce->sp == sp) {
                      popcount_on_return++;
                      continue;
                  }
              }
              popcount_on_return = 0;
              break;
          }
      }
      if (popcount_on_return == 0) {
          jmpkind = jk_Jump;
          ret_without_call = True;
      }
  }
  /* Should this jump be converted to call or pop/call ? */
  if (( jmpkind != jk_Return) &&
      ( jmpkind != jk_Call) && last_bb) {

    /* We simulate a JMP/Cont to be a CALL if
     * - jump is in another ELF object or section kind
     * - jump is to first instruction of a function (tail recursion)
     */
    if (ret_without_call ||
        /* This is for detection of optimized tail recursion.
         * On PPC, this is only detected as call when going to another
         * function. The problem is that on PPC it can go wrong
         * more easily (no stack frame setup needed)
         */
#if defined(VGA_ppc32)
        (bb->is_entry && (last_bb->fn != bb->fn)) ||
#else
        bb->is_entry ||
#endif
        (last_bb->sect_kind != bb->sect_kind) ||
        (last_bb->obj->number != bb->obj->number)) {

        CLG_DEBUG(1,"     JMP: %s[%s] to %s[%s]%s!\n",
                  last_bb->fn->name, last_bb->obj->name,
                  bb->fn->name, bb->obj->name,
                  ret_without_call?" (RET w/o CALL)":"");

        if (CLG_(get_fn_node)(last_bb)->pop_on_jump && (csp > 0)) {

            call_entry* top_ce = &(CLG_(current_call_stack).entry[csp-1]);

            if (top_ce->jcc) {

                CLG_DEBUG(1,"     Pop on Jump!\n");

                /* change source for delayed push */
                CLG_(current_state).bbcc = top_ce->jcc->from;
                sp = top_ce->sp;
                passed = top_ce->jcc->jmp;
                CLG_(pop_call_stack)();
            }
            else {
                CLG_ASSERT(CLG_(current_state).nonskipped != 0);
            }
        }

        jmpkind = jk_Call;
        call_emulation = True;
    }
  }
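
  /* Example: a compiler may emit `return g(x);` in f() as a direct
   * `jmp g`. As g's first instruction is a function entry point
   * (bb->is_entry), the jump gets converted into an emulated call
   * above, so the profile shows f calling g as usual.
   */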
  if (jmpkind == jk_Call)
    skip = CLG_(get_fn_node)(bb)->skip;

  CLG_DEBUGIF(1) {
      if (isConditionalJump)
          VG_(printf)("Cond-");
      switch(jmpkind) {
      case jk_None:   VG_(printf)("Fall-through"); break;
      case jk_Jump:   VG_(printf)("Jump"); break;
      case jk_Call:   VG_(printf)("Call"); break;
      case jk_Return: VG_(printf)("Return"); break;
      default:        tl_assert(0);
      }
      VG_(printf)(" %08lx -> %08lx, SP %08lx\n",
                  last_bb ? bb_jmpaddr(last_bb) : 0,
                  bb_addr(bb), sp);
  }
  /* Handle CALL/RET and update context to get correct BBCC */

  if (jmpkind == jk_Return) {

      if ((csp == 0) ||
          ((CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom) &&
           ( *(CLG_(current_fn_stack).top-1) == 0)) ) {

          /* On an empty call stack or at a signal separation marker,
           * a RETURN generates a call stack underflow.
           */
          handleUnderflow(bb);
          CLG_(pop_call_stack)();
      }
      else {
          CLG_ASSERT(popcount_on_return > 0);
          CLG_(unwind_call_stack)(sp, popcount_on_return);
      }
  }
  else {
      Int unwind_count = CLG_(unwind_call_stack)(sp, 0);
      if (unwind_count > 0) {
          /* if unwinding was done, this actually is a return */
          jmpkind = jk_Return;
      }

      if (jmpkind == jk_Call) {
          delayed_push = True;

          csp = CLG_(current_call_stack).sp;
          if (call_emulation && csp > 0)
              sp = CLG_(current_call_stack).entry[csp-1].sp;
      }
  }
  /* Change to new context if needed, taking delayed_push into account */
  if ((delayed_push && !skip) || (CLG_(current_state).cxt == 0)) {
      CLG_(push_cxt)(CLG_(get_fn_node)(bb));
  }
  CLG_ASSERT(CLG_(current_fn_stack).top > CLG_(current_fn_stack).bottom);

  /* If there is a freshly instrumented BBCC, assign current context */
  bbcc = CLG_(get_bbcc)(bb);
  if (bbcc->cxt == 0) {
      CLG_ASSERT(bbcc->rec_array == 0);

      bbcc->cxt = CLG_(current_state).cxt;
      bbcc->rec_array =
          new_recursion((*CLG_(current_fn_stack).top)->separate_recursions);
      bbcc->rec_array[0] = bbcc;

      insert_bbcc_into_hash(bbcc);
  }
  else {
      /* get BBCC with current context */

      /* first check LRU of last bbcc executed */

      if (last_bbcc) {
          bbcc = last_bbcc->lru_next_bbcc;
          if (bbcc &&
              ((bbcc->bb != bb) ||
               (bbcc->cxt != CLG_(current_state).cxt)))
              bbcc = 0;
      }
      else
          bbcc = 0;

      if (!bbcc)
          bbcc = lookup_bbcc(bb, CLG_(current_state).cxt);
      if (!bbcc)
          bbcc = clone_bbcc(bb->bbcc_list, CLG_(current_state).cxt, 0);

      bb->last_bbcc = bbcc;
  }

  /* save for fast lookup */
  if (last_bbcc)
      last_bbcc->lru_next_bbcc = bbcc;
  if ((*CLG_(current_fn_stack).top)->separate_recursions > 1) {
      UInt level, idx;
      fn_node* top = *(CLG_(current_fn_stack).top);

      level = *CLG_(get_fn_entry)(top->number);

      if (delayed_push && !skip) {
          if (CLG_(clo).skip_direct_recursion) {
              /* a call was detected, which means that the source BB != 0 */
              CLG_ASSERT(CLG_(current_state).bbcc != 0);
              /* only increment rec. level if called from a different function */
              if (CLG_(current_state).bbcc->cxt->fn[0] != bbcc->cxt->fn[0])
                  level++;
          }
          else level++;
      }
      if (level > top->separate_recursions)
          level = top->separate_recursions;

      if (level == 0) {
          /* can only happen if instrumentation was just switched on */
          level = 1;
          *CLG_(get_fn_entry)(top->number) = 1;
      }

      idx = level - 1;
      if (bbcc->rec_array[idx])
          bbcc = bbcc->rec_array[idx];
      else
          bbcc = clone_bbcc(bbcc, CLG_(current_state).cxt, idx);

      CLG_ASSERT(bbcc->rec_array[bbcc->rec_index] == bbcc);
  }
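
  /* Example: for a function with separate_recursions == 10 (e.g. from
   * --separate-recs=10), the second recursive activation runs at
   * level 2 and is accounted to bbcc->rec_array[1]; activations
   * deeper than 10 are clamped and folded into rec_array[9].
   */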
  if (delayed_push) {
      if (!skip && CLG_(current_state).nonskipped) {
          /* a call from skipped to nonskipped */
          CLG_(current_state).bbcc = CLG_(current_state).nonskipped;
          /* FIXME: take the real passed count from shadow stack */
          passed = CLG_(current_state).bbcc->bb->cjmp_count;
      }
      CLG_(push_call_stack)(CLG_(current_state).bbcc, passed,
                            bbcc, sp, skip);
  }

  if (CLG_(clo).collect_jumps && (jmpkind == jk_Jump)) {

      /* Handle followed conditional jumps, i.e. trace arcs.
       * This uses JCC structures, too */

      jCC* jcc = CLG_(get_jcc)(last_bbcc, passed, bbcc);
      CLG_ASSERT(jcc != 0);
      // Change from default, and check if already changed
      if (jcc->jmpkind == jk_Call)
          jcc->jmpkind = isConditionalJump ? jk_CondJump : jk_Jump;
      else {
          // FIXME: Why can this fail?
          // CLG_ASSERT(jcc->jmpkind == jmpkind);
      }

      jcc->call_counter++;
      if (isConditionalJump)
          CLG_(stat).jcnd_counter++;
      else
          CLG_(stat).jump_counter++;
  }
  CLG_(current_state).bbcc = bbcc;
  /* Even though this will be set in instrumented code directly before
   * side exits, it needs to be set to 0 here in case an exception
   * happens in the first instructions of the BB */
  CLG_(current_state).jmps_passed = 0;
  // needed for log_* handlers called in this BB
  CLG_(bb_base)   = bb->obj->offset + bb->offset;
  CLG_(cost_base) = bbcc->cost;

  CLG_DEBUGIF(1) {
      VG_(printf)("     ");
      CLG_(print_bbcc_fn)(bbcc);
      VG_(printf)("\n");
  }

  CLG_DEBUG(3,"- setup_bbcc (BB %#lx): Cost %p (Len %u), Instrs %u (Len %u)\n",
            bb_addr(bb), bbcc->cost, bb->cost_count,
            bb->instr_count, bb->instr_len);
  CLG_DEBUGIF(3)
    CLG_(print_cxt)(-8, CLG_(current_state).cxt, bbcc->rec_index);
  CLG_DEBUG(3,"\n");

  CLG_(stat).bb_executions++;
}