1 /* Copyright (C) 2021 Free Software Foundation, Inc.
4 This file is part of GNU Binutils.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
26 #include "CallStack.h"
27 #include "DbeSession.h"
29 #include "DbeLinkList.h"
30 #include "Experiment.h"
31 #include "Exp_Layout.h"
33 #include "LoadObject.h"
// Constructor.  The visible statement sets `limit` to the number of
// CallStackNode* slots that fit in the inline `first_data` array.
36 Descendants::Descendants ()
39 limit
= sizeof (first_data
) / sizeof (CallStackNode
*);
// Destructor.  Tests whether `data` has moved away from the inline
// `first_data` array.
// NOTE(review): the statement guarded by this test is not shown in this
// listing; presumably it releases the heap buffer -- confirm.
43 Descendants::~Descendants ()
45 if (data
!= first_data
)
// Bisection search of data[] for the child node whose instruction is `hi`.
// Every step probes the midpoint `ind` of [left, right] and reads that
// child's instruction through get_instr ().
// NOTE(review): the setup of `cnt`/`left`, the equality test, the bound
// updates and the use of *index are on lines not shown in this listing.
50 Descendants::find (Histable
*hi
, int *index
)
54 for (int right
= cnt
- 1; left
<= right
;)
56 int ind
= (left
+ right
) / 2;
57 CallStackNode
*node
= data
[ind
];
58 Histable
*instr
= node
->get_instr ();
// The ordering test uses Histable::id rather than the pointer value.
65 if (instr
->id
< hi
->id
)
// Takes one CallStackNode pointer.
// NOTE(review): the body is not shown in this listing; presumably `item`
// is added after the last stored descendant -- confirm.
76 Descendants::append (CallStackNode
* item
)
// Insert `item` at position `ind` of the descendant table.
// When old_cnt + 1 reaches `limit`, a larger array is malloc'ed (DELTA
// entries if limit is 0, otherwise twice the old limit) and the old entries
// are copied across leaving a one-slot gap at `ind`.  Otherwise the tail
// [ind, old_cnt) is shifted up by one slot inside the existing array.
// NOTE(review): the definition of `old_cnt`, the stores into the new array
// slot, the limit/data/count updates and the release of the old buffer are
// on lines not shown in this listing; the malloc result is not checked in
// the visible lines.
85 Descendants::insert (int ind
, CallStackNode
* item
)
87 CallStackNode
**old_data
= data
;
89 if (old_cnt
+ 1 >= limit
)
91 int new_limit
= (limit
== 0) ? DELTA
: limit
* 2;
92 CallStackNode
**new_data
= (CallStackNode
**) malloc (new_limit
* sizeof (CallStackNode
*));
// Entries before the insertion point keep their index.
93 for (int i
= 0; i
< ind
; i
++)
94 new_data
[i
] = old_data
[i
];
// Entries from the insertion point on move up by one; source and
// destination are different arrays, so copy direction does not matter here.
96 for (int i
= ind
; i
< old_cnt
; i
++)
97 new_data
[i
+ 1] = old_data
[i
];
100 if (old_data
!= first_data
)
// In-place shift: source and destination overlap, so walk from the last
// element down to `ind`.  A forward walk would overwrite each entry before
// it is moved and fill the tail with copies of old_data[ind].
105 for (int i
= old_cnt
- 1; i
>= ind
; i
--)
106 old_data
[i
+ 1] = old_data
[i
];
107 old_data
[ind
] = item
;
113 * Private implementation of CallStack interface
116 // When performing pipeline optimization on resolve_frame_info + add_stack
117 // cstk_ctx structure contains the state (or context) for one iteration to pass on
118 // from Phase 2 to Phase 3 (More details in Experiment.cc)
// Concrete implementation of the CallStack interface; instances are created
// by CallStack::getInstance ().
119 class CallStackP
: public CallStack
122 CallStackP (Experiment
*exp
);
124 virtual ~CallStackP ();
// Builds the call stacks for one FramePacket and stores the resulting
// nodes in dDscr at index idx.
126 virtual void add_stack (DataDescriptor
*dDscr
, long idx
, FramePacket
*frp
, cstk_ctx_chunk
*cstCtxChunk
);
// Interns a vector of PCs as a chain of CallStackNodes.
127 virtual void *add_stack (Vector
<Histable
*> *objs
);
// Returns the address of node number n inside the chunked node storage.
128 virtual CallStackNode
*get_node (int n
);
// Prints every node to the given stream (stderr when NULL).
129 virtual void print (FILE *);
// Number of CallStackNode objects held by one chunk.
133 static const int CHUNKSZ
= 16384;
// Experiment used for address-to-PC mapping and OpenMP data lookups.
135 Experiment
*experiment
;
// Node for the JVM function, created on first use by
// add_stack_java_epilogue ().
137 CallStackNode
*jvm_node
;
// Array of node chunks, indexed as chunks[n / CHUNKSZ][n % CHUNKSZ].
140 CallStackNode
**chunks
;
// Lookup table from the 64-bit stack hash key to a previously built node.
141 Map
<uint64_t, CallStackNode
*> *cstackMap
;
144 CallStackNode
*add_stack (long start
, long end
, Vector
<Histable
*> *objs
, CallStackNode
*myRoot
);
// Constructs a node in chunk storage with the given ancestor and PC.
145 CallStackNode
*new_Node (CallStackNode
*, Histable
*);
// Looks up (and stores in PROP_USTACK) the stack node for the OpenMP record
// whose PROP_CPRID equals the argument.
146 CallStackNode
*find_preg_stack (uint64_t);
147 // objs are in the root..leaf order
148 void *add_stack_d (Vector
<Histable
*> *objs
);
149 void add_stack_java (DataDescriptor
*dDscr
, long idx
, FramePacket
*frp
, hrtime_t tstamp
, uint32_t thrid
, Vector
<DbeInstr
*>* natpcs
, bool natpc_added
, cstk_ctx_chunk
*cstCtxChunk
);
150 void add_stack_java_epilogue (DataDescriptor
*dDscr
, long idx
, FramePacket
*frp
, hrtime_t tstamp
, uint32_t thrid
, Vector
<DbeInstr
*>* natpcs
, Vector
<Histable
*>* jpcs
, bool natpc_added
);
152 // Adjust HW counter event to find better trigger PC, etc.
153 DbeInstr
*adjustEvent (DbeInstr
*leafPC
, DbeInstr
* candPC
,
154 Vaddr
&eventEA
, int abst_type
);
// PC vectors allocated by add_stack ()/add_stack_java () when no pipeline
// context chunk is supplied.
155 Vector
<DbeInstr
*> *natpcsP
;
156 Vector
<Histable
*> *jpcsP
;
// Constructor.  The visible statements allocate the stack cache map and the
// lock, then create the root node (ancestor 0) from instruction (0, 0) of
// the function returned by dbeSession->get_Total_Function ().
159 CallStackP::CallStackP (Experiment
*exp
)
165 cstackMap
= new CacheMap
<uint64_t, CallStackNode
*>;
166 cstackLock
= new DbeLock ();
167 Function
*total
= dbeSession
->get_Total_Function ();
168 root
= new_Node (0, total
->find_dbeinstr (0, 0));
// Destructor.  Nodes are constructed with placement new in new_Node (), so
// each one is destroyed here with an explicit destructor call; the cache
// map is torn down with destroy_map at the end.
174 CallStackP::~CallStackP ()
179 for (int i
= 0; i
< nodes
; i
++)
181 CallStackNode
*node
= get_node (i
);
182 node
->~CallStackNode ();
// NOTE(review): the body of this loop over the chunk array is not shown in
// this listing; presumably each chunk is released -- confirm.
184 for (int i
= 0; i
< nchunks
; i
++)
190 destroy_map (CallStackNode
*, cstackMap
);
// Creates a CallStackNode for `pcval` with ancestor `anc` inside the chunked
// node storage.
// When `nodes` has reached the current capacity (nchunks * CHUNKSZ) the
// chunk-pointer array is reallocated, the old pointers are copied over and
// a fresh chunk of CHUNKSZ nodes is malloc'ed for the last slot.
// NOTE(review): the updates of `nchunks` and `nodes`, the release of the
// old chunk array and the return statement are on lines not shown in this
// listing; none of the visible malloc results is checked for NULL.
194 CallStackP::new_Node (CallStackNode
*anc
, Histable
*pcval
)
196 // cstackLock->aquireLock(); // Caller already locked it
197 if (nodes
>= nchunks
* CHUNKSZ
)
199 CallStackNode
**old_chunks
= chunks
;
202 // Reallocate Node chunk array
203 chunks
= (CallStackNode
**) malloc (nchunks
* sizeof (CallStackNode
*));
204 for (int i
= 0; i
< nchunks
- 1; i
++)
205 chunks
[i
] = old_chunks
[i
];
207 // Allocate new chunk for nodes.
208 chunks
[nchunks
- 1] = (CallStackNode
*) malloc (CHUNKSZ
* sizeof (CallStackNode
));
// Construct the node in place in chunk memory that is already allocated.
211 CallStackNode
*node
= get_node (nodes
- 1);
212 new (node
) CallStackNode (anc
, pcval
);
213 // cstackLock->releaseLock();
// Resolves the user call stack for the OpenMP record whose PROP_CPRID is
// `prid`.  The record is located in the experiment's OpenMP data view; the
// PCs of its native stack (PROP_MSTACK) are limited to the range
// [btm, top], frames from load objects flagged SEG_FLAG_OMP are dropped,
// the instructions of the parent record's stack (found recursively through
// PROP_PPRID) are appended, and the resulting node is stored back into
// PROP_USTACK.
// NOTE(review): the early-return paths and several loop bodies are on lines
// not shown in this listing.
218 CallStackP::find_preg_stack (uint64_t prid
)
220 DataView
*dview
= experiment
->getOpenMPdata ();
221 dview
->sort (PROP_CPRID
);
223 tval
.setUINT64 (prid
);
224 long idx
= dview
->getIdxByVals (&tval
, DataView::REL_EQ
);
// Value currently stored as the user stack of this record.
227 CallStackNode
*node
= (CallStackNode
*) dview
->getObjValue (PROP_USTACK
, idx
);
230 uint64_t pprid
= dview
->getLongValue (PROP_PPRID
, idx
);
233 void *nat_stack
= dview
->getObjValue (PROP_MSTACK
, idx
);
234 Vector
<Histable
*> *pcs
= getStackPCs (nat_stack
);
236 // Find the bottom frame
241 for (btm
= 0; btm
< pcs
->size (); btm
++)
243 hist
= pcs
->fetch (btm
);
244 if (hist
->get_type () == Histable::INSTR
)
245 instr
= (DbeInstr
*) hist
;
247 instr
= (DbeInstr
*) hist
->convertto (Histable::INSTR
);
248 LoadObject
*lo
= instr
->func
->module
->loadobject
;
251 if (lo
->flags
& SEG_FLAG_OMP
)
254 else if (!(lo
->flags
& SEG_FLAG_OMP
))
258 // Find the top frame
259 dview
->sort (PROP_CPRID
);
261 tval
.setUINT64 (pprid
);
262 long pidx
= dview
->getIdxByVals (&tval
, DataView::REL_EQ
);
263 if (pidx
< 0) // No parent. Process the entire nat_stack
264 top
= pcs
->size () - 1;
267 uint32_t thrid
= (uint32_t) dview
->getIntValue (PROP_THRID
, idx
);
268 uint32_t pthrid
= (uint32_t) dview
->getIntValue (PROP_THRID
, pidx
);
271 // Parent is on a different stack.
272 // Process the entire nat_stack. Skip libthread.
273 for (top
= pcs
->size () - 1; top
>= 0; top
--)
275 hist
= pcs
->fetch (top
);
276 if (hist
->get_type () == Histable::INSTR
)
277 instr
= (DbeInstr
*) hist
;
279 instr
= (DbeInstr
*) hist
->convertto (Histable::INSTR
);
280 if (instr
->func
->module
->loadobject
->flags
& SEG_FLAG_OMP
)
283 if (top
< 0) // None found. May be incomplete call stack (x86)
284 top
= pcs
->size () - 1;
288 // Parent is on the same stack. Find match.
289 top
= pcs
->size () - 1;
290 void *pnat_stack
= dview
->getObjValue (PROP_MSTACK
, pidx
);
291 Vector
<Histable
*> *ppcs
= getStackPCs (pnat_stack
);
// Walk both PC vectors from their last elements while the entries agree.
292 for (int ptop
= ppcs
->size () - 1; top
>= 0 && ptop
>= 0;
295 if (pcs
->fetch (top
) != ppcs
->fetch (ptop
))
302 // Process the found range
303 Vector
<Histable
*> *upcs
= new Vector
<Histable
*>(128);
304 for (int i
= btm
; i
<= top
; ++i
)
306 hist
= (DbeInstr
*) pcs
->fetch (i
);
307 if (hist
->get_type () == Histable::INSTR
)
308 instr
= (DbeInstr
*) hist
;
310 instr
= (DbeInstr
*) hist
->convertto (Histable::INSTR
);
312 if (instr
->func
->module
->loadobject
->flags
& SEG_FLAG_OMP
)
313 // Skip all frames from libmtsk
315 upcs
->append (instr
);
// Recurse on the parent record, then append the instructions found along
// the ancestor chain of the node it returns.
318 node
= find_preg_stack (pprid
);
321 upcs
->append (node
->instr
);
322 node
= node
->ancestor
;
// Intern the combined PC list and store the result for this record.
324 node
= (CallStackNode
*) add_stack (upcs
);
325 dview
->setObjValue (PROP_USTACK
, idx
, node
);
// Value compared against FramePacket::getBciFromStack () in
// add_stack_java (); a match starts the search of the native stack for the
// frame's JNI counterpart.
330 #define JNI_MARKER -3
332 // This is one iteration of the third stage of the
333 // resolve_frame_info + add_stack pipeline. Works on building the java
// Builds the Java-mode PC list `jpcs` for one packet and passes it, together
// with the native PCs, to add_stack_java_epilogue ().
// Java frames are walked from index jstack_size - 1 down to 0 and mapped
// with map_jmid_to_PC; for a frame whose bci equals JNI_MARKER the native
// stack is scanned downwards from its current position `nind` and native
// frames are appended to jpcs.
// NOTE(review): how jpcs is obtained in the pipelined and non-pipelined
// cases, and the loop-exit conditions, are on lines not shown in this
// listing.
336 CallStackP::add_stack_java (DataDescriptor
*dDscr
, long idx
, FramePacket
*frp
,
337 hrtime_t tstamp
, uint32_t thrid
,
338 Vector
<DbeInstr
*>* natpcs
, bool natpc_added
,
339 cstk_ctx_chunk
*cstCtxChunk
)
341 Vector
<Histable
*> *jpcs
= NULL
;
342 cstk_ctx
*cstctx
= NULL
;
343 if (cstCtxChunk
!= NULL
)
// The per-packet context is selected by idx modulo the chunk size.
345 cstctx
= cstCtxChunk
->cstCtxAr
[idx
% CSTCTX_CHUNK_SZ
];
351 // this is when we are not doing the pipeline optimization
352 // Temporary array for resolved addresses
353 // [leaf_pc .. root_pc] == [0..stack_size-1]
354 // Leave room for a possible "truncated" frame
356 jpcsP
= new Vector
<Histable
*>;
362 // Construct the user stack
364 // Construct Java user stack
365 int jstack_size
= frp
->stackSize (true);
368 // jpcs = new Vector<Histable*>( jstack_size );
369 if (frp
->isTruncatedStack (true))
371 Function
*truncf
= dbeSession
->getSpecialFunction (DbeSession::TruncatedStackFunc
);
372 jpcs
->append (truncf
->find_dbeinstr (0, 0));
375 int nind
= natpcs
->size () - 1; // first native frame
376 for (int jind
= jstack_size
- 1; jind
>= 0; jind
--)
378 bool jleaf
= (jind
== 0); // is current java frame a leaf?
379 Vaddr mid
= frp
->getMthdFromStack (jind
);
380 int bci
= frp
->getBciFromStack (jind
);
381 DbeInstr
*cur_instr
= experiment
->map_jmid_to_PC (mid
, bci
, tstamp
);
382 jpcs
->append (cur_instr
);
383 if (bci
== JNI_MARKER
)
385 JMethod
*j_method
= (JMethod
*) cur_instr
->func
;
386 // Find matching native function on the native stack
388 for (; nind
>= 0; nind
--)
390 DbeInstr
*nat_addr
= natpcs
->fetch (nind
);
393 Function
*nat_func
= nat_addr
->func
;
394 if (!found
&& j_method
->jni_match (nat_func
))
398 // XXX omazur: the following will skip JNI native method
399 // implemented in JVM itself.
400 // If we are back in JVM switch to processing Java
401 // frames if there are any.
402 if ((nat_func
->module
->loadobject
->flags
& SEG_FLAG_JVM
) && !jleaf
)
404 jpcs
->append (nat_addr
);
410 add_stack_java_epilogue (dDscr
, idx
, frp
, tstamp
, thrid
, natpcs
, jpcs
, natpc_added
);
413 // This is one iteration of the fourth stage of the
414 // resolve_frame_info + add_stack pipeline.
415 // It adds the native and java stacks to the stackmap
// Stores the stacks built for one packet into dDscr and assigns the packet
// to a Java thread.
// The node for the native PCs is written to PROP_MSTACK, PROP_XSTACK and
// PROP_USTACK.  The node for the Java PCs (jpcs) is written to PROP_USTACK
// and, unless the function of node->instr is the JUnknown function, to
// PROP_XSTACK.  Packets with no Java thread, or with a system Java thread,
// get the shared jvm_node as PROP_USTACK.
// NOTE(review): the conditions guarding several of these statements are on
// lines not shown in this listing.
418 CallStackP::add_stack_java_epilogue (DataDescriptor
*dDscr
, long idx
, FramePacket
*frp
, hrtime_t tstamp
, uint32_t thrid
, Vector
<DbeInstr
*>* natpcs
, Vector
<Histable
*> *jpcs
, bool natpc_added
)
420 CallStackNode
*node
= NULL
;
423 node
= (CallStackNode
*) add_stack ((Vector
<Histable
*>*)natpcs
);
424 dDscr
->setObjValue (PROP_MSTACK
, idx
, node
);
425 dDscr
->setObjValue (PROP_XSTACK
, idx
, node
);
426 dDscr
->setObjValue (PROP_USTACK
, idx
, node
);
429 int jstack_size
= frp
->stackSize (true);
// add_stack_d () is the variant that takes its PCs in root..leaf order.
433 node
= (CallStackNode
*) add_stack_d (jpcs
);
435 node
= (CallStackNode
*) dDscr
->getObjValue (PROP_USTACK
, idx
);
436 dDscr
->setObjValue (PROP_USTACK
, idx
, node
);
437 Function
*func
= (Function
*) node
->instr
->convertto (Histable::FUNCTION
);
438 if (func
!= dbeSession
->get_JUnknown_Function ())
439 dDscr
->setObjValue (PROP_XSTACK
, idx
, node
);
442 JThread
*jthread
= experiment
->map_pckt_to_Jthread (thrid
, tstamp
);
// A packet without a Java thread but with a known Java function on its
// stack is assigned to JTHREAD_DEFAULT.
443 if (jthread
== JTHREAD_NONE
&& jstack_size
!= 0 && node
!= NULL
)
445 Function
*func
= (Function
*) node
->instr
->convertto (Histable::FUNCTION
);
446 if (func
!= dbeSession
->get_JUnknown_Function ())
447 jthread
= JTHREAD_DEFAULT
;
449 dDscr
->setObjValue (PROP_JTHREAD
, idx
, jthread
);
450 if (jthread
== JTHREAD_NONE
|| (jthread
!= JTHREAD_DEFAULT
&& jthread
->is_system ()))
452 if (jvm_node
== NULL
)
454 Function
*jvm
= dbeSession
->get_jvm_Function ();
// Created once as a child of root and also published through
// CommonPacket::jvm_overhead.
457 jvm_node
= new_Node (root
, jvm
->find_dbeinstr (0, 0));
458 CommonPacket::jvm_overhead
= jvm_node
;
461 dDscr
->setObjValue (PROP_USTACK
, idx
, jvm_node
);
465 // This is one iteration of the 2nd stage of
466 // resolve_frame_info + add_stack() pipeline. Builds the stack for a given framepacket.
467 // When pipeline optimization is turned off, the cstCtxChunk passed is NULL
// Builds the call stacks for the packet at index `idx` of dDscr from the raw
// frames in `frp`.  The visible statements do the following:
//  - Native frames are mapped with map_Vaddr_to_PC and collected in natpcs;
//    the resulting node is stored as PROP_MSTACK, PROP_XSTACK and
//    PROP_USTACK.
//  - For hardware-counter packets the leaf PC and PROP_VADDR may be
//    corrected through adjustEvent ().
//  - When frp->ompstack has entries (or the state is OMP_IDLE_STATE) two PC
//    lists are built from it: omppcs, stored as PROP_USTACK, and ompxpcs,
//    stored as PROP_XSTACK.
//  - When frp->omp_cprid or frp->omp_state is set, a user stack is derived
//    from the native PCs plus the parent region's stack (find_preg_stack).
//  - The Java user stack is built by add_stack_java ().
// NOTE(review): the branch structure that selects between these parts, and
// the declarations of `leaf`, `state`, `btm`, `top`, `func` and `node`, are
// on lines not shown in this listing.
469 CallStackP::add_stack (DataDescriptor
*dDscr
, long idx
, FramePacket
*frp
,
470 cstk_ctx_chunk
* cstCtxChunk
)
472 Vector
<DbeInstr
*> *natpcs
= NULL
;
473 cstk_ctx
*cstctx
= NULL
;
474 int stack_size
= frp
->stackSize ();
475 if (cstCtxChunk
!= NULL
)
// Pipelined case: reuse the PC vector held by this packet's context.
477 cstctx
= cstCtxChunk
->cstCtxAr
[idx
% CSTCTX_CHUNK_SZ
];
478 natpcs
= cstctx
->natpcs
;
483 // this is when we are not doing the pipeline optimization
484 // Temporary array for resolved addresses
485 // [leaf_pc .. root_pc] == [0..stack_size-1]
486 // Leave room for a possible "truncated" frame
488 natpcsP
= new Vector
<DbeInstr
*>;
494 hrtime_t tstamp
= (hrtime_t
) dDscr
->getLongValue (PROP_TSTAMP
, idx
);
495 uint32_t thrid
= (uint32_t) dDscr
->getIntValue (PROP_THRID
, idx
);
505 Vaddr o7_to_skip
= 0;
506 for (int index
= 0; index
< stack_size
; index
++)
508 if (frp
->isLeafMark (index
))
514 if (state
== SKIP_O7
)
516 // remember this bad o7 value since OMP might not recognize it
517 o7_to_skip
= frp
->getFromStack (index
);
522 Vaddr va
= frp
->getFromStack (index
);
523 DbeInstr
*cur_instr
= experiment
->map_Vaddr_to_PC (va
, tstamp
);
524 #if ARCH(Intel)// TBR? FIXUP_XXX_SPARC_LINUX: switch should be on experiment ARCH, not dbe ARCH
525 // We need to adjust return addresses on intel
526 // in order to attribute inclusive metrics to
527 // proper call instructions.
528 if (experiment
->exp_maj_version
<= 9)
529 if (!leaf
&& cur_instr
->addr
!= 0)
530 cur_instr
= cur_instr
->func
->find_dbeinstr (0, cur_instr
->addr
- 1);
533 // Skip PC's from PLT, update leaf and state accordingly
534 if ((cur_instr
->func
->flags
& FUNC_FLAG_PLT
)
535 && (leaf
|| state
== CHECK_O7
))
537 if (state
== CHECK_O7
)
542 if (state
== CHECK_O7
)
545 uint64_t saddr
= cur_instr
->func
->save_addr
;
546 if (cur_instr
->func
->isOutlineFunction
)
547 // outline functions assume 'save' instruction
548 // Note: they accidentally have saddr == FUNC_ROOT
550 else if (saddr
== FUNC_ROOT
)
552 // If a function is statically determined as a root
553 // but dynamically appears not, don't discard o7.
554 // One such case is __misalign_trap_handler on sparcv9.
558 else if (saddr
!= FUNC_NO_SAVE
&& cur_instr
->addr
> saddr
)
561 else if (state
== USE_O7
)
564 if (cur_instr
->flags
& PCInvlFlag
)
569 Vaddr evpc
= (Vaddr
) dDscr
->getLongValue (PROP_VIRTPC
, idx
);
571 && !(index
> 0 && frp
->isLeafMark (index
- 1)
572 && evpc
== (Vaddr
) (-1)))
574 /* contains hwcprof info */
575 cur_instr
->func
->module
->read_hwcprof_info ();
577 // complete ABS validation of candidate eventPC/eventEA
578 // and correction/adjustment of collected callstack leaf PC
579 DbeInstr
*candPC
= experiment
->map_Vaddr_to_PC (evpc
, tstamp
);
580 Vaddr vaddr
= (Vaddr
) dDscr
->getLongValue (PROP_VADDR
, idx
);
581 Vaddr tmp_vaddr
= vaddr
;
// NOTE(review): `tag` is a uint32_t, so the `tag < 0` test below can never
// be true; only the upper-bound test is effective.
583 uint32_t tag
= dDscr
->getIntValue (PROP_HWCTAG
, idx
);
584 if (tag
< 0 || tag
>= MAX_HWCOUNT
)
585 abst_type
= ABST_NOPC
;
587 abst_type
= experiment
->coll_params
.hw_tpc
[tag
];
589 // We need to adjust addresses for ABST_EXACT_PEBS_PLUS1
590 // (Nehalem/SandyBridge PEBS identifies PC+1, not PC)
591 if (abst_type
== ABST_EXACT_PEBS_PLUS1
&& candPC
->addr
!= 0)
592 candPC
= candPC
->func
->find_dbeinstr (0, candPC
->func
->find_previous_addr (candPC
->addr
));
// adjustEvent () takes tmp_vaddr by reference; a changed value is detected
// by comparing it with the original vaddr afterwards.
594 cur_instr
= adjustEvent (cur_instr
, candPC
, tmp_vaddr
, abst_type
);
595 if (vaddr
!= tmp_vaddr
)
597 if (tmp_vaddr
< ABS_CODE_RANGE
)
599 /* post processing backtrack failed */
600 dDscr
->setValue (PROP_VADDR
, idx
, tmp_vaddr
);
601 dDscr
->setValue (PROP_PADDR
, idx
, ABS_NULL
);
602 /* hwcp->eventVPC = xxxxx leave eventPC alone,
603 * or can we set it to leafpc? */
604 dDscr
->setValue (PROP_PHYSPC
, idx
, ABS_NULL
);
608 /* internal error: why would post-processing modify vaddr? */
609 dDscr
->setValue (PROP_PADDR
, idx
, (Vaddr
) (-1));
610 dDscr
->setValue (PROP_PHYSPC
, idx
, (Vaddr
) (-1));
615 natpcs
->append (cur_instr
);
618 // A hack to deceive the user into believing that outlined code
619 // is called from the base function
620 DbeInstr
*drvd
= cur_instr
->func
->derivedNode
;
622 natpcs
->append (drvd
);
// Truncated or failed unwinds get an instruction of the matching special
// function appended to the native PC list.
624 if (frp
->isTruncatedStack ())
626 Function
*truncf
= dbeSession
->getSpecialFunction (DbeSession::TruncatedStackFunc
);
627 natpcs
->append (truncf
->find_dbeinstr (0, 0));
629 else if (frp
->isFailedUnwindStack ())
631 Function
*funwf
= dbeSession
->getSpecialFunction (DbeSession::FailedUnwindFunc
);
632 natpcs
->append (funwf
->find_dbeinstr (0, 0));
// Intern the native PC list; all three stack properties start out
// pointing at the same node.
635 CallStackNode
*node
= (CallStackNode
*) add_stack ((Vector
<Histable
*>*)natpcs
);
636 dDscr
->setObjValue (PROP_MSTACK
, idx
, node
);
637 dDscr
->setObjValue (PROP_XSTACK
, idx
, node
);
638 dDscr
->setObjValue (PROP_USTACK
, idx
, node
);
// From here on stack_size refers to the OpenMP stack recorded in frp.
641 stack_size
= frp
->ompstack
->size ();
642 if (stack_size
> 0 || frp
->omp_state
== OMP_IDLE_STATE
)
645 Vector
<Histable
*> *omppcs
= new Vector
<Histable
*>(stack_size
);
646 Vector
<Histable
*> *ompxpcs
= new Vector
<Histable
*>(stack_size
);
647 switch (frp
->omp_state
)
658 func
= dbeSession
->get_OMP_Function (frp
->omp_state
);
659 DbeInstr
*instr
= func
->find_dbeinstr (0, 0);
660 omppcs
->append (instr
);
661 ompxpcs
->append (instr
);
665 Vector
<Vaddr
> *stck
= frp
->ompstack
;
667 for (int index
= 0; index
< stack_size
; index
++)
669 if (stck
->fetch (index
) == SP_LEAF_CHECK_MARKER
)
674 if (state
== SKIP_O7
)
680 // The OMP stack might not have enough information to know to discard a bad o7.
681 // So just remember what the native stack skipped.
682 if (o7_to_skip
== stck
->fetch (index
))
687 Vaddr va
= stck
->fetch (index
);
688 DbeInstr
*cur_instr
= experiment
->map_Vaddr_to_PC (va
, tstamp
);
690 // Skip PC's from PLT, update leaf and state accordingly
691 if ((cur_instr
->func
->flags
& FUNC_FLAG_PLT
) &&
692 (leaf
|| state
== CHECK_O7
))
694 if (state
== CHECK_O7
)
699 if (state
== CHECK_O7
)
702 uint64_t saddr
= cur_instr
->func
->save_addr
;
703 if (cur_instr
->func
->isOutlineFunction
)
704 // outline functions assume 'save' instruction
705 // Note: they accidentally have saddr == FUNC_ROOT
707 else if (saddr
== FUNC_ROOT
)
709 // If a function is statically determined as a root
710 // but dynamically appears not, don't discard o7.
711 // One such case is __misalign_trap_handler on sparcv9.
715 else if (saddr
!= FUNC_NO_SAVE
&& cur_instr
->addr
> saddr
)
718 else if (state
== USE_O7
)
721 if (cur_instr
->flags
& PCInvlFlag
)
// omppcs receives a source-line object where one applies (re-resolved
// against usrfunc when the function has one) and the instruction otherwise;
// ompxpcs always receives the instruction.
725 DbeLine
*dbeline
= (DbeLine
*) cur_instr
->convertto (Histable::LINE
);
726 if (cur_instr
->func
->usrfunc
)
728 dbeline
= dbeline
->sourceFile
->find_dbeline (cur_instr
->func
->usrfunc
, dbeline
->lineno
);
729 omppcs
->append (dbeline
);
731 else if (dbeline
->lineno
> 0)
732 omppcs
->append (dbeline
);
734 omppcs
->append (cur_instr
);
735 if (dbeline
->is_set (DbeLine::OMPPRAGMA
) &&
736 frp
->omp_state
== OMP_WORK_STATE
)
737 dDscr
->setValue (PROP_OMPSTATE
, idx
, OMP_OVHD_STATE
);
738 ompxpcs
->append (cur_instr
);
741 if (frp
->omptruncated
== SP_TRUNC_STACK_MARKER
)
743 func
= dbeSession
->getSpecialFunction (DbeSession::TruncatedStackFunc
);
744 DbeInstr
*instr
= func
->find_dbeinstr (0, 0);
745 omppcs
->append (instr
);
746 ompxpcs
->append (instr
);
748 else if (frp
->omptruncated
== SP_FAILED_UNWIND_MARKER
)
750 func
= dbeSession
->getSpecialFunction (DbeSession::FailedUnwindFunc
);
751 DbeInstr
*instr
= func
->find_dbeinstr (0, 0);
752 omppcs
->append (instr
);
753 ompxpcs
->append (instr
);
756 // User model call stack
757 node
= (CallStackNode
*) add_stack (omppcs
);
758 dDscr
->setObjValue (PROP_USTACK
, idx
, node
);
762 node
= (CallStackNode
*) add_stack (ompxpcs
);
763 dDscr
->setObjValue (PROP_XSTACK
, idx
, node
);
765 dDscr
->setObjValue (PROP_JTHREAD
, idx
, JTHREAD_DEFAULT
);
770 if (frp
->omp_cprid
|| frp
->omp_state
)
772 DataView
*dview
= experiment
->getOpenMPdata ();
775 // It appears we may get OMP_SERL_STATE from a passive libmtsk
776 dDscr
->setObjValue (PROP_JTHREAD
, idx
, JTHREAD_DEFAULT
);
779 if (dview
->getDataDescriptor () == dDscr
)
781 // Don't process the user stack for OpenMP fork events yet
782 dDscr
->setObjValue (PROP_USTACK
, idx
, (void*) NULL
);
783 dDscr
->setObjValue (PROP_JTHREAD
, idx
, JTHREAD_DEFAULT
);
786 Vector
<Histable
*> *omppcs
= new Vector
<Histable
*>(stack_size
);
788 // Construct OMP user stack
789 // Find the bottom frame
791 switch (frp
->omp_state
)
795 Function
*func
= dbeSession
->get_OMP_Function (frp
->omp_state
);
796 omppcs
->append (func
->find_dbeinstr (0, 0));
797 // XXX: workaround for inconsistency between OMP_IDLE_STATE
798 // and omp_cprid != 0
800 btm
= natpcs
->size ();
811 Function
*func
= dbeSession
->get_OMP_Function (frp
->omp_state
);
812 omppcs
->append (func
->find_dbeinstr (0, 0));
814 for (btm
= 0; btm
< natpcs
->size (); btm
++)
816 LoadObject
*lo
= natpcs
->fetch (btm
)->func
->module
->loadobject
;
819 if (lo
->flags
& SEG_FLAG_OMP
)
822 else if (!(lo
->flags
& SEG_FLAG_OMP
))
834 // Find the top frame
836 switch (frp
->omp_state
)
842 dview
->sort (PROP_CPRID
);
844 tval
.setUINT64 (frp
->omp_cprid
);
845 long pidx
= dview
->getIdxByVals (&tval
, DataView::REL_EQ
);
846 if (pidx
< 0) // No parent. Process the entire nat_stack
847 top
= natpcs
->size () - 1;
850 uint32_t pthrid
= (uint32_t) dview
->getIntValue (PROP_THRID
, pidx
);
853 // Parent is on a different stack.
854 // Process the entire nat_stack. Skip libthread.
855 for (top
= natpcs
->size () - 1; top
>= 0; top
--)
857 DbeInstr
*instr
= natpcs
->fetch (top
);
858 if (instr
->func
->module
->loadobject
->flags
& SEG_FLAG_OMP
)
861 if (top
< 0) // None found. May be incomplete call stack
862 top
= natpcs
->size () - 1;
866 // Parent is on the same stack. Find match.
867 top
= natpcs
->size () - 1;
868 void *pnat_stack
= dview
->getObjValue (PROP_MSTACK
, pidx
);
869 Vector
<Histable
*> *ppcs
= getStackPCs (pnat_stack
);
870 for (int ptop
= ppcs
->size () - 1; top
>= 0 && ptop
>= 0;
873 if (natpcs
->fetch (top
) != ppcs
->fetch (ptop
))
879 // If no frames are found for Barrier/Reduction save at least one
880 if ((frp
->omp_state
== OMP_RDUC_STATE
881 || frp
->omp_state
== OMP_IBAR_STATE
882 || frp
->omp_state
== OMP_EBAR_STATE
)
883 && top
< btm
&& btm
< natpcs
->size ())
887 for (int i
= btm
; i
<= top
; ++i
)
889 DbeInstr
*instr
= natpcs
->fetch (i
);
890 if (instr
->func
->module
->loadobject
->flags
& SEG_FLAG_OMP
)
891 continue; // Skip all frames from libmtsk
892 omppcs
->append (instr
);
// Append the instructions found along the ancestor chain of the parent
// region's stack node.
894 node
= find_preg_stack (frp
->omp_cprid
);
897 omppcs
->append (node
->instr
);
898 node
= node
->ancestor
;
900 node
= (CallStackNode
*) add_stack (omppcs
);
901 dDscr
->setObjValue (PROP_USTACK
, idx
, node
);
903 dDscr
->setObjValue (PROP_JTHREAD
, idx
, JTHREAD_DEFAULT
);
907 // Construct Java user stack
908 add_stack_java (dDscr
, idx
, frp
, tstamp
, thrid
, natpcs
, true, NULL
);
911 // adjustment of leafPC/eventVA for XHWC packets with candidate eventPC
912 // Called from CallStack during initial processing of the events
// Chooses the PC (`bestPC`) a hardware-counter event is attributed to, given
// the leaf PC of the collected stack and the candidate event PC, and may OR
// an ABS_* code into eventVA when the effective address cannot be validated.
// Every call increments experiment->dsevents.  When the module has no
// branch-target entries, experiment->dsnoxhwcevents is incremented as well;
// otherwise the module's bTargets list is searched for a branch target
// relative to the leaf and candidate offsets.
// NOTE(review): the remaining parameter(s), the abst_type-specific early
// paths and the return statement are on lines not shown in this listing.
914 CallStackP::adjustEvent (DbeInstr
*leafPC
, DbeInstr
*candPC
, Vaddr
&eventVA
,
917 // increment counter of dataspace events
918 experiment
->dsevents
++;
920 if (abst_type
== ABST_EXACT_PEBS_PLUS1
)
922 else if (abst_type
== ABST_EXACT
)
928 /* precise backtracking */
929 /* assume within 1 instruction of leaf (this could be checked here) */
930 // no change to eventVA or candPC
933 Function
*func
= leafPC
->func
;
934 unsigned int bt_entries
= func
->module
->bTargets
.size ();
935 DbeInstr
*bestPC
= NULL
;
937 // bt == branch target (potential destination of a branch
939 { // no XHWCprof info for this module
941 experiment
->dsnoxhwcevents
++;
943 // see if event is to be processed anyway
944 if (!dbeSession
->check_ignore_no_xhwcprof ())
946 // Don't ignore error
947 // XXX -- set error code in event VA -- replace with other mechanism
948 if (eventVA
> ABS_CODE_RANGE
)
950 eventVA
|= ABS_NO_CTI_INFO
; // => effective address can't be validated
951 bestPC
= leafPC
; // => no PC correction possible
954 bestPC
= candPC
; // assume the event valid
958 // we have the info to verify the backtracking
960 int bt_entry
= bt_entries
;
// Offsets are formed as function image offset plus instruction address so
// they can be compared with bt->offset.
961 uint64_t leafPC_offset
= func
->img_offset
+ leafPC
->addr
;
962 uint64_t candPC_offset
= candPC
->func
->img_offset
+ candPC
->addr
;
966 bt
= func
->module
->bTargets
.fetch (bt_entry
);
967 /* bts seem to be sorted by offset, smallest to largest */
969 while (bt_entry
> 0 && bt
->offset
> leafPC_offset
);
970 /* if bt_entry == 0, all items have been checked */
972 if (bt
->offset
> leafPC_offset
)
973 { /* XXXX isn't it possible that all bt's are after leafPC_offset? */
974 bestPC
= leafPC
; // actual event PC can't be determined
975 if (eventVA
> ABS_CODE_RANGE
)
977 eventVA
|= ABS_INFO_FAILED
; // effective address can't be validated
979 else if (bt
->offset
> candPC_offset
)
981 // use synthetic PC corresponding to bTarget
982 bestPC
= func
->find_dbeinstr (PCTrgtFlag
, bt
->offset
- func
->img_offset
);
983 if (eventVA
> ABS_CODE_RANGE
)
985 eventVA
|= ABS_CTI_TARGET
; // effective address can't be validated
988 bestPC
= candPC
; // accept provided virtual address as valid
// Variant of add_stack () that ends by passing `objs` to
// add_stack (Vector<Histable*> *) and returning its result.
// The loop walks two indices inward from both ends of objs.
// NOTE(review): the loop body is not shown in this listing; presumably it
// swaps the two elements so the vector is reversed in place -- confirm.
994 CallStackP::add_stack_d (Vector
<Histable
*> *objs
)
998 for (int i
= 0, j
= objs
->size () - 1; i
< j
; ++i
, --j
)
1000 return add_stack (objs
);
// Constructor.  The visible statement records `_ancestor` as this node's
// ancestor link.
// NOTE(review): the initialisation of the other members (including the one
// fed by `_instr`) is on lines not shown in this listing.
1003 CallStackNode::CallStackNode (CallStackNode
*_ancestor
, Histable
*_instr
)
1005 ancestor
= _ancestor
;
// Destructor with an empty body.
1010 CallStackNode::~CallStackNode () { }
// Checks whether the ancestor chain starting at this node matches
// objs[start .. end-1]: element i is compared with the instruction of the
// i-th node on the chain, and running out of nodes counts as a mismatch.
// NOTE(review): the return statements and the use of `mRoot` are on lines
// not shown in this listing.
1013 CallStackNode::compare (long start
, long end
, Vector
<Histable
*> *objs
, CallStackNode
*mRoot
)
1015 CallStackNode
*p
= this;
1016 for (long i
= start
; i
< end
; i
++, p
= p
->get_ancestor ())
1017 if (p
== NULL
|| p
->get_instr () != objs
->get (i
))
// Debug helper: walks the ancestor chain starting at this node and prints
// one line per node to stderr with the node address, the id of its
// instruction and the instruction's name.
// NOTE(review): `sz` and `s`, used for the "%.*s" prefix, are defined on
// lines not shown in this listing.
1023 CallStackNode::dump ()
1027 for (CallStackNode
*p
= this; p
; p
= p
->get_ancestor ())
1029 fprintf (stderr
, NTXT ("%.*s 0x%08llx id=0x%08llx %s\n"), sz
, s
,
1030 (long long) p
, (long long) p
->get_instr ()->id
,
1031 STR (p
->get_instr ()->get_name ()));
// Debug statistics.  CallStackP::add_stack (Vector<Histable*> *) increments
// total_calls_add_stack and call_stack_size[] when DUMP_CALL_STACK is set;
// call_stack_size[] is indexed by stack size, with every size above 200
// counted in the last bucket.
1037 long total_calls_add_stack
, total_stacks
, total_nodes
, call_stack_size
[201];
// Interns the PC vector `objs` as a chain of CallStackNodes.
// A key is formed by XOR-ing objs->size () with every element pointer (a
// zero result is replaced by 1) and looked up in cstackMap.  XOR does not
// depend on element order, so different vectors can share a key; a cached
// node is therefore only accepted after CallStackNode::compare confirms it.
// On a miss the vector is walked from its last element to its first,
// descending through existing child nodes with find (); missing nodes are
// created with new_Node () while cstackLock is held, and the final node is
// stored in cstackMap under the key.
// NOTE(review): the starting node of the walk, the declaration of `left`
// and the return statements are on lines not shown in this listing.
1040 CallStackP::add_stack (Vector
<Histable
*> *objs
)
1043 uint64_t hash
= objs
->size ();
1044 for (long i
= objs
->size () - 1; i
>= 0; --i
)
1045 hash
^= (unsigned long long) objs
->get (i
);
1047 uint64_t key
= hash
? hash
: 1;
1048 CallStackNode
*node
= cstackMap
->get (key
);
1050 if (DUMP_CALL_STACK
)
1052 total_calls_add_stack
++;
1053 call_stack_size
[objs
->size () > 200 ? 200 : objs
->size ()]++;
1054 Dprintf (DUMP_CALL_STACK
,
1055 "add_stack: %lld size=%lld key=0x%08llx cashNode=0x%08llx\n",
1056 (long long) total_calls_add_stack
, (long long) objs
->size (),
1057 (long long) key
, (long long) node
);
1058 for (long i
= 0, sz
= VecSize (objs
); i
< sz
; i
++)
1059 Dprintf (DUMP_CALL_STACK
, " add_stack: %.*s 0x%08llx id=0x%08llx %s\n",
1060 (int) i
, NTXT (" "), (long long) objs
->get (i
),
1061 (long long) objs
->get (i
)->id
, STR (objs
->get (i
)->get_name ()));
// Cache hit: accept only if the cached chain really matches objs.
1064 if (node
&& node
->compare (0, objs
->size (), objs
, root
))
1066 Dprintf (DUMP_CALL_STACK
, NTXT ("STACK FOUND: key=0x%08llx 0x%08llx id=0x%08llx %s\n"),
1067 (long long) key
, (long long) node
,
1068 (long long) node
->get_instr ()->id
,
1069 STR (node
->get_instr ()->get_name ()));
1073 for (long i
= objs
->size () - 1; i
>= 0; i
--)
1075 Histable
*instr
= objs
->get (i
);
// Remember the child count seen before the lock is taken so a concurrent
// change can be detected afterwards.
1076 int old_count
= node
->count
;
1078 CallStackNode
*nd
= node
->find (instr
, &left
);
1084 cstackLock
->aquireLock (); // Use one lock for all nodes
1085 // node->aquireLock();
// The child table changed while unlocked: search again under the lock.
1086 if (old_count
!= node
->count
)
1088 nd
= node
->find (instr
, &left
);
1090 { // the other thread has created this node
1091 cstackLock
->releaseLock ();
1092 // node->releaseLock();
1100 CallStackNode
*first
= NULL
;
1103 CallStackNode
*anc
= node
;
1105 node
= new_Node (anc
, objs
->get (i
));
// Link the newly created node into the child table at the position
// reported by find ().
1112 nd
->insert (left
, first
);
1113 cstackLock
->releaseLock ();
1114 // nd->releaseLock();
1117 cstackMap
->put (key
, node
);
1118 if (DUMP_CALL_STACK
)
// Returns the address of node number `n`: chunk n / CHUNKSZ, slot
// n % CHUNKSZ.  No range check on `n` is visible in this listing.
1124 CallStackP::get_node (int n
)
1127 return &chunks
[n
/ CHUNKSZ
][n
% CHUNKSZ
];
// Writes a listing of all nodes to `fd` (stderr when fd is NULL): a header
// with the node count, one line per node showing the node and ancestor
// addresses, and finally maxdepth and maxwidth.
// For LINE and INSTR histables the name shown is that of the owning
// function; for any other type it is the histable's own name.
// NOTE(review): the computation of maxdepth/maxwidth and the remaining
// fprintf arguments are on lines not shown in this listing.
1135 CallStackP::print (FILE *fd
)
1137 FILE *f
= (fd
== NULL
? stderr
: fd
);
1138 fprintf (f
, GTXT ("CallStack: nodes = %d\n\n"), nodes
);
1143 for (int i
= 0; i
< nodes
; i
++)
1145 CallStackNode
*node
= &chunks
[i
/ CHUNKSZ
][i
% CHUNKSZ
];
1146 Histable
*instr
= node
->instr
;
1147 if (instr
->get_type () == Histable::LINE
)
1150 n
= ((DbeLine
*) instr
)->func
->get_name ();
1152 else if (instr
->get_type () == Histable::INSTR
)
1155 n
= ((DbeInstr
*) instr
)->func
->get_name ();
1160 n
= instr
->get_name ();
1162 long long addr
= (long long) instr
->get_addr ();
1163 fprintf (f
, GTXT ("node: 0x%016llx anc: 0x%016llx -- 0x%016llX: %s %s\n"),
1164 (unsigned long long) node
, (unsigned long long) node
->ancestor
,
1167 fprintf (f
, GTXT ("md = %d, mw = %d\n"), maxdepth
, maxwidth
);
1171 * Static CallStack methods
// Factory: returns a newly allocated CallStackP constructed for `exp`.
1174 CallStack::getInstance (Experiment
*exp
)
1176 return new CallStackP (exp
);
// Walks the ancestor chain that starts at `stack` and returns the value of
// `sz` minus one, so that the root node is not counted.
// NOTE(review): the declaration of `sz` and the loop body are on lines not
// shown in this listing; presumably sz counts the nodes visited -- confirm.
1180 CallStack::stackSize (void *stack
)
1182 CallStackNode
*node
= (CallStackNode
*) stack
;
1184 for (; node
; node
= node
->ancestor
)
1186 return sz
- 1; // don't count the root node
// Looks up a PC of `stack` selected by `n`, stepping along the ancestor
// links.  The visible fallback returns the instruction at address 0 of the
// session's Unknown function, flagged PCInvlFlag.
// NOTE(review): the loop that consumes `n`, the test that selects the
// fallback and the normal return are on lines not shown in this listing.
1190 CallStack::getStackPC (void *stack
, int n
)
1192 CallStackNode
*node
= (CallStackNode
*) stack
;
1194 node
= node
->ancestor
;
1196 return dbeSession
->get_Unknown_Function ()->find_dbeinstr (PCInvlFlag
, 0);
// Collects the instructions of `stack` into a newly allocated vector,
// starting at the given node and following the ancestor links; the root
// node (the one without an ancestor) is not included.
// When get_hide_stack is true and the node has an alt_node, the walk starts
// from alt_node instead.
// A NULL `stack` yields an empty vector: the loop below already tolerates a
// NULL node, so the alt_node test checks for it too rather than
// dereferencing a NULL pointer.
// NOTE(review): the return statement is on a line not shown in this listing.
1201 CallStack::getStackPCs (void *stack
, bool get_hide_stack
)
1203 Vector
<Histable
*> *res
= new Vector
<Histable
*>;
1204 CallStackNode
*node
= (CallStackNode
*) stack
;
1205 if (get_hide_stack
&& node
!= NULL
&& node
->alt_node
!= NULL
)
1206 node
= node
->alt_node
;
1207 while (node
&& node
->ancestor
)
1208 { // skip the root node
1209 res
->append (node
->instr
);
1210 node
= node
->ancestor
;
// Orders two stacks.  Identical pointers are handled first; otherwise both
// ancestor chains are walked in lock-step comparing instr->id, and when all
// compared ids are equal the chain that ends first decides the result.
// NOTE(review): the return values are on lines not shown in this listing.
1216 CallStack::compare (void *stack1
, void *stack2
)
1218 // Quick comparison
1219 if (stack1
== stack2
)
1222 CallStackNode
*node1
= (CallStackNode
*) stack1
;
1223 CallStackNode
*node2
= (CallStackNode
*) stack2
;
1224 while (node1
!= NULL
&& node2
!= NULL
)
1226 //to keep the result const on different platforms
1227 //we use instr->id instead of instr
1228 if (node1
->instr
->id
< node2
->instr
->id
)
1230 else if (node1
->instr
->id
> node2
->instr
->id
)
1232 node1
= node1
->ancestor
;
1233 node2
= node2
->ancestor
;
// One chain ended before the other.
1235 if (node1
== NULL
&& node2
!= NULL
)
1237 else if (node1
!= NULL
&& node2
== NULL
)
1243 // LIBRARY VISIBILITY
// Records `hideStack` as the alt_node of `stack`; getStackPCs () starts from
// alt_node when it is called with get_hide_stack set.
// `stack` is dereferenced without a NULL check.
1246 CallStack::setHideStack (void *stack
, void *hideStack
)
1248 CallStackNode
*hNode
= (CallStackNode
*) stack
;
1249 hNode
->alt_node
= (CallStackNode
*) hideStack
;