1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 //===----------------------------------------------------------------------===//
12 #include "polly/CodeGen/PerfMonitor.h"
13 #include "polly/CodeGen/RuntimeDebugBuilder.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/IR/Intrinsics.h"
18 using namespace polly
;
20 Function
*PerfMonitor::getAtExit() {
21 const char *Name
= "atexit";
22 Function
*F
= M
->getFunction(Name
);
25 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
26 FunctionType
*Ty
= FunctionType::get(Builder
.getInt32Ty(),
27 {Builder
.getInt8PtrTy()}, false);
28 F
= Function::Create(Ty
, Linkage
, Name
, M
);
34 void PerfMonitor::addToGlobalConstructors(Function
*Fn
) {
35 const char *Name
= "llvm.global_ctors";
36 GlobalVariable
*GV
= M
->getGlobalVariable(Name
);
37 std::vector
<Constant
*> V
;
40 Constant
*Array
= GV
->getInitializer();
41 for (Value
*X
: Array
->operand_values())
42 V
.push_back(cast
<Constant
>(X
));
43 GV
->eraseFromParent();
46 StructType
*ST
= StructType::get(Builder
.getInt32Ty(), Fn
->getType(),
47 Builder
.getInt8PtrTy());
49 V
.push_back(ConstantStruct::get(
50 ST
, Builder
.getInt32(10), Fn
,
51 ConstantPointerNull::get(Builder
.getInt8PtrTy())));
52 ArrayType
*Ty
= ArrayType::get(ST
, V
.size());
54 GV
= new GlobalVariable(*M
, Ty
, true, GlobalValue::AppendingLinkage
,
55 ConstantArray::get(Ty
, V
), Name
, nullptr,
56 GlobalVariable::NotThreadLocal
);
59 Function
*PerfMonitor::getRDTSCP() {
60 return Intrinsic::getDeclaration(M
, Intrinsic::x86_rdtscp
);
63 PerfMonitor::PerfMonitor(Module
*M
) : M(M
), Builder(M
->getContext()) {
64 if (Triple(M
->getTargetTriple()).getArch() == llvm::Triple::x86_64
)
70 void PerfMonitor::addGlobalVariables() {
71 auto TryRegisterGlobal
= [=](const char *Name
, Constant
*InitialValue
,
73 *Location
= M
->getGlobalVariable(Name
);
76 *Location
= new GlobalVariable(
77 *M
, InitialValue
->getType(), true, GlobalValue::WeakAnyLinkage
,
78 InitialValue
, Name
, nullptr, GlobalVariable::InitialExecTLSModel
);
81 TryRegisterGlobal("__polly_perf_cycles_total_start", Builder
.getInt64(0),
82 &CyclesTotalStartPtr
);
84 TryRegisterGlobal("__polly_perf_initialized", Builder
.getInt1(0),
85 &AlreadyInitializedPtr
);
87 TryRegisterGlobal("__polly_perf_cycles_in_scops", Builder
.getInt64(0),
90 TryRegisterGlobal("__polly_perf_cycles_in_scop_start", Builder
.getInt64(0),
91 &CyclesInScopStartPtr
);
93 TryRegisterGlobal("__polly_perf_write_loation", Builder
.getInt32(0),
94 &RDTSCPWriteLocation
);
/// Name of the generated function that initializes the performance monitor.
static const char *const InitFunctionName = "__polly_perf_init";
/// Name of the generated function that prints the final report.
static const char *const FinalReportingFunctionName = "__polly_perf_final";
100 Function
*PerfMonitor::insertFinalReporting() {
101 // Create new function.
102 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
103 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
105 Function::Create(Ty
, Linkage
, FinalReportingFunctionName
, M
);
106 BasicBlock
*Start
= BasicBlock::Create(M
->getContext(), "start", ExitFn
);
107 Builder
.SetInsertPoint(Start
);
110 RuntimeDebugBuilder::createCPUPrinter(
111 Builder
, "Polly runtime information generation not supported\n");
112 Builder
.CreateRetVoid();
116 // Measure current cycles and compute final timings.
117 Function
*RDTSCPFn
= getRDTSCP();
118 Value
*CurrentCycles
= Builder
.CreateCall(
120 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
121 Value
*CyclesStart
= Builder
.CreateLoad(CyclesTotalStartPtr
, true);
122 Value
*CyclesTotal
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
123 Value
*CyclesInScops
= Builder
.CreateLoad(CyclesInScopsPtr
, true);
125 // Print the runtime information.
126 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Polly runtime information\n");
127 RuntimeDebugBuilder::createCPUPrinter(Builder
, "-------------------------\n");
128 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Total: ", CyclesTotal
, "\n");
129 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Scops: ", CyclesInScops
,
132 // Finalize function.
133 Builder
.CreateRetVoid();
137 void PerfMonitor::initialize() {
138 addGlobalVariables();
140 Function
*F
= M
->getFunction(InitFunctionName
);
145 Function
*FinalReporting
= insertFinalReporting();
146 Function
*InitFn
= insertInitFunction(FinalReporting
);
147 addToGlobalConstructors(InitFn
);
150 Function
*PerfMonitor::insertInitFunction(Function
*FinalReporting
) {
151 // Insert function definition and BBs.
152 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
153 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
154 Function
*InitFn
= Function::Create(Ty
, Linkage
, InitFunctionName
, M
);
155 BasicBlock
*Start
= BasicBlock::Create(M
->getContext(), "start", InitFn
);
156 BasicBlock
*EarlyReturn
=
157 BasicBlock::Create(M
->getContext(), "earlyreturn", InitFn
);
158 BasicBlock
*InitBB
= BasicBlock::Create(M
->getContext(), "initbb", InitFn
);
160 Builder
.SetInsertPoint(Start
);
162 // Check if this function was already run. If yes, return.
164 // In case profiling has been enabled in multiple translation units, the
165 // initializer function will be added to the global constructors list of
166 // each translation unit. When merging translation units, the global
167 // constructor lists are just appended, such that the initializer will appear
168 // multiple times. To avoid initializations being run multiple times (and
169 // especially to avoid that atExitFn is called more than once), we bail
170 // out if the intializer is run more than once.
171 Value
*HasRunBefore
= Builder
.CreateLoad(AlreadyInitializedPtr
);
172 Builder
.CreateCondBr(HasRunBefore
, EarlyReturn
, InitBB
);
173 Builder
.SetInsertPoint(EarlyReturn
);
174 Builder
.CreateRetVoid();
176 // Keep track that this function has been run once.
177 Builder
.SetInsertPoint(InitBB
);
178 Value
*True
= Builder
.getInt1(true);
179 Builder
.CreateStore(True
, AlreadyInitializedPtr
);
181 // Register the final reporting function with atexit().
182 Value
*FinalReportingPtr
=
183 Builder
.CreatePointerCast(FinalReporting
, Builder
.getInt8PtrTy());
184 Function
*AtExitFn
= getAtExit();
185 Builder
.CreateCall(AtExitFn
, {FinalReportingPtr
});
188 // Read the currently cycle counter and store the result for later.
189 Function
*RDTSCPFn
= getRDTSCP();
190 Value
*CurrentCycles
= Builder
.CreateCall(
192 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
193 Builder
.CreateStore(CurrentCycles
, CyclesTotalStartPtr
, true);
195 Builder
.CreateRetVoid();
200 void PerfMonitor::insertRegionStart(Instruction
*InsertBefore
) {
204 Builder
.SetInsertPoint(InsertBefore
);
205 Function
*RDTSCPFn
= getRDTSCP();
206 Value
*CurrentCycles
= Builder
.CreateCall(
208 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
209 Builder
.CreateStore(CurrentCycles
, CyclesInScopStartPtr
, true);
212 void PerfMonitor::insertRegionEnd(Instruction
*InsertBefore
) {
216 Builder
.SetInsertPoint(InsertBefore
);
217 Function
*RDTSCPFn
= getRDTSCP();
218 LoadInst
*CyclesStart
= Builder
.CreateLoad(CyclesInScopStartPtr
, true);
219 Value
*CurrentCycles
= Builder
.CreateCall(
221 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
222 Value
*CyclesInScop
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
223 Value
*CyclesInScops
= Builder
.CreateLoad(CyclesInScopsPtr
, true);
224 CyclesInScops
= Builder
.CreateAdd(CyclesInScops
, CyclesInScop
);
225 Builder
.CreateStore(CyclesInScops
, CyclesInScopsPtr
, true);