1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 //===----------------------------------------------------------------------===//
12 #include "polly/CodeGen/PerfMonitor.h"
13 #include "polly/CodeGen/RuntimeDebugBuilder.h"
14 #include "polly/ScopInfo.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/IR/Intrinsics.h"
20 using namespace polly
;
// Obtain the `atexit` function of module M. The visible code looks the
// function up by name and creates a declaration of type i32(i8*) with
// external linkage.
// NOTE(review): the embedded listing numbers jump 24 -> 27 and stop at 30, so
// any guard around the creation (e.g. "only if the lookup failed") and the
// return statement are not visible in this listing -- confirm against the
// full file.
22 Function
*PerfMonitor::getAtExit() {
23 const char *Name
= "atexit";
// Look for an already existing function with that name.
24 Function
*F
= M
->getFunction(Name
);
// Build the declaration: external linkage, returns i32, takes one i8*.
27 GlobalValue::LinkageTypes Linkage
= Function::ExternalLinkage
;
28 FunctionType
*Ty
= FunctionType::get(Builder
.getInt32Ty(),
29 {Builder
.getInt8PtrTy()}, false);
30 F
= Function::Create(Ty
, Linkage
, Name
, M
);
// Add Fn to the module's "llvm.global_ctors" array. The array global is
// rebuilt: the operands of an existing initializer are collected into V, the
// old global is erased, and a new appending-linkage global holding the
// extended array is created under the same name.
//
// Fn: the function to register as a global constructor.
//
// NOTE(review): listing numbers jump 39 -> 42, 45 -> 48 and 49 -> 52; the
// null check on GV and the call that appends the new entry to V are
// presumably on those missing lines -- confirm against the full file.
36 void PerfMonitor::addToGlobalConstructors(Function
*Fn
) {
37 const char *Name
= "llvm.global_ctors";
38 GlobalVariable
*GV
= M
->getGlobalVariable(Name
);
39 std::vector
<Constant
*> V
;
// Carry over the entries of the existing constructor list, then drop the old
// global so that a larger array can take its place.
42 Constant
*Array
= GV
->getInitializer();
43 for (Value
*X
: Array
->operand_values())
44 V
.push_back(cast
<Constant
>(X
));
45 GV
->eraseFromParent();
// Entry layout used here: { i32, <type of Fn>, i8* }.
48 StructType
*ST
= StructType::get(Builder
.getInt32Ty(), Fn
->getType(),
49 Builder
.getInt8PtrTy());
// The new entry: the constant 10 in the i32 slot (the priority field of a
// global_ctors entry per the LLVM LangRef), Fn itself, and a null i8*.
52 ConstantStruct::get(ST
, Builder
.getInt32(10), Fn
,
53 ConstantPointerNull::get(Builder
.getInt8PtrTy())));
54 ArrayType
*Ty
= ArrayType::get(ST
, V
.size());
// Re-create the global with appending linkage, initialized with all entries
// in V, and not thread-local.
56 GV
= new GlobalVariable(*M
, Ty
, true, GlobalValue::AppendingLinkage
,
57 ConstantArray::get(Ty
, V
), Name
, nullptr,
58 GlobalVariable::NotThreadLocal
);
// Return the declaration of the x86 `rdtscp` intrinsic in module M, as
// provided by Intrinsic::getDeclaration.
61 Function
*PerfMonitor::getRDTSCP() {
62 return Intrinsic::getDeclaration(M
, Intrinsic::x86_rdtscp
);
// Construct a performance monitor for scop S that emits code into module M.
// The IR builder is created on M's LLVM context.
//
// S: the scop this monitor refers to (stored by the member initializer).
// M: the module that receives the generated code.
//
// NOTE(review): the condition below checks whether the module's target triple
// has architecture x86_64, but the statements it controls are missing from
// this listing (numbers stop at 67) -- confirm what is set in each branch.
65 PerfMonitor::PerfMonitor(const Scop
&S
, Module
*M
)
66 : M(M
), Builder(M
->getContext()), S(S
) {
67 if (Triple(M
->getTargetTriple()).getArch() == llvm::Triple::x86_64
)
// Bind *Location to the global variable called Name in module M. The visible
// code first stores the result of the by-name lookup and then creates a new
// global of InitialValue's type, with weak-any linkage, InitialValue as its
// initializer and the initial-exec TLS model.
//
// M:            module to search / extend.
// Name:         name of the global variable.
// InitialValue: initializer (and type source) for a newly created global.
// Location:     out-parameter receiving the global.
//
// NOTE(review): listing numbers jump 75 -> 78; the creation is presumably
// guarded by a check that the lookup returned null -- confirm.
// NOTE(review): the third constructor argument is `true` (isConstant in
// LLVM's GlobalVariable constructor), yet these globals are written with
// CreateStore elsewhere in this file -- confirm this is intended.
73 static void TryRegisterGlobal(Module
*M
, const char *Name
,
74 Constant
*InitialValue
, Value
**Location
) {
75 *Location
= M
->getGlobalVariable(Name
);
78 *Location
= new GlobalVariable(
79 *M
, InitialValue
->getType(), true, GlobalValue::WeakAnyLinkage
,
80 InitialValue
, Name
, nullptr, GlobalVariable::InitialExecTLSModel
);
83 // Generate a unique name that is usable as an LLVM name for a scop to name its
84 // performance counter.
//
// The name is assembled as
//   "__polly_perf_in_" <function name> "_from__" <entry> "__to__" <exit>
// where <entry>/<exit> are the two strings returned by S.getEntryExitStr().
//
// NOTE(review): the statement returning the assembled string is not visible
// in this listing (numbers stop at 91).
85 static std::string
GetScopUniqueVarname(const Scop
&S
) {
86 std::stringstream Name
;
87 std::string EntryString
, ExitString
;
88 std::tie(EntryString
, ExitString
) = S
.getEntryExitStr();
90 Name
<< "__polly_perf_in_" << std::string(S
.getFunction().getName())
91 << "_from__" << EntryString
<< "__to__" << ExitString
;
// Register the two per-scop counters as module globals. Both are i64 values
// initialized to 0 and named after GetScopUniqueVarname(S) with the suffixes
// "_cycles" and "_trip_count"; the resulting globals are stored in
// CyclesInCurrentScopPtr and TripCountForCurrentScopPtr respectively.
95 void PerfMonitor::addScopCounter() {
96 const std::string varname
= GetScopUniqueVarname(S
);
97 TryRegisterGlobal(M
, (varname
+ "_cycles").c_str(), Builder
.getInt64(0),
98 &CyclesInCurrentScopPtr
);
100 TryRegisterGlobal(M
, (varname
+ "_trip_count").c_str(), Builder
.getInt64(0),
101 &TripCountForCurrentScopPtr
);
// Register the module-wide globals used by the monitor, each through
// TryRegisterGlobal with a zero initial value:
//   __polly_perf_cycles_total_start   (i64) -> CyclesTotalStartPtr
//   __polly_perf_initialized          (i1)  -> AlreadyInitializedPtr
//   __polly_perf_cycles_in_scops      (i64) -> (destination not visible)
//   __polly_perf_cycles_in_scop_start (i64) -> CyclesInScopStartPtr
//   __polly_perf_write_loation        (i32) -> RDTSCPWriteLocation
//
// NOTE(review): "loation" in the last name looks like a misspelling of
// "location"; it is the runtime symbol name, so renaming it would change the
// emitted global -- left as is.
104 void PerfMonitor::addGlobalVariables() {
105 TryRegisterGlobal(M
, "__polly_perf_cycles_total_start", Builder
.getInt64(0),
106 &CyclesTotalStartPtr
);
108 TryRegisterGlobal(M
, "__polly_perf_initialized", Builder
.getInt1(0),
109 &AlreadyInitializedPtr
);
// NOTE(review): the final argument of the next call (listing line 112) is
// missing from this listing; presumably it is the address of
// CyclesInScopsPtr, which other functions in this file read -- confirm.
111 TryRegisterGlobal(M
, "__polly_perf_cycles_in_scops", Builder
.getInt64(0),
114 TryRegisterGlobal(M
, "__polly_perf_cycles_in_scop_start", Builder
.getInt64(0),
115 &CyclesInScopStartPtr
);
117 TryRegisterGlobal(M
, "__polly_perf_write_loation", Builder
.getInt32(0),
118 &RDTSCPWriteLocation
);
// Names given to the generated initializer and final-reporting functions.
121 static const char *InitFunctionName
= "__polly_perf_init";
122 static const char *FinalReportingFunctionName
= "__polly_perf_final";
// File-level state shared by all PerfMonitor instances in this translation
// unit: the "start" block of the final-reporting function and the `ret`
// instruction that currently terminates it. Both are set by
// insertFinalReporting(); AppendScopReporting() asserts they are non-null,
// erases the old return and stores the new one.
124 static BasicBlock
*FinalStartBB
= nullptr;
125 static ReturnInst
*ReturnFromFinal
= nullptr;
// Create the final-reporting function (void(), weak_odr linkage, named by
// FinalReportingFunctionName) and fill its "start" block with code that reads
// the cycle counter, computes the total elapsed cycles, and prints the
// overall statistics followed by the header of the per-scop table. The
// terminating `ret void` is remembered in ReturnFromFinal, and the block in
// FinalStartBB, so that AppendScopReporting() can extend the function later.
//
// NOTE(review): several listing lines are missing (131, 135-136, 140-142,
// 146, 157-158, 168+). In particular the declaration of `ExitFn`, the guard
// around the "not supported" path, the callee argument of CreateCall, the
// tail of the "Scops: " print and the return statement are not visible.
127 Function
*PerfMonitor::insertFinalReporting() {
128 // Create new function.
129 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
130 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
// NOTE(review): the result of this call is not assigned in the visible text,
// yet `ExitFn` is used right below -- its declaration is presumably on the
// missing listing line 131.
132 Function::Create(Ty
, Linkage
, FinalReportingFunctionName
, M
);
133 FinalStartBB
= BasicBlock::Create(M
->getContext(), "start", ExitFn
);
134 Builder
.SetInsertPoint(FinalStartBB
);
// NOTE(review): this message-and-return sequence is presumably the path for
// targets on which monitoring is unsupported; its condition is not visible.
137 RuntimeDebugBuilder::createCPUPrinter(
138 Builder
, "Polly runtime information generation not supported\n");
139 Builder
.CreateRetVoid();
143 // Measure current cycles and compute final timings.
144 Function
*RDTSCPFn
= getRDTSCP();
// The visible argument passes RDTSCPWriteLocation cast to i8*; the callee
// (presumably RDTSCPFn) is on the missing listing line 146.
145 Value
*CurrentCycles
= Builder
.CreateCall(
147 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
// Total = cycle count now - cycle count stored in CyclesTotalStartPtr.
148 Value
*CyclesStart
= Builder
.CreateLoad(CyclesTotalStartPtr
, true);
149 Value
*CyclesTotal
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
150 Value
*CyclesInScops
= Builder
.CreateLoad(CyclesInScopsPtr
, true);
152 // Print the runtime information.
153 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Polly runtime information\n");
154 RuntimeDebugBuilder::createCPUPrinter(Builder
, "-------------------------\n");
155 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Total: ", CyclesTotal
, "\n");
156 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Scops: ", CyclesInScops
,
159 // Print the preamble for per-scop information.
160 RuntimeDebugBuilder::createCPUPrinter(Builder
, "\n");
161 RuntimeDebugBuilder::createCPUPrinter(Builder
, "Per SCoP information\n");
162 RuntimeDebugBuilder::createCPUPrinter(Builder
, "--------------------\n");
164 RuntimeDebugBuilder::createCPUPrinter(
165 Builder
, "scop function, "
166 "entry block name, exit block name, total time, trip count\n");
// Remember the terminator so that later per-scop rows can replace it.
167 ReturnFromFinal
= Builder
.CreateRetVoid();
// Append one row for the current scop to the final-reporting function. The
// builder is pointed at FinalStartBB, the previously stored `ret` is erased,
// the per-scop cycle and trip-count globals are loaded, a comma-separated row
// (function name, entry name, exit name, cycles, trip count) is printed, and
// a fresh `ret void` is emitted and stored back into ReturnFromFinal.
//
// Requires insertFinalReporting() to have run first (checked by the asserts).
//
// NOTE(review): listing numbers jump 171 -> 175; an early exit (e.g. for
// unsupported targets) may be on the missing lines -- confirm.
171 void PerfMonitor::AppendScopReporting() {
175 assert(FinalStartBB
&& "Expected FinalStartBB to be initialized by "
176 "PerfMonitor::insertFinalReporting.");
177 assert(ReturnFromFinal
&& "Expected ReturnFromFinal to be initialized by "
178 "PerfMonitor::insertFinalReporting.");
// Drop the old terminator so the new instructions extend the same block.
180 Builder
.SetInsertPoint(FinalStartBB
);
181 ReturnFromFinal
->eraseFromParent();
183 Value
*CyclesInCurrentScop
=
184 Builder
.CreateLoad(this->CyclesInCurrentScopPtr
, true);
186 Value
*TripCountForCurrentScop
=
187 Builder
.CreateLoad(this->TripCountForCurrentScopPtr
, true);
189 std::string EntryName
, ExitName
;
190 std::tie(EntryName
, ExitName
) = S
.getEntryExitStr();
192 // Print in CSV for easy parsing with other tools.
193 RuntimeDebugBuilder::createCPUPrinter(
194 Builder
, S
.getFunction().getName(), ", ", EntryName
, ", ", ExitName
, ", ",
195 CyclesInCurrentScop
, ", ", TripCountForCurrentScop
, "\n");
// Re-terminate the block and remember the new return for the next append.
197 ReturnFromFinal
= Builder
.CreateRetVoid();
// File-level handle to the final-reporting function. It starts out null and
// is assigned in PerfMonitor::initialize() from insertFinalReporting(), which
// uses the null state to create that function only once.
200 static Function
*FinalReporting
= nullptr;
// Set up the monitor: register the global variables, create the
// final-reporting function and the initializer on first use (registering the
// initializer as a global constructor), and append this scop's reporting row.
//
// NOTE(review): listing numbers jump 203 -> 206 and 212 -> 215; a call on the
// missing lines (possibly addScopCounter()) and the closing brace of the `if`
// are not visible, so whether AppendScopReporting() runs inside or after the
// `if` cannot be told from this listing -- confirm.
202 void PerfMonitor::initialize() {
203 addGlobalVariables();
206 // Ensure that we only add the final reporting function once.
207 // On later invocations, append to the reporting function.
208 if (!FinalReporting
) {
209 FinalReporting
= insertFinalReporting();
211 Function
*InitFn
= insertInitFunction(FinalReporting
);
212 addToGlobalConstructors(InitFn
);
215 AppendScopReporting();
// Create the initializer function (void(), weak_odr linkage, named by
// InitFunctionName). Its body has three blocks:
//   start       - loads AlreadyInitializedPtr and branches on it;
//   earlyreturn - returns immediately when the flag was already set;
//   initbb      - sets the flag, registers FinalReporting with atexit(),
//                 stores the current cycle counter into CyclesTotalStartPtr,
//                 and returns.
//
// FinalReporting: the function to hand to atexit() (cast to i8*).
//
// NOTE(review): the statement returning InitFn and the callee argument of the
// cycle-counter CreateCall (listing line 259) are not visible in this listing.
218 Function
*PerfMonitor::insertInitFunction(Function
*FinalReporting
) {
219 // Insert function definition and BBs.
220 GlobalValue::LinkageTypes Linkage
= Function::WeakODRLinkage
;
221 FunctionType
*Ty
= FunctionType::get(Builder
.getVoidTy(), {}, false);
222 Function
*InitFn
= Function::Create(Ty
, Linkage
, InitFunctionName
, M
);
223 BasicBlock
*Start
= BasicBlock::Create(M
->getContext(), "start", InitFn
);
224 BasicBlock
*EarlyReturn
=
225 BasicBlock::Create(M
->getContext(), "earlyreturn", InitFn
);
226 BasicBlock
*InitBB
= BasicBlock::Create(M
->getContext(), "initbb", InitFn
);
228 Builder
.SetInsertPoint(Start
);
230 // Check if this function was already run. If yes, return.
232 // In case profiling has been enabled in multiple translation units, the
233 // initializer function will be added to the global constructors list of
234 // each translation unit. When merging translation units, the global
235 // constructor lists are just appended, such that the initializer will appear
236 // multiple times. To avoid initializations being run multiple times (and
237 // especially to avoid that atExitFn is called more than once), we bail
238 // out if the initializer is run more than once.
239 Value
*HasRunBefore
= Builder
.CreateLoad(AlreadyInitializedPtr
);
240 Builder
.CreateCondBr(HasRunBefore
, EarlyReturn
, InitBB
);
241 Builder
.SetInsertPoint(EarlyReturn
);
242 Builder
.CreateRetVoid();
244 // Keep track that this function has been run once.
245 Builder
.SetInsertPoint(InitBB
);
246 Value
*True
= Builder
.getInt1(true);
247 Builder
.CreateStore(True
, AlreadyInitializedPtr
);
249 // Register the final reporting function with atexit().
250 Value
*FinalReportingPtr
=
251 Builder
.CreatePointerCast(FinalReporting
, Builder
.getInt8PtrTy());
252 Function
*AtExitFn
= getAtExit();
253 Builder
.CreateCall(AtExitFn
, {FinalReportingPtr
});
256 // Read the current cycle counter and store the result for later.
257 Function
*RDTSCPFn
= getRDTSCP();
258 Value
*CurrentCycles
= Builder
.CreateCall(
260 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
261 Builder
.CreateStore(CurrentCycles
, CyclesTotalStartPtr
, true);
263 Builder
.CreateRetVoid();
// Emit, immediately before InsertBefore, code that reads the cycle counter
// and stores the value into CyclesInScopStartPtr, marking the start of a
// timed region.
//
// InsertBefore: instruction in front of which the code is inserted.
//
// NOTE(review): listing numbers jump 268 -> 272 and 274 -> 276; an early exit
// (e.g. for unsupported targets) and the callee argument of CreateCall
// (presumably RDTSCPFn) are on lines missing from this listing -- confirm.
268 void PerfMonitor::insertRegionStart(Instruction
*InsertBefore
) {
272 Builder
.SetInsertPoint(InsertBefore
);
273 Function
*RDTSCPFn
= getRDTSCP();
274 Value
*CurrentCycles
= Builder
.CreateCall(
276 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
277 Builder
.CreateStore(CurrentCycles
, CyclesInScopStartPtr
, true);
280 void PerfMonitor::insertRegionEnd(Instruction
*InsertBefore
) {
284 Builder
.SetInsertPoint(InsertBefore
);
285 Function
*RDTSCPFn
= getRDTSCP();
286 LoadInst
*CyclesStart
= Builder
.CreateLoad(CyclesInScopStartPtr
, true);
287 Value
*CurrentCycles
= Builder
.CreateCall(
289 Builder
.CreatePointerCast(RDTSCPWriteLocation
, Builder
.getInt8PtrTy()));
290 Value
*CyclesInScop
= Builder
.CreateSub(CurrentCycles
, CyclesStart
);
291 Value
*CyclesInScops
= Builder
.CreateLoad(CyclesInScopsPtr
, true);
292 CyclesInScops
= Builder
.CreateAdd(CyclesInScops
, CyclesInScop
);
293 Builder
.CreateStore(CyclesInScops
, CyclesInScopsPtr
, true);
295 Value
*CyclesInCurrentScop
= Builder
.CreateLoad(CyclesInCurrentScopPtr
, true);
296 CyclesInCurrentScop
= Builder
.CreateAdd(CyclesInCurrentScop
, CyclesInScop
);
297 Builder
.CreateStore(CyclesInCurrentScop
, CyclesInCurrentScopPtr
, true);
299 Value
*TripCountForCurrentScop
=
300 Builder
.CreateLoad(TripCountForCurrentScopPtr
, true);
301 TripCountForCurrentScop
=
302 Builder
.CreateAdd(TripCountForCurrentScop
, Builder
.getInt64(1));
303 Builder
.CreateStore(TripCountForCurrentScop
, TripCountForCurrentScopPtr
,