[ScopBuilder] Build escaping dependencies separately.
[polly-mirror.git] / lib / CodeGen / PerfMonitor.cpp
blobfce65fdeea9249707fe98c5bb34cd93fcbbe56d9
1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
12 #include "polly/CodeGen/PerfMonitor.h"
13 #include "polly/CodeGen/RuntimeDebugBuilder.h"
14 #include "polly/ScopInfo.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include <sstream>
19 using namespace llvm;
20 using namespace polly;
22 Function *PerfMonitor::getAtExit() {
23 const char *Name = "atexit";
24 Function *F = M->getFunction(Name);
26 if (!F) {
27 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
28 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
29 {Builder.getInt8PtrTy()}, false);
30 F = Function::Create(Ty, Linkage, Name, M);
33 return F;
36 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
37 const char *Name = "llvm.global_ctors";
38 GlobalVariable *GV = M->getGlobalVariable(Name);
39 std::vector<Constant *> V;
41 if (GV) {
42 Constant *Array = GV->getInitializer();
43 for (Value *X : Array->operand_values())
44 V.push_back(cast<Constant>(X));
45 GV->eraseFromParent();
48 StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
49 Builder.getInt8PtrTy());
51 V.push_back(
52 ConstantStruct::get(ST, Builder.getInt32(10), Fn,
53 ConstantPointerNull::get(Builder.getInt8PtrTy())));
54 ArrayType *Ty = ArrayType::get(ST, V.size());
56 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
57 ConstantArray::get(Ty, V), Name, nullptr,
58 GlobalVariable::NotThreadLocal);
61 Function *PerfMonitor::getRDTSCP() {
62 return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
65 PerfMonitor::PerfMonitor(const Scop &S, Module *M)
66 : M(M), Builder(M->getContext()), S(S) {
67 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
68 Supported = true;
69 else
70 Supported = false;
73 static void TryRegisterGlobal(Module *M, const char *Name,
74 Constant *InitialValue, Value **Location) {
75 *Location = M->getGlobalVariable(Name);
77 if (!*Location)
78 *Location = new GlobalVariable(
79 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
80 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
83 // Generate a unique name that is usable as a LLVM name for a scop to name its
84 // performance counter.
85 static std::string GetScopUniqueVarname(const Scop &S) {
86 std::stringstream Name;
87 std::string EntryString, ExitString;
88 std::tie(EntryString, ExitString) = S.getEntryExitStr();
90 Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
91 << "_from__" << EntryString << "__to__" << ExitString;
92 return Name.str();
95 void PerfMonitor::addScopCounter() {
96 const std::string varname = GetScopUniqueVarname(S);
97 TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
98 &CyclesInCurrentScopPtr);
100 TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
101 &TripCountForCurrentScopPtr);
104 void PerfMonitor::addGlobalVariables() {
105 TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
106 &CyclesTotalStartPtr);
108 TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
109 &AlreadyInitializedPtr);
111 TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
112 &CyclesInScopsPtr);
114 TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
115 &CyclesInScopStartPtr);
117 TryRegisterGlobal(M, "__polly_perf_write_loation", Builder.getInt32(0),
118 &RDTSCPWriteLocation);
// Names given to the two functions this file generates into the module: the
// initializer created by insertInitFunction() and the reporting function
// created by insertFinalReporting().
121 static const char *InitFunctionName = "__polly_perf_init";
122 static const char *FinalReportingFunctionName = "__polly_perf_final";
// State shared between insertFinalReporting() and AppendScopReporting(): the
// single basic block of the reporting function and the return instruction that
// currently terminates it. AppendScopReporting() erases that return, appends
// its output, and stores the newly created return here again.
// NOTE(review): these are file-level statics and nothing visible here resets
// them; presumably one module per process is assumed -- confirm.
124 static BasicBlock *FinalStartBB = nullptr;
125 static ReturnInst *ReturnFromFinal = nullptr;
127 Function *PerfMonitor::insertFinalReporting() {
128 // Create new function.
129 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
130 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
131 Function *ExitFn =
132 Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
133 FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
134 Builder.SetInsertPoint(FinalStartBB);
136 if (!Supported) {
137 RuntimeDebugBuilder::createCPUPrinter(
138 Builder, "Polly runtime information generation not supported\n");
139 Builder.CreateRetVoid();
140 return ExitFn;
143 // Measure current cycles and compute final timings.
144 Function *RDTSCPFn = getRDTSCP();
145 Value *CurrentCycles = Builder.CreateCall(
146 RDTSCPFn,
147 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
148 Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
149 Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
150 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
152 // Print the runtime information.
153 RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
154 RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
155 RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
156 RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
157 "\n");
159 // Print the preamble for per-scop information.
160 RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
161 RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
162 RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
164 RuntimeDebugBuilder::createCPUPrinter(
165 Builder, "scop function, "
166 "entry block name, exit block name, total time, trip count\n");
167 ReturnFromFinal = Builder.CreateRetVoid();
168 return ExitFn;
171 void PerfMonitor::AppendScopReporting() {
172 if (!Supported)
173 return;
175 assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
176 "PerfMonitor::insertFinalReporting.");
177 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
178 "PerfMonitor::insertFinalReporting.");
180 Builder.SetInsertPoint(FinalStartBB);
181 ReturnFromFinal->eraseFromParent();
183 Value *CyclesInCurrentScop =
184 Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
186 Value *TripCountForCurrentScop =
187 Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
189 std::string EntryName, ExitName;
190 std::tie(EntryName, ExitName) = S.getEntryExitStr();
192 // print in CSV for easy parsing with other tools.
193 RuntimeDebugBuilder::createCPUPrinter(
194 Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
195 CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
197 ReturnFromFinal = Builder.CreateRetVoid();
200 static Function *FinalReporting = nullptr;
202 void PerfMonitor::initialize() {
203 addGlobalVariables();
204 addScopCounter();
206 // Ensure that we only add the final reporting function once.
207 // On later invocations, append to the reporting function.
208 if (!FinalReporting) {
209 FinalReporting = insertFinalReporting();
211 Function *InitFn = insertInitFunction(FinalReporting);
212 addToGlobalConstructors(InitFn);
215 AppendScopReporting();
218 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
219 // Insert function definition and BBs.
220 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
221 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
222 Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
223 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
224 BasicBlock *EarlyReturn =
225 BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
226 BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
228 Builder.SetInsertPoint(Start);
230 // Check if this function was already run. If yes, return.
232 // In case profiling has been enabled in multiple translation units, the
233 // initializer function will be added to the global constructors list of
234 // each translation unit. When merging translation units, the global
235 // constructor lists are just appended, such that the initializer will appear
236 // multiple times. To avoid initializations being run multiple times (and
237 // especially to avoid that atExitFn is called more than once), we bail
238 // out if the initializer is run more than once.
239 Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr);
240 Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
241 Builder.SetInsertPoint(EarlyReturn);
242 Builder.CreateRetVoid();
244 // Keep track that this function has been run once.
245 Builder.SetInsertPoint(InitBB);
246 Value *True = Builder.getInt1(true);
247 Builder.CreateStore(True, AlreadyInitializedPtr);
249 // Register the final reporting function with atexit().
250 Value *FinalReportingPtr =
251 Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
252 Function *AtExitFn = getAtExit();
253 Builder.CreateCall(AtExitFn, {FinalReportingPtr});
255 if (Supported) {
256 // Read the currently cycle counter and store the result for later.
257 Function *RDTSCPFn = getRDTSCP();
258 Value *CurrentCycles = Builder.CreateCall(
259 RDTSCPFn,
260 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
261 Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
263 Builder.CreateRetVoid();
265 return InitFn;
268 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
269 if (!Supported)
270 return;
272 Builder.SetInsertPoint(InsertBefore);
273 Function *RDTSCPFn = getRDTSCP();
274 Value *CurrentCycles = Builder.CreateCall(
275 RDTSCPFn,
276 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
277 Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
280 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
281 if (!Supported)
282 return;
284 Builder.SetInsertPoint(InsertBefore);
285 Function *RDTSCPFn = getRDTSCP();
286 LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
287 Value *CurrentCycles = Builder.CreateCall(
288 RDTSCPFn,
289 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
290 Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
291 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
292 CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
293 Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
295 Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
296 CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
297 Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
299 Value *TripCountForCurrentScop =
300 Builder.CreateLoad(TripCountForCurrentScopPtr, true);
301 TripCountForCurrentScop =
302 Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
303 Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
304 true);