From ad5ef16e38a7809a0c9d6988f8e2a248385f62a0 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Sat, 13 May 2017 11:49:34 +0000 Subject: [PATCH] [Simplify] Remove writes that are overwritten. Remove memory writes that are overwritten by later writes. This works for StoreInsts: store double 21.0, double* %A store double 42.0, double* %A scalar writes at the end of a statement and mixes of these. Multiple writes can be the result of DeLICM, which might map multiple writes to the same location when it knows that these do no conflict (for instance because they write the same value). Such writes interfere with pattern-matched optimization such as gemm and may not get removed by other LLVM passes after code generation. Differential Revision: https://reviews.llvm.org/D33142 git-svn-id: https://llvm.org/svn/llvm-project/polly/trunk@302986 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transform/Simplify.cpp | 106 ++++++++++++++++++++- test/Simplify/overwritten.ll | 44 +++++++++ test/Simplify/overwritten_3store.ll | 47 +++++++++ test/Simplify/overwritten_implicit_and_explicit.ll | 53 +++++++++++ ...en_implicit_and_explicit___%for---%return.jscop | 48 ++++++++++ ...and_explicit___%for---%return.jscop.transformed | 48 ++++++++++ test/Simplify/overwritten_loadbetween.ll | 46 +++++++++ 7 files changed, 390 insertions(+), 2 deletions(-) create mode 100644 test/Simplify/overwritten.ll create mode 100644 test/Simplify/overwritten_3store.ll create mode 100644 test/Simplify/overwritten_implicit_and_explicit.ll create mode 100644 test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop create mode 100644 test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed create mode 100644 test/Simplify/overwritten_loadbetween.ll diff --git a/lib/Transform/Simplify.cpp b/lib/Transform/Simplify.cpp index 16fdbe11..47450fba 100644 --- a/lib/Transform/Simplify.cpp +++ b/lib/Transform/Simplify.cpp @@ -33,6 +33,7 @@ STATISTIC(InBetweenStore, "Number of Load-Store pairs NOT removed because " "there is another store between them"); STATISTIC(TotalIdenticalWritesRemoved, "Number of double writes removed in any SCoP"); +STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes"); STATISTIC(TotalRedundantWritesRemoved, "Number of writes of same value removed in any SCoP"); STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP"); @@ -63,6 +64,43 @@ static Value *getWrittenScalar(MemoryAccess *WA) { return WA->getAccessInstruction(); } +static bool isImplicitRead(MemoryAccess *MA) { + return MA->isRead() && MA->isOriginalScalarKind(); +} + +static bool isExplicitAccess(MemoryAccess *MA) { + return MA->isOriginalArrayKind(); +} + +static bool isImplicitWrite(MemoryAccess *MA) { + return MA->isWrite() && MA->isOriginalScalarKind(); +} + +/// Return an iterator range that iterates over MemoryAccesses in the order in +/// which they are executed. +/// +/// The order is: +/// - Implicit reads (BlockGenerator::generateScalarLoads) +/// - Explicit reads and writes (BlockGenerator::generateArrayLoad, +/// BlockGenerator::generateArrayStore) +/// - In block statements, the accesses are in order in which their +/// instructions are executed. +/// - In region statements, that order of execution is not predictable at +/// compile-time. +/// - Implicit writes (BlockGenerator::generateScalarStores) +/// The order in which implicit writes are executed relative to each other is +/// undefined. +static auto accessesInOrder(ScopStmt *Stmt) -> decltype(concat( + make_filter_range(make_range(Stmt->begin(), Stmt->end()), isImplicitRead), + make_filter_range(make_range(Stmt->begin(), Stmt->end()), isExplicitAccess), + make_filter_range(make_range(Stmt->begin(), Stmt->end()), + isImplicitWrite))) { + auto AllRange = make_range(Stmt->begin(), Stmt->end()); + return concat(make_filter_range(AllRange, isImplicitRead), + make_filter_range(AllRange, isExplicitAccess), + make_filter_range(AllRange, isImplicitWrite)); +} + class Simplify : public ScopPass { private: /// The last/current SCoP that is/has been processed. @@ -71,6 +109,9 @@ private: /// Number of double writes removed from this SCoP. int IdenticalWritesRemoved = 0; + /// Number of writes that are overwritten anyway. + int OverwritesRemoved = 0; + /// Number of redundant writes removed from this SCoP. int RedundantWritesRemoved = 0; @@ -79,8 +120,8 @@ private: /// Return whether at least one simplification has been applied. bool isModified() const { - return IdenticalWritesRemoved > 0 || RedundantWritesRemoved > 0 || - StmtsRemoved > 0; + return IdenticalWritesRemoved > 0 || OverwritesRemoved > 0 || + RedundantWritesRemoved > 0 || StmtsRemoved > 0; } MemoryAccess *getReadAccessForValue(ScopStmt *Stmt, llvm::Value *Val) { @@ -223,6 +264,61 @@ private: } } + /// Remove writes that are overwritten unconditionally later in the same + /// statement. + /// + /// There must be no read of the same value between the write (that is to be + /// removed) and the overwrite. + void removeOverwrites() { + for (auto &Stmt : *S) { + auto Domain = give(Stmt.getDomain()); + isl::union_map WillBeOverwritten = + isl::union_map::empty(give(S->getParamSpace())); + + auto AccRange = accessesInOrder(&Stmt); + SmallVector Accesses{AccRange.begin(), + AccRange.end()}; + + // Iterate in reverse order, so the overwrite comes before the write that + // is to be removed. + for (auto *MA : reverse(Accesses)) { + + // In region statements, the explicit accesses can be in blocks that are + // can be executed in any order. We therefore process only the implicit + // writes and stop after that. + if (Stmt.isRegionStmt() && isExplicitAccess(MA)) + break; + + auto AccRel = give(MA->getAccessRelation()); + AccRel = AccRel.intersect_domain(Domain); + AccRel = AccRel.intersect_params(give(S->getContext())); + + // If a value is read in-between, do not consider it as overwritten. + if (MA->isRead()) { + WillBeOverwritten = WillBeOverwritten.subtract(AccRel); + continue; + } + + // If all of a write's elements are overwritten, remove it. + isl::union_map AccRelUnion = AccRel; + if (isl_union_map_is_subset(AccRelUnion.keep(), + WillBeOverwritten.keep()) == + isl_bool_true) { + DEBUG(dbgs() << "Removing " << MA + << " which will be overwritten anyway\n"); + + Stmt.removeSingleMemoryAccess(MA); + OverwritesRemoved++; + TotalOverwritesRemoved++; + } + + // Unconditional writes overwrite other values. + if (MA->isMustWrite()) + WillBeOverwritten = WillBeOverwritten.add_map(AccRel); + } + } + } + /// Remove writes that just write the same value already stored in the /// element. void removeRedundantWrites() { @@ -314,6 +410,8 @@ private: OS.indent(Indent) << "Statistics {\n"; OS.indent(Indent + 4) << "Identical writes removed: " << IdenticalWritesRemoved << '\n'; + OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved + << '\n'; OS.indent(Indent + 4) << "Redundant writes removed: " << RedundantWritesRemoved << "\n"; OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n"; @@ -351,6 +449,9 @@ public: DEBUG(dbgs() << "Removing identical writes...\n"); removeIdenticalWrites(); + DEBUG(dbgs() << "Removing overwrites...\n"); + removeOverwrites(); + DEBUG(dbgs() << "Removing redundant writes...\n"); removeRedundantWrites(); @@ -381,6 +482,7 @@ public: S = nullptr; IdenticalWritesRemoved = 0; + OverwritesRemoved = 0; RedundantWritesRemoved = 0; StmtsRemoved = 0; } diff --git a/test/Simplify/overwritten.ll b/test/Simplify/overwritten.ll new file mode 100644 index 00000000..55d59b34 --- /dev/null +++ b/test/Simplify/overwritten.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Remove a store that is overwritten by another store in the same statement. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @overwritten(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 21.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 1 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: } diff --git a/test/Simplify/overwritten_3store.ll b/test/Simplify/overwritten_3store.ll new file mode 100644 index 00000000..133a3478 --- /dev/null +++ b/test/Simplify/overwritten_3store.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Remove a store that is overwritten by another store in the same statement. +; Check that even multiple stores are removed. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 10.5; +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @overwritten_3store(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 10.5, double* %A + store double 21.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 2 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: } diff --git a/test/Simplify/overwritten_implicit_and_explicit.ll b/test/Simplify/overwritten_implicit_and_explicit.ll new file mode 100644 index 00000000..43d18ed5 --- /dev/null +++ b/test/Simplify/overwritten_implicit_and_explicit.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Remove a store that is overwritten by another store in the same statement. +; Check that this works even if one of the writes is a scalar MemoryKind. +; +; for (int j = 0; j < n; j += 1) { +; body: +; val = 21.0 + 21.0; +; A[1] = val; +; +; user: +; A[0] = val; +; } +; +define void @overwritten_implicit_and_explicit(i32 %n, double* noalias nonnull %A, double* noalias nonnull %C) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %val = fadd double 21.0, 21.0 + store double %val, double* %A + br label %user + + user: + store double %val, double* %C + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 1 +; CHECK: } + +; CHECK: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_val[] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Stmt_user diff --git a/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop b/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop new file mode 100644 index 00000000..7ea3ca2a --- /dev/null +++ b/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop @@ -0,0 +1,48 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + }, + { + "name" : "MemRef_C", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_val[] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_C[0] }" + }, + { + "kind" : "read", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_val[] }" + } + ], + "domain" : "[n] -> { Stmt_user[i0] : 0 <= i0 < n }", + "name" : "Stmt_user", + "schedule" : "[n] -> { Stmt_user[i0] -> [i0, 1] }" + } + ] +} diff --git a/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed b/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed new file mode 100644 index 00000000..b49747de --- /dev/null +++ b/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed @@ -0,0 +1,48 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + }, + { + "name" : "MemRef_C", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_C[0] }" + }, + { + "kind" : "read", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_val[] }" + } + ], + "domain" : "[n] -> { Stmt_user[i0] : 0 <= i0 < n }", + "name" : "Stmt_user", + "schedule" : "[n] -> { Stmt_user[i0] -> [i0, 1] }" + } + ] +} diff --git a/test/Simplify/overwritten_loadbetween.ll b/test/Simplify/overwritten_loadbetween.ll new file mode 100644 index 00000000..c06263eb --- /dev/null +++ b/test/Simplify/overwritten_loadbetween.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Do not remove overwrites when the value is read before. +; +; for (int j = 0; j < n; j += 1) { +;body: +; A[0] = 21.0; +; val = A[0]; +; A[0] = 42.0; +; +;user: +; B[0] = val; +; } +; +define void @overwritten(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 21.0, double* %A + %val = load double, double* %A + store double 42.0, double* %A + br label %user + + user: + store double %val, double* %B + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified -- 2.11.4.GIT