1 From cf00b30288c4c81b2c6a5af01c38f236148777a0 Mon Sep 17 00:00:00 2001
2 From: Mike Hommey <mh@glandium.org>
3 Date: Tue, 28 Mar 2023 06:13:36 +0900
4 Subject: [PATCH] Revert "[Passes][VectorCombine] enable early run generally
5  and try load folds"
7 This reverts commit 163bb6d64e5f1220777c3ec2a8b58c0666a74d91.
8 It causes various reftest regressions.
10 llvm/lib/Passes/PassBuilderPipelines.cpp | 7 ++++---
11 llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 8 ++------
12 llvm/test/Other/new-pm-defaults.ll | 2 +-
13 .../Other/new-pm-thinlto-postlink-defaults.ll | 1 -
14 .../Other/new-pm-thinlto-postlink-pgo-defaults.ll | 1 -
15 .../new-pm-thinlto-postlink-samplepgo-defaults.ll | 1 -
16 .../Other/new-pm-thinlto-prelink-pgo-defaults.ll | 1 -
17 .../new-pm-thinlto-prelink-samplepgo-defaults.ll | 1 -
18 .../PhaseOrdering/X86/vec-load-combine.ll | 15 +++++++++++----
19 9 files changed, 18 insertions(+), 19 deletions(-)
21 diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
22 index eed29c25714b..b925448cd6c0 100644
23 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp
24 +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
25 @@ -611,9 +611,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
26 // Delete small array after loop unroll.
27 FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
29 - // Try vectorization/scalarization transforms that are both improvements
30 - // themselves and can allow further folds with GVN and InstCombine.
31 - FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
32 + // The matrix extension can introduce large vector operations early, which can
33 + // benefit from running vector-combine early on.
34 + if (EnableMatrix)
35 + FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
37 // Eliminate redundancies.
38 FPM.addPass(MergedLoadStoreMotionPass());
39 diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
40 index 2e489757ebc1..810a9f92bb7a 100644
41 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
42 +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
43 @@ -1715,23 +1715,6 @@ bool VectorCombine::run() {
44 bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
45 auto Opcode = I.getOpcode();
47 - // These folds should be beneficial regardless of when this pass is run
48 - // in the optimization pipeline.
49 - // The type checking is for run-time efficiency. We can avoid wasting time
50 - // dispatching to folding functions if there's no chance of matching.
51 - if (IsFixedVectorType) {
53 - case Instruction::InsertElement:
54 - MadeChange |= vectorizeLoadInsert(I);
56 - case Instruction::ShuffleVector:
57 - MadeChange |= widenSubvectorLoad(I);
64 // This transform works with scalable and fixed vectors
65 // TODO: Identify and allow other scalable transforms
66 if (isa<VectorType>(I.getType())) {
67 @@ -1753,9 +1736,11 @@ bool VectorCombine::run() {
68 if (IsFixedVectorType) {
70 case Instruction::InsertElement:
71 + MadeChange |= vectorizeLoadInsert(I);
72 MadeChange |= foldInsExtFNeg(I);
74 case Instruction::ShuffleVector:
75 + MadeChange |= widenSubvectorLoad(I);
76 MadeChange |= foldShuffleOfBinops(I);
77 MadeChange |= foldSelectShuffle(I);
79 diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
80 index 13612c3bb459..5f84d28af4a6 100644
81 --- a/llvm/test/Other/new-pm-defaults.ll
82 +++ b/llvm/test/Other/new-pm-defaults.ll
84 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
85 ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass
86 ; CHECK-O-NEXT: Running pass: SROAPass on foo
87 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
88 +; CHECK-MATRIX: Running pass: VectorCombinePass
89 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
90 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
91 ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
92 diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
93 index 3f5d2d5b153d..ea07128c9f6a 100644
94 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
95 +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
97 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
98 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
99 ; CHECK-O-NEXT: Running pass: SROAPass on foo
100 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
101 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
102 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
103 ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
104 diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
105 index 29021ceace54..43e943cb6011 100644
106 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
107 +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
109 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
110 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
111 ; CHECK-O-NEXT: Running pass: SROAPass on foo
112 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
113 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
114 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
115 ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
116 diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
117 index daf3141a1f2c..78914d1c23b2 100644
118 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
119 +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
121 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
122 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
123 ; CHECK-O-NEXT: Running pass: SROAPass on foo
124 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
125 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
126 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
127 ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
128 diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
129 index bfe80902f806..5b62ba39add3 100644
130 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
131 +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
133 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
134 ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
135 ; CHECK-O-NEXT: Running pass: SROAPass on foo
136 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
137 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
138 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
139 ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
140 diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
141 index c7daf7aa46b1..17475423d696 100644
142 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
143 +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
145 ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass
146 ; CHECK-O-NEXT: Running pass: LoopDeletionPass
147 ; CHECK-O-NEXT: Running pass: SROAPass on foo
148 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
149 ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
150 ; CHECK-O23SZ-NEXT: Running pass: GVNPass
151 ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
152 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
153 index 77cbc70ff369..dd7164febea4 100644
154 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
155 +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
156 @@ -12,13 +12,20 @@ $getAt = comdat any
157 define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 {
158 ; SSE-LABEL: @ConvertVectors_ByRef(
159 ; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
160 -; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
161 -; SSE-NEXT: ret <4 x float> [[TMP3]]
162 +; SSE-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1
163 +; SSE-NEXT: [[TMP4:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
164 +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
165 +; SSE-NEXT: [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
166 +; SSE-NEXT: [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
167 +; SSE-NEXT: ret <4 x float> [[TMP7]]
169 ; AVX-LABEL: @ConvertVectors_ByRef(
170 ; AVX-NEXT: [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
171 -; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
172 -; AVX-NEXT: ret <4 x float> [[TMP3]]
173 +; AVX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2
174 +; AVX-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP3]], align 8
175 +; AVX-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i64 2
176 +; AVX-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP4]], i64 3
177 +; AVX-NEXT: ret <4 x float> [[TMP6]]
179 %2 = alloca ptr, align 8
180 %3 = alloca <4 x float>, align 16