build/build-clang/revert-llvmorg-16-init-11301-g163bb6d64e5f_clang_18.patch

   1 From cf00b30288c4c81b2c6a5af01c38f236148777a0 Mon Sep 17 00:00:00 2001
   2 From: Mike Hommey <mh@glandium.org>
   3 Date: Tue, 28 Mar 2023 06:13:36 +0900
   4 Subject: [PATCH] Revert "[Passes][VectorCombine] enable early run generally
   5  and try load folds"
   6
   7 This reverts commit 163bb6d64e5f1220777c3ec2a8b58c0666a74d91.
   8 It causes various reftest regressions.
   9 ---
  10  llvm/lib/Passes/PassBuilderPipelines.cpp          |  7 ++++---
  11  llvm/lib/Transforms/Vectorize/VectorCombine.cpp   | 19 ++-----------------
  12  llvm/test/Other/new-pm-defaults.ll                |  2 +-
  13  .../Other/new-pm-thinlto-postlink-defaults.ll     |  1 -
  14  .../Other/new-pm-thinlto-postlink-pgo-defaults.ll |  1 -
  15  .../new-pm-thinlto-postlink-samplepgo-defaults.ll |  1 -
  16  .../Other/new-pm-thinlto-prelink-pgo-defaults.ll  |  1 -
  17  .../new-pm-thinlto-prelink-samplepgo-defaults.ll  |  1 -
  18  .../PhaseOrdering/X86/vec-load-combine.ll         | 15 +++++++++++----
  19  9 files changed, 18 insertions(+), 30 deletions(-)
  20
  21 diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
  22 index eed29c25714b..b925448cd6c0 100644
  23 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp
  24 +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
  25 @@ -611,9 +611,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
  26    // Delete small array after loop unroll.
  27    FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
  28
  29 -  // Try vectorization/scalarization transforms that are both improvements
  30 -  // themselves and can allow further folds with GVN and InstCombine.
  31 -  FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
  32 +  // The matrix extension can introduce large vector operations early, which can
  33 +  // benefit from running vector-combine early on.
  34 +  if (EnableMatrix)
  35 +    FPM.addPass(VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
  36
  37    // Eliminate redundancies.
  38    FPM.addPass(MergedLoadStoreMotionPass());
  39 diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  40 index 2e489757ebc1..810a9f92bb7a 100644
  41 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  42 +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
  43 @@ -1715,23 +1715,6 @@ bool VectorCombine::run() {
  44      bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
  45      auto Opcode = I.getOpcode();
  46
  47 -    // These folds should be beneficial regardless of when this pass is run
  48 -    // in the optimization pipeline.
  49 -    // The type checking is for run-time efficiency. We can avoid wasting time
  50 -    // dispatching to folding functions if there's no chance of matching.
  51 -    if (IsFixedVectorType) {
  52 -      switch (Opcode) {
  53 -      case Instruction::InsertElement:
  54 -        MadeChange |= vectorizeLoadInsert(I);
  55 -        break;
  56 -      case Instruction::ShuffleVector:
  57 -        MadeChange |= widenSubvectorLoad(I);
  58 -        break;
  59 -      default:
  60 -        break;
  61 -      }
  62 -    }
  63 -
  64      // This transform works with scalable and fixed vectors
  65      // TODO: Identify and allow other scalable transforms
  66      if (isa<VectorType>(I.getType())) {
  67 @@ -1753,9 +1736,11 @@ bool VectorCombine::run() {
  68      if (IsFixedVectorType) {
  69        switch (Opcode) {
  70        case Instruction::InsertElement:
  71 +        MadeChange |= vectorizeLoadInsert(I);
  72          MadeChange |= foldInsExtFNeg(I);
  73          break;
  74        case Instruction::ShuffleVector:
  75 +        MadeChange |= widenSubvectorLoad(I);
  76          MadeChange |= foldShuffleOfBinops(I);
  77          MadeChange |= foldSelectShuffle(I);
  78          break;
  79 diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll
  80 index 13612c3bb459..5f84d28af4a6 100644
  81 --- a/llvm/test/Other/new-pm-defaults.ll
  82 +++ b/llvm/test/Other/new-pm-defaults.ll
  83 @@ -186,7 +186,7 @@
  84  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  85  ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass
  86  ; CHECK-O-NEXT: Running pass: SROAPass on foo
  87 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
  88 +; CHECK-MATRIX: Running pass: VectorCombinePass
  89  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
  90  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
  91  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
  92 diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
  93 index 3f5d2d5b153d..ea07128c9f6a 100644
  94 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
  95 +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
  96 @@ -159,7 +159,6 @@
  97  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
  98  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
  99  ; CHECK-O-NEXT: Running pass: SROAPass on foo
 100 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
 101  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
 102  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
 103  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
 104 diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
 105 index 29021ceace54..43e943cb6011 100644
 106 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
 107 +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
 108 @@ -121,7 +121,6 @@
 109  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
 110  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
 111  ; CHECK-O-NEXT: Running pass: SROAPass on foo
 112 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
 113  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
 114  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
 115  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
 116 diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
 117 index daf3141a1f2c..78914d1c23b2 100644
 118 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
 119 +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
 120 @@ -130,7 +130,6 @@
 121  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
 122  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
 123  ; CHECK-O-NEXT: Running pass: SROAPass on foo
 124 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
 125  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
 126  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
 127  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
 128 diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
 129 index bfe80902f806..5b62ba39add3 100644
 130 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
 131 +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
 132 @@ -160,7 +160,6 @@
 133  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
 134  ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass
 135  ; CHECK-O-NEXT: Running pass: SROAPass on foo
 136 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
 137  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
 138  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
 139  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
 140 diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
 141 index c7daf7aa46b1..17475423d696 100644
 142 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
 143 +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll
 144 @@ -124,7 +124,6 @@
 145  ; CHECK-O-NEXT: Running pass: IndVarSimplifyPass
 146  ; CHECK-O-NEXT: Running pass: LoopDeletionPass
 147  ; CHECK-O-NEXT: Running pass: SROAPass on foo
 148 -; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass
 149  ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass
 150  ; CHECK-O23SZ-NEXT: Running pass: GVNPass
 151  ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis
 152 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
 153 index 77cbc70ff369..dd7164febea4 100644
 154 --- a/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
 155 +++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-load-combine.ll
 156 @@ -12,13 +12,20 @@ $getAt = comdat any
 157  define dso_local noundef <4 x float> @ConvertVectors_ByRef(ptr noundef nonnull align 16 dereferenceable(16) %0) #0 {
 158  ; SSE-LABEL: @ConvertVectors_ByRef(
 159  ; SSE-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
 160 -; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
 161 -; SSE-NEXT:    ret <4 x float> [[TMP3]]
 162 +; SSE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 1
 163 +; SSE-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
 164 +; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 165 +; SSE-NEXT:    [[TMP6:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 5, i32 undef>
 166 +; SSE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 5>
 167 +; SSE-NEXT:    ret <4 x float> [[TMP7]]
 168  ;
 169  ; AVX-LABEL: @ConvertVectors_ByRef(
 170  ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[TMP0:%.*]], align 16
 171 -; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
 172 -; AVX-NEXT:    ret <4 x float> [[TMP3]]
 173 +; AVX-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x float], ptr [[TMP0]], i64 0, i64 2
 174 +; AVX-NEXT:    [[TMP4:%.*]] = load float, ptr [[TMP3]], align 8
 175 +; AVX-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i64 2
 176 +; AVX-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP4]], i64 3
 177 +; AVX-NEXT:    ret <4 x float> [[TMP6]]
 178  ;
 179    %2 = alloca ptr, align 8
 180    %3 = alloca <4 x float>, align 16
 181 --
 182 2.39.0.1.g6739ec1790
 183