From 45a375eb6b1c03371d7608c9d9f42a146b04d079 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 10 Apr 2019 18:00:41 +0000 Subject: [PATCH] Revert rL357745: [SelectionDAG] Compute known bits of CopyFromReg Certain optimisations from ConstantHoisting and CGP rely on Selection DAG not seeing through to the constant in other blocks. Revert this patch while we come up with a better way to handle that. I will try to follow this up with some better tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358113 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 20 -------------------- lib/Target/X86/X86ISelLowering.cpp | 6 +++--- test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll | 10 ++++++---- test/CodeGen/ARM/atomic-op.ll | 8 ++++---- test/CodeGen/PowerPC/pr35688.ll | 16 +++++++++------- test/CodeGen/SystemZ/subregliveness-04.ll | 2 +- test/CodeGen/X86/fold-tied-op.ll | 2 +- test/CodeGen/X86/pr28444.ll | 5 +++-- 8 files changed, 27 insertions(+), 42 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index caa3f14b8a1..c017d6d6eae 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -31,7 +31,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -3208,25 +3207,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One &= Known2.One; break; } - case ISD::CopyFromReg: { - auto R = cast(Op.getOperand(1)); - const unsigned Reg = R->getReg(); - - const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); - if (!TRI->isVirtualRegister(Reg)) - break; - - const MachineRegisterInfo *MRI = &MF->getRegInfo(); - if (!MRI->hasOneDef(Reg)) - break; - - const FunctionLoweringInfo::LiveOutInfo *LOI = FLI->GetLiveOutRegInfo(Reg); - if (!LOI || LOI->Known.getBitWidth() != BitWidth) - break; - - Known = LOI->Known; - break; - } case ISD::FrameIndex: case ISD::TargetFrameIndex: TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2f5db87090e..190d5708324 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -19595,10 +19595,10 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32))) Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); - // If the operand types disagree, extend or truncate the shift amount to match. - // Since BT ignores high bits (like shifts) we can use anyextend for the extension. + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. if (Src.getValueType() != BitNo.getValueType()) - BitNo = DAG.getAnyExtOrTrunc(BitNo, dl, Src.getValueType()); + BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); X86CC = DAG.getConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, dl, MVT::i8); diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll index 4e85ca0cc52..1c450e7c0b9 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll @@ -110,8 +110,8 @@ main_body: ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb: ;CHECK-NOT: s_waitcnt; -;CHECK-NOT: v_or_b32 -;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8 +;CHECK: v_or_b32 +;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) { main_body: %tmp = shl i32 %index, 4 @@ -127,8 +127,10 @@ bb1: ; preds = %main_body ;CHECK-LABEL: {{^}}s_buffer_load_index_across_bb_merged: ;CHECK-NOT: s_waitcnt; -;CHECK-NOT: v_or_b32 -;CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen offset:8 +;CHECK: v_or_b32 +;CHECK: v_or_b32 +;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen +;CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen define amdgpu_ps void @s_buffer_load_index_across_bb_merged(<4 x i32> inreg %desc, i32 %index) { main_body: %tmp = shl i32 %index, 4 diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll index 5f206b6189d..8ab20267a18 100644 --- a/test/CodeGen/ARM/atomic-op.ll +++ b/test/CodeGen/ARM/atomic-op.ll @@ -183,11 +183,11 @@ entry: ret void } -define void @func2(i16 %int_val) nounwind { +define void @func2() nounwind { entry: %val = alloca i16 %old = alloca i16 - store i16 %int_val, i16* %val + store i16 31, i16* %val ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -197,7 +197,7 @@ entry: ; CHECK-BAREMETAL-NOT: __sync %0 = atomicrmw umin i16* %val, i16 16 monotonic store i16 %0, i16* %old - %uneg = sub i16 0, 2 + %uneg = sub i16 0, 1 ; CHECK: ldrex ; CHECK: cmp ; CHECK: strex @@ -249,7 +249,7 @@ entry: ; CHECK-T1-M0: bl ___sync_fetch_and_umin_1 ; CHECK-BAREMETAL: cmp ; CHECK-BAREMETAL-NOT: __sync - %uneg = sub i8 0, 2 + %uneg = sub i8 0, 1 %1 = atomicrmw umin i8* %val, i8 %uneg monotonic store i8 %1, i8* %old ; CHECK: ldrex diff --git a/test/CodeGen/PowerPC/pr35688.ll b/test/CodeGen/PowerPC/pr35688.ll index 7573e871ef4..098573ec1b0 100644 --- a/test/CodeGen/PowerPC/pr35688.ll +++ b/test/CodeGen/PowerPC/pr35688.ll @@ -6,14 +6,16 @@ ; Function Attrs: nounwind define void @ec_GFp_nistp256_points_mul() { ; CHECK-LABEL: ec_GFp_nistp256_points_mul: -; CHECK: ld 4, 0(3) -; CHECK: li 3, 0 -; CHECK: subfic 5, 4, 0 -; CHECK: subfze 5, 3 +; CHECK: ld 5, 0(3) +; CHECK: li 3, 127 +; CHECK: li 4, 0 +; CHECK: subfic 6, 5, 0 +; CHECK: subfze 6, 4 +; CHECK: sradi 7, 6, 63 +; CHECK: srad 6, 6, 3 +; CHECK: subfc 5, 5, 7 +; CHECK: subfe 5, 4, 6 ; CHECK: sradi 5, 5, 63 -; CHECK: subfc 4, 4, 5 -; CHECK: subfe 4, 3, 5 -; CHECK: sradi 4, 4, 63 ; With MemorySSA, everything is taken out of the loop by licm. ; Loads and stores to undef are treated as non-aliasing. diff --git a/test/CodeGen/SystemZ/subregliveness-04.ll b/test/CodeGen/SystemZ/subregliveness-04.ll index cb9ae9bbd43..11ecc9bd9c7 100644 --- a/test/CodeGen/SystemZ/subregliveness-04.ll +++ b/test/CodeGen/SystemZ/subregliveness-04.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -disable-early-taildup -disable-cgp -systemz-subreg-liveness < %s | FileCheck %s ; Check for successful compilation. -; CHECK: lhi {{%r[0-9]+}}, -5 +; CHECK: lhi %r0, -5 target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64" target triple = "s390x-ibm-linux" diff --git a/test/CodeGen/X86/fold-tied-op.ll b/test/CodeGen/X86/fold-tied-op.ll index 6fe1713a541..eb06eb75a4d 100644 --- a/test/CodeGen/X86/fold-tied-op.ll +++ b/test/CodeGen/X86/fold-tied-op.ll @@ -6,8 +6,8 @@ target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" target triple = "i386--netbsd" ; CHECK-LABEL: fn1 -; CHECK: addl {{.*#+}} 4-byte Folded Reload ; CHECK: orl {{.*#+}} 4-byte Folded Reload +; CHECK: addl {{.*#+}} 4-byte Folded Reload ; CHECK: xorl {{.*#+}} 4-byte Folded Reload ; CHECK: xorl {{.*#+}} 4-byte Folded Reload ; CHECK: retl diff --git a/test/CodeGen/X86/pr28444.ll b/test/CodeGen/X86/pr28444.ll index 4d7d08afa54..23383209e38 100644 --- a/test/CodeGen/X86/pr28444.ll +++ b/test/CodeGen/X86/pr28444.ll @@ -11,8 +11,9 @@ define void @extractelt_mismatch_vector_element_type(i32 %arg, i1 %x) { ; CHECK-LABEL: extractelt_mismatch_vector_element_type: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: movb $1, (%rax) -; CHECK-NEXT: movb $1, (%rax) +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: movb %al, (%rax) +; CHECK-NEXT: movb %al, (%rax) ; CHECK-NEXT: retq bb: %tmp = icmp ult i32 %arg, 0 -- 2.11.4.GIT