test/CodeGen/AMDGPU/merge-load-store-vreg.mir

   1 # RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,VI %s
   2 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
   3
   4 # If there's a base offset, check that SILoadStoreOptimizer creates
   5 # V_ADD_{I|U}32_e64 for that offset; _e64 uses a vreg for the carry (rather than
   6 # $vcc, which is used in _e32); this ensures that $vcc is not inadvertently
   7 # clobbered.
   8
   9 # GCN-LABEL: name: ds_combine_base_offset{{$}}
  10
  11 # VI: V_ADD_I32_e64 %6, %0,
  12 # VI-NEXT: DS_WRITE2_B32 killed %7, %0, %3, 0, 8,
  13 # VI: V_ADD_I32_e64 %10, %3,
  14 # VI-NEXT: DS_READ2_B32 killed %11, 0, 8,
  15
  16 # GFX9: V_ADD_U32_e64 %6, %0,
  17 # GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0, %3, 0, 8,
  18 # GFX9: V_ADD_U32_e64 %9, %3,
  19 # GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
  20
  21 --- |
  22   @0 = internal unnamed_addr addrspace(3) global [256 x float] undef, align 4
  23
  24   define amdgpu_kernel void @ds_combine_base_offset() { ; IR stub: provides the %tmp..%tmp3 values named by the %ir.* memory operands of the MIR body with the same name
  25     bb.0:
  26       br label %bb2
  27
  28     bb1:
  29       ret void
  30
  31     bb2:
  32       %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0 ; pointer to element 0 of @0
  33       %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8 ; %tmp advanced by 8 floats
  34       %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16 ; %tmp advanced by 16 floats
  35       %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24 ; %tmp advanced by 24 floats
  36       br label %bb1 ; none of the pointers above is used by any IR instruction in this function
  37     }
  38
  39   define amdgpu_kernel void @ds_combine_base_offset_subreg() { ; IR stub: provides the %tmp..%tmp3 values named by the %ir.* memory operands of the MIR body with the same name
  40     bb.0:
  41       br label %bb2
  42
  43     bb1:
  44       ret void
  45
  46     bb2:
  47       %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0 ; pointer to element 0 of @0
  48       %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8 ; %tmp advanced by 8 floats
  49       %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16 ; %tmp advanced by 16 floats
  50       %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24 ; %tmp advanced by 24 floats
  51       br label %bb1 ; none of the pointers above is used by any IR instruction in this function
  52     }
  53
  54   define amdgpu_kernel void @ds_combine_subreg() { ; IR stub: provides the %tmp..%tmp3 values named by the %ir.* memory operands of the MIR body with the same name
  55     bb.0:
  56       br label %bb2
  57
  58     bb1:
  59       ret void
  60
  61     bb2:
  62       %tmp = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @0, i32 0, i32 0 ; pointer to element 0 of @0
  63       %tmp1 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 8 ; %tmp advanced by 8 floats
  64       %tmp2 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 16 ; %tmp advanced by 16 floats
  65       %tmp3 = getelementptr inbounds float, float addrspace(3)* %tmp, i32 24 ; %tmp advanced by 24 floats
  66       br label %bb1 ; none of the pointers above is used by any IR instruction in this function
  67     }
  68 ---
  69 name:            ds_combine_base_offset # two DS stores and two DS loads with large offsets (1024..1120) on plain vgpr_32 operands, placed between a def and a use of $vcc
  70 body:             |
  71   bb.0:
  72     %0:vgpr_32 = IMPLICIT_DEF ; undefined value; reused below as the first two operands of the stores
  73     S_BRANCH %bb.2
  74
  75   bb.1:
  76     S_ENDPGM 0
  77
  78   bb.2:
  79     %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0, 0, implicit $exec
  80     %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
  81     V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec ; defines $vcc ahead of the DS accesses
  82     DS_WRITE_B32 %0, %0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) ; first store of the pair, offset 1024
  83     %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; %3 is defined between the two stores
  84     DS_WRITE_B32 %0, %3, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) ; second store, same first operand %0, offset 1024 + 32
  85     %4:vgpr_32 = DS_READ_B32 %3, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) ; first load of the pair, offset 1088
  86     %5:vgpr_32 = DS_READ_B32 %3, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) ; second load, same operand %3, offset 1088 + 32
  87     $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc ; reads the $vcc defined before the DS accesses, so $vcc is live across them
  88     S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  89     S_BRANCH %bb.1 ; both branch targets are bb.1
  90 ...
  91
  92 # GCN-LABEL: name: ds_combine_base_offset_subreg{{$}}
  93
  94 # VI: V_ADD_I32_e64 %6, %0.sub0,
  95 # VI-NEXT: DS_WRITE2_B32 killed %7, %0.sub0, %3.sub0, 0, 8,
  96 # VI: V_ADD_I32_e64 %10, %3.sub0,
  97 # VI-NEXT: DS_READ2_B32 killed %11, 0, 8,
  98
  99 # GFX9: V_ADD_U32_e64 %6, %0.sub0,
 100 # GFX9-NEXT: DS_WRITE2_B32_gfx9 killed %7, %0.sub0, %3.sub0, 0, 8,
 101 # GFX9: V_ADD_U32_e64 %9, %3.sub0,
 102 # GFX9-NEXT: DS_READ2_B32_gfx9 killed %10, 0, 8,
 103 ---
 104 name:            ds_combine_base_offset_subreg # same instruction sequence and offsets as ds_combine_base_offset, but the operands are the .sub0 halves of vreg_64 registers
 105 body:             |
 106   bb.0:
 107     %0:vreg_64 = IMPLICIT_DEF ; 64-bit register class; only %0.sub0 is referenced below
 108     S_BRANCH %bb.2
 109
 110   bb.1:
 111     S_ENDPGM 0
 112
 113   bb.2:
 114     %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
 115     %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
 116     V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec ; defines $vcc ahead of the DS accesses
 117     DS_WRITE_B32 %0.sub0, %0.sub0, 1024, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) ; first store of the pair, offset 1024
 118     undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; writes only sub0 of %3; the rest of %3 is marked undef
 119     DS_WRITE_B32 %0.sub0, %3.sub0, 1056, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) ; second store, same first operand %0.sub0, offset 1024 + 32
 120     %4:vgpr_32 = DS_READ_B32 %3.sub0, 1088, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) ; first load of the pair, offset 1088
 121     %5:vgpr_32 = DS_READ_B32 %3.sub0, 1120, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) ; second load, same operand %3.sub0, offset 1088 + 32
 122     $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc ; reads the $vcc defined before the DS accesses, so $vcc is live across them
 123     S_CBRANCH_VCCNZ %bb.1, implicit $vcc
 124     S_BRANCH %bb.1 ; both branch targets are bb.1
 125 ...
 126
 127 # GCN-LABEL: name: ds_combine_subreg{{$}}
 128
 129 # VI: DS_WRITE2_B32 %0.sub0, %0.sub0, %3.sub0, 0, 8,
 130 # VI: DS_READ2_B32 %3.sub0, 0, 8,
 131
 132 # GFX9: DS_WRITE2_B32_gfx9 %0.sub0, %0.sub0, %3.sub0, 0, 8,
 133 # GFX9: DS_READ2_B32_gfx9 %3.sub0, 0, 8,
 134 ---
 135 name:            ds_combine_subreg # subregister operands with small offsets (0 and 32); the check lines for this function match no add, only merged DS ops on %0.sub0 / %3.sub0
 136 body:             |
 137   bb.0:
 138     %0:vreg_64 = IMPLICIT_DEF ; 64-bit register class; only %0.sub0 is referenced below
 139     S_BRANCH %bb.2
 140
 141   bb.1:
 142     S_ENDPGM 0
 143
 144   bb.2:
 145     %1:sreg_64_xexec = V_CMP_NE_U32_e64 %0.sub0, 0, implicit $exec
 146     %2:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %1, implicit $exec
 147     V_CMP_NE_U32_e32 1, %2, implicit-def $vcc, implicit $exec ; defines $vcc ahead of the DS accesses
 148     DS_WRITE_B32 %0.sub0, %0.sub0, 0, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp) ; first store of the pair, offset 0
 149     undef %3.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec ; writes only sub0 of %3; the rest of %3 is marked undef
 150     DS_WRITE_B32 %0.sub0, %3.sub0, 32, 0, implicit $m0, implicit $exec :: (store 4 into %ir.tmp1) ; second store, same first operand %0.sub0, offset 32
 151     %4:vgpr_32 = DS_READ_B32 %3.sub0, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp2) ; first load of the pair, offset 0
 152     %5:vgpr_32 = DS_READ_B32 %3.sub0, 32, 0, implicit $m0, implicit $exec :: (load 4 from %ir.tmp3) ; second load, same operand %3.sub0, offset 32
 153     $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc ; reads the $vcc defined before the DS accesses
 154     S_CBRANCH_VCCNZ %bb.1, implicit $vcc
 155     S_BRANCH %bb.1 ; both branch targets are bb.1
 156 ...