test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt %s -instcombine -S | FileCheck %s
   3
   4 ; Given pattern:
   5 ;   (trunc (x << Q) to iDst) << K
   6 ; we should rewrite it as
   7 ;   (trunc (x << (Q+K)) to iDst)  iff (Q+K) u< iDst
   8 ; This is only valid for shl.
   9 ; THIS FOLD DOES *NOT* REQUIRE ANY 'nuw'/'nsw' FLAGS!
  10
  11 ; Basic scalar test
  12
  13 define i16 @t0(i32 %x, i16 %y) { ; Scalar case: the two shift amounts sum to the constant 8.
  14 ; CHECK-LABEL: @t0(
  15 ; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
  16 ; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
  17 ; CHECK-NEXT:    ret i16 [[T5]]
  18 ;
  19   %t0 = sub i16 32, %y ; inner shift amount: 32 - y
  20   %t1 = zext i16 %t0 to i32 ; widened to the i32 type of the inner shift
  21   %t2 = shl i32 %x, %t1 ; inner shift, done in i32
  22   %t3 = trunc i32 %t2 to i16 ; narrow the shifted value to i16
  23   %t4 = add i16 %y, -24 ; outer shift amount: y - 24, so (32 - y) + (y - 24) = 8
  24   %t5 = shl i16 %t3, %t4 ; outer shift, done in i16; expected output is one shl by 8
  25   ret i16 %t5
  26 }
  27
  28 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ; Vector case with splat constants: every lane sums to 8.
  29 ; CHECK-LABEL: @t1_vec_splat(
  30 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 8>
  31 ; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
  32 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
  33 ;
  34   %t0 = sub <2 x i16> <i16 32, i16 32>, %y ; inner shift amount per lane: 32 - y
  35   %t1 = zext <2 x i16> %t0 to <2 x i32> ; widened to the element type of the inner shift
  36   %t2 = shl <2 x i32> %x, %t1 ; inner shift, done in i32 lanes
  37   %t3 = trunc <2 x i32> %t2 to <2 x i16> ; narrow each lane to i16
  38   %t4 = add <2 x i16> %y, <i16 -24, i16 -24> ; outer shift amount per lane: y - 24
  39   %t5 = shl <2 x i16> %t3, %t4 ; outer shift; expected output is a wide shl by <8, 8> followed by trunc
  40   ret <2 x i16> %t5
  41 }
  42
  43 define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) { ; Vector case with different constants per lane.
  44 ; CHECK-LABEL: @t2_vec_nonsplat(
  45 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 8, i32 30>
  46 ; CHECK-NEXT:    [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
  47 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
  48 ;
  49   %t0 = sub <2 x i16> <i16 32, i16 30>, %y ; inner shift amounts: 32 - y and 30 - y
  50   %t1 = zext <2 x i16> %t0 to <2 x i32> ; widened to the element type of the inner shift
  51   %t2 = shl <2 x i32> %x, %t1 ; inner shift, done in i32 lanes
  52   %t3 = trunc <2 x i32> %t2 to <2 x i16> ; narrow each lane to i16
  53   %t4 = add <2 x i16> %y, <i16 -24, i16 0> ; outer shift amounts: y - 24 and y + 0, so the lane sums are 8 and 30
  54   %t5 = shl <2 x i16> %t3, %t4 ; outer shift; expected output is a wide shl by <8, 30> followed by trunc
  55   ret <2 x i16> %t5
  56 }
  57
  58 ; Vector tests with undef elements in the shift amount constants
  59
  60 define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { ; The middle lane of the inner shift amount constant is undef.
  61 ; CHECK-LABEL: @t3_vec_nonsplat_undef0(
  62 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
  63 ; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
  64 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
  65 ;
  66   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y ; inner shift amount; lane 1 uses an undef constant
  67   %t1 = zext <3 x i16> %t0 to <3 x i32> ; widened to the element type of the inner shift
  68   %t2 = shl <3 x i32> %x, %t1 ; inner shift, done in i32 lanes
  69   %t3 = trunc <3 x i32> %t2 to <3 x i16> ; narrow each lane to i16
  70   %t4 = add <3 x i16> %y, <i16 -24, i16 -24, i16 -24> ; outer shift amount per lane: y - 24
  71   %t5 = shl <3 x i16> %t3, %t4 ; outer shift; expected output shifts by <8, 0, 8>, i.e. 0 in the undef lane
  72   ret <3 x i16> %t5
  73 }
  74
  75 define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; The middle lane of the outer shift amount constant is undef.
  76 ; CHECK-LABEL: @t4_vec_nonsplat_undef1(
  77 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
  78 ; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
  79 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
  80 ;
  81   %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y ; inner shift amount per lane: 32 - y
  82   %t1 = zext <3 x i16> %t0 to <3 x i32> ; widened to the element type of the inner shift
  83   %t2 = shl <3 x i32> %x, %t1 ; inner shift, done in i32 lanes
  84   %t3 = trunc <3 x i32> %t2 to <3 x i16> ; narrow each lane to i16
  85   %t4 = add <3 x i16> %y, <i16 -24, i16 undef, i16 -24> ; outer shift amount; lane 1 uses an undef constant
  86   %t5 = shl <3 x i16> %t3, %t4 ; outer shift; expected output shifts by <8, 0, 8>, i.e. 0 in the undef lane
  87   ret <3 x i16> %t5
  88 }
  89
  90 define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; The middle lane is undef in both shift amount constants.
  91 ; CHECK-LABEL: @t5_vec_nonsplat_undef1(
  92 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], <i32 8, i32 0, i32 8>
  93 ; CHECK-NEXT:    [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16>
  94 ; CHECK-NEXT:    ret <3 x i16> [[T5]]
  95 ;
  96   %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y ; inner shift amount; lane 1 uses an undef constant
  97   %t1 = zext <3 x i16> %t0 to <3 x i32> ; widened to the element type of the inner shift
  98   %t2 = shl <3 x i32> %x, %t1 ; inner shift, done in i32 lanes
  99   %t3 = trunc <3 x i32> %t2 to <3 x i16> ; narrow each lane to i16
 100   %t4 = add <3 x i16> %y, <i16 -24, i16 undef, i16 -24> ; outer shift amount; lane 1 uses an undef constant here too
 101   %t5 = shl <3 x i16> %t3, %t4 ; outer shift; expected output shifts by <8, 0, 8>, i.e. 0 in the undef lane
 102   ret <3 x i16> %t5
 103 }
 104
 105 ; One-use tests
 106
 107 declare void @use16(i16) ; external function called by the one-use tests to give an i16 value an additional use
 108 declare void @use32(i32) ; i32 counterpart; not referenced in the visible part of this file
 109
 110 define i16 @t6_extrause0(i32 %x, i16 %y) { ; The truncated inner shift %t3 has an extra use; the fold to shl by 8 is still expected.
 111 ; CHECK-LABEL: @t6_extrause0(
 112 ; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
 113 ; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
 114 ; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
 115 ; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
 116 ; CHECK-NEXT:    call void @use16(i16 [[T3]])
 117 ; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X]] to i16
 118 ; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 119 ; CHECK-NEXT:    ret i16 [[T5]]
 120 ;
 121   %t0 = sub i16 32, %y ; inner shift amount: 32 - y
 122   %t1 = zext i16 %t0 to i32 ; widened to the i32 type of the inner shift
 123   %t2 = shl i32 %x, %t1 ; inner shift, done in i32
 124   %t3 = trunc i32 %t2 to i16 ; narrow the shifted value to i16
 125   %t4 = add i16 %y, -24 ; outer shift amount: y - 24, so the amounts sum to 8
 126   call void @use16(i16 %t3) ; extra use of %t3; the expected output keeps the original chain for this call
 127   %t5 = shl i16 %t3, %t4 ; outer shift; expected to be rebuilt from %x as a shl by 8
 128   ret i16 %t5
 129 }
 130
 131 define i16 @t7_extrause1(i32 %x, i16 %y) { ; The outer shift amount %t4 has an extra use; the fold to shl by 8 is still expected.
 132 ; CHECK-LABEL: @t7_extrause1(
 133 ; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y:%.*]], -24
 134 ; CHECK-NEXT:    call void @use16(i16 [[T4]])
 135 ; CHECK-NEXT:    [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16
 136 ; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[X_TR]], 8
 137 ; CHECK-NEXT:    ret i16 [[T5]]
 138 ;
 139   %t0 = sub i16 32, %y ; inner shift amount: 32 - y
 140   %t1 = zext i16 %t0 to i32 ; widened to the i32 type of the inner shift
 141   %t2 = shl i32 %x, %t1 ; inner shift, done in i32
 142   %t3 = trunc i32 %t2 to i16 ; narrow the shifted value to i16
 143   %t4 = add i16 %y, -24 ; outer shift amount: y - 24, so the amounts sum to 8
 144   call void @use16(i16 %t4) ; extra use of %t4; the expected output keeps only this add and the call
 145   %t5 = shl i16 %t3, %t4 ; outer shift; expected to be rebuilt from %x as a shl by 8
 146   ret i16 %t5
 147 }
 148
 149 define i16 @t8_extrause2(i32 %x, i16 %y) { ; Both %t3 and %t4 have extra uses; the expected output keeps both shifts.
 150 ; CHECK-LABEL: @t8_extrause2(
 151 ; CHECK-NEXT:    [[T0:%.*]] = sub i16 32, [[Y:%.*]]
 152 ; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
 153 ; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
 154 ; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
 155 ; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -24
 156 ; CHECK-NEXT:    call void @use16(i16 [[T3]])
 157 ; CHECK-NEXT:    call void @use16(i16 [[T4]])
 158 ; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
 159 ; CHECK-NEXT:    ret i16 [[T5]]
 160 ;
 161   %t0 = sub i16 32, %y ; inner shift amount: 32 - y
 162   %t1 = zext i16 %t0 to i32 ; widened to the i32 type of the inner shift
 163   %t2 = shl i32 %x, %t1 ; inner shift, done in i32
 164   %t3 = trunc i32 %t2 to i16 ; narrow the shifted value to i16
 165   %t4 = add i16 %y, -24 ; outer shift amount: y - 24
 166   call void @use16(i16 %t3) ; extra use of the truncated inner shift
 167   call void @use16(i16 %t4) ; extra use of the outer shift amount
 168   %t5 = shl i16 %t3, %t4 ; outer shift; expected to stay as written
 169   ret i16 %t5
 170 }
 171
 172 ; No 'nuw'/'nsw' flags are to be propagated!
 173 ; But we can't test that, such IR does not reach that code.
 174
 175 ; Negative tests
 176
 177 ; Can't fold: the shift amounts sum to 30 + (-31) = -1, which is out of range
 178 define i16 @n11(i32 %x, i16 %y) { ; Negative test: the expected output keeps both shifts unchanged.
 179 ; CHECK-LABEL: @n11(
 180 ; CHECK-NEXT:    [[T0:%.*]] = sub i16 30, [[Y:%.*]]
 181 ; CHECK-NEXT:    [[T1:%.*]] = zext i16 [[T0]] to i32
 182 ; CHECK-NEXT:    [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]]
 183 ; CHECK-NEXT:    [[T3:%.*]] = trunc i32 [[T2]] to i16
 184 ; CHECK-NEXT:    [[T4:%.*]] = add i16 [[Y]], -31
 185 ; CHECK-NEXT:    [[T5:%.*]] = shl i16 [[T3]], [[T4]]
 186 ; CHECK-NEXT:    ret i16 [[T5]]
 187 ;
 188   %t0 = sub i16 30, %y ; inner shift amount: 30 - y
 189   %t1 = zext i16 %t0 to i32 ; widened to the i32 type of the inner shift
 190   %t2 = shl i32 %x, %t1 ; inner shift, done in i32
 191   %t3 = trunc i32 %t2 to i16 ; narrow the shifted value to i16
 192   %t4 = add i16 %y, -31 ; outer shift amount: y - 31, so (30 - y) + (y - 31) = -1
 193   %t5 = shl i16 %t3, %t4 ; outer shift; expected to stay as written
 194   ret i16 %t5
 195 }
 196
 197 ; Bit width mismatch of shift amount
 198
 199 @Y32 = global i32 42 ; i32 global; t01 uses its address (via ptrtoint) as a shift amount
 200 @Y16 = global i16 42 ; i16 global; t01 uses its address (via ptrtoint) as a shift amount
 201 define i16 @t01(i32 %x) { ; Shift amounts are ptrtoint constant expressions of different widths; expected output is unchanged.
 202 ; CHECK-LABEL: @t01(
 203 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], ptrtoint (i32* @Y32 to i32)
 204 ; CHECK-NEXT:    [[T1:%.*]] = trunc i32 [[T0]] to i16
 205 ; CHECK-NEXT:    [[T2:%.*]] = shl i16 [[T1]], ptrtoint (i16* @Y16 to i16)
 206 ; CHECK-NEXT:    ret i16 [[T2]]
 207 ;
 208   %t0 = shl i32 %x, ptrtoint (i32* @Y32 to i32) ; inner shift by an i32 constant expression
 209   %t1 = trunc i32 %t0 to i16 ; narrow the shifted value to i16
 210   %t2 = shl i16 %t1, ptrtoint (i16* @Y16 to i16) ; outer shift by an i16 constant expression
 211   ret i16 %t2
 212 }