test/CodeGen/ARM/domain-conv-vmovs.ll

   1 ; RUN: llc -verify-machineinstrs -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a9 -mattr=+neon,+neonfp -float-abi=hard < %s | FileCheck %s
   2
   3 define <2 x float> @test_vmovs_via_vext_lane0to0(float %arg, <2 x float> %in) {
   4 ; CHECK-LABEL: test_vmovs_via_vext_lane0to0:
       ; Inserts the scalar %arg into lane 0 of %in, then adds the resulting vector
       ; to itself. The expected output is a pair of vext.32 instructions writing
       ; d1 (the first one also reading d0), followed by a vadd.f32 whose two
       ; sources are both d1.
       ; NOTE(review): presumably %arg arrives in s0 (lane 0 of d0) and %in in d1
       ; under -float-abi=hard, which is what "lane0to0" refers to -- confirm.
   5   %vec = insertelement <2 x float> %in, float %arg, i32 0
   6   %res = fadd <2 x float> %vec, %vec
   7
   8 ; CHECK: vext.32 d1, d1, d0, #1
   9 ; CHECK: vext.32 d1, d1, d1, #1
  10 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
  11
  12   ret <2 x float> %res
  13 }
  14
  15 define <2 x float> @test_vmovs_via_vext_lane0to1(float %arg, <2 x float> %in) {
  16 ; CHECK-LABEL: test_vmovs_via_vext_lane0to1:
       ; Inserts the scalar %arg into lane 1 of %in, then adds the resulting vector
       ; to itself. The expected output is a pair of vext.32 instructions writing
       ; d1 (the second one also reading d0), followed by a vadd.f32 whose two
       ; sources are both d1.
       ; NOTE(review): presumably %arg arrives in s0 (lane 0 of d0) and %in in d1
       ; under -float-abi=hard, which is what "lane0to1" refers to -- confirm.
  17   %vec = insertelement <2 x float> %in, float %arg, i32 1
  18   %res = fadd <2 x float> %vec, %vec
  19
  20 ; CHECK: vext.32 d1, d1, d1, #1
  21 ; CHECK: vext.32 d1, d1, d0, #1
  22 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
  23
  24   ret <2 x float> %res
  25 }
  26
  27 define <2 x float> @test_vmovs_via_vext_lane1to0(float, float %arg, <2 x float> %in) {
  28 ; CHECK-LABEL: test_vmovs_via_vext_lane1to0:
       ; Inserts the scalar %arg into lane 0 of %in, then adds the resulting vector
       ; to itself. The expected output is a pair of vext.32 instructions writing
       ; d1 (the second one reading d0 as its first source), followed by a vadd.f32
       ; whose two sources are both d1.
       ; NOTE(review): the unnamed leading float parameter is never used; it
       ; presumably exists only to occupy s0 so that %arg lands in s1 (lane 1 of
       ; d0), which is what "lane1to0" refers to -- confirm.
  29   %vec = insertelement <2 x float> %in, float %arg, i32 0
  30   %res = fadd <2 x float> %vec, %vec
  31
  32 ; CHECK: vext.32 d1, d1, d1, #1
  33 ; CHECK: vext.32 d1, d0, d1, #1
  34 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
  35
  36   ret <2 x float> %res
  37 }
  38
  39 define <2 x float> @test_vmovs_via_vext_lane1to1(float, float %arg, <2 x float> %in) {
  40 ; CHECK-LABEL: test_vmovs_via_vext_lane1to1:
       ; Inserts the scalar %arg into lane 1 of %in, then adds the resulting vector
       ; to itself. The expected output is a pair of vext.32 instructions writing
       ; d1 (the first one reading d0 as its first source), followed by a vadd.f32
       ; whose two sources are both d1.
       ; NOTE(review): the unnamed leading float parameter is never used; it
       ; presumably exists only to occupy s0 so that %arg lands in s1 (lane 1 of
       ; d0), which is what "lane1to1" refers to -- confirm.
  41   %vec = insertelement <2 x float> %in, float %arg, i32 1
  42   %res = fadd <2 x float> %vec, %vec
  43
  44 ; CHECK: vext.32 d1, d0, d1, #1
  45 ; CHECK: vext.32 d1, d1, d1, #1
  46 ; CHECK: vadd.f32 {{d[0-9]+}}, d1, d1
  47
  48   ret <2 x float> %res
  49 }
  50
  51
  52 define float @test_vmovs_via_vdup(float, float %ret, float %lhs, float %rhs) {
  53 ; CHECK-LABEL: test_vmovs_via_vdup:
       ; Returns %ret + %ret, where %ret is the second float parameter. The first
       ; parameter is unnamed and %lhs/%rhs are not referenced in the body. The
       ; scalar move described below is expected to be emitted as a vdup.32 from
       ; lane 1 of d0 into d0.
  54
  55   ; Do an operation (which will end up NEON because of +neonfp) to convince the
  56   ; execution-domain pass that NEON is a good thing to use.
  57   %res = fadd float %ret, %ret
  58   ;  It makes sense for LLVM to do the addition in d0 here, because the result
  59   ;  is going to be returned. This means it will want a "vmov s0, s1":
  60 ; CHECK: vdup.32 d0, d0[1]
  61
  62   ret float %res
  63 }
  64
  65 declare float @llvm.sqrt.f32(float)  ; sqrt intrinsic called by @test_ineligible
  66
  67 declare void @bar()  ; body-less callee; @test_ineligible calls it between the fadd and the ret
  68
  69 ; This is primarily a compile test; see the comment above the call to @bar.
  70 define float @test_ineligible(float, float %in) {
  71 ; CHECK-LABEL: test_ineligible:
  72
       ; Computes sqrt(%in) + sqrt(%in); the value is kept live across the call to
       ; @bar below and then returned. The first float parameter is unnamed.
  73   %sqrt = call float @llvm.sqrt.f32(float %in)
  74   %val = fadd float %sqrt, %sqrt
  75
  76   ; This call forces a move from a callee-saved register to the return-reg. That
  77   ; move is not eligible for conversion to a d-register instruction because the
  78   ; use-def chains would be messed up. Primarily a compile-test (this used to
  79   ; hit an internal fault).
       ; NOTE(review): the CHECK lines below expect two vext.32 instructions after
       ; the call, which reads as if the move *is* now converted to a vext pair;
       ; the "not eligible" wording above may be stale -- confirm.
  80   call void @bar()
  81 ; CHECK: bl bar
  82 ; CHECK: vext.32
  83 ; CHECK: vext.32
  84   ret float %val
  85 }
  86
  87 define i32 @test_vmovs_no_sreg(i32 %in) {
  88 ; CHECK-LABEL: test_vmovs_no_sreg:
       ; Round-trips an i32 through the FP unit: bitcast to float, add the value to
       ; itself, bitcast the sum back to i32. Both transfers are expected to be
       ; vmov.32 instructions with a d-register operand.
  89
  90   ; Check that the movement to and from GPRs takes place in the NEON domain.
  91 ; CHECK: vmov.32 d
  92   %x = bitcast i32 %in to float
  93
  94   %res = fadd float %x, %x
  95
       ; The transfer back must write a core register (r<N>) from a d-register.
  96 ; CHECK: vmov.32 r{{[0-9]+}}, d
  97   %resi = bitcast float %res to i32
  98
  99   ret i32 %resi
 100 }
 101
 102
 103 ; The point of this test is:
 104 ;   + Make sure s1 is live before the BL
 105 ;   + Make sure s1 is clobbered by the BL
 106 ;   + Convince LLVM to emit a VMOV to S0
 107 ;   + Convince LLVM to domain-convert this.
 108
 109 ; When all of those are satisfied, LLVM should *not* mark s1 as an implicit-use
 110 ; because it's dead.
 111
 112 declare float @clobbers_s1(float, float)  ; body-less callee taking two floats
 113
 114 define <2 x float> @test_clobbers_recognised(<2 x float> %invec, float %val) {
       ; Calls @clobbers_s1 with %val as both arguments, inserts the returned float
       ; into lane 0 of %invec, and returns that vector added to itself.
       ; NOTE(review): this function has no FileCheck directives at all; presumably
       ; it passes or fails solely through -verify-machineinstrs on the RUN line
       ; rejecting a bad implicit use of s1 -- confirm.
 115   %elt = call float @clobbers_s1(float %val, float %val)
 116
 117   %vec = insertelement <2 x float> %invec, float %elt, i32 0
 118   %res = fadd <2 x float> %vec, %vec
 119   ret <2 x float> %res
 120 }