; test/CodeGen/X86/v2f32.ll

; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=yonah -march=x86 -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32

   5 ; PR7518
   6 define void @test1(<2 x float> %Q, float *%P2) nounwind {
   7   %a = extractelement <2 x float> %Q, i32 0
   8   %b = extractelement <2 x float> %Q, i32 1
   9   %c = fadd float %a, %b
  10
  11   store float %c, float* %P2
  12   ret void
  13 ; X64: test1:
  14 ; X64-NEXT: pshufd      $1, %xmm0, %xmm1
  15 ; X64-NEXT: addss       %xmm0, %xmm1
  16 ; X64-NEXT: movss       %xmm1, (%rdi)
  17 ; X64-NEXT: ret
  18
  19 ; W64: test1:
  20 ; W64-NEXT: movdqa  (%rcx), %xmm0
  21 ; W64-NEXT: pshufd  $1, %xmm0, %xmm1
  22 ; W64-NEXT: addss   %xmm0, %xmm1
  23 ; W64-NEXT: movss   %xmm1, (%rdx)
  24 ; W64-NEXT: ret
  25
  26 ; X32: test1:
  27 ; X32-NEXT: pshufd      $1, %xmm0, %xmm1
  28 ; X32-NEXT: addss       %xmm0, %xmm1
  29 ; X32-NEXT: movl        4(%esp), %eax
  30 ; X32-NEXT: movss       %xmm1, (%eax)
  31 ; X32-NEXT: ret
  32 }
  33
  34
  35 define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {
  36   %Z = fadd <2 x float> %Q, %R
  37   ret <2 x float> %Z
  38
  39 ; X64: test2:
  40 ; X64-NEXT: addps       %xmm1, %xmm0
  41 ; X64-NEXT: ret
  42
  43 ; W64: test2:
  44 ; W64-NEXT: movaps  (%rcx), %xmm0
  45 ; W64-NEXT: addps   (%rdx), %xmm0
  46 ; W64-NEXT: ret
  47
  48 ; X32: test2:
  49 ; X32:      addps       %xmm1, %xmm0
  50 }
  51
  52
  53 define <2 x float> @test3(<4 x float> %A) nounwind {
  54         %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  55         %C = fadd <2 x float> %B, %B
  56         ret <2 x float> %C
  57 ; X64: test3:
  58 ; X64-NEXT: addps       %xmm0, %xmm0
  59 ; X64-NEXT: ret
  60
  61 ; W64: test3:
  62 ; W64-NEXT: movaps  (%rcx), %xmm0
  63 ; W64-NEXT: addps   %xmm0, %xmm0
  64 ; W64-NEXT: ret
  65
  66 ; X32: test3:
  67 ; X32-NEXT: addps       %xmm0, %xmm0
  68 ; X32-NEXT: ret
  69 }
  70
  71 define <2 x float> @test4(<2 x float> %A) nounwind {
  72         %C = fadd <2 x float> %A, %A
  73         ret <2 x float> %C
  74 ; X64: test4:
  75 ; X64-NEXT: addps       %xmm0, %xmm0
  76 ; X64-NEXT: ret
  77
  78 ; W64: test4:
  79 ; W64-NEXT: movaps  (%rcx), %xmm0
  80 ; W64-NEXT: addps   %xmm0, %xmm0
  81 ; W64-NEXT: ret
  82
  83 ; X32: test4:
  84 ; X32-NEXT: addps       %xmm0, %xmm0
  85 ; X32-NEXT: ret
  86 }
  87
  88 define <4 x float> @test5(<4 x float> %A) nounwind {
  89         %B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  90         %C = fadd <2 x float> %B, %B
  91         br label %BB
  92
  93 BB:
  94         %D = fadd <2 x float> %C, %C
  95         %E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  96         ret <4 x float> %E
  97
  98 ; X64: test5:
  99 ; X64-NEXT: addps       %xmm0, %xmm0
 100 ; X64-NEXT: addps       %xmm0, %xmm0
 101 ; X64-NEXT: ret
 102
 103 ; W64: test5:
 104 ; W64-NEXT: movaps  (%rcx), %xmm0
 105 ; W64-NEXT: addps   %xmm0, %xmm0
 106 ; W64-NEXT: addps   %xmm0, %xmm0
 107 ; W64-NEXT: ret
 108
 109 ; X32: test5:
 110 ; X32-NEXT: addps       %xmm0, %xmm0
 111 ; X32-NEXT: addps       %xmm0, %xmm0
 112 ; X32-NEXT: ret
 113 }
 114
 115