From 1113ba513c44eca6af648f7a31858253ce63c5b9 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Sat, 3 Jul 2010 01:15:47 +0000
Subject: [PATCH] Add AVX SSE4.1 binop (some forms of packed max,min,mul,pack,cmp) instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107558 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86InstrSSE.td           | 26 ++++++++++
 test/MC/AsmParser/X86/x86_32-encoding.s | 88 +++++++++++++++++++++++++++++++++
 test/MC/AsmParser/X86/x86_64-encoding.s | 88 +++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+)

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 206e397953..b5e54e6beb 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -4123,6 +4123,32 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
           (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
 }
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE41] in {
+  let isCommutable = 0 in
+  defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+                                      0>, VEX_4V;
+  defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+                                     0>, VEX_4V;
+  defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+                                    0>, VEX_4V;
+  defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+                                    0>, VEX_4V;
+  defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+                                    0>, VEX_4V;
+  defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+                                    0>, VEX_4V;
+  defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+                                    0>, VEX_4V;
+  defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+                                    0>, VEX_4V;
+  defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+                                    0>, VEX_4V;
+  defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+                                    0>, VEX_4V;
+  defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+                                    0>, VEX_4V;
+}
+
 let Constraints = "$src1 = $dst" in {
   let isCommutable = 0 in
   defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
diff --git a/test/MC/AsmParser/X86/x86_32-encoding.s b/test/MC/AsmParser/X86/x86_32-encoding.s
index e1085a3074..28273ee4d2 100644
--- a/test/MC/AsmParser/X86/x86_32-encoding.s
+++ b/test/MC/AsmParser/X86/x86_32-encoding.s
@@ -11782,3 +11782,91 @@
 // CHECK: encoding: [0xc4,0xe2,0x79,0x41,0x10]
           vphminposuw  (%eax), %xmm2
 
+// CHECK: vpackusdw  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x2b,0xca]
+          vpackusdw  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpackusdw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x2b,0x18]
+          vpackusdw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpcmpeqq  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x29,0xca]
+          vpcmpeqq  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpcmpeqq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x29,0x18]
+          vpcmpeqq  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsb  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x38,0xca]
+          vpminsb  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x38,0x18]
+          vpminsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminsd  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x39,0xca]
+          vpminsd  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminsd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x39,0x18]
+          vpminsd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminud  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3b,0xca]
+          vpminud  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminud  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3b,0x18]
+          vpminud  (%eax), %xmm2, %xmm3
+
+// CHECK: vpminuw  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3a,0xca]
+          vpminuw  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpminuw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3a,0x18]
+          vpminuw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsb  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3c,0xca]
+          vpmaxsb  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxsb  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3c,0x18]
+          vpmaxsb  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxsd  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3d,0xca]
+          vpmaxsd  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxsd  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3d,0x18]
+          vpmaxsd  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxud  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3f,0xca]
+          vpmaxud  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxud  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3f,0x18]
+          vpmaxud  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmaxuw  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x3e,0xca]
+          vpmaxuw  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmaxuw  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x3e,0x18]
+          vpmaxuw  (%eax), %xmm2, %xmm3
+
+// CHECK: vpmuldq  %xmm2, %xmm3, %xmm1
+// CHECK: encoding: [0xc4,0xe2,0x61,0x28,0xca]
+          vpmuldq  %xmm2, %xmm3, %xmm1
+
+// CHECK: vpmuldq  (%eax), %xmm2, %xmm3
+// CHECK: encoding: [0xc4,0xe2,0x69,0x28,0x18]
+          vpmuldq  (%eax), %xmm2, %xmm3
+
diff --git a/test/MC/AsmParser/X86/x86_64-encoding.s b/test/MC/AsmParser/X86/x86_64-encoding.s
index ad52ffb36a..b676eab740 100644
--- a/test/MC/AsmParser/X86/x86_64-encoding.s
+++ b/test/MC/AsmParser/X86/x86_64-encoding.s
@@ -1830,3 +1830,91 @@ pshufb CPI1_0(%rip), %xmm1
 // CHECK: encoding: [0xc4,0x62,0x79,0x41,0x20]
           vphminposuw  (%rax), %xmm12
 
+// CHECK: vpackusdw  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x2b,0xdc]
+          vpackusdw  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpackusdw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x2b,0x28]
+          vpackusdw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpcmpeqq  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x29,0xdc]
+          vpcmpeqq  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpcmpeqq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x29,0x28]
+          vpcmpeqq  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsb  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x38,0xdc]
+          vpminsb  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x38,0x28]
+          vpminsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminsd  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x39,0xdc]
+          vpminsd  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminsd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x39,0x28]
+          vpminsd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminud  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3b,0xdc]
+          vpminud  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminud  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3b,0x28]
+          vpminud  (%rax), %xmm12, %xmm13
+
+// CHECK: vpminuw  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3a,0xdc]
+          vpminuw  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpminuw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3a,0x28]
+          vpminuw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsb  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3c,0xdc]
+          vpmaxsb  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxsb  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3c,0x28]
+          vpmaxsb  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxsd  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3d,0xdc]
+          vpmaxsd  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxsd  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3d,0x28]
+          vpmaxsd  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxud  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3f,0xdc]
+          vpmaxud  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxud  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3f,0x28]
+          vpmaxud  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmaxuw  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x3e,0xdc]
+          vpmaxuw  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmaxuw  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x3e,0x28]
+          vpmaxuw  (%rax), %xmm12, %xmm13
+
+// CHECK: vpmuldq  %xmm12, %xmm13, %xmm11
+// CHECK: encoding: [0xc4,0x42,0x11,0x28,0xdc]
+          vpmuldq  %xmm12, %xmm13, %xmm11
+
+// CHECK: vpmuldq  (%rax), %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x62,0x19,0x28,0x28]
+          vpmuldq  (%rax), %xmm12, %xmm13
+
-- 
2.11.4.GIT