From fabb8546e54830051300c70ddcd8a6fce3b7d790 Mon Sep 17 00:00:00 2001 From: rth Date: Fri, 9 Jul 2004 22:35:35 +0000 Subject: [PATCH] * config/i386/i386.c (classify_argument): Treat V1xx modes the same as their base modes. CTImode, TCmode, and XCmode must be passed in memory. TFmode (__float128) must be passed as an SSE/SSEUP pair. V2SImode, V4HImode, and V8QI are class SSE. All sufficiently small remaining vector modes must be passed in one or two integer registers. (ix86_libcall_value): TFmode must be returned in xmm0, XCmode must be returned in memory. (bdesc_2arg, ix86_init_mmx_sse_builtins): __builtin_ia32_pmuludq and __builtin_ia32_pmuludq128 have non-uniform argument and return types and must thus be handled explicitly. * config/i386/i386.md (*movdi_1_rex64): Add cases for moving between MMX and XMM regs. (movv8qi_internal, movv4hi_internal, movv2si_internal, movv2sf_internal): Permit moving between MMX and XMM registers (since MMX arguments and return values are passed in XMM registers). (sse2_umulsidi3): Correct type and mode. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@84410 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 19 ++++++++++++++ gcc/config/i386/i386.c | 65 +++++++++++++++++++++++++++++++++-------------- gcc/config/i386/i386.md | 67 +++++++++++++++++++++++++++++++++---------------- 3 files changed, 110 insertions(+), 41 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f332652290c..2486648f955 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,22 @@ +2004-04-09 Jan Beulich + + * config/i386/i386.c (classify_argument): Treat V1xx modes the same as + their base modes. CTImode, TCmode, and XCmode must be passed in memory. + TFmode (__float128) must be passed as an SSE/SSEUP pair. V2SImode, V4HImode, + and V8QI are class SSE. All sufficiently small remaining vector modes + must be passed in one or two integer registers. + (ix86_libcall_value): TFmode must be returned in xmm0, XCmode must be + returned in memory. 
+ (bdesc_2arg, ix86_init_mmx_sse_builtins): __builtin_ia32_pmuludq and + __builtin_ia32_pmuludq128 have non-uniform argument and return types + and must thus be handled explicitly. + * config/i386/i386.md (*movdi_1_rex64): Add cases for moving between + MMX and XMM regs. + (movv8qi_internal, movv4hi_internal, movv2si_internal, + movv2sf_internal): Permit moving between MMX and XMM registers (since + MMX arguments and return values are passed in XMM registers). + (sse2_umulsidi3): Correct type and mode. + 2004-04-09 Richard Henderson * tree-cfg.c (dump_cfg_stats): Fix 64-bit format mismatch errors. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b468e1d0250..a9ebf210910 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2265,6 +2265,11 @@ classify_argument (enum machine_mode mode, tree type, return 0; } + /* for V1xx modes, just use the base mode */ + if (VECTOR_MODE_P (mode) + && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) + mode = GET_MODE_INNER (mode); + /* Classification of atomic types. 
*/ switch (mode) { @@ -2285,9 +2290,7 @@ classify_argument (enum machine_mode mode, tree type, classes[0] = classes[1] = X86_64_INTEGER_CLASS; return 2; case CTImode: - classes[0] = classes[1] = X86_64_INTEGER_CLASS; - classes[2] = classes[3] = X86_64_INTEGER_CLASS; - return 4; + return 0; case SFmode: if (!(bit_offset % 64)) classes[0] = X86_64_SSESF_CLASS; @@ -2302,21 +2305,20 @@ classify_argument (enum machine_mode mode, tree type, classes[1] = X86_64_X87UP_CLASS; return 2; case TFmode: - case TCmode: - return 0; - case XCmode: - classes[0] = X86_64_X87_CLASS; - classes[1] = X86_64_X87UP_CLASS; - classes[2] = X86_64_X87_CLASS; - classes[3] = X86_64_X87UP_CLASS; - return 4; - case DCmode: - classes[0] = X86_64_SSEDF_CLASS; - classes[1] = X86_64_SSEDF_CLASS; + classes[0] = X86_64_SSE_CLASS; + classes[1] = X86_64_SSEUP_CLASS; return 2; case SCmode: classes[0] = X86_64_SSE_CLASS; return 1; + case DCmode: + classes[0] = X86_64_SSEDF_CLASS; + classes[1] = X86_64_SSEDF_CLASS; + return 2; + case XCmode: + case TCmode: + /* These modes are larger than 16 bytes. 
*/ + return 0; case V4SFmode: case V4SImode: case V16QImode: @@ -2330,11 +2332,26 @@ classify_argument (enum machine_mode mode, tree type, case V2SImode: case V4HImode: case V8QImode: - return 0; + classes[0] = X86_64_SSE_CLASS; + return 1; case BLKmode: case VOIDmode: return 0; default: + if (VECTOR_MODE_P (mode)) + { + if (bytes > 16) + return 0; + if (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT) + { + if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) + classes[0] = X86_64_INTEGERSI_CLASS; + else + classes[0] = X86_64_INTEGER_CLASS; + classes[1] = X86_64_INTEGER_CLASS; + return 1 + (bytes > 8); + } + } abort (); } } @@ -2963,11 +2980,11 @@ ix86_libcall_value (enum machine_mode mode) case SCmode: case DFmode: case DCmode: + case TFmode: return gen_rtx_REG (mode, FIRST_SSE_REG); case XFmode: - case XCmode: return gen_rtx_REG (mode, FIRST_FLOAT_REG); - case TFmode: + case XCmode: case TCmode: return NULL; default: @@ -12856,8 +12873,6 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 }, { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 }, - { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 }, @@ -12895,6 +12910,9 @@ static const struct builtin_description bdesc_2arg[] = { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 }, { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 }, + { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 
}, + { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 }, { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 }, { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 }, @@ -13290,9 +13308,15 @@ ix86_init_mmx_sse_builtins (void) tree di_ftype_v8qi_v8qi = build_function_type_list (long_long_unsigned_type_node, V8QI_type_node, V8QI_type_node, NULL_TREE); + tree di_ftype_v2si_v2si + = build_function_type_list (long_long_unsigned_type_node, + V2SI_type_node, V2SI_type_node, NULL_TREE); tree v2di_ftype_v16qi_v16qi = build_function_type_list (V2DI_type_node, V16QI_type_node, V16QI_type_node, NULL_TREE); + tree v2di_ftype_v4si_v4si + = build_function_type_list (V2DI_type_node, + V4SI_type_node, V4SI_type_node, NULL_TREE); tree int_ftype_v16qi = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); tree v16qi_ftype_pcchar @@ -13588,6 +13612,9 @@ ix86_init_mmx_sse_builtins (void) def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI); + def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ); + def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128); + def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128); def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128); def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128); diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9ad227734f0..c3b335e5c3e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1963,14 +1963,19 @@ "ix86_split_long_move (operands); DONE;") (define_insn "*movdi_1_rex64" - [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!rm,!*y,!*Y,!rm,!*Y") - (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,rm,*Y,*Y,rm"))] + [(set (match_operand:DI 0 
"nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!rm,!*y,!*Y,!rm,!*Y,!*Y,!*y") + (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,rm,*Y,*Y,rm,*y,*Y"))] "TARGET_64BIT && (TARGET_INTER_UNIT_MOVES || optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (get_attr_type (insn)) { + case TYPE_SSECVT: + if (which_alternative == 11) + return "movq2dq\t{%1, %0|%0, %1}"; + else + return "movdq2q\t{%1, %0|%0, %1}"; case TYPE_SSEMOV: if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; @@ -2001,6 +2006,8 @@ (const_string "mmxmov") (eq_attr "alternative" "8,9,10") (const_string "ssemov") + (eq_attr "alternative" "11,12") + (const_string "ssecvt") (eq_attr "alternative" "4") (const_string "multi") (and (ne (symbol_ref "flag_pic") (const_int 0)) @@ -2008,9 +2015,9 @@ (const_string "lea") ] (const_string "imov"))) - (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*") - (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*") - (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")]) + (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*") + (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*") + (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI,DI,DI")]) (define_insn "*movdi_1_rex64_nointerunit" [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y") @@ -19705,52 +19712,68 @@ }) (define_insn "movv8qi_internal" - [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m") - (match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))] + [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m") + (match_operand:V8QI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))] "TARGET_MMX && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ pxor\t%0, %0 movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov") + [(set_attr "type" 
"mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov") (set_attr "mode" "DI")]) (define_insn "movv4hi_internal" - [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m") - (match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))] + [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m") + (match_operand:V4HI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))] "TARGET_MMX && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ pxor\t%0, %0 movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxmov") + [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov") (set_attr "mode" "DI")]) -(define_insn "movv2si_internal" - [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m") - (match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))] +(define_insn "*movv2si_internal" + [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*Y,?m") + (match_operand:V2SI 1 "vector_move_operand" "C,ym,y,*Y,y,*Ym,*Y"))] "TARGET_MMX && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ pxor\t%0, %0 movq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, %0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movq\t{%1, %0|%0, %1} movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") + [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov") (set_attr "mode" "DI")]) (define_insn "movv2sf_internal" - [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m") - (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))] + [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m,!y,!*Y,?*x,?m") + (match_operand:V2SF 1 "vector_move_operand" "C,ym,y,*Y,y,*xm,*x"))] "TARGET_3DNOW && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" "@ pxor\t%0, %0 movq\t{%1, %0|%0, %1} - movq\t{%1, %0|%0, %1}" - [(set_attr "type" "mmxcvt") - (set_attr "mode" "DI")]) + movq\t{%1, %0|%0, %1} + movdq2q\t{%1, 
%0|%0, %1} + movq2dq\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1} + movlps\t{%1, %0|%0, %1}" + [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov") + (set_attr "mode" "DI,DI,DI,DI,DI,V2SF,V2SF")]) (define_expand "movti" [(set (match_operand:TI 0 "nonimmediate_operand" "") @@ -23069,8 +23092,8 @@ (parallel [(const_int 0)])))))] "TARGET_SSE2" "pmuludq\t{%2, %0|%0, %2}" - [(set_attr "type" "sseimul") - (set_attr "mode" "TI")]) + [(set_attr "type" "mmxmul") + (set_attr "mode" "DI")]) (define_insn "sse2_umulv2siv2di3" [(set (match_operand:V2DI 0 "register_operand" "=x") -- 2.11.4.GIT