From 0eac26de911d5eccc305d8491c409421cb5c82ce Mon Sep 17 00:00:00 2001 From: meissner Date: Thu, 3 Jun 2010 00:06:12 +0000 Subject: [PATCH] PR target/44218, improve -mrecip on powerpc git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@160199 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 136 +++ gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/altivec.md | 84 +- gcc/config/rs6000/rs6000-builtin.def | 11 +- gcc/config/rs6000/rs6000-c.c | 22 + gcc/config/rs6000/rs6000-protos.h | 5 +- gcc/config/rs6000/rs6000.c | 1178 ++++++++++++++++-------- gcc/config/rs6000/rs6000.h | 40 + gcc/config/rs6000/rs6000.md | 119 ++- gcc/config/rs6000/rs6000.opt | 12 +- gcc/config/rs6000/vector.md | 14 + gcc/config/rs6000/vsx.md | 20 +- gcc/doc/extend.texi | 18 +- gcc/doc/invoke.texi | 60 +- gcc/testsuite/ChangeLog | 13 + gcc/testsuite/gcc.target/powerpc/recip-1.c | 18 + gcc/testsuite/gcc.target/powerpc/recip-2.c | 21 + gcc/testsuite/gcc.target/powerpc/recip-3.c | 22 + gcc/testsuite/gcc.target/powerpc/recip-4.c | 36 + gcc/testsuite/gcc.target/powerpc/recip-5.c | 94 ++ gcc/testsuite/gcc.target/powerpc/recip-6.c | 16 + gcc/testsuite/gcc.target/powerpc/recip-7.c | 16 + gcc/testsuite/gcc.target/powerpc/recip-test.h | 149 +++ gcc/testsuite/gcc.target/powerpc/recip-test2.h | 432 +++++++++ gcc/testsuite/lib/target-supports.exp | 28 + 25 files changed, 2112 insertions(+), 454 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-3.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-4.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-5.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-6.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-7.c create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-test.h create mode 100644 gcc/testsuite/gcc.target/powerpc/recip-test2.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog 
index 59008a5ab40..ecfdab1044d 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,139 @@ +2010-06-02 Michael Meissner + + PR target/44218 + * doc/invoke.texi (RS/6000 and PowerPC Options): Delete obsolete + -mswdiv option. Add -mrecip, -mrecip=, -mrecip-precision + options. + + * doc/extend.texi (powerpc builtins): Document vec_recip, + vec_rsqrt, vec_rsqrte altivec/vsx builtins. + + * config/rs6000/rs6000-protos.h (rs6000_emit_swdiv): New + function. + (rs6000_emit_swrsqrt): Ditto. + (rs6000_emit_swdivsf): Delete. + (rs6000_emit_swdivdf): Ditto. + (rs6000_emit_swrsqrtsf): Ditto. + + * config/rs6000/rs6000.c (rs6000_recip_bits): New global to + describe the reciprocal estimate support for each type. + (recip_options): Map -mrecip= into option bits. + (gen_2arg_fn_t): New typedef for binary rtx gen function. + (rs6000_debug_reg_global): If -mdebug=reg, print the state of the + reciprocal estimate instructions. + (rs6000_init_hard_regno_mode_ok): Key ws constraint off of the + debug -mvsx-scalar-memory switch instead of -mvsx-scalar-double. + Set up rs6000_recip_bits based on the -mrecip* options. Print the + cost information if -mdebug=cost or -mdebug=reg. + (rs6000_override_options): Set -mrecip-precision for power6, and + power7 machines. If -mvsx or -mdfp, enable various options that + came in previous instruction set ISAs, unless the option was + explicitly disabled by the command line option. Parse + -mrecip= options. + (rs6000_builtin_vectorized_function): Add support for vectorizing + the reciprocal estimate builtins and expansions. + (rs6000_handle_option): Add -mrecip, -mrecip= support. + (bdesc_2arg): Add reciprocal estimate builtins. + (bdesc_1arg): Add reciprocal square root estimate builtins. + (rs6000_expand_builtin): Rewrite to use a switch statement, + instead of multiple if/then/elses. Add reciprocal estimate + builtins. + (rs6000_init_builtins): Create declarations for reciprocal + estimate builtins.
+ (rs6000_preferred_reload_class): Simplify VSX preferences, if scalar + sized, prefer traditional floating point registers, if integer + vector types, prefer altivec registers. Don't actually look at + the memory address any more. + (rs6000_builtin_reciprocal): Add new builtin reciprocal estimate + builtins. + (rs6000_load_constant_and_splat): New helper function to load up + the constant for reciprocal estimate instructions. + (rs6000_emit_madd): New helper function for generating + multiply/add type instructions, based on the current switches. + (rs6000_emit_msub): Ditto. + (rs6000_emit_mnsub): Ditto. + (rs6000_emit_swdiv_high_precision): Replace rs6000_emit_swdivsf to + replace a divide with a reciprocal estimate and fixup, adding + support for machines with high precision and vectors. + (rs6000_emit_swdiv_low_precision): Rewrite rs6000_emit_swdivdf for + low precision machines. + (rs6000_emit_swdiv): New common function to be called to replace a + division with reciprocal estimate and fixup. + (rs6000_emit_swrsqrt): Replace rs6000_emit_swrsqrtsf. Add support + for double and vector types. Add support for high precision + machines. + + * config/rs6000/rs6000.h (TARGET_FRES): New macro to say whether + the reciprocal estimate instructions can be generated. + (TARGET_FRE): Ditto. + (TARGET_FRSQRTES): Ditto. + (TARGET_FRSQRTE): Ditto. + (RS6000_RECIP_*): New macros for reciprocal estimate support. + + * config/rs6000/vector.md (rsqrte2): New insn for reciprocal + square root estimate on vectors. + (re2): New insn for reciprocal division estimate on vectors. + + * config/rs6000/rs6000-builtin.def (ALTIVEC_BUILTIN_VRSQRTFP): + New builtin. + (ALTIVEC_BUILTIN_VRECIPFP): Ditto. + (ALTIVEC_BUILTIN_VEC_RECIP): Ditto. + (ALTIVEC_BUILTIN_VEC_RSQRT): Ditto. + (VSX_BUILTIN_VEC_RSQRT_V4SF): Ditto. + (VSX_BUILTIN_VEC_RSQRT_V2DF): Ditto. + (RS6000_BUILTIN_RSQRT): Ditto. + (ALTIVEC_BUILTIN_VEC_RSQRTE): Denote that the builtin is a + floating point builtin.
+ + * config/rs6000/rs6000-c.c (rs6000_cpu_cpp_builtins): Define + macros __RECIP__, __RECIPF__, __RSQRTE__, __RSQRTEF__, + __RECIP_PRECISION__ based on the command line switches. + (altivec_overloaded_builtins): Add reciprocal estimate builtins. + + * config/rs6000/rs6000.opt (-mrecip): Document add support for + replacing division instructions with reciprocal estimate and + fixup. + (-mrecip=): New option. + (-mrecip-precision): Ditto. + + * config/rs6000/vsx.md (UNSPEC_VSX_RSQRTE): Delete. + (vsx_rsqrte2): Use UNSPEC_RSQRT not UNSPEC_VSX_RSQRTE. + (vsx_copysignsf3): If -mvsx, use double precision cpsign on single + precision scalar. + + * config/rs6000/altivec.md (UNSPEC_VRSQRTEFP): Delete. + (UNSPEC_VREFP): Ditto. + (altivec_vnmsubfp*): Make altivec nmsub mirror the scalar and VSX + counterparts with regard to support of -mno-fused-madd and + -ffast-math. + (altivec_vrsqrtefp): Use common UNSPEC to allow scalar/vector + reciprocal estimate instructions to be generated. + (altivec_vrefp): Ditto. + + * config/rs6000/rs6000.md (RECIPF): New iterator for reciprocal + estimate support. + (rreg): New mode attribute for reciprocal estimate support. + (recip3): New insn for division using reciprocal estimate + and fixup builtins. + (divide define_split): New define_split to convert floating point + division to use reciprocal estimate if the user used the + appropriate options and the split is run when we can add new + pseudo registers for the fixup. + (rsqrt2): New insn for reciprocal square root support. + (recipsf3): Move into recip3. + (recipdf3): Ditto. + (fres): Use TARGET_FRES. + (rsqrtsf2): Move into rsqrt2. + (rsqrtsf_internal1): Use TARGET_FRSQRTES. + (copysignsf3): Add support for VSX. + (fred): Use TARGET_FRE. + (fred_fpr): Ditto. + (rsqrtdf_internal1): New function for frsqrte instruction. + + * config/rs6000/altivec.h (vec_recipdiv): Define new vector + builtin. + (vec_rsqrt): Ditto.
+ 2010-06-03 Richard Guenther PR middle-end/44291 diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index bc4f30f7cb2..5f4510adc30 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -163,6 +163,8 @@ #define vec_vpkshus __builtin_vec_vpkshus #define vec_re __builtin_vec_re #define vec_round __builtin_vec_round +#define vec_recipdiv __builtin_vec_recipdiv +#define vec_rsqrt __builtin_vec_rsqrt #define vec_rsqrte __builtin_vec_rsqrte #define vec_vsubfp __builtin_vec_vsubfp #define vec_subc __builtin_vec_subc diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 6fbb7cdcdac..7bf3c660312 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -75,9 +75,7 @@ (UNSPEC_VCTSXS 154) (UNSPEC_VLOGEFP 155) (UNSPEC_VEXPTEFP 156) - (UNSPEC_VRSQRTEFP 157) - (UNSPEC_VREFP 158) - ;; 159-162 deleted + ;; 157-162 deleted (UNSPEC_VLSDOI 163) (UNSPEC_VUPKHSB 167) (UNSPEC_VUPKHPX 168) @@ -141,10 +139,11 @@ (UNSPEC_VPERMHI 321) (UNSPEC_INTERHI 322) (UNSPEC_INTERLO 323) - (UNSPEC_VUPKHS_V4SF 324) - (UNSPEC_VUPKLS_V4SF 325) - (UNSPEC_VUPKHU_V4SF 326) - (UNSPEC_VUPKLU_V4SF 327) + (UNSPEC_VUPKHS_V4SF 324) + (UNSPEC_VUPKLS_V4SF 325) + (UNSPEC_VUPKHU_V4SF 326) + (UNSPEC_VUPKLU_V4SF 327) + (UNSPEC_VNMSUBFP 328) ]) (define_constants @@ -628,11 +627,64 @@ }") ;; Fused multiply subtract -(define_insn "altivec_vnmsubfp" +(define_expand "altivec_vnmsubfp" + [(match_operand:V4SF 0 "register_operand" "") + (match_operand:V4SF 1 "register_operand" "") + (match_operand:V4SF 2 "register_operand" "") + (match_operand:V4SF 3 "register_operand" "")] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" +{ + if (TARGET_FUSED_MADD && HONOR_SIGNED_ZEROS (SFmode)) + { + emit_insn (gen_altivec_vnmsubfp_1 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } + else if (TARGET_FUSED_MADD && !HONOR_SIGNED_ZEROS (DFmode)) + { + emit_insn (gen_altivec_vnmsubfp_2 (operands[0], operands[1], + operands[2], operands[3])); + DONE; 
+ } + else + { + emit_insn (gen_altivec_vnmsubfp_3 (operands[0], operands[1], + operands[2], operands[3])); + DONE; + } +}) + +(define_insn "altivec_vnmsubfp_1" [(set (match_operand:V4SF 0 "register_operand" "=v") - (neg:V4SF (minus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v") - (match_operand:V4SF 2 "register_operand" "v")) - (match_operand:V4SF 3 "register_operand" "v"))))] + (neg:V4SF + (minus:V4SF + (mult:V4SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v")) + (match_operand:V4SF 3 "register_operand" "v"))))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD + && HONOR_SIGNED_ZEROS (SFmode)" + "vnmsubfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vnmsubfp_2" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (minus:V4SF + (match_operand:V4SF 3 "register_operand" "v") + (mult:V4SF + (match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v"))))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode) && TARGET_FUSED_MADD + && !HONOR_SIGNED_ZEROS (SFmode)" + "vnmsubfp %0,%1,%2,%3" + [(set_attr "type" "vecfloat")]) + +(define_insn "altivec_vnmsubfp_3" + [(set (match_operand:V4SF 0 "register_operand" "=v") + (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v") + (match_operand:V4SF 2 "register_operand" "v") + (match_operand:V4SF 3 "register_operand" "v")] + UNSPEC_VNMSUBFP))] "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vnmsubfp %0,%1,%2,%3" [(set_attr "type" "vecfloat")]) @@ -1444,19 +1496,19 @@ "vexptefp %0,%1" [(set_attr "type" "vecfloat")]) -(define_insn "altivec_vrsqrtefp" +(define_insn "*altivec_vrsqrtefp" [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] - UNSPEC_VRSQRTEFP))] - "TARGET_ALTIVEC" + UNSPEC_RSQRT))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vrsqrtefp %0,%1" [(set_attr "type" "vecfloat")]) (define_insn "altivec_vrefp" [(set (match_operand:V4SF 0 "register_operand" "=v") 
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] - UNSPEC_VREFP))] - "TARGET_ALTIVEC" + UNSPEC_FRES))] + "VECTOR_UNIT_ALTIVEC_P (V4SFmode)" "vrefp %0,%1" [(set_attr "type" "vecfloat")]) diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 7c5619a8e14..9f45a72e2c0 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -159,6 +159,7 @@ RS6000_BUILTIN(ALTIVEC_BUILTIN_VRFIZ, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLB, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLH, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VRLW, RS6000_BTC_CONST) +RS6000_BUILTIN(ALTIVEC_BUILTIN_VRSQRTFP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VRSQRTEFP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLB, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VSLH, RS6000_BTC_CONST) @@ -269,6 +270,7 @@ RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V8HI, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V16QI, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_EXT_V4SF, RS6000_BTC_CONST) RS6000_BUILTIN(ALTIVEC_BUILTIN_COPYSIGN_V4SF, RS6000_BTC_CONST) +RS6000_BUILTIN(ALTIVEC_BUILTIN_VRECIPFP, RS6000_BTC_FP_PURE) /* Altivec overloaded builtins. */ /* For now, don't set the classification for overloaded functions. 
@@ -351,10 +353,12 @@ RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKS, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PACKSU, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_PERM, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RE, RS6000_BTC_MISC) +RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RECIP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RL, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RINT, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_ROUND, RS6000_BTC_MISC) -RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RSQRTE, RS6000_BTC_MISC) +RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RSQRT, RS6000_BTC_FP_PURE) +RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_RSQRTE, RS6000_BTC_FP_PURE) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SEL, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SL, RS6000_BTC_MISC) RS6000_BUILTIN(ALTIVEC_BUILTIN_VEC_SLD, RS6000_BTC_MISC) @@ -959,6 +963,10 @@ RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEL_V2DF, RS6000_BTC_CONST) RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEL_V2DI, RS6000_BTC_CONST) RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEH_V2DF, RS6000_BTC_CONST) RS6000_BUILTIN(VSX_BUILTIN_VEC_MERGEH_V2DI, RS6000_BTC_CONST) +RS6000_BUILTIN(VSX_BUILTIN_VEC_RSQRT_V4SF, RS6000_BTC_FP_PURE) +RS6000_BUILTIN(VSX_BUILTIN_VEC_RSQRT_V2DF, RS6000_BTC_FP_PURE) +RS6000_BUILTIN(VSX_BUILTIN_RECIP_V4SF, RS6000_BTC_FP_PURE) +RS6000_BUILTIN(VSX_BUILTIN_RECIP_V2DF, RS6000_BTC_FP_PURE) /* VSX overloaded builtins, add the overloaded functions not present in Altivec. 
*/ @@ -991,4 +999,5 @@ RS6000_BUILTIN(POWER7_BUILTIN_BPERMD, RS6000_BTC_CONST) RS6000_BUILTIN(RS6000_BUILTIN_RECIP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(RS6000_BUILTIN_RECIPF, RS6000_BTC_FP_PURE) RS6000_BUILTIN(RS6000_BUILTIN_RSQRTF, RS6000_BTC_FP_PURE) +RS6000_BUILTIN(RS6000_BUILTIN_RSQRT, RS6000_BTC_FP_PURE) RS6000_BUILTIN(RS6000_BUILTIN_BSWAP_HI, RS6000_BTC_CONST) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index ac11336aee9..7a197c1fbcc 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -362,6 +362,16 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) builtin_define ("__builtin_vsx_xvnmsubasp=__builtin_vsx_xvnmsubsp"); builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp"); } + if (RS6000_RECIP_HAVE_RE_P (DFmode)) + builtin_define ("__RECIP__"); + if (RS6000_RECIP_HAVE_RE_P (SFmode)) + builtin_define ("__RECIPF__"); + if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)) + builtin_define ("__RSQRTE__"); + if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)) + builtin_define ("__RSQRTEF__"); + if (TARGET_RECIP_PRECISION) + builtin_define ("__RECIP_PRECISION__"); /* Tell users they can use __builtin_bswap{16,64}. 
*/ builtin_define ("__HAVE_BSWAP__"); @@ -479,10 +489,22 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, RS6000_BTI_bool_V16QI, 0, 0 }, { ALTIVEC_BUILTIN_VEC_RE, ALTIVEC_BUILTIN_VREFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RE, VSX_BUILTIN_XVREDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_ROUND, ALTIVEC_BUILTIN_VRFIN, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RECIP, ALTIVEC_BUILTIN_VRECIPFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, + { ALTIVEC_BUILTIN_VEC_RECIP, VSX_BUILTIN_RECIP_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRT, ALTIVEC_BUILTIN_VRSQRTFP, + RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRT, VSX_BUILTIN_VEC_RSQRT_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_RSQRTE, ALTIVEC_BUILTIN_VRSQRTEFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, + { ALTIVEC_BUILTIN_VEC_RSQRTE, VSX_BUILTIN_XVRSQRTEDP, + RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_TRUNC, ALTIVEC_BUILTIN_VRFIZ, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 }, { ALTIVEC_BUILTIN_VEC_TRUNC, VSX_BUILTIN_XVRDPIZ, diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 43ed634495b..3f022862332 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -106,9 +106,8 @@ extern void rs6000_split_compare_and_swap (rtx, rtx, rtx, rtx, rtx); extern void rs6000_expand_compare_and_swapqhi (rtx, rtx, rtx, rtx); extern void rs6000_split_compare_and_swapqhi (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_split_lock_test_and_set (rtx, rtx, rtx, rtx); -extern void rs6000_emit_swdivsf (rtx, rtx, rtx); -extern void rs6000_emit_swdivdf (rtx, rtx, rtx); -extern void rs6000_emit_swrsqrtsf (rtx, rtx); +extern void rs6000_emit_swdiv (rtx, rtx, rtx, bool); +extern void rs6000_emit_swrsqrt (rtx, rtx); extern void output_toc 
(FILE *, rtx, int, enum machine_mode); extern rtx rs6000_longcall_ref (rtx); extern void rs6000_fatal_bad_address (rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index a7434ca5257..9bfaf54c2a2 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -316,6 +316,61 @@ int rs6000_vector_align[NUM_MACHINE_MODES]; /* Map selected modes to types for builtins. */ static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; + +/* What modes to automatically generate reciprocal divide estimate (fre) and + reciprocal sqrt (frsqrte) for. */ +unsigned char rs6000_recip_bits[MAX_MACHINE_MODE]; + +/* Masks to determine which reciprocal estimate instructions to generate + automatically. */ +enum rs6000_recip_mask { + RECIP_SF_DIV = 0x001, /* Use divide estimate */ + RECIP_DF_DIV = 0x002, + RECIP_V4SF_DIV = 0x004, + RECIP_V2DF_DIV = 0x008, + + RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */ + RECIP_DF_RSQRT = 0x020, + RECIP_V4SF_RSQRT = 0x040, + RECIP_V2DF_RSQRT = 0x080, + + /* Various combination of flags for -mrecip=xxx. */ + RECIP_NONE = 0, + RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT + | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT), + + RECIP_HIGH_PRECISION = RECIP_ALL, + + /* On low precision machines like the power5, don't enable double precision + reciprocal square root estimate, since it isn't accurate enough. */ + RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT)) +}; + +static unsigned int rs6000_recip_control; +static const char *rs6000_recip_name; + +/* -mrecip options.
*/ +static struct +{ + const char *string; /* option name */ + unsigned int mask; /* mask bits to set */ +} recip_options[] = { + { "all", RECIP_ALL }, + { "none", RECIP_NONE }, + { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV + | RECIP_V2DF_DIV) }, + { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) }, + { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) }, + { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT + | RECIP_V2DF_RSQRT) }, + { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) }, + { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) }, +}; + +/* 2 argument gen function typedef. */ +typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx); + /* Target cpu costs. */ @@ -1807,6 +1862,27 @@ rs6000_debug_reg_global (void) if (nl) fputs (nl, stderr); + if (rs6000_recip_control) + { + fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control); + + for (m = 0; m < NUM_MACHINE_MODES; ++m) + if (rs6000_recip_bits[m]) + { + fprintf (stderr, + "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n", + GET_MODE_NAME (m), + (RS6000_RECIP_AUTO_RE_P (m) + ? "auto" + : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")), + (RS6000_RECIP_AUTO_RSQRTE_P (m) + ? "auto" + : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none"))); + } + + fputs ("\n", stderr); + } + switch (rs6000_sched_costly_dep) { case max_dep_latency: @@ -2014,8 +2090,9 @@ rs6000_init_hard_regno_mode_ok (void) rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; - if (TARGET_VSX_SCALAR_DOUBLE) - rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; + rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY + ? 
VSX_REGS + : FLOAT_REGS); } if (TARGET_ALTIVEC) @@ -2093,8 +2170,111 @@ rs6000_init_hard_regno_mode_ok (void) if (TARGET_E500_DOUBLE) rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1; + /* Calculate which modes to automatically generate code to use the + reciprocal divide and square root instructions. In the future, possibly + automatically generate the instructions even if the user did not specify + -mrecip. The older machines double precision reciprocal sqrt estimate is + not accurate enough. */ + memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits)); + if (TARGET_FRES) + rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE; + if (TARGET_FRE) + rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE; + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) + rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE; + if (VECTOR_UNIT_VSX_P (V2DFmode)) + rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE; + + if (TARGET_FRSQRTES) + rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + if (TARGET_FRSQRTE) + rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) + rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + if (VECTOR_UNIT_VSX_P (V2DFmode)) + rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; + + if (rs6000_recip_control) + { + if (!TARGET_FUSED_MADD) + warning (0, "-mrecip requires -mfused-madd"); + if (!flag_finite_math_only) + warning (0, "-mrecip requires -ffinite-math or -ffast-math"); + if (flag_trapping_math) + warning (0, "-mrecip requires -fno-trapping-math or -ffast-math"); + if (!flag_reciprocal_math) + warning (0, "-mrecip requires -freciprocal-math or -ffast-math"); + if (TARGET_FUSED_MADD && flag_finite_math_only && !flag_trapping_math + && flag_reciprocal_math) + { + if (RS6000_RECIP_HAVE_RE_P (SFmode) + && (rs6000_recip_control & RECIP_SF_DIV) != 0) + rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RE_P (DFmode) + &&
(rs6000_recip_control & RECIP_DF_DIV) != 0) + rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RE_P (V4SFmode) + && (rs6000_recip_control & RECIP_V4SF_DIV) != 0) + rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RE_P (V2DFmode) + && (rs6000_recip_control & RECIP_V2DF_DIV) != 0) + rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode) + && (rs6000_recip_control & RECIP_SF_RSQRT) != 0) + rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode) + && (rs6000_recip_control & RECIP_DF_RSQRT) != 0) + rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode) + && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0) + rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + + if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode) + && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0) + rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; + } + } + if (TARGET_DEBUG_REG) rs6000_debug_reg_global (); + + if (TARGET_DEBUG_COST || TARGET_DEBUG_REG) + fprintf (stderr, + "SImode variable mult cost = %d\n" + "SImode constant mult cost = %d\n" + "SImode short constant mult cost = %d\n" + "DImode multipliciation cost = %d\n" + "SImode division cost = %d\n" + "DImode division cost = %d\n" + "Simple fp operation cost = %d\n" + "DFmode multiplication cost = %d\n" + "SFmode division cost = %d\n" + "DFmode division cost = %d\n" + "cache line size = %d\n" + "l1 cache size = %d\n" + "l2 cache size = %d\n" + "simultaneous prefetches = %d\n" + "\n", + rs6000_cost->mulsi, + rs6000_cost->mulsi_const, + rs6000_cost->mulsi_const9, + rs6000_cost->muldi, + rs6000_cost->divsi, + rs6000_cost->divdi, + rs6000_cost->fp, + rs6000_cost->dmul, + rs6000_cost->sdiv, + rs6000_cost->ddiv, + rs6000_cost->cache_line_size, + rs6000_cost->l1_cache_size, + rs6000_cost->l2_cache_size, + 
rs6000_cost->simultaneous_prefetches); } #if TARGET_MACHO @@ -2271,15 +2451,16 @@ rs6000_override_options (const char *default_cpu) | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND}, {"power6", PROCESSOR_POWER6, POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT - | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP}, + | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP + | MASK_RECIP_PRECISION}, {"power6x", PROCESSOR_POWER6, POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP - | MASK_MFPGPR}, + | MASK_MFPGPR | MASK_RECIP_PRECISION}, {"power7", PROCESSOR_POWER7, POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD - | MASK_VSX}, /* Don't add MASK_ISEL by default */ + | MASK_VSX| MASK_RECIP_PRECISION}, /* Don't add MASK_ISEL by default */ {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK}, {"powerpc64", PROCESSOR_POWERPC64, POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}, @@ -2307,7 +2488,24 @@ rs6000_override_options (const char *default_cpu) | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW | MASK_DLMZB | MASK_CMPB | MASK_MFPGPR | MASK_DFP - | MASK_POPCNTD | MASK_VSX | MASK_ISEL | MASK_NO_UPDATE) + | MASK_POPCNTD | MASK_VSX | MASK_ISEL | MASK_NO_UPDATE + | MASK_RECIP_PRECISION) + }; + + /* Masks for instructions set at various powerpc ISAs. */ + enum { + ISA_2_1_MASKS = MASK_MFCRF, + ISA_2_2_MASKS = (ISA_2_1_MASKS | MASK_POPCNTB | MASK_FPRND), + + /* For ISA 2.05, do not add MFPGPR, since it isn't in ISA 2.06, and + don't add ALTIVEC, since in general it isn't a win on power6. */ + ISA_2_5_MASKS = (ISA_2_2_MASKS | MASK_CMPB | MASK_RECIP_PRECISION + | MASK_DFP), + + /* For ISA 2.06, don't add ISEL, since in general it isn't a win, but + altivec is a win so enable it. 
*/ + ISA_2_6_MASKS = (ISA_2_5_MASKS | MASK_ALTIVEC | MASK_POPCNTD + | MASK_VSX | MASK_RECIP_PRECISION) }; /* Numerous experiment shows that IRA based loop pressure @@ -2449,10 +2647,17 @@ rs6000_override_options (const char *default_cpu) warning (0, msg); target_flags &= ~ MASK_VSX; } - else if (TARGET_VSX && !TARGET_ALTIVEC) - target_flags |= MASK_ALTIVEC; } + /* For the newer switches (vsx, dfp, etc.) set some of the older options, + unless the user explicitly used the -mno-