From f2ccdf8e9d23d8304bd9bd45d974b96ca98eaefc Mon Sep 17 00:00:00 2001 From: wschmidt Date: Tue, 5 Jul 2016 17:50:22 +0000 Subject: [PATCH] [gcc] 2016-07-05 Michael Meissner Bill Schmidt * config/rs6000/rs6000-protos.h (rs6000_split_signbit): New prototype. * config/rs6000/rs6000.c (rs6000_split_signbit): New function. * config/rs6000/rs6000.md (UNSPEC_SIGNBIT): New constant. (SIGNBIT): New mode iterator. (Fsignbit): New mode attribute. (signbit2): Change operand1 to match FLOAT128 instead of IBM128; dispatch to gen_signbit{kf,tf}2_dm for __float128 when direct moves are available. (signbit2_dm): New define_insn_and_split). (signbit2_dm2): New define_insn. [gcc/testsuite] 2016-07-05 Michael Meissner Bill Schmidt * gcc.target/powerpc/signbit-1.c: New test. * gcc.target/powerpc/signbit-2.c: New test. * gcc.target/powerpc/signbit-3.c: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@238016 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog | 15 +++ gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 42 +++++++ gcc/config/rs6000/rs6000.md | 55 ++++++++- gcc/testsuite/ChangeLog | 7 ++ gcc/testsuite/gcc.target/powerpc/signbit-1.c | 16 +++ gcc/testsuite/gcc.target/powerpc/signbit-2.c | 18 +++ gcc/testsuite/gcc.target/powerpc/signbit-3.c | 172 +++++++++++++++++++++++++++ 8 files changed, 324 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/signbit-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/signbit-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/signbit-3.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index f189d6a6594..f3099046a83 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2016-07-05 Michael Meissner + Bill Schmidt + + * config/rs6000/rs6000-protos.h (rs6000_split_signbit): New + prototype. + * config/rs6000/rs6000.c (rs6000_split_signbit): New function. + * config/rs6000/rs6000.md (UNSPEC_SIGNBIT): New constant. + (SIGNBIT): New mode iterator. + (Fsignbit): New mode attribute. + (signbit2): Change operand1 to match FLOAT128 instead of + IBM128; dispatch to gen_signbit{kf,tf}2_dm for __float128 + when direct moves are available. + (signbit2_dm): New define_insn_and_split). + (signbit2_dm2): New define_insn. + 2016-07-05 Kyrylo Tkachov PR rtl-optimization/71594 diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 6b4d17801d0..0a47075d6bf 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -135,6 +135,7 @@ extern bool rs6000_emit_set_const (rtx, rtx); extern int rs6000_emit_cmove (rtx, rtx, rtx, rtx); extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx); extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx); +extern void rs6000_split_signbit (rtx, rtx); extern void rs6000_expand_atomic_compare_and_swap (rtx op[]); extern void rs6000_expand_atomic_exchange (rtx op[]); extern void rs6000_expand_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index dd77e1b15aa..d2a62bdd72b 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -23145,6 +23145,48 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1) emit_move_insn (dest, target); } +/* Split a signbit operation on 64-bit machines with direct move. Also allow + for the value to come from memory or if it is already loaded into a GPR. */ + +void +rs6000_split_signbit (rtx dest, rtx src) +{ + machine_mode d_mode = GET_MODE (dest); + machine_mode s_mode = GET_MODE (src); + rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest); + rtx shift_reg = dest_di; + + gcc_assert (REG_P (dest)); + gcc_assert (REG_P (src) || MEM_P (src)); + gcc_assert (s_mode == KFmode || s_mode == TFmode); + + if (MEM_P (src)) + { + rtx mem = (WORDS_BIG_ENDIAN + ? adjust_address (src, DImode, 0) + : adjust_address (src, DImode, 8)); + emit_insn (gen_rtx_SET (dest_di, mem)); + } + + else + { + unsigned int r = REGNO (src); + + /* If this is a VSX register, generate the special mfvsrd instruction + to get it in a GPR. Until we support SF and DF modes, that will + always be true. */ + gcc_assert (VSX_REGNO_P (r)); + + if (s_mode == KFmode) + emit_insn (gen_signbitkf2_dm2 (dest_di, src)); + else + emit_insn (gen_signbittf2_dm2 (dest_di, src)); + } + + emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63))); + return; +} + /* A subroutine of the atomic operation splitters. Jump to LABEL if COND is true. Mark the jump as unlikely to be taken. */ diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index a7615b1964b..46f73823392 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -147,6 +147,7 @@ UNSPEC_ROUND_TO_ODD UNSPEC_IEEE128_MOVE UNSPEC_IEEE128_CONVERT + UNSPEC_SIGNBIT ]) ;; @@ -508,6 +509,13 @@ (IF "TARGET_FLOAT128") (TF "TARGET_LONG_DOUBLE_128")]) +; Iterator for signbit on 64-bit machines with direct move +(define_mode_iterator SIGNBIT [(KF "FLOAT128_VECTOR_P (KFmode)") + (TF "FLOAT128_VECTOR_P (TFmode)")]) + +(define_mode_attr Fsignbit [(KF "wa") + (TF "wa")]) + ; Iterator for ISA 3.0 supported floating point types (define_mode_iterator FP_ISA3 [SF DF @@ -4567,7 +4575,7 @@ ;; when little-endian. (define_expand "signbit2" [(set (match_dup 2) - (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" ""))) + (float_truncate:DF (match_operand:FLOAT128 1 "gpc_reg_operand" ""))) (set (match_dup 3) (subreg:DI (match_dup 2) 0)) (set (match_dup 4) @@ -4575,8 +4583,20 @@ (set (match_operand:SI 0 "gpc_reg_operand" "") (match_dup 6))] "TARGET_HARD_FLOAT - && (TARGET_FPRS || TARGET_E500_DOUBLE)" + && (TARGET_FPRS || TARGET_E500_DOUBLE) + && (!FLOAT128_IEEE_P (mode) + || (TARGET_POWERPC64 && TARGET_DIRECT_MOVE))" { + if (FLOAT128_IEEE_P (mode)) + { + if (mode == KFmode) + emit_insn (gen_signbitkf2_dm (operands[0], operands[1])); + else if (mode == TFmode) + emit_insn (gen_signbittf2_dm (operands[0], operands[1])); + else + gcc_unreachable (); + DONE; + } operands[2] = gen_reg_rtx (DFmode); operands[3] = gen_reg_rtx (DImode); if (TARGET_POWERPC64) @@ -4624,6 +4644,37 @@ operands[5] = CONST0_RTX (mode); }) +;; Optimize signbit on 64-bit systems with direct move to avoid doing the store +;; and load. +(define_insn_and_split "signbit2_dm" + [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r,r") + (unspec:SI + [(match_operand:SIGNBIT 1 "input_operand" ",m,r")] + UNSPEC_SIGNBIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "#" + "&& reload_completed" + [(const_int 0)] +{ + rs6000_split_signbit (operands[0], operands[1]); + DONE; +} + [(set_attr "length" "8,8,12") + (set_attr "type" "mftgpr,load,integer")]) + +;; MODES_TIEABLE_P doesn't allow DImode to be tied with the various floating +;; point types, which makes normal SUBREG's problematical. Instead use a +;; special pattern to avoid using a normal movdi. +(define_insn "signbit2_dm2" + [(set (match_operand:DI 0 "gpc_reg_operand" "=r") + (unspec:DI [(match_operand:SIGNBIT 1 "gpc_reg_operand" "") + (const_int 0)] + UNSPEC_SIGNBIT))] + "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" + "mfvsrd %0,%x1" + [(set_attr "type" "mftgpr")]) + + ;; Use an unspec rather providing an if-then-else in RTL, to prevent the ;; compiler from optimizing -0.0 (define_insn "copysign3_fcpsgn" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 13e3d68e055..f6fb6cd7e9f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2016-07-05 Michael Meissner + Bill Schmidt + + * gcc.target/powerpc/signbit-1.c: New test. + * gcc.target/powerpc/signbit-2.c: New test. + * gcc.target/powerpc/signbit-3.c: New test. + 2016-07-05 Kyrylo Tkachov PR rtl-optimization/71594 diff --git a/gcc/testsuite/gcc.target/powerpc/signbit-1.c b/gcc/testsuite/gcc.target/powerpc/signbit-1.c new file mode 100644 index 00000000000..bdfeb702663 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/signbit-1.c @@ -0,0 +1,16 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */ +/* { dg-options "-mcpu=power8 -O2 -mfloat128" } */ + +int do_signbit_kf (__float128 a) { return __builtin_signbit (a); } +int do_signbit_if (__ibm128 a) { return __builtin_signbit (a); } +int do_signbit_tf (long double a) { return __builtin_signbit (a); } + +/* { dg-final { scan-assembler-not "stxvd2x" } } */ +/* { dg-final { scan-assembler-not "stxvw4x" } } */ +/* { dg-final { scan-assembler-not "stxsd" } } */ +/* { dg-final { scan-assembler-not "stxsdx" } } */ +/* { dg-final { scan-assembler-times "mfvsrd" 3 } } */ +/* { dg-final { scan-assembler-times "srdi" 3 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/signbit-2.c b/gcc/testsuite/gcc.target/powerpc/signbit-2.c new file mode 100644 index 00000000000..b5bd856d909 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/signbit-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } } */ +/* { dg-require-effective-target powerpc_p9vector_ok } */ +/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */ +/* { dg-options "-mcpu=power9 -O2 -mfloat128" } */ + +int do_signbit_kf (__float128 *a) { return __builtin_signbit (*a); } + +/* { dg-final { scan-assembler-not "stxvd2x" } } */ +/* { dg-final { scan-assembler-not "stxvw4x" } } */ +/* { dg-final { scan-assembler-not "stxsd" } } */ +/* { dg-final { scan-assembler-not "stxsdx" } } */ +/* { dg-final { scan-assembler-not "lxvd2x" } } */ +/* { dg-final { scan-assembler-not "lxvw4x" } } */ +/* { dg-final { scan-assembler-not "lxsd" } } */ +/* { dg-final { scan-assembler-not "lxsdx" } } */ +/* { dg-final { scan-assembler-times "ld" 1 } } */ +/* { dg-final { scan-assembler-times "srdi" 1 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/signbit-3.c b/gcc/testsuite/gcc.target/powerpc/signbit-3.c new file mode 100644 index 00000000000..cd64143fc2f --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/signbit-3.c @@ -0,0 +1,172 @@ +/* { dg-do run { target { powerpc*-*-linux* } } } */ +/* { dg-require-effective-target ppc_float128_sw } */ +/* { dg-options "-mcpu=power7 -O2 -mfloat128 -lm" } */ + +#ifdef DEBUG +#include +#endif + +#include +#include +#include +#include +#include + +#if defined(__BIG_ENDIAN__) +struct ieee128 { + uint64_t upper; + uint64_t lower; +}; + +#elif defined(__LITTLE_ENDIAN__) +struct ieee128 { + uint64_t lower; + uint64_t upper; +}; + +#else +#error "Unknown system" +#endif + +union ieee_union { + __float128 f128; + struct ieee128 st128; +}; + +#ifdef DEBUG +static int num_errors = 0; + +__attribute__((__noinline__)) +static void +failure (int expected, int got, __float128 x) +{ + unsigned sign; + unsigned exponent; + uint64_t mantissa1; + uint64_t mantissa2; + uint64_t upper; + uint64_t lower; + + union ieee_union u; + + u.f128 = x; + upper = u.st128.upper; + lower = u.st128.lower; + + sign = (unsigned)((upper >> 63) & 1); + exponent = (unsigned)((upper >> 48) & ((((uint64_t)1) << 16) - 1)); + mantissa1 = (upper & ((((uint64_t)1) << 48) - 1)); + mantissa2 = lower; + + printf ("Expected %d, got %d, %c 0x%.4x 0x%.12" PRIx64 " 0x%.16" PRIx64, + expected, got, + sign ? '-' : '+', + exponent, + mantissa1, + mantissa2); + + num_errors++; +} + +#else + +#define failure(E, G, F) abort () +#endif + +__attribute__((__noinline__)) +static void +test_signbit_arg (__float128 f128, int expected) +{ + int sign = __builtin_signbit (f128); + + if ((expected != 0 && sign == 0) + || (expected == 0 && sign != 0)) + failure (f128, expected, sign); +} + +__attribute__((__noinline__)) +static void +test_signbit_mem (__float128 *ptr, int expected) +{ + int sign = __builtin_signbit (*ptr); + + if ((expected != 0 && sign == 0) + || (expected == 0 && sign != 0)) + failure (*ptr, expected, sign); +} + +__attribute__((__noinline__)) +static void +test_signbit_gpr (__float128 *ptr, int expected) +{ + __float128 f128 = *ptr; + int sign; + + __asm__ (" # %0" : "+r" (f128)); + + sign = __builtin_signbit (f128); + if ((expected != 0 && sign == 0) + || (expected == 0 && sign != 0)) + failure (f128, expected, sign); +} + +__attribute__((__noinline__)) +static void +test_signbit (__float128 f128, int expected) +{ +#ifdef DEBUG + union ieee_union u; + u.f128 = f128; + printf ("Expecting %d, trying %-5g " + "(0x%.16" PRIx64 " 0x%.16" PRIx64 ")\n", + expected, (double)f128, + u.st128.upper, u.st128.lower); +#endif + + test_signbit_arg (f128, expected); + test_signbit_mem (&f128, expected); + test_signbit_gpr (&f128, expected); +} + +int +main (void) +{ + union ieee_union u; + + test_signbit (+0.0q, 0); + test_signbit (+1.0q, 0); + + test_signbit (-0.0q, 1); + test_signbit (-1.0q, 1); + + test_signbit (__builtin_copysign (__builtin_infq (), +1.0q), 0); + test_signbit (__builtin_copysign (__builtin_infq (), -1.0q), 1); + + test_signbit (__builtin_copysign (__builtin_nanq (""), +1.0q), 0); + test_signbit (__builtin_copysign (__builtin_nanq (""), -1.0q), 1); + + /* force the bottom double word to have specific bits in the 'sign' bit to + make sure we are picking the right word. */ + u.f128 = 1.0q; + u.st128.lower = 0ULL; + test_signbit (u.f128, 0); + + u.st128.lower = ~0ULL; + test_signbit (u.f128, 0); + + u.f128 = -1.0q; + u.st128.lower = 0ULL; + test_signbit (u.f128, 1); + + u.st128.lower = ~0ULL; + test_signbit (u.f128, 1); + +#ifdef DEBUG + printf ("%d error(s) were found\n", num_errors); + if (num_errors) + return num_errors; +#endif + + return 0; +} + -- 2.11.4.GIT