From 66180ff3fe8274c23dbbdfa6f0c61ccbf2715195 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Nov 2005 10:39:36 +0000 Subject: [PATCH] re PR target/24230 (ICE in extract_insn with altivec) 2005-11-07 Paolo Bonzini PR target/24230 * config/rs6000/rs6000.c (easy_vector_splat_const, easy_vector_same, gen_easy_vector_constant_add_self): Delete. (vspltis_constant, easy_altivec_constant, gen_easy_altivec_constant): New. (output_vec_const_move): Use gen_easy_altivec_constant. (rs6000_expand_vector_init): Do not emit a set of a VEC_DUPLICATE. * config/rs6000/predicates.md (easy_vector_constant): Reorganize tests. (easy_vector_constant_add_self): Rewritten. * config/rs6000/rs6000-protos.h (easy_vector_splat_const, easy_vector_same, gen_easy_vector_constant_add_self): Remove prototype. (easy_altivec_constant, gen_easy_altivec_constant): Add prototype. testsuite: 2005-11-07 Paolo Bonzini PR target/24230 * gcc.target/powerpc/altivec-consts.c, gcc.target/powerpc/altivec-splat.c: New testcase. From-SVN: r106588 --- gcc/ChangeLog | 20 ++ gcc/config/rs6000/altivec.md | 59 ++-- gcc/config/rs6000/predicates.md | 82 +++--- gcc/config/rs6000/rs6000-protos.h | 5 +- gcc/config/rs6000/rs6000.c | 218 +++++++++------ gcc/config/rs6000/rs6000.h | 3 +- gcc/testsuite/ChangeLog | 9 + gcc/testsuite/gcc.target/powerpc/altivec-consts.c | 320 ++++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/altivec-splat.c | 48 ++++ 9 files changed, 601 insertions(+), 163 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-consts.c create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-splat.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 17d02b270bb..0fce2cf381f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,25 @@ 2005-11-07 Paolo Bonzini + PR target/24230 + + * config/rs6000/rs6000.c (easy_vector_splat_const, easy_vector_same, + gen_easy_vector_constant_add_self): Delete. + (vspltis_constant, easy_altivec_constant, gen_easy_altivec_constant): + New. 
+ (output_vec_const_move): Use gen_easy_altivec_constant. + (rs6000_expand_vector_init): Do not emit a set of a VEC_DUPLICATE. + * config/rs6000/predicates.md (easy_vector_constant): Reorganize tests. + (easy_vector_constant_add_self): Rewritten. + * config/rs6000/rs6000-protos.h (easy_vector_splat_const, + easy_vector_same, gen_easy_vector_constant_add_self): Remove prototype. + (easy_altivec_constant, gen_easy_altivec_constant): Add prototype. + * config/rs6000/altivec.md (easy_vector_constant_add_self splitters): + Macroize and adjust for the other changes. + +2005-11-07 Paolo Bonzini + + PR c/24599 + * c-typeck.c (build_c_cast): Try using a shared constant, and see if TREE_OVERFLOW or TREE_CONSTANT_OVERFLOW really changed. diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 97e5f4c26ba..26ec2be5cae 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -202,18 +202,6 @@ }) (define_split - [(set (match_operand:V4SI 0 "altivec_register_operand" "") - (match_operand:V4SI 1 "easy_vector_constant_add_self" ""))] - "TARGET_ALTIVEC && reload_completed" - [(set (match_dup 0) (match_dup 3)) - (set (match_dup 0) - (plus:V4SI (match_dup 0) - (match_dup 0)))] -{ - operands[3] = gen_easy_vector_constant_add_self (operands[1]); -}) - -(define_split [(set (match_operand:V8HI 0 "nonimmediate_operand" "") (match_operand:V8HI 1 "input_operand" ""))] "TARGET_ALTIVEC && reload_completed @@ -222,18 +210,6 @@ { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_split - [(set (match_operand:V8HI 0 "altivec_register_operand" "") - (match_operand:V8HI 1 "easy_vector_constant_add_self" ""))] - "TARGET_ALTIVEC && reload_completed" - [(set (match_dup 0) (match_dup 3)) - (set (match_dup 0) - (plus:V8HI (match_dup 0) - (match_dup 0)))] -{ - operands[3] = gen_easy_vector_constant_add_self (operands[1]); -}) - -(define_split [(set (match_operand:V16QI 0 "nonimmediate_operand" "") (match_operand:V16QI 1 "input_operand" 
""))] "TARGET_ALTIVEC && reload_completed @@ -242,18 +218,6 @@ { rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) (define_split - [(set (match_operand:V16QI 0 "altivec_register_operand" "") - (match_operand:V16QI 1 "easy_vector_constant_add_self" ""))] - "TARGET_ALTIVEC && reload_completed" - [(set (match_dup 0) (match_dup 3)) - (set (match_dup 0) - (plus:V16QI (match_dup 0) - (match_dup 0)))] -{ - operands[3] = gen_easy_vector_constant_add_self (operands[1]); -}) - -(define_split [(set (match_operand:V4SF 0 "nonimmediate_operand" "") (match_operand:V4SF 1 "input_operand" ""))] "TARGET_ALTIVEC && reload_completed @@ -263,6 +227,29 @@ rs6000_split_multireg_move (operands[0], operands[1]); DONE; }) +(define_split + [(set (match_operand:VI 0 "altivec_register_operand" "") + (match_operand:VI 1 "easy_vector_constant_add_self" ""))] + "TARGET_ALTIVEC && reload_completed" + [(set (match_dup 0) (match_dup 3)) + (set (match_dup 0) (plus:VI (match_dup 0) + (match_dup 0)))] +{ + rtx dup = gen_easy_altivec_constant (operands[1]); + rtx const_vec; + + /* Divide the operand of the resulting VEC_DUPLICATE, and use + simplify_rtx to make a CONST_VECTOR. 
*/ + XEXP (dup, 0) = simplify_const_binary_operation (ASHIFTRT, QImode, + XEXP (dup, 0), const1_rtx); + const_vec = simplify_rtx (dup); + + if (GET_MODE (const_vec) == mode) + operands[3] = const_vec; + else + operands[3] = gen_lowpart (mode, const_vec); +}) + (define_insn "get_vrsave_internal" [(set (match_operand:SI 0 "register_operand" "=r") (unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))] diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index da813dad5a7..4ce00b0674d 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -271,59 +271,55 @@ (define_predicate "easy_vector_constant" (match_code "const_vector") { - int cst, cst2; - - if (!TARGET_ALTIVEC && !TARGET_SPE) - return 0; - - if (zero_constant (op, mode) - && ((TARGET_ALTIVEC && ALTIVEC_VECTOR_MODE (mode)) - || (TARGET_SPE && SPE_VECTOR_MODE (mode)))) - return 1; - - if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) - return 0; - - if (TARGET_SPE && mode == V1DImode) - return 0; + if (ALTIVEC_VECTOR_MODE (mode)) + { + if (zero_constant (op, mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; - cst = INTVAL (CONST_VECTOR_ELT (op, 0)); - cst2 = INTVAL (CONST_VECTOR_ELT (op, 1)); - - /* Limit SPE vectors to 15 bits signed. These we can generate with: - li r0, CONSTANT1 - evmergelo r0, r0, r0 - li r0, CONSTANT2 - - I don't know how efficient it would be to allow bigger constants, - considering we'll have an extra 'ori' for every 'li'. I doubt 5 - instructions is better than a 64-bit memory load, but I don't - have the e500 timing specs. 
*/ - if (TARGET_SPE && mode == V2SImode - && cst >= -0x7fff && cst <= 0x7fff - && cst2 >= -0x7fff && cst2 <= 0x7fff) - return 1; + return easy_altivec_constant (op, mode); + } - if (TARGET_ALTIVEC - && easy_vector_same (op, mode)) + if (SPE_VECTOR_MODE (mode)) { - cst = easy_vector_splat_const (cst, mode); - if (EASY_VECTOR_15_ADD_SELF (cst) - || EASY_VECTOR_15 (cst)) - return 1; + int cst, cst2; + if (zero_constant (op, mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + + /* Limit SPE vectors to 15 bits signed. These we can generate with: + li r0, CONSTANT1 + evmergelo r0, r0, r0 + li r0, CONSTANT2 + + I don't know how efficient it would be to allow bigger constants, + considering we'll have an extra 'ori' for every 'li'. I doubt 5 + instructions is better than a 64-bit memory load, but I don't + have the e500 timing specs. */ + if (mode == V2SImode) + { + cst = INTVAL (CONST_VECTOR_ELT (op, 0)); + cst2 = INTVAL (CONST_VECTOR_ELT (op, 1)); + return cst >= -0x7fff && cst <= 0x7fff + && cst2 >= -0x7fff && cst2 <= 0x7fff; + } } - return 0; + + return false; }) ;; Same as easy_vector_constant but only for EASY_VECTOR_15_ADD_SELF. (define_predicate "easy_vector_constant_add_self" (and (match_code "const_vector") (and (match_test "TARGET_ALTIVEC") - (and (match_test "easy_vector_same (op, mode)") - (match_test "EASY_VECTOR_15_ADD_SELF - (easy_vector_splat_const - (INTVAL (CONST_VECTOR_ELT (op, 0)), - mode))"))))) + (match_test "easy_altivec_constant (op, mode)"))) +{ + rtx last = CONST_VECTOR_ELT (op, GET_MODE_NUNITS (mode) - 1); + HOST_WIDE_INT val = (char) (INTVAL (last) & 255); + return EASY_VECTOR_15_ADD_SELF (val); +}) ;; Return 1 if operand is constant zero (scalars and vectors). 
(define_predicate "zero_constant" diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index b946c90d939..9911e503fa1 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -32,8 +32,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int); extern void rs6000_va_start (tree, rtx); #endif /* TREE_CODE */ -extern int easy_vector_same (rtx, enum machine_mode); -extern int easy_vector_splat_const (int, enum machine_mode); +extern bool easy_altivec_constant (rtx, enum machine_mode); extern bool macho_lo_sum_memory_operand (rtx, enum machine_mode); extern int num_insns_constant (rtx, enum machine_mode); extern int num_insns_constant_wide (HOST_WIDE_INT); @@ -48,7 +47,7 @@ extern bool rs6000_legitimate_small_data_p (enum machine_mode, rtx); extern rtx rs6000_got_register (rtx); extern rtx find_addr_reg (rtx); -extern rtx gen_easy_vector_constant_add_self (rtx); +extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); extern void rs6000_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index ab00425f817..f9b46e358ec 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -2042,73 +2042,150 @@ num_insns_constant (rtx op, enum machine_mode mode) } } -/* Returns the constant for the splat instruction, if exists. */ -int -easy_vector_splat_const (int cst, enum machine_mode mode) +/* Return true if OP can be synthesized with a particular vspltisb, vspltish + or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used + depends on STEP and COPIES, one of which will be 1. If COPIES > 1, + all items are set to the same value and contain COPIES replicas of the + vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's + operand and the others are set to the value of the operand's msb. 
*/ + +static bool +vspltis_constant (rtx op, unsigned step, unsigned copies) { - switch (mode) + enum machine_mode mode = GET_MODE (op); + enum machine_mode inner = GET_MODE_INNER (mode); + + unsigned i; + unsigned nunits = GET_MODE_NUNITS (mode); + unsigned bitsize = GET_MODE_BITSIZE (inner); + unsigned mask = GET_MODE_MASK (inner); + + rtx last = CONST_VECTOR_ELT (op, nunits - 1); + HOST_WIDE_INT val = INTVAL (last); + HOST_WIDE_INT splat_val = val; + HOST_WIDE_INT msb_val = val > 0 ? 0 : -1; + + /* Construct the value to be splatted, if possible. If not, return 0. */ + for (i = 2; i <= copies; i *= 2) { - case V4SImode: - if (EASY_VECTOR_15 (cst) - || EASY_VECTOR_15_ADD_SELF (cst)) - return cst; - if ((cst & 0xffff) != ((cst >> 16) & 0xffff)) - break; - cst = cst >> 16; - /* Fall thru */ + HOST_WIDE_INT small_val; + bitsize /= 2; + small_val = splat_val >> bitsize; + mask >>= bitsize; + if (splat_val != ((small_val << bitsize) | (small_val & mask))) + return false; + splat_val = small_val; + } - case V8HImode: - if (EASY_VECTOR_15 (cst) - || EASY_VECTOR_15_ADD_SELF (cst)) - return cst; - if ((cst & 0xff) != ((cst >> 8) & 0xff)) - break; - cst = cst >> 8; - /* Fall thru */ + /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */ + if (EASY_VECTOR_15 (splat_val)) + ; - case V16QImode: - if (EASY_VECTOR_15 (cst) - || EASY_VECTOR_15_ADD_SELF (cst)) - return cst; - default: - break; + /* Also check if we can splat, and then add the result to itself. Do so if + the value is positive, or if the splat instruction is using OP's mode; + for splat_val < 0, the splat and the add should use the same mode. */ + else if (EASY_VECTOR_15_ADD_SELF (splat_val) + && (splat_val >= 0 || (step == 1 && copies == 1))) + ; + + else + return false; + + /* Check if VAL is present in every STEP-th element, and the + other elements are filled with its most significant bit.
*/ + for (i = 0; i < nunits - 1; ++i) + { + HOST_WIDE_INT desired_val; + if (((i + 1) & (step - 1)) == 0) + desired_val = val; + else + desired_val = msb_val; + + if (desired_val != INTVAL (CONST_VECTOR_ELT (op, i))) + return false; } - return 0; + + return true; } -/* Return nonzero if all elements of a vector have the same value. */ -int -easy_vector_same (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED) +/* Return true if OP is of the given MODE and can be synthesized + with a vspltisb, vspltish or vspltisw. */ + +bool +easy_altivec_constant (rtx op, enum machine_mode mode) { - int units, i, cst; + unsigned step, copies; - units = CONST_VECTOR_NUNITS (op); + if (mode == VOIDmode) + mode = GET_MODE (op); + else if (mode != GET_MODE (op)) + return false; - cst = INTVAL (CONST_VECTOR_ELT (op, 0)); - for (i = 1; i < units; ++i) - if (INTVAL (CONST_VECTOR_ELT (op, i)) != cst) - break; - if (i == units && easy_vector_splat_const (cst, mode)) - return 1; - return 0; + /* Start with a vspltisw. */ + step = GET_MODE_NUNITS (mode) / 4; + copies = 1; + + if (vspltis_constant (op, step, copies)) + return true; + + /* Then try with a vspltish. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return true; + + /* And finally a vspltisb. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return true; + + return false; } -/* Generate easy_vector_constant out of a easy_vector_constant_add_self. */ +/* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose + result is OP. Abort if it is not possible. 
*/ rtx -gen_easy_vector_constant_add_self (rtx op) +gen_easy_altivec_constant (rtx op) { - int i, units; - rtvec v; - units = GET_MODE_NUNITS (GET_MODE (op)); - v = rtvec_alloc (units); + enum machine_mode mode = GET_MODE (op); + int nunits = GET_MODE_NUNITS (mode); + rtx last = CONST_VECTOR_ELT (op, nunits - 1); + unsigned step = nunits / 4; + unsigned copies = 1; + + /* Start with a vspltisw. */ + if (vspltis_constant (op, step, copies)) + return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last)); + + /* Then try with a vspltish. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last)); + + /* And finally a vspltisb. */ + if (step == 1) + copies <<= 1; + else + step >>= 1; + + if (vspltis_constant (op, step, copies)) + return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last)); - for (i = 0; i < units; i++) - RTVEC_ELT (v, i) = - GEN_INT (INTVAL (CONST_VECTOR_ELT (op, i)) >> 1); - return gen_rtx_raw_CONST_VECTOR (GET_MODE (op), v); + gcc_unreachable (); } const char * @@ -2127,44 +2204,26 @@ output_vec_const_move (rtx *operands) if (TARGET_ALTIVEC) { + rtx splat_vec; if (zero_constant (vec, mode)) return "vxor %0,%0,%0"; - gcc_assert (easy_vector_constant (vec, mode)); + splat_vec = gen_easy_altivec_constant (vec); + gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE); + operands[1] = XEXP (splat_vec, 0); + if (!EASY_VECTOR_15 (INTVAL (operands[1]))) + return "#"; - operands[1] = GEN_INT (cst); - switch (mode) + switch (GET_MODE (splat_vec)) { case V4SImode: - if (EASY_VECTOR_15 (cst)) - { - operands[1] = GEN_INT (cst); - return "vspltisw %0,%1"; - } - else if (EASY_VECTOR_15_ADD_SELF (cst)) - return "#"; - cst = cst >> 16; - /* Fall thru */ + return "vspltisw %0,%1"; case V8HImode: - if (EASY_VECTOR_15 (cst)) - { - operands[1] = GEN_INT (cst); - return "vspltish %0,%1"; - } - else if (EASY_VECTOR_15_ADD_SELF (cst)) - return "#"; - 
cst = cst >> 8; - /* Fall thru */ + return "vspltish %0,%1"; case V16QImode: - if (EASY_VECTOR_15 (cst)) - { - operands[1] = GEN_INT (cst); - return "vspltisb %0,%1"; - } - else if (EASY_VECTOR_15_ADD_SELF (cst)) - return "#"; + return "vspltisb %0,%1"; default: gcc_unreachable (); @@ -2220,11 +2279,10 @@ rs6000_expand_vector_init (rtx target, rtx vals) gen_rtx_XOR (mode, target, target))); return; } - else if (mode != V4SFmode && easy_vector_same (vals, mode)) + else if (mode != V4SFmode && easy_vector_constant (vals, mode)) { /* Splat immediate. */ - x = gen_rtx_VEC_DUPLICATE (mode, CONST_VECTOR_ELT (vals, 0)); - emit_insn (gen_rtx_SET (VOIDmode, target, x)); + emit_insn (gen_rtx_SET (VOIDmode, target, vals)); return; } else if (all_same) diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 679ccee8144..35843029da8 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1689,7 +1689,8 @@ typedef struct rs6000_args && !rs6000_tls_referenced_p (X)) #define EASY_VECTOR_15(n) ((n) >= -16 && (n) <= 15) -#define EASY_VECTOR_15_ADD_SELF(n) ((n) >= 0x10 && (n) <= 0x1e && !((n) & 1)) +#define EASY_VECTOR_15_ADD_SELF(n) (!EASY_VECTOR_15((n)) \ + && EASY_VECTOR_15((n) >> 1)) /* The macros REG_OK_FOR..._P assume that the arg is a REG rtx and check its validity for a certain class. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index befbe80310b..53cb4811078 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,5 +1,14 @@ 2005-11-07 Paolo Bonzini + PR target/24230 + + * gcc.target/powerpc/altivec-consts.c, + gcc.target/powerpc/altivec-splat.c: New testcase. + +2005-11-07 Paolo Bonzini + + PR c/24599 + * gcc.dg/overflow-2.c: New testcase. 
2005-11-07 Jakub Jelinek diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-consts.c b/gcc/testsuite/gcc.target/powerpc/altivec-consts.c new file mode 100644 index 00000000000..2bea3230b89 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-consts.c @@ -0,0 +1,320 @@ +/* { dg-do run { target powerpc*-*-* } } */ +/* { dg-options "-maltivec -mabi=altivec -O2" } */ + +/* Check that "easy" AltiVec constants are correctly synthesized. */ + +#include "altivec_check.h" + +extern void abort (void); + +typedef __attribute__ ((vector_size (16))) unsigned char v16qi; +typedef __attribute__ ((vector_size (16))) unsigned short v8hi; +typedef __attribute__ ((vector_size (16))) unsigned int v4si; + +char w[16] __attribute__((aligned(16))); + + +/* Emulate the vspltis? instructions on a 16-byte array of chars. */ + +void vspltisb (char *v, char val) +{ + int i; + for (i = 0; i < 16; i++) + v[i] = val; +} + +void vspltish (char *v, char val) +{ + int i; + for (i = 0; i < 16; i += 2) + v[i] = val >> 7, v[i + 1] = val; +} + +void vspltisw (char *v, char val) +{ + int i; + for (i = 0; i < 16; i += 4) + v[i] = v[i + 1] = v[i + 2] = val >> 7, v[i + 3] = val; +} + + +/* Use three different check functions for each mode-instruction pair. + The callers have no typecasting and no addressable vectors, to make + the test more robust. */ + +void __attribute__ ((noinline)) check_v16qi (v16qi v1, char *v2) +{ + if (memcmp (&v1, v2, 16)) + abort (); +} + +void __attribute__ ((noinline)) check_v8hi (v8hi v1, char *v2) +{ + if (memcmp (&v1, v2, 16)) + abort (); +} + +void __attribute__ ((noinline)) check_v4si (v4si v1, char *v2) +{ + if (memcmp (&v1, v2, 16)) + abort (); +} + + +/* V16QI tests. 
*/ + +void v16qi_vspltisb () +{ + v16qi v = { 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; + vspltisb (w, 15); + check_v16qi (v, w); +} + +void v16qi_vspltisb_neg () +{ + v16qi v = { -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5 }; + vspltisb (w, -5); + check_v16qi (v, w); +} + +void v16qi_vspltisb_addself () +{ + v16qi v = { 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 }; + vspltisb (w, 30); + check_v16qi (v, w); +} + +void v16qi_vspltisb_neg_addself () +{ + v16qi v = { -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24, -24 }; + vspltisb (w, -24); + check_v16qi (v, w); +} + +void v16qi_vspltish () +{ + v16qi v = { 0, 15, 0, 15, 0, 15, 0, 15, 0, 15, 0, 15, 0, 15, 0, 15 }; + vspltish (w, 15); + check_v16qi (v, w); +} + +void v16qi_vspltish_addself () +{ + v16qi v = { 0, 30, 0, 30, 0, 30, 0, 30, 0, 30, 0, 30, 0, 30, 0, 30 }; + vspltish (w, 30); + check_v16qi (v, w); +} + +void v16qi_vspltish_neg () +{ + v16qi v = { -1, -5, -1, -5, -1, -5, -1, -5, -1, -5, -1, -5, -1, -5, -1, -5 }; + vspltish (w, -5); + check_v16qi (v, w); +} + +void v16qi_vspltisw () +{ + v16qi v = { 0, 0, 0, 15, 0, 0, 0, 15, 0, 0, 0, 15, 0, 0, 0, 15 }; + vspltisw (w, 15); + check_v16qi (v, w); +} + +void v16qi_vspltisw_addself () +{ + v16qi v = { 0, 0, 0, 30, 0, 0, 0, 30, 0, 0, 0, 30, 0, 0, 0, 30 }; + vspltisw (w, 30); + check_v16qi (v, w); +} + +void v16qi_vspltisw_neg () +{ + v16qi v = { -1, -1, -1, -5, -1, -1, -1, -5, -1, -1, -1, -5, -1, -1, -1, -5 }; + vspltisw (w, -5); + check_v16qi (v, w); +} + + +/* V8HI tests. 
*/ + +void v8hi_vspltisb () +{ + v8hi v = { 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F, 0x0F0F }; + vspltisb (w, 15); + check_v8hi (v, w); +} + +void v8hi_vspltisb_addself () +{ + v8hi v = { 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E, 0x1E1E }; + vspltisb (w, 30); + check_v8hi (v, w); +} + +void v8hi_vspltisb_neg () +{ + v8hi v = { 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB, 0xFBFB }; + vspltisb (w, -5); + check_v8hi (v, w); +} + +void v8hi_vspltish () +{ + v8hi v = { 15, 15, 15, 15, 15, 15, 15, 15 }; + vspltish (w, 15); + check_v8hi (v, w); +} + +void v8hi_vspltish_neg () +{ + v8hi v = { -5, -5, -5, -5, -5, -5, -5, -5 }; + vspltish (w, -5); + check_v8hi (v, w); +} + +void v8hi_vspltish_addself () +{ + v8hi v = { 30, 30, 30, 30, 30, 30, 30, 30 }; + vspltish (w, 30); + check_v8hi (v, w); +} + +void v8hi_vspltish_neg_addself () +{ + v8hi v = { -24, -24, -24, -24, -24, -24, -24, -24 }; + vspltish (w, -24); + check_v8hi (v, w); +} + +void v8hi_vspltisw () +{ + v8hi v = { 0, 15, 0, 15, 0, 15, 0, 15 }; + vspltisw (w, 15); + check_v8hi (v, w); +} + +void v8hi_vspltisw_addself () +{ + v8hi v = { 0, 30, 0, 30, 0, 30, 0, 30 }; + vspltisw (w, 30); + check_v8hi (v, w); +} + +void v8hi_vspltisw_neg () +{ + v8hi v = { -1, -5, -1, -5, -1, -5, -1, -5 }; + vspltisw (w, -5); + check_v8hi (v, w); +} + +/* V4SI tests. 
*/ + +void v4si_vspltisb () +{ + v4si v = { 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F, 0x0F0F0F0F }; + vspltisb (w, 15); + check_v4si (v, w); +} + +void v4si_vspltisb_addself () +{ + v4si v = { 0x1E1E1E1E, 0x1E1E1E1E, 0x1E1E1E1E, 0x1E1E1E1E }; + vspltisb (w, 30); + check_v4si (v, w); +} + +void v4si_vspltisb_neg () +{ + v4si v = { 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB, 0xFBFBFBFB }; + vspltisb (w, -5); + check_v4si (v, w); +} + +void v4si_vspltish () +{ + v4si v = { 0x000F000F, 0x000F000F, 0x000F000F, 0x000F000F }; + vspltish (w, 15); + check_v4si (v, w); +} + +void v4si_vspltish_addself () +{ + v4si v = { 0x001E001E, 0x001E001E, 0x001E001E, 0x001E001E }; + vspltish (w, 30); + check_v4si (v, w); +} + +void v4si_vspltish_neg () +{ + v4si v = { 0xFFFBFFFB, 0xFFFBFFFB, 0xFFFBFFFB, 0xFFFBFFFB }; + vspltish (w, -5); + check_v4si (v, w); +} + +void v4si_vspltisw () +{ + v4si v = { 15, 15, 15, 15 }; + vspltisw (w, 15); + check_v4si (v, w); +} + +void v4si_vspltisw_neg () +{ + v4si v = { -5, -5, -5, -5 }; + vspltisw (w, -5); + check_v4si (v, w); +} + +void v4si_vspltisw_addself () +{ + v4si v = { 30, 30, 30, 30 }; + vspltisw (w, 30); + check_v4si (v, w); +} + +void v4si_vspltisw_neg_addself () +{ + v4si v = { -24, -24, -24, -24 }; + vspltisw (w, -24); + check_v4si (v, w); +} + + + +int main () +{ + altivec_check (); /* Exit if hardware doesn't support AltiVec. 
*/ + + v16qi_vspltisb (); + v16qi_vspltisb_neg (); + v16qi_vspltisb_addself (); + v16qi_vspltisb_neg_addself (); + v16qi_vspltish (); + v16qi_vspltish_addself (); + v16qi_vspltish_neg (); + v16qi_vspltisw (); + v16qi_vspltisw_addself (); + v16qi_vspltisw_neg (); + + v8hi_vspltisb (); + v8hi_vspltisb_addself (); + v8hi_vspltisb_neg (); + v8hi_vspltish (); + v8hi_vspltish_neg (); + v8hi_vspltish_addself (); + v8hi_vspltish_neg_addself (); + v8hi_vspltisw (); + v8hi_vspltisw_addself (); + v8hi_vspltisw_neg (); + + v4si_vspltisb (); + v4si_vspltisb_addself (); + v4si_vspltisb_neg (); + v4si_vspltish (); + v4si_vspltish_addself (); + v4si_vspltish_neg (); + v4si_vspltisw (); + v4si_vspltisw_neg (); + v4si_vspltisw_addself (); + v4si_vspltisw_neg_addself (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-splat.c b/gcc/testsuite/gcc.target/powerpc/altivec-splat.c new file mode 100644 index 00000000000..7e2471fd0b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-splat.c @@ -0,0 +1,48 @@ +/* { dg-do compile { target powerpc*-*-* } } */ +/* { dg-options "-maltivec -mabi=altivec -O2" } */ + +/* Testcase by Richard Guenther and Steven Bosscher. + Check that "easy" AltiVec constants are correctly synthesized + if they need to be reloaded. 
*/ + +typedef __attribute__ ((vector_size (16))) unsigned char v16qi; +typedef __attribute__ ((vector_size (16))) unsigned short v8hi; +typedef __attribute__ ((vector_size (16))) unsigned int v4si; + +#define REGLIST \ + "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", \ + "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", \ + "97", "98", "99", "100", "101", "102", "103", "104", "105", "106", \ + "107", "108" + + +#define TEST(a, result, b) \ + void a##_##b (int h) \ + { \ + volatile a tmp; \ + while (h-- > 0) \ + { \ + asm ("" : : : REGLIST); \ + tmp = (a) (result) __builtin_altivec_##b (5); \ + } \ + } \ + \ + void a##_##b##_neg (int h) \ + { \ + volatile a tmp; \ + while (h-- > 0) \ + { \ + asm ("" : : : REGLIST); \ + tmp = (a) (result) __builtin_altivec_##b (-5); \ + } \ + } + +TEST(v16qi, v16qi, vspltisb) +TEST(v16qi, v8hi, vspltish) +TEST(v16qi, v4si, vspltisw) +TEST(v8hi, v16qi, vspltisb) +TEST(v8hi, v8hi, vspltish) +TEST(v8hi, v4si, vspltisw) +TEST(v4si, v16qi, vspltisb) +TEST(v4si, v8hi, vspltish) +TEST(v4si, v4si, vspltisw) -- 2.11.4.GIT