From a501acdaaacf5f3b1fa97242ff4f38cc1d87553d Mon Sep 17 00:00:00 2001
From: meissner
Date: Thu, 3 Feb 2011 05:42:19 +0000
Subject: [PATCH] Fix PR target/47272

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@169780 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog                                      |  50 ++
 gcc/config/rs6000/altivec.h                        |   4 +-
 gcc/config/rs6000/altivec.md                       |  18 +-
 gcc/config/rs6000/rs6000-builtin.def               |  26 +-
 gcc/config/rs6000/rs6000-c.c                       | 194 +++-
 gcc/config/rs6000/rs6000-protos.h                  |   4 +-
 gcc/config/rs6000/rs6000.c                         | 217 ++---
 gcc/config/rs6000/rs6000.h                         |   6 +-
 gcc/config/rs6000/vector.md                        |  39 +-
 gcc/config/rs6000/vsx.md                           |  13 +
 gcc/doc/extend.texi                                |  68 ++-
 gcc/testsuite/ChangeLog                            |  16 +
 .../gcc.target/powerpc/avoid-indexed-addresses.c   |   2 +-
 gcc/testsuite/gcc.target/powerpc/ppc32-abi-dfp-1.c | 184 +++---
 gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c | 649 +++++++++++----------
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-8.c   |  97 +++
 16 files changed, 1103 insertions(+), 484 deletions(-)
 rewrite gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c (60%)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-builtin-8.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e276dfdd607..1f16f47ea8d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,53 @@
+2011-02-02  Michael Meissner
+
+	PR target/47272
+	* doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions):
+	Document using vector double with the load/store builtins, and
+	that the load/store builtins always use Altivec instructions.
+
+	* config/rs6000/vector.md (vector_altivec_load_<mode>): New insns
+	to use altivec memory instructions, even on VSX.
+	(vector_altivec_store_<mode>): Ditto.
+
+	* config/rs6000/rs6000-protos.h (rs6000_address_for_altivec): New
+	function.
+
+	* config/rs6000/rs6000-c.c (altivec_overloaded_builtins): Add
+	V2DF, V2DI support to load/store overloaded builtins.
+
+	* config/rs6000/rs6000-builtin.def (ALTIVEC_BUILTIN_*): Add
+	altivec load/store builtins for V2DF/V2DI types.
+
+	* config/rs6000/rs6000.c (rs6000_option_override_internal): Don't
+	set avoid indexed addresses on power6 if -maltivec.
+	(altivec_expand_ld_builtin): Add V2DF, V2DI support, use
+	vector_altivec_load/vector_altivec_store builtins.
+	(altivec_expand_st_builtin): Ditto.
+	(altivec_expand_builtin): Add VSX memory builtins.
+	(rs6000_init_builtins): Add V2DI types to internal types.
+	(altivec_init_builtins): Add support for V2DF/V2DI altivec
+	load/store builtins.
+	(rs6000_address_for_altivec): Ensure memory address is appropriate
+	for Altivec.
+
+	* config/rs6000/vsx.md (vsx_load_<mode>): New expanders for
+	vec_vsx_ld and vec_vsx_st.
+	(vsx_store_<mode>): Ditto.
+
+	* config/rs6000/rs6000.h (RS6000_BTI_long_long): New type
+	variables to hold long long types for VSX vector memory builtins.
+	(RS6000_BTI_unsigned_long_long): Ditto.
+	(long_long_integer_type_internal_node): Ditto.
+	(long_long_unsigned_type_internal_node): Ditto.
+
+	* config/rs6000/altivec.md (UNSPEC_LVX): New UNSPEC.
+	(altivec_lvx_<mode>): Make altivec_lvx use a mode iterator.
+	(altivec_stvx_<mode>): Make altivec_stvx use a mode iterator.
+
+	* config/rs6000/altivec.h (vec_vsx_ld): Define VSX memory builtin
+	shortcuts.
+	(vec_vsx_st): Ditto.
+
 2011-02-02  Joseph Myers
 
 	* config/pa/pa-hpux10.opt: New.
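To make the distinction in the ChangeLog concrete, here is a minimal
user-level sketch (not part of the patch) of the two load flavors this
change documents, assuming a VSX-enabled compile (-mvsx) and altivec.h;
both overloads used below are the ones this patch adds:

#include <altivec.h>

/* vec_ld always expands to lvx, which ignores the low four bits of
   the address, so it can only ever see 16-byte-aligned storage.
   vec_vsx_ld expands to lxvd2x, which uses the full address.  */

vector double
load_aligned (const double *p)
{
  return vec_ld (0, p);		/* lvx: address masked to 16 bytes.  */
}

vector double
load_any (const double *p)
{
  return vec_vsx_ld (0, p);	/* lxvd2x: address used as given.  */
}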
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 93dd4f9a282..583731b9668 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -1,5 +1,5 @@
 /* PowerPC AltiVec include file.
-   Copyright (C) 2002, 2003, 2004, 2005, 2008, 2009, 2010
+   Copyright (C) 2002, 2003, 2004, 2005, 2008, 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Aldy Hernandez (aldyh@redhat.com).
    Rewritten by Paolo Bonzini (bonzini@gnu.org).
@@ -318,6 +318,8 @@
 #define vec_nearbyint __builtin_vec_nearbyint
 #define vec_rint __builtin_vec_rint
 #define vec_sqrt __builtin_vec_sqrt
+#define vec_vsx_ld __builtin_vec_vsx_ld
+#define vec_vsx_st __builtin_vec_vsx_st
 #endif
 
 /* Predicates.
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index d21d5762ce5..d7357ee3262 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -1,5 +1,5 @@
 ;; AltiVec patterns.
-;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
+;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;; Contributed by Aldy Hernandez (aldy@quesejoda.com)
@@ -96,7 +96,7 @@
    (UNSPEC_STVE         203)
    (UNSPEC_SET_VSCR     213)
    (UNSPEC_GET_VRSAVE   214)
-   ;; 215 deleted
+   (UNSPEC_LVX          215)
    (UNSPEC_REDUC_PLUS   217)
    (UNSPEC_VECSH        219)
    (UNSPEC_EXTEVEN_V4SI 220)
@@ -1750,17 +1750,19 @@
   "lvxl %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "altivec_lvx"
-  [(set (match_operand:V4SI 0 "register_operand" "=v")
-	(match_operand:V4SI 1 "memory_operand" "Z"))]
+(define_insn "altivec_lvx_<mode>"
+  [(parallel
+    [(set (match_operand:VM2 0 "register_operand" "=v")
+	  (match_operand:VM2 1 "memory_operand" "Z"))
+     (unspec [(const_int 0)] UNSPEC_LVX)])]
   "TARGET_ALTIVEC"
   "lvx %0,%y1"
   [(set_attr "type" "vecload")])
 
-(define_insn "altivec_stvx"
+(define_insn "altivec_stvx_<mode>"
   [(parallel
-    [(set (match_operand:V4SI 0 "memory_operand" "=Z")
-	  (match_operand:V4SI 1 "register_operand" "v"))
+    [(set (match_operand:VM2 0 "memory_operand" "=Z")
+	  (match_operand:VM2 1 "register_operand" "v"))
      (unspec [(const_int 0)] UNSPEC_STVX)])]
   "TARGET_ALTIVEC"
   "stvx %1,%y0"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 9f45a72e2c0..fd6321dcf96 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1,5 +1,5 @@
 /* Builtin functions for rs6000/powerpc.
-   Copyright (C) 2009, 2010
+   Copyright (C) 2009, 2010, 2011
    Free Software Foundation, Inc.
    Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
@@ -37,6 +37,10 @@
 RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_16qi, RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_16qi, RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_4sf,  RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_4sf,  RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2df,  RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2df,  RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_ST_INTERNAL_2di,  RS6000_BTC_MEM)
+RS6000_BUILTIN(ALTIVEC_BUILTIN_LD_INTERNAL_2di,  RS6000_BTC_MEM)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUBM,          RS6000_BTC_CONST)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUHM,          RS6000_BTC_CONST)
 RS6000_BUILTIN(ALTIVEC_BUILTIN_VADDUWM,          RS6000_BTC_CONST)
@@ -778,12 +782,20 @@
 RS6000_BUILTIN(PAIRED_BUILTIN_CMPU1,             RS6000_BTC_MISC)
 
 /* VSX builtins.
*/ RS6000_BUILTIN(VSX_BUILTIN_LXSDX, RS6000_BTC_MEM) -RS6000_BUILTIN(VSX_BUILTIN_LXVD2X, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DF, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_LXVD2X_V2DI, RS6000_BTC_MEM) RS6000_BUILTIN(VSX_BUILTIN_LXVDSX, RS6000_BTC_MEM) -RS6000_BUILTIN(VSX_BUILTIN_LXVW4X, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SF, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V4SI, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V8HI, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_LXVW4X_V16QI, RS6000_BTC_MEM) RS6000_BUILTIN(VSX_BUILTIN_STXSDX, RS6000_BTC_MEM) -RS6000_BUILTIN(VSX_BUILTIN_STXVD2X, RS6000_BTC_MEM) -RS6000_BUILTIN(VSX_BUILTIN_STXVW4X, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DF, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_STXVD2X_V2DI, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SF, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V4SI, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V8HI, RS6000_BTC_MEM) +RS6000_BUILTIN(VSX_BUILTIN_STXVW4X_V16QI, RS6000_BTC_MEM) RS6000_BUILTIN(VSX_BUILTIN_XSABSDP, RS6000_BTC_CONST) RS6000_BUILTIN(VSX_BUILTIN_XSADDDP, RS6000_BTC_FP_PURE) RS6000_BUILTIN(VSX_BUILTIN_XSCMPODP, RS6000_BTC_FP_PURE) @@ -983,8 +995,10 @@ RS6000_BUILTIN(VSX_BUILTIN_VEC_XXPERMDI, RS6000_BTC_MISC) RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSLDWI, RS6000_BTC_MISC) RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTD, RS6000_BTC_MISC) RS6000_BUILTIN(VSX_BUILTIN_VEC_XXSPLTW, RS6000_BTC_MISC) +RS6000_BUILTIN(VSX_BUILTIN_VEC_LD, RS6000_BTC_MISC) +RS6000_BUILTIN(VSX_BUILTIN_VEC_ST, RS6000_BTC_MISC) RS6000_BUILTIN_EQUATE(VSX_BUILTIN_OVERLOADED_LAST, - VSX_BUILTIN_VEC_XXSPLTW) + VSX_BUILTIN_VEC_ST) /* Combined VSX/Altivec builtins. */ RS6000_BUILTIN(VECTOR_BUILTIN_FLOAT_V4SI_V4SF, RS6000_BTC_FP_PURE) diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index f29373df867..3f4f90b236c 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -1000,6 +1000,15 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { VSX_BUILTIN_VEC_DIV, VSX_BUILTIN_XVDIVDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, + RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_LD, ALTIVEC_BUILTIN_LVX, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, @@ -1112,9 +1121,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, - RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_unsigned_V16QI, 0 }, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, + RS6000_BTI_V2DI, 
RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { ALTIVEC_BUILTIN_VEC_LDL, ALTIVEC_BUILTIN_LVXL, + RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 }, { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, @@ -1133,6 +1152,17 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSL, ALTIVEC_BUILTIN_LVSL, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, @@ -1151,6 +1181,17 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_double, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTDI, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_long_long, 0 }, + { ALTIVEC_BUILTIN_VEC_LVSR, ALTIVEC_BUILTIN_LVSR, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long_long, 0 }, { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_LVLX, ALTIVEC_BUILTIN_LVLX, @@ -2644,6 +2685,16 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { ALTIVEC_BUILTIN_VEC_SLD, ALTIVEC_BUILTIN_VSLDOI_16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_NOT_OPAQUE }, { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { ALTIVEC_BUILTIN_VEC_ST, ALTIVEC_BUILTIN_STVX, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_ST, 
ALTIVEC_BUILTIN_STVX, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, @@ -2809,6 +2860,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL, RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_double }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { ALTIVEC_BUILTIN_VEC_STL, ALTIVEC_BUILTIN_STVXL, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, { ALTIVEC_BUILTIN_VEC_STVLX, ALTIVEC_BUILTIN_STVLX, @@ -3002,6 +3065,135 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, RS6000_BTI_NOT_OPAQUE }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DF, + RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, + RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, + RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVD2X_V2DI, + RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V2DI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SF, + RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V4SI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_long, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V4SI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTSI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V4SI, + RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_long, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_pixel_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V8HI, 0 }, + { 
VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V8HI, + RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTHI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_bool_V16QI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI, 0 }, + { VSX_BUILTIN_VEC_LD, VSX_BUILTIN_LXVW4X_V16QI, + RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_UINTQI, 0 }, + + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DF, + RS6000_BTI_void, RS6000_BTI_V2DF, RS6000_BTI_INTSI, ~RS6000_BTI_V2DF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_V2DI, RS6000_BTI_INTSI, ~RS6000_BTI_V2DI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V2DI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVD2X_V2DI, + RS6000_BTI_void, RS6000_BTI_bool_V2DI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V2DI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_V4SF }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SF, + RS6000_BTI_void, RS6000_BTI_V4SF, RS6000_BTI_INTSI, ~RS6000_BTI_float }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_V4SI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_V4SI, RS6000_BTI_INTSI, ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V4SI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_unsigned_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V4SI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V4SI, + RS6000_BTI_void, RS6000_BTI_bool_V4SI, RS6000_BTI_INTSI, + ~RS6000_BTI_INTSI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_V8HI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_V8HI, RS6000_BTI_INTSI, ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V8HI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_unsigned_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V8HI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V8HI, + RS6000_BTI_void, RS6000_BTI_bool_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_INTHI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_V16QI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, 
RS6000_BTI_V16QI, RS6000_BTI_INTSI, ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_unsigned_V16QI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_unsigned_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_bool_V16QI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_UINTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_bool_V16QI, RS6000_BTI_INTSI, + ~RS6000_BTI_INTQI }, + { VSX_BUILTIN_VEC_ST, VSX_BUILTIN_STXVW4X_V16QI, + RS6000_BTI_void, RS6000_BTI_pixel_V8HI, RS6000_BTI_INTSI, + ~RS6000_BTI_pixel_V8HI }, + /* Predicates. */ { ALTIVEC_BUILTIN_VCMPGT_P, ALTIVEC_BUILTIN_VCMPGTUB_P, RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V16QI, RS6000_BTI_unsigned_V16QI }, diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3eafc166cb8..d9b6bd70cad 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -1,5 +1,6 @@ /* Definitions of target machine for GNU compiler, for IBM RS/6000. - Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, + 2010, 2011 Free Software Foundation, Inc. Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) @@ -129,6 +130,7 @@ extern void rs6000_emit_parity (rtx, rtx); extern rtx rs6000_machopic_legitimize_pic_address (rtx, enum machine_mode, rtx); extern rtx rs6000_address_for_fpconvert (rtx); +extern rtx rs6000_address_for_altivec (rtx); extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool); extern int rs6000_loop_align (rtx); #endif /* RTX_CODE */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index c5e45fb70be..df02bef54df 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3316,9 +3316,12 @@ rs6000_option_override_internal (bool global_init_p) /* If not explicitly specified via option, decide whether to generate indexed load/store instructions. */ if (TARGET_AVOID_XFORM == -1) - /* Avoid indexed addressing when targeting Power6 in order to avoid - the DERAT mispredict penalty. */ - TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB); + /* Avoid indexed addressing when targeting Power6 in order to avoid the + DERAT mispredict penalty. However the LVE and STVE altivec instructions + need indexed accesses and the type used is the scalar type of the element + being loaded or stored. */ + TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB + && !TARGET_ALTIVEC); /* Set the -mrecip options. 
*/ if (rs6000_recip_name) @@ -11263,16 +11266,22 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) switch (fcode) { case ALTIVEC_BUILTIN_LD_INTERNAL_16qi: - icode = CODE_FOR_vector_load_v16qi; + icode = CODE_FOR_vector_altivec_load_v16qi; break; case ALTIVEC_BUILTIN_LD_INTERNAL_8hi: - icode = CODE_FOR_vector_load_v8hi; + icode = CODE_FOR_vector_altivec_load_v8hi; break; case ALTIVEC_BUILTIN_LD_INTERNAL_4si: - icode = CODE_FOR_vector_load_v4si; + icode = CODE_FOR_vector_altivec_load_v4si; break; case ALTIVEC_BUILTIN_LD_INTERNAL_4sf: - icode = CODE_FOR_vector_load_v4sf; + icode = CODE_FOR_vector_altivec_load_v4sf; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_2df: + icode = CODE_FOR_vector_altivec_load_v2df; + break; + case ALTIVEC_BUILTIN_LD_INTERNAL_2di: + icode = CODE_FOR_vector_altivec_load_v2di; break; default: *expandedp = false; @@ -11316,16 +11325,22 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, switch (fcode) { case ALTIVEC_BUILTIN_ST_INTERNAL_16qi: - icode = CODE_FOR_vector_store_v16qi; + icode = CODE_FOR_vector_altivec_store_v16qi; break; case ALTIVEC_BUILTIN_ST_INTERNAL_8hi: - icode = CODE_FOR_vector_store_v8hi; + icode = CODE_FOR_vector_altivec_store_v8hi; break; case ALTIVEC_BUILTIN_ST_INTERNAL_4si: - icode = CODE_FOR_vector_store_v4si; + icode = CODE_FOR_vector_altivec_store_v4si; break; case ALTIVEC_BUILTIN_ST_INTERNAL_4sf: - icode = CODE_FOR_vector_store_v4sf; + icode = CODE_FOR_vector_altivec_store_v4sf; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_2df: + icode = CODE_FOR_vector_altivec_store_v2df; + break; + case ALTIVEC_BUILTIN_ST_INTERNAL_2di: + icode = CODE_FOR_vector_altivec_store_v2di; break; default: *expandedp = false; @@ -11557,7 +11572,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) switch (fcode) { case ALTIVEC_BUILTIN_STVX: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, exp); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp); case ALTIVEC_BUILTIN_STVEBX: return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp); case ALTIVEC_BUILTIN_STVEHX: @@ -11576,6 +11591,19 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_STVRXL: return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp); + case VSX_BUILTIN_STXVD2X_V2DF: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp); + case VSX_BUILTIN_STXVD2X_V2DI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp); + case VSX_BUILTIN_STXVW4X_V4SF: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp); + case VSX_BUILTIN_STXVW4X_V4SI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp); + case VSX_BUILTIN_STXVW4X_V8HI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp); + case VSX_BUILTIN_STXVW4X_V16QI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp); + case ALTIVEC_BUILTIN_MFVSCR: icode = CODE_FOR_altivec_mfvscr; tmode = insn_data[icode].operand[0].mode; @@ -11700,7 +11728,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl, exp, target, false); case ALTIVEC_BUILTIN_LVX: - return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx, + return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si, exp, target, false); case ALTIVEC_BUILTIN_LVLX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx, @@ -11714,6 +11742,25 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_LVRXL: return 
altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl, exp, target, true); + case VSX_BUILTIN_LXVD2X_V2DF: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df, + exp, target, false); + case VSX_BUILTIN_LXVD2X_V2DI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V4SF: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V4SI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V8HI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi, + exp, target, false); + case VSX_BUILTIN_LXVW4X_V16QI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi, + exp, target, false); + break; default: break; /* Fall through. */ @@ -12331,6 +12378,8 @@ rs6000_init_builtins (void) long_integer_type_internal_node = long_integer_type_node; long_unsigned_type_internal_node = long_unsigned_type_node; + long_long_integer_type_internal_node = long_long_integer_type_node; + long_long_unsigned_type_internal_node = long_long_unsigned_type_node; intQI_type_internal_node = intQI_type_node; uintQI_type_internal_node = unsigned_intQI_type_node; intHI_type_internal_node = intHI_type_node; @@ -12340,7 +12389,7 @@ rs6000_init_builtins (void) intDI_type_internal_node = intDI_type_node; uintDI_type_internal_node = unsigned_intDI_type_node; float_type_internal_node = float_type_node; - double_type_internal_node = float_type_node; + double_type_internal_node = double_type_node; void_type_internal_node = void_type_node; /* Initialize the modes for builtin_function_type, mapping a machine mode to @@ -12872,19 +12921,11 @@ altivec_init_builtins (void) size_t i; tree ftype; - tree pfloat_type_node = build_pointer_type (float_type_node); - tree pint_type_node = build_pointer_type (integer_type_node); - tree pshort_type_node = build_pointer_type (short_integer_type_node); - tree pchar_type_node = build_pointer_type (char_type_node); - tree pvoid_type_node = build_pointer_type (void_type_node); - tree pcfloat_type_node = build_pointer_type (build_qualified_type (float_type_node, TYPE_QUAL_CONST)); - tree pcint_type_node = build_pointer_type (build_qualified_type (integer_type_node, TYPE_QUAL_CONST)); - tree pcshort_type_node = build_pointer_type (build_qualified_type (short_integer_type_node, TYPE_QUAL_CONST)); - tree pcchar_type_node = build_pointer_type (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); - - tree pcvoid_type_node = build_pointer_type (build_qualified_type (void_type_node, TYPE_QUAL_CONST)); + tree pcvoid_type_node + = build_pointer_type (build_qualified_type (void_type_node, + TYPE_QUAL_CONST)); tree int_ftype_opaque = build_function_type_list (integer_type_node, @@ -12907,26 +12948,6 @@ altivec_init_builtins (void) = build_function_type_list (integer_type_node, integer_type_node, V4SI_type_node, V4SI_type_node, NULL_TREE); - tree v4sf_ftype_pcfloat - = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); - tree void_ftype_pfloat_v4sf - = build_function_type_list (void_type_node, - pfloat_type_node, V4SF_type_node, NULL_TREE); - tree v4si_ftype_pcint - = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE); - tree void_ftype_pint_v4si - = build_function_type_list (void_type_node, - pint_type_node, V4SI_type_node, NULL_TREE); - tree v8hi_ftype_pcshort - = build_function_type_list (V8HI_type_node, pcshort_type_node, NULL_TREE); - tree void_ftype_pshort_v8hi - = build_function_type_list 
(void_type_node, - pshort_type_node, V8HI_type_node, NULL_TREE); - tree v16qi_ftype_pcchar - = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); - tree void_ftype_pchar_v16qi - = build_function_type_list (void_type_node, - pchar_type_node, V16QI_type_node, NULL_TREE); tree void_ftype_v4si = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); tree v8hi_ftype_void @@ -12938,16 +12959,32 @@ altivec_init_builtins (void) tree opaque_ftype_long_pcvoid = build_function_type_list (opaque_V4SI_type_node, - long_integer_type_node, pcvoid_type_node, NULL_TREE); + long_integer_type_node, pcvoid_type_node, + NULL_TREE); tree v16qi_ftype_long_pcvoid = build_function_type_list (V16QI_type_node, - long_integer_type_node, pcvoid_type_node, NULL_TREE); + long_integer_type_node, pcvoid_type_node, + NULL_TREE); tree v8hi_ftype_long_pcvoid = build_function_type_list (V8HI_type_node, - long_integer_type_node, pcvoid_type_node, NULL_TREE); + long_integer_type_node, pcvoid_type_node, + NULL_TREE); tree v4si_ftype_long_pcvoid = build_function_type_list (V4SI_type_node, - long_integer_type_node, pcvoid_type_node, NULL_TREE); + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v4sf_ftype_long_pcvoid + = build_function_type_list (V4SF_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v2df_ftype_long_pcvoid + = build_function_type_list (V2DF_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree v2di_ftype_long_pcvoid + = build_function_type_list (V2DI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); tree void_ftype_opaque_long_pvoid = build_function_type_list (void_type_node, @@ -12965,6 +13002,18 @@ altivec_init_builtins (void) = build_function_type_list (void_type_node, V8HI_type_node, long_integer_type_node, pvoid_type_node, NULL_TREE); + tree void_ftype_v4sf_long_pvoid + = build_function_type_list (void_type_node, + V4SF_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree void_ftype_v2df_long_pvoid + = build_function_type_list (void_type_node, + V2DF_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + tree void_ftype_v2di_long_pvoid + = build_function_type_list (void_type_node, + V2DI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); tree int_ftype_int_v8hi_v8hi = build_function_type_list (integer_type_node, integer_type_node, V8HI_type_node, @@ -12996,22 +13045,6 @@ altivec_init_builtins (void) pcvoid_type_node, integer_type_node, integer_type_node, NULL_TREE); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4sf", v4sf_ftype_pcfloat, - ALTIVEC_BUILTIN_LD_INTERNAL_4sf); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4sf", void_ftype_pfloat_v4sf, - ALTIVEC_BUILTIN_ST_INTERNAL_4sf); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_4si", v4si_ftype_pcint, - ALTIVEC_BUILTIN_LD_INTERNAL_4si); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_4si", void_ftype_pint_v4si, - ALTIVEC_BUILTIN_ST_INTERNAL_4si); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_8hi", v8hi_ftype_pcshort, - ALTIVEC_BUILTIN_LD_INTERNAL_8hi); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_8hi", void_ftype_pshort_v8hi, - ALTIVEC_BUILTIN_ST_INTERNAL_8hi); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_ld_internal_16qi", v16qi_ftype_pcchar, - ALTIVEC_BUILTIN_LD_INTERNAL_16qi); - def_builtin (MASK_ALTIVEC, "__builtin_altivec_st_internal_16qi", void_ftype_pchar_v16qi, - ALTIVEC_BUILTIN_ST_INTERNAL_16qi); 
def_builtin (MASK_ALTIVEC, "__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR); def_builtin (MASK_ALTIVEC, "__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR); def_builtin (MASK_ALTIVEC, "__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL); @@ -13043,6 +13076,35 @@ altivec_init_builtins (void) def_builtin (MASK_ALTIVEC, "__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX); def_builtin (MASK_ALTIVEC, "__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX); + def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid, + VSX_BUILTIN_LXVD2X_V2DF); + def_builtin (MASK_VSX, "__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid, + VSX_BUILTIN_LXVD2X_V2DI); + def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid, + VSX_BUILTIN_LXVW4X_V4SF); + def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid, + VSX_BUILTIN_LXVW4X_V4SI); + def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v8hi", + v8hi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V8HI); + def_builtin (MASK_VSX, "__builtin_vsx_lxvw4x_v16qi", + v16qi_ftype_long_pcvoid, VSX_BUILTIN_LXVW4X_V16QI); + def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2df", + void_ftype_v2df_long_pvoid, VSX_BUILTIN_STXVD2X_V2DF); + def_builtin (MASK_VSX, "__builtin_vsx_stxvd2x_v2di", + void_ftype_v2di_long_pvoid, VSX_BUILTIN_STXVD2X_V2DI); + def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4sf", + void_ftype_v4sf_long_pvoid, VSX_BUILTIN_STXVW4X_V4SF); + def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v4si", + void_ftype_v4si_long_pvoid, VSX_BUILTIN_STXVW4X_V4SI); + def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v8hi", + void_ftype_v8hi_long_pvoid, VSX_BUILTIN_STXVW4X_V8HI); + def_builtin (MASK_VSX, "__builtin_vsx_stxvw4x_v16qi", + void_ftype_v16qi_long_pvoid, VSX_BUILTIN_STXVW4X_V16QI); + def_builtin (MASK_VSX, "__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid, + VSX_BUILTIN_VEC_LD); + def_builtin (MASK_VSX, "__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid, + VSX_BUILTIN_VEC_ST); + if (rs6000_cpu == PROCESSOR_CELL) { def_builtin (MASK_ALTIVEC, "__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); @@ -27925,4 +27987,29 @@ rs6000_address_for_fpconvert (rtx x) return x; } +/* Given a memory reference, if it is not in the form for altivec memory + reference instructions (i.e. reg or reg+reg addressing with AND of -16), + convert to the altivec format. */ + +rtx +rs6000_address_for_altivec (rtx x) +{ + gcc_assert (MEM_P (x)); + if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x))) + { + rtx addr = XEXP (x, 0); + int strict_p = (reload_in_progress || reload_completed); + + if (!legitimate_indexed_address_p (addr, strict_p) + && !legitimate_indirect_address_p (addr, strict_p)) + addr = copy_to_mode_reg (Pmode, addr); + + addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); + x = change_address (x, GET_MODE (x), addr); + } + + return x; +} + + #include "gt-rs6000.h" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index fb6130ffcdb..8c76d7ce101 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1,7 +1,7 @@ /* Definitions of target machine for GNU compiler, for IBM RS/6000. Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, - 2010 + 2010, 2011 Free Software Foundation, Inc. 
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
@@ -2368,6 +2368,8 @@ enum rs6000_builtin_type_index
   RS6000_BTI_pixel_V8HI,         /* __vector __pixel */
   RS6000_BTI_long,               /* long_integer_type_node */
   RS6000_BTI_unsigned_long,      /* long_unsigned_type_node */
+  RS6000_BTI_long_long,          /* long_long_integer_type_node */
+  RS6000_BTI_unsigned_long_long, /* long_long_unsigned_type_node */
   RS6000_BTI_INTQI,              /* intQI_type_node */
   RS6000_BTI_UINTQI,             /* unsigned_intQI_type_node */
   RS6000_BTI_INTHI,              /* intHI_type_node */
@@ -2411,6 +2413,8 @@
 #define bool_V2DI_type_node           (rs6000_builtin_types[RS6000_BTI_bool_V2DI])
 #define pixel_V8HI_type_node          (rs6000_builtin_types[RS6000_BTI_pixel_V8HI])
 
+#define long_long_integer_type_internal_node  (rs6000_builtin_types[RS6000_BTI_long_long])
+#define long_long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long_long])
 #define long_integer_type_internal_node  (rs6000_builtin_types[RS6000_BTI_long])
 #define long_unsigned_type_internal_node (rs6000_builtin_types[RS6000_BTI_unsigned_long])
 #define intQI_type_internal_node         (rs6000_builtin_types[RS6000_BTI_INTQI])
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 71961fbc57c..5335d9d4301 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -3,7 +3,7 @@
 ;; expander, and the actual vector instructions will be in altivec.md and
 ;; vsx.md
 
-;; Copyright (C) 2009, 2010
+;; Copyright (C) 2009, 2010, 2011
 ;; Free Software Foundation, Inc.
 ;; Contributed by Michael Meissner
@@ -123,6 +123,43 @@
   DONE;
 })
 
+;; Vector floating point load/store instructions that use the Altivec
+;; instructions even if we are compiling for VSX, since the Altivec
+;; instructions silently ignore the bottom 4 bits of the address, and VSX does
+;; not.
+(define_expand "vector_altivec_load_<mode>"
+  [(set (match_operand:VEC_M 0 "vfloat_operand" "")
+	(match_operand:VEC_M 1 "memory_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "
+{
+  gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+  if (VECTOR_MEM_VSX_P (<MODE>mode))
+    {
+      operands[1] = rs6000_address_for_altivec (operands[1]);
+      emit_insn (gen_altivec_lvx_<mode> (operands[0], operands[1]));
+      DONE;
+    }
+}")
+
+(define_expand "vector_altivec_store_<mode>"
+  [(set (match_operand:VEC_M 0 "memory_operand" "")
+	(match_operand:VEC_M 1 "vfloat_operand" ""))]
+  "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "
+{
+  gcc_assert (VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode));
+
+  if (VECTOR_MEM_VSX_P (<MODE>mode))
+    {
+      operands[0] = rs6000_address_for_altivec (operands[0]);
+      emit_insn (gen_altivec_stvx_<mode> (operands[0], operands[1]));
+      DONE;
+    }
+}")
+
+
 ;; Reload patterns for vector operations.  We may need an additional base
 ;; register to convert the reg+offset addressing to reg+reg for vector
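The AND with -16 introduced by rs6000_address_for_altivec (see the
rs6000.c hunk above) makes the lvx/stvx hardware behavior explicit in
the RTL before the Altivec patterns are used on a VSX target.  As a
rough model only, with a hypothetical helper name not taken from the
patch, the effective address these patterns present is:

/* Illustrative only: lvx/stvx always access the 16-byte-aligned
   quadword containing the address, which is what masking with -16
   (clearing the low four bits) computes.  */
unsigned long
altivec_effective_address (unsigned long addr)
{
  return addr & ~(unsigned long) 15;	/* same as addr & -16 */
}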
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 5b3040cc2f6..3f6da4c0b40 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -308,6 +308,19 @@
 }
   [(set_attr "type" "vecstore,vecload,vecsimple,*,*,*,vecsimple,*,vecstore,vecload")])
 
+;; Explicit load/store expanders for the builtin functions
+(define_expand "vsx_load_<mode>"
+  [(set (match_operand:VSX_M 0 "vsx_register_operand" "")
+	(match_operand:VSX_M 1 "memory_operand" ""))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+  "")
+
+(define_expand "vsx_store_<mode>"
+  [(set (match_operand:VSX_M 0 "memory_operand" "")
+	(match_operand:VSX_M 1 "vsx_register_operand" ""))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)"
+  "")
+
 ;; VSX scalar and vector floating point arithmetic instructions
 (define_insn "*vsx_add<mode>3"
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index cf025bdf05e..67513608e3d 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12359,6 +12359,12 @@
 vector bool long vec_cmplt (vector double, vector double);
 vector float vec_div (vector float, vector float);
 vector double vec_div (vector double, vector double);
 vector double vec_floor (vector double);
+vector double vec_ld (int, const vector double *);
+vector double vec_ld (int, const double *);
+vector double vec_ldl (int, const vector double *);
+vector double vec_ldl (int, const double *);
+vector unsigned char vec_lvsl (int, const volatile double *);
+vector unsigned char vec_lvsr (int, const volatile double *);
 vector double vec_madd (vector double, vector double, vector double);
 vector double vec_max (vector double, vector double);
 vector double vec_min (vector double, vector double);
@@ -12387,6 +12393,8 @@
 vector double vec_sel (vector double, vector double, vector unsigned long);
 vector double vec_sub (vector double, vector double);
 vector float vec_sqrt (vector float);
 vector double vec_sqrt (vector double);
+void vec_st (vector double, int, vector double *);
+void vec_st (vector double, int, double *);
 vector double vec_trunc (vector double);
 vector double vec_xor (vector double, vector double);
 vector double vec_xor (vector double, vector bool long);
@@ -12415,7 +12423,65 @@
 int vec_any_ngt (vector double, vector double);
 int vec_any_nle (vector double, vector double);
 int vec_any_nlt (vector double, vector double);
 int vec_any_numeric (vector double);
-@end smallexample
+
+vector double vec_vsx_ld (int, const vector double *);
+vector double vec_vsx_ld (int, const double *);
+vector float vec_vsx_ld (int, const vector float *);
+vector float vec_vsx_ld (int, const float *);
+vector bool int vec_vsx_ld (int, const vector bool int *);
+vector signed int vec_vsx_ld (int, const vector signed int *);
+vector signed int vec_vsx_ld (int, const int *);
+vector signed int vec_vsx_ld (int, const long *);
+vector unsigned int vec_vsx_ld (int, const vector unsigned int *);
+vector unsigned int vec_vsx_ld (int, const unsigned int *);
+vector unsigned int vec_vsx_ld (int, const unsigned long *);
+vector bool short vec_vsx_ld (int, const vector bool short *);
+vector pixel vec_vsx_ld (int, const vector pixel *);
+vector signed short vec_vsx_ld (int, const vector signed short *);
+vector signed short vec_vsx_ld (int, const short *);
+vector unsigned short vec_vsx_ld (int, const vector unsigned short *);
+vector unsigned short vec_vsx_ld (int, const unsigned short *);
+vector bool char vec_vsx_ld (int, const vector bool char *);
+vector signed char vec_vsx_ld (int, const vector signed char *);
+vector signed char vec_vsx_ld (int, const signed char *);
+vector unsigned char vec_vsx_ld (int, const vector unsigned char *);
+vector unsigned char vec_vsx_ld (int, const unsigned char *);
+
+void vec_vsx_st (vector double, int, vector double *);
+void vec_vsx_st (vector double, int, double *);
+void vec_vsx_st (vector float, int, vector float *);
+void vec_vsx_st (vector float, int, float *);
+void vec_vsx_st (vector signed int, int, vector signed int *);
+void vec_vsx_st (vector signed int, int, int *);
+void vec_vsx_st (vector unsigned int, int, vector unsigned int *);
+void vec_vsx_st (vector unsigned int, int, unsigned int *);
+void vec_vsx_st (vector bool int, int, vector bool int *);
+void vec_vsx_st (vector bool int, int, unsigned int *);
+void vec_vsx_st (vector bool int, int, int *);
+void vec_vsx_st (vector signed short, int, vector signed short *);
+void vec_vsx_st (vector signed short, int, short *);
+void vec_vsx_st (vector unsigned short, int, vector unsigned short *);
+void vec_vsx_st (vector unsigned short, int, unsigned short *);
+void vec_vsx_st (vector bool short, int, vector bool short *);
+void vec_vsx_st (vector bool short, int, unsigned short *);
+void vec_vsx_st (vector pixel, int, vector pixel *);
+void vec_vsx_st (vector pixel, int, unsigned short *);
+void vec_vsx_st (vector pixel, int, short *);
+void vec_vsx_st (vector bool short, int, short *);
+void vec_vsx_st (vector signed char, int, vector signed char *);
+void vec_vsx_st (vector signed char, int, signed char *);
+void vec_vsx_st (vector unsigned char, int, vector unsigned char *);
+void vec_vsx_st (vector unsigned char, int, unsigned char *);
+void vec_vsx_st (vector bool char, int, vector bool char *);
+void vec_vsx_st (vector bool char, int, unsigned char *);
+void vec_vsx_st (vector bool char, int, signed char *);
+@end smallexample
+
+Note that the @samp{vec_ld} and @samp{vec_st} builtins will always
+generate the Altivec @samp{LVX} and @samp{STVX} instructions even
+if the VSX instruction set is available.  The @samp{vec_vsx_ld} and
+@samp{vec_vsx_st} builtins will always generate the VSX @samp{LXVD2X},
+@samp{LXVW4X}, @samp{STXVD2X}, and @samp{STXVW4X} instructions.
 
 GCC provides a few other builtins on PowerPC to access certain
 instructions:
 @smallexample
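As a usage sketch of the vec_vsx_ld/vec_vsx_st prototypes documented
above (hypothetical function, not from the patch): because these map
to lxvd2x/stxvd2x, the pointers need not be 16-byte aligned, unlike
with vec_ld/vec_st:

#include <altivec.h>

/* Copy 2*n doubles through VSX vector registers; offsets are in
   bytes, as with vec_ld/vec_st.  */
void
copy_v2df (double *dst, const double *src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    vec_vsx_st (vec_vsx_ld (16 * i, src), 16 * i, dst);
}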
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 440750d1c4b..de8cec78888 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,19 @@
+2011-02-02  Michael Meissner
+
+	PR target/47272
+	* gcc.target/powerpc/vsx-builtin-8.c: New file, test vec_vsx_ld
+	and vec_vsx_st.
+
+	* gcc.target/powerpc/avoid-indexed-addresses.c: Disable altivec
+	and vsx so a default --with-cpu=power7 doesn't give an error
+	when -mavoid-indexed-addresses is used.
+
+	* gcc.target/powerpc/ppc32-abi-dfp-1.c: Rewrite to use an asm
+	wrapper function to save the arguments and then jump to the real
+	function, rather than depending on the compiler not to move stuff
+	before an asm.
+	* gcc.target/powerpc/ppc64-abi-dfp-1.c: Ditto.
+ 2011-02-02 Janus Weil Paul Thomas diff --git a/gcc/testsuite/gcc.target/powerpc/avoid-indexed-addresses.c b/gcc/testsuite/gcc.target/powerpc/avoid-indexed-addresses.c index b1b067283f7..e86aa8a5d2f 100644 --- a/gcc/testsuite/gcc.target/powerpc/avoid-indexed-addresses.c +++ b/gcc/testsuite/gcc.target/powerpc/avoid-indexed-addresses.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { powerpc*-*-* } } } */ -/* { dg-options "-O2 -mavoid-indexed-addresses" } */ +/* { dg-options "-O2 -mavoid-indexed-addresses -mno-altivec -mno-vsx" } */ /* { dg-final { scan-assembler-not "lbzx" } } diff --git a/gcc/testsuite/gcc.target/powerpc/ppc32-abi-dfp-1.c b/gcc/testsuite/gcc.target/powerpc/ppc32-abi-dfp-1.c index 1b836d72795..14908dba690 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc32-abi-dfp-1.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc32-abi-dfp-1.c @@ -30,31 +30,6 @@ typedef struct reg_parms_t gparms; - -/* Testcase could break on future gcc's, if parameter regs - are changed before this asm. */ - -#define save_parms(lparms) \ - asm volatile ("lis 11,gparms@ha\n\t" \ - "la 11,gparms@l(11)\n\t" \ - "st 3,0(11)\n\t" \ - "st 4,4(11)\n\t" \ - "st 5,8(11)\n\t" \ - "st 6,12(11)\n\t" \ - "st 7,16(11)\n\t" \ - "st 8,20(11)\n\t" \ - "st 9,24(11)\n\t" \ - "st 10,28(11)\n\t" \ - "stfd 1,32(11)\n\t" \ - "stfd 2,40(11)\n\t" \ - "stfd 3,48(11)\n\t" \ - "stfd 4,56(11)\n\t" \ - "stfd 5,64(11)\n\t" \ - "stfd 6,72(11)\n\t" \ - "stfd 7,80(11)\n\t" \ - "stfd 8,88(11)\n\t":::"11", "memory"); \ - lparms = gparms; - typedef struct sf { struct sf *backchain; @@ -62,115 +37,159 @@ typedef struct sf unsigned int slot[200]; } stack_frame_t; +/* Wrapper to save the GPRs and FPRs and then jump to the real function. */ +#define WRAPPER(NAME) \ +__asm__ ("\t.globl\t" #NAME "_asm\n\t" \ + ".text\n\t" \ + ".type " #NAME "_asm, @function\n" \ + #NAME "_asm:\n\t" \ + "lis 11,gparms@ha\n\t" \ + "la 11,gparms@l(11)\n\t" \ + "st 3,0(11)\n\t" \ + "st 4,4(11)\n\t" \ + "st 5,8(11)\n\t" \ + "st 6,12(11)\n\t" \ + "st 7,16(11)\n\t" \ + "st 8,20(11)\n\t" \ + "st 9,24(11)\n\t" \ + "st 10,28(11)\n\t" \ + "stfd 1,32(11)\n\t" \ + "stfd 2,40(11)\n\t" \ + "stfd 3,48(11)\n\t" \ + "stfd 4,56(11)\n\t" \ + "stfd 5,64(11)\n\t" \ + "stfd 6,72(11)\n\t" \ + "stfd 7,80(11)\n\t" \ + "stfd 8,88(11)\n\t" \ + "b " #NAME "\n\t" \ + ".size " #NAME ",.-" #NAME "\n") + /* Fill up floating point registers with double arguments, forcing decimal float arguments into the parameter save area. 
*/ +extern void func0_asm (double, double, double, double, double, + double, double, double, _Decimal64, _Decimal128); + +WRAPPER(func0); + void __attribute__ ((noinline)) func0 (double a1, double a2, double a3, double a4, double a5, double a6, double a7, double a8, _Decimal64 a9, _Decimal128 a10) { - reg_parms_t lparms; stack_frame_t *sp; - save_parms (lparms); sp = __builtin_frame_address (0); sp = sp->backchain; - if (a1 != lparms.fprs[0]) FAILURE - if (a2 != lparms.fprs[1]) FAILURE - if (a3 != lparms.fprs[2]) FAILURE - if (a4 != lparms.fprs[3]) FAILURE - if (a5 != lparms.fprs[4]) FAILURE - if (a6 != lparms.fprs[5]) FAILURE - if (a7 != lparms.fprs[6]) FAILURE - if (a8 != lparms.fprs[7]) FAILURE + if (a1 != gparms.fprs[0]) FAILURE + if (a2 != gparms.fprs[1]) FAILURE + if (a3 != gparms.fprs[2]) FAILURE + if (a4 != gparms.fprs[3]) FAILURE + if (a5 != gparms.fprs[4]) FAILURE + if (a6 != gparms.fprs[5]) FAILURE + if (a7 != gparms.fprs[6]) FAILURE + if (a8 != gparms.fprs[7]) FAILURE if (a9 != *(_Decimal64 *)&sp->slot[0]) FAILURE if (a10 != *(_Decimal128 *)&sp->slot[2]) FAILURE } /* Alternate 64-bit and 128-bit decimal float arguments, checking that _Decimal128 is always passed in even/odd register pairs. */ +extern void func1_asm (_Decimal64, _Decimal128, _Decimal64, _Decimal128, + _Decimal64, _Decimal128, _Decimal64, _Decimal128); + +WRAPPER(func1); + void __attribute__ ((noinline)) func1 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4, _Decimal64 a5, _Decimal128 a6, _Decimal64 a7, _Decimal128 a8) { - reg_parms_t lparms; stack_frame_t *sp; - save_parms (lparms); sp = __builtin_frame_address (0); sp = sp->backchain; - if (a1 != *(_Decimal64 *)&lparms.fprs[0]) FAILURE /* f1 */ - if (a2 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */ - if (a3 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */ - if (a4 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */ - if (a5 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */ + if (a1 != *(_Decimal64 *)&gparms.fprs[0]) FAILURE /* f1 */ + if (a2 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */ + if (a3 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */ + if (a4 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */ + if (a5 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */ if (a6 != *(_Decimal128 *)&sp->slot[0]) FAILURE if (a7 != *(_Decimal64 *)&sp->slot[4]) FAILURE if (a8 != *(_Decimal128 *)&sp->slot[6]) FAILURE } +extern void func2_asm (_Decimal128, _Decimal64, _Decimal128, _Decimal64, + _Decimal128, _Decimal64, _Decimal128, _Decimal64); + +WRAPPER(func2); + void __attribute__ ((noinline)) func2 (_Decimal128 a1, _Decimal64 a2, _Decimal128 a3, _Decimal64 a4, _Decimal128 a5, _Decimal64 a6, _Decimal128 a7, _Decimal64 a8) { - reg_parms_t lparms; stack_frame_t *sp; - save_parms (lparms); sp = __builtin_frame_address (0); sp = sp->backchain; - if (a1 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */ - if (a2 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */ - if (a3 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */ - if (a4 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */ + if (a1 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */ + if (a2 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */ + if (a3 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */ + if (a4 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */ if (a5 != *(_Decimal128 *)&sp->slot[0]) FAILURE if (a6 != *(_Decimal64 *)&sp->slot[4]) FAILURE if (a7 != *(_Decimal128 *)&sp->slot[6]) FAILURE if (a8 != *(_Decimal64 
*)&sp->slot[10]) FAILURE } +extern void func3_asm (_Decimal64, _Decimal128, _Decimal64, _Decimal128, + _Decimal64); + +WRAPPER(func3); + void __attribute__ ((noinline)) func3 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4, _Decimal64 a5) { - reg_parms_t lparms; stack_frame_t *sp; - save_parms (lparms); sp = __builtin_frame_address (0); sp = sp->backchain; - if (a1 != *(_Decimal64 *)&lparms.fprs[0]) FAILURE /* f1 */ - if (a2 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */ - if (a3 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */ - if (a4 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */ + if (a1 != *(_Decimal64 *)&gparms.fprs[0]) FAILURE /* f1 */ + if (a2 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */ + if (a3 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */ + if (a4 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */ if (a5 != *(_Decimal128 *)&sp->slot[0]) FAILURE } +extern void func4_asm (_Decimal32, _Decimal32, _Decimal32, _Decimal32, + _Decimal32, _Decimal32, _Decimal32, _Decimal32, + _Decimal32, _Decimal32, _Decimal32, _Decimal32, + _Decimal32, _Decimal32, _Decimal32, _Decimal32); + +WRAPPER(func4); + void __attribute__ ((noinline)) func4 (_Decimal32 a1, _Decimal32 a2, _Decimal32 a3, _Decimal32 a4, _Decimal32 a5, _Decimal32 a6, _Decimal32 a7, _Decimal32 a8, _Decimal32 a9, _Decimal32 a10, _Decimal32 a11, _Decimal32 a12, _Decimal32 a13, _Decimal32 a14, _Decimal32 a15, _Decimal32 a16) { - reg_parms_t lparms; stack_frame_t *sp; - save_parms (lparms); sp = __builtin_frame_address (0); sp = sp->backchain; /* _Decimal32 is passed in the lower half of an FPR, or in parameter slot. */ - if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */ - if (a2 != ((d32parm_t *)&lparms.fprs[1])->d) FAILURE /* f2 */ - if (a3 != ((d32parm_t *)&lparms.fprs[2])->d) FAILURE /* f3 */ - if (a4 != ((d32parm_t *)&lparms.fprs[3])->d) FAILURE /* f4 */ - if (a5 != ((d32parm_t *)&lparms.fprs[4])->d) FAILURE /* f5 */ - if (a6 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */ - if (a7 != ((d32parm_t *)&lparms.fprs[6])->d) FAILURE /* f7 */ - if (a8 != ((d32parm_t *)&lparms.fprs[7])->d) FAILURE /* f8 */ + if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */ + if (a2 != ((d32parm_t *)&gparms.fprs[1])->d) FAILURE /* f2 */ + if (a3 != ((d32parm_t *)&gparms.fprs[2])->d) FAILURE /* f3 */ + if (a4 != ((d32parm_t *)&gparms.fprs[3])->d) FAILURE /* f4 */ + if (a5 != ((d32parm_t *)&gparms.fprs[4])->d) FAILURE /* f5 */ + if (a6 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */ + if (a7 != ((d32parm_t *)&gparms.fprs[6])->d) FAILURE /* f7 */ + if (a8 != ((d32parm_t *)&gparms.fprs[7])->d) FAILURE /* f8 */ if (a9 != *(_Decimal32 *)&sp->slot[0]) FAILURE if (a10 != *(_Decimal32 *)&sp->slot[1]) FAILURE if (a11 != *(_Decimal32 *)&sp->slot[2]) FAILURE @@ -181,24 +200,29 @@ func4 (_Decimal32 a1, _Decimal32 a2, _Decimal32 a3, _Decimal32 a4, if (a16 != *(_Decimal32 *)&sp->slot[7]) FAILURE } +extern void func5_asm (_Decimal32, _Decimal64, _Decimal128, + _Decimal32, _Decimal64, _Decimal128, + _Decimal32, _Decimal64, _Decimal128, + _Decimal32, _Decimal64, _Decimal128); + +WRAPPER(func5); + void __attribute__ ((noinline)) func5 (_Decimal32 a1, _Decimal64 a2, _Decimal128 a3, _Decimal32 a4, _Decimal64 a5, _Decimal128 a6, _Decimal32 a7, _Decimal64 a8, _Decimal128 a9, _Decimal32 a10, _Decimal64 a11, _Decimal128 a12) { - reg_parms_t lparms; stack_frame_t *sp; - save_parms (lparms); sp = __builtin_frame_address (0); sp = sp->backchain; - if (a1 != ((d32parm_t 
*)&lparms.fprs[0])->d) FAILURE /* f1 */ - if (a2 != *(_Decimal64 *)&lparms.fprs[1]) FAILURE /* f2 */ - if (a3 != *(_Decimal128 *)&lparms.fprs[3]) FAILURE /* f4 & f5 */ - if (a4 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */ - if (a5 != *(_Decimal64 *)&lparms.fprs[6]) FAILURE /* f7 */ + if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */ + if (a2 != *(_Decimal64 *)&gparms.fprs[1]) FAILURE /* f2 */ + if (a3 != *(_Decimal128 *)&gparms.fprs[3]) FAILURE /* f4 & f5 */ + if (a4 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */ + if (a5 != *(_Decimal64 *)&gparms.fprs[6]) FAILURE /* f7 */ if (a6 != *(_Decimal128 *)&sp->slot[0]) FAILURE if (a7 != *(_Decimal32 *)&sp->slot[4]) FAILURE @@ -212,15 +236,15 @@ func5 (_Decimal32 a1, _Decimal64 a2, _Decimal128 a3, int main () { - func0 (1., 2., 3., 4., 5., 6., 7., 8., 9.dd, 10.dl); - func1 (1.dd, 2.dl, 3.dd, 4.dl, 5.dd, 6.dl, 7.dd, 8.dl); - func2 (1.dl, 2.dd, 3.dl, 4.dd, 5.dl, 6.dd, 7.dl, 8.dd); - func3 (1.dd, 2.dl, 3.dd, 4.dl, 5.dl); - func4 (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df, - 508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df, - 515.2df, 516.2df); - func5 (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl, - 607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl); + func0_asm (1., 2., 3., 4., 5., 6., 7., 8., 9.dd, 10.dl); + func1_asm (1.dd, 2.dl, 3.dd, 4.dl, 5.dd, 6.dl, 7.dd, 8.dl); + func2_asm (1.dl, 2.dd, 3.dl, 4.dd, 5.dl, 6.dd, 7.dl, 8.dd); + func3_asm (1.dd, 2.dl, 3.dd, 4.dl, 5.dl); + func4_asm (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df, + 508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df, + 515.2df, 516.2df); + func5_asm (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl, + 607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl); if (failcnt != 0) abort (); diff --git a/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c b/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c dissimilarity index 60% index 3badf7f9848..eb54a653bf7 100644 --- a/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c +++ b/gcc/testsuite/gcc.target/powerpc/ppc64-abi-dfp-1.c @@ -1,318 +1,331 @@ -/* { dg-do run { target { powerpc64-*-* && { lp64 && dfprt } } } } */ -/* { dg-options "-std=gnu99 -O2 -fno-strict-aliasing" } */ - -/* Testcase to check for ABI compliance of parameter passing - for the PowerPC64 ELF ABI for decimal float values. */ - -extern void abort (void); -int failcnt = 0; - -/* Support compiling the test to report individual failures; default is - to abort as soon as a check fails. */ -#ifdef DBG -#include -#define FAILURE { printf ("failed at line %d\n", __LINE__); failcnt++; } -#else -#define FAILURE abort (); -#endif - -typedef struct -{ - int pad; - _Decimal32 d; -} d32parm_t; - -typedef struct -{ - unsigned long gprs[8]; - double fprs[13]; -} reg_parms_t; - -reg_parms_t gparms; - - -/* Testcase could break on future gcc's, if parameter regs - are changed before this asm. 
-
-#ifndef __MACH__
-#define save_parms(lparms) \
-  asm volatile ("ld 11,gparms@got(2)\n\t" \
-                "std 3,0(11)\n\t" \
-                "std 4,8(11)\n\t" \
-                "std 5,16(11)\n\t" \
-                "std 6,24(11)\n\t" \
-                "std 7,32(11)\n\t" \
-                "std 8,40(11)\n\t" \
-                "std 9,48(11)\n\t" \
-                "std 10,56(11)\n\t" \
-                "stfd 1,64(11)\n\t" \
-                "stfd 2,72(11)\n\t" \
-                "stfd 3,80(11)\n\t" \
-                "stfd 4,88(11)\n\t" \
-                "stfd 5,96(11)\n\t" \
-                "stfd 6,104(11)\n\t" \
-                "stfd 7,112(11)\n\t" \
-                "stfd 8,120(11)\n\t" \
-                "stfd 9,128(11)\n\t" \
-                "stfd 10,136(11)\n\t" \
-                "stfd 11,144(11)\n\t" \
-                "stfd 12,152(11)\n\t" \
-                "stfd 13,160(11)\n\t":::"11", "memory"); \
-  lparms = gparms;
-#else
-#define save_parms(lparms) \
-  asm volatile ("ld r11,gparms@got(r2)\n\t" \
-                "std r3,0(r11)\n\t" \
-                "std r4,8(r11)\n\t" \
-                "std r5,16(r11)\n\t" \
-                "std r6,24(r11)\n\t" \
-                "std r7,32(r11)\n\t" \
-                "std r8,40(r11)\n\t" \
-                "std r9,48(r11)\n\t" \
-                "std r10,56(r11)\n\t" \
-                "stfd f1,64(r11)\n\t" \
-                "stfd f2,72(r11)\n\t" \
-                "stfd f3,80(r11)\n\t" \
-                "stfd f4,88(r11)\n\t" \
-                "stfd f5,96(r11)\n\t" \
-                "stfd f6,104(r11)\n\t" \
-                "stfd f7,112(r11)\n\t" \
-                "stfd f8,120(r11)\n\t" \
-                "stfd f9,128(r11)\n\t" \
-                "stfd f10,136(r11)\n\t" \
-                "stfd f11,144(r11)\n\t" \
-                "stfd f12,152(r11)\n\t" \
-                "stfd f13,160(r11)\n\t":::"r11", "memory"); \
-  lparms = gparms;
-#endif
-
-typedef struct sf
-{
-  struct sf *backchain;
-  long a1;
-  long a2;
-  long a3;
-  long a4;
-  long a5;
-  unsigned long slot[100];
-} stack_frame_t;
-
-/* Fill up floating point registers with double arguments, forcing
-   decimal float arguments into the parameter save area.  */
-void __attribute__ ((noinline))
-func0 (double a1, double a2, double a3, double a4, double a5, double a6,
-       double a7, double a8, double a9, double a10, double a11, double a12,
-       double a13, double a14,
-       _Decimal64 a15, _Decimal128 a16, _Decimal64 a17)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  if (a1 != lparms.fprs[0]) FAILURE
-  if (a2 != lparms.fprs[1]) FAILURE
-  if (a3 != lparms.fprs[2]) FAILURE
-  if (a4 != lparms.fprs[3]) FAILURE
-  if (a5 != lparms.fprs[4]) FAILURE
-  if (a6 != lparms.fprs[5]) FAILURE
-  if (a7 != lparms.fprs[6]) FAILURE
-  if (a8 != lparms.fprs[7]) FAILURE
-  if (a9 != lparms.fprs[8]) FAILURE
-  if (a10 != lparms.fprs[9]) FAILURE
-  if (a11 != lparms.fprs[10]) FAILURE
-  if (a12 != lparms.fprs[11]) FAILURE
-  if (a13 != lparms.fprs[12]) FAILURE
-  if (a14 != *(double *)&sp->slot[13]) FAILURE
-  if (a15 != *(_Decimal64 *)&sp->slot[14]) FAILURE
-  if (a16 != *(_Decimal128 *)&sp->slot[15]) FAILURE
-  if (a17 != *(_Decimal64 *)&sp->slot[17]) FAILURE
-}
-
-void __attribute__ ((noinline))
-func1 (double a1, double a2, double a3, double a4, double a5, double a6,
-       double a7, double a8, double a9, double a10, double a11, double a12,
-       double a13, _Decimal128 a14)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  if (a1 != lparms.fprs[0]) FAILURE
-  if (a2 != lparms.fprs[1]) FAILURE
-  if (a3 != lparms.fprs[2]) FAILURE
-  if (a4 != lparms.fprs[3]) FAILURE
-  if (a5 != lparms.fprs[4]) FAILURE
-  if (a6 != lparms.fprs[5]) FAILURE
-  if (a7 != lparms.fprs[6]) FAILURE
-  if (a8 != lparms.fprs[7]) FAILURE
-  if (a9 != lparms.fprs[8]) FAILURE
-  if (a10 != lparms.fprs[9]) FAILURE
-  if (a11 != lparms.fprs[10]) FAILURE
-  if (a12 != lparms.fprs[11]) FAILURE
-  if (a13 != lparms.fprs[12]) FAILURE
-  if (a14 != *(_Decimal128 *)&sp->slot[13]) FAILURE
-}
-
-void __attribute__ ((noinline))
-func2 (double a1, double a2, double a3, double a4, double a5, double a6,
-       double a7, double a8, double a9, double a10, double a11, double a12,
-       _Decimal128 a13)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  if (a1 != lparms.fprs[0]) FAILURE
-  if (a2 != lparms.fprs[1]) FAILURE
-  if (a3 != lparms.fprs[2]) FAILURE
-  if (a4 != lparms.fprs[3]) FAILURE
-  if (a5 != lparms.fprs[4]) FAILURE
-  if (a6 != lparms.fprs[5]) FAILURE
-  if (a7 != lparms.fprs[6]) FAILURE
-  if (a8 != lparms.fprs[7]) FAILURE
-  if (a9 != lparms.fprs[8]) FAILURE
-  if (a10 != lparms.fprs[9]) FAILURE
-  if (a11 != lparms.fprs[10]) FAILURE
-  if (a12 != lparms.fprs[11]) FAILURE
-  if (a13 != *(_Decimal128 *)&sp->slot[12]) FAILURE
-}
-
-void __attribute__ ((noinline))
-func3 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4,
-       _Decimal64 a5, _Decimal128 a6, _Decimal64 a7, _Decimal128 a8,
-       _Decimal64 a9, _Decimal128 a10)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  if (a1 != *(_Decimal64 *)&lparms.fprs[0]) FAILURE /* f1 */
-  if (a2 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
-  if (a3 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
-  if (a4 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
-  if (a5 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */
-  if (a6 != *(_Decimal128 *)&lparms.fprs[9]) FAILURE /* f10 & f11 */
-  if (a7 != *(_Decimal64 *)&lparms.fprs[11]) FAILURE /* f12 */
-  if (a8 != *(_Decimal128 *)&sp->slot[10]) FAILURE
-  if (a9 != *(_Decimal64 *)&sp->slot[12]) FAILURE
-  if (a10 != *(_Decimal128 *)&sp->slot[13]) FAILURE
-}
-
-void __attribute__ ((noinline))
-func4 (_Decimal128 a1, _Decimal64 a2, _Decimal128 a3, _Decimal64 a4,
-       _Decimal128 a5, _Decimal64 a6, _Decimal128 a7, _Decimal64 a8)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  if (a1 != *(_Decimal128 *)&lparms.fprs[1]) FAILURE /* f2 & f3 */
-  if (a2 != *(_Decimal64 *)&lparms.fprs[3]) FAILURE /* f4 */
-  if (a3 != *(_Decimal128 *)&lparms.fprs[5]) FAILURE /* f6 & f7 */
-  if (a4 != *(_Decimal64 *)&lparms.fprs[7]) FAILURE /* f8 */
-  if (a5 != *(_Decimal128 *)&lparms.fprs[9]) FAILURE /* f10 & f11 */
-  if (a6 != *(_Decimal64 *)&lparms.fprs[11]) FAILURE /* f12 */
-  if (a7 != *(_Decimal128 *)&sp->slot[9]) FAILURE
-  if (a8 != *(_Decimal64 *)&sp->slot[11]) FAILURE
-}
-
-void __attribute__ ((noinline))
-func5 (_Decimal32 a1, _Decimal32 a2, _Decimal32 a3, _Decimal32 a4,
-       _Decimal32 a5, _Decimal32 a6, _Decimal32 a7, _Decimal32 a8,
-       _Decimal32 a9, _Decimal32 a10, _Decimal32 a11, _Decimal32 a12,
-       _Decimal32 a13, _Decimal32 a14, _Decimal32 a15, _Decimal32 a16)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  /* _Decimal32 is passed in the lower half of an FPR or parameter slot.  */
-  if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */
-  if (a2 != ((d32parm_t *)&lparms.fprs[1])->d) FAILURE /* f2 */
-  if (a3 != ((d32parm_t *)&lparms.fprs[2])->d) FAILURE /* f3 */
-  if (a4 != ((d32parm_t *)&lparms.fprs[3])->d) FAILURE /* f4 */
-  if (a5 != ((d32parm_t *)&lparms.fprs[4])->d) FAILURE /* f5 */
-  if (a6 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */
-  if (a7 != ((d32parm_t *)&lparms.fprs[6])->d) FAILURE /* f7 */
-  if (a8 != ((d32parm_t *)&lparms.fprs[7])->d) FAILURE /* f8 */
-  if (a9 != ((d32parm_t *)&lparms.fprs[8])->d) FAILURE /* f9 */
-  if (a10 != ((d32parm_t *)&lparms.fprs[9])->d) FAILURE /* f10 */
-  if (a11 != ((d32parm_t *)&lparms.fprs[10])->d) FAILURE /* f11 */
-  if (a12 != ((d32parm_t *)&lparms.fprs[11])->d) FAILURE /* f12 */
-  if (a13 != ((d32parm_t *)&lparms.fprs[12])->d) FAILURE /* f13 */
-  if (a14 != ((d32parm_t *)&sp->slot[13])->d) FAILURE
-  if (a15 != ((d32parm_t *)&sp->slot[14])->d) FAILURE
-  if (a16 != ((d32parm_t *)&sp->slot[15])->d) FAILURE
-}
-
-void __attribute__ ((noinline))
-func6 (_Decimal32 a1, _Decimal64 a2, _Decimal128 a3,
-       _Decimal32 a4, _Decimal64 a5, _Decimal128 a6,
-       _Decimal32 a7, _Decimal64 a8, _Decimal128 a9,
-       _Decimal32 a10, _Decimal64 a11, _Decimal128 a12)
-{
-  reg_parms_t lparms;
-  stack_frame_t *sp;
-
-  save_parms (lparms);
-  sp = __builtin_frame_address (0);
-  sp = sp->backchain;
-
-  if (a1 != ((d32parm_t *)&lparms.fprs[0])->d) FAILURE /* f1 */
-  if (a2 != *(_Decimal64 *)&lparms.fprs[1]) FAILURE /* f2 */
-  if (a3 != *(_Decimal128 *)&lparms.fprs[3]) FAILURE /* f4 & f5 */
-  if (a4 != ((d32parm_t *)&lparms.fprs[5])->d) FAILURE /* f6 */
-  if (a5 != *(_Decimal64 *)&lparms.fprs[6]) FAILURE /* f7 */
-  if (a6 != *(_Decimal128 *)&lparms.fprs[7]) FAILURE /* f8 & f9 */
-  if (a7 != ((d32parm_t *)&lparms.fprs[9])->d) FAILURE /* f10 */
-  if (a8 != *(_Decimal64 *)&lparms.fprs[10]) FAILURE /* f11 */
-  if (a9 != *(_Decimal128 *)&lparms.fprs[11]) FAILURE /* f12 & f13 */
-  if (a10 != ((d32parm_t *)&sp->slot[12])->d) FAILURE
-  if (a11 != *(_Decimal64 *)&sp->slot[13]) FAILURE
-}
-
-int
-main (void)
-{
-  func0 (1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5,
-         14.5, 15.2dd, 16.2dl, 17.2dd);
-  func1 (101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5, 108.5, 109.5,
-         110.5, 111.5, 112.5, 113.5, 114.2dd);
-  func2 (201.5, 202.5, 203.5, 204.5, 205.5, 206.5, 207.5, 208.5, 209.5,
-         210.5, 211.5, 212.5, 213.2dd);
-  func3 (301.2dd, 302.2dl, 303.2dd, 304.2dl, 305.2dd, 306.2dl, 307.2dd,
-         308.2dl, 309.2dd, 310.2dl);
-  func4 (401.2dl, 402.2dd, 403.2dl, 404.2dd, 405.2dl, 406.2dd, 407.2dl,
-         408.2dd);
-#if 0
-  /* _Decimal32 doesn't yet follow the ABI; enable this when it does.  */
-  func5 (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df,
-         508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df,
-         515.2df, 516.2df);
-  func6 (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl,
-         607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl);
-#endif
-
-  if (failcnt != 0)
-    abort ();
-
-  return 0;
-}
+/* { dg-do run { target { powerpc64-*-* && { lp64 && dfprt } } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-options "-std=gnu99 -O2 -fno-strict-aliasing" } */
+
+/* Testcase to check for ABI compliance of parameter passing
+   for the PowerPC64 ELF ABI for decimal float values.  */
+
+extern void abort (void);
+int failcnt = 0;
+
+/* Support compiling the test to report individual failures; default is
+   to abort as soon as a check fails.  */
+#ifdef DBG
+#include <stdio.h>
+#define FAILURE { printf ("failed at line %d\n", __LINE__); failcnt++; }
+#else
+#define FAILURE abort ();
+#endif
+
+typedef struct
+{
+  int pad;
+  _Decimal32 d;
+} d32parm_t;
+
+typedef struct
+{
+  unsigned long gprs[8];
+  double fprs[13];
+} reg_parms_t;
+
+reg_parms_t gparms;
+
+
+/* Wrapper to save the GPRs and FPRs and then jump to the real function.  */
+#define WRAPPER(NAME) \
+__asm__ ("\t.globl\t" #NAME "_asm\n\t" \
+         ".section \".opd\",\"aw\"\n\t" \
+         ".align 3\n" \
+         #NAME "_asm:\n\t" \
+         ".quad .L." #NAME "_asm,.TOC.@tocbase,0\n\t" \
+         ".text\n\t" \
+         ".type " #NAME "_asm, @function\n" \
+         ".L." #NAME "_asm:\n\t" \
+         "ld 11,gparms@got(2)\n\t" \
+         "std 3,0(11)\n\t" \
+         "std 4,8(11)\n\t" \
+         "std 5,16(11)\n\t" \
+         "std 6,24(11)\n\t" \
+         "std 7,32(11)\n\t" \
+         "std 8,40(11)\n\t" \
+         "std 9,48(11)\n\t" \
+         "std 10,56(11)\n\t" \
+         "stfd 1,64(11)\n\t" \
+         "stfd 2,72(11)\n\t" \
+         "stfd 3,80(11)\n\t" \
+         "stfd 4,88(11)\n\t" \
+         "stfd 5,96(11)\n\t" \
+         "stfd 6,104(11)\n\t" \
+         "stfd 7,112(11)\n\t" \
+         "stfd 8,120(11)\n\t" \
+         "stfd 9,128(11)\n\t" \
+         "stfd 10,136(11)\n\t" \
+         "stfd 11,144(11)\n\t" \
+         "stfd 12,152(11)\n\t" \
+         "stfd 13,160(11)\n\t" \
+         "b " #NAME "\n\t" \
+         ".long 0\n\t" \
+         ".byte 0,0,0,0,0,0,0,0\n\t" \
+         ".size " #NAME ",.-" #NAME "\n")
+
+typedef struct sf
+{
+  struct sf *backchain;
+  long a1;
+  long a2;
+  long a3;
+  long a4;
+  long a5;
+  unsigned long slot[100];
+} stack_frame_t;
+
+extern void func0_asm (double, double, double, double, double, double,
+                       double, double, double, double, double, double,
+                       double, double,
+                       _Decimal64, _Decimal128, _Decimal64);
+
+WRAPPER(func0);
+
+/* Fill up floating point registers with double arguments, forcing
+   decimal float arguments into the parameter save area.  */
+void __attribute__ ((noinline))
+func0 (double a1, double a2, double a3, double a4, double a5, double a6,
+       double a7, double a8, double a9, double a10, double a11, double a12,
+       double a13, double a14,
+       _Decimal64 a15, _Decimal128 a16, _Decimal64 a17)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  if (a1 != gparms.fprs[0]) FAILURE
+  if (a2 != gparms.fprs[1]) FAILURE
+  if (a3 != gparms.fprs[2]) FAILURE
+  if (a4 != gparms.fprs[3]) FAILURE
+  if (a5 != gparms.fprs[4]) FAILURE
+  if (a6 != gparms.fprs[5]) FAILURE
+  if (a7 != gparms.fprs[6]) FAILURE
+  if (a8 != gparms.fprs[7]) FAILURE
+  if (a9 != gparms.fprs[8]) FAILURE
+  if (a10 != gparms.fprs[9]) FAILURE
+  if (a11 != gparms.fprs[10]) FAILURE
+  if (a12 != gparms.fprs[11]) FAILURE
+  if (a13 != gparms.fprs[12]) FAILURE
+  if (a14 != *(double *)&sp->slot[13]) FAILURE
+  if (a15 != *(_Decimal64 *)&sp->slot[14]) FAILURE
+  if (a16 != *(_Decimal128 *)&sp->slot[15]) FAILURE
+  if (a17 != *(_Decimal64 *)&sp->slot[17]) FAILURE
+}
+
+extern void func1_asm (double, double, double, double, double, double,
+                       double, double, double, double, double, double,
+                       double, _Decimal128 );
+
+WRAPPER(func1);
+
+void __attribute__ ((noinline))
+func1 (double a1, double a2, double a3, double a4, double a5, double a6,
+       double a7, double a8, double a9, double a10, double a11, double a12,
+       double a13, _Decimal128 a14)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  if (a1 != gparms.fprs[0]) FAILURE
+  if (a2 != gparms.fprs[1]) FAILURE
+  if (a3 != gparms.fprs[2]) FAILURE
+  if (a4 != gparms.fprs[3]) FAILURE
+  if (a5 != gparms.fprs[4]) FAILURE
+  if (a6 != gparms.fprs[5]) FAILURE
+  if (a7 != gparms.fprs[6]) FAILURE
+  if (a8 != gparms.fprs[7]) FAILURE
+  if (a9 != gparms.fprs[8]) FAILURE
+  if (a10 != gparms.fprs[9]) FAILURE
+  if (a11 != gparms.fprs[10]) FAILURE
+  if (a12 != gparms.fprs[11]) FAILURE
+  if (a13 != gparms.fprs[12]) FAILURE
+  if (a14 != *(_Decimal128 *)&sp->slot[13]) FAILURE
+}
+
+extern void func2_asm (double, double, double, double, double, double,
+                       double, double, double, double, double, double,
+                       _Decimal128);
+
+WRAPPER(func2);
+
+void __attribute__ ((noinline))
+func2 (double a1, double a2, double a3, double a4, double a5, double a6,
+       double a7, double a8, double a9, double a10, double a11, double a12,
+       _Decimal128 a13)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  if (a1 != gparms.fprs[0]) FAILURE
+  if (a2 != gparms.fprs[1]) FAILURE
+  if (a3 != gparms.fprs[2]) FAILURE
+  if (a4 != gparms.fprs[3]) FAILURE
+  if (a5 != gparms.fprs[4]) FAILURE
+  if (a6 != gparms.fprs[5]) FAILURE
+  if (a7 != gparms.fprs[6]) FAILURE
+  if (a8 != gparms.fprs[7]) FAILURE
+  if (a9 != gparms.fprs[8]) FAILURE
+  if (a10 != gparms.fprs[9]) FAILURE
+  if (a11 != gparms.fprs[10]) FAILURE
+  if (a12 != gparms.fprs[11]) FAILURE
+  if (a13 != *(_Decimal128 *)&sp->slot[12]) FAILURE
+}
+
+extern void func3_asm (_Decimal64, _Decimal128, _Decimal64, _Decimal128,
+                       _Decimal64, _Decimal128, _Decimal64, _Decimal128,
+                       _Decimal64, _Decimal128);
+
+WRAPPER(func3);
+
+void __attribute__ ((noinline))
+func3 (_Decimal64 a1, _Decimal128 a2, _Decimal64 a3, _Decimal128 a4,
+       _Decimal64 a5, _Decimal128 a6, _Decimal64 a7, _Decimal128 a8,
+       _Decimal64 a9, _Decimal128 a10)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  if (a1 != *(_Decimal64 *)&gparms.fprs[0]) FAILURE /* f1 */
+  if (a2 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+  if (a3 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+  if (a4 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
+  if (a5 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */
+  if (a6 != *(_Decimal128 *)&gparms.fprs[9]) FAILURE /* f10 & f11 */
+  if (a7 != *(_Decimal64 *)&gparms.fprs[11]) FAILURE /* f12 */
+  if (a8 != *(_Decimal128 *)&sp->slot[10]) FAILURE
+  if (a9 != *(_Decimal64 *)&sp->slot[12]) FAILURE
+  if (a10 != *(_Decimal128 *)&sp->slot[13]) FAILURE
+}
+
+extern void func4_asm (_Decimal128, _Decimal64, _Decimal128, _Decimal64,
+                       _Decimal128, _Decimal64, _Decimal128, _Decimal64);
+
+WRAPPER(func4);
+
+void __attribute__ ((noinline))
+func4 (_Decimal128 a1, _Decimal64 a2, _Decimal128 a3, _Decimal64 a4,
+       _Decimal128 a5, _Decimal64 a6, _Decimal128 a7, _Decimal64 a8)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  if (a1 != *(_Decimal128 *)&gparms.fprs[1]) FAILURE /* f2 & f3 */
+  if (a2 != *(_Decimal64 *)&gparms.fprs[3]) FAILURE /* f4 */
+  if (a3 != *(_Decimal128 *)&gparms.fprs[5]) FAILURE /* f6 & f7 */
+  if (a4 != *(_Decimal64 *)&gparms.fprs[7]) FAILURE /* f8 */
+  if (a5 != *(_Decimal128 *)&gparms.fprs[9]) FAILURE /* f10 & f11 */
+  if (a6 != *(_Decimal64 *)&gparms.fprs[11]) FAILURE /* f12 */
+  if (a7 != *(_Decimal128 *)&sp->slot[9]) FAILURE
+  if (a8 != *(_Decimal64 *)&sp->slot[11]) FAILURE
+}
+
+extern void func5_asm (_Decimal32, _Decimal32, _Decimal32, _Decimal32,
+                       _Decimal32, _Decimal32, _Decimal32, _Decimal32,
+                       _Decimal32, _Decimal32, _Decimal32, _Decimal32,
+                       _Decimal32, _Decimal32, _Decimal32, _Decimal32);
+
+WRAPPER(func5);
+
+void __attribute__ ((noinline))
+func5 (_Decimal32 a1, _Decimal32 a2, _Decimal32 a3, _Decimal32 a4,
+       _Decimal32 a5, _Decimal32 a6, _Decimal32 a7, _Decimal32 a8,
+       _Decimal32 a9, _Decimal32 a10, _Decimal32 a11, _Decimal32 a12,
+       _Decimal32 a13, _Decimal32 a14, _Decimal32 a15, _Decimal32 a16)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  /* _Decimal32 is passed in the lower half of an FPR or parameter slot.  */
+  if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */
+  if (a2 != ((d32parm_t *)&gparms.fprs[1])->d) FAILURE /* f2 */
+  if (a3 != ((d32parm_t *)&gparms.fprs[2])->d) FAILURE /* f3 */
+  if (a4 != ((d32parm_t *)&gparms.fprs[3])->d) FAILURE /* f4 */
+  if (a5 != ((d32parm_t *)&gparms.fprs[4])->d) FAILURE /* f5 */
+  if (a6 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */
+  if (a7 != ((d32parm_t *)&gparms.fprs[6])->d) FAILURE /* f7 */
+  if (a8 != ((d32parm_t *)&gparms.fprs[7])->d) FAILURE /* f8 */
+  if (a9 != ((d32parm_t *)&gparms.fprs[8])->d) FAILURE /* f9 */
+  if (a10 != ((d32parm_t *)&gparms.fprs[9])->d) FAILURE /* f10 */
+  if (a11 != ((d32parm_t *)&gparms.fprs[10])->d) FAILURE /* f11 */
+  if (a12 != ((d32parm_t *)&gparms.fprs[11])->d) FAILURE /* f12 */
+  if (a13 != ((d32parm_t *)&gparms.fprs[12])->d) FAILURE /* f13 */
+  if (a14 != ((d32parm_t *)&sp->slot[13])->d) FAILURE
+  if (a15 != ((d32parm_t *)&sp->slot[14])->d) FAILURE
+  if (a16 != ((d32parm_t *)&sp->slot[15])->d) FAILURE
+}
+
+extern void func6_asm (_Decimal32, _Decimal64, _Decimal128,
+                       _Decimal32, _Decimal64, _Decimal128,
+                       _Decimal32, _Decimal64, _Decimal128,
+                       _Decimal32, _Decimal64, _Decimal128);
+
+WRAPPER(func6);
+
+void __attribute__ ((noinline))
+func6 (_Decimal32 a1, _Decimal64 a2, _Decimal128 a3,
+       _Decimal32 a4, _Decimal64 a5, _Decimal128 a6,
+       _Decimal32 a7, _Decimal64 a8, _Decimal128 a9,
+       _Decimal32 a10, _Decimal64 a11, _Decimal128 a12)
+{
+  stack_frame_t *sp;
+
+  sp = __builtin_frame_address (0);
+  sp = sp->backchain;
+
+  if (a1 != ((d32parm_t *)&gparms.fprs[0])->d) FAILURE /* f1 */
+  if (a2 != *(_Decimal64 *)&gparms.fprs[1]) FAILURE /* f2 */
+  if (a3 != *(_Decimal128 *)&gparms.fprs[3]) FAILURE /* f4 & f5 */
+  if (a4 != ((d32parm_t *)&gparms.fprs[5])->d) FAILURE /* f6 */
+  if (a5 != *(_Decimal64 *)&gparms.fprs[6]) FAILURE /* f7 */
+  if (a6 != *(_Decimal128 *)&gparms.fprs[7]) FAILURE /* f8 & f9 */
+  if (a7 != ((d32parm_t *)&gparms.fprs[9])->d) FAILURE /* f10 */
+  if (a8 != *(_Decimal64 *)&gparms.fprs[10]) FAILURE /* f11 */
+  if (a9 != *(_Decimal128 *)&gparms.fprs[11]) FAILURE /* f12 & f13 */
+  if (a10 != ((d32parm_t *)&sp->slot[12])->d) FAILURE
+  if (a11 != *(_Decimal64 *)&sp->slot[13]) FAILURE
+}
+
+int
+main (void)
+{
+  func0_asm (1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5, 11.5, 12.5, 13.5,
+             14.5, 15.2dd, 16.2dl, 17.2dd);
+  func1_asm (101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5, 108.5, 109.5,
+             110.5, 111.5, 112.5, 113.5, 114.2dd);
+  func2_asm (201.5, 202.5, 203.5, 204.5, 205.5, 206.5, 207.5, 208.5, 209.5,
+             210.5, 211.5, 212.5, 213.2dd);
+  func3_asm (301.2dd, 302.2dl, 303.2dd, 304.2dl, 305.2dd, 306.2dl, 307.2dd,
+             308.2dl, 309.2dd, 310.2dl);
+  func4_asm (401.2dl, 402.2dd, 403.2dl, 404.2dd, 405.2dl, 406.2dd, 407.2dl,
+             408.2dd);
+#if 0
+  /* _Decimal32 doesn't yet follow the ABI; enable this when it does.  */
+  func5_asm (501.2df, 502.2df, 503.2df, 504.2df, 505.2df, 506.2df, 507.2df,
+             508.2df, 509.2df, 510.2df, 511.2df, 512.2df, 513.2df, 514.2df,
+             515.2df, 516.2df);
+  func6_asm (601.2df, 602.2dd, 603.2dl, 604.2df, 605.2dd, 606.2dl,
+             607.2df, 608.2dd, 609.2dl, 610.2df, 611.2dd, 612.2dl);
+#endif
+
+  if (failcnt != 0)
+    abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-builtin-8.c b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-8.c
new file mode 100644
index 00000000000..836b3851cad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-builtin-8.c
@@ -0,0 +1,97 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O3 -mcpu=power7" } */
+
+/* Test the various load/store variants.  */
+
+#include <altivec.h>
+
+#define TEST_COPY(NAME, TYPE) \
+void NAME ## _copy_native (vector TYPE *a, vector TYPE *b) \
+{ \
+  *a = *b; \
+} \
+ \
+void NAME ## _copy_vec (vector TYPE *a, vector TYPE *b) \
+{ \
+  vector TYPE x = vec_ld (0, b); \
+  vec_st (x, 0, a); \
+} \
+
+#define TEST_COPYL(NAME, TYPE) \
+void NAME ## _lvxl (vector TYPE *a, vector TYPE *b) \
+{ \
+  vector TYPE x = vec_ldl (0, b); \
+  vec_stl (x, 0, a); \
+} \
+
+#define TEST_VSX_COPY(NAME, TYPE) \
+void NAME ## _copy_vsx (vector TYPE *a, vector TYPE *b) \
+{ \
+  vector TYPE x = vec_vsx_ld (0, b); \
+  vec_vsx_st (x, 0, a); \
+} \
+
+#define TEST_ALIGN(NAME, TYPE) \
+void NAME ## _align (vector unsigned char *a, TYPE *b) \
+{ \
+  vector unsigned char x = vec_lvsl (0, b); \
+  vector unsigned char y = vec_lvsr (0, b); \
+  vec_st (x, 0, a); \
+  vec_st (y, 8, a); \
+}
+
+#ifndef NO_COPY
+TEST_COPY(uchar, unsigned char)
+TEST_COPY(schar, signed char)
+TEST_COPY(bchar, bool char)
+TEST_COPY(ushort, unsigned short)
+TEST_COPY(sshort, signed short)
+TEST_COPY(bshort, bool short)
+TEST_COPY(uint, unsigned int)
+TEST_COPY(sint, signed int)
+TEST_COPY(bint, bool int)
+TEST_COPY(float, float)
+TEST_COPY(double, double)
+#endif /* NO_COPY */
+
+#ifndef NO_COPYL
+TEST_COPYL(uchar, unsigned char)
+TEST_COPYL(schar, signed char)
+TEST_COPYL(bchar, bool char)
+TEST_COPYL(ushort, unsigned short)
+TEST_COPYL(sshort, signed short)
+TEST_COPYL(bshort, bool short)
+TEST_COPYL(uint, unsigned int)
+TEST_COPYL(sint, signed int)
+TEST_COPYL(bint, bool int)
+TEST_COPYL(float, float)
+TEST_COPYL(double, double)
+#endif /* NO_COPYL */
+
+#ifndef NO_ALIGN
+TEST_ALIGN(uchar, unsigned char)
+TEST_ALIGN(schar, signed char)
+TEST_ALIGN(ushort, unsigned short)
+TEST_ALIGN(sshort, signed short)
+TEST_ALIGN(uint, unsigned int)
+TEST_ALIGN(sint, signed int)
+TEST_ALIGN(float, float)
+TEST_ALIGN(double, double)
+#endif /* NO_ALIGN */
+
+
+#ifndef NO_VSX_COPY
+TEST_VSX_COPY(uchar, unsigned char)
+TEST_VSX_COPY(schar, signed char)
+TEST_VSX_COPY(bchar, bool char)
+TEST_VSX_COPY(ushort, unsigned short)
+TEST_VSX_COPY(sshort, signed short)
+TEST_VSX_COPY(bshort, bool short)
+TEST_VSX_COPY(uint, unsigned int)
+TEST_VSX_COPY(sint, signed int)
+TEST_VSX_COPY(bint, bool int)
+TEST_VSX_COPY(float, float)
+TEST_VSX_COPY(double, double)
+#endif /* NO_VSX_COPY */
--
2.11.4.GIT
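As a usage sketch of what the new vsx-builtin-8.c test exercises (this file is
not part of the patch, and the function name copy_v2df is made up for this
illustration): with the V2DF support added here, the overloaded vec_vsx_ld and
vec_vsx_st builtins accept vector double pointers, exactly as in the _copy_vsx
functions generated by TEST_VSX_COPY above.  A standalone file like the
following is expected to compile with -O2 -mcpu=power7, or on any target where
-mvsx is in effect:

    #include <altivec.h>

    /* Copy one 128-bit vector of two doubles through the VSX load/store
       builtins, mirroring TEST_VSX_COPY(double, double) in the test.  */
    void
    copy_v2df (vector double *dst, vector double *src)
    {
      vector double x = vec_vsx_ld (0, src);
      vec_vsx_st (x, 0, dst);
    }

The same shape works for the vector long long overloads; contrast this with
the _copy_vec functions in the test, which go through the older vec_ld/vec_st
builtins instead.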