/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2018 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"

/* This file should be included last.  */
#include "target-def.h"
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif
static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int savres_strategy;
} rs6000_stack_t;
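/* For illustration: the layout described by this structure is computed by
   rs6000_stack_info (declared later in this file), and all offsets are
   relative to the initial stack pointer.  A hypothetical caller might probe
   it like so:

     rs6000_stack_t *info = rs6000_stack_info ();
     if (info->lr_save_p)
       ... save the link register at sp + info->lr_save_offset ...  */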
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
  bool toc_is_wrapped_separately;
} machine_function;
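/* For illustration: following the usual GCC convention, this structure is
   reached through the current function, so code in this file writes tests
   such as:

     if (cfun->machine->ra_needs_full_frame)
       ... a full frame must be laid out ...  */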
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

#if TARGET_ELF
/* Note whether IEEE 128-bit floating point was passed or returned, either as
   the __float128/_Float128 explicit type, or when long double is IEEE 128-bit
   floating point.  We changed the default C++ mangling for these types and we
   may want to generate a weak alias of the old mangling (U10__float128) to the
   new mangling (u9__ieee128).  */
static bool rs6000_passes_ieee128;
#endif
/* Generate the mangled name (i.e. U10__float128) used in GCC 8.1, and not the
   name used in current releases (i.e. u9__ieee128).  */
static bool ieee128_mangling_gcc_8_1;
/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "htm-no-suspend",	PPC_FEATURE2_HTM_NO_SUSPEND,	1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 },
  { "darn",		PPC_FEATURE2_DARN,		1 },
  { "scv",		PPC_FEATURE2_SCV,		1 }
};
/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT = 0,		/* default clone.  */
  CLONE_ISA_2_05,		/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,		/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,		/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,		/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};
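/* Usage example: these masks back function multiversioning with the
   target_clones attribute; the resolver dispatches to the highest clone
   whose HWCAP name above is supported at run time.  E.g. (illustrative):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long mod_func (long a, long b) { return a % b; }  */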
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when testing for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */
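/* For illustration: these bits are tested by AND-ing against the addr_mask
   bytes in the reg_addr table declared below, e.g. to ask whether DFmode
   supports reg+offset addressing in floating point registers:

     if ((reg_addr[DFmode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_OFFSET) != 0)
       ... a D-form address is valid ...

   The mode_supports_* helpers below wrap exactly this pattern.  */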
/* Register type masks based on the type, of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_dq_form (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
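/* For illustration: a pattern that is neither a single SET nor a PARALLEL
   containing only SET/CLOBBER/USE elements, e.g. (a sketch):

     (parallel [(set (reg:DI 3) (reg:DI 4))
		(return)])

   is the kind of insn the wrapper above screens out, answering false
   instead of letting the generic store_data_bypass_p assert.  */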
/* Processor costs (relative to an add) */
const struct processor_costs *rs6000_cost;

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
  0,			    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
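/* For illustration: COSTS_N_INSNS is GCC's usual cost scale factor (one
   unit per instruction, defined in rtl.h), so the rtx cost hooks later in
   this file can compare entries directly.  A hypothetical check:

     if (rs6000_cost->sdiv > COSTS_N_INSNS (20))
       ... treat single-precision division as expensive ...  */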
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_X
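/* For illustration: the block above is the classic X-macro technique.  Each
   RS6000_BUILTIN_x macro is redefined to emit one initializer row, and
   rs6000-builtin.def is then included so every builtin expands into the
   table; the same .def file is re-included elsewhere with different macro
   bodies to build parallel tables.  A minimal standalone analogue:

     #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
       { NAME, ICODE, MASK, ATTR },
     static const struct rs6000_builtin_info_type t[] = {
     #include "rs6000-builtin.def"
     };
     #undef RS6000_BUILTIN_1
*/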
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void htm_init_builtins (void);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  /* GPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  /* CR registers */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "altivec",   1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "shortcall", 0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
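/* Example: ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 >> 0, the
   bit for %v0, while the last AltiVec register maps to 0x80000000 >> 31
   == 0x00000001, matching the VRSAVE bit order described above.  */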
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1700 #undef TARGET_VECTORIZE_ADD_STMT_COST
1701 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1702 #undef TARGET_VECTORIZE_FINISH_COST
1703 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1704 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1705 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1707 #undef TARGET_INIT_BUILTINS
1708 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1709 #undef TARGET_BUILTIN_DECL
1710 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1712 #undef TARGET_FOLD_BUILTIN
1713 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1714 #undef TARGET_GIMPLE_FOLD_BUILTIN
1715 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1717 #undef TARGET_EXPAND_BUILTIN
1718 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1720 #undef TARGET_MANGLE_TYPE
1721 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1723 #undef TARGET_INIT_LIBFUNCS
1724 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1726 #if TARGET_MACHO
1727 #undef TARGET_BINDS_LOCAL_P
1728 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1729 #endif
1731 #undef TARGET_MS_BITFIELD_LAYOUT_P
1732 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1734 #undef TARGET_ASM_OUTPUT_MI_THUNK
1735 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1737 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1738 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1740 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1741 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1743 #undef TARGET_REGISTER_MOVE_COST
1744 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1745 #undef TARGET_MEMORY_MOVE_COST
1746 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1747 #undef TARGET_CANNOT_COPY_INSN_P
1748 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1749 #undef TARGET_RTX_COSTS
1750 #define TARGET_RTX_COSTS rs6000_rtx_costs
1751 #undef TARGET_ADDRESS_COST
1752 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1753 #undef TARGET_INSN_COST
1754 #define TARGET_INSN_COST rs6000_insn_cost
1756 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1757 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1759 #undef TARGET_PROMOTE_FUNCTION_MODE
1760 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1762 #undef TARGET_RETURN_IN_MEMORY
1763 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1765 #undef TARGET_RETURN_IN_MSB
1766 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1768 #undef TARGET_SETUP_INCOMING_VARARGS
1769 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1771 /* Always strict argument naming on rs6000. */
1772 #undef TARGET_STRICT_ARGUMENT_NAMING
1773 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1774 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1775 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1776 #undef TARGET_SPLIT_COMPLEX_ARG
1777 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1778 #undef TARGET_MUST_PASS_IN_STACK
1779 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1780 #undef TARGET_PASS_BY_REFERENCE
1781 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1782 #undef TARGET_ARG_PARTIAL_BYTES
1783 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1784 #undef TARGET_FUNCTION_ARG_ADVANCE
1785 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1786 #undef TARGET_FUNCTION_ARG
1787 #define TARGET_FUNCTION_ARG rs6000_function_arg
1788 #undef TARGET_FUNCTION_ARG_PADDING
1789 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1790 #undef TARGET_FUNCTION_ARG_BOUNDARY
1791 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1793 #undef TARGET_BUILD_BUILTIN_VA_LIST
1794 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1796 #undef TARGET_EXPAND_BUILTIN_VA_START
1797 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1799 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1800 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1802 #undef TARGET_EH_RETURN_FILTER_MODE
1803 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1805 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1806 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1808 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1809 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1811 #undef TARGET_FLOATN_MODE
1812 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1814 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1815 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1817 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1818 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1820 #undef TARGET_MD_ASM_ADJUST
1821 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1823 #undef TARGET_OPTION_OVERRIDE
1824 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1826 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1827 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1828 rs6000_builtin_vectorized_function
1830 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1831 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1832 rs6000_builtin_md_vectorized_function
1834 #undef TARGET_STACK_PROTECT_GUARD
1835 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1837 #if !TARGET_MACHO
1838 #undef TARGET_STACK_PROTECT_FAIL
1839 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1840 #endif
1842 #ifdef HAVE_AS_TLS
1843 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1844 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1845 #endif
1847 /* Use a 32-bit anchor range. This leads to sequences like:
1849 addis tmp,anchor,high
1850 add dest,tmp,low
1852 where tmp itself acts as an anchor, and can be shared between
1853 accesses to the same 64k page. */
1854 #undef TARGET_MIN_ANCHOR_OFFSET
1855 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1856 #undef TARGET_MAX_ANCHOR_OFFSET
1857 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1858 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1859 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1860 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1861 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1863 #undef TARGET_BUILTIN_RECIPROCAL
1864 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1866 #undef TARGET_SECONDARY_RELOAD
1867 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1868 #undef TARGET_SECONDARY_MEMORY_NEEDED
1869 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1870 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1871 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1873 #undef TARGET_LEGITIMATE_ADDRESS_P
1874 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1876 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1877 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1879 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1880 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1882 #undef TARGET_CAN_ELIMINATE
1883 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1885 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1886 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1888 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1889 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1891 #undef TARGET_TRAMPOLINE_INIT
1892 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1894 #undef TARGET_FUNCTION_VALUE
1895 #define TARGET_FUNCTION_VALUE rs6000_function_value
1897 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1898 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1900 #undef TARGET_OPTION_SAVE
1901 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1903 #undef TARGET_OPTION_RESTORE
1904 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1906 #undef TARGET_OPTION_PRINT
1907 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1909 #undef TARGET_CAN_INLINE_P
1910 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1912 #undef TARGET_SET_CURRENT_FUNCTION
1913 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1915 #undef TARGET_LEGITIMATE_CONSTANT_P
1916 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1918 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1919 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1921 #undef TARGET_CAN_USE_DOLOOP_P
1922 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1924 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1925 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1927 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1928 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1929 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1930 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1931 #undef TARGET_UNWIND_WORD_MODE
1932 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1934 #undef TARGET_OFFLOAD_OPTIONS
1935 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1937 #undef TARGET_C_MODE_FOR_SUFFIX
1938 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1940 #undef TARGET_INVALID_BINARY_OP
1941 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1943 #undef TARGET_OPTAB_SUPPORTED_P
1944 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1946 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1947 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1949 #undef TARGET_COMPARE_VERSION_PRIORITY
1950 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1952 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1953 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1954 rs6000_generate_version_dispatcher_body
1956 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1957 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1958 rs6000_get_function_versions_dispatcher
1960 #undef TARGET_OPTION_FUNCTION_VERSIONS
1961 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1963 #undef TARGET_HARD_REGNO_NREGS
1964 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1965 #undef TARGET_HARD_REGNO_MODE_OK
1966 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1968 #undef TARGET_MODES_TIEABLE_P
1969 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1971 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1972 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1973 rs6000_hard_regno_call_part_clobbered
1975 #undef TARGET_SLOW_UNALIGNED_ACCESS
1976 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1978 #undef TARGET_CAN_CHANGE_MODE_CLASS
1979 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1981 #undef TARGET_CONSTANT_ALIGNMENT
1982 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1984 #undef TARGET_STARTING_FRAME_OFFSET
1985 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1987 #if TARGET_ELF && RS6000_WEAK
1988 #undef TARGET_ASM_GLOBALIZE_DECL_NAME
1989 #define TARGET_ASM_GLOBALIZE_DECL_NAME rs6000_globalize_decl_name
1990 #endif
1993 /* Processor table. */
1994 struct rs6000_ptt
1996 const char *const name; /* Canonical processor name. */
1997 const enum processor_type processor; /* Processor type enum value. */
1998 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
2001 static struct rs6000_ptt const processor_target_table[] =
2003 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
2004 #include "rs6000-cpus.def"
2005 #undef RS6000_CPU
2008 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
2009 name is invalid. */
2011 static int
2012 rs6000_cpu_name_lookup (const char *name)
2014 size_t i;
2016 if (name != NULL)
2018 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2019 if (! strcmp (name, processor_target_table[i].name))
2020 return (int)i;
2023 return -1;
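/* A sketch of a typical use, assuming "power9" is one of the names pulled in
   from rs6000-cpus.def:

     int idx = rs6000_cpu_name_lookup ("power9");
     if (idx >= 0)
       isa_flags = processor_target_table[idx].target_enable;

   where isa_flags is a hypothetical HOST_WIDE_INT variable; a misspelled
   name simply yields -1.  */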
2027 /* Return number of consecutive hard regs needed starting at reg REGNO
2028 to hold something of mode MODE.
2029 This is ordinarily the length in words of a value of mode MODE
2030 but can be less for certain modes in special long registers.
2032 POWER and PowerPC GPRs hold 32 bits worth;
2033 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2035 static int
2036 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2038 unsigned HOST_WIDE_INT reg_size;
2040 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2041 128-bit floating point that can go in vector registers, which has VSX
2042 memory addressing. */
2043 if (FP_REGNO_P (regno))
2044 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2045 ? UNITS_PER_VSX_WORD
2046 : UNITS_PER_FP_WORD);
2048 else if (ALTIVEC_REGNO_P (regno))
2049 reg_size = UNITS_PER_ALTIVEC_WORD;
2051 else
2052 reg_size = UNITS_PER_WORD;
2054 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
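/* Worked examples of the rounding-up division above: DFmode (8 bytes) needs
   (8 + 4 - 1) / 4 = 2 GPRs when UNITS_PER_WORD is 4, but a single GPR when
   it is 8; V4SImode (16 bytes) fits one Altivec register because
   UNITS_PER_ALTIVEC_WORD is 16.  */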
2057 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2058 MODE. */
2059 static int
2060 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2062 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2064 if (COMPLEX_MODE_P (mode))
2065 mode = GET_MODE_INNER (mode);
2067 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2068 register pairs; we use PTImode where we need to deal with quad
2069 word memory operations. Don't allow quad words in the argument or frame
2070 pointer registers, just registers 0..31. */
2071 if (mode == PTImode)
2072 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2073 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2074 && ((regno & 1) == 0));
2076 /* VSX registers that overlap the FPR registers are larger (128 bits) than on
2077 non-VSX implementations. Don't allow an item to be split between an FP register
2078 and an Altivec register. Allow TImode in all VSX registers if the user
2079 asked for it. */
2080 if (TARGET_VSX && VSX_REGNO_P (regno)
2081 && (VECTOR_MEM_VSX_P (mode)
2082 || FLOAT128_VECTOR_P (mode)
2083 || reg_addr[mode].scalar_in_vmx_p
2084 || mode == TImode
2085 || (TARGET_VADDUQM && mode == V1TImode)))
2087 if (FP_REGNO_P (regno))
2088 return FP_REGNO_P (last_regno);
2090 if (ALTIVEC_REGNO_P (regno))
2092 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2093 return 0;
2095 return ALTIVEC_REGNO_P (last_regno);
2099 /* The GPRs can hold any mode, but values bigger than one register
2100 cannot go past R31. */
2101 if (INT_REGNO_P (regno))
2102 return INT_REGNO_P (last_regno);
2104 /* The float registers (except for VSX vector modes) can only hold floating
2105 modes and DImode. */
2106 if (FP_REGNO_P (regno))
2108 if (FLOAT128_VECTOR_P (mode))
2109 return false;
2111 if (SCALAR_FLOAT_MODE_P (mode)
2112 && (mode != TDmode || (regno % 2) == 0)
2113 && FP_REGNO_P (last_regno))
2114 return 1;
2116 if (GET_MODE_CLASS (mode) == MODE_INT)
2118 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2119 return 1;
2121 if (TARGET_P8_VECTOR && (mode == SImode))
2122 return 1;
2124 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2125 return 1;
2128 return 0;
2131 /* The CR registers can only hold CC modes. */
2132 if (CR_REGNO_P (regno))
2133 return GET_MODE_CLASS (mode) == MODE_CC;
2135 if (CA_REGNO_P (regno))
2136 return mode == Pmode || mode == SImode;
2138 /* AltiVec modes can go only in AltiVec registers. */
2139 if (ALTIVEC_REGNO_P (regno))
2140 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2141 || mode == V1TImode);
2143 /* We cannot put non-VSX TImode or PTImode anywhere except the general registers,
2144 and it must be able to fit within the register set. */
2146 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
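/* Examples of the rules above: TDmode is accepted only in even-numbered FPRs;
   CCmode is accepted only in the condition registers; and PTImode is accepted
   in an even GPR such as r10 but rejected in r11.  */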
2149 /* Implement TARGET_HARD_REGNO_NREGS. */
2151 static unsigned int
2152 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2154 return rs6000_hard_regno_nregs[mode][regno];
2157 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2159 static bool
2160 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2162 return rs6000_hard_regno_mode_ok_p[mode][regno];
2165 /* Implement TARGET_MODES_TIEABLE_P.
2167 PTImode cannot tie with other modes because PTImode is restricted to even
2168 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2169 57744).
2171 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2172 128-bit floating point on VSX systems ties with other vectors. */
2174 static bool
2175 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2177 if (mode1 == PTImode)
2178 return mode2 == PTImode;
2179 if (mode2 == PTImode)
2180 return false;
2182 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2183 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2184 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2185 return false;
2187 if (SCALAR_FLOAT_MODE_P (mode1))
2188 return SCALAR_FLOAT_MODE_P (mode2);
2189 if (SCALAR_FLOAT_MODE_P (mode2))
2190 return false;
2192 if (GET_MODE_CLASS (mode1) == MODE_CC)
2193 return GET_MODE_CLASS (mode2) == MODE_CC;
2194 if (GET_MODE_CLASS (mode2) == MODE_CC)
2195 return false;
2197 return true;
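/* Examples of the ordering above: SFmode and DFmode tie (both scalar float);
   V4SImode and V2DFmode tie (both Altivec/VSX vectors); TImode never ties
   with PTImode; and CCmode ties only with the other CC modes.  */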
2200 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2202 static bool
2203 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2205 if (TARGET_32BIT
2206 && TARGET_POWERPC64
2207 && GET_MODE_SIZE (mode) > 4
2208 && INT_REGNO_P (regno))
2209 return true;
2211 if (TARGET_VSX
2212 && FP_REGNO_P (regno)
2213 && GET_MODE_SIZE (mode) > 8
2214 && !FLOAT128_2REG_P (mode))
2215 return true;
2217 return false;
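/* For instance, with -m32 -mpowerpc64 a DImode value occupies one 64-bit GPR,
   but the 32-bit ABI only preserves the low 32 bits across calls, so the
   first test above reports the register as partially clobbered.  */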
2220 /* Print interesting facts about registers. */
2221 static void
2222 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2224 int r, m;
2226 for (r = first_regno; r <= last_regno; ++r)
2228 const char *comma = "";
2229 int len;
2231 if (first_regno == last_regno)
2232 fprintf (stderr, "%s:\t", reg_name);
2233 else
2234 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2236 len = 8;
2237 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2238 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2240 if (len > 70)
2242 fprintf (stderr, ",\n\t");
2243 len = 8;
2244 comma = "";
2247 if (rs6000_hard_regno_nregs[m][r] > 1)
2248 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2249 rs6000_hard_regno_nregs[m][r]);
2250 else
2251 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2253 comma = ", ";
2256 if (call_used_regs[r])
2258 if (len > 70)
2260 fprintf (stderr, ",\n\t");
2261 len = 8;
2262 comma = "";
2265 len += fprintf (stderr, "%s%s", comma, "call-used");
2266 comma = ", ";
2269 if (fixed_regs[r])
2271 if (len > 70)
2273 fprintf (stderr, ",\n\t");
2274 len = 8;
2275 comma = "";
2278 len += fprintf (stderr, "%s%s", comma, "fixed");
2279 comma = ", ";
2282 if (len > 70)
2284 fprintf (stderr, ",\n\t");
2285 comma = "";
2288 len += fprintf (stderr, "%sreg-class = %s", comma,
2289 reg_class_names[(int)rs6000_regno_regclass[r]]);
2290 comma = ", ";
2292 if (len > 70)
2294 fprintf (stderr, ",\n\t");
2295 comma = "";
2298 fprintf (stderr, "%sregno = %d\n", comma, r);
2302 static const char *
2303 rs6000_debug_vector_unit (enum rs6000_vector v)
2305 const char *ret;
2307 switch (v)
2309 case VECTOR_NONE: ret = "none"; break;
2310 case VECTOR_ALTIVEC: ret = "altivec"; break;
2311 case VECTOR_VSX: ret = "vsx"; break;
2312 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2313 case VECTOR_OTHER: ret = "other"; break;
2314 default: ret = "unknown"; break;
2317 return ret;
2320 /* Inner function printing just the address mask for a particular reload
2321 register class. */
2322 DEBUG_FUNCTION char *
2323 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2325 static char ret[8];
2326 char *p = ret;
2328 if ((mask & RELOAD_REG_VALID) != 0)
2329 *p++ = 'v';
2330 else if (keep_spaces)
2331 *p++ = ' ';
2333 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2334 *p++ = 'm';
2335 else if (keep_spaces)
2336 *p++ = ' ';
2338 if ((mask & RELOAD_REG_INDEXED) != 0)
2339 *p++ = 'i';
2340 else if (keep_spaces)
2341 *p++ = ' ';
2343 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2344 *p++ = 'O';
2345 else if ((mask & RELOAD_REG_OFFSET) != 0)
2346 *p++ = 'o';
2347 else if (keep_spaces)
2348 *p++ = ' ';
2350 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2351 *p++ = '+';
2352 else if (keep_spaces)
2353 *p++ = ' ';
2355 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2356 *p++ = '+';
2357 else if (keep_spaces)
2358 *p++ = ' ';
2360 if ((mask & RELOAD_REG_AND_M16) != 0)
2361 *p++ = '&';
2362 else if (keep_spaces)
2363 *p++ = ' ';
2365 *p = '\0';
2367 return ret;
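/* A sample of the encoding: a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED
   and RELOAD_REG_OFFSET set prints as "vio" when KEEP_SPACES is false, and as
   "v io   " (the unset positions blanked) when it is true.  */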
2370 /* Print the address masks in a human readable fashion. */
2371 DEBUG_FUNCTION void
2372 rs6000_debug_print_mode (ssize_t m)
2374 ssize_t rc;
2375 int spaces = 0;
2376 bool fuse_extra_p;
2378 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2379 for (rc = 0; rc < N_RELOAD_REG; rc++)
2380 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2381 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2383 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2384 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2385 fprintf (stderr, " Reload=%c%c",
2386 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2387 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2388 else
2389 spaces += sizeof (" Reload=sl") - 1;
2391 if (reg_addr[m].scalar_in_vmx_p)
2393 fprintf (stderr, "%*s Upper=y", spaces, "");
2394 spaces = 0;
2396 else
2397 spaces += sizeof (" Upper=y") - 1;
2399 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2400 || reg_addr[m].fused_toc);
2401 if (!fuse_extra_p)
2403 for (rc = 0; rc < N_RELOAD_REG; rc++)
2405 if (rc != RELOAD_REG_ANY)
2407 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2409 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2410 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2411 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2413 fuse_extra_p = true;
2414 break;
2420 if (fuse_extra_p)
2422 fprintf (stderr, "%*s Fuse:", spaces, "");
2423 spaces = 0;
2425 for (rc = 0; rc < N_RELOAD_REG; rc++)
2427 if (rc != RELOAD_REG_ANY)
2429 char load, store;
2431 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2432 load = 'l';
2433 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2434 load = 'L';
2435 else
2436 load = '-';
2438 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2439 store = 's';
2440 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2441 store = 'S';
2442 else
2443 store = '-';
2445 if (load == '-' && store == '-')
2446 spaces += 5;
2447 else
2449 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2450 reload_reg_map[rc].name[0], load, store);
2451 spaces = 0;
2456 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2458 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2459 spaces = 0;
2461 else
2462 spaces += sizeof (" P8gpr") - 1;
2464 if (reg_addr[m].fused_toc)
2466 fprintf (stderr, "%*sToc", (spaces + 1), "");
2467 spaces = 0;
2469 else
2470 spaces += sizeof (" Toc") - 1;
2472 else
2473 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2475 if (rs6000_vector_unit[m] != VECTOR_NONE
2476 || rs6000_vector_mem[m] != VECTOR_NONE)
2478 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2479 spaces, "",
2480 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2481 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2484 fputs ("\n", stderr);
2487 #define DEBUG_FMT_ID "%-32s= "
2488 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2489 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2490 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
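/* For example, fprintf (stderr, DEBUG_FMT_D, "tls_size", 32) prints the name
   left-justified in a 32-column field followed by "= 32", which keeps the
   values of successive lines vertically aligned.  */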
2492 /* Print various interesting information with -mdebug=reg. */
2493 static void
2494 rs6000_debug_reg_global (void)
2496 static const char *const tf[2] = { "false", "true" };
2497 const char *nl = (const char *)0;
2498 int m;
2499 size_t m1, m2, v;
2500 char costly_num[20];
2501 char nop_num[20];
2502 char flags_buffer[40];
2503 const char *costly_str;
2504 const char *nop_str;
2505 const char *trace_str;
2506 const char *abi_str;
2507 const char *cmodel_str;
2508 struct cl_target_option cl_opts;
2510 /* Modes we want tieable information on. */
2511 static const machine_mode print_tieable_modes[] = {
2512 QImode,
2513 HImode,
2514 SImode,
2515 DImode,
2516 TImode,
2517 PTImode,
2518 SFmode,
2519 DFmode,
2520 TFmode,
2521 IFmode,
2522 KFmode,
2523 SDmode,
2524 DDmode,
2525 TDmode,
2526 V16QImode,
2527 V8HImode,
2528 V4SImode,
2529 V2DImode,
2530 V1TImode,
2531 V32QImode,
2532 V16HImode,
2533 V8SImode,
2534 V4DImode,
2535 V2TImode,
2536 V4SFmode,
2537 V2DFmode,
2538 V8SFmode,
2539 V4DFmode,
2540 CCmode,
2541 CCUNSmode,
2542 CCEQmode,
2545 /* Virtual regs we are interested in. */
2546 static const struct {
2547 int regno; /* register number. */
2548 const char *name; /* register name. */
2549 } virtual_regs[] = {
2550 { STACK_POINTER_REGNUM, "stack pointer:" },
2551 { TOC_REGNUM, "toc: " },
2552 { STATIC_CHAIN_REGNUM, "static chain: " },
2553 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2554 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2555 { ARG_POINTER_REGNUM, "arg pointer: " },
2556 { FRAME_POINTER_REGNUM, "frame pointer:" },
2557 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2558 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2559 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2560 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2561 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2562 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2563 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2564 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2565 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2568 fputs ("\nHard register information:\n", stderr);
2569 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2570 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2571 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2572 LAST_ALTIVEC_REGNO,
2573 "vs");
2574 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2575 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2576 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2577 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2578 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2579 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2581 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2582 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2583 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2585 fprintf (stderr,
2586 "\n"
2587 "d reg_class = %s\n"
2588 "f reg_class = %s\n"
2589 "v reg_class = %s\n"
2590 "wa reg_class = %s\n"
2591 "wb reg_class = %s\n"
2592 "wd reg_class = %s\n"
2593 "we reg_class = %s\n"
2594 "wf reg_class = %s\n"
2595 "wg reg_class = %s\n"
2596 "wh reg_class = %s\n"
2597 "wi reg_class = %s\n"
2598 "wj reg_class = %s\n"
2599 "wk reg_class = %s\n"
2600 "wl reg_class = %s\n"
2601 "wm reg_class = %s\n"
2602 "wo reg_class = %s\n"
2603 "wp reg_class = %s\n"
2604 "wq reg_class = %s\n"
2605 "wr reg_class = %s\n"
2606 "ws reg_class = %s\n"
2607 "wt reg_class = %s\n"
2608 "wu reg_class = %s\n"
2609 "wv reg_class = %s\n"
2610 "ww reg_class = %s\n"
2611 "wx reg_class = %s\n"
2612 "wy reg_class = %s\n"
2613 "wz reg_class = %s\n"
2614 "wA reg_class = %s\n"
2615 "wH reg_class = %s\n"
2616 "wI reg_class = %s\n"
2617 "wJ reg_class = %s\n"
2618 "wK reg_class = %s\n"
2619 "\n",
2620 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2621 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2622 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2623 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2624 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2625 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2626 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2627 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2628 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2629 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2630 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2631 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2632 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2633 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2634 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2635 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2636 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2637 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2638 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2639 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2640 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2641 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2642 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2643 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2644 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2645 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2646 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2647 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2648 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2649 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2650 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2651 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2653 nl = "\n";
2654 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2655 rs6000_debug_print_mode (m);
2657 fputs ("\n", stderr);
2659 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2661 machine_mode mode1 = print_tieable_modes[m1];
2662 bool first_time = true;
2664 nl = (const char *)0;
2665 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2667 machine_mode mode2 = print_tieable_modes[m2];
2668 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2670 if (first_time)
2672 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2673 nl = "\n";
2674 first_time = false;
2677 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2681 if (!first_time)
2682 fputs ("\n", stderr);
2685 if (nl)
2686 fputs (nl, stderr);
2688 if (rs6000_recip_control)
2690 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2692 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2693 if (rs6000_recip_bits[m])
2695 fprintf (stderr,
2696 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2697 GET_MODE_NAME (m),
2698 (RS6000_RECIP_AUTO_RE_P (m)
2699 ? "auto"
2700 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2701 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2702 ? "auto"
2703 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2706 fputs ("\n", stderr);
2709 if (rs6000_cpu_index >= 0)
2711 const char *name = processor_target_table[rs6000_cpu_index].name;
2712 HOST_WIDE_INT flags
2713 = processor_target_table[rs6000_cpu_index].target_enable;
2715 sprintf (flags_buffer, "-mcpu=%s flags", name);
2716 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2718 else
2719 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2721 if (rs6000_tune_index >= 0)
2723 const char *name = processor_target_table[rs6000_tune_index].name;
2724 HOST_WIDE_INT flags
2725 = processor_target_table[rs6000_tune_index].target_enable;
2727 sprintf (flags_buffer, "-mtune=%s flags", name);
2728 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2730 else
2731 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2733 cl_target_option_save (&cl_opts, &global_options);
2734 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2735 rs6000_isa_flags);
2737 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2738 rs6000_isa_flags_explicit);
2740 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2741 rs6000_builtin_mask);
2743 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2745 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2746 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2748 switch (rs6000_sched_costly_dep)
2750 case max_dep_latency:
2751 costly_str = "max_dep_latency";
2752 break;
2754 case no_dep_costly:
2755 costly_str = "no_dep_costly";
2756 break;
2758 case all_deps_costly:
2759 costly_str = "all_deps_costly";
2760 break;
2762 case true_store_to_load_dep_costly:
2763 costly_str = "true_store_to_load_dep_costly";
2764 break;
2766 case store_to_load_dep_costly:
2767 costly_str = "store_to_load_dep_costly";
2768 break;
2770 default:
2771 costly_str = costly_num;
2772 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2773 break;
2776 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2778 switch (rs6000_sched_insert_nops)
2780 case sched_finish_regroup_exact:
2781 nop_str = "sched_finish_regroup_exact";
2782 break;
2784 case sched_finish_pad_groups:
2785 nop_str = "sched_finish_pad_groups";
2786 break;
2788 case sched_finish_none:
2789 nop_str = "sched_finish_none";
2790 break;
2792 default:
2793 nop_str = nop_num;
2794 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2795 break;
2798 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2800 switch (rs6000_sdata)
2802 default:
2803 case SDATA_NONE:
2804 break;
2806 case SDATA_DATA:
2807 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2808 break;
2810 case SDATA_SYSV:
2811 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2812 break;
2814 case SDATA_EABI:
2815 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2816 break;
2820 switch (rs6000_traceback)
2822 case traceback_default: trace_str = "default"; break;
2823 case traceback_none: trace_str = "none"; break;
2824 case traceback_part: trace_str = "part"; break;
2825 case traceback_full: trace_str = "full"; break;
2826 default: trace_str = "unknown"; break;
2829 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2831 switch (rs6000_current_cmodel)
2833 case CMODEL_SMALL: cmodel_str = "small"; break;
2834 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2835 case CMODEL_LARGE: cmodel_str = "large"; break;
2836 default: cmodel_str = "unknown"; break;
2839 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2841 switch (rs6000_current_abi)
2843 case ABI_NONE: abi_str = "none"; break;
2844 case ABI_AIX: abi_str = "aix"; break;
2845 case ABI_ELFv2: abi_str = "ELFv2"; break;
2846 case ABI_V4: abi_str = "V4"; break;
2847 case ABI_DARWIN: abi_str = "darwin"; break;
2848 default: abi_str = "unknown"; break;
2851 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2853 if (rs6000_altivec_abi)
2854 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2856 if (rs6000_darwin64_abi)
2857 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2859 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2860 (TARGET_SOFT_FLOAT ? "true" : "false"));
2862 if (TARGET_LINK_STACK)
2863 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2865 if (TARGET_P8_FUSION)
2867 char options[80];
2869 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2870 if (TARGET_TOC_FUSION)
2871 strcat (options, ", toc");
2873 if (TARGET_P8_FUSION_SIGN)
2874 strcat (options, ", sign");
2876 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2879 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2880 TARGET_SECURE_PLT ? "secure" : "bss");
2881 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2882 aix_struct_return ? "aix" : "sysv");
2883 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2884 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2885 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2886 tf[!!rs6000_align_branch_targets]);
2887 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2888 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2889 rs6000_long_double_type_size);
2890 if (rs6000_long_double_type_size > 64)
2892 fprintf (stderr, DEBUG_FMT_S, "long double type",
2893 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2894 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2895 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2897 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2898 (int)rs6000_sched_restricted_insns_priority);
2899 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2900 (int)END_BUILTINS);
2901 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2902 (int)RS6000_BUILTIN_COUNT);
2904 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2905 (int)TARGET_FLOAT128_ENABLE_TYPE);
2907 if (TARGET_VSX)
2908 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2909 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2911 if (TARGET_DIRECT_MOVE_128)
2912 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2913 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2917 /* Update the addr mask bits in reg_addr to help secondary reload and the
2918 legitimate address support figure out the appropriate addressing to
2919 use. */
2921 static void
2922 rs6000_setup_reg_addr_masks (void)
2924 ssize_t rc, reg, m, nregs;
2925 addr_mask_type any_addr_mask, addr_mask;
2927 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2929 machine_mode m2 = (machine_mode) m;
2930 bool complex_p = false;
2931 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2932 size_t msize;
2934 if (COMPLEX_MODE_P (m2))
2936 complex_p = true;
2937 m2 = GET_MODE_INNER (m2);
2940 msize = GET_MODE_SIZE (m2);
2942 /* SDmode is special in that we want to access it only via REG+REG
2943 addressing on power7 and above, since we want to use the LFIWZX and
2944 STFIWX instructions to load and store it. */
2945 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2947 any_addr_mask = 0;
2948 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2950 addr_mask = 0;
2951 reg = reload_reg_map[rc].reg;
2953 /* Can mode values go in the GPR/FPR/Altivec registers? */
2954 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2956 bool small_int_vsx_p = (small_int_p
2957 && (rc == RELOAD_REG_FPR
2958 || rc == RELOAD_REG_VMX));
2960 nregs = rs6000_hard_regno_nregs[m][reg];
2961 addr_mask |= RELOAD_REG_VALID;
2963 /* Indicate if the mode takes more than 1 physical register. If
2964 it takes a single register, indicate it can do REG+REG
2965 addressing. Small integers in VSX registers can only do
2966 REG+REG addressing. */
2967 if (small_int_vsx_p)
2968 addr_mask |= RELOAD_REG_INDEXED;
2969 else if (nregs > 1 || m == BLKmode || complex_p)
2970 addr_mask |= RELOAD_REG_MULTIPLE;
2971 else
2972 addr_mask |= RELOAD_REG_INDEXED;
2974 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2975 addressing. If we allow scalars into Altivec registers,
2976 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2978 For VSX systems, we don't allow update addressing for
2979 DFmode/SFmode if those registers can go in both the
2980 traditional floating point registers and Altivec registers.
2981 The load/store instructions for the Altivec registers do not
2982 have update forms. If we allowed update addressing, it seems
2983 to break IV-OPT code using floating point if the index type is
2984 int instead of long (PR target/81550 and target/84042). */
2986 if (TARGET_UPDATE
2987 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2988 && msize <= 8
2989 && !VECTOR_MODE_P (m2)
2990 && !FLOAT128_VECTOR_P (m2)
2991 && !complex_p
2992 && (m != E_DFmode || !TARGET_VSX)
2993 && (m != E_SFmode || !TARGET_P8_VECTOR)
2994 && !small_int_vsx_p)
2996 addr_mask |= RELOAD_REG_PRE_INCDEC;
2998 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2999 we don't allow PRE_MODIFY for some multi-register
3000 operations. */
3001 switch (m)
3003 default:
3004 addr_mask |= RELOAD_REG_PRE_MODIFY;
3005 break;
3007 case E_DImode:
3008 if (TARGET_POWERPC64)
3009 addr_mask |= RELOAD_REG_PRE_MODIFY;
3010 break;
3012 case E_DFmode:
3013 case E_DDmode:
3014 if (TARGET_HARD_FLOAT)
3015 addr_mask |= RELOAD_REG_PRE_MODIFY;
3016 break;
3021 /* GPR and FPR registers can do REG+OFFSET addressing, except
3022 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3023 for 64-bit scalars and 32-bit SFmode to Altivec registers. */
3024 if ((addr_mask != 0) && !indexed_only_p
3025 && msize <= 8
3026 && (rc == RELOAD_REG_GPR
3027 || ((msize == 8 || m2 == SFmode)
3028 && (rc == RELOAD_REG_FPR
3029 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
3030 addr_mask |= RELOAD_REG_OFFSET;
3032 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3033 instructions are enabled. The offset for 128-bit VSX registers is
3034 only 12 bits. While GPRs can handle the full offset range, VSX
3035 registers can only handle the restricted range. */
3036 else if ((addr_mask != 0) && !indexed_only_p
3037 && msize == 16 && TARGET_P9_VECTOR
3038 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3039 || (m2 == TImode && TARGET_VSX)))
3041 addr_mask |= RELOAD_REG_OFFSET;
3042 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3043 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3046 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3047 addressing on 128-bit types. */
3048 if (rc == RELOAD_REG_VMX && msize == 16
3049 && (addr_mask & RELOAD_REG_VALID) != 0)
3050 addr_mask |= RELOAD_REG_AND_M16;
3052 reg_addr[m].addr_mask[rc] = addr_mask;
3053 any_addr_mask |= addr_mask;
3056 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
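/* A concrete illustration (the exact bits depend on the ISA flags in force):
   on a 64-bit power9-style target, DImode in a GPR typically ends up with
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET
   | RELOAD_REG_PRE_INCDEC | RELOAD_REG_PRE_MODIFY, while V2DFmode in an
   Altivec register gets RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET | RELOAD_REG_QUAD_OFFSET | RELOAD_REG_AND_M16.  */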
3061 /* Initialize the various global tables that are based on register size. */
3062 static void
3063 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3065 ssize_t r, m, c;
3066 int align64;
3067 int align32;
3069 /* Precalculate REGNO_REG_CLASS. */
3070 rs6000_regno_regclass[0] = GENERAL_REGS;
3071 for (r = 1; r < 32; ++r)
3072 rs6000_regno_regclass[r] = BASE_REGS;
3074 for (r = 32; r < 64; ++r)
3075 rs6000_regno_regclass[r] = FLOAT_REGS;
3077 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3078 rs6000_regno_regclass[r] = NO_REGS;
3080 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3081 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3083 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3084 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3085 rs6000_regno_regclass[r] = CR_REGS;
3087 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3088 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3089 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3090 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3091 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3092 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3093 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3094 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3095 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3096 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3098 /* Precalculate the mapping from register class to the simpler reload register class. We don't
3099 need all of the register classes that are combinations of different
3100 classes, just the simple ones that have constraint letters. */
3101 for (c = 0; c < N_REG_CLASSES; c++)
3102 reg_class_to_reg_type[c] = NO_REG_TYPE;
3104 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3105 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3106 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3107 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3108 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3109 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3110 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3111 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3112 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3113 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3115 if (TARGET_VSX)
3117 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3118 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3120 else
3122 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3123 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3126 /* Precalculate the valid memory formats as well as the vector information;
3127 this must be set up before the rs6000_hard_regno_nregs_internal calls
3128 below. */
3129 gcc_assert ((int)VECTOR_NONE == 0);
3130 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3131 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
3133 gcc_assert ((int)CODE_FOR_nothing == 0);
3134 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3136 gcc_assert ((int)NO_REGS == 0);
3137 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3139 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
3140 controls whether the compiler assumes native alignment or still uses 128-bit alignment. */
3141 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3143 align64 = 64;
3144 align32 = 32;
3146 else
3148 align64 = 128;
3149 align32 = 128;
3152 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3153 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3154 if (TARGET_FLOAT128_TYPE)
3156 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3157 rs6000_vector_align[KFmode] = 128;
3159 if (FLOAT128_IEEE_P (TFmode))
3161 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3162 rs6000_vector_align[TFmode] = 128;
3166 /* V2DF mode, VSX only. */
3167 if (TARGET_VSX)
3169 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3170 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3171 rs6000_vector_align[V2DFmode] = align64;
3174 /* V4SF mode, either VSX or Altivec. */
3175 if (TARGET_VSX)
3177 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3178 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3179 rs6000_vector_align[V4SFmode] = align32;
3181 else if (TARGET_ALTIVEC)
3183 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3184 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3185 rs6000_vector_align[V4SFmode] = align32;
3188 /* V16QImode, V8HImode, V4SImode have Altivec-only arithmetic, but can use VSX
3189 loads and stores. */
3190 if (TARGET_ALTIVEC)
3192 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3193 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3194 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3195 rs6000_vector_align[V4SImode] = align32;
3196 rs6000_vector_align[V8HImode] = align32;
3197 rs6000_vector_align[V16QImode] = align32;
3199 if (TARGET_VSX)
3201 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3202 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3203 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3205 else
3207 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3208 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3209 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3213 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode. Allow
3214 it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3215 if (TARGET_VSX)
3217 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3218 rs6000_vector_unit[V2DImode]
3219 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3220 rs6000_vector_align[V2DImode] = align64;
3222 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3223 rs6000_vector_unit[V1TImode]
3224 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3225 rs6000_vector_align[V1TImode] = 128;
3228 /* DFmode, see if we want to use the VSX unit. Memory is handled
3229 differently, so don't set rs6000_vector_mem. */
3230 if (TARGET_VSX)
3232 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3233 rs6000_vector_align[DFmode] = 64;
3236 /* SFmode, see if we want to use the VSX unit. */
3237 if (TARGET_P8_VECTOR)
3239 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3240 rs6000_vector_align[SFmode] = 32;
3243 /* Allow TImode in VSX registers and set the VSX memory macros. */
3244 if (TARGET_VSX)
3246 rs6000_vector_mem[TImode] = VECTOR_VSX;
3247 rs6000_vector_align[TImode] = align64;
3250 /* Register class constraints for the constraints that depend on compile
3251 switches. When the VSX code was added, different constraints were added
3252 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3253 of the VSX registers are used. The register classes for scalar floating
3254 point types are set based on whether we allow that type into the upper
3255 (Altivec) registers. GCC has register classes to target the Altivec
3256 registers for load/store operations, to select using a VSX memory
3257 operation instead of the traditional floating point operation. The
3258 constraints are:
3260 d - Register class to use with traditional DFmode instructions.
3261 f - Register class to use with traditional SFmode instructions.
3262 v - Altivec register.
3263 wa - Any VSX register.
3264 wc - Reserved to represent individual CR bits (used in LLVM).
3265 wd - Preferred register class for V2DFmode.
3266 wf - Preferred register class for V4SFmode.
3267 wg - Float register for power6x move insns.
3268 wh - FP register for direct move instructions.
3269 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3270 wj - FP or VSX register to hold 64-bit integers for direct moves.
3271 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3272 wl - Float register if we can do 32-bit signed int loads.
3273 wm - VSX register for ISA 2.07 direct move operations.
3274 wn - always NO_REGS.
3275 wr - GPR if 64-bit mode is permitted.
3276 ws - Register class to do ISA 2.06 DF operations.
3277 wt - VSX register for TImode in VSX registers.
3278 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3279 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3280 ww - Register class to do SF conversions in with VSX operations.
3281 wx - Float register if we can do 32-bit int stores.
3282 wy - Register class to do ISA 2.07 SF operations.
3283 wz - Float register if we can do 32-bit unsigned int loads.
3284 wH - Altivec register if SImode is allowed in VSX registers.
3285 wI - VSX register if SImode is allowed in VSX registers.
3286 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3287 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
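/* To illustrate (a hypothetical pattern, not one copied from rs6000.md), an
   insn alternative that wants DFmode in a VSX register would use

     (match_operand:DF 0 "gpc_reg_operand" "=ws")

   and because rs6000_constraints[RS6000_CONSTRAINT_ws] stays NO_REGS unless
   TARGET_VSX is set, the alternative simply vanishes on older targets.  */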
3289 if (TARGET_HARD_FLOAT)
3291 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3292 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3295 if (TARGET_VSX)
3297 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3298 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3299 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3300 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3301 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3302 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3303 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3306 /* Add conditional constraints based on various options, to allow us to
3307 collapse multiple insn patterns. */
3308 if (TARGET_ALTIVEC)
3309 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3311 if (TARGET_MFPGPR) /* DFmode */
3312 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3314 if (TARGET_LFIWAX)
3315 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3317 if (TARGET_DIRECT_MOVE)
3319 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3320 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3321 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3322 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3323 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3324 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3327 if (TARGET_POWERPC64)
3329 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3330 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3333 if (TARGET_P8_VECTOR) /* SFmode */
3335 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3336 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3337 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3339 else if (TARGET_VSX)
3340 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3342 if (TARGET_STFIWX)
3343 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3345 if (TARGET_LFIWZX)
3346 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3348 if (TARGET_FLOAT128_TYPE)
3350 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3351 if (FLOAT128_IEEE_P (TFmode))
3352 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3355 if (TARGET_P9_VECTOR)
3357 /* Support for new D-form instructions. */
3358 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3360 /* Support for ISA 3.0 (power9) vectors. */
3361 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3364 /* Support for new direct moves (ISA 3.0 + 64-bit). */
3365 if (TARGET_DIRECT_MOVE_128)
3366 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3368 /* Support small integers in VSX registers. */
3369 if (TARGET_P8_VECTOR)
3371 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3372 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3373 if (TARGET_P9_VECTOR)
3375 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3376 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3380 /* Set up the reload helper and direct move functions. */
3381 if (TARGET_VSX || TARGET_ALTIVEC)
3383 if (TARGET_64BIT)
3385 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3386 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3387 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3388 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3389 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3390 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3391 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3392 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3393 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3394 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3395 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3396 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3397 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3398 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3399 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3400 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3401 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3402 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3403 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3404 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3406 if (FLOAT128_VECTOR_P (KFmode))
3408 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3409 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3412 if (FLOAT128_VECTOR_P (TFmode))
3414 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3415 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3418 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3419 available. */
3420 if (TARGET_NO_SDMODE_STACK)
3422 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3423 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3426 if (TARGET_VSX)
3428 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3429 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3432 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3434 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3435 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3436 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3437 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3438 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3439 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3440 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3441 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3442 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3444 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3445 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3446 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3447 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3448 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3449 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3450 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3451 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3452 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3454 if (FLOAT128_VECTOR_P (KFmode))
3456 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3457 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3460 if (FLOAT128_VECTOR_P (TFmode))
3462 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3463 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3467 else
3469 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3470 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3471 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3472 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3473 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3474 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3475 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3476 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3477 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3478 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3479 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3480 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3481 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3482 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3483 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3484 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3485 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3486 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3487 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3488 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3490 if (FLOAT128_VECTOR_P (KFmode))
3492 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3493 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3496 if (FLOAT128_IEEE_P (TFmode))
3498 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3499 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3502 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3503 available. */
3504 if (TARGET_NO_SDMODE_STACK)
3506 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3507 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3510 if (TARGET_VSX)
3512 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3513 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3516 if (TARGET_DIRECT_MOVE)
3518 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3519 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3520 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3524 reg_addr[DFmode].scalar_in_vmx_p = true;
3525 reg_addr[DImode].scalar_in_vmx_p = true;
3527 if (TARGET_P8_VECTOR)
3529 reg_addr[SFmode].scalar_in_vmx_p = true;
3530 reg_addr[SImode].scalar_in_vmx_p = true;
3532 if (TARGET_P9_VECTOR)
3534 reg_addr[HImode].scalar_in_vmx_p = true;
3535 reg_addr[QImode].scalar_in_vmx_p = true;
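  /* A sketch of how these tables are consumed (simplified): when reload
     must move, say, a V4SFmode value between GPRs and a VSX register
     without going through memory, rs6000_secondary_reload looks up
     reg_addr[V4SFmode].reload_vsx_gpr (or .reload_gpr_vsx for the other
     direction) and emits that direct-move pattern instead of a stack
     spill; the .reload_load/.reload_store entries play the same role for
     addresses the ordinary move patterns cannot handle.  */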
3540 /* Setup the fusion operations. */
3541 if (TARGET_P8_FUSION)
3543 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3544 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3545 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3546 if (TARGET_64BIT)
3547 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
3550 if (TARGET_P9_FUSION)
3552 struct fuse_insns {
3553 enum machine_mode mode; /* mode of the fused type. */
3554 enum machine_mode pmode; /* pointer mode. */
3555 enum rs6000_reload_reg_type rtype; /* register type. */
3556 enum insn_code load; /* load insn. */
3557 enum insn_code store; /* store insn. */
3560 static const struct fuse_insns addis_insns[] = {
3561 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3562 CODE_FOR_fusion_vsx_di_sf_load,
3563 CODE_FOR_fusion_vsx_di_sf_store },
3565 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3566 CODE_FOR_fusion_vsx_si_sf_load,
3567 CODE_FOR_fusion_vsx_si_sf_store },
3569 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3570 CODE_FOR_fusion_vsx_di_df_load,
3571 CODE_FOR_fusion_vsx_di_df_store },
3573 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3574 CODE_FOR_fusion_vsx_si_df_load,
3575 CODE_FOR_fusion_vsx_si_df_store },
3577 { E_DImode, E_DImode, RELOAD_REG_FPR,
3578 CODE_FOR_fusion_vsx_di_di_load,
3579 CODE_FOR_fusion_vsx_di_di_store },
3581 { E_DImode, E_SImode, RELOAD_REG_FPR,
3582 CODE_FOR_fusion_vsx_si_di_load,
3583 CODE_FOR_fusion_vsx_si_di_store },
3585 { E_QImode, E_DImode, RELOAD_REG_GPR,
3586 CODE_FOR_fusion_gpr_di_qi_load,
3587 CODE_FOR_fusion_gpr_di_qi_store },
3589 { E_QImode, E_SImode, RELOAD_REG_GPR,
3590 CODE_FOR_fusion_gpr_si_qi_load,
3591 CODE_FOR_fusion_gpr_si_qi_store },
3593 { E_HImode, E_DImode, RELOAD_REG_GPR,
3594 CODE_FOR_fusion_gpr_di_hi_load,
3595 CODE_FOR_fusion_gpr_di_hi_store },
3597 { E_HImode, E_SImode, RELOAD_REG_GPR,
3598 CODE_FOR_fusion_gpr_si_hi_load,
3599 CODE_FOR_fusion_gpr_si_hi_store },
3601 { E_SImode, E_DImode, RELOAD_REG_GPR,
3602 CODE_FOR_fusion_gpr_di_si_load,
3603 CODE_FOR_fusion_gpr_di_si_store },
3605 { E_SImode, E_SImode, RELOAD_REG_GPR,
3606 CODE_FOR_fusion_gpr_si_si_load,
3607 CODE_FOR_fusion_gpr_si_si_store },
3609 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3610 CODE_FOR_fusion_gpr_di_sf_load,
3611 CODE_FOR_fusion_gpr_di_sf_store },
3613 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3614 CODE_FOR_fusion_gpr_si_sf_load,
3615 CODE_FOR_fusion_gpr_si_sf_store },
3617 { E_DImode, E_DImode, RELOAD_REG_GPR,
3618 CODE_FOR_fusion_gpr_di_di_load,
3619 CODE_FOR_fusion_gpr_di_di_store },
3621 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3622 CODE_FOR_fusion_gpr_di_df_load,
3623 CODE_FOR_fusion_gpr_di_df_store },
3626 machine_mode cur_pmode = Pmode;
3627 size_t i;
3629 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3631 machine_mode xmode = addis_insns[i].mode;
3632 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3634 if (addis_insns[i].pmode != cur_pmode)
3635 continue;
3637 if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
3638 continue;
3640 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3641 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3643 if (rtype == RELOAD_REG_FPR && TARGET_P9_VECTOR)
3645 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3646 = addis_insns[i].load;
3647 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3648 = addis_insns[i].store;
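  /* The point of the addis fusion entries registered above: they let us
     emit adjacent instruction pairs the hardware can fuse, schematically
     (operands hypothetical):

	addis 9,2,.LC0@toc@ha
	lwz   10,.LC0@toc@l(9)

     where the addis and the dependent load can issue as a single fused
     operation on power8/power9 cores.  */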
3653 /* Note which types we support fusing TOC setup plus memory insn. We only do
3654 fused TOCs for medium/large code models. */
3655 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3656 && (TARGET_CMODEL != CMODEL_SMALL))
3658 reg_addr[QImode].fused_toc = true;
3659 reg_addr[HImode].fused_toc = true;
3660 reg_addr[SImode].fused_toc = true;
3661 reg_addr[DImode].fused_toc = true;
3662 if (TARGET_HARD_FLOAT)
3664 reg_addr[SFmode].fused_toc = true;
3665 reg_addr[DFmode].fused_toc = true;
3669 /* Precalculate HARD_REGNO_NREGS. */
3670 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3671 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3672 rs6000_hard_regno_nregs[m][r]
3673 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3675 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3676 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3677 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3678 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3679 rs6000_hard_regno_mode_ok_p[m][r] = true;
3681 /* Precalculate CLASS_MAX_NREGS sizes. */
3682 for (c = 0; c < LIM_REG_CLASSES; ++c)
3684 int reg_size;
3686 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3687 reg_size = UNITS_PER_VSX_WORD;
3689 else if (c == ALTIVEC_REGS)
3690 reg_size = UNITS_PER_ALTIVEC_WORD;
3692 else if (c == FLOAT_REGS)
3693 reg_size = UNITS_PER_FP_WORD;
3695 else
3696 reg_size = UNITS_PER_WORD;
3698 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3700 machine_mode m2 = (machine_mode)m;
3701 int reg_size2 = reg_size;
3703 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3704 in VSX. */
3705 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3706 reg_size2 = UNITS_PER_FP_WORD;
3708 rs6000_class_max_nregs[m][c]
3709 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
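	  /* Worked example: with 16-byte VSX registers, V2DFmode needs
	     (16 + 16 - 1) / 16 = 1 register, while IBM double-double
	     (FLOAT128_2REG_P) is held to the 8-byte FP word size and so
	     needs (16 + 8 - 1) / 8 = 2 registers.  */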
3713 /* Calculate the modes for which to automatically generate code using the
3714 reciprocal divide and square root instructions. In the future, possibly
3715 generate the instructions even if the user did not specify
3716 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3717 not accurate enough. */
3718 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3719 if (TARGET_FRES)
3720 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3721 if (TARGET_FRE)
3722 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3723 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3724 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3725 if (VECTOR_UNIT_VSX_P (V2DFmode))
3726 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3728 if (TARGET_FRSQRTES)
3729 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3730 if (TARGET_FRSQRTE)
3731 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3732 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3733 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3734 if (VECTOR_UNIT_VSX_P (V2DFmode))
3735 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3737 if (rs6000_recip_control)
3739 if (!flag_finite_math_only)
3740 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3741 "-ffast-math");
3742 if (flag_trapping_math)
3743 warning (0, "%qs requires %qs or %qs", "-mrecip",
3744 "-fno-trapping-math", "-ffast-math");
3745 if (!flag_reciprocal_math)
3746 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3747 "-ffast-math");
3748 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3750 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3751 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3752 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3754 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3755 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3756 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3758 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3759 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3760 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3762 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3763 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3764 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3766 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3767 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3768 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3770 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3771 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3772 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3774 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3775 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3776 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3778 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3779 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3780 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
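  /* For example, on a cpu that has both fres and frsqrtes, compiling with
     -mrecip -ffast-math should leave rs6000_recip_bits[SFmode] with the
     HAVE_RE, HAVE_RSQRTE, AUTO_RE and AUTO_RSQRTE bits set, so both x/y
     and 1.0f/sqrtf(x) can be expanded with the estimate instructions plus
     Newton-Raphson refinement.  */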
3784 /* Update the addr mask bits in reg_addr to help secondary reload and the
3785 legitimate address support figure out the appropriate addressing to
3786 use. */
3787 rs6000_setup_reg_addr_masks ();
3789 if (global_init_p || TARGET_DEBUG_TARGET)
3791 if (TARGET_DEBUG_REG)
3792 rs6000_debug_reg_global ();
3794 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3795 fprintf (stderr,
3796 "SImode variable mult cost = %d\n"
3797 "SImode constant mult cost = %d\n"
3798 "SImode short constant mult cost = %d\n"
3799 "DImode multipliciation cost = %d\n"
3800 "SImode division cost = %d\n"
3801 "DImode division cost = %d\n"
3802 "Simple fp operation cost = %d\n"
3803 "DFmode multiplication cost = %d\n"
3804 "SFmode division cost = %d\n"
3805 "DFmode division cost = %d\n"
3806 "cache line size = %d\n"
3807 "l1 cache size = %d\n"
3808 "l2 cache size = %d\n"
3809 "simultaneous prefetches = %d\n"
3810 "\n",
3811 rs6000_cost->mulsi,
3812 rs6000_cost->mulsi_const,
3813 rs6000_cost->mulsi_const9,
3814 rs6000_cost->muldi,
3815 rs6000_cost->divsi,
3816 rs6000_cost->divdi,
3817 rs6000_cost->fp,
3818 rs6000_cost->dmul,
3819 rs6000_cost->sdiv,
3820 rs6000_cost->ddiv,
3821 rs6000_cost->cache_line_size,
3822 rs6000_cost->l1_cache_size,
3823 rs6000_cost->l2_cache_size,
3824 rs6000_cost->simultaneous_prefetches);
3828 #if TARGET_MACHO
3829 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3831 static void
3832 darwin_rs6000_override_options (void)
3834 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3835 off. */
3836 rs6000_altivec_abi = 1;
3837 TARGET_ALTIVEC_VRSAVE = 1;
3838 rs6000_current_abi = ABI_DARWIN;
3840 if (DEFAULT_ABI == ABI_DARWIN
3841 && TARGET_64BIT)
3842 darwin_one_byte_bool = 1;
3844 if (TARGET_64BIT && ! TARGET_POWERPC64)
3846 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3847 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3849 if (flag_mkernel)
3851 rs6000_default_long_calls = 1;
3852 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3855 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3856 Altivec. */
3857 if (!flag_mkernel && !flag_apple_kext
3858 && TARGET_64BIT
3859 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3860 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3862 /* Unless the user (not the configurer) has explicitly overridden
3863 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3864 G4 unless targeting the kernel. */
3865 if (!flag_mkernel
3866 && !flag_apple_kext
3867 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3868 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3869 && ! global_options_set.x_rs6000_cpu_index)
3871 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3874 #endif
3876 /* If not otherwise specified by a target, make 'long double' equivalent to
3877 'double'. */
3879 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3880 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3881 #endif
3883 /* Return the builtin mask of the various options that could affect which
3884 builtins are available. In the past we used target_flags, but we've run out
3885 of bits, and some options are no longer in target_flags. */
3887 HOST_WIDE_INT
3888 rs6000_builtin_mask_calculate (void)
3890 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3891 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3892 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3893 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3894 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3895 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3896 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3897 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3898 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3899 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3900 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3901 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3902 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3903 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3904 | ((TARGET_POWERPC64) ? RS6000_BTM_POWERPC64 : 0)
3905 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3906 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3907 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3908 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3909 | ((TARGET_LONG_DOUBLE_128
3910 && TARGET_HARD_FLOAT
3911 && !TARGET_IEEEQUAD) ? RS6000_BTM_LDBL128 : 0)
3912 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3913 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
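/* A sketch of the consumer side (simplified): each built-in carries a mask
   of required features, and expansion checks it against the value computed
   here, roughly

     HOST_WIDE_INT fnmask = rs6000_builtin_info[code].mask;
     if ((fnmask & rs6000_builtin_mask) != fnmask)
       ... the built-in is not available on the current target ...  */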
3916 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3917 to clobber the XER[CA] bit because clobbering that bit without telling
3918 the compiler worked just fine with versions of GCC before GCC 5, and
3919 breaking a lot of older code in ways that are hard to track down is
3920 not such a great idea. */
3922 static rtx_insn *
3923 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3924 vec<const char *> &/*constraints*/,
3925 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3927 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3928 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3929 return NULL;
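/* For instance, a user asm like

     asm ("addic %0,%1,-1\n\taddme %0,%0" : "=r" (x) : "r" (y));

   silently modifies XER[CA]; the unconditional clobber added above keeps
   the compiler from assuming CA stays live across the statement.  */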
3932 /* Override command line options.
3934 Combine build-specific configuration information with options
3935 specified on the command line to set various state variables which
3936 influence code generation, optimization, and expansion of built-in
3937 functions. Ensure that command-line configuration preferences are
3938 compatible with each other and with the build configuration; issue
3939 warnings when adjusting the configuration and error messages when
3940 rejecting the configuration.
3942 Upon entry to this function:
3944 This function is called once at the beginning of
3945 compilation, and then again at the start and end of compiling
3946 each section of code that has a different configuration, as
3947 indicated, for example, by adding the
3949 __attribute__((__target__("cpu=power9")))
3951 qualifier to a function definition or, for example, by bracketing
3952 code between
3954 #pragma GCC target("altivec")
3958 #pragma GCC reset_options
3960 directives. Parameter global_init_p is true for the initial
3961 invocation, which initializes global variables, and false for all
3962 subsequent invocations.
3965 Various global state information is assumed to be valid. This
3966 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3967 default CPU specified at build configure time, TARGET_DEFAULT,
3968 representing the default set of option flags for the default
3969 target, and global_options_set.x_rs6000_isa_flags, representing
3970 which options were requested on the command line.
3972 Upon return from this function:
3974 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3975 was set by name on the command line. Additionally, if certain
3976 attributes are automatically enabled or disabled by this function
3977 in order to assure compatibility between options and
3978 configuration, the flags associated with those attributes are
3979 also set. By setting these "explicit bits", we avoid the risk
3980 that other code might accidentally overwrite these particular
3981 attributes with "default values".
3983 The various bits of rs6000_isa_flags are set to indicate the
3984 target options that have been selected for the most current
3985 compilation efforts. This has the effect of also turning on the
3986 associated TARGET_XXX values since these are macros which are
3987 generally defined to test the corresponding bit of the
3988 rs6000_isa_flags variable.
3990 The variable rs6000_builtin_mask is set to represent the target
3991 options for the most current compilation efforts, consistent with
3992 the current contents of rs6000_isa_flags. This variable controls
3993 expansion of built-in functions.
3995 Various other global variables and fields of global structures
3996 (over 50 in all) are initialized to reflect the desired options
3997 for the most current compilation efforts. */
3999 static bool
4000 rs6000_option_override_internal (bool global_init_p)
4002 bool ret = true;
4004 HOST_WIDE_INT set_masks;
4005 HOST_WIDE_INT ignore_masks;
4006 int cpu_index = -1;
4007 int tune_index;
4008 struct cl_target_option *main_target_opt
4009 = ((global_init_p || target_option_default_node == NULL)
4010 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4012 /* Print defaults. */
4013 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4014 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4016 /* Remember the explicit arguments. */
4017 if (global_init_p)
4018 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4020 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4021 library functions, so warn about it. The flag may be useful for
4022 performance studies from time to time though, so don't disable it
4023 entirely. */
4024 if (global_options_set.x_rs6000_alignment_flags
4025 && rs6000_alignment_flags == MASK_ALIGN_POWER
4026 && DEFAULT_ABI == ABI_DARWIN
4027 && TARGET_64BIT)
4028 warning (0, "%qs is not supported for 64-bit Darwin;"
4029 " it is incompatible with the installed C and C++ libraries",
4030 "-malign-power");
4032 /* Numerous experiments show that IRA-based loop pressure
4033 calculation works better for RTL loop invariant motion on targets
4034 with enough (>= 32) registers. It is an expensive optimization,
4035 so it is enabled only when optimizing for peak performance. */
4036 if (optimize >= 3 && global_init_p
4037 && !global_options_set.x_flag_ira_loop_pressure)
4038 flag_ira_loop_pressure = 1;
4040 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4041 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4042 options were already specified. */
4043 if (flag_sanitize & SANITIZE_USER_ADDRESS
4044 && !global_options_set.x_flag_asynchronous_unwind_tables)
4045 flag_asynchronous_unwind_tables = 1;
4047 /* Set the pointer size. */
4048 if (TARGET_64BIT)
4050 rs6000_pmode = DImode;
4051 rs6000_pointer_size = 64;
4053 else
4055 rs6000_pmode = SImode;
4056 rs6000_pointer_size = 32;
4059 /* Some OSs don't support saving the high part of 64-bit registers on context
4060 switch. Other OSs don't support saving Altivec registers. On those OSs,
4061 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4062 if the user wants either, the user must explicitly specify them and we
4063 won't interfere with the user's specification. */
4065 set_masks = POWERPC_MASKS;
4066 #ifdef OS_MISSING_POWERPC64
4067 if (OS_MISSING_POWERPC64)
4068 set_masks &= ~OPTION_MASK_POWERPC64;
4069 #endif
4070 #ifdef OS_MISSING_ALTIVEC
4071 if (OS_MISSING_ALTIVEC)
4072 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4073 | OTHER_VSX_VECTOR_MASKS);
4074 #endif
4076 /* Don't override options that were given explicitly with the processor default. */
4077 set_masks &= ~rs6000_isa_flags_explicit;
4079 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
4080 the cpu in a target attribute or pragma, but did not specify a tuning
4081 option, use the cpu for the tuning option rather than the option specified
4082 with -mtune on the command line. Process a '--with-cpu' configuration
4083 request as an implicit -mcpu. */
4084 if (rs6000_cpu_index >= 0)
4085 cpu_index = rs6000_cpu_index;
4086 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4087 cpu_index = main_target_opt->x_rs6000_cpu_index;
4088 else if (OPTION_TARGET_CPU_DEFAULT)
4089 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
4091 if (cpu_index >= 0)
4093 const char *unavailable_cpu = NULL;
4094 switch (processor_target_table[cpu_index].processor)
4096 #ifndef HAVE_AS_POWER9
4097 case PROCESSOR_POWER9:
4098 unavailable_cpu = "power9";
4099 break;
4100 #endif
4101 #ifndef HAVE_AS_POWER8
4102 case PROCESSOR_POWER8:
4103 unavailable_cpu = "power8";
4104 break;
4105 #endif
4106 #ifndef HAVE_AS_POPCNTD
4107 case PROCESSOR_POWER7:
4108 unavailable_cpu = "power7";
4109 break;
4110 #endif
4111 #ifndef HAVE_AS_DFP
4112 case PROCESSOR_POWER6:
4113 unavailable_cpu = "power6";
4114 break;
4115 #endif
4116 #ifndef HAVE_AS_POPCNTB
4117 case PROCESSOR_POWER5:
4118 unavailable_cpu = "power5";
4119 break;
4120 #endif
4121 default:
4122 break;
4124 if (unavailable_cpu)
4126 cpu_index = -1;
4127 warning (0, "will not generate %qs instructions because "
4128 "assembler lacks %qs support", unavailable_cpu,
4129 unavailable_cpu);
4133 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4134 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4135 with those from the cpu, except for options that were explicitly set. If
4136 we don't have a cpu, do not override the target bits set in
4137 TARGET_DEFAULT. */
4138 if (cpu_index >= 0)
4140 rs6000_cpu_index = cpu_index;
4141 rs6000_isa_flags &= ~set_masks;
4142 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4143 & set_masks);
4145 else
4147 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4148 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4149 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
4150 to using rs6000_isa_flags, we need to do the initialization here.
4152 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4153 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4154 HOST_WIDE_INT flags;
4155 if (TARGET_DEFAULT)
4156 flags = TARGET_DEFAULT;
4157 else
4159 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4160 const char *default_cpu = (!TARGET_POWERPC64
4161 ? "powerpc"
4162 : (BYTES_BIG_ENDIAN
4163 ? "powerpc64"
4164 : "powerpc64le"));
4165 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
4166 flags = processor_target_table[default_cpu_index].target_enable;
4168 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4171 if (rs6000_tune_index >= 0)
4172 tune_index = rs6000_tune_index;
4173 else if (cpu_index >= 0)
4174 rs6000_tune_index = tune_index = cpu_index;
4175 else
4177 size_t i;
4178 enum processor_type tune_proc
4179 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4181 tune_index = -1;
4182 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4183 if (processor_target_table[i].processor == tune_proc)
4185 tune_index = i;
4186 break;
4190 if (cpu_index >= 0)
4191 rs6000_cpu = processor_target_table[cpu_index].processor;
4192 else
4193 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
4195 gcc_assert (tune_index >= 0);
4196 rs6000_tune = processor_target_table[tune_index].processor;
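  /* At this point, e.g. "-mcpu=power8 -mtune=power9" leaves
     rs6000_cpu == PROCESSOR_POWER8 (which drove the ISA flag defaults
     above) and rs6000_tune == PROCESSOR_POWER9 (which drives the
     scheduling and cost decisions below).  */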
4198 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4199 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4200 || rs6000_cpu == PROCESSOR_PPCE5500)
4202 if (TARGET_ALTIVEC)
4203 error ("AltiVec not supported in this target");
4206 /* If we are optimizing big endian systems for space, use the load/store
4207 multiple instructions. */
4208 if (BYTES_BIG_ENDIAN && optimize_size)
4209 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
4211 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
4212 because the hardware doesn't support the instructions used in little
4213 endian mode, and causes an alignment trap. The 750 does not cause an
4214 alignment trap (except when the target is unaligned). */
4216 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
4218 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4219 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4220 warning (0, "%qs is not supported on little endian systems",
4221 "-mmultiple");
4224 /* If little-endian, default to -mstrict-align on older processors.
4225 Testing for htm matches power8 and later. */
4226 if (!BYTES_BIG_ENDIAN
4227 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4228 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4230 if (!rs6000_fold_gimple)
4231 fprintf (stderr,
4232 "gimple folding of rs6000 builtins has been disabled.\n");
4234 /* Add some warnings for VSX. */
4235 if (TARGET_VSX)
4237 const char *msg = NULL;
4238 if (!TARGET_HARD_FLOAT)
4240 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4241 msg = N_("-mvsx requires hardware floating point");
4242 else
4244 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4245 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4248 else if (TARGET_AVOID_XFORM > 0)
4249 msg = N_("-mvsx needs indexed addressing");
4250 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4251 & OPTION_MASK_ALTIVEC))
4253 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4254 msg = N_("-mvsx and -mno-altivec are incompatible");
4255 else
4256 msg = N_("-mno-altivec disables vsx");
4259 if (msg)
4261 warning (0, msg);
4262 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4263 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4267 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4268 the -mcpu setting to enable options that conflict. */
4269 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4270 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4271 | OPTION_MASK_ALTIVEC
4272 | OPTION_MASK_VSX)) != 0)
4273 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4274 | OPTION_MASK_DIRECT_MOVE)
4275 & ~rs6000_isa_flags_explicit);
4277 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4278 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4280 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4281 off all of the options that depend on those flags. */
4282 ignore_masks = rs6000_disable_incompatible_switches ();
4284 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4285 unless the user explicitly used the -mno-<option> to disable the code. */
4286 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4287 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4288 else if (TARGET_P9_MINMAX)
4290 if (cpu_index >= 0)
4292 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4294 /* legacy behavior: allow -mcpu=power9 with certain
4295 capabilities explicitly disabled. */
4296 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4298 else
4299 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4300 "for <xxx> less than power9", "-mcpu");
4302 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4303 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4304 & rs6000_isa_flags_explicit))
4305 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4306 were explicitly cleared. */
4307 error ("%qs incompatible with explicitly disabled options",
4308 "-mpower9-minmax");
4309 else
4310 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4312 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4313 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4314 else if (TARGET_VSX)
4315 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4316 else if (TARGET_POPCNTD)
4317 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4318 else if (TARGET_DFP)
4319 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4320 else if (TARGET_CMPB)
4321 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4322 else if (TARGET_FPRND)
4323 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4324 else if (TARGET_POPCNTB)
4325 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4326 else if (TARGET_ALTIVEC)
4327 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4329 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4331 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4332 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4333 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4336 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4338 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4339 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4340 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4343 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4345 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4346 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4347 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4350 if (TARGET_P8_VECTOR && !TARGET_VSX)
4352 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4353 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4354 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4355 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4357 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4358 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4359 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4361 else
4363 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4364 not explicit. */
4365 rs6000_isa_flags |= OPTION_MASK_VSX;
4366 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4370 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4372 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4373 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4374 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4377 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4378 silently turn off quad memory mode. */
4379 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4381 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4382 warning (0, N_("-mquad-memory requires 64-bit mode"));
4384 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4385 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4387 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4388 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4391 /* Non-atomic quad memory load/store are disabled for little endian, since
4392 the words are reversed, but atomic operations can still be done by
4393 swapping the words. */
4394 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4396 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4397 warning (0, N_("-mquad-memory is not available in little endian "
4398 "mode"));
4400 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4403 /* Assume that if the user asked for normal quad memory instructions, they
4404 want the atomic versions as well, unless they explicitly told us not to
4405 use quad word atomic instructions. */
4406 if (TARGET_QUAD_MEMORY
4407 && !TARGET_QUAD_MEMORY_ATOMIC
4408 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4409 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4411 /* If we can shrink-wrap the TOC register save separately, then use
4412 -msave-toc-indirect unless explicitly disabled. */
4413 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4414 && flag_shrink_wrap_separate
4415 && optimize_function_for_speed_p (cfun))
4416 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4418 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4419 generating power8 instructions. */
4420 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4421 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4422 & OPTION_MASK_P8_FUSION);
4424 /* Setting additional fusion flags turns on base fusion. */
4425 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4427 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4429 if (TARGET_P8_FUSION_SIGN)
4430 error ("%qs requires %qs", "-mpower8-fusion-sign",
4431 "-mpower8-fusion");
4433 if (TARGET_TOC_FUSION)
4434 error ("%qs requires %qs", "-mtoc-fusion", "-mpower8-fusion");
4436 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4438 else
4439 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4442 /* Power9 fusion is a superset over power8 fusion. */
4443 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4445 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4447 /* We prefer to not mention undocumented options in
4448 error messages. However, if users have managed to select
4449 power9-fusion without selecting power8-fusion, they
4450 already know about undocumented flags. */
4451 error ("%qs requires %qs", "-mpower9-fusion", "-mpower8-fusion");
4452 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4454 else
4455 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4458 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4459 generating power9 instructions. */
4460 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4461 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4462 & OPTION_MASK_P9_FUSION);
4464 /* Power8 does not fuse sign extended loads with the addis. If we are
4465 optimizing at high levels for speed, convert a sign extended load into a
4466 zero extending load, and an explicit sign extension. */
4467 if (TARGET_P8_FUSION
4468 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4469 && optimize_function_for_speed_p (cfun)
4470 && optimize >= 3)
4471 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
4473 /* TOC fusion requires 64-bit and medium/large code model. */
4474 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4476 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4477 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4478 warning (0, N_("-mtoc-fusion requires 64-bit"));
4481 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4483 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4484 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4485 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4488 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4489 model. */
4490 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4491 && (TARGET_CMODEL != CMODEL_SMALL)
4492 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4493 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4495 /* ISA 3.0 vector instructions include ISA 2.07. */
4496 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4498 /* We prefer to not mention undocumented options in
4499 error messages. However, if users have managed to select
4500 power9-vector without selecting power8-vector, they
4501 already know about undocumented flags. */
4502 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4503 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4504 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4505 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4507 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4508 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4509 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4511 else
4513 /* OPTION_MASK_P9_VECTOR is explicit and
4514 OPTION_MASK_P8_VECTOR is not explicit. */
4515 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4516 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4520 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4521 support. If we only have ISA 2.06 support, and the user did not specify
4522 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4523 but we don't enable the full vectorization support. */
4524 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4525 TARGET_ALLOW_MOVMISALIGN = 1;
4527 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4529 if (TARGET_ALLOW_MOVMISALIGN > 0
4530 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4531 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4533 TARGET_ALLOW_MOVMISALIGN = 0;
4536 /* Determine when unaligned vector accesses are permitted, and when
4537 they are preferred over masked Altivec loads. Note that if
4538 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4539 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4540 not true. */
4541 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4543 if (!TARGET_VSX)
4545 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4546 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4548 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4551 else if (!TARGET_ALLOW_MOVMISALIGN)
4553 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4554 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4555 "-mallow-movmisalign");
4557 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4561 /* Use the long double size to select the appropriate long double. We use
4562 TYPE_PRECISION to differentiate the 3 different long double types. We map
4563 128 into the precision used for TFmode. */
4564 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4565 ? 64
4566 : FLOAT_PRECISION_TFmode);
4568 /* Set long double size before the IEEE 128-bit tests. */
4569 if (!global_options_set.x_rs6000_long_double_type_size)
4571 if (main_target_opt != NULL
4572 && (main_target_opt->x_rs6000_long_double_type_size
4573 != default_long_double_size))
4574 error ("target attribute or pragma changes long double size");
4575 else
4576 rs6000_long_double_type_size = default_long_double_size;
4578 else if (rs6000_long_double_type_size == 128)
4579 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
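  /* I.e. with the default 64-bit long double nothing changes here, while
     -mlong-double-128 maps the user-visible 128 to FLOAT_PRECISION_TFmode,
     the TYPE_PRECISION value used below for both the IBM and IEEE 128-bit
     layouts of TFmode.  */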
4581 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4582 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4583 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4584 those systems will not pick up this default. Warn if the user changes the
4585 default unless -Wno-psabi. */
4586 if (!global_options_set.x_rs6000_ieeequad)
4587 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4589 else if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4591 static bool warned_change_long_double;
4592 if (!warned_change_long_double)
4594 warned_change_long_double = true;
4595 if (TARGET_IEEEQUAD)
4596 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4597 else
4598 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4602 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4603 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4604 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4605 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4606 the keyword and the type. */
4607 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4609 /* IEEE 128-bit floating point requires VSX support. */
4610 if (TARGET_FLOAT128_KEYWORD)
4612 if (!TARGET_VSX)
4614 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4615 error ("%qs requires VSX support", "-mfloat128");
4617 TARGET_FLOAT128_TYPE = 0;
4618 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4619 | OPTION_MASK_FLOAT128_HW);
4621 else if (!TARGET_FLOAT128_TYPE)
4623 TARGET_FLOAT128_TYPE = 1;
4624 warning (0, "The -mfloat128 option may not be fully supported");
4628 /* Enable the __float128 keyword under Linux by default. */
4629 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4630 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4631 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4633 /* If we are supporting the float128 type and have full ISA 3.0 support,
4634 enable -mfloat128-hardware by default. However, don't enable the
4635 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4636 because sometimes the compiler wants to put things in an integer
4637 container, and if we don't have __int128 support, it is impossible. */
4638 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4639 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4640 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4641 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4643 if (TARGET_FLOAT128_HW
4644 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4646 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4647 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4649 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4652 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4654 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4655 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4657 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4660 /* Print the options after updating the defaults. */
4661 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4662 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4664 /* E500mc does "better" if we inline more aggressively. Respect the
4665 user's opinion, though. */
4666 if (rs6000_block_move_inline_limit == 0
4667 && (rs6000_tune == PROCESSOR_PPCE500MC
4668 || rs6000_tune == PROCESSOR_PPCE500MC64
4669 || rs6000_tune == PROCESSOR_PPCE5500
4670 || rs6000_tune == PROCESSOR_PPCE6500))
4671 rs6000_block_move_inline_limit = 128;
4673 /* store_one_arg depends on expand_block_move to handle at least the
4674 size of reg_parm_stack_space. */
4675 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4676 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4678 if (global_init_p)
4680 /* If the appropriate debug option is enabled, replace the target hooks
4681 with debug versions that call the real version and then print
4682 debugging information. */
4683 if (TARGET_DEBUG_COST)
4685 targetm.rtx_costs = rs6000_debug_rtx_costs;
4686 targetm.address_cost = rs6000_debug_address_cost;
4687 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4690 if (TARGET_DEBUG_ADDR)
4692 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4693 targetm.legitimize_address = rs6000_debug_legitimize_address;
4694 rs6000_secondary_reload_class_ptr
4695 = rs6000_debug_secondary_reload_class;
4696 targetm.secondary_memory_needed
4697 = rs6000_debug_secondary_memory_needed;
4698 targetm.can_change_mode_class
4699 = rs6000_debug_can_change_mode_class;
4700 rs6000_preferred_reload_class_ptr
4701 = rs6000_debug_preferred_reload_class;
4702 rs6000_legitimize_reload_address_ptr
4703 = rs6000_debug_legitimize_reload_address;
4704 rs6000_mode_dependent_address_ptr
4705 = rs6000_debug_mode_dependent_address;
4708 if (rs6000_veclibabi_name)
4710 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4711 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4712 else
4714 error ("unknown vectorization library ABI type (%qs) for "
4715 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4716 ret = false;
4721 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4722 target attribute or pragma which automatically enables both options,
4723 unless the altivec ABI was set. This is set by default for 64-bit, but
4724 not for 32-bit. */
4725 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4727 TARGET_FLOAT128_TYPE = 0;
4728 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4729 | OPTION_MASK_FLOAT128_KEYWORD)
4730 & ~rs6000_isa_flags_explicit);
4733 /* Enable Altivec ABI for AIX -maltivec. */
4734 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4736 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4737 error ("target attribute or pragma changes AltiVec ABI");
4738 else
4739 rs6000_altivec_abi = 1;
4742 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4743 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4744 be explicitly overridden in either case. */
4745 if (TARGET_ELF)
4747 if (!global_options_set.x_rs6000_altivec_abi
4748 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4750 if (main_target_opt != NULL
4751 && !main_target_opt->x_rs6000_altivec_abi)
4752 error ("target attribute or pragma changes AltiVec ABI");
4753 else
4754 rs6000_altivec_abi = 1;
4758 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4759 So far, the only darwin64 targets are also MACH-O. */
4760 if (TARGET_MACHO
4761 && DEFAULT_ABI == ABI_DARWIN
4762 && TARGET_64BIT)
4764 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4765 error ("target attribute or pragma changes darwin64 ABI");
4766 else
4768 rs6000_darwin64_abi = 1;
4769 /* Default to natural alignment, for better performance. */
4770 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4774 /* Place FP constants in the constant pool instead of TOC
4775 if section anchors enabled. */
4776 if (flag_section_anchors
4777 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4778 TARGET_NO_FP_IN_TOC = 1;
4780 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4781 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4783 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4784 SUBTARGET_OVERRIDE_OPTIONS;
4785 #endif
4786 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4787 SUBSUBTARGET_OVERRIDE_OPTIONS;
4788 #endif
4789 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4790 SUB3TARGET_OVERRIDE_OPTIONS;
4791 #endif
4793 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4794 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4796 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4797 && rs6000_tune != PROCESSOR_POWER5
4798 && rs6000_tune != PROCESSOR_POWER6
4799 && rs6000_tune != PROCESSOR_POWER7
4800 && rs6000_tune != PROCESSOR_POWER8
4801 && rs6000_tune != PROCESSOR_POWER9
4802 && rs6000_tune != PROCESSOR_PPCA2
4803 && rs6000_tune != PROCESSOR_CELL
4804 && rs6000_tune != PROCESSOR_PPC476);
4805 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4806 || rs6000_tune == PROCESSOR_POWER5
4807 || rs6000_tune == PROCESSOR_POWER7
4808 || rs6000_tune == PROCESSOR_POWER8);
4809 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4810 || rs6000_tune == PROCESSOR_POWER5
4811 || rs6000_tune == PROCESSOR_POWER6
4812 || rs6000_tune == PROCESSOR_POWER7
4813 || rs6000_tune == PROCESSOR_POWER8
4814 || rs6000_tune == PROCESSOR_POWER9
4815 || rs6000_tune == PROCESSOR_PPCE500MC
4816 || rs6000_tune == PROCESSOR_PPCE500MC64
4817 || rs6000_tune == PROCESSOR_PPCE5500
4818 || rs6000_tune == PROCESSOR_PPCE6500);
4820 /* Allow debug switches to override the above settings. These are set to -1
4821 in rs6000.opt to indicate the user hasn't directly set the switch. */
4822 if (TARGET_ALWAYS_HINT >= 0)
4823 rs6000_always_hint = TARGET_ALWAYS_HINT;
4825 if (TARGET_SCHED_GROUPS >= 0)
4826 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4828 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4829 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4831 rs6000_sched_restricted_insns_priority
4832 = (rs6000_sched_groups ? 1 : 0);
4834 /* Handle -msched-costly-dep option. */
4835 rs6000_sched_costly_dep
4836 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4838 if (rs6000_sched_costly_dep_str)
4840 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4841 rs6000_sched_costly_dep = no_dep_costly;
4842 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4843 rs6000_sched_costly_dep = all_deps_costly;
4844 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4845 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4846 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4847 rs6000_sched_costly_dep = store_to_load_dep_costly;
4848 else
4849 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4850 atoi (rs6000_sched_costly_dep_str));
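  /* So e.g. "-msched-costly-dep=true_store_to_load" treats only a store
     followed by a dependent load as costly, while a plain number such as
     "-msched-costly-dep=20" (the numeric fallback above) should treat any
     dependence whose latency is at least that value as costly.  */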
4853 /* Handle -minsert-sched-nops option. */
4854 rs6000_sched_insert_nops
4855 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4857 if (rs6000_sched_insert_nops_str)
4859 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4860 rs6000_sched_insert_nops = sched_finish_none;
4861 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4862 rs6000_sched_insert_nops = sched_finish_pad_groups;
4863 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4864 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4865 else
4866 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4867 atoi (rs6000_sched_insert_nops_str));
4870 /* Handle stack protector */
4871 if (!global_options_set.x_rs6000_stack_protector_guard)
4872 #ifdef TARGET_THREAD_SSP_OFFSET
4873 rs6000_stack_protector_guard = SSP_TLS;
4874 #else
4875 rs6000_stack_protector_guard = SSP_GLOBAL;
4876 #endif
4878 #ifdef TARGET_THREAD_SSP_OFFSET
4879 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4880 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4881 #endif
4883 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4885 char *endp;
4886 const char *str = rs6000_stack_protector_guard_offset_str;
4888 errno = 0;
4889 long offset = strtol (str, &endp, 0);
4890 if (!*str || *endp || errno)
4891 error ("%qs is not a valid number in %qs", str,
4892 "-mstack-protector-guard-offset=");
4894 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4895 || (TARGET_64BIT && (offset & 3)))
4896 error ("%qs is not a valid offset in %qs", str,
4897 "-mstack-protector-guard-offset=");
4899 rs6000_stack_protector_guard_offset = offset;
4902 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4904 const char *str = rs6000_stack_protector_guard_reg_str;
4905 int reg = decode_reg_name (str);
4907 if (!IN_RANGE (reg, 1, 31))
4908 error ("%qs is not a valid base register in %qs", str,
4909 "-mstack-protector-guard-reg=");
4911 rs6000_stack_protector_guard_reg = reg;
4914 if (rs6000_stack_protector_guard == SSP_TLS
4915 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4916 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4918 if (global_init_p)
4920 #ifdef TARGET_REGNAMES
4921 /* If the user desires alternate register names, copy in the
4922 alternate names now. */
4923 if (TARGET_REGNAMES)
4924 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4925 #endif
4927 /* Set aix_struct_return last, after the ABI is determined.
4928 If -maix-struct-return or -msvr4-struct-return was explicitly
4929 used, don't override with the ABI default. */
4930 if (!global_options_set.x_aix_struct_return)
4931 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4933 #if 0
4934 /* IBM XL compiler defaults to unsigned bitfields. */
4935 if (TARGET_XL_COMPAT)
4936 flag_signed_bitfields = 0;
4937 #endif
4939 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4940 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4942 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4944 /* We can only guarantee the availability of DI pseudo-ops when
4945 assembling for 64-bit targets. */
4946 if (!TARGET_64BIT)
4948 targetm.asm_out.aligned_op.di = NULL;
4949 targetm.asm_out.unaligned_op.di = NULL;
4953 /* Set branch target alignment, if not optimizing for size. */
4954 if (!optimize_size)
4956 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
4957 aligned 8-byte to avoid misprediction by the branch predictor. */
4958 if (rs6000_tune == PROCESSOR_TITAN
4959 || rs6000_tune == PROCESSOR_CELL)
4961 if (align_functions <= 0)
4962 align_functions = 8;
4963 if (align_jumps <= 0)
4964 align_jumps = 8;
4965 if (align_loops <= 0)
4966 align_loops = 8;
4968 if (rs6000_align_branch_targets)
4970 if (align_functions <= 0)
4971 align_functions = 16;
4972 if (align_jumps <= 0)
4973 align_jumps = 16;
4974 if (align_loops <= 0)
4976 can_override_loop_align = 1;
4977 align_loops = 16;
4980 if (align_jumps_max_skip <= 0)
4981 align_jumps_max_skip = 15;
4982 if (align_loops_max_skip <= 0)
4983 align_loops_max_skip = 15;
4986 /* Arrange to save and restore machine status around nested functions. */
4987 init_machine_status = rs6000_init_machine_status;
4989 /* We should always be splitting complex arguments, but we can't break
4990 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4991 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4992 targetm.calls.split_complex_arg = NULL;
4994 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4995 if (DEFAULT_ABI == ABI_AIX)
4996 targetm.calls.custom_function_descriptors = 0;
4999 /* Initialize rs6000_cost with the appropriate target costs. */
5000 if (optimize_size)
5001 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5002 else
5003 switch (rs6000_tune)
5005 case PROCESSOR_RS64A:
5006 rs6000_cost = &rs64a_cost;
5007 break;
5009 case PROCESSOR_MPCCORE:
5010 rs6000_cost = &mpccore_cost;
5011 break;
5013 case PROCESSOR_PPC403:
5014 rs6000_cost = &ppc403_cost;
5015 break;
5017 case PROCESSOR_PPC405:
5018 rs6000_cost = &ppc405_cost;
5019 break;
5021 case PROCESSOR_PPC440:
5022 rs6000_cost = &ppc440_cost;
5023 break;
5025 case PROCESSOR_PPC476:
5026 rs6000_cost = &ppc476_cost;
5027 break;
5029 case PROCESSOR_PPC601:
5030 rs6000_cost = &ppc601_cost;
5031 break;
5033 case PROCESSOR_PPC603:
5034 rs6000_cost = &ppc603_cost;
5035 break;
5037 case PROCESSOR_PPC604:
5038 rs6000_cost = &ppc604_cost;
5039 break;
5041 case PROCESSOR_PPC604e:
5042 rs6000_cost = &ppc604e_cost;
5043 break;
5045 case PROCESSOR_PPC620:
5046 rs6000_cost = &ppc620_cost;
5047 break;
5049 case PROCESSOR_PPC630:
5050 rs6000_cost = &ppc630_cost;
5051 break;
5053 case PROCESSOR_CELL:
5054 rs6000_cost = &ppccell_cost;
5055 break;
5057 case PROCESSOR_PPC750:
5058 case PROCESSOR_PPC7400:
5059 rs6000_cost = &ppc750_cost;
5060 break;
5062 case PROCESSOR_PPC7450:
5063 rs6000_cost = &ppc7450_cost;
5064 break;
5066 case PROCESSOR_PPC8540:
5067 case PROCESSOR_PPC8548:
5068 rs6000_cost = &ppc8540_cost;
5069 break;
5071 case PROCESSOR_PPCE300C2:
5072 case PROCESSOR_PPCE300C3:
5073 rs6000_cost = &ppce300c2c3_cost;
5074 break;
5076 case PROCESSOR_PPCE500MC:
5077 rs6000_cost = &ppce500mc_cost;
5078 break;
5080 case PROCESSOR_PPCE500MC64:
5081 rs6000_cost = &ppce500mc64_cost;
5082 break;
5084 case PROCESSOR_PPCE5500:
5085 rs6000_cost = &ppce5500_cost;
5086 break;
5088 case PROCESSOR_PPCE6500:
5089 rs6000_cost = &ppce6500_cost;
5090 break;
5092 case PROCESSOR_TITAN:
5093 rs6000_cost = &titan_cost;
5094 break;
5096 case PROCESSOR_POWER4:
5097 case PROCESSOR_POWER5:
5098 rs6000_cost = &power4_cost;
5099 break;
5101 case PROCESSOR_POWER6:
5102 rs6000_cost = &power6_cost;
5103 break;
5105 case PROCESSOR_POWER7:
5106 rs6000_cost = &power7_cost;
5107 break;
5109 case PROCESSOR_POWER8:
5110 rs6000_cost = &power8_cost;
5111 break;
5113 case PROCESSOR_POWER9:
5114 rs6000_cost = &power9_cost;
5115 break;
5117 case PROCESSOR_PPCA2:
5118 rs6000_cost = &ppca2_cost;
5119 break;
5121 default:
5122 gcc_unreachable ();
5125 if (global_init_p)
5127 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5128 rs6000_cost->simultaneous_prefetches,
5129 global_options.x_param_values,
5130 global_options_set.x_param_values);
5131 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5132 global_options.x_param_values,
5133 global_options_set.x_param_values);
5134 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5135 rs6000_cost->cache_line_size,
5136 global_options.x_param_values,
5137 global_options_set.x_param_values);
5138 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5139 global_options.x_param_values,
5140 global_options_set.x_param_values);
5142 /* Increase loop peeling limits based on performance analysis. */
5143 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5144 global_options.x_param_values,
5145 global_options_set.x_param_values);
5146 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5147 global_options.x_param_values,
5148 global_options_set.x_param_values);
5150 /* Use the 'model' -fsched-pressure algorithm by default. */
5151 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5152 SCHED_PRESSURE_MODEL,
5153 global_options.x_param_values,
5154 global_options_set.x_param_values);
5156 /* If using typedef char *va_list, signal that
5157 __builtin_va_start (&ap, 0) can be optimized to
5158 ap = __builtin_next_arg (0). */
5159 if (DEFAULT_ABI != ABI_V4)
5160 targetm.expand_builtin_va_start = NULL;
5163 /* If not explicitly specified via option, decide whether to generate indexed
5164 load/store instructions. A value of -1 indicates that the
5165 initial value of this variable has not been overwritten. During
5166 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5167 if (TARGET_AVOID_XFORM == -1)
5168 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5169 DERAT mispredict penalty. However the LVE and STVE altivec instructions
5170 need indexed accesses and the type used is the scalar type of the element
5171 being loaded or stored. */
5172 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
5173 && !TARGET_ALTIVEC);
5175 /* Set the -mrecip options. */
5176 if (rs6000_recip_name)
5178 char *p = ASTRDUP (rs6000_recip_name);
5179 char *q;
5180 unsigned int mask, i;
5181 bool invert;
5183 while ((q = strtok (p, ",")) != NULL)
5185 p = NULL;
5186 if (*q == '!')
5188 invert = true;
5189 q++;
5191 else
5192 invert = false;
5194 if (!strcmp (q, "default"))
5195 mask = ((TARGET_RECIP_PRECISION)
5196 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5197 else
5199 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5200 if (!strcmp (q, recip_options[i].string))
5202 mask = recip_options[i].mask;
5203 break;
5206 if (i == ARRAY_SIZE (recip_options))
5208 error ("unknown option for %<%s=%s%>", "-mrecip", q);
5209 invert = false;
5210 mask = 0;
5211 ret = false;
5215 if (invert)
5216 rs6000_recip_control &= ~mask;
5217 else
5218 rs6000_recip_control |= mask;
5222 /* Set the builtin mask of the various options used that could affect which
5223 builtins were used. In the past we used target_flags, but we've run out
5224 of bits, and some options are no longer in target_flags. */
5225 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5226 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5227 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5228 rs6000_builtin_mask);
5230 /* Initialize all of the registers. */
5231 rs6000_init_hard_regno_mode_ok (global_init_p);
5233 /* Save the initial options in case the user uses function-specific options. */
5234 if (global_init_p)
5235 target_option_default_node = target_option_current_node
5236 = build_target_option_node (&global_options);
5238 /* If not explicitly specified via option, decide whether to generate the
5239 extra blr's required to preserve the link stack on some cpus (eg, 476). */
5240 if (TARGET_LINK_STACK == -1)
5241 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
5243 /* Deprecate use of -mno-speculate-indirect-jumps. */
5244 if (!rs6000_speculate_indirect_jumps)
5245 warning (0, "%qs is deprecated and not recommended in any circumstances",
5246 "-mno-speculate-indirect-jumps");
5248 return ret;
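/* Editor's note: a minimal standalone sketch (not part of this file) of the
   strtol validation idiom used above for -mstack-protector-guard-offset=.
   The helper name is hypothetical.  A string is accepted only if it is
   non-empty, fully consumed, converted without overflow, and fits in a
   signed 16-bit displacement.  */
#if 0
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

static bool
valid_guard_offset (const char *str, long *offset_out)
{
  char *endp;
  errno = 0;
  long offset = strtol (str, &endp, 0);
  /* Reject empty strings, trailing junk, and out-of-range conversions.  */
  if (!*str || *endp || errno)
    return false;
  /* The offset must fit in the 16-bit displacement of a D-form access.  */
  if (offset < -0x8000 || offset > 0x7fff)
    return false;
  *offset_out = offset;
  return true;
}
#endif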
5251 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5252 define the target cpu type. */
5254 static void
5255 rs6000_option_override (void)
5257 (void) rs6000_option_override_internal (true);
5261 /* Implement targetm.vectorize.builtin_mask_for_load. */
5262 static tree
5263 rs6000_builtin_mask_for_load (void)
5265 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5266 if ((TARGET_ALTIVEC && !TARGET_VSX)
5267 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5268 return altivec_builtin_mask_for_load;
5269 else
5270 return 0;
5273 /* Implement LOOP_ALIGN. */
5274 int
5275 rs6000_loop_align (rtx label)
5277 basic_block bb;
5278 int ninsns;
5280 /* Don't override loop alignment if -falign-loops was specified. */
5281 if (!can_override_loop_align)
5282 return align_loops_log;
5284 bb = BLOCK_FOR_INSN (label);
5285 ninsns = num_loop_insns(bb->loop_father);
5287 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5288 if (ninsns > 4 && ninsns <= 8
5289 && (rs6000_tune == PROCESSOR_POWER4
5290 || rs6000_tune == PROCESSOR_POWER5
5291 || rs6000_tune == PROCESSOR_POWER6
5292 || rs6000_tune == PROCESSOR_POWER7
5293 || rs6000_tune == PROCESSOR_POWER8))
5294 return 5;
5295 else
5296 return align_loops_log;
5299 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5300 static int
5301 rs6000_loop_align_max_skip (rtx_insn *label)
5303 return (1 << rs6000_loop_align (label)) - 1;
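/* Editor's note: an illustrative sketch (not part of this file) of the
   relationship between the two hooks above.  LOOP_ALIGN returns a log2
   alignment, so a return value of 5 requests a 32-byte boundary, and the
   max-skip hook permits up to (1 << 5) - 1 = 31 bytes of padding.  */
#if 0
static int
align_max_skip (int align_log)
{
  return (1 << align_log) - 1;  /* align_max_skip (5) == 31.  */
}
#endif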
5306 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5307 after applying N iterations. This routine does not determine how
5308 many iterations are required to reach the desired alignment. */
5310 static bool
5311 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5313 if (is_packed)
5314 return false;
5316 if (TARGET_32BIT)
5318 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5319 return true;
5321 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5322 return true;
5324 return false;
5326 else
5328 if (TARGET_MACHO)
5329 return false;
5331 /* Assuming that all other types are naturally aligned. CHECKME! */
5332 return true;
5336 /* Return true if the vector misalignment factor is supported by the
5337 target. */
5338 static bool
5339 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5340 const_tree type,
5341 int misalignment,
5342 bool is_packed)
5344 if (TARGET_VSX)
5346 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5347 return true;
5349 /* Return false if the movmisalign pattern is not supported for this mode. */
5350 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5351 return false;
5353 if (misalignment == -1)
5355 /* Misalignment factor is unknown at compile time but we know
5356 it's word aligned. */
5357 if (rs6000_vector_alignment_reachable (type, is_packed))
5359 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5361 if (element_size == 64 || element_size == 32)
5362 return true;
5365 return false;
5368 /* VSX supports word-aligned vectors. */
5369 if (misalignment % 4 == 0)
5370 return true;
5372 return false;
5375 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5376 static int
5377 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5378 tree vectype, int misalign)
5380 unsigned elements;
5381 tree elem_type;
5383 switch (type_of_cost)
5385 case scalar_stmt:
5386 case scalar_load:
5387 case scalar_store:
5388 case vector_stmt:
5389 case vector_load:
5390 case vector_store:
5391 case vec_to_scalar:
5392 case scalar_to_vec:
5393 case cond_branch_not_taken:
5394 return 1;
5396 case vec_perm:
5397 if (TARGET_VSX)
5398 return 3;
5399 else
5400 return 1;
5402 case vec_promote_demote:
5403 if (TARGET_VSX)
5404 return 4;
5405 else
5406 return 1;
5408 case cond_branch_taken:
5409 return 3;
5411 case unaligned_load:
5412 case vector_gather_load:
5413 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5414 return 1;
5416 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5418 elements = TYPE_VECTOR_SUBPARTS (vectype);
5419 if (elements == 2)
5420 /* Double word aligned. */
5421 return 2;
5423 if (elements == 4)
5425 switch (misalign)
5427 case 8:
5428 /* Double word aligned. */
5429 return 2;
5431 case -1:
5432 /* Unknown misalignment. */
5433 case 4:
5434 case 12:
5435 /* Word aligned. */
5436 return 22;
5438 default:
5439 gcc_unreachable ();
5444 if (TARGET_ALTIVEC)
5445 /* Misaligned loads are not supported. */
5446 gcc_unreachable ();
5448 return 2;
5450 case unaligned_store:
5451 case vector_scatter_store:
5452 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5453 return 1;
5455 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5457 elements = TYPE_VECTOR_SUBPARTS (vectype);
5458 if (elements == 2)
5459 /* Double word aligned. */
5460 return 2;
5462 if (elements == 4)
5464 switch (misalign)
5466 case 8:
5467 /* Double word aligned. */
5468 return 2;
5470 case -1:
5471 /* Unknown misalignment. */
5472 case 4:
5473 case 12:
5474 /* Word aligned. */
5475 return 23;
5477 default:
5478 gcc_unreachable ();
5483 if (TARGET_ALTIVEC)
5484 /* Misaligned stores are not supported. */
5485 gcc_unreachable ();
5487 return 2;
5489 case vec_construct:
5490 /* This is a rough approximation assuming non-constant elements
5491 constructed into a vector via element insertion. FIXME:
5492 vec_construct is not granular enough for uniformly good
5493 decisions. If the initialization is a splat, this is
5494 cheaper than we estimate. Improve this someday. */
5495 elem_type = TREE_TYPE (vectype);
5496 /* 32-bit vectors loaded into registers are stored as double
5497 precision, so we need 2 permutes, 2 converts, and 1 merge
5498 to construct a vector of short floats from them. */
5499 if (SCALAR_FLOAT_TYPE_P (elem_type)
5500 && TYPE_PRECISION (elem_type) == 32)
5501 return 5;
5502 /* On POWER9, integer vector types are built up in GPRs and then
5503 use a direct move (2 cycles). For POWER8 this is even worse,
5504 as we need two direct moves and a merge, and the direct moves
5505 are five cycles. */
5506 else if (INTEGRAL_TYPE_P (elem_type))
5508 if (TARGET_P9_VECTOR)
5509 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5510 else
5511 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5513 else
5514 /* V2DFmode doesn't need a direct move. */
5515 return 2;
5517 default:
5518 gcc_unreachable ();
5522 /* Implement targetm.vectorize.preferred_simd_mode. */
5524 static machine_mode
5525 rs6000_preferred_simd_mode (scalar_mode mode)
5527 if (TARGET_VSX)
5528 switch (mode)
5530 case E_DFmode:
5531 return V2DFmode;
5532 default:;
5534 if (TARGET_ALTIVEC || TARGET_VSX)
5535 switch (mode)
5537 case E_SFmode:
5538 return V4SFmode;
5539 case E_TImode:
5540 return V1TImode;
5541 case E_DImode:
5542 return V2DImode;
5543 case E_SImode:
5544 return V4SImode;
5545 case E_HImode:
5546 return V8HImode;
5547 case E_QImode:
5548 return V16QImode;
5549 default:;
5551 return word_mode;
5554 typedef struct _rs6000_cost_data
5556 struct loop *loop_info;
5557 unsigned cost[3];
5558 } rs6000_cost_data;
5560 /* Test for likely overcommitment of vector hardware resources. If a
5561 loop iteration is relatively large, and too large a percentage of
5562 instructions in the loop are vectorized, the cost model may not
5563 adequately reflect delays from unavailable vector resources.
5564 Penalize the loop body cost for this case. */
5566 static void
5567 rs6000_density_test (rs6000_cost_data *data)
5569 const int DENSITY_PCT_THRESHOLD = 85;
5570 const int DENSITY_SIZE_THRESHOLD = 70;
5571 const int DENSITY_PENALTY = 10;
5572 struct loop *loop = data->loop_info;
5573 basic_block *bbs = get_loop_body (loop);
5574 int nbbs = loop->num_nodes;
5575 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5576 int i, density_pct;
5578 for (i = 0; i < nbbs; i++)
5580 basic_block bb = bbs[i];
5581 gimple_stmt_iterator gsi;
5583 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5585 gimple *stmt = gsi_stmt (gsi);
5586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5588 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5589 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5590 not_vec_cost++;
5594 free (bbs);
5595 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5597 if (density_pct > DENSITY_PCT_THRESHOLD
5598 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5600 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5601 if (dump_enabled_p ())
5602 dump_printf_loc (MSG_NOTE, vect_location,
5603 "density %d%%, cost %d exceeds threshold, penalizing "
5604 "loop body cost by %d%%", density_pct,
5605 vec_cost + not_vec_cost, DENSITY_PENALTY);
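/* Editor's note: a worked example (not part of this file) of the density
   heuristic above.  With vec_cost = 90 and not_vec_cost = 10, density_pct
   is 90 * 100 / 100 = 90 > 85, and the loop size 100 > 70, so the body
   cost is penalized to 90 * (100 + 10) / 100 = 99.  */
#if 0
static int
density_penalized_cost (int vec_cost, int not_vec_cost)
{
  /* Assumes a non-empty loop, as in rs6000_density_test above.  */
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
  if (density_pct > 85 && vec_cost + not_vec_cost > 70)
    return vec_cost * (100 + 10) / 100;   /* apply the 10% penalty */
  return vec_cost;
}
#endif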
5609 /* Implement targetm.vectorize.init_cost. */
5611 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5612 instruction is needed by the vectorization. */
5613 static bool rs6000_vect_nonmem;
5615 static void *
5616 rs6000_init_cost (struct loop *loop_info)
5618 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5619 data->loop_info = loop_info;
5620 data->cost[vect_prologue] = 0;
5621 data->cost[vect_body] = 0;
5622 data->cost[vect_epilogue] = 0;
5623 rs6000_vect_nonmem = false;
5624 return data;
5627 /* Implement targetm.vectorize.add_stmt_cost. */
5629 static unsigned
5630 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5631 struct _stmt_vec_info *stmt_info, int misalign,
5632 enum vect_cost_model_location where)
5634 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5635 unsigned retval = 0;
5637 if (flag_vect_cost_model)
5639 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5640 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5641 misalign);
5642 /* Statements in an inner loop relative to the loop being
5643 vectorized are weighted more heavily. The value here is
5644 arbitrary and could potentially be improved with analysis. */
5645 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5646 count *= 50; /* FIXME. */
5648 retval = (unsigned) (count * stmt_cost);
5649 cost_data->cost[where] += retval;
5651 /* Check whether we're doing something other than just a copy loop.
5652 Not all such loops may be profitably vectorized; see
5653 rs6000_finish_cost. */
5654 if ((kind == vec_to_scalar || kind == vec_perm
5655 || kind == vec_promote_demote || kind == vec_construct
5656 || kind == scalar_to_vec)
5657 || (where == vect_body && kind == vector_stmt))
5658 rs6000_vect_nonmem = true;
5661 return retval;
5664 /* Implement targetm.vectorize.finish_cost. */
5666 static void
5667 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5668 unsigned *body_cost, unsigned *epilogue_cost)
5670 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5672 if (cost_data->loop_info)
5673 rs6000_density_test (cost_data);
5675 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5676 that require versioning for any reason. The vectorization is at
5677 best a wash inside the loop, and the versioning checks make
5678 profitability highly unlikely and potentially quite harmful. */
5679 if (cost_data->loop_info)
5681 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5682 if (!rs6000_vect_nonmem
5683 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5684 && LOOP_REQUIRES_VERSIONING (vec_info))
5685 cost_data->cost[vect_body] += 10000;
5688 *prologue_cost = cost_data->cost[vect_prologue];
5689 *body_cost = cost_data->cost[vect_body];
5690 *epilogue_cost = cost_data->cost[vect_epilogue];
5693 /* Implement targetm.vectorize.destroy_cost_data. */
5695 static void
5696 rs6000_destroy_cost_data (void *data)
5698 free (data);
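/* Editor's note: an illustrative sketch (not part of this file) showing the
   order in which the vectorizer drives the four cost hooks above; the
   statement mix here is hypothetical.  */
#if 0
static unsigned
example_cost_lifecycle (struct loop *loop_info)
{
  unsigned prologue, body, epilogue;
  void *data = rs6000_init_cost (loop_info);
  /* One vector statement in the loop body, alignment unknown.  */
  rs6000_add_stmt_cost (data, 1, vector_stmt, NULL, -1, vect_body);
  rs6000_finish_cost (data, &prologue, &body, &epilogue);
  rs6000_destroy_cost_data (data);
  return body;
}
#endif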
5701 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5702 library with vectorized intrinsics. */
5704 static tree
5705 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5706 tree type_in)
5708 char name[32];
5709 const char *suffix = NULL;
5710 tree fntype, new_fndecl, bdecl = NULL_TREE;
5711 int n_args = 1;
5712 const char *bname;
5713 machine_mode el_mode, in_mode;
5714 int n, in_n;
5716 /* Libmass is suitable for unsafe math only as it does not correctly support
5717 parts of IEEE with the required precision such as denormals. Only support
5718 it if we have VSX to use the simd d2 or f4 functions.
5719 XXX: Add variable length support. */
5720 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5721 return NULL_TREE;
5723 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5724 n = TYPE_VECTOR_SUBPARTS (type_out);
5725 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5726 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5727 if (el_mode != in_mode
5728 || n != in_n)
5729 return NULL_TREE;
5731 switch (fn)
5733 CASE_CFN_ATAN2:
5734 CASE_CFN_HYPOT:
5735 CASE_CFN_POW:
5736 n_args = 2;
5737 gcc_fallthrough ();
5739 CASE_CFN_ACOS:
5740 CASE_CFN_ACOSH:
5741 CASE_CFN_ASIN:
5742 CASE_CFN_ASINH:
5743 CASE_CFN_ATAN:
5744 CASE_CFN_ATANH:
5745 CASE_CFN_CBRT:
5746 CASE_CFN_COS:
5747 CASE_CFN_COSH:
5748 CASE_CFN_ERF:
5749 CASE_CFN_ERFC:
5750 CASE_CFN_EXP2:
5751 CASE_CFN_EXP:
5752 CASE_CFN_EXPM1:
5753 CASE_CFN_LGAMMA:
5754 CASE_CFN_LOG10:
5755 CASE_CFN_LOG1P:
5756 CASE_CFN_LOG2:
5757 CASE_CFN_LOG:
5758 CASE_CFN_SIN:
5759 CASE_CFN_SINH:
5760 CASE_CFN_SQRT:
5761 CASE_CFN_TAN:
5762 CASE_CFN_TANH:
5763 if (el_mode == DFmode && n == 2)
5765 bdecl = mathfn_built_in (double_type_node, fn);
5766 suffix = "d2"; /* pow -> powd2 */
5768 else if (el_mode == SFmode && n == 4)
5770 bdecl = mathfn_built_in (float_type_node, fn);
5771 suffix = "4"; /* powf -> powf4 */
5773 else
5774 return NULL_TREE;
5775 if (!bdecl)
5776 return NULL_TREE;
5777 break;
5779 default:
5780 return NULL_TREE;
5783 gcc_assert (suffix != NULL);
5784 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5785 if (!bname)
5786 return NULL_TREE;
5788 strcpy (name, bname + sizeof ("__builtin_") - 1);
5789 strcat (name, suffix);
5791 if (n_args == 1)
5792 fntype = build_function_type_list (type_out, type_in, NULL);
5793 else if (n_args == 2)
5794 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5795 else
5796 gcc_unreachable ();
5798 /* Build a function declaration for the vectorized function. */
5799 new_fndecl = build_decl (BUILTINS_LOCATION,
5800 FUNCTION_DECL, get_identifier (name), fntype);
5801 TREE_PUBLIC (new_fndecl) = 1;
5802 DECL_EXTERNAL (new_fndecl) = 1;
5803 DECL_IS_NOVOPS (new_fndecl) = 1;
5804 TREE_READONLY (new_fndecl) = 1;
5806 return new_fndecl;
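/* Editor's note: a standalone sketch (not part of this file) of the MASS
   name mangling performed above; the helper name is hypothetical.
   "__builtin_pow" drops the "__builtin_" prefix and gains "d2", giving
   "powd2"; "__builtin_powf" plus "4" gives "powf4".  */
#if 0
#include <string.h>

static void
mass_name (char *out, const char *builtin_name, const char *suffix)
{
  /* Skip the "__builtin_" prefix, then append the vector suffix.  */
  strcpy (out, builtin_name + sizeof ("__builtin_") - 1);
  strcat (out, suffix);
}
#endif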
5809 /* Returns a function decl for a vectorized version of the builtin function
5810 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5811 if it is not available. */
5813 static tree
5814 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5815 tree type_in)
5817 machine_mode in_mode, out_mode;
5818 int in_n, out_n;
5820 if (TARGET_DEBUG_BUILTIN)
5821 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5822 combined_fn_name (combined_fn (fn)),
5823 GET_MODE_NAME (TYPE_MODE (type_out)),
5824 GET_MODE_NAME (TYPE_MODE (type_in)));
5826 if (TREE_CODE (type_out) != VECTOR_TYPE
5827 || TREE_CODE (type_in) != VECTOR_TYPE)
5828 return NULL_TREE;
5830 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5831 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5832 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5833 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5835 switch (fn)
5837 CASE_CFN_COPYSIGN:
5838 if (VECTOR_UNIT_VSX_P (V2DFmode)
5839 && out_mode == DFmode && out_n == 2
5840 && in_mode == DFmode && in_n == 2)
5841 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5842 if (VECTOR_UNIT_VSX_P (V4SFmode)
5843 && out_mode == SFmode && out_n == 4
5844 && in_mode == SFmode && in_n == 4)
5845 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5846 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5847 && out_mode == SFmode && out_n == 4
5848 && in_mode == SFmode && in_n == 4)
5849 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5850 break;
5851 CASE_CFN_CEIL:
5852 if (VECTOR_UNIT_VSX_P (V2DFmode)
5853 && out_mode == DFmode && out_n == 2
5854 && in_mode == DFmode && in_n == 2)
5855 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5856 if (VECTOR_UNIT_VSX_P (V4SFmode)
5857 && out_mode == SFmode && out_n == 4
5858 && in_mode == SFmode && in_n == 4)
5859 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5860 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5861 && out_mode == SFmode && out_n == 4
5862 && in_mode == SFmode && in_n == 4)
5863 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5864 break;
5865 CASE_CFN_FLOOR:
5866 if (VECTOR_UNIT_VSX_P (V2DFmode)
5867 && out_mode == DFmode && out_n == 2
5868 && in_mode == DFmode && in_n == 2)
5869 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5870 if (VECTOR_UNIT_VSX_P (V4SFmode)
5871 && out_mode == SFmode && out_n == 4
5872 && in_mode == SFmode && in_n == 4)
5873 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5874 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5875 && out_mode == SFmode && out_n == 4
5876 && in_mode == SFmode && in_n == 4)
5877 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5878 break;
5879 CASE_CFN_FMA:
5880 if (VECTOR_UNIT_VSX_P (V2DFmode)
5881 && out_mode == DFmode && out_n == 2
5882 && in_mode == DFmode && in_n == 2)
5883 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5884 if (VECTOR_UNIT_VSX_P (V4SFmode)
5885 && out_mode == SFmode && out_n == 4
5886 && in_mode == SFmode && in_n == 4)
5887 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5888 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5889 && out_mode == SFmode && out_n == 4
5890 && in_mode == SFmode && in_n == 4)
5891 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5892 break;
5893 CASE_CFN_TRUNC:
5894 if (VECTOR_UNIT_VSX_P (V2DFmode)
5895 && out_mode == DFmode && out_n == 2
5896 && in_mode == DFmode && in_n == 2)
5897 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5898 if (VECTOR_UNIT_VSX_P (V4SFmode)
5899 && out_mode == SFmode && out_n == 4
5900 && in_mode == SFmode && in_n == 4)
5901 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5902 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5903 && out_mode == SFmode && out_n == 4
5904 && in_mode == SFmode && in_n == 4)
5905 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5906 break;
5907 CASE_CFN_NEARBYINT:
5908 if (VECTOR_UNIT_VSX_P (V2DFmode)
5909 && flag_unsafe_math_optimizations
5910 && out_mode == DFmode && out_n == 2
5911 && in_mode == DFmode && in_n == 2)
5912 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5913 if (VECTOR_UNIT_VSX_P (V4SFmode)
5914 && flag_unsafe_math_optimizations
5915 && out_mode == SFmode && out_n == 4
5916 && in_mode == SFmode && in_n == 4)
5917 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5918 break;
5919 CASE_CFN_RINT:
5920 if (VECTOR_UNIT_VSX_P (V2DFmode)
5921 && !flag_trapping_math
5922 && out_mode == DFmode && out_n == 2
5923 && in_mode == DFmode && in_n == 2)
5924 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5925 if (VECTOR_UNIT_VSX_P (V4SFmode)
5926 && !flag_trapping_math
5927 && out_mode == SFmode && out_n == 4
5928 && in_mode == SFmode && in_n == 4)
5929 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5930 break;
5931 default:
5932 break;
5935 /* Generate calls to libmass if appropriate. */
5936 if (rs6000_veclib_handler)
5937 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5939 return NULL_TREE;
5942 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5944 static tree
5945 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5946 tree type_in)
5948 machine_mode in_mode, out_mode;
5949 int in_n, out_n;
5951 if (TARGET_DEBUG_BUILTIN)
5952 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
5953 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
5954 GET_MODE_NAME (TYPE_MODE (type_out)),
5955 GET_MODE_NAME (TYPE_MODE (type_in)));
5957 if (TREE_CODE (type_out) != VECTOR_TYPE
5958 || TREE_CODE (type_in) != VECTOR_TYPE)
5959 return NULL_TREE;
5961 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5962 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5963 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5964 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5966 enum rs6000_builtins fn
5967 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
5968 switch (fn)
5970 case RS6000_BUILTIN_RSQRTF:
5971 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5972 && out_mode == SFmode && out_n == 4
5973 && in_mode == SFmode && in_n == 4)
5974 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5975 break;
5976 case RS6000_BUILTIN_RSQRT:
5977 if (VECTOR_UNIT_VSX_P (V2DFmode)
5978 && out_mode == DFmode && out_n == 2
5979 && in_mode == DFmode && in_n == 2)
5980 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5981 break;
5982 case RS6000_BUILTIN_RECIPF:
5983 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5984 && out_mode == SFmode && out_n == 4
5985 && in_mode == SFmode && in_n == 4)
5986 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5987 break;
5988 case RS6000_BUILTIN_RECIP:
5989 if (VECTOR_UNIT_VSX_P (V2DFmode)
5990 && out_mode == DFmode && out_n == 2
5991 && in_mode == DFmode && in_n == 2)
5992 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5993 break;
5994 default:
5995 break;
5997 return NULL_TREE;
6000 /* Default CPU string for rs6000*_file_start functions. */
6001 static const char *rs6000_default_cpu;
6003 /* Do anything needed at the start of the asm file. */
6005 static void
6006 rs6000_file_start (void)
6008 char buffer[80];
6009 const char *start = buffer;
6010 FILE *file = asm_out_file;
6012 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6014 default_file_start ();
6016 if (flag_verbose_asm)
6018 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6020 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6022 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6023 start = "";
6026 if (global_options_set.x_rs6000_cpu_index)
6028 fprintf (file, "%s -mcpu=%s", start,
6029 processor_target_table[rs6000_cpu_index].name);
6030 start = "";
6033 if (global_options_set.x_rs6000_tune_index)
6035 fprintf (file, "%s -mtune=%s", start,
6036 processor_target_table[rs6000_tune_index].name);
6037 start = "";
6040 if (PPC405_ERRATUM77)
6042 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6043 start = "";
6046 #ifdef USING_ELFOS_H
6047 switch (rs6000_sdata)
6049 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6050 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6051 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6052 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6055 if (rs6000_sdata && g_switch_value)
6057 fprintf (file, "%s -G %d", start,
6058 g_switch_value);
6059 start = "";
6061 #endif
6063 if (*start == '\0')
6064 putc ('\n', file);
6067 #ifdef USING_ELFOS_H
6068 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6069 && !global_options_set.x_rs6000_cpu_index)
6071 fputs ("\t.machine ", asm_out_file);
6072 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6073 fputs ("power9\n", asm_out_file);
6074 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6075 fputs ("power8\n", asm_out_file);
6076 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6077 fputs ("power7\n", asm_out_file);
6078 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6079 fputs ("power6\n", asm_out_file);
6080 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6081 fputs ("power5\n", asm_out_file);
6082 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6083 fputs ("power4\n", asm_out_file);
6084 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6085 fputs ("ppc64\n", asm_out_file);
6086 else
6087 fputs ("ppc\n", asm_out_file);
6089 #endif
6091 if (DEFAULT_ABI == ABI_ELFv2)
6092 fprintf (file, "\t.abiversion 2\n");
6096 /* Return nonzero if this function is known to have a null epilogue. */
6098 int
6099 direct_return (void)
6101 if (reload_completed)
6103 rs6000_stack_t *info = rs6000_stack_info ();
6105 if (info->first_gp_reg_save == 32
6106 && info->first_fp_reg_save == 64
6107 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6108 && ! info->lr_save_p
6109 && ! info->cr_save_p
6110 && info->vrsave_size == 0
6111 && ! info->push_p)
6112 return 1;
6115 return 0;
6118 /* Return the number of instructions it takes to form a constant in an
6119 integer register. */
6121 static int
6122 num_insns_constant_wide (HOST_WIDE_INT value)
6124 /* signed constant loadable with addi */
6125 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6126 return 1;
6128 /* constant loadable with addis */
6129 else if ((value & 0xffff) == 0
6130 && (value >> 31 == -1 || value >> 31 == 0))
6131 return 1;
6133 else if (TARGET_POWERPC64)
6135 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6136 HOST_WIDE_INT high = value >> 31;
6138 if (high == 0 || high == -1)
6139 return 2;
6141 high >>= 1;
6143 if (low == 0)
6144 return num_insns_constant_wide (high) + 1;
6145 else if (high == 0)
6146 return num_insns_constant_wide (low) + 1;
6147 else
6148 return (num_insns_constant_wide (high)
6149 + num_insns_constant_wide (low) + 1);
6152 else
6153 return 2;
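/* Editor's note: worked examples (not part of this file) for the counting
   above.  0x7fff fits a signed 16-bit immediate, so one addi suffices and
   the result is 1.  0x12340000 has a zero low halfword and fits in 32 bits,
   so one addis suffices.  0x12345678 needs addis plus ori, giving 2.  On a
   64-bit target, 0x123456789abcdef0 splits into a high part costing 2, a
   low part costing 2, and one combining step, giving 5.  */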
6156 int
6157 num_insns_constant (rtx op, machine_mode mode)
6159 HOST_WIDE_INT low, high;
6161 switch (GET_CODE (op))
6163 case CONST_INT:
6164 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6165 && rs6000_is_valid_and_mask (op, mode))
6166 return 2;
6167 else
6168 return num_insns_constant_wide (INTVAL (op));
6170 case CONST_WIDE_INT:
6172 int i;
6173 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6174 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6175 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6176 return ins;
6179 case CONST_DOUBLE:
6180 if (mode == SFmode || mode == SDmode)
6182 long l;
6184 if (DECIMAL_FLOAT_MODE_P (mode))
6185 REAL_VALUE_TO_TARGET_DECIMAL32
6186 (*CONST_DOUBLE_REAL_VALUE (op), l);
6187 else
6188 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6189 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6192 long l[2];
6193 if (DECIMAL_FLOAT_MODE_P (mode))
6194 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6195 else
6196 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6197 high = l[WORDS_BIG_ENDIAN == 0];
6198 low = l[WORDS_BIG_ENDIAN != 0];
6200 if (TARGET_32BIT)
6201 return (num_insns_constant_wide (low)
6202 + num_insns_constant_wide (high));
6203 else
6205 if ((high == 0 && low >= 0)
6206 || (high == -1 && low < 0))
6207 return num_insns_constant_wide (low);
6209 else if (rs6000_is_valid_and_mask (op, mode))
6210 return 2;
6212 else if (low == 0)
6213 return num_insns_constant_wide (high) + 1;
6215 else
6216 return (num_insns_constant_wide (high)
6217 + num_insns_constant_wide (low) + 1);
6220 default:
6221 gcc_unreachable ();
6225 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6226 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6227 corresponding element of the vector, but for V4SFmode, the
6228 corresponding "float" is interpreted as an SImode integer. */
6230 HOST_WIDE_INT
6231 const_vector_elt_as_int (rtx op, unsigned int elt)
6233 rtx tmp;
6235 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6236 gcc_assert (GET_MODE (op) != V2DImode
6237 && GET_MODE (op) != V2DFmode);
6239 tmp = CONST_VECTOR_ELT (op, elt);
6240 if (GET_MODE (op) == V4SFmode)
6241 tmp = gen_lowpart (SImode, tmp);
6242 return INTVAL (tmp);
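/* Editor's note: a worked example (not part of this file).  For a V4SFmode
   constant the element is reinterpreted as its IEEE single-precision bit
   pattern, so an element of 1.0f is returned as 0x3f800000 rather than
   being converted to the integer 1.  */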
6245 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6246 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6247 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6248 all items are set to the same value and contain COPIES replicas of the
6249 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6250 operand and the others are set to the value of the operand's msb. */
6252 static bool
6253 vspltis_constant (rtx op, unsigned step, unsigned copies)
6255 machine_mode mode = GET_MODE (op);
6256 machine_mode inner = GET_MODE_INNER (mode);
6258 unsigned i;
6259 unsigned nunits;
6260 unsigned bitsize;
6261 unsigned mask;
6263 HOST_WIDE_INT val;
6264 HOST_WIDE_INT splat_val;
6265 HOST_WIDE_INT msb_val;
6267 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6268 return false;
6270 nunits = GET_MODE_NUNITS (mode);
6271 bitsize = GET_MODE_BITSIZE (inner);
6272 mask = GET_MODE_MASK (inner);
6274 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6275 splat_val = val;
6276 msb_val = val >= 0 ? 0 : -1;
6278 /* Construct the value to be splatted, if possible. If not, return 0. */
6279 for (i = 2; i <= copies; i *= 2)
6281 HOST_WIDE_INT small_val;
6282 bitsize /= 2;
6283 small_val = splat_val >> bitsize;
6284 mask >>= bitsize;
6285 if (splat_val != ((HOST_WIDE_INT)
6286 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6287 | (small_val & mask)))
6288 return false;
6289 splat_val = small_val;
6292 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6293 if (EASY_VECTOR_15 (splat_val))
6294 ;
6296 /* Also check if we can splat, and then add the result to itself. Do so if
6297 the value is positive, or if the splat instruction is using OP's mode;
6298 for splat_val < 0, the splat and the add should use the same mode. */
6299 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6300 && (splat_val >= 0 || (step == 1 && copies == 1)))
6301 ;
6303 /* Also check if we are loading up the most significant bit which can be done by
6304 loading up -1 and shifting the value left by -1. */
6305 else if (EASY_VECTOR_MSB (splat_val, inner))
6306 ;
6308 else
6309 return false;
6311 /* Check if VAL is present in every STEP-th element, and the
6312 other elements are filled with its most significant bit. */
6313 for (i = 1; i < nunits; ++i)
6315 HOST_WIDE_INT desired_val;
6316 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6317 if ((i & (step - 1)) == 0)
6318 desired_val = val;
6319 else
6320 desired_val = msb_val;
6322 if (desired_val != const_vector_elt_as_int (op, elt))
6323 return false;
6326 return true;
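/* Editor's note: worked examples (not part of this file) for the STEP and
   COPIES conventions above.  The V4SImode constant { 3, 3, 3, 3 } matches
   with step 1, copies 1: a single vspltisw 3.  On a big-endian target the
   V8HImode constant { 0, 5, 0, 5, 0, 5, 0, 5 } matches with step 2, since
   it is vspltisw 5 viewed as halfwords: every second element is the splat
   value and the rest hold its most significant bit (here 0).  */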
6329 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6330 instruction, filling in the bottom elements with 0 or -1.
6332 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6333 for the number of zeroes to shift in, or negative for the number of 0xff
6334 bytes to shift in.
6336 OP is a CONST_VECTOR. */
6338 static int
6339 vspltis_shifted (rtx op)
6341 machine_mode mode = GET_MODE (op);
6342 machine_mode inner = GET_MODE_INNER (mode);
6344 unsigned i, j;
6345 unsigned nunits;
6346 unsigned mask;
6348 HOST_WIDE_INT val;
6350 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6351 return false;
6353 /* We need to create pseudo registers to do the shift, so don't recognize
6354 shift vector constants after reload. */
6355 if (!can_create_pseudo_p ())
6356 return false;
6358 nunits = GET_MODE_NUNITS (mode);
6359 mask = GET_MODE_MASK (inner);
6361 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6363 /* Check if the value can really be the operand of a vspltis[bhw]. */
6364 if (EASY_VECTOR_15 (val))
6365 ;
6367 /* Also check if we are loading up the most significant bit which can be done
6368 by loading up -1 and shifting the value left by -1. */
6369 else if (EASY_VECTOR_MSB (val, inner))
6370 ;
6372 else
6373 return 0;
6375 /* Check if VAL is present in every STEP-th element until we find elements
6376 that are 0 or all 1 bits. */
6377 for (i = 1; i < nunits; ++i)
6379 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6380 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6382 /* If the value isn't the splat value, check for the remaining elements
6383 being 0/-1. */
6384 if (val != elt_val)
6386 if (elt_val == 0)
6388 for (j = i+1; j < nunits; ++j)
6390 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6391 if (const_vector_elt_as_int (op, elt2) != 0)
6392 return 0;
6395 return (nunits - i) * GET_MODE_SIZE (inner);
6398 else if ((elt_val & mask) == mask)
6400 for (j = i+1; j < nunits; ++j)
6402 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6403 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6404 return 0;
6407 return -((nunits - i) * GET_MODE_SIZE (inner));
6410 else
6411 return 0;
6415 /* If all elements are equal, we don't need to do VSLDOI. */
6416 return 0;
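/* Editor's note: a worked example (not part of this file).  On a big-endian
   target the V4SImode constant { 5, 0, 0, 0 } is vspltisw 5 followed by a
   VSLDOI shifting in (4 - 1) * 4 = 12 zero bytes, so the function returns
   12; if the trailing elements were all ones it would return -12.  */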
6420 /* Return true if OP is of the given MODE and can be synthesized
6421 with a vspltisb, vspltish or vspltisw. */
6423 bool
6424 easy_altivec_constant (rtx op, machine_mode mode)
6426 unsigned step, copies;
6428 if (mode == VOIDmode)
6429 mode = GET_MODE (op);
6430 else if (mode != GET_MODE (op))
6431 return false;
6433 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6434 constants. */
6435 if (mode == V2DFmode)
6436 return zero_constant (op, mode);
6438 else if (mode == V2DImode)
6440 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6441 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6442 return false;
6444 if (zero_constant (op, mode))
6445 return true;
6447 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6448 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6449 return true;
6451 return false;
6454 /* V1TImode is a special container for TImode. Ignore for now. */
6455 else if (mode == V1TImode)
6456 return false;
6458 /* Start with a vspltisw. */
6459 step = GET_MODE_NUNITS (mode) / 4;
6460 copies = 1;
6462 if (vspltis_constant (op, step, copies))
6463 return true;
6465 /* Then try with a vspltish. */
6466 if (step == 1)
6467 copies <<= 1;
6468 else
6469 step >>= 1;
6471 if (vspltis_constant (op, step, copies))
6472 return true;
6474 /* And finally a vspltisb. */
6475 if (step == 1)
6476 copies <<= 1;
6477 else
6478 step >>= 1;
6480 if (vspltis_constant (op, step, copies))
6481 return true;
6483 if (vspltis_shifted (op) != 0)
6484 return true;
6486 return false;
6489 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6490 result is OP. Abort if it is not possible. */
6492 rtx
6493 gen_easy_altivec_constant (rtx op)
6495 machine_mode mode = GET_MODE (op);
6496 int nunits = GET_MODE_NUNITS (mode);
6497 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6498 unsigned step = nunits / 4;
6499 unsigned copies = 1;
6501 /* Start with a vspltisw. */
6502 if (vspltis_constant (op, step, copies))
6503 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6505 /* Then try with a vspltish. */
6506 if (step == 1)
6507 copies <<= 1;
6508 else
6509 step >>= 1;
6511 if (vspltis_constant (op, step, copies))
6512 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6514 /* And finally a vspltisb. */
6515 if (step == 1)
6516 copies <<= 1;
6517 else
6518 step >>= 1;
6520 if (vspltis_constant (op, step, copies))
6521 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6523 gcc_unreachable ();
6526 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6527 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6529 Return the number of instructions needed (1 or 2) into the address pointed
6530 via NUM_INSNS_PTR.
6532 Return the constant that is being split via CONSTANT_PTR. */
6534 bool
6535 xxspltib_constant_p (rtx op,
6536 machine_mode mode,
6537 int *num_insns_ptr,
6538 int *constant_ptr)
6540 size_t nunits = GET_MODE_NUNITS (mode);
6541 size_t i;
6542 HOST_WIDE_INT value;
6543 rtx element;
6545 /* Set the returned values to out-of-bounds values. */
6546 *num_insns_ptr = -1;
6547 *constant_ptr = 256;
6549 if (!TARGET_P9_VECTOR)
6550 return false;
6552 if (mode == VOIDmode)
6553 mode = GET_MODE (op);
6555 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6556 return false;
6558 /* Handle (vec_duplicate <constant>). */
6559 if (GET_CODE (op) == VEC_DUPLICATE)
6561 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6562 && mode != V2DImode)
6563 return false;
6565 element = XEXP (op, 0);
6566 if (!CONST_INT_P (element))
6567 return false;
6569 value = INTVAL (element);
6570 if (!IN_RANGE (value, -128, 127))
6571 return false;
6574 /* Handle (const_vector [...]). */
6575 else if (GET_CODE (op) == CONST_VECTOR)
6577 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6578 && mode != V2DImode)
6579 return false;
6581 element = CONST_VECTOR_ELT (op, 0);
6582 if (!CONST_INT_P (element))
6583 return false;
6585 value = INTVAL (element);
6586 if (!IN_RANGE (value, -128, 127))
6587 return false;
6589 for (i = 1; i < nunits; i++)
6591 element = CONST_VECTOR_ELT (op, i);
6592 if (!CONST_INT_P (element))
6593 return false;
6595 if (value != INTVAL (element))
6596 return false;
6600 /* Handle integer constants being loaded into the upper part of the VSX
6601 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6602 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6603 else if (CONST_INT_P (op))
6605 if (!SCALAR_INT_MODE_P (mode))
6606 return false;
6608 value = INTVAL (op);
6609 if (!IN_RANGE (value, -128, 127))
6610 return false;
6612 if (!IN_RANGE (value, -1, 0))
6614 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6615 return false;
6617 if (EASY_VECTOR_15 (value))
6618 return false;
6622 else
6623 return false;
6625 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6626 sign extend. Special case 0/-1 to allow getting any VSX register instead
6627 of an Altivec register. */
6628 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6629 && EASY_VECTOR_15 (value))
6630 return false;
6632 /* Return # of instructions and the constant byte for XXSPLTIB. */
6633 if (mode == V16QImode)
6634 *num_insns_ptr = 1;
6636 else if (IN_RANGE (value, -1, 0))
6637 *num_insns_ptr = 1;
6639 else
6640 *num_insns_ptr = 2;
6642 *constant_ptr = (int) value;
6643 return true;
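/* Editor's note: an illustrative caller (not part of this file); the
   function name is hypothetical.  A V16QImode vector of all 100s needs only
   the xxspltib itself (*num_insns_ptr == 1), while a V8HImode vector of all
   100s needs xxspltib plus a sign-extending unpack (*num_insns_ptr == 2).  */
#if 0
static int
example_xxspltib_insns (rtx op)
{
  int num_insns, constant;
  if (xxspltib_constant_p (op, GET_MODE (op), &num_insns, &constant))
    return num_insns;
  return -1;   /* not representable via xxspltib */
}
#endif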
6646 const char *
6647 output_vec_const_move (rtx *operands)
6649 int shift;
6650 machine_mode mode;
6651 rtx dest, vec;
6653 dest = operands[0];
6654 vec = operands[1];
6655 mode = GET_MODE (dest);
6657 if (TARGET_VSX)
6659 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6660 int xxspltib_value = 256;
6661 int num_insns = -1;
6663 if (zero_constant (vec, mode))
6665 if (TARGET_P9_VECTOR)
6666 return "xxspltib %x0,0";
6668 else if (dest_vmx_p)
6669 return "vspltisw %0,0";
6671 else
6672 return "xxlxor %x0,%x0,%x0";
6675 if (all_ones_constant (vec, mode))
6677 if (TARGET_P9_VECTOR)
6678 return "xxspltib %x0,255";
6680 else if (dest_vmx_p)
6681 return "vspltisw %0,-1";
6683 else if (TARGET_P8_VECTOR)
6684 return "xxlorc %x0,%x0,%x0";
6686 else
6687 gcc_unreachable ();
6690 if (TARGET_P9_VECTOR
6691 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6693 if (num_insns == 1)
6695 operands[2] = GEN_INT (xxspltib_value & 0xff);
6696 return "xxspltib %x0,%2";
6699 return "#";
6703 if (TARGET_ALTIVEC)
6705 rtx splat_vec;
6707 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6708 if (zero_constant (vec, mode))
6709 return "vspltisw %0,0";
6711 if (all_ones_constant (vec, mode))
6712 return "vspltisw %0,-1";
6714 /* Do we need to construct a value using VSLDOI? */
6715 shift = vspltis_shifted (vec);
6716 if (shift != 0)
6717 return "#";
6719 splat_vec = gen_easy_altivec_constant (vec);
6720 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6721 operands[1] = XEXP (splat_vec, 0);
6722 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6723 return "#";
6725 switch (GET_MODE (splat_vec))
6727 case E_V4SImode:
6728 return "vspltisw %0,%1";
6730 case E_V8HImode:
6731 return "vspltish %0,%1";
6733 case E_V16QImode:
6734 return "vspltisb %0,%1";
6736 default:
6737 gcc_unreachable ();
6741 gcc_unreachable ();
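/* Editor's note: illustrative outputs (not part of this file) for the
   template above.  A zero vector becomes "xxspltib %x0,0" on ISA 3.0
   machines, "vspltisw %0,0" when the destination is an Altivec register,
   and "xxlxor %x0,%x0,%x0" otherwise; an all-ones vector maps to
   "xxspltib %x0,255", "vspltisw %0,-1" or "xxlorc %x0,%x0,%x0".  The "#"
   return value requests a later split into multiple instructions.  */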
6744 /* Initialize vector TARGET to VALS. */
6746 void
6747 rs6000_expand_vector_init (rtx target, rtx vals)
6749 machine_mode mode = GET_MODE (target);
6750 machine_mode inner_mode = GET_MODE_INNER (mode);
6751 int n_elts = GET_MODE_NUNITS (mode);
6752 int n_var = 0, one_var = -1;
6753 bool all_same = true, all_const_zero = true;
6754 rtx x, mem;
6755 int i;
6757 for (i = 0; i < n_elts; ++i)
6759 x = XVECEXP (vals, 0, i);
6760 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6761 ++n_var, one_var = i;
6762 else if (x != CONST0_RTX (inner_mode))
6763 all_const_zero = false;
6765 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6766 all_same = false;
6769 if (n_var == 0)
6771 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6772 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6773 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6775 /* Zero register. */
6776 emit_move_insn (target, CONST0_RTX (mode));
6777 return;
6779 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6781 /* Splat immediate. */
6782 emit_insn (gen_rtx_SET (target, const_vec));
6783 return;
6785 else
6787 /* Load from constant pool. */
6788 emit_move_insn (target, const_vec);
6789 return;
6793 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6794 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6796 rtx op[2];
6797 size_t i;
6798 size_t num_elements = all_same ? 1 : 2;
6799 for (i = 0; i < num_elements; i++)
6801 op[i] = XVECEXP (vals, 0, i);
6802 /* Just in case there is a SUBREG with a smaller mode, do a
6803 conversion. */
6804 if (GET_MODE (op[i]) != inner_mode)
6806 rtx tmp = gen_reg_rtx (inner_mode);
6807 convert_move (tmp, op[i], 0);
6808 op[i] = tmp;
6810 /* Allow load with splat double word. */
6811 else if (MEM_P (op[i]))
6813 if (!all_same)
6814 op[i] = force_reg (inner_mode, op[i]);
6816 else if (!REG_P (op[i]))
6817 op[i] = force_reg (inner_mode, op[i]);
6820 if (all_same)
6822 if (mode == V2DFmode)
6823 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6824 else
6825 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6827 else
6829 if (mode == V2DFmode)
6830 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6831 else
6832 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6834 return;
6837 /* Special case initializing vector int if we are on 64-bit systems with
6838 direct move or we have the ISA 3.0 instructions. */
6839 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6840 && TARGET_DIRECT_MOVE_64BIT)
6842 if (all_same)
6844 rtx element0 = XVECEXP (vals, 0, 0);
6845 if (MEM_P (element0))
6846 element0 = rs6000_address_for_fpconvert (element0);
6847 else
6848 element0 = force_reg (SImode, element0);
6850 if (TARGET_P9_VECTOR)
6851 emit_insn (gen_vsx_splat_v4si (target, element0));
6852 else
6854 rtx tmp = gen_reg_rtx (DImode);
6855 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6856 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6858 return;
6860 else
6862 rtx elements[4];
6863 size_t i;
6865 for (i = 0; i < 4; i++)
6867 elements[i] = XVECEXP (vals, 0, i);
6868 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
6869 elements[i] = copy_to_mode_reg (SImode, elements[i]);
6872 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6873 elements[2], elements[3]));
6874 return;
6878 /* With single-precision floating point on VSX, single precision is
6879 internally represented as a double, so either make 2 V2DF vectors and
6880 convert these vectors to single precision, or do one conversion and
6881 splat the result to the other elements. */
6882 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6884 if (all_same)
6886 rtx element0 = XVECEXP (vals, 0, 0);
6888 if (TARGET_P9_VECTOR)
6890 if (MEM_P (element0))
6891 element0 = rs6000_address_for_fpconvert (element0);
6893 emit_insn (gen_vsx_splat_v4sf (target, element0));
6896 else
6898 rtx freg = gen_reg_rtx (V4SFmode);
6899 rtx sreg = force_reg (SFmode, element0);
6900 rtx cvt = (TARGET_XSCVDPSPN
6901 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6902 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6904 emit_insn (cvt);
6905 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6906 const0_rtx));
6909 else
6911 rtx dbl_even = gen_reg_rtx (V2DFmode);
6912 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6913 rtx flt_even = gen_reg_rtx (V4SFmode);
6914 rtx flt_odd = gen_reg_rtx (V4SFmode);
6915 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6916 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6917 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6918 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6920 /* Use VMRGEW if we can instead of doing a permute. */
6921 if (TARGET_P8_VECTOR)
6923 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
6924 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
6925 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6926 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6927 if (BYTES_BIG_ENDIAN)
6928 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
6929 else
6930 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
6932 else
6934 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6935 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6936 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6937 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6938 rs6000_expand_extract_even (target, flt_even, flt_odd);
6941 return;
6944 /* Special case initializing vector short/char that are splats if we are on
6945 64-bit systems with direct move. */
6946 if (all_same && TARGET_DIRECT_MOVE_64BIT
6947 && (mode == V16QImode || mode == V8HImode))
6949 rtx op0 = XVECEXP (vals, 0, 0);
6950 rtx di_tmp = gen_reg_rtx (DImode);
6952 if (!REG_P (op0))
6953 op0 = force_reg (GET_MODE_INNER (mode), op0);
6955 if (mode == V16QImode)
6957 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
6958 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
6959 return;
6962 if (mode == V8HImode)
6964 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
6965 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
6966 return;
6970 /* Store value to stack temp. Load vector element. Splat. However, splat
6971 of 64-bit items is not supported on Altivec. */
6972 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6974 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6975 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6976 XVECEXP (vals, 0, 0));
6977 x = gen_rtx_UNSPEC (VOIDmode,
6978 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6979 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6980 gen_rtvec (2,
6981 gen_rtx_SET (target, mem),
6982 x)));
6983 x = gen_rtx_VEC_SELECT (inner_mode, target,
6984 gen_rtx_PARALLEL (VOIDmode,
6985 gen_rtvec (1, const0_rtx)));
6986 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6987 return;
6990 /* One field is non-constant. Load constant then overwrite
6991 varying field. */
6992 if (n_var == 1)
6994 rtx copy = copy_rtx (vals);
6996 /* Load constant part of vector, substitute neighboring value for
6997 varying element. */
6998 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6999 rs6000_expand_vector_init (target, copy);
7001 /* Insert variable. */
7002 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7003 return;
7006 /* Construct the vector in memory one field at a time
7007 and load the whole vector. */
7008 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7009 for (i = 0; i < n_elts; i++)
7010 emit_move_insn (adjust_address_nv (mem, inner_mode,
7011 i * GET_MODE_SIZE (inner_mode)),
7012 XVECEXP (vals, 0, i));
7013 emit_move_insn (target, mem);
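/* Editor's note: an illustrative caller (not part of this file); the
   function name is hypothetical.  With four identical elements the expander
   above takes one of the splat paths; otherwise it falls back to building
   the vector in a stack temporary.  */
#if 0
static void
example_splat_v4sf (rtx target, rtx x)
{
  rtvec v = gen_rtvec (4, x, x, x, x);
  rs6000_expand_vector_init (target, gen_rtx_PARALLEL (V4SFmode, v));
}
#endif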
7016 /* Set field ELT of TARGET to VAL. */
7018 void
7019 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7021 machine_mode mode = GET_MODE (target);
7022 machine_mode inner_mode = GET_MODE_INNER (mode);
7023 rtx reg = gen_reg_rtx (mode);
7024 rtx mask, mem, x;
7025 int width = GET_MODE_SIZE (inner_mode);
7026 int i;
7028 val = force_reg (GET_MODE (val), val);
7030 if (VECTOR_MEM_VSX_P (mode))
7032 rtx insn = NULL_RTX;
7033 rtx elt_rtx = GEN_INT (elt);
7035 if (mode == V2DFmode)
7036 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7038 else if (mode == V2DImode)
7039 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7041 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7043 if (mode == V4SImode)
7044 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7045 else if (mode == V8HImode)
7046 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7047 else if (mode == V16QImode)
7048 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7049 else if (mode == V4SFmode)
7050 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7053 if (insn)
7055 emit_insn (insn);
7056 return;
7060 /* Simplify setting single element vectors like V1TImode. */
7061 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7063 emit_move_insn (target, gen_lowpart (mode, val));
7064 return;
7067 /* Load single variable value. */
7068 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7069 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7070 x = gen_rtx_UNSPEC (VOIDmode,
7071 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7072 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7073 gen_rtvec (2,
7074 gen_rtx_SET (reg, mem),
7075 x)));
7077 /* Linear sequence. */
7078 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7079 for (i = 0; i < 16; ++i)
7080 XVECEXP (mask, 0, i) = GEN_INT (i);
7082 /* Set permute mask to insert element into target. */
7083 for (i = 0; i < width; ++i)
7084 XVECEXP (mask, 0, elt*width + i)
7085 = GEN_INT (i + 0x10);
7086 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7088 if (BYTES_BIG_ENDIAN)
7089 x = gen_rtx_UNSPEC (mode,
7090 gen_rtvec (3, target, reg,
7091 force_reg (V16QImode, x)),
7092 UNSPEC_VPERM);
7093 else
7095 if (TARGET_P9_VECTOR)
7096 x = gen_rtx_UNSPEC (mode,
7097 gen_rtvec (3, reg, target,
7098 force_reg (V16QImode, x)),
7099 UNSPEC_VPERMR);
7100 else
7102 /* Invert selector. We prefer to generate VNAND on P8 so
7103 that future fusion opportunities can kick in, but must
7104 generate VNOR elsewhere. */
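	  /* Either form computes ~x here: (ior (not x) (not x)) matches the
	     vnand pattern, while (and (not x) (not x)) matches vnor.  */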
7105 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7106 rtx iorx = (TARGET_P8_VECTOR
7107 ? gen_rtx_IOR (V16QImode, notx, notx)
7108 : gen_rtx_AND (V16QImode, notx, notx));
7109 rtx tmp = gen_reg_rtx (V16QImode);
7110 emit_insn (gen_rtx_SET (tmp, iorx));
7112 /* Permute with operands reversed and adjusted selector. */
7113 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7114 UNSPEC_VPERM);
7118 emit_insn (gen_rtx_SET (target, x));
7121 /* Extract field ELT from VEC into TARGET. */
7123 void
7124 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7126 machine_mode mode = GET_MODE (vec);
7127 machine_mode inner_mode = GET_MODE_INNER (mode);
7128 rtx mem;
7130 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7132 switch (mode)
7134 default:
7135 break;
7136 case E_V1TImode:
7137 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7138 emit_move_insn (target, gen_lowpart (TImode, vec));
7139 break;
7140 case E_V2DFmode:
7141 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7142 return;
7143 case E_V2DImode:
7144 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7145 return;
7146 case E_V4SFmode:
7147 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7148 return;
7149 case E_V16QImode:
7150 if (TARGET_DIRECT_MOVE_64BIT)
7152 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7153 return;
7155 else
7156 break;
7157 case E_V8HImode:
7158 if (TARGET_DIRECT_MOVE_64BIT)
7160 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7161 return;
7163 else
7164 break;
7165 case E_V4SImode:
7166 if (TARGET_DIRECT_MOVE_64BIT)
7168 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7169 return;
7171 break;
7174 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7175 && TARGET_DIRECT_MOVE_64BIT)
7177 if (GET_MODE (elt) != DImode)
7179 rtx tmp = gen_reg_rtx (DImode);
7180 convert_move (tmp, elt, 0);
7181 elt = tmp;
7183 else if (!REG_P (elt))
7184 elt = force_reg (DImode, elt);
7186 switch (mode)
7188 case E_V2DFmode:
7189 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7190 return;
7192 case E_V2DImode:
7193 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7194 return;
7196 case E_V4SFmode:
7197 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7198 return;
7200 case E_V4SImode:
7201 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7202 return;
7204 case E_V8HImode:
7205 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7206 return;
7208 case E_V16QImode:
7209 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7210 return;
7212 default:
7213 gcc_unreachable ();
7217 gcc_assert (CONST_INT_P (elt));
7219 /* Allocate mode-sized buffer. */
7220 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7222 emit_move_insn (mem, vec);
7224 /* Add offset to field within buffer matching vector element. */
7225 mem = adjust_address_nv (mem, inner_mode,
7226 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7228 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7231 /* Helper function to return the register number of a RTX. */
7232 static inline int
7233 regno_or_subregno (rtx op)
7235 if (REG_P (op))
7236 return REGNO (op);
7237 else if (SUBREG_P (op))
7238 return subreg_regno (op);
7239 else
7240 gcc_unreachable ();
7243 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7244 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7245 temporary (BASE_TMP) to fix up the address.  Return the new memory address
7246 that is valid for reads or writes to a given register (SCALAR_REG). */
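/* Illustrative example (register numbers are arbitrary): for a V4SImode MEM
   at (plus r9 16) and a variable ELEMENT in r10 on a 64-bit target, this
   emits sldi BASE_TMP,r10,2 and addi BASE_TMP,BASE_TMP,16, and returns an
   SImode MEM at (plus r9 BASE_TMP).  */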
7248 static rtx
7249 rs6000_adjust_vec_address (rtx scalar_reg,
7250 rtx mem,
7251 rtx element,
7252 rtx base_tmp,
7253 machine_mode scalar_mode)
7255 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7256 rtx addr = XEXP (mem, 0);
7257 rtx element_offset;
7258 rtx new_addr;
7259 bool valid_addr_p;
7261 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7262 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7264 /* Calculate what we need to add to the address to get the element
7265 address. */
7266 if (CONST_INT_P (element))
7267 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7268 else
7270 int byte_shift = exact_log2 (scalar_size);
7271 gcc_assert (byte_shift >= 0);
7273 if (byte_shift == 0)
7274 element_offset = element;
7276 else
7278 if (TARGET_POWERPC64)
7279 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7280 else
7281 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7283 element_offset = base_tmp;
7287 /* Create the new address pointing to the element within the vector. If we
7288 are adding 0, we don't have to change the address. */
7289 if (element_offset == const0_rtx)
7290 new_addr = addr;
7292 /* A simple indirect address can be converted into a reg + offset
7293 address. */
7294 else if (REG_P (addr) || SUBREG_P (addr))
7295 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7297 /* Optimize D-FORM addresses with constant offset with a constant element, to
7298 include the element offset in the address directly. */
7299 else if (GET_CODE (addr) == PLUS)
7301 rtx op0 = XEXP (addr, 0);
7302 rtx op1 = XEXP (addr, 1);
7303 rtx insn;
7305 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7306 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7308 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7309 rtx offset_rtx = GEN_INT (offset);
7311 if (IN_RANGE (offset, -32768, 32767)
7312 && (scalar_size < 8 || (offset & 0x3) == 0))
7313 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7314 else
7316 emit_move_insn (base_tmp, offset_rtx);
7317 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7320 else
7322 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7323 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7325 /* Note, ADDI requires the register being added to be a base
7326 register. If the register was R0, load it up into the temporary
7327 and do the add. */
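	     /* This is because an RA operand of 0 in the D-form add (addi)
		means the literal constant zero, not the contents of r0.  */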
7328 if (op1_reg_p
7329 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7331 insn = gen_add3_insn (base_tmp, op1, element_offset);
7332 gcc_assert (insn != NULL_RTX);
7333 emit_insn (insn);
7336 else if (ele_reg_p
7337 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7339 insn = gen_add3_insn (base_tmp, element_offset, op1);
7340 gcc_assert (insn != NULL_RTX);
7341 emit_insn (insn);
7344 else
7346 emit_move_insn (base_tmp, op1);
7347 emit_insn (gen_add2_insn (base_tmp, element_offset));
7350 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7354 else
7356 emit_move_insn (base_tmp, addr);
7357 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7360 /* If we have a PLUS, we need to see whether the particular register class
7361 allows for D-FORM or X-FORM addressing. */
7362 if (GET_CODE (new_addr) == PLUS)
7364 rtx op1 = XEXP (new_addr, 1);
7365 addr_mask_type addr_mask;
7366 int scalar_regno = regno_or_subregno (scalar_reg);
7368 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7369 if (INT_REGNO_P (scalar_regno))
7370 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7372 else if (FP_REGNO_P (scalar_regno))
7373 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7375 else if (ALTIVEC_REGNO_P (scalar_regno))
7376 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7378 else
7379 gcc_unreachable ();
7381 if (REG_P (op1) || SUBREG_P (op1))
7382 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7383 else
7384 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7387 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7388 valid_addr_p = true;
7390 else
7391 valid_addr_p = false;
7393 if (!valid_addr_p)
7395 emit_move_insn (base_tmp, new_addr);
7396 new_addr = base_tmp;
7399 return change_address (mem, scalar_mode, new_addr);
7402 /* Split a variable vec_extract operation into the component instructions. */
7404 void
7405 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7406 rtx tmp_altivec)
7408 machine_mode mode = GET_MODE (src);
7409 machine_mode scalar_mode = GET_MODE (dest);
7410 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7411 int byte_shift = exact_log2 (scalar_size);
7413 gcc_assert (byte_shift >= 0);
7415 /* If we are given a memory address, optimize to load just the element. We
7416 don't have to adjust the vector element number on little endian
7417 systems. */
7418 if (MEM_P (src))
7420 gcc_assert (REG_P (tmp_gpr));
7421 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7422 tmp_gpr, scalar_mode));
7423 return;
7426 else if (REG_P (src) || SUBREG_P (src))
7428 int bit_shift = byte_shift + 3;
7429 rtx element2;
7430 int dest_regno = regno_or_subregno (dest);
7431 int src_regno = regno_or_subregno (src);
7432 int element_regno = regno_or_subregno (element);
7434 gcc_assert (REG_P (tmp_gpr));
7436 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7437 a general purpose register. */
7438 if (TARGET_P9_VECTOR
7439 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7440 && INT_REGNO_P (dest_regno)
7441 && ALTIVEC_REGNO_P (src_regno)
7442 && INT_REGNO_P (element_regno))
7444 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7445 rtx element_si = gen_rtx_REG (SImode, element_regno);
7447 if (mode == V16QImode)
7448 emit_insn (BYTES_BIG_ENDIAN
7449 ? gen_vextublx (dest_si, element_si, src)
7450 : gen_vextubrx (dest_si, element_si, src));
7452 else if (mode == V8HImode)
7454 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7455 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7456 emit_insn (BYTES_BIG_ENDIAN
7457 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7458 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7462 else
7464 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7465 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7466 emit_insn (BYTES_BIG_ENDIAN
7467 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7468 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7471 return;
7475 gcc_assert (REG_P (tmp_altivec));
7477 /* For little endian, adjust the element ordering.  For V2DI/V2DF we can
7478 use an XOR; otherwise we need to subtract.  The shift amount is chosen
7479 so that VSLO shifts the element into the upper position (adding 3
7480 converts a byte shift into a bit shift).  */
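      /* Illustrative example: on little endian, V2DI element 1 becomes
	 1 ^ 1 == 0, while V4SI element 1 becomes 3 - 1 == 2; the adjusted
	 element number is then shifted left by 3 + log2 (element size)
	 to form the VSLO shift count in bits.  */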
7481 if (scalar_size == 8)
7483 if (!BYTES_BIG_ENDIAN)
7485 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7486 element2 = tmp_gpr;
7488 else
7489 element2 = element;
7491 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7492 bit. */
7493 emit_insn (gen_rtx_SET (tmp_gpr,
7494 gen_rtx_AND (DImode,
7495 gen_rtx_ASHIFT (DImode,
7496 element2,
7497 GEN_INT (6)),
7498 GEN_INT (64))));
7500 else
7502 if (!BYTES_BIG_ENDIAN)
7504 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7506 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7507 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7508 element2 = tmp_gpr;
7510 else
7511 element2 = element;
7513 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7516 /* Get the value into the lower byte of the Altivec register where VSLO
7517 expects it. */
7518 if (TARGET_P9_VECTOR)
7519 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7520 else if (can_create_pseudo_p ())
7521 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7522 else
7524 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7525 emit_move_insn (tmp_di, tmp_gpr);
7526 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7529 /* Do the VSLO to get the value into the final location. */
7530 switch (mode)
7532 case E_V2DFmode:
7533 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7534 return;
7536 case E_V2DImode:
7537 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7538 return;
7540 case E_V4SFmode:
7542 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7543 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7544 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7545 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7546 tmp_altivec));
7548 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7549 return;
7552 case E_V4SImode:
7553 case E_V8HImode:
7554 case E_V16QImode:
7556 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7557 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7558 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7559 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7560 tmp_altivec));
7561 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7562 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7563 GEN_INT (64 - (8 * scalar_size))));
7564 return;
7567 default:
7568 gcc_unreachable ();
7571 return;
7573 else
7574 gcc_unreachable ();
7577 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7578 two SImode values. */
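/* For instance, SI1 == 0x11112222 and SI2 == 0x33334444 produce the DImode
   value 0x1111222233334444, with SI1 occupying the upper 32 bits.  */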
7580 static void
7581 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7583 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7585 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7587 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7588 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7590 emit_move_insn (dest, GEN_INT (const1 | const2));
7591 return;
7594 /* Put si1 into upper 32-bits of dest. */
7595 if (CONST_INT_P (si1))
7596 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7597 else
7599 /* Generate RLDIC. */
7600 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7601 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7602 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7603 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7604 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7605 emit_insn (gen_rtx_SET (dest, and_rtx));
7608 /* Put si2 into the temporary. */
7609 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7610 if (CONST_INT_P (si2))
7611 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7612 else
7613 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7615 /* Combine the two parts. */
7616 emit_insn (gen_iordi3 (dest, dest, tmp));
7617 return;
7620 /* Split a V4SI initialization. */
7622 void
7623 rs6000_split_v4si_init (rtx operands[])
7625 rtx dest = operands[0];
7627 /* Destination is a GPR, build up the two DImode parts in place. */
7628 if (REG_P (dest) || SUBREG_P (dest))
7630 int d_regno = regno_or_subregno (dest);
7631 rtx scalar1 = operands[1];
7632 rtx scalar2 = operands[2];
7633 rtx scalar3 = operands[3];
7634 rtx scalar4 = operands[4];
7635 rtx tmp1 = operands[5];
7636 rtx tmp2 = operands[6];
7638 /* Even though we only need one temporary (plus the destination, which
7639 has an early clobber constraint), try to use two temporaries, one for
7640 each double word created. That way the 2nd insn scheduling pass can
7641 rearrange things so the two parts are done in parallel. */
7642 if (BYTES_BIG_ENDIAN)
7644 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7645 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7646 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7647 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7649 else
7651 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7652 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7653 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7654 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7656 return;
7659 else
7660 gcc_unreachable ();
7663 /* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
7664 selects whether the returned alignment is ABI-mandated, optional, or
7665 both.  */
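/* For example, raising a vector type to 128-bit alignment is an ABI
   requirement, while the word alignment given to char arrays below is only
   an optimization.  */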
7667 unsigned int
7668 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7670 if (how != align_opt)
7672 if (TREE_CODE (type) == VECTOR_TYPE && align < 128)
7673 align = 128;
7676 if (how != align_abi)
7678 if (TREE_CODE (type) == ARRAY_TYPE
7679 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7681 if (align < BITS_PER_WORD)
7682 align = BITS_PER_WORD;
7686 return align;
7689 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7690 instructions simply ignore the low bits; VSX memory instructions
7691 are aligned to 4 or 8 bytes. */
7693 static bool
7694 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7696 return (STRICT_ALIGNMENT
7697 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7698 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7699 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7700 && (int) align < VECTOR_ALIGN (mode)))));
7703 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7705 bool
7706 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7708 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7710 if (computed != 128)
7712 static bool warned;
7713 if (!warned && warn_psabi)
7715 warned = true;
7716 inform (input_location,
7717 "the layout of aggregates containing vectors with"
7718 " %d-byte alignment has changed in GCC 5",
7719 computed / BITS_PER_UNIT);
7722 /* In current GCC there is no special case. */
7723 return false;
7726 return false;
7729 /* AIX increases natural record alignment to doubleword if the first
7730 field is an FP double while the FP fields remain word aligned. */
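/* Illustrative example: struct { double d; int i; } is given 64-bit
   alignment here, whereas struct { int i; double d; } keeps its natural
   alignment, since only the first field is considered.  */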
7732 unsigned int
7733 rs6000_special_round_type_align (tree type, unsigned int computed,
7734 unsigned int specified)
7736 unsigned int align = MAX (computed, specified);
7737 tree field = TYPE_FIELDS (type);
7739 /* Skip all non-field decls.  */
7740 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7741 field = DECL_CHAIN (field);
7743 if (field != NULL && field != type)
7745 type = TREE_TYPE (field);
7746 while (TREE_CODE (type) == ARRAY_TYPE)
7747 type = TREE_TYPE (type);
7749 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7750 align = MAX (align, 64);
7753 return align;
7756 /* Darwin increases record alignment to the natural alignment of
7757 the first field. */
7759 unsigned int
7760 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7761 unsigned int specified)
7763 unsigned int align = MAX (computed, specified);
7765 if (TYPE_PACKED (type))
7766 return align;
7768 /* Find the first field, looking down into aggregates. */
7769 do {
7770 tree field = TYPE_FIELDS (type);
7771 /* Skip all non-field decls.  */
7772 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7773 field = DECL_CHAIN (field);
7774 if (! field)
7775 break;
7776 /* A packed field does not contribute any extra alignment. */
7777 if (DECL_PACKED (field))
7778 return align;
7779 type = TREE_TYPE (field);
7780 while (TREE_CODE (type) == ARRAY_TYPE)
7781 type = TREE_TYPE (type);
7782 } while (AGGREGATE_TYPE_P (type));
7784 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7785 align = MAX (align, TYPE_ALIGN (type));
7787 return align;
7790 /* Return 1 for an operand in small memory on V.4/eabi. */
7792 int
7793 small_data_operand (rtx op ATTRIBUTE_UNUSED,
7794 machine_mode mode ATTRIBUTE_UNUSED)
7796 #if TARGET_ELF
7797 rtx sym_ref;
7799 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
7800 return 0;
7802 if (DEFAULT_ABI != ABI_V4)
7803 return 0;
7805 if (GET_CODE (op) == SYMBOL_REF)
7806 sym_ref = op;
7808 else if (GET_CODE (op) != CONST
7809 || GET_CODE (XEXP (op, 0)) != PLUS
7810 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
7811 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
7812 return 0;
7814 else
7816 rtx sum = XEXP (op, 0);
7817 HOST_WIDE_INT summand;
7819 /* We have to be careful here, because it is the referenced address
7820 that must be within 32k of _SDA_BASE_, not just the symbol. */
7821 summand = INTVAL (XEXP (sum, 1));
7822 if (summand < 0 || summand > g_switch_value)
7823 return 0;
7825 sym_ref = XEXP (sum, 0);
7828 return SYMBOL_REF_SMALL_P (sym_ref);
7829 #else
7830 return 0;
7831 #endif
7834 /* Return true if either operand is a general purpose register. */
7836 bool
7837 gpr_or_gpr_p (rtx op0, rtx op1)
7839 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
7840 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
7843 /* Return true if this is a move direct operation between GPR registers and
7844 floating point/VSX registers. */
7846 bool
7847 direct_move_p (rtx op0, rtx op1)
7849 int regno0, regno1;
7851 if (!REG_P (op0) || !REG_P (op1))
7852 return false;
7854 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
7855 return false;
7857 regno0 = REGNO (op0);
7858 regno1 = REGNO (op1);
7859 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
7860 return false;
7862 if (INT_REGNO_P (regno0))
7863 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
7865 else if (INT_REGNO_P (regno1))
7867 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
7868 return true;
7870 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
7871 return true;
7874 return false;
7877 /* Return true if the OFFSET is valid for the quad address instructions that
7878 use d-form (register + offset) addressing. */
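/* E.g. offsets 0, 16 and -32768 are acceptable, while 8 (not a multiple
   of 16) and 32768 (out of range) are not.  */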
7880 static inline bool
7881 quad_address_offset_p (HOST_WIDE_INT offset)
7883 return (IN_RANGE (offset, -32768, 32767) && (offset & 0xf) == 0);
7886 /* Return true if the ADDR is an acceptable address for a quad memory
7887 operation of mode MODE (either LQ/STQ for general purpose registers, or
7888 LXV/STXV for vector registers under ISA 3.0).  STRICT is true if the
7889 address must be strictly valid, i.e. pseudo registers are not
7890 acceptable as base registers.  */
7892 bool
7893 quad_address_p (rtx addr, machine_mode mode, bool strict)
7895 rtx op0, op1;
7897 if (GET_MODE_SIZE (mode) != 16)
7898 return false;
7900 if (legitimate_indirect_address_p (addr, strict))
7901 return true;
7903 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
7904 return false;
7906 if (GET_CODE (addr) != PLUS)
7907 return false;
7909 op0 = XEXP (addr, 0);
7910 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
7911 return false;
7913 op1 = XEXP (addr, 1);
7914 if (!CONST_INT_P (op1))
7915 return false;
7917 return quad_address_offset_p (INTVAL (op1));
7920 /* Return true if this is a load or store quad operation. This function does
7921 not handle the atomic quad memory instructions. */
7923 bool
7924 quad_load_store_p (rtx op0, rtx op1)
7926 bool ret;
7928 if (!TARGET_QUAD_MEMORY)
7929 ret = false;
7931 else if (REG_P (op0) && MEM_P (op1))
7932 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
7933 && quad_memory_operand (op1, GET_MODE (op1))
7934 && !reg_overlap_mentioned_p (op0, op1));
7936 else if (MEM_P (op0) && REG_P (op1))
7937 ret = (quad_memory_operand (op0, GET_MODE (op0))
7938 && quad_int_reg_operand (op1, GET_MODE (op1)));
7940 else
7941 ret = false;
7943 if (TARGET_DEBUG_ADDR)
7945 fprintf (stderr, "\n========== quad_load_store, return %s\n",
7946 ret ? "true" : "false");
7947 debug_rtx (gen_rtx_SET (op0, op1));
7950 return ret;
7953 /* Given an address, return a constant offset term if one exists. */
7955 static rtx
7956 address_offset (rtx op)
7958 if (GET_CODE (op) == PRE_INC
7959 || GET_CODE (op) == PRE_DEC)
7960 op = XEXP (op, 0);
7961 else if (GET_CODE (op) == PRE_MODIFY
7962 || GET_CODE (op) == LO_SUM)
7963 op = XEXP (op, 1);
7965 if (GET_CODE (op) == CONST)
7966 op = XEXP (op, 0);
7968 if (GET_CODE (op) == PLUS)
7969 op = XEXP (op, 1);
7971 if (CONST_INT_P (op))
7972 return op;
7974 return NULL_RTX;
7977 /* Return true if the MEM operand is a memory operand suitable for use
7978 with a (full width, possibly multiple) gpr load/store. On
7979 powerpc64 this means the offset must be divisible by 4.
7980 Implements 'Y' constraint.
7982 Accept direct, indexed, offset, lo_sum and tocref. Since this is
7983 a constraint function we know the operand has satisfied a suitable
7984 memory predicate. Also accept some odd rtl generated by reload
7985 (see rs6000_legitimize_reload_address for various forms). It is
7986 important that reload rtl be accepted by appropriate constraints
7987 but not by the operand predicate.
7989 Offsetting a lo_sum should not be allowed, except where we know by
7990 alignment that a 32k boundary is not crossed, but see the ???
7991 comment in rs6000_legitimize_reload_address. Note that by
7992 "offsetting" here we mean a further offset to access parts of the
7993 MEM. It's fine to have a lo_sum where the inner address is offset
7994 from a sym, since the same sym+offset will appear in the high part
7995 of the address calculation. */
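/* A worked example of the checks below: for TImode on powerpc64, EXTRA is
   8, so a reg+offset address is accepted only if the offset is a multiple
   of 4 and offset + 0x8000 < 0x10000 - 8, i.e. offset <= 32756.  */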
7997 bool
7998 mem_operand_gpr (rtx op, machine_mode mode)
8000 unsigned HOST_WIDE_INT offset;
8001 int extra;
8002 rtx addr = XEXP (op, 0);
8004 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8005 if (TARGET_UPDATE
8006 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8007 && mode_supports_pre_incdec_p (mode)
8008 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8009 return true;
8011 /* Don't allow non-offsettable addresses. See PRs 83969 and 84279. */
8012 if (!rs6000_offsettable_memref_p (op, mode, false))
8013 return false;
8015 op = address_offset (addr);
8016 if (op == NULL_RTX)
8017 return true;
8019 offset = INTVAL (op);
8020 if (TARGET_POWERPC64 && (offset & 3) != 0)
8021 return false;
8023 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8024 if (extra < 0)
8025 extra = 0;
8027 if (GET_CODE (addr) == LO_SUM)
8028 /* For lo_sum addresses, we must allow any offset except one that
8029 causes a wrap, so test only the low 16 bits. */
8030 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8032 return offset + 0x8000 < 0x10000u - extra;
8035 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8036 enforce an offset divisible by 4 even for 32-bit. */
8038 bool
8039 mem_operand_ds_form (rtx op, machine_mode mode)
8041 unsigned HOST_WIDE_INT offset;
8042 int extra;
8043 rtx addr = XEXP (op, 0);
8045 if (!offsettable_address_p (false, mode, addr))
8046 return false;
8048 op = address_offset (addr);
8049 if (op == NULL_RTX)
8050 return true;
8052 offset = INTVAL (op);
8053 if ((offset & 3) != 0)
8054 return false;
8056 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8057 if (extra < 0)
8058 extra = 0;
8060 if (GET_CODE (addr) == LO_SUM)
8061 /* For lo_sum addresses, we must allow any offset except one that
8062 causes a wrap, so test only the low 16 bits. */
8063 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8065 return offset + 0x8000 < 0x10000u - extra;
8068 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8070 static bool
8071 reg_offset_addressing_ok_p (machine_mode mode)
8073 switch (mode)
8075 case E_V16QImode:
8076 case E_V8HImode:
8077 case E_V4SFmode:
8078 case E_V4SImode:
8079 case E_V2DFmode:
8080 case E_V2DImode:
8081 case E_V1TImode:
8082 case E_TImode:
8083 case E_TFmode:
8084 case E_KFmode:
8085 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8086 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8087 a vector mode, if we want to use the VSX registers to move it around,
8088 we need to restrict ourselves to reg+reg addressing. Similarly for
8089 IEEE 128-bit floating point that is passed in a single vector
8090 register. */
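	 /* E.g. without ISA 3.0, a V4SImode access must use a reg+reg
	    address (lvx/lxvw4x style); with ISA 3.0 d-form, lxv/stxv also
	    accept reg+offset when the offset is a multiple of 16.  */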
8091 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8092 return mode_supports_dq_form (mode);
8093 break;
8095 case E_SDmode:
8096 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8097 addressing for the LFIWZX and STFIWX instructions. */
8098 if (TARGET_NO_SDMODE_STACK)
8099 return false;
8100 break;
8102 default:
8103 break;
8106 return true;
8109 static bool
8110 virtual_stack_registers_memory_p (rtx op)
8112 int regnum;
8114 if (GET_CODE (op) == REG)
8115 regnum = REGNO (op);
8117 else if (GET_CODE (op) == PLUS
8118 && GET_CODE (XEXP (op, 0)) == REG
8119 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8120 regnum = REGNO (XEXP (op, 0));
8122 else
8123 return false;
8125 return (regnum >= FIRST_VIRTUAL_REGISTER
8126 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8129 /* Return true if a MODE sized memory accesses to OP plus OFFSET
8130 is known to not straddle a 32k boundary. This function is used
8131 to determine whether -mcmodel=medium code can use TOC pointer
8132 relative addressing for OP. This means the alignment of the TOC
8133 pointer must also be taken into account, and unfortunately that is
8134 only 8 bytes. */
8136 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8137 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8138 #endif
8140 static bool
8141 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8142 machine_mode mode)
8144 tree decl;
8145 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8147 if (GET_CODE (op) != SYMBOL_REF)
8148 return false;
8150 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8151 SYMBOL_REF. */
8152 if (mode_supports_dq_form (mode))
8153 return false;
8155 dsize = GET_MODE_SIZE (mode);
8156 decl = SYMBOL_REF_DECL (op);
8157 if (!decl)
8159 if (dsize == 0)
8160 return false;
8162 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8163 replacing memory addresses with an anchor plus offset. We
8164 could find the decl by rummaging around in the block->objects
8165 VEC for the given offset but that seems like too much work. */
8166 dalign = BITS_PER_UNIT;
8167 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8168 && SYMBOL_REF_ANCHOR_P (op)
8169 && SYMBOL_REF_BLOCK (op) != NULL)
8171 struct object_block *block = SYMBOL_REF_BLOCK (op);
8173 dalign = block->alignment;
8174 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8176 else if (CONSTANT_POOL_ADDRESS_P (op))
8178 /* It would be nice to have get_pool_align().. */
8179 machine_mode cmode = get_pool_mode (op);
8181 dalign = GET_MODE_ALIGNMENT (cmode);
8184 else if (DECL_P (decl))
8186 dalign = DECL_ALIGN (decl);
8188 if (dsize == 0)
8190 /* Allow BLKmode when the entire object is known to not
8191 cross a 32k boundary. */
8192 if (!DECL_SIZE_UNIT (decl))
8193 return false;
8195 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8196 return false;
8198 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8199 if (dsize > 32768)
8200 return false;
8202 dalign /= BITS_PER_UNIT;
8203 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8204 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8205 return dalign >= dsize;
8208 else
8209 gcc_unreachable ();
8211 /* Find how many bits of the alignment we know for this access. */
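  /* Worked example: dalign == 8 bytes and offset == 4 give lsb == 4 and a
     final dalign of 4, so a 4-byte access is known not to cross a 32k
     boundary while an 8-byte access is not.  */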
8212 dalign /= BITS_PER_UNIT;
8213 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8214 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8215 mask = dalign - 1;
8216 lsb = offset & -offset;
8217 mask &= lsb - 1;
8218 dalign = mask + 1;
8220 return dalign >= dsize;
8223 static bool
8224 constant_pool_expr_p (rtx op)
8226 rtx base, offset;
8228 split_const (op, &base, &offset);
8229 return (GET_CODE (base) == SYMBOL_REF
8230 && CONSTANT_POOL_ADDRESS_P (base)
8231 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8234 /* These are only used to pass through from print_operand/print_operand_address
8235 to rs6000_output_addr_const_extra over the intervening function
8236 output_addr_const which is not target code. */
8237 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8239 /* Return true if OP is a toc pointer relative address (the output
8240 of create_TOC_reference). If STRICT, do not match non-split
8241 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8242 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8243 TOCREL_OFFSET_RET respectively. */
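/* The shapes matched here are, roughly:
     (unspec [(symbol_ref ...) ...] UNSPEC_TOCREL)
     (plus (unspec ... UNSPEC_TOCREL) (const_int ...))
   optionally wrapped in (lo_sum (reg) ...) for -mcmodel=medium/large.  */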
8245 bool
8246 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8247 const_rtx *tocrel_offset_ret)
8249 if (!TARGET_TOC)
8250 return false;
8252 if (TARGET_CMODEL != CMODEL_SMALL)
8254 /* When STRICT, ensure we have everything tidy.  */
8255 if (strict
8256 && !(GET_CODE (op) == LO_SUM
8257 && REG_P (XEXP (op, 0))
8258 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8259 return false;
8261 /* When not strict, allow non-split TOC addresses and also allow
8262 (lo_sum (high ..)) TOC addresses created during reload. */
8263 if (GET_CODE (op) == LO_SUM)
8264 op = XEXP (op, 1);
8267 const_rtx tocrel_base = op;
8268 const_rtx tocrel_offset = const0_rtx;
8270 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8272 tocrel_base = XEXP (op, 0);
8273 tocrel_offset = XEXP (op, 1);
8276 if (tocrel_base_ret)
8277 *tocrel_base_ret = tocrel_base;
8278 if (tocrel_offset_ret)
8279 *tocrel_offset_ret = tocrel_offset;
8281 return (GET_CODE (tocrel_base) == UNSPEC
8282 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8285 /* Return true if X is a constant pool address, and also for cmodel=medium
8286 if X is a toc-relative address known to be offsettable within MODE. */
8288 bool
8289 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8290 bool strict)
8292 const_rtx tocrel_base, tocrel_offset;
8293 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8294 && (TARGET_CMODEL != CMODEL_MEDIUM
8295 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8296 || mode == QImode
8297 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8298 INTVAL (tocrel_offset), mode)));
8301 static bool
8302 legitimate_small_data_p (machine_mode mode, rtx x)
8304 return (DEFAULT_ABI == ABI_V4
8305 && !flag_pic && !TARGET_TOC
8306 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8307 && small_data_operand (x, mode));
8310 bool
8311 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8312 bool strict, bool worst_case)
8314 unsigned HOST_WIDE_INT offset;
8315 unsigned int extra;
8317 if (GET_CODE (x) != PLUS)
8318 return false;
8319 if (!REG_P (XEXP (x, 0)))
8320 return false;
8321 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8322 return false;
8323 if (mode_supports_dq_form (mode))
8324 return quad_address_p (x, mode, strict);
8325 if (!reg_offset_addressing_ok_p (mode))
8326 return virtual_stack_registers_memory_p (x);
8327 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8328 return true;
8329 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8330 return false;
8332 offset = INTVAL (XEXP (x, 1));
8333 extra = 0;
8334 switch (mode)
8336 case E_DFmode:
8337 case E_DDmode:
8338 case E_DImode:
8339 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8340 addressing. */
8341 if (VECTOR_MEM_VSX_P (mode))
8342 return false;
8344 if (!worst_case)
8345 break;
8346 if (!TARGET_POWERPC64)
8347 extra = 4;
8348 else if (offset & 3)
8349 return false;
8350 break;
8352 case E_TFmode:
8353 case E_IFmode:
8354 case E_KFmode:
8355 case E_TDmode:
8356 case E_TImode:
8357 case E_PTImode:
8358 extra = 8;
8359 if (!worst_case)
8360 break;
8361 if (!TARGET_POWERPC64)
8362 extra = 12;
8363 else if (offset & 3)
8364 return false;
8365 break;
8367 default:
8368 break;
8371 offset += 0x8000;
8372 return offset < 0x10000 - extra;
8375 bool
8376 legitimate_indexed_address_p (rtx x, int strict)
8378 rtx op0, op1;
8380 if (GET_CODE (x) != PLUS)
8381 return false;
8383 op0 = XEXP (x, 0);
8384 op1 = XEXP (x, 1);
8386 return (REG_P (op0) && REG_P (op1)
8387 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8388 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8389 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8390 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8393 bool
8394 avoiding_indexed_address_p (machine_mode mode)
8396 /* Avoid indexed addressing for modes that have non-indexed
8397 load/store instruction forms. */
8398 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8401 bool
8402 legitimate_indirect_address_p (rtx x, int strict)
8404 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8407 bool
8408 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8410 if (!TARGET_MACHO || !flag_pic
8411 || mode != SImode || GET_CODE (x) != MEM)
8412 return false;
8413 x = XEXP (x, 0);
8415 if (GET_CODE (x) != LO_SUM)
8416 return false;
8417 if (GET_CODE (XEXP (x, 0)) != REG)
8418 return false;
8419 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8420 return false;
8421 x = XEXP (x, 1);
8423 return CONSTANT_P (x);
8426 static bool
8427 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8429 if (GET_CODE (x) != LO_SUM)
8430 return false;
8431 if (GET_CODE (XEXP (x, 0)) != REG)
8432 return false;
8433 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8434 return false;
8435 /* quad word addresses are restricted, and we can't use LO_SUM. */
8436 if (mode_supports_dq_form (mode))
8437 return false;
8438 x = XEXP (x, 1);
8440 if (TARGET_ELF || TARGET_MACHO)
8442 bool large_toc_ok;
8444 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8445 return false;
8446 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8447 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8448 recognizes some LO_SUM addresses as valid although this
8449 function says the opposite.  In most cases LRA's own transformations
8450 generate correct code for address reloads; only a few LO_SUM cases
8451 are beyond it.  So we need code analogous to that in
8452 rs6000_legitimize_reload_address for LO_SUM here, saying that some
8453 addresses are still valid. */
8454 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8455 && small_toc_ref (x, VOIDmode));
8456 if (TARGET_TOC && ! large_toc_ok)
8457 return false;
8458 if (GET_MODE_NUNITS (mode) != 1)
8459 return false;
8460 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8461 && !(/* ??? Assume floating point reg based on mode? */
8462 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8463 return false;
8465 return CONSTANT_P (x) || large_toc_ok;
8468 return false;
8472 /* Try machine-dependent ways of modifying an illegitimate address
8473 to be legitimate. If we find one, return the new, valid address.
8474 This is used from only one place: `memory_address' in explow.c.
8476 OLDX is the address as it was before break_out_memory_refs was
8477 called. In some cases it is useful to look at this to decide what
8478 needs to be done.
8480 It is always safe for this function to do nothing. It exists to
8481 recognize opportunities to optimize the output.
8483 On RS/6000, first check for the sum of a register with a constant
8484 integer that is out of range. If so, generate code to add the
8485 constant with the low-order 16 bits masked to the register and force
8486 this result into another register (this can be done with `cau').
8487 Then generate an address of REG+(CONST&0xffff), allowing for the
8488 possibility of bit 16 being a one.
8490 Then check for the sum of a register and something not constant, try to
8491 load the other things into a register and return the sum. */
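/* Sketch of the large-displacement case (TMP stands for a fresh pseudo):
   for reg + 0x12345, HIGH_INT is 0x10000 and LOW_INT is 0x2345, so we emit
   addis TMP,reg,0x1 and return (plus TMP 0x2345).  */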
8493 static rtx
8494 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8495 machine_mode mode)
8497 unsigned int extra;
8499 if (!reg_offset_addressing_ok_p (mode)
8500 || mode_supports_dq_form (mode))
8502 if (virtual_stack_registers_memory_p (x))
8503 return x;
8505 /* In theory we should not be seeing addresses of the form reg+0,
8506 but just in case it is generated, optimize it away. */
8507 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8508 return force_reg (Pmode, XEXP (x, 0));
8510 /* For TImode with load/store quad, restrict addresses to just a single
8511 pointer, so it works with both GPRs and VSX registers. */
8512 /* Make sure both operands are registers. */
8513 else if (GET_CODE (x) == PLUS
8514 && (mode != TImode || !TARGET_VSX))
8515 return gen_rtx_PLUS (Pmode,
8516 force_reg (Pmode, XEXP (x, 0)),
8517 force_reg (Pmode, XEXP (x, 1)));
8518 else
8519 return force_reg (Pmode, x);
8521 if (GET_CODE (x) == SYMBOL_REF)
8523 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8524 if (model != 0)
8525 return rs6000_legitimize_tls_address (x, model);
8528 extra = 0;
8529 switch (mode)
8531 case E_TFmode:
8532 case E_TDmode:
8533 case E_TImode:
8534 case E_PTImode:
8535 case E_IFmode:
8536 case E_KFmode:
8537 /* As in legitimate_offset_address_p we do not assume
8538 worst-case. The mode here is just a hint as to the registers
8539 used. A TImode is usually in gprs, but may actually be in
8540 fprs. Leave worst-case scenario for reload to handle via
8541 insn constraints. PTImode is only GPRs. */
8542 extra = 8;
8543 break;
8544 default:
8545 break;
8548 if (GET_CODE (x) == PLUS
8549 && GET_CODE (XEXP (x, 0)) == REG
8550 && GET_CODE (XEXP (x, 1)) == CONST_INT
8551 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8552 >= 0x10000 - extra))
8554 HOST_WIDE_INT high_int, low_int;
8555 rtx sum;
8556 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8557 if (low_int >= 0x8000 - extra)
8558 low_int = 0;
8559 high_int = INTVAL (XEXP (x, 1)) - low_int;
8560 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8561 GEN_INT (high_int)), 0);
8562 return plus_constant (Pmode, sum, low_int);
8564 else if (GET_CODE (x) == PLUS
8565 && GET_CODE (XEXP (x, 0)) == REG
8566 && GET_CODE (XEXP (x, 1)) != CONST_INT
8567 && GET_MODE_NUNITS (mode) == 1
8568 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8569 || (/* ??? Assume floating point reg based on mode? */
8570 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
8571 && !avoiding_indexed_address_p (mode))
8573 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8574 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8576 else if ((TARGET_ELF
8577 #if TARGET_MACHO
8578 || !MACHO_DYNAMIC_NO_PIC_P
8579 #endif
8581 && TARGET_32BIT
8582 && TARGET_NO_TOC
8583 && ! flag_pic
8584 && GET_CODE (x) != CONST_INT
8585 && GET_CODE (x) != CONST_WIDE_INT
8586 && GET_CODE (x) != CONST_DOUBLE
8587 && CONSTANT_P (x)
8588 && GET_MODE_NUNITS (mode) == 1
8589 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8590 || (/* ??? Assume floating point reg based on mode? */
8591 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
8593 rtx reg = gen_reg_rtx (Pmode);
8594 if (TARGET_ELF)
8595 emit_insn (gen_elf_high (reg, x));
8596 else
8597 emit_insn (gen_macho_high (reg, x));
8598 return gen_rtx_LO_SUM (Pmode, reg, x);
8600 else if (TARGET_TOC
8601 && GET_CODE (x) == SYMBOL_REF
8602 && constant_pool_expr_p (x)
8603 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8604 return create_TOC_reference (x, NULL_RTX);
8605 else
8606 return x;
8609 /* Debug version of rs6000_legitimize_address. */
8610 static rtx
8611 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8613 rtx ret;
8614 rtx_insn *insns;
8616 start_sequence ();
8617 ret = rs6000_legitimize_address (x, oldx, mode);
8618 insns = get_insns ();
8619 end_sequence ();
8621 if (ret != x)
8623 fprintf (stderr,
8624 "\nrs6000_legitimize_address: mode %s, old code %s, "
8625 "new code %s, modified\n",
8626 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8627 GET_RTX_NAME (GET_CODE (ret)));
8629 fprintf (stderr, "Original address:\n");
8630 debug_rtx (x);
8632 fprintf (stderr, "oldx:\n");
8633 debug_rtx (oldx);
8635 fprintf (stderr, "New address:\n");
8636 debug_rtx (ret);
8638 if (insns)
8640 fprintf (stderr, "Insns added:\n");
8641 debug_rtx_list (insns, 20);
8644 else
8646 fprintf (stderr,
8647 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8648 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8650 debug_rtx (x);
8653 if (insns)
8654 emit_insn (insns);
8656 return ret;
8659 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8660 We need to emit DTP-relative relocations. */
8662 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8663 static void
8664 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8666 switch (size)
8668 case 4:
8669 fputs ("\t.long\t", file);
8670 break;
8671 case 8:
8672 fputs (DOUBLE_INT_ASM_OP, file);
8673 break;
8674 default:
8675 gcc_unreachable ();
8677 output_addr_const (file, x);
8678 if (TARGET_ELF)
8679 fputs ("@dtprel+0x8000", file);
8680 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8682 switch (SYMBOL_REF_TLS_MODEL (x))
8684 case 0:
8685 break;
8686 case TLS_MODEL_LOCAL_EXEC:
8687 fputs ("@le", file);
8688 break;
8689 case TLS_MODEL_INITIAL_EXEC:
8690 fputs ("@ie", file);
8691 break;
8692 case TLS_MODEL_GLOBAL_DYNAMIC:
8693 case TLS_MODEL_LOCAL_DYNAMIC:
8694 fputs ("@m", file);
8695 break;
8696 default:
8697 gcc_unreachable ();
8702 /* Return true if X is a symbol that refers to real (rather than emulated)
8703 TLS. */
8705 static bool
8706 rs6000_real_tls_symbol_ref_p (rtx x)
8708 return (GET_CODE (x) == SYMBOL_REF
8709 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8712 /* In the name of slightly smaller debug output, and to cater to
8713 general assembler lossage, recognize various UNSPEC sequences
8714 and turn them back into a direct symbol reference. */
8716 static rtx
8717 rs6000_delegitimize_address (rtx orig_x)
8719 rtx x, y, offset;
8721 orig_x = delegitimize_mem_from_attrs (orig_x);
8722 x = orig_x;
8723 if (MEM_P (x))
8724 x = XEXP (x, 0);
8726 y = x;
8727 if (TARGET_CMODEL != CMODEL_SMALL
8728 && GET_CODE (y) == LO_SUM)
8729 y = XEXP (y, 1);
8731 offset = NULL_RTX;
8732 if (GET_CODE (y) == PLUS
8733 && GET_MODE (y) == Pmode
8734 && CONST_INT_P (XEXP (y, 1)))
8736 offset = XEXP (y, 1);
8737 y = XEXP (y, 0);
8740 if (GET_CODE (y) == UNSPEC
8741 && XINT (y, 1) == UNSPEC_TOCREL)
8743 y = XVECEXP (y, 0, 0);
8745 #ifdef HAVE_AS_TLS
8746 /* Do not associate thread-local symbols with the original
8747 constant pool symbol. */
8748 if (TARGET_XCOFF
8749 && GET_CODE (y) == SYMBOL_REF
8750 && CONSTANT_POOL_ADDRESS_P (y)
8751 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8752 return orig_x;
8753 #endif
8755 if (offset != NULL_RTX)
8756 y = gen_rtx_PLUS (Pmode, y, offset);
8757 if (!MEM_P (orig_x))
8758 return y;
8759 else
8760 return replace_equiv_address_nv (orig_x, y);
8763 if (TARGET_MACHO
8764 && GET_CODE (orig_x) == LO_SUM
8765 && GET_CODE (XEXP (orig_x, 1)) == CONST)
8767 y = XEXP (XEXP (orig_x, 1), 0);
8768 if (GET_CODE (y) == UNSPEC
8769 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
8770 return XVECEXP (y, 0, 0);
8773 return orig_x;
8776 /* Return true if X shouldn't be emitted into the debug info.
8777 The linker doesn't like .toc section references from
8778 .debug_* sections, so reject .toc section symbols. */
8780 static bool
8781 rs6000_const_not_ok_for_debug_p (rtx x)
8783 if (GET_CODE (x) == UNSPEC)
8784 return true;
8785 if (GET_CODE (x) == SYMBOL_REF
8786 && CONSTANT_POOL_ADDRESS_P (x))
8788 rtx c = get_pool_constant (x);
8789 machine_mode cmode = get_pool_mode (x);
8790 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
8791 return true;
8794 return false;
8798 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
8800 static bool
8801 rs6000_legitimate_combined_insn (rtx_insn *insn)
8803 int icode = INSN_CODE (insn);
8805 /* Reject creating doloop insns. Combine should not be allowed
8806 to create these for a number of reasons:
8807 1) In a nested loop, if combine creates one of these in an
8808 outer loop and the register allocator happens to allocate ctr
8809 to the outer loop insn, then the inner loop can't use ctr.
8810 Inner loops ought to be more highly optimized.
8811 2) Combine often wants to create one of these from what was
8812 originally a three insn sequence, first combining the three
8813 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
8814 allocated ctr, the splitter takes us back to the three insn
8815 sequence. It's better to stop combine at the two insn
8816 sequence.
8817 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
8818 insns, the register allocator sometimes uses floating point
8819 or vector registers for the pseudo. Since ctrsi/ctrdi is a
8820 jump insn and output reloads are not implemented for jumps,
8821 the ctrsi/ctrdi splitters need to handle all possible cases.
8822 That's a pain, and it gets to be seriously difficult when a
8823 splitter that runs after reload needs memory to transfer from
8824 a gpr to fpr. See PR70098 and PR71763 which are not fixed
8825 for the difficult case. It's better to not create problems
8826 in the first place. */
8827 if (icode != CODE_FOR_nothing
8828 && (icode == CODE_FOR_bdz_si
8829 || icode == CODE_FOR_bdz_di
8830 || icode == CODE_FOR_bdnz_si
8831 || icode == CODE_FOR_bdnz_di
8832 || icode == CODE_FOR_bdztf_si
8833 || icode == CODE_FOR_bdztf_di
8834 || icode == CODE_FOR_bdnztf_si
8835 || icode == CODE_FOR_bdnztf_di))
8836 return false;
8838 return true;
8841 /* Construct the SYMBOL_REF for the tls_get_addr function. */
8843 static GTY(()) rtx rs6000_tls_symbol;
8844 static rtx
8845 rs6000_tls_get_addr (void)
8847 if (!rs6000_tls_symbol)
8848 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
8850 return rs6000_tls_symbol;
8853 /* Construct the SYMBOL_REF for TLS GOT references. */
8855 static GTY(()) rtx rs6000_got_symbol;
8856 static rtx
8857 rs6000_got_sym (void)
8859 if (!rs6000_got_symbol)
8861 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8862 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
8863 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
8866 return rs6000_got_symbol;
8869 /* AIX Thread-Local Address support. */
8871 static rtx
8872 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
8874 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
8875 const char *name;
8876 char *tlsname;
8878 name = XSTR (addr, 0);
8879 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
8880 or will be placed in the TLS private data section. */
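  /* E.g. a symbol "tdata" (name illustrative) becomes "tdata[TL]" if
     initialized, or "tdata[UL]" if it lives in BSS.  */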
8881 if (name[strlen (name) - 1] != ']'
8882 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
8883 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
8885 tlsname = XALLOCAVEC (char, strlen (name) + 4);
8886 strcpy (tlsname, name);
8887 strcat (tlsname,
8888 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
8889 tlsaddr = copy_rtx (addr);
8890 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
8892 else
8893 tlsaddr = addr;
8895 /* Place addr into TOC constant pool. */
8896 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
8898 /* Output the TOC entry and create the MEM referencing the value. */
8899 if (constant_pool_expr_p (XEXP (sym, 0))
8900 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
8902 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
8903 mem = gen_const_mem (Pmode, tocref);
8904 set_mem_alias_set (mem, get_TOC_alias_set ());
8906 else
8907 return sym;
8909 /* Use global-dynamic for local-dynamic. */
8910 if (model == TLS_MODEL_GLOBAL_DYNAMIC
8911 || model == TLS_MODEL_LOCAL_DYNAMIC)
8913 /* Create new TOC reference for @m symbol. */
8914 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
8915 tlsname = XALLOCAVEC (char, strlen (name) + 1);
8916 strcpy (tlsname, "*LCM");
8917 strcat (tlsname, name + 3);
8918 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
8919 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
8920 tocref = create_TOC_reference (modaddr, NULL_RTX);
8921 rtx modmem = gen_const_mem (Pmode, tocref);
8922 set_mem_alias_set (modmem, get_TOC_alias_set ());
8924 rtx modreg = gen_reg_rtx (Pmode);
8925 emit_insn (gen_rtx_SET (modreg, modmem));
8927 tmpreg = gen_reg_rtx (Pmode);
8928 emit_insn (gen_rtx_SET (tmpreg, mem));
8930 dest = gen_reg_rtx (Pmode);
8931 if (TARGET_32BIT)
8932 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
8933 else
8934 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
8935 return dest;
8937 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
8938 else if (TARGET_32BIT)
8940 tlsreg = gen_reg_rtx (SImode);
8941 emit_insn (gen_tls_get_tpointer (tlsreg));
8943 else
8944 tlsreg = gen_rtx_REG (DImode, 13);
8946 /* Load the TOC value into temporary register. */
8947 tmpreg = gen_reg_rtx (Pmode);
8948 emit_insn (gen_rtx_SET (tmpreg, mem));
8949 set_unique_reg_note (get_last_insn (), REG_EQUAL,
8950 gen_rtx_MINUS (Pmode, addr, tlsreg));
8952 /* Add TOC symbol value to TLS pointer. */
8953 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
8955 return dest;
8958 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
8959 this (thread-local) address. */
8961 static rtx
8962 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
8964 rtx dest, insn;
8966 if (TARGET_XCOFF)
8967 return rs6000_legitimize_tls_address_aix (addr, model);
8969 dest = gen_reg_rtx (Pmode);
8970 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
8972 rtx tlsreg;
8974 if (TARGET_64BIT)
8976 tlsreg = gen_rtx_REG (Pmode, 13);
8977 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
8979 else
8981 tlsreg = gen_rtx_REG (Pmode, 2);
8982 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
8984 emit_insn (insn);
8986 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
8988 rtx tlsreg, tmp;
8990 tmp = gen_reg_rtx (Pmode);
8991 if (TARGET_64BIT)
8993 tlsreg = gen_rtx_REG (Pmode, 13);
8994 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
8996 else
8998 tlsreg = gen_rtx_REG (Pmode, 2);
8999 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9001 emit_insn (insn);
9002 if (TARGET_64BIT)
9003 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9004 else
9005 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9006 emit_insn (insn);
9008 else
9010 rtx r3, got, tga, tmp1, tmp2, call_insn;
9012 /* We currently use relocations like @got@tlsgd for tls, which
9013 means the linker will handle allocation of tls entries, placing
9014 them in the .got section. So use a pointer to the .got section,
9015 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9016 or to secondary GOT sections used by 32-bit -fPIC. */
9017 if (TARGET_64BIT)
9018 got = gen_rtx_REG (Pmode, 2);
9019 else
9021 if (flag_pic == 1)
9022 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9023 else
9025 rtx gsym = rs6000_got_sym ();
9026 got = gen_reg_rtx (Pmode);
9027 if (flag_pic == 0)
9028 rs6000_emit_move (got, gsym, Pmode);
9029 else
9031 rtx mem, lab;
9033 tmp1 = gen_reg_rtx (Pmode);
9034 tmp2 = gen_reg_rtx (Pmode);
9035 mem = gen_const_mem (Pmode, tmp1);
9036 lab = gen_label_rtx ();
9037 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9038 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9039 if (TARGET_LINK_STACK)
9040 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9041 emit_move_insn (tmp2, mem);
9042 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9043 set_unique_reg_note (last, REG_EQUAL, gsym);
9048 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9050 tga = rs6000_tls_get_addr ();
9051 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9052 const0_rtx, Pmode);
9054 r3 = gen_rtx_REG (Pmode, 3);
9055 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9057 if (TARGET_64BIT)
9058 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9059 else
9060 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9062 else if (DEFAULT_ABI == ABI_V4)
9063 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9064 else
9065 gcc_unreachable ();
9066 call_insn = last_call_insn ();
9067 PATTERN (call_insn) = insn;
9068 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9069 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9070 pic_offset_table_rtx);
9072 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9074 tga = rs6000_tls_get_addr ();
9075 tmp1 = gen_reg_rtx (Pmode);
9076 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9077 const0_rtx, Pmode);
9079 r3 = gen_rtx_REG (Pmode, 3);
9080 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9082 if (TARGET_64BIT)
9083 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9084 else
9085 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9087 else if (DEFAULT_ABI == ABI_V4)
9088 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9089 else
9090 gcc_unreachable ();
9091 call_insn = last_call_insn ();
9092 PATTERN (call_insn) = insn;
9093 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9094 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9095 pic_offset_table_rtx);
9097 if (rs6000_tls_size == 16)
9099 if (TARGET_64BIT)
9100 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9101 else
9102 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9104 else if (rs6000_tls_size == 32)
9106 tmp2 = gen_reg_rtx (Pmode);
9107 if (TARGET_64BIT)
9108 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9109 else
9110 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9111 emit_insn (insn);
9112 if (TARGET_64BIT)
9113 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9114 else
9115 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9117 else
9119 tmp2 = gen_reg_rtx (Pmode);
9120 if (TARGET_64BIT)
9121 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9122 else
9123 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9124 emit_insn (insn);
9125 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9127 emit_insn (insn);
9129 else
9131 /* IE, or 64-bit offset LE. */
9132 tmp2 = gen_reg_rtx (Pmode);
9133 if (TARGET_64BIT)
9134 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9135 else
9136 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9137 emit_insn (insn);
9138 if (TARGET_64BIT)
9139 insn = gen_tls_tls_64 (dest, tmp2, addr);
9140 else
9141 insn = gen_tls_tls_32 (dest, tmp2, addr);
9142 emit_insn (insn);
9146 return dest;
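/* For reference, a sketch of what the global-dynamic path above
   ultimately assembles to on 64-bit ELF (exact relocations depend on
   the code model; register names are illustrative):
     addis r3,r2,sym@got@tlsgd@ha
     addi  r3,r3,sym@got@tlsgd@l
     bl    __tls_get_addr(sym@tlsgd)
     nop
   after which r3 holds the address of SYM and is copied to DEST.  */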
9149 /* Only create the global variable for the stack protect guard if we are using
9150 the global flavor of that guard. */
9151 static tree
9152 rs6000_init_stack_protect_guard (void)
9154 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9155 return default_stack_protect_guard ();
9157 return NULL_TREE;
9160 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9162 static bool
9163 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9165 if (GET_CODE (x) == HIGH
9166 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9167 return true;
9169 /* A TLS symbol in the TOC cannot contain a sum. */
9170 if (GET_CODE (x) == CONST
9171 && GET_CODE (XEXP (x, 0)) == PLUS
9172 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9173 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9174 return true;
9176 /* Do not place an ELF TLS symbol in the constant pool. */
9177 return TARGET_ELF && tls_referenced_p (x);
9180 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9181 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9182 can be addressed relative to the toc pointer. */
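/* For instance, with -mcmodel=medium a local symbol is typically
   reached in two insns relative to the TOC pointer, e.g. (a sketch):
     addis rT,r2,sym@toc@ha
     ld    rD,sym@toc@l(rT)
   instead of going through a separate TOC entry.  */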
9184 static bool
9185 use_toc_relative_ref (rtx sym, machine_mode mode)
9187 return ((constant_pool_expr_p (sym)
9188 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9189 get_pool_mode (sym)))
9190 || (TARGET_CMODEL == CMODEL_MEDIUM
9191 && SYMBOL_REF_LOCAL_P (sym)
9192 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9195 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9196 replace the input X, or the original X if no replacement is called for.
9197 The output parameter *WIN is 1 if the calling macro should goto WIN,
9198 0 if it should not.
9200 For RS/6000, we wish to handle large displacements off a base
9201 register by splitting the addend across an addis and the mem insn.
9202 This cuts the number of extra insns needed from 3 to 1.
9204 On Darwin, we use this to generate code for floating point constants.
9205 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9206 The Darwin code is inside #if TARGET_MACHO because only then are the
9207 machopic_* functions defined. */
9208 static rtx
9209 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9210 int opnum, int type,
9211 int ind_levels ATTRIBUTE_UNUSED, int *win)
9213 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9214 bool quad_offset_p = mode_supports_dq_form (mode);
9216 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9217 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9218 if (reg_offset_p
9219 && opnum == 1
9220 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9221 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9222 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9223 && TARGET_P9_VECTOR)
9224 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9225 && TARGET_P9_VECTOR)))
9226 reg_offset_p = false;
9228 /* We must recognize output that we have already generated ourselves. */
9229 if (GET_CODE (x) == PLUS
9230 && GET_CODE (XEXP (x, 0)) == PLUS
9231 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9232 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9233 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9235 if (TARGET_DEBUG_ADDR)
9237 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9238 debug_rtx (x);
9240 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9241 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9242 opnum, (enum reload_type) type);
9243 *win = 1;
9244 return x;
9247 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9248 if (GET_CODE (x) == LO_SUM
9249 && GET_CODE (XEXP (x, 0)) == HIGH)
9251 if (TARGET_DEBUG_ADDR)
9253 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9254 debug_rtx (x);
9256 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9257 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9258 opnum, (enum reload_type) type);
9259 *win = 1;
9260 return x;
9263 #if TARGET_MACHO
9264 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9265 && GET_CODE (x) == LO_SUM
9266 && GET_CODE (XEXP (x, 0)) == PLUS
9267 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9268 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9269 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9270 && machopic_operand_p (XEXP (x, 1)))
9272 /* Result of previous invocation of this function on Darwin
9273 floating point constant. */
9274 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9275 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9276 opnum, (enum reload_type) type);
9277 *win = 1;
9278 return x;
9280 #endif
9282 if (TARGET_CMODEL != CMODEL_SMALL
9283 && reg_offset_p
9284 && !quad_offset_p
9285 && small_toc_ref (x, VOIDmode))
9287 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9288 x = gen_rtx_LO_SUM (Pmode, hi, x);
9289 if (TARGET_DEBUG_ADDR)
9291 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9292 debug_rtx (x);
9294 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9295 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9296 opnum, (enum reload_type) type);
9297 *win = 1;
9298 return x;
9301 if (GET_CODE (x) == PLUS
9302 && REG_P (XEXP (x, 0))
9303 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9304 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9305 && CONST_INT_P (XEXP (x, 1))
9306 && reg_offset_p
9307 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9309 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9310 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9311 HOST_WIDE_INT high
9312 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
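/* Illustrative values (not from the source): val = 0x1234ABCD gives
   low = -0x5433 and high = 0x12350000, since the low 16 bits are
   sign-extended and the borrow is folded into the high part; note
   high + low == val, which the overflow check below relies on.  */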
9314 /* Check for 32-bit overflow or quad addresses with one of the
9315 four least significant bits set. */
9316 if (high + low != val
9317 || (quad_offset_p && (low & 0xf)))
9319 *win = 0;
9320 return x;
9323 /* Reload the high part into a base reg; leave the low part
9324 in the mem directly. */
9326 x = gen_rtx_PLUS (GET_MODE (x),
9327 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9328 GEN_INT (high)),
9329 GEN_INT (low));
9331 if (TARGET_DEBUG_ADDR)
9333 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9334 debug_rtx (x);
9336 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9337 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9338 opnum, (enum reload_type) type);
9339 *win = 1;
9340 return x;
9343 if (GET_CODE (x) == SYMBOL_REF
9344 && reg_offset_p
9345 && !quad_offset_p
9346 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9347 #if TARGET_MACHO
9348 && DEFAULT_ABI == ABI_DARWIN
9349 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9350 && machopic_symbol_defined_p (x)
9351 #else
9352 && DEFAULT_ABI == ABI_V4
9353 && !flag_pic
9354 #endif
9355 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9356 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9357 without fprs.
9358 ??? Assume floating point reg based on mode? This assumption is
9359 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9360 where reload ends up doing a DFmode load of a constant from
9361 mem using two gprs. Unfortunately, at this point reload
9362 hasn't yet selected regs so poking around in reload data
9363 won't help and even if we could figure out the regs reliably,
9364 we'd still want to allow this transformation when the mem is
9365 naturally aligned. Since we say the address is good here, we
9366 can't disable offsets from LO_SUMs in mem_operand_gpr.
9367 FIXME: Allow offset from lo_sum for other modes too, when
9368 mem is sufficiently aligned.
9370 Also disallow this if the type can go in VMX/Altivec registers, since
9371 those registers do not have d-form (reg+offset) address modes. */
9372 && !reg_addr[mode].scalar_in_vmx_p
9373 && mode != TFmode
9374 && mode != TDmode
9375 && mode != IFmode
9376 && mode != KFmode
9377 && (mode != TImode || !TARGET_VSX)
9378 && mode != PTImode
9379 && (mode != DImode || TARGET_POWERPC64)
9380 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9381 || TARGET_HARD_FLOAT))
9383 #if TARGET_MACHO
9384 if (flag_pic)
9386 rtx offset = machopic_gen_offset (x);
9387 x = gen_rtx_LO_SUM (GET_MODE (x),
9388 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9389 gen_rtx_HIGH (Pmode, offset)), offset);
9391 else
9392 #endif
9393 x = gen_rtx_LO_SUM (GET_MODE (x),
9394 gen_rtx_HIGH (Pmode, x), x);
9396 if (TARGET_DEBUG_ADDR)
9398 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9399 debug_rtx (x);
9401 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9402 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9403 opnum, (enum reload_type) type);
9404 *win = 1;
9405 return x;
9408 /* Reload an offset address wrapped by an AND that represents the
9409 masking of the lower bits. Strip the outer AND and let reload
9410 convert the offset address into an indirect address. For VSX,
9411 force reload to create the address with an AND in a separate
9412 register, because we can't guarantee an altivec register will
9413 be used. */
9414 if (VECTOR_MEM_ALTIVEC_P (mode)
9415 && GET_CODE (x) == AND
9416 && GET_CODE (XEXP (x, 0)) == PLUS
9417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9418 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9419 && GET_CODE (XEXP (x, 1)) == CONST_INT
9420 && INTVAL (XEXP (x, 1)) == -16)
9422 x = XEXP (x, 0);
9423 *win = 1;
9424 return x;
9427 if (TARGET_TOC
9428 && reg_offset_p
9429 && !quad_offset_p
9430 && GET_CODE (x) == SYMBOL_REF
9431 && use_toc_relative_ref (x, mode))
9433 x = create_TOC_reference (x, NULL_RTX);
9434 if (TARGET_CMODEL != CMODEL_SMALL)
9436 if (TARGET_DEBUG_ADDR)
9438 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9439 debug_rtx (x);
9441 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9442 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9443 opnum, (enum reload_type) type);
9445 *win = 1;
9446 return x;
9448 *win = 0;
9449 return x;
9452 /* Debug version of rs6000_legitimize_reload_address. */
9453 static rtx
9454 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9455 int opnum, int type,
9456 int ind_levels, int *win)
9458 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9459 ind_levels, win);
9460 fprintf (stderr,
9461 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9462 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9463 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9464 debug_rtx (x);
9466 if (x == ret)
9467 fprintf (stderr, "Same address returned\n");
9468 else if (!ret)
9469 fprintf (stderr, "NULL returned\n");
9470 else
9472 fprintf (stderr, "New address:\n");
9473 debug_rtx (ret);
9476 return ret;
9479 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9480 that is a valid memory address for an instruction.
9481 The MODE argument is the machine mode for the MEM expression
9482 that wants to use this address.
9484 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9485 refers to a constant pool entry of an address (or the sum of it
9486 plus a constant), a short (16-bit signed) constant plus a register,
9487 the sum of two registers, or a register indirect, possibly with an
9488 auto-increment. For DFmode, DDmode and DImode with a constant plus
9489 register, we must ensure that both words are addressable, or on
9490 PowerPC64 that the offset is word aligned.
9492 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9493 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9494 because adjacent memory cells are accessed by adding word-sized offsets
9495 during assembly output. */
9496 static bool
9497 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9499 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9500 bool quad_offset_p = mode_supports_dq_form (mode);
9502 /* If this is an unaligned stvx/lvx type address, discard the outer AND. */
9503 if (VECTOR_MEM_ALTIVEC_P (mode)
9504 && GET_CODE (x) == AND
9505 && GET_CODE (XEXP (x, 1)) == CONST_INT
9506 && INTVAL (XEXP (x, 1)) == -16)
9507 x = XEXP (x, 0);
9509 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9510 return 0;
9511 if (legitimate_indirect_address_p (x, reg_ok_strict))
9512 return 1;
9513 if (TARGET_UPDATE
9514 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9515 && mode_supports_pre_incdec_p (mode)
9516 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9517 return 1;
9518 /* Handle restricted vector d-form offsets in ISA 3.0. */
9519 if (quad_offset_p)
9521 if (quad_address_p (x, mode, reg_ok_strict))
9522 return 1;
9524 else if (virtual_stack_registers_memory_p (x))
9525 return 1;
9527 else if (reg_offset_p)
9529 if (legitimate_small_data_p (mode, x))
9530 return 1;
9531 if (legitimate_constant_pool_address_p (x, mode,
9532 reg_ok_strict || lra_in_progress))
9533 return 1;
9534 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9535 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9536 return 1;
9539 /* For TImode, if we have TImode in VSX registers, only allow register
9540 indirect addresses. This will allow the values to go in either GPRs
9541 or VSX registers without reloading. The vector types would tend to
9542 go into VSX registers, so we allow REG+REG, while TImode seems
9543 somewhat split, in that some uses are GPR based, and some VSX based. */
9544 /* FIXME: We could loosen this by changing the following to
9545 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9546 but currently we cannot allow REG+REG addressing for TImode. See
9547 PR72827 for complete details on how this ends up hoodwinking DSE. */
9548 if (mode == TImode && TARGET_VSX)
9549 return 0;
9550 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
9551 if (! reg_ok_strict
9552 && reg_offset_p
9553 && GET_CODE (x) == PLUS
9554 && GET_CODE (XEXP (x, 0)) == REG
9555 && (XEXP (x, 0) == virtual_stack_vars_rtx
9556 || XEXP (x, 0) == arg_pointer_rtx)
9557 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9558 return 1;
9559 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9560 return 1;
9561 if (!FLOAT128_2REG_P (mode)
9562 && (TARGET_HARD_FLOAT
9563 || TARGET_POWERPC64
9564 || (mode != DFmode && mode != DDmode))
9565 && (TARGET_POWERPC64 || mode != DImode)
9566 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9567 && mode != PTImode
9568 && !avoiding_indexed_address_p (mode)
9569 && legitimate_indexed_address_p (x, reg_ok_strict))
9570 return 1;
9571 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9572 && mode_supports_pre_modify_p (mode)
9573 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9574 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9575 reg_ok_strict, false)
9576 || (!avoiding_indexed_address_p (mode)
9577 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9578 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9579 return 1;
9580 if (reg_offset_p && !quad_offset_p
9581 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9582 return 1;
9583 return 0;
9586 /* Debug version of rs6000_legitimate_address_p. */
9587 static bool
9588 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9589 bool reg_ok_strict)
9591 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9592 fprintf (stderr,
9593 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9594 "strict = %d, reload = %s, code = %s\n",
9595 ret ? "true" : "false",
9596 GET_MODE_NAME (mode),
9597 reg_ok_strict,
9598 (reload_completed ? "after" : "before"),
9599 GET_RTX_NAME (GET_CODE (x)));
9600 debug_rtx (x);
9602 return ret;
9605 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9607 static bool
9608 rs6000_mode_dependent_address_p (const_rtx addr,
9609 addr_space_t as ATTRIBUTE_UNUSED)
9611 return rs6000_mode_dependent_address_ptr (addr);
9614 /* Go to LABEL if ADDR (a legitimate address expression)
9615 has an effect that depends on the machine mode it is used for.
9617 On the RS/6000 this is true of all integral offsets (since AltiVec
9618 and VSX modes don't allow them) and of pre-increments and decrements.
9620 ??? Except that due to conceptual problems in offsettable_address_p
9621 we can't really report the problems of integral offsets. So leave
9622 this assuming that the adjustable offset must be valid for the
9623 sub-words of a TFmode operand, which is what we had before. */
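/* Concretely, the PLUS case below treats an offset VAL as safe only
   while VAL plus the last sub-word displacement (8 bytes on
   TARGET_POWERPC64, 12 otherwise, for a worst-case 16-byte operand)
   still fits in a signed 16-bit displacement.  */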
9625 static bool
9626 rs6000_mode_dependent_address (const_rtx addr)
9628 switch (GET_CODE (addr))
9630 case PLUS:
9631 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9632 is considered a legitimate address before reload, so there
9633 are no offset restrictions in that case. Note that this
9634 condition is safe in strict mode because any address involving
9635 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9636 been rejected as illegitimate. */
9637 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9638 && XEXP (addr, 0) != arg_pointer_rtx
9639 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9641 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9642 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9644 break;
9646 case LO_SUM:
9647 /* Anything in the constant pool is sufficiently aligned that
9648 all bytes have the same high part address. */
9649 return !legitimate_constant_pool_address_p (addr, QImode, false);
9651 /* Auto-increment cases are now treated generically in recog.c. */
9652 case PRE_MODIFY:
9653 return TARGET_UPDATE;
9655 /* AND is only allowed in Altivec loads. */
9656 case AND:
9657 return true;
9659 default:
9660 break;
9663 return false;
9666 /* Debug version of rs6000_mode_dependent_address. */
9667 static bool
9668 rs6000_debug_mode_dependent_address (const_rtx addr)
9670 bool ret = rs6000_mode_dependent_address (addr);
9672 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9673 ret ? "true" : "false");
9674 debug_rtx (addr);
9676 return ret;
9679 /* Implement FIND_BASE_TERM. */
9681 rtx
9682 rs6000_find_base_term (rtx op)
9684 rtx base;
9686 base = op;
9687 if (GET_CODE (base) == CONST)
9688 base = XEXP (base, 0);
9689 if (GET_CODE (base) == PLUS)
9690 base = XEXP (base, 0);
9691 if (GET_CODE (base) == UNSPEC)
9692 switch (XINT (base, 1))
9694 case UNSPEC_TOCREL:
9695 case UNSPEC_MACHOPIC_OFFSET:
9696 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9697 for aliasing purposes. */
9698 return XVECEXP (base, 0, 0);
9701 return op;
9704 /* More elaborate version of recog's offsettable_memref_p predicate
9705 that works around the ??? note of rs6000_mode_dependent_address.
9706 In particular it accepts
9708 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9710 in 32-bit mode, which the recog predicate rejects. */
9712 static bool
9713 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
9715 bool worst_case;
9717 if (!MEM_P (op))
9718 return false;
9720 /* First mimic offsettable_memref_p. */
9721 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
9722 return true;
9724 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9725 the latter predicate knows nothing about the mode of the memory
9726 reference and, therefore, assumes that it is the largest supported
9727 mode (TFmode). As a consequence, legitimate offsettable memory
9728 references are rejected. rs6000_legitimate_offset_address_p contains
9729 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9730 at least with a little bit of help here given that we know the
9731 actual registers used. */
9732 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9733 || GET_MODE_SIZE (reg_mode) == 4);
9734 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9735 strict, worst_case);
9738 /* Determine the reassociation width to be used in reassociate_bb.
9739 This takes into account how many parallel operations we
9740 can actually do of a given type, and also the latency.
9742 int add/sub 6/cycle
9743 mul 2/cycle
9744 vect add/sub/mul 2/cycle
9745 fp add/sub/mul 2/cycle
9746 dfp 1/cycle
9749 static int
9750 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9751 machine_mode mode)
9753 switch (rs6000_tune)
9755 case PROCESSOR_POWER8:
9756 case PROCESSOR_POWER9:
9757 if (DECIMAL_FLOAT_MODE_P (mode))
9758 return 1;
9759 if (VECTOR_MODE_P (mode))
9760 return 4;
9761 if (INTEGRAL_MODE_P (mode))
9762 return 1;
9763 if (FLOAT_MODE_P (mode))
9764 return 4;
9765 break;
9766 default:
9767 break;
9769 return 1;
9772 /* Change register usage conditional on target flags. */
9773 static void
9774 rs6000_conditional_register_usage (void)
9776 int i;
9778 if (TARGET_DEBUG_TARGET)
9779 fprintf (stderr, "rs6000_conditional_register_usage called\n");
9781 /* Set MQ register fixed (already call_used) so that it will not be
9782 allocated. */
9783 fixed_regs[64] = 1;
9785 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
9786 if (TARGET_64BIT)
9787 fixed_regs[13] = call_used_regs[13]
9788 = call_really_used_regs[13] = 1;
9790 /* Conditionally disable FPRs. */
9791 if (TARGET_SOFT_FLOAT)
9792 for (i = 32; i < 64; i++)
9793 fixed_regs[i] = call_used_regs[i]
9794 = call_really_used_regs[i] = 1;
9796 /* The TOC register is not killed across calls in a way that is
9797 visible to the compiler. */
9798 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9799 call_really_used_regs[2] = 0;
9801 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
9802 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9804 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
9805 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9806 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9807 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9809 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
9810 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9811 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9812 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9814 if (TARGET_TOC && TARGET_MINIMAL_TOC)
9815 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
9816 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
9818 if (!TARGET_ALTIVEC && !TARGET_VSX)
9820 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
9821 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9822 call_really_used_regs[VRSAVE_REGNO] = 1;
9825 if (TARGET_ALTIVEC || TARGET_VSX)
9826 global_regs[VSCR_REGNO] = 1;
9828 if (TARGET_ALTIVEC_ABI)
9830 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
9831 call_used_regs[i] = call_really_used_regs[i] = 1;
9833 /* AIX reserves VR20:31 in non-extended ABI mode. */
9834 if (TARGET_XCOFF)
9835 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
9836 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
9841 /* Output insns to set DEST equal to the constant SOURCE as a series of
9842 lis, ori and shl instructions and return TRUE. */
9844 bool
9845 rs6000_emit_set_const (rtx dest, rtx source)
9847 machine_mode mode = GET_MODE (dest);
9848 rtx temp, set;
9849 rtx_insn *insn;
9850 HOST_WIDE_INT c;
9852 gcc_checking_assert (CONST_INT_P (source));
9853 c = INTVAL (source);
9854 switch (mode)
9856 case E_QImode:
9857 case E_HImode:
9858 emit_insn (gen_rtx_SET (dest, source));
9859 return true;
9861 case E_SImode:
9862 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
9864 emit_insn (gen_rtx_SET (copy_rtx (temp),
9865 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
9866 emit_insn (gen_rtx_SET (dest,
9867 gen_rtx_IOR (SImode, copy_rtx (temp),
9868 GEN_INT (c & 0xffff))));
9869 break;
9871 case E_DImode:
9872 if (!TARGET_POWERPC64)
9874 rtx hi, lo;
9876 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
9877 DImode);
9878 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
9879 DImode);
9880 emit_move_insn (hi, GEN_INT (c >> 32));
9881 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
9882 emit_move_insn (lo, GEN_INT (c));
9884 else
9885 rs6000_emit_set_long_const (dest, c);
9886 break;
9888 default:
9889 gcc_unreachable ();
9892 insn = get_last_insn ();
9893 set = single_set (insn);
9894 if (! CONSTANT_P (SET_SRC (set)))
9895 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
9897 return true;
9900 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
9901 Output insns to set DEST equal to the constant C as a series of
9902 lis, ori and shl instructions. */
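/* A worked example (hypothetical constant, for illustration):
   c = 0x123456789ABCDEF0 splits into ud4..ud1 =
   0x1234, 0x5678, 0x9ABC, 0xDEF0, and the fully general arm below
   emits the classic five-insn sequence:
     lis  tmp,0x1234        -- tmp = 0x12340000
     ori  tmp,tmp,0x5678    -- tmp = 0x12345678
     sldi tmp,tmp,32        -- tmp = 0x1234567800000000
     oris tmp,tmp,0x9ABC    -- tmp |= 0x9ABC0000
     ori  dest,tmp,0xDEF0   -- dest = c  */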
9904 static void
9905 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
9907 rtx temp;
9908 HOST_WIDE_INT ud1, ud2, ud3, ud4;
9910 ud1 = c & 0xffff;
9911 c = c >> 16;
9912 ud2 = c & 0xffff;
9913 c = c >> 16;
9914 ud3 = c & 0xffff;
9915 c = c >> 16;
9916 ud4 = c & 0xffff;
9918 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
9919 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
9920 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
9922 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
9923 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
9925 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9927 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9928 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9929 if (ud1 != 0)
9930 emit_move_insn (dest,
9931 gen_rtx_IOR (DImode, copy_rtx (temp),
9932 GEN_INT (ud1)));
9934 else if (ud3 == 0 && ud4 == 0)
9936 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9938 gcc_assert (ud2 & 0x8000);
9939 emit_move_insn (copy_rtx (temp),
9940 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
9941 if (ud1 != 0)
9942 emit_move_insn (copy_rtx (temp),
9943 gen_rtx_IOR (DImode, copy_rtx (temp),
9944 GEN_INT (ud1)));
9945 emit_move_insn (dest,
9946 gen_rtx_ZERO_EXTEND (DImode,
9947 gen_lowpart (SImode,
9948 copy_rtx (temp))));
9950 else if ((ud4 == 0xffff && (ud3 & 0x8000))
9951 || (ud4 == 0 && ! (ud3 & 0x8000)))
9953 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9955 emit_move_insn (copy_rtx (temp),
9956 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
9957 if (ud2 != 0)
9958 emit_move_insn (copy_rtx (temp),
9959 gen_rtx_IOR (DImode, copy_rtx (temp),
9960 GEN_INT (ud2)));
9961 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9962 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9963 GEN_INT (16)));
9964 if (ud1 != 0)
9965 emit_move_insn (dest,
9966 gen_rtx_IOR (DImode, copy_rtx (temp),
9967 GEN_INT (ud1)));
9969 else
9971 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
9973 emit_move_insn (copy_rtx (temp),
9974 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
9975 if (ud3 != 0)
9976 emit_move_insn (copy_rtx (temp),
9977 gen_rtx_IOR (DImode, copy_rtx (temp),
9978 GEN_INT (ud3)));
9980 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
9981 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
9982 GEN_INT (32)));
9983 if (ud2 != 0)
9984 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
9985 gen_rtx_IOR (DImode, copy_rtx (temp),
9986 GEN_INT (ud2 << 16)));
9987 if (ud1 != 0)
9988 emit_move_insn (dest,
9989 gen_rtx_IOR (DImode, copy_rtx (temp),
9990 GEN_INT (ud1)));
9994 /* Helper for the following. Get rid of [r+r] memory refs
9995 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
9997 static void
9998 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10000 if (GET_CODE (operands[0]) == MEM
10001 && GET_CODE (XEXP (operands[0], 0)) != REG
10002 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10003 GET_MODE (operands[0]), false))
10004 operands[0]
10005 = replace_equiv_address (operands[0],
10006 copy_addr_to_reg (XEXP (operands[0], 0)));
10008 if (GET_CODE (operands[1]) == MEM
10009 && GET_CODE (XEXP (operands[1], 0)) != REG
10010 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10011 GET_MODE (operands[1]), false))
10012 operands[1]
10013 = replace_equiv_address (operands[1],
10014 copy_addr_to_reg (XEXP (operands[1], 0)));
10017 /* Generate a vector of constants to permute MODE for a little-endian
10018 storage operation by swapping the two halves of a vector. */
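/* For example, for V4SImode this builds the selector { 2, 3, 0, 1 }:
   used in a VEC_SELECT it exchanges the two 64-bit halves of the
   vector, matching the doubleword swap that lxvd2x/stxvd2x perform
   on little-endian.  */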
10019 static rtvec
10020 rs6000_const_vec (machine_mode mode)
10022 int i, subparts;
10023 rtvec v;
10025 switch (mode)
10027 case E_V1TImode:
10028 subparts = 1;
10029 break;
10030 case E_V2DFmode:
10031 case E_V2DImode:
10032 subparts = 2;
10033 break;
10034 case E_V4SFmode:
10035 case E_V4SImode:
10036 subparts = 4;
10037 break;
10038 case E_V8HImode:
10039 subparts = 8;
10040 break;
10041 case E_V16QImode:
10042 subparts = 16;
10043 break;
10044 default:
10045 gcc_unreachable();
10048 v = rtvec_alloc (subparts);
10050 for (i = 0; i < subparts / 2; ++i)
10051 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10052 for (i = subparts / 2; i < subparts; ++i)
10053 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10055 return v;
10058 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10059 store operation. */
10060 void
10061 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10063 /* Scalar permutations are easier to express in integer modes than in
10064 floating-point modes, so cast them here. We use V1TImode instead
10065 of TImode to ensure that the values don't go through GPRs. */
10066 if (FLOAT128_VECTOR_P (mode))
10068 dest = gen_lowpart (V1TImode, dest);
10069 source = gen_lowpart (V1TImode, source);
10070 mode = V1TImode;
10073 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10074 scalar. */
10075 if (mode == TImode || mode == V1TImode)
10076 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10077 GEN_INT (64))));
10078 else
10080 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10081 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10085 /* Emit a little-endian load from vector memory location SOURCE to VSX
10086 register DEST in mode MODE. The load is done with two permuting
10087 insns that represent an lxvd2x and an xxpermdi. */
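/* A sketch of the code this typically becomes (register names are
   illustrative):
     lxvd2x   vT,0,rADDR      -- LE load arrives with doublewords swapped
     xxpermdi vDEST,vT,vT,2   -- swap them back into place  */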
10088 void
10089 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10091 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10092 V1TImode). */
10093 if (mode == TImode || mode == V1TImode)
10095 mode = V2DImode;
10096 dest = gen_lowpart (V2DImode, dest);
10097 source = adjust_address (source, V2DImode, 0);
10100 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10101 rs6000_emit_le_vsx_permute (tmp, source, mode);
10102 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10105 /* Emit a little-endian store to vector memory location DEST from VSX
10106 register SOURCE in mode MODE. The store is done with two permuting
10107 insns that represent an xxpermdi and an stxvd2x. */
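/* The mirror image of the load case above, roughly:
     xxpermdi vT,vSRC,vSRC,2   -- pre-swap the doublewords
     stxvd2x  vT,0,rADDR       -- the LE store swaps them once more  */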
10108 void
10109 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10111 /* This should never be called during or after LRA, because it does
10112 not re-permute the source register. It is intended only for use
10113 during expand. */
10114 gcc_assert (!lra_in_progress && !reload_completed);
10116 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10117 V1TImode). */
10118 if (mode == TImode || mode == V1TImode)
10120 mode = V2DImode;
10121 dest = adjust_address (dest, V2DImode, 0);
10122 source = gen_lowpart (V2DImode, source);
10125 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10126 rs6000_emit_le_vsx_permute (tmp, source, mode);
10127 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10130 /* Emit a sequence representing a little-endian VSX load or store,
10131 moving data from SOURCE to DEST in mode MODE. This is done
10132 separately from rs6000_emit_move to ensure it is called only
10133 during expand. LE VSX loads and stores introduced later are
10134 handled with a split. The expand-time RTL generation allows
10135 us to optimize away redundant pairs of register-permutes. */
10136 void
10137 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10139 gcc_assert (!BYTES_BIG_ENDIAN
10140 && VECTOR_MEM_VSX_P (mode)
10141 && !TARGET_P9_VECTOR
10142 && !gpr_or_gpr_p (dest, source)
10143 && (MEM_P (source) ^ MEM_P (dest)));
10145 if (MEM_P (source))
10147 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10148 rs6000_emit_le_vsx_load (dest, source, mode);
10150 else
10152 if (!REG_P (source))
10153 source = force_reg (mode, source);
10154 rs6000_emit_le_vsx_store (dest, source, mode);
10158 /* Return whether an SFmode or SImode move can be done without converting one
10159 mode to another. This arises when we have:
10161 (SUBREG:SF (REG:SI ...))
10162 (SUBREG:SI (REG:SF ...))
10164 and one of the values is in a floating point/vector register, where SFmode
10165 scalars are stored in DFmode format. */
10167 bool
10168 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10170 if (TARGET_ALLOW_SF_SUBREG)
10171 return true;
10173 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10174 return true;
10176 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10177 return true;
10179 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10180 if (SUBREG_P (dest))
10182 rtx dest_subreg = SUBREG_REG (dest);
10183 rtx src_subreg = SUBREG_REG (src);
10184 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10187 return false;
10191 /* Helper function to change moves with:
10193 (SUBREG:SF (REG:SI)) and
10194 (SUBREG:SI (REG:SF))
10196 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10197 values are stored as DFmode values in the VSX registers. We need to convert
10198 the bits before we can use a direct move or operate on the bits in the
10199 vector register as an integer type.
10201 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
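/* (The movsi_from_sf / movsf_from_si patterns used below live in
   rs6000.md; they are expected to wrap the SP<->DP format conversion,
   e.g. xscvdpspn/xscvspdpn, around the direct move so that the bits
   seen in the GPR are in IEEE single-precision layout.)  */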
10203 static bool
10204 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10206 if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
10207 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10208 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10210 rtx inner_source = SUBREG_REG (source);
10211 machine_mode inner_mode = GET_MODE (inner_source);
10213 if (mode == SImode && inner_mode == SFmode)
10215 emit_insn (gen_movsi_from_sf (dest, inner_source));
10216 return true;
10219 if (mode == SFmode && inner_mode == SImode)
10221 emit_insn (gen_movsf_from_si (dest, inner_source));
10222 return true;
10226 return false;
10229 /* Emit a move from SOURCE to DEST in mode MODE. */
10230 void
10231 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10233 rtx operands[2];
10234 operands[0] = dest;
10235 operands[1] = source;
10237 if (TARGET_DEBUG_ADDR)
10239 fprintf (stderr,
10240 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10241 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10242 GET_MODE_NAME (mode),
10243 lra_in_progress,
10244 reload_completed,
10245 can_create_pseudo_p ());
10246 debug_rtx (dest);
10247 fprintf (stderr, "source:\n");
10248 debug_rtx (source);
10251 /* Sanity check: a CONST_WIDE_INT should never appear for a mode small enough to fit in a HOST_WIDE_INT. */
10252 if (CONST_WIDE_INT_P (operands[1])
10253 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10255 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10256 gcc_unreachable ();
10259 #ifdef HAVE_AS_GNU_ATTRIBUTE
10260 /* If we use a long double type, set the flags in .gnu_attribute that say
10261 what the long double type is. This is to allow the linker's warning
10262 message for the wrong long double to be useful, even if the function does
10263 not do a call (for example, doing a 128-bit add on power9 if the long
10264 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 are
10265 used and they aren't the default long double type. */
10266 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10268 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10269 rs6000_passes_float = rs6000_passes_long_double = true;
10271 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10272 rs6000_passes_float = rs6000_passes_long_double = true;
10274 #endif
10276 /* See if we need to special case SImode/SFmode SUBREG moves. */
10277 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10278 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10279 return;
10281 /* Check if GCC is setting up a block move that will end up using FP
10282 registers as temporaries. We must make sure this is acceptable. */
10283 if (GET_CODE (operands[0]) == MEM
10284 && GET_CODE (operands[1]) == MEM
10285 && mode == DImode
10286 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10287 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10288 && ! (rs6000_slow_unaligned_access (SImode,
10289 (MEM_ALIGN (operands[0]) > 32
10290 ? 32 : MEM_ALIGN (operands[0])))
10291 || rs6000_slow_unaligned_access (SImode,
10292 (MEM_ALIGN (operands[1]) > 32
10293 ? 32 : MEM_ALIGN (operands[1]))))
10294 && ! MEM_VOLATILE_P (operands [0])
10295 && ! MEM_VOLATILE_P (operands [1]))
10297 emit_move_insn (adjust_address (operands[0], SImode, 0),
10298 adjust_address (operands[1], SImode, 0));
10299 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10300 adjust_address (copy_rtx (operands[1]), SImode, 4));
10301 return;
10304 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10305 && !gpc_reg_operand (operands[1], mode))
10306 operands[1] = force_reg (mode, operands[1]);
10308 /* Recognize the case where operand[1] is a reference to thread-local
10309 data and load its address to a register. */
10310 if (tls_referenced_p (operands[1]))
10312 enum tls_model model;
10313 rtx tmp = operands[1];
10314 rtx addend = NULL;
10316 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10318 addend = XEXP (XEXP (tmp, 0), 1);
10319 tmp = XEXP (XEXP (tmp, 0), 0);
10322 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10323 model = SYMBOL_REF_TLS_MODEL (tmp);
10324 gcc_assert (model != 0);
10326 tmp = rs6000_legitimize_tls_address (tmp, model);
10327 if (addend)
10329 tmp = gen_rtx_PLUS (mode, tmp, addend);
10330 tmp = force_operand (tmp, operands[0]);
10332 operands[1] = tmp;
10335 /* 128-bit constant floating-point values on Darwin should really be loaded
10336 as two parts. However, this premature splitting is a problem when DFmode
10337 values can go into Altivec registers. */
10338 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10339 && GET_CODE (operands[1]) == CONST_DOUBLE)
10341 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10342 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10343 DFmode);
10344 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10345 GET_MODE_SIZE (DFmode)),
10346 simplify_gen_subreg (DFmode, operands[1], mode,
10347 GET_MODE_SIZE (DFmode)),
10348 DFmode);
10349 return;
10352 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10353 p1:SD) if p1 is not of floating point class and p0 is spilled, as
10354 we can have no analogous movsd_store for this. */
10355 if (lra_in_progress && mode == DDmode
10356 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10357 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10358 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10359 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10361 enum reg_class cl;
10362 int regno = REGNO (SUBREG_REG (operands[1]));
10364 if (regno >= FIRST_PSEUDO_REGISTER)
10366 cl = reg_preferred_class (regno);
10367 regno = reg_renumber[regno];
10368 if (regno < 0)
10369 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10371 if (regno >= 0 && ! FP_REGNO_P (regno))
10373 mode = SDmode;
10374 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10375 operands[1] = SUBREG_REG (operands[1]);
10378 if (lra_in_progress
10379 && mode == SDmode
10380 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10381 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10382 && (REG_P (operands[1])
10383 || (GET_CODE (operands[1]) == SUBREG
10384 && REG_P (SUBREG_REG (operands[1])))))
10386 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10387 ? SUBREG_REG (operands[1]) : operands[1]);
10388 enum reg_class cl;
10390 if (regno >= FIRST_PSEUDO_REGISTER)
10392 cl = reg_preferred_class (regno);
10393 gcc_assert (cl != NO_REGS);
10394 regno = reg_renumber[regno];
10395 if (regno < 0)
10396 regno = ira_class_hard_regs[cl][0];
10398 if (FP_REGNO_P (regno))
10400 if (GET_MODE (operands[0]) != DDmode)
10401 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10402 emit_insn (gen_movsd_store (operands[0], operands[1]));
10404 else if (INT_REGNO_P (regno))
10405 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10406 else
10407 gcc_unreachable();
10408 return;
10410 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10411 p1:DD)) if p0 is not of floating point class and p1 is spilled, as
10412 we can have no analogous movsd_load for this. */
10413 if (lra_in_progress && mode == DDmode
10414 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10415 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10416 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10417 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10419 enum reg_class cl;
10420 int regno = REGNO (SUBREG_REG (operands[0]));
10422 if (regno >= FIRST_PSEUDO_REGISTER)
10424 cl = reg_preferred_class (regno);
10425 regno = reg_renumber[regno];
10426 if (regno < 0)
10427 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10429 if (regno >= 0 && ! FP_REGNO_P (regno))
10431 mode = SDmode;
10432 operands[0] = SUBREG_REG (operands[0]);
10433 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10436 if (lra_in_progress
10437 && mode == SDmode
10438 && (REG_P (operands[0])
10439 || (GET_CODE (operands[0]) == SUBREG
10440 && REG_P (SUBREG_REG (operands[0]))))
10441 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10442 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10444 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10445 ? SUBREG_REG (operands[0]) : operands[0]);
10446 enum reg_class cl;
10448 if (regno >= FIRST_PSEUDO_REGISTER)
10450 cl = reg_preferred_class (regno);
10451 gcc_assert (cl != NO_REGS);
10452 regno = reg_renumber[regno];
10453 if (regno < 0)
10454 regno = ira_class_hard_regs[cl][0];
10456 if (FP_REGNO_P (regno))
10458 if (GET_MODE (operands[1]) != DDmode)
10459 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10460 emit_insn (gen_movsd_load (operands[0], operands[1]));
10462 else if (INT_REGNO_P (regno))
10463 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10464 else
10465 gcc_unreachable();
10466 return;
10469 /* FIXME: In the long term, this switch statement should go away
10470 and be replaced by a sequence of tests based on things like
10471 mode == Pmode. */
10472 switch (mode)
10474 case E_HImode:
10475 case E_QImode:
10476 if (CONSTANT_P (operands[1])
10477 && GET_CODE (operands[1]) != CONST_INT)
10478 operands[1] = force_const_mem (mode, operands[1]);
10479 break;
10481 case E_TFmode:
10482 case E_TDmode:
10483 case E_IFmode:
10484 case E_KFmode:
10485 if (FLOAT128_2REG_P (mode))
10486 rs6000_eliminate_indexed_memrefs (operands);
10487 /* fall through */
10489 case E_DFmode:
10490 case E_DDmode:
10491 case E_SFmode:
10492 case E_SDmode:
10493 if (CONSTANT_P (operands[1])
10494 && ! easy_fp_constant (operands[1], mode))
10495 operands[1] = force_const_mem (mode, operands[1]);
10496 break;
10498 case E_V16QImode:
10499 case E_V8HImode:
10500 case E_V4SFmode:
10501 case E_V4SImode:
10502 case E_V2DFmode:
10503 case E_V2DImode:
10504 case E_V1TImode:
10505 if (CONSTANT_P (operands[1])
10506 && !easy_vector_constant (operands[1], mode))
10507 operands[1] = force_const_mem (mode, operands[1]);
10508 break;
10510 case E_SImode:
10511 case E_DImode:
10512 /* Use default pattern for address of ELF small data. */
10513 if (TARGET_ELF
10514 && mode == Pmode
10515 && DEFAULT_ABI == ABI_V4
10516 && (GET_CODE (operands[1]) == SYMBOL_REF
10517 || GET_CODE (operands[1]) == CONST)
10518 && small_data_operand (operands[1], mode))
10520 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10521 return;
10524 if (DEFAULT_ABI == ABI_V4
10525 && mode == Pmode && mode == SImode
10526 && flag_pic == 1 && got_operand (operands[1], mode))
10528 emit_insn (gen_movsi_got (operands[0], operands[1]));
10529 return;
10532 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10533 && TARGET_NO_TOC
10534 && ! flag_pic
10535 && mode == Pmode
10536 && CONSTANT_P (operands[1])
10537 && GET_CODE (operands[1]) != HIGH
10538 && GET_CODE (operands[1]) != CONST_INT)
10540 rtx target = (!can_create_pseudo_p ()
10541 ? operands[0]
10542 : gen_reg_rtx (mode));
10544 /* If this is a function address on -mcall-aixdesc,
10545 convert it to the address of the descriptor. */
10546 if (DEFAULT_ABI == ABI_AIX
10547 && GET_CODE (operands[1]) == SYMBOL_REF
10548 && XSTR (operands[1], 0)[0] == '.')
10550 const char *name = XSTR (operands[1], 0);
10551 rtx new_ref;
10552 while (*name == '.')
10553 name++;
10554 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10555 CONSTANT_POOL_ADDRESS_P (new_ref)
10556 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10557 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10558 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10559 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10560 operands[1] = new_ref;
10563 if (DEFAULT_ABI == ABI_DARWIN)
10565 #if TARGET_MACHO
10566 if (MACHO_DYNAMIC_NO_PIC_P)
10568 /* Take care of any required data indirection. */
10569 operands[1] = rs6000_machopic_legitimize_pic_address (
10570 operands[1], mode, operands[0]);
10571 if (operands[0] != operands[1])
10572 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10573 return;
10575 #endif
10576 emit_insn (gen_macho_high (target, operands[1]));
10577 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10578 return;
10581 emit_insn (gen_elf_high (target, operands[1]));
10582 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10583 return;
10586 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10587 and we have put it in the TOC, we just need to make a TOC-relative
10588 reference to it. */
10589 if (TARGET_TOC
10590 && GET_CODE (operands[1]) == SYMBOL_REF
10591 && use_toc_relative_ref (operands[1], mode))
10592 operands[1] = create_TOC_reference (operands[1], operands[0]);
10593 else if (mode == Pmode
10594 && CONSTANT_P (operands[1])
10595 && GET_CODE (operands[1]) != HIGH
10596 && ((GET_CODE (operands[1]) != CONST_INT
10597 && ! easy_fp_constant (operands[1], mode))
10598 || (GET_CODE (operands[1]) == CONST_INT
10599 && (num_insns_constant (operands[1], mode)
10600 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10601 || (GET_CODE (operands[0]) == REG
10602 && FP_REGNO_P (REGNO (operands[0]))))
10603 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10604 && (TARGET_CMODEL == CMODEL_SMALL
10605 || can_create_pseudo_p ()
10606 || (REG_P (operands[0])
10607 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10610 #if TARGET_MACHO
10611 /* Darwin uses a special PIC legitimizer. */
10612 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10614 operands[1] =
10615 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10616 operands[0]);
10617 if (operands[0] != operands[1])
10618 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10619 return;
10621 #endif
10623 /* If we are to limit the number of things we put in the TOC and
10624 this is a symbol plus a constant we can add in one insn,
10625 just put the symbol in the TOC and add the constant. */
10626 if (GET_CODE (operands[1]) == CONST
10627 && TARGET_NO_SUM_IN_TOC
10628 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10629 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10630 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10631 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10632 && ! side_effects_p (operands[0]))
10634 rtx sym =
10635 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10636 rtx other = XEXP (XEXP (operands[1], 0), 1);
10638 sym = force_reg (mode, sym);
10639 emit_insn (gen_add3_insn (operands[0], sym, other));
10640 return;
10643 operands[1] = force_const_mem (mode, operands[1]);
10645 if (TARGET_TOC
10646 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10647 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10649 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10650 operands[0]);
10651 operands[1] = gen_const_mem (mode, tocref);
10652 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10655 break;
10657 case E_TImode:
10658 if (!VECTOR_MEM_VSX_P (TImode))
10659 rs6000_eliminate_indexed_memrefs (operands);
10660 break;
10662 case E_PTImode:
10663 rs6000_eliminate_indexed_memrefs (operands);
10664 break;
10666 default:
10667 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10670 /* Above, we may have called force_const_mem which may have returned
10671 an invalid address. If we can, fix this up; otherwise, reload will
10672 have to deal with it. */
10673 if (GET_CODE (operands[1]) == MEM)
10674 operands[1] = validize_mem (operands[1]);
10676 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10679 /* Nonzero if we can use a floating-point register to pass this arg. */
10680 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10681 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10682 && (CUM)->fregno <= FP_ARG_MAX_REG \
10683 && TARGET_HARD_FLOAT)
10685 /* Nonzero if we can use an AltiVec register to pass this arg. */
10686 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10687 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10688 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10689 && TARGET_ALTIVEC_ABI \
10690 && (NAMED))
10692 /* Walk down the type tree of TYPE counting consecutive base elements.
10693 If *MODEP is VOIDmode, then set it to the first valid floating point
10694 or vector type. If a non-floating point or vector type is found, or
10695 if a floating point or vector type that doesn't match a non-VOIDmode
10696 *MODEP is found, then return -1, otherwise return the count in the
10697 sub-tree. */
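/* For example (illustrative types): struct { double a; double b[2]; }
   walks to a count of 3 with *MODEP = DFmode, while
   struct { double a; float b; } yields -1 because the element modes
   disagree.  */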
10699 static int
10700 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10702 machine_mode mode;
10703 HOST_WIDE_INT size;
10705 switch (TREE_CODE (type))
10707 case REAL_TYPE:
10708 mode = TYPE_MODE (type);
10709 if (!SCALAR_FLOAT_MODE_P (mode))
10710 return -1;
10712 if (*modep == VOIDmode)
10713 *modep = mode;
10715 if (*modep == mode)
10716 return 1;
10718 break;
10720 case COMPLEX_TYPE:
10721 mode = TYPE_MODE (TREE_TYPE (type));
10722 if (!SCALAR_FLOAT_MODE_P (mode))
10723 return -1;
10725 if (*modep == VOIDmode)
10726 *modep = mode;
10728 if (*modep == mode)
10729 return 2;
10731 break;
10733 case VECTOR_TYPE:
10734 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10735 return -1;
10737 /* Use V4SImode as representative of all 128-bit vector types. */
10738 size = int_size_in_bytes (type);
10739 switch (size)
10741 case 16:
10742 mode = V4SImode;
10743 break;
10744 default:
10745 return -1;
10748 if (*modep == VOIDmode)
10749 *modep = mode;
10751 /* Vector modes are considered to be opaque: two vectors are
10752 equivalent for the purposes of being homogeneous aggregates
10753 if they are the same size. */
10754 if (*modep == mode)
10755 return 1;
10757 break;
10759 case ARRAY_TYPE:
10761 int count;
10762 tree index = TYPE_DOMAIN (type);
10764 /* Can't handle incomplete types nor sizes that are not
10765 fixed. */
10766 if (!COMPLETE_TYPE_P (type)
10767 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10768 return -1;
10770 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10771 if (count == -1
10772 || !index
10773 || !TYPE_MAX_VALUE (index)
10774 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10775 || !TYPE_MIN_VALUE (index)
10776 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10777 || count < 0)
10778 return -1;
10780 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
10781 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
10783 /* There must be no padding. */
10784 if (wi::to_wide (TYPE_SIZE (type))
10785 != count * GET_MODE_BITSIZE (*modep))
10786 return -1;
10788 return count;
10791 case RECORD_TYPE:
10793 int count = 0;
10794 int sub_count;
10795 tree field;
10797 /* Can't handle incomplete types nor sizes that are not
10798 fixed. */
10799 if (!COMPLETE_TYPE_P (type)
10800 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10801 return -1;
10803 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10805 if (TREE_CODE (field) != FIELD_DECL)
10806 continue;
10808 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10809 if (sub_count < 0)
10810 return -1;
10811 count += sub_count;
10814 /* There must be no padding. */
10815 if (wi::to_wide (TYPE_SIZE (type))
10816 != count * GET_MODE_BITSIZE (*modep))
10817 return -1;
10819 return count;
10822 case UNION_TYPE:
10823 case QUAL_UNION_TYPE:
10825 /* These aren't very interesting except in a degenerate case. */
10826 int count = 0;
10827 int sub_count;
10828 tree field;
10830 /* Can't handle incomplete types nor sizes that are not
10831 fixed. */
10832 if (!COMPLETE_TYPE_P (type)
10833 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10834 return -1;
10836 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
10838 if (TREE_CODE (field) != FIELD_DECL)
10839 continue;
10841 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
10842 if (sub_count < 0)
10843 return -1;
10844 count = count > sub_count ? count : sub_count;
10847 /* There must be no padding. */
10848 if (wi::to_wide (TYPE_SIZE (type))
10849 != count * GET_MODE_BITSIZE (*modep))
10850 return -1;
10852 return count;
10855 default:
10856 break;
10859 return -1;
10862 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
10863 float or vector aggregate that shall be passed in FP/vector registers
10864 according to the ELFv2 ABI, return the homogeneous element mode in
10865 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
10867 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
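/* E.g. a struct of four doubles is reported as DFmode with
   *N_ELTS = 4 and is passed in four FPRs; the AGGR_ARG_NUM_REG cap
   below (8 under the ELFv2 ABI) keeps larger aggregates, such as a
   struct holding nine doubles, out of this path.  */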
10869 static bool
10870 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
10871 machine_mode *elt_mode,
10872 int *n_elts)
10874 /* Note that we do not accept complex types at the top level as
10875 homogeneous aggregates; these types are handled via the
10876 targetm.calls.split_complex_arg mechanism. Complex types
10877 can be elements of homogeneous aggregates, however. */
10878 if (TARGET_HARD_FLOAT && DEFAULT_ABI == ABI_ELFv2 && type
10879 && AGGREGATE_TYPE_P (type))
10881 machine_mode field_mode = VOIDmode;
10882 int field_count = rs6000_aggregate_candidate (type, &field_mode);
10884 if (field_count > 0)
10886 int reg_size = ALTIVEC_OR_VSX_VECTOR_MODE (field_mode) ? 16 : 8;
10887 int field_size = ROUND_UP (GET_MODE_SIZE (field_mode), reg_size);
10889 /* The ELFv2 ABI allows homogeneous aggregates to occupy
10890 up to AGGR_ARG_NUM_REG registers. */
10891 if (field_count * field_size <= AGGR_ARG_NUM_REG * reg_size)
10893 if (elt_mode)
10894 *elt_mode = field_mode;
10895 if (n_elts)
10896 *n_elts = field_count;
10897 return true;
10902 if (elt_mode)
10903 *elt_mode = mode;
10904 if (n_elts)
10905 *n_elts = 1;
10906 return false;
10909 /* Return a nonzero value to say to return the function value in
10910 memory, just as large structures are always returned. TYPE will be
10911 the data type of the value, and FNTYPE will be the type of the
10912 function doing the returning, or @code{NULL} for libcalls.
10914 The AIX ABI for the RS/6000 specifies that all structures are
10915 returned in memory. The Darwin ABI does the same.
10917 For the Darwin 64 Bit ABI, a function result can be returned in
10918 registers or in memory, depending on the size of the return data
10919 type. If it is returned in registers, the value occupies the same
10920 registers as it would if it were the first and only function
10921 argument. Otherwise, the function places its result in memory at
10922 the location pointed to by GPR3.
10924 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
10925 but a draft put them in memory, and GCC used to implement the draft
10926 instead of the final standard. Therefore, aix_struct_return
10927 controls this instead of DEFAULT_ABI; V.4 targets needing backward
10928 compatibility can change DRAFT_V4_STRUCT_RET to override the
10929 default, and -m switches get the final word. See
10930 rs6000_option_override_internal for more details.
10932 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
10933 long double support is enabled. These values are returned in memory.
10935 int_size_in_bytes returns -1 for variable size objects, which go in
10936 memory always. The cast to unsigned makes -1 > 8. */
10938 static bool
10939 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
10941 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
10942 if (TARGET_MACHO
10943 && rs6000_darwin64_abi
10944 && TREE_CODE (type) == RECORD_TYPE
10945 && int_size_in_bytes (type) > 0)
10947 CUMULATIVE_ARGS valcum;
10948 rtx valret;
10950 valcum.words = 0;
10951 valcum.fregno = FP_ARG_MIN_REG;
10952 valcum.vregno = ALTIVEC_ARG_MIN_REG;
10953 /* Do a trial code generation as if this were going to be passed
10954 as an argument; if any part goes in memory, we return NULL. */
10955 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
10956 if (valret)
10957 return false;
10958 /* Otherwise fall through to more conventional ABI rules. */
10961 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
10962 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
10963 NULL, NULL))
10964 return false;
10966 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
10967 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
10968 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
10969 return false;
10971 if (AGGREGATE_TYPE_P (type)
10972 && (aix_struct_return
10973 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
10974 return true;
10976 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10977 modes only exist for GCC vector types if -maltivec. */
10978 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
10979 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10980 return false;
10982 /* Return synthetic vectors in memory. */
10983 if (TREE_CODE (type) == VECTOR_TYPE
10984 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10986 static bool warned_for_return_big_vectors = false;
10987 if (!warned_for_return_big_vectors)
10989 warning (OPT_Wpsabi, "GCC vector returned by reference: "
10990 "non-standard ABI extension with no compatibility "
10991 "guarantee");
10992 warned_for_return_big_vectors = true;
10994 return true;
10997 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10998 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10999 return true;
11001 return false;
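/* Editorial illustration (not compiled, not part of GCC): sample outcomes
   of the test above for the 64-bit ELFv2 ABI. The struct names are
   invented and the sizes assume LP64. */
#if 0
struct two_regs { long a, b; };   /* 16 bytes: returned in r3/r4.       */
struct too_big { long a, b, c; }; /* 24 bytes: returned in memory.      */
struct hfa_ret { double x, y; };  /* Homogeneous: returned in f1/f2.    */
#endif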
11004 /* Specify whether values returned in registers should be at the most
11005 significant end of a register. We want aggregates returned by
11006 value to match the way aggregates are passed to functions. */
11008 static bool
11009 rs6000_return_in_msb (const_tree valtype)
11011 return (DEFAULT_ABI == ABI_ELFv2
11012 && BYTES_BIG_ENDIAN
11013 && AGGREGATE_TYPE_P (valtype)
11014 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
11015 == PAD_UPWARD));
11018 #ifdef HAVE_AS_GNU_ATTRIBUTE
11019 /* Return TRUE if a call to function FNDECL may be one that
11020 potentially affects the function calling ABI of the object file. */
11022 static bool
11023 call_ABI_of_interest (tree fndecl)
11025 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11027 struct cgraph_node *c_node;
11029 /* Libcalls are always interesting. */
11030 if (fndecl == NULL_TREE)
11031 return true;
11033 /* Any call to an external function is interesting. */
11034 if (DECL_EXTERNAL (fndecl))
11035 return true;
11037 /* Interesting functions that we are emitting in this object file. */
11038 c_node = cgraph_node::get (fndecl);
11039 c_node = c_node->ultimate_alias_target ();
11040 return !c_node->only_called_directly_p ();
11042 return false;
11044 #endif
11046 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11047 for a call to a function whose data type is FNTYPE.
11048 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11050 For incoming args we set the number of arguments in the prototype large
11051 so we never return a PARALLEL. */
11053 void
11054 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11055 rtx libname ATTRIBUTE_UNUSED, int incoming,
11056 int libcall, int n_named_args,
11057 tree fndecl ATTRIBUTE_UNUSED,
11058 machine_mode return_mode ATTRIBUTE_UNUSED)
11060 static CUMULATIVE_ARGS zero_cumulative;
11062 *cum = zero_cumulative;
11063 cum->words = 0;
11064 cum->fregno = FP_ARG_MIN_REG;
11065 cum->vregno = ALTIVEC_ARG_MIN_REG;
11066 cum->prototype = (fntype && prototype_p (fntype));
11067 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11068 ? CALL_LIBCALL : CALL_NORMAL);
11069 cum->sysv_gregno = GP_ARG_MIN_REG;
11070 cum->stdarg = stdarg_p (fntype);
11071 cum->libcall = libcall;
11073 cum->nargs_prototype = 0;
11074 if (incoming || cum->prototype)
11075 cum->nargs_prototype = n_named_args;
11077 /* Check for a longcall attribute. */
11078 if ((!fntype && rs6000_default_long_calls)
11079 || (fntype
11080 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11081 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11082 cum->call_cookie |= CALL_LONG;
11084 if (TARGET_DEBUG_ARG)
11086 fprintf (stderr, "\ninit_cumulative_args:");
11087 if (fntype)
11089 tree ret_type = TREE_TYPE (fntype);
11090 fprintf (stderr, " ret code = %s,",
11091 get_tree_code_name (TREE_CODE (ret_type)));
11094 if (cum->call_cookie & CALL_LONG)
11095 fprintf (stderr, " longcall,");
11097 fprintf (stderr, " proto = %d, nargs = %d\n",
11098 cum->prototype, cum->nargs_prototype);
11101 #ifdef HAVE_AS_GNU_ATTRIBUTE
11102 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11104 cum->escapes = call_ABI_of_interest (fndecl);
11105 if (cum->escapes)
11107 tree return_type;
11109 if (fntype)
11111 return_type = TREE_TYPE (fntype);
11112 return_mode = TYPE_MODE (return_type);
11114 else
11115 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11117 if (return_type != NULL)
11119 if (TREE_CODE (return_type) == RECORD_TYPE
11120 && TYPE_TRANSPARENT_AGGR (return_type))
11122 return_type = TREE_TYPE (first_field (return_type));
11123 return_mode = TYPE_MODE (return_type);
11125 if (AGGREGATE_TYPE_P (return_type)
11126 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11127 <= 8))
11128 rs6000_returns_struct = true;
11130 if (SCALAR_FLOAT_MODE_P (return_mode))
11132 rs6000_passes_float = true;
11133 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11134 && (FLOAT128_IBM_P (return_mode)
11135 || FLOAT128_IEEE_P (return_mode)
11136 || (return_type != NULL
11137 && (TYPE_MAIN_VARIANT (return_type)
11138 == long_double_type_node))))
11139 rs6000_passes_long_double = true;
11141 /* Note if we pass or return an IEEE 128-bit type. We changed
11142 the mangling for these types, and we may need to make an alias
11143 with the old mangling. */
11144 if (FLOAT128_IEEE_P (return_mode))
11145 rs6000_passes_ieee128 = true;
11147 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode))
11148 rs6000_passes_vector = true;
11151 #endif
11153 if (fntype
11154 && !TARGET_ALTIVEC
11155 && TARGET_ALTIVEC_ABI
11156 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11158 error ("cannot return value in vector register because"
11159 " altivec instructions are disabled, use %qs"
11160 " to enable them", "-maltivec");
11164 /* The mode the ABI uses for a word. This is not the same as word_mode
11165 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11167 static scalar_int_mode
11168 rs6000_abi_word_mode (void)
11170 return TARGET_32BIT ? SImode : DImode;
11173 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11174 static char *
11175 rs6000_offload_options (void)
11177 if (TARGET_64BIT)
11178 return xstrdup ("-foffload-abi=lp64");
11179 else
11180 return xstrdup ("-foffload-abi=ilp32");
11183 /* On rs6000, function arguments are promoted, as are function return
11184 values. */
11186 static machine_mode
11187 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11188 machine_mode mode,
11189 int *punsignedp ATTRIBUTE_UNUSED,
11190 const_tree, int)
11192 PROMOTE_MODE (mode, *punsignedp, type);
11194 return mode;
11197 /* Return true if TYPE must be passed on the stack and not in registers. */
11199 static bool
11200 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11202 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11203 return must_pass_in_stack_var_size (mode, type);
11204 else
11205 return must_pass_in_stack_var_size_or_pad (mode, type);
11208 static inline bool
11209 is_complex_IBM_long_double (machine_mode mode)
11211 return mode == ICmode || (mode == TCmode && FLOAT128_IBM_P (TCmode));
11214 /* Whether ABI_V4 passes MODE args to a function in floating point
11215 registers. */
11217 static bool
11218 abi_v4_pass_in_fpr (machine_mode mode, bool named)
11220 if (!TARGET_HARD_FLOAT)
11221 return false;
11222 if (mode == DFmode)
11223 return true;
11224 if (mode == SFmode && named)
11225 return true;
11226 /* ABI_V4 passes complex IBM long double in 8 gprs.
11227 Stupid, but we can't change the ABI now. */
11228 if (is_complex_IBM_long_double (mode))
11229 return false;
11230 if (FLOAT128_2REG_P (mode))
11231 return true;
11232 if (DECIMAL_FLOAT_MODE_P (mode))
11233 return true;
11234 return false;
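/* Editorial illustration (not compiled, not part of GCC): expected
   answers from the predicate above, assuming TARGET_HARD_FLOAT. The
   function name is invented for the example. */
#if 0
static void
abi_v4_fpr_examples (void)
{
  gcc_assert (abi_v4_pass_in_fpr (DFmode, false));  /* double: FPR always. */
  gcc_assert (abi_v4_pass_in_fpr (SFmode, true));   /* named float: FPR.   */
  gcc_assert (!abi_v4_pass_in_fpr (SFmode, false)); /* unnamed float: GPRs
                                                       or stack.           */
  gcc_assert (!abi_v4_pass_in_fpr (ICmode, true));  /* complex IBM long
                                                       double: 8 GPRs.     */
}
#endif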
11237 /* Implement TARGET_FUNCTION_ARG_PADDING.
11239 For the AIX ABI structs are always stored left shifted in their
11240 argument slot. */
11242 static pad_direction
11243 rs6000_function_arg_padding (machine_mode mode, const_tree type)
11245 #ifndef AGGREGATE_PADDING_FIXED
11246 #define AGGREGATE_PADDING_FIXED 0
11247 #endif
11248 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11249 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11250 #endif
11252 if (!AGGREGATE_PADDING_FIXED)
11254 /* GCC used to pass structures of the same size as integer types as
11255 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
11256 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11257 passed padded downward, except that -mstrict-align further
11258 muddied the water in that multi-component structures of 2 and 4
11259 bytes in size were passed padded upward.
11261 The following arranges for best compatibility with previous
11262 versions of gcc, but removes the -mstrict-align dependency. */
11263 if (BYTES_BIG_ENDIAN)
11265 HOST_WIDE_INT size = 0;
11267 if (mode == BLKmode)
11269 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11270 size = int_size_in_bytes (type);
11272 else
11273 size = GET_MODE_SIZE (mode);
11275 if (size == 1 || size == 2 || size == 4)
11276 return PAD_DOWNWARD;
11278 return PAD_UPWARD;
11281 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11283 if (type != 0 && AGGREGATE_TYPE_P (type))
11284 return PAD_UPWARD;
11287 /* Fall back to the default. */
11288 return default_function_arg_padding (mode, type);
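/* Editorial example (not part of GCC): on a big-endian target without
   fixed aggregate padding, the code above makes a 2-byte struct
   right-justified in its argument word (PAD_DOWNWARD), just as a 2-byte
   integer would be, while a 3-byte struct is left-justified with the
   padding above it (PAD_UPWARD). */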
11291 /* If defined, a C expression that gives the alignment boundary, in bits,
11292 of an argument with the specified mode and type. If it is not defined,
11293 PARM_BOUNDARY is used for all arguments.
11295 V.4 wants long longs and doubles to be double word aligned. Just
11296 testing the mode size is a boneheaded way to do this as it means
11297 that other types such as complex int are also double word aligned.
11298 However, we're stuck with this because changing the ABI might break
11299 existing library interfaces.
11301 Quadword align Altivec/VSX vectors.
11302 Quadword align large synthetic vector types. */
11304 static unsigned int
11305 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11307 machine_mode elt_mode;
11308 int n_elts;
11310 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11312 if (DEFAULT_ABI == ABI_V4
11313 && (GET_MODE_SIZE (mode) == 8
11314 || (TARGET_HARD_FLOAT
11315 && !is_complex_IBM_long_double (mode)
11316 && FLOAT128_2REG_P (mode))))
11317 return 64;
11318 else if (FLOAT128_VECTOR_P (mode))
11319 return 128;
11320 else if (type && TREE_CODE (type) == VECTOR_TYPE
11321 && int_size_in_bytes (type) >= 8
11322 && int_size_in_bytes (type) < 16)
11323 return 64;
11324 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11325 || (type && TREE_CODE (type) == VECTOR_TYPE
11326 && int_size_in_bytes (type) >= 16))
11327 return 128;
11329 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11330 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11331 -mcompat-align-parm is used. */
11332 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11333 || DEFAULT_ABI == ABI_ELFv2)
11334 && type && TYPE_ALIGN (type) > 64)
11336 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11337 or homogeneous float/vector aggregates here. We already handled
11338 vector aggregates above, but still need to check for float here. */
11339 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11340 && !SCALAR_FLOAT_MODE_P (elt_mode));
11342 /* We used to check for BLKmode instead of the above aggregate type
11343 check. Warn when this results in any difference to the ABI. */
11344 if (aggregate_p != (mode == BLKmode))
11346 static bool warned;
11347 if (!warned && warn_psabi)
11349 warned = true;
11350 inform (input_location,
11351 "the ABI of passing aggregates with %d-byte alignment"
11352 " has changed in GCC 5",
11353 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11357 if (aggregate_p)
11358 return 128;
11361 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11362 implement the "aggregate type" check as a BLKmode check here; this
11363 means certain aggregate types are in fact not aligned. */
11364 if (TARGET_MACHO && rs6000_darwin64_abi
11365 && mode == BLKmode
11366 && type && TYPE_ALIGN (type) > 64)
11367 return 128;
11369 return PARM_BOUNDARY;
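/* Editorial illustration (not part of GCC): alignments produced by the
   hook above for a 64-bit ELFv2 target, where PARM_BOUNDARY is 64.
   The types are examples only.
     vector double                -> 128 (Altivec/VSX vector mode)
     __float128                   -> 128 (FLOAT128_VECTOR_P)
     struct { double a, b; }      -> 64  (homogeneous DFmode aggregate:
                                          elt_mode is scalar float, so it
                                          is not an "aggregate" here)
     16-byte-aligned plain struct -> 128 (over-aligned aggregate). */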
11372 /* The offset in words to the start of the parameter save area. */
11374 static unsigned int
11375 rs6000_parm_offset (void)
11377 return (DEFAULT_ABI == ABI_V4 ? 2
11378 : DEFAULT_ABI == ABI_ELFv2 ? 4
11379 : 6);
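/* Editorial note (an assumption from the published ABI documents, not
   stated in this file): the constants above reflect the fixed words
   that precede the parameter save area in each ABI's stack frame --
   V.4 has 2 words (back chain, LR save), ELFv2 has 4 (back chain, CR
   save, LR save, TOC save), and AIX has 6 (back chain, CR save, LR
   save, two reserved words, TOC save). */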
11382 /* For a function parm of MODE and TYPE, return the starting word in
11383 the parameter area. NWORDS of the parameter area are already used. */
11385 static unsigned int
11386 rs6000_parm_start (machine_mode mode, const_tree type,
11387 unsigned int nwords)
11389 unsigned int align;
11391 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11392 return nwords + (-(rs6000_parm_offset () + nwords) & align);
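/* Editorial worked example (values invented): on ELFv2 (offset 4) with
   NWORDS == 1 already used, a 16-byte-aligned argument gets
     align = 128 / PARM_BOUNDARY - 1 = 1
     start = 1 + (-(4 + 1) & 1) = 2
   i.e. word 2 of the parameter area, which sits at byte offset
   (4 + 2) * 8 = 48 from the stack pointer -- a 16-byte-aligned
   address, as required. */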
11395 /* Compute the size (in words) of a function argument. */
11397 static unsigned long
11398 rs6000_arg_size (machine_mode mode, const_tree type)
11400 unsigned long size;
11402 if (mode != BLKmode)
11403 size = GET_MODE_SIZE (mode);
11404 else
11405 size = int_size_in_bytes (type);
11407 if (TARGET_32BIT)
11408 return (size + 3) >> 2;
11409 else
11410 return (size + 7) >> 3;
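/* Editorial worked example: a 20-byte BLKmode struct occupies
   (20 + 3) >> 2 = 5 words on a 32-bit target and (20 + 7) >> 3 = 3
   doublewords on a 64-bit one. */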
11413 /* Use this to flush pending int fields. */
11415 static void
11416 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11417 HOST_WIDE_INT bitpos, int final)
11419 unsigned int startbit, endbit;
11420 int intregs, intoffset;
11422 /* Handle the situations where a float is taking up the first half
11423 of the GPR, and the other half is empty (typically due to
11424 alignment restrictions). We can detect this by an 8-byte-aligned
11425 int field, or by seeing that this is the final flush for this
11426 argument. Count the word and continue on. */
11427 if (cum->floats_in_gpr == 1
11428 && (cum->intoffset % 64 == 0
11429 || (cum->intoffset == -1 && final)))
11431 cum->words++;
11432 cum->floats_in_gpr = 0;
11435 if (cum->intoffset == -1)
11436 return;
11438 intoffset = cum->intoffset;
11439 cum->intoffset = -1;
11440 cum->floats_in_gpr = 0;
11442 if (intoffset % BITS_PER_WORD != 0)
11444 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11445 if (!int_mode_for_size (bits, 0).exists ())
11447 /* We couldn't find an appropriate mode, which happens,
11448 e.g., in packed structs when there are 3 bytes to load.
11449 Move intoffset back to the beginning of the word in this
11450 case. */
11451 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11455 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11456 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11457 intregs = (endbit - startbit) / BITS_PER_WORD;
11458 cum->words += intregs;
11459 /* words should be unsigned. */
11460 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11462 int pad = (endbit/BITS_PER_WORD) - cum->words;
11463 cum->words += pad;
11467 /* The darwin64 ABI calls for us to recurse down through structs,
11468 looking for elements passed in registers. Unfortunately, we have
11469 to track int register count here also because of misalignments
11470 in powerpc alignment mode. */
11472 static void
11473 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11474 const_tree type,
11475 HOST_WIDE_INT startbitpos)
11477 tree f;
11479 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11480 if (TREE_CODE (f) == FIELD_DECL)
11482 HOST_WIDE_INT bitpos = startbitpos;
11483 tree ftype = TREE_TYPE (f);
11484 machine_mode mode;
11485 if (ftype == error_mark_node)
11486 continue;
11487 mode = TYPE_MODE (ftype);
11489 if (DECL_SIZE (f) != 0
11490 && tree_fits_uhwi_p (bit_position (f)))
11491 bitpos += int_bit_position (f);
11493 /* ??? FIXME: else assume zero offset. */
11495 if (TREE_CODE (ftype) == RECORD_TYPE)
11496 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11497 else if (USE_FP_FOR_ARG_P (cum, mode))
11499 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11500 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11501 cum->fregno += n_fpregs;
11502 /* Single-precision floats present a special problem for
11503 us, because they are smaller than an 8-byte GPR, and so
11504 the structure-packing rules combined with the standard
11505 varargs behavior mean that we want to pack float/float
11506 and float/int combinations into a single register's
11507 space. This is complicated by the arg advance flushing,
11508 which works on arbitrarily large groups of int-type
11509 fields. */
11510 if (mode == SFmode)
11512 if (cum->floats_in_gpr == 1)
11514 /* Two floats in a word; count the word and reset
11515 the float count. */
11516 cum->words++;
11517 cum->floats_in_gpr = 0;
11519 else if (bitpos % 64 == 0)
11521 /* A float at the beginning of an 8-byte word;
11522 count it and put off adjusting cum->words until
11523 we see if an arg advance flush is going to do it
11524 for us. */
11525 cum->floats_in_gpr++;
11527 else
11529 /* The float is at the end of a word, preceded
11530 by integer fields, so the arg advance flush
11531 just above has already set cum->words and
11532 everything is taken care of. */
11535 else
11536 cum->words += n_fpregs;
11538 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11540 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11541 cum->vregno++;
11542 cum->words += 2;
11544 else if (cum->intoffset == -1)
11545 cum->intoffset = bitpos;
11549 /* Check for an item that needs to be considered specially under the darwin 64
11550 bit ABI. These are record types where the mode is BLK or the structure is
11551 8 bytes in size. */
11552 static int
11553 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11555 return rs6000_darwin64_abi
11556 && ((mode == BLKmode
11557 && TREE_CODE (type) == RECORD_TYPE
11558 && int_size_in_bytes (type) > 0)
11559 || (type && TREE_CODE (type) == RECORD_TYPE
11560 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11563 /* Update the data in CUM to advance over an argument
11564 of mode MODE and data type TYPE.
11565 (TYPE is null for libcalls where that information may not be available.)
11567 Note that for args passed by reference, function_arg will be called
11568 with MODE and TYPE set to that of the pointer to the arg, not the arg
11569 itself. */
11571 static void
11572 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11573 const_tree type, bool named, int depth)
11575 machine_mode elt_mode;
11576 int n_elts;
11578 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11580 /* Only tick off an argument if we're not recursing. */
11581 if (depth == 0)
11582 cum->nargs_prototype--;
11584 #ifdef HAVE_AS_GNU_ATTRIBUTE
11585 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11586 && cum->escapes)
11588 if (SCALAR_FLOAT_MODE_P (mode))
11590 rs6000_passes_float = true;
11591 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11592 && (FLOAT128_IBM_P (mode)
11593 || FLOAT128_IEEE_P (mode)
11594 || (type != NULL
11595 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11596 rs6000_passes_long_double = true;
11598 /* Note if we pass or return an IEEE 128-bit type. We changed the
11599 mangling for these types, and we may need to make an alias with
11600 the old mangling. */
11601 if (FLOAT128_IEEE_P (mode))
11602 rs6000_passes_ieee128 = true;
11604 if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11605 rs6000_passes_vector = true;
11607 #endif
11609 if (TARGET_ALTIVEC_ABI
11610 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11611 || (type && TREE_CODE (type) == VECTOR_TYPE
11612 && int_size_in_bytes (type) == 16)))
11614 bool stack = false;
11616 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11618 cum->vregno += n_elts;
11620 if (!TARGET_ALTIVEC)
11621 error ("cannot pass argument in vector register because"
11622 " altivec instructions are disabled, use %qs"
11623 " to enable them", "-maltivec");
11625 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11626 even if it is going to be passed in a vector register.
11627 Darwin does the same for variable-argument functions. */
11628 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11629 && TARGET_64BIT)
11630 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11631 stack = true;
11633 else
11634 stack = true;
11636 if (stack)
11638 int align;
11640 /* Vector parameters must be 16-byte aligned. In 32-bit
11641 mode this means we need to take into account the offset
11642 to the parameter save area. In 64-bit mode, they just
11643 have to start on an even word, since the parameter save
11644 area is 16-byte aligned. */
11645 if (TARGET_32BIT)
11646 align = -(rs6000_parm_offset () + cum->words) & 3;
11647 else
11648 align = cum->words & 1;
11649 cum->words += align + rs6000_arg_size (mode, type);
11651 if (TARGET_DEBUG_ARG)
11653 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11654 cum->words, align);
11655 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11656 cum->nargs_prototype, cum->prototype,
11657 GET_MODE_NAME (mode));
11661 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11663 int size = int_size_in_bytes (type);
11664 /* Variable sized types have size == -1 and are
11665 treated as if consisting entirely of ints.
11666 Pad to 16 byte boundary if needed. */
11667 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11668 && (cum->words % 2) != 0)
11669 cum->words++;
11670 /* For varargs, we can just go up by the size of the struct. */
11671 if (!named)
11672 cum->words += (size + 7) / 8;
11673 else
11675 /* It is tempting to say int register count just goes up by
11676 sizeof(type)/8, but this is wrong in a case such as
11677 { int; double; int; } [powerpc alignment]. We have to
11678 grovel through the fields for these too. */
11679 cum->intoffset = 0;
11680 cum->floats_in_gpr = 0;
11681 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11682 rs6000_darwin64_record_arg_advance_flush (cum,
11683 size * BITS_PER_UNIT, 1);
11685 if (TARGET_DEBUG_ARG)
11687 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11688 cum->words, TYPE_ALIGN (type), size);
11689 fprintf (stderr,
11690 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11691 cum->nargs_prototype, cum->prototype,
11692 GET_MODE_NAME (mode));
11695 else if (DEFAULT_ABI == ABI_V4)
11697 if (abi_v4_pass_in_fpr (mode, named))
11699 /* _Decimal128 must use an even/odd register pair. This assumes
11700 that the register number is odd when fregno is odd. */
11701 if (mode == TDmode && (cum->fregno % 2) == 1)
11702 cum->fregno++;
11704 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11705 <= FP_ARG_V4_MAX_REG)
11706 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11707 else
11709 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11710 if (mode == DFmode || FLOAT128_IBM_P (mode)
11711 || mode == DDmode || mode == TDmode)
11712 cum->words += cum->words & 1;
11713 cum->words += rs6000_arg_size (mode, type);
11716 else
11718 int n_words = rs6000_arg_size (mode, type);
11719 int gregno = cum->sysv_gregno;
11721 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11722 As does any other 2 word item such as complex int due to a
11723 historical mistake. */
11724 if (n_words == 2)
11725 gregno += (1 - gregno) & 1;
11727 /* Multi-reg args are not split between registers and stack. */
11728 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11730 /* Long long is aligned on the stack. So are other 2 word
11731 items such as complex int due to a historical mistake. */
11732 if (n_words == 2)
11733 cum->words += cum->words & 1;
11734 cum->words += n_words;
11737 /* Note: we keep accumulating gregno even after we have started
11738 spilling to the stack; this tells expand_builtin_saveregs
11739 that spilling has started. */
11740 cum->sysv_gregno = gregno + n_words;
11743 if (TARGET_DEBUG_ARG)
11745 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11746 cum->words, cum->fregno);
11747 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11748 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11749 fprintf (stderr, "mode = %4s, named = %d\n",
11750 GET_MODE_NAME (mode), named);
11753 else
11755 int n_words = rs6000_arg_size (mode, type);
11756 int start_words = cum->words;
11757 int align_words = rs6000_parm_start (mode, type, start_words);
11759 cum->words = align_words + n_words;
11761 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11763 /* _Decimal128 must be passed in an even/odd float register pair.
11764 This assumes that the register number is odd when fregno is
11765 odd. */
11766 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11767 cum->fregno++;
11768 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11771 if (TARGET_DEBUG_ARG)
11773 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11774 cum->words, cum->fregno);
11775 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11776 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11777 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11778 named, align_words - start_words, depth);
11783 static void
11784 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11785 const_tree type, bool named)
11787 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
11791 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
11792 structure between cum->intoffset and bitpos to integer registers. */
11794 static void
11795 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
11796 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
11798 machine_mode mode;
11799 unsigned int regno;
11800 unsigned int startbit, endbit;
11801 int this_regno, intregs, intoffset;
11802 rtx reg;
11804 if (cum->intoffset == -1)
11805 return;
11807 intoffset = cum->intoffset;
11808 cum->intoffset = -1;
11810 /* If this is the trailing part of a word, try to only load that
11811 much into the register. Otherwise load the whole register. Note
11812 that in the latter case we may pick up unwanted bits. It's not a
11813 problem at the moment, but we may wish to revisit. */
11815 if (intoffset % BITS_PER_WORD != 0)
11817 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11818 if (!int_mode_for_size (bits, 0).exists (&mode))
11820 /* We couldn't find an appropriate mode, which happens,
11821 e.g., in packed structs when there are 3 bytes to load.
11822 Move intoffset back to the beginning of the word in this
11823 case. */
11824 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11825 mode = word_mode;
11828 else
11829 mode = word_mode;
11831 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11832 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11833 intregs = (endbit - startbit) / BITS_PER_WORD;
11834 this_regno = cum->words + intoffset / BITS_PER_WORD;
11836 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
11837 cum->use_stack = 1;
11839 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
11840 if (intregs <= 0)
11841 return;
11843 intoffset /= BITS_PER_UNIT;
11846 regno = GP_ARG_MIN_REG + this_regno;
11847 reg = gen_rtx_REG (mode, regno);
11848 rvec[(*k)++] =
11849 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
11851 this_regno += 1;
11852 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
11853 mode = word_mode;
11854 intregs -= 1;
11856 while (intregs > 0);
11859 /* Recursive workhorse for the following. */
11861 static void
11862 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
11863 HOST_WIDE_INT startbitpos, rtx rvec[],
11864 int *k)
11866 tree f;
11868 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11869 if (TREE_CODE (f) == FIELD_DECL)
11871 HOST_WIDE_INT bitpos = startbitpos;
11872 tree ftype = TREE_TYPE (f);
11873 machine_mode mode;
11874 if (ftype == error_mark_node)
11875 continue;
11876 mode = TYPE_MODE (ftype);
11878 if (DECL_SIZE (f) != 0
11879 && tree_fits_uhwi_p (bit_position (f)))
11880 bitpos += int_bit_position (f);
11882 /* ??? FIXME: else assume zero offset. */
11884 if (TREE_CODE (ftype) == RECORD_TYPE)
11885 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
11886 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
11888 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
11889 #if 0
11890 switch (mode)
11892 case E_SCmode: mode = SFmode; break;
11893 case E_DCmode: mode = DFmode; break;
11894 case E_TCmode: mode = TFmode; break;
11895 default: break;
11897 #endif
11898 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11899 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
11901 gcc_assert (cum->fregno == FP_ARG_MAX_REG
11902 && (mode == TFmode || mode == TDmode));
11903 /* Long double or _Decimal128 split over regs and memory. */
11904 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
11905 cum->use_stack = 1;
11907 rvec[(*k)++]
11908 = gen_rtx_EXPR_LIST (VOIDmode,
11909 gen_rtx_REG (mode, cum->fregno++),
11910 GEN_INT (bitpos / BITS_PER_UNIT));
11911 if (FLOAT128_2REG_P (mode))
11912 cum->fregno++;
11914 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11916 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
11917 rvec[(*k)++]
11918 = gen_rtx_EXPR_LIST (VOIDmode,
11919 gen_rtx_REG (mode, cum->vregno++),
11920 GEN_INT (bitpos / BITS_PER_UNIT));
11922 else if (cum->intoffset == -1)
11923 cum->intoffset = bitpos;
11927 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
11928 the register(s) to be used for each field and subfield of a struct
11929 being passed by value, along with the offset of where the
11930 register's value may be found in the block. FP fields go in FP
11931 registers, vector fields go in vector registers, and everything
11932 else goes in int registers, packed as in memory.
11934 This code is also used for function return values. RETVAL indicates
11935 whether this is the case.
11937 Much of this is taken from the SPARC V9 port, which has a similar
11938 calling convention. */
11940 static rtx
11941 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
11942 bool named, bool retval)
11944 rtx rvec[FIRST_PSEUDO_REGISTER];
11945 int k = 1, kbase = 1;
11946 HOST_WIDE_INT typesize = int_size_in_bytes (type);
11947 /* This is a copy; modifications are not visible to our caller. */
11948 CUMULATIVE_ARGS copy_cum = *orig_cum;
11949 CUMULATIVE_ARGS *cum = &copy_cum;
11951 /* Pad to 16 byte boundary if needed. */
11952 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11953 && (cum->words % 2) != 0)
11954 cum->words++;
11956 cum->intoffset = 0;
11957 cum->use_stack = 0;
11958 cum->named = named;
11960 /* Put entries into rvec[] for individual FP and vector fields, and
11961 for the chunks of memory that go in int regs. Note we start at
11962 element 1; 0 is reserved for an indication of using memory, and
11963 may or may not be filled in below. */
11964 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
11965 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
11967 /* If any part of the struct went on the stack put all of it there.
11968 This hack is because the generic code for
11969 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
11970 parts of the struct are not at the beginning. */
11971 if (cum->use_stack)
11973 if (retval)
11974 return NULL_RTX; /* doesn't go in registers at all */
11975 kbase = 0;
11976 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
11978 if (k > 1 || cum->use_stack)
11979 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
11980 else
11981 return NULL_RTX;
11984 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
11986 static rtx
11987 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
11988 int align_words)
11990 int n_units;
11991 int i, k;
11992 rtx rvec[GP_ARG_NUM_REG + 1];
11994 if (align_words >= GP_ARG_NUM_REG)
11995 return NULL_RTX;
11997 n_units = rs6000_arg_size (mode, type);
11999 /* Optimize the simple case where the arg fits in one gpr, except in
12000 the case of BLKmode due to assign_parms assuming that registers are
12001 BITS_PER_WORD wide. */
12002 if (n_units == 0
12003 || (n_units == 1 && mode != BLKmode))
12004 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12006 k = 0;
12007 if (align_words + n_units > GP_ARG_NUM_REG)
12008 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12009 using a magic NULL_RTX component.
12010 This is not strictly correct. Only some of the arg belongs in
12011 memory, not all of it. However, the normal scheme using
12012 function_arg_partial_nregs can result in unusual subregs, e.g.
12013 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12014 store the whole arg to memory is often more efficient than code
12015 to store pieces, and we know that space is available in the right
12016 place for the whole arg. */
12017 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12019 i = 0;
12022 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12023 rtx off = GEN_INT (i++ * 4);
12024 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12026 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12028 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
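/* Editorial example (values invented): under -m32 -mpowerpc64, a DFmode
   argument with ALIGN_WORDS == 7 needs two 4-byte units but only r10
   remains, so the result is roughly
     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])
   where the NULL_RTX element says the argument also lives in memory
   and r10 carries its first word. */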
12031 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12032 but must also be copied into the parameter save area starting at
12033 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12034 to the GPRs and/or memory. Return the number of elements used. */
12036 static int
12037 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12038 int align_words, rtx *rvec)
12040 int k = 0;
12042 if (align_words < GP_ARG_NUM_REG)
12044 int n_words = rs6000_arg_size (mode, type);
12046 if (align_words + n_words > GP_ARG_NUM_REG
12047 || mode == BLKmode
12048 || (TARGET_32BIT && TARGET_POWERPC64))
12050 /* If this is partially on the stack, then we only
12051 include the portion actually in registers here. */
12052 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12053 int i = 0;
12055 if (align_words + n_words > GP_ARG_NUM_REG)
12057 /* Not all of the arg fits in gprs. Say that it goes in memory
12058 too, using a magic NULL_RTX component. Also see comment in
12059 rs6000_mixed_function_arg for why the normal
12060 function_arg_partial_nregs scheme doesn't work in this case. */
12061 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12066 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12067 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12068 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12070 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12072 else
12074 /* The whole arg fits in gprs. */
12075 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12076 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12079 else
12081 /* It's entirely in memory. */
12082 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12085 return k;
12088 /* RVEC is a vector of K components of an argument of mode MODE.
12089 Construct the final function_arg return value from it. */
12091 static rtx
12092 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12094 gcc_assert (k >= 1);
12096 /* Avoid returning a PARALLEL in the trivial cases. */
12097 if (k == 1)
12099 if (XEXP (rvec[0], 0) == NULL_RTX)
12100 return NULL_RTX;
12102 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12103 return XEXP (rvec[0], 0);
12106 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12109 /* Determine where to put an argument to a function.
12110 Value is zero to push the argument on the stack,
12111 or a hard register in which to store the argument.
12113 MODE is the argument's machine mode.
12114 TYPE is the data type of the argument (as a tree).
12115 This is null for libcalls where that information may
12116 not be available.
12117 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12118 the preceding args and about the function being called. It is
12119 not modified in this routine.
12120 NAMED is nonzero if this argument is a named parameter
12121 (otherwise it is an extra parameter matching an ellipsis).
12123 On RS/6000 the first eight words of non-FP are normally in registers
12124 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12125 Under V.4, the first 8 FP args are in registers.
12127 If this is floating-point and no prototype is specified, we use
12128 both an FP and integer register (or possibly FP reg and stack). Library
12129 functions (when CALL_LIBCALL is set) always have the proper types for args,
12130 so we can pass the FP value just in one register. emit_library_function
12131 doesn't support PARALLEL anyway.
12133 Note that for args passed by reference, function_arg will be called
12134 with MODE and TYPE set to that of the pointer to the arg, not the arg
12135 itself. */
12137 static rtx
12138 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12139 const_tree type, bool named)
12141 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12142 enum rs6000_abi abi = DEFAULT_ABI;
12143 machine_mode elt_mode;
12144 int n_elts;
12146 /* Return a marker to indicate whether CR1 needs to set or clear the
12147 bit that V.4 uses to say fp args were passed in registers.
12148 Assume that we don't need the marker for software floating point,
12149 or compiler generated library calls. */
12150 if (mode == VOIDmode)
12152 if (abi == ABI_V4
12153 && (cum->call_cookie & CALL_LIBCALL) == 0
12154 && (cum->stdarg
12155 || (cum->nargs_prototype < 0
12156 && (cum->prototype || TARGET_NO_PROTOTYPE)))
12157 && TARGET_HARD_FLOAT)
12158 return GEN_INT (cum->call_cookie
12159 | ((cum->fregno == FP_ARG_MIN_REG)
12160 ? CALL_V4_SET_FP_ARGS
12161 : CALL_V4_CLEAR_FP_ARGS));
12163 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12166 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12168 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12170 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12171 if (rslt != NULL_RTX)
12172 return rslt;
12173 /* Else fall through to usual handling. */
12176 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12178 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12179 rtx r, off;
12180 int i, k = 0;
12182 /* Do we also need to pass this argument in the parameter save area?
12183 Library support functions for IEEE 128-bit are assumed to not need the
12184 value passed both in GPRs and in vector registers. */
12185 if (TARGET_64BIT && !cum->prototype
12186 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12188 int align_words = ROUND_UP (cum->words, 2);
12189 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12192 /* Describe where this argument goes in the vector registers. */
12193 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12195 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12196 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12197 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12200 return rs6000_finish_function_arg (mode, rvec, k);
12202 else if (TARGET_ALTIVEC_ABI
12203 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12204 || (type && TREE_CODE (type) == VECTOR_TYPE
12205 && int_size_in_bytes (type) == 16)))
12207 if (named || abi == ABI_V4)
12208 return NULL_RTX;
12209 else
12211 /* Vector parameters to varargs functions under AIX or Darwin
12212 get passed in memory and possibly also in GPRs. */
12213 int align, align_words, n_words;
12214 machine_mode part_mode;
12216 /* Vector parameters must be 16-byte aligned. In 32-bit
12217 mode this means we need to take into account the offset
12218 to the parameter save area. In 64-bit mode, they just
12219 have to start on an even word, since the parameter save
12220 area is 16-byte aligned. */
12221 if (TARGET_32BIT)
12222 align = -(rs6000_parm_offset () + cum->words) & 3;
12223 else
12224 align = cum->words & 1;
12225 align_words = cum->words + align;
12227 /* Out of registers? Memory, then. */
12228 if (align_words >= GP_ARG_NUM_REG)
12229 return NULL_RTX;
12231 if (TARGET_32BIT && TARGET_POWERPC64)
12232 return rs6000_mixed_function_arg (mode, type, align_words);
12234 /* The vector value goes in GPRs. Only the part of the
12235 value in GPRs is reported here. */
12236 part_mode = mode;
12237 n_words = rs6000_arg_size (mode, type);
12238 if (align_words + n_words > GP_ARG_NUM_REG)
12239 /* Fortunately, there are only two possibilities, the value
12240 is either wholly in GPRs or half in GPRs and half not. */
12241 part_mode = DImode;
12243 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12247 else if (abi == ABI_V4)
12249 if (abi_v4_pass_in_fpr (mode, named))
12251 /* _Decimal128 must use an even/odd register pair. This assumes
12252 that the register number is odd when fregno is odd. */
12253 if (mode == TDmode && (cum->fregno % 2) == 1)
12254 cum->fregno++;
12256 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12257 <= FP_ARG_V4_MAX_REG)
12258 return gen_rtx_REG (mode, cum->fregno);
12259 else
12260 return NULL_RTX;
12262 else
12264 int n_words = rs6000_arg_size (mode, type);
12265 int gregno = cum->sysv_gregno;
12267 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12268 As does any other 2 word item such as complex int due to a
12269 historical mistake. */
12270 if (n_words == 2)
12271 gregno += (1 - gregno) & 1;
12273 /* Multi-reg args are not split between registers and stack. */
12274 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12275 return NULL_RTX;
12277 if (TARGET_32BIT && TARGET_POWERPC64)
12278 return rs6000_mixed_function_arg (mode, type,
12279 gregno - GP_ARG_MIN_REG);
12280 return gen_rtx_REG (mode, gregno);
12283 else
12285 int align_words = rs6000_parm_start (mode, type, cum->words);
12287 /* _Decimal128 must be passed in an even/odd float register pair.
12288 This assumes that the register number is odd when fregno is odd. */
12289 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12290 cum->fregno++;
12292 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12294 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12295 rtx r, off;
12296 int i, k = 0;
12297 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12298 int fpr_words;
12300 /* Do we also need to pass this argument in the parameter
12301 save area? */
12302 if (type && (cum->nargs_prototype <= 0
12303 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12304 && TARGET_XL_COMPAT
12305 && align_words >= GP_ARG_NUM_REG)))
12306 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12308 /* Describe where this argument goes in the fprs. */
12309 for (i = 0; i < n_elts
12310 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12312 /* Check if the argument is split over registers and memory.
12313 This can only ever happen for long double or _Decimal128;
12314 complex types are handled via split_complex_arg. */
12315 machine_mode fmode = elt_mode;
12316 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12318 gcc_assert (FLOAT128_2REG_P (fmode));
12319 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12322 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12323 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12324 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12327 /* If there were not enough FPRs to hold the argument, the rest
12328 usually goes into memory. However, if the current position
12329 is still within the register parameter area, a portion may
12330 actually have to go into GPRs.
12332 Note that it may happen that the portion of the argument
12333 passed in the first "half" of the first GPR was already
12334 passed in the last FPR as well.
12336 For unnamed arguments, we already set up GPRs to cover the
12337 whole argument in rs6000_psave_function_arg, so there is
12338 nothing further to do at this point. */
12339 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12340 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12341 && cum->nargs_prototype > 0)
12343 static bool warned;
12345 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12346 int n_words = rs6000_arg_size (mode, type);
12348 align_words += fpr_words;
12349 n_words -= fpr_words;
12353 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12354 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12355 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12357 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12359 if (!warned && warn_psabi)
12361 warned = true;
12362 inform (input_location,
12363 "the ABI of passing homogeneous float aggregates"
12364 " has changed in GCC 5");
12368 return rs6000_finish_function_arg (mode, rvec, k);
12370 else if (align_words < GP_ARG_NUM_REG)
12372 if (TARGET_32BIT && TARGET_POWERPC64)
12373 return rs6000_mixed_function_arg (mode, type, align_words);
12375 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12377 else
12378 return NULL_RTX;
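/* Editorial example (values invented): for an ELFv2 call passing a named
   struct { double a, b; } as the first argument of a prototyped
   function, the FPR loop above yields
     (parallel:BLK [(expr_list (reg:DF 33) (const_int 0))
                    (expr_list (reg:DF 34) (const_int 8))])
   i.e. the two doubles go in f1 and f2 (hard regs 33 and 34); no GPR
   or memory element is needed because the argument is fully described
   by FPRs. */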
12382 /* For an arg passed partly in registers and partly in memory, this is
12383 the number of bytes passed in registers. For args passed entirely in
12384 registers or entirely in memory, zero. When an arg is described by a
12385 PARALLEL, perhaps using more than one register type, this function
12386 returns the number of bytes used by the first element of the PARALLEL. */
12388 static int
12389 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12390 tree type, bool named)
12392 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12393 bool passed_in_gprs = true;
12394 int ret = 0;
12395 int align_words;
12396 machine_mode elt_mode;
12397 int n_elts;
12399 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12401 if (DEFAULT_ABI == ABI_V4)
12402 return 0;
12404 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12406 /* If we are passing this arg in the fixed parameter save area (gprs or
12407 memory) as well as VRs, we do not use the partial bytes mechanism;
12408 instead, rs6000_function_arg will return a PARALLEL including a memory
12409 element as necessary. Library support functions for IEEE 128-bit are
12410 assumed to not need the value passed both in GPRs and in vector
12411 registers. */
12412 if (TARGET_64BIT && !cum->prototype
12413 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12414 return 0;
12416 /* Otherwise, we pass in VRs only. Check for partial copies. */
12417 passed_in_gprs = false;
12418 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12419 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12422 /* In this complicated case we just disable the partial_nregs code. */
12423 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12424 return 0;
12426 align_words = rs6000_parm_start (mode, type, cum->words);
12428 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12430 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12432 /* If we are passing this arg in the fixed parameter save area
12433 (gprs or memory) as well as FPRs, we do not use the partial
12434 bytes mechanism; instead, rs6000_function_arg will return a
12435 PARALLEL including a memory element as necessary. */
12436 if (type
12437 && (cum->nargs_prototype <= 0
12438 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12439 && TARGET_XL_COMPAT
12440 && align_words >= GP_ARG_NUM_REG)))
12441 return 0;
12443 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12444 passed_in_gprs = false;
12445 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12447 /* Compute number of bytes / words passed in FPRs. If there
12448 is still space available in the register parameter area
12449 *after* that amount, a part of the argument will be passed
12450 in GPRs. In that case, the total amount passed in any
12451 registers is equal to the amount that would have been passed
12452 in GPRs if everything were passed there, so we fall back to
12453 the GPR code below to compute the appropriate value. */
12454 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12455 * MIN (8, GET_MODE_SIZE (elt_mode)));
12456 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12458 if (align_words + fpr_words < GP_ARG_NUM_REG)
12459 passed_in_gprs = true;
12460 else
12461 ret = fpr;
12465 if (passed_in_gprs
12466 && align_words < GP_ARG_NUM_REG
12467 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12468 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12470 if (ret != 0 && TARGET_DEBUG_ARG)
12471 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12473 return ret;
12476 /* A C expression that indicates when an argument must be passed by
12477 reference. If nonzero for an argument, a copy of that argument is
12478 made in memory and a pointer to the argument is passed instead of
12479 the argument itself. The pointer is passed in whatever way is
12480 appropriate for passing a pointer to that type.
12482 Under V.4, aggregates and long double are passed by reference.
12484 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12485 reference unless the AltiVec vector extension ABI is in force.
12487 As an extension to all ABIs, variable sized types are passed by
12488 reference. */
12490 static bool
12491 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12492 machine_mode mode, const_tree type,
12493 bool named ATTRIBUTE_UNUSED)
12495 if (!type)
12496 return 0;
12498 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12499 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12501 if (TARGET_DEBUG_ARG)
12502 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12503 return 1;
12506 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12508 if (TARGET_DEBUG_ARG)
12509 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12510 return 1;
12513 if (int_size_in_bytes (type) < 0)
12515 if (TARGET_DEBUG_ARG)
12516 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12517 return 1;
12520 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12521 modes only exist for GCC vector types if -maltivec. */
12522 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12524 if (TARGET_DEBUG_ARG)
12525 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12526 return 1;
12529 /* Pass synthetic vectors in memory. */
12530 if (TREE_CODE (type) == VECTOR_TYPE
12531 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12533 static bool warned_for_pass_big_vectors = false;
12534 if (TARGET_DEBUG_ARG)
12535 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12536 if (!warned_for_pass_big_vectors)
12538 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12539 "non-standard ABI extension with no compatibility "
12540 "guarantee");
12541 warned_for_pass_big_vectors = true;
12543 return 1;
12546 return 0;
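/* Editorial illustration (not compiled, not part of GCC): argument types
   that the hook above forces through memory; the declarations are
   examples only.
     ABI_V4: struct point { int x, y; }          -> by ref (aggregate)
     ABI_V4 + -mabi=ieeelongdouble: long double  -> by ref (IEEE 128-bit)
     any ABI: variable-length array              -> by ref (variable size)
     32-bit -maltivec -mabi=no-altivec: vector int -> by ref (AltiVec). */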
12549 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12550 already processed. Return true if the parameter must be passed
12551 (fully or partially) on the stack. */
12553 static bool
12554 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12556 machine_mode mode;
12557 int unsignedp;
12558 rtx entry_parm;
12560 /* Catch errors. */
12561 if (type == NULL || type == error_mark_node)
12562 return true;
12564 /* Handle types with no storage requirement. */
12565 if (TYPE_MODE (type) == VOIDmode)
12566 return false;
12568 /* Handle complex types: a complex value is passed as its two scalar components, so check (and advance past) each one. */
12569 if (TREE_CODE (type) == COMPLEX_TYPE)
12570 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12571 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
12573 /* Handle transparent aggregates. */
12574 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12575 && TYPE_TRANSPARENT_AGGR (type))
12576 type = TREE_TYPE (first_field (type));
12578 /* See if this arg was passed by invisible reference. */
12579 if (pass_by_reference (get_cumulative_args (args_so_far),
12580 TYPE_MODE (type), type, true))
12581 type = build_pointer_type (type);
12583 /* Find mode as it is passed by the ABI. */
12584 unsignedp = TYPE_UNSIGNED (type);
12585 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12587 /* If we must pass in stack, we need a stack. */
12588 if (rs6000_must_pass_in_stack (mode, type))
12589 return true;
12591 /* If there is no incoming register, we need a stack. */
12592 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12593 if (entry_parm == NULL)
12594 return true;
12596 /* Likewise if we need to pass both in registers and on the stack. */
12597 if (GET_CODE (entry_parm) == PARALLEL
12598 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12599 return true;
12601 /* Also true if we're partially in registers and partially not. */
12602 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12603 return true;
12605 /* Update info on where next arg arrives in registers. */
12606 rs6000_function_arg_advance (args_so_far, mode, type, true);
12607 return false;
12610 /* Return true if FUN has no prototype, has a variable argument
12611 list, or passes any parameter in memory. */
12613 static bool
12614 rs6000_function_parms_need_stack (tree fun, bool incoming)
12616 tree fntype, result;
12617 CUMULATIVE_ARGS args_so_far_v;
12618 cumulative_args_t args_so_far;
12620 if (!fun)
12621 /* Must be a libcall, all of which only use reg parms. */
12622 return false;
12624 fntype = fun;
12625 if (!TYPE_P (fun))
12626 fntype = TREE_TYPE (fun);
12628 /* Varargs functions need the parameter save area. */
12629 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12630 return true;
12632 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12633 args_so_far = pack_cumulative_args (&args_so_far_v);
12635 /* When incoming, we will have been passed the function decl.
12636 It is necessary to use the decl to handle K&R style functions,
12637 where TYPE_ARG_TYPES may not be available. */
12638 if (incoming)
12640 gcc_assert (DECL_P (fun));
12641 result = DECL_RESULT (fun);
12643 else
12644 result = TREE_TYPE (fntype);
12646 if (result && aggregate_value_p (result, fntype))
12648 if (!TYPE_P (result))
12649 result = TREE_TYPE (result);
12650 result = build_pointer_type (result);
12651 rs6000_parm_needs_stack (args_so_far, result);
12654 if (incoming)
12656 tree parm;
12658 for (parm = DECL_ARGUMENTS (fun);
12659 parm && parm != void_list_node;
12660 parm = TREE_CHAIN (parm))
12661 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12662 return true;
12664 else
12666 function_args_iterator args_iter;
12667 tree arg_type;
12669 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12670 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12671 return true;
12674 return false;
12677 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
12678 usually a constant depending on the ABI. However, in the ELFv2 ABI
12679 the register parameter area is optional when calling a function that
12680 has a prototype in scope, has no variable argument list, and passes
12681 all parameters in registers. */
12683 int
12684 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12686 int reg_parm_stack_space;
12688 switch (DEFAULT_ABI)
12690 default:
12691 reg_parm_stack_space = 0;
12692 break;
12694 case ABI_AIX:
12695 case ABI_DARWIN:
12696 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12697 break;
12699 case ABI_ELFv2:
12700 /* ??? Recomputing this every time is a bit expensive. Is there
12701 a place to cache this information? */
12702 if (rs6000_function_parms_need_stack (fun, incoming))
12703 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12704 else
12705 reg_parm_stack_space = 0;
12706 break;
12709 return reg_parm_stack_space;
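/* An illustrative sketch of the ELFv2 rule above; the functions are
   hypothetical and not part of this file.  */
#if 0
int add (int a, int b);           /* prototyped, all args in GPRs: no area */
int sum (int n, ...);             /* variadic: 64-byte area reserved */
struct big { char c[200]; };
void take (struct big b);         /* passed partly in memory: area reserved */
#endif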
12712 static void
12713 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12715 int i;
12716 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12718 if (nregs == 0)
12719 return;
12721 for (i = 0; i < nregs; i++)
12723 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12724 if (reload_completed)
12726 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12727 tem = NULL_RTX;
12728 else
12729 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12730 i * GET_MODE_SIZE (reg_mode));
12732 else
12733 tem = replace_equiv_address (tem, XEXP (tem, 0));
12735 gcc_assert (tem);
12737 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12741 /* Perform any actions needed for a function that is receiving a
12742 variable number of arguments.
12744 CUM is as above.
12746 MODE and TYPE are the mode and type of the current parameter.
12748 PRETEND_SIZE is a variable that should be set to the amount of stack
12749 that must be pushed by the prolog to pretend that our caller pushed
12750 it.
12752 Normally, this macro will push all remaining incoming registers on the
12753 stack and set PRETEND_SIZE to the length of the registers pushed. */
12755 static void
12756 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12757 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12758 int no_rtl)
12760 CUMULATIVE_ARGS next_cum;
12761 int reg_size = TARGET_32BIT ? 4 : 8;
12762 rtx save_area = NULL_RTX, mem;
12763 int first_reg_offset;
12764 alias_set_type set;
12766 /* Skip the last named argument. */
12767 next_cum = *get_cumulative_args (cum);
12768 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12770 if (DEFAULT_ABI == ABI_V4)
12772 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12774 if (! no_rtl)
12776 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12777 HOST_WIDE_INT offset = 0;
12779 /* Try to optimize the size of the varargs save area.
12780 The ABI requires that ap.reg_save_area is doubleword
12781 aligned, but we don't need to allocate space for all
12782 the bytes, only for those into which we will actually save
12783 anything. */
12784 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12785 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12786 if (TARGET_HARD_FLOAT
12787 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12788 && cfun->va_list_fpr_size)
12790 if (gpr_reg_num)
12791 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
12792 * UNITS_PER_FP_WORD;
12793 if (cfun->va_list_fpr_size
12794 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12795 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
12796 else
12797 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
12798 * UNITS_PER_FP_WORD;
12800 if (gpr_reg_num)
12802 offset = -((first_reg_offset * reg_size) & ~7);
12803 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
12805 gpr_reg_num = cfun->va_list_gpr_size;
12806 if (reg_size == 4 && (first_reg_offset & 1))
12807 gpr_reg_num++;
12809 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
12811 else if (fpr_size)
12812 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
12813 * UNITS_PER_FP_WORD
12814 - (int) (GP_ARG_NUM_REG * reg_size);
12816 if (gpr_size + fpr_size)
12818 rtx reg_save_area
12819 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
12820 gcc_assert (GET_CODE (reg_save_area) == MEM);
12821 reg_save_area = XEXP (reg_save_area, 0);
12822 if (GET_CODE (reg_save_area) == PLUS)
12824 gcc_assert (XEXP (reg_save_area, 0)
12825 == virtual_stack_vars_rtx);
12826 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
12827 offset += INTVAL (XEXP (reg_save_area, 1));
12829 else
12830 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
12833 cfun->machine->varargs_save_offset = offset;
12834 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
12837 else
12839 first_reg_offset = next_cum.words;
12840 save_area = crtl->args.internal_arg_pointer;
12842 if (targetm.calls.must_pass_in_stack (mode, type))
12843 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
12846 set = get_varargs_alias_set ();
12847 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
12848 && cfun->va_list_gpr_size)
12850 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
12852 if (va_list_gpr_counter_field)
12853 /* V4 va_list_gpr_size counts number of registers needed. */
12854 n_gpr = cfun->va_list_gpr_size;
12855 else
12856 /* char * va_list instead counts number of bytes needed. */
12857 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
12859 if (nregs > n_gpr)
12860 nregs = n_gpr;
12862 mem = gen_rtx_MEM (BLKmode,
12863 plus_constant (Pmode, save_area,
12864 first_reg_offset * reg_size));
12865 MEM_NOTRAP_P (mem) = 1;
12866 set_mem_alias_set (mem, set);
12867 set_mem_align (mem, BITS_PER_WORD);
12869 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
12870 nregs);
12873 /* Save FP registers if needed. */
12874 if (DEFAULT_ABI == ABI_V4
12875 && TARGET_HARD_FLOAT
12876 && ! no_rtl
12877 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12878 && cfun->va_list_fpr_size)
12880 int fregno = next_cum.fregno, nregs;
12881 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
12882 rtx lab = gen_label_rtx ();
12883 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
12884 * UNITS_PER_FP_WORD);
12886 emit_jump_insn
12887 (gen_rtx_SET (pc_rtx,
12888 gen_rtx_IF_THEN_ELSE (VOIDmode,
12889 gen_rtx_NE (VOIDmode, cr1,
12890 const0_rtx),
12891 gen_rtx_LABEL_REF (VOIDmode, lab),
12892 pc_rtx)));
12894 for (nregs = 0;
12895 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
12896 fregno++, off += UNITS_PER_FP_WORD, nregs++)
12898 mem = gen_rtx_MEM (TARGET_HARD_FLOAT ? DFmode : SFmode,
12899 plus_constant (Pmode, save_area, off));
12900 MEM_NOTRAP_P (mem) = 1;
12901 set_mem_alias_set (mem, set);
12902 set_mem_align (mem, GET_MODE_ALIGNMENT (
12903 TARGET_HARD_FLOAT ? DFmode : SFmode));
12904 emit_move_insn (mem, gen_rtx_REG (
12905 TARGET_HARD_FLOAT ? DFmode : SFmode, fregno));
12908 emit_label (lab);
12912 /* Create the va_list data type. */
12914 static tree
12915 rs6000_build_builtin_va_list (void)
12917 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
12919 /* For AIX, prefer 'char *' because that's what the system
12920 header files like. */
12921 if (DEFAULT_ABI != ABI_V4)
12922 return build_pointer_type (char_type_node);
12924 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
12925 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
12926 get_identifier ("__va_list_tag"), record);
12928 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
12929 unsigned_char_type_node);
12930 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
12931 unsigned_char_type_node);
12932 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
12933 every user file. */
12934 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12935 get_identifier ("reserved"), short_unsigned_type_node);
12936 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12937 get_identifier ("overflow_arg_area"),
12938 ptr_type_node);
12939 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
12940 get_identifier ("reg_save_area"),
12941 ptr_type_node);
12943 va_list_gpr_counter_field = f_gpr;
12944 va_list_fpr_counter_field = f_fpr;
12946 DECL_FIELD_CONTEXT (f_gpr) = record;
12947 DECL_FIELD_CONTEXT (f_fpr) = record;
12948 DECL_FIELD_CONTEXT (f_res) = record;
12949 DECL_FIELD_CONTEXT (f_ovf) = record;
12950 DECL_FIELD_CONTEXT (f_sav) = record;
12952 TYPE_STUB_DECL (record) = type_decl;
12953 TYPE_NAME (record) = type_decl;
12954 TYPE_FIELDS (record) = f_gpr;
12955 DECL_CHAIN (f_gpr) = f_fpr;
12956 DECL_CHAIN (f_fpr) = f_res;
12957 DECL_CHAIN (f_res) = f_ovf;
12958 DECL_CHAIN (f_ovf) = f_sav;
12960 layout_type (record);
12962 /* The correct type is an array type of one element. */
12963 return build_array_type (record, build_index_type (size_zero_node));
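/* A sketch of the user-visible layout the record above produces for the
   SVR4 va_list; the typedef names are illustrative only.  */
#if 0
typedef struct __va_list_tag
{
  unsigned char gpr;            /* index of the next saved GP register */
  unsigned char fpr;            /* index of the next saved FP register */
  unsigned short reserved;      /* the named two bytes of padding */
  void *overflow_arg_area;      /* arguments passed on the stack */
  void *reg_save_area;          /* the register save area */
} __va_list_tag;
typedef __va_list_tag __sketch_va_list[1];   /* array type of one element */
#endif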
12966 /* Implement va_start. */
12968 static void
12969 rs6000_va_start (tree valist, rtx nextarg)
12971 HOST_WIDE_INT words, n_gpr, n_fpr;
12972 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
12973 tree gpr, fpr, ovf, sav, t;
12975 /* Only SVR4 needs something special. */
12976 if (DEFAULT_ABI != ABI_V4)
12978 std_expand_builtin_va_start (valist, nextarg);
12979 return;
12982 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12983 f_fpr = DECL_CHAIN (f_gpr);
12984 f_res = DECL_CHAIN (f_fpr);
12985 f_ovf = DECL_CHAIN (f_res);
12986 f_sav = DECL_CHAIN (f_ovf);
12988 valist = build_simple_mem_ref (valist);
12989 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12990 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
12991 f_fpr, NULL_TREE);
12992 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
12993 f_ovf, NULL_TREE);
12994 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
12995 f_sav, NULL_TREE);
12997 /* Count number of gp and fp argument registers used. */
12998 words = crtl->args.info.words;
12999 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13000 GP_ARG_NUM_REG);
13001 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13002 FP_ARG_NUM_REG);
13004 if (TARGET_DEBUG_ARG)
13005 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13006 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13007 words, n_gpr, n_fpr);
13009 if (cfun->va_list_gpr_size)
13011 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13012 build_int_cst (NULL_TREE, n_gpr));
13013 TREE_SIDE_EFFECTS (t) = 1;
13014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13017 if (cfun->va_list_fpr_size)
13019 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13020 build_int_cst (NULL_TREE, n_fpr));
13021 TREE_SIDE_EFFECTS (t) = 1;
13022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13024 #ifdef HAVE_AS_GNU_ATTRIBUTE
13025 if (call_ABI_of_interest (cfun->decl))
13026 rs6000_passes_float = true;
13027 #endif
13030 /* Find the overflow area. */
13031 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13032 if (words != 0)
13033 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13034 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13035 TREE_SIDE_EFFECTS (t) = 1;
13036 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13038 /* If there were no va_arg invocations, don't set up the register
13039 save area. */
13040 if (!cfun->va_list_gpr_size
13041 && !cfun->va_list_fpr_size
13042 && n_gpr < GP_ARG_NUM_REG
13043 && n_fpr < FP_ARG_V4_MAX_REG)
13044 return;
13046 /* Find the register save area. */
13047 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13048 if (cfun->machine->varargs_save_offset)
13049 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13050 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13051 TREE_SIDE_EFFECTS (t) = 1;
13052 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
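/* A usage sketch (hypothetical function): with one named int argument,
   the code above initializes gpr to 1 (r3 holds FIRST) and fpr to 0,
   so the first va_arg of each class reads r4 or f1 respectively.  */
#if 0
#include <stdarg.h>
int
f (int first, ...)
{
  va_list ap;
  va_start (ap, first);
  double d = va_arg (ap, double);   /* from f1 via reg_save_area */
  int i = va_arg (ap, int);         /* from r4 via reg_save_area */
  va_end (ap);
  return i + (int) d;
}
#endif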
13055 /* Implement va_arg. */
13057 static tree
13058 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13059 gimple_seq *post_p)
13061 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13062 tree gpr, fpr, ovf, sav, reg, t, u;
13063 int size, rsize, n_reg, sav_ofs, sav_scale;
13064 tree lab_false, lab_over, addr;
13065 int align;
13066 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13067 int regalign = 0;
13068 gimple *stmt;
13070 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13072 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13073 return build_va_arg_indirect_ref (t);
13076 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13077 earlier version of gcc, with the property that it always applied alignment
13078 adjustments to the va-args (even for zero-sized types). The cheapest way
13079 to deal with this is to replicate the effect of the part of
13080 std_gimplify_va_arg_expr that carries out the alignment adjustment, for
13081 the relevant case.
13082 We don't need to check for pass-by-reference because of the test above.
13083 We can return a simplified answer, since we know there's no offset to add. */
13085 if (((TARGET_MACHO
13086 && rs6000_darwin64_abi)
13087 || DEFAULT_ABI == ABI_ELFv2
13088 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13089 && integer_zerop (TYPE_SIZE (type)))
13091 unsigned HOST_WIDE_INT align, boundary;
13092 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13093 align = PARM_BOUNDARY / BITS_PER_UNIT;
13094 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13095 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13096 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13097 boundary /= BITS_PER_UNIT;
13098 if (boundary > align)
13100 tree t;
13101 /* This updates arg ptr by the amount that would be necessary
13102 to align the zero-sized (but not zero-alignment) item. */
13103 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13104 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13105 gimplify_and_add (t, pre_p);
13107 t = fold_convert (sizetype, valist_tmp);
13108 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13109 fold_convert (TREE_TYPE (valist),
13110 fold_build2 (BIT_AND_EXPR, sizetype, t,
13111 size_int (-boundary))));
13112 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13113 gimplify_and_add (t, pre_p);
13115 /* Since it is zero-sized there's no increment for the item itself. */
13116 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13117 return build_va_arg_indirect_ref (valist_tmp);
13120 if (DEFAULT_ABI != ABI_V4)
13122 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13124 tree elem_type = TREE_TYPE (type);
13125 machine_mode elem_mode = TYPE_MODE (elem_type);
13126 int elem_size = GET_MODE_SIZE (elem_mode);
13128 if (elem_size < UNITS_PER_WORD)
13130 tree real_part, imag_part;
13131 gimple_seq post = NULL;
13133 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13134 &post);
13135 /* Copy the value into a temporary, lest the formal temporary
13136 be reused out from under us. */
13137 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13138 gimple_seq_add_seq (pre_p, post);
13140 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13141 post_p);
13143 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13147 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13150 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13151 f_fpr = DECL_CHAIN (f_gpr);
13152 f_res = DECL_CHAIN (f_fpr);
13153 f_ovf = DECL_CHAIN (f_res);
13154 f_sav = DECL_CHAIN (f_ovf);
13156 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13157 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13158 f_fpr, NULL_TREE);
13159 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13160 f_ovf, NULL_TREE);
13161 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13162 f_sav, NULL_TREE);
13164 size = int_size_in_bytes (type);
13165 rsize = (size + 3) / 4;
13166 int pad = 4 * rsize - size;
13167 align = 1;
13169 machine_mode mode = TYPE_MODE (type);
13170 if (abi_v4_pass_in_fpr (mode, false))
13172 /* FP args go in FP registers, if present. */
13173 reg = fpr;
13174 n_reg = (size + 7) / 8;
13175 sav_ofs = (TARGET_HARD_FLOAT ? 8 : 4) * 4;
13176 sav_scale = (TARGET_HARD_FLOAT ? 8 : 4);
13177 if (mode != SFmode && mode != SDmode)
13178 align = 8;
13180 else
13182 /* Otherwise into GP registers. */
13183 reg = gpr;
13184 n_reg = rsize;
13185 sav_ofs = 0;
13186 sav_scale = 4;
13187 if (n_reg == 2)
13188 align = 8;
13191 /* Pull the value out of the saved registers.... */
13193 lab_over = NULL;
13194 addr = create_tmp_var (ptr_type_node, "addr");
13196 /* AltiVec vectors never go in registers when -mabi=altivec. */
13197 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13198 align = 16;
13199 else
13201 lab_false = create_artificial_label (input_location);
13202 lab_over = create_artificial_label (input_location);
13204 /* Long long is aligned in the registers.  So is any other 2-gpr
13205 item, such as complex int, due to a historical mistake. */
13206 u = reg;
13207 if (n_reg == 2 && reg == gpr)
13209 regalign = 1;
13210 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13211 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13212 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13213 unshare_expr (reg), u);
13215 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13216 reg number is 0 for f1, so we want to make it odd. */
13217 else if (reg == fpr && mode == TDmode)
13219 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13220 build_int_cst (TREE_TYPE (reg), 1));
13221 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13224 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13225 t = build2 (GE_EXPR, boolean_type_node, u, t);
13226 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13227 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13228 gimplify_and_add (t, pre_p);
13230 t = sav;
13231 if (sav_ofs)
13232 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13234 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13235 build_int_cst (TREE_TYPE (reg), n_reg));
13236 u = fold_convert (sizetype, u);
13237 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13238 t = fold_build_pointer_plus (t, u);
13240 /* _Decimal32 varargs are located in the second word of the 64-bit
13241 FP register for 32-bit binaries. */
13242 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
13243 t = fold_build_pointer_plus_hwi (t, size);
13245 /* Args are passed right-aligned. */
13246 if (BYTES_BIG_ENDIAN)
13247 t = fold_build_pointer_plus_hwi (t, pad);
13249 gimplify_assign (addr, t, pre_p);
13251 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13253 stmt = gimple_build_label (lab_false);
13254 gimple_seq_add_stmt (pre_p, stmt);
13256 if ((n_reg == 2 && !regalign) || n_reg > 2)
13258 /* Ensure that we don't find any more args in regs.
13259 Alignment has taken care of the special cases. */
13260 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13264 /* ... otherwise out of the overflow area. */
13266 /* Care for on-stack alignment if needed. */
13267 t = ovf;
13268 if (align != 1)
13270 t = fold_build_pointer_plus_hwi (t, align - 1);
13271 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13272 build_int_cst (TREE_TYPE (t), -align));
13275 /* Args are passed right-aligned. */
13276 if (BYTES_BIG_ENDIAN)
13277 t = fold_build_pointer_plus_hwi (t, pad);
13279 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13281 gimplify_assign (unshare_expr (addr), t, pre_p);
13283 t = fold_build_pointer_plus_hwi (t, size);
13284 gimplify_assign (unshare_expr (ovf), t, pre_p);
13286 if (lab_over)
13288 stmt = gimple_build_label (lab_over);
13289 gimple_seq_add_stmt (pre_p, stmt);
13292 if (STRICT_ALIGNMENT
13293 && (TYPE_ALIGN (type)
13294 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13296 /* The value (of type complex double, for example) may not be
13297 aligned in memory in the saved registers, so copy via a
13298 temporary. (This is the same code as used for SPARC.) */
13299 tree tmp = create_tmp_var (type, "va_arg_tmp");
13300 tree dest_addr = build_fold_addr_expr (tmp);
13302 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13303 3, dest_addr, addr, size_int (rsize * 4));
13304 TREE_ADDRESSABLE (tmp) = 1;
13306 gimplify_and_add (copy, pre_p);
13307 addr = dest_addr;
13310 addr = fold_convert (ptrtype, addr);
13311 return build_va_arg_indirect_ref (addr);
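/* A worked sketch of the V4 register save area addressed above,
   assuming hard float (sav_ofs 32 and sav_scale 8 for FP arguments):

     bytes  0 .. 31   r3 .. r10  (8 GPRs x 4 bytes, sav_scale 4)
     bytes 32 .. 95   f1 .. f8   (8 FPRs x 8 bytes)

   so va_arg of a double with fpr == 2 loads from
   reg_save_area + 32 + 2 * 8.  */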
13314 /* Builtins. */
13316 static void
13317 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13319 tree t;
13320 unsigned classify = rs6000_builtin_info[(int)code].attr;
13321 const char *attr_string = "";
13323 gcc_assert (name != NULL);
13324 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13326 if (rs6000_builtin_decls[(int)code])
13327 fatal_error (input_location,
13328 "internal error: builtin function %qs already processed",
13329 name);
13331 rs6000_builtin_decls[(int)code] = t =
13332 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13334 /* Set any special attributes. */
13335 if ((classify & RS6000_BTC_CONST) != 0)
13337 /* const function, function only depends on the inputs. */
13338 TREE_READONLY (t) = 1;
13339 TREE_NOTHROW (t) = 1;
13340 attr_string = ", const";
13342 else if ((classify & RS6000_BTC_PURE) != 0)
13344 /* pure function, function can read global memory, but does not set any
13345 external state. */
13346 DECL_PURE_P (t) = 1;
13347 TREE_NOTHROW (t) = 1;
13348 attr_string = ", pure";
13350 else if ((classify & RS6000_BTC_FP) != 0)
13352 /* Function is a math function. If rounding mode is on, then treat the
13353 function as not reading global memory, but it can have arbitrary side
13354 effects. If it is off, then assume the function is a const function.
13355 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13356 builtin-attribute.def that is used for the math functions. */
13357 TREE_NOTHROW (t) = 1;
13358 if (flag_rounding_math)
13360 DECL_PURE_P (t) = 1;
13361 DECL_IS_NOVOPS (t) = 1;
13362 attr_string = ", fp, pure";
13364 else
13366 TREE_READONLY (t) = 1;
13367 attr_string = ", fp, const";
13370 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13371 gcc_unreachable ();
13373 if (TARGET_DEBUG_BUILTIN)
13374 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13375 (int)code, name, attr_string);
13378 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13380 #undef RS6000_BUILTIN_0
13381 #undef RS6000_BUILTIN_1
13382 #undef RS6000_BUILTIN_2
13383 #undef RS6000_BUILTIN_3
13384 #undef RS6000_BUILTIN_A
13385 #undef RS6000_BUILTIN_D
13386 #undef RS6000_BUILTIN_H
13387 #undef RS6000_BUILTIN_P
13388 #undef RS6000_BUILTIN_X
13390 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13391 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13392 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13393 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13394 { MASK, ICODE, NAME, ENUM },
13396 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13397 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13398 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13399 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13400 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13402 static const struct builtin_description bdesc_3arg[] =
13404 #include "rs6000-builtin.def"
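/* A sketch of the expansion above: an entry in rs6000-builtin.def such
   as (shown for illustration)

     RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP, "__builtin_altivec_vmaddfp",
                       RS6000_BTM_ALTIVEC, RS6000_BTC_FP, CODE_FOR_fmav4sf4)

   becomes the initializer

     { RS6000_BTM_ALTIVEC, CODE_FOR_fmav4sf4,
       "__builtin_altivec_vmaddfp", ALTIVEC_BUILTIN_VMADDFP },

   while the other RS6000_BUILTIN_* classes expand to nothing, so each
   bdesc_* table collects only builtins of its own kind.  */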
13407 /* DST operations: void foo (void *, const int, const char). */
13409 #undef RS6000_BUILTIN_0
13410 #undef RS6000_BUILTIN_1
13411 #undef RS6000_BUILTIN_2
13412 #undef RS6000_BUILTIN_3
13413 #undef RS6000_BUILTIN_A
13414 #undef RS6000_BUILTIN_D
13415 #undef RS6000_BUILTIN_H
13416 #undef RS6000_BUILTIN_P
13417 #undef RS6000_BUILTIN_X
13419 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13420 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13421 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13422 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13423 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13424 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13425 { MASK, ICODE, NAME, ENUM },
13427 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13428 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13429 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13431 static const struct builtin_description bdesc_dst[] =
13433 #include "rs6000-builtin.def"
13436 /* Simple binary operations: VECc = foo (VECa, VECb). */
13438 #undef RS6000_BUILTIN_0
13439 #undef RS6000_BUILTIN_1
13440 #undef RS6000_BUILTIN_2
13441 #undef RS6000_BUILTIN_3
13442 #undef RS6000_BUILTIN_A
13443 #undef RS6000_BUILTIN_D
13444 #undef RS6000_BUILTIN_H
13445 #undef RS6000_BUILTIN_P
13446 #undef RS6000_BUILTIN_X
13448 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13449 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13450 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13451 { MASK, ICODE, NAME, ENUM },
13453 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13454 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13455 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13456 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13457 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13458 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13460 static const struct builtin_description bdesc_2arg[] =
13462 #include "rs6000-builtin.def"
13465 #undef RS6000_BUILTIN_0
13466 #undef RS6000_BUILTIN_1
13467 #undef RS6000_BUILTIN_2
13468 #undef RS6000_BUILTIN_3
13469 #undef RS6000_BUILTIN_A
13470 #undef RS6000_BUILTIN_D
13471 #undef RS6000_BUILTIN_H
13472 #undef RS6000_BUILTIN_P
13473 #undef RS6000_BUILTIN_X
13475 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13476 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13477 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13478 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13479 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13480 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13481 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13482 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13483 { MASK, ICODE, NAME, ENUM },
13485 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13487 /* AltiVec predicates. */
13489 static const struct builtin_description bdesc_altivec_preds[] =
13491 #include "rs6000-builtin.def"
13494 /* ABS* operations. */
13496 #undef RS6000_BUILTIN_0
13497 #undef RS6000_BUILTIN_1
13498 #undef RS6000_BUILTIN_2
13499 #undef RS6000_BUILTIN_3
13500 #undef RS6000_BUILTIN_A
13501 #undef RS6000_BUILTIN_D
13502 #undef RS6000_BUILTIN_H
13503 #undef RS6000_BUILTIN_P
13504 #undef RS6000_BUILTIN_X
13506 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13507 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13508 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13509 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13510 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13511 { MASK, ICODE, NAME, ENUM },
13513 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13514 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13515 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13516 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13518 static const struct builtin_description bdesc_abs[] =
13520 #include "rs6000-builtin.def"
13523 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13524 foo (VECa). */
13526 #undef RS6000_BUILTIN_0
13527 #undef RS6000_BUILTIN_1
13528 #undef RS6000_BUILTIN_2
13529 #undef RS6000_BUILTIN_3
13530 #undef RS6000_BUILTIN_A
13531 #undef RS6000_BUILTIN_D
13532 #undef RS6000_BUILTIN_H
13533 #undef RS6000_BUILTIN_P
13534 #undef RS6000_BUILTIN_X
13536 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13537 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13538 { MASK, ICODE, NAME, ENUM },
13540 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13541 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13542 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13543 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13544 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13545 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13546 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13548 static const struct builtin_description bdesc_1arg[] =
13550 #include "rs6000-builtin.def"
13553 /* Simple no-argument operations: result = __builtin_darn_32 ().  */
13555 #undef RS6000_BUILTIN_0
13556 #undef RS6000_BUILTIN_1
13557 #undef RS6000_BUILTIN_2
13558 #undef RS6000_BUILTIN_3
13559 #undef RS6000_BUILTIN_A
13560 #undef RS6000_BUILTIN_D
13561 #undef RS6000_BUILTIN_H
13562 #undef RS6000_BUILTIN_P
13563 #undef RS6000_BUILTIN_X
13565 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13566 { MASK, ICODE, NAME, ENUM },
13568 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13569 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13570 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13571 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13572 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13573 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13574 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13575 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13577 static const struct builtin_description bdesc_0arg[] =
13579 #include "rs6000-builtin.def"
13582 /* HTM builtins. */
13583 #undef RS6000_BUILTIN_0
13584 #undef RS6000_BUILTIN_1
13585 #undef RS6000_BUILTIN_2
13586 #undef RS6000_BUILTIN_3
13587 #undef RS6000_BUILTIN_A
13588 #undef RS6000_BUILTIN_D
13589 #undef RS6000_BUILTIN_H
13590 #undef RS6000_BUILTIN_P
13591 #undef RS6000_BUILTIN_X
13593 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13594 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13595 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13596 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13597 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13598 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13599 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13600 { MASK, ICODE, NAME, ENUM },
13602 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13603 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13605 static const struct builtin_description bdesc_htm[] =
13607 #include "rs6000-builtin.def"
13610 #undef RS6000_BUILTIN_0
13611 #undef RS6000_BUILTIN_1
13612 #undef RS6000_BUILTIN_2
13613 #undef RS6000_BUILTIN_3
13614 #undef RS6000_BUILTIN_A
13615 #undef RS6000_BUILTIN_D
13616 #undef RS6000_BUILTIN_H
13617 #undef RS6000_BUILTIN_P
13619 /* Return true if a builtin function is overloaded. */
13620 bool
13621 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13623 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13626 const char *
13627 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13629 return rs6000_builtin_info[(int)fncode].name;
13632 /* Expand an expression EXP that calls a builtin without arguments. */
13633 static rtx
13634 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13636 rtx pat;
13637 machine_mode tmode = insn_data[icode].operand[0].mode;
13639 if (icode == CODE_FOR_nothing)
13640 /* Builtin not supported on this processor. */
13641 return 0;
13643 if (target == 0
13644 || GET_MODE (target) != tmode
13645 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13646 target = gen_reg_rtx (tmode);
13648 pat = GEN_FCN (icode) (target);
13649 if (! pat)
13650 return 0;
13651 emit_insn (pat);
13653 return target;
13657 static rtx
13658 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13660 rtx pat;
13661 tree arg0 = CALL_EXPR_ARG (exp, 0);
13662 tree arg1 = CALL_EXPR_ARG (exp, 1);
13663 rtx op0 = expand_normal (arg0);
13664 rtx op1 = expand_normal (arg1);
13665 machine_mode mode0 = insn_data[icode].operand[0].mode;
13666 machine_mode mode1 = insn_data[icode].operand[1].mode;
13668 if (icode == CODE_FOR_nothing)
13669 /* Builtin not supported on this processor. */
13670 return 0;
13672 /* If we got invalid arguments bail out before generating bad rtl. */
13673 if (arg0 == error_mark_node || arg1 == error_mark_node)
13674 return const0_rtx;
13676 if (GET_CODE (op0) != CONST_INT
13677 || INTVAL (op0) > 255
13678 || INTVAL (op0) < 0)
13680 error ("argument 1 must be an 8-bit field value");
13681 return const0_rtx;
13684 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13685 op0 = copy_to_mode_reg (mode0, op0);
13687 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13688 op1 = copy_to_mode_reg (mode1, op1);
13690 pat = GEN_FCN (icode) (op0, op1);
13691 if (! pat)
13692 return const0_rtx;
13693 emit_insn (pat);
13695 return NULL_RTX;
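/* A usage sketch of the literal check above (hypothetical values):  */
#if 0
double d = 0.0;
__builtin_mtfsf (0xff, d);   /* OK: the mask fits in 8 bits */
__builtin_mtfsf (256, d);    /* rejected: argument 1 must be an
                                8-bit field value */
#endif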
13698 static rtx
13699 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13701 rtx pat;
13702 tree arg0 = CALL_EXPR_ARG (exp, 0);
13703 rtx op0 = expand_normal (arg0);
13704 machine_mode tmode = insn_data[icode].operand[0].mode;
13705 machine_mode mode0 = insn_data[icode].operand[1].mode;
13707 if (icode == CODE_FOR_nothing)
13708 /* Builtin not supported on this processor. */
13709 return 0;
13711 /* If we got invalid arguments bail out before generating bad rtl. */
13712 if (arg0 == error_mark_node)
13713 return const0_rtx;
13715 if (icode == CODE_FOR_altivec_vspltisb
13716 || icode == CODE_FOR_altivec_vspltish
13717 || icode == CODE_FOR_altivec_vspltisw)
13719 /* Only allow 5-bit *signed* literals. */
13720 if (GET_CODE (op0) != CONST_INT
13721 || INTVAL (op0) > 15
13722 || INTVAL (op0) < -16)
13724 error ("argument 1 must be a 5-bit signed literal");
13725 return CONST0_RTX (tmode);
13729 if (target == 0
13730 || GET_MODE (target) != tmode
13731 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13732 target = gen_reg_rtx (tmode);
13734 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13735 op0 = copy_to_mode_reg (mode0, op0);
13737 pat = GEN_FCN (icode) (target, op0);
13738 if (! pat)
13739 return 0;
13740 emit_insn (pat);
13742 return target;
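/* A usage sketch of the vspltis* range check above (assumes -maltivec
   and <altivec.h>):  */
#if 0
#include <altivec.h>
vector signed int ok = vec_splat_s32 (15);    /* in range -16 .. 15 */
vector signed int bad = vec_splat_s32 (99);   /* rejected: needs a 5-bit
                                                 signed literal */
#endif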
13745 static rtx
13746 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
13748 rtx pat, scratch1, scratch2;
13749 tree arg0 = CALL_EXPR_ARG (exp, 0);
13750 rtx op0 = expand_normal (arg0);
13751 machine_mode tmode = insn_data[icode].operand[0].mode;
13752 machine_mode mode0 = insn_data[icode].operand[1].mode;
13754 /* If we have invalid arguments, bail out before generating bad rtl. */
13755 if (arg0 == error_mark_node)
13756 return const0_rtx;
13758 if (target == 0
13759 || GET_MODE (target) != tmode
13760 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13761 target = gen_reg_rtx (tmode);
13763 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13764 op0 = copy_to_mode_reg (mode0, op0);
13766 scratch1 = gen_reg_rtx (mode0);
13767 scratch2 = gen_reg_rtx (mode0);
13769 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
13770 if (! pat)
13771 return 0;
13772 emit_insn (pat);
13774 return target;
13777 static rtx
13778 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
13780 rtx pat;
13781 tree arg0 = CALL_EXPR_ARG (exp, 0);
13782 tree arg1 = CALL_EXPR_ARG (exp, 1);
13783 rtx op0 = expand_normal (arg0);
13784 rtx op1 = expand_normal (arg1);
13785 machine_mode tmode = insn_data[icode].operand[0].mode;
13786 machine_mode mode0 = insn_data[icode].operand[1].mode;
13787 machine_mode mode1 = insn_data[icode].operand[2].mode;
13789 if (icode == CODE_FOR_nothing)
13790 /* Builtin not supported on this processor. */
13791 return 0;
13793 /* If we got invalid arguments bail out before generating bad rtl. */
13794 if (arg0 == error_mark_node || arg1 == error_mark_node)
13795 return const0_rtx;
13797 if (icode == CODE_FOR_altivec_vcfux
13798 || icode == CODE_FOR_altivec_vcfsx
13799 || icode == CODE_FOR_altivec_vctsxs
13800 || icode == CODE_FOR_altivec_vctuxs
13801 || icode == CODE_FOR_altivec_vspltb
13802 || icode == CODE_FOR_altivec_vsplth
13803 || icode == CODE_FOR_altivec_vspltw)
13805 /* Only allow 5-bit unsigned literals. */
13806 STRIP_NOPS (arg1);
13807 if (TREE_CODE (arg1) != INTEGER_CST
13808 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13810 error ("argument 2 must be a 5-bit unsigned literal");
13811 return CONST0_RTX (tmode);
13814 else if (icode == CODE_FOR_dfptstsfi_eq_dd
13815 || icode == CODE_FOR_dfptstsfi_lt_dd
13816 || icode == CODE_FOR_dfptstsfi_gt_dd
13817 || icode == CODE_FOR_dfptstsfi_unordered_dd
13818 || icode == CODE_FOR_dfptstsfi_eq_td
13819 || icode == CODE_FOR_dfptstsfi_lt_td
13820 || icode == CODE_FOR_dfptstsfi_gt_td
13821 || icode == CODE_FOR_dfptstsfi_unordered_td)
13823 /* Only allow 6-bit unsigned literals. */
13824 STRIP_NOPS (arg0);
13825 if (TREE_CODE (arg0) != INTEGER_CST
13826 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
13828 error ("argument 1 must be a 6-bit unsigned literal");
13829 return CONST0_RTX (tmode);
13832 else if (icode == CODE_FOR_xststdcqp_kf
13833 || icode == CODE_FOR_xststdcqp_tf
13834 || icode == CODE_FOR_xststdcdp
13835 || icode == CODE_FOR_xststdcsp
13836 || icode == CODE_FOR_xvtstdcdp
13837 || icode == CODE_FOR_xvtstdcsp)
13839 /* Only allow 7-bit unsigned literals. */
13840 STRIP_NOPS (arg1);
13841 if (TREE_CODE (arg1) != INTEGER_CST
13842 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
13844 error ("argument 2 must be a 7-bit unsigned literal");
13845 return CONST0_RTX (tmode);
13848 else if (icode == CODE_FOR_unpackv1ti
13849 || icode == CODE_FOR_unpackkf
13850 || icode == CODE_FOR_unpacktf
13851 || icode == CODE_FOR_unpackif
13852 || icode == CODE_FOR_unpacktd)
13854 /* Only allow 1-bit unsigned literals. */
13855 STRIP_NOPS (arg1);
13856 if (TREE_CODE (arg1) != INTEGER_CST
13857 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
13859 error ("argument 2 must be a 1-bit unsigned literal");
13860 return CONST0_RTX (tmode);
13864 if (target == 0
13865 || GET_MODE (target) != tmode
13866 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13867 target = gen_reg_rtx (tmode);
13869 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13870 op0 = copy_to_mode_reg (mode0, op0);
13871 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13872 op1 = copy_to_mode_reg (mode1, op1);
13874 pat = GEN_FCN (icode) (target, op0, op1);
13875 if (! pat)
13876 return 0;
13877 emit_insn (pat);
13879 return target;
13882 static rtx
13883 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13885 rtx pat, scratch;
13886 tree cr6_form = CALL_EXPR_ARG (exp, 0);
13887 tree arg0 = CALL_EXPR_ARG (exp, 1);
13888 tree arg1 = CALL_EXPR_ARG (exp, 2);
13889 rtx op0 = expand_normal (arg0);
13890 rtx op1 = expand_normal (arg1);
13891 machine_mode tmode = SImode;
13892 machine_mode mode0 = insn_data[icode].operand[1].mode;
13893 machine_mode mode1 = insn_data[icode].operand[2].mode;
13894 int cr6_form_int;
13896 if (TREE_CODE (cr6_form) != INTEGER_CST)
13898 error ("argument 1 of %qs must be a constant",
13899 "__builtin_altivec_predicate");
13900 return const0_rtx;
13902 else
13903 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
13905 gcc_assert (mode0 == mode1);
13907 /* If we have invalid arguments, bail out before generating bad rtl. */
13908 if (arg0 == error_mark_node || arg1 == error_mark_node)
13909 return const0_rtx;
13911 if (target == 0
13912 || GET_MODE (target) != tmode
13913 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13914 target = gen_reg_rtx (tmode);
13916 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13917 op0 = copy_to_mode_reg (mode0, op0);
13918 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13919 op1 = copy_to_mode_reg (mode1, op1);
13921 /* Note that for many of the relevant operations (e.g. cmpne or
13922 cmpeq) with float or double operands, it makes more sense for the
13923 mode of the allocated scratch register to be a vector of
13924 integers. But the choice to copy the mode of operand 0 was made
13925 long ago and there are no plans to change it. */
13926 scratch = gen_reg_rtx (mode0);
13928 pat = GEN_FCN (icode) (scratch, op0, op1);
13929 if (! pat)
13930 return 0;
13931 emit_insn (pat);
13933 /* The vec_any* and vec_all* predicates use the same opcodes for two
13934 different operations, but the bits in CR6 will be different
13935 depending on what information we want. So we have to play tricks
13936 with CR6 to get the right bits out.
13938 If you think this is disgusting, look at the specs for the
13939 AltiVec predicates. */
13941 switch (cr6_form_int)
13943 case 0:
13944 emit_insn (gen_cr6_test_for_zero (target));
13945 break;
13946 case 1:
13947 emit_insn (gen_cr6_test_for_zero_reverse (target));
13948 break;
13949 case 2:
13950 emit_insn (gen_cr6_test_for_lt (target));
13951 break;
13952 case 3:
13953 emit_insn (gen_cr6_test_for_lt_reverse (target));
13954 break;
13955 default:
13956 error ("argument 1 of %qs is out of range",
13957 "__builtin_altivec_predicate");
13958 break;
13961 return target;
13964 rtx
13965 swap_endian_selector_for_mode (machine_mode mode)
13967 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
13968 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
13969 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
13970 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
13972 unsigned int *swaparray, i;
13973 rtx perm[16];
13975 switch (mode)
13977 case E_V1TImode:
13978 swaparray = swap1;
13979 break;
13980 case E_V2DFmode:
13981 case E_V2DImode:
13982 swaparray = swap2;
13983 break;
13984 case E_V4SFmode:
13985 case E_V4SImode:
13986 swaparray = swap4;
13987 break;
13988 case E_V8HImode:
13989 swaparray = swap8;
13990 break;
13991 default:
13992 gcc_unreachable ();
13995 for (i = 0; i < 16; ++i)
13996 perm[i] = GEN_INT (swaparray[i]);
13998 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
13999 gen_rtvec_v (16, perm)));
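/* A worked example: for V4SImode the selector built above is the vperm
   control vector {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}, which
   reverses the bytes within each 32-bit element, e.g. turning the
   element 0x11223344 into 0x44332211.  */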
14002 static rtx
14003 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14005 rtx pat, addr;
14006 tree arg0 = CALL_EXPR_ARG (exp, 0);
14007 tree arg1 = CALL_EXPR_ARG (exp, 1);
14008 machine_mode tmode = insn_data[icode].operand[0].mode;
14009 machine_mode mode0 = Pmode;
14010 machine_mode mode1 = Pmode;
14011 rtx op0 = expand_normal (arg0);
14012 rtx op1 = expand_normal (arg1);
14014 if (icode == CODE_FOR_nothing)
14015 /* Builtin not supported on this processor. */
14016 return 0;
14018 /* If we got invalid arguments bail out before generating bad rtl. */
14019 if (arg0 == error_mark_node || arg1 == error_mark_node)
14020 return const0_rtx;
14022 if (target == 0
14023 || GET_MODE (target) != tmode
14024 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14025 target = gen_reg_rtx (tmode);
14027 op1 = copy_to_mode_reg (mode1, op1);
14029 /* For LVX, express the RTL accurately by ANDing the address with -16.
14030 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14031 so the raw address is fine. */
14032 if (icode == CODE_FOR_altivec_lvx_v1ti
14033 || icode == CODE_FOR_altivec_lvx_v2df
14034 || icode == CODE_FOR_altivec_lvx_v2di
14035 || icode == CODE_FOR_altivec_lvx_v4sf
14036 || icode == CODE_FOR_altivec_lvx_v4si
14037 || icode == CODE_FOR_altivec_lvx_v8hi
14038 || icode == CODE_FOR_altivec_lvx_v16qi)
14040 rtx rawaddr;
14041 if (op0 == const0_rtx)
14042 rawaddr = op1;
14043 else
14045 op0 = copy_to_mode_reg (mode0, op0);
14046 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14048 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14049 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14051 emit_insn (gen_rtx_SET (target, addr));
14053 else
14055 if (op0 == const0_rtx)
14056 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14057 else
14059 op0 = copy_to_mode_reg (mode0, op0);
14060 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14061 gen_rtx_PLUS (Pmode, op1, op0));
14064 pat = GEN_FCN (icode) (target, addr);
14065 if (! pat)
14066 return 0;
14067 emit_insn (pat);
14070 return target;
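/* A usage sketch of the LVX masking above (assumes -maltivec and
   <altivec.h>): vec_ld ignores the low four address bits, which the
   AND with -16 makes explicit in the RTL.  */
#if 0
#include <altivec.h>
vector unsigned char
load16 (const unsigned char *p)
{
  return vec_ld (5, p);   /* loads 16 bytes from (p + 5) & ~15 */
}
#endif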
14073 static rtx
14074 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14076 rtx pat;
14077 tree arg0 = CALL_EXPR_ARG (exp, 0);
14078 tree arg1 = CALL_EXPR_ARG (exp, 1);
14079 tree arg2 = CALL_EXPR_ARG (exp, 2);
14080 rtx op0 = expand_normal (arg0);
14081 rtx op1 = expand_normal (arg1);
14082 rtx op2 = expand_normal (arg2);
14083 machine_mode mode0 = insn_data[icode].operand[0].mode;
14084 machine_mode mode1 = insn_data[icode].operand[1].mode;
14085 machine_mode mode2 = insn_data[icode].operand[2].mode;
14087 if (icode == CODE_FOR_nothing)
14088 /* Builtin not supported on this processor. */
14089 return NULL_RTX;
14091 /* If we got invalid arguments bail out before generating bad rtl. */
14092 if (arg0 == error_mark_node
14093 || arg1 == error_mark_node
14094 || arg2 == error_mark_node)
14095 return NULL_RTX;
14097 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14098 op0 = copy_to_mode_reg (mode0, op0);
14099 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14100 op1 = copy_to_mode_reg (mode1, op1);
14101 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14102 op2 = copy_to_mode_reg (mode2, op2);
14104 pat = GEN_FCN (icode) (op0, op1, op2);
14105 if (pat)
14106 emit_insn (pat);
14108 return NULL_RTX;
14111 static rtx
14112 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14114 tree arg0 = CALL_EXPR_ARG (exp, 0);
14115 tree arg1 = CALL_EXPR_ARG (exp, 1);
14116 tree arg2 = CALL_EXPR_ARG (exp, 2);
14117 rtx op0 = expand_normal (arg0);
14118 rtx op1 = expand_normal (arg1);
14119 rtx op2 = expand_normal (arg2);
14120 rtx pat, addr, rawaddr;
14121 machine_mode tmode = insn_data[icode].operand[0].mode;
14122 machine_mode smode = insn_data[icode].operand[1].mode;
14123 machine_mode mode1 = Pmode;
14124 machine_mode mode2 = Pmode;
14126 /* Invalid arguments. Bail before doing anything stupid! */
14127 if (arg0 == error_mark_node
14128 || arg1 == error_mark_node
14129 || arg2 == error_mark_node)
14130 return const0_rtx;
14132 op2 = copy_to_mode_reg (mode2, op2);
14134 /* For STVX, express the RTL accurately by ANDing the address with -16.
14135 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14136 so the raw address is fine. */
14137 if (icode == CODE_FOR_altivec_stvx_v2df
14138 || icode == CODE_FOR_altivec_stvx_v2di
14139 || icode == CODE_FOR_altivec_stvx_v4sf
14140 || icode == CODE_FOR_altivec_stvx_v4si
14141 || icode == CODE_FOR_altivec_stvx_v8hi
14142 || icode == CODE_FOR_altivec_stvx_v16qi)
14144 if (op1 == const0_rtx)
14145 rawaddr = op2;
14146 else
14148 op1 = copy_to_mode_reg (mode1, op1);
14149 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14152 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14153 addr = gen_rtx_MEM (tmode, addr);
14155 op0 = copy_to_mode_reg (tmode, op0);
14157 emit_insn (gen_rtx_SET (addr, op0));
14159 else
14161 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14162 op0 = copy_to_mode_reg (smode, op0);
14164 if (op1 == const0_rtx)
14165 addr = gen_rtx_MEM (tmode, op2);
14166 else
14168 op1 = copy_to_mode_reg (mode1, op1);
14169 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14172 pat = GEN_FCN (icode) (addr, op0);
14173 if (pat)
14174 emit_insn (pat);
14177 return NULL_RTX;
14180 /* Return the appropriate SPR number associated with the given builtin. */
14181 static inline HOST_WIDE_INT
14182 htm_spr_num (enum rs6000_builtins code)
14184 if (code == HTM_BUILTIN_GET_TFHAR
14185 || code == HTM_BUILTIN_SET_TFHAR)
14186 return TFHAR_SPR;
14187 else if (code == HTM_BUILTIN_GET_TFIAR
14188 || code == HTM_BUILTIN_SET_TFIAR)
14189 return TFIAR_SPR;
14190 else if (code == HTM_BUILTIN_GET_TEXASR
14191 || code == HTM_BUILTIN_SET_TEXASR)
14192 return TEXASR_SPR;
14193 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14194 || code == HTM_BUILTIN_SET_TEXASRU);
14195 return TEXASRU_SPR;
14198 /* Return the appropriate SPR regno associated with the given builtin. */
14199 static inline HOST_WIDE_INT
14200 htm_spr_regno (enum rs6000_builtins code)
14202 if (code == HTM_BUILTIN_GET_TFHAR
14203 || code == HTM_BUILTIN_SET_TFHAR)
14204 return TFHAR_REGNO;
14205 else if (code == HTM_BUILTIN_GET_TFIAR
14206 || code == HTM_BUILTIN_SET_TFIAR)
14207 return TFIAR_REGNO;
14208 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14209 || code == HTM_BUILTIN_SET_TEXASR
14210 || code == HTM_BUILTIN_GET_TEXASRU
14211 || code == HTM_BUILTIN_SET_TEXASRU);
14212 return TEXASR_REGNO;
14215 /* Return the correct ICODE value depending on whether we are
14216 setting or reading the HTM SPRs. */
14217 static inline enum insn_code
14218 rs6000_htm_spr_icode (bool nonvoid)
14220 if (nonvoid)
14221 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14222 else
14223 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14226 /* Expand the HTM builtin in EXP and store the result in TARGET.
14227 Store true in *EXPANDEDP if we found a builtin to expand. */
14228 static rtx
14229 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14231 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14232 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14233 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14234 const struct builtin_description *d;
14235 size_t i;
14237 *expandedp = true;
14239 if (!TARGET_POWERPC64
14240 && (fcode == HTM_BUILTIN_TABORTDC
14241 || fcode == HTM_BUILTIN_TABORTDCI))
14243 size_t uns_fcode = (size_t)fcode;
14244 const char *name = rs6000_builtin_info[uns_fcode].name;
14245 error ("builtin %qs is only valid in 64-bit mode", name);
14246 return const0_rtx;
14249 /* Expand the HTM builtins. */
14250 d = bdesc_htm;
14251 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14252 if (d->code == fcode)
14254 rtx op[MAX_HTM_OPERANDS], pat;
14255 int nopnds = 0;
14256 tree arg;
14257 call_expr_arg_iterator iter;
14258 unsigned attr = rs6000_builtin_info[fcode].attr;
14259 enum insn_code icode = d->icode;
14260 const struct insn_operand_data *insn_op;
14261 bool uses_spr = (attr & RS6000_BTC_SPR);
14262 rtx cr = NULL_RTX;
14264 if (uses_spr)
14265 icode = rs6000_htm_spr_icode (nonvoid);
14266 insn_op = &insn_data[icode].operand[0];
14268 if (nonvoid)
14270 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
14271 if (!target
14272 || GET_MODE (target) != tmode
14273 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14274 target = gen_reg_rtx (tmode);
14275 if (uses_spr)
14276 op[nopnds++] = target;
14279 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14281 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14282 return const0_rtx;
14284 insn_op = &insn_data[icode].operand[nopnds];
14286 op[nopnds] = expand_normal (arg);
14288 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14290 if (!strcmp (insn_op->constraint, "n"))
14292 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14293 if (!CONST_INT_P (op[nopnds]))
14294 error ("argument %d must be an unsigned literal", arg_num);
14295 else
14296 error ("argument %d is an unsigned literal that is "
14297 "out of range", arg_num);
14298 return const0_rtx;
14300 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14303 nopnds++;
14306 /* Handle the builtins for extended mnemonics. These accept
14307 no arguments, but map to builtins that take arguments. */
14308 switch (fcode)
14310 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14311 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14312 op[nopnds++] = GEN_INT (1);
14313 if (flag_checking)
14314 attr |= RS6000_BTC_UNARY;
14315 break;
14316 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14317 op[nopnds++] = GEN_INT (0);
14318 if (flag_checking)
14319 attr |= RS6000_BTC_UNARY;
14320 break;
14321 default:
14322 break;
14325 /* If this builtin accesses SPRs, then pass in the appropriate
14326 SPR number and SPR regno as the last two operands. */
14327 if (uses_spr)
14329 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14330 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14331 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14333 /* If this builtin accesses a CR, then pass in a scratch
14334 CR as the last operand. */
14335 else if (attr & RS6000_BTC_CR)
14336 { cr = gen_reg_rtx (CCmode);
14337 op[nopnds++] = cr;
14340 if (flag_checking)
14342 int expected_nopnds = 0;
14343 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14344 expected_nopnds = 1;
14345 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14346 expected_nopnds = 2;
14347 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14348 expected_nopnds = 3;
14349 if (!(attr & RS6000_BTC_VOID))
14350 expected_nopnds += 1;
14351 if (uses_spr)
14352 expected_nopnds += 2;
14354 gcc_assert (nopnds == expected_nopnds
14355 && nopnds <= MAX_HTM_OPERANDS);
14358 switch (nopnds)
14360 case 1:
14361 pat = GEN_FCN (icode) (op[0]);
14362 break;
14363 case 2:
14364 pat = GEN_FCN (icode) (op[0], op[1]);
14365 break;
14366 case 3:
14367 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14368 break;
14369 case 4:
14370 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14371 break;
14372 default:
14373 gcc_unreachable ();
14375 if (!pat)
14376 return NULL_RTX;
14377 emit_insn (pat);
14379 if (attr & RS6000_BTC_CR)
14381 if (fcode == HTM_BUILTIN_TBEGIN)
14383 /* Emit code to set TARGET to true or false depending on
14384 whether the tbegin. instruction succeeded or failed
14385 to start a transaction. We do this by placing the 1's
14386 complement of CR's EQ bit into TARGET. */
14387 rtx scratch = gen_reg_rtx (SImode);
14388 emit_insn (gen_rtx_SET (scratch,
14389 gen_rtx_EQ (SImode, cr,
14390 const0_rtx)));
14391 emit_insn (gen_rtx_SET (target,
14392 gen_rtx_XOR (SImode, scratch,
14393 GEN_INT (1))));
14395 else
14397 /* Emit code to copy the 4-bit condition register field
14398 CR into the least significant end of register TARGET. */
14399 rtx scratch1 = gen_reg_rtx (SImode);
14400 rtx scratch2 = gen_reg_rtx (SImode);
14401 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14402 emit_insn (gen_movcc (subreg, cr));
14403 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14404 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14408 if (nonvoid)
14409 return target;
14410 return const0_rtx;
14413 *expandedp = false;
14414 return NULL_RTX;
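/* A usage sketch of the HTM builtins expanded above (requires -mhtm;
   the function is hypothetical):  */
#if 0
static int
atomic_bump (int *p)
{
  if (__builtin_tbegin (0))     /* nonzero: transaction started */
    {
      ++*p;
      __builtin_tend (0);
      return 1;
    }
  return 0;                     /* transaction failed to start */
}
#endif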
14417 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14419 static rtx
14420 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14421 rtx target)
14423 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14424 if (fcode == RS6000_BUILTIN_CPU_INIT)
14425 return const0_rtx;
14427 if (target == 0 || GET_MODE (target) != SImode)
14428 target = gen_reg_rtx (SImode);
14430 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14431 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14432 /* The target_clones attribute creates an ARRAY_REF instead of a
14433 STRING_CST; convert it back to a STRING_CST. */
14434 if (TREE_CODE (arg) == ARRAY_REF
14435 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14436 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14437 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14438 arg = TREE_OPERAND (arg, 0);
14440 if (TREE_CODE (arg) != STRING_CST)
14442 error ("builtin %qs only accepts a string argument",
14443 rs6000_builtin_info[(size_t) fcode].name);
14444 return const0_rtx;
14447 if (fcode == RS6000_BUILTIN_CPU_IS)
14449 const char *cpu = TREE_STRING_POINTER (arg);
14450 rtx cpuid = NULL_RTX;
14451 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14452 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14454 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14455 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14456 break;
14458 if (cpuid == NULL_RTX)
14460 /* Invalid CPU argument. */
14461 error ("cpu %qs is an invalid argument to builtin %qs",
14462 cpu, rs6000_builtin_info[(size_t) fcode].name);
14463 return const0_rtx;
14466 rtx platform = gen_reg_rtx (SImode);
14467 rtx tcbmem = gen_const_mem (SImode,
14468 gen_rtx_PLUS (Pmode,
14469 gen_rtx_REG (Pmode, TLS_REGNUM),
14470 GEN_INT (TCB_PLATFORM_OFFSET)));
14471 emit_move_insn (platform, tcbmem);
14472 emit_insn (gen_eqsi3 (target, platform, cpuid));
14474 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14476 const char *hwcap = TREE_STRING_POINTER (arg);
14477 rtx mask = NULL_RTX;
14478 int hwcap_offset;
14479 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14480 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14482 mask = GEN_INT (cpu_supports_info[i].mask);
14483 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14484 break;
14486 if (mask == NULL_RTX)
14488 /* Invalid HWCAP argument. */
14489 error ("%s %qs is an invalid argument to builtin %qs",
14490 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
14491 return const0_rtx;
14494 rtx tcb_hwcap = gen_reg_rtx (SImode);
14495 rtx tcbmem = gen_const_mem (SImode,
14496 gen_rtx_PLUS (Pmode,
14497 gen_rtx_REG (Pmode, TLS_REGNUM),
14498 GEN_INT (hwcap_offset)));
14499 emit_move_insn (tcb_hwcap, tcbmem);
14500 rtx scratch1 = gen_reg_rtx (SImode);
14501 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14502 rtx scratch2 = gen_reg_rtx (SImode);
14503 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14504 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
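/* Net effect: TARGET = ((tcb_hwcap & mask) == 0) ^ 1, i.e. 1 exactly
   when at least one of the requested capability bits is set in the TCB
   word. */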
14506 else
14507 gcc_unreachable ();
14509 /* Record that we have expanded a CPU builtin, so that we can later
14510 emit a reference to the special symbol exported by LIBC to ensure we
14511 do not link against an old LIBC that doesn't support this feature. */
14512 cpu_builtin_p = true;
14514 #else
14515 warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware "
14516 "capability bits", rs6000_builtin_info[(size_t) fcode].name);
14518 /* For old LIBCs, always return FALSE. */
14519 emit_move_insn (target, GEN_INT (0));
14520 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14522 return target;
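/* Expand a built-in that maps onto a three-operand instruction pattern
   ICODE. For example, a vec_sld call such as

     vec_sld (a, b, 3)

   arrives here with ICODE naming one of the altivec_vsldoi_* patterns
   and the literal 3 checked against the 4-bit range enforced below. */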
14525 static rtx
14526 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14528 rtx pat;
14529 tree arg0 = CALL_EXPR_ARG (exp, 0);
14530 tree arg1 = CALL_EXPR_ARG (exp, 1);
14531 tree arg2 = CALL_EXPR_ARG (exp, 2);
14532 rtx op0 = expand_normal (arg0);
14533 rtx op1 = expand_normal (arg1);
14534 rtx op2 = expand_normal (arg2);
14535 machine_mode tmode = insn_data[icode].operand[0].mode;
14536 machine_mode mode0 = insn_data[icode].operand[1].mode;
14537 machine_mode mode1 = insn_data[icode].operand[2].mode;
14538 machine_mode mode2 = insn_data[icode].operand[3].mode;
14540 if (icode == CODE_FOR_nothing)
14541 /* Builtin not supported on this processor. */
14542 return 0;
14544 /* If we got invalid arguments bail out before generating bad rtl. */
14545 if (arg0 == error_mark_node
14546 || arg1 == error_mark_node
14547 || arg2 == error_mark_node)
14548 return const0_rtx;
14550 /* Check and prepare argument depending on the instruction code.
14552 Note that a switch statement instead of the sequence of tests
14553 would be incorrect as many of the CODE_FOR values could be
14554 CODE_FOR_nothing and that would yield multiple alternatives
14555 with identical values. We'd never reach here at runtime in
14556 this case. */
14557 if (icode == CODE_FOR_altivec_vsldoi_v4sf
14558 || icode == CODE_FOR_altivec_vsldoi_v2df
14559 || icode == CODE_FOR_altivec_vsldoi_v4si
14560 || icode == CODE_FOR_altivec_vsldoi_v8hi
14561 || icode == CODE_FOR_altivec_vsldoi_v16qi)
14563 /* Only allow 4-bit unsigned literals. */
14564 STRIP_NOPS (arg2);
14565 if (TREE_CODE (arg2) != INTEGER_CST
14566 || TREE_INT_CST_LOW (arg2) & ~0xf)
14568 error ("argument 3 must be a 4-bit unsigned literal");
14569 return CONST0_RTX (tmode);
14572 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
14573 || icode == CODE_FOR_vsx_xxpermdi_v2di
14574 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
14575 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
14576 || icode == CODE_FOR_vsx_xxpermdi_v1ti
14577 || icode == CODE_FOR_vsx_xxpermdi_v4sf
14578 || icode == CODE_FOR_vsx_xxpermdi_v4si
14579 || icode == CODE_FOR_vsx_xxpermdi_v8hi
14580 || icode == CODE_FOR_vsx_xxpermdi_v16qi
14581 || icode == CODE_FOR_vsx_xxsldwi_v16qi
14582 || icode == CODE_FOR_vsx_xxsldwi_v8hi
14583 || icode == CODE_FOR_vsx_xxsldwi_v4si
14584 || icode == CODE_FOR_vsx_xxsldwi_v4sf
14585 || icode == CODE_FOR_vsx_xxsldwi_v2di
14586 || icode == CODE_FOR_vsx_xxsldwi_v2df)
14588 /* Only allow 2-bit unsigned literals. */
14589 STRIP_NOPS (arg2);
14590 if (TREE_CODE (arg2) != INTEGER_CST
14591 || TREE_INT_CST_LOW (arg2) & ~0x3)
14593 error ("argument 3 must be a 2-bit unsigned literal");
14594 return CONST0_RTX (tmode);
14597 else if (icode == CODE_FOR_vsx_set_v2df
14598 || icode == CODE_FOR_vsx_set_v2di
14599 || icode == CODE_FOR_bcdadd
14600 || icode == CODE_FOR_bcdadd_lt
14601 || icode == CODE_FOR_bcdadd_eq
14602 || icode == CODE_FOR_bcdadd_gt
14603 || icode == CODE_FOR_bcdsub
14604 || icode == CODE_FOR_bcdsub_lt
14605 || icode == CODE_FOR_bcdsub_eq
14606 || icode == CODE_FOR_bcdsub_gt)
14608 /* Only allow 1-bit unsigned literals. */
14609 STRIP_NOPS (arg2);
14610 if (TREE_CODE (arg2) != INTEGER_CST
14611 || TREE_INT_CST_LOW (arg2) & ~0x1)
14613 error ("argument 3 must be a 1-bit unsigned literal");
14614 return CONST0_RTX (tmode);
14617 else if (icode == CODE_FOR_dfp_ddedpd_dd
14618 || icode == CODE_FOR_dfp_ddedpd_td)
14620 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
14621 STRIP_NOPS (arg0);
14622 if (TREE_CODE (arg0) != INTEGER_CST
14623 || TREE_INT_CST_LOW (arg0) & ~0x3)
14625 error ("argument 1 must be 0 or 2");
14626 return CONST0_RTX (tmode);
14629 else if (icode == CODE_FOR_dfp_denbcd_dd
14630 || icode == CODE_FOR_dfp_denbcd_td)
14632 /* Only allow 1-bit unsigned literals. */
14633 STRIP_NOPS (arg0);
14634 if (TREE_CODE (arg0) != INTEGER_CST
14635 || TREE_INT_CST_LOW (arg0) & ~0x1)
14637 error ("argument 1 must be a 1-bit unsigned literal");
14638 return CONST0_RTX (tmode);
14641 else if (icode == CODE_FOR_dfp_dscli_dd
14642 || icode == CODE_FOR_dfp_dscli_td
14643 || icode == CODE_FOR_dfp_dscri_dd
14644 || icode == CODE_FOR_dfp_dscri_td)
14646 /* Only allow 6-bit unsigned literals. */
14647 STRIP_NOPS (arg1);
14648 if (TREE_CODE (arg1) != INTEGER_CST
14649 || TREE_INT_CST_LOW (arg1) & ~0x3f)
14651 error ("argument 2 must be a 6-bit unsigned literal");
14652 return CONST0_RTX (tmode);
14655 else if (icode == CODE_FOR_crypto_vshasigmaw
14656 || icode == CODE_FOR_crypto_vshasigmad)
14658 /* Check whether the 2nd and 3rd arguments are integer constants and in
14659 range and prepare arguments. */
14660 STRIP_NOPS (arg1);
14661 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
14663 error ("argument 2 must be 0 or 1");
14664 return CONST0_RTX (tmode);
14667 STRIP_NOPS (arg2);
14668 if (TREE_CODE (arg2) != INTEGER_CST
14669 || wi::geu_p (wi::to_wide (arg2), 16))
14671 error ("argument 3 must be in the range 0..15");
14672 return CONST0_RTX (tmode);
14676 if (target == 0
14677 || GET_MODE (target) != tmode
14678 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14679 target = gen_reg_rtx (tmode);
14681 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14682 op0 = copy_to_mode_reg (mode0, op0);
14683 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14684 op1 = copy_to_mode_reg (mode1, op1);
14685 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14686 op2 = copy_to_mode_reg (mode2, op2);
14688 pat = GEN_FCN (icode) (target, op0, op1, op2);
14689 if (! pat)
14690 return 0;
14691 emit_insn (pat);
14693 return target;
14697 /* Expand the dst builtins. */
14698 static rtx
14699 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
14700 bool *expandedp)
14702 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14703 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14704 tree arg0, arg1, arg2;
14705 machine_mode mode0, mode1;
14706 rtx pat, op0, op1, op2;
14707 const struct builtin_description *d;
14708 size_t i;
14710 *expandedp = false;
14712 /* Handle DST variants. */
14713 d = bdesc_dst;
14714 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14715 if (d->code == fcode)
14717 arg0 = CALL_EXPR_ARG (exp, 0);
14718 arg1 = CALL_EXPR_ARG (exp, 1);
14719 arg2 = CALL_EXPR_ARG (exp, 2);
14720 op0 = expand_normal (arg0);
14721 op1 = expand_normal (arg1);
14722 op2 = expand_normal (arg2);
14723 mode0 = insn_data[d->icode].operand[0].mode;
14724 mode1 = insn_data[d->icode].operand[1].mode;
14726 /* Invalid arguments, bail out before generating bad rtl. */
14727 if (arg0 == error_mark_node
14728 || arg1 == error_mark_node
14729 || arg2 == error_mark_node)
14730 return const0_rtx;
14732 *expandedp = true;
14733 STRIP_NOPS (arg2);
14734 if (TREE_CODE (arg2) != INTEGER_CST
14735 || TREE_INT_CST_LOW (arg2) & ~0x3)
14737 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
14738 return const0_rtx;
14741 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14742 op0 = copy_to_mode_reg (Pmode, op0);
14743 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14744 op1 = copy_to_mode_reg (mode1, op1);
14746 pat = GEN_FCN (d->icode) (op0, op1, op2);
14747 if (pat != 0)
14748 emit_insn (pat);
14750 return NULL_RTX;
14753 return NULL_RTX;
14756 /* Expand vec_init builtin. */
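/* A call reaching here supplies one scalar argument per vector element
   (asserted below), e.g. four ints for a V4SI result. The elements are
   collected into a PARALLEL and handed to rs6000_expand_vector_init,
   which chooses the actual splat/merge instruction sequence. */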
14757 static rtx
14758 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
14760 machine_mode tmode = TYPE_MODE (type);
14761 machine_mode inner_mode = GET_MODE_INNER (tmode);
14762 int i, n_elt = GET_MODE_NUNITS (tmode);
14764 gcc_assert (VECTOR_MODE_P (tmode));
14765 gcc_assert (n_elt == call_expr_nargs (exp));
14767 if (!target || !register_operand (target, tmode))
14768 target = gen_reg_rtx (tmode);
14770 /* If we have a vector composed of a single element, such as V1TImode, do
14771 the initialization directly. */
14772 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
14774 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
14775 emit_move_insn (target, gen_lowpart (tmode, x));
14777 else
14779 rtvec v = rtvec_alloc (n_elt);
14781 for (i = 0; i < n_elt; ++i)
14783 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
14784 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
14787 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
14790 return target;
14793 /* Return the integer constant in ARG. Constrain it to be in the range
14794 of the subparts of VEC_TYPE; issue an error if not. */
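/* For example, with a V4SI vector type the valid selectors are 0..3;
   anything else raises the error below, and 0 is returned so that
   expansion can continue after the diagnostic. */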
14796 static int
14797 get_element_number (tree vec_type, tree arg)
14799 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
14801 if (!tree_fits_uhwi_p (arg)
14802 || (elt = tree_to_uhwi (arg), elt > max))
14804 error ("selector must be an integer constant in the range 0..%wi", max);
14805 return 0;
14808 return elt;
14811 /* Expand vec_set builtin. */
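/* ARG0 is the vector, ARG1 the replacement element value, and ARG2 the
   literal element number; e.g. setting element 2 of a V4SI vector
   requires ARG2 to be a constant in the range 0..3, which
   get_element_number verifies. */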
14812 static rtx
14813 altivec_expand_vec_set_builtin (tree exp)
14815 machine_mode tmode, mode1;
14816 tree arg0, arg1, arg2;
14817 int elt;
14818 rtx op0, op1;
14820 arg0 = CALL_EXPR_ARG (exp, 0);
14821 arg1 = CALL_EXPR_ARG (exp, 1);
14822 arg2 = CALL_EXPR_ARG (exp, 2);
14824 tmode = TYPE_MODE (TREE_TYPE (arg0));
14825 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14826 gcc_assert (VECTOR_MODE_P (tmode));
14828 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
14829 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
14830 elt = get_element_number (TREE_TYPE (arg0), arg2);
14832 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
14833 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
14835 op0 = force_reg (tmode, op0);
14836 op1 = force_reg (mode1, op1);
14838 rs6000_expand_vector_set (op0, op1, elt);
14840 return op0;
14843 /* Expand vec_ext builtin. */
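/* ARG0 is the vector and ARG1 the element number. A constant ARG1 is
   range-checked via get_element_number; a variable ARG1 is also allowed
   and is handled at run time by rs6000_expand_vector_extract. */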
14844 static rtx
14845 altivec_expand_vec_ext_builtin (tree exp, rtx target)
14847 machine_mode tmode, mode0;
14848 tree arg0, arg1;
14849 rtx op0;
14850 rtx op1;
14852 arg0 = CALL_EXPR_ARG (exp, 0);
14853 arg1 = CALL_EXPR_ARG (exp, 1);
14855 op0 = expand_normal (arg0);
14856 op1 = expand_normal (arg1);
14858 /* Call get_element_number to validate arg1 if it is a constant. */
14859 if (TREE_CODE (arg1) == INTEGER_CST)
14860 (void) get_element_number (TREE_TYPE (arg0), arg1);
14862 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
14863 mode0 = TYPE_MODE (TREE_TYPE (arg0));
14864 gcc_assert (VECTOR_MODE_P (mode0));
14866 op0 = force_reg (mode0, op0);
14868 if (optimize || !target || !register_operand (target, tmode))
14869 target = gen_reg_rtx (tmode);
14871 rs6000_expand_vector_extract (target, op0, op1);
14873 return target;
14876 /* Expand the builtin in EXP and store the result in TARGET. Store
14877 true in *EXPANDEDP if we found a builtin to expand. */
14878 static rtx
14879 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
14881 const struct builtin_description *d;
14882 size_t i;
14883 enum insn_code icode;
14884 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14885 tree arg0, arg1, arg2;
14886 rtx op0, pat;
14887 machine_mode tmode, mode0;
14888 enum rs6000_builtins fcode
14889 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14891 if (rs6000_overloaded_builtin_p (fcode))
14893 *expandedp = true;
14894 error ("unresolved overload for Altivec builtin %qF", fndecl);
14896 /* Given it is invalid, just generate a normal call. */
14897 return expand_call (exp, target, false);
14900 target = altivec_expand_dst_builtin (exp, target, expandedp);
14901 if (*expandedp)
14902 return target;
14904 *expandedp = true;
14906 switch (fcode)
14908 case ALTIVEC_BUILTIN_STVX_V2DF:
14909 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
14910 case ALTIVEC_BUILTIN_STVX_V2DI:
14911 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
14912 case ALTIVEC_BUILTIN_STVX_V4SF:
14913 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
14914 case ALTIVEC_BUILTIN_STVX:
14915 case ALTIVEC_BUILTIN_STVX_V4SI:
14916 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
14917 case ALTIVEC_BUILTIN_STVX_V8HI:
14918 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
14919 case ALTIVEC_BUILTIN_STVX_V16QI:
14920 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
14921 case ALTIVEC_BUILTIN_STVEBX:
14922 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
14923 case ALTIVEC_BUILTIN_STVEHX:
14924 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
14925 case ALTIVEC_BUILTIN_STVEWX:
14926 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
14927 case ALTIVEC_BUILTIN_STVXL_V2DF:
14928 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
14929 case ALTIVEC_BUILTIN_STVXL_V2DI:
14930 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
14931 case ALTIVEC_BUILTIN_STVXL_V4SF:
14932 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
14933 case ALTIVEC_BUILTIN_STVXL:
14934 case ALTIVEC_BUILTIN_STVXL_V4SI:
14935 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
14936 case ALTIVEC_BUILTIN_STVXL_V8HI:
14937 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
14938 case ALTIVEC_BUILTIN_STVXL_V16QI:
14939 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
14941 case ALTIVEC_BUILTIN_STVLX:
14942 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
14943 case ALTIVEC_BUILTIN_STVLXL:
14944 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
14945 case ALTIVEC_BUILTIN_STVRX:
14946 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
14947 case ALTIVEC_BUILTIN_STVRXL:
14948 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
14950 case P9V_BUILTIN_STXVL:
14951 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
14953 case P9V_BUILTIN_XST_LEN_R:
14954 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
14956 case VSX_BUILTIN_STXVD2X_V1TI:
14957 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
14958 case VSX_BUILTIN_STXVD2X_V2DF:
14959 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
14960 case VSX_BUILTIN_STXVD2X_V2DI:
14961 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
14962 case VSX_BUILTIN_STXVW4X_V4SF:
14963 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
14964 case VSX_BUILTIN_STXVW4X_V4SI:
14965 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
14966 case VSX_BUILTIN_STXVW4X_V8HI:
14967 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
14968 case VSX_BUILTIN_STXVW4X_V16QI:
14969 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
14971 /* For the following on big endian, it's ok to use any appropriate
14972 unaligned-supporting store, so use a generic expander. For
14973 little-endian, the exact element-reversing instruction must
14974 be used. */
14975 case VSX_BUILTIN_ST_ELEMREV_V1TI:
14977 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti
14978 : CODE_FOR_vsx_st_elemrev_v1ti);
14979 return altivec_expand_stv_builtin (code, exp);
14981 case VSX_BUILTIN_ST_ELEMREV_V2DF:
14983 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
14984 : CODE_FOR_vsx_st_elemrev_v2df);
14985 return altivec_expand_stv_builtin (code, exp);
14987 case VSX_BUILTIN_ST_ELEMREV_V2DI:
14989 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
14990 : CODE_FOR_vsx_st_elemrev_v2di);
14991 return altivec_expand_stv_builtin (code, exp);
14993 case VSX_BUILTIN_ST_ELEMREV_V4SF:
14995 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
14996 : CODE_FOR_vsx_st_elemrev_v4sf);
14997 return altivec_expand_stv_builtin (code, exp);
14999 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15001 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15002 : CODE_FOR_vsx_st_elemrev_v4si);
15003 return altivec_expand_stv_builtin (code, exp);
15005 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15007 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15008 : CODE_FOR_vsx_st_elemrev_v8hi);
15009 return altivec_expand_stv_builtin (code, exp);
15011 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15013 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15014 : CODE_FOR_vsx_st_elemrev_v16qi);
15015 return altivec_expand_stv_builtin (code, exp);
15018 case ALTIVEC_BUILTIN_MFVSCR:
15019 icode = CODE_FOR_altivec_mfvscr;
15020 tmode = insn_data[icode].operand[0].mode;
15022 if (target == 0
15023 || GET_MODE (target) != tmode
15024 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15025 target = gen_reg_rtx (tmode);
15027 pat = GEN_FCN (icode) (target);
15028 if (! pat)
15029 return 0;
15030 emit_insn (pat);
15031 return target;
15033 case ALTIVEC_BUILTIN_MTVSCR:
15034 icode = CODE_FOR_altivec_mtvscr;
15035 arg0 = CALL_EXPR_ARG (exp, 0);
15036 op0 = expand_normal (arg0);
15037 mode0 = insn_data[icode].operand[0].mode;
15039 /* If we got invalid arguments bail out before generating bad rtl. */
15040 if (arg0 == error_mark_node)
15041 return const0_rtx;
15043 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15044 op0 = copy_to_mode_reg (mode0, op0);
15046 pat = GEN_FCN (icode) (op0);
15047 if (pat)
15048 emit_insn (pat);
15049 return NULL_RTX;
15051 case ALTIVEC_BUILTIN_DSSALL:
15052 emit_insn (gen_altivec_dssall ());
15053 return NULL_RTX;
15055 case ALTIVEC_BUILTIN_DSS:
15056 icode = CODE_FOR_altivec_dss;
15057 arg0 = CALL_EXPR_ARG (exp, 0);
15058 STRIP_NOPS (arg0);
15059 op0 = expand_normal (arg0);
15060 mode0 = insn_data[icode].operand[0].mode;
15062 /* If we got invalid arguments bail out before generating bad rtl. */
15063 if (arg0 == error_mark_node)
15064 return const0_rtx;
15066 if (TREE_CODE (arg0) != INTEGER_CST
15067 || TREE_INT_CST_LOW (arg0) & ~0x3)
15069 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
15070 return const0_rtx;
15073 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15074 op0 = copy_to_mode_reg (mode0, op0);
15076 emit_insn (gen_altivec_dss (op0));
15077 return NULL_RTX;
15079 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15080 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15081 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15082 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15083 case VSX_BUILTIN_VEC_INIT_V2DF:
15084 case VSX_BUILTIN_VEC_INIT_V2DI:
15085 case VSX_BUILTIN_VEC_INIT_V1TI:
15086 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15088 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15089 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15090 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15091 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15092 case VSX_BUILTIN_VEC_SET_V2DF:
15093 case VSX_BUILTIN_VEC_SET_V2DI:
15094 case VSX_BUILTIN_VEC_SET_V1TI:
15095 return altivec_expand_vec_set_builtin (exp);
15097 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15098 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15099 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15100 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15101 case VSX_BUILTIN_VEC_EXT_V2DF:
15102 case VSX_BUILTIN_VEC_EXT_V2DI:
15103 case VSX_BUILTIN_VEC_EXT_V1TI:
15104 return altivec_expand_vec_ext_builtin (exp, target);
15106 case P9V_BUILTIN_VEC_EXTRACT4B:
15107 arg1 = CALL_EXPR_ARG (exp, 1);
15108 STRIP_NOPS (arg1);
15110 /* Generate a normal call if the argument is invalid. */
15111 if (arg1 == error_mark_node)
15112 return expand_call (exp, target, false);
15114 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
15116 error ("second argument to %qs must be 0..12", "vec_vextract4b");
15117 return expand_call (exp, target, false);
15119 break;
15121 case P9V_BUILTIN_VEC_INSERT4B:
15122 arg2 = CALL_EXPR_ARG (exp, 2);
15123 STRIP_NOPS (arg2);
15125 /* Generate a normal call if the argument is invalid. */
15126 if (arg2 == error_mark_node)
15127 return expand_call (exp, target, false);
15129 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
15131 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
15132 return expand_call (exp, target, false);
15134 break;
15136 default:
15137 break;
15138 /* Fall through. */
15141 /* Expand abs* operations. */
15142 d = bdesc_abs;
15143 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15144 if (d->code == fcode)
15145 return altivec_expand_abs_builtin (d->icode, exp, target);
15147 /* Expand the AltiVec predicates. */
15148 d = bdesc_altivec_preds;
15149 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15150 if (d->code == fcode)
15151 return altivec_expand_predicate_builtin (d->icode, exp, target);
15153 /* LV* are funky. We initialized them differently. */
15154 switch (fcode)
15156 case ALTIVEC_BUILTIN_LVSL:
15157 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15158 exp, target, false);
15159 case ALTIVEC_BUILTIN_LVSR:
15160 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15161 exp, target, false);
15162 case ALTIVEC_BUILTIN_LVEBX:
15163 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15164 exp, target, false);
15165 case ALTIVEC_BUILTIN_LVEHX:
15166 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15167 exp, target, false);
15168 case ALTIVEC_BUILTIN_LVEWX:
15169 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15170 exp, target, false);
15171 case ALTIVEC_BUILTIN_LVXL_V2DF:
15172 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15173 exp, target, false);
15174 case ALTIVEC_BUILTIN_LVXL_V2DI:
15175 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15176 exp, target, false);
15177 case ALTIVEC_BUILTIN_LVXL_V4SF:
15178 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15179 exp, target, false);
15180 case ALTIVEC_BUILTIN_LVXL:
15181 case ALTIVEC_BUILTIN_LVXL_V4SI:
15182 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15183 exp, target, false);
15184 case ALTIVEC_BUILTIN_LVXL_V8HI:
15185 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15186 exp, target, false);
15187 case ALTIVEC_BUILTIN_LVXL_V16QI:
15188 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15189 exp, target, false);
15190 case ALTIVEC_BUILTIN_LVX_V1TI:
15191 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v1ti,
15192 exp, target, false);
15193 case ALTIVEC_BUILTIN_LVX_V2DF:
15194 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
15195 exp, target, false);
15196 case ALTIVEC_BUILTIN_LVX_V2DI:
15197 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
15198 exp, target, false);
15199 case ALTIVEC_BUILTIN_LVX_V4SF:
15200 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
15201 exp, target, false);
15202 case ALTIVEC_BUILTIN_LVX:
15203 case ALTIVEC_BUILTIN_LVX_V4SI:
15204 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
15205 exp, target, false);
15206 case ALTIVEC_BUILTIN_LVX_V8HI:
15207 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
15208 exp, target, false);
15209 case ALTIVEC_BUILTIN_LVX_V16QI:
15210 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
15211 exp, target, false);
15212 case ALTIVEC_BUILTIN_LVLX:
15213 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15214 exp, target, true);
15215 case ALTIVEC_BUILTIN_LVLXL:
15216 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15217 exp, target, true);
15218 case ALTIVEC_BUILTIN_LVRX:
15219 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15220 exp, target, true);
15221 case ALTIVEC_BUILTIN_LVRXL:
15222 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15223 exp, target, true);
15224 case VSX_BUILTIN_LXVD2X_V1TI:
15225 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15226 exp, target, false);
15227 case VSX_BUILTIN_LXVD2X_V2DF:
15228 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15229 exp, target, false);
15230 case VSX_BUILTIN_LXVD2X_V2DI:
15231 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15232 exp, target, false);
15233 case VSX_BUILTIN_LXVW4X_V4SF:
15234 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15235 exp, target, false);
15236 case VSX_BUILTIN_LXVW4X_V4SI:
15237 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15238 exp, target, false);
15239 case VSX_BUILTIN_LXVW4X_V8HI:
15240 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15241 exp, target, false);
15242 case VSX_BUILTIN_LXVW4X_V16QI:
15243 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15244 exp, target, false);
15245 /* For the following on big endian, it's ok to use any appropriate
15246 unaligned-supporting load, so use a generic expander. For
15247 little-endian, the exact element-reversing instruction must
15248 be used. */
15249 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15251 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15252 : CODE_FOR_vsx_ld_elemrev_v2df);
15253 return altivec_expand_lv_builtin (code, exp, target, false);
15255 case VSX_BUILTIN_LD_ELEMREV_V1TI:
15257 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti
15258 : CODE_FOR_vsx_ld_elemrev_v1ti);
15259 return altivec_expand_lv_builtin (code, exp, target, false);
15261 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15263 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15264 : CODE_FOR_vsx_ld_elemrev_v2di);
15265 return altivec_expand_lv_builtin (code, exp, target, false);
15267 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15269 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15270 : CODE_FOR_vsx_ld_elemrev_v4sf);
15271 return altivec_expand_lv_builtin (code, exp, target, false);
15273 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15275 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15276 : CODE_FOR_vsx_ld_elemrev_v4si);
15277 return altivec_expand_lv_builtin (code, exp, target, false);
15279 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15281 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15282 : CODE_FOR_vsx_ld_elemrev_v8hi);
15283 return altivec_expand_lv_builtin (code, exp, target, false);
15285 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15287 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15288 : CODE_FOR_vsx_ld_elemrev_v16qi);
15289 return altivec_expand_lv_builtin (code, exp, target, false);
15291 break;
15292 default:
15293 break;
15294 /* Fall through. */
15297 *expandedp = false;
15298 return NULL_RTX;
15301 /* Check whether a builtin function is supported in this target
15302 configuration. */
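/* For instance, a built-in whose info mask is
   RS6000_BTM_ALTIVEC | RS6000_BTM_VSX is reported as supported only if
   both of those feature bits are present in rs6000_builtin_mask. */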
15303 bool
15304 rs6000_builtin_is_supported_p (enum rs6000_builtins fncode)
15306 HOST_WIDE_INT fnmask = rs6000_builtin_info[fncode].mask;
15307 if ((fnmask & rs6000_builtin_mask) != fnmask)
15308 return false;
15309 else
15310 return true;
15313 /* Raise an error message for a builtin function that is called without the
15314 appropriate target options being set. */
15316 static void
15317 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15319 size_t uns_fncode = (size_t) fncode;
15320 const char *name = rs6000_builtin_info[uns_fncode].name;
15321 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
15323 gcc_assert (name != NULL);
15324 if ((fnmask & RS6000_BTM_CELL) != 0)
15325 error ("builtin function %qs is only valid for the cell processor", name);
15326 else if ((fnmask & RS6000_BTM_VSX) != 0)
15327 error ("builtin function %qs requires the %qs option", name, "-mvsx");
15328 else if ((fnmask & RS6000_BTM_HTM) != 0)
15329 error ("builtin function %qs requires the %qs option", name, "-mhtm");
15330 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
15331 error ("builtin function %qs requires the %qs option", name, "-maltivec");
15332 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15333 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
15334 error ("builtin function %qs requires the %qs and %qs options",
15335 name, "-mhard-dfp", "-mpower8-vector");
15336 else if ((fnmask & RS6000_BTM_DFP) != 0)
15337 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
15338 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
15339 error ("builtin function %qs requires the %qs option", name,
15340 "-mpower8-vector");
15341 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15342 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
15343 error ("builtin function %qs requires the %qs and %qs options",
15344 name, "-mcpu=power9", "-m64");
15345 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
15346 error ("builtin function %qs requires the %qs option", name,
15347 "-mcpu=power9");
15348 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15349 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
15350 error ("builtin function %qs requires the %qs and %qs options",
15351 name, "-mcpu=power9", "-m64");
15352 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
15353 error ("builtin function %qs requires the %qs option", name,
15354 "-mcpu=power9");
15355 else if ((fnmask & RS6000_BTM_LDBL128) == RS6000_BTM_LDBL128)
15357 if (!TARGET_HARD_FLOAT)
15358 error ("builtin function %qs requires the %qs option", name,
15359 "-mhard-float");
15360 else
15361 error ("builtin function %qs requires the %qs option", name,
15362 TARGET_IEEEQUAD ? "-mabi=ibmlongdouble" : "-mlong-double-128");
15364 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
15365 error ("builtin function %qs requires the %qs option", name,
15366 "-mhard-float");
15367 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
15368 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
15369 name);
15370 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
15371 error ("builtin function %qs requires the %qs option", name, "-mfloat128");
15372 else if ((fnmask & (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15373 == (RS6000_BTM_POPCNTD | RS6000_BTM_POWERPC64))
15374 error ("builtin function %qs requires the %qs (or newer), and "
15375 "%qs or %qs options",
15376 name, "-mcpu=power7", "-m64", "-mpowerpc64");
15377 else
15378 error ("builtin function %qs is not supported with the current options",
15379 name);
15382 /* Target hook for early folding of built-ins, shamelessly stolen
15383 from ia64.c. */
15385 static tree
15386 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
15387 int n_args ATTRIBUTE_UNUSED,
15388 tree *args ATTRIBUTE_UNUSED,
15389 bool ignore ATTRIBUTE_UNUSED)
15391 #ifdef SUBTARGET_FOLD_BUILTIN
15392 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
15393 #else
15394 return NULL_TREE;
15395 #endif
15398 /* Helper function to sort out which built-ins may be valid without having
15399 a LHS. */
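/* Currently these are the STVX store built-ins folded below: their
   useful effect is the memory write, so no result value is required. */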
15400 static bool
15401 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
15403 switch (fn_code)
15405 case ALTIVEC_BUILTIN_STVX_V16QI:
15406 case ALTIVEC_BUILTIN_STVX_V8HI:
15407 case ALTIVEC_BUILTIN_STVX_V4SI:
15408 case ALTIVEC_BUILTIN_STVX_V4SF:
15409 case ALTIVEC_BUILTIN_STVX_V2DI:
15410 case ALTIVEC_BUILTIN_STVX_V2DF:
15411 return true;
15412 default:
15413 return false;
15417 /* Helper function to handle the gimple folding of a vector compare
15418 operation. This sets up true/false vectors, and uses the
15419 VEC_COND_EXPR operation.
15420 CODE indicates which comparison is to be made. (EQ, GT, ...).
15421 TYPE indicates the type of the result. */
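/* Schematically, for an EQ comparison on V4SI operands the folded form
   is

     cmp = arg0 == arg1;
     VEC_COND_EXPR (cmp, {-1,-1,-1,-1}, {0,0,0,0});

   matching the all-ones/all-zeros lane convention of the AltiVec and
   VSX compare instructions. */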
15422 static tree
15423 fold_build_vec_cmp (tree_code code, tree type,
15424 tree arg0, tree arg1)
15426 tree cmp_type = build_same_sized_truth_vector_type (type);
15427 tree zero_vec = build_zero_cst (type);
15428 tree minus_one_vec = build_minus_one_cst (type);
15429 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
15430 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
15433 /* Helper function to handle the in-between steps for the
15434 vector compare built-ins. */
15435 static void
15436 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
15438 tree arg0 = gimple_call_arg (stmt, 0);
15439 tree arg1 = gimple_call_arg (stmt, 1);
15440 tree lhs = gimple_call_lhs (stmt);
15441 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
15442 gimple *g = gimple_build_assign (lhs, cmp);
15443 gimple_set_location (g, gimple_location (stmt));
15444 gsi_replace (gsi, g, true);
15447 /* Helper function to handle the vector merge[hl] built-ins. The
15448 implementation difference between the h and l versions is in the
15449 values used when building the permute vector for the high-word
15450 versus the low-word merge. The variance is keyed off the use_high parameter. */
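/* For a 4-element vector, a use_high of 0 builds the permute selector
   {0, 4, 1, 5} and a use_high of 1 builds {2, 6, 3, 7}, interleaving
   corresponding elements of the two input vectors. */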
15451 static void
15452 fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high)
15454 tree arg0 = gimple_call_arg (stmt, 0);
15455 tree arg1 = gimple_call_arg (stmt, 1);
15456 tree lhs = gimple_call_lhs (stmt);
15457 tree lhs_type = TREE_TYPE (lhs);
15458 tree lhs_type_type = TREE_TYPE (lhs_type);
15459 int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type);
15460 int midpoint = n_elts / 2;
15461 int offset = 0;
15463 if (use_high == 1)
15464 offset = midpoint;
15466 tree_vector_builder elts (lhs_type, VECTOR_CST_NELTS (arg0), 1);
15468 for (int i = 0; i < midpoint; i++)
15470 elts.safe_push (build_int_cst (lhs_type_type, offset + i));
15471 elts.safe_push (build_int_cst (lhs_type_type, offset + n_elts + i));
15474 tree permute = elts.build ();
15476 gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute);
15477 gimple_set_location (g, gimple_location (stmt));
15478 gsi_replace (gsi, g, true);
15481 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
15482 a constant, use rs6000_fold_builtin.) */
15484 bool
15485 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
15487 gimple *stmt = gsi_stmt (*gsi);
15488 tree fndecl = gimple_call_fndecl (stmt);
15489 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
15490 enum rs6000_builtins fn_code
15491 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15492 tree arg0, arg1, lhs, temp;
15493 gimple *g;
15495 size_t uns_fncode = (size_t) fn_code;
15496 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
15497 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
15498 const char *fn_name2 = (icode != CODE_FOR_nothing)
15499 ? get_insn_name ((int) icode)
15500 : "nothing";
15502 if (TARGET_DEBUG_BUILTIN)
15503 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
15504 fn_code, fn_name1, fn_name2);
15506 if (!rs6000_fold_gimple)
15507 return false;
15509 /* Prevent gimple folding for code that does not have a LHS, unless it is
15510 allowed per the rs6000_builtin_valid_without_lhs helper function. */
15511 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
15512 return false;
15514 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
15515 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
15516 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
15517 if (!func_valid_p)
15518 return false;
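/* Each case below replaces the built-in call statement with equivalent
   generic gimple (PLUS_EXPR, MIN_EXPR, VEC_PERM_EXPR, ...), so that
   later passes can optimize the operation like any other vector
   arithmetic. */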
15520 switch (fn_code)
15522 /* Flavors of vec_add. We deliberately don't expand
15523 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
15524 TImode, resulting in much poorer code generation. */
15525 case ALTIVEC_BUILTIN_VADDUBM:
15526 case ALTIVEC_BUILTIN_VADDUHM:
15527 case ALTIVEC_BUILTIN_VADDUWM:
15528 case P8V_BUILTIN_VADDUDM:
15529 case ALTIVEC_BUILTIN_VADDFP:
15530 case VSX_BUILTIN_XVADDDP:
15531 arg0 = gimple_call_arg (stmt, 0);
15532 arg1 = gimple_call_arg (stmt, 1);
15533 lhs = gimple_call_lhs (stmt);
15534 g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
15535 gimple_set_location (g, gimple_location (stmt));
15536 gsi_replace (gsi, g, true);
15537 return true;
15538 /* Flavors of vec_sub. We deliberately don't expand
15539 P8V_BUILTIN_VSUBUQM. */
15540 case ALTIVEC_BUILTIN_VSUBUBM:
15541 case ALTIVEC_BUILTIN_VSUBUHM:
15542 case ALTIVEC_BUILTIN_VSUBUWM:
15543 case P8V_BUILTIN_VSUBUDM:
15544 case ALTIVEC_BUILTIN_VSUBFP:
15545 case VSX_BUILTIN_XVSUBDP:
15546 arg0 = gimple_call_arg (stmt, 0);
15547 arg1 = gimple_call_arg (stmt, 1);
15548 lhs = gimple_call_lhs (stmt);
15549 g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
15550 gimple_set_location (g, gimple_location (stmt));
15551 gsi_replace (gsi, g, true);
15552 return true;
15553 case VSX_BUILTIN_XVMULSP:
15554 case VSX_BUILTIN_XVMULDP:
15555 arg0 = gimple_call_arg (stmt, 0);
15556 arg1 = gimple_call_arg (stmt, 1);
15557 lhs = gimple_call_lhs (stmt);
15558 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
15559 gimple_set_location (g, gimple_location (stmt));
15560 gsi_replace (gsi, g, true);
15561 return true;
15562 /* Even element flavors of vec_mul (signed). */
15563 case ALTIVEC_BUILTIN_VMULESB:
15564 case ALTIVEC_BUILTIN_VMULESH:
15565 case P8V_BUILTIN_VMULESW:
15566 /* Even element flavors of vec_mul (unsigned). */
15567 case ALTIVEC_BUILTIN_VMULEUB:
15568 case ALTIVEC_BUILTIN_VMULEUH:
15569 case P8V_BUILTIN_VMULEUW:
15570 arg0 = gimple_call_arg (stmt, 0);
15571 arg1 = gimple_call_arg (stmt, 1);
15572 lhs = gimple_call_lhs (stmt);
15573 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
15574 gimple_set_location (g, gimple_location (stmt));
15575 gsi_replace (gsi, g, true);
15576 return true;
15577 /* Odd element flavors of vec_mul (signed). */
15578 case ALTIVEC_BUILTIN_VMULOSB:
15579 case ALTIVEC_BUILTIN_VMULOSH:
15580 case P8V_BUILTIN_VMULOSW:
15581 /* Odd element flavors of vec_mul (unsigned). */
15582 case ALTIVEC_BUILTIN_VMULOUB:
15583 case ALTIVEC_BUILTIN_VMULOUH:
15584 case P8V_BUILTIN_VMULOUW:
15585 arg0 = gimple_call_arg (stmt, 0);
15586 arg1 = gimple_call_arg (stmt, 1);
15587 lhs = gimple_call_lhs (stmt);
15588 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
15589 gimple_set_location (g, gimple_location (stmt));
15590 gsi_replace (gsi, g, true);
15591 return true;
15592 /* Flavors of vec_div (Integer). */
15593 case VSX_BUILTIN_DIV_V2DI:
15594 case VSX_BUILTIN_UDIV_V2DI:
15595 arg0 = gimple_call_arg (stmt, 0);
15596 arg1 = gimple_call_arg (stmt, 1);
15597 lhs = gimple_call_lhs (stmt);
15598 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
15599 gimple_set_location (g, gimple_location (stmt));
15600 gsi_replace (gsi, g, true);
15601 return true;
15602 /* Flavors of vec_div (Float). */
15603 case VSX_BUILTIN_XVDIVSP:
15604 case VSX_BUILTIN_XVDIVDP:
15605 arg0 = gimple_call_arg (stmt, 0);
15606 arg1 = gimple_call_arg (stmt, 1);
15607 lhs = gimple_call_lhs (stmt);
15608 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
15609 gimple_set_location (g, gimple_location (stmt));
15610 gsi_replace (gsi, g, true);
15611 return true;
15612 /* Flavors of vec_and. */
15613 case ALTIVEC_BUILTIN_VAND:
15614 arg0 = gimple_call_arg (stmt, 0);
15615 arg1 = gimple_call_arg (stmt, 1);
15616 lhs = gimple_call_lhs (stmt);
15617 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
15618 gimple_set_location (g, gimple_location (stmt));
15619 gsi_replace (gsi, g, true);
15620 return true;
15621 /* Flavors of vec_andc. */
15622 case ALTIVEC_BUILTIN_VANDC:
15623 arg0 = gimple_call_arg (stmt, 0);
15624 arg1 = gimple_call_arg (stmt, 1);
15625 lhs = gimple_call_lhs (stmt);
15626 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15627 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15628 gimple_set_location (g, gimple_location (stmt));
15629 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15630 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
15631 gimple_set_location (g, gimple_location (stmt));
15632 gsi_replace (gsi, g, true);
15633 return true;
15634 /* Flavors of vec_nand. */
15635 case P8V_BUILTIN_VEC_NAND:
15636 case P8V_BUILTIN_NAND_V16QI:
15637 case P8V_BUILTIN_NAND_V8HI:
15638 case P8V_BUILTIN_NAND_V4SI:
15639 case P8V_BUILTIN_NAND_V4SF:
15640 case P8V_BUILTIN_NAND_V2DF:
15641 case P8V_BUILTIN_NAND_V2DI:
15642 arg0 = gimple_call_arg (stmt, 0);
15643 arg1 = gimple_call_arg (stmt, 1);
15644 lhs = gimple_call_lhs (stmt);
15645 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15646 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
15647 gimple_set_location (g, gimple_location (stmt));
15648 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15649 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15650 gimple_set_location (g, gimple_location (stmt));
15651 gsi_replace (gsi, g, true);
15652 return true;
15653 /* Flavors of vec_or. */
15654 case ALTIVEC_BUILTIN_VOR:
15655 arg0 = gimple_call_arg (stmt, 0);
15656 arg1 = gimple_call_arg (stmt, 1);
15657 lhs = gimple_call_lhs (stmt);
15658 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
15659 gimple_set_location (g, gimple_location (stmt));
15660 gsi_replace (gsi, g, true);
15661 return true;
15662 /* flavors of vec_orc. */
15663 case P8V_BUILTIN_ORC_V16QI:
15664 case P8V_BUILTIN_ORC_V8HI:
15665 case P8V_BUILTIN_ORC_V4SI:
15666 case P8V_BUILTIN_ORC_V4SF:
15667 case P8V_BUILTIN_ORC_V2DF:
15668 case P8V_BUILTIN_ORC_V2DI:
15669 arg0 = gimple_call_arg (stmt, 0);
15670 arg1 = gimple_call_arg (stmt, 1);
15671 lhs = gimple_call_lhs (stmt);
15672 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15673 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
15674 gimple_set_location (g, gimple_location (stmt));
15675 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15676 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
15677 gimple_set_location (g, gimple_location (stmt));
15678 gsi_replace (gsi, g, true);
15679 return true;
15680 /* Flavors of vec_xor. */
15681 case ALTIVEC_BUILTIN_VXOR:
15682 arg0 = gimple_call_arg (stmt, 0);
15683 arg1 = gimple_call_arg (stmt, 1);
15684 lhs = gimple_call_lhs (stmt);
15685 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
15686 gimple_set_location (g, gimple_location (stmt));
15687 gsi_replace (gsi, g, true);
15688 return true;
15689 /* Flavors of vec_nor. */
15690 case ALTIVEC_BUILTIN_VNOR:
15691 arg0 = gimple_call_arg (stmt, 0);
15692 arg1 = gimple_call_arg (stmt, 1);
15693 lhs = gimple_call_lhs (stmt);
15694 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15695 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
15696 gimple_set_location (g, gimple_location (stmt));
15697 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15698 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15699 gimple_set_location (g, gimple_location (stmt));
15700 gsi_replace (gsi, g, true);
15701 return true;
15702 /* flavors of vec_abs. */
15703 case ALTIVEC_BUILTIN_ABS_V16QI:
15704 case ALTIVEC_BUILTIN_ABS_V8HI:
15705 case ALTIVEC_BUILTIN_ABS_V4SI:
15706 case ALTIVEC_BUILTIN_ABS_V4SF:
15707 case P8V_BUILTIN_ABS_V2DI:
15708 case VSX_BUILTIN_XVABSDP:
15709 arg0 = gimple_call_arg (stmt, 0);
15710 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15711 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15712 return false;
15713 lhs = gimple_call_lhs (stmt);
15714 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
15715 gimple_set_location (g, gimple_location (stmt));
15716 gsi_replace (gsi, g, true);
15717 return true;
15718 /* flavors of vec_min. */
15719 case VSX_BUILTIN_XVMINDP:
15720 case P8V_BUILTIN_VMINSD:
15721 case P8V_BUILTIN_VMINUD:
15722 case ALTIVEC_BUILTIN_VMINSB:
15723 case ALTIVEC_BUILTIN_VMINSH:
15724 case ALTIVEC_BUILTIN_VMINSW:
15725 case ALTIVEC_BUILTIN_VMINUB:
15726 case ALTIVEC_BUILTIN_VMINUH:
15727 case ALTIVEC_BUILTIN_VMINUW:
15728 case ALTIVEC_BUILTIN_VMINFP:
15729 arg0 = gimple_call_arg (stmt, 0);
15730 arg1 = gimple_call_arg (stmt, 1);
15731 lhs = gimple_call_lhs (stmt);
15732 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
15733 gimple_set_location (g, gimple_location (stmt));
15734 gsi_replace (gsi, g, true);
15735 return true;
15736 /* flavors of vec_max. */
15737 case VSX_BUILTIN_XVMAXDP:
15738 case P8V_BUILTIN_VMAXSD:
15739 case P8V_BUILTIN_VMAXUD:
15740 case ALTIVEC_BUILTIN_VMAXSB:
15741 case ALTIVEC_BUILTIN_VMAXSH:
15742 case ALTIVEC_BUILTIN_VMAXSW:
15743 case ALTIVEC_BUILTIN_VMAXUB:
15744 case ALTIVEC_BUILTIN_VMAXUH:
15745 case ALTIVEC_BUILTIN_VMAXUW:
15746 case ALTIVEC_BUILTIN_VMAXFP:
15747 arg0 = gimple_call_arg (stmt, 0);
15748 arg1 = gimple_call_arg (stmt, 1);
15749 lhs = gimple_call_lhs (stmt);
15750 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
15751 gimple_set_location (g, gimple_location (stmt));
15752 gsi_replace (gsi, g, true);
15753 return true;
15754 /* Flavors of vec_eqv. */
15755 case P8V_BUILTIN_EQV_V16QI:
15756 case P8V_BUILTIN_EQV_V8HI:
15757 case P8V_BUILTIN_EQV_V4SI:
15758 case P8V_BUILTIN_EQV_V4SF:
15759 case P8V_BUILTIN_EQV_V2DF:
15760 case P8V_BUILTIN_EQV_V2DI:
15761 arg0 = gimple_call_arg (stmt, 0);
15762 arg1 = gimple_call_arg (stmt, 1);
15763 lhs = gimple_call_lhs (stmt);
15764 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
15765 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
15766 gimple_set_location (g, gimple_location (stmt));
15767 gsi_insert_before (gsi, g, GSI_SAME_STMT);
15768 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
15769 gimple_set_location (g, gimple_location (stmt));
15770 gsi_replace (gsi, g, true);
15771 return true;
15772 /* Flavors of vec_rotate_left. */
15773 case ALTIVEC_BUILTIN_VRLB:
15774 case ALTIVEC_BUILTIN_VRLH:
15775 case ALTIVEC_BUILTIN_VRLW:
15776 case P8V_BUILTIN_VRLD:
15777 arg0 = gimple_call_arg (stmt, 0);
15778 arg1 = gimple_call_arg (stmt, 1);
15779 lhs = gimple_call_lhs (stmt);
15780 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
15781 gimple_set_location (g, gimple_location (stmt));
15782 gsi_replace (gsi, g, true);
15783 return true;
15784 /* Flavors of vector shift right algebraic.
15785 vec_sra{b,h,w} -> vsra{b,h,w}. */
15786 case ALTIVEC_BUILTIN_VSRAB:
15787 case ALTIVEC_BUILTIN_VSRAH:
15788 case ALTIVEC_BUILTIN_VSRAW:
15789 case P8V_BUILTIN_VSRAD:
15790 arg0 = gimple_call_arg (stmt, 0);
15791 arg1 = gimple_call_arg (stmt, 1);
15792 lhs = gimple_call_lhs (stmt);
15793 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
15794 gimple_set_location (g, gimple_location (stmt));
15795 gsi_replace (gsi, g, true);
15796 return true;
15797 /* Flavors of vector shift left.
15798 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
15799 case ALTIVEC_BUILTIN_VSLB:
15800 case ALTIVEC_BUILTIN_VSLH:
15801 case ALTIVEC_BUILTIN_VSLW:
15802 case P8V_BUILTIN_VSLD:
15803 arg0 = gimple_call_arg (stmt, 0);
15804 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
15805 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
15806 return false;
15807 arg1 = gimple_call_arg (stmt, 1);
15808 lhs = gimple_call_lhs (stmt);
15809 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
15810 gimple_set_location (g, gimple_location (stmt));
15811 gsi_replace (gsi, g, true);
15812 return true;
15813 /* Flavors of vector shift right. */
15814 case ALTIVEC_BUILTIN_VSRB:
15815 case ALTIVEC_BUILTIN_VSRH:
15816 case ALTIVEC_BUILTIN_VSRW:
15817 case P8V_BUILTIN_VSRD:
15819 arg0 = gimple_call_arg (stmt, 0);
15820 arg1 = gimple_call_arg (stmt, 1);
15821 lhs = gimple_call_lhs (stmt);
15822 gimple_seq stmts = NULL;
15823 /* Convert arg0 to unsigned so RSHIFT_EXPR is a logical shift. */
15824 tree arg0_unsigned
15825 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
15826 unsigned_type_for (TREE_TYPE (arg0)), arg0);
15827 tree res
15828 = gimple_build (&stmts, RSHIFT_EXPR,
15829 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
15830 /* Convert result back to the lhs type. */
15831 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
15832 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15833 update_call_from_tree (gsi, res);
15834 return true;
15836 /* Vector loads. */
15837 case ALTIVEC_BUILTIN_LVX_V16QI:
15838 case ALTIVEC_BUILTIN_LVX_V8HI:
15839 case ALTIVEC_BUILTIN_LVX_V4SI:
15840 case ALTIVEC_BUILTIN_LVX_V4SF:
15841 case ALTIVEC_BUILTIN_LVX_V2DI:
15842 case ALTIVEC_BUILTIN_LVX_V2DF:
15843 case ALTIVEC_BUILTIN_LVX_V1TI:
15845 arg0 = gimple_call_arg (stmt, 0); // offset
15846 arg1 = gimple_call_arg (stmt, 1); // address
15847 lhs = gimple_call_lhs (stmt);
15848 location_t loc = gimple_location (stmt);
15849 /* Since arg1 may be cast to a different type, just use ptr_type_node
15850 here instead of trying to enforce TBAA on pointer types. */
15851 tree arg1_type = ptr_type_node;
15852 tree lhs_type = TREE_TYPE (lhs);
15853 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15854 the tree using the value from arg0. The resulting type will match
15855 the type of arg1. */
15856 gimple_seq stmts = NULL;
15857 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
15858 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15859 arg1_type, arg1, temp_offset);
15860 /* Mask off the low four address bits, as the lvx hardware does. */
15861 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15862 arg1_type, temp_addr,
15863 build_int_cst (arg1_type, -16));
15864 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15865 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
15866 take an offset, but since we've already incorporated the offset
15867 above, here we just pass in a zero. */
15868 gimple *g
15869 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
15870 build_int_cst (arg1_type, 0)));
15871 gimple_set_location (g, loc);
15872 gsi_replace (gsi, g, true);
15873 return true;
15875 /* Vector stores. */
15876 case ALTIVEC_BUILTIN_STVX_V16QI:
15877 case ALTIVEC_BUILTIN_STVX_V8HI:
15878 case ALTIVEC_BUILTIN_STVX_V4SI:
15879 case ALTIVEC_BUILTIN_STVX_V4SF:
15880 case ALTIVEC_BUILTIN_STVX_V2DI:
15881 case ALTIVEC_BUILTIN_STVX_V2DF:
15883 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
15884 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
15885 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
15886 location_t loc = gimple_location (stmt);
15887 tree arg0_type = TREE_TYPE (arg0);
15888 /* Use ptr_type_node (no TBAA) for the arg2_type.
15889 FIXME: (Richard) "A proper fix would be to transition this type as
15890 seen from the frontend to GIMPLE, for example in a similar way we
15891 do for MEM_REFs by piggy-backing that on an extra argument, a
15892 constant zero pointer of the alias pointer type to use (which would
15893 also serve as a type indicator of the store itself). I'd use a
15894 target specific internal function for this (not sure if we can have
15895 those target specific, but I guess if it's folded away then that's
15896 fine) and get away with the overload set." */
15897 tree arg2_type = ptr_type_node;
15898 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
15899 the tree using the value from arg0. The resulting type will match
15900 the type of arg2. */
15901 gimple_seq stmts = NULL;
15902 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
15903 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
15904 arg2_type, arg2, temp_offset);
15905 /* Mask off the low four address bits, as the stvx hardware does. */
15906 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
15907 arg2_type, temp_addr,
15908 build_int_cst (arg2_type, -16));
15909 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
15910 /* The desired gimple result should be similar to:
15911 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
15912 gimple *g
15913 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
15914 build_int_cst (arg2_type, 0)), arg0);
15915 gimple_set_location (g, loc);
15916 gsi_replace (gsi, g, true);
15917 return true;
15920 /* Vector Fused multiply-add (fma). */
15921 case ALTIVEC_BUILTIN_VMADDFP:
15922 case VSX_BUILTIN_XVMADDDP:
15923 case ALTIVEC_BUILTIN_VMLADDUHM:
15925 arg0 = gimple_call_arg (stmt, 0);
15926 arg1 = gimple_call_arg (stmt, 1);
15927 tree arg2 = gimple_call_arg (stmt, 2);
15928 lhs = gimple_call_lhs (stmt);
15929 gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2);
15930 gimple_call_set_lhs (g, lhs);
15931 gimple_call_set_nothrow (g, true);
15932 gimple_set_location (g, gimple_location (stmt));
15933 gsi_replace (gsi, g, true);
15934 return true;
15937 /* Vector compares; EQ, NE, GE, GT, LE. */
15938 case ALTIVEC_BUILTIN_VCMPEQUB:
15939 case ALTIVEC_BUILTIN_VCMPEQUH:
15940 case ALTIVEC_BUILTIN_VCMPEQUW:
15941 case P8V_BUILTIN_VCMPEQUD:
15942 fold_compare_helper (gsi, EQ_EXPR, stmt);
15943 return true;
15945 case P9V_BUILTIN_CMPNEB:
15946 case P9V_BUILTIN_CMPNEH:
15947 case P9V_BUILTIN_CMPNEW:
15948 fold_compare_helper (gsi, NE_EXPR, stmt);
15949 return true;
15951 case VSX_BUILTIN_CMPGE_16QI:
15952 case VSX_BUILTIN_CMPGE_U16QI:
15953 case VSX_BUILTIN_CMPGE_8HI:
15954 case VSX_BUILTIN_CMPGE_U8HI:
15955 case VSX_BUILTIN_CMPGE_4SI:
15956 case VSX_BUILTIN_CMPGE_U4SI:
15957 case VSX_BUILTIN_CMPGE_2DI:
15958 case VSX_BUILTIN_CMPGE_U2DI:
15959 fold_compare_helper (gsi, GE_EXPR, stmt);
15960 return true;
15962 case ALTIVEC_BUILTIN_VCMPGTSB:
15963 case ALTIVEC_BUILTIN_VCMPGTUB:
15964 case ALTIVEC_BUILTIN_VCMPGTSH:
15965 case ALTIVEC_BUILTIN_VCMPGTUH:
15966 case ALTIVEC_BUILTIN_VCMPGTSW:
15967 case ALTIVEC_BUILTIN_VCMPGTUW:
15968 case P8V_BUILTIN_VCMPGTUD:
15969 case P8V_BUILTIN_VCMPGTSD:
15970 fold_compare_helper (gsi, GT_EXPR, stmt);
15971 return true;
15973 case VSX_BUILTIN_CMPLE_16QI:
15974 case VSX_BUILTIN_CMPLE_U16QI:
15975 case VSX_BUILTIN_CMPLE_8HI:
15976 case VSX_BUILTIN_CMPLE_U8HI:
15977 case VSX_BUILTIN_CMPLE_4SI:
15978 case VSX_BUILTIN_CMPLE_U4SI:
15979 case VSX_BUILTIN_CMPLE_2DI:
15980 case VSX_BUILTIN_CMPLE_U2DI:
15981 fold_compare_helper (gsi, LE_EXPR, stmt);
15982 return true;
15984 /* flavors of vec_splat_[us]{8,16,32}. */
15985 case ALTIVEC_BUILTIN_VSPLTISB:
15986 case ALTIVEC_BUILTIN_VSPLTISH:
15987 case ALTIVEC_BUILTIN_VSPLTISW:
15989 int size;
15991 if (fn_code == ALTIVEC_BUILTIN_VSPLTISB)
15992 size = 8;
15993 else if (fn_code == ALTIVEC_BUILTIN_VSPLTISH)
15994 size = 16;
15995 else
15996 size = 32;
15998 arg0 = gimple_call_arg (stmt, 0);
15999 lhs = gimple_call_lhs (stmt);
16001 /* Only fold the vec_splat_*() if the low bits of arg 0 form a
16002 5-bit signed constant in the range -16 to +15. */
16003 if (TREE_CODE (arg0) != INTEGER_CST
16004 || !IN_RANGE (sext_hwi(TREE_INT_CST_LOW (arg0), size),
16005 -16, 15))
16006 return false;
16007 gimple_seq stmts = NULL;
16008 location_t loc = gimple_location (stmt);
16009 tree splat_value = gimple_convert (&stmts, loc,
16010 TREE_TYPE (TREE_TYPE (lhs)), arg0);
16011 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16012 tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value);
16013 g = gimple_build_assign (lhs, splat_tree);
16014 gimple_set_location (g, gimple_location (stmt));
16015 gsi_replace (gsi, g, true);
16016 return true;
16019 /* vec_mergel (integrals). */
16020 case ALTIVEC_BUILTIN_VMRGLH:
16021 case ALTIVEC_BUILTIN_VMRGLW:
16022 case VSX_BUILTIN_XXMRGLW_4SI:
16023 case ALTIVEC_BUILTIN_VMRGLB:
16024 case VSX_BUILTIN_VEC_MERGEL_V2DI:
16025 fold_mergehl_helper (gsi, stmt, 1);
16026 return true;
16027 /* vec_mergeh (integrals). */
16028 case ALTIVEC_BUILTIN_VMRGHH:
16029 case ALTIVEC_BUILTIN_VMRGHW:
16030 case VSX_BUILTIN_XXMRGHW_4SI:
16031 case ALTIVEC_BUILTIN_VMRGHB:
16032 case VSX_BUILTIN_VEC_MERGEH_V2DI:
16033 fold_mergehl_helper (gsi, stmt, 0);
16034 return true;
16035 default:
16036 if (TARGET_DEBUG_BUILTIN)
16037 fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
16038 fn_code, fn_name1, fn_name2);
16039 break;
16042 return false;
16045 /* Expand an expression EXP that calls a built-in function,
16046 with result going to TARGET if that's convenient
16047 (and in mode MODE if that's convenient).
16048 SUBTARGET may be used as the target for computing one of EXP's operands.
16049 IGNORE is nonzero if the value is to be ignored. */
16051 static rtx
16052 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16053 machine_mode mode ATTRIBUTE_UNUSED,
16054 int ignore ATTRIBUTE_UNUSED)
16056 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16057 enum rs6000_builtins fcode
16058 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16059 size_t uns_fcode = (size_t)fcode;
16060 const struct builtin_description *d;
16061 size_t i;
16062 rtx ret;
16063 bool success;
16064 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16065 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16066 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16068 /* We have two different modes (KFmode, TFmode) for the IEEE 128-bit
16069 floating point type: KFmode when long double is IBM extended double,
16070 and TFmode when long double is IEEE 128-bit. It is simpler if
16071 we only define one variant of the built-in function, and switch the code
16072 when defining it, rather than defining two built-ins and using the
16073 overload table in rs6000-c.c to switch between the two. If we don't have
16074 the proper assembler, don't do this switch because CODE_FOR_*kf* and
16075 CODE_FOR_*tf* will be CODE_FOR_nothing. */
16076 #ifdef HAVE_AS_POWER9
16077 if (FLOAT128_IEEE_P (TFmode))
16078 switch (icode)
16080 default:
16081 break;
16083 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
16084 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
16085 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
16086 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
16087 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
16088 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
16089 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
16090 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
16091 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
16092 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
16093 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
16094 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
16095 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
16097 #endif
16099 if (TARGET_DEBUG_BUILTIN)
16101 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16102 const char *name2 = (icode != CODE_FOR_nothing)
16103 ? get_insn_name ((int) icode)
16104 : "nothing";
16105 const char *name3;
16107 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16109 default: name3 = "unknown"; break;
16110 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16111 case RS6000_BTC_UNARY: name3 = "unary"; break;
16112 case RS6000_BTC_BINARY: name3 = "binary"; break;
16113 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16114 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16115 case RS6000_BTC_ABS: name3 = "abs"; break;
16116 case RS6000_BTC_DST: name3 = "dst"; break;
16120 fprintf (stderr,
16121 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16122 (name1) ? name1 : "---", fcode,
16123 (name2) ? name2 : "---", (int) icode,
16124 name3,
16125 func_valid_p ? "" : ", not valid");
16128 if (!func_valid_p)
16130 rs6000_invalid_builtin (fcode);
16132 /* Given it is invalid, just generate a normal call. */
16133 return expand_call (exp, target, ignore);
16136 switch (fcode)
16138 case RS6000_BUILTIN_RECIP:
16139 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16141 case RS6000_BUILTIN_RECIPF:
16142 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16144 case RS6000_BUILTIN_RSQRTF:
16145 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16147 case RS6000_BUILTIN_RSQRT:
16148 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16150 case POWER7_BUILTIN_BPERMD:
16151 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16152 ? CODE_FOR_bpermd_di
16153 : CODE_FOR_bpermd_si), exp, target);
16155 case RS6000_BUILTIN_GET_TB:
16156 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16157 target);
16159 case RS6000_BUILTIN_MFTB:
16160 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16161 ? CODE_FOR_rs6000_mftb_di
16162 : CODE_FOR_rs6000_mftb_si),
16163 target);
16165 case RS6000_BUILTIN_MFFS:
16166 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16168 case RS6000_BUILTIN_MTFSF:
16169 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16171 case RS6000_BUILTIN_CPU_INIT:
16172 case RS6000_BUILTIN_CPU_IS:
16173 case RS6000_BUILTIN_CPU_SUPPORTS:
16174 return cpu_expand_builtin (fcode, exp, target);
16176 case MISC_BUILTIN_SPEC_BARRIER:
16178 emit_insn (gen_rs6000_speculation_barrier ());
16179 return NULL_RTX;
16182 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16183 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16185 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16186 : (int) CODE_FOR_altivec_lvsl_direct);
16187 machine_mode tmode = insn_data[icode2].operand[0].mode;
16188 machine_mode mode = insn_data[icode2].operand[1].mode;
16189 tree arg;
16190 rtx op, addr, pat;
16192 gcc_assert (TARGET_ALTIVEC);
16194 arg = CALL_EXPR_ARG (exp, 0);
16195 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16196 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16197 addr = memory_address (mode, op);
16198 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16199 op = addr;
16200 else
16202 /* For the load case we need to negate the address. */
16203 op = gen_reg_rtx (GET_MODE (addr));
16204 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16206 op = gen_rtx_MEM (mode, op);
16208 if (target == 0
16209 || GET_MODE (target) != tmode
16210 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16211 target = gen_reg_rtx (tmode);
16213 pat = GEN_FCN (icode2) (target, op);
16214 if (!pat)
16215 return 0;
16216 emit_insn (pat);
16218 return target;
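/* Illustrative note, not part of the original source: the negation above
   relies on the identity that, for an unaligned pointer value P,
   16 - ((-P) & 15) equals P & 15, which is why lvsr on the negated
   address yields the realignment mask that a load from P needs.  A
   standalone check of that identity:  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
example_mask_identity (uintptr_t p)
{
  if ((p & 15) != 0)
    assert (16 - ((0 - p) & 15) == (p & 15));
}
#endif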
16221 case ALTIVEC_BUILTIN_VCFUX:
16222 case ALTIVEC_BUILTIN_VCFSX:
16223 case ALTIVEC_BUILTIN_VCTUXS:
16224 case ALTIVEC_BUILTIN_VCTSXS:
16225 /* FIXME: There's got to be a nicer way to handle this case than
16226 constructing a new CALL_EXPR. */
16227 if (call_expr_nargs (exp) == 1)
16229 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16230 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16232 break;
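/* Illustrative sketch, not part of the original source: the fix-up above
   supplies a zero scale operand when one of these conversion builtins is
   reached with a single argument, so the rewritten call behaves like the
   explicit two-argument form below.  */
#if 0
#include <altivec.h>

vector float
example_vcfsx (vector signed int x)
{
  return vec_ctf (x, 0);	/* signed int -> float, scale factor 2**0 */
}
#endif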
16234 /* For the pack and unpack int128 routines, fix up the builtin so it
16235 uses the correct IBM128 type. */
16236 case MISC_BUILTIN_PACK_IF:
16237 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16239 icode = CODE_FOR_packtf;
16240 fcode = MISC_BUILTIN_PACK_TF;
16241 uns_fcode = (size_t)fcode;
16243 break;
16245 case MISC_BUILTIN_UNPACK_IF:
16246 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
16248 icode = CODE_FOR_unpacktf;
16249 fcode = MISC_BUILTIN_UNPACK_TF;
16250 uns_fcode = (size_t)fcode;
16252 break;
16254 default:
16255 break;
16258 if (TARGET_ALTIVEC)
16260 ret = altivec_expand_builtin (exp, target, &success);
16262 if (success)
16263 return ret;
16265 if (TARGET_HTM)
16267 ret = htm_expand_builtin (exp, target, &success);
16269 if (success)
16270 return ret;
16273 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16274 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16275 gcc_assert (attr == RS6000_BTC_UNARY
16276 || attr == RS6000_BTC_BINARY
16277 || attr == RS6000_BTC_TERNARY
16278 || attr == RS6000_BTC_SPECIAL);
16280 /* Handle simple unary operations. */
16281 d = bdesc_1arg;
16282 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16283 if (d->code == fcode)
16284 return rs6000_expand_unop_builtin (icode, exp, target);
16286 /* Handle simple binary operations. */
16287 d = bdesc_2arg;
16288 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16289 if (d->code == fcode)
16290 return rs6000_expand_binop_builtin (icode, exp, target);
16292 /* Handle simple ternary operations. */
16293 d = bdesc_3arg;
16294 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16295 if (d->code == fcode)
16296 return rs6000_expand_ternop_builtin (icode, exp, target);
16298 /* Handle simple no-argument operations. */
16299 d = bdesc_0arg;
16300 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16301 if (d->code == fcode)
16302 return rs6000_expand_zeroop_builtin (icode, target);
16304 gcc_unreachable ();
16307 /* Create a builtin vector type with a name, taking care not to give
16308 the canonical type a name. */
16310 static tree
16311 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16313 tree result = build_vector_type (elt_type, num_elts);
16315 /* Copy so we don't give the canonical type a name. */
16316 result = build_variant_type_copy (result);
16318 add_builtin_type (name, result);
16320 return result;
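/* Illustrative sketch, not part of the original source: a typical use of
   rs6000_vector_type, mirroring a call made in rs6000_init_builtins below,
   creating a distinctly named variant of the canonical V4SF vector type.  */
#if 0
tree v4sf = rs6000_vector_type ("__vector float", float_type_node, 4);
#endif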
16323 static void
16324 rs6000_init_builtins (void)
16326 tree tdecl;
16327 tree ftype;
16328 machine_mode mode;
16330 if (TARGET_DEBUG_BUILTIN)
16331 fprintf (stderr, "rs6000_init_builtins%s%s\n",
16332 (TARGET_ALTIVEC) ? ", altivec" : "",
16333 (TARGET_VSX) ? ", vsx" : "");
16335 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16336 : "__vector long long",
16337 intDI_type_node, 2);
16338 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16339 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16340 intSI_type_node, 4);
16341 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16342 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16343 intHI_type_node, 8);
16344 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16345 intQI_type_node, 16);
16347 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16348 unsigned_intQI_type_node, 16);
16349 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16350 unsigned_intHI_type_node, 8);
16351 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16352 unsigned_intSI_type_node, 4);
16353 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16354 ? "__vector unsigned long"
16355 : "__vector unsigned long long",
16356 unsigned_intDI_type_node, 2);
16358 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16360 const_str_type_node
16361 = build_pointer_type (build_qualified_type (char_type_node,
16362 TYPE_QUAL_CONST));
16364 /* We use V1TI mode as a special container to hold __int128_t items that
16365 must live in VSX registers. */
16366 if (intTI_type_node)
16368 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16369 intTI_type_node, 1);
16370 unsigned_V1TI_type_node
16371 = rs6000_vector_type ("__vector unsigned __int128",
16372 unsigned_intTI_type_node, 1);
16375 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16376 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16377 'vector unsigned short'. */
16379 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16380 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16381 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16382 bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16383 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16385 long_integer_type_internal_node = long_integer_type_node;
16386 long_unsigned_type_internal_node = long_unsigned_type_node;
16387 long_long_integer_type_internal_node = long_long_integer_type_node;
16388 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16389 intQI_type_internal_node = intQI_type_node;
16390 uintQI_type_internal_node = unsigned_intQI_type_node;
16391 intHI_type_internal_node = intHI_type_node;
16392 uintHI_type_internal_node = unsigned_intHI_type_node;
16393 intSI_type_internal_node = intSI_type_node;
16394 uintSI_type_internal_node = unsigned_intSI_type_node;
16395 intDI_type_internal_node = intDI_type_node;
16396 uintDI_type_internal_node = unsigned_intDI_type_node;
16397 intTI_type_internal_node = intTI_type_node;
16398 uintTI_type_internal_node = unsigned_intTI_type_node;
16399 float_type_internal_node = float_type_node;
16400 double_type_internal_node = double_type_node;
16401 long_double_type_internal_node = long_double_type_node;
16402 dfloat64_type_internal_node = dfloat64_type_node;
16403 dfloat128_type_internal_node = dfloat128_type_node;
16404 void_type_internal_node = void_type_node;
16406 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16407 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16408 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16409 format that uses a pair of doubles, depending on the switches and
16410 defaults.
16412 If we don't support either 128-bit IBM double-double or IEEE 128-bit
16413 floating point, we need to make sure the type is non-zero, or else the
16414 self-test fails during bootstrap.
16416 Always create __ibm128 as a separate type, even if the current long double
16417 format is IBM extended double.
16419 For IEEE 128-bit floating point, always create the type __ieee128. If the
16420 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16421 __ieee128. */
16422 if (TARGET_FLOAT128_TYPE)
16424 if (TARGET_IEEEQUAD || !TARGET_LONG_DOUBLE_128)
16426 ibm128_float_type_node = make_node (REAL_TYPE);
16427 TYPE_PRECISION (ibm128_float_type_node) = 128;
16428 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
16429 layout_type (ibm128_float_type_node);
16431 else
16432 ibm128_float_type_node = long_double_type_node;
16434 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
16435 "__ibm128");
16437 ieee128_float_type_node
16438 = TARGET_IEEEQUAD ? long_double_type_node : float128_type_node;
16439 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
16440 "__ieee128");
16443 else
16444 ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
16446 /* Initialize the modes for builtin_function_type, mapping each machine
16447 mode to its tree type node. */
16448 builtin_mode_to_type[QImode][0] = integer_type_node;
16449 builtin_mode_to_type[HImode][0] = integer_type_node;
16450 builtin_mode_to_type[SImode][0] = intSI_type_node;
16451 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
16452 builtin_mode_to_type[DImode][0] = intDI_type_node;
16453 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
16454 builtin_mode_to_type[TImode][0] = intTI_type_node;
16455 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
16456 builtin_mode_to_type[SFmode][0] = float_type_node;
16457 builtin_mode_to_type[DFmode][0] = double_type_node;
16458 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
16459 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
16460 builtin_mode_to_type[TFmode][0] = long_double_type_node;
16461 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
16462 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
16463 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
16464 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
16465 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
16466 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
16467 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
16468 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
16469 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
16470 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
16471 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
16472 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
16473 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
16474 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
16476 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
16477 TYPE_NAME (bool_char_type_node) = tdecl;
16479 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
16480 TYPE_NAME (bool_short_type_node) = tdecl;
16482 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
16483 TYPE_NAME (bool_int_type_node) = tdecl;
16485 tdecl = add_builtin_type ("__pixel", pixel_type_node);
16486 TYPE_NAME (pixel_type_node) = tdecl;
16488 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
16489 bool_char_type_node, 16);
16490 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
16491 bool_short_type_node, 8);
16492 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
16493 bool_int_type_node, 4);
16494 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16495 ? "__vector __bool long"
16496 : "__vector __bool long long",
16497 bool_long_long_type_node, 2);
16498 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
16499 pixel_type_node, 8);
16501 /* Create Altivec and VSX builtins on machines with at least the
16502 general purpose extensions (970 and newer) to allow the use of
16503 the target attribute. */
16504 if (TARGET_EXTRA_BUILTINS)
16505 altivec_init_builtins ();
16506 if (TARGET_HTM)
16507 htm_init_builtins ();
16509 if (TARGET_EXTRA_BUILTINS)
16510 rs6000_common_init_builtins ();
16512 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
16513 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
16514 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
16516 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
16517 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
16518 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
16520 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
16521 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
16522 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
16524 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
16525 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
16526 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
16528 mode = (TARGET_64BIT) ? DImode : SImode;
16529 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
16530 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
16531 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
16533 ftype = build_function_type_list (unsigned_intDI_type_node,
16534 NULL_TREE);
16535 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
16537 if (TARGET_64BIT)
16538 ftype = build_function_type_list (unsigned_intDI_type_node,
16539 NULL_TREE);
16540 else
16541 ftype = build_function_type_list (unsigned_intSI_type_node,
16542 NULL_TREE);
16543 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
16545 ftype = build_function_type_list (double_type_node, NULL_TREE);
16546 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
16548 ftype = build_function_type_list (void_type_node,
16549 intSI_type_node, double_type_node,
16550 NULL_TREE);
16551 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
16553 ftype = build_function_type_list (void_type_node, NULL_TREE);
16554 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
16555 def_builtin ("__builtin_ppc_speculation_barrier", ftype,
16556 MISC_BUILTIN_SPEC_BARRIER);
16558 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
16559 NULL_TREE);
16560 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
16561 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
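/* Illustrative sketch, not part of the original source: user-level code
   built on the CPU builtins defined above; "power9" and "ieee128" are
   among the feature strings the GCC manual documents for these builtins.  */
#if 0
int
example_have_power9 (void)
{
  __builtin_cpu_init ();
  return __builtin_cpu_is ("power9") && __builtin_cpu_supports ("ieee128");
}
#endif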
16563 /* AIX libm provides clog as __clog. */
16564 if (TARGET_XCOFF &&
16565 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
16566 set_user_assembler_name (tdecl, "__clog");
16568 #ifdef SUBTARGET_INIT_BUILTINS
16569 SUBTARGET_INIT_BUILTINS;
16570 #endif
16573 /* Returns the rs6000 builtin decl for CODE. */
16575 static tree
16576 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
16578 HOST_WIDE_INT fnmask;
16580 if (code >= RS6000_BUILTIN_COUNT)
16581 return error_mark_node;
16583 fnmask = rs6000_builtin_info[code].mask;
16584 if ((fnmask & rs6000_builtin_mask) != fnmask)
16586 rs6000_invalid_builtin ((enum rs6000_builtins)code);
16587 return error_mark_node;
16590 return rs6000_builtin_decls[code];
16593 static void
16594 altivec_init_builtins (void)
16596 const struct builtin_description *d;
16597 size_t i;
16598 tree ftype;
16599 tree decl;
16600 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
16602 tree pvoid_type_node = build_pointer_type (void_type_node);
16604 tree pcvoid_type_node
16605 = build_pointer_type (build_qualified_type (void_type_node,
16606 TYPE_QUAL_CONST));
16608 tree int_ftype_opaque
16609 = build_function_type_list (integer_type_node,
16610 opaque_V4SI_type_node, NULL_TREE);
16611 tree opaque_ftype_opaque
16612 = build_function_type_list (integer_type_node, NULL_TREE);
16613 tree opaque_ftype_opaque_int
16614 = build_function_type_list (opaque_V4SI_type_node,
16615 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
16616 tree opaque_ftype_opaque_opaque_int
16617 = build_function_type_list (opaque_V4SI_type_node,
16618 opaque_V4SI_type_node, opaque_V4SI_type_node,
16619 integer_type_node, NULL_TREE);
16620 tree opaque_ftype_opaque_opaque_opaque
16621 = build_function_type_list (opaque_V4SI_type_node,
16622 opaque_V4SI_type_node, opaque_V4SI_type_node,
16623 opaque_V4SI_type_node, NULL_TREE);
16624 tree opaque_ftype_opaque_opaque
16625 = build_function_type_list (opaque_V4SI_type_node,
16626 opaque_V4SI_type_node, opaque_V4SI_type_node,
16627 NULL_TREE);
16628 tree int_ftype_int_opaque_opaque
16629 = build_function_type_list (integer_type_node,
16630 integer_type_node, opaque_V4SI_type_node,
16631 opaque_V4SI_type_node, NULL_TREE);
16632 tree int_ftype_int_v4si_v4si
16633 = build_function_type_list (integer_type_node,
16634 integer_type_node, V4SI_type_node,
16635 V4SI_type_node, NULL_TREE);
16636 tree int_ftype_int_v2di_v2di
16637 = build_function_type_list (integer_type_node,
16638 integer_type_node, V2DI_type_node,
16639 V2DI_type_node, NULL_TREE);
16640 tree void_ftype_v4si
16641 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
16642 tree v8hi_ftype_void
16643 = build_function_type_list (V8HI_type_node, NULL_TREE);
16644 tree void_ftype_void
16645 = build_function_type_list (void_type_node, NULL_TREE);
16646 tree void_ftype_int
16647 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
16649 tree opaque_ftype_long_pcvoid
16650 = build_function_type_list (opaque_V4SI_type_node,
16651 long_integer_type_node, pcvoid_type_node,
16652 NULL_TREE);
16653 tree v16qi_ftype_long_pcvoid
16654 = build_function_type_list (V16QI_type_node,
16655 long_integer_type_node, pcvoid_type_node,
16656 NULL_TREE);
16657 tree v8hi_ftype_long_pcvoid
16658 = build_function_type_list (V8HI_type_node,
16659 long_integer_type_node, pcvoid_type_node,
16660 NULL_TREE);
16661 tree v4si_ftype_long_pcvoid
16662 = build_function_type_list (V4SI_type_node,
16663 long_integer_type_node, pcvoid_type_node,
16664 NULL_TREE);
16665 tree v4sf_ftype_long_pcvoid
16666 = build_function_type_list (V4SF_type_node,
16667 long_integer_type_node, pcvoid_type_node,
16668 NULL_TREE);
16669 tree v2df_ftype_long_pcvoid
16670 = build_function_type_list (V2DF_type_node,
16671 long_integer_type_node, pcvoid_type_node,
16672 NULL_TREE);
16673 tree v2di_ftype_long_pcvoid
16674 = build_function_type_list (V2DI_type_node,
16675 long_integer_type_node, pcvoid_type_node,
16676 NULL_TREE);
16677 tree v1ti_ftype_long_pcvoid
16678 = build_function_type_list (V1TI_type_node,
16679 long_integer_type_node, pcvoid_type_node,
16680 NULL_TREE);
16682 tree void_ftype_opaque_long_pvoid
16683 = build_function_type_list (void_type_node,
16684 opaque_V4SI_type_node, long_integer_type_node,
16685 pvoid_type_node, NULL_TREE);
16686 tree void_ftype_v4si_long_pvoid
16687 = build_function_type_list (void_type_node,
16688 V4SI_type_node, long_integer_type_node,
16689 pvoid_type_node, NULL_TREE);
16690 tree void_ftype_v16qi_long_pvoid
16691 = build_function_type_list (void_type_node,
16692 V16QI_type_node, long_integer_type_node,
16693 pvoid_type_node, NULL_TREE);
16695 tree void_ftype_v16qi_pvoid_long
16696 = build_function_type_list (void_type_node,
16697 V16QI_type_node, pvoid_type_node,
16698 long_integer_type_node, NULL_TREE);
16700 tree void_ftype_v8hi_long_pvoid
16701 = build_function_type_list (void_type_node,
16702 V8HI_type_node, long_integer_type_node,
16703 pvoid_type_node, NULL_TREE);
16704 tree void_ftype_v4sf_long_pvoid
16705 = build_function_type_list (void_type_node,
16706 V4SF_type_node, long_integer_type_node,
16707 pvoid_type_node, NULL_TREE);
16708 tree void_ftype_v2df_long_pvoid
16709 = build_function_type_list (void_type_node,
16710 V2DF_type_node, long_integer_type_node,
16711 pvoid_type_node, NULL_TREE);
16712 tree void_ftype_v1ti_long_pvoid
16713 = build_function_type_list (void_type_node,
16714 V1TI_type_node, long_integer_type_node,
16715 pvoid_type_node, NULL_TREE);
16716 tree void_ftype_v2di_long_pvoid
16717 = build_function_type_list (void_type_node,
16718 V2DI_type_node, long_integer_type_node,
16719 pvoid_type_node, NULL_TREE);
16720 tree int_ftype_int_v8hi_v8hi
16721 = build_function_type_list (integer_type_node,
16722 integer_type_node, V8HI_type_node,
16723 V8HI_type_node, NULL_TREE);
16724 tree int_ftype_int_v16qi_v16qi
16725 = build_function_type_list (integer_type_node,
16726 integer_type_node, V16QI_type_node,
16727 V16QI_type_node, NULL_TREE);
16728 tree int_ftype_int_v4sf_v4sf
16729 = build_function_type_list (integer_type_node,
16730 integer_type_node, V4SF_type_node,
16731 V4SF_type_node, NULL_TREE);
16732 tree int_ftype_int_v2df_v2df
16733 = build_function_type_list (integer_type_node,
16734 integer_type_node, V2DF_type_node,
16735 V2DF_type_node, NULL_TREE);
16736 tree v2di_ftype_v2di
16737 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
16738 tree v4si_ftype_v4si
16739 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16740 tree v8hi_ftype_v8hi
16741 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16742 tree v16qi_ftype_v16qi
16743 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16744 tree v4sf_ftype_v4sf
16745 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16746 tree v2df_ftype_v2df
16747 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16748 tree void_ftype_pcvoid_int_int
16749 = build_function_type_list (void_type_node,
16750 pcvoid_type_node, integer_type_node,
16751 integer_type_node, NULL_TREE);
16753 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
16754 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
16755 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
16756 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
16757 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
16758 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
16759 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
16760 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
16761 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
16762 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
16763 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
16764 ALTIVEC_BUILTIN_LVXL_V2DF);
16765 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
16766 ALTIVEC_BUILTIN_LVXL_V2DI);
16767 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
16768 ALTIVEC_BUILTIN_LVXL_V4SF);
16769 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
16770 ALTIVEC_BUILTIN_LVXL_V4SI);
16771 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
16772 ALTIVEC_BUILTIN_LVXL_V8HI);
16773 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
16774 ALTIVEC_BUILTIN_LVXL_V16QI);
16775 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
16776 def_builtin ("__builtin_altivec_lvx_v1ti", v1ti_ftype_long_pcvoid,
16777 ALTIVEC_BUILTIN_LVX_V1TI);
16778 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
16779 ALTIVEC_BUILTIN_LVX_V2DF);
16780 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
16781 ALTIVEC_BUILTIN_LVX_V2DI);
16782 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
16783 ALTIVEC_BUILTIN_LVX_V4SF);
16784 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
16785 ALTIVEC_BUILTIN_LVX_V4SI);
16786 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
16787 ALTIVEC_BUILTIN_LVX_V8HI);
16788 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
16789 ALTIVEC_BUILTIN_LVX_V16QI);
16790 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
16791 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
16792 ALTIVEC_BUILTIN_STVX_V2DF);
16793 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
16794 ALTIVEC_BUILTIN_STVX_V2DI);
16795 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
16796 ALTIVEC_BUILTIN_STVX_V4SF);
16797 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
16798 ALTIVEC_BUILTIN_STVX_V4SI);
16799 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
16800 ALTIVEC_BUILTIN_STVX_V8HI);
16801 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
16802 ALTIVEC_BUILTIN_STVX_V16QI);
16803 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
16804 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
16805 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
16806 ALTIVEC_BUILTIN_STVXL_V2DF);
16807 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
16808 ALTIVEC_BUILTIN_STVXL_V2DI);
16809 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
16810 ALTIVEC_BUILTIN_STVXL_V4SF);
16811 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
16812 ALTIVEC_BUILTIN_STVXL_V4SI);
16813 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
16814 ALTIVEC_BUILTIN_STVXL_V8HI);
16815 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
16816 ALTIVEC_BUILTIN_STVXL_V16QI);
16817 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
16818 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
16819 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
16820 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
16821 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
16822 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
16823 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
16824 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
16825 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
16826 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
16827 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
16828 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
16829 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
16830 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
16831 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
16832 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
16834 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
16835 VSX_BUILTIN_LXVD2X_V2DF);
16836 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
16837 VSX_BUILTIN_LXVD2X_V2DI);
16838 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
16839 VSX_BUILTIN_LXVW4X_V4SF);
16840 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
16841 VSX_BUILTIN_LXVW4X_V4SI);
16842 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
16843 VSX_BUILTIN_LXVW4X_V8HI);
16844 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
16845 VSX_BUILTIN_LXVW4X_V16QI);
16846 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
16847 VSX_BUILTIN_STXVD2X_V2DF);
16848 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
16849 VSX_BUILTIN_STXVD2X_V2DI);
16850 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
16851 VSX_BUILTIN_STXVW4X_V4SF);
16852 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
16853 VSX_BUILTIN_STXVW4X_V4SI);
16854 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
16855 VSX_BUILTIN_STXVW4X_V8HI);
16856 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
16857 VSX_BUILTIN_STXVW4X_V16QI);
16859 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
16860 VSX_BUILTIN_LD_ELEMREV_V2DF);
16861 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
16862 VSX_BUILTIN_LD_ELEMREV_V2DI);
16863 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
16864 VSX_BUILTIN_LD_ELEMREV_V4SF);
16865 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
16866 VSX_BUILTIN_LD_ELEMREV_V4SI);
16867 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
16868 VSX_BUILTIN_LD_ELEMREV_V8HI);
16869 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
16870 VSX_BUILTIN_LD_ELEMREV_V16QI);
16871 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
16872 VSX_BUILTIN_ST_ELEMREV_V2DF);
16873 def_builtin ("__builtin_vsx_st_elemrev_v1ti", void_ftype_v1ti_long_pvoid,
16874 VSX_BUILTIN_ST_ELEMREV_V1TI);
16875 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
16876 VSX_BUILTIN_ST_ELEMREV_V2DI);
16877 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
16878 VSX_BUILTIN_ST_ELEMREV_V4SF);
16879 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
16880 VSX_BUILTIN_ST_ELEMREV_V4SI);
16881 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
16882 VSX_BUILTIN_ST_ELEMREV_V8HI);
16883 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
16884 VSX_BUILTIN_ST_ELEMREV_V16QI);
16886 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
16887 VSX_BUILTIN_VEC_LD);
16888 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
16889 VSX_BUILTIN_VEC_ST);
16890 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
16891 VSX_BUILTIN_VEC_XL);
16892 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
16893 VSX_BUILTIN_VEC_XL_BE);
16894 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
16895 VSX_BUILTIN_VEC_XST);
16896 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
16897 VSX_BUILTIN_VEC_XST_BE);
16899 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
16900 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
16901 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
16903 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
16904 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
16905 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
16906 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
16907 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
16908 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
16909 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
16910 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
16911 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
16912 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
16913 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
16914 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
16916 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
16917 ALTIVEC_BUILTIN_VEC_ADDE);
16918 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
16919 ALTIVEC_BUILTIN_VEC_ADDEC);
16920 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
16921 ALTIVEC_BUILTIN_VEC_CMPNE);
16922 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
16923 ALTIVEC_BUILTIN_VEC_MUL);
16924 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
16925 ALTIVEC_BUILTIN_VEC_SUBE);
16926 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
16927 ALTIVEC_BUILTIN_VEC_SUBEC);
16929 /* Cell builtins. */
16930 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
16931 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
16932 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
16933 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
16935 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
16936 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
16937 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
16938 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
16940 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
16941 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
16942 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
16943 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
16945 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
16946 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
16947 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
16948 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
16950 if (TARGET_P9_VECTOR)
16952 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
16953 P9V_BUILTIN_STXVL);
16954 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
16955 P9V_BUILTIN_XST_LEN_R);
16958 /* Add the DST variants. */
16959 d = bdesc_dst;
16960 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
16962 HOST_WIDE_INT mask = d->mask;
16964 /* It is expected that these dst built-in functions may have
16965 d->icode equal to CODE_FOR_nothing. */
16966 if ((mask & builtin_mask) != mask)
16968 if (TARGET_DEBUG_BUILTIN)
16969 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
16970 d->name);
16971 continue;
16973 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
16976 /* Initialize the predicates. */
16977 d = bdesc_altivec_preds;
16978 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
16980 machine_mode mode1;
16981 tree type;
16982 HOST_WIDE_INT mask = d->mask;
16984 if ((mask & builtin_mask) != mask)
16986 if (TARGET_DEBUG_BUILTIN)
16987 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
16988 d->name);
16989 continue;
16992 if (rs6000_overloaded_builtin_p (d->code))
16993 mode1 = VOIDmode;
16994 else
16996 /* Cannot define builtin if the instruction is disabled. */
16997 gcc_assert (d->icode != CODE_FOR_nothing);
16998 mode1 = insn_data[d->icode].operand[1].mode;
17001 switch (mode1)
17003 case E_VOIDmode:
17004 type = int_ftype_int_opaque_opaque;
17005 break;
17006 case E_V2DImode:
17007 type = int_ftype_int_v2di_v2di;
17008 break;
17009 case E_V4SImode:
17010 type = int_ftype_int_v4si_v4si;
17011 break;
17012 case E_V8HImode:
17013 type = int_ftype_int_v8hi_v8hi;
17014 break;
17015 case E_V16QImode:
17016 type = int_ftype_int_v16qi_v16qi;
17017 break;
17018 case E_V4SFmode:
17019 type = int_ftype_int_v4sf_v4sf;
17020 break;
17021 case E_V2DFmode:
17022 type = int_ftype_int_v2df_v2df;
17023 break;
17024 default:
17025 gcc_unreachable ();
17028 def_builtin (d->name, type, d->code);
17031 /* Initialize the abs* operators. */
17032 d = bdesc_abs;
17033 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17035 machine_mode mode0;
17036 tree type;
17037 HOST_WIDE_INT mask = d->mask;
17039 if ((mask & builtin_mask) != mask)
17041 if (TARGET_DEBUG_BUILTIN)
17042 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17043 d->name);
17044 continue;
17047 /* Cannot define builtin if the instruction is disabled. */
17048 gcc_assert (d->icode != CODE_FOR_nothing);
17049 mode0 = insn_data[d->icode].operand[0].mode;
17051 switch (mode0)
17053 case E_V2DImode:
17054 type = v2di_ftype_v2di;
17055 break;
17056 case E_V4SImode:
17057 type = v4si_ftype_v4si;
17058 break;
17059 case E_V8HImode:
17060 type = v8hi_ftype_v8hi;
17061 break;
17062 case E_V16QImode:
17063 type = v16qi_ftype_v16qi;
17064 break;
17065 case E_V4SFmode:
17066 type = v4sf_ftype_v4sf;
17067 break;
17068 case E_V2DFmode:
17069 type = v2df_ftype_v2df;
17070 break;
17071 default:
17072 gcc_unreachable ();
17075 def_builtin (d->name, type, d->code);
17078 /* Initialize target builtin that implements
17079 targetm.vectorize.builtin_mask_for_load. */
17081 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17082 v16qi_ftype_long_pcvoid,
17083 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17084 BUILT_IN_MD, NULL, NULL_TREE);
17085 TREE_READONLY (decl) = 1;
17086 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17087 altivec_builtin_mask_for_load = decl;
17089 /* Access to the vec_init patterns. */
17090 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17091 integer_type_node, integer_type_node,
17092 integer_type_node, NULL_TREE);
17093 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17095 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17096 short_integer_type_node,
17097 short_integer_type_node,
17098 short_integer_type_node,
17099 short_integer_type_node,
17100 short_integer_type_node,
17101 short_integer_type_node,
17102 short_integer_type_node, NULL_TREE);
17103 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17105 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17106 char_type_node, char_type_node,
17107 char_type_node, char_type_node,
17108 char_type_node, char_type_node,
17109 char_type_node, char_type_node,
17110 char_type_node, char_type_node,
17111 char_type_node, char_type_node,
17112 char_type_node, char_type_node,
17113 char_type_node, NULL_TREE);
17114 def_builtin ("__builtin_vec_init_v16qi", ftype,
17115 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17117 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17118 float_type_node, float_type_node,
17119 float_type_node, NULL_TREE);
17120 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17122 /* VSX builtins. */
17123 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17124 double_type_node, NULL_TREE);
17125 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17127 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17128 intDI_type_node, NULL_TREE);
17129 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17131 /* Access to the vec_set patterns. */
17132 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17133 intSI_type_node,
17134 integer_type_node, NULL_TREE);
17135 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17137 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17138 intHI_type_node,
17139 integer_type_node, NULL_TREE);
17140 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17142 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17143 intQI_type_node,
17144 integer_type_node, NULL_TREE);
17145 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17147 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17148 float_type_node,
17149 integer_type_node, NULL_TREE);
17150 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17152 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17153 double_type_node,
17154 integer_type_node, NULL_TREE);
17155 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17157 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17158 intDI_type_node,
17159 integer_type_node, NULL_TREE);
17160 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17162 /* Access to the vec_extract patterns. */
17163 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17164 integer_type_node, NULL_TREE);
17165 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17167 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17168 integer_type_node, NULL_TREE);
17169 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17171 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17172 integer_type_node, NULL_TREE);
17173 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17175 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17176 integer_type_node, NULL_TREE);
17177 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17179 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17180 integer_type_node, NULL_TREE);
17181 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17183 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17184 integer_type_node, NULL_TREE);
17185 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
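/* Illustrative sketch, not part of the original source: these vec_ext_*
   builtins back the overloaded vec_extract intrinsic from altivec.h.  */
#if 0
#include <altivec.h>

int
example_extract (vector signed int v)
{
  return vec_extract (v, 2);	/* element 2 of the V4SI vector */
}
#endif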
17188 if (V1TI_type_node)
17190 tree v1ti_ftype_long_pcvoid
17191 = build_function_type_list (V1TI_type_node,
17192 long_integer_type_node, pcvoid_type_node,
17193 NULL_TREE);
17194 tree void_ftype_v1ti_long_pvoid
17195 = build_function_type_list (void_type_node,
17196 V1TI_type_node, long_integer_type_node,
17197 pvoid_type_node, NULL_TREE);
17198 def_builtin ("__builtin_vsx_ld_elemrev_v1ti", v1ti_ftype_long_pcvoid,
17199 VSX_BUILTIN_LD_ELEMREV_V1TI);
17200 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17201 VSX_BUILTIN_LXVD2X_V1TI);
17202 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17203 VSX_BUILTIN_STXVD2X_V1TI);
17204 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17205 NULL_TREE, NULL_TREE);
17206 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17207 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17208 intTI_type_node,
17209 integer_type_node, NULL_TREE);
17210 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17211 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17212 integer_type_node, NULL_TREE);
17213 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17218 static void
17219 htm_init_builtins (void)
17221 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17222 const struct builtin_description *d;
17223 size_t i;
17225 d = bdesc_htm;
17226 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17228 tree op[MAX_HTM_OPERANDS], type;
17229 HOST_WIDE_INT mask = d->mask;
17230 unsigned attr = rs6000_builtin_info[d->code].attr;
17231 bool void_func = (attr & RS6000_BTC_VOID);
17232 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17233 int nopnds = 0;
17234 tree gpr_type_node;
17235 tree rettype;
17236 tree argtype;
17238 /* It is expected that these htm built-in functions may have
17239 d->icode equal to CODE_FOR_nothing. */
17241 if (TARGET_32BIT && TARGET_POWERPC64)
17242 gpr_type_node = long_long_unsigned_type_node;
17243 else
17244 gpr_type_node = long_unsigned_type_node;
17246 if (attr & RS6000_BTC_SPR)
17248 rettype = gpr_type_node;
17249 argtype = gpr_type_node;
17251 else if (d->code == HTM_BUILTIN_TABORTDC
17252 || d->code == HTM_BUILTIN_TABORTDCI)
17254 rettype = unsigned_type_node;
17255 argtype = gpr_type_node;
17257 else
17259 rettype = unsigned_type_node;
17260 argtype = unsigned_type_node;
17263 if ((mask & builtin_mask) != mask)
17265 if (TARGET_DEBUG_BUILTIN)
17266 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17267 continue;
17270 if (d->name == 0)
17272 if (TARGET_DEBUG_BUILTIN)
17273 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
17274 (long unsigned) i);
17275 continue;
17278 op[nopnds++] = (void_func) ? void_type_node : rettype;
17280 if (attr_args == RS6000_BTC_UNARY)
17281 op[nopnds++] = argtype;
17282 else if (attr_args == RS6000_BTC_BINARY)
17284 op[nopnds++] = argtype;
17285 op[nopnds++] = argtype;
17287 else if (attr_args == RS6000_BTC_TERNARY)
17289 op[nopnds++] = argtype;
17290 op[nopnds++] = argtype;
17291 op[nopnds++] = argtype;
17294 switch (nopnds)
17296 case 1:
17297 type = build_function_type_list (op[0], NULL_TREE);
17298 break;
17299 case 2:
17300 type = build_function_type_list (op[0], op[1], NULL_TREE);
17301 break;
17302 case 3:
17303 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17304 break;
17305 case 4:
17306 type = build_function_type_list (op[0], op[1], op[2], op[3],
17307 NULL_TREE);
17308 break;
17309 default:
17310 gcc_unreachable ();
17313 def_builtin (d->name, type, d->code);
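/* Illustrative sketch, not part of the original source: the loop above
   defines the HTM builtins; the GCC manual's usage pattern for them looks
   like this.  */
#if 0
int
example_transaction (int *p)
{
  if (__builtin_tbegin (0))
    {
      *p += 1;
      __builtin_tend (0);
      return 1;
    }
  return 0;	/* transaction failed to start */
}
#endif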
17317 /* Hash function for builtin functions with up to 3 arguments and a return
17318 type. */
17319 hashval_t
17320 builtin_hasher::hash (builtin_hash_struct *bh)
17322 unsigned ret = 0;
17323 int i;
17325 for (i = 0; i < 4; i++)
17327 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17328 ret = (ret * 2) + bh->uns_p[i];
17331 return ret;
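/* Illustrative sketch, not part of the original source: the hash above
   folds the four (mode, uns_p) pairs into one value; an equivalent
   standalone formulation.  */
#if 0
static unsigned
example_hash (const unsigned mode[4], const int uns_p[4])
{
  unsigned ret = 0;
  for (int i = 0; i < 4; i++)
    ret = (ret * (unsigned) MAX_MACHINE_MODE + mode[i]) * 2 + uns_p[i];
  return ret;
}
#endif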
17334 /* Compare builtin hash entries H1 and H2 for equivalence. */
17335 bool
17336 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17338 return ((p1->mode[0] == p2->mode[0])
17339 && (p1->mode[1] == p2->mode[1])
17340 && (p1->mode[2] == p2->mode[2])
17341 && (p1->mode[3] == p2->mode[3])
17342 && (p1->uns_p[0] == p2->uns_p[0])
17343 && (p1->uns_p[1] == p2->uns_p[1])
17344 && (p1->uns_p[2] == p2->uns_p[2])
17345 && (p1->uns_p[3] == p2->uns_p[3]));
17348 /* Map types for builtin functions with an explicit return type and up to 3
17349 arguments. Functions with fewer than 3 arguments use VOIDmode as the type
17350 of the missing arguments. */
17351 static tree
17352 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17353 machine_mode mode_arg1, machine_mode mode_arg2,
17354 enum rs6000_builtins builtin, const char *name)
17356 struct builtin_hash_struct h;
17357 struct builtin_hash_struct *h2;
17358 int num_args = 3;
17359 int i;
17360 tree ret_type = NULL_TREE;
17361 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
17363 /* Create builtin_hash_table. */
17364 if (builtin_hash_table == NULL)
17365 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
17367 h.type = NULL_TREE;
17368 h.mode[0] = mode_ret;
17369 h.mode[1] = mode_arg0;
17370 h.mode[2] = mode_arg1;
17371 h.mode[3] = mode_arg2;
17372 h.uns_p[0] = 0;
17373 h.uns_p[1] = 0;
17374 h.uns_p[2] = 0;
17375 h.uns_p[3] = 0;
17377 /* If the builtin produces unsigned results or takes unsigned arguments,
17378 and it is returned as a decl for the vectorizer (such as widening
17379 multiplies, permute), make sure the arguments and return value are
17380 correctly typed. */
17381 switch (builtin)
17383 /* unsigned 1 argument functions. */
17384 case CRYPTO_BUILTIN_VSBOX:
17385 case P8V_BUILTIN_VGBBD:
17386 case MISC_BUILTIN_CDTBCD:
17387 case MISC_BUILTIN_CBCDTD:
17388 h.uns_p[0] = 1;
17389 h.uns_p[1] = 1;
17390 break;
17392 /* unsigned 2 argument functions. */
17393 case ALTIVEC_BUILTIN_VMULEUB:
17394 case ALTIVEC_BUILTIN_VMULEUH:
17395 case P8V_BUILTIN_VMULEUW:
17396 case ALTIVEC_BUILTIN_VMULOUB:
17397 case ALTIVEC_BUILTIN_VMULOUH:
17398 case P8V_BUILTIN_VMULOUW:
17399 case CRYPTO_BUILTIN_VCIPHER:
17400 case CRYPTO_BUILTIN_VCIPHERLAST:
17401 case CRYPTO_BUILTIN_VNCIPHER:
17402 case CRYPTO_BUILTIN_VNCIPHERLAST:
17403 case CRYPTO_BUILTIN_VPMSUMB:
17404 case CRYPTO_BUILTIN_VPMSUMH:
17405 case CRYPTO_BUILTIN_VPMSUMW:
17406 case CRYPTO_BUILTIN_VPMSUMD:
17407 case CRYPTO_BUILTIN_VPMSUM:
17408 case MISC_BUILTIN_ADDG6S:
17409 case MISC_BUILTIN_DIVWEU:
17410 case MISC_BUILTIN_DIVDEU:
17411 case VSX_BUILTIN_UDIV_V2DI:
17412 case ALTIVEC_BUILTIN_VMAXUB:
17413 case ALTIVEC_BUILTIN_VMINUB:
17414 case ALTIVEC_BUILTIN_VMAXUH:
17415 case ALTIVEC_BUILTIN_VMINUH:
17416 case ALTIVEC_BUILTIN_VMAXUW:
17417 case ALTIVEC_BUILTIN_VMINUW:
17418 case P8V_BUILTIN_VMAXUD:
17419 case P8V_BUILTIN_VMINUD:
17420 h.uns_p[0] = 1;
17421 h.uns_p[1] = 1;
17422 h.uns_p[2] = 1;
17423 break;
17425 /* unsigned 3 argument functions. */
17426 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
17427 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
17428 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
17429 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
17430 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
17431 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
17432 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
17433 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
17434 case VSX_BUILTIN_VPERM_16QI_UNS:
17435 case VSX_BUILTIN_VPERM_8HI_UNS:
17436 case VSX_BUILTIN_VPERM_4SI_UNS:
17437 case VSX_BUILTIN_VPERM_2DI_UNS:
17438 case VSX_BUILTIN_XXSEL_16QI_UNS:
17439 case VSX_BUILTIN_XXSEL_8HI_UNS:
17440 case VSX_BUILTIN_XXSEL_4SI_UNS:
17441 case VSX_BUILTIN_XXSEL_2DI_UNS:
17442 case CRYPTO_BUILTIN_VPERMXOR:
17443 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
17444 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
17445 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
17446 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
17447 case CRYPTO_BUILTIN_VSHASIGMAW:
17448 case CRYPTO_BUILTIN_VSHASIGMAD:
17449 case CRYPTO_BUILTIN_VSHASIGMA:
17450 h.uns_p[0] = 1;
17451 h.uns_p[1] = 1;
17452 h.uns_p[2] = 1;
17453 h.uns_p[3] = 1;
17454 break;
17456 /* signed permute functions with unsigned char mask. */
17457 case ALTIVEC_BUILTIN_VPERM_16QI:
17458 case ALTIVEC_BUILTIN_VPERM_8HI:
17459 case ALTIVEC_BUILTIN_VPERM_4SI:
17460 case ALTIVEC_BUILTIN_VPERM_4SF:
17461 case ALTIVEC_BUILTIN_VPERM_2DI:
17462 case ALTIVEC_BUILTIN_VPERM_2DF:
17463 case VSX_BUILTIN_VPERM_16QI:
17464 case VSX_BUILTIN_VPERM_8HI:
17465 case VSX_BUILTIN_VPERM_4SI:
17466 case VSX_BUILTIN_VPERM_4SF:
17467 case VSX_BUILTIN_VPERM_2DI:
17468 case VSX_BUILTIN_VPERM_2DF:
17469 h.uns_p[3] = 1;
17470 break;
17472 /* unsigned args, signed return. */
17473 case VSX_BUILTIN_XVCVUXDSP:
17474 case VSX_BUILTIN_XVCVUXDDP_UNS:
17475 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
17476 h.uns_p[1] = 1;
17477 break;
17479 /* signed args, unsigned return. */
17480 case VSX_BUILTIN_XVCVDPUXDS_UNS:
17481 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
17482 case MISC_BUILTIN_UNPACK_TD:
17483 case MISC_BUILTIN_UNPACK_V1TI:
17484 h.uns_p[0] = 1;
17485 break;
17487 /* unsigned arguments, bool return (compares). */
17488 case ALTIVEC_BUILTIN_VCMPEQUB:
17489 case ALTIVEC_BUILTIN_VCMPEQUH:
17490 case ALTIVEC_BUILTIN_VCMPEQUW:
17491 case P8V_BUILTIN_VCMPEQUD:
17492 case VSX_BUILTIN_CMPGE_U16QI:
17493 case VSX_BUILTIN_CMPGE_U8HI:
17494 case VSX_BUILTIN_CMPGE_U4SI:
17495 case VSX_BUILTIN_CMPGE_U2DI:
17496 case ALTIVEC_BUILTIN_VCMPGTUB:
17497 case ALTIVEC_BUILTIN_VCMPGTUH:
17498 case ALTIVEC_BUILTIN_VCMPGTUW:
17499 case P8V_BUILTIN_VCMPGTUD:
17500 h.uns_p[1] = 1;
17501 h.uns_p[2] = 1;
17502 break;
17504 /* unsigned arguments for 128-bit pack instructions. */
17505 case MISC_BUILTIN_PACK_TD:
17506 case MISC_BUILTIN_PACK_V1TI:
17507 h.uns_p[1] = 1;
17508 h.uns_p[2] = 1;
17509 break;
17511 /* unsigned second arguments (vector shift right). */
17512 case ALTIVEC_BUILTIN_VSRB:
17513 case ALTIVEC_BUILTIN_VSRH:
17514 case ALTIVEC_BUILTIN_VSRW:
17515 case P8V_BUILTIN_VSRD:
17516 h.uns_p[2] = 1;
17517 break;
17519 default:
17520 break;
17523 /* Figure out how many args are present. */
17524 while (num_args > 0 && h.mode[num_args] == VOIDmode)
17525 num_args--;
17527 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
17528 if (!ret_type && h.uns_p[0])
17529 ret_type = builtin_mode_to_type[h.mode[0]][0];
17531 if (!ret_type)
17532 fatal_error (input_location,
17533 "internal error: builtin function %qs had an unexpected "
17534 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
17536 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
17537 arg_type[i] = NULL_TREE;
17539 for (i = 0; i < num_args; i++)
17541 int m = (int) h.mode[i+1];
17542 int uns_p = h.uns_p[i+1];
17544 arg_type[i] = builtin_mode_to_type[m][uns_p];
17545 if (!arg_type[i] && uns_p)
17546 arg_type[i] = builtin_mode_to_type[m][0];
17548 if (!arg_type[i])
17549 fatal_error (input_location,
17550 "internal error: builtin function %qs, argument %d "
17551 "had unexpected argument type %qs", name, i,
17552 GET_MODE_NAME (m));
17555 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
17556 if (*found == NULL)
17558 h2 = ggc_alloc<builtin_hash_struct> ();
17559 *h2 = h;
17560 *found = h2;
17562 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
17563 arg_type[2], NULL_TREE);
17566 return (*found)->type;
17569 static void
17570 rs6000_common_init_builtins (void)
17572 const struct builtin_description *d;
17573 size_t i;
17575 tree opaque_ftype_opaque = NULL_TREE;
17576 tree opaque_ftype_opaque_opaque = NULL_TREE;
17577 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
17578 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17580 /* Create Altivec and VSX builtins on machines with at least the
17581 general purpose extensions (970 and newer) to allow the use of
17582 the target attribute. */
17584 if (TARGET_EXTRA_BUILTINS)
17585 builtin_mask |= RS6000_BTM_COMMON;
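/* Each bdesc entry is used only if every feature bit in its mask is also
set in builtin_mask; e.g. an entry whose mask includes RS6000_BTM_P8_VECTOR
is skipped unless power8 vector support is enabled, which is why the loops
below test (mask & builtin_mask) != mask before calling def_builtin. */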
17587 /* Add the ternary operators. */
17588 d = bdesc_3arg;
17589 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
17591 tree type;
17592 HOST_WIDE_INT mask = d->mask;
17594 if ((mask & builtin_mask) != mask)
17596 if (TARGET_DEBUG_BUILTIN)
17597 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
17598 continue;
17601 if (rs6000_overloaded_builtin_p (d->code))
17603 if (! (type = opaque_ftype_opaque_opaque_opaque))
17604 type = opaque_ftype_opaque_opaque_opaque
17605 = build_function_type_list (opaque_V4SI_type_node,
17606 opaque_V4SI_type_node,
17607 opaque_V4SI_type_node,
17608 opaque_V4SI_type_node,
17609 NULL_TREE);
17611 else
17613 enum insn_code icode = d->icode;
17614 if (d->name == 0)
17616 if (TARGET_DEBUG_BUILTIN)
17617 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
17618 (long unsigned)i);
17620 continue;
17623 if (icode == CODE_FOR_nothing)
17625 if (TARGET_DEBUG_BUILTIN)
17626 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
17627 d->name);
17629 continue;
17632 type = builtin_function_type (insn_data[icode].operand[0].mode,
17633 insn_data[icode].operand[1].mode,
17634 insn_data[icode].operand[2].mode,
17635 insn_data[icode].operand[3].mode,
17636 d->code, d->name);
17639 def_builtin (d->name, type, d->code);
17642 /* Add the binary operators. */
17643 d = bdesc_2arg;
17644 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17646 machine_mode mode0, mode1, mode2;
17647 tree type;
17648 HOST_WIDE_INT mask = d->mask;
17650 if ((mask & builtin_mask) != mask)
17652 if (TARGET_DEBUG_BUILTIN)
17653 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
17654 continue;
17657 if (rs6000_overloaded_builtin_p (d->code))
17659 if (! (type = opaque_ftype_opaque_opaque))
17660 type = opaque_ftype_opaque_opaque
17661 = build_function_type_list (opaque_V4SI_type_node,
17662 opaque_V4SI_type_node,
17663 opaque_V4SI_type_node,
17664 NULL_TREE);
17666 else
17668 enum insn_code icode = d->icode;
17669 if (d->name == 0)
17671 if (TARGET_DEBUG_BUILTIN)
17672 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
17673 (long unsigned)i);
17675 continue;
17678 if (icode == CODE_FOR_nothing)
17680 if (TARGET_DEBUG_BUILTIN)
17681 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
17682 d->name);
17684 continue;
17687 mode0 = insn_data[icode].operand[0].mode;
17688 mode1 = insn_data[icode].operand[1].mode;
17689 mode2 = insn_data[icode].operand[2].mode;
17691 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
17692 d->code, d->name);
17695 def_builtin (d->name, type, d->code);
17698 /* Add the simple unary operators. */
17699 d = bdesc_1arg;
17700 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17702 machine_mode mode0, mode1;
17703 tree type;
17704 HOST_WIDE_INT mask = d->mask;
17706 if ((mask & builtin_mask) != mask)
17708 if (TARGET_DEBUG_BUILTIN)
17709 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
17710 continue;
17713 if (rs6000_overloaded_builtin_p (d->code))
17715 if (! (type = opaque_ftype_opaque))
17716 type = opaque_ftype_opaque
17717 = build_function_type_list (opaque_V4SI_type_node,
17718 opaque_V4SI_type_node,
17719 NULL_TREE);
17721 else
17723 enum insn_code icode = d->icode;
17724 if (d->name == 0)
17726 if (TARGET_DEBUG_BUILTIN)
17727 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
17728 (long unsigned)i);
17730 continue;
17733 if (icode == CODE_FOR_nothing)
17735 if (TARGET_DEBUG_BUILTIN)
17736 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
17737 d->name);
17739 continue;
17742 mode0 = insn_data[icode].operand[0].mode;
17743 mode1 = insn_data[icode].operand[1].mode;
17745 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
17746 d->code, d->name);
17749 def_builtin (d->name, type, d->code);
17752 /* Add the simple no-argument operators. */
17753 d = bdesc_0arg;
17754 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
17756 machine_mode mode0;
17757 tree type;
17758 HOST_WIDE_INT mask = d->mask;
17760 if ((mask & builtin_mask) != mask)
17762 if (TARGET_DEBUG_BUILTIN)
17763 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
17764 continue;
17766 if (rs6000_overloaded_builtin_p (d->code))
17768 if (!opaque_ftype_opaque)
17769 opaque_ftype_opaque
17770 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
17771 type = opaque_ftype_opaque;
17773 else
17775 enum insn_code icode = d->icode;
17776 if (d->name == 0)
17778 if (TARGET_DEBUG_BUILTIN)
17779 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
17780 (long unsigned) i);
17781 continue;
17783 if (icode == CODE_FOR_nothing)
17785 if (TARGET_DEBUG_BUILTIN)
17786 fprintf (stderr,
17787 "rs6000_builtin, skip no-argument %s (no code)\n",
17788 d->name);
17789 continue;
17791 mode0 = insn_data[icode].operand[0].mode;
17792 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
17793 d->code, d->name);
17795 def_builtin (d->name, type, d->code);
17799 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
17800 static void
17801 init_float128_ibm (machine_mode mode)
17803 if (!TARGET_XL_COMPAT)
17805 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
17806 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
17807 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
17808 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
17810 if (!TARGET_HARD_FLOAT)
17812 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
17813 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
17814 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
17815 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
17816 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
17817 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
17818 set_optab_libfunc (le_optab, mode, "__gcc_qle");
17819 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
17821 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
17822 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
17823 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
17824 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
17825 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
17826 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
17827 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
17828 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
17831 else
17833 set_optab_libfunc (add_optab, mode, "_xlqadd");
17834 set_optab_libfunc (sub_optab, mode, "_xlqsub");
17835 set_optab_libfunc (smul_optab, mode, "_xlqmul");
17836 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
17839 /* Add various conversions for IFmode to use the traditional TFmode
17840 names. */
17841 if (mode == IFmode)
17843 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
17844 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
17845 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
17846 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
17847 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
17848 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
17850 if (TARGET_POWERPC64)
17852 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
17853 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
17854 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
17855 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
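/* As a rough example of the effect: with IBM extended double, a long double
addition "a + b" is lowered to a libgcc call "__gcc_qadd (a, b)" rather than
inline code, and with -mxl-compat to "_xlqadd (a, b)" instead. */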
17860 /* Create a decl for either complex long double multiply or complex long double
17861 divide when long double is IEEE 128-bit floating point. We can't use
17862 __multc3 and __divtc3 because those names were already taken by the
17863 original long double format based on IBM extended double. The complex
17864 multiply/divide functions are encoded as builtin functions with a complex
result and 4 scalar inputs. */
17866 static void
17867 create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
17869 tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
17870 name, NULL_TREE);
17872 set_builtin_decl (fncode, fndecl, true);
17874 if (TARGET_DEBUG_BUILTIN)
17875 fprintf (stderr, "create complex %s, fncode: %d\n", name, (int) fncode);
17877 return;
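/* Illustrative lowering: under -mabi=ieeelongdouble, a complex multiply
"x * y" of _Complex long double values becomes a call
"__mulkc3 (creall (x), cimagl (x), creall (y), cimagl (y))" returning the
complex result, matching the 4-scalar-input encoding described above. */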
17880 /* Set up IEEE 128-bit floating point routines. Use different names if the
17881 arguments can be passed in a vector register. The historical PowerPC
17882 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
17883 continue to use that if we aren't using vector registers to pass IEEE
17884 128-bit floating point. */
17886 static void
17887 init_float128_ieee (machine_mode mode)
17889 if (FLOAT128_VECTOR_P (mode))
17891 static bool complex_muldiv_init_p = false;
17893 /* Set up to call __mulkc3 and __divkc3 under -mabi=ieeelongdouble. If
17894 we have clone or target attributes, this will be called a second
17895 time. We want to create the built-in function only once. */
17896 if (mode == TFmode && TARGET_IEEEQUAD && !complex_muldiv_init_p)
17898 complex_muldiv_init_p = true;
17899 built_in_function fncode_mul =
17900 (built_in_function) (BUILT_IN_COMPLEX_MUL_MIN + TCmode
17901 - MIN_MODE_COMPLEX_FLOAT);
17902 built_in_function fncode_div =
17903 (built_in_function) (BUILT_IN_COMPLEX_DIV_MIN + TCmode
17904 - MIN_MODE_COMPLEX_FLOAT);
17906 tree fntype = build_function_type_list (complex_long_double_type_node,
17907 long_double_type_node,
17908 long_double_type_node,
17909 long_double_type_node,
17910 long_double_type_node,
17911 NULL_TREE);
17913 create_complex_muldiv ("__mulkc3", fncode_mul, fntype);
17914 create_complex_muldiv ("__divkc3", fncode_div, fntype);
17917 set_optab_libfunc (add_optab, mode, "__addkf3");
17918 set_optab_libfunc (sub_optab, mode, "__subkf3");
17919 set_optab_libfunc (neg_optab, mode, "__negkf2");
17920 set_optab_libfunc (smul_optab, mode, "__mulkf3");
17921 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
17922 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
17923 set_optab_libfunc (abs_optab, mode, "__abskf2");
17924 set_optab_libfunc (powi_optab, mode, "__powikf2");
17926 set_optab_libfunc (eq_optab, mode, "__eqkf2");
17927 set_optab_libfunc (ne_optab, mode, "__nekf2");
17928 set_optab_libfunc (gt_optab, mode, "__gtkf2");
17929 set_optab_libfunc (ge_optab, mode, "__gekf2");
17930 set_optab_libfunc (lt_optab, mode, "__ltkf2");
17931 set_optab_libfunc (le_optab, mode, "__lekf2");
17932 set_optab_libfunc (unord_optab, mode, "__unordkf2");
17934 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
17935 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
17936 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
17937 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
17939 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
17940 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17941 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
17943 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
17944 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
17945 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
17947 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
17948 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
17949 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
17950 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
17951 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
17952 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
17954 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
17955 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
17956 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
17957 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
17959 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
17960 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
17961 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
17962 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
17964 if (TARGET_POWERPC64)
17966 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
17967 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
17968 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
17969 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
17973 else
17975 set_optab_libfunc (add_optab, mode, "_q_add");
17976 set_optab_libfunc (sub_optab, mode, "_q_sub");
17977 set_optab_libfunc (neg_optab, mode, "_q_neg");
17978 set_optab_libfunc (smul_optab, mode, "_q_mul");
17979 set_optab_libfunc (sdiv_optab, mode, "_q_div");
17980 if (TARGET_PPC_GPOPT)
17981 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
17983 set_optab_libfunc (eq_optab, mode, "_q_feq");
17984 set_optab_libfunc (ne_optab, mode, "_q_fne");
17985 set_optab_libfunc (gt_optab, mode, "_q_fgt");
17986 set_optab_libfunc (ge_optab, mode, "_q_fge");
17987 set_optab_libfunc (lt_optab, mode, "_q_flt");
17988 set_optab_libfunc (le_optab, mode, "_q_fle");
17990 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
17991 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
17992 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
17993 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
17994 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
17995 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
17996 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
17997 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
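/* For example, once these libfuncs are registered, a KFmode (__float128)
multiply "a * b" is emitted as a call to __mulkf3 and a conversion
"(double) a" as a call to __trunckfdf2; the _q_* names above are only used
when IEEE 128-bit values are not passed in vector registers. */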
18001 static void
18002 rs6000_init_libfuncs (void)
18004 /* __float128 support. */
18005 if (TARGET_FLOAT128_TYPE)
18007 init_float128_ibm (IFmode);
18008 init_float128_ieee (KFmode);
18011 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18012 if (TARGET_LONG_DOUBLE_128)
18014 if (!TARGET_IEEEQUAD)
18015 init_float128_ibm (TFmode);
18017 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18018 else
18019 init_float128_ieee (TFmode);
18023 /* Emit a potentially record-form instruction, setting DST from SRC.
18024 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18025 signed comparison of DST with zero. If DOT is 1, the generated RTL
18026 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18027 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18028 a separate COMPARE. */
18030 void
18031 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18033 if (dot == 0)
18035 emit_move_insn (dst, src);
18036 return;
18039 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18041 emit_move_insn (dst, src);
18042 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18043 return;
18046 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18047 if (dot == 1)
18049 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18050 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18052 else
18054 rtx set = gen_rtx_SET (dst, src);
18055 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
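/* Sketch of the emitted RTL (register numbers omitted): for dot == 2 with
CCREG being CR0, the PARALLEL is
(parallel [(set ccreg (compare:CC src (const_int 0)))
(set dst src)])
which matches record forms such as "and." that set CR0 as a side effect;
for dot == 1 the second element is (clobber dst) instead. */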
18060 /* A validation routine: say whether CODE, a condition code, and MODE
18061 match. The other alternatives either don't make sense or should
18062 never be generated. */
18064 void
18065 validate_condition_mode (enum rtx_code code, machine_mode mode)
18067 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18068 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18069 && GET_MODE_CLASS (mode) == MODE_CC);
18071 /* These don't make sense. */
18072 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18073 || mode != CCUNSmode);
18075 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18076 || mode == CCUNSmode);
18078 gcc_assert (mode == CCFPmode
18079 || (code != ORDERED && code != UNORDERED
18080 && code != UNEQ && code != LTGT
18081 && code != UNGT && code != UNLT
18082 && code != UNGE && code != UNLE));
18084 /* These should never be generated except for
18085 flag_finite_math_only. */
18086 gcc_assert (mode != CCFPmode
18087 || flag_finite_math_only
18088 || (code != LE && code != GE
18089 && code != UNEQ && code != LTGT
18090 && code != UNGT && code != UNLT));
18092 /* These are invalid; the information is not there. */
18093 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18097 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18098 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18099 not zero, store there the bit offset (counted from the right) where
18100 the single stretch of 1 bits begins; and similarly for B, the bit
18101 offset where it ends. */
18103 bool
18104 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18106 unsigned HOST_WIDE_INT val = INTVAL (mask);
18107 unsigned HOST_WIDE_INT bit;
18108 int nb, ne;
18109 int n = GET_MODE_PRECISION (mode);
18111 if (mode != DImode && mode != SImode)
18112 return false;
18114 if (INTVAL (mask) >= 0)
18116 bit = val & -val;
18117 ne = exact_log2 (bit);
18118 nb = exact_log2 (val + bit);
18120 else if (val + 1 == 0)
18122 nb = n;
18123 ne = 0;
18125 else if (val & 1)
18127 val = ~val;
18128 bit = val & -val;
18129 nb = exact_log2 (bit);
18130 ne = exact_log2 (val + bit);
18132 else
18134 bit = val & -val;
18135 ne = exact_log2 (bit);
18136 if (val + bit == 0)
18137 nb = n;
18138 else
18139 nb = 0;
18142 nb--;
18144 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18145 return false;
18147 if (b)
18148 *b = nb;
18149 if (e)
18150 *e = ne;
18152 return true;
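/* Worked example: for SImode MASK 0x00ffff00 the ones run from bit 8
through bit 23 (counting from the right), so *E is set to 8 and *B to 23.
A wrap-around SImode mask such as 0xff0000ff is also accepted, since
rlwinm masks may wrap (there *B is 7 and *E is 24). */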
18155 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18156 or rldicr instruction, to implement an AND with it in mode MODE. */
18158 bool
18159 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18161 int nb, ne;
18163 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18164 return false;
18166 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
18167 does not wrap. */
18168 if (mode == DImode)
18169 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18171 /* For SImode, rlwinm can do everything. */
18172 if (mode == SImode)
18173 return (nb < 32 && ne < 32);
18175 return false;
18178 /* Return the instruction template for an AND with mask in mode MODE, with
18179 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18181 const char *
18182 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18184 int nb, ne;
18186 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18187 gcc_unreachable ();
18189 if (mode == DImode && ne == 0)
18191 operands[3] = GEN_INT (63 - nb);
18192 if (dot)
18193 return "rldicl. %0,%1,0,%3";
18194 return "rldicl %0,%1,0,%3";
18197 if (mode == DImode && nb == 63)
18199 operands[3] = GEN_INT (63 - ne);
18200 if (dot)
18201 return "rldicr. %0,%1,0,%3";
18202 return "rldicr %0,%1,0,%3";
18205 if (nb < 32 && ne < 32)
18207 operands[3] = GEN_INT (31 - nb);
18208 operands[4] = GEN_INT (31 - ne);
18209 if (dot)
18210 return "rlwinm. %0,%1,0,%3,%4";
18211 return "rlwinm %0,%1,0,%3,%4";
18214 gcc_unreachable ();
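/* Example outputs: a DImode AND with mask 0xff (ne == 0, nb == 7) yields
"rldicl %0,%1,0,56", clearing all but the low 8 bits, while an SImode AND
with mask 0x00ffff00 yields "rlwinm %0,%1,0,8,23". */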
18217 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18218 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18219 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18221 bool
18222 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18224 int nb, ne;
18226 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18227 return false;
18229 int n = GET_MODE_PRECISION (mode);
18230 int sh = -1;
18232 if (CONST_INT_P (XEXP (shift, 1)))
18234 sh = INTVAL (XEXP (shift, 1));
18235 if (sh < 0 || sh >= n)
18236 return false;
18239 rtx_code code = GET_CODE (shift);
18241 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18242 if (sh == 0)
18243 code = ROTATE;
18245 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18246 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18247 code = ASHIFT;
18248 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18250 code = LSHIFTRT;
18251 sh = n - sh;
18254 /* DImode rotates need rld*. */
18255 if (mode == DImode && code == ROTATE)
18256 return (nb == 63 || ne == 0 || ne == sh);
18258 /* SImode rotates need rlw*. */
18259 if (mode == SImode && code == ROTATE)
18260 return (nb < 32 && ne < 32 && sh < 32);
18262 /* Wrap-around masks are only okay for rotates. */
18263 if (ne > nb)
18264 return false;
18266 /* Variable shifts are only okay for rotates. */
18267 if (sh < 0)
18268 return false;
18270 /* Don't allow ASHIFT if the mask is wrong for that. */
18271 if (code == ASHIFT && ne < sh)
18272 return false;
18274 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18275 if the mask is wrong for that. */
18276 if (nb < 32 && ne < 32 && sh < 32
18277 && !(code == LSHIFTRT && nb >= 32 - sh))
18278 return true;
18280 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18281 if the mask is wrong for that. */
18282 if (code == LSHIFTRT)
18283 sh = 64 - sh;
18284 if (nb == 63 || ne == 0 || ne == sh)
18285 return !(code == LSHIFTRT && nb >= sh);
18287 return false;
18290 /* Return the instruction template for a shift with mask in mode MODE, with
18291 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18293 const char *
18294 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18296 int nb, ne;
18298 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18299 gcc_unreachable ();
18301 if (mode == DImode && ne == 0)
18303 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18304 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18305 operands[3] = GEN_INT (63 - nb);
18306 if (dot)
18307 return "rld%I2cl. %0,%1,%2,%3";
18308 return "rld%I2cl %0,%1,%2,%3";
18311 if (mode == DImode && nb == 63)
18313 operands[3] = GEN_INT (63 - ne);
18314 if (dot)
18315 return "rld%I2cr. %0,%1,%2,%3";
18316 return "rld%I2cr %0,%1,%2,%3";
18319 if (mode == DImode
18320 && GET_CODE (operands[4]) != LSHIFTRT
18321 && CONST_INT_P (operands[2])
18322 && ne == INTVAL (operands[2]))
18324 operands[3] = GEN_INT (63 - nb);
18325 if (dot)
18326 return "rld%I2c. %0,%1,%2,%3";
18327 return "rld%I2c %0,%1,%2,%3";
18330 if (nb < 32 && ne < 32)
18332 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18333 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18334 operands[3] = GEN_INT (31 - nb);
18335 operands[4] = GEN_INT (31 - ne);
18336 /* This insn can also be a 64-bit rotate with a mask that really makes
18337 it just a shift right (with mask); the %h below adjusts for that
18338 situation (shift count is >= 32 in that case). */
18339 if (dot)
18340 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18341 return "rlw%I2nm %0,%1,%h2,%3,%4";
18344 gcc_unreachable ();
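/* Example: a DImode logical shift right by 8 under mask
0x00ffffffffffffff (ne == 0) takes the first branch above; the shift count
is rewritten to 64 - 8 = 56 and the template becomes "rldicl %0,%1,56,8",
i.e. the srdi form of the shift. */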
18347 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18348 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18349 ASHIFT, or LSHIFTRT) in mode MODE. */
18351 bool
18352 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
18354 int nb, ne;
18356 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18357 return false;
18359 int n = GET_MODE_PRECISION (mode);
18361 int sh = INTVAL (XEXP (shift, 1));
18362 if (sh < 0 || sh >= n)
18363 return false;
18365 rtx_code code = GET_CODE (shift);
18367 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18368 if (sh == 0)
18369 code = ROTATE;
18371 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18372 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18373 code = ASHIFT;
18374 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18376 code = LSHIFTRT;
18377 sh = n - sh;
18380 /* DImode rotates need rldimi. */
18381 if (mode == DImode && code == ROTATE)
18382 return (ne == sh);
18384 /* SImode rotates need rlwimi. */
18385 if (mode == SImode && code == ROTATE)
18386 return (nb < 32 && ne < 32 && sh < 32);
18388 /* Wrap-around masks are only okay for rotates. */
18389 if (ne > nb)
18390 return false;
18392 /* Don't allow ASHIFT if the mask is wrong for that. */
18393 if (code == ASHIFT && ne < sh)
18394 return false;
18396 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
18397 if the mask is wrong for that. */
18398 if (nb < 32 && ne < 32 && sh < 32
18399 && !(code == LSHIFTRT && nb >= 32 - sh))
18400 return true;
18402 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
18403 if the mask is wrong for that. */
18404 if (code == LSHIFTRT)
18405 sh = 64 - sh;
18406 if (ne == sh)
18407 return !(code == LSHIFTRT && nb >= sh);
18409 return false;
18412 /* Return the instruction template for an insert with mask in mode MODE, with
18413 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18415 const char *
18416 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
18418 int nb, ne;
18420 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18421 gcc_unreachable ();
18423 /* Prefer rldimi because rlwimi is cracked. */
18424 if (TARGET_POWERPC64
18425 && (!dot || mode == DImode)
18426 && GET_CODE (operands[4]) != LSHIFTRT
18427 && ne == INTVAL (operands[2]))
18429 operands[3] = GEN_INT (63 - nb);
18430 if (dot)
18431 return "rldimi. %0,%1,%2,%3";
18432 return "rldimi %0,%1,%2,%3";
18435 if (nb < 32 && ne < 32)
18437 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18438 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18439 operands[3] = GEN_INT (31 - nb);
18440 operands[4] = GEN_INT (31 - ne);
18441 if (dot)
18442 return "rlwimi. %0,%1,%2,%3,%4";
18443 return "rlwimi %0,%1,%2,%3,%4";
18446 gcc_unreachable ();
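/* Example: inserting the low byte of %1 into bits 16..23 of %0 (mask
0x00ff0000, left shift by 16) gives "rlwimi %0,%1,16,8,15" in SImode, or
"rldimi %0,%1,16,40" when the 64-bit form is usable (preferred above
because rlwimi is cracked). */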
18449 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
18450 using two machine instructions. */
18452 bool
18453 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
18455 /* There are two kinds of AND we can handle with two insns:
18456 1) those we can do with two rl* insns;
18457 2) ori[s];xori[s].
18459 We do not handle that last case yet. */
18461 /* If there is just one stretch of ones, we can do it. */
18462 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
18463 return true;
18465 /* Otherwise, fill in the lowest "hole"; if we can do the result with
18466 one insn, we can do the whole thing with two. */
18467 unsigned HOST_WIDE_INT val = INTVAL (c);
18468 unsigned HOST_WIDE_INT bit1 = val & -val;
18469 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18470 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18471 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18472 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
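/* Worked example: C = 0x00ff00ff has two stretches of ones. Here
bit1 = 0x1, bit2 = 0x100, val1 = 0x00ff0000 and bit3 = 0x00010000, so the
candidate mask with the hole filled is 0x00ff00ff + 0x00010000 - 0x100
= 0x00ffffff, a single stretch; the AND can therefore be done with two
rl* instructions. */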
18475 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
18476 If EXPAND is true, split rotate-and-mask instructions we generate to
18477 their constituent parts as well (this is used during expand); if DOT
18478 is 1, make the last insn a record-form instruction clobbering the
18479 destination GPR and setting the CC reg (from operands[3]); if 2, set
18480 that GPR as well as the CC reg. */
18482 void
18483 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
18485 gcc_assert (!(expand && dot));
18487 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
18489 /* If it is one stretch of ones, it is DImode; shift left, mask, then
18490 shift right. This generates better code than doing the masks without
18491 shifts, or shifting first right and then left. */
18492 int nb, ne;
18493 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
18495 gcc_assert (mode == DImode);
18497 int shift = 63 - nb;
18498 if (expand)
18500 rtx tmp1 = gen_reg_rtx (DImode);
18501 rtx tmp2 = gen_reg_rtx (DImode);
18502 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
18503 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
18504 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
18506 else
18508 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
18509 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
18510 emit_move_insn (operands[0], tmp);
18511 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
18512 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18514 return;
18517 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
18518 that does the rest. */
18519 unsigned HOST_WIDE_INT bit1 = val & -val;
18520 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
18521 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
18522 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
18524 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
18525 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
18527 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
18529 /* Two "no-rotate"-and-mask instructions, for SImode. */
18530 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
18532 gcc_assert (mode == SImode);
18534 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18535 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
18536 emit_move_insn (reg, tmp);
18537 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18538 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18539 return;
18542 gcc_assert (mode == DImode);
18544 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
18545 insns; we have to do the first in SImode, because it wraps. */
18546 if (mask2 <= 0xffffffff
18547 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
18549 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
18550 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
18551 GEN_INT (mask1));
18552 rtx reg_low = gen_lowpart (SImode, reg);
18553 emit_move_insn (reg_low, tmp);
18554 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
18555 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18556 return;
18559 /* Two rld* insns: rotate, clear the hole in the middle (which now is
18560 at the top end), rotate back and clear the other hole. */
18561 int right = exact_log2 (bit3);
18562 int left = 64 - right;
18564 /* Rotate the mask too. */
18565 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
18567 if (expand)
18569 rtx tmp1 = gen_reg_rtx (DImode);
18570 rtx tmp2 = gen_reg_rtx (DImode);
18571 rtx tmp3 = gen_reg_rtx (DImode);
18572 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
18573 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
18574 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
18575 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
18577 else
18579 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
18580 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
18581 emit_move_insn (operands[0], tmp);
18582 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
18583 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
18584 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
18588 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
18589 for lfq and stfq insns iff the registers are hard registers. */
18591 int
18592 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
18594 /* We might have been passed a SUBREG. */
18595 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
18596 return 0;
18598 /* We might have been passed non-floating-point registers. */
18599 if (!FP_REGNO_P (REGNO (reg1))
18600 || !FP_REGNO_P (REGNO (reg2)))
18601 return 0;
18603 return (REGNO (reg1) == REGNO (reg2) - 1);
18606 /* Return 1 if addr1 and addr2 are suitable for an lfq or stfq insn.
18607 addr1 and addr2 must be in consecutive memory locations
18608 (addr2 == addr1 + 8). */
18610 int
18611 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
18613 rtx addr1, addr2;
18614 unsigned int reg1, reg2;
18615 int offset1, offset2;
18617 /* The mems cannot be volatile. */
18618 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
18619 return 0;
18621 addr1 = XEXP (mem1, 0);
18622 addr2 = XEXP (mem2, 0);
18624 /* Extract an offset (if used) from the first addr. */
18625 if (GET_CODE (addr1) == PLUS)
18627 /* If not a REG, return zero. */
18628 if (GET_CODE (XEXP (addr1, 0)) != REG)
18629 return 0;
18630 else
18632 reg1 = REGNO (XEXP (addr1, 0));
18633 /* The offset must be constant! */
18634 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
18635 return 0;
18636 offset1 = INTVAL (XEXP (addr1, 1));
18639 else if (GET_CODE (addr1) != REG)
18640 return 0;
18641 else
18643 reg1 = REGNO (addr1);
18644 /* This was a simple (mem (reg)) expression. Offset is 0. */
18645 offset1 = 0;
18648 /* And now for the second addr. */
18649 if (GET_CODE (addr2) == PLUS)
18651 /* If not a REG, return zero. */
18652 if (GET_CODE (XEXP (addr2, 0)) != REG)
18653 return 0;
18654 else
18656 reg2 = REGNO (XEXP (addr2, 0));
18657 /* The offset must be constant. */
18658 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
18659 return 0;
18660 offset2 = INTVAL (XEXP (addr2, 1));
18663 else if (GET_CODE (addr2) != REG)
18664 return 0;
18665 else
18667 reg2 = REGNO (addr2);
18668 /* This was a simple (mem (reg)) expression. Offset is 0. */
18669 offset2 = 0;
18672 /* Both of these must have the same base register. */
18673 if (reg1 != reg2)
18674 return 0;
18676 /* The offset for the second addr must be 8 more than the first addr. */
18677 if (offset2 != offset1 + 8)
18678 return 0;
18680 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
18681 instructions. */
18682 return 1;
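/* Illustrative pair (register numbers made up): mem1 = (mem:DF (plus (reg
r3) (const_int 16))) and mem2 = (mem:DF (plus (reg r3) (const_int 24)))
share the base register and the offsets differ by 8, so the peephole may
fuse the two FP loads or stores into a single lfq or stfq. */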
18685 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
18686 need to use DDmode; in all other cases we can use the same mode. */
18687 static machine_mode
18688 rs6000_secondary_memory_needed_mode (machine_mode mode)
18690 if (lra_in_progress && mode == SDmode)
18691 return DDmode;
18692 return mode;
18695 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
18696 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
18697 only work on the traditional altivec registers, note if an altivec register
18698 was chosen. */
18700 static enum rs6000_reg_type
18701 register_to_reg_type (rtx reg, bool *is_altivec)
18703 HOST_WIDE_INT regno;
18704 enum reg_class rclass;
18706 if (GET_CODE (reg) == SUBREG)
18707 reg = SUBREG_REG (reg);
18709 if (!REG_P (reg))
18710 return NO_REG_TYPE;
18712 regno = REGNO (reg);
18713 if (regno >= FIRST_PSEUDO_REGISTER)
18715 if (!lra_in_progress && !reload_completed)
18716 return PSEUDO_REG_TYPE;
18718 regno = true_regnum (reg);
18719 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
18720 return PSEUDO_REG_TYPE;
18723 gcc_assert (regno >= 0);
18725 if (is_altivec && ALTIVEC_REGNO_P (regno))
18726 *is_altivec = true;
18728 rclass = rs6000_regno_regclass[regno];
18729 return reg_class_to_reg_type[(int)rclass];
18732 /* Helper function to return the cost of adding a TOC entry address. */
18734 static inline int
18735 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
18737 int ret;
18739 if (TARGET_CMODEL != CMODEL_SMALL)
18740 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
18742 else
18743 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
18745 return ret;
18748 /* Helper function for rs6000_secondary_reload to determine whether the memory
18749 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
18750 needs reloading. Return negative if the memory is not handled by the
18751 memory helper functions and a different reload method should be tried,
18752 0 if no additional instructions are needed, and positive to give the
18753 extra cost for the memory. */
18755 static int
18756 rs6000_secondary_reload_memory (rtx addr,
18757 enum reg_class rclass,
18758 machine_mode mode)
18760 int extra_cost = 0;
18761 rtx reg, and_arg, plus_arg0, plus_arg1;
18762 addr_mask_type addr_mask;
18763 const char *type = NULL;
18764 const char *fail_msg = NULL;
18766 if (GPR_REG_CLASS_P (rclass))
18767 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
18769 else if (rclass == FLOAT_REGS)
18770 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
18772 else if (rclass == ALTIVEC_REGS)
18773 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
18775 /* For the combined VSX_REGS, turn off Altivec AND -16. */
18776 else if (rclass == VSX_REGS)
18777 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
18778 & ~RELOAD_REG_AND_M16);
18780 /* If the register allocator hasn't made up its mind yet on the register
18781 class to use, settle on defaults. */
18782 else if (rclass == NO_REGS)
18784 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
18785 & ~RELOAD_REG_AND_M16);
18787 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
18788 addr_mask &= ~(RELOAD_REG_INDEXED
18789 | RELOAD_REG_PRE_INCDEC
18790 | RELOAD_REG_PRE_MODIFY);
18793 else
18794 addr_mask = 0;
18796 /* If the register isn't valid in this register class, just return now. */
18797 if ((addr_mask & RELOAD_REG_VALID) == 0)
18799 if (TARGET_DEBUG_ADDR)
18801 fprintf (stderr,
18802 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
18803 "not valid in class\n",
18804 GET_MODE_NAME (mode), reg_class_names[rclass]);
18805 debug_rtx (addr);
18808 return -1;
18811 switch (GET_CODE (addr))
18813 /* Does the register class support auto update forms for this mode? We
18814 don't need a scratch register, since the powerpc only supports
18815 PRE_INC, PRE_DEC, and PRE_MODIFY. */
18816 case PRE_INC:
18817 case PRE_DEC:
18818 reg = XEXP (addr, 0);
18819 if (!base_reg_operand (addr, GET_MODE (reg)))
18821 fail_msg = "no base register #1";
18822 extra_cost = -1;
18825 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18827 extra_cost = 1;
18828 type = "update";
18830 break;
18832 case PRE_MODIFY:
18833 reg = XEXP (addr, 0);
18834 plus_arg1 = XEXP (addr, 1);
18835 if (!base_reg_operand (reg, GET_MODE (reg))
18836 || GET_CODE (plus_arg1) != PLUS
18837 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
18839 fail_msg = "bad PRE_MODIFY";
18840 extra_cost = -1;
18843 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18845 extra_cost = 1;
18846 type = "update";
18848 break;
18850 /* Do we need to simulate AND -16 to clear the bottom address bits used
18851 in VMX load/stores? Only allow the AND for vector sizes. */
18852 case AND:
18853 and_arg = XEXP (addr, 0);
18854 if (GET_MODE_SIZE (mode) != 16
18855 || GET_CODE (XEXP (addr, 1)) != CONST_INT
18856 || INTVAL (XEXP (addr, 1)) != -16)
18858 fail_msg = "bad Altivec AND #1";
18859 extra_cost = -1;
18862 if (rclass != ALTIVEC_REGS)
18864 if (legitimate_indirect_address_p (and_arg, false))
18865 extra_cost = 1;
18867 else if (legitimate_indexed_address_p (and_arg, false))
18868 extra_cost = 2;
18870 else
18872 fail_msg = "bad Altivec AND #2";
18873 extra_cost = -1;
18876 type = "and";
18878 break;
18880 /* If this is an indirect address, make sure it is a base register. */
18881 case REG:
18882 case SUBREG:
18883 if (!legitimate_indirect_address_p (addr, false))
18885 extra_cost = 1;
18886 type = "move";
18888 break;
18890 /* If this is an indexed address, make sure the register class can handle
18891 indexed addresses for this mode. */
18892 case PLUS:
18893 plus_arg0 = XEXP (addr, 0);
18894 plus_arg1 = XEXP (addr, 1);
18896 /* (plus (plus (reg) (constant)) (constant)) is generated during
18897 push_reload processing, so handle it now. */
18898 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
18900 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18902 extra_cost = 1;
18903 type = "offset";
18907 /* (plus (plus (reg) (constant)) (reg)) is also generated during
18908 push_reload processing, so handle it now. */
18909 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
18911 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18913 extra_cost = 1;
18914 type = "indexed #2";
18918 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
18920 fail_msg = "no base register #2";
18921 extra_cost = -1;
18924 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
18926 if ((addr_mask & RELOAD_REG_INDEXED) == 0
18927 || !legitimate_indexed_address_p (addr, false))
18929 extra_cost = 1;
18930 type = "indexed";
18934 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
18935 && CONST_INT_P (plus_arg1))
18937 if (!quad_address_offset_p (INTVAL (plus_arg1)))
18939 extra_cost = 1;
18940 type = "vector d-form offset";
18944 /* Make sure the register class can handle offset addresses. */
18945 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18947 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18949 extra_cost = 1;
18950 type = "offset #2";
18954 else
18956 fail_msg = "bad PLUS";
18957 extra_cost = -1;
18960 break;
18962 case LO_SUM:
18963 /* Quad offsets are restricted and can't handle normal addresses. */
18964 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18966 extra_cost = -1;
18967 type = "vector d-form lo_sum";
18970 else if (!legitimate_lo_sum_address_p (mode, addr, false))
18972 fail_msg = "bad LO_SUM";
18973 extra_cost = -1;
18976 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18978 extra_cost = 1;
18979 type = "lo_sum";
18981 break;
18983 /* Static addresses need to create a TOC entry. */
18984 case CONST:
18985 case SYMBOL_REF:
18986 case LABEL_REF:
18987 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
18989 extra_cost = -1;
18990 type = "vector d-form lo_sum #2";
18993 else
18995 type = "address";
18996 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
18998 break;
19000 /* TOC references look like offsetable memory. */
19001 case UNSPEC:
19002 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19004 fail_msg = "bad UNSPEC";
19005 extra_cost = -1;
19008 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19010 extra_cost = -1;
19011 type = "vector d-form lo_sum #3";
19014 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19016 extra_cost = 1;
19017 type = "toc reference";
19019 break;
19021 default:
19023 fail_msg = "bad address";
19024 extra_cost = -1;
19028 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19030 if (extra_cost < 0)
19031 fprintf (stderr,
19032 "rs6000_secondary_reload_memory error: mode = %s, "
19033 "class = %s, addr_mask = '%s', %s\n",
19034 GET_MODE_NAME (mode),
19035 reg_class_names[rclass],
19036 rs6000_debug_addr_mask (addr_mask, false),
19037 (fail_msg != NULL) ? fail_msg : "<bad address>");
19039 else
19040 fprintf (stderr,
19041 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19042 "addr_mask = '%s', extra cost = %d, %s\n",
19043 GET_MODE_NAME (mode),
19044 reg_class_names[rclass],
19045 rs6000_debug_addr_mask (addr_mask, false),
19046 extra_cost,
19047 (type) ? type : "<none>");
19049 debug_rtx (addr);
19052 return extra_cost;
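/* Sketch of one path through the switch: loading a V4SImode value from
(plus (reg) (const_int 16)) into ALTIVEC_REGS before ISA 3.0 reaches the
"offset #2" case, since Altivec loads have no d-form addressing
(RELOAD_REG_OFFSET is clear), and the function returns 1: reload must use
one scratch GPR to form an indexed address for lvx. */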
19055 /* Helper function for rs6000_secondary_reload to return true if a move to a
19056 different register class is really a simple move. */
19058 static bool
19059 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19060 enum rs6000_reg_type from_type,
19061 machine_mode mode)
19063 int size = GET_MODE_SIZE (mode);
19065 /* Add support for the various direct moves available. In this function, we
19066 only look at cases where we don't need any extra registers, and one or more
19067 simple move insns are issued. Originally, small integers are not allowed
19068 in FPR/VSX registers. Single precision binary floating point is not a
19069 simple move because we need to convert to the single precision memory layout.
19070 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19071 need special direct move handling, which we do not support yet. */
19072 if (TARGET_DIRECT_MOVE
19073 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19074 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19076 if (TARGET_POWERPC64)
19078 /* ISA 2.07: MTVSRD or MFVSRD. */
19079 if (size == 8)
19080 return true;
19082 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19083 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19084 return true;
19087 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19088 if (TARGET_P8_VECTOR)
19090 if (mode == SImode)
19091 return true;
19093 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19094 return true;
19097 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19098 if (mode == SDmode)
19099 return true;
19102 /* Power6+: MFTGPR or MFFGPR. */
19103 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19104 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19105 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19106 return true;
19108 /* Move to/from SPR. */
19109 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19110 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19111 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19112 return true;
19114 return false;
19117 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
19118 special direct moves that involve allocating an extra register; return true
19119 and fill in SRI with the insn code and extra cost if there is a helper
19120 function for the move, or false if not. */
19122 static bool
19123 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19124 enum rs6000_reg_type from_type,
19125 machine_mode mode,
19126 secondary_reload_info *sri,
19127 bool altivec_p)
19129 bool ret = false;
19130 enum insn_code icode = CODE_FOR_nothing;
19131 int cost = 0;
19132 int size = GET_MODE_SIZE (mode);
19134 if (TARGET_POWERPC64 && size == 16)
19136 /* Handle moving 128-bit values from GPRs to VSX registers on
19137 ISA 2.07 (power8, power9) when running in 64-bit mode using
19138 XXPERMDI to glue the two 64-bit values back together. */
19139 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19141 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19142 icode = reg_addr[mode].reload_vsx_gpr;
19145 /* Handle moving 128-bit values from VSX registers to GPRs on
19146 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19147 bottom 64-bit value. */
19148 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19150 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19151 icode = reg_addr[mode].reload_gpr_vsx;
19155 else if (TARGET_POWERPC64 && mode == SFmode)
19157 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19159 cost = 3; /* xscvdpspn, mfvsrd, and. */
19160 icode = reg_addr[mode].reload_gpr_vsx;
19163 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19165 cost = 2; /* mtvsrz, xscvspdpn. */
19166 icode = reg_addr[mode].reload_vsx_gpr;
19170 else if (!TARGET_POWERPC64 && size == 8)
19172 /* Handle moving 64-bit values from GPRs to floating point registers on
19173 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19174 32-bit values back together. Altivec register classes must be handled
19175 specially since a different instruction is used, and the secondary
19176 reload support requires a single instruction class in the scratch
19177 register constraint. However, right now TFmode is not allowed in
19178 Altivec registers, so the pattern will never match. */
19179 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19181 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19182 icode = reg_addr[mode].reload_fpr_gpr;
19186 if (icode != CODE_FOR_nothing)
19188 ret = true;
19189 if (sri)
19191 sri->icode = icode;
19192 sri->extra_cost = cost;
19196 return ret;
19199 /* Return whether a move between two register classes can be done either
19200 directly (simple move) or via a pattern that uses a single extra temporary
19201 (using ISA 2.07's direct move in this case). */
19203 static bool
19204 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19205 enum rs6000_reg_type from_type,
19206 machine_mode mode,
19207 secondary_reload_info *sri,
19208 bool altivec_p)
19210 /* Fall back to load/store reloads if either type is not a register. */
19211 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19212 return false;
19214 /* If we haven't allocated registers yet, assume the move can be done for the
19215 standard register types. */
19216 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19217 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19218 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19219 return true;
19221 /* A move within the same set of registers is a simple move for
19222 non-specialized registers. */
19223 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19224 return true;
19226 /* Check whether a simple move can be done directly. */
19227 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19229 if (sri)
19231 sri->icode = CODE_FOR_nothing;
19232 sri->extra_cost = 0;
19234 return true;
19237 /* Now check if we can do it in a few steps. */
19238 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19239 altivec_p);
19242 /* Inform reload about cases where moving X with a mode MODE to a register in
19243 RCLASS requires an extra scratch or immediate register. Return the class
19244 needed for the immediate register.
19246 For VSX and Altivec, we may need a register to convert sp+offset into
19247 reg+sp.
19249 For misaligned 64-bit gpr loads and stores we need a register to
19250 convert an offset address to indirect. */
19252 static reg_class_t
19253 rs6000_secondary_reload (bool in_p,
19254 rtx x,
19255 reg_class_t rclass_i,
19256 machine_mode mode,
19257 secondary_reload_info *sri)
19259 enum reg_class rclass = (enum reg_class) rclass_i;
19260 reg_class_t ret = ALL_REGS;
19261 enum insn_code icode;
19262 bool default_p = false;
19263 bool done_p = false;
19265 /* Allow subreg of memory before/during reload. */
19266 bool memory_p = (MEM_P (x)
19267 || (!reload_completed && GET_CODE (x) == SUBREG
19268 && MEM_P (SUBREG_REG (x))));
19270 sri->icode = CODE_FOR_nothing;
19271 sri->t_icode = CODE_FOR_nothing;
19272 sri->extra_cost = 0;
19273 icode = ((in_p)
19274 ? reg_addr[mode].reload_load
19275 : reg_addr[mode].reload_store);
19277 if (REG_P (x) || register_operand (x, mode))
19279 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19280 bool altivec_p = (rclass == ALTIVEC_REGS);
19281 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19283 if (!in_p)
19284 std::swap (to_type, from_type);
19286 /* Can we do a direct move of some sort? */
19287 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19288 altivec_p))
19290 icode = (enum insn_code)sri->icode;
19291 default_p = false;
19292 done_p = true;
19293 ret = NO_REGS;
19297 /* Make sure 0.0 is not reloaded or forced into memory. */
19298 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19300 ret = NO_REGS;
19301 default_p = false;
19302 done_p = true;
19305 /* If this is a scalar floating point value and we want to load it into the
19306 traditional Altivec registers, move it via a traditional floating
19307 point register, unless we have D-form addressing. Also make sure that
19308 non-zero constants use an FPR. */
19309 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19310 && !mode_supports_vmx_dform (mode)
19311 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19312 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
19314 ret = FLOAT_REGS;
19315 default_p = false;
19316 done_p = true;
19319 /* Handle reload of load/stores if we have reload helper functions. */
19320 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19322 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19323 mode);
19325 if (extra_cost >= 0)
19327 done_p = true;
19328 ret = NO_REGS;
19329 if (extra_cost > 0)
19331 sri->extra_cost = extra_cost;
19332 sri->icode = icode;
19337 /* Handle unaligned loads and stores of integer registers. */
19338 if (!done_p && TARGET_POWERPC64
19339 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19340 && memory_p
19341 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19343 rtx addr = XEXP (x, 0);
19344 rtx off = address_offset (addr);
19346 if (off != NULL_RTX)
19348 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19349 unsigned HOST_WIDE_INT offset = INTVAL (off);
19351 /* We need a secondary reload when our legitimate_address_p
19352 says the address is good (as otherwise the entire address
19353 will be reloaded), and the offset is not a multiple of
19354 four or we have an address wrap. Address wrap will only
19355 occur for LO_SUMs since legitimate_offset_address_p
19356 rejects addresses for 16-byte mems that will wrap. */
19357 if (GET_CODE (addr) == LO_SUM
19358 ? (1 /* legitimate_address_p allows any offset for lo_sum */
19359 && ((offset & 3) != 0
19360 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
19361 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
19362 && (offset & 3) != 0))
19364 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
19365 if (in_p)
19366 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
19367 : CODE_FOR_reload_di_load);
19368 else
19369 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
19370 : CODE_FOR_reload_di_store);
19371 sri->extra_cost = 2;
19372 ret = NO_REGS;
19373 done_p = true;
19375 else
19376 default_p = true;
19378 else
19379 default_p = true;
19382 if (!done_p && !TARGET_POWERPC64
19383 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19384 && memory_p
19385 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
19387 rtx addr = XEXP (x, 0);
19388 rtx off = address_offset (addr);
19390 if (off != NULL_RTX)
19392 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19393 unsigned HOST_WIDE_INT offset = INTVAL (off);
19395 /* We need a secondary reload when our legitimate_address_p
19396 says the address is good (as otherwise the entire address
19397 will be reloaded), and we have a wrap.
19399 legitimate_lo_sum_address_p allows LO_SUM addresses to
19400 have any offset so test for wrap in the low 16 bits.
19402 legitimate_offset_address_p checks for the range
19403 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
19404 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
19405 [0x7ff4,0x7fff] respectively, so test for the
19406 intersection of these ranges, [0x7ffc,0x7fff] and
19407 [0x7ff4,0x7ff7] respectively.
19409 Note that the address we see here may have been
19410 manipulated by legitimize_reload_address. */
19411 if (GET_CODE (addr) == LO_SUM
19412 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
19413 : offset - (0x8000 - extra) < UNITS_PER_WORD)
19415 if (in_p)
19416 sri->icode = CODE_FOR_reload_si_load;
19417 else
19418 sri->icode = CODE_FOR_reload_si_store;
19419 sri->extra_cost = 2;
19420 ret = NO_REGS;
19421 done_p = true;
19423 else
19424 default_p = true;
19426 else
19427 default_p = true;
19430 if (!done_p)
19431 default_p = true;
19433 if (default_p)
19434 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
19436 gcc_assert (ret != ALL_REGS);
19438 if (TARGET_DEBUG_ADDR)
19440 fprintf (stderr,
19441 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
19442 "mode = %s",
19443 reg_class_names[ret],
19444 in_p ? "true" : "false",
19445 reg_class_names[rclass],
19446 GET_MODE_NAME (mode));
19448 if (reload_completed)
19449 fputs (", after reload", stderr);
19451 if (!done_p)
19452 fputs (", done_p not set", stderr);
19454 if (default_p)
19455 fputs (", default secondary reload", stderr);
19457 if (sri->icode != CODE_FOR_nothing)
19458 fprintf (stderr, ", reload func = %s, extra cost = %d",
19459 insn_data[sri->icode].name, sri->extra_cost);
19461 else if (sri->extra_cost > 0)
19462 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
19464 fputs ("\n", stderr);
19465 debug_rtx (x);
19468 return ret;
19471 /* Better tracing for rs6000_secondary_reload_inner. */
19473 static void
19474 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
19475 bool store_p)
19477 rtx set, clobber;
19479 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
19481 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
19482 store_p ? "store" : "load");
19484 if (store_p)
19485 set = gen_rtx_SET (mem, reg);
19486 else
19487 set = gen_rtx_SET (reg, mem);
19489 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
19490 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
19493 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
19494 ATTRIBUTE_NORETURN;
19496 static void
19497 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
19498 bool store_p)
19500 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
19501 gcc_unreachable ();
19504 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
19505 reload helper functions. These were identified in
19506 rs6000_secondary_reload_memory, and if reload decided to use the secondary
19507 reload, it calls the insns:
19508 reload_<RELOAD:mode>_<P:mptrsize>_store
19509 reload_<RELOAD:mode>_<P:mptrsize>_load
19511 which in turn calls this function, to do whatever is necessary to create
19512 valid addresses. */
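/* For example, if the address is (plus r9 r10) and the register class
   being reloaded cannot do indexed addressing in this mode, the PLUS
   is copied into SCRATCH below and the memory access becomes an
   indirect access through SCRATCH. */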
19514 void
19515 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
19517 int regno = true_regnum (reg);
19518 machine_mode mode = GET_MODE (reg);
19519 addr_mask_type addr_mask;
19520 rtx addr;
19521 rtx new_addr;
19522 rtx op_reg, op0, op1;
19523 rtx and_op;
19524 rtx cc_clobber;
19525 rtvec rv;
19527 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
19528 || !base_reg_operand (scratch, GET_MODE (scratch)))
19529 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19531 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
19532 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19534 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
19535 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19537 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
19538 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19540 else
19541 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19543 /* Make sure the mode is valid in this register class. */
19544 if ((addr_mask & RELOAD_REG_VALID) == 0)
19545 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19547 if (TARGET_DEBUG_ADDR)
19548 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
19550 new_addr = addr = XEXP (mem, 0);
19551 switch (GET_CODE (addr))
19553 /* Does the register class support auto update forms for this mode? If
19554 not, do the update now. We don't need a scratch register, since the
19555 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
19556 case PRE_INC:
19557 case PRE_DEC:
19558 op_reg = XEXP (addr, 0);
19559 if (!base_reg_operand (op_reg, Pmode))
19560 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19562 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19564 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
19565 new_addr = op_reg;
19567 break;
19569 case PRE_MODIFY:
19570 op0 = XEXP (addr, 0);
19571 op1 = XEXP (addr, 1);
19572 if (!base_reg_operand (op0, Pmode)
19573 || GET_CODE (op1) != PLUS
19574 || !rtx_equal_p (op0, XEXP (op1, 0)))
19575 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19577 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19579 emit_insn (gen_rtx_SET (op0, op1));
19580 new_addr = op0;
19582 break;
19584 /* Do we need to simulate AND -16 to clear the bottom address bits used
19585 in VMX load/stores? */
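/* The AND with -16 normally folds into lvx/stvx, which ignore the low
   four address bits. When this register class cannot use those forms,
   an explicit AND into SCRATCH is emitted instead, with a CC scratch
   clobber to match the rs6000 and patterns, which can set CR0. */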
19586 case AND:
19587 op0 = XEXP (addr, 0);
19588 op1 = XEXP (addr, 1);
19589 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
19591 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
19592 op_reg = op0;
19594 else if (GET_CODE (op1) == PLUS)
19596 emit_insn (gen_rtx_SET (scratch, op1));
19597 op_reg = scratch;
19600 else
19601 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19603 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
19604 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
19605 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
19606 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
19607 new_addr = scratch;
19609 break;
19611 /* If this is an indirect address, make sure it is a base register. */
19612 case REG:
19613 case SUBREG:
19614 if (!base_reg_operand (addr, GET_MODE (addr)))
19616 emit_insn (gen_rtx_SET (scratch, addr));
19617 new_addr = scratch;
19619 break;
19621 /* If this is an indexed address, make sure the register class can handle
19622 indexed addresses for this mode. */
19623 case PLUS:
19624 op0 = XEXP (addr, 0);
19625 op1 = XEXP (addr, 1);
19626 if (!base_reg_operand (op0, Pmode))
19627 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19629 else if (int_reg_operand (op1, Pmode))
19631 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19633 emit_insn (gen_rtx_SET (scratch, addr));
19634 new_addr = scratch;
19638 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
19640 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
19641 || !quad_address_p (addr, mode, false))
19643 emit_insn (gen_rtx_SET (scratch, addr));
19644 new_addr = scratch;
19648 /* Make sure the register class can handle offset addresses. */
19649 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19651 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19653 emit_insn (gen_rtx_SET (scratch, addr));
19654 new_addr = scratch;
19658 else
19659 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19661 break;
19663 case LO_SUM:
19664 op0 = XEXP (addr, 0);
19665 op1 = XEXP (addr, 1);
19666 if (!base_reg_operand (op0, Pmode))
19667 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19669 else if (int_reg_operand (op1, Pmode))
19671 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19673 emit_insn (gen_rtx_SET (scratch, addr));
19674 new_addr = scratch;
19678 /* Quad offsets are restricted and can't handle normal addresses. */
19679 else if (mode_supports_dq_form (mode))
19681 emit_insn (gen_rtx_SET (scratch, addr));
19682 new_addr = scratch;
19685 /* Make sure the register class can handle offset addresses. */
19686 else if (legitimate_lo_sum_address_p (mode, addr, false))
19688 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19690 emit_insn (gen_rtx_SET (scratch, addr));
19691 new_addr = scratch;
19695 else
19696 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19698 break;
19700 case SYMBOL_REF:
19701 case CONST:
19702 case LABEL_REF:
19703 rs6000_emit_move (scratch, addr, Pmode);
19704 new_addr = scratch;
19705 break;
19707 default:
19708 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
19711 /* Adjust the address if it changed. */
19712 if (addr != new_addr)
19714 mem = replace_equiv_address_nv (mem, new_addr);
19715 if (TARGET_DEBUG_ADDR)
19716 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
19719 /* Now create the move. */
19720 if (store_p)
19721 emit_insn (gen_rtx_SET (mem, reg));
19722 else
19723 emit_insn (gen_rtx_SET (reg, mem));
19725 return;
19728 /* Convert reloads involving 64-bit gprs and misaligned offset
19729 addressing, or multiple 32-bit gprs and offsets that are too large,
19730 to use indirect addressing. */
19732 void
19733 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
19735 int regno = true_regnum (reg);
19736 enum reg_class rclass;
19737 rtx addr;
19738 rtx scratch_or_premodify = scratch;
19740 if (TARGET_DEBUG_ADDR)
19742 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
19743 store_p ? "store" : "load");
19744 fprintf (stderr, "reg:\n");
19745 debug_rtx (reg);
19746 fprintf (stderr, "mem:\n");
19747 debug_rtx (mem);
19748 fprintf (stderr, "scratch:\n");
19749 debug_rtx (scratch);
19752 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
19753 gcc_assert (GET_CODE (mem) == MEM);
19754 rclass = REGNO_REG_CLASS (regno);
19755 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
19756 addr = XEXP (mem, 0);
19758 if (GET_CODE (addr) == PRE_MODIFY)
19760 gcc_assert (REG_P (XEXP (addr, 0))
19761 && GET_CODE (XEXP (addr, 1)) == PLUS
19762 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
19763 scratch_or_premodify = XEXP (addr, 0);
19764 if (!HARD_REGISTER_P (scratch_or_premodify))
19765 /* If we have a pseudo here then reload will have arranged
19766 to have it replaced, but only in the original insn.
19767 Use the replacement here too. */
19768 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
19770 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
19771 expressions from the original insn, without unsharing them.
19772 Any RTL that points into the original insn will of course
19773 have register replacements applied. That is why we don't
19774 need to look for replacements under the PLUS. */
19775 addr = XEXP (addr, 1);
19777 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
19779 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
19781 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
19783 /* Now create the move. */
19784 if (store_p)
19785 emit_insn (gen_rtx_SET (mem, reg));
19786 else
19787 emit_insn (gen_rtx_SET (reg, mem));
19789 return;
19792 /* Given an rtx X being reloaded into a reg required to be
19793 in class CLASS, return the class of reg to actually use.
19794 In general this is just CLASS; but on some machines
19795 in some cases it is preferable to use a more restrictive class.
19797 On the RS/6000, we have to return NO_REGS when we want to reload a
19798 floating-point CONST_DOUBLE to force it to be copied to memory.
19800 We also don't want to reload integer values into floating-point
19801 registers if we can at all help it. In fact, this can
19802 cause reload to die, if it tries to generate a reload of CTR
19803 into a FP register and discovers it doesn't have the memory location
19804 required.
19806 ??? Would it be a good idea to have reload do the converse, that is
19807 try to reload floating modes into FP registers if possible?
19810 static enum reg_class
19811 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
19813 machine_mode mode = GET_MODE (x);
19814 bool is_constant = CONSTANT_P (x);
19816 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
19817 reload class for it. */
19818 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19819 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
19820 return NO_REGS;
19822 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
19823 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
19824 return NO_REGS;
19826 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
19827 the reloading of address expressions using PLUS into floating point
19828 registers. */
19829 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
19831 if (is_constant)
19833 /* Zero is always allowed in all VSX registers. */
19834 if (x == CONST0_RTX (mode))
19835 return rclass;
19837 /* If this is a vector constant that can be formed with a few Altivec
19838 instructions, we want altivec registers. */
19839 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
19840 return ALTIVEC_REGS;
19842 /* If this is an integer constant that can easily be loaded into
19843 vector registers, allow it. */
19844 if (CONST_INT_P (x))
19846 HOST_WIDE_INT value = INTVAL (x);
19848 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
19849 2.06 can generate it in the Altivec registers with
19850 VSPLTI<x>. */
19851 if (value == -1)
19853 if (TARGET_P8_VECTOR)
19854 return rclass;
19855 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
19856 return ALTIVEC_REGS;
19857 else
19858 return NO_REGS;
19861 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
19862 a sign extend in the Altivec registers. */
19863 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
19864 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
19865 return ALTIVEC_REGS;
19868 /* Force constant to memory. */
19869 return NO_REGS;
19872 /* D-form addressing can easily reload the value. */
19873 if (mode_supports_vmx_dform (mode)
19874 || mode_supports_dq_form (mode))
19875 return rclass;
19877 /* If this is a scalar floating point value and we don't have D-form
19878 addressing, prefer the traditional floating point registers so that we
19879 can use D-form (register+offset) addressing. */
19880 if (rclass == VSX_REGS
19881 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
19882 return FLOAT_REGS;
19884 /* Prefer the Altivec registers if Altivec is handling the vector
19885 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
19886 loads. */
19887 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
19888 || mode == V1TImode)
19889 return ALTIVEC_REGS;
19891 return rclass;
19894 if (is_constant || GET_CODE (x) == PLUS)
19896 if (reg_class_subset_p (GENERAL_REGS, rclass))
19897 return GENERAL_REGS;
19898 if (reg_class_subset_p (BASE_REGS, rclass))
19899 return BASE_REGS;
19900 return NO_REGS;
19903 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
19904 return GENERAL_REGS;
19906 return rclass;
19909 /* Debug version of rs6000_preferred_reload_class. */
19910 static enum reg_class
19911 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
19913 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
19915 fprintf (stderr,
19916 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
19917 "mode = %s, x:\n",
19918 reg_class_names[ret], reg_class_names[rclass],
19919 GET_MODE_NAME (GET_MODE (x)));
19920 debug_rtx (x);
19922 return ret;
19925 /* If we are copying between FP or AltiVec registers and anything else, we need
19926 a memory location. The exception is when we are targeting ppc64 and the
19927 move to/from fpr to gpr instructions are available. Also, under VSX, you
19928 can copy vector registers from the FP register set to the Altivec register
19929 set and vice versa. */
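/* For example, with the ISA 2.07 direct move instructions a DImode
   copy between a GPR and an FPR needs no memory; without them the
   value must bounce through a stack slot. */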
19931 static bool
19932 rs6000_secondary_memory_needed (machine_mode mode,
19933 reg_class_t from_class,
19934 reg_class_t to_class)
19936 enum rs6000_reg_type from_type, to_type;
19937 bool altivec_p = ((from_class == ALTIVEC_REGS)
19938 || (to_class == ALTIVEC_REGS));
19940 /* If a simple/direct move is available, we don't need secondary memory */
19941 from_type = reg_class_to_reg_type[(int)from_class];
19942 to_type = reg_class_to_reg_type[(int)to_class];
19944 if (rs6000_secondary_reload_move (to_type, from_type, mode,
19945 (secondary_reload_info *)0, altivec_p))
19946 return false;
19948 /* If we have a floating point or vector register class, we need to use
19949 memory to transfer the data. */
19950 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
19951 return true;
19953 return false;
19956 /* Debug version of rs6000_secondary_memory_needed. */
19957 static bool
19958 rs6000_debug_secondary_memory_needed (machine_mode mode,
19959 reg_class_t from_class,
19960 reg_class_t to_class)
19962 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
19964 fprintf (stderr,
19965 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
19966 "to_class = %s, mode = %s\n",
19967 ret ? "true" : "false",
19968 reg_class_names[from_class],
19969 reg_class_names[to_class],
19970 GET_MODE_NAME (mode));
19972 return ret;
19975 /* Return the register class of a scratch register needed to copy IN into
19976 or out of a register in RCLASS in MODE. If it can be done directly,
19977 NO_REGS is returned. */
19979 static enum reg_class
19980 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
19981 rtx in)
19983 int regno;
19985 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
19986 #if TARGET_MACHO
19987 && MACHOPIC_INDIRECT
19988 #endif
19991 /* We cannot copy a symbolic operand directly into anything
19992 other than BASE_REGS for TARGET_ELF. So indicate that a
19993 register from BASE_REGS is needed as an intermediate
19994 register.
19996 On Darwin, pic addresses require a load from memory, which
19997 needs a base register. */
19998 if (rclass != BASE_REGS
19999 && (GET_CODE (in) == SYMBOL_REF
20000 || GET_CODE (in) == HIGH
20001 || GET_CODE (in) == LABEL_REF
20002 || GET_CODE (in) == CONST))
20003 return BASE_REGS;
20006 if (GET_CODE (in) == REG)
20008 regno = REGNO (in);
20009 if (regno >= FIRST_PSEUDO_REGISTER)
20011 regno = true_regnum (in);
20012 if (regno >= FIRST_PSEUDO_REGISTER)
20013 regno = -1;
20016 else if (GET_CODE (in) == SUBREG)
20018 regno = true_regnum (in);
20019 if (regno >= FIRST_PSEUDO_REGISTER)
20020 regno = -1;
20022 else
20023 regno = -1;
20025 /* If we have VSX register moves, prefer moving scalar values between
20026 Altivec registers and GPR by going via an FPR (and then via memory)
20027 instead of reloading the secondary memory address for Altivec moves. */
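/* E.g. an SImode value in an Altivec register that must end up in a
   GPR is steered through FLOAT_REGS here: the FPR can be stored with
   an ordinary D-form instruction, so no Altivec-style memory address
   has to be reloaded for the bounce through memory. */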
20028 if (TARGET_VSX
20029 && GET_MODE_SIZE (mode) < 16
20030 && !mode_supports_vmx_dform (mode)
20031 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20032 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20033 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20034 && (regno >= 0 && INT_REGNO_P (regno)))))
20035 return FLOAT_REGS;
20037 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20038 into anything. */
20039 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20040 || (regno >= 0 && INT_REGNO_P (regno)))
20041 return NO_REGS;
20043 /* Constants, memory, and VSX registers can go into VSX registers (both the
20044 traditional floating point and the altivec registers). */
20045 if (rclass == VSX_REGS
20046 && (regno == -1 || VSX_REGNO_P (regno)))
20047 return NO_REGS;
20049 /* Constants, memory, and FP registers can go into FP registers. */
20050 if ((regno == -1 || FP_REGNO_P (regno))
20051 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20052 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20054 /* Memory, and AltiVec registers can go into AltiVec registers. */
20055 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20056 && rclass == ALTIVEC_REGS)
20057 return NO_REGS;
20059 /* We can copy among the CR registers. */
20060 if ((rclass == CR_REGS || rclass == CR0_REGS)
20061 && regno >= 0 && CR_REGNO_P (regno))
20062 return NO_REGS;
20064 /* Otherwise, we need GENERAL_REGS. */
20065 return GENERAL_REGS;
20068 /* Debug version of rs6000_secondary_reload_class. */
20069 static enum reg_class
20070 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20071 machine_mode mode, rtx in)
20073 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20074 fprintf (stderr,
20075 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20076 "mode = %s, input rtx:\n",
20077 reg_class_names[ret], reg_class_names[rclass],
20078 GET_MODE_NAME (mode));
20079 debug_rtx (in);
20081 return ret;
20084 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20086 static bool
20087 rs6000_can_change_mode_class (machine_mode from,
20088 machine_mode to,
20089 reg_class_t rclass)
20091 unsigned from_size = GET_MODE_SIZE (from);
20092 unsigned to_size = GET_MODE_SIZE (to);
20094 if (from_size != to_size)
20096 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20098 if (reg_classes_intersect_p (xclass, rclass))
20100 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
20101 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
20102 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20103 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20105 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20106 single register under VSX because the scalar part of the register
20107 is in the upper 64-bits, and not the lower 64-bits. Types like
20108 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
20109 IEEE floating point can't overlap, and neither can small
20110 values. */
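/* Concretely, a 64-bit scalar occupies the upper half of its VSX
   register while subreg numbering assumes the memory layout of the
   128-bit value, so e.g. (subreg:DI (reg:KF ...) 0) cannot be given a
   consistent meaning and must be rejected. */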
20112 if (to_float128_vector_p && from_float128_vector_p)
20113 return true;
20115 else if (to_float128_vector_p || from_float128_vector_p)
20116 return false;
20118 /* TDmode in floating-mode registers must always go into a register
20119 pair with the most significant word in the even-numbered register
20120 to match ISA requirements. In little-endian mode, this does not
20121 match subreg numbering, so we cannot allow subregs. */
20122 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20123 return false;
20125 if (from_size < 8 || to_size < 8)
20126 return false;
20128 if (from_size == 8 && (8 * to_nregs) != to_size)
20129 return false;
20131 if (to_size == 8 && (8 * from_nregs) != from_size)
20132 return false;
20134 return true;
20136 else
20137 return true;
20140 /* Since the VSX register set includes traditional floating point registers
20141 and altivec registers, just check for the size being different instead of
20142 trying to check whether the modes are vector modes. Otherwise it won't
20143 allow say DF and DI to change classes. For types like TFmode and TDmode
20144 that take 2 64-bit registers, rather than a single 128-bit register, don't
20145 allow subregs of those types to other 128 bit types. */
20146 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20148 unsigned num_regs = (from_size + 15) / 16;
20149 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20150 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20151 return false;
20153 return (from_size == 8 || from_size == 16);
20156 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20157 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20158 return false;
20160 return true;
20163 /* Debug version of rs6000_can_change_mode_class. */
20164 static bool
20165 rs6000_debug_can_change_mode_class (machine_mode from,
20166 machine_mode to,
20167 reg_class_t rclass)
20169 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20171 fprintf (stderr,
20172 "rs6000_can_change_mode_class, return %s, from = %s, "
20173 "to = %s, rclass = %s\n",
20174 ret ? "true" : "false",
20175 GET_MODE_NAME (from), GET_MODE_NAME (to),
20176 reg_class_names[rclass]);
20178 return ret;
20181 /* Return a string to do a move operation of 128 bits of data. */
20183 const char *
20184 rs6000_output_move_128bit (rtx operands[])
20186 rtx dest = operands[0];
20187 rtx src = operands[1];
20188 machine_mode mode = GET_MODE (dest);
20189 int dest_regno;
20190 int src_regno;
20191 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20192 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20194 if (REG_P (dest))
20196 dest_regno = REGNO (dest);
20197 dest_gpr_p = INT_REGNO_P (dest_regno);
20198 dest_fp_p = FP_REGNO_P (dest_regno);
20199 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20200 dest_vsx_p = dest_fp_p | dest_vmx_p;
20202 else
20204 dest_regno = -1;
20205 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20208 if (REG_P (src))
20210 src_regno = REGNO (src);
20211 src_gpr_p = INT_REGNO_P (src_regno);
20212 src_fp_p = FP_REGNO_P (src_regno);
20213 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20214 src_vsx_p = src_fp_p | src_vmx_p;
20216 else
20218 src_regno = -1;
20219 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20222 /* Register moves. */
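/* Returning "#" means no single instruction does the job; the insn is
   kept whole until after reload and then split into word-sized moves
   (see rs6000_split_128bit_ok_p below). */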
20223 if (dest_regno >= 0 && src_regno >= 0)
20225 if (dest_gpr_p)
20227 if (src_gpr_p)
20228 return "#";
20230 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20231 return (WORDS_BIG_ENDIAN
20232 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20233 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20235 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20236 return "#";
20239 else if (TARGET_VSX && dest_vsx_p)
20241 if (src_vsx_p)
20242 return "xxlor %x0,%x1,%x1";
20244 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20245 return (WORDS_BIG_ENDIAN
20246 ? "mtvsrdd %x0,%1,%L1"
20247 : "mtvsrdd %x0,%L1,%1");
20249 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20250 return "#";
20253 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20254 return "vor %0,%1,%1";
20256 else if (dest_fp_p && src_fp_p)
20257 return "#";
20260 /* Loads. */
20261 else if (dest_regno >= 0 && MEM_P (src))
20263 if (dest_gpr_p)
20265 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20266 return "lq %0,%1";
20267 else
20268 return "#";
20271 else if (TARGET_ALTIVEC && dest_vmx_p
20272 && altivec_indexed_or_indirect_operand (src, mode))
20273 return "lvx %0,%y1";
20275 else if (TARGET_VSX && dest_vsx_p)
20277 if (mode_supports_dq_form (mode)
20278 && quad_address_p (XEXP (src, 0), mode, true))
20279 return "lxv %x0,%1";
20281 else if (TARGET_P9_VECTOR)
20282 return "lxvx %x0,%y1";
20284 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20285 return "lxvw4x %x0,%y1";
20287 else
20288 return "lxvd2x %x0,%y1";
20291 else if (TARGET_ALTIVEC && dest_vmx_p)
20292 return "lvx %0,%y1";
20294 else if (dest_fp_p)
20295 return "#";
20298 /* Stores. */
20299 else if (src_regno >= 0 && MEM_P (dest))
20301 if (src_gpr_p)
20303 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20304 return "stq %1,%0";
20305 else
20306 return "#";
20309 else if (TARGET_ALTIVEC && src_vmx_p
20310 && altivec_indexed_or_indirect_operand (dest, mode))
20311 return "stvx %1,%y0";
20313 else if (TARGET_VSX && src_vsx_p)
20315 if (mode_supports_dq_form (mode)
20316 && quad_address_p (XEXP (dest, 0), mode, true))
20317 return "stxv %x1,%0";
20319 else if (TARGET_P9_VECTOR)
20320 return "stxvx %x1,%y0";
20322 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20323 return "stxvw4x %x1,%y0";
20325 else
20326 return "stxvd2x %x1,%y0";
20329 else if (TARGET_ALTIVEC && src_vmx_p)
20330 return "stvx %1,%y0";
20332 else if (src_fp_p)
20333 return "#";
20336 /* Constants. */
20337 else if (dest_regno >= 0
20338 && (GET_CODE (src) == CONST_INT
20339 || GET_CODE (src) == CONST_WIDE_INT
20340 || GET_CODE (src) == CONST_DOUBLE
20341 || GET_CODE (src) == CONST_VECTOR))
20343 if (dest_gpr_p)
20344 return "#";
20346 else if ((dest_vmx_p && TARGET_ALTIVEC)
20347 || (dest_vsx_p && TARGET_VSX))
20348 return output_vec_const_move (operands);
20351 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
20354 /* Validate a 128-bit move. */
20355 bool
20356 rs6000_move_128bit_ok_p (rtx operands[])
20358 machine_mode mode = GET_MODE (operands[0]);
20359 return (gpc_reg_operand (operands[0], mode)
20360 || gpc_reg_operand (operands[1], mode));
20363 /* Return true if a 128-bit move needs to be split. */
20364 bool
20365 rs6000_split_128bit_ok_p (rtx operands[])
20367 if (!reload_completed)
20368 return false;
20370 if (!gpr_or_gpr_p (operands[0], operands[1]))
20371 return false;
20373 if (quad_load_store_p (operands[0], operands[1]))
20374 return false;
20376 return true;
20380 /* Given a comparison operation, return the bit number in CCR to test. We
20381 know this is a valid comparison.
20383 SCC_P is 1 if this is for an scc. That means that %D will have been
20384 used instead of %C, so the bits will be in different places.
20386 Return -1 if OP isn't a valid comparison for some reason. */
20388 int
20389 ccr_bit (rtx op, int scc_p)
20391 enum rtx_code code = GET_CODE (op);
20392 machine_mode cc_mode;
20393 int cc_regnum;
20394 int base_bit;
20395 rtx reg;
20397 if (!COMPARISON_P (op))
20398 return -1;
20400 reg = XEXP (op, 0);
20402 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
20404 cc_mode = GET_MODE (reg);
20405 cc_regnum = REGNO (reg);
20406 base_bit = 4 * (cc_regnum - CR0_REGNO);
20408 validate_condition_mode (code, cc_mode);
20410 /* When generating a sCOND operation, only positive conditions are
20411 allowed. */
20412 gcc_assert (!scc_p
20413 || code == EQ || code == GT || code == LT || code == UNORDERED
20414 || code == GTU || code == LTU);
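/* Within a CR field the architecture fixes the bit order: bit 0 is
   LT, bit 1 is GT, bit 2 is EQ, and bit 3 is SO/UN. E.g. GT on CR1
   gives base_bit == 4 and returns bit 5. */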
20416 switch (code)
20418 case NE:
20419 return scc_p ? base_bit + 3 : base_bit + 2;
20420 case EQ:
20421 return base_bit + 2;
20422 case GT: case GTU: case UNLE:
20423 return base_bit + 1;
20424 case LT: case LTU: case UNGE:
20425 return base_bit;
20426 case ORDERED: case UNORDERED:
20427 return base_bit + 3;
20429 case GE: case GEU:
20430 /* If scc, we will have done a cror to put the bit in the
20431 unordered position. So test that bit. For integer, this is ! LT
20432 unless this is an scc insn. */
20433 return scc_p ? base_bit + 3 : base_bit;
20435 case LE: case LEU:
20436 return scc_p ? base_bit + 3 : base_bit + 1;
20438 default:
20439 gcc_unreachable ();
20443 /* Return the GOT register. */
20445 rtx
20446 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
20448 /* The second flow pass currently (June 1999) can't update
20449 regs_ever_live without disturbing other parts of the compiler, so
20450 update it here to make the prolog/epilogue code happy. */
20451 if (!can_create_pseudo_p ()
20452 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20453 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
20455 crtl->uses_pic_offset_table = 1;
20457 return pic_offset_table_rtx;
20460 static rs6000_stack_t stack_info;
20462 /* Function to init struct machine_function.
20463 This will be called, via a pointer variable,
20464 from push_function_context. */
20466 static struct machine_function *
20467 rs6000_init_machine_status (void)
20469 stack_info.reload_completed = 0;
20470 return ggc_cleared_alloc<machine_function> ();
20473 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
20475 /* Write out a function code label. */
20477 void
20478 rs6000_output_function_entry (FILE *file, const char *fname)
20480 if (fname[0] != '.')
20482 switch (DEFAULT_ABI)
20484 default:
20485 gcc_unreachable ();
20487 case ABI_AIX:
20488 if (DOT_SYMBOLS)
20489 putc ('.', file);
20490 else
20491 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
20492 break;
20494 case ABI_ELFv2:
20495 case ABI_V4:
20496 case ABI_DARWIN:
20497 break;
20501 RS6000_OUTPUT_BASENAME (file, fname);
20504 /* Print an operand. Recognize special options, documented below. */
20506 #if TARGET_ELF
20507 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
20508 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
20509 #else
20510 #define SMALL_DATA_RELOC "sda21"
20511 #define SMALL_DATA_REG 0
20512 #endif
20514 void
20515 print_operand (FILE *file, rtx x, int code)
20517 int i;
20518 unsigned HOST_WIDE_INT uval;
20520 switch (code)
20522 /* %a is output_address. */
20524 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
20525 output_operand. */
20527 case 'D':
20528 /* Like 'J' but get to the GT bit only. */
20529 gcc_assert (REG_P (x));
20531 /* Bit 1 is GT bit. */
20532 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
20534 /* Add one for shift count in rlinm for scc. */
20535 fprintf (file, "%d", i + 1);
20536 return;
20538 case 'e':
20539 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
20540 if (! INT_P (x))
20542 output_operand_lossage ("invalid %%e value");
20543 return;
20546 uval = INTVAL (x);
20547 if ((uval & 0xffff) == 0 && uval != 0)
20548 putc ('s', file);
20549 return;
20551 case 'E':
20552 /* X is a CR register. Print the number of the EQ bit of the CR */
20553 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20554 output_operand_lossage ("invalid %%E value");
20555 else
20556 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
20557 return;
20559 case 'f':
20560 /* X is a CR register. Print the shift count needed to move it
20561 to the high-order four bits. */
20562 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20563 output_operand_lossage ("invalid %%f value");
20564 else
20565 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
20566 return;
20568 case 'F':
20569 /* Similar, but print the count for the rotate in the opposite
20570 direction. */
20571 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20572 output_operand_lossage ("invalid %%F value");
20573 else
20574 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
20575 return;
20577 case 'G':
20578 /* X is a constant integer. If it is negative, print "m",
20579 otherwise print "z". This is to make an aze or ame insn. */
20580 if (GET_CODE (x) != CONST_INT)
20581 output_operand_lossage ("invalid %%G value");
20582 else if (INTVAL (x) >= 0)
20583 putc ('z', file);
20584 else
20585 putc ('m', file);
20586 return;
20588 case 'h':
20589 /* If constant, output low-order five bits. Otherwise, write
20590 normally. */
20591 if (INT_P (x))
20592 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
20593 else
20594 print_operand (file, x, 0);
20595 return;
20597 case 'H':
20598 /* If constant, output low-order six bits. Otherwise, write
20599 normally. */
20600 if (INT_P (x))
20601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
20602 else
20603 print_operand (file, x, 0);
20604 return;
20606 case 'I':
20607 /* Print `i' if this is a constant, else nothing. */
20608 if (INT_P (x))
20609 putc ('i', file);
20610 return;
20612 case 'j':
20613 /* Write the bit number in CCR for jump. */
20614 i = ccr_bit (x, 0);
20615 if (i == -1)
20616 output_operand_lossage ("invalid %%j code");
20617 else
20618 fprintf (file, "%d", i);
20619 return;
20621 case 'J':
20622 /* Similar, but add one for shift count in rlinm for scc and pass
20623 scc flag to `ccr_bit'. */
20624 i = ccr_bit (x, 1);
20625 if (i == -1)
20626 output_operand_lossage ("invalid %%J code");
20627 else
20628 /* If we want bit 31, write a shift count of zero, not 32. */
20629 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20630 return;
20632 case 'k':
20633 /* X must be a constant. Write the 1's complement of the
20634 constant. */
20635 if (! INT_P (x))
20636 output_operand_lossage ("invalid %%k value");
20637 else
20638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
20639 return;
20641 case 'K':
20642 /* X must be a symbolic constant on ELF. Write an
20643 expression suitable for an 'addi' that adds in the low 16
20644 bits of the MEM. */
20645 if (GET_CODE (x) == CONST)
20647 if (GET_CODE (XEXP (x, 0)) != PLUS
20648 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
20649 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
20650 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
20651 output_operand_lossage ("invalid %%K value");
20653 print_operand_address (file, x);
20654 fputs ("@l", file);
20655 return;
20657 /* %l is output_asm_label. */
20659 case 'L':
20660 /* Write second word of DImode or DFmode reference. Works on register
20661 or non-indexed memory only. */
20662 if (REG_P (x))
20663 fputs (reg_names[REGNO (x) + 1], file);
20664 else if (MEM_P (x))
20666 machine_mode mode = GET_MODE (x);
20667 /* Handle possible auto-increment. Since it is pre-increment and
20668 we have already done it, we can just use an offset of word. */
20669 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20670 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20671 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20672 UNITS_PER_WORD));
20673 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20674 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
20675 UNITS_PER_WORD));
20676 else
20677 output_address (mode, XEXP (adjust_address_nv (x, SImode,
20678 UNITS_PER_WORD),
20679 0));
20681 if (small_data_operand (x, GET_MODE (x)))
20682 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20683 reg_names[SMALL_DATA_REG]);
20685 return;
20687 case 'N': /* Unused */
20688 /* Write the number of elements in the vector times 4. */
20689 if (GET_CODE (x) != PARALLEL)
20690 output_operand_lossage ("invalid %%N value");
20691 else
20692 fprintf (file, "%d", XVECLEN (x, 0) * 4);
20693 return;
20695 case 'O': /* Unused */
20696 /* Similar, but subtract 1 first. */
20697 if (GET_CODE (x) != PARALLEL)
20698 output_operand_lossage ("invalid %%O value");
20699 else
20700 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
20701 return;
20703 case 'p':
20704 /* X is a CONST_INT that is a power of two. Output the logarithm. */
20705 if (! INT_P (x)
20706 || INTVAL (x) < 0
20707 || (i = exact_log2 (INTVAL (x))) < 0)
20708 output_operand_lossage ("invalid %%p value");
20709 else
20710 fprintf (file, "%d", i);
20711 return;
20713 case 'P':
20714 /* The operand must be an indirect memory reference. The result
20715 is the register name. */
20716 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
20717 || REGNO (XEXP (x, 0)) >= 32)
20718 output_operand_lossage ("invalid %%P value");
20719 else
20720 fputs (reg_names[REGNO (XEXP (x, 0))], file);
20721 return;
20723 case 'q':
20724 /* This outputs the logical code corresponding to a boolean
20725 expression. The expression may have one or both operands
20726 negated (if one, only the first one). For condition register
20727 logical operations, it will also treat the negated
20728 CR codes as NOTs, but not handle NOTs of them. */
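/* For example, (and (not a) b) prints "andc" (tbl[0][1]), and
   (ior (not a) (not b)) prints "nand" (tbl[1][2]). */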
20730 const char *const *t = 0;
20731 const char *s;
20732 enum rtx_code code = GET_CODE (x);
20733 static const char * const tbl[3][3] = {
20734 { "and", "andc", "nor" },
20735 { "or", "orc", "nand" },
20736 { "xor", "eqv", "xor" } };
20738 if (code == AND)
20739 t = tbl[0];
20740 else if (code == IOR)
20741 t = tbl[1];
20742 else if (code == XOR)
20743 t = tbl[2];
20744 else
20745 output_operand_lossage ("invalid %%q value");
20747 if (GET_CODE (XEXP (x, 0)) != NOT)
20748 s = t[0];
20749 else
20751 if (GET_CODE (XEXP (x, 1)) == NOT)
20752 s = t[2];
20753 else
20754 s = t[1];
20757 fputs (s, file);
20759 return;
20761 case 'Q':
20762 if (! TARGET_MFCRF)
20763 return;
20764 fputc (',', file);
20765 /* FALLTHRU */
20767 case 'R':
20768 /* X is a CR register. Print the mask for `mtcrf'. */
20769 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
20770 output_operand_lossage ("invalid %%R value");
20771 else
20772 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
20773 return;
20775 case 's':
20776 /* Low 5 bits of 32 - value */
20777 if (! INT_P (x))
20778 output_operand_lossage ("invalid %%s value");
20779 else
20780 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
20781 return;
20783 case 't':
20784 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
20785 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
20787 /* Bit 3 is OV bit. */
20788 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
20790 /* If we want bit 31, write a shift count of zero, not 32. */
20791 fprintf (file, "%d", i == 31 ? 0 : i + 1);
20792 return;
20794 case 'T':
20795 /* Print the symbolic name of a branch target register. */
20796 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
20797 && REGNO (x) != CTR_REGNO))
20798 output_operand_lossage ("invalid %%T value");
20799 else if (REGNO (x) == LR_REGNO)
20800 fputs ("lr", file);
20801 else
20802 fputs ("ctr", file);
20803 return;
20805 case 'u':
20806 /* High-order or low-order 16 bits of constant, whichever is non-zero,
20807 for use in unsigned operand. */
20808 if (! INT_P (x))
20810 output_operand_lossage ("invalid %%u value");
20811 return;
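/* E.g. 0x12340000 has a zero low half and prints as 0x1234 after the
   shift below; 0x5678 prints as 0x5678. */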
20814 uval = INTVAL (x);
20815 if ((uval & 0xffff) == 0)
20816 uval >>= 16;
20818 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
20819 return;
20821 case 'v':
20822 /* High-order 16 bits of constant for use in signed operand. */
20823 if (! INT_P (x))
20824 output_operand_lossage ("invalid %%v value");
20825 else
20826 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
20827 (INTVAL (x) >> 16) & 0xffff);
20828 return;
20830 case 'U':
20831 /* Print `u' if this has an auto-increment or auto-decrement. */
20832 if (MEM_P (x)
20833 && (GET_CODE (XEXP (x, 0)) == PRE_INC
20834 || GET_CODE (XEXP (x, 0)) == PRE_DEC
20835 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
20836 putc ('u', file);
20837 return;
20839 case 'V':
20840 /* Print the trap code for this operand. */
20841 switch (GET_CODE (x))
20843 case EQ:
20844 fputs ("eq", file); /* 4 */
20845 break;
20846 case NE:
20847 fputs ("ne", file); /* 24 */
20848 break;
20849 case LT:
20850 fputs ("lt", file); /* 16 */
20851 break;
20852 case LE:
20853 fputs ("le", file); /* 20 */
20854 break;
20855 case GT:
20856 fputs ("gt", file); /* 8 */
20857 break;
20858 case GE:
20859 fputs ("ge", file); /* 12 */
20860 break;
20861 case LTU:
20862 fputs ("llt", file); /* 2 */
20863 break;
20864 case LEU:
20865 fputs ("lle", file); /* 6 */
20866 break;
20867 case GTU:
20868 fputs ("lgt", file); /* 1 */
20869 break;
20870 case GEU:
20871 fputs ("lge", file); /* 5 */
20872 break;
20873 default:
20874 gcc_unreachable ();
20876 break;
20878 case 'w':
20879 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
20880 normally. */
20881 if (INT_P (x))
20882 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
20883 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
20884 else
20885 print_operand (file, x, 0);
20886 return;
20888 case 'x':
20889 /* X is a FPR or Altivec register used in a VSX context. */
20890 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
20891 output_operand_lossage ("invalid %%x value");
20892 else
20894 int reg = REGNO (x);
20895 int vsx_reg = (FP_REGNO_P (reg)
20896 ? reg - 32
20897 : reg - FIRST_ALTIVEC_REGNO + 32);
20899 #ifdef TARGET_REGNAMES
20900 if (TARGET_REGNAMES)
20901 fprintf (file, "%%vs%d", vsx_reg);
20902 else
20903 #endif
20904 fprintf (file, "%d", vsx_reg);
20906 return;
20908 case 'X':
20909 if (MEM_P (x)
20910 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
20911 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
20912 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
20913 putc ('x', file);
20914 return;
20916 case 'Y':
20917 /* Like 'L', for third word of TImode/PTImode */
20918 if (REG_P (x))
20919 fputs (reg_names[REGNO (x) + 2], file);
20920 else if (MEM_P (x))
20922 machine_mode mode = GET_MODE (x);
20923 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20924 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20925 output_address (mode, plus_constant (Pmode,
20926 XEXP (XEXP (x, 0), 0), 8));
20927 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20928 output_address (mode, plus_constant (Pmode,
20929 XEXP (XEXP (x, 0), 0), 8));
20930 else
20931 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
20932 if (small_data_operand (x, GET_MODE (x)))
20933 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20934 reg_names[SMALL_DATA_REG]);
20936 return;
20938 case 'z':
20939 /* X is a SYMBOL_REF. Write out the name preceded by a
20940 period and without any trailing data in brackets. Used for function
20941 names. If we are configured for System V (or the embedded ABI) on
20942 the PowerPC, do not emit the period, since those systems do not use
20943 TOCs and the like. */
20944 gcc_assert (GET_CODE (x) == SYMBOL_REF);
20946 /* For macho, check to see if we need a stub. */
20947 if (TARGET_MACHO)
20949 const char *name = XSTR (x, 0);
20950 #if TARGET_MACHO
20951 if (darwin_emit_branch_islands
20952 && MACHOPIC_INDIRECT
20953 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
20954 name = machopic_indirection_name (x, /*stub_p=*/true);
20955 #endif
20956 assemble_name (file, name);
20958 else if (!DOT_SYMBOLS)
20959 assemble_name (file, XSTR (x, 0));
20960 else
20961 rs6000_output_function_entry (file, XSTR (x, 0));
20962 return;
20964 case 'Z':
20965 /* Like 'L', for last word of TImode/PTImode. */
20966 if (REG_P (x))
20967 fputs (reg_names[REGNO (x) + 3], file);
20968 else if (MEM_P (x))
20970 machine_mode mode = GET_MODE (x);
20971 if (GET_CODE (XEXP (x, 0)) == PRE_INC
20972 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
20973 output_address (mode, plus_constant (Pmode,
20974 XEXP (XEXP (x, 0), 0), 12));
20975 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
20976 output_address (mode, plus_constant (Pmode,
20977 XEXP (XEXP (x, 0), 0), 12));
20978 else
20979 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
20980 if (small_data_operand (x, GET_MODE (x)))
20981 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
20982 reg_names[SMALL_DATA_REG]);
20984 return;
20986 /* Print AltiVec memory operand. */
20987 case 'y':
20989 rtx tmp;
20991 gcc_assert (MEM_P (x));
20993 tmp = XEXP (x, 0);
20995 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
20996 && GET_CODE (tmp) == AND
20997 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
20998 && INTVAL (XEXP (tmp, 1)) == -16)
20999 tmp = XEXP (tmp, 0);
21000 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21001 && GET_CODE (tmp) == PRE_MODIFY)
21002 tmp = XEXP (tmp, 1);
21003 if (REG_P (tmp))
21004 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21005 else
21007 if (GET_CODE (tmp) != PLUS
21008 || !REG_P (XEXP (tmp, 0))
21009 || !REG_P (XEXP (tmp, 1)))
21011 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21012 break;
21015 if (REGNO (XEXP (tmp, 0)) == 0)
21016 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21017 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21018 else
21019 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21020 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21022 break;
21025 case 0:
21026 if (REG_P (x))
21027 fprintf (file, "%s", reg_names[REGNO (x)]);
21028 else if (MEM_P (x))
21030 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21031 know the width from the mode. */
21032 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21033 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21034 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21035 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21036 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21037 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21038 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21039 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21040 else
21041 output_address (GET_MODE (x), XEXP (x, 0));
21043 else
21045 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21046 /* This hack along with a corresponding hack in
21047 rs6000_output_addr_const_extra arranges to output addends
21048 where the assembler expects to find them. eg.
21049 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21050 without this hack would be output as "x@toc+4". We
21051 want "x+4@toc". */
21052 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21053 else
21054 output_addr_const (file, x);
21056 return;
21058 case '&':
21059 if (const char *name = get_some_local_dynamic_name ())
21060 assemble_name (file, name);
21061 else
21062 output_operand_lossage ("'%%&' used without any "
21063 "local dynamic TLS references");
21064 return;
21066 default:
21067 output_operand_lossage ("invalid %%xn code");
21071 /* Print the address of an operand. */
21073 void
21074 print_operand_address (FILE *file, rtx x)
21076 if (REG_P (x))
21077 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21078 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21079 || GET_CODE (x) == LABEL_REF)
21081 output_addr_const (file, x);
21082 if (small_data_operand (x, GET_MODE (x)))
21083 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21084 reg_names[SMALL_DATA_REG]);
21085 else
21086 gcc_assert (!TARGET_TOC);
21088 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21089 && REG_P (XEXP (x, 1)))
21091 if (REGNO (XEXP (x, 0)) == 0)
21092 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21093 reg_names[ REGNO (XEXP (x, 0)) ]);
21094 else
21095 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21096 reg_names[ REGNO (XEXP (x, 1)) ]);
21098 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21099 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21100 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21101 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21102 #if TARGET_MACHO
21103 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21104 && CONSTANT_P (XEXP (x, 1)))
21106 fprintf (file, "lo16(");
21107 output_addr_const (file, XEXP (x, 1));
21108 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21110 #endif
21111 #if TARGET_ELF
21112 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21113 && CONSTANT_P (XEXP (x, 1)))
21115 output_addr_const (file, XEXP (x, 1));
21116 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21118 #endif
21119 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21121 /* This hack along with a corresponding hack in
21122 rs6000_output_addr_const_extra arranges to output addends
21123 where the assembler expects to find them. eg.
21124 (lo_sum (reg 9)
21125 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21126 without this hack would be output as "x@toc+8@l(9)". We
21127 want "x+8@toc@l(9)". */
21128 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21129 if (GET_CODE (x) == LO_SUM)
21130 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21131 else
21132 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21134 else
21135 gcc_unreachable ();
21138 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21140 static bool
21141 rs6000_output_addr_const_extra (FILE *file, rtx x)
21143 if (GET_CODE (x) == UNSPEC)
21144 switch (XINT (x, 1))
21146 case UNSPEC_TOCREL:
21147 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21148 && REG_P (XVECEXP (x, 0, 1))
21149 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21150 output_addr_const (file, XVECEXP (x, 0, 0));
21151 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21153 if (INTVAL (tocrel_offset_oac) >= 0)
21154 fprintf (file, "+");
21155 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21157 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21159 putc ('-', file);
21160 assemble_name (file, toc_label_name);
21161 need_toc_init = 1;
21163 else if (TARGET_ELF)
21164 fputs ("@toc", file);
21165 return true;
21167 #if TARGET_MACHO
21168 case UNSPEC_MACHOPIC_OFFSET:
21169 output_addr_const (file, XVECEXP (x, 0, 0));
21170 putc ('-', file);
21171 machopic_output_function_base_name (file);
21172 return true;
21173 #endif
21175 return false;
21178 /* Target hook for assembling integer objects. The PowerPC version has
21179 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21180 is defined. It also needs to handle DI-mode objects on 64-bit
21181 targets. */
21183 static bool
21184 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21186 #ifdef RELOCATABLE_NEEDS_FIXUP
21187 /* Special handling for SI values. */
21188 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21190 static int recurse = 0;
21192 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21193 the .fixup section. Since the TOC section is already relocated, we
21194 don't need to mark it here. We used to skip the text section, but it
21195 should never be valid for relocated addresses to be placed in the text
21196 section. */
21197 if (DEFAULT_ABI == ABI_V4
21198 && (TARGET_RELOCATABLE || flag_pic > 1)
21199 && in_section != toc_section
21200 && !recurse
21201 && !CONST_SCALAR_INT_P (x)
21202 && CONSTANT_P (x))
21204 char buf[256];
21206 recurse = 1;
21207 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21208 fixuplabelno++;
21209 ASM_OUTPUT_LABEL (asm_out_file, buf);
21210 fprintf (asm_out_file, "\t.long\t(");
21211 output_addr_const (asm_out_file, x);
21212 fprintf (asm_out_file, ")@fixup\n");
21213 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21214 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21215 fprintf (asm_out_file, "\t.long\t");
21216 assemble_name (asm_out_file, buf);
21217 fprintf (asm_out_file, "\n\t.previous\n");
21218 recurse = 0;
21219 return true;
21221 /* Remove initial .'s to turn a -mcall-aixdesc function
21222 address into the address of the descriptor, not the function
21223 itself. */
21224 else if (GET_CODE (x) == SYMBOL_REF
21225 && XSTR (x, 0)[0] == '.'
21226 && DEFAULT_ABI == ABI_AIX)
21228 const char *name = XSTR (x, 0);
21229 while (*name == '.')
21230 name++;
21232 fprintf (asm_out_file, "\t.long\t%s\n", name);
21233 return true;
21236 #endif /* RELOCATABLE_NEEDS_FIXUP */
21237 return default_assemble_integer (x, size, aligned_p);
21240 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21241 /* Emit an assembler directive to set symbol visibility for DECL to
21242 VISIBILITY_TYPE. */
21244 static void
21245 rs6000_assemble_visibility (tree decl, int vis)
21247 if (TARGET_XCOFF)
21248 return;
21250 /* Functions need to have their entry point symbol visibility set as
21251 well as their descriptor symbol visibility. */
21252 if (DEFAULT_ABI == ABI_AIX
21253 && DOT_SYMBOLS
21254 && TREE_CODE (decl) == FUNCTION_DECL)
21256 static const char * const visibility_types[] = {
21257 NULL, "protected", "hidden", "internal"
21260 const char *name, *type;
21262 name = ((* targetm.strip_name_encoding)
21263 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21264 type = visibility_types[vis];
21266 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21267 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21269 else
21270 default_assemble_visibility (decl, vis);
21272 #endif
21274 enum rtx_code
21275 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21277 /* Reversal of FP compares takes care -- an ordered compare
21278 becomes an unordered compare and vice versa. */
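/* E.g. the reverse of LT must be UNGE, not GE: when an operand can be
   a NaN, both LT and GE are false, so reversing LT to GE would change
   the result whenever a NaN appears. */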
21279 if (mode == CCFPmode
21280 && (!flag_finite_math_only
21281 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21282 || code == UNEQ || code == LTGT))
21283 return reverse_condition_maybe_unordered (code);
21284 else
21285 return reverse_condition (code);
21288 /* Generate a compare for CODE. Return a brand-new rtx that
21289 represents the result of the compare. */
21291 static rtx
21292 rs6000_generate_compare (rtx cmp, machine_mode mode)
21294 machine_mode comp_mode;
21295 rtx compare_result;
21296 enum rtx_code code = GET_CODE (cmp);
21297 rtx op0 = XEXP (cmp, 0);
21298 rtx op1 = XEXP (cmp, 1);
21300 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21301 comp_mode = CCmode;
21302 else if (FLOAT_MODE_P (mode))
21303 comp_mode = CCFPmode;
21304 else if (code == GTU || code == LTU
21305 || code == GEU || code == LEU)
21306 comp_mode = CCUNSmode;
21307 else if ((code == EQ || code == NE)
21308 && unsigned_reg_p (op0)
21309 && (unsigned_reg_p (op1)
21310 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21311 /* These are unsigned values, perhaps there will be a later
21312 ordering compare that can be shared with this one. */
21313 comp_mode = CCUNSmode;
21314 else
21315 comp_mode = CCmode;
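/* E.g. (gtu x y) selects CCUNSmode so the compare is emitted as the
   unsigned cmpl form, while a signed (gt x y) uses CCmode and cmp. */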
21317 /* If we have an unsigned compare, make sure we don't have a signed value as
21318 an immediate. */
21319 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
21320 && INTVAL (op1) < 0)
21322 op0 = copy_rtx_if_shared (op0);
21323 op1 = force_reg (GET_MODE (op0), op1);
21324 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21327 /* First, the compare. */
21328 compare_result = gen_reg_rtx (comp_mode);
21330 /* IEEE 128-bit support in VSX registers when we do not have hardware
21331 support. */
21332 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21334 rtx libfunc = NULL_RTX;
21335 bool check_nan = false;
21336 rtx dest;
21338 switch (code)
21340 case EQ:
21341 case NE:
21342 libfunc = optab_libfunc (eq_optab, mode);
21343 break;
21345 case GT:
21346 case GE:
21347 libfunc = optab_libfunc (ge_optab, mode);
21348 break;
21350 case LT:
21351 case LE:
21352 libfunc = optab_libfunc (le_optab, mode);
21353 break;
21355 case UNORDERED:
21356 case ORDERED:
21357 libfunc = optab_libfunc (unord_optab, mode);
21358 code = (code == UNORDERED) ? NE : EQ;
21359 break;
21361 case UNGE:
21362 case UNGT:
21363 check_nan = true;
21364 libfunc = optab_libfunc (ge_optab, mode);
21365 code = (code == UNGE) ? GE : GT;
21366 break;
21368 case UNLE:
21369 case UNLT:
21370 check_nan = true;
21371 libfunc = optab_libfunc (le_optab, mode);
21372 code = (code == UNLE) ? LE : LT;
21373 break;
21375 case UNEQ:
21376 case LTGT:
21377 check_nan = true;
21378 libfunc = optab_libfunc (eq_optab, mode);
21379 code = (code == UNEQ) ? EQ : NE;
21380 break;
21382 default:
21383 gcc_unreachable ();
21386 gcc_assert (libfunc);
21388 if (!check_nan)
21389 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21390 SImode, op0, mode, op1, mode);
21392 /* The library signals an exception for signalling NaNs, so we need to
21393 handle isgreater, etc. by first checking isordered. */
21394 else
21396 rtx ne_rtx, normal_dest, unord_dest;
21397 rtx unord_func = optab_libfunc (unord_optab, mode);
21398 rtx join_label = gen_label_rtx ();
21399 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
21400 rtx unord_cmp = gen_reg_rtx (comp_mode);
21403 /* Test for either value being a NaN. */
21404 gcc_assert (unord_func);
21405 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
21406 SImode, op0, mode, op1, mode);
21408 /* Set the value to 1 if either value is a NaN, and jump to the join
21409 label. */
21410 dest = gen_reg_rtx (SImode);
21411 emit_move_insn (dest, const1_rtx);
21412 emit_insn (gen_rtx_SET (unord_cmp,
21413 gen_rtx_COMPARE (comp_mode, unord_dest,
21414 const0_rtx)));
21416 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
21417 emit_jump_insn (gen_rtx_SET (pc_rtx,
21418 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
21419 join_ref,
21420 pc_rtx)));
21422 /* Do the normal comparison, knowing that the values are not
21423 NaNs. */
21424 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
21425 SImode, op0, mode, op1, mode);
21427 emit_insn (gen_cstoresi4 (dest,
21428 gen_rtx_fmt_ee (code, SImode, normal_dest,
21429 const0_rtx),
21430 normal_dest, const0_rtx));
21432 /* Join the NaN and non-NaN paths. Compare dest against 0. */
21433 emit_label (join_label);
21434 code = NE;
21437 emit_insn (gen_rtx_SET (compare_result,
21438 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
21441 else
21443 /* Generate an XLC-compatible TFmode compare as a PARALLEL with extra
21444 CLOBBERs to match the cmptf_internal2 pattern. */
21445 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
21446 && FLOAT128_IBM_P (GET_MODE (op0))
21447 && TARGET_HARD_FLOAT)
21448 emit_insn (gen_rtx_PARALLEL (VOIDmode,
21449 gen_rtvec (10,
21450 gen_rtx_SET (compare_result,
21451 gen_rtx_COMPARE (comp_mode, op0, op1)),
21452 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21453 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21454 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21455 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21456 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21457 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21458 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21459 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
21460 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
21461 else if (GET_CODE (op1) == UNSPEC
21462 && XINT (op1, 1) == UNSPEC_SP_TEST)
21464 rtx op1b = XVECEXP (op1, 0, 0);
21465 comp_mode = CCEQmode;
21466 compare_result = gen_reg_rtx (CCEQmode);
21467 if (TARGET_64BIT)
21468 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
21469 else
21470 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
21472 else
21473 emit_insn (gen_rtx_SET (compare_result,
21474 gen_rtx_COMPARE (comp_mode, op0, op1)));
21477 /* Some kinds of FP comparisons need an OR operation;
21478 under flag_finite_math_only we don't bother. */
21479 if (FLOAT_MODE_P (mode)
21480 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
21481 && !flag_finite_math_only
21482 && (code == LE || code == GE
21483 || code == UNEQ || code == LTGT
21484 || code == UNGT || code == UNLT))
21486 enum rtx_code or1, or2;
21487 rtx or1_rtx, or2_rtx, compare2_rtx;
21488 rtx or_result = gen_reg_rtx (CCEQmode);
21490 switch (code)
21492 case LE: or1 = LT; or2 = EQ; break;
21493 case GE: or1 = GT; or2 = EQ; break;
21494 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
21495 case LTGT: or1 = LT; or2 = GT; break;
21496 case UNGT: or1 = UNORDERED; or2 = GT; break;
21497 case UNLT: or1 = UNORDERED; or2 = LT; break;
21498 default: gcc_unreachable ();
21500 validate_condition_mode (or1, comp_mode);
21501 validate_condition_mode (or2, comp_mode);
21502 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
21503 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
21504 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
21505 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
21506 const_true_rtx);
21507 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
21509 compare_result = or_result;
21510 code = EQ;
21513 validate_condition_mode (code, GET_MODE (compare_result));
21515 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
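/* Standalone model (plain C, hypothetical helpers, not part of this file)
   of the soft-float strategy above for the trap-safe predicates: first ask
   the library whether either operand is a NaN (the unord_optab call), and
   only run the ordered compare when both are ordered.  double stands in
   for KFmode; my_unord/my_ge stand in for the optab libfuncs.  */
static int my_unord (double a, double b) { return a != a || b != b; }
static int my_ge (double a, double b) { return a >= b ? 1 : -1; }

static int
soft_unge (double a, double b)
{
  if (my_unord (a, b))
    return 1;                   /* Unordered: UNGE is true.  */
  return my_ge (a, b) >= 0;     /* Ordered compare cannot signal here.  */
}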
21519 /* Return the diagnostic message string if the binary operation OP is
21520 not permitted on TYPE1 and TYPE2, NULL otherwise. */
21522 static const char*
21523 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
21524 const_tree type1,
21525 const_tree type2)
21527 machine_mode mode1 = TYPE_MODE (type1);
21528 machine_mode mode2 = TYPE_MODE (type2);
21530 /* For complex modes, use the inner type. */
21531 if (COMPLEX_MODE_P (mode1))
21532 mode1 = GET_MODE_INNER (mode1);
21534 if (COMPLEX_MODE_P (mode2))
21535 mode2 = GET_MODE_INNER (mode2);
21537 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
21538 double to intermix unless -mfloat128-convert. */
21539 if (mode1 == mode2)
21540 return NULL;
21542 if (!TARGET_FLOAT128_CVT)
21544 if ((mode1 == KFmode && mode2 == IFmode)
21545 || (mode1 == IFmode && mode2 == KFmode))
21546 return N_("__float128 and __ibm128 cannot be used in the same "
21547 "expression");
21549 if (TARGET_IEEEQUAD
21550 && ((mode1 == IFmode && mode2 == TFmode)
21551 || (mode1 == TFmode && mode2 == IFmode)))
21552 return N_("__ibm128 and long double cannot be used in the same "
21553 "expression");
21555 if (!TARGET_IEEEQUAD
21556 && ((mode1 == KFmode && mode2 == TFmode)
21557 || (mode1 == TFmode && mode2 == KFmode)))
21558 return N_("__float128 and long double cannot be used in the same "
21559 "expression");
21562 return NULL;
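/* What the diagnostic above rejects (a sketch, assuming a PowerPC target
   with -mfloat128 and without -mfloat128-convert); kept under #if 0 since
   it is expected not to compile:  */
#if 0
__float128 q;
__ibm128 d;
double
mix (void)
{
  return (double) (q + d); /* error: __float128 and __ibm128 cannot be
                              used in the same expression */
}
#endif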
21566 /* Expand floating point conversion to/from __float128 and __ibm128. */
21568 void
21569 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
21571 machine_mode dest_mode = GET_MODE (dest);
21572 machine_mode src_mode = GET_MODE (src);
21573 convert_optab cvt = unknown_optab;
21574 bool do_move = false;
21575 rtx libfunc = NULL_RTX;
21576 rtx dest2;
21577 typedef rtx (*rtx_2func_t) (rtx, rtx);
21578 rtx_2func_t hw_convert = (rtx_2func_t)0;
21579 size_t kf_or_tf;
21581 struct hw_conv_t {
21582 rtx_2func_t from_df;
21583 rtx_2func_t from_sf;
21584 rtx_2func_t from_si_sign;
21585 rtx_2func_t from_si_uns;
21586 rtx_2func_t from_di_sign;
21587 rtx_2func_t from_di_uns;
21588 rtx_2func_t to_df;
21589 rtx_2func_t to_sf;
21590 rtx_2func_t to_si_sign;
21591 rtx_2func_t to_si_uns;
21592 rtx_2func_t to_di_sign;
21593 rtx_2func_t to_di_uns;
21594 } hw_conversions[2] = {
21595 /* conversions to/from KFmode */
21597 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
21598 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
21599 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
21600 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
21601 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
21602 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
21603 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
21604 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
21605 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
21606 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
21607 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
21608 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
21611 /* conversions to/from TFmode */
21613 gen_extenddftf2_hw, /* TFmode <- DFmode. */
21614 gen_extendsftf2_hw, /* TFmode <- SFmode. */
21615 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
21616 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
21617 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
21618 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
21619 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
21620 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
21621 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
21622 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
21623 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
21624 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
21628 if (dest_mode == src_mode)
21629 gcc_unreachable ();
21631 /* Eliminate memory operations. */
21632 if (MEM_P (src))
21633 src = force_reg (src_mode, src);
21635 if (MEM_P (dest))
21637 rtx tmp = gen_reg_rtx (dest_mode);
21638 rs6000_expand_float128_convert (tmp, src, unsigned_p);
21639 rs6000_emit_move (dest, tmp, dest_mode);
21640 return;
21643 /* Convert to IEEE 128-bit floating point. */
21644 if (FLOAT128_IEEE_P (dest_mode))
21646 if (dest_mode == KFmode)
21647 kf_or_tf = 0;
21648 else if (dest_mode == TFmode)
21649 kf_or_tf = 1;
21650 else
21651 gcc_unreachable ();
21653 switch (src_mode)
21655 case E_DFmode:
21656 cvt = sext_optab;
21657 hw_convert = hw_conversions[kf_or_tf].from_df;
21658 break;
21660 case E_SFmode:
21661 cvt = sext_optab;
21662 hw_convert = hw_conversions[kf_or_tf].from_sf;
21663 break;
21665 case E_KFmode:
21666 case E_IFmode:
21667 case E_TFmode:
21668 if (FLOAT128_IBM_P (src_mode))
21669 cvt = sext_optab;
21670 else
21671 do_move = true;
21672 break;
21674 case E_SImode:
21675 if (unsigned_p)
21677 cvt = ufloat_optab;
21678 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
21680 else
21682 cvt = sfloat_optab;
21683 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
21685 break;
21687 case E_DImode:
21688 if (unsigned_p)
21690 cvt = ufloat_optab;
21691 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
21693 else
21695 cvt = sfloat_optab;
21696 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
21698 break;
21700 default:
21701 gcc_unreachable ();
21705 /* Convert from IEEE 128-bit floating point. */
21706 else if (FLOAT128_IEEE_P (src_mode))
21708 if (src_mode == KFmode)
21709 kf_or_tf = 0;
21710 else if (src_mode == TFmode)
21711 kf_or_tf = 1;
21712 else
21713 gcc_unreachable ();
21715 switch (dest_mode)
21717 case E_DFmode:
21718 cvt = trunc_optab;
21719 hw_convert = hw_conversions[kf_or_tf].to_df;
21720 break;
21722 case E_SFmode:
21723 cvt = trunc_optab;
21724 hw_convert = hw_conversions[kf_or_tf].to_sf;
21725 break;
21727 case E_KFmode:
21728 case E_IFmode:
21729 case E_TFmode:
21730 if (FLOAT128_IBM_P (dest_mode))
21731 cvt = trunc_optab;
21732 else
21733 do_move = true;
21734 break;
21736 case E_SImode:
21737 if (unsigned_p)
21739 cvt = ufix_optab;
21740 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
21742 else
21744 cvt = sfix_optab;
21745 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
21747 break;
21749 case E_DImode:
21750 if (unsigned_p)
21752 cvt = ufix_optab;
21753 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
21755 else
21757 cvt = sfix_optab;
21758 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
21760 break;
21762 default:
21763 gcc_unreachable ();
21767 /* Both IBM format. */
21768 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
21769 do_move = true;
21771 else
21772 gcc_unreachable ();
21774 /* Handle conversion between TFmode/KFmode/IFmode. */
21775 if (do_move)
21776 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
21778 /* Handle conversion if we have hardware support. */
21779 else if (TARGET_FLOAT128_HW && hw_convert)
21780 emit_insn ((hw_convert) (dest, src));
21782 /* Call an external function to do the conversion. */
21783 else if (cvt != unknown_optab)
21785 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
21786 gcc_assert (libfunc != NULL_RTX);
21788 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
21789 src, src_mode);
21791 gcc_assert (dest2 != NULL_RTX);
21792 if (!rtx_equal_p (dest, dest2))
21793 emit_move_insn (dest, dest2);
21796 else
21797 gcc_unreachable ();
21799 return;
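/* The hw_conversions table above is a two-level dispatch: the row is
   selected by which IEEE 128-bit mode is involved (KFmode vs TFmode), the
   column by the other mode and signedness.  A standalone sketch of the
   idiom with hypothetical names (not part of this file):  */
typedef int (*emit_fn) (int);
static int emit_kf_from_int (int x) { return x + 1; } /* stand-in emitters */
static int emit_tf_from_int (int x) { return x + 2; }

static const struct { emit_fn from_int; } conv_table[2] = {
  { emit_kf_from_int },         /* row 0: KFmode */
  { emit_tf_from_int },         /* row 1: TFmode */
};

static int
dispatch_convert (int kf_or_tf, int x)
{
  return conv_table[kf_or_tf].from_int (x);
}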
21803 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
21804 can be used as that dest register. Return the dest register. */
21807 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
21809 if (op2 == const0_rtx)
21810 return op1;
21812 if (GET_CODE (scratch) == SCRATCH)
21813 scratch = gen_reg_rtx (mode);
21815 if (logical_operand (op2, mode))
21816 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
21817 else
21818 emit_insn (gen_rtx_SET (scratch,
21819 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
21821 return scratch;
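/* The reduction above uses a word-size identity: a == b exactly when
   (a ^ b) == 0, and likewise when (a - b) == 0 in wrap-around arithmetic
   (XOR when OP2 fits a logical immediate, add-of-negation otherwise).
   A standalone check in plain C, unsigned so the subtraction is well
   defined:  */
#include <assert.h>

static void
check_eqne_identity (unsigned long a, unsigned long b)
{
  assert (((a ^ b) == 0) == (a == b));
  assert (((a - b) == 0) == (a == b));
}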
21824 void
21825 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
21827 rtx condition_rtx;
21828 machine_mode op_mode;
21829 enum rtx_code cond_code;
21830 rtx result = operands[0];
21832 condition_rtx = rs6000_generate_compare (operands[1], mode);
21833 cond_code = GET_CODE (condition_rtx);
21835 if (cond_code == NE
21836 || cond_code == GE || cond_code == LE
21837 || cond_code == GEU || cond_code == LEU
21838 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
21840 rtx not_result = gen_reg_rtx (CCEQmode);
21841 rtx not_op, rev_cond_rtx;
21842 machine_mode cc_mode;
21844 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
21846 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
21847 SImode, XEXP (condition_rtx, 0), const0_rtx);
21848 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
21849 emit_insn (gen_rtx_SET (not_result, not_op));
21850 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
21853 op_mode = GET_MODE (XEXP (operands[1], 0));
21854 if (op_mode == VOIDmode)
21855 op_mode = GET_MODE (XEXP (operands[1], 1));
21857 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
21859 PUT_MODE (condition_rtx, DImode);
21860 convert_move (result, condition_rtx, 0);
21862 else
21864 PUT_MODE (condition_rtx, SImode);
21865 emit_insn (gen_rtx_SET (result, condition_rtx));
21869 /* Emit a conditional branch testing the comparison in OPERANDS[0], targeting the label in OPERANDS[3]. */
21871 void
21872 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
21874 rtx condition_rtx, loc_ref;
21876 condition_rtx = rs6000_generate_compare (operands[0], mode);
21877 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
21878 emit_jump_insn (gen_rtx_SET (pc_rtx,
21879 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
21880 loc_ref, pc_rtx)));
21883 /* Return the string to output a conditional branch to LABEL, which is
21884 the operand template of the label, or NULL if the branch is really a
21885 conditional return.
21887 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
21888 condition code register and its mode specifies what kind of
21889 comparison we made.
21891 REVERSED is nonzero if we should reverse the sense of the comparison.
21893 INSN is the insn. */
21895 char *
21896 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
21898 static char string[64];
21899 enum rtx_code code = GET_CODE (op);
21900 rtx cc_reg = XEXP (op, 0);
21901 machine_mode mode = GET_MODE (cc_reg);
21902 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
21903 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
21904 int really_reversed = reversed ^ need_longbranch;
21905 char *s = string;
21906 const char *ccode;
21907 const char *pred;
21908 rtx note;
21910 validate_condition_mode (code, mode);
21912 /* Work out which way this really branches. We could always use
21913 reverse_condition_maybe_unordered here, but distinguishing the cases
21914 makes the resulting assembler clearer. */
21915 if (really_reversed)
21917 /* Reversal of FP compares requires care -- an ordered compare
21918 becomes an unordered compare and vice versa. */
21919 if (mode == CCFPmode)
21920 code = reverse_condition_maybe_unordered (code);
21921 else
21922 code = reverse_condition (code);
21925 switch (code)
21927 /* Not all of these are actually distinct opcodes, but
21928 we distinguish them for clarity of the resulting assembler. */
21929 case NE: case LTGT:
21930 ccode = "ne"; break;
21931 case EQ: case UNEQ:
21932 ccode = "eq"; break;
21933 case GE: case GEU:
21934 ccode = "ge"; break;
21935 case GT: case GTU: case UNGT:
21936 ccode = "gt"; break;
21937 case LE: case LEU:
21938 ccode = "le"; break;
21939 case LT: case LTU: case UNLT:
21940 ccode = "lt"; break;
21941 case UNORDERED: ccode = "un"; break;
21942 case ORDERED: ccode = "nu"; break;
21943 case UNGE: ccode = "nl"; break;
21944 case UNLE: ccode = "ng"; break;
21945 default:
21946 gcc_unreachable ();
21949 /* Maybe we have a guess as to how likely the branch is. */
21950 pred = "";
21951 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
21952 if (note != NULL_RTX)
21954 /* PROB is the difference from 50%. */
21955 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
21956 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
21958 /* Only hint for highly probable/improbable branches on newer cpus when
21959 we have real profile data, as static prediction overrides processor
21960 dynamic prediction. For older cpus we may as well always hint, but
21961 assume not taken for branches that are very close to 50% as a
21962 mispredicted taken branch is more expensive than a
21963 mispredicted not-taken branch. */
21964 if (rs6000_always_hint
21965 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
21966 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
21967 && br_prob_note_reliable_p (note)))
21969 if (abs (prob) > REG_BR_PROB_BASE / 20
21970 && ((prob > 0) ^ need_longbranch))
21971 pred = "+";
21972 else
21973 pred = "-";
21977 if (label == NULL)
21978 s += sprintf (s, "b%slr%s ", ccode, pred);
21979 else
21980 s += sprintf (s, "b%s%s ", ccode, pred);
21982 /* We need to escape any '%' characters in the reg_names string.
21983 Assume they'd only be the first character.... */
21984 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
21985 *s++ = '%';
21986 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
21988 if (label != NULL)
21990 /* If the branch distance was too far, we may have to use an
21991 unconditional branch to go the distance. */
21992 if (need_longbranch)
21993 s += sprintf (s, ",$+8\n\tb %s", label);
21994 else
21995 s += sprintf (s, ",%s", label);
21998 return string;
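/* A standalone sketch (hypothetical inputs, not part of this file) of the
   string assembled above: mnemonic, optional +/- static hint, CR field,
   then either the label or the $+8 long-branch sequence.  */
#include <stdio.h>

static void
show_cbranch (const char *ccode, const char *pred, const char *cr,
              const char *label, int need_longbranch)
{
  if (need_longbranch)
    /* Reversed short branch over an unconditional long one.  */
    printf ("b%s%s %s,$+8\n\tb %s\n", ccode, pred, cr, label);
  else
    printf ("b%s%s %s,%s\n", ccode, pred, cr, label);
}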
22001 /* Return insn for VSX or Altivec comparisons. */
22003 static rtx
22004 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22006 rtx mask;
22007 machine_mode mode = GET_MODE (op0);
22009 switch (code)
22011 default:
22012 break;
22014 case GE:
22015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22016 return NULL_RTX;
22017 /* FALLTHRU */
22019 case EQ:
22020 case GT:
22021 case GTU:
22022 case ORDERED:
22023 case UNORDERED:
22024 case UNEQ:
22025 case LTGT:
22026 mask = gen_reg_rtx (mode);
22027 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22028 return mask;
22031 return NULL_RTX;
22034 /* Emit a vector compare for operands OP0 and OP1 using code RCODE.
22035 DMODE is the expected destination mode. This is a recursive function. */
22037 static rtx
22038 rs6000_emit_vector_compare (enum rtx_code rcode,
22039 rtx op0, rtx op1,
22040 machine_mode dmode)
22042 rtx mask;
22043 bool swap_operands = false;
22044 bool try_again = false;
22046 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22047 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22049 /* See if the comparison works as is. */
22050 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22051 if (mask)
22052 return mask;
22054 switch (rcode)
22056 case LT:
22057 rcode = GT;
22058 swap_operands = true;
22059 try_again = true;
22060 break;
22061 case LTU:
22062 rcode = GTU;
22063 swap_operands = true;
22064 try_again = true;
22065 break;
22066 case NE:
22067 case UNLE:
22068 case UNLT:
22069 case UNGE:
22070 case UNGT:
22071 /* Invert condition and try again.
22072 e.g., A != B becomes ~(A==B). */
22074 enum rtx_code rev_code;
22075 enum insn_code nor_code;
22076 rtx mask2;
22078 rev_code = reverse_condition_maybe_unordered (rcode);
22079 if (rev_code == UNKNOWN)
22080 return NULL_RTX;
22082 nor_code = optab_handler (one_cmpl_optab, dmode);
22083 if (nor_code == CODE_FOR_nothing)
22084 return NULL_RTX;
22086 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22087 if (!mask2)
22088 return NULL_RTX;
22090 mask = gen_reg_rtx (dmode);
22091 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22092 return mask;
22094 break;
22095 case GE:
22096 case GEU:
22097 case LE:
22098 case LEU:
22099 /* Try GT/GTU/LT/LTU OR EQ */
22101 rtx c_rtx, eq_rtx;
22102 enum insn_code ior_code;
22103 enum rtx_code new_code;
22105 switch (rcode)
22107 case GE:
22108 new_code = GT;
22109 break;
22111 case GEU:
22112 new_code = GTU;
22113 break;
22115 case LE:
22116 new_code = LT;
22117 break;
22119 case LEU:
22120 new_code = LTU;
22121 break;
22123 default:
22124 gcc_unreachable ();
22127 ior_code = optab_handler (ior_optab, dmode);
22128 if (ior_code == CODE_FOR_nothing)
22129 return NULL_RTX;
22131 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22132 if (!c_rtx)
22133 return NULL_RTX;
22135 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22136 if (!eq_rtx)
22137 return NULL_RTX;
22139 mask = gen_reg_rtx (dmode);
22140 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22141 return mask;
22143 break;
22144 default:
22145 return NULL_RTX;
22148 if (try_again)
22150 if (swap_operands)
22151 std::swap (op0, op1);
22153 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22154 if (mask)
22155 return mask;
22158 /* You only get two chances. */
22159 return NULL_RTX;
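/* Element-wise model of the rewrites above in standalone C: the hardware
   provides EQ/GT/GTU lane masks (all-ones for true, all-zeros for false),
   so GE is composed as GT | EQ and NE as ~EQ.  */
static int lane_eq (int a, int b) { return a == b ? -1 : 0; }
static int lane_gt (int a, int b) { return a > b ? -1 : 0; }
static int lane_ge (int a, int b) { return lane_gt (a, b) | lane_eq (a, b); }
static int lane_ne (int a, int b) { return ~lane_eq (a, b); }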
22162 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22163 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22164 operands for the relation operation COND. */
22167 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22168 rtx cond, rtx cc_op0, rtx cc_op1)
22170 machine_mode dest_mode = GET_MODE (dest);
22171 machine_mode mask_mode = GET_MODE (cc_op0);
22172 enum rtx_code rcode = GET_CODE (cond);
22173 machine_mode cc_mode = CCmode;
22174 rtx mask;
22175 rtx cond2;
22176 bool invert_move = false;
22178 if (VECTOR_UNIT_NONE_P (dest_mode))
22179 return 0;
22181 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22182 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22184 switch (rcode)
22186 /* These tests have no direct vector form; invert the condition and
22187 swap the true/false arms instead. */
22188 case NE:
22189 case UNLE:
22190 case UNLT:
22191 case UNGE:
22192 case UNGT:
22193 /* Invert condition and try again.
22194 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22195 invert_move = true;
22196 rcode = reverse_condition_maybe_unordered (rcode);
22197 if (rcode == UNKNOWN)
22198 return 0;
22199 break;
22201 case GE:
22202 case LE:
22203 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22205 /* Invert condition to avoid compound test. */
22206 invert_move = true;
22207 rcode = reverse_condition (rcode);
22209 break;
22211 case GTU:
22212 case GEU:
22213 case LTU:
22214 case LEU:
22215 /* Mark unsigned tests with CCUNSmode. */
22216 cc_mode = CCUNSmode;
22218 /* Invert condition to avoid compound test if necessary. */
22219 if (rcode == GEU || rcode == LEU)
22221 invert_move = true;
22222 rcode = reverse_condition (rcode);
22224 break;
22226 default:
22227 break;
22230 /* Get the vector mask for the given relational operations. */
22231 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22233 if (!mask)
22234 return 0;
22236 if (invert_move)
22237 std::swap (op_true, op_false);
22239 /* Optimize constant-vector arms, knowing the compare mask lanes are -1/0. */
22240 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22241 && (GET_CODE (op_true) == CONST_VECTOR
22242 || GET_CODE (op_false) == CONST_VECTOR))
22244 rtx constant_0 = CONST0_RTX (dest_mode);
22245 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22247 if (op_true == constant_m1 && op_false == constant_0)
22249 emit_move_insn (dest, mask);
22250 return 1;
22253 else if (op_true == constant_0 && op_false == constant_m1)
22255 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22256 return 1;
22259 /* If we can't use the vector comparison directly, perhaps we can use
22260 the mask for the true or false fields, instead of loading up a
22261 constant. */
22262 if (op_true == constant_m1)
22263 op_true = mask;
22265 if (op_false == constant_0)
22266 op_false = mask;
22269 if (!REG_P (op_true) && !SUBREG_P (op_true))
22270 op_true = force_reg (dest_mode, op_true);
22272 if (!REG_P (op_false) && !SUBREG_P (op_false))
22273 op_false = force_reg (dest_mode, op_false);
22275 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22276 CONST0_RTX (dest_mode));
22277 emit_insn (gen_rtx_SET (dest,
22278 gen_rtx_IF_THEN_ELSE (dest_mode,
22279 cond2,
22280 op_true,
22281 op_false)));
22282 return 1;
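/* The if_then_else emitted above ultimately becomes a bitwise select
   (vsel/xxsel): every result bit comes from op_true where the mask bit is
   set and from op_false where it is clear.  Scalar model in plain C:  */
static unsigned int
bitwise_select (unsigned int mask, unsigned int t, unsigned int f)
{
  return (mask & t) | (~mask & f);
}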
22285 /* ISA 3.0 (power9) minmax subcase to emit an XSMAXCDP or XSMINCDP instruction
22286 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of the last
22287 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22288 hardware has no such operation. */
22290 static int
22291 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22293 enum rtx_code code = GET_CODE (op);
22294 rtx op0 = XEXP (op, 0);
22295 rtx op1 = XEXP (op, 1);
22296 machine_mode compare_mode = GET_MODE (op0);
22297 machine_mode result_mode = GET_MODE (dest);
22298 bool max_p = false;
22300 if (result_mode != compare_mode)
22301 return 0;
22303 if (code == GE || code == GT)
22304 max_p = true;
22305 else if (code == LE || code == LT)
22306 max_p = false;
22307 else
22308 return 0;
22310 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22313 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22314 max_p = !max_p;
22316 else
22317 return 0;
22319 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22320 return 1;
22323 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22324 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the
22325 operands of the last comparison is nonzero/true, FALSE_COND if it is
22326 zero/false. Return 0 if the hardware has no such operation. */
22328 static int
22329 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22331 enum rtx_code code = GET_CODE (op);
22332 rtx op0 = XEXP (op, 0);
22333 rtx op1 = XEXP (op, 1);
22334 machine_mode result_mode = GET_MODE (dest);
22335 rtx compare_rtx;
22336 rtx cmove_rtx;
22337 rtx clobber_rtx;
22339 if (!can_create_pseudo_p ())
22340 return 0;
22342 switch (code)
22344 case EQ:
22345 case GE:
22346 case GT:
22347 break;
22349 case NE:
22350 case LT:
22351 case LE:
22352 code = swap_condition (code);
22353 std::swap (op0, op1);
22354 break;
22356 default:
22357 return 0;
22360 /* Generate: [(parallel [(set (dest)
22361 (if_then_else (op (cmp1) (cmp2))
22362 (true)
22363 (false)))
22364 (clobber (scratch))])]. */
22366 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
22367 cmove_rtx = gen_rtx_SET (dest,
22368 gen_rtx_IF_THEN_ELSE (result_mode,
22369 compare_rtx,
22370 true_cond,
22371 false_cond));
22373 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
22374 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22375 gen_rtvec (2, cmove_rtx, clobber_rtx)));
22377 return 1;
22380 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
22381 operands of the last comparison is nonzero/true, FALSE_COND if it
22382 is zero/false. Return 0 if the hardware has no such operation. */
22385 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22387 enum rtx_code code = GET_CODE (op);
22388 rtx op0 = XEXP (op, 0);
22389 rtx op1 = XEXP (op, 1);
22390 machine_mode compare_mode = GET_MODE (op0);
22391 machine_mode result_mode = GET_MODE (dest);
22392 rtx temp;
22393 bool is_against_zero;
22395 /* These modes should always match. */
22396 if (GET_MODE (op1) != compare_mode
22397 /* In the isel case however, we can use a compare immediate, so
22398 op1 may be a small constant. */
22399 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
22400 return 0;
22401 if (GET_MODE (true_cond) != result_mode)
22402 return 0;
22403 if (GET_MODE (false_cond) != result_mode)
22404 return 0;
22406 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
22407 if (TARGET_P9_MINMAX
22408 && (compare_mode == SFmode || compare_mode == DFmode)
22409 && (result_mode == SFmode || result_mode == DFmode))
22411 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
22412 return 1;
22414 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
22415 return 1;
22418 /* Don't allow using floating point comparisons for integer results for
22419 now. */
22420 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
22421 return 0;
22423 /* First, work out if the hardware can do this at all, or
22424 if it's too slow.... */
22425 if (!FLOAT_MODE_P (compare_mode))
22427 if (TARGET_ISEL)
22428 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
22429 return 0;
22432 is_against_zero = op1 == CONST0_RTX (compare_mode);
22434 /* A floating-point subtract might overflow, underflow, or produce
22435 an inexact result, thus changing the floating-point flags, so it
22436 can't be generated if we care about that. It's safe if one side
22437 of the construct is zero, since then no subtract will be
22438 generated. */
22439 if (SCALAR_FLOAT_MODE_P (compare_mode)
22440 && flag_trapping_math && ! is_against_zero)
22441 return 0;
22443 /* Eliminate half of the comparisons by switching operands; this
22444 makes the remaining code simpler. */
22445 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
22446 || code == LTGT || code == LT || code == UNLE)
22448 code = reverse_condition_maybe_unordered (code);
22449 temp = true_cond;
22450 true_cond = false_cond;
22451 false_cond = temp;
22454 /* UNEQ and LTGT take four instructions for a comparison with zero;
22455 it'll probably be faster to use a branch here too. */
22456 if (code == UNEQ && HONOR_NANS (compare_mode))
22457 return 0;
22459 /* We're going to try to implement comparisons by performing
22460 a subtract, then comparing against zero. Unfortunately,
22461 Inf - Inf is NaN which is not zero, and so if we don't
22462 know that the operand is finite and the comparison
22463 would treat EQ differently from UNORDERED, we can't do it. */
22464 if (HONOR_INFINITIES (compare_mode)
22465 && code != GT && code != UNGE
22466 && (GET_CODE (op1) != CONST_DOUBLE
22467 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
22468 /* Constructs of the form (a OP b ? a : b) are safe. */
22469 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
22470 || (! rtx_equal_p (op0, true_cond)
22471 && ! rtx_equal_p (op1, true_cond))))
22472 return 0;
22474 /* At this point we know we can use fsel. */
22476 /* Reduce the comparison to a comparison against zero. */
22477 if (! is_against_zero)
22479 temp = gen_reg_rtx (compare_mode);
22480 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
22481 op0 = temp;
22482 op1 = CONST0_RTX (compare_mode);
22485 /* If we don't care about NaNs we can reduce some of the comparisons
22486 down to faster ones. */
22487 if (! HONOR_NANS (compare_mode))
22488 switch (code)
22490 case GT:
22491 code = LE;
22492 temp = true_cond;
22493 true_cond = false_cond;
22494 false_cond = temp;
22495 break;
22496 case UNGE:
22497 code = GE;
22498 break;
22499 case UNEQ:
22500 code = EQ;
22501 break;
22502 default:
22503 break;
22506 /* Now, reduce everything down to a GE. */
22507 switch (code)
22509 case GE:
22510 break;
22512 case LE:
22513 temp = gen_reg_rtx (compare_mode);
22514 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22515 op0 = temp;
22516 break;
22518 case ORDERED:
22519 temp = gen_reg_rtx (compare_mode);
22520 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
22521 op0 = temp;
22522 break;
22524 case EQ:
22525 temp = gen_reg_rtx (compare_mode);
22526 emit_insn (gen_rtx_SET (temp,
22527 gen_rtx_NEG (compare_mode,
22528 gen_rtx_ABS (compare_mode, op0))));
22529 op0 = temp;
22530 break;
22532 case UNGE:
22533 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
22534 temp = gen_reg_rtx (result_mode);
22535 emit_insn (gen_rtx_SET (temp,
22536 gen_rtx_IF_THEN_ELSE (result_mode,
22537 gen_rtx_GE (VOIDmode,
22538 op0, op1),
22539 true_cond, false_cond)));
22540 false_cond = true_cond;
22541 true_cond = temp;
22543 temp = gen_reg_rtx (compare_mode);
22544 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22545 op0 = temp;
22546 break;
22548 case GT:
22549 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
22550 temp = gen_reg_rtx (result_mode);
22551 emit_insn (gen_rtx_SET (temp,
22552 gen_rtx_IF_THEN_ELSE (result_mode,
22553 gen_rtx_GE (VOIDmode,
22554 op0, op1),
22555 true_cond, false_cond)));
22556 true_cond = false_cond;
22557 false_cond = temp;
22559 temp = gen_reg_rtx (compare_mode);
22560 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
22561 op0 = temp;
22562 break;
22564 default:
22565 gcc_unreachable ();
22568 emit_insn (gen_rtx_SET (dest,
22569 gen_rtx_IF_THEN_ELSE (result_mode,
22570 gen_rtx_GE (VOIDmode,
22571 op0, op1),
22572 true_cond, false_cond)));
22573 return 1;
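/* fsel modeled in standalone C: fsel d,a,b,c computes d = (a >= 0.0 ? b
   : c), which is why every case above is funneled into a GE test against
   zero -- LE negates op0 first (a <= 0 iff -a >= 0 when NaNs are not
   honored), EQ uses -fabs(a), ORDERED uses fabs(a), and so on.  */
static double
fsel_model (double a, double b, double c)
{
  return a >= 0.0 ? b : c;
}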
22576 /* Same as above, but for ints (isel). */
22579 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22581 rtx condition_rtx, cr;
22582 machine_mode mode = GET_MODE (dest);
22583 enum rtx_code cond_code;
22584 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
22585 bool signedp;
22587 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
22588 return 0;
22590 /* We still have to do the compare, because isel doesn't do a
22591 compare; it just looks at the CRx bits set by a previous compare
22592 instruction. */
22593 condition_rtx = rs6000_generate_compare (op, mode);
22594 cond_code = GET_CODE (condition_rtx);
22595 cr = XEXP (condition_rtx, 0);
22596 signedp = GET_MODE (cr) == CCmode;
22598 isel_func = (mode == SImode
22599 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
22600 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
22602 switch (cond_code)
22604 case LT: case GT: case LTU: case GTU: case EQ:
22605 /* isel handles these directly. */
22606 break;
22608 default:
22609 /* We need to swap the sense of the comparison. */
22611 std::swap (false_cond, true_cond);
22612 PUT_CODE (condition_rtx, reverse_condition (cond_code));
22614 break;
22617 false_cond = force_reg (mode, false_cond);
22618 if (true_cond != const0_rtx)
22619 true_cond = force_reg (mode, true_cond);
22621 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
22623 return 1;
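/* isel modeled in standalone C: isel rD,rA,rB,crbit yields rD = (crbit
   ? rA : rB), so the expander above only needs the compare to set a CR
   bit and both arms in registers (const0_rtx is allowed because r0 reads
   as zero in the rA slot).  */
static long
isel_model (int crbit, long a, long b)
{
  return crbit ? a : b;
}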
22626 void
22627 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
22629 machine_mode mode = GET_MODE (op0);
22630 enum rtx_code c;
22631 rtx target;
22633 /* VSX/altivec have direct min/max insns. */
22634 if ((code == SMAX || code == SMIN)
22635 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
22636 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
22638 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
22639 return;
22642 if (code == SMAX || code == SMIN)
22643 c = GE;
22644 else
22645 c = GEU;
22647 if (code == SMAX || code == UMAX)
22648 target = emit_conditional_move (dest, c, op0, op1, mode,
22649 op0, op1, mode, 0);
22650 else
22651 target = emit_conditional_move (dest, c, op0, op1, mode,
22652 op1, op0, mode, 0);
22653 gcc_assert (target);
22654 if (target != dest)
22655 emit_move_insn (dest, target);
22658 /* A subroutine of the atomic operation splitters. Jump to LABEL if
22659 COND is true. Mark the jump as unlikely to be taken. */
22661 static void
22662 emit_unlikely_jump (rtx cond, rtx label)
22664 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
22665 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
22666 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
22669 /* A subroutine of the atomic operation splitters. Emit a load-locked
22670 instruction in MODE. For QI/HImode, possibly use a pattern that includes
22671 the zero_extend operation. */
22673 static void
22674 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
22676 rtx (*fn) (rtx, rtx) = NULL;
22678 switch (mode)
22680 case E_QImode:
22681 fn = gen_load_lockedqi;
22682 break;
22683 case E_HImode:
22684 fn = gen_load_lockedhi;
22685 break;
22686 case E_SImode:
22687 if (GET_MODE (mem) == QImode)
22688 fn = gen_load_lockedqi_si;
22689 else if (GET_MODE (mem) == HImode)
22690 fn = gen_load_lockedhi_si;
22691 else
22692 fn = gen_load_lockedsi;
22693 break;
22694 case E_DImode:
22695 fn = gen_load_lockeddi;
22696 break;
22697 case E_TImode:
22698 fn = gen_load_lockedti;
22699 break;
22700 default:
22701 gcc_unreachable ();
22703 emit_insn (fn (reg, mem));
22706 /* A subroutine of the atomic operation splitters. Emit a store-conditional
22707 instruction in MODE. */
22709 static void
22710 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
22712 rtx (*fn) (rtx, rtx, rtx) = NULL;
22714 switch (mode)
22716 case E_QImode:
22717 fn = gen_store_conditionalqi;
22718 break;
22719 case E_HImode:
22720 fn = gen_store_conditionalhi;
22721 break;
22722 case E_SImode:
22723 fn = gen_store_conditionalsi;
22724 break;
22725 case E_DImode:
22726 fn = gen_store_conditionaldi;
22727 break;
22728 case E_TImode:
22729 fn = gen_store_conditionalti;
22730 break;
22731 default:
22732 gcc_unreachable ();
22735 /* Emit sync before stwcx. to address PPC405 Erratum. */
22736 if (PPC405_ERRATUM77)
22737 emit_insn (gen_hwsync ());
22739 emit_insn (fn (res, mem, val));
22742 /* Expand barriers before and after a load_locked/store_cond sequence. */
22744 static rtx
22745 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
22747 rtx addr = XEXP (mem, 0);
22749 if (!legitimate_indirect_address_p (addr, reload_completed)
22750 && !legitimate_indexed_address_p (addr, reload_completed))
22752 addr = force_reg (Pmode, addr);
22753 mem = replace_equiv_address_nv (mem, addr);
22756 switch (model)
22758 case MEMMODEL_RELAXED:
22759 case MEMMODEL_CONSUME:
22760 case MEMMODEL_ACQUIRE:
22761 break;
22762 case MEMMODEL_RELEASE:
22763 case MEMMODEL_ACQ_REL:
22764 emit_insn (gen_lwsync ());
22765 break;
22766 case MEMMODEL_SEQ_CST:
22767 emit_insn (gen_hwsync ());
22768 break;
22769 default:
22770 gcc_unreachable ();
22772 return mem;
22775 static void
22776 rs6000_post_atomic_barrier (enum memmodel model)
22778 switch (model)
22780 case MEMMODEL_RELAXED:
22781 case MEMMODEL_CONSUME:
22782 case MEMMODEL_RELEASE:
22783 break;
22784 case MEMMODEL_ACQUIRE:
22785 case MEMMODEL_ACQ_REL:
22786 case MEMMODEL_SEQ_CST:
22787 emit_insn (gen_isync ());
22788 break;
22789 default:
22790 gcc_unreachable ();
22794 /* A subroutine of the various atomic expanders. For sub-word operations,
22795 we must adjust things to operate on SImode. Given the original MEM,
22796 return a new aligned memory. Also build and return the quantities by
22797 which to shift and mask. */
22799 static rtx
22800 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
22802 rtx addr, align, shift, mask, mem;
22803 HOST_WIDE_INT shift_mask;
22804 machine_mode mode = GET_MODE (orig_mem);
22806 /* For smaller modes, we have to implement this via SImode. */
22807 shift_mask = (mode == QImode ? 0x18 : 0x10);
22809 addr = XEXP (orig_mem, 0);
22810 addr = force_reg (GET_MODE (addr), addr);
22812 /* Aligned memory containing subword. Generate a new memory. We
22813 do not want any of the existing MEM_ATTR data, as we're now
22814 accessing memory outside the original object. */
22815 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
22816 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22817 mem = gen_rtx_MEM (SImode, align);
22818 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
22819 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
22820 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
22822 /* Shift amount for subword relative to aligned word. */
22823 shift = gen_reg_rtx (SImode);
22824 addr = gen_lowpart (SImode, addr);
22825 rtx tmp = gen_reg_rtx (SImode);
22826 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
22827 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
22828 if (BYTES_BIG_ENDIAN)
22829 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
22830 shift, 1, OPTAB_LIB_WIDEN);
22831 *pshift = shift;
22833 /* Mask for insertion. */
22834 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
22835 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
22836 *pmask = mask;
22838 return mem;
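/* The address arithmetic above in standalone C, for a byte access: the
   containing word is at addr & -4, the byte's shift inside it is
   (addr & 3) * 8 -- i.e. (addr << 3) & 0x18 -- XORed with the shift mask
   on big-endian, and the insertion mask is 0xff moved into place.  */
static void
subword_geometry (unsigned long addr, int big_endian,
                  unsigned long *aligned, int *shift, unsigned int *mask)
{
  *aligned = addr & ~3UL;
  *shift = (addr & 3) * 8;
  if (big_endian)
    *shift ^= 0x18;
  *mask = 0xffu << *shift;
}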
22841 /* A subroutine of the various atomic expanders. For sub-word operands,
22842 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
22844 static rtx
22845 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
22847 rtx x;
22849 x = gen_reg_rtx (SImode);
22850 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
22851 gen_rtx_NOT (SImode, mask),
22852 oldval)));
22854 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
22856 return x;
22859 /* A subroutine of the various atomic expanders. For sub-word operands,
22860 extract WIDE to NARROW via SHIFT. */
22862 static void
22863 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
22865 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
22866 wide, 1, OPTAB_LIB_WIDEN);
22867 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
22870 /* Expand an atomic compare and swap operation. */
22872 void
22873 rs6000_expand_atomic_compare_and_swap (rtx operands[])
22875 rtx boolval, retval, mem, oldval, newval, cond;
22876 rtx label1, label2, x, mask, shift;
22877 machine_mode mode, orig_mode;
22878 enum memmodel mod_s, mod_f;
22879 bool is_weak;
22881 boolval = operands[0];
22882 retval = operands[1];
22883 mem = operands[2];
22884 oldval = operands[3];
22885 newval = operands[4];
22886 is_weak = (INTVAL (operands[5]) != 0);
22887 mod_s = memmodel_base (INTVAL (operands[6]));
22888 mod_f = memmodel_base (INTVAL (operands[7]));
22889 orig_mode = mode = GET_MODE (mem);
22891 mask = shift = NULL_RTX;
22892 if (mode == QImode || mode == HImode)
22894 /* Before power8, we didn't have access to lbarx/lharx, so generate
22895 lwarx and shift/mask operations. With power8, we need to do the
22896 comparison in SImode, but the store is still done in QI/HImode. */
22897 oldval = convert_modes (SImode, mode, oldval, 1);
22899 if (!TARGET_SYNC_HI_QI)
22901 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
22903 /* Shift and mask OLDVAL into position within the word. */
22904 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
22905 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22907 /* Shift and mask NEWVAL into position within the word. */
22908 newval = convert_modes (SImode, mode, newval, 1);
22909 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
22910 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22913 /* Prepare to adjust the return value. */
22914 retval = gen_reg_rtx (SImode);
22915 mode = SImode;
22917 else if (reg_overlap_mentioned_p (retval, oldval))
22918 oldval = copy_to_reg (oldval);
22920 if (mode != TImode && !reg_or_short_operand (oldval, mode))
22921 oldval = copy_to_mode_reg (mode, oldval);
22923 if (reg_overlap_mentioned_p (retval, newval))
22924 newval = copy_to_reg (newval);
22926 mem = rs6000_pre_atomic_barrier (mem, mod_s);
22928 label1 = NULL_RTX;
22929 if (!is_weak)
22931 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22932 emit_label (XEXP (label1, 0));
22934 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
22936 emit_load_locked (mode, retval, mem);
22938 x = retval;
22939 if (mask)
22940 x = expand_simple_binop (SImode, AND, retval, mask,
22941 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22943 cond = gen_reg_rtx (CCmode);
22944 /* If we have TImode, synthesize a comparison. */
22945 if (mode != TImode)
22946 x = gen_rtx_COMPARE (CCmode, x, oldval);
22947 else
22949 rtx xor1_result = gen_reg_rtx (DImode);
22950 rtx xor2_result = gen_reg_rtx (DImode);
22951 rtx or_result = gen_reg_rtx (DImode);
22952 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
22953 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
22954 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
22955 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
22957 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
22958 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
22959 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
22960 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
22963 emit_insn (gen_rtx_SET (cond, x));
22965 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22966 emit_unlikely_jump (x, label2);
22968 x = newval;
22969 if (mask)
22970 x = rs6000_mask_atomic_subword (retval, newval, mask);
22972 emit_store_conditional (orig_mode, cond, mem, x);
22974 if (!is_weak)
22976 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
22977 emit_unlikely_jump (x, label1);
22980 if (!is_mm_relaxed (mod_f))
22981 emit_label (XEXP (label2, 0));
22983 rs6000_post_atomic_barrier (mod_s);
22985 if (is_mm_relaxed (mod_f))
22986 emit_label (XEXP (label2, 0));
22988 if (shift)
22989 rs6000_finish_atomic_subword (operands[1], retval, shift);
22990 else if (mode != GET_MODE (operands[1]))
22991 convert_move (operands[1], retval, 1);
22993 /* In all cases, CR0 contains EQ on success, and NE on failure. */
22994 x = gen_rtx_EQ (SImode, cond, const0_rtx);
22995 emit_insn (gen_rtx_SET (boolval, x));
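/* Usage sketch: the expander above sits behind GCC's generic atomics, so
   user code reaches it through the builtins.  On power a strong CAS such
   as this expands to the lwarx/compare/stwcx. retry loop built above:  */
#include <stdbool.h>

static bool
cas_int (int *p, int expected, int desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired,
                                      false /* strong */,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}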
22998 /* Expand an atomic exchange operation. */
23000 void
23001 rs6000_expand_atomic_exchange (rtx operands[])
23003 rtx retval, mem, val, cond;
23004 machine_mode mode;
23005 enum memmodel model;
23006 rtx label, x, mask, shift;
23008 retval = operands[0];
23009 mem = operands[1];
23010 val = operands[2];
23011 model = memmodel_base (INTVAL (operands[3]));
23012 mode = GET_MODE (mem);
23014 mask = shift = NULL_RTX;
23015 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23017 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23019 /* Shift and mask VAL into position within the word. */
23020 val = convert_modes (SImode, mode, val, 1);
23021 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23022 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23024 /* Prepare to adjust the return value. */
23025 retval = gen_reg_rtx (SImode);
23026 mode = SImode;
23029 mem = rs6000_pre_atomic_barrier (mem, model);
23031 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23032 emit_label (XEXP (label, 0));
23034 emit_load_locked (mode, retval, mem);
23036 x = val;
23037 if (mask)
23038 x = rs6000_mask_atomic_subword (retval, val, mask);
23040 cond = gen_reg_rtx (CCmode);
23041 emit_store_conditional (mode, cond, mem, x);
23043 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23044 emit_unlikely_jump (x, label);
23046 rs6000_post_atomic_barrier (model);
23048 if (shift)
23049 rs6000_finish_atomic_subword (operands[0], retval, shift);
23052 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23053 to perform. MEM is the memory on which to operate. VAL is the second
23054 operand of the binary operator. BEFORE and AFTER are optional locations to
23055 return the value of MEM either before or after the operation. MODEL_RTX
23056 is a CONST_INT containing the memory model to use. */
23058 void
23059 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23060 rtx orig_before, rtx orig_after, rtx model_rtx)
23062 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23063 machine_mode mode = GET_MODE (mem);
23064 machine_mode store_mode = mode;
23065 rtx label, x, cond, mask, shift;
23066 rtx before = orig_before, after = orig_after;
23068 mask = shift = NULL_RTX;
23069 /* On power8, we want to do the operation in SImode. On previous systems,
23070 do the operation on the containing word and shift/mask to get the proper
23071 byte or halfword. */
23072 if (mode == QImode || mode == HImode)
23074 if (TARGET_SYNC_HI_QI)
23076 val = convert_modes (SImode, mode, val, 1);
23078 /* Prepare to adjust the return value. */
23079 before = gen_reg_rtx (SImode);
23080 if (after)
23081 after = gen_reg_rtx (SImode);
23082 mode = SImode;
23084 else
23086 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23088 /* Shift and mask VAL into position within the word. */
23089 val = convert_modes (SImode, mode, val, 1);
23090 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23091 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23093 switch (code)
23095 case IOR:
23096 case XOR:
23097 /* We've already zero-extended VAL. That is sufficient to
23098 make certain that it does not affect other bits. */
23099 mask = NULL;
23100 break;
23102 case AND:
23103 /* If we make certain that all of the other bits in VAL are
23104 set, that will be sufficient to not affect other bits. */
23105 x = gen_rtx_NOT (SImode, mask);
23106 x = gen_rtx_IOR (SImode, x, val);
23107 emit_insn (gen_rtx_SET (val, x));
23108 mask = NULL;
23109 break;
23111 case NOT:
23112 case PLUS:
23113 case MINUS:
23114 /* These will all affect bits outside the field and need
23115 adjustment via MASK within the loop. */
23116 break;
23118 default:
23119 gcc_unreachable ();
23122 /* Prepare to adjust the return value. */
23123 before = gen_reg_rtx (SImode);
23124 if (after)
23125 after = gen_reg_rtx (SImode);
23126 store_mode = mode = SImode;
23130 mem = rs6000_pre_atomic_barrier (mem, model);
23132 label = gen_label_rtx ();
23133 emit_label (label);
23134 label = gen_rtx_LABEL_REF (VOIDmode, label);
23136 if (before == NULL_RTX)
23137 before = gen_reg_rtx (mode);
23139 emit_load_locked (mode, before, mem);
23141 if (code == NOT)
23143 x = expand_simple_binop (mode, AND, before, val,
23144 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23145 after = expand_simple_unop (mode, NOT, x, after, 1);
23147 else
23149 after = expand_simple_binop (mode, code, before, val,
23150 after, 1, OPTAB_LIB_WIDEN);
23153 x = after;
23154 if (mask)
23156 x = expand_simple_binop (SImode, AND, after, mask,
23157 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23158 x = rs6000_mask_atomic_subword (before, x, mask);
23160 else if (store_mode != mode)
23161 x = convert_modes (store_mode, mode, x, 1);
23163 cond = gen_reg_rtx (CCmode);
23164 emit_store_conditional (store_mode, cond, mem, x);
23166 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23167 emit_unlikely_jump (x, label);
23169 rs6000_post_atomic_barrier (model);
23171 if (shift)
23173 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23174 then do the calculations in an SImode register. */
23175 if (orig_before)
23176 rs6000_finish_atomic_subword (orig_before, before, shift);
23177 if (orig_after)
23178 rs6000_finish_atomic_subword (orig_after, after, shift);
23180 else if (store_mode != mode)
23182 /* QImode/HImode on machines with lbarx/lharx where we do the native
23183 operation and then do the calculations in an SImode register. */
23184 if (orig_before)
23185 convert_move (orig_before, before, 1);
23186 if (orig_after)
23187 convert_move (orig_after, after, 1);
23189 else if (orig_after && after != orig_after)
23190 emit_move_insn (orig_after, after);
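/* Usage sketch for the fetch-and-op expander above; note the NOT case is
   GCC's NAND, i.e. *mem = ~(*mem & val), not ~*mem:  */
static int
fetch_ops (int *p, int v)
{
  int before = __atomic_fetch_add (p, v, __ATOMIC_ACQ_REL);
  (void) __atomic_fetch_nand (p, v, __ATOMIC_RELAXED); /* *p = ~(*p & v) */
  return before;
}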
23193 /* Emit instructions to move SRC to DST. Called by splitters for
23194 multi-register moves. It will emit at most one instruction for
23195 each register that is accessed; that is, it won't emit li/lis pairs
23196 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23197 register. */
23199 void
23200 rs6000_split_multireg_move (rtx dst, rtx src)
23202 /* The register number of the first register being moved. */
23203 int reg;
23204 /* The mode that is to be moved. */
23205 machine_mode mode;
23206 /* The mode that the move is being done in, and its size. */
23207 machine_mode reg_mode;
23208 int reg_mode_size;
23209 /* The number of registers that will be moved. */
23210 int nregs;
23212 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23213 mode = GET_MODE (dst);
23214 nregs = hard_regno_nregs (reg, mode);
23215 if (FP_REGNO_P (reg))
23216 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23217 (TARGET_HARD_FLOAT ? DFmode : SFmode);
23218 else if (ALTIVEC_REGNO_P (reg))
23219 reg_mode = V16QImode;
23220 else
23221 reg_mode = word_mode;
23222 reg_mode_size = GET_MODE_SIZE (reg_mode);
23224 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23226 /* TDmode residing in FP registers is special, since the ISA requires that
23227 the lower-numbered word of a register pair is always the most significant
23228 word, even in little-endian mode. This does not match the usual subreg
23229 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23230 the appropriate constituent registers "by hand" in little-endian mode.
23232 Note we do not need to check for destructive overlap here since TDmode
23233 can only reside in even/odd register pairs. */
23234 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23236 rtx p_src, p_dst;
23237 int i;
23239 for (i = 0; i < nregs; i++)
23241 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23242 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23243 else
23244 p_src = simplify_gen_subreg (reg_mode, src, mode,
23245 i * reg_mode_size);
23247 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23248 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23249 else
23250 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23251 i * reg_mode_size);
23253 emit_insn (gen_rtx_SET (p_dst, p_src));
23256 return;
23259 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23261 /* Move register range backwards, if we might have destructive
23262 overlap. */
23263 int i;
23264 for (i = nregs - 1; i >= 0; i--)
23265 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23266 i * reg_mode_size),
23267 simplify_gen_subreg (reg_mode, src, mode,
23268 i * reg_mode_size)));
23270 else
23272 int i;
23273 int j = -1;
23274 bool used_update = false;
23275 rtx restore_basereg = NULL_RTX;
23277 if (MEM_P (src) && INT_REGNO_P (reg))
23279 rtx breg;
23281 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23282 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23284 rtx delta_rtx;
23285 breg = XEXP (XEXP (src, 0), 0);
23286 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23287 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23288 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23289 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23290 src = replace_equiv_address (src, breg);
23292 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
23294 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23296 rtx basereg = XEXP (XEXP (src, 0), 0);
23297 if (TARGET_UPDATE)
23299 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23300 emit_insn (gen_rtx_SET (ndst,
23301 gen_rtx_MEM (reg_mode,
23302 XEXP (src, 0))));
23303 used_update = true;
23305 else
23306 emit_insn (gen_rtx_SET (basereg,
23307 XEXP (XEXP (src, 0), 1)));
23308 src = replace_equiv_address (src, basereg);
23310 else
23312 rtx basereg = gen_rtx_REG (Pmode, reg);
23313 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
23314 src = replace_equiv_address (src, basereg);
23318 breg = XEXP (src, 0);
23319 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
23320 breg = XEXP (breg, 0);
23322 /* If the base register we are using to address memory is
23323 also a destination reg, then change that register last. */
23324 if (REG_P (breg)
23325 && REGNO (breg) >= REGNO (dst)
23326 && REGNO (breg) < REGNO (dst) + nregs)
23327 j = REGNO (breg) - REGNO (dst);
23329 else if (MEM_P (dst) && INT_REGNO_P (reg))
23331 rtx breg;
23333 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
23334 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
23336 rtx delta_rtx;
23337 breg = XEXP (XEXP (dst, 0), 0);
23338 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
23339 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
23340 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
23342 /* We have to update the breg before doing the store.
23343 Use store with update, if available. */
23345 if (TARGET_UPDATE)
23347 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23348 emit_insn (TARGET_32BIT
23349 ? (TARGET_POWERPC64
23350 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
23351 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
23352 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
23353 used_update = true;
23355 else
23356 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23357 dst = replace_equiv_address (dst, breg);
23359 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
23360 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
23362 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
23364 rtx basereg = XEXP (XEXP (dst, 0), 0);
23365 if (TARGET_UPDATE)
23367 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
23368 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
23369 XEXP (dst, 0)),
23370 nsrc));
23371 used_update = true;
23373 else
23374 emit_insn (gen_rtx_SET (basereg,
23375 XEXP (XEXP (dst, 0), 1)));
23376 dst = replace_equiv_address (dst, basereg);
23378 else
23380 rtx basereg = XEXP (XEXP (dst, 0), 0);
23381 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
23382 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
23383 && REG_P (basereg)
23384 && REG_P (offsetreg)
23385 && REGNO (basereg) != REGNO (offsetreg));
23386 if (REGNO (basereg) == 0)
23388 rtx tmp = offsetreg;
23389 offsetreg = basereg;
23390 basereg = tmp;
23392 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
23393 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
23394 dst = replace_equiv_address (dst, basereg);
23397 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
23398 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
23401 for (i = 0; i < nregs; i++)
23403 /* Calculate index to next subword. */
23404 ++j;
23405 if (j == nregs)
23406 j = 0;
23408 /* If compiler already emitted move of first word by
23409 store with update, no need to do anything. */
23410 if (j == 0 && used_update)
23411 continue;
23413 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23414 j * reg_mode_size),
23415 simplify_gen_subreg (reg_mode, src, mode,
23416 j * reg_mode_size)));
23418 if (restore_basereg != NULL_RTX)
23419 emit_insn (restore_basereg);
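/* Illustrative note (not from the original source): the subword loop
   above starts at index J, which is nonzero only when the register
   used to address memory overlaps the destination.  For example, with
   nregs == 4 and j == 2 the moves are emitted in the order 3, 0, 1, 2,
   so the overlapping register (index 2) is clobbered last and the
   address it holds stays valid for the earlier subword moves.  */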
23424 /* This page contains routines that are used to determine what the
23425 function prologue and epilogue code will do and write them out. */
23427 /* Determine whether register REG really needs to be saved. */
23429 static bool
23430 save_reg_p (int reg)
23432 /* We need to mark the PIC offset register live for the same conditions
23433 as it is set up, or otherwise it won't be saved before we clobber it. */
23435 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
23437 /* When calling eh_return, we must return true for all the cases
23438 where conditional_register_usage marks the PIC offset reg
23439 call used. */
23440 if (TARGET_TOC && TARGET_MINIMAL_TOC
23441 && (crtl->calls_eh_return
23442 || df_regs_ever_live_p (reg)
23443 || !constant_pool_empty_p ()))
23444 return true;
23446 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
23447 && flag_pic)
23448 return true;
23451 return !call_used_regs[reg] && df_regs_ever_live_p (reg);
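/* Illustrative example (assumes r30 == RS6000_PIC_OFFSET_TABLE_REGNUM,
   as on SVR4): when compiling -fpic code for ABI_V4, save_reg_p (30)
   returns true even if r30 is not yet marked live, because the
   prologue will set it up as the PIC base and the caller's value must
   be preserved.  */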
23454 /* Return the first fixed-point register that is required to be
23455 saved. 32 if none. */
23457 int
23458 first_reg_to_save (void)
23460 int first_reg;
23462 /* Find lowest numbered live register. */
23463 for (first_reg = 13; first_reg <= 31; first_reg++)
23464 if (save_reg_p (first_reg))
23465 break;
23467 #if TARGET_MACHO
23468 if (flag_pic
23469 && crtl->uses_pic_offset_table
23470 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
23471 return RS6000_PIC_OFFSET_TABLE_REGNUM;
23472 #endif
23474 return first_reg;
23477 /* Similar, for FP regs. */
23479 int
23480 first_fp_reg_to_save (void)
23482 int first_reg;
23484 /* Find lowest numbered live register. */
23485 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
23486 if (save_reg_p (first_reg))
23487 break;
23489 return first_reg;
23492 /* Similar, for AltiVec regs. */
23494 static int
23495 first_altivec_reg_to_save (void)
23497 int i;
23499 /* Stack frame remains as is unless we are in AltiVec ABI. */
23500 if (! TARGET_ALTIVEC_ABI)
23501 return LAST_ALTIVEC_REGNO + 1;
23503 /* On Darwin, the unwind routines are compiled without
23504 TARGET_ALTIVEC, and use save_world to save/restore the
23505 altivec registers when necessary. */
23506 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23507 && ! TARGET_ALTIVEC)
23508 return FIRST_ALTIVEC_REGNO + 20;
23510 /* Find lowest numbered live register. */
23511 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
23512 if (save_reg_p (i))
23513 break;
23515 return i;
23518 /* Return a 32-bit mask of the AltiVec registers we need to set in
23519 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
23520 the 32-bit word is 0. */
23522 static unsigned int
23523 compute_vrsave_mask (void)
23525 unsigned int i, mask = 0;
23527 /* On Darwin, the unwind routines are compiled without
23528 TARGET_ALTIVEC, and use save_world to save/restore the
23529 call-saved altivec registers when necessary. */
23530 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
23531 && ! TARGET_ALTIVEC)
23532 mask |= 0xFFF;
23534 /* First, find out if we use _any_ altivec registers. */
23535 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
23536 if (df_regs_ever_live_p (i))
23537 mask |= ALTIVEC_REG_BIT (i);
23539 if (mask == 0)
23540 return mask;
23542 /* Next, remove the argument registers from the set. These must
23543 be in the VRSAVE mask set by the caller, so we don't need to add
23544 them in again. More importantly, the mask we compute here is
23545 used to generate CLOBBERs in the set_vrsave insn, and we do not
23546 wish the argument registers to die. */
23547 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
23548 mask &= ~ALTIVEC_REG_BIT (i);
23550 /* Similarly, remove the return value from the set. */
23552 bool yes = false;
23553 diddle_return_value (is_altivec_return_reg, &yes);
23554 if (yes)
23555 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
23558 return mask;
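/* Illustrative example of the mask layout, assuming the usual
   definition ALTIVEC_REG_BIT (R) == 0x80000000 >> (R - FIRST_ALTIVEC_REGNO):
   if only V20 and V21 are live and neither is an argument or return
   register, the function returns
       (0x80000000 >> 20) | (0x80000000 >> 21) == 0x00000c00.  */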
23561 /* For a very restricted set of circumstances, we can cut down the
23562 size of prologues/epilogues by calling our own save/restore-the-world
23563 routines. */
23565 static void
23566 compute_save_world_info (rs6000_stack_t *info)
23568 info->world_save_p = 1;
23569 info->world_save_p
23570 = (WORLD_SAVE_P (info)
23571 && DEFAULT_ABI == ABI_DARWIN
23572 && !cfun->has_nonlocal_label
23573 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
23574 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
23575 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
23576 && info->cr_save_p);
23578 /* This will not work in conjunction with sibcalls. Make sure there
23579 are none. (This check is expensive, but seldom executed.) */
23580 if (WORLD_SAVE_P (info))
23582 rtx_insn *insn;
23583 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
23584 if (CALL_P (insn) && SIBLING_CALL_P (insn))
23586 info->world_save_p = 0;
23587 break;
23591 if (WORLD_SAVE_P (info))
23593 /* Even if we're not touching VRsave, make sure there's room on the
23594 stack for it, if it looks like we're calling SAVE_WORLD, which
23595 will attempt to save it. */
23596 info->vrsave_size = 4;
23598 /* If we are going to save the world, we need to save the link register too. */
23599 info->lr_save_p = 1;
23601 /* "Save" the VRsave register too if we're saving the world. */
23602 if (info->vrsave_mask == 0)
23603 info->vrsave_mask = compute_vrsave_mask ();
23605 /* Because the Darwin register save/restore routines only handle
23606 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
23607 check. */
23608 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
23609 && (info->first_altivec_reg_save
23610 >= FIRST_SAVED_ALTIVEC_REGNO));
23613 return;
23617 static void
23618 is_altivec_return_reg (rtx reg, void *xyes)
23620 bool *yes = (bool *) xyes;
23621 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
23622 *yes = true;
23626 /* Return whether REG is a global user reg or has been specified by
23627 -ffixed-REG. We should not restore these, and so cannot use
23628 lmw or out-of-line restore functions if there are any. We also
23629 can't save them (well, emit frame notes for them), because frame
23630 unwinding during exception handling will restore saved registers. */
23632 static bool
23633 fixed_reg_p (int reg)
23635 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
23636 backend sets it, overriding anything the user might have given. */
23637 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23638 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
23639 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
23640 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
23641 return false;
23643 return fixed_regs[reg];
23646 /* Determine the strategy for saving/restoring registers. */
23648 enum {
23649 SAVE_MULTIPLE = 0x1,
23650 SAVE_INLINE_GPRS = 0x2,
23651 SAVE_INLINE_FPRS = 0x4,
23652 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
23653 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
23654 SAVE_INLINE_VRS = 0x20,
23655 REST_MULTIPLE = 0x100,
23656 REST_INLINE_GPRS = 0x200,
23657 REST_INLINE_FPRS = 0x400,
23658 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
23659 REST_INLINE_VRS = 0x1000
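/* Illustrative (hypothetical) combination of the flags above: a 32-bit
   V.4 function that saves r28..r31 with a single stmw but leaves the
   restore to the out-of-line "exit" routine would end up with

       strategy = SAVE_INLINE_GPRS | SAVE_MULTIPLE
		  | SAVE_INLINE_FPRS | REST_INLINE_FPRS
		  | SAVE_INLINE_VRS | REST_INLINE_VRS;

   the *absence* of REST_INLINE_GPRS is what selects the out-of-line
   GPR restore.  */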
23662 static int
23663 rs6000_savres_strategy (rs6000_stack_t *info,
23664 bool using_static_chain_p)
23666 int strategy = 0;
23668 /* Select between in-line and out-of-line save and restore of regs.
23669 First, all the obvious cases where we don't use out-of-line. */
23670 if (crtl->calls_eh_return
23671 || cfun->machine->ra_need_lr)
23672 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
23673 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
23674 | SAVE_INLINE_VRS | REST_INLINE_VRS);
23676 if (info->first_gp_reg_save == 32)
23677 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23679 if (info->first_fp_reg_save == 64)
23680 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23682 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
23683 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23685 /* Define cutoff for using out-of-line functions to save registers. */
23686 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
23688 if (!optimize_size)
23690 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23691 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23692 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23694 else
23696 /* Prefer out-of-line restore if it will exit. */
23697 if (info->first_fp_reg_save > 61)
23698 strategy |= SAVE_INLINE_FPRS;
23699 if (info->first_gp_reg_save > 29)
23701 if (info->first_fp_reg_save == 64)
23702 strategy |= SAVE_INLINE_GPRS;
23703 else
23704 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23706 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
23707 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23710 else if (DEFAULT_ABI == ABI_DARWIN)
23712 if (info->first_fp_reg_save > 60)
23713 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23714 if (info->first_gp_reg_save > 29)
23715 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23716 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23718 else
23720 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23721 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
23722 || info->first_fp_reg_save > 61)
23723 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
23724 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23725 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
23728 /* Don't bother to try to save things out-of-line if r11 is occupied
23729 by the static chain. It would require too much fiddling and the
23730 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
23731 pointer on Darwin, and AIX uses r1 or r12. */
23732 if (using_static_chain_p
23733 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
23734 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
23735 | SAVE_INLINE_GPRS
23736 | SAVE_INLINE_VRS);
23738 /* Don't ever restore fixed regs. That means we can't use the
23739 out-of-line register restore functions if a fixed reg is in the
23740 range of regs restored. */
23741 if (!(strategy & REST_INLINE_FPRS))
23742 for (int i = info->first_fp_reg_save; i < 64; i++)
23743 if (fixed_regs[i])
23745 strategy |= REST_INLINE_FPRS;
23746 break;
23749 /* We can only use the out-of-line routines to restore fprs if we've
23750 saved all the registers from first_fp_reg_save in the prologue.
23751 Otherwise, we risk loading garbage. Of course, if we have saved
23752 out-of-line then we know we haven't skipped any fprs. */
23753 if ((strategy & SAVE_INLINE_FPRS)
23754 && !(strategy & REST_INLINE_FPRS))
23755 for (int i = info->first_fp_reg_save; i < 64; i++)
23756 if (!save_reg_p (i))
23758 strategy |= REST_INLINE_FPRS;
23759 break;
23762 /* Similarly, for altivec regs. */
23763 if (!(strategy & REST_INLINE_VRS))
23764 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23765 if (fixed_regs[i])
23767 strategy |= REST_INLINE_VRS;
23768 break;
23771 if ((strategy & SAVE_INLINE_VRS)
23772 && !(strategy & REST_INLINE_VRS))
23773 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
23774 if (!save_reg_p (i))
23776 strategy |= REST_INLINE_VRS;
23777 break;
23780 /* info->lr_save_p isn't yet set if the only reason lr needs to be
23781 saved is an out-of-line save or restore. Set up the value for
23782 the next test (excluding out-of-line gprs). */
23783 bool lr_save_p = (info->lr_save_p
23784 || !(strategy & SAVE_INLINE_FPRS)
23785 || !(strategy & SAVE_INLINE_VRS)
23786 || !(strategy & REST_INLINE_FPRS)
23787 || !(strategy & REST_INLINE_VRS));
23789 if (TARGET_MULTIPLE
23790 && !TARGET_POWERPC64
23791 && info->first_gp_reg_save < 31
23792 && !(flag_shrink_wrap
23793 && flag_shrink_wrap_separate
23794 && optimize_function_for_speed_p (cfun)))
23796 int count = 0;
23797 for (int i = info->first_gp_reg_save; i < 32; i++)
23798 if (save_reg_p (i))
23799 count++;
23801 if (count <= 1)
23802 /* Don't use store multiple if only one reg needs to be
23803 saved. This can occur for example when the ABI_V4 pic reg
23804 (r30) needs to be saved to make calls, but r31 is not
23805 used. */
23806 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23807 else
23809 /* Prefer store multiple for saves over out-of-line
23810 routines, since the store-multiple instruction will
23811 always be smaller. */
23812 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
23814 /* The situation is more complicated with load multiple.
23815 We'd prefer to use the out-of-line routines for restores,
23816 since the "exit" out-of-line routines can handle the
23817 restore of LR and the frame teardown. However it doesn't
23818 make sense to use the out-of-line routine if that is the
23819 only reason we'd need to save LR, and we can't use the
23820 "exit" out-of-line gpr restore if we have saved some
23821 fprs; in those cases it is advantageous to use load
23822 multiple when available. */
23823 if (info->first_fp_reg_save != 64 || !lr_save_p)
23824 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
23828 /* Using the "exit" out-of-line routine does not improve code size
23829 if using it would require lr to be saved and if only saving one
23830 or two gprs. */
23831 else if (!lr_save_p && info->first_gp_reg_save > 29)
23832 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
23834 /* Don't ever restore fixed regs. */
23835 if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23836 for (int i = info->first_gp_reg_save; i < 32; i++)
23837 if (fixed_reg_p (i))
23839 strategy |= REST_INLINE_GPRS;
23840 strategy &= ~REST_MULTIPLE;
23841 break;
23844 /* We can only use load multiple or the out-of-line routines to
23845 restore gprs if we've saved all the registers from
23846 first_gp_reg_save. Otherwise, we risk loading garbage.
23847 Of course, if we have saved out-of-line or used stmw then we know
23848 we haven't skipped any gprs. */
23849 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
23850 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
23851 for (int i = info->first_gp_reg_save; i < 32; i++)
23852 if (!save_reg_p (i))
23854 strategy |= REST_INLINE_GPRS;
23855 strategy &= ~REST_MULTIPLE;
23856 break;
23859 if (TARGET_ELF && TARGET_64BIT)
23861 if (!(strategy & SAVE_INLINE_FPRS))
23862 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23863 else if (!(strategy & SAVE_INLINE_GPRS)
23864 && info->first_fp_reg_save == 64)
23865 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
23867 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
23868 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
23870 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
23871 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
23873 return strategy;
23876 /* Calculate the stack information for the current function. This is
23877 complicated by having two separate calling sequences, the AIX calling
23878 sequence and the V.4 calling sequence.
23880 AIX (and Darwin/Mac OS X) stack frames look like:
23881 32-bit 64-bit
23882 SP----> +---------------------------------------+
23883 | back chain to caller | 0 0
23884 +---------------------------------------+
23885 | saved CR | 4 8 (8-11)
23886 +---------------------------------------+
23887 | saved LR | 8 16
23888 +---------------------------------------+
23889 | reserved for compilers | 12 24
23890 +---------------------------------------+
23891 | reserved for binders | 16 32
23892 +---------------------------------------+
23893 | saved TOC pointer | 20 40
23894 +---------------------------------------+
23895 | Parameter save area (+padding*) (P) | 24 48
23896 +---------------------------------------+
23897 | Alloca space (A) | 24+P etc.
23898 +---------------------------------------+
23899 | Local variable space (L) | 24+P+A
23900 +---------------------------------------+
23901 | Float/int conversion temporary (X) | 24+P+A+L
23902 +---------------------------------------+
23903 | Save area for AltiVec registers (W) | 24+P+A+L+X
23904 +---------------------------------------+
23905 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
23906 +---------------------------------------+
23907 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
23908 +---------------------------------------+
23909 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
23910 +---------------------------------------+
23911 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
23912 +---------------------------------------+
23913 old SP->| back chain to caller's caller |
23914 +---------------------------------------+
23916 * If the alloca area is present, the parameter save area is
23917 padded so that the former starts 16-byte aligned.
23919 The required alignment for AIX configurations is two words (i.e., 8
23920 or 16 bytes).
23922 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
23924 SP----> +---------------------------------------+
23925 | Back chain to caller | 0
23926 +---------------------------------------+
23927 | Save area for CR | 8
23928 +---------------------------------------+
23929 | Saved LR | 16
23930 +---------------------------------------+
23931 | Saved TOC pointer | 24
23932 +---------------------------------------+
23933 | Parameter save area (+padding*) (P) | 32
23934 +---------------------------------------+
23935 | Alloca space (A) | 32+P
23936 +---------------------------------------+
23937 | Local variable space (L) | 32+P+A
23938 +---------------------------------------+
23939 | Save area for AltiVec registers (W) | 32+P+A+L
23940 +---------------------------------------+
23941 | AltiVec alignment padding (Y) | 32+P+A+L+W
23942 +---------------------------------------+
23943 | Save area for GP registers (G) | 32+P+A+L+W+Y
23944 +---------------------------------------+
23945 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
23946 +---------------------------------------+
23947 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
23948 +---------------------------------------+
23950 * If the alloca area is present, the parameter save area is
23951 padded so that the former starts 16-byte aligned.
23953 V.4 stack frames look like:
23955 SP----> +---------------------------------------+
23956 | back chain to caller | 0
23957 +---------------------------------------+
23958 | caller's saved LR | 4
23959 +---------------------------------------+
23960 | Parameter save area (+padding*) (P) | 8
23961 +---------------------------------------+
23962 | Alloca space (A) | 8+P
23963 +---------------------------------------+
23964 | Varargs save area (V) | 8+P+A
23965 +---------------------------------------+
23966 | Local variable space (L) | 8+P+A+V
23967 +---------------------------------------+
23968 | Float/int conversion temporary (X) | 8+P+A+V+L
23969 +---------------------------------------+
23970 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
23971 +---------------------------------------+
23972 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
23973 +---------------------------------------+
23974 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
23975 +---------------------------------------+
23976 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
23977 +---------------------------------------+
23978 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
23979 +---------------------------------------+
23980 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
23981 +---------------------------------------+
23982 old SP->| back chain to caller's caller |
23983 +---------------------------------------+
23985 * If the alloca area is present and the required alignment is
23986 16 bytes, the parameter save area is padded so that the
23987 alloca area starts 16-byte aligned.
23989 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
23990 given. (But note below and in sysv4.h that we require only 8 and
23991 may round up the size of our stack frame anyway. The historical
23992 reason is early versions of powerpc-linux which didn't properly
23993 align the stack at program startup. A happy side-effect is that
23994 -mno-eabi libraries can be used with -meabi programs.)
23996 The EABI configuration defaults to the V.4 layout. However,
23997 the stack alignment requirements may differ. If -mno-eabi is not
23998 given, the required stack alignment is 8 bytes; if -mno-eabi is
23999 given, the required alignment is 16 bytes. (But see V.4 comment
24000 above.) */
24002 #ifndef ABI_STACK_BOUNDARY
24003 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24004 #endif
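/* Illustrative note: the offset computations below lean on
   RS6000_ALIGN, which rounds its first argument up to a multiple of
   the second (assuming the usual round-up definition in rs6000.h):

       RS6000_ALIGN (20, 16) == 32
       RS6000_ALIGN (32, 16) == 32
       RS6000_ALIGN (0, 16)  == 0  */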
24006 static rs6000_stack_t *
24007 rs6000_stack_info (void)
24009 /* We should never be called for thunks; we are not set up for that. */
24010 gcc_assert (!cfun->is_thunk);
24012 rs6000_stack_t *info = &stack_info;
24013 int reg_size = TARGET_32BIT ? 4 : 8;
24014 int ehrd_size;
24015 int ehcr_size;
24016 int save_align;
24017 int first_gp;
24018 HOST_WIDE_INT non_fixed_size;
24019 bool using_static_chain_p;
24021 if (reload_completed && info->reload_completed)
24022 return info;
24024 memset (info, 0, sizeof (*info));
24025 info->reload_completed = reload_completed;
24027 /* Select which calling sequence. */
24028 info->abi = DEFAULT_ABI;
24030 /* Calculate which registers need to be saved & save area size. */
24031 info->first_gp_reg_save = first_reg_to_save ();
24032 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24033 even if it currently looks like we won't. Reload may need it to
24034 get at a constant; if so, it will have already created a constant
24035 pool entry for it. */
24036 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24037 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24038 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24039 && crtl->uses_const_pool
24040 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24041 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24042 else
24043 first_gp = info->first_gp_reg_save;
24045 info->gp_size = reg_size * (32 - first_gp);
24047 info->first_fp_reg_save = first_fp_reg_to_save ();
24048 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24050 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24051 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24052 - info->first_altivec_reg_save);
24054 /* Does this function call anything? */
24055 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24057 /* Determine if we need to save the condition code registers. */
24058 if (save_reg_p (CR2_REGNO)
24059 || save_reg_p (CR3_REGNO)
24060 || save_reg_p (CR4_REGNO))
24062 info->cr_save_p = 1;
24063 if (DEFAULT_ABI == ABI_V4)
24064 info->cr_size = reg_size;
24067 /* If the current function calls __builtin_eh_return, then we need
24068 to allocate stack space for registers that will hold data for
24069 the exception handler. */
24070 if (crtl->calls_eh_return)
24072 unsigned int i;
24073 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24074 continue;
24076 ehrd_size = i * UNITS_PER_WORD;
24078 else
24079 ehrd_size = 0;
24081 /* In the ELFv2 ABI, we also need to allocate space for separate
24082 CR field save areas if the function calls __builtin_eh_return. */
24083 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24085 /* This hard-codes that we have three call-saved CR fields. */
24086 ehcr_size = 3 * reg_size;
24087 /* We do *not* use the regular CR save mechanism. */
24088 info->cr_save_p = 0;
24090 else
24091 ehcr_size = 0;
24093 /* Determine various sizes. */
24094 info->reg_size = reg_size;
24095 info->fixed_size = RS6000_SAVE_AREA;
24096 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24097 if (cfun->calls_alloca)
24098 info->parm_size =
24099 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24100 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24101 else
24102 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24103 TARGET_ALTIVEC ? 16 : 8);
24104 if (FRAME_GROWS_DOWNWARD)
24105 info->vars_size
24106 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24107 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24108 - (info->fixed_size + info->vars_size + info->parm_size);
24110 if (TARGET_ALTIVEC_ABI)
24111 info->vrsave_mask = compute_vrsave_mask ();
24113 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24114 info->vrsave_size = 4;
24116 compute_save_world_info (info);
24118 /* Calculate the offsets. */
24119 switch (DEFAULT_ABI)
24121 case ABI_NONE:
24122 default:
24123 gcc_unreachable ();
24125 case ABI_AIX:
24126 case ABI_ELFv2:
24127 case ABI_DARWIN:
24128 info->fp_save_offset = -info->fp_size;
24129 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24131 if (TARGET_ALTIVEC_ABI)
24133 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24135 /* Align stack so vector save area is on a quadword boundary.
24136 The padding goes above the vectors. */
24137 if (info->altivec_size != 0)
24138 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24140 info->altivec_save_offset = info->vrsave_save_offset
24141 - info->altivec_padding_size
24142 - info->altivec_size;
24143 gcc_assert (info->altivec_size == 0
24144 || info->altivec_save_offset % 16 == 0);
24146 /* Adjust for AltiVec case. */
24147 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24149 else
24150 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24152 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24153 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24154 info->lr_save_offset = 2*reg_size;
24155 break;
24157 case ABI_V4:
24158 info->fp_save_offset = -info->fp_size;
24159 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24160 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24162 if (TARGET_ALTIVEC_ABI)
24164 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24166 /* Align stack so vector save area is on a quadword boundary. */
24167 if (info->altivec_size != 0)
24168 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24170 info->altivec_save_offset = info->vrsave_save_offset
24171 - info->altivec_padding_size
24172 - info->altivec_size;
24174 /* Adjust for AltiVec case. */
24175 info->ehrd_offset = info->altivec_save_offset;
24177 else
24178 info->ehrd_offset = info->cr_save_offset;
24180 info->ehrd_offset -= ehrd_size;
24181 info->lr_save_offset = reg_size;
24184 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24185 info->save_size = RS6000_ALIGN (info->fp_size
24186 + info->gp_size
24187 + info->altivec_size
24188 + info->altivec_padding_size
24189 + ehrd_size
24190 + ehcr_size
24191 + info->cr_size
24192 + info->vrsave_size,
24193 save_align);
24195 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24197 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24198 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24200 /* Determine if we need to save the link register. */
24201 if (info->calls_p
24202 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24203 && crtl->profile
24204 && !TARGET_PROFILE_KERNEL)
24205 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24206 #ifdef TARGET_RELOCATABLE
24207 || (DEFAULT_ABI == ABI_V4
24208 && (TARGET_RELOCATABLE || flag_pic > 1)
24209 && !constant_pool_empty_p ())
24210 #endif
24211 || rs6000_ra_ever_killed ())
24212 info->lr_save_p = 1;
24214 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24215 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24216 && call_used_regs[STATIC_CHAIN_REGNUM]);
24217 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24219 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24220 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24221 || !(info->savres_strategy & SAVE_INLINE_VRS)
24222 || !(info->savres_strategy & REST_INLINE_GPRS)
24223 || !(info->savres_strategy & REST_INLINE_FPRS)
24224 || !(info->savres_strategy & REST_INLINE_VRS))
24225 info->lr_save_p = 1;
24227 if (info->lr_save_p)
24228 df_set_regs_ever_live (LR_REGNO, true);
24230 /* Determine if we need to allocate any stack frame:
24232 For AIX we need to push the stack if a frame pointer is needed
24233 (because the stack might be dynamically adjusted), if we are
24234 debugging, if we make calls, or if the sum of fp_save, gp_save,
24235 and local variables is more than the space needed to save all
24236 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24237 + 18*8 = 288 (GPR13 reserved).
24239 For V.4 we don't have the stack cushion that AIX uses, but assume
24240 that the debugger can handle stackless frames. */
24242 if (info->calls_p)
24243 info->push_p = 1;
24245 else if (DEFAULT_ABI == ABI_V4)
24246 info->push_p = non_fixed_size != 0;
24248 else if (frame_pointer_needed)
24249 info->push_p = 1;
24251 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24252 info->push_p = 1;
24254 else
24255 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
24257 return info;
24260 static void
24261 debug_stack_info (rs6000_stack_t *info)
24263 const char *abi_string;
24265 if (! info)
24266 info = rs6000_stack_info ();
24268 fprintf (stderr, "\nStack information for function %s:\n",
24269 ((current_function_decl && DECL_NAME (current_function_decl))
24270 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
24271 : "<unknown>"));
24273 switch (info->abi)
24275 default: abi_string = "Unknown"; break;
24276 case ABI_NONE: abi_string = "NONE"; break;
24277 case ABI_AIX: abi_string = "AIX"; break;
24278 case ABI_ELFv2: abi_string = "ELFv2"; break;
24279 case ABI_DARWIN: abi_string = "Darwin"; break;
24280 case ABI_V4: abi_string = "V.4"; break;
24283 fprintf (stderr, "\tABI = %5s\n", abi_string);
24285 if (TARGET_ALTIVEC_ABI)
24286 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
24288 if (info->first_gp_reg_save != 32)
24289 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24291 if (info->first_fp_reg_save != 64)
24292 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24294 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24295 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24296 info->first_altivec_reg_save);
24298 if (info->lr_save_p)
24299 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24301 if (info->cr_save_p)
24302 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24304 if (info->vrsave_mask)
24305 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
24307 if (info->push_p)
24308 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
24310 if (info->calls_p)
24311 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
24313 if (info->gp_size)
24314 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
24316 if (info->fp_size)
24317 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
24319 if (info->altivec_size)
24320 fprintf (stderr, "\taltivec_save_offset = %5d\n",
24321 info->altivec_save_offset);
24323 if (info->vrsave_size)
24324 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
24325 info->vrsave_save_offset);
24327 if (info->lr_save_p)
24328 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
24330 if (info->cr_save_p)
24331 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
24333 if (info->varargs_save_offset)
24334 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
24336 if (info->total_size)
24337 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24338 info->total_size);
24340 if (info->vars_size)
24341 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
24342 info->vars_size);
24344 if (info->parm_size)
24345 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
24347 if (info->fixed_size)
24348 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
24350 if (info->gp_size)
24351 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
24353 if (info->fp_size)
24354 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
24356 if (info->altivec_size)
24357 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
24359 if (info->vrsave_size)
24360 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
24362 if (info->altivec_padding_size)
24363 fprintf (stderr, "\taltivec_padding_size= %5d\n",
24364 info->altivec_padding_size);
24366 if (info->cr_size)
24367 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
24369 if (info->save_size)
24370 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
24372 if (info->reg_size != 4)
24373 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
24375 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
24377 fprintf (stderr, "\n");
24380 rtx
24381 rs6000_return_addr (int count, rtx frame)
24383 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
24384 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
24385 if (count != 0
24386 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
24388 cfun->machine->ra_needs_full_frame = 1;
24390 if (count == 0)
24391 /* FRAME is set to frame_pointer_rtx by the generic code, but that
24392 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
24393 frame = stack_pointer_rtx;
24394 rtx prev_frame_addr = memory_address (Pmode, frame);
24395 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
24396 rtx lr_save_off = plus_constant (Pmode,
24397 prev_frame, RETURN_ADDRESS_OFFSET);
24398 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
24399 return gen_rtx_MEM (Pmode, lr_save_addr);
24402 cfun->machine->ra_need_lr = 1;
24403 return get_hard_reg_initial_val (Pmode, LR_REGNO);
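/* Illustrative sketch (register numbers assumed): for
   __builtin_return_address (0) on a 64-bit Darwin-style target where
   the prologue trashes LR, the RTL built above amounts to one
   back-chain load plus a load from the LR save slot
   (RETURN_ADDRESS_OFFSET == 2 * reg_size == 16 here):

       ld r3,0(r1)     # prev_frame = *sp (back chain)
       ld r3,16(r3)    # *(prev_frame + RETURN_ADDRESS_OFFSET)  */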
24406 /* Say whether a function is a candidate for sibcall handling or not. */
24408 static bool
24409 rs6000_function_ok_for_sibcall (tree decl, tree exp)
24411 tree fntype;
24413 if (decl)
24414 fntype = TREE_TYPE (decl);
24415 else
24416 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
24418 /* We can't do it if the called function has more vector parameters
24419 than the current function; there's nowhere to put the VRsave code. */
24420 if (TARGET_ALTIVEC_ABI
24421 && TARGET_ALTIVEC_VRSAVE
24422 && !(decl && decl == current_function_decl))
24424 function_args_iterator args_iter;
24425 tree type;
24426 int nvreg = 0;
24428 /* Functions with vector parameters are required to have a
24429 prototype, so the argument type info must be available
24430 here. */
24431 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
24432 if (TREE_CODE (type) == VECTOR_TYPE
24433 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24434 nvreg++;
24436 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
24437 if (TREE_CODE (type) == VECTOR_TYPE
24438 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
24439 nvreg--;
24441 if (nvreg > 0)
24442 return false;
24445 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
24446 functions, because the callee may have a different TOC pointer from
24447 the caller and there's no way to ensure we restore the TOC when
24448 we return. With the secure-plt SYSV ABI we can't make non-local
24449 calls when -fpic/PIC because the plt call stubs use r30. */
24450 if (DEFAULT_ABI == ABI_DARWIN
24451 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24452 && decl
24453 && !DECL_EXTERNAL (decl)
24454 && !DECL_WEAK (decl)
24455 && (*targetm.binds_local_p) (decl))
24456 || (DEFAULT_ABI == ABI_V4
24457 && (!TARGET_SECURE_PLT
24458 || !flag_pic
24459 || (decl
24460 && (*targetm.binds_local_p) (decl)))))
24462 tree attr_list = TYPE_ATTRIBUTES (fntype);
24464 if (!lookup_attribute ("longcall", attr_list)
24465 || lookup_attribute ("shortcall", attr_list))
24466 return true;
24469 return false;
24472 static int
24473 rs6000_ra_ever_killed (void)
24475 rtx_insn *top;
24476 rtx reg;
24477 rtx_insn *insn;
24479 if (cfun->is_thunk)
24480 return 0;
24482 if (cfun->machine->lr_save_state)
24483 return cfun->machine->lr_save_state - 1;
24485 /* regs_ever_live has LR marked as used if any sibcalls are present,
24486 but this should not force saving and restoring in the
24487 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
24488 clobbers LR, so that is inappropriate. */
24490 /* Also, the prologue can generate a store into LR that
24491 doesn't really count, like this:
24493 move LR->R0
24494 bcl to set PIC register
24495 move LR->R31
24496 move R0->LR
24498 When we're called from the epilogue, we need to avoid counting
24499 this as a store. */
24501 push_topmost_sequence ();
24502 top = get_insns ();
24503 pop_topmost_sequence ();
24504 reg = gen_rtx_REG (Pmode, LR_REGNO);
24506 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
24508 if (INSN_P (insn))
24510 if (CALL_P (insn))
24512 if (!SIBLING_CALL_P (insn))
24513 return 1;
24515 else if (find_regno_note (insn, REG_INC, LR_REGNO))
24516 return 1;
24517 else if (set_of (reg, insn) != NULL_RTX
24518 && !prologue_epilogue_contains (insn))
24519 return 1;
24522 return 0;
24525 /* Emit instructions needed to load the TOC register.
24526 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set and
24527 there is a constant pool, or for SVR4 -fpic. */
24529 void
24530 rs6000_emit_load_toc_table (int fromprolog)
24532 rtx dest;
24533 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
24535 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
24537 char buf[30];
24538 rtx lab, tmp1, tmp2, got;
24540 lab = gen_label_rtx ();
24541 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
24542 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24543 if (flag_pic == 2)
24545 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24546 need_toc_init = 1;
24548 else
24549 got = rs6000_got_sym ();
24550 tmp1 = tmp2 = dest;
24551 if (!fromprolog)
24553 tmp1 = gen_reg_rtx (Pmode);
24554 tmp2 = gen_reg_rtx (Pmode);
24556 emit_insn (gen_load_toc_v4_PIC_1 (lab));
24557 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
24558 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
24559 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
24561 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
24563 emit_insn (gen_load_toc_v4_pic_si ());
24564 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24566 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
24568 char buf[30];
24569 rtx temp0 = (fromprolog
24570 ? gen_rtx_REG (Pmode, 0)
24571 : gen_reg_rtx (Pmode));
24573 if (fromprolog)
24575 rtx symF, symL;
24577 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
24578 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24580 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
24581 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
24583 emit_insn (gen_load_toc_v4_PIC_1 (symF));
24584 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24585 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
24587 else
24589 rtx tocsym, lab;
24591 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24592 need_toc_init = 1;
24593 lab = gen_label_rtx ();
24594 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
24595 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
24596 if (TARGET_LINK_STACK)
24597 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
24598 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
24600 emit_insn (gen_addsi3 (dest, temp0, dest));
24602 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
24604 /* This is for AIX code running in non-PIC ELF32. */
24605 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
24607 need_toc_init = 1;
24608 emit_insn (gen_elf_high (dest, realsym));
24609 emit_insn (gen_elf_low (dest, dest, realsym));
24611 else
24613 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24615 if (TARGET_32BIT)
24616 emit_insn (gen_load_toc_aix_si (dest));
24617 else
24618 emit_insn (gen_load_toc_aix_di (dest));
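/* Illustrative sketch (shape only; the real insns come from the
   load_toc_v4_PIC_* patterns in rs6000.md): on 32-bit SVR4 the PIC
   paths above obtain the current PC with a branch-and-link, then add
   the link-time distance to the GOT/TOC, roughly:

       bcl 20,31,.LCF0         # put the address of .LCF0 into LR
   .LCF0:
       mflr 30                 # dest = current PC
       addis 30,30,_GLOBAL_OFFSET_TABLE_-.LCF0@ha
       addi 30,30,_GLOBAL_OFFSET_TABLE_-.LCF0@l  */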
24622 /* Emit instructions to restore the link register after determining where
24623 its value has been stored. */
24625 void
24626 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
24628 rs6000_stack_t *info = rs6000_stack_info ();
24629 rtx operands[2];
24631 operands[0] = source;
24632 operands[1] = scratch;
24634 if (info->lr_save_p)
24636 rtx frame_rtx = stack_pointer_rtx;
24637 HOST_WIDE_INT sp_offset = 0;
24638 rtx tmp;
24640 if (frame_pointer_needed
24641 || cfun->calls_alloca
24642 || info->total_size > 32767)
24644 tmp = gen_frame_mem (Pmode, frame_rtx);
24645 emit_move_insn (operands[1], tmp);
24646 frame_rtx = operands[1];
24648 else if (info->push_p)
24649 sp_offset = info->total_size;
24651 tmp = plus_constant (Pmode, frame_rtx,
24652 info->lr_save_offset + sp_offset);
24653 tmp = gen_frame_mem (Pmode, tmp);
24654 emit_move_insn (tmp, operands[0]);
24656 else
24657 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
24659 /* Freeze lr_save_p. We've just emitted rtl that depends on the
24660 state of lr_save_p so any change from here on would be a bug. In
24661 particular, stop rs6000_ra_ever_killed from considering the SET
24662 of lr we may have added just above. */
24663 cfun->machine->lr_save_state = info->lr_save_p + 1;
24666 static GTY(()) alias_set_type set = -1;
24668 alias_set_type
24669 get_TOC_alias_set (void)
24671 if (set == -1)
24672 set = new_alias_set ();
24673 return set;
24676 /* This returns nonzero if the current function uses the TOC. This is
24677 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
24678 is generated by the ABI_V4 load_toc_* patterns.
24679 Return 2 instead of 1 if the load_toc_* pattern is in the function
24680 partition that doesn't start the function. */
24681 #if TARGET_ELF
24682 static int
24683 uses_TOC (void)
24685 rtx_insn *insn;
24686 int ret = 1;
24688 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24690 if (INSN_P (insn))
24692 rtx pat = PATTERN (insn);
24693 int i;
24695 if (GET_CODE (pat) == PARALLEL)
24696 for (i = 0; i < XVECLEN (pat, 0); i++)
24698 rtx sub = XVECEXP (pat, 0, i);
24699 if (GET_CODE (sub) == USE)
24701 sub = XEXP (sub, 0);
24702 if (GET_CODE (sub) == UNSPEC
24703 && XINT (sub, 1) == UNSPEC_TOC)
24704 return ret;
24708 else if (crtl->has_bb_partition
24709 && NOTE_P (insn)
24710 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
24711 ret = 2;
24713 return 0;
24715 #endif
24717 rtx
24718 create_TOC_reference (rtx symbol, rtx largetoc_reg)
24720 rtx tocrel, tocreg, hi;
24722 if (TARGET_DEBUG_ADDR)
24724 if (GET_CODE (symbol) == SYMBOL_REF)
24725 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
24726 XSTR (symbol, 0));
24727 else
24729 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
24730 GET_RTX_NAME (GET_CODE (symbol)));
24731 debug_rtx (symbol);
24735 if (!can_create_pseudo_p ())
24736 df_set_regs_ever_live (TOC_REGISTER, true);
24738 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
24739 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
24740 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
24741 return tocrel;
24743 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
24744 if (largetoc_reg != NULL)
24746 emit_move_insn (largetoc_reg, hi);
24747 hi = largetoc_reg;
24749 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
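/* Illustrative sketch of the two result shapes (register r9 assumed;
   the actual output depends on the rs6000.md patterns):

       CMODEL_SMALL:  the bare UNSPEC_TOCREL folds into one load,
			  ld 9,sym@toc(2)

       medium/large:  the HIGH/LO_SUM pair splits into
			  addis 9,2,sym@toc@ha
			  ld 9,sym@toc@l(9)  */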
24752 /* Issue assembly directives that create a reference to the given DWARF
24753 FRAME_TABLE_LABEL from the current function section. */
24754 void
24755 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
24757 fprintf (asm_out_file, "\t.ref %s\n",
24758 (* targetm.strip_name_encoding) (frame_table_label));
24761 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
24762 and the change to the stack pointer. */
24764 static void
24765 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
24767 rtvec p;
24768 int i;
24769 rtx regs[3];
24771 i = 0;
24772 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24773 if (hard_frame_needed)
24774 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
24775 if (!(REGNO (fp) == STACK_POINTER_REGNUM
24776 || (hard_frame_needed
24777 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
24778 regs[i++] = fp;
24780 p = rtvec_alloc (i);
24781 while (--i >= 0)
24783 rtx mem = gen_frame_mem (BLKmode, regs[i]);
24784 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
24787 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
24790 /* Allocate SIZE_INT bytes on the stack using a store with update style insn
24791 and set the appropriate attributes for the generated insn. Return the
24792 first insn which adjusts the stack pointer or the last insn before
24793 the stack adjustment loop.
24795 SIZE_INT is used to create the CFI note for the allocation.
24797 SIZE_RTX is an rtx containing the size of the adjustment. Note that
24798 since stacks grow to lower addresses, its runtime value is -SIZE_INT.
24800 ORIG_SP contains the backchain value that must be stored at *sp. */
24802 static rtx_insn *
24803 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
24805 rtx_insn *insn;
24807 rtx size_rtx = GEN_INT (-size_int);
24808 if (size_int > 32767)
24810 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
24811 /* Need a note here so that try_split doesn't get confused. */
24812 if (get_last_insn () == NULL_RTX)
24813 emit_note (NOTE_INSN_DELETED);
24814 insn = emit_move_insn (tmp_reg, size_rtx);
24815 try_split (PATTERN (insn), insn, 0);
24816 size_rtx = tmp_reg;
24819 if (Pmode == SImode)
24820 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
24821 stack_pointer_rtx,
24822 size_rtx,
24823 orig_sp));
24824 else
24825 insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
24826 stack_pointer_rtx,
24827 size_rtx,
24828 orig_sp));
24829 rtx par = PATTERN (insn);
24830 gcc_assert (GET_CODE (par) == PARALLEL);
24831 rtx set = XVECEXP (par, 0, 0);
24832 gcc_assert (GET_CODE (set) == SET);
24833 rtx mem = SET_DEST (set);
24834 gcc_assert (MEM_P (mem));
24835 MEM_NOTRAP_P (mem) = 1;
24836 set_mem_alias_set (mem, get_frame_alias_set ());
24838 RTX_FRAME_RELATED_P (insn) = 1;
24839 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24840 gen_rtx_SET (stack_pointer_rtx,
24841 gen_rtx_PLUS (Pmode,
24842 stack_pointer_rtx,
24843 GEN_INT (-size_int))));
24845 /* Emit a blockage to ensure the allocation/probing insns are
24846 not optimized, combined, removed, etc. Add REG_STACK_CHECK
24847 note for similar reasons. */
24848 if (flag_stack_clash_protection)
24850 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
24851 emit_insn (gen_blockage ());
24854 return insn;
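/* Illustrative sketch: on a 64-bit target the store-with-update insn
   above is a single "stdu" when SIZE_INT fits in 16 bits, e.g. for
   SIZE_INT == 128:

       stdu 1,-128(1)     # *(sp - 128) = old sp (backchain); sp -= 128

   For larger sizes the negated size is first loaded into r0 and the
   indexed "stdux 1,1,0" form is used instead (shape only; the exact
   insns come from the mov*_update_stack patterns).  */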
24857 static HOST_WIDE_INT
24858 get_stack_clash_protection_probe_interval (void)
24860 return (HOST_WIDE_INT_1U
24861 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
24864 static HOST_WIDE_INT
24865 get_stack_clash_protection_guard_size (void)
24867 return (HOST_WIDE_INT_1U
24868 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
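/* Illustrative note: both values are powers of two taken from --param
   settings; with the typical default of 12 for
   stack-clash-protection-probe-interval, the probe interval is
   HOST_WIDE_INT_1U << 12 == 4096 bytes, i.e. one page.  */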
24871 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
24872 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
24874 COPY_REG, if non-null, should contain a copy of the original
24875 stack pointer at exit from this function.
24877 This is subtly different than the Ada probing in that it tries hard to
24878 prevent attacks that jump the stack guard. Thus it is never allowed to
24879 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
24880 space without a suitable probe. */
24881 static rtx_insn *
24882 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
24883 rtx copy_reg)
24885 rtx orig_sp = copy_reg;
24887 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
24889 /* Round the size down to a multiple of PROBE_INTERVAL. */
24890 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
24892 /* If explicitly requested,
24893 or the rounded size is not the same as the original size
24894 or the rounded size is greater than a page,
24895 then we will need a copy of the original stack pointer. */
24896 if (rounded_size != orig_size
24897 || rounded_size > probe_interval
24898 || copy_reg)
24900 /* If the caller did not request a copy of the incoming stack
24901 pointer, then we use r0 to hold the copy. */
24902 if (!copy_reg)
24903 orig_sp = gen_rtx_REG (Pmode, 0);
24904 emit_move_insn (orig_sp, stack_pointer_rtx);
24907 /* There are three cases here.
24909 One is a single probe which is the most common and most efficiently
24910 implemented as it does not have to have a copy of the original
24911 stack pointer if there are no residuals.
24913 Second is unrolled allocation/probes which we use if there's just
24914 a few of them. It needs to save the original stack pointer into a
24915 temporary for use as a source register in the allocation/probe.
24917 Last is a loop. This is the most uncommon case and least efficient. */
24918 rtx_insn *retval = NULL;
24919 if (rounded_size == probe_interval)
24921 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
24923 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
24925 else if (rounded_size <= 8 * probe_interval)
24927 /* The ABI requires using the store with update insns to allocate
24928 space and store the backchain into the stack.
24930 So we save the current stack pointer into a temporary, then
24931 emit the store-with-update insns to store the saved stack pointer
24932 into the right location in each new page. */
24933 for (int i = 0; i < rounded_size; i += probe_interval)
24935 rtx_insn *insn
24936 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
24938 /* Save the first stack adjustment in RETVAL. */
24939 if (i == 0)
24940 retval = insn;
24943 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
24945 else
24947 /* Compute the ending address. */
24948 rtx end_addr
24949 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
24950 rtx rs = GEN_INT (-rounded_size);
24951 rtx_insn *insn;
24952 if (add_operand (rs, Pmode))
24953 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
24954 else
24956 emit_move_insn (end_addr, GEN_INT (-rounded_size));
24957 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
24958 stack_pointer_rtx));
24959 /* Describe the effect of INSN to the CFI engine. */
24960 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24961 gen_rtx_SET (end_addr,
24962 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24963 rs)));
24965 RTX_FRAME_RELATED_P (insn) = 1;
24967 /* Emit the loop. */
24968 if (TARGET_64BIT)
24969 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
24970 stack_pointer_rtx, orig_sp,
24971 end_addr));
24972 else
24973 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
24974 stack_pointer_rtx, orig_sp,
24975 end_addr));
24976 RTX_FRAME_RELATED_P (retval) = 1;
24977 /* Describe the effect of INSN to the CFI engine. */
24978 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
24979 gen_rtx_SET (stack_pointer_rtx, end_addr));
24981 /* Emit a blockage to ensure the allocation/probing insns are
24982 not optimized, combined, removed, etc. Other cases handle this
24983 within their call to rs6000_emit_allocate_stack_1. */
24984 emit_insn (gen_blockage ());
24986 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
24989 if (orig_size != rounded_size)
24991 /* Allocate (and implicitly probe) any residual space. */
24992 HOST_WIDE_INT residual = orig_size - rounded_size;
24994 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
24996 /* If the residual was the only allocation, then we can return the
24997 allocating insn. */
24998 if (!retval)
24999 retval = insn;
25002 return retval;
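/* Worked example (probe_interval assumed to be 4096): for
   orig_size == 10000, rounded_size == ROUND_DOWN (10000, 4096) == 8192,
   so the unrolled case above emits two 4096-byte allocate-and-store
   probes, followed by a residual allocation of 10000 - 8192 == 1808
   bytes, which acts as an implicit probe of its own.  */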
25005 /* Emit the correct code for allocating stack space, as insns.
25006 If COPY_REG, make sure a copy of the old frame is left there.
25007 The generated code may use hard register 0 as a temporary. */
25009 static rtx_insn *
25010 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25012 rtx_insn *insn;
25013 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25014 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25015 rtx todec = gen_int_mode (-size, Pmode);
25017 if (INTVAL (todec) != -size)
25019 warning (0, "stack frame too large");
25020 emit_insn (gen_trap ());
25021 return 0;
25024 if (crtl->limit_stack)
25026 if (REG_P (stack_limit_rtx)
25027 && REGNO (stack_limit_rtx) > 1
25028 && REGNO (stack_limit_rtx) <= 31)
25030 rtx_insn *insn
25031 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25032 gcc_assert (insn);
25033 emit_insn (insn);
25034 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
25036 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25037 && TARGET_32BIT
25038 && DEFAULT_ABI == ABI_V4
25039 && !flag_pic)
25041 rtx toload = gen_rtx_CONST (VOIDmode,
25042 gen_rtx_PLUS (Pmode,
25043 stack_limit_rtx,
25044 GEN_INT (size)));
25046 emit_insn (gen_elf_high (tmp_reg, toload));
25047 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25048 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25049 const0_rtx));
25051 else
25052 warning (0, "stack limit expression is not supported");
25055 if (flag_stack_clash_protection)
25057 if (size < get_stack_clash_protection_guard_size ())
25058 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
25059 else
25061 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
25062 copy_reg);
25064 /* If we asked for a copy with an offset, then we still need to add in
25065 the offset. */
25066 if (copy_reg && copy_off)
25067 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
25068 return insn;
25072 if (copy_reg)
25074 if (copy_off != 0)
25075 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25076 else
25077 emit_move_insn (copy_reg, stack_reg);
25080 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25081 it now and set the alias set/attributes. The above gen_*_update
25082 calls will generate a PARALLEL with the MEM set being the first
25083 operation. */
25084 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
25085 return insn;
25088 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25090 #if PROBE_INTERVAL > 32768
25091 #error Cannot use indexed addressing mode for stack probing
25092 #endif
25094 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25095 inclusive. These are offsets from the current stack pointer. */
25097 static void
25098 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25100 /* See if we have a constant small number of probes to generate. If so,
25101 that's the easy case. */
25102 if (first + size <= 32768)
25104 HOST_WIDE_INT i;
25106 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25107 it exceeds SIZE. If only one probe is needed, this will not
25108 generate any code. Then probe at FIRST + SIZE. */
25109 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25110 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25111 -(first + i)));
25113 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25114 -(first + size)));
25117 /* Otherwise, do the same as above, but in a loop. Note that we must be
25118 extra careful with variables wrapping around because we might be at
25119 the very top (or the very bottom) of the address space and we have
25120 to be able to handle this case properly; in particular, we use an
25121 equality test for the loop condition. */
25122 else
25124 HOST_WIDE_INT rounded_size;
25125 rtx r12 = gen_rtx_REG (Pmode, 12);
25126 rtx r0 = gen_rtx_REG (Pmode, 0);
25128 /* Sanity check for the addressing mode we're going to use. */
25129 gcc_assert (first <= 32768);
25131 /* Step 1: round SIZE to the previous multiple of the interval. */
25133 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25136 /* Step 2: compute initial and final value of the loop counter. */
25138 /* TEST_ADDR = SP + FIRST. */
25139 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25140 -first)));
25142 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25143 if (rounded_size > 32768)
25145 emit_move_insn (r0, GEN_INT (-rounded_size));
25146 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25148 else
25149 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25150 -rounded_size)));
25153 /* Step 3: the loop
25157 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25158 probe at TEST_ADDR
25160 while (TEST_ADDR != LAST_ADDR)
25162 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25163 until it is equal to ROUNDED_SIZE. */
25165 if (TARGET_64BIT)
25166 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
25167 else
25168 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
25171 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25172 that SIZE is equal to ROUNDED_SIZE. */
25174 if (size != rounded_size)
25175 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
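/* Illustrative aside, not part of GCC: a standalone sketch of the
   probe-offset arithmetic above for the small constant case, assuming
   the default 4 KiB probe interval (STACK_CHECK_PROBE_INTERVAL_EXP
   == 12).  Probes land at FIRST + N * PROBE_INTERVAL until that
   exceeds SIZE, then once more at FIRST + SIZE.  The FIRST and SIZE
   values in main are hypothetical.  */
#if 0
#include <stdio.h>

#define DEMO_PROBE_INTERVAL (1 << 12)

static void
demo_probe_offsets (long first, long size)
{
  for (long i = DEMO_PROBE_INTERVAL; i < size; i += DEMO_PROBE_INTERVAL)
    printf ("probe at sp - %ld\n", first + i);
  printf ("probe at sp - %ld\n", first + size);
}

int
main (void)
{
  demo_probe_offsets (16384, 10000);	/* prints three probe offsets */
  return 0;
}
#endif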
25179 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25180 addresses, not offsets. */
25182 static const char *
25183 output_probe_stack_range_1 (rtx reg1, rtx reg2)
25185 static int labelno = 0;
25186 char loop_lab[32];
25187 rtx xops[2];
25189 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25191 /* Loop. */
25192 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25194 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25195 xops[0] = reg1;
25196 xops[1] = GEN_INT (-PROBE_INTERVAL);
25197 output_asm_insn ("addi %0,%0,%1", xops);
25199 /* Probe at TEST_ADDR. */
25200 xops[1] = gen_rtx_REG (Pmode, 0);
25201 output_asm_insn ("stw %1,0(%0)", xops);
25203 /* Test if TEST_ADDR == LAST_ADDR. */
25204 xops[1] = reg2;
25205 if (TARGET_64BIT)
25206 output_asm_insn ("cmpd 0,%0,%1", xops);
25207 else
25208 output_asm_insn ("cmpw 0,%0,%1", xops);
25210 /* Branch. */
25211 fputs ("\tbne 0,", asm_out_file);
25212 assemble_name_raw (asm_out_file, loop_lab);
25213 fputc ('\n', asm_out_file);
25215 return "";
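/* For reference, the loop emitted above has this shape on a 64-bit
   target, assuming the default 4 KiB probe interval and the r12/r0
   operands passed in by rs6000_emit_probe_stack_range (label name
   illustrative):

       .LPSRL0:
	addi 12,12,-4096	# TEST_ADDR -= PROBE_INTERVAL
	stw 0,0(12)		# probe the word at TEST_ADDR
	cmpd 0,12,0		# TEST_ADDR == LAST_ADDR yet?
	bne 0,.LPSRL0  */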
25218 /* This function is called when rs6000_frame_related is processing
25219 SETs within a PARALLEL, and returns whether the REGNO save ought to
25220 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
25221 for out-of-line register save functions, store multiple, and the
25222 Darwin world_save. They may contain registers that don't really
25223 need saving. */
25225 static bool
25226 interesting_frame_related_regno (unsigned int regno)
25228 /* Apparent saves of r0 are actually saving LR. It doesn't make
25229 sense to substitute the regno here to test save_reg_p (LR_REGNO).
25230 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
25231 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
25232 as frame related. */
25233 if (regno == 0)
25234 return true;
25235 /* If we see CR2 then we are here on a Darwin world save. Saves of
25236 CR2 signify the whole CR is being saved. This is a long-standing
25237 ABI wart fixed by ELFv2. As with r0/lr, there is no need to check
25238 that CR needs to be saved. */
25239 if (regno == CR2_REGNO)
25240 return true;
25241 /* Omit frame info for any user-defined global regs. If frame info
25242 is supplied for them, frame unwinding will restore a user reg.
25243 Also omit frame info for any reg we don't need to save, as that
25244 bloats frame info and can cause problems with shrink wrapping.
25245 Since global regs won't be seen as needing to be saved, both of
25246 these conditions are covered by save_reg_p. */
25247 return save_reg_p (regno);
25250 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
25251 addresses, not offsets.
25253 REG2 contains the backchain that must be stored into *sp at each allocation.
25255 This is subtly different from the Ada probing above in that it tries hard
25256 to prevent attacks that jump the stack guard. Thus, it is never allowed
25257 to allocate more than PROBE_INTERVAL bytes of stack space without a
25258 suitable probe. */
25260 static const char *
25261 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
25263 static int labelno = 0;
25264 char loop_lab[32];
25265 rtx xops[3];
25267 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25269 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25271 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25273 /* This allocates and probes. */
25274 xops[0] = reg1;
25275 xops[1] = reg2;
25276 xops[2] = GEN_INT (-probe_interval);
25277 if (TARGET_64BIT)
25278 output_asm_insn ("stdu %1,%2(%0)", xops);
25279 else
25280 output_asm_insn ("stwu %1,%2(%0)", xops);
25282 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
25283 xops[0] = reg1;
25284 xops[1] = reg3;
25285 if (TARGET_64BIT)
25286 output_asm_insn ("cmpd 0,%0,%1", xops);
25287 else
25288 output_asm_insn ("cmpw 0,%0,%1", xops);
25290 fputs ("\tbne 0,", asm_out_file);
25291 assemble_name_raw (asm_out_file, loop_lab);
25292 fputc ('\n', asm_out_file);
25294 return "";
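/* Illustrative aside, not part of GCC: why the stdu/stwu loop above
   cannot jump the guard page.  Each iteration moves the stack pointer
   by at most one probe interval and stores the backchain at the new
   top of stack, so every interval-sized window of the allocation is
   touched.  A minimal sketch of equivalent chunked allocation,
   assuming a 4 KiB interval and hypothetical sizes:  */
#if 0
#include <stdio.h>

int
main (void)
{
  long interval = 4096, remaining = 10000;
  while (remaining > 0)
    {
      long chunk = remaining < interval ? remaining : interval;
      printf ("sp -= %ld; *sp = backchain;\n", chunk);	/* one stdu/stwu */
      remaining -= chunk;
    }
  return 0;
}
#endif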
25297 /* Wrapper around the output_probe_stack_range routines. */
25298 const char *
25299 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
25301 if (flag_stack_clash_protection)
25302 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
25303 else
25304 return output_probe_stack_range_1 (reg1, reg3);
25307 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
25308 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
25309 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
25310 deduce these equivalences by itself so it wasn't necessary to hold
25311 its hand so much. Don't be tempted to always supply d2_f_d_e with
25312 the actual cfa register, i.e. r31 when we are using a hard frame
25313 pointer. That fails when saving regs off r1, and sched moves the
25314 r31 setup past the reg saves. */
25316 static rtx_insn *
25317 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
25318 rtx reg2, rtx repl2)
25320 rtx repl;
25322 if (REGNO (reg) == STACK_POINTER_REGNUM)
25324 gcc_checking_assert (val == 0);
25325 repl = NULL_RTX;
25327 else
25328 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
25329 GEN_INT (val));
25331 rtx pat = PATTERN (insn);
25332 if (!repl && !reg2)
25334 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
25335 if (GET_CODE (pat) == PARALLEL)
25336 for (int i = 0; i < XVECLEN (pat, 0); i++)
25337 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25339 rtx set = XVECEXP (pat, 0, i);
25341 if (!REG_P (SET_SRC (set))
25342 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25343 RTX_FRAME_RELATED_P (set) = 1;
25345 RTX_FRAME_RELATED_P (insn) = 1;
25346 return insn;
25349 /* We expect that 'pat' is either a SET or a PARALLEL containing
25350 SETs (and possibly other stuff). In a PARALLEL, all the SETs
25351 are important so they all have to be marked RTX_FRAME_RELATED_P.
25352 Call simplify_replace_rtx on the SETs rather than the whole insn
25353 so as to leave the other stuff alone (for example USE of r12). */
25355 set_used_flags (pat);
25356 if (GET_CODE (pat) == SET)
25358 if (repl)
25359 pat = simplify_replace_rtx (pat, reg, repl);
25360 if (reg2)
25361 pat = simplify_replace_rtx (pat, reg2, repl2);
25363 else if (GET_CODE (pat) == PARALLEL)
25365 pat = shallow_copy_rtx (pat);
25366 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
25368 for (int i = 0; i < XVECLEN (pat, 0); i++)
25369 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25371 rtx set = XVECEXP (pat, 0, i);
25373 if (repl)
25374 set = simplify_replace_rtx (set, reg, repl);
25375 if (reg2)
25376 set = simplify_replace_rtx (set, reg2, repl2);
25377 XVECEXP (pat, 0, i) = set;
25379 if (!REG_P (SET_SRC (set))
25380 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
25381 RTX_FRAME_RELATED_P (set) = 1;
25384 else
25385 gcc_unreachable ();
25387 RTX_FRAME_RELATED_P (insn) = 1;
25388 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
25390 return insn;
25393 /* Returns an insn that has a vrsave set operation with the
25394 appropriate CLOBBERs. */
25396 static rtx
25397 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
25399 int nclobs, i;
25400 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
25401 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
25403 clobs[0]
25404 = gen_rtx_SET (vrsave,
25405 gen_rtx_UNSPEC_VOLATILE (SImode,
25406 gen_rtvec (2, reg, vrsave),
25407 UNSPECV_SET_VRSAVE));
25409 nclobs = 1;
25411 /* We need to clobber the registers in the mask so the scheduler
25412 does not move sets to VRSAVE before sets of AltiVec registers.
25414 However, if the function receives nonlocal gotos, reload will set
25415 all call saved registers live. We will end up with:
25417 (set (reg 999) (mem))
25418 (parallel [ (set (reg vrsave) (unspec blah))
25419 (clobber (reg 999))])
25421 The clobber will cause the store into reg 999 to be dead, and
25422 flow will attempt to delete an epilogue insn. In this case, we
25423 need an unspec use/set of the register. */
25425 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
25426 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25428 if (!epiloguep || call_used_regs [i])
25429 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
25430 gen_rtx_REG (V4SImode, i));
25431 else
25433 rtx reg = gen_rtx_REG (V4SImode, i);
25435 clobs[nclobs++]
25436 = gen_rtx_SET (reg,
25437 gen_rtx_UNSPEC (V4SImode,
25438 gen_rtvec (1, reg), 27));
25442 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
25444 for (i = 0; i < nclobs; ++i)
25445 XVECEXP (insn, 0, i) = clobs[i];
25447 return insn;
25450 static rtx
25451 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
25453 rtx addr, mem;
25455 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
25456 mem = gen_frame_mem (GET_MODE (reg), addr);
25457 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
25460 static rtx
25461 gen_frame_load (rtx reg, rtx frame_reg, int offset)
25463 return gen_frame_set (reg, frame_reg, offset, false);
25466 static rtx
25467 gen_frame_store (rtx reg, rtx frame_reg, int offset)
25469 return gen_frame_set (reg, frame_reg, offset, true);
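/* For reference (illustrative shorthand, modes omitted):
   gen_frame_store (reg, frame_reg, off) yields
   (set (mem (plus frame_reg (const_int off))) reg) with the frame
   alias set attached by gen_frame_mem, and gen_frame_load yields the
   mirror image with the register as destination.  */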
25472 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
25473 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
25475 static rtx_insn *
25476 emit_frame_save (rtx frame_reg, machine_mode mode,
25477 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
25479 rtx reg;
25481 /* Some cases that need register indexed addressing. */
25482 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
25483 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
25485 reg = gen_rtx_REG (mode, regno);
25486 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
25487 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
25488 NULL_RTX, NULL_RTX);
25491 /* Emit an offset memory reference suitable for a frame store, while
25492 converting to a valid addressing mode. */
25494 static rtx
25495 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
25497 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
25500 #ifndef TARGET_FIX_AND_CONTINUE
25501 #define TARGET_FIX_AND_CONTINUE 0
25502 #endif
25504 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
25505 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
25506 #define LAST_SAVRES_REGISTER 31
25507 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
25509 enum {
25510 SAVRES_LR = 0x1,
25511 SAVRES_SAVE = 0x2,
25512 SAVRES_REG = 0x0c,
25513 SAVRES_GPR = 0,
25514 SAVRES_FPR = 4,
25515 SAVRES_VR = 8
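/* Illustrative aside, not part of GCC: how a SEL value is composed and
   decoded.  SAVRES_LR and SAVRES_SAVE are independent flag bits, while
   SAVRES_REG names a two-bit field, so it is compared after masking
   with 0x0c rather than tested as a single bit.  The short local names
   below mirror the enum values above.  */
#if 0
#include <assert.h>

int
main (void)
{
  enum { LR = 0x1, SAVE = 0x2, REG = 0x0c, GPR = 0, FPR = 4, VR = 8 };
  int sel = SAVE | LR | FPR;	/* out-of-line FPR save that handles LR */
  assert ((sel & REG) == FPR);	/* field compare, not a bit test */
  assert (sel >= 0 && sel <= 12);  /* fits the savres_routine_syms index */
  return 0;
}
#endif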
25518 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
25520 /* Temporary holding space for an out-of-line register save/restore
25521 routine name. */
25522 static char savres_routine_name[30];
25524 /* Return the name for an out-of-line register save/restore routine.
25525 SEL encodes which registers are saved/restored and how; see the SAVRES_* flags. */
25527 static char *
25528 rs6000_savres_routine_name (int regno, int sel)
25530 const char *prefix = "";
25531 const char *suffix = "";
25533 /* Different targets are supposed to define
25534 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
25535 routine name could be defined with:
25537 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
25539 This is a nice idea in theory, but in reality, things are
25540 complicated in several ways:
25542 - ELF targets have save/restore routines for GPRs.
25544 - PPC64 ELF targets have routines for save/restore of GPRs that
25545 differ in what they do with the link register, so having a set
25546 prefix doesn't work. (We only use one of the save routines at
25547 the moment, though.)
25549 - PPC32 ELF targets have "exit" versions of the restore routines
25550 that restore the link register and can save some extra space.
25551 These require an extra suffix. (There are also "tail" versions
25552 of the restore routines and "GOT" versions of the save routines,
25553 but we don't generate those at present. Same problems apply,
25554 though.)
25556 We deal with all this by synthesizing our own prefix/suffix and
25557 using that for the simple sprintf call shown above. */
25558 if (DEFAULT_ABI == ABI_V4)
25560 if (TARGET_64BIT)
25561 goto aix_names;
25563 if ((sel & SAVRES_REG) == SAVRES_GPR)
25564 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
25565 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25566 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
25567 else if ((sel & SAVRES_REG) == SAVRES_VR)
25568 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25569 else
25570 abort ();
25572 if ((sel & SAVRES_LR))
25573 suffix = "_x";
25575 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25577 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
25578 /* No out-of-line save/restore routines for GPRs on AIX. */
25579 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
25580 #endif
25582 aix_names:
25583 if ((sel & SAVRES_REG) == SAVRES_GPR)
25584 prefix = ((sel & SAVRES_SAVE)
25585 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
25586 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
25587 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25589 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
25590 if ((sel & SAVRES_LR))
25591 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
25592 else
25593 #endif
25595 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
25596 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
25599 else if ((sel & SAVRES_REG) == SAVRES_VR)
25600 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
25601 else
25602 abort ();
25605 if (DEFAULT_ABI == ABI_DARWIN)
25607 /* The Darwin approach is (slightly) different, in order to be
25608 compatible with code generated by the system toolchain. There is a
25609 single symbol for the start of the save sequence, and the code here
25610 embeds an offset into that code on the basis of the first register
25611 to be saved. */
25612 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
25613 if ((sel & SAVRES_REG) == SAVRES_GPR)
25614 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
25615 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
25616 (regno - 13) * 4, prefix, regno);
25617 else if ((sel & SAVRES_REG) == SAVRES_FPR)
25618 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
25619 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
25620 else if ((sel & SAVRES_REG) == SAVRES_VR)
25621 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
25622 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
25623 else
25624 abort ();
25626 else
25627 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
25629 return savres_routine_name;
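/* Illustrative aside, not part of GCC: what the synthesized names look
   like.  A minimal sketch of the sprintf above for two hypothetical
   cases; the prefixes and suffix come from the tables in the function
   above.  */
#if 0
#include <stdio.h>

int
main (void)
{
  char name[30];
  /* 64-bit ELF out-of-line GPR save starting at r29, LR handled too.  */
  sprintf (name, "%s%d%s", "_savegpr0_", 29, "");
  puts (name);				/* _savegpr0_29 */
  /* 32-bit ELF "exit" variant of the GPR restore starting at r29.  */
  sprintf (name, "%s%d%s", "_restgpr_", 29, "_x");
  puts (name);				/* _restgpr_29_x */
  return 0;
}
#endif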
25632 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
25633 SEL encodes which registers are saved/restored and how; see the SAVRES_* flags. */
25635 static rtx
25636 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
25638 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
25639 ? info->first_gp_reg_save
25640 : (sel & SAVRES_REG) == SAVRES_FPR
25641 ? info->first_fp_reg_save - 32
25642 : (sel & SAVRES_REG) == SAVRES_VR
25643 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
25644 : -1);
25645 rtx sym;
25646 int select = sel;
25648 /* Don't generate bogus routine names. */
25649 gcc_assert (FIRST_SAVRES_REGISTER <= regno
25650 && regno <= LAST_SAVRES_REGISTER
25651 && select >= 0 && select <= 12);
25653 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
25655 if (sym == NULL)
25657 char *name;
25659 name = rs6000_savres_routine_name (regno, sel);
25661 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
25662 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
25663 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
25666 return sym;
25669 /* Emit a sequence of insns, including a stack tie if needed, for
25670 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
25671 reset the stack pointer, but move the base of the frame into
25672 reg UPDT_REGNO for use by out-of-line register restore routines. */
25674 static rtx
25675 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
25676 unsigned updt_regno)
25678 /* If there is nothing to do, don't do anything. */
25679 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
25680 return NULL_RTX;
25682 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
25684 /* This blockage is needed so that sched doesn't decide to move
25685 the sp change before the register restores. */
25686 if (DEFAULT_ABI == ABI_V4)
25687 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
25688 GEN_INT (frame_off)));
25690 /* If we are restoring registers out-of-line, we will be using the
25691 "exit" variants of the restore routines, which will reset the
25692 stack for us. But we do need to point updt_reg into the
25693 right place for those routines. */
25694 if (frame_off != 0)
25695 return emit_insn (gen_add3_insn (updt_reg_rtx,
25696 frame_reg_rtx, GEN_INT (frame_off)));
25697 else
25698 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
25700 return NULL_RTX;
25703 /* Return the register number used as a pointer by out-of-line
25704 save/restore functions. */
25706 static inline unsigned
25707 ptr_regno_for_savres (int sel)
25709 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25710 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
25711 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
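/* Summary of the choice above, for reference: AIX and ELFv2 use r1 for
   FPR routines and for any routine that handles LR, otherwise r12; the
   remaining ABIs use r1 for Darwin FPR routines, otherwise r11.  */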
25714 /* Construct a parallel rtx describing the effect of a call to an
25715 out-of-line register save/restore routine, and emit the insn
25716 or jump_insn as appropriate. */
25718 static rtx_insn *
25719 rs6000_emit_savres_rtx (rs6000_stack_t *info,
25720 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
25721 machine_mode reg_mode, int sel)
25723 int i;
25724 int offset, start_reg, end_reg, n_regs, use_reg;
25725 int reg_size = GET_MODE_SIZE (reg_mode);
25726 rtx sym;
25727 rtvec p;
25728 rtx par;
25729 rtx_insn *insn;
25731 offset = 0;
25732 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25733 ? info->first_gp_reg_save
25734 : (sel & SAVRES_REG) == SAVRES_FPR
25735 ? info->first_fp_reg_save
25736 : (sel & SAVRES_REG) == SAVRES_VR
25737 ? info->first_altivec_reg_save
25738 : -1);
25739 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
25740 ? 32
25741 : (sel & SAVRES_REG) == SAVRES_FPR
25742 ? 64
25743 : (sel & SAVRES_REG) == SAVRES_VR
25744 ? LAST_ALTIVEC_REGNO + 1
25745 : -1);
25746 n_regs = end_reg - start_reg;
25747 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
25748 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
25749 + n_regs);
25751 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25752 RTVEC_ELT (p, offset++) = ret_rtx;
25754 RTVEC_ELT (p, offset++)
25755 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
25757 sym = rs6000_savres_routine_sym (info, sel);
25758 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
25760 use_reg = ptr_regno_for_savres (sel);
25761 if ((sel & SAVRES_REG) == SAVRES_VR)
25763 /* Vector regs are saved/restored using [reg+reg] addressing. */
25764 RTVEC_ELT (p, offset++)
25765 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25766 RTVEC_ELT (p, offset++)
25767 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
25769 else
25770 RTVEC_ELT (p, offset++)
25771 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
25773 for (i = 0; i < end_reg - start_reg; i++)
25774 RTVEC_ELT (p, i + offset)
25775 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
25776 frame_reg_rtx, save_area_offset + reg_size * i,
25777 (sel & SAVRES_SAVE) != 0);
25779 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25780 RTVEC_ELT (p, i + offset)
25781 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
25783 par = gen_rtx_PARALLEL (VOIDmode, p);
25785 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
25787 insn = emit_jump_insn (par);
25788 JUMP_LABEL (insn) = ret_rtx;
25790 else
25791 insn = emit_insn (par);
25792 return insn;
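/* For reference, an "exit" GPR restore built above is a jump_insn of
   roughly this shape (illustrative; element order as constructed, and
   r29/r1 correspond to an AIX-style _restgpr0_29 call):

     (parallel [(return)
		(clobber (reg LR))
		(use (symbol_ref "_restgpr0_29"))
		(use (reg 1))
		(set (reg 29) (mem ...)) ... (set (reg 31) (mem ...))])  */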
25795 /* Emit prologue code to store CR fields that need to be saved into REG. This
25796 function should only be called when moving the non-volatile CRs to REG; it
25797 is not a general-purpose routine to move the entire set of CRs to REG.
25798 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
25799 volatile CRs. */
25801 static void
25802 rs6000_emit_prologue_move_from_cr (rtx reg)
25804 /* Only the ELFv2 ABI allows storing only selected fields. */
25805 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
25807 int i, cr_reg[8], count = 0;
25809 /* Collect CR fields that must be saved. */
25810 for (i = 0; i < 8; i++)
25811 if (save_reg_p (CR0_REGNO + i))
25812 cr_reg[count++] = i;
25814 /* If it's just a single one, use mfcrf. */
25815 if (count == 1)
25817 rtvec p = rtvec_alloc (1);
25818 rtvec r = rtvec_alloc (2);
25819 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
25820 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
25821 RTVEC_ELT (p, 0)
25822 = gen_rtx_SET (reg,
25823 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
25825 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25826 return;
25829 /* ??? It might be better to handle count == 2 / 3 cases here
25830 as well, using logical operations to combine the values. */
25833 emit_insn (gen_prologue_movesi_from_cr (reg));
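/* Illustrative aside, not part of GCC: the single-field mask computed
   above.  CR0 is the most significant of the eight 4-bit fields, so
   field i maps to bit 7 - i of the mfcrf field-mask operand.  */
#if 0
#include <stdio.h>

int
main (void)
{
  for (int field = 0; field < 8; field++)
    printf ("CR%d -> field mask 0x%02x\n", field, 1 << (7 - field));
  return 0;
}
#endif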
25836 /* Return whether the split-stack arg pointer (r12) is used. */
25838 static bool
25839 split_stack_arg_pointer_used_p (void)
25841 /* If the pseudo holding the arg pointer is no longer a pseudo,
25842 then the arg pointer is used. */
25843 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
25844 && (!REG_P (cfun->machine->split_stack_arg_pointer)
25845 || (REGNO (cfun->machine->split_stack_arg_pointer)
25846 < FIRST_PSEUDO_REGISTER)))
25847 return true;
25849 /* Unfortunately we also need to do some code scanning, since
25850 r12 may have been substituted for the pseudo. */
25851 rtx_insn *insn;
25852 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
25853 FOR_BB_INSNS (bb, insn)
25854 if (NONDEBUG_INSN_P (insn))
25856 /* A call destroys r12. */
25857 if (CALL_P (insn))
25858 return false;
25860 df_ref use;
25861 FOR_EACH_INSN_USE (use, insn)
25863 rtx x = DF_REF_REG (use);
25864 if (REG_P (x) && REGNO (x) == 12)
25865 return true;
25867 df_ref def;
25868 FOR_EACH_INSN_DEF (def, insn)
25870 rtx x = DF_REF_REG (def);
25871 if (REG_P (x) && REGNO (x) == 12)
25872 return false;
25875 return bitmap_bit_p (DF_LR_OUT (bb), 12);
25878 /* Return whether we need to emit an ELFv2 global entry point prologue. */
25880 static bool
25881 rs6000_global_entry_point_needed_p (void)
25883 /* Only needed for the ELFv2 ABI. */
25884 if (DEFAULT_ABI != ABI_ELFv2)
25885 return false;
25887 /* With -msingle-pic-base, we assume the whole program shares the same
25888 TOC, so no global entry point prologues are needed anywhere. */
25889 if (TARGET_SINGLE_PIC_BASE)
25890 return false;
25892 /* Ensure we have a global entry point for thunks. ??? We could
25893 avoid that if the target routine doesn't need a global entry point,
25894 but we do not know whether this is the case at this point. */
25895 if (cfun->is_thunk)
25896 return true;
25898 /* For regular functions, rs6000_emit_prologue sets this flag if the
25899 routine ever uses the TOC pointer. */
25900 return cfun->machine->r2_setup_needed;
25903 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
25904 static sbitmap
25905 rs6000_get_separate_components (void)
25907 rs6000_stack_t *info = rs6000_stack_info ();
25909 if (WORLD_SAVE_P (info))
25910 return NULL;
25912 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
25913 && !(info->savres_strategy & REST_MULTIPLE));
25915 /* Component 0 is the save/restore of LR (done via GPR0).
25916 Component 2 is the save of the TOC (GPR2).
25917 Components 13..31 are the save/restore of GPR13..GPR31.
25918 Components 46..63 are the save/restore of FPR14..FPR31. */
25920 cfun->machine->n_components = 64;
25922 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
25923 bitmap_clear (components);
25925 int reg_size = TARGET_32BIT ? 4 : 8;
25926 int fp_reg_size = 8;
25928 /* The GPRs we need saved to the frame. */
25929 if ((info->savres_strategy & SAVE_INLINE_GPRS)
25930 && (info->savres_strategy & REST_INLINE_GPRS))
25932 int offset = info->gp_save_offset;
25933 if (info->push_p)
25934 offset += info->total_size;
25936 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
25938 if (IN_RANGE (offset, -0x8000, 0x7fff)
25939 && save_reg_p (regno))
25940 bitmap_set_bit (components, regno);
25942 offset += reg_size;
25946 /* Don't mess with the hard frame pointer. */
25947 if (frame_pointer_needed)
25948 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
25950 /* Don't mess with the fixed TOC register. */
25951 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
25952 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
25953 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
25954 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
25956 /* The FPRs we need saved to the frame. */
25957 if ((info->savres_strategy & SAVE_INLINE_FPRS)
25958 && (info->savres_strategy & REST_INLINE_FPRS))
25960 int offset = info->fp_save_offset;
25961 if (info->push_p)
25962 offset += info->total_size;
25964 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
25966 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
25967 bitmap_set_bit (components, regno);
25969 offset += fp_reg_size;
25973 /* Optimize LR save and restore if we can. This is component 0. Any
25974 out-of-line register save/restore routines need LR. */
25975 if (info->lr_save_p
25976 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
25977 && (info->savres_strategy & SAVE_INLINE_GPRS)
25978 && (info->savres_strategy & REST_INLINE_GPRS)
25979 && (info->savres_strategy & SAVE_INLINE_FPRS)
25980 && (info->savres_strategy & REST_INLINE_FPRS)
25981 && (info->savres_strategy & SAVE_INLINE_VRS)
25982 && (info->savres_strategy & REST_INLINE_VRS))
25984 int offset = info->lr_save_offset;
25985 if (info->push_p)
25986 offset += info->total_size;
25987 if (IN_RANGE (offset, -0x8000, 0x7fff))
25988 bitmap_set_bit (components, 0);
25991 /* Optimize saving the TOC. This is component 2. */
25992 if (cfun->machine->save_toc_in_prologue)
25993 bitmap_set_bit (components, 2);
25995 return components;
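/* Illustrative aside, not part of GCC: the component numbering above,
   sketched with a plain 64-bit mask instead of an sbitmap.  Assumes a
   hypothetical function that saves LR, the TOC, r29..r31 and
   f30..f31 (FPR components are 32 + the FPR number).  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  uint64_t components = 0;
  components |= UINT64_C (1) << 0;		/* LR */
  components |= UINT64_C (1) << 2;		/* TOC */
  for (int r = 29; r < 32; r++)			/* GPR29..GPR31 */
    components |= UINT64_C (1) << r;
  for (int r = 62; r < 64; r++)			/* FPR30, FPR31 */
    components |= UINT64_C (1) << r;
  printf ("%016llx\n", (unsigned long long) components);
  return 0;
}
#endif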
25998 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
25999 static sbitmap
26000 rs6000_components_for_bb (basic_block bb)
26002 rs6000_stack_t *info = rs6000_stack_info ();
26004 bitmap in = DF_LIVE_IN (bb);
26005 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26006 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26008 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26009 bitmap_clear (components);
26011 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
26013 /* GPRs. */
26014 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26015 if (bitmap_bit_p (in, regno)
26016 || bitmap_bit_p (gen, regno)
26017 || bitmap_bit_p (kill, regno))
26018 bitmap_set_bit (components, regno);
26020 /* FPRs. */
26021 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26022 if (bitmap_bit_p (in, regno)
26023 || bitmap_bit_p (gen, regno)
26024 || bitmap_bit_p (kill, regno))
26025 bitmap_set_bit (components, regno);
26027 /* The link register. */
26028 if (bitmap_bit_p (in, LR_REGNO)
26029 || bitmap_bit_p (gen, LR_REGNO)
26030 || bitmap_bit_p (kill, LR_REGNO))
26031 bitmap_set_bit (components, 0);
26033 /* The TOC save. */
26034 if (bitmap_bit_p (in, TOC_REGNUM)
26035 || bitmap_bit_p (gen, TOC_REGNUM)
26036 || bitmap_bit_p (kill, TOC_REGNUM))
26037 bitmap_set_bit (components, 2);
26039 return components;
26042 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26043 static void
26044 rs6000_disqualify_components (sbitmap components, edge e,
26045 sbitmap edge_components, bool /*is_prologue*/)
26047 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26048 live where we want to place that code. */
26049 if (bitmap_bit_p (edge_components, 0)
26050 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26052 if (dump_file)
26053 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26054 "on entry to bb %d\n", e->dest->index);
26055 bitmap_clear_bit (components, 0);
26059 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26060 static void
26061 rs6000_emit_prologue_components (sbitmap components)
26063 rs6000_stack_t *info = rs6000_stack_info ();
26064 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26065 ? HARD_FRAME_POINTER_REGNUM
26066 : STACK_POINTER_REGNUM);
26068 machine_mode reg_mode = Pmode;
26069 int reg_size = TARGET_32BIT ? 4 : 8;
26070 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26071 int fp_reg_size = 8;
26073 /* Prologue for LR. */
26074 if (bitmap_bit_p (components, 0))
26076 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26077 rtx reg = gen_rtx_REG (reg_mode, 0);
26078 rtx_insn *insn = emit_move_insn (reg, lr);
26079 RTX_FRAME_RELATED_P (insn) = 1;
26080 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (reg, lr));
26082 int offset = info->lr_save_offset;
26083 if (info->push_p)
26084 offset += info->total_size;
26086 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26087 RTX_FRAME_RELATED_P (insn) = 1;
26088 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26089 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26092 /* Prologue for TOC. */
26093 if (bitmap_bit_p (components, 2))
26095 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26096 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26097 emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
26100 /* Prologue for the GPRs. */
26101 int offset = info->gp_save_offset;
26102 if (info->push_p)
26103 offset += info->total_size;
26105 for (int i = info->first_gp_reg_save; i < 32; i++)
26107 if (bitmap_bit_p (components, i))
26109 rtx reg = gen_rtx_REG (reg_mode, i);
26110 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26111 RTX_FRAME_RELATED_P (insn) = 1;
26112 rtx set = copy_rtx (single_set (insn));
26113 add_reg_note (insn, REG_CFA_OFFSET, set);
26116 offset += reg_size;
26119 /* Prologue for the FPRs. */
26120 offset = info->fp_save_offset;
26121 if (info->push_p)
26122 offset += info->total_size;
26124 for (int i = info->first_fp_reg_save; i < 64; i++)
26126 if (bitmap_bit_p (components, i))
26128 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26129 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26130 RTX_FRAME_RELATED_P (insn) = 1;
26131 rtx set = copy_rtx (single_set (insn));
26132 add_reg_note (insn, REG_CFA_OFFSET, set);
26135 offset += fp_reg_size;
26139 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26140 static void
26141 rs6000_emit_epilogue_components (sbitmap components)
26143 rs6000_stack_t *info = rs6000_stack_info ();
26144 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26145 ? HARD_FRAME_POINTER_REGNUM
26146 : STACK_POINTER_REGNUM);
26148 machine_mode reg_mode = Pmode;
26149 int reg_size = TARGET_32BIT ? 4 : 8;
26151 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26152 int fp_reg_size = 8;
26154 /* Epilogue for the FPRs. */
26155 int offset = info->fp_save_offset;
26156 if (info->push_p)
26157 offset += info->total_size;
26159 for (int i = info->first_fp_reg_save; i < 64; i++)
26161 if (bitmap_bit_p (components, i))
26163 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26164 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26165 RTX_FRAME_RELATED_P (insn) = 1;
26166 add_reg_note (insn, REG_CFA_RESTORE, reg);
26169 offset += fp_reg_size;
26172 /* Epilogue for the GPRs. */
26173 offset = info->gp_save_offset;
26174 if (info->push_p)
26175 offset += info->total_size;
26177 for (int i = info->first_gp_reg_save; i < 32; i++)
26179 if (bitmap_bit_p (components, i))
26181 rtx reg = gen_rtx_REG (reg_mode, i);
26182 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26183 RTX_FRAME_RELATED_P (insn) = 1;
26184 add_reg_note (insn, REG_CFA_RESTORE, reg);
26187 offset += reg_size;
26190 /* Epilogue for LR. */
26191 if (bitmap_bit_p (components, 0))
26193 int offset = info->lr_save_offset;
26194 if (info->push_p)
26195 offset += info->total_size;
26197 rtx reg = gen_rtx_REG (reg_mode, 0);
26198 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26200 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26201 insn = emit_move_insn (lr, reg);
26202 RTX_FRAME_RELATED_P (insn) = 1;
26203 add_reg_note (insn, REG_CFA_RESTORE, lr);
26207 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26208 static void
26209 rs6000_set_handled_components (sbitmap components)
26211 rs6000_stack_t *info = rs6000_stack_info ();
26213 for (int i = info->first_gp_reg_save; i < 32; i++)
26214 if (bitmap_bit_p (components, i))
26215 cfun->machine->gpr_is_wrapped_separately[i] = true;
26217 for (int i = info->first_fp_reg_save; i < 64; i++)
26218 if (bitmap_bit_p (components, i))
26219 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26221 if (bitmap_bit_p (components, 0))
26222 cfun->machine->lr_is_wrapped_separately = true;
26224 if (bitmap_bit_p (components, 2))
26225 cfun->machine->toc_is_wrapped_separately = true;
26228 /* VRSAVE is a bit vector representing which AltiVec registers
26229 are used. The OS uses this to determine which vector
26230 registers to save on a context switch. We need to save
26231 VRSAVE on the stack frame, add whatever AltiVec registers we
26232 used in this function, and do the corresponding magic in the
26233 epilogue. */
26234 static void
26235 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26236 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26238 /* Get VRSAVE into a GPR. */
26239 rtx reg = gen_rtx_REG (SImode, save_regno);
26240 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26241 if (TARGET_MACHO)
26242 emit_insn (gen_get_vrsave_internal (reg));
26243 else
26244 emit_insn (gen_rtx_SET (reg, vrsave));
26246 /* Save VRSAVE. */
26247 int offset = info->vrsave_save_offset + frame_off;
26248 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26250 /* Include the registers in the mask. */
26251 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
26253 emit_insn (generate_set_vrsave (reg, info, 0));
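/* Illustrative aside, not part of GCC: the mask update above.  In the
   VRSAVE layout v0 occupies the most significant bit (assumption of
   this sketch, matching ALTIVEC_REG_BIT), so a function using v20 and
   v21 ORs in bits 11 and 10.  */
#if 0
#include <stdio.h>

#define DEMO_VR_BIT(n) (0x80000000u >> (n))

int
main (void)
{
  unsigned vrsave = 0x00030000u;	/* hypothetical incoming value */
  unsigned mask = DEMO_VR_BIT (20) | DEMO_VR_BIT (21);
  vrsave |= mask;			/* the gen_iorsi3 step above */
  printf ("%08x\n", vrsave);		/* prints 00030c00 */
  return 0;
}
#endif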
26256 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26257 called, it left the arg pointer to the old stack in r29. Otherwise, the
26258 arg pointer is the top of the current frame. */
26259 static void
26260 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26261 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26263 cfun->machine->split_stack_argp_used = true;
26265 if (sp_adjust)
26267 rtx r12 = gen_rtx_REG (Pmode, 12);
26268 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26269 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26270 emit_insn_before (set_r12, sp_adjust);
26272 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26274 rtx r12 = gen_rtx_REG (Pmode, 12);
26275 if (frame_off == 0)
26276 emit_move_insn (r12, frame_reg_rtx);
26277 else
26278 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26281 if (info->push_p)
26283 rtx r12 = gen_rtx_REG (Pmode, 12);
26284 rtx r29 = gen_rtx_REG (Pmode, 29);
26285 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26286 rtx not_more = gen_label_rtx ();
26287 rtx jump;
26289 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26290 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26291 gen_rtx_LABEL_REF (VOIDmode, not_more),
26292 pc_rtx);
26293 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26294 JUMP_LABEL (jump) = not_more;
26295 LABEL_NUSES (not_more) += 1;
26296 emit_move_insn (r12, r29);
26297 emit_label (not_more);
26301 /* Emit function prologue as insns. */
26303 void
26304 rs6000_emit_prologue (void)
26306 rs6000_stack_t *info = rs6000_stack_info ();
26307 machine_mode reg_mode = Pmode;
26308 int reg_size = TARGET_32BIT ? 4 : 8;
26309 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
26310 int fp_reg_size = 8;
26311 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26312 rtx frame_reg_rtx = sp_reg_rtx;
26313 unsigned int cr_save_regno;
26314 rtx cr_save_rtx = NULL_RTX;
26315 rtx_insn *insn;
26316 int strategy;
26317 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
26318 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
26319 && call_used_regs[STATIC_CHAIN_REGNUM]);
26320 int using_split_stack = (flag_split_stack
26321 && (lookup_attribute ("no_split_stack",
26322 DECL_ATTRIBUTES (cfun->decl))
26323 == NULL));
26325 /* Offset to top of frame for frame_reg and sp respectively. */
26326 HOST_WIDE_INT frame_off = 0;
26327 HOST_WIDE_INT sp_off = 0;
26328 /* sp_adjust is the stack adjusting instruction, tracked so that the
26329 insn setting up the split-stack arg pointer can be emitted just
26330 prior to it, when r12 is not used here for other purposes. */
26331 rtx_insn *sp_adjust = 0;
26333 #if CHECKING_P
26334 /* Track and check usage of r0, r11, r12. */
26335 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
26336 #define START_USE(R) do \
26338 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26339 reg_inuse |= 1 << (R); \
26340 } while (0)
26341 #define END_USE(R) do \
26343 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
26344 reg_inuse &= ~(1 << (R)); \
26345 } while (0)
26346 #define NOT_INUSE(R) do \
26348 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
26349 } while (0)
26350 #else
26351 #define START_USE(R) do {} while (0)
26352 #define END_USE(R) do {} while (0)
26353 #define NOT_INUSE(R) do {} while (0)
26354 #endif
26356 if (DEFAULT_ABI == ABI_ELFv2
26357 && !TARGET_SINGLE_PIC_BASE)
26359 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
26361 /* With -mminimal-toc we may generate an extra use of r2 below. */
26362 if (TARGET_TOC && TARGET_MINIMAL_TOC
26363 && !constant_pool_empty_p ())
26364 cfun->machine->r2_setup_needed = true;
26368 if (flag_stack_usage_info)
26369 current_function_static_stack_size = info->total_size;
26371 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
26373 HOST_WIDE_INT size = info->total_size;
26375 if (crtl->is_leaf && !cfun->calls_alloca)
26377 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
26378 rs6000_emit_probe_stack_range (get_stack_check_protect (),
26379 size - get_stack_check_protect ());
26381 else if (size > 0)
26382 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
26385 if (TARGET_FIX_AND_CONTINUE)
26387 /* gdb on darwin arranges to forward a function from the old
26388 address by modifying the first 5 instructions of the function
26389 to branch to the overriding function. This is necessary to
26390 permit function pointers that point to the old function to
26391 actually forward to the new function. */
26392 emit_insn (gen_nop ());
26393 emit_insn (gen_nop ());
26394 emit_insn (gen_nop ());
26395 emit_insn (gen_nop ());
26396 emit_insn (gen_nop ());
26399 /* Handle world saves specially here. */
26400 if (WORLD_SAVE_P (info))
26402 int i, j, sz;
26403 rtx treg;
26404 rtvec p;
26405 rtx reg0;
26407 /* save_world expects lr in r0. */
26408 reg0 = gen_rtx_REG (Pmode, 0);
26409 if (info->lr_save_p)
26411 insn = emit_move_insn (reg0,
26412 gen_rtx_REG (Pmode, LR_REGNO));
26413 RTX_FRAME_RELATED_P (insn) = 1;
26416 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
26417 assumptions about the offsets of various bits of the stack
26418 frame. */
26419 gcc_assert (info->gp_save_offset == -220
26420 && info->fp_save_offset == -144
26421 && info->lr_save_offset == 8
26422 && info->cr_save_offset == 4
26423 && info->push_p
26424 && info->lr_save_p
26425 && (!crtl->calls_eh_return
26426 || info->ehrd_offset == -432)
26427 && info->vrsave_save_offset == -224
26428 && info->altivec_save_offset == -416);
26430 treg = gen_rtx_REG (SImode, 11);
26431 emit_move_insn (treg, GEN_INT (-info->total_size));
26433 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
26434 in R11. It also clobbers R12, so beware! */
26436 /* Preserve CR2 for save_world prologues. */
26437 sz = 5;
26438 sz += 32 - info->first_gp_reg_save;
26439 sz += 64 - info->first_fp_reg_save;
26440 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
26441 p = rtvec_alloc (sz);
26442 j = 0;
26443 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
26444 gen_rtx_REG (SImode,
26445 LR_REGNO));
26446 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
26447 gen_rtx_SYMBOL_REF (Pmode,
26448 "*save_world"));
26449 /* We do floats first so that the instruction pattern matches
26450 properly. */
26451 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26452 RTVEC_ELT (p, j++)
26453 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
26454 info->first_fp_reg_save + i),
26455 frame_reg_rtx,
26456 info->fp_save_offset + frame_off + 8 * i);
26457 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
26458 RTVEC_ELT (p, j++)
26459 = gen_frame_store (gen_rtx_REG (V4SImode,
26460 info->first_altivec_reg_save + i),
26461 frame_reg_rtx,
26462 info->altivec_save_offset + frame_off + 16 * i);
26463 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26464 RTVEC_ELT (p, j++)
26465 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26466 frame_reg_rtx,
26467 info->gp_save_offset + frame_off + reg_size * i);
26469 /* CR register traditionally saved as CR2. */
26470 RTVEC_ELT (p, j++)
26471 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
26472 frame_reg_rtx, info->cr_save_offset + frame_off);
26473 /* Explain the use of R0. */
26474 if (info->lr_save_p)
26475 RTVEC_ELT (p, j++)
26476 = gen_frame_store (reg0,
26477 frame_reg_rtx, info->lr_save_offset + frame_off);
26478 /* Explain what happens to the stack pointer. */
26480 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
26481 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
26484 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26485 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26486 treg, GEN_INT (-info->total_size));
26487 sp_off = frame_off = info->total_size;
26490 strategy = info->savres_strategy;
26492 /* For V.4, update stack before we do any saving and set back pointer. */
26493 if (! WORLD_SAVE_P (info)
26494 && info->push_p
26495 && (DEFAULT_ABI == ABI_V4
26496 || crtl->calls_eh_return))
26498 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
26499 || !(strategy & SAVE_INLINE_GPRS)
26500 || !(strategy & SAVE_INLINE_VRS));
26501 int ptr_regno = -1;
26502 rtx ptr_reg = NULL_RTX;
26503 int ptr_off = 0;
26505 if (info->total_size < 32767)
26506 frame_off = info->total_size;
26507 else if (need_r11)
26508 ptr_regno = 11;
26509 else if (info->cr_save_p
26510 || info->lr_save_p
26511 || info->first_fp_reg_save < 64
26512 || info->first_gp_reg_save < 32
26513 || info->altivec_size != 0
26514 || info->vrsave_size != 0
26515 || crtl->calls_eh_return)
26516 ptr_regno = 12;
26517 else
26519 /* The prologue won't be saving any regs so there is no need
26520 to set up a frame register to access any frame save area.
26521 We also won't be using frame_off anywhere below, but set
26522 the correct value anyway to protect against future
26523 changes to this function. */
26524 frame_off = info->total_size;
26526 if (ptr_regno != -1)
26528 /* Set up the frame offset to that needed by the first
26529 out-of-line save function. */
26530 START_USE (ptr_regno);
26531 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26532 frame_reg_rtx = ptr_reg;
26533 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
26534 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
26535 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
26536 ptr_off = info->gp_save_offset + info->gp_size;
26537 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
26538 ptr_off = info->altivec_save_offset + info->altivec_size;
26539 frame_off = -ptr_off;
26541 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26542 ptr_reg, ptr_off);
26543 if (REGNO (frame_reg_rtx) == 12)
26544 sp_adjust = 0;
26545 sp_off = info->total_size;
26546 if (frame_reg_rtx != sp_reg_rtx)
26547 rs6000_emit_stack_tie (frame_reg_rtx, false);
26550 /* If we use the link register, get it into r0. */
26551 if (!WORLD_SAVE_P (info) && info->lr_save_p
26552 && !cfun->machine->lr_is_wrapped_separately)
26554 rtx addr, reg, mem;
26556 reg = gen_rtx_REG (Pmode, 0);
26557 START_USE (0);
26558 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
26559 RTX_FRAME_RELATED_P (insn) = 1;
26561 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
26562 | SAVE_NOINLINE_FPRS_SAVES_LR)))
26564 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26565 GEN_INT (info->lr_save_offset + frame_off));
26566 mem = gen_rtx_MEM (Pmode, addr);
26567 /* This should not be of rs6000_sr_alias_set, because of
26568 __builtin_return_address. */
26570 insn = emit_move_insn (mem, reg);
26571 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26572 NULL_RTX, NULL_RTX);
26573 END_USE (0);
26577 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
26578 r12 will be needed by the out-of-line GPR save. */
26579 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26580 && !(strategy & (SAVE_INLINE_GPRS
26581 | SAVE_NOINLINE_GPRS_SAVES_LR))
26582 ? 11 : 12);
26583 if (!WORLD_SAVE_P (info)
26584 && info->cr_save_p
26585 && REGNO (frame_reg_rtx) != cr_save_regno
26586 && !(using_static_chain_p && cr_save_regno == 11)
26587 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
26589 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
26590 START_USE (cr_save_regno);
26591 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26594 /* Do any required saving of FPRs. If there are only one or two to
26595 save, do it ourselves. Otherwise, call an out-of-line routine. */
26596 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
26598 int offset = info->fp_save_offset + frame_off;
26599 for (int i = info->first_fp_reg_save; i < 64; i++)
26601 if (save_reg_p (i)
26602 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
26603 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
26604 sp_off - frame_off);
26606 offset += fp_reg_size;
26609 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
26611 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
26612 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
26613 unsigned ptr_regno = ptr_regno_for_savres (sel);
26614 rtx ptr_reg = frame_reg_rtx;
26616 if (REGNO (frame_reg_rtx) == ptr_regno)
26617 gcc_checking_assert (frame_off == 0);
26618 else
26620 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26621 NOT_INUSE (ptr_regno);
26622 emit_insn (gen_add3_insn (ptr_reg,
26623 frame_reg_rtx, GEN_INT (frame_off)));
26625 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26626 info->fp_save_offset,
26627 info->lr_save_offset,
26628 DFmode, sel);
26629 rs6000_frame_related (insn, ptr_reg, sp_off,
26630 NULL_RTX, NULL_RTX);
26631 if (lr)
26632 END_USE (0);
26635 /* Save GPRs. This is done as a PARALLEL if we are using
26636 the store-multiple instructions. */
26637 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
26639 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
26640 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
26641 unsigned ptr_regno = ptr_regno_for_savres (sel);
26642 rtx ptr_reg = frame_reg_rtx;
26643 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
26644 int end_save = info->gp_save_offset + info->gp_size;
26645 int ptr_off;
26647 if (ptr_regno == 12)
26648 sp_adjust = 0;
26649 if (!ptr_set_up)
26650 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26652 /* Need to adjust r11 (r12) if we saved any FPRs. */
26653 if (end_save + frame_off != 0)
26655 rtx offset = GEN_INT (end_save + frame_off);
26657 if (ptr_set_up)
26658 frame_off = -end_save;
26659 else
26660 NOT_INUSE (ptr_regno);
26661 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
26663 else if (!ptr_set_up)
26665 NOT_INUSE (ptr_regno);
26666 emit_move_insn (ptr_reg, frame_reg_rtx);
26668 ptr_off = -end_save;
26669 insn = rs6000_emit_savres_rtx (info, ptr_reg,
26670 info->gp_save_offset + ptr_off,
26671 info->lr_save_offset + ptr_off,
26672 reg_mode, sel);
26673 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
26674 NULL_RTX, NULL_RTX);
26675 if (lr)
26676 END_USE (0);
26678 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
26680 rtvec p;
26681 int i;
26682 p = rtvec_alloc (32 - info->first_gp_reg_save);
26683 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26684 RTVEC_ELT (p, i)
26685 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26686 frame_reg_rtx,
26687 info->gp_save_offset + frame_off + reg_size * i);
26688 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26689 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
26690 NULL_RTX, NULL_RTX);
26692 else if (!WORLD_SAVE_P (info))
26694 int offset = info->gp_save_offset + frame_off;
26695 for (int i = info->first_gp_reg_save; i < 32; i++)
26697 if (save_reg_p (i)
26698 && !cfun->machine->gpr_is_wrapped_separately[i])
26699 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
26700 sp_off - frame_off);
26702 offset += reg_size;
26706 if (crtl->calls_eh_return)
26708 unsigned int i;
26709 rtvec p;
26711 for (i = 0; ; ++i)
26713 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26714 if (regno == INVALID_REGNUM)
26715 break;
26718 p = rtvec_alloc (i);
26720 for (i = 0; ; ++i)
26722 unsigned int regno = EH_RETURN_DATA_REGNO (i);
26723 if (regno == INVALID_REGNUM)
26724 break;
26726 rtx set
26727 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
26728 sp_reg_rtx,
26729 info->ehrd_offset + sp_off + reg_size * (int) i);
26730 RTVEC_ELT (p, i) = set;
26731 RTX_FRAME_RELATED_P (set) = 1;
26734 insn = emit_insn (gen_blockage ());
26735 RTX_FRAME_RELATED_P (insn) = 1;
26736 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
26739 /* In the AIX ABI we need to make sure r2 is really saved. */
26740 if (TARGET_AIX && crtl->calls_eh_return)
26742 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
26743 rtx join_insn, note;
26744 rtx_insn *save_insn;
26745 long toc_restore_insn;
26747 tmp_reg = gen_rtx_REG (Pmode, 11);
26748 tmp_reg_si = gen_rtx_REG (SImode, 11);
26749 if (using_static_chain_p)
26751 START_USE (0);
26752 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
26754 else
26755 START_USE (11);
26756 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
26757 /* Peek at the instruction to which this function returns. If it's
26758 restoring r2, then we know we've already saved r2. We can't
26759 unconditionally save r2 because the value we have will already
26760 be updated if we arrived at this function via a plt call or
26761 toc adjusting stub. */
26762 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
26763 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
26764 + RS6000_TOC_SAVE_SLOT);
26765 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
26766 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
26767 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
26768 validate_condition_mode (EQ, CCUNSmode);
26769 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
26770 emit_insn (gen_rtx_SET (compare_result,
26771 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
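/* Illustrative note: HI and LO partition the expected instruction
   word into disjoint halves, so (insn ^ hi) == lo holds exactly when
   insn == hi | lo, i.e. when the word at the return address is the
   expected TOC restore.  The xor folds a 32-bit equality test into a
   compare against a 16-bit immediate.  */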
26772 toc_save_done = gen_label_rtx ();
26773 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26774 gen_rtx_EQ (VOIDmode, compare_result,
26775 const0_rtx),
26776 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
26777 pc_rtx);
26778 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26779 JUMP_LABEL (jump) = toc_save_done;
26780 LABEL_NUSES (toc_save_done) += 1;
26782 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
26783 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
26784 sp_off - frame_off);
26786 emit_label (toc_save_done);
26788 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
26789 have a CFG that has different saves along different paths.
26790 Move the note to a dummy blockage insn, which describes that
26791 R2 is unconditionally saved after the label. */
26792 /* ??? An alternate representation might be a special insn pattern
26793 containing both the branch and the store. That might give the
26794 code that minimizes the number of DW_CFA_advance opcodes more
26795 freedom in placing the annotations. */
26796 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
26797 if (note)
26798 remove_note (save_insn, note);
26799 else
26800 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
26801 copy_rtx (PATTERN (save_insn)), NULL_RTX);
26802 RTX_FRAME_RELATED_P (save_insn) = 0;
26804 join_insn = emit_insn (gen_blockage ());
26805 REG_NOTES (join_insn) = note;
26806 RTX_FRAME_RELATED_P (join_insn) = 1;
26808 if (using_static_chain_p)
26810 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
26811 END_USE (0);
26813 else
26814 END_USE (11);
26817 /* Save CR if we use any that must be preserved. */
26818 if (!WORLD_SAVE_P (info) && info->cr_save_p)
26820 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
26821 GEN_INT (info->cr_save_offset + frame_off));
26822 rtx mem = gen_frame_mem (SImode, addr);
26824 /* If we didn't copy cr before, do so now using r0. */
26825 if (cr_save_rtx == NULL_RTX)
26827 START_USE (0);
26828 cr_save_rtx = gen_rtx_REG (SImode, 0);
26829 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
26832 /* Saving CR requires a two-instruction sequence: one instruction
26833 to move the CR to a general-purpose register, and a second
26834 instruction that stores the GPR to memory.
26836 We do not emit any DWARF CFI records for the first of these,
26837 because we cannot properly represent the fact that CR is saved in
26838 a register. One reason is that we cannot express that multiple
26839 CR fields are saved; another reason is that on 64-bit, the size
26840 of the CR register in DWARF (4 bytes) differs from the size of
26841 a general-purpose register.
26843 This means if any intervening instruction were to clobber one of
26844 the call-saved CR fields, we'd have incorrect CFI. To prevent
26845 this from happening, we mark the store to memory as a use of
26846 those CR fields, which prevents any such instruction from being
26847 scheduled in between the two instructions. */
26848 rtx crsave_v[9];
26849 int n_crsave = 0;
26850 int i;
26852 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
26853 for (i = 0; i < 8; i++)
26854 if (save_reg_p (CR0_REGNO + i))
26855 crsave_v[n_crsave++]
26856 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26858 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
26859 gen_rtvec_v (n_crsave, crsave_v)));
26860 END_USE (REGNO (cr_save_rtx));
26862 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
26863 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
26864 so we need to construct a frame expression manually. */
26865 RTX_FRAME_RELATED_P (insn) = 1;
26867 /* Update address to be stack-pointer relative, like
26868 rs6000_frame_related would do. */
26869 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26870 GEN_INT (info->cr_save_offset + sp_off));
26871 mem = gen_frame_mem (SImode, addr);
26873 if (DEFAULT_ABI == ABI_ELFv2)
26875 /* In the ELFv2 ABI we generate separate CFI records for each
26876 CR field that was actually saved. They all point to the
26877 same 32-bit stack slot. */
26878 rtx crframe[8];
26879 int n_crframe = 0;
26881 for (i = 0; i < 8; i++)
26882 if (save_reg_p (CR0_REGNO + i))
26884 crframe[n_crframe]
26885 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
26887 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
26888 n_crframe++;
26891 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26892 gen_rtx_PARALLEL (VOIDmode,
26893 gen_rtvec_v (n_crframe, crframe)));
26895 else
26897 /* In other ABIs, by convention, we use a single CR regnum to
26898 represent the fact that all call-saved CR fields are saved.
26899 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
26900 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
26901 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
26905 /* In the ELFv2 ABI we need to save all call-saved CR fields into
26906 *separate* slots if the routine calls __builtin_eh_return, so
26907 that they can be independently restored by the unwinder. */
26908 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
26910 int i, cr_off = info->ehcr_offset;
26911 rtx crsave;
26913 /* ??? We might get better performance by using multiple mfocrf
26914 instructions. */
26915 crsave = gen_rtx_REG (SImode, 0);
26916 emit_insn (gen_prologue_movesi_from_cr (crsave));
26918 for (i = 0; i < 8; i++)
26919 if (!call_used_regs[CR0_REGNO + i])
26921 rtvec p = rtvec_alloc (2);
26922 RTVEC_ELT (p, 0)
26923 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
26924 RTVEC_ELT (p, 1)
26925 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
26927 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26929 RTX_FRAME_RELATED_P (insn) = 1;
26930 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
26931 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
26932 sp_reg_rtx, cr_off + sp_off));
26934 cr_off += reg_size;
26938 /* If we are emitting stack probes, but allocate no stack, then
26939 just note that in the dump file. */
26940 if (flag_stack_clash_protection
26941 && dump_file
26942 && !info->push_p)
26943 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
26945 /* Update stack and set back pointer unless this is V.4,
26946 for which it was done previously. */
26947 if (!WORLD_SAVE_P (info) && info->push_p
26948 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
26950 rtx ptr_reg = NULL;
26951 int ptr_off = 0;
26953 /* If saving altivec regs we need to be able to address all save
26954 locations using a 16-bit offset. */
26955 if ((strategy & SAVE_INLINE_VRS) == 0
26956 || (info->altivec_size != 0
26957 && (info->altivec_save_offset + info->altivec_size - 16
26958 + info->total_size - frame_off) > 32767)
26959 || (info->vrsave_size != 0
26960 && (info->vrsave_save_offset
26961 + info->total_size - frame_off) > 32767))
26963 int sel = SAVRES_SAVE | SAVRES_VR;
26964 unsigned ptr_regno = ptr_regno_for_savres (sel);
26966 if (using_static_chain_p
26967 && ptr_regno == STATIC_CHAIN_REGNUM)
26968 ptr_regno = 12;
26969 if (REGNO (frame_reg_rtx) != ptr_regno)
26970 START_USE (ptr_regno);
26971 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
26972 frame_reg_rtx = ptr_reg;
26973 ptr_off = info->altivec_save_offset + info->altivec_size;
26974 frame_off = -ptr_off;
26976 else if (REGNO (frame_reg_rtx) == 1)
26977 frame_off = info->total_size;
26978 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
26979 ptr_reg, ptr_off);
26980 if (REGNO (frame_reg_rtx) == 12)
26981 sp_adjust = 0;
26982 sp_off = info->total_size;
26983 if (frame_reg_rtx != sp_reg_rtx)
26984 rs6000_emit_stack_tie (frame_reg_rtx, false);
26987 /* Set frame pointer, if needed. */
26988 if (frame_pointer_needed)
26990 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
26991 sp_reg_rtx);
26992 RTX_FRAME_RELATED_P (insn) = 1;
26995 /* Save AltiVec registers if needed. Save here because the red zone does
26996 not always include AltiVec registers. */
26997 if (!WORLD_SAVE_P (info)
26998 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27000 int end_save = info->altivec_save_offset + info->altivec_size;
27001 int ptr_off;
27002 /* Oddly, the vector save/restore functions point r0 at the end
27003 of the save area, then use r11 or r12 to load offsets for
27004 [reg+reg] addressing. */
27005 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27006 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27007 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27009 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27010 NOT_INUSE (0);
27011 if (scratch_regno == 12)
27012 sp_adjust = 0;
27013 if (end_save + frame_off != 0)
27015 rtx offset = GEN_INT (end_save + frame_off);
27017 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27019 else
27020 emit_move_insn (ptr_reg, frame_reg_rtx);
27022 ptr_off = -end_save;
27023 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27024 info->altivec_save_offset + ptr_off,
27025 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27026 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27027 NULL_RTX, NULL_RTX);
27028 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27030 /* The oddity mentioned above clobbered our frame reg. */
27031 emit_move_insn (frame_reg_rtx, ptr_reg);
27032 frame_off = ptr_off;
27035 else if (!WORLD_SAVE_P (info)
27036 && info->altivec_size != 0)
27038 int i;
27040 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27041 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27043 rtx areg, savereg, mem;
27044 HOST_WIDE_INT offset;
27046 offset = (info->altivec_save_offset + frame_off
27047 + 16 * (i - info->first_altivec_reg_save));
27049 savereg = gen_rtx_REG (V4SImode, i);
27051 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27053 mem = gen_frame_mem (V4SImode,
27054 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27055 GEN_INT (offset)));
27056 insn = emit_insn (gen_rtx_SET (mem, savereg));
27057 areg = NULL_RTX;
27059 else
27061 NOT_INUSE (0);
27062 areg = gen_rtx_REG (Pmode, 0);
27063 emit_move_insn (areg, GEN_INT (offset));
27065 /* AltiVec addressing mode is [reg+reg]. */
27066 mem = gen_frame_mem (V4SImode,
27067 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27069 /* Rather than emitting a generic move, force use of the stvx
27070 instruction, which we always want on ISA 2.07 (power8) systems.
27071 In particular we don't want xxpermdi/stxvd2x for little
27072 endian. */
27073 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27076 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27077 areg, GEN_INT (offset));
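/* Note on the quad_address_offset_p test above (assumed semantics):
   the ISA 3.0 (power9) stxv uses a DQ-form displacement -- a signed
   16-bit offset whose low four bits must be zero -- so only in-range,
   16-byte-aligned offsets take the direct path; all other offsets fall
   back to loading the offset into r0 and using [reg+reg] stvx.  */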
27081 /* VRSAVE is a bit vector representing which AltiVec registers
27082 are used. The OS uses this to determine which vector
27083 registers to save on a context switch. We need to save
27084 VRSAVE on the stack frame, add whatever AltiVec registers we
27085 used in this function, and do the corresponding magic in the
27086 epilogue. */
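/* Worked example of the mask arithmetic (illustrative, assuming the
   usual ALTIVEC_REG_BIT encoding with v0 as the most significant bit):
   a function using only v20 and v21 contributes
   (0x80000000 >> 20) | (0x80000000 >> 21) == 0x800 | 0x400 == 0xc00,
   which is OR'ed into the saved VRSAVE value before it is written
   back to the SPR.  */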
27088 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27090 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27091 be using r12 as frame_reg_rtx and r11 as the static chain
27092 pointer for nested functions. */
27093 int save_regno = 12;
27094 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27095 && !using_static_chain_p)
27096 save_regno = 11;
27097 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27099 save_regno = 11;
27100 if (using_static_chain_p)
27101 save_regno = 0;
27103 NOT_INUSE (save_regno);
27105 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27108 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27109 if (!TARGET_SINGLE_PIC_BASE
27110 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27111 && !constant_pool_empty_p ())
27112 || (DEFAULT_ABI == ABI_V4
27113 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27114 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27116 /* If emit_load_toc_table will use the link register, we need to save
27117 it. We use R12 for this purpose because emit_load_toc_table
27118 can use register 0. This allows us to use a plain 'blr' to return
27119 from the procedure more often. */
27120 int save_LR_around_toc_setup = (TARGET_ELF
27121 && DEFAULT_ABI == ABI_V4
27122 && flag_pic
27123 && ! info->lr_save_p
27124 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27125 if (save_LR_around_toc_setup)
27127 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27128 rtx tmp = gen_rtx_REG (Pmode, 12);
27130 sp_adjust = 0;
27131 insn = emit_move_insn (tmp, lr);
27132 RTX_FRAME_RELATED_P (insn) = 1;
27134 rs6000_emit_load_toc_table (TRUE);
27136 insn = emit_move_insn (lr, tmp);
27137 add_reg_note (insn, REG_CFA_RESTORE, lr);
27138 RTX_FRAME_RELATED_P (insn) = 1;
27140 else
27141 rs6000_emit_load_toc_table (TRUE);
27144 #if TARGET_MACHO
27145 if (!TARGET_SINGLE_PIC_BASE
27146 && DEFAULT_ABI == ABI_DARWIN
27147 && flag_pic && crtl->uses_pic_offset_table)
27149 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27150 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27152 /* Save and restore LR locally around this call (in R0). */
27153 if (!info->lr_save_p)
27154 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27156 emit_insn (gen_load_macho_picbase (src));
27158 emit_move_insn (gen_rtx_REG (Pmode,
27159 RS6000_PIC_OFFSET_TABLE_REGNUM),
27160 lr);
27162 if (!info->lr_save_p)
27163 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27165 #endif
27167 /* If we need to, save the TOC register after doing the stack setup.
27168 Do not emit eh frame info for this save. The unwinder wants info,
27169 conceptually attached to instructions in this function, about
27170 register values in the caller of this function. This R2 may have
27171 already been changed from the value in the caller.
27172 We don't attempt to write accurate DWARF EH frame info for R2
27173 because code emitted by gcc for a (non-pointer) function call
27174 doesn't save and restore R2. Instead, R2 is managed out-of-line
27175 by a linker generated plt call stub when the function resides in
27176 a shared library. This behavior is costly to describe in DWARF,
27177 both in terms of the size of DWARF info and the time taken in the
27178 unwinder to interpret it. R2 changes, apart from the
27179 calls_eh_return case earlier in this function, are handled by
27180 linux-unwind.h frob_update_context. */
27181 if (rs6000_save_toc_in_prologue_p ()
27182 && !cfun->machine->toc_is_wrapped_separately)
27184 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27185 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
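/* For illustration (assumed slot values): this store becomes
   "std 2,24(1)" under ELFv2, where RS6000_TOC_SAVE_SLOT is 24, or
   "std 2,40(1)" under the 64-bit AIX/ELFv1 frame layout, where the
   TOC save doubleword sits at offset 40.  */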
27188 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27189 if (using_split_stack && split_stack_arg_pointer_used_p ())
27190 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27193 /* Output .extern statements for the save/restore routines we use. */
27195 static void
27196 rs6000_output_savres_externs (FILE *file)
27198 rs6000_stack_t *info = rs6000_stack_info ();
27200 if (TARGET_DEBUG_STACK)
27201 debug_stack_info (info);
27203 /* Write .extern for any function we will call to save and restore
27204 fp values. */
27205 if (info->first_fp_reg_save < 64
27206 && !TARGET_MACHO
27207 && !TARGET_ELF)
27209 char *name;
27210 int regno = info->first_fp_reg_save - 32;
27212 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27214 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27215 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27216 name = rs6000_savres_routine_name (regno, sel);
27217 fprintf (file, "\t.extern %s\n", name);
27219 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27221 bool lr = (info->savres_strategy
27222 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27223 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27224 name = rs6000_savres_routine_name (regno, sel);
27225 fprintf (file, "\t.extern %s\n", name);
27230 /* Write function prologue. */
27232 static void
27233 rs6000_output_function_prologue (FILE *file)
27235 if (!cfun->is_thunk)
27236 rs6000_output_savres_externs (file);
27238 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27239 immediately after the global entry point label. */
27240 if (rs6000_global_entry_point_needed_p ())
27242 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27244 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27246 if (TARGET_CMODEL != CMODEL_LARGE)
27248 /* In the small and medium code models, we assume the TOC is less
27249 than 2 GB away from the text section, so it can be computed via the
27250 following two-instruction sequence. */
27251 char buf[256];
27253 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27254 fprintf (file, "0:\taddis 2,12,.TOC.-");
27255 assemble_name (file, buf);
27256 fprintf (file, "@ha\n");
27257 fprintf (file, "\taddi 2,2,.TOC.-");
27258 assemble_name (file, buf);
27259 fprintf (file, "@l\n");
27261 else
27263 /* In the large code model, we allow arbitrary offsets between the
27264 TOC and the text section, so we have to load the offset from
27265 memory. The data field is emitted directly before the global
27266 entry point in rs6000_elf_declare_function_name. */
27267 char buf[256];
27269 #ifdef HAVE_AS_ENTRY_MARKERS
27270 /* If supported by the linker, emit a marker relocation. If the
27271 total code size of the final executable or shared library
27272 happens to fit into 2 GB after all, the linker will replace
27273 this code sequence with the sequence for the small or medium
27274 code model. */
27275 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27276 #endif
27277 fprintf (file, "\tld 2,");
27278 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27279 assemble_name (file, buf);
27280 fprintf (file, "-");
27281 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27282 assemble_name (file, buf);
27283 fprintf (file, "(12)\n");
27284 fprintf (file, "\tadd 2,2,12\n");
27287 fputs ("\t.localentry\t", file);
27288 assemble_name (file, name);
27289 fputs (",.-", file);
27290 assemble_name (file, name);
27291 fputs ("\n", file);
27294 /* Output -mprofile-kernel code. This needs to be done here instead of
27295 in output_function_profile since it must go after the ELFv2 ABI
27296 local entry point. */
27297 if (TARGET_PROFILE_KERNEL && crtl->profile)
27299 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27300 gcc_assert (!TARGET_32BIT);
27302 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
27304 /* In the ELFv2 ABI we have no compiler stack word. It must be
27305 the responsibility of _mcount to preserve the static chain
27306 register if required. */
27307 if (DEFAULT_ABI != ABI_ELFv2
27308 && cfun->static_chain_decl != NULL)
27310 asm_fprintf (file, "\tstd %s,24(%s)\n",
27311 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27312 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27313 asm_fprintf (file, "\tld %s,24(%s)\n",
27314 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
27316 else
27317 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
27320 rs6000_pic_labelno++;
27323 /* -mprofile-kernel code calls mcount before the function prologue,
27324 so a profiled leaf function should stay a leaf function. */
27325 static bool
27326 rs6000_keep_leaf_when_profiled ()
27328 return TARGET_PROFILE_KERNEL;
27331 /* Non-zero if vmx regs are restored before the frame pop, zero if
27332 we restore after the pop when possible. */
27333 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
27335 /* Restoring cr is a two-step process: loading a reg from the frame
27336 save, then moving the reg to cr. For ABI_V4 we must let the
27337 unwinder know that the stack location is no longer valid at or
27338 before the stack deallocation, but we can't emit a cfa_restore for
27339 cr at the stack deallocation like we do for other registers.
27340 The trouble is that it is possible for the move to cr to be
27341 scheduled after the stack deallocation. So say exactly where cr
27342 is located on each of the two insns. */
27344 static rtx
27345 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
27347 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
27348 rtx reg = gen_rtx_REG (SImode, regno);
27349 rtx_insn *insn = emit_move_insn (reg, mem);
27351 if (!exit_func && DEFAULT_ABI == ABI_V4)
27353 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27354 rtx set = gen_rtx_SET (reg, cr);
27356 add_reg_note (insn, REG_CFA_REGISTER, set);
27357 RTX_FRAME_RELATED_P (insn) = 1;
27359 return reg;
27362 /* Reload CR from REG. */
27364 static void
27365 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
27367 int count = 0;
27368 int i;
27370 if (using_mfcr_multiple)
27372 for (i = 0; i < 8; i++)
27373 if (save_reg_p (CR0_REGNO + i))
27374 count++;
27375 gcc_assert (count);
27378 if (using_mfcr_multiple && count > 1)
27380 rtx_insn *insn;
27381 rtvec p;
27382 int ndx;
27384 p = rtvec_alloc (count);
27386 ndx = 0;
27387 for (i = 0; i < 8; i++)
27388 if (save_reg_p (CR0_REGNO + i))
27390 rtvec r = rtvec_alloc (2);
27391 RTVEC_ELT (r, 0) = reg;
27392 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
27393 RTVEC_ELT (p, ndx) =
27394 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
27395 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
27396 ndx++;
27398 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27399 gcc_assert (ndx == count);
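/* Worked example (illustrative): if CR2, CR3 and CR4 were saved, the
   PARALLEL above becomes a single "mtcrf 56,reg" -- the FXM field
   mask is (1 << (7-2)) | (1 << (7-3)) | (1 << (7-4)) == 0x38 == 56.  */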
27401 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27402 CR field separately. */
27403 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27405 for (i = 0; i < 8; i++)
27406 if (save_reg_p (CR0_REGNO + i))
27407 add_reg_note (insn, REG_CFA_RESTORE,
27408 gen_rtx_REG (SImode, CR0_REGNO + i));
27410 RTX_FRAME_RELATED_P (insn) = 1;
27413 else
27414 for (i = 0; i < 8; i++)
27415 if (save_reg_p (CR0_REGNO + i))
27417 rtx insn = emit_insn (gen_movsi_to_cr_one
27418 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
27420 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
27421 CR field separately, attached to the insn that in fact
27422 restores this particular CR field. */
27423 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
27425 add_reg_note (insn, REG_CFA_RESTORE,
27426 gen_rtx_REG (SImode, CR0_REGNO + i));
27428 RTX_FRAME_RELATED_P (insn) = 1;
27432 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
27433 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
27434 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
27436 rtx_insn *insn = get_last_insn ();
27437 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
27439 add_reg_note (insn, REG_CFA_RESTORE, cr);
27440 RTX_FRAME_RELATED_P (insn) = 1;
27444 /* Like cr, the move to lr instruction can be scheduled after the
27445 stack deallocation, but unlike cr, its stack frame save is still
27446 valid. So we only need to emit the cfa_restore on the correct
27447 instruction. */
27449 static void
27450 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
27452 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
27453 rtx reg = gen_rtx_REG (Pmode, regno);
27455 emit_move_insn (reg, mem);
27458 static void
27459 restore_saved_lr (int regno, bool exit_func)
27461 rtx reg = gen_rtx_REG (Pmode, regno);
27462 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27463 rtx_insn *insn = emit_move_insn (lr, reg);
27465 if (!exit_func && flag_shrink_wrap)
27467 add_reg_note (insn, REG_CFA_RESTORE, lr);
27468 RTX_FRAME_RELATED_P (insn) = 1;
27472 static rtx
27473 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
27475 if (DEFAULT_ABI == ABI_ELFv2)
27477 int i;
27478 for (i = 0; i < 8; i++)
27479 if (save_reg_p (CR0_REGNO + i))
27481 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
27482 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
27483 cfa_restores);
27486 else if (info->cr_save_p)
27487 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27488 gen_rtx_REG (SImode, CR2_REGNO),
27489 cfa_restores);
27491 if (info->lr_save_p)
27492 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27493 gen_rtx_REG (Pmode, LR_REGNO),
27494 cfa_restores);
27495 return cfa_restores;
27498 /* Return true if OFFSET from stack pointer can be clobbered by signals.
27499 V.4 doesn't have any stack cushion; the AIX ABIs have 220 (32-bit) or
27500 288 (64-bit) bytes below the stack pointer not clobbered by signals. */
27502 static inline bool
27503 offset_below_red_zone_p (HOST_WIDE_INT offset)
27505 return offset < (DEFAULT_ABI == ABI_V4
27506 ? 0
27507 : TARGET_32BIT ? -220 : -288);
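/* Usage example (illustrative): on 64-bit AIX/ELF an AltiVec save slot
   at sp-300 lies below the 288-byte red zone, so a signal delivered
   between the stack pop and the vector load could clobber it; such
   restores must therefore happen before the frame is popped.  */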
27510 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
27512 static void
27513 emit_cfa_restores (rtx cfa_restores)
27515 rtx_insn *insn = get_last_insn ();
27516 rtx *loc = &REG_NOTES (insn);
27518 while (*loc)
27519 loc = &XEXP (*loc, 1);
27520 *loc = cfa_restores;
27521 RTX_FRAME_RELATED_P (insn) = 1;
27524 /* Emit function epilogue as insns. */
27526 void
27527 rs6000_emit_epilogue (int sibcall)
27529 rs6000_stack_t *info;
27530 int restoring_GPRs_inline;
27531 int restoring_FPRs_inline;
27532 int using_load_multiple;
27533 int using_mtcr_multiple;
27534 int use_backchain_to_restore_sp;
27535 int restore_lr;
27536 int strategy;
27537 HOST_WIDE_INT frame_off = 0;
27538 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
27539 rtx frame_reg_rtx = sp_reg_rtx;
27540 rtx cfa_restores = NULL_RTX;
27541 rtx insn;
27542 rtx cr_save_reg = NULL_RTX;
27543 machine_mode reg_mode = Pmode;
27544 int reg_size = TARGET_32BIT ? 4 : 8;
27545 machine_mode fp_reg_mode = TARGET_HARD_FLOAT ? DFmode : SFmode;
27546 int fp_reg_size = 8;
27547 int i;
27548 bool exit_func;
27549 unsigned ptr_regno;
27551 info = rs6000_stack_info ();
27553 strategy = info->savres_strategy;
27554 using_load_multiple = strategy & REST_MULTIPLE;
27555 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
27556 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
27557 using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
27558 || rs6000_tune == PROCESSOR_PPC603
27559 || rs6000_tune == PROCESSOR_PPC750
27560 || optimize_size);
27561 /* Restore via the backchain when we have a large frame, since this
27562 is more efficient than an addis, addi pair. The second condition
27563 here will not trigger at the moment; we don't actually need a
27564 frame pointer for alloca, but the generic parts of the compiler
27565 give us one anyway. */
27566 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
27567 ? info->lr_save_offset
27568 : 0) > 32767
27569 || (cfun->calls_alloca
27570 && !frame_pointer_needed));
27571 restore_lr = (info->lr_save_p
27572 && (restoring_FPRs_inline
27573 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
27574 && (restoring_GPRs_inline
27575 || info->first_fp_reg_save < 64)
27576 && !cfun->machine->lr_is_wrapped_separately);
27579 if (WORLD_SAVE_P (info))
27581 int i, j;
27582 char rname[30];
27583 const char *alloc_rname;
27584 rtvec p;
27586 /* eh_rest_world_r10 will return to the location saved in the LR
27587 stack slot (which is not likely to be our caller.)
27588 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
27589 rest_world is similar, except any R10 parameter is ignored.
27590 The exception-handling stuff that was here in 2.95 is no
27591 longer necessary. */
27593 p = rtvec_alloc (9
27594 + 32 - info->first_gp_reg_save
27595 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
27596 + 63 + 1 - info->first_fp_reg_save);
27598 strcpy (rname, ((crtl->calls_eh_return) ?
27599 "*eh_rest_world_r10" : "*rest_world"));
27600 alloc_rname = ggc_strdup (rname);
27602 j = 0;
27603 RTVEC_ELT (p, j++) = ret_rtx;
27604 RTVEC_ELT (p, j++)
27605 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
27606 /* The instruction pattern requires a clobber here;
27607 it is shared with the restVEC helper. */
27608 RTVEC_ELT (p, j++)
27609 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
27612 /* CR register traditionally saved as CR2. */
27613 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
27614 RTVEC_ELT (p, j++)
27615 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
27616 if (flag_shrink_wrap)
27618 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
27619 gen_rtx_REG (Pmode, LR_REGNO),
27620 cfa_restores);
27621 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27625 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27627 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
27628 RTVEC_ELT (p, j++)
27629 = gen_frame_load (reg,
27630 frame_reg_rtx, info->gp_save_offset + reg_size * i);
27631 if (flag_shrink_wrap
27632 && save_reg_p (info->first_gp_reg_save + i))
27633 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27635 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27637 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
27638 RTVEC_ELT (p, j++)
27639 = gen_frame_load (reg,
27640 frame_reg_rtx, info->altivec_save_offset + 16 * i);
27641 if (flag_shrink_wrap
27642 && save_reg_p (info->first_altivec_reg_save + i))
27643 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27645 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
27647 rtx reg = gen_rtx_REG (TARGET_HARD_FLOAT ? DFmode : SFmode,
27648 info->first_fp_reg_save + i);
27649 RTVEC_ELT (p, j++)
27650 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
27651 if (flag_shrink_wrap
27652 && save_reg_p (info->first_fp_reg_save + i))
27653 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27655 RTVEC_ELT (p, j++)
27656 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
27657 RTVEC_ELT (p, j++)
27658 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
27659 RTVEC_ELT (p, j++)
27660 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
27661 RTVEC_ELT (p, j++)
27662 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
27663 RTVEC_ELT (p, j++)
27664 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
27665 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
27667 if (flag_shrink_wrap)
27669 REG_NOTES (insn) = cfa_restores;
27670 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27671 RTX_FRAME_RELATED_P (insn) = 1;
27673 return;
27676 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
27677 if (info->push_p)
27678 frame_off = info->total_size;
27680 /* Restore AltiVec registers if we must do so before adjusting the
27681 stack. */
27682 if (info->altivec_size != 0
27683 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27684 || (DEFAULT_ABI != ABI_V4
27685 && offset_below_red_zone_p (info->altivec_save_offset))))
27687 int i;
27688 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27690 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27691 if (use_backchain_to_restore_sp)
27693 int frame_regno = 11;
27695 if ((strategy & REST_INLINE_VRS) == 0)
27697 /* Of r11 and r12, select the one not clobbered by an
27698 out-of-line restore function for the frame register. */
27699 frame_regno = 11 + 12 - scratch_regno;
27701 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
27702 emit_move_insn (frame_reg_rtx,
27703 gen_rtx_MEM (Pmode, sp_reg_rtx));
27704 frame_off = 0;
27706 else if (frame_pointer_needed)
27707 frame_reg_rtx = hard_frame_pointer_rtx;
27709 if ((strategy & REST_INLINE_VRS) == 0)
27711 int end_save = info->altivec_save_offset + info->altivec_size;
27712 int ptr_off;
27713 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27714 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27716 if (end_save + frame_off != 0)
27718 rtx offset = GEN_INT (end_save + frame_off);
27720 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27722 else
27723 emit_move_insn (ptr_reg, frame_reg_rtx);
27725 ptr_off = -end_save;
27726 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27727 info->altivec_save_offset + ptr_off,
27728 0, V4SImode, SAVRES_VR);
27730 else
27732 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27733 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27735 rtx addr, areg, mem, insn;
27736 rtx reg = gen_rtx_REG (V4SImode, i);
27737 HOST_WIDE_INT offset
27738 = (info->altivec_save_offset + frame_off
27739 + 16 * (i - info->first_altivec_reg_save));
27741 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27743 mem = gen_frame_mem (V4SImode,
27744 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27745 GEN_INT (offset)));
27746 insn = gen_rtx_SET (reg, mem);
27748 else
27750 areg = gen_rtx_REG (Pmode, 0);
27751 emit_move_insn (areg, GEN_INT (offset));
27753 /* AltiVec addressing mode is [reg+reg]. */
27754 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27755 mem = gen_frame_mem (V4SImode, addr);
27757 /* Rather than emitting a generic move, force use of the
27758 lvx instruction, which we always want. In particular we
27759 don't want lxvd2x/xxpermdi for little endian. */
27760 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27763 (void) emit_insn (insn);
27767 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27768 if (((strategy & REST_INLINE_VRS) == 0
27769 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27770 && (flag_shrink_wrap
27771 || (offset_below_red_zone_p
27772 (info->altivec_save_offset
27773 + 16 * (i - info->first_altivec_reg_save))))
27774 && save_reg_p (i))
27776 rtx reg = gen_rtx_REG (V4SImode, i);
27777 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27781 /* Restore VRSAVE if we must do so before adjusting the stack. */
27782 if (info->vrsave_size != 0
27783 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27784 || (DEFAULT_ABI != ABI_V4
27785 && offset_below_red_zone_p (info->vrsave_save_offset))))
27787 rtx reg;
27789 if (frame_reg_rtx == sp_reg_rtx)
27791 if (use_backchain_to_restore_sp)
27793 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27794 emit_move_insn (frame_reg_rtx,
27795 gen_rtx_MEM (Pmode, sp_reg_rtx));
27796 frame_off = 0;
27798 else if (frame_pointer_needed)
27799 frame_reg_rtx = hard_frame_pointer_rtx;
27802 reg = gen_rtx_REG (SImode, 12);
27803 emit_insn (gen_frame_load (reg, frame_reg_rtx,
27804 info->vrsave_save_offset + frame_off));
27806 emit_insn (generate_set_vrsave (reg, info, 1));
27809 insn = NULL_RTX;
27810 /* If we have a large stack frame, restore the old stack pointer
27811 using the backchain. */
27812 if (use_backchain_to_restore_sp)
27814 if (frame_reg_rtx == sp_reg_rtx)
27816 /* Under V.4, don't reset the stack pointer until after we're done
27817 loading the saved registers. */
27818 if (DEFAULT_ABI == ABI_V4)
27819 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27821 insn = emit_move_insn (frame_reg_rtx,
27822 gen_rtx_MEM (Pmode, sp_reg_rtx));
27823 frame_off = 0;
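/* Illustrative: the back chain word at 0(r1) holds the caller's stack
   pointer, so a single "ld 11,0(1)" (lwz for -m32, destination being
   whichever register was chosen above) recovers it even when
   total_size does not fit in a 16-bit displacement -- a sketch of
   what the move above expands to, not verbatim output.  */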
27825 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27826 && DEFAULT_ABI == ABI_V4)
27827 /* frame_reg_rtx has been set up by the altivec restore. */
27829 else
27831 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
27832 frame_reg_rtx = sp_reg_rtx;
27835 /* If we have a frame pointer, we can restore the old stack pointer
27836 from it. */
27837 else if (frame_pointer_needed)
27839 frame_reg_rtx = sp_reg_rtx;
27840 if (DEFAULT_ABI == ABI_V4)
27841 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
27842 /* Prevent reordering memory accesses against stack pointer restore. */
27843 else if (cfun->calls_alloca
27844 || offset_below_red_zone_p (-info->total_size))
27845 rs6000_emit_stack_tie (frame_reg_rtx, true);
27847 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
27848 GEN_INT (info->total_size)));
27849 frame_off = 0;
27851 else if (info->push_p
27852 && DEFAULT_ABI != ABI_V4
27853 && !crtl->calls_eh_return)
27855 /* Prevent reordering memory accesses against stack pointer restore. */
27856 if (cfun->calls_alloca
27857 || offset_below_red_zone_p (-info->total_size))
27858 rs6000_emit_stack_tie (frame_reg_rtx, false);
27859 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
27860 GEN_INT (info->total_size)));
27861 frame_off = 0;
27863 if (insn && frame_reg_rtx == sp_reg_rtx)
27865 if (cfa_restores)
27867 REG_NOTES (insn) = cfa_restores;
27868 cfa_restores = NULL_RTX;
27870 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
27871 RTX_FRAME_RELATED_P (insn) = 1;
27874 /* Restore AltiVec registers if we have not done so already. */
27875 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27876 && info->altivec_size != 0
27877 && (DEFAULT_ABI == ABI_V4
27878 || !offset_below_red_zone_p (info->altivec_save_offset)))
27880 int i;
27882 if ((strategy & REST_INLINE_VRS) == 0)
27884 int end_save = info->altivec_save_offset + info->altivec_size;
27885 int ptr_off;
27886 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27887 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
27888 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27890 if (end_save + frame_off != 0)
27892 rtx offset = GEN_INT (end_save + frame_off);
27894 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27896 else
27897 emit_move_insn (ptr_reg, frame_reg_rtx);
27899 ptr_off = -end_save;
27900 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27901 info->altivec_save_offset + ptr_off,
27902 0, V4SImode, SAVRES_VR);
27903 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27905 /* Frame reg was clobbered by out-of-line save. Restore it
27906 from ptr_reg, and if we are calling out-of-line gpr or
27907 fpr restore set up the correct pointer and offset. */
27908 unsigned newptr_regno = 1;
27909 if (!restoring_GPRs_inline)
27911 bool lr = info->gp_save_offset + info->gp_size == 0;
27912 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
27913 newptr_regno = ptr_regno_for_savres (sel);
27914 end_save = info->gp_save_offset + info->gp_size;
27916 else if (!restoring_FPRs_inline)
27918 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
27919 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27920 newptr_regno = ptr_regno_for_savres (sel);
27921 end_save = info->fp_save_offset + info->fp_size;
27924 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
27925 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
27927 if (end_save + ptr_off != 0)
27929 rtx offset = GEN_INT (end_save + ptr_off);
27931 frame_off = -end_save;
27932 if (TARGET_32BIT)
27933 emit_insn (gen_addsi3_carry (frame_reg_rtx,
27934 ptr_reg, offset));
27935 else
27936 emit_insn (gen_adddi3_carry (frame_reg_rtx,
27937 ptr_reg, offset));
27939 else
27941 frame_off = ptr_off;
27942 emit_move_insn (frame_reg_rtx, ptr_reg);
27946 else
27948 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27949 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27951 rtx addr, areg, mem, insn;
27952 rtx reg = gen_rtx_REG (V4SImode, i);
27953 HOST_WIDE_INT offset
27954 = (info->altivec_save_offset + frame_off
27955 + 16 * (i - info->first_altivec_reg_save));
27957 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27959 mem = gen_frame_mem (V4SImode,
27960 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27961 GEN_INT (offset)));
27962 insn = gen_rtx_SET (reg, mem);
27964 else
27966 areg = gen_rtx_REG (Pmode, 0);
27967 emit_move_insn (areg, GEN_INT (offset));
27969 /* AltiVec addressing mode is [reg+reg]. */
27970 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
27971 mem = gen_frame_mem (V4SImode, addr);
27973 /* Rather than emitting a generic move, force use of the
27974 lvx instruction, which we always want. In particular we
27975 don't want lxvd2x/xxpermdi for little endian. */
27976 insn = gen_altivec_lvx_v4si_internal (reg, mem);
27979 (void) emit_insn (insn);
27983 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27984 if (((strategy & REST_INLINE_VRS) == 0
27985 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
27986 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
27987 && save_reg_p (i))
27989 rtx reg = gen_rtx_REG (V4SImode, i);
27990 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
27994 /* Restore VRSAVE if we have not done so already. */
27995 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
27996 && info->vrsave_size != 0
27997 && (DEFAULT_ABI == ABI_V4
27998 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28000 rtx reg;
28002 reg = gen_rtx_REG (SImode, 12);
28003 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28004 info->vrsave_save_offset + frame_off));
28006 emit_insn (generate_set_vrsave (reg, info, 1));
28009 /* If we exit by an out-of-line restore function on ABI_V4 then that
28010 function will deallocate the stack, so we don't need to worry
28011 about the unwinder restoring cr from an invalid stack frame
28012 location. */
28013 exit_func = (!restoring_FPRs_inline
28014 || (!restoring_GPRs_inline
28015 && info->first_fp_reg_save == 64));
28017 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28018 *separate* slots if the routine calls __builtin_eh_return, so
28019 that they can be independently restored by the unwinder. */
28020 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28022 int i, cr_off = info->ehcr_offset;
28024 for (i = 0; i < 8; i++)
28025 if (!call_used_regs[CR0_REGNO + i])
28027 rtx reg = gen_rtx_REG (SImode, 0);
28028 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28029 cr_off + frame_off));
28031 insn = emit_insn (gen_movsi_to_cr_one
28032 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28034 if (!exit_func && flag_shrink_wrap)
28036 add_reg_note (insn, REG_CFA_RESTORE,
28037 gen_rtx_REG (SImode, CR0_REGNO + i));
28039 RTX_FRAME_RELATED_P (insn) = 1;
28042 cr_off += reg_size;
28046 /* Get the old lr if we saved it. If we are restoring registers
28047 out-of-line, then the out-of-line routines can do this for us. */
28048 if (restore_lr && restoring_GPRs_inline)
28049 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28051 /* Get the old cr if we saved it. */
28052 if (info->cr_save_p)
28054 unsigned cr_save_regno = 12;
28056 if (!restoring_GPRs_inline)
28058 /* Ensure we don't use the register used by the out-of-line
28059 gpr register restore below. */
28060 bool lr = info->gp_save_offset + info->gp_size == 0;
28061 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28062 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28064 if (gpr_ptr_regno == 12)
28065 cr_save_regno = 11;
28066 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28068 else if (REGNO (frame_reg_rtx) == 12)
28069 cr_save_regno = 11;
28071 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28072 info->cr_save_offset + frame_off,
28073 exit_func);
28076 /* Set LR here to try to overlap restores below. */
28077 if (restore_lr && restoring_GPRs_inline)
28078 restore_saved_lr (0, exit_func);
28080 /* Load exception handler data registers, if needed. */
28081 if (crtl->calls_eh_return)
28083 unsigned int i, regno;
28085 if (TARGET_AIX)
28087 rtx reg = gen_rtx_REG (reg_mode, 2);
28088 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28089 frame_off + RS6000_TOC_SAVE_SLOT));
28092 for (i = 0; ; ++i)
28094 rtx mem;
28096 regno = EH_RETURN_DATA_REGNO (i);
28097 if (regno == INVALID_REGNUM)
28098 break;
28100 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28101 info->ehrd_offset + frame_off
28102 + reg_size * (int) i);
28104 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28108 /* Restore GPRs. This is done as a PARALLEL if we are using
28109 the load-multiple instructions. */
28110 if (!restoring_GPRs_inline)
28112 /* We are jumping to an out-of-line function. */
28113 rtx ptr_reg;
28114 int end_save = info->gp_save_offset + info->gp_size;
28115 bool can_use_exit = end_save == 0;
28116 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28117 int ptr_off;
28119 /* Emit stack reset code if we need it. */
28120 ptr_regno = ptr_regno_for_savres (sel);
28121 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28122 if (can_use_exit)
28123 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28124 else if (end_save + frame_off != 0)
28125 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28126 GEN_INT (end_save + frame_off)));
28127 else if (REGNO (frame_reg_rtx) != ptr_regno)
28128 emit_move_insn (ptr_reg, frame_reg_rtx);
28129 if (REGNO (frame_reg_rtx) == ptr_regno)
28130 frame_off = -end_save;
28132 if (can_use_exit && info->cr_save_p)
28133 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28135 ptr_off = -end_save;
28136 rs6000_emit_savres_rtx (info, ptr_reg,
28137 info->gp_save_offset + ptr_off,
28138 info->lr_save_offset + ptr_off,
28139 reg_mode, sel);
28141 else if (using_load_multiple)
28143 rtvec p;
28144 p = rtvec_alloc (32 - info->first_gp_reg_save);
28145 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28146 RTVEC_ELT (p, i)
28147 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28148 frame_reg_rtx,
28149 info->gp_save_offset + frame_off + reg_size * i);
28150 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28152 else
28154 int offset = info->gp_save_offset + frame_off;
28155 for (i = info->first_gp_reg_save; i < 32; i++)
28157 if (save_reg_p (i)
28158 && !cfun->machine->gpr_is_wrapped_separately[i])
28160 rtx reg = gen_rtx_REG (reg_mode, i);
28161 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28164 offset += reg_size;
28168 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28170 /* If the frame pointer was used then we can't delay emitting
28171 a REG_CFA_DEF_CFA note. This must happen on the insn that
28172 restores the frame pointer, r31. We may have already emitted
28173 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28174 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28175 be harmless if emitted. */
28176 if (frame_pointer_needed)
28178 insn = get_last_insn ();
28179 add_reg_note (insn, REG_CFA_DEF_CFA,
28180 plus_constant (Pmode, frame_reg_rtx, frame_off));
28181 RTX_FRAME_RELATED_P (insn) = 1;
28184 /* Set up cfa_restores. We always need these when
28185 shrink-wrapping. If not shrink-wrapping then we only need
28186 the cfa_restore when the stack location is no longer valid.
28187 The cfa_restores must be emitted on or before the insn that
28188 invalidates the stack, and of course must not be emitted
28189 before the insn that actually does the restore. The latter
28190 is why it is a bad idea to emit the cfa_restores as a group
28191 on the last instruction here that actually does a restore:
28192 That insn may be reordered with respect to others doing
28193 restores. */
28194 if (flag_shrink_wrap
28195 && !restoring_GPRs_inline
28196 && info->first_fp_reg_save == 64)
28197 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28199 for (i = info->first_gp_reg_save; i < 32; i++)
28200 if (save_reg_p (i)
28201 && !cfun->machine->gpr_is_wrapped_separately[i])
28203 rtx reg = gen_rtx_REG (reg_mode, i);
28204 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28208 if (!restoring_GPRs_inline
28209 && info->first_fp_reg_save == 64)
28211 /* We are jumping to an out-of-line function. */
28212 if (cfa_restores)
28213 emit_cfa_restores (cfa_restores);
28214 return;
28217 if (restore_lr && !restoring_GPRs_inline)
28219 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28220 restore_saved_lr (0, exit_func);
28223 /* Restore fpr's if we need to do it without calling a function. */
28224 if (restoring_FPRs_inline)
28226 int offset = info->fp_save_offset + frame_off;
28227 for (i = info->first_fp_reg_save; i < 64; i++)
28229 if (save_reg_p (i)
28230 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28232 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28233 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28234 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28235 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28236 cfa_restores);
28239 offset += fp_reg_size;
28243 /* If we saved cr, restore it here. Just those that were used. */
28244 if (info->cr_save_p)
28245 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28247 /* If this is V.4, unwind the stack pointer after all of the loads
28248 have been done, or set up r11 if we are restoring fp out of line. */
28249 ptr_regno = 1;
28250 if (!restoring_FPRs_inline)
28252 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28253 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28254 ptr_regno = ptr_regno_for_savres (sel);
28257 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28258 if (REGNO (frame_reg_rtx) == ptr_regno)
28259 frame_off = 0;
28261 if (insn && restoring_FPRs_inline)
28263 if (cfa_restores)
28265 REG_NOTES (insn) = cfa_restores;
28266 cfa_restores = NULL_RTX;
28268 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28269 RTX_FRAME_RELATED_P (insn) = 1;
28272 if (crtl->calls_eh_return)
28274 rtx sa = EH_RETURN_STACKADJ_RTX;
28275 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28278 if (!sibcall && restoring_FPRs_inline)
28280 if (cfa_restores)
28282 /* We can't hang the cfa_restores off a simple return,
28283 since the shrink-wrap code sometimes uses an existing
28284 return. This means there might be a path from
28285 pre-prologue code to this return, and dwarf2cfi code
28286 wants the eh_frame unwinder state to be the same on
28287 all paths to any point. So we need to emit the
28288 cfa_restores before the return. For -m64 we really
28289 don't need epilogue cfa_restores at all, except for
28290 this irritating dwarf2cfi with shrink-wrap
28291 requirement; the stack red zone means eh_frame info
28292 from the prologue telling the unwinder to restore
28293 from the stack is perfectly good right to the end of
28294 the function. */
28295 emit_insn (gen_blockage ());
28296 emit_cfa_restores (cfa_restores);
28297 cfa_restores = NULL_RTX;
28300 emit_jump_insn (targetm.gen_simple_return ());
28303 if (!sibcall && !restoring_FPRs_inline)
28305 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28306 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
28307 int elt = 0;
28308 RTVEC_ELT (p, elt++) = ret_rtx;
28309 if (lr)
28310 RTVEC_ELT (p, elt++)
28311 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
28313 /* We have to restore more than two FP registers, so branch to the
28314 restore function. It will return to our caller. */
28315 int i;
28316 int reg;
28317 rtx sym;
28319 if (flag_shrink_wrap)
28320 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28322 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
28323 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
28324 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
28325 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
28327 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
28329 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
28331 RTVEC_ELT (p, elt++)
28332 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
28333 if (flag_shrink_wrap
28334 && save_reg_p (info->first_fp_reg_save + i))
28335 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28338 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28341 if (cfa_restores)
28343 if (sibcall)
28344 /* Ensure the cfa_restores are hung off an insn that won't
28345 be reordered above other restores. */
28346 emit_insn (gen_blockage ());
28348 emit_cfa_restores (cfa_restores);
28352 /* Write function epilogue. */
28354 static void
28355 rs6000_output_function_epilogue (FILE *file)
28357 #if TARGET_MACHO
28358 macho_branch_islands ();
28361 rtx_insn *insn = get_last_insn ();
28362 rtx_insn *deleted_debug_label = NULL;
28364 /* Mach-O doesn't support labels at the end of objects, so if
28365 it looks like we might want one, take special action.
28367 First, collect any sequence of deleted debug labels. */
28368 while (insn
28369 && NOTE_P (insn)
28370 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
28372 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
28373 notes; instead set their CODE_LABEL_NUMBER to -1,
28374 otherwise there would be code generation differences
28375 between -g and -g0. */
28376 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28377 deleted_debug_label = insn;
28378 insn = PREV_INSN (insn);
28381 /* Second, if we have:
28382 label:
28383 barrier
28384 then this needs to be detected, so skip past the barrier. */
28386 if (insn && BARRIER_P (insn))
28387 insn = PREV_INSN (insn);
28389 /* Up to now we've only seen notes or barriers. */
28390 if (insn)
28392 if (LABEL_P (insn)
28393 || (NOTE_P (insn)
28394 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
28395 /* Trailing label: <barrier>. */
28396 fputs ("\tnop\n", file);
28397 else
28399 /* Lastly, see if we have a completely empty function body. */
28400 while (insn && ! INSN_P (insn))
28401 insn = PREV_INSN (insn);
28402 /* If we don't find any insns, we've got an empty function body;
28403 i.e. completely empty, without a return or branch. This is
28404 taken as the case where a function body has been removed
28405 because it contains an inline __builtin_unreachable(). GCC
28406 states that reaching __builtin_unreachable() means UB so we're
28407 not obliged to do anything special; however, we want
28408 non-zero-sized function bodies. To meet this, and help the
28409 user out, let's trap the case. */
28410 if (insn == NULL)
28411 fputs ("\ttrap\n", file);
28414 else if (deleted_debug_label)
28415 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
28416 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
28417 CODE_LABEL_NUMBER (insn) = -1;
28419 #endif
28421 /* Output a traceback table here. See /usr/include/sys/debug.h for info
28422 on its format.
28424 We don't output a traceback table if -finhibit-size-directive was
28425 used. The documentation for -finhibit-size-directive reads
28426 ``don't output a @code{.size} assembler directive, or anything
28427 else that would cause trouble if the function is split in the
28428 middle, and the two halves are placed at locations far apart in
28429 memory.'' The traceback table has this property, since it
28430 includes the offset from the start of the function to the
28431 traceback table itself.
28433 System V.4 PowerPC targets (and the embedded ABI derived from it) use a
28434 different traceback table. */
28435 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28436 && ! flag_inhibit_size_directive
28437 && rs6000_traceback != traceback_none && !cfun->is_thunk)
28439 const char *fname = NULL;
28440 const char *language_string = lang_hooks.name;
28441 int fixed_parms = 0, float_parms = 0, parm_info = 0;
28442 int i;
28443 int optional_tbtab;
28444 rs6000_stack_t *info = rs6000_stack_info ();
28446 if (rs6000_traceback == traceback_full)
28447 optional_tbtab = 1;
28448 else if (rs6000_traceback == traceback_part)
28449 optional_tbtab = 0;
28450 else
28451 optional_tbtab = !optimize_size && !TARGET_ELF;
28453 if (optional_tbtab)
28455 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
28456 while (*fname == '.') /* V.4 encodes . in the name */
28457 fname++;
28459 /* Need label immediately before tbtab, so we can compute
28460 its offset from the function start. */
28461 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28462 ASM_OUTPUT_LABEL (file, fname);
28465 /* The .tbtab pseudo-op can only be used for the first eight
28466 expressions, since it can't handle the possibly variable
28467 length fields that follow. However, if you omit the optional
28468 fields, the assembler outputs zeros for all optional fields
28469 anyway, giving each variable length field its minimum length
28470 (as defined in sys/debug.h). Thus we cannot use the .tbtab
28471 pseudo-op at all. */
28473 /* An all-zero word flags the start of the tbtab, for debuggers
28474 that have to find it by searching forward from the entry
28475 point or from the current pc. */
28476 fputs ("\t.long 0\n", file);
28478 /* Tbtab format type. Use format type 0. */
28479 fputs ("\t.byte 0,", file);
28481 /* Language type. Unfortunately, there does not seem to be any
28482 official way to discover the language being compiled, so we
28483 use language_string.
28484 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
28485 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
28486 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
28487 either, so for now use 0. */
28488 if (lang_GNU_C ()
28489 || ! strcmp (language_string, "GNU GIMPLE")
28490 || ! strcmp (language_string, "GNU Go")
28491 || ! strcmp (language_string, "libgccjit"))
28492 i = 0;
28493 else if (! strcmp (language_string, "GNU F77")
28494 || lang_GNU_Fortran ())
28495 i = 1;
28496 else if (! strcmp (language_string, "GNU Pascal"))
28497 i = 2;
28498 else if (! strcmp (language_string, "GNU Ada"))
28499 i = 3;
28500 else if (lang_GNU_CXX ()
28501 || ! strcmp (language_string, "GNU Objective-C++"))
28502 i = 9;
28503 else if (! strcmp (language_string, "GNU Java"))
28504 i = 13;
28505 else if (! strcmp (language_string, "GNU Objective-C"))
28506 i = 14;
28507 else
28508 gcc_unreachable ();
28509 fprintf (file, "%d,", i);
28511 /* 8 single bit fields: global linkage (not set for C extern linkage,
28512 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
28513 from start of procedure stored in tbtab, internal function, function
28514 has controlled storage, function has no toc, function uses fp,
28515 function logs/aborts fp operations. */
28516 /* Assume that fp operations are used if any fp reg must be saved. */
28517 fprintf (file, "%d,",
28518 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
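/* Worked example (illustrative): a full traceback table for a function
   that saves FPRs emits (1 << 5) | (1 << 1) == 0x22 == 34 here,
   i.e. the offset-stored-in-tbtab and function-uses-fp bits set.  */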
28520 /* 6 bitfields: function is interrupt handler, name present in
28521 proc table, function calls alloca, on condition directives
28522 (controls stack walks, 3 bits), saves condition reg, saves
28523 link reg. */
28524 /* The `function calls alloca' bit seems to be set whenever reg 31 is
28525 set up as a frame pointer, even when there is no alloca call. */
28526 fprintf (file, "%d,",
28527 ((optional_tbtab << 6)
28528 | ((optional_tbtab & frame_pointer_needed) << 5)
28529 | (info->cr_save_p << 1)
28530 | (info->lr_save_p)));
28532 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
28533 (6 bits). */
28534 fprintf (file, "%d,",
28535 (info->push_p << 7) | (64 - info->first_fp_reg_save));
28537 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
28538 fprintf (file, "%d,", (32 - first_reg_to_save ()));
28540 if (optional_tbtab)
28542 /* Compute the parameter info from the function decl argument
28543 list. */
28544 tree decl;
28545 int next_parm_info_bit = 31;
28547 for (decl = DECL_ARGUMENTS (current_function_decl);
28548 decl; decl = DECL_CHAIN (decl))
28550 rtx parameter = DECL_INCOMING_RTL (decl);
28551 machine_mode mode = GET_MODE (parameter);
28553 if (GET_CODE (parameter) == REG)
28555 if (SCALAR_FLOAT_MODE_P (mode))
28557 int bits;
28559 float_parms++;
28561 switch (mode)
28563 case E_SFmode:
28564 case E_SDmode:
28565 bits = 0x2;
28566 break;
28568 case E_DFmode:
28569 case E_DDmode:
28570 case E_TFmode:
28571 case E_TDmode:
28572 case E_IFmode:
28573 case E_KFmode:
28574 bits = 0x3;
28575 break;
28577 default:
28578 gcc_unreachable ();
28581 /* If only one bit will fit, don't or in this entry. */
28582 if (next_parm_info_bit > 0)
28583 parm_info |= (bits << (next_parm_info_bit - 1));
28584 next_parm_info_bit -= 2;
28586 else
28588 fixed_parms += ((GET_MODE_SIZE (mode)
28589 + (UNITS_PER_WORD - 1))
28590 / UNITS_PER_WORD);
28591 next_parm_info_bit -= 1;
28597 /* Number of fixed point parameters. */
28598 /* This is actually the number of words of fixed point parameters; thus
28599 an 8-byte struct counts as 2, and the maximum value is 8. */
28600 fprintf (file, "%d,", fixed_parms);
28602 /* 2 bitfields: number of floating point parameters (7 bits), parameters
28603 all on stack. */
28604 /* This is actually the number of fp registers that hold parameters,
28605 and thus the maximum value is 13. */
28606 /* Set parameters on stack bit if parameters are not in their original
28607 registers, regardless of whether they are on the stack? Xlc
28608 seems to set the bit when not optimizing. */
28609 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
28611 if (optional_tbtab)
28613 /* Optional fields follow. Some are variable length. */
28615 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
28616 float, 11 double float. */
28617 /* There is an entry for each parameter in a register, in the order
28618 that they occur in the parameter list. Any intervening arguments
28619 on the stack are ignored. If the list overflows a long (max
28620 possible length 34 bits) then completely leave off all elements
28621 that don't fit. */
28622 /* Only emit this long if there was at least one parameter. */
28623 if (fixed_parms || float_parms)
28624 fprintf (file, "\t.long %d\n", parm_info);
28626 /* Offset from start of code to tb table. */
28627 fputs ("\t.long ", file);
28628 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
28629 RS6000_OUTPUT_BASENAME (file, fname);
28630 putc ('-', file);
28631 rs6000_output_function_entry (file, fname);
28632 putc ('\n', file);
28634 /* Interrupt handler mask. */
28635 /* Omit this long, since we never set the interrupt handler bit
28636 above. */
28638 /* Number of CTL (controlled storage) anchors. */
28639 /* Omit this long, since the has_ctl bit is never set above. */
28641 /* Displacement into stack of each CTL anchor. */
28642 /* Omit this list of longs, because there are no CTL anchors. */
28644 /* Length of function name. */
28645 if (*fname == '*')
28646 ++fname;
28647 fprintf (file, "\t.short %d\n", (int) strlen (fname));
28649 /* Function name. */
28650 assemble_string (fname, strlen (fname));
28652 /* Register for alloca automatic storage; this is always reg 31.
28653 Only emit this if the alloca bit was set above. */
28654 if (frame_pointer_needed)
28655 fputs ("\t.byte 31\n", file);
28657 fputs ("\t.align 2\n", file);
28661 /* Arrange to define .LCTOC1 label, if not already done. */
28662 if (need_toc_init)
28664 need_toc_init = 0;
28665 if (!toc_initialized)
28667 switch_to_section (toc_section);
28668 switch_to_section (current_function_section ());
28673 /* -fsplit-stack support. */
28675 /* A SYMBOL_REF for __morestack. */
28676 static GTY(()) rtx morestack_ref;
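/* Emit RT = RA + C, selecting the DImode or SImode add pattern to
match the target word size. */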
28678 static rtx
28679 gen_add3_const (rtx rt, rtx ra, long c)
28681 if (TARGET_64BIT)
28682 return gen_adddi3 (rt, ra, GEN_INT (c));
28683 else
28684 return gen_addsi3 (rt, ra, GEN_INT (c));
28687 /* Emit -fsplit-stack prologue, which goes before the regular function
28688 prologue (at local entry point in the case of ELFv2). */
28690 void
28691 rs6000_expand_split_stack_prologue (void)
28693 rs6000_stack_t *info = rs6000_stack_info ();
28694 unsigned HOST_WIDE_INT allocate;
28695 long alloc_hi, alloc_lo;
28696 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
28697 rtx_insn *insn;
28699 gcc_assert (flag_split_stack && reload_completed);
28701 if (!info->push_p)
28702 return;
28704 if (global_regs[29])
28706 error ("%qs uses register r29", "-fsplit-stack");
28707 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
28708 "conflicts with %qD", global_regs_decl[29]);
28711 allocate = info->total_size;
28712 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
28714 sorry ("stack frame larger than 2G is not supported for %qs", "-fsplit-stack");
28715 return;
28717 if (morestack_ref == NULL_RTX)
28719 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
28720 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
28721 | SYMBOL_FLAG_FUNCTION);
28724 r0 = gen_rtx_REG (Pmode, 0);
28725 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28726 r12 = gen_rtx_REG (Pmode, 12);
28727 emit_insn (gen_load_split_stack_limit (r0));
28728 /* Always emit two insns here to calculate the requested stack,
28729 so that the linker can edit them when adjusting size for calling
28730 non-split-stack code. */
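/* alloc_hi is -allocate rounded to the nearest multiple of 64K, and
alloc_lo the remaining signed 16-bit part; each piece then fits the
immediate field of a single add instruction. */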
28731 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
28732 alloc_lo = -allocate - alloc_hi;
28733 if (alloc_hi != 0)
28735 emit_insn (gen_add3_const (r12, r1, alloc_hi));
28736 if (alloc_lo != 0)
28737 emit_insn (gen_add3_const (r12, r12, alloc_lo));
28738 else
28739 emit_insn (gen_nop ());
28741 else
28743 emit_insn (gen_add3_const (r12, r1, alloc_lo));
28744 emit_insn (gen_nop ());
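/* Compare the requested new stack pointer (r12) against the limit in
r0; if r12 is still at or above the limit, the current segment has
room and we branch around the __morestack call. */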
28747 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
28748 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
28749 ok_label = gen_label_rtx ();
28750 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28751 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
28752 gen_rtx_LABEL_REF (VOIDmode, ok_label),
28753 pc_rtx);
28754 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28755 JUMP_LABEL (insn) = ok_label;
28756 /* Mark the jump as very likely to be taken. */
28757 add_reg_br_prob_note (insn, profile_probability::very_likely ());
28759 lr = gen_rtx_REG (Pmode, LR_REGNO);
28760 insn = emit_move_insn (r0, lr);
28761 RTX_FRAME_RELATED_P (insn) = 1;
28762 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
28763 RTX_FRAME_RELATED_P (insn) = 1;
28765 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
28766 const0_rtx, const0_rtx));
28767 call_fusage = NULL_RTX;
28768 use_reg (&call_fusage, r12);
28769 /* Say the call uses r0, even though it doesn't, to stop regrename
28770 from twiddling with the insns saving lr, trashing args for cfun.
28771 The insns restoring lr are similarly protected by making
28772 split_stack_return use r0. */
28773 use_reg (&call_fusage, r0);
28774 add_function_usage_to (insn, call_fusage);
28775 /* Indicate that this function can't jump to non-local gotos. */
28776 make_reg_eh_region_note_nothrow_nononlocal (insn);
28777 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
28778 insn = emit_move_insn (lr, r0);
28779 add_reg_note (insn, REG_CFA_RESTORE, lr);
28780 RTX_FRAME_RELATED_P (insn) = 1;
28781 emit_insn (gen_split_stack_return ());
28783 emit_label (ok_label);
28784 LABEL_NUSES (ok_label) = 1;
28787 /* Return the internal arg pointer used for function incoming
28788 arguments. When -fsplit-stack, the arg pointer is r12 so we need
28789 to copy it to a pseudo in order for it to be preserved over calls
28790 and suchlike. We'd really like to use a pseudo here for the
28791 internal arg pointer but data-flow analysis is not prepared to
28792 accept pseudos as live at the beginning of a function. */
28794 static rtx
28795 rs6000_internal_arg_pointer (void)
28797 if (flag_split_stack
28798 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
28799 == NULL))
28802 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
28804 rtx pat;
28806 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
28807 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
28809 /* Put the pseudo initialization right after the note at the
28810 beginning of the function. */
28811 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
28812 gen_rtx_REG (Pmode, 12));
28813 push_topmost_sequence ();
28814 emit_insn_after (pat, get_insns ());
28815 pop_topmost_sequence ();
28817 rtx ret = plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
28818 FIRST_PARM_OFFSET (current_function_decl));
28819 return copy_to_reg (ret);
28821 return virtual_incoming_args_rtx;
28824 /* We may have to tell the dataflow pass that the split stack prologue
28825 is initializing a register. */
28827 static void
28828 rs6000_live_on_entry (bitmap regs)
28830 if (flag_split_stack)
28831 bitmap_set_bit (regs, 12);
28834 /* Emit -fsplit-stack dynamic stack allocation space check. */
28836 void
28837 rs6000_split_stack_space_check (rtx size, rtx label)
28839 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
28840 rtx limit = gen_reg_rtx (Pmode);
28841 rtx requested = gen_reg_rtx (Pmode);
28842 rtx cmp = gen_reg_rtx (CCUNSmode);
28843 rtx jump;
28845 emit_insn (gen_load_split_stack_limit (limit));
28846 if (CONST_INT_P (size))
28847 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
28848 else
28850 size = force_reg (Pmode, size);
28851 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
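/* Branch to LABEL when SP - SIZE is still at or above the split-stack
limit, i.e. the allocation fits in the current segment. */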
28853 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
28854 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
28855 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
28856 gen_rtx_LABEL_REF (VOIDmode, label),
28857 pc_rtx);
28858 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
28859 JUMP_LABEL (jump) = label;
28862 /* A C compound statement that outputs the assembler code for a thunk
28863 function, used to implement C++ virtual function calls with
28864 multiple inheritance. The thunk acts as a wrapper around a virtual
28865 function, adjusting the implicit object parameter before handing
28866 control off to the real function.
28868 First, emit code to add the integer DELTA to the location that
28869 contains the incoming first argument. Assume that this argument
28870 contains a pointer, and is the one used to pass the `this' pointer
28871 in C++. This is the incoming argument *before* the function
28872 prologue, e.g. `%o0' on a sparc. The addition must preserve the
28873 values of all other incoming arguments.
28875 After the addition, emit code to jump to FUNCTION, which is a
28876 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
28877 not touch the return address. Hence returning from FUNCTION will
28878 return to whoever called the current `thunk'.
28880 The effect must be as if FUNCTION had been called directly with the
28881 adjusted first argument. This macro is responsible for emitting
28882 all of the code for a thunk function; output_function_prologue()
28883 and output_function_epilogue() are not invoked.
28885 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
28886 been extracted from it.) It might possibly be useful on some
28887 targets, but probably not.
28889 If you do not define this macro, the target-independent code in the
28890 C++ frontend will generate a less efficient heavyweight thunk that
28891 calls FUNCTION instead of jumping to it. The generic approach does
28892 not support varargs. */
28894 static void
28895 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
28896 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
28897 tree function)
28899 rtx this_rtx, funexp;
28900 rtx_insn *insn;
28902 reload_completed = 1;
28903 epilogue_completed = 1;
28905 /* Mark the end of the (empty) prologue. */
28906 emit_note (NOTE_INSN_PROLOGUE_END);
28908 /* Find the "this" pointer. If the function returns a structure,
28909 the structure return pointer is in r3. */
28910 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
28911 this_rtx = gen_rtx_REG (Pmode, 4);
28912 else
28913 this_rtx = gen_rtx_REG (Pmode, 3);
28915 /* Apply the constant offset, if required. */
28916 if (delta)
28917 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
28919 /* Apply the offset from the vtable, if required. */
28920 if (vcall_offset)
28922 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
28923 rtx tmp = gen_rtx_REG (Pmode, 12);
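/* Load the vtable pointer from *this into TMP, then fetch the
adjustment at VCALL_OFFSET; if the offset does not fit in a signed
16-bit displacement, add it to TMP separately first. */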
28925 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
28926 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
28928 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
28929 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
28931 else
28933 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
28935 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
28937 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
28940 /* Generate a tail call to the target function. */
28941 if (!TREE_USED (function))
28943 assemble_external (function);
28944 TREE_USED (function) = 1;
28946 funexp = XEXP (DECL_RTL (function), 0);
28947 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
28949 #if TARGET_MACHO
28950 if (MACHOPIC_INDIRECT)
28951 funexp = machopic_indirect_call_target (funexp);
28952 #endif
28954 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
28955 generate sibcall RTL explicitly. */
28956 insn = emit_call_insn (
28957 gen_rtx_PARALLEL (VOIDmode,
28958 gen_rtvec (3,
28959 gen_rtx_CALL (VOIDmode,
28960 funexp, const0_rtx),
28961 gen_rtx_USE (VOIDmode, const0_rtx),
28962 simple_return_rtx)));
28963 SIBLING_CALL_P (insn) = 1;
28964 emit_barrier ();
28966 /* Run just enough of rest_of_compilation to get the insns emitted.
28967 There's not really enough bulk here to make other passes such as
28968 instruction scheduling worth while. Note that use_thunk calls
28969 assemble_start_function and assemble_end_function. */
28970 insn = get_insns ();
28971 shorten_branches (insn);
28972 final_start_function (insn, file, 1);
28973 final (insn, file, 1);
28974 final_end_function ();
28976 reload_completed = 0;
28977 epilogue_completed = 0;
28980 /* A quick summary of the various types of 'constant-pool tables'
28981 under PowerPC:
28983 Target      Flags                Name             One table per
28984 AIX         (none)               AIX TOC          object file
28985 AIX         -mfull-toc           AIX TOC          object file
28986 AIX         -mminimal-toc        AIX minimal TOC  translation unit
28987 SVR4/EABI   (none)               SVR4 SDATA       object file
28988 SVR4/EABI   -fpic                SVR4 pic         object file
28989 SVR4/EABI   -fPIC                SVR4 PIC         translation unit
28990 SVR4/EABI   -mrelocatable        EABI TOC         function
28991 SVR4/EABI   -maix                AIX TOC          object file
28992 SVR4/EABI   -maix -mminimal-toc  AIX minimal TOC  translation unit

28995 Name             Reg.  Set by  Made by  Addrs?  FP?      Sum?
28998 AIX TOC           2    crt0    as       Y       option   option
28999 AIX minimal TOC  30    prolog  gcc      Y       Y        option
29000 SVR4 SDATA       13    crt0    gcc      N       Y        N
29001 SVR4 pic         30    prolog  ld       Y       not yet  N
29002 SVR4 PIC         30    prolog  gcc      Y       option   option
29003 EABI TOC         30    prolog  gcc      Y       option   option
29007 /* Hash functions for the hash table. */
29009 static unsigned
29010 rs6000_hash_constant (rtx k)
29012 enum rtx_code code = GET_CODE (k);
29013 machine_mode mode = GET_MODE (k);
29014 unsigned result = (code << 3) ^ mode;
29015 const char *format;
29016 int flen, fidx;
29018 format = GET_RTX_FORMAT (code);
29019 flen = strlen (format);
29020 fidx = 0;
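/* The fields of K are folded into RESULT below using the prime
multipliers 613 and 1231. */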
29022 switch (code)
29024 case LABEL_REF:
29025 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29027 case CONST_WIDE_INT:
29029 int i;
29030 flen = CONST_WIDE_INT_NUNITS (k);
29031 for (i = 0; i < flen; i++)
29032 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29033 return result;
29036 case CONST_DOUBLE:
29037 if (mode != VOIDmode)
29038 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29039 flen = 2;
29040 break;
29042 case CODE_LABEL:
29043 fidx = 3;
29044 break;
29046 default:
29047 break;
29050 for (; fidx < flen; fidx++)
29051 switch (format[fidx])
29053 case 's':
29055 unsigned i, len;
29056 const char *str = XSTR (k, fidx);
29057 len = strlen (str);
29058 result = result * 613 + len;
29059 for (i = 0; i < len; i++)
29060 result = result * 613 + (unsigned) str[i];
29061 break;
29063 case 'u':
29064 case 'e':
29065 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29066 break;
29067 case 'i':
29068 case 'n':
29069 result = result * 613 + (unsigned) XINT (k, fidx);
29070 break;
29071 case 'w':
29072 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29073 result = result * 613 + (unsigned) XWINT (k, fidx);
29074 else
29076 size_t i;
29077 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29078 result = result * 613 + (unsigned) (XWINT (k, fidx)
29079 >> CHAR_BIT * i);
29081 break;
29082 case '0':
29083 break;
29084 default:
29085 gcc_unreachable ();
29088 return result;
29091 hashval_t
29092 toc_hasher::hash (toc_hash_struct *thc)
29094 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29097 /* Compare H1 and H2 for equivalence. */
29099 bool
29100 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29102 rtx r1 = h1->key;
29103 rtx r2 = h2->key;
29105 if (h1->key_mode != h2->key_mode)
29106 return 0;
29108 return rtx_equal_p (r1, r2);
29111 /* These are the names given by the C++ front-end to vtables, and
29112 vtable-like objects. Ideally, this logic should not be here;
29113 instead, there should be some programmatic way of inquiring as
29114 to whether or not an object is a vtable. */
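/* "_vt." is the old g++ vtable prefix; "_ZTV", "_ZTT", "_ZTI" and
"_ZTC" are the Itanium C++ ABI manglings for vtables, VTTs,
typeinfo objects and construction vtables respectively. */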
29116 #define VTABLE_NAME_P(NAME) \
29117 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29118 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29119 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29120 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29121 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
29123 #ifdef NO_DOLLAR_IN_LABEL
29124 /* Return a GGC-allocated character string translating dollar signs in
29125 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
29127 const char *
29128 rs6000_xcoff_strip_dollar (const char *name)
29130 char *strip, *p;
29131 const char *q;
29132 size_t len;
29134 q = (const char *) strchr (name, '$');
29136 if (q == 0 || q == name)
29137 return name;
29139 len = strlen (name);
29140 strip = XALLOCAVEC (char, len + 1);
29141 strcpy (strip, name);
29142 p = strip + (q - name);
29143 while (p)
29145 *p = '_';
29146 p = strchr (p + 1, '$');
29149 return ggc_alloc_string (strip, len);
29151 #endif
29153 void
29154 rs6000_output_symbol_ref (FILE *file, rtx x)
29156 const char *name = XSTR (x, 0);
29158 /* Currently C++ toc references to vtables can be emitted before it
29159 is decided whether the vtable is public or private. If this is
29160 the case, then the linker will eventually complain that there is
29161 a reference to an unknown section. Thus, for vtables only,
29162 we emit the TOC reference to reference the identifier and not the
29163 symbol. */
29164 if (VTABLE_NAME_P (name))
29166 RS6000_OUTPUT_BASENAME (file, name);
29168 else
29169 assemble_name (file, name);
29172 /* Output a TOC entry. We derive the entry name from what is being
29173 written. */
29175 void
29176 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29178 char buf[256];
29179 const char *name = buf;
29180 rtx base = x;
29181 HOST_WIDE_INT offset = 0;
29183 gcc_assert (!TARGET_NO_TOC);
29185 /* When the linker won't eliminate them, don't output duplicate
29186 TOC entries (this happens on AIX if there is any kind of TOC,
29187 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29188 CODE_LABELs. */
29189 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29191 struct toc_hash_struct *h;
29193 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29194 time because GGC is not initialized at that point. */
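/* The initial size of 1021 buckets is prime, which helps spread
the hash values evenly. */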
29195 if (toc_hash_table == NULL)
29196 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29198 h = ggc_alloc<toc_hash_struct> ();
29199 h->key = x;
29200 h->key_mode = mode;
29201 h->labelno = labelno;
29203 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29204 if (*found == NULL)
29205 *found = h;
29206 else /* This is indeed a duplicate.
29207 Set this label equal to that label. */
29209 fputs ("\t.set ", file);
29210 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29211 fprintf (file, "%d,", labelno);
29212 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29213 fprintf (file, "%d\n", ((*found)->labelno));
29215 #ifdef HAVE_AS_TLS
29216 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29217 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29218 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29220 fputs ("\t.set ", file);
29221 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29222 fprintf (file, "%d,", labelno);
29223 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29224 fprintf (file, "%d\n", ((*found)->labelno));
29226 #endif
29227 return;
29231 /* If we're going to put a double constant in the TOC, make sure it's
29232 aligned properly when strict alignment is on. */
29233 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29234 && STRICT_ALIGNMENT
29235 && GET_MODE_BITSIZE (mode) >= 64
29236 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
29237 ASM_OUTPUT_ALIGN (file, 3);
29240 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29242 /* Handle FP constants specially. Note that if we have a minimal
29243 TOC, things we put here aren't actually in the TOC, so we can allow
29244 FP constants. */
29245 if (GET_CODE (x) == CONST_DOUBLE
29246 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29247 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29249 long k[4];
29251 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29252 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29253 else
29254 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29256 if (TARGET_64BIT)
29258 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29259 fputs (DOUBLE_INT_ASM_OP, file);
29260 else
29261 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29262 k[0] & 0xffffffff, k[1] & 0xffffffff,
29263 k[2] & 0xffffffff, k[3] & 0xffffffff);
29264 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29265 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29266 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29267 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29268 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29269 return;
29271 else
29273 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29274 fputs ("\t.long ", file);
29275 else
29276 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29277 k[0] & 0xffffffff, k[1] & 0xffffffff,
29278 k[2] & 0xffffffff, k[3] & 0xffffffff);
29279 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29280 k[0] & 0xffffffff, k[1] & 0xffffffff,
29281 k[2] & 0xffffffff, k[3] & 0xffffffff);
29282 return;
29285 else if (GET_CODE (x) == CONST_DOUBLE
29286 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29288 long k[2];
29290 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29291 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29292 else
29293 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29295 if (TARGET_64BIT)
29297 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29298 fputs (DOUBLE_INT_ASM_OP, file);
29299 else
29300 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29301 k[0] & 0xffffffff, k[1] & 0xffffffff);
29302 fprintf (file, "0x%lx%08lx\n",
29303 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29304 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
29305 return;
29307 else
29309 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29310 fputs ("\t.long ", file);
29311 else
29312 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
29313 k[0] & 0xffffffff, k[1] & 0xffffffff);
29314 fprintf (file, "0x%lx,0x%lx\n",
29315 k[0] & 0xffffffff, k[1] & 0xffffffff);
29316 return;
29319 else if (GET_CODE (x) == CONST_DOUBLE
29320 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
29322 long l;
29324 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29325 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
29326 else
29327 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
29329 if (TARGET_64BIT)
29331 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29332 fputs (DOUBLE_INT_ASM_OP, file);
29333 else
29334 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29335 if (WORDS_BIG_ENDIAN)
29336 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
29337 else
29338 fprintf (file, "0x%lx\n", l & 0xffffffff);
29339 return;
29341 else
29343 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29344 fputs ("\t.long ", file);
29345 else
29346 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
29347 fprintf (file, "0x%lx\n", l & 0xffffffff);
29348 return;
29351 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
29353 unsigned HOST_WIDE_INT low;
29354 HOST_WIDE_INT high;
29356 low = INTVAL (x) & 0xffffffff;
29357 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
29359 /* TOC entries are always Pmode-sized, so when big-endian
29360 smaller integer constants in the TOC need to be padded.
29361 (This is still a win over putting the constants in
29362 a separate constant pool, because then we'd have
29363 to have both a TOC entry _and_ the actual constant.)
29365 For a 32-bit target, CONST_INT values are loaded and shifted
29366 entirely within `low' and can be stored in one TOC entry. */
29368 /* It would be easy to make this work, but it doesn't now. */
29369 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
29371 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
29373 low |= high << 32;
29374 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
29375 high = (HOST_WIDE_INT) low >> 32;
29376 low &= 0xffffffff;
29379 if (TARGET_64BIT)
29381 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29382 fputs (DOUBLE_INT_ASM_OP, file);
29383 else
29384 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29385 (long) high & 0xffffffff, (long) low & 0xffffffff);
29386 fprintf (file, "0x%lx%08lx\n",
29387 (long) high & 0xffffffff, (long) low & 0xffffffff);
29388 return;
29390 else
29392 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
29394 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29395 fputs ("\t.long ", file);
29396 else
29397 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
29398 (long) high & 0xffffffff, (long) low & 0xffffffff);
29399 fprintf (file, "0x%lx,0x%lx\n",
29400 (long) high & 0xffffffff, (long) low & 0xffffffff);
29402 else
29404 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29405 fputs ("\t.long ", file);
29406 else
29407 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
29408 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
29410 return;
29414 if (GET_CODE (x) == CONST)
29416 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
29417 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
29419 base = XEXP (XEXP (x, 0), 0);
29420 offset = INTVAL (XEXP (XEXP (x, 0), 1));
29423 switch (GET_CODE (base))
29425 case SYMBOL_REF:
29426 name = XSTR (base, 0);
29427 break;
29429 case LABEL_REF:
29430 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
29431 CODE_LABEL_NUMBER (XEXP (base, 0)));
29432 break;
29434 case CODE_LABEL:
29435 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
29436 break;
29438 default:
29439 gcc_unreachable ();
29442 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29443 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
29444 else
29446 fputs ("\t.tc ", file);
29447 RS6000_OUTPUT_BASENAME (file, name);
29449 if (offset < 0)
29450 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
29451 else if (offset)
29452 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
29454 /* Mark large TOC symbols on AIX with [TE] so they are mapped
29455 after other TOC symbols, reducing overflow of small TOC access
29456 to [TC] symbols. */
29457 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
29458 ? "[TE]," : "[TC],", file);
29461 /* Currently C++ toc references to vtables can be emitted before it
29462 is decided whether the vtable is public or private. If this is
29463 the case, then the linker will eventually complain that there is
29464 a TOC reference to an unknown section. Thus, for vtables only,
29465 we emit the TOC reference to reference the symbol and not the
29466 section. */
29467 if (VTABLE_NAME_P (name))
29469 RS6000_OUTPUT_BASENAME (file, name);
29470 if (offset < 0)
29471 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
29472 else if (offset > 0)
29473 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
29475 else
29476 output_addr_const (file, x);
29478 #if HAVE_AS_TLS
29479 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
29481 switch (SYMBOL_REF_TLS_MODEL (base))
29483 case 0:
29484 break;
29485 case TLS_MODEL_LOCAL_EXEC:
29486 fputs ("@le", file);
29487 break;
29488 case TLS_MODEL_INITIAL_EXEC:
29489 fputs ("@ie", file);
29490 break;
29491 /* Use global-dynamic for local-dynamic. */
29492 case TLS_MODEL_GLOBAL_DYNAMIC:
29493 case TLS_MODEL_LOCAL_DYNAMIC:
29494 putc ('\n', file);
29495 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
29496 fputs ("\t.tc .", file);
29497 RS6000_OUTPUT_BASENAME (file, name);
29498 fputs ("[TC],", file);
29499 output_addr_const (file, x);
29500 fputs ("@m", file);
29501 break;
29502 default:
29503 gcc_unreachable ();
29506 #endif
29508 putc ('\n', file);
29511 /* Output an assembler pseudo-op to write an ASCII string of N characters
29512 starting at P to FILE.
29514 On the RS/6000, we have to do this using the .byte operation and
29515 write out special characters outside the quoted string.
29516 Also, the assembler is broken; very long strings are truncated,
29517 so we must artificially break them up early. */
29519 void
29520 output_ascii (FILE *file, const char *p, int n)
29522 char c;
29523 int i, count_string;
29524 const char *for_string = "\t.byte \"";
29525 const char *for_decimal = "\t.byte ";
29526 const char *to_close = NULL;
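/* FOR_STRING and FOR_DECIMAL hold the directive text to emit before
the next quoted character or decimal byte respectively; TO_CLOSE
holds whatever is needed to terminate a currently open string. */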
29528 count_string = 0;
29529 for (i = 0; i < n; i++)
29531 c = *p++;
29532 if (c >= ' ' && c < 0177)
29534 if (for_string)
29535 fputs (for_string, file);
29536 putc (c, file);
29538 /* Write two quotes to get one. */
29539 if (c == '"')
29541 putc (c, file);
29542 ++count_string;
29545 for_string = NULL;
29546 for_decimal = "\"\n\t.byte ";
29547 to_close = "\"\n";
29548 ++count_string;
29550 if (count_string >= 512)
29552 fputs (to_close, file);
29554 for_string = "\t.byte \"";
29555 for_decimal = "\t.byte ";
29556 to_close = NULL;
29557 count_string = 0;
29560 else
29562 if (for_decimal)
29563 fputs (for_decimal, file);
29564 fprintf (file, "%d", c);
29566 for_string = "\n\t.byte \"";
29567 for_decimal = ", ";
29568 to_close = "\n";
29569 count_string = 0;
29573 /* Now close the string if we have written one. Then end the line. */
29574 if (to_close)
29575 fputs (to_close, file);
29578 /* Generate a unique section name for FILENAME for a section type
29579 represented by SECTION_DESC. Output goes into BUF.
29581 SECTION_DESC can be any string, as long as it is different for each
29582 possible section type.
29584 We name the section in the same manner as xlc. The name begins with an
29585 underscore followed by the filename (after stripping any leading directory
29586 names) with the last period replaced by the string SECTION_DESC. If
29587 FILENAME does not contain a period, SECTION_DESC is appended to the end of
29588 the name. */
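/* E.g., FILENAME "foo.c" with SECTION_DESC "_bss_" produces the
name "_foo_bss_". */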
29590 void
29591 rs6000_gen_section_name (char **buf, const char *filename,
29592 const char *section_desc)
29594 const char *q, *after_last_slash, *last_period = 0;
29595 char *p;
29596 int len;
29598 after_last_slash = filename;
29599 for (q = filename; *q; q++)
29601 if (*q == '/')
29602 after_last_slash = q + 1;
29603 else if (*q == '.')
29604 last_period = q;
29607 len = strlen (after_last_slash) + strlen (section_desc) + 2;
29608 *buf = (char *) xmalloc (len);
29610 p = *buf;
29611 *p++ = '_';
29613 for (q = after_last_slash; *q; q++)
29615 if (q == last_period)
29617 strcpy (p, section_desc);
29618 p += strlen (section_desc);
29619 break;
29622 else if (ISALNUM (*q))
29623 *p++ = *q;
29626 if (last_period == 0)
29627 strcpy (p, section_desc);
29628 else
29629 *p = '\0';
29632 /* Emit profile function. */
29634 void
29635 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
29637 /* Non-standard profiling for kernels, which just saves LR then calls
29638 _mcount without worrying about arg saves. The idea is to change
29639 the function prologue as little as possible as it isn't easy to
29640 account for arg save/restore code added just for _mcount. */
29641 if (TARGET_PROFILE_KERNEL)
29642 return;
29644 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29646 #ifndef NO_PROFILE_COUNTERS
29647 # define NO_PROFILE_COUNTERS 0
29648 #endif
29649 if (NO_PROFILE_COUNTERS)
29650 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29651 LCT_NORMAL, VOIDmode);
29652 else
29654 char buf[30];
29655 const char *label_name;
29656 rtx fun;
29658 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29659 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
29660 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
29662 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
29663 LCT_NORMAL, VOIDmode, fun, Pmode);
29666 else if (DEFAULT_ABI == ABI_DARWIN)
29668 const char *mcount_name = RS6000_MCOUNT;
29669 int caller_addr_regno = LR_REGNO;
29671 /* Be conservative and always set this, at least for now. */
29672 crtl->uses_pic_offset_table = 1;
29674 #if TARGET_MACHO
29675 /* For PIC code, set up a stub and collect the caller's address
29676 from r0, which is where the prologue puts it. */
29677 if (MACHOPIC_INDIRECT
29678 && crtl->uses_pic_offset_table)
29679 caller_addr_regno = 0;
29680 #endif
29681 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
29682 LCT_NORMAL, VOIDmode,
29683 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
29687 /* Write function profiler code. */
29689 void
29690 output_function_profiler (FILE *file, int labelno)
29692 char buf[100];
29694 switch (DEFAULT_ABI)
29696 default:
29697 gcc_unreachable ();
29699 case ABI_V4:
29700 if (!TARGET_32BIT)
29702 warning (0, "no profiling of 64-bit code for this ABI");
29703 return;
29705 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
29706 fprintf (file, "\tmflr %s\n", reg_names[0]);
29707 if (NO_PROFILE_COUNTERS)
29709 asm_fprintf (file, "\tstw %s,4(%s)\n",
29710 reg_names[0], reg_names[1]);
29712 else if (TARGET_SECURE_PLT && flag_pic)
29714 if (TARGET_LINK_STACK)
29716 char name[32];
29717 get_ppc476_thunk_name (name);
29718 asm_fprintf (file, "\tbl %s\n", name);
29720 else
29721 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
29722 asm_fprintf (file, "\tstw %s,4(%s)\n",
29723 reg_names[0], reg_names[1]);
29724 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29725 asm_fprintf (file, "\taddis %s,%s,",
29726 reg_names[12], reg_names[12]);
29727 assemble_name (file, buf);
29728 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
29729 assemble_name (file, buf);
29730 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
29732 else if (flag_pic == 1)
29734 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
29735 asm_fprintf (file, "\tstw %s,4(%s)\n",
29736 reg_names[0], reg_names[1]);
29737 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
29738 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
29739 assemble_name (file, buf);
29740 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
29742 else if (flag_pic > 1)
29744 asm_fprintf (file, "\tstw %s,4(%s)\n",
29745 reg_names[0], reg_names[1]);
29746 /* Now, we need to get the address of the label. */
29747 if (TARGET_LINK_STACK)
29749 char name[32];
29750 get_ppc476_thunk_name (name);
29751 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
29752 assemble_name (file, buf);
29753 fputs ("-.\n1:", file);
29754 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29755 asm_fprintf (file, "\taddi %s,%s,4\n",
29756 reg_names[11], reg_names[11]);
29758 else
29760 fputs ("\tbcl 20,31,1f\n\t.long ", file);
29761 assemble_name (file, buf);
29762 fputs ("-.\n1:", file);
29763 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
29765 asm_fprintf (file, "\tlwz %s,0(%s)\n",
29766 reg_names[0], reg_names[11]);
29767 asm_fprintf (file, "\tadd %s,%s,%s\n",
29768 reg_names[0], reg_names[0], reg_names[11]);
29770 else
29772 asm_fprintf (file, "\tlis %s,", reg_names[12]);
29773 assemble_name (file, buf);
29774 fputs ("@ha\n", file);
29775 asm_fprintf (file, "\tstw %s,4(%s)\n",
29776 reg_names[0], reg_names[1]);
29777 asm_fprintf (file, "\tla %s,", reg_names[0]);
29778 assemble_name (file, buf);
29779 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
29782 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
29783 fprintf (file, "\tbl %s%s\n",
29784 RS6000_MCOUNT, flag_pic ? "@plt" : "");
29785 break;
29787 case ABI_AIX:
29788 case ABI_ELFv2:
29789 case ABI_DARWIN:
29790 /* Don't do anything, done in output_profile_hook (). */
29791 break;
29797 /* The following variable value is the last issued insn. */
29799 static rtx_insn *last_scheduled_insn;
29801 /* The following variable helps to balance issuing of load and
29802 store instructions. */
29804 static int load_store_pendulum;
29806 /* The following variable helps pair divide insns during scheduling. */
29807 static int divide_cnt;
29808 /* The following variable helps pair and alternate vector and vector load
29809 insns during scheduling. */
29810 static int vec_pairing;
29813 /* Power4 load update and store update instructions are cracked into a
29814 load or store and an integer insn which are executed in the same cycle.
29815 Branches have their own dispatch slot which does not count against the
29816 GCC issue rate, but it changes the program flow so there are no other
29817 instructions to issue in this cycle. */
29819 static int
29820 rs6000_variable_issue_1 (rtx_insn *insn, int more)
29822 last_scheduled_insn = insn;
29823 if (GET_CODE (PATTERN (insn)) == USE
29824 || GET_CODE (PATTERN (insn)) == CLOBBER)
29826 cached_can_issue_more = more;
29827 return cached_can_issue_more;
29830 if (insn_terminates_group_p (insn, current_group))
29832 cached_can_issue_more = 0;
29833 return cached_can_issue_more;
29836 /* If the insn has no reservation but we reach here anyway, do not charge it against the issue rate. */
29837 if (recog_memoized (insn) < 0)
29838 return more;
29840 if (rs6000_sched_groups)
29842 if (is_microcoded_insn (insn))
29843 cached_can_issue_more = 0;
29844 else if (is_cracked_insn (insn))
29845 cached_can_issue_more = more > 2 ? more - 2 : 0;
29846 else
29847 cached_can_issue_more = more - 1;
29849 return cached_can_issue_more;
29852 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
29853 return 0;
29855 cached_can_issue_more = more - 1;
29856 return cached_can_issue_more;
29859 static int
29860 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
29862 int r = rs6000_variable_issue_1 (insn, more);
29863 if (verbose)
29864 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
29865 return r;
29868 /* Adjust the cost of a scheduling dependency. Return the new cost of
29869 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
29871 static int
29872 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
29873 unsigned int)
29875 enum attr_type attr_type;
29877 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
29878 return cost;
29880 switch (dep_type)
29882 case REG_DEP_TRUE:
29884 /* Data dependency; DEP_INSN writes a register that INSN reads
29885 some cycles later. */
29887 /* Separate a load from a narrower, dependent store. */
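/* (A wider load that depends on a narrower store cannot be forwarded
from the store queue, so pad the latency to keep the two apart.) */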
29888 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
29889 && GET_CODE (PATTERN (insn)) == SET
29890 && GET_CODE (PATTERN (dep_insn)) == SET
29891 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
29892 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
29893 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
29894 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
29895 return cost + 14;
29897 attr_type = get_attr_type (insn);
29899 switch (attr_type)
29901 case TYPE_JMPREG:
29902 /* Tell the first scheduling pass about the latency between
29903 a mtctr and bctr (and mtlr and br/blr). The first
29904 scheduling pass will not know about this latency since
29905 the mtctr instruction, which has the latency associated
29906 to it, will be generated by reload. */
29907 return 4;
29908 case TYPE_BRANCH:
29909 /* Leave some extra cycles between a compare and its
29910 dependent branch, to inhibit expensive mispredicts. */
29911 if ((rs6000_tune == PROCESSOR_PPC603
29912 || rs6000_tune == PROCESSOR_PPC604
29913 || rs6000_tune == PROCESSOR_PPC604e
29914 || rs6000_tune == PROCESSOR_PPC620
29915 || rs6000_tune == PROCESSOR_PPC630
29916 || rs6000_tune == PROCESSOR_PPC750
29917 || rs6000_tune == PROCESSOR_PPC7400
29918 || rs6000_tune == PROCESSOR_PPC7450
29919 || rs6000_tune == PROCESSOR_PPCE5500
29920 || rs6000_tune == PROCESSOR_PPCE6500
29921 || rs6000_tune == PROCESSOR_POWER4
29922 || rs6000_tune == PROCESSOR_POWER5
29923 || rs6000_tune == PROCESSOR_POWER7
29924 || rs6000_tune == PROCESSOR_POWER8
29925 || rs6000_tune == PROCESSOR_POWER9
29926 || rs6000_tune == PROCESSOR_CELL)
29927 && recog_memoized (dep_insn)
29928 && (INSN_CODE (dep_insn) >= 0))
29930 switch (get_attr_type (dep_insn))
29932 case TYPE_CMP:
29933 case TYPE_FPCOMPARE:
29934 case TYPE_CR_LOGICAL:
29935 return cost + 2;
29936 case TYPE_EXTS:
29937 case TYPE_MUL:
29938 if (get_attr_dot (dep_insn) == DOT_YES)
29939 return cost + 2;
29940 else
29941 break;
29942 case TYPE_SHIFT:
29943 if (get_attr_dot (dep_insn) == DOT_YES
29944 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
29945 return cost + 2;
29946 else
29947 break;
29948 default:
29949 break;
29951 break;
29953 case TYPE_STORE:
29954 case TYPE_FPSTORE:
29955 if ((rs6000_tune == PROCESSOR_POWER6)
29956 && recog_memoized (dep_insn)
29957 && (INSN_CODE (dep_insn) >= 0))
29960 if (GET_CODE (PATTERN (insn)) != SET)
29961 /* If this happens, we have to extend this to schedule
29962 optimally. Return default for now. */
29963 return cost;
29965 /* Adjust the cost for the case where the value written
29966 by a fixed point operation is used as the address
29967 gen value on a store. */
29968 switch (get_attr_type (dep_insn))
29970 case TYPE_LOAD:
29971 case TYPE_CNTLZ:
29973 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29974 return get_attr_sign_extend (dep_insn)
29975 == SIGN_EXTEND_YES ? 6 : 4;
29976 break;
29978 case TYPE_SHIFT:
29980 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29981 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
29982 6 : 3;
29983 break;
29985 case TYPE_INTEGER:
29986 case TYPE_ADD:
29987 case TYPE_LOGICAL:
29988 case TYPE_EXTS:
29989 case TYPE_INSERT:
29991 if (! rs6000_store_data_bypass_p (dep_insn, insn))
29992 return 3;
29993 break;
29995 case TYPE_STORE:
29996 case TYPE_FPLOAD:
29997 case TYPE_FPSTORE:
29999 if (get_attr_update (dep_insn) == UPDATE_YES
30000 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30001 return 3;
30002 break;
30004 case TYPE_MUL:
30006 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30007 return 17;
30008 break;
30010 case TYPE_DIV:
30012 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30013 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30014 break;
30016 default:
30017 break;
30020 break;
30022 case TYPE_LOAD:
30023 if ((rs6000_tune == PROCESSOR_POWER6)
30024 && recog_memoized (dep_insn)
30025 && (INSN_CODE (dep_insn) >= 0))
30028 /* Adjust the cost for the case where the value written
30029 by a fixed point instruction is used within the address
30030 gen portion of a subsequent load(u)(x). */
30031 switch (get_attr_type (dep_insn))
30033 case TYPE_LOAD:
30034 case TYPE_CNTLZ:
30036 if (set_to_load_agen (dep_insn, insn))
30037 return get_attr_sign_extend (dep_insn)
30038 == SIGN_EXTEND_YES ? 6 : 4;
30039 break;
30041 case TYPE_SHIFT:
30043 if (set_to_load_agen (dep_insn, insn))
30044 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30045 6 : 3;
30046 break;
30048 case TYPE_INTEGER:
30049 case TYPE_ADD:
30050 case TYPE_LOGICAL:
30051 case TYPE_EXTS:
30052 case TYPE_INSERT:
30054 if (set_to_load_agen (dep_insn, insn))
30055 return 3;
30056 break;
30058 case TYPE_STORE:
30059 case TYPE_FPLOAD:
30060 case TYPE_FPSTORE:
30062 if (get_attr_update (dep_insn) == UPDATE_YES
30063 && set_to_load_agen (dep_insn, insn))
30064 return 3;
30065 break;
30067 case TYPE_MUL:
30069 if (set_to_load_agen (dep_insn, insn))
30070 return 17;
30071 break;
30073 case TYPE_DIV:
30075 if (set_to_load_agen (dep_insn, insn))
30076 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30077 break;
30079 default:
30080 break;
30083 break;
30085 case TYPE_FPLOAD:
30086 if ((rs6000_tune == PROCESSOR_POWER6)
30087 && get_attr_update (insn) == UPDATE_NO
30088 && recog_memoized (dep_insn)
30089 && (INSN_CODE (dep_insn) >= 0)
30090 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30091 return 2;
30093 default:
30094 break;
30097 /* Fall out to return default cost. */
30099 break;
30101 case REG_DEP_OUTPUT:
30102 /* Output dependency; DEP_INSN writes a register that INSN writes some
30103 cycles later. */
30104 if ((rs6000_tune == PROCESSOR_POWER6)
30105 && recog_memoized (dep_insn)
30106 && (INSN_CODE (dep_insn) >= 0))
30108 attr_type = get_attr_type (insn);
30110 switch (attr_type)
30112 case TYPE_FP:
30113 case TYPE_FPSIMPLE:
30114 if (get_attr_type (dep_insn) == TYPE_FP
30115 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30116 return 1;
30117 break;
30118 case TYPE_FPLOAD:
30119 if (get_attr_update (insn) == UPDATE_NO
30120 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30121 return 2;
30122 break;
30123 default:
30124 break;
30127 /* Fall through, no cost for output dependency. */
30128 /* FALLTHRU */
30130 case REG_DEP_ANTI:
30131 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30132 cycles later. */
30133 return 0;
30135 default:
30136 gcc_unreachable ();
30139 return cost;
30142 /* Debug version of rs6000_adjust_cost. */
30144 static int
30145 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30146 int cost, unsigned int dw)
30148 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30150 if (ret != cost)
30152 const char *dep;
30154 switch (dep_type)
30156 default: dep = "unknown dependency"; break;
30157 case REG_DEP_TRUE: dep = "data dependency"; break;
30158 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30159 case REG_DEP_ANTI: dep = "anti dependency"; break;
30162 fprintf (stderr,
30163 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30164 "%s, insn:\n", ret, cost, dep);
30166 debug_rtx (insn);
30169 return ret;
30172 /* The function returns true if INSN is microcoded.
30173 Return false otherwise. */
30175 static bool
30176 is_microcoded_insn (rtx_insn *insn)
30178 if (!insn || !NONDEBUG_INSN_P (insn)
30179 || GET_CODE (PATTERN (insn)) == USE
30180 || GET_CODE (PATTERN (insn)) == CLOBBER)
30181 return false;
30183 if (rs6000_tune == PROCESSOR_CELL)
30184 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30186 if (rs6000_sched_groups
30187 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30189 enum attr_type type = get_attr_type (insn);
30190 if ((type == TYPE_LOAD
30191 && get_attr_update (insn) == UPDATE_YES
30192 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30193 || ((type == TYPE_LOAD || type == TYPE_STORE)
30194 && get_attr_update (insn) == UPDATE_YES
30195 && get_attr_indexed (insn) == INDEXED_YES)
30196 || type == TYPE_MFCR)
30197 return true;
30200 return false;
30203 /* The function returns true if INSN is cracked into 2 instructions
30204 by the processor (and therefore occupies 2 issue slots). */
30206 static bool
30207 is_cracked_insn (rtx_insn *insn)
30209 if (!insn || !NONDEBUG_INSN_P (insn)
30210 || GET_CODE (PATTERN (insn)) == USE
30211 || GET_CODE (PATTERN (insn)) == CLOBBER)
30212 return false;
30214 if (rs6000_sched_groups
30215 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30217 enum attr_type type = get_attr_type (insn);
30218 if ((type == TYPE_LOAD
30219 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30220 && get_attr_update (insn) == UPDATE_NO)
30221 || (type == TYPE_LOAD
30222 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30223 && get_attr_update (insn) == UPDATE_YES
30224 && get_attr_indexed (insn) == INDEXED_NO)
30225 || (type == TYPE_STORE
30226 && get_attr_update (insn) == UPDATE_YES
30227 && get_attr_indexed (insn) == INDEXED_NO)
30228 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30229 && get_attr_update (insn) == UPDATE_YES)
30230 || (type == TYPE_CR_LOGICAL
30231 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
30232 || (type == TYPE_EXTS
30233 && get_attr_dot (insn) == DOT_YES)
30234 || (type == TYPE_SHIFT
30235 && get_attr_dot (insn) == DOT_YES
30236 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30237 || (type == TYPE_MUL
30238 && get_attr_dot (insn) == DOT_YES)
30239 || type == TYPE_DIV
30240 || (type == TYPE_INSERT
30241 && get_attr_size (insn) == SIZE_32))
30242 return true;
30245 return false;
30248 /* The function returns true if INSN can be issued only from
30249 the branch slot. */
30251 static bool
30252 is_branch_slot_insn (rtx_insn *insn)
30254 if (!insn || !NONDEBUG_INSN_P (insn)
30255 || GET_CODE (PATTERN (insn)) == USE
30256 || GET_CODE (PATTERN (insn)) == CLOBBER)
30257 return false;
30259 if (rs6000_sched_groups)
30261 enum attr_type type = get_attr_type (insn);
30262 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30263 return true;
30264 return false;
30267 return false;
30270 /* The function returns true if OUT_INSN sets a value that is
30271 used in the address generation computation of IN_INSN. */
30272 static bool
30273 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30275 rtx out_set, in_set;
30277 /* For performance reasons, only handle the simple case where
30278 both insns are a single_set. */
30279 out_set = single_set (out_insn);
30280 if (out_set)
30282 in_set = single_set (in_insn);
30283 if (in_set)
30284 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30287 return false;
30290 /* Try to determine base/offset/size parts of the given MEM.
30291 Return true if successful, false if all the values couldn't
30292 be determined.
30294 This function only looks for REG or REG+CONST address forms.
30295 REG+REG address form will return false. */
30297 static bool
30298 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30299 HOST_WIDE_INT *size)
30301 rtx addr_rtx;
30302 if (MEM_SIZE_KNOWN_P (mem))
30303 *size = MEM_SIZE (mem);
30304 else
30305 return false;
30307 addr_rtx = (XEXP (mem, 0));
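/* For a pre-modify address, the storage actually referenced is
given by the second operand, the modified value. */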
30308 if (GET_CODE (addr_rtx) == PRE_MODIFY)
30309 addr_rtx = XEXP (addr_rtx, 1);
30311 *offset = 0;
30312 while (GET_CODE (addr_rtx) == PLUS
30313 && CONST_INT_P (XEXP (addr_rtx, 1)))
30315 *offset += INTVAL (XEXP (addr_rtx, 1));
30316 addr_rtx = XEXP (addr_rtx, 0);
30318 if (!REG_P (addr_rtx))
30319 return false;
30321 *base = addr_rtx;
30322 return true;
30325 /* The function returns true if the target storage location of
30326 MEM1 is adjacent to the target storage location of MEM2. */
30329 static bool
30330 adjacent_mem_locations (rtx mem1, rtx mem2)
30332 rtx reg1, reg2;
30333 HOST_WIDE_INT off1, size1, off2, size2;
30335 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30336 && get_memref_parts (mem2, &reg2, &off2, &size2))
30337 return ((REGNO (reg1) == REGNO (reg2))
30338 && ((off1 + size1 == off2)
30339 || (off2 + size2 == off1)));
30341 return false;
30344 /* This function returns true if it can be determined that the two MEM
30345 locations overlap by at least 1 byte based on base reg/offset/size. */
30347 static bool
30348 mem_locations_overlap (rtx mem1, rtx mem2)
30350 rtx reg1, reg2;
30351 HOST_WIDE_INT off1, size1, off2, size2;
30353 if (get_memref_parts (mem1, &reg1, &off1, &size1)
30354 && get_memref_parts (mem2, &reg2, &off2, &size2))
30355 return ((REGNO (reg1) == REGNO (reg2))
30356 && (((off1 <= off2) && (off1 + size1 > off2))
30357 || ((off2 <= off1) && (off2 + size2 > off1))));
30359 return false;
30362 /* A C statement (sans semicolon) to update the integer scheduling
30363 priority INSN_PRIORITY (INSN). Increase the priority to execute
30364 INSN earlier; reduce the priority to execute INSN later. Do not
30365 define this macro if you do not need to adjust the scheduling
30366 priorities of insns. */
30368 static int
30369 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
30371 rtx load_mem, str_mem;
30372 /* On machines (like the 750) which have asymmetric integer units,
30373 where one integer unit can do multiply and divides and the other
30374 can't, reduce the priority of multiply/divide so it is scheduled
30375 before other integer operations. */
30377 #if 0
30378 if (! INSN_P (insn))
30379 return priority;
30381 if (GET_CODE (PATTERN (insn)) == USE)
30382 return priority;
30384 switch (rs6000_tune) {
30385 case PROCESSOR_PPC750:
30386 switch (get_attr_type (insn))
30388 default:
30389 break;
30391 case TYPE_MUL:
30392 case TYPE_DIV:
30393 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
30394 priority, priority);
30395 if (priority >= 0 && priority < 0x01000000)
30396 priority >>= 3;
30397 break;
30400 #endif
30402 if (insn_must_be_first_in_group (insn)
30403 && reload_completed
30404 && current_sched_info->sched_max_insns_priority
30405 && rs6000_sched_restricted_insns_priority)
30408 /* Prioritize insns that can be dispatched only in the first
30409 dispatch slot. */
30410 if (rs6000_sched_restricted_insns_priority == 1)
30411 /* Attach highest priority to insn. This means that in
30412 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
30413 precede 'priority' (critical path) considerations. */
30414 return current_sched_info->sched_max_insns_priority;
30415 else if (rs6000_sched_restricted_insns_priority == 2)
30416 /* Increase priority of insn by a minimal amount. This means that in
30417 haifa-sched.c:ready_sort(), only 'priority' (critical path)
30418 considerations precede dispatch-slot restriction considerations. */
30419 return (priority + 1);
30422 if (rs6000_tune == PROCESSOR_POWER6
30423 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
30424 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
30425 /* Attach highest priority to insn if the scheduler has just issued two
30426 stores and this instruction is a load, or two loads and this instruction
30427 is a store. Power6 wants loads and stores scheduled alternately
30428 when possible. */
30429 return current_sched_info->sched_max_insns_priority;
30431 return priority;
30434 /* Return true if the instruction is nonpipelined on the Cell. */
30435 static bool
30436 is_nonpipeline_insn (rtx_insn *insn)
30438 enum attr_type type;
30439 if (!insn || !NONDEBUG_INSN_P (insn)
30440 || GET_CODE (PATTERN (insn)) == USE
30441 || GET_CODE (PATTERN (insn)) == CLOBBER)
30442 return false;
30444 type = get_attr_type (insn);
30445 if (type == TYPE_MUL
30446 || type == TYPE_DIV
30447 || type == TYPE_SDIV
30448 || type == TYPE_DDIV
30449 || type == TYPE_SSQRT
30450 || type == TYPE_DSQRT
30451 || type == TYPE_MFCR
30452 || type == TYPE_MFCRF
30453 || type == TYPE_MFJMPR)
30455 return true;
30457 return false;
30461 /* Return how many instructions the machine can issue per cycle. */
30463 static int
30464 rs6000_issue_rate (void)
30466 /* Unless scheduling for register pressure, use issue rate of 1 for
30467 first scheduling pass to decrease degradation. */
30468 if (!reload_completed && !flag_sched_pressure)
30469 return 1;
30471 switch (rs6000_tune) {
30472 case PROCESSOR_RS64A:
30473 case PROCESSOR_PPC601: /* ? */
30474 case PROCESSOR_PPC7450:
30475 return 3;
30476 case PROCESSOR_PPC440:
30477 case PROCESSOR_PPC603:
30478 case PROCESSOR_PPC750:
30479 case PROCESSOR_PPC7400:
30480 case PROCESSOR_PPC8540:
30481 case PROCESSOR_PPC8548:
30482 case PROCESSOR_CELL:
30483 case PROCESSOR_PPCE300C2:
30484 case PROCESSOR_PPCE300C3:
30485 case PROCESSOR_PPCE500MC:
30486 case PROCESSOR_PPCE500MC64:
30487 case PROCESSOR_PPCE5500:
30488 case PROCESSOR_PPCE6500:
30489 case PROCESSOR_TITAN:
30490 return 2;
30491 case PROCESSOR_PPC476:
30492 case PROCESSOR_PPC604:
30493 case PROCESSOR_PPC604e:
30494 case PROCESSOR_PPC620:
30495 case PROCESSOR_PPC630:
30496 return 4;
30497 case PROCESSOR_POWER4:
30498 case PROCESSOR_POWER5:
30499 case PROCESSOR_POWER6:
30500 case PROCESSOR_POWER7:
30501 return 5;
30502 case PROCESSOR_POWER8:
30503 return 7;
30504 case PROCESSOR_POWER9:
30505 return 6;
30506 default:
30507 return 1;
30511 /* Return how many instructions to look ahead for better insn
30512 scheduling. */
30514 static int
30515 rs6000_use_sched_lookahead (void)
30517 switch (rs6000_tune)
30519 case PROCESSOR_PPC8540:
30520 case PROCESSOR_PPC8548:
30521 return 4;
30523 case PROCESSOR_CELL:
30524 return (reload_completed ? 8 : 0);
30526 default:
30527 return 0;
30531 /* We are choosing insn from the ready queue. Return zero if INSN can be
30532 chosen. */
30533 static int
30534 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
30536 if (ready_index == 0)
30537 return 0;
30539 if (rs6000_tune != PROCESSOR_CELL)
30540 return 0;
30542 gcc_assert (insn != NULL_RTX && INSN_P (insn));
30544 if (!reload_completed
30545 || is_nonpipeline_insn (insn)
30546 || is_microcoded_insn (insn))
30547 return 1;
30549 return 0;
30552 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
30553 and return true. */
30555 static bool
30556 find_mem_ref (rtx pat, rtx *mem_ref)
30558 const char * fmt;
30559 int i, j;
30561 /* stack_tie does not produce any real memory traffic. */
30562 if (tie_operand (pat, VOIDmode))
30563 return false;
30565 if (GET_CODE (pat) == MEM)
30567 *mem_ref = pat;
30568 return true;
30571 /* Recursively process the pattern. */
30572 fmt = GET_RTX_FORMAT (GET_CODE (pat));
30574 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
30576 if (fmt[i] == 'e')
30578 if (find_mem_ref (XEXP (pat, i), mem_ref))
30579 return true;
30581 else if (fmt[i] == 'E')
30582 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
30584 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
30585 return true;
30589 return false;
30592 /* Determine if PAT is a PATTERN of a load insn. */
30594 static bool
30595 is_load_insn1 (rtx pat, rtx *load_mem)
30597 if (!pat)
30598 return false;
30600 if (GET_CODE (pat) == SET)
30601 return find_mem_ref (SET_SRC (pat), load_mem);
30603 if (GET_CODE (pat) == PARALLEL)
30605 int i;
30607 for (i = 0; i < XVECLEN (pat, 0); i++)
30608 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
30609 return true;
30612 return false;
30615 /* Determine if INSN loads from memory. */
30617 static bool
30618 is_load_insn (rtx insn, rtx *load_mem)
30620 if (!insn || !INSN_P (insn))
30621 return false;
30623 if (CALL_P (insn))
30624 return false;
30626 return is_load_insn1 (PATTERN (insn), load_mem);
30629 /* Determine if PAT is a PATTERN of a store insn. */
30631 static bool
30632 is_store_insn1 (rtx pat, rtx *str_mem)
30634 if (!pat)
30635 return false;
30637 if (GET_CODE (pat) == SET)
30638 return find_mem_ref (SET_DEST (pat), str_mem);
30640 if (GET_CODE (pat) == PARALLEL)
30642 int i;
30644 for (i = 0; i < XVECLEN (pat, 0); i++)
30645 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
30646 return true;
30649 return false;
30652 /* Determine if INSN stores to memory. */
30654 static bool
30655 is_store_insn (rtx insn, rtx *str_mem)
30657 if (!insn || !INSN_P (insn))
30658 return false;
30660 return is_store_insn1 (PATTERN (insn), str_mem);
30663 /* Return whether TYPE is a Power9 pairable vector instruction type. */
30665 static bool
30666 is_power9_pairable_vec_type (enum attr_type type)
30668 switch (type)
30670 case TYPE_VECSIMPLE:
30671 case TYPE_VECCOMPLEX:
30672 case TYPE_VECDIV:
30673 case TYPE_VECCMP:
30674 case TYPE_VECPERM:
30675 case TYPE_VECFLOAT:
30676 case TYPE_VECFDIV:
30677 case TYPE_VECDOUBLE:
30678 return true;
30679 default:
30680 break;
30682 return false;
30685 /* Returns whether the dependence between INSN and NEXT is considered
30686 costly by the given target. */
30688 static bool
30689 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
30691 rtx insn;
30692 rtx next;
30693 rtx load_mem, str_mem;
30695 /* If the flag is not enabled, no dependence is considered costly;
30696 allow all dependent insns in the same group.
30697 This is the most aggressive option. */
30698 if (rs6000_sched_costly_dep == no_dep_costly)
30699 return false;
30701 /* If the flag is set to 1, a dependence is always considered costly;
30702 do not allow dependent instructions in the same group.
30703 This is the most conservative option. */
30704 if (rs6000_sched_costly_dep == all_deps_costly)
30705 return true;
30707 insn = DEP_PRO (dep);
30708 next = DEP_CON (dep);
30710 if (rs6000_sched_costly_dep == store_to_load_dep_costly
30711 && is_load_insn (next, &load_mem)
30712 && is_store_insn (insn, &str_mem))
30713 /* Prevent load after store in the same group. */
30714 return true;
30716 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
30717 && is_load_insn (next, &load_mem)
30718 && is_store_insn (insn, &str_mem)
30719 && DEP_TYPE (dep) == REG_DEP_TRUE
30720 && mem_locations_overlap (str_mem, load_mem))
30721 /* Prevent load after store in the same group if it is a true
30722 dependence. */
30723 return true;
30725 /* The flag is set to X; dependences with latency >= X are considered costly,
30726 and will not be scheduled in the same group. */
30727 if (rs6000_sched_costly_dep <= max_dep_latency
30728 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
30729 return true;
30731 return false;
30734 /* Return the next insn after INSN that is found before TAIL is reached,
30735 skipping any "non-active" insns - insns that will not actually occupy
30736 an issue slot. Return NULL_RTX if such an insn is not found. */
30738 static rtx_insn *
30739 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
30741 if (insn == NULL_RTX || insn == tail)
30742 return NULL;
30744 while (1)
30746 insn = NEXT_INSN (insn);
30747 if (insn == NULL_RTX || insn == tail)
30748 return NULL;
30750 if (CALL_P (insn)
30751 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
30752 || (NONJUMP_INSN_P (insn)
30753 && GET_CODE (PATTERN (insn)) != USE
30754 && GET_CODE (PATTERN (insn)) != CLOBBER
30755 && INSN_CODE (insn) != CODE_FOR_stack_tie))
30756 break;
30758 return insn;
30761 /* Do Power9 specific sched_reorder2 reordering of ready list. */
30763 static int
30764 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
30766 int pos;
30767 int i;
30768 rtx_insn *tmp;
30769 enum attr_type type, type2;
30771 type = get_attr_type (last_scheduled_insn);
30773 /* Try to issue fixed point divides back-to-back in pairs so they will be
30774 routed to separate execution units and execute in parallel. */
30775 if (type == TYPE_DIV && divide_cnt == 0)
30777 /* First divide has been scheduled. */
30778 divide_cnt = 1;
30780 /* Scan the ready list looking for another divide; if found, move it
30781 to the end of the list so it is chosen next. */
30782 pos = lastpos;
30783 while (pos >= 0)
30785 if (recog_memoized (ready[pos]) >= 0
30786 && get_attr_type (ready[pos]) == TYPE_DIV)
30788 tmp = ready[pos];
30789 for (i = pos; i < lastpos; i++)
30790 ready[i] = ready[i + 1];
30791 ready[lastpos] = tmp;
30792 break;
30794 pos--;
30797 else
30799 /* Last insn was the 2nd divide or not a divide, reset the counter. */
30800 divide_cnt = 0;
30802 /* The best dispatch throughput for vector and vector load insns can be
30803 achieved by interleaving a vector and vector load such that they'll
30804 dispatch to the same superslice. If this pairing cannot be achieved
30805 then it is best to pair vector insns together and vector load insns
30806 together.
30808 To aid in this pairing, vec_pairing maintains the current state with
30809 the following values:
30811 0 : Initial state, no vecload/vector pairing has been started.
30813 1 : A vecload or vector insn has been issued and a candidate for
30814 pairing has been found and moved to the end of the ready
30815 list. */
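/* A hypothetical trace (illustration only) of how vec_pairing evolves,
   assuming the ready list holds both insn kinds:

     issue vecload            vec_pairing 0 -> 1; a pairable vector insn
                              is moved to the end of the ready list
     issue that vector insn   vec_pairing 1 -> 0 (reset below); the pair
                              is complete
     issue anything else      vec_pairing stays 0.  */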
30816 if (type == TYPE_VECLOAD)
30818 /* Issued a vecload. */
30819 if (vec_pairing == 0)
30821 int vecload_pos = -1;
30822 /* We issued a single vecload; look for a vector insn to pair it
30823 with. If one isn't found, try to pair another vecload. */
30824 pos = lastpos;
30825 while (pos >= 0)
30827 if (recog_memoized (ready[pos]) >= 0)
30829 type2 = get_attr_type (ready[pos]);
30830 if (is_power9_pairable_vec_type (type2))
30832 /* Found a vector insn to pair with, move it to the
30833 end of the ready list so it is scheduled next. */
30834 tmp = ready[pos];
30835 for (i = pos; i < lastpos; i++)
30836 ready[i] = ready[i + 1];
30837 ready[lastpos] = tmp;
30838 vec_pairing = 1;
30839 return cached_can_issue_more;
30841 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
30842 /* Remember position of first vecload seen. */
30843 vecload_pos = pos;
30845 pos--;
30847 if (vecload_pos >= 0)
30849 /* Didn't find a vector to pair with but did find a vecload,
30850 move it to the end of the ready list. */
30851 tmp = ready[vecload_pos];
30852 for (i = vecload_pos; i < lastpos; i++)
30853 ready[i] = ready[i + 1];
30854 ready[lastpos] = tmp;
30855 vec_pairing = 1;
30856 return cached_can_issue_more;
30860 else if (is_power9_pairable_vec_type (type))
30862 /* Issued a vector operation. */
30863 if (vec_pairing == 0)
30865 int vec_pos = -1;
30866 /* We issued a single vector insn; look for a vecload to pair it
30867 with. If one isn't found, try to pair another vector. */
30868 pos = lastpos;
30869 while (pos >= 0)
30871 if (recog_memoized (ready[pos]) >= 0)
30873 type2 = get_attr_type (ready[pos]);
30874 if (type2 == TYPE_VECLOAD)
30876 /* Found a vecload insn to pair with, move it to the
30877 end of the ready list so it is scheduled next. */
30878 tmp = ready[pos];
30879 for (i = pos; i < lastpos; i++)
30880 ready[i] = ready[i + 1];
30881 ready[lastpos] = tmp;
30882 vec_pairing = 1;
30883 return cached_can_issue_more;
30885 else if (is_power9_pairable_vec_type (type2)
30886 && vec_pos == -1)
30887 /* Remember position of first vector insn seen. */
30888 vec_pos = pos;
30890 pos--;
30892 if (vec_pos >= 0)
30894 /* Didn't find a vecload to pair with but did find a vector
30895 insn, move it to the end of the ready list. */
30896 tmp = ready[vec_pos];
30897 for (i = vec_pos; i < lastpos; i++)
30898 ready[i] = ready[i + 1];
30899 ready[lastpos] = tmp;
30900 vec_pairing = 1;
30901 return cached_can_issue_more;
30906 /* We've either finished a vec/vecload pair, couldn't find an insn to
30907 continue the current pair, or the last insn had nothing to do with
30908 pairing. In any case, reset the state. */
30909 vec_pairing = 0;
30912 return cached_can_issue_more;
30915 /* We are about to begin issuing insns for this clock cycle. */
30917 static int
30918 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
30919 rtx_insn **ready ATTRIBUTE_UNUSED,
30920 int *pn_ready ATTRIBUTE_UNUSED,
30921 int clock_var ATTRIBUTE_UNUSED)
30923 int n_ready = *pn_ready;
30925 if (sched_verbose)
30926 fprintf (dump, "// rs6000_sched_reorder :\n");
30928 /* Reorder the ready list if the insn to be issued next (the last
30929 element of the ready list) is a non-pipelined insn. */
30930 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
30932 if (is_nonpipeline_insn (ready[n_ready - 1])
30933 && (recog_memoized (ready[n_ready - 2]) > 0))
30934 /* Simply swap first two insns. */
30935 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
30938 if (rs6000_tune == PROCESSOR_POWER6)
30939 load_store_pendulum = 0;
30941 return rs6000_issue_rate ();
30944 /* Like rs6000_sched_reorder, but called after issuing each insn. */
30946 static int
30947 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
30948 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
30950 if (sched_verbose)
30951 fprintf (dump, "// rs6000_sched_reorder2 :\n");
30953 /* For Power6, we need to handle some special cases to try and keep the
30954 store queue from overflowing and triggering expensive flushes.
30956 This code monitors how load and store instructions are being issued
30957 and skews the ready list one way or the other to increase the likelihood
30958 that a desired instruction is issued at the proper time.
30960 A couple of things are done. First, we maintain a "load_store_pendulum"
30961 to track the current state of load/store issue.
30963 - If the pendulum is at zero, then no loads or stores have been
30964 issued in the current cycle so we do nothing.
30966 - If the pendulum is 1, then a single load has been issued in this
30967 cycle and we attempt to locate another load in the ready list to
30968 issue with it.
30970 - If the pendulum is -2, then two stores have already been
30971 issued in this cycle, so we increase the priority of the first load
30972 in the ready list to increase its likelihood of being chosen first
30973 in the next cycle.
30975 - If the pendulum is -1, then a single store has been issued in this
30976 cycle and we attempt to locate another store in the ready list to
30977 issue with it, preferring a store to an adjacent memory location to
30978 facilitate store pairing in the store queue.
30980 - If the pendulum is 2, then two loads have already been
30981 issued in this cycle, so we increase the priority of the first store
30982 in the ready list to increase its likelihood of being chosen first
30983 in the next cycle.
30985 - If the pendulum < -2 or > 2, then do nothing.
30987 Note: This code covers the most common scenarios. There exist
30988 non-load/store instructions which make use of the LSU and which
30989 would need to be accounted for to strictly model the behavior
30990 of the machine. Those instructions are currently unaccounted
30991 for, to help minimize the compile-time overhead of this code. */
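/* A hypothetical single-cycle trace (illustration only):

     issue store    pendulum  0 -> -1; scan the ready list for a
                    second, ideally adjacent, store
     issue store    pendulum -1 -> -2; bump the priority of the first
                    load on the ready list, then push the pendulum to
                    -3 so only one load gets the boost
     new cycle      rs6000_sched_reorder resets the pendulum to 0.  */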
30993 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
30995 int pos;
30996 int i;
30997 rtx_insn *tmp;
30998 rtx load_mem, str_mem;
31000 if (is_store_insn (last_scheduled_insn, &str_mem))
31001 /* Issuing a store, swing the load_store_pendulum to the left */
31002 load_store_pendulum--;
31003 else if (is_load_insn (last_scheduled_insn, &load_mem))
31004 /* Issuing a load, swing the load_store_pendulum to the right */
31005 load_store_pendulum++;
31006 else
31007 return cached_can_issue_more;
31009 /* If the pendulum is balanced, or there is only one instruction on
31010 the ready list, then all is well, so return. */
31011 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31012 return cached_can_issue_more;
31014 if (load_store_pendulum == 1)
31016 /* A load has been issued in this cycle. Scan the ready list
31017 for another load to issue with it */
31018 pos = *pn_ready-1;
31020 while (pos >= 0)
31022 if (is_load_insn (ready[pos], &load_mem))
31024 /* Found a load. Move it to the head of the ready list,
31025 and adjust its priority so that it is more likely to
31026 stay there */
31027 tmp = ready[pos];
31028 for (i=pos; i<*pn_ready-1; i++)
31029 ready[i] = ready[i + 1];
31030 ready[*pn_ready-1] = tmp;
31032 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31033 INSN_PRIORITY (tmp)++;
31034 break;
31036 pos--;
31039 else if (load_store_pendulum == -2)
31041 /* Two stores have been issued in this cycle. Increase the
31042 priority of the first load in the ready list to favor it for
31043 issuing in the next cycle. */
31044 pos = *pn_ready-1;
31046 while (pos >= 0)
31048 if (is_load_insn (ready[pos], &load_mem)
31049 && !sel_sched_p ()
31050 && INSN_PRIORITY_KNOWN (ready[pos]))
31052 INSN_PRIORITY (ready[pos])++;
31054 /* Adjust the pendulum to account for the fact that a load
31055 was found and increased in priority. This is to prevent
31056 increasing the priority of multiple loads */
31057 load_store_pendulum--;
31059 break;
31061 pos--;
31064 else if (load_store_pendulum == -1)
31066 /* A store has been issued in this cycle. Scan the ready list for
31067 another store to issue with it, preferring a store to an adjacent
31068 memory location */
31069 int first_store_pos = -1;
31071 pos = *pn_ready-1;
31073 while (pos >= 0)
31075 if (is_store_insn (ready[pos], &str_mem))
31077 rtx str_mem2;
31078 /* Maintain the index of the first store found on the
31079 list */
31080 if (first_store_pos == -1)
31081 first_store_pos = pos;
31083 if (is_store_insn (last_scheduled_insn, &str_mem2)
31084 && adjacent_mem_locations (str_mem, str_mem2))
31086 /* Found an adjacent store. Move it to the head of the
31087 ready list, and adjust its priority so that it is
31088 more likely to stay there */
31089 tmp = ready[pos];
31090 for (i=pos; i<*pn_ready-1; i++)
31091 ready[i] = ready[i + 1];
31092 ready[*pn_ready-1] = tmp;
31094 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31095 INSN_PRIORITY (tmp)++;
31097 first_store_pos = -1;
31099 break;
31102 pos--;
31105 if (first_store_pos >= 0)
31107 /* An adjacent store wasn't found, but a non-adjacent store was,
31108 so move the non-adjacent store to the front of the ready
31109 list, and adjust its priority so that it is more likely to
31110 stay there. */
31111 tmp = ready[first_store_pos];
31112 for (i=first_store_pos; i<*pn_ready-1; i++)
31113 ready[i] = ready[i + 1];
31114 ready[*pn_ready-1] = tmp;
31115 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31116 INSN_PRIORITY (tmp)++;
31119 else if (load_store_pendulum == 2)
31121 /* Two loads have been issued in this cycle. Increase the priority
31122 of the first store in the ready list to favor it for issuing in
31123 the next cycle. */
31124 pos = *pn_ready-1;
31126 while (pos >= 0)
31128 if (is_store_insn (ready[pos], &str_mem)
31129 && !sel_sched_p ()
31130 && INSN_PRIORITY_KNOWN (ready[pos]))
31132 INSN_PRIORITY (ready[pos])++;
31134 /* Adjust the pendulum to account for the fact that a store
31135 was found and increased in priority. This is to prevent
31136 increasing the priority of multiple stores */
31137 load_store_pendulum++;
31139 break;
31141 pos--;
31146 /* Do Power9 dependent reordering if necessary. */
31147 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
31148 && recog_memoized (last_scheduled_insn) >= 0)
31149 return power9_sched_reorder2 (ready, *pn_ready - 1);
31151 return cached_can_issue_more;
31154 /* Return whether the presence of INSN causes a dispatch group termination
31155 of group WHICH_GROUP.
31157 If WHICH_GROUP == current_group, this function will return true if INSN
31158 causes the termination of the current group (i.e., the dispatch group to
31159 which INSN belongs). This means that INSN will be the last insn in the
31160 group it belongs to.
31162 If WHICH_GROUP == previous_group, this function will return true if INSN
31163 causes the termination of the previous group (i.e., the dispatch group that
31164 precedes the group to which INSN belongs). This means that INSN will be
31165 the first insn in the group it belongs to. */
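/* For example (hypothetical, assuming Power4/5 dispatch groups): a sync
   must be first in its group, so insn_terminates_group_p (sync,
   previous_group) is true; the group in front of it is cut short.  A
   branch must sit in the final (branch) slot, so
   insn_terminates_group_p (branch, current_group) is true; it closes
   its own group.  */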
31167 static bool
31168 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31170 bool first, last;
31172 if (! insn)
31173 return false;
31175 first = insn_must_be_first_in_group (insn);
31176 last = insn_must_be_last_in_group (insn);
31178 if (first && last)
31179 return true;
31181 if (which_group == current_group)
31182 return last;
31183 else if (which_group == previous_group)
31184 return first;
31186 return false;
31190 static bool
31191 insn_must_be_first_in_group (rtx_insn *insn)
31193 enum attr_type type;
31195 if (!insn
31196 || NOTE_P (insn)
31197 || DEBUG_INSN_P (insn)
31198 || GET_CODE (PATTERN (insn)) == USE
31199 || GET_CODE (PATTERN (insn)) == CLOBBER)
31200 return false;
31202 switch (rs6000_tune)
31204 case PROCESSOR_POWER5:
31205 if (is_cracked_insn (insn))
31206 return true;
31207 /* FALLTHRU */
31208 case PROCESSOR_POWER4:
31209 if (is_microcoded_insn (insn))
31210 return true;
31212 if (!rs6000_sched_groups)
31213 return false;
31215 type = get_attr_type (insn);
31217 switch (type)
31219 case TYPE_MFCR:
31220 case TYPE_MFCRF:
31221 case TYPE_MTCR:
31222 case TYPE_CR_LOGICAL:
31223 case TYPE_MTJMPR:
31224 case TYPE_MFJMPR:
31225 case TYPE_DIV:
31226 case TYPE_LOAD_L:
31227 case TYPE_STORE_C:
31228 case TYPE_ISYNC:
31229 case TYPE_SYNC:
31230 return true;
31231 default:
31232 break;
31234 break;
31235 case PROCESSOR_POWER6:
31236 type = get_attr_type (insn);
31238 switch (type)
31240 case TYPE_EXTS:
31241 case TYPE_CNTLZ:
31242 case TYPE_TRAP:
31243 case TYPE_MUL:
31244 case TYPE_INSERT:
31245 case TYPE_FPCOMPARE:
31246 case TYPE_MFCR:
31247 case TYPE_MTCR:
31248 case TYPE_MFJMPR:
31249 case TYPE_MTJMPR:
31250 case TYPE_ISYNC:
31251 case TYPE_SYNC:
31252 case TYPE_LOAD_L:
31253 case TYPE_STORE_C:
31254 return true;
31255 case TYPE_SHIFT:
31256 if (get_attr_dot (insn) == DOT_NO
31257 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31258 return true;
31259 else
31260 break;
31261 case TYPE_DIV:
31262 if (get_attr_size (insn) == SIZE_32)
31263 return true;
31264 else
31265 break;
31266 case TYPE_LOAD:
31267 case TYPE_STORE:
31268 case TYPE_FPLOAD:
31269 case TYPE_FPSTORE:
31270 if (get_attr_update (insn) == UPDATE_YES)
31271 return true;
31272 else
31273 break;
31274 default:
31275 break;
31277 break;
31278 case PROCESSOR_POWER7:
31279 type = get_attr_type (insn);
31281 switch (type)
31283 case TYPE_CR_LOGICAL:
31284 case TYPE_MFCR:
31285 case TYPE_MFCRF:
31286 case TYPE_MTCR:
31287 case TYPE_DIV:
31288 case TYPE_ISYNC:
31289 case TYPE_LOAD_L:
31290 case TYPE_STORE_C:
31291 case TYPE_MFJMPR:
31292 case TYPE_MTJMPR:
31293 return true;
31294 case TYPE_MUL:
31295 case TYPE_SHIFT:
31296 case TYPE_EXTS:
31297 if (get_attr_dot (insn) == DOT_YES)
31298 return true;
31299 else
31300 break;
31301 case TYPE_LOAD:
31302 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31303 || get_attr_update (insn) == UPDATE_YES)
31304 return true;
31305 else
31306 break;
31307 case TYPE_STORE:
31308 case TYPE_FPLOAD:
31309 case TYPE_FPSTORE:
31310 if (get_attr_update (insn) == UPDATE_YES)
31311 return true;
31312 else
31313 break;
31314 default:
31315 break;
31317 break;
31318 case PROCESSOR_POWER8:
31319 type = get_attr_type (insn);
31321 switch (type)
31323 case TYPE_CR_LOGICAL:
31324 case TYPE_MFCR:
31325 case TYPE_MFCRF:
31326 case TYPE_MTCR:
31327 case TYPE_SYNC:
31328 case TYPE_ISYNC:
31329 case TYPE_LOAD_L:
31330 case TYPE_STORE_C:
31331 case TYPE_VECSTORE:
31332 case TYPE_MFJMPR:
31333 case TYPE_MTJMPR:
31334 return true;
31335 case TYPE_SHIFT:
31336 case TYPE_EXTS:
31337 case TYPE_MUL:
31338 if (get_attr_dot (insn) == DOT_YES)
31339 return true;
31340 else
31341 break;
31342 case TYPE_LOAD:
31343 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31344 || get_attr_update (insn) == UPDATE_YES)
31345 return true;
31346 else
31347 break;
31348 case TYPE_STORE:
31349 if (get_attr_update (insn) == UPDATE_YES
31350 && get_attr_indexed (insn) == INDEXED_YES)
31351 return true;
31352 else
31353 break;
31354 default:
31355 break;
31357 break;
31358 default:
31359 break;
31362 return false;
31365 static bool
31366 insn_must_be_last_in_group (rtx_insn *insn)
31368 enum attr_type type;
31370 if (!insn
31371 || NOTE_P (insn)
31372 || DEBUG_INSN_P (insn)
31373 || GET_CODE (PATTERN (insn)) == USE
31374 || GET_CODE (PATTERN (insn)) == CLOBBER)
31375 return false;
31377 switch (rs6000_tune) {
31378 case PROCESSOR_POWER4:
31379 case PROCESSOR_POWER5:
31380 if (is_microcoded_insn (insn))
31381 return true;
31383 if (is_branch_slot_insn (insn))
31384 return true;
31386 break;
31387 case PROCESSOR_POWER6:
31388 type = get_attr_type (insn);
31390 switch (type)
31392 case TYPE_EXTS:
31393 case TYPE_CNTLZ:
31394 case TYPE_TRAP:
31395 case TYPE_MUL:
31396 case TYPE_FPCOMPARE:
31397 case TYPE_MFCR:
31398 case TYPE_MTCR:
31399 case TYPE_MFJMPR:
31400 case TYPE_MTJMPR:
31401 case TYPE_ISYNC:
31402 case TYPE_SYNC:
31403 case TYPE_LOAD_L:
31404 case TYPE_STORE_C:
31405 return true;
31406 case TYPE_SHIFT:
31407 if (get_attr_dot (insn) == DOT_NO
31408 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31409 return true;
31410 else
31411 break;
31412 case TYPE_DIV:
31413 if (get_attr_size (insn) == SIZE_32)
31414 return true;
31415 else
31416 break;
31417 default:
31418 break;
31420 break;
31421 case PROCESSOR_POWER7:
31422 type = get_attr_type (insn);
31424 switch (type)
31426 case TYPE_ISYNC:
31427 case TYPE_SYNC:
31428 case TYPE_LOAD_L:
31429 case TYPE_STORE_C:
31430 return true;
31431 case TYPE_LOAD:
31432 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31433 && get_attr_update (insn) == UPDATE_YES)
31434 return true;
31435 else
31436 break;
31437 case TYPE_STORE:
31438 if (get_attr_update (insn) == UPDATE_YES
31439 && get_attr_indexed (insn) == INDEXED_YES)
31440 return true;
31441 else
31442 break;
31443 default:
31444 break;
31446 break;
31447 case PROCESSOR_POWER8:
31448 type = get_attr_type (insn);
31450 switch (type)
31452 case TYPE_MFCR:
31453 case TYPE_MTCR:
31454 case TYPE_ISYNC:
31455 case TYPE_SYNC:
31456 case TYPE_LOAD_L:
31457 case TYPE_STORE_C:
31458 return true;
31459 case TYPE_LOAD:
31460 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
31461 && get_attr_update (insn) == UPDATE_YES)
31462 return true;
31463 else
31464 break;
31465 case TYPE_STORE:
31466 if (get_attr_update (insn) == UPDATE_YES
31467 && get_attr_indexed (insn) == INDEXED_YES)
31468 return true;
31469 else
31470 break;
31471 default:
31472 break;
31474 break;
31475 default:
31476 break;
31479 return false;
31482 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
31483 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
31485 static bool
31486 is_costly_group (rtx *group_insns, rtx next_insn)
31488 int i;
31489 int issue_rate = rs6000_issue_rate ();
31491 for (i = 0; i < issue_rate; i++)
31493 sd_iterator_def sd_it;
31494 dep_t dep;
31495 rtx insn = group_insns[i];
31497 if (!insn)
31498 continue;
31500 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
31502 rtx next = DEP_CON (dep);
31504 if (next == next_insn
31505 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
31506 return true;
31510 return false;
31513 /* Helper for the function redefine_groups.
31514 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
31515 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
31516 to keep it "far" (in a separate group) from GROUP_INSNS, following
31517 one of the following schemes, depending on the value of the flag
31518 -minsert-sched-nops = X:
31519 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
31520 in order to force NEXT_INSN into a separate group.
31521 (2) X < sched_finish_regroup_exact: insert exactly X nops.
31522 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
31523 insertion (has a group just ended, how many vacant issue slots remain in the
31524 last group, and how many dispatch groups were encountered so far). */
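/* A worked example of scheme (1), hypothetical numbers: issue_rate == 5
   and three insns already occupy the group, so can_issue_more == 2.
   If NEXT_INSN is not a branch, a single nop fills the fourth slot;
   only the branch slot is then vacant, forcing NEXT_INSN into a fresh
   group.  On Power6/7/8 one special group-ending nop achieves the same
   effect directly.  */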
31526 static int
31527 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
31528 rtx_insn *next_insn, bool *group_end, int can_issue_more,
31529 int *group_count)
31531 rtx nop;
31532 bool force;
31533 int issue_rate = rs6000_issue_rate ();
31534 bool end = *group_end;
31535 int i;
31537 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
31538 return can_issue_more;
31540 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
31541 return can_issue_more;
31543 force = is_costly_group (group_insns, next_insn);
31544 if (!force)
31545 return can_issue_more;
31547 if (sched_verbose > 6)
31548 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
31549 *group_count ,can_issue_more);
31551 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
31553 if (*group_end)
31554 can_issue_more = 0;
31556 /* Since only a branch can be issued in the last issue_slot, it is
31557 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
31558 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
31559 in this case the last nop will start a new group and the branch
31560 will be forced to the new group. */
31561 if (can_issue_more && !is_branch_slot_insn (next_insn))
31562 can_issue_more--;
31564 /* Do we have a special group ending nop? */
31565 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
31566 || rs6000_tune == PROCESSOR_POWER8)
31568 nop = gen_group_ending_nop ();
31569 emit_insn_before (nop, next_insn);
31570 can_issue_more = 0;
31572 else
31573 while (can_issue_more > 0)
31575 nop = gen_nop ();
31576 emit_insn_before (nop, next_insn);
31577 can_issue_more--;
31580 *group_end = true;
31581 return 0;
31584 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
31586 int n_nops = rs6000_sched_insert_nops;
31588 /* Nops can't be issued from the branch slot, so the effective
31589 issue_rate for nops is 'issue_rate - 1'. */
31590 if (can_issue_more == 0)
31591 can_issue_more = issue_rate;
31592 can_issue_more--;
31593 if (can_issue_more == 0)
31595 can_issue_more = issue_rate - 1;
31596 (*group_count)++;
31597 end = true;
31598 for (i = 0; i < issue_rate; i++)
31600 group_insns[i] = 0;
31604 while (n_nops > 0)
31606 nop = gen_nop ();
31607 emit_insn_before (nop, next_insn);
31608 if (can_issue_more == issue_rate - 1) /* new group begins */
31609 end = false;
31610 can_issue_more--;
31611 if (can_issue_more == 0)
31613 can_issue_more = issue_rate - 1;
31614 (*group_count)++;
31615 end = true;
31616 for (i = 0; i < issue_rate; i++)
31618 group_insns[i] = 0;
31621 n_nops--;
31624 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
31625 can_issue_more++;
31627 /* Is next_insn going to start a new group? */
31628 *group_end
31629 = (end
31630 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31631 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31632 || (can_issue_more < issue_rate &&
31633 insn_terminates_group_p (next_insn, previous_group)));
31634 if (*group_end && end)
31635 (*group_count)--;
31637 if (sched_verbose > 6)
31638 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
31639 *group_count, can_issue_more);
31640 return can_issue_more;
31643 return can_issue_more;
31646 /* This function tries to synch the dispatch groups that the compiler "sees"
31647 with the dispatch groups that the processor dispatcher is expected to
31648 form in practice. It tries to achieve this synchronization by forcing the
31649 estimated processor grouping on the compiler (as opposed to the function
31650 'pad_groups' which tries to force the scheduler's grouping on the processor).
31652 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
31653 examines the (estimated) dispatch groups that will be formed by the processor
31654 dispatcher. It marks these group boundaries to reflect the estimated
31655 processor grouping, overriding the grouping that the scheduler had marked.
31656 Depending on the value of the flag '-minsert-sched-nops' this function can
31657 force certain insns into separate groups or force a certain distance between
31658 them by inserting nops, for example, if there exists a "costly dependence"
31659 between the insns.
31661 The function estimates the group boundaries that the processor will form as
31662 follows: It keeps track of how many vacant issue slots are available after
31663 each insn. A subsequent insn will start a new group if one of the following
31664 4 cases applies:
31665 - no more vacant issue slots remain in the current dispatch group.
31666 - only the last issue slot, which is the branch slot, is vacant, but the next
31667 insn is not a branch.
31668 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
31669 which means that a cracked insn (which occupies two issue slots) can't be
31670 issued in this group.
31671 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
31672 start a new group. */
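/* To make the cases concrete (hypothetical, issue_rate == 5): after
   three single-slot insns, two slots including the branch slot remain,
   so a cracked insn, which needs two non-branch slots, starts a new
   group (third case).  After four such insns only the branch slot is
   left, so any non-branch insn starts a new group (second case).  */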
31674 static int
31675 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31676 rtx_insn *tail)
31678 rtx_insn *insn, *next_insn;
31679 int issue_rate;
31680 int can_issue_more;
31681 int slot, i;
31682 bool group_end;
31683 int group_count = 0;
31684 rtx *group_insns;
31686 /* Initialize. */
31687 issue_rate = rs6000_issue_rate ();
31688 group_insns = XALLOCAVEC (rtx, issue_rate);
31689 for (i = 0; i < issue_rate; i++)
31691 group_insns[i] = 0;
31693 can_issue_more = issue_rate;
31694 slot = 0;
31695 insn = get_next_active_insn (prev_head_insn, tail);
31696 group_end = false;
31698 while (insn != NULL_RTX)
31700 slot = (issue_rate - can_issue_more);
31701 group_insns[slot] = insn;
31702 can_issue_more =
31703 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31704 if (insn_terminates_group_p (insn, current_group))
31705 can_issue_more = 0;
31707 next_insn = get_next_active_insn (insn, tail);
31708 if (next_insn == NULL_RTX)
31709 return group_count + 1;
31711 /* Is next_insn going to start a new group? */
31712 group_end
31713 = (can_issue_more == 0
31714 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
31715 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
31716 || (can_issue_more < issue_rate &&
31717 insn_terminates_group_p (next_insn, previous_group)));
31719 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
31720 next_insn, &group_end, can_issue_more,
31721 &group_count);
31723 if (group_end)
31725 group_count++;
31726 can_issue_more = 0;
31727 for (i = 0; i < issue_rate; i++)
31729 group_insns[i] = 0;
31733 if (GET_MODE (next_insn) == TImode && can_issue_more)
31734 PUT_MODE (next_insn, VOIDmode);
31735 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
31736 PUT_MODE (next_insn, TImode);
31738 insn = next_insn;
31739 if (can_issue_more == 0)
31740 can_issue_more = issue_rate;
31741 } /* while */
31743 return group_count;
31746 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
31747 dispatch group boundaries that the scheduler had marked. Pad with nops
31748 any dispatch groups which have vacant issue slots, in order to force the
31749 scheduler's grouping on the processor dispatcher. The function
31750 returns the number of dispatch groups found. */
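/* E.g. (hypothetical, issue_rate == 5): if the scheduler marked a group
   boundary after three insns, two slots are still vacant.  When the
   next insn is not a branch, one nop suffices, since a vacant branch
   slot already terminates the group; the dispatcher then ends the
   group exactly where the scheduler did.  */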
31752 static int
31753 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
31754 rtx_insn *tail)
31756 rtx_insn *insn, *next_insn;
31757 rtx nop;
31758 int issue_rate;
31759 int can_issue_more;
31760 int group_end;
31761 int group_count = 0;
31763 /* Initialize issue_rate. */
31764 issue_rate = rs6000_issue_rate ();
31765 can_issue_more = issue_rate;
31767 insn = get_next_active_insn (prev_head_insn, tail);
31768 next_insn = get_next_active_insn (insn, tail);
31770 while (insn != NULL_RTX)
31772 can_issue_more =
31773 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
31775 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
31777 if (next_insn == NULL_RTX)
31778 break;
31780 if (group_end)
31782 /* If the scheduler had marked group termination at this location
31783 (between insn and next_insn), and neither insn nor next_insn will
31784 force group termination, pad the group with nops to force group
31785 termination. */
31786 if (can_issue_more
31787 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
31788 && !insn_terminates_group_p (insn, current_group)
31789 && !insn_terminates_group_p (next_insn, previous_group))
31791 if (!is_branch_slot_insn (next_insn))
31792 can_issue_more--;
31794 while (can_issue_more)
31796 nop = gen_nop ();
31797 emit_insn_before (nop, next_insn);
31798 can_issue_more--;
31802 can_issue_more = issue_rate;
31803 group_count++;
31806 insn = next_insn;
31807 next_insn = get_next_active_insn (insn, tail);
31810 return group_count;
31813 /* We're beginning a new block. Initialize data structures as necessary. */
31815 static void
31816 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
31817 int sched_verbose ATTRIBUTE_UNUSED,
31818 int max_ready ATTRIBUTE_UNUSED)
31820 last_scheduled_insn = NULL;
31821 load_store_pendulum = 0;
31822 divide_cnt = 0;
31823 vec_pairing = 0;
31826 /* The following function is called at the end of scheduling BB.
31827 After reload, it inserts nops to enforce insn group bundling. */
31829 static void
31830 rs6000_sched_finish (FILE *dump, int sched_verbose)
31832 int n_groups;
31834 if (sched_verbose)
31835 fprintf (dump, "=== Finishing schedule.\n");
31837 if (reload_completed && rs6000_sched_groups)
31839 /* Do not run the sched_finish hook when selective scheduling is enabled. */
31840 if (sel_sched_p ())
31841 return;
31843 if (rs6000_sched_insert_nops == sched_finish_none)
31844 return;
31846 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
31847 n_groups = pad_groups (dump, sched_verbose,
31848 current_sched_info->prev_head,
31849 current_sched_info->next_tail);
31850 else
31851 n_groups = redefine_groups (dump, sched_verbose,
31852 current_sched_info->prev_head,
31853 current_sched_info->next_tail);
31855 if (sched_verbose >= 6)
31857 fprintf (dump, "ngroups = %d\n", n_groups);
31858 print_rtl (dump, current_sched_info->prev_head);
31859 fprintf (dump, "Done finish_sched\n");
31864 struct rs6000_sched_context
31866 short cached_can_issue_more;
31867 rtx_insn *last_scheduled_insn;
31868 int load_store_pendulum;
31869 int divide_cnt;
31870 int vec_pairing;
31873 typedef struct rs6000_sched_context rs6000_sched_context_def;
31874 typedef rs6000_sched_context_def *rs6000_sched_context_t;
31876 /* Allocate storage for a new scheduling context. */
31877 static void *
31878 rs6000_alloc_sched_context (void)
31880 return xmalloc (sizeof (rs6000_sched_context_def));
31883 /* If CLEAN_P is true, initialize _SC with clean data;
31884 otherwise initialize it from the global context. */
31885 static void
31886 rs6000_init_sched_context (void *_sc, bool clean_p)
31888 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
31890 if (clean_p)
31892 sc->cached_can_issue_more = 0;
31893 sc->last_scheduled_insn = NULL;
31894 sc->load_store_pendulum = 0;
31895 sc->divide_cnt = 0;
31896 sc->vec_pairing = 0;
31898 else
31900 sc->cached_can_issue_more = cached_can_issue_more;
31901 sc->last_scheduled_insn = last_scheduled_insn;
31902 sc->load_store_pendulum = load_store_pendulum;
31903 sc->divide_cnt = divide_cnt;
31904 sc->vec_pairing = vec_pairing;
31908 /* Sets the global scheduling context to the one pointed to by _SC. */
31909 static void
31910 rs6000_set_sched_context (void *_sc)
31912 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
31914 gcc_assert (sc != NULL);
31916 cached_can_issue_more = sc->cached_can_issue_more;
31917 last_scheduled_insn = sc->last_scheduled_insn;
31918 load_store_pendulum = sc->load_store_pendulum;
31919 divide_cnt = sc->divide_cnt;
31920 vec_pairing = sc->vec_pairing;
31923 /* Free _SC. */
31924 static void
31925 rs6000_free_sched_context (void *_sc)
31927 gcc_assert (_sc != NULL);
31929 free (_sc);
31932 static bool
31933 rs6000_sched_can_speculate_insn (rtx_insn *insn)
31935 switch (get_attr_type (insn))
31937 case TYPE_DIV:
31938 case TYPE_SDIV:
31939 case TYPE_DDIV:
31940 case TYPE_VECDIV:
31941 case TYPE_SSQRT:
31942 case TYPE_DSQRT:
31943 return false;
31945 default:
31946 return true;
31950 /* Length in units (bytes) of the trampoline for entering a nested function. */
31953 rs6000_trampoline_size (void)
31955 int ret = 0;
31957 switch (DEFAULT_ABI)
31959 default:
31960 gcc_unreachable ();
31962 case ABI_AIX:
31963 ret = (TARGET_32BIT) ? 12 : 24;
31964 break;
31966 case ABI_ELFv2:
31967 gcc_assert (!TARGET_32BIT);
31968 ret = 32;
31969 break;
31971 case ABI_DARWIN:
31972 case ABI_V4:
31973 ret = (TARGET_32BIT) ? 40 : 48;
31974 break;
31977 return ret;
31980 /* Emit RTL insns to initialize the variable parts of a trampoline.
31981 FNADDR is an RTX for the address of the function's pure code.
31982 CXT is an RTX for the static chain value for the function. */
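/* For the ABI_AIX case below, the trampoline is itself a 3-word
   function descriptor, one register-sized word per field (hence the
   12 bytes for -m32 and 24 for -m64 in rs6000_trampoline_size):

     word 0: code entry address (copied from FNADDR's descriptor)
     word 1: TOC pointer        (copied from FNADDR's descriptor)
     word 2: static chain       (CXT)  */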
31984 static void
31985 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
31987 int regsize = (TARGET_32BIT) ? 4 : 8;
31988 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
31989 rtx ctx_reg = force_reg (Pmode, cxt);
31990 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
31992 switch (DEFAULT_ABI)
31994 default:
31995 gcc_unreachable ();
31997 /* Under AIX, just build the 3-word function descriptor. */
31998 case ABI_AIX:
32000 rtx fnmem, fn_reg, toc_reg;
32002 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32003 error ("you cannot take the address of a nested function if you use "
32004 "the %qs option", "-mno-pointers-to-nested-functions");
32006 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32007 fn_reg = gen_reg_rtx (Pmode);
32008 toc_reg = gen_reg_rtx (Pmode);
32010 /* Macro to shorten the code expansions below. */
32011 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32013 m_tramp = replace_equiv_address (m_tramp, addr);
32015 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32016 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32017 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32018 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32019 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32021 # undef MEM_PLUS
32023 break;
32025 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32026 case ABI_ELFv2:
32027 case ABI_DARWIN:
32028 case ABI_V4:
32029 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32030 LCT_NORMAL, VOIDmode,
32031 addr, Pmode,
32032 GEN_INT (rs6000_trampoline_size ()), SImode,
32033 fnaddr, Pmode,
32034 ctx_reg, Pmode);
32035 break;
32040 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32041 identifier as an argument, so the front end shouldn't look it up. */
32043 static bool
32044 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32046 return is_attribute_p ("altivec", attr_id);
32049 /* Handle the "altivec" attribute. The attribute may have
32050 arguments as follows:
32052 __attribute__((altivec(vector__)))
32053 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32054 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32056 and may appear more than once (e.g., 'vector bool char') in a
32057 given declaration. */
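/* An illustrative (not literal) expansion: the keyword form
   'vector unsigned int' reaches this handler roughly as
   '__attribute__((altivec(vector__))) unsigned int', while
   'vector bool char' arrives as a bool__ attribute plus a vector__
   attribute on 'unsigned char', which is why the handler may run more
   than once for one declaration.  */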
32059 static tree
32060 rs6000_handle_altivec_attribute (tree *node,
32061 tree name ATTRIBUTE_UNUSED,
32062 tree args,
32063 int flags ATTRIBUTE_UNUSED,
32064 bool *no_add_attrs)
32066 tree type = *node, result = NULL_TREE;
32067 machine_mode mode;
32068 int unsigned_p;
32069 char altivec_type
32070 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32071 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32072 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32073 : '?');
32075 while (POINTER_TYPE_P (type)
32076 || TREE_CODE (type) == FUNCTION_TYPE
32077 || TREE_CODE (type) == METHOD_TYPE
32078 || TREE_CODE (type) == ARRAY_TYPE)
32079 type = TREE_TYPE (type);
32081 mode = TYPE_MODE (type);
32083 /* Check for invalid AltiVec type qualifiers. */
32084 if (type == long_double_type_node)
32085 error ("use of %<long double%> in AltiVec types is invalid");
32086 else if (type == boolean_type_node)
32087 error ("use of boolean types in AltiVec types is invalid");
32088 else if (TREE_CODE (type) == COMPLEX_TYPE)
32089 error ("use of %<complex%> in AltiVec types is invalid");
32090 else if (DECIMAL_FLOAT_MODE_P (mode))
32091 error ("use of decimal floating point types in AltiVec types is invalid");
32092 else if (!TARGET_VSX)
32094 if (type == long_unsigned_type_node || type == long_integer_type_node)
32096 if (TARGET_64BIT)
32097 error ("use of %<long%> in AltiVec types is invalid for "
32098 "64-bit code without %qs", "-mvsx");
32099 else if (rs6000_warn_altivec_long)
32100 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32101 "use %<int%>");
32103 else if (type == long_long_unsigned_type_node
32104 || type == long_long_integer_type_node)
32105 error ("use of %<long long%> in AltiVec types is invalid without %qs",
32106 "-mvsx");
32107 else if (type == double_type_node)
32108 error ("use of %<double%> in AltiVec types is invalid without %qs",
32109 "-mvsx");
32112 switch (altivec_type)
32114 case 'v':
32115 unsigned_p = TYPE_UNSIGNED (type);
32116 switch (mode)
32118 case E_TImode:
32119 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32120 break;
32121 case E_DImode:
32122 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32123 break;
32124 case E_SImode:
32125 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32126 break;
32127 case E_HImode:
32128 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32129 break;
32130 case E_QImode:
32131 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32132 break;
32133 case E_SFmode: result = V4SF_type_node; break;
32134 case E_DFmode: result = V2DF_type_node; break;
32135 /* If the user says 'vector int bool', we may be handed the 'bool'
32136 attribute _before_ the 'vector' attribute, and so select the
32137 proper type in the 'b' case below. */
32138 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
32139 case E_V2DImode: case E_V2DFmode:
32140 result = type;
32141 default: break;
32143 break;
32144 case 'b':
32145 switch (mode)
32147 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
32148 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
32149 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
32150 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
32151 default: break;
32153 break;
32154 case 'p':
32155 switch (mode)
32157 case E_V8HImode: result = pixel_V8HI_type_node;
32158 default: break;
32160 default: break;
32163 /* Propagate qualifiers attached to the element type
32164 onto the vector type. */
32165 if (result && result != type && TYPE_QUALS (type))
32166 result = build_qualified_type (result, TYPE_QUALS (type));
32168 *no_add_attrs = true; /* No need to hang on to the attribute. */
32170 if (result)
32171 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32173 return NULL_TREE;
32176 /* AltiVec defines five built-in scalar types that serve as vector
32177 elements; we must teach the compiler how to mangle them. The 128-bit
32178 floating point mangling is target-specific as well. */
32180 static const char *
32181 rs6000_mangle_type (const_tree type)
32183 type = TYPE_MAIN_VARIANT (type);
32185 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32186 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32187 return NULL;
32189 if (type == bool_char_type_node) return "U6__boolc";
32190 if (type == bool_short_type_node) return "U6__bools";
32191 if (type == pixel_type_node) return "u7__pixel";
32192 if (type == bool_int_type_node) return "U6__booli";
32193 if (type == bool_long_long_type_node) return "U6__boolx";
32195 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
32196 return "g";
32197 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
32198 return ieee128_mangling_gcc_8_1 ? "U10__float128" : "u9__ieee128";
32200 /* For all other types, use the default mangling. */
32201 return NULL;
32204 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32205 struct attribute_spec.handler. */
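/* Usage sketch (illustrative):

     void helper (void) __attribute__ ((longcall));

   Calls to helper are then made through a register (see
   rs6000_longcall_ref below) rather than by a direct 'bl', so they
   are not limited by the direct-branch displacement range.  */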
32207 static tree
32208 rs6000_handle_longcall_attribute (tree *node, tree name,
32209 tree args ATTRIBUTE_UNUSED,
32210 int flags ATTRIBUTE_UNUSED,
32211 bool *no_add_attrs)
32213 if (TREE_CODE (*node) != FUNCTION_TYPE
32214 && TREE_CODE (*node) != FIELD_DECL
32215 && TREE_CODE (*node) != TYPE_DECL)
32217 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32218 name);
32219 *no_add_attrs = true;
32222 return NULL_TREE;
32225 /* Set longcall attributes on all functions declared when
32226 rs6000_default_long_calls is true. */
32227 static void
32228 rs6000_set_default_type_attributes (tree type)
32230 if (rs6000_default_long_calls
32231 && (TREE_CODE (type) == FUNCTION_TYPE
32232 || TREE_CODE (type) == METHOD_TYPE))
32233 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32234 NULL_TREE,
32235 TYPE_ATTRIBUTES (type));
32237 #if TARGET_MACHO
32238 darwin_set_default_type_attributes (type);
32239 #endif
32242 /* Return a reference suitable for calling a function with the
32243 longcall attribute. */
32246 rs6000_longcall_ref (rtx call_ref)
32248 const char *call_name;
32249 tree node;
32251 if (GET_CODE (call_ref) != SYMBOL_REF)
32252 return call_ref;
32254 /* System V adds '.' to the internal name, so strip any leading dots. */
32255 call_name = XSTR (call_ref, 0);
32256 if (*call_name == '.')
32258 while (*call_name == '.')
32259 call_name++;
32261 node = get_identifier (call_name);
32262 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32265 return force_reg (Pmode, call_ref);
32268 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32269 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32270 #endif
32272 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32273 struct attribute_spec.handler. */
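/* Usage sketch (illustrative):

     struct __attribute__ ((ms_struct)) S { char c; int i : 8; };

   requests the Microsoft bitfield layout for S; requesting both
   ms_struct and gcc_struct on the same type is diagnosed below.  */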
32274 static tree
32275 rs6000_handle_struct_attribute (tree *node, tree name,
32276 tree args ATTRIBUTE_UNUSED,
32277 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
32279 tree *type = NULL;
32280 if (DECL_P (*node))
32282 if (TREE_CODE (*node) == TYPE_DECL)
32283 type = &TREE_TYPE (*node);
32285 else
32286 type = node;
32288 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
32289 || TREE_CODE (*type) == UNION_TYPE)))
32291 warning (OPT_Wattributes, "%qE attribute ignored", name);
32292 *no_add_attrs = true;
32295 else if ((is_attribute_p ("ms_struct", name)
32296 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
32297 || ((is_attribute_p ("gcc_struct", name)
32298 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
32300 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
32301 name);
32302 *no_add_attrs = true;
32305 return NULL_TREE;
32308 static bool
32309 rs6000_ms_bitfield_layout_p (const_tree record_type)
32311 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
32312 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
32313 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
32316 #ifdef USING_ELFOS_H
32318 /* A get_unnamed_section callback, used for switching to toc_section. */
32320 static void
32321 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
32323 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32324 && TARGET_MINIMAL_TOC)
32326 if (!toc_initialized)
32328 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32329 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32330 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
32331 fprintf (asm_out_file, "\t.tc ");
32332 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
32333 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32334 fprintf (asm_out_file, "\n");
32336 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32337 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32338 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32339 fprintf (asm_out_file, " = .+32768\n");
32340 toc_initialized = 1;
32342 else
32343 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32345 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32347 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
32348 if (!toc_initialized)
32350 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32351 toc_initialized = 1;
32354 else
32356 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
32357 if (!toc_initialized)
32359 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
32360 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
32361 fprintf (asm_out_file, " = .+32768\n");
32362 toc_initialized = 1;
32367 /* Implement TARGET_ASM_INIT_SECTIONS. */
32369 static void
32370 rs6000_elf_asm_init_sections (void)
32372 toc_section
32373 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
32375 sdata2_section
32376 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
32377 SDATA2_SECTION_ASM_OP);
32380 /* Implement TARGET_SELECT_RTX_SECTION. */
32382 static section *
32383 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
32384 unsigned HOST_WIDE_INT align)
32386 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
32387 return toc_section;
32388 else
32389 return default_elf_select_rtx_section (mode, x, align);
32392 /* For a SYMBOL_REF, set generic flags and then perform some
32393 target-specific processing.
32395 When the AIX ABI is requested on a non-AIX system, replace the
32396 function name with the real name (with a leading .) rather than the
32397 function descriptor name. This saves a lot of overriding code to
32398 read the prefixes. */
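/* E.g., with this rule a reference to function 'foo' is rewritten to
   '.foo', the code entry point, rather than going through the 'foo'
   function descriptor name.  */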
32400 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
32401 static void
32402 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
32404 default_encode_section_info (decl, rtl, first);
32406 if (first
32407 && TREE_CODE (decl) == FUNCTION_DECL
32408 && !TARGET_AIX
32409 && DEFAULT_ABI == ABI_AIX)
32411 rtx sym_ref = XEXP (rtl, 0);
32412 size_t len = strlen (XSTR (sym_ref, 0));
32413 char *str = XALLOCAVEC (char, len + 2);
32414 str[0] = '.';
32415 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
32416 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
32420 static inline bool
32421 compare_section_name (const char *section, const char *templ)
32423 int len;
32425 len = strlen (templ);
32426 return (strncmp (section, templ, len) == 0
32427 && (section[len] == 0 || section[len] == '.'));
32430 bool
32431 rs6000_elf_in_small_data_p (const_tree decl)
32433 if (rs6000_sdata == SDATA_NONE)
32434 return false;
32436 /* We want to merge strings, so we never consider them small data. */
32437 if (TREE_CODE (decl) == STRING_CST)
32438 return false;
32440 /* Functions are never in the small data area. */
32441 if (TREE_CODE (decl) == FUNCTION_DECL)
32442 return false;
32444 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
32446 const char *section = DECL_SECTION_NAME (decl);
32447 if (compare_section_name (section, ".sdata")
32448 || compare_section_name (section, ".sdata2")
32449 || compare_section_name (section, ".gnu.linkonce.s")
32450 || compare_section_name (section, ".sbss")
32451 || compare_section_name (section, ".sbss2")
32452 || compare_section_name (section, ".gnu.linkonce.sb")
32453 || strcmp (section, ".PPC.EMB.sdata0") == 0
32454 || strcmp (section, ".PPC.EMB.sbss0") == 0)
32455 return true;
32457 else
32459 /* If we are told not to put readonly data in sdata, then don't. */
32460 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
32461 && !rs6000_readonly_in_sdata)
32462 return false;
32464 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
32466 if (size > 0
32467 && size <= g_switch_value
32468 /* If it's not public, and we're not going to reference it there,
32469 there's no need to put it in the small data section. */
32470 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
32471 return true;
32474 return false;
32477 #endif /* USING_ELFOS_H */
32479 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
32481 static bool
32482 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
32484 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
32487 /* Do not place thread-local symbols refs in the object blocks. */
32489 static bool
32490 rs6000_use_blocks_for_decl_p (const_tree decl)
32492 return !DECL_THREAD_LOCAL_P (decl);
32495 /* Return a REG that occurs in ADDR with coefficient 1.
32496 ADDR can be effectively incremented by incrementing REG.
32498 r0 is special and we must not select it as an address
32499 register by this routine since our caller will try to
32500 increment the returned register via an "la" instruction. */
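/* For example, given the (hypothetical) address

     (plus:SI (reg:SI 9) (const_int 16))

   this returns (reg:SI 9); the caller can then bump r9 with an "la"
   instruction.  */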
32503 find_addr_reg (rtx addr)
32505 while (GET_CODE (addr) == PLUS)
32507 if (GET_CODE (XEXP (addr, 0)) == REG
32508 && REGNO (XEXP (addr, 0)) != 0)
32509 addr = XEXP (addr, 0);
32510 else if (GET_CODE (XEXP (addr, 1)) == REG
32511 && REGNO (XEXP (addr, 1)) != 0)
32512 addr = XEXP (addr, 1);
32513 else if (CONSTANT_P (XEXP (addr, 0)))
32514 addr = XEXP (addr, 1);
32515 else if (CONSTANT_P (XEXP (addr, 1)))
32516 addr = XEXP (addr, 0);
32517 else
32518 gcc_unreachable ();
32520 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
32521 return addr;
32524 void
32525 rs6000_fatal_bad_address (rtx op)
32527 fatal_insn ("bad address", op);
32530 #if TARGET_MACHO
32532 typedef struct branch_island_d {
32533 tree function_name;
32534 tree label_name;
32535 int line_number;
32536 } branch_island;
32539 static vec<branch_island, va_gc> *branch_islands;
32541 /* Remember to generate a branch island for far calls to the given
32542 function. */
32544 static void
32545 add_compiler_branch_island (tree label_name, tree function_name,
32546 int line_number)
32548 branch_island bi = {function_name, label_name, line_number};
32549 vec_safe_push (branch_islands, bi);
32552 /* Generate far-jump branch islands for everything recorded in
32553 branch_islands. Invoked immediately after the last instruction of
32554 the epilogue has been emitted; the branch islands must be appended
32555 to, and contiguous with, the function body. Mach-O stubs are
32556 generated in machopic_output_stub(). */
32558 static void
32559 macho_branch_islands (void)
32561 char tmp_buf[512];
32563 while (!vec_safe_is_empty (branch_islands))
32565 branch_island *bi = &branch_islands->last ();
32566 const char *label = IDENTIFIER_POINTER (bi->label_name);
32567 const char *name = IDENTIFIER_POINTER (bi->function_name);
32568 char name_buf[512];
32569 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
32570 if (name[0] == '*' || name[0] == '&')
32571 strcpy (name_buf, name+1);
32572 else
32574 name_buf[0] = '_';
32575 strcpy (name_buf+1, name);
32577 strcpy (tmp_buf, "\n");
32578 strcat (tmp_buf, label);
32579 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32580 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32581 dbxout_stabd (N_SLINE, bi->line_number);
32582 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32583 if (flag_pic)
32585 if (TARGET_LINK_STACK)
32587 char name[32];
32588 get_ppc476_thunk_name (name);
32589 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
32590 strcat (tmp_buf, name);
32591 strcat (tmp_buf, "\n");
32592 strcat (tmp_buf, label);
32593 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32595 else
32597 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
32598 strcat (tmp_buf, label);
32599 strcat (tmp_buf, "_pic\n");
32600 strcat (tmp_buf, label);
32601 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
32604 strcat (tmp_buf, "\taddis r11,r11,ha16(");
32605 strcat (tmp_buf, name_buf);
32606 strcat (tmp_buf, " - ");
32607 strcat (tmp_buf, label);
32608 strcat (tmp_buf, "_pic)\n");
32610 strcat (tmp_buf, "\tmtlr r0\n");
32612 strcat (tmp_buf, "\taddi r12,r11,lo16(");
32613 strcat (tmp_buf, name_buf);
32614 strcat (tmp_buf, " - ");
32615 strcat (tmp_buf, label);
32616 strcat (tmp_buf, "_pic)\n");
32618 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
32620 else
32622 strcat (tmp_buf, ":\nlis r12,hi16(");
32623 strcat (tmp_buf, name_buf);
32624 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
32625 strcat (tmp_buf, name_buf);
32626 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
32628 output_asm_insn (tmp_buf, 0);
32629 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
32630 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
32631 dbxout_stabd (N_SLINE, bi->line_number);
32632 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
32633 branch_islands->pop ();
32637 /* NO_PREVIOUS_DEF checks whether the function name is already in the
32638 branch island list. */
32640 static int
32641 no_previous_def (tree function_name)
32643 branch_island *bi;
32644 unsigned ix;
32646 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32647 if (function_name == bi->function_name)
32648 return 0;
32649 return 1;
32652 /* GET_PREV_LABEL gets the label name from the previous definition of
32653 the function. */
32655 static tree
32656 get_prev_label (tree function_name)
32658 branch_island *bi;
32659 unsigned ix;
32661 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
32662 if (function_name == bi->function_name)
32663 return bi->label_name;
32664 return NULL_TREE;
32667 /* INSN is either a function call or a millicode call. It may have an
32668 unconditional jump in its delay slot.
32670 CALL_DEST is the routine we are calling. */
32672 char *
32673 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
32674 int cookie_operand_number)
32676 static char buf[256];
32677 if (darwin_emit_branch_islands
32678 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
32679 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
32681 tree labelname;
32682 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
32684 if (no_previous_def (funname))
32686 rtx label_rtx = gen_label_rtx ();
32687 char *label_buf, temp_buf[256];
32688 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
32689 CODE_LABEL_NUMBER (label_rtx));
32690 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
32691 labelname = get_identifier (label_buf);
32692 add_compiler_branch_island (labelname, funname, insn_line (insn));
32694 else
32695 labelname = get_prev_label (funname);
32697 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
32698 instruction will reach 'foo', otherwise link as 'bl L42'".
32699 "L42" should be a 'branch island', that will do a far jump to
32700 'foo'. Branch islands are generated in
32701 macho_branch_islands(). */
32702 sprintf (buf, "jbsr %%z%d,%.246s",
32703 dest_operand_number, IDENTIFIER_POINTER (labelname));
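/* An illustrative sketch (the island label L42 is hypothetical): a long
   call to foo is emitted as "jbsr _foo,L42", and macho_branch_islands ()
   later appends an island of roughly the form

       L42: lis r12,hi16(_foo)
            ori r12,r12,lo16(_foo)
            mtctr r12
            bctr

   which the linker falls back to only when a direct "bl _foo" cannot
   reach. */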
32705 else
32706 sprintf (buf, "bl %%z%d", dest_operand_number);
32707 return buf;
32710 /* Generate PIC and indirect symbol stubs. */
32712 void
32713 machopic_output_stub (FILE *file, const char *symb, const char *stub)
32715 unsigned int length;
32716 char *symbol_name, *lazy_ptr_name;
32717 char *local_label_0;
32718 static int label = 0;
32720 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
32721 symb = (*targetm.strip_name_encoding) (symb);
32724 length = strlen (symb);
32725 symbol_name = XALLOCAVEC (char, length + 32);
32726 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
32728 lazy_ptr_name = XALLOCAVEC (char, length + 32);
32729 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
32731 if (flag_pic == 2)
32732 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
32733 else
32734 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
32736 if (flag_pic == 2)
32738 fprintf (file, "\t.align 5\n");
32740 fprintf (file, "%s:\n", stub);
32741 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32743 label++;
32744 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
32745 sprintf (local_label_0, "\"L%011d$spb\"", label);
32747 fprintf (file, "\tmflr r0\n");
32748 if (TARGET_LINK_STACK)
32750 char name[32];
32751 get_ppc476_thunk_name (name);
32752 fprintf (file, "\tbl %s\n", name);
32753 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32755 else
32757 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
32758 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
32760 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
32761 lazy_ptr_name, local_label_0);
32762 fprintf (file, "\tmtlr r0\n");
32763 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
32764 (TARGET_64BIT ? "ldu" : "lwzu"),
32765 lazy_ptr_name, local_label_0);
32766 fprintf (file, "\tmtctr r12\n");
32767 fprintf (file, "\tbctr\n");
32769 else
32771 fprintf (file, "\t.align 4\n");
32773 fprintf (file, "%s:\n", stub);
32774 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32776 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
32777 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
32778 (TARGET_64BIT ? "ldu" : "lwzu"),
32779 lazy_ptr_name);
32780 fprintf (file, "\tmtctr r12\n");
32781 fprintf (file, "\tbctr\n");
32784 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
32785 fprintf (file, "%s:\n", lazy_ptr_name);
32786 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
32787 fprintf (file, "%sdyld_stub_binding_helper\n",
32788 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
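/* A sketch of the output, assuming hypothetical label spellings for a
   symbol "bar": the non-pic2 path above emits roughly

       L_bar$stub:
               .indirect_symbol _bar
               lis r11,ha16(L_bar$lazy_ptr)
               lwzu r12,lo16(L_bar$lazy_ptr)(r11)
               mtctr r12
               bctr
       L_bar$lazy_ptr:
               .indirect_symbol _bar
               .long dyld_stub_binding_helper

   The real stub name arrives as the STUB argument, and the lazy-pointer
   name comes from GEN_LAZY_PTR_NAME_FOR_SYMBOL above. */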
32791 /* Legitimize PIC addresses. If the address is already
32792 position-independent, we return ORIG. Newly generated
32793 position-independent addresses go into a reg. This is REG if
32794 nonzero, otherwise we allocate register(s) as necessary. */
32796 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
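/* SMALL_INT (X) is true exactly when INTVAL (X) fits in a signed 16-bit
   immediate: adding 0x8000 maps [-0x8000, 0x7fff] onto [0, 0xffff], so,
   for example, -32768 and 32767 pass while 32768 and -32769 do not. */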
32798 rtx
32799 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
32800 rtx reg)
32802 rtx base, offset;
32804 if (reg == NULL && !reload_completed)
32805 reg = gen_reg_rtx (Pmode);
32807 if (GET_CODE (orig) == CONST)
32809 rtx reg_temp;
32811 if (GET_CODE (XEXP (orig, 0)) == PLUS
32812 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
32813 return orig;
32815 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
32817 /* Use a different reg for the intermediate value, as
32818 it will be marked UNCHANGING. */
32819 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
32820 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
32821 Pmode, reg_temp);
32822 offset =
32823 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
32824 Pmode, reg);
32826 if (GET_CODE (offset) == CONST_INT)
32828 if (SMALL_INT (offset))
32829 return plus_constant (Pmode, base, INTVAL (offset));
32830 else if (!reload_completed)
32831 offset = force_reg (Pmode, offset);
32832 else
32834 rtx mem = force_const_mem (Pmode, orig);
32835 return machopic_legitimize_pic_address (mem, Pmode, reg);
32838 return gen_rtx_PLUS (Pmode, base, offset);
32841 /* Fall back on generic machopic code. */
32842 return machopic_legitimize_pic_address (orig, mode, reg);
32845 /* Output a .machine directive for the Darwin assembler, and call
32846 the generic start_file routine. */
32848 static void
32849 rs6000_darwin_file_start (void)
32851 static const struct
32853 const char *arg;
32854 const char *name;
32855 HOST_WIDE_INT if_set;
32856 } mapping[] = {
32857 { "ppc64", "ppc64", MASK_64BIT },
32858 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
32859 { "power4", "ppc970", 0 },
32860 { "G5", "ppc970", 0 },
32861 { "7450", "ppc7450", 0 },
32862 { "7400", "ppc7400", MASK_ALTIVEC },
32863 { "G4", "ppc7400", 0 },
32864 { "750", "ppc750", 0 },
32865 { "740", "ppc750", 0 },
32866 { "G3", "ppc750", 0 },
32867 { "604e", "ppc604e", 0 },
32868 { "604", "ppc604", 0 },
32869 { "603e", "ppc603", 0 },
32870 { "603", "ppc603", 0 },
32871 { "601", "ppc601", 0 },
32872 { NULL, "ppc", 0 } };
32873 const char *cpu_id = "";
32874 size_t i;
32876 rs6000_file_start ();
32877 darwin_file_start ();
32879 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
32881 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
32882 cpu_id = rs6000_default_cpu;
32884 if (global_options_set.x_rs6000_cpu_index)
32885 cpu_id = processor_target_table[rs6000_cpu_index].name;
32887 /* Look through the mapping array. Pick the first name that either
32888 matches the argument, has a bit set in IF_SET that is also set
32889 in the target flags, or has a NULL name. */
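/* For example, a cpu_id of "7450" matches that entry and emits
   ".machine ppc7450"; a 64-bit compile whose cpu_id matches no entry
   stops at the "ppc64" entry through its MASK_64BIT bit; anything else
   reaches the terminating NULL entry and emits ".machine ppc". */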
32891 i = 0;
32892 while (mapping[i].arg != NULL
32893 && strcmp (mapping[i].arg, cpu_id) != 0
32894 && (mapping[i].if_set & rs6000_isa_flags) == 0)
32895 i++;
32897 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
32900 #endif /* TARGET_MACHO */
32902 #if TARGET_ELF
32903 static int
32904 rs6000_elf_reloc_rw_mask (void)
32906 if (flag_pic)
32907 return 3;
32908 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
32909 return 2;
32910 else
32911 return 0;
32914 /* Record an element in the table of global constructors. SYMBOL is
32915 a SYMBOL_REF of the function to be called; PRIORITY is a number
32916 between 0 and MAX_INIT_PRIORITY.
32918 This differs from default_named_section_asm_out_constructor in
32919 that we have special handling for -mrelocatable. */
32921 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
32922 static void
32923 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
32925 const char *section = ".ctors";
32926 char buf[18];
32928 if (priority != DEFAULT_INIT_PRIORITY)
32930 sprintf (buf, ".ctors.%.5u",
32931 /* Invert the numbering so the linker puts us in the proper
32932 order; constructors are run from right to left, and the
32933 linker sorts in increasing order. */
32934 MAX_INIT_PRIORITY - priority);
32935 section = buf;
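/* Worked example: with MAX_INIT_PRIORITY == 65535, priority 101 selects
   section ".ctors.65434" and priority 65535 selects ".ctors.00000"; the
   ascending linker sort plus right-to-left execution then runs the
   priority-101 constructor before the priority-65535 one. */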
32938 switch_to_section (get_section (section, SECTION_WRITE, NULL));
32939 assemble_align (POINTER_SIZE);
32941 if (DEFAULT_ABI == ABI_V4
32942 && (TARGET_RELOCATABLE || flag_pic > 1))
32944 fputs ("\t.long (", asm_out_file);
32945 output_addr_const (asm_out_file, symbol);
32946 fputs (")@fixup\n", asm_out_file);
32948 else
32949 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
32952 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
32953 static void
32954 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
32956 const char *section = ".dtors";
32957 char buf[18];
32959 if (priority != DEFAULT_INIT_PRIORITY)
32961 sprintf (buf, ".dtors.%.5u",
32962 /* Invert the numbering so the linker puts us in the proper
32963 order; constructors are run from right to left, and the
32964 linker sorts in increasing order. */
32965 MAX_INIT_PRIORITY - priority);
32966 section = buf;
32969 switch_to_section (get_section (section, SECTION_WRITE, NULL));
32970 assemble_align (POINTER_SIZE);
32972 if (DEFAULT_ABI == ABI_V4
32973 && (TARGET_RELOCATABLE || flag_pic > 1))
32975 fputs ("\t.long (", asm_out_file);
32976 output_addr_const (asm_out_file, symbol);
32977 fputs (")@fixup\n", asm_out_file);
32979 else
32980 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
32983 void
32984 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
32986 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
32988 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
32989 ASM_OUTPUT_LABEL (file, name);
32990 fputs (DOUBLE_INT_ASM_OP, file);
32991 rs6000_output_function_entry (file, name);
32992 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
32993 if (DOT_SYMBOLS)
32995 fputs ("\t.size\t", file);
32996 assemble_name (file, name);
32997 fputs (",24\n\t.type\t.", file);
32998 assemble_name (file, name);
32999 fputs (",@function\n", file);
33000 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33002 fputs ("\t.globl\t.", file);
33003 assemble_name (file, name);
33004 putc ('\n', file);
33007 else
33008 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33009 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33010 rs6000_output_function_entry (file, name);
33011 fputs (":\n", file);
33012 return;
33015 int uses_toc;
33016 if (DEFAULT_ABI == ABI_V4
33017 && (TARGET_RELOCATABLE || flag_pic > 1)
33018 && !TARGET_SECURE_PLT
33019 && (!constant_pool_empty_p () || crtl->profile)
33020 && (uses_toc = uses_TOC ()))
33022 char buf[256];
33024 if (uses_toc == 2)
33025 switch_to_other_text_partition ();
33026 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33028 fprintf (file, "\t.long ");
33029 assemble_name (file, toc_label_name);
33030 need_toc_init = 1;
33031 putc ('-', file);
33032 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33033 assemble_name (file, buf);
33034 putc ('\n', file);
33035 if (uses_toc == 2)
33036 switch_to_other_text_partition ();
33039 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33040 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33042 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33044 char buf[256];
33046 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33048 fprintf (file, "\t.quad .TOC.-");
33049 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33050 assemble_name (file, buf);
33051 putc ('\n', file);
33054 if (DEFAULT_ABI == ABI_AIX)
33056 const char *desc_name, *orig_name;
33058 orig_name = (*targetm.strip_name_encoding) (name);
33059 desc_name = orig_name;
33060 while (*desc_name == '.')
33061 desc_name++;
33063 if (TREE_PUBLIC (decl))
33064 fprintf (file, "\t.globl %s\n", desc_name);
33066 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33067 fprintf (file, "%s:\n", desc_name);
33068 fprintf (file, "\t.long %s\n", orig_name);
33069 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33070 fputs ("\t.long 0\n", file);
33071 fprintf (file, "\t.previous\n");
33073 ASM_OUTPUT_LABEL (file, name);
33076 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33077 static void
33078 rs6000_elf_file_end (void)
33080 #ifdef HAVE_AS_GNU_ATTRIBUTE
33081 /* ??? The value emitted depends on options active at file end.
33082 Assume anyone using #pragma or attributes that might change
33083 options knows what they are doing. */
33084 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33085 && rs6000_passes_float)
33087 int fp;
33089 if (TARGET_HARD_FLOAT)
33090 fp = 1;
33091 else
33092 fp = 2;
33093 if (rs6000_passes_long_double)
33095 if (!TARGET_LONG_DOUBLE_128)
33096 fp |= 2 * 4;
33097 else if (TARGET_IEEEQUAD)
33098 fp |= 3 * 4;
33099 else
33100 fp |= 1 * 4;
33102 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
33104 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33106 if (rs6000_passes_vector)
33107 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33108 (TARGET_ALTIVEC_ABI ? 2 : 1));
33109 if (rs6000_returns_struct)
33110 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33111 aix_struct_return ? 2 : 1);
33113 #endif
33114 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33115 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33116 file_end_indicate_exec_stack ();
33117 #endif
33119 if (flag_split_stack)
33120 file_end_indicate_split_stack ();
33122 if (cpu_builtin_p)
33124 /* We have expanded a CPU builtin, so we need to emit a reference to
33125 the special symbol that LIBC uses to declare it supports the
33126 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
33127 switch_to_section (data_section);
33128 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33129 fprintf (asm_out_file, "\t%s %s\n",
33130 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33133 #endif
33135 #if TARGET_XCOFF
33137 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33138 #define HAVE_XCOFF_DWARF_EXTRAS 0
33139 #endif
33141 static enum unwind_info_type
33142 rs6000_xcoff_debug_unwind_info (void)
33144 return UI_NONE;
33147 static void
33148 rs6000_xcoff_asm_output_anchor (rtx symbol)
33150 char buffer[100];
33152 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33153 SYMBOL_REF_BLOCK_OFFSET (symbol));
33154 fprintf (asm_out_file, "%s", SET_ASM_OP);
33155 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33156 fprintf (asm_out_file, ",");
33157 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33158 fprintf (asm_out_file, "\n");
33161 static void
33162 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33164 fputs (GLOBAL_ASM_OP, stream);
33165 RS6000_OUTPUT_BASENAME (stream, name);
33166 putc ('\n', stream);
33169 /* A get_unnamed_decl callback, used for read-only sections. PTR
33170 points to the section string variable. */
33172 static void
33173 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33175 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33176 *(const char *const *) directive,
33177 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33180 /* Likewise for read-write sections. */
33182 static void
33183 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33185 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33186 *(const char *const *) directive,
33187 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33190 static void
33191 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33193 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33194 *(const char *const *) directive,
33195 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33198 /* A get_unnamed_section callback, used for switching to toc_section. */
33200 static void
33201 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33203 if (TARGET_MINIMAL_TOC)
33205 /* toc_section is always selected at least once from
33206 rs6000_xcoff_file_start, so this is guaranteed to
33207 always be defined once and only once in each file. */
33208 if (!toc_initialized)
33210 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33211 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33212 toc_initialized = 1;
33214 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33215 (TARGET_32BIT ? "" : ",3"));
33217 else
33218 fputs ("\t.toc\n", asm_out_file);
33221 /* Implement TARGET_ASM_INIT_SECTIONS. */
33223 static void
33224 rs6000_xcoff_asm_init_sections (void)
33226 read_only_data_section
33227 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33228 &xcoff_read_only_section_name);
33230 private_data_section
33231 = get_unnamed_section (SECTION_WRITE,
33232 rs6000_xcoff_output_readwrite_section_asm_op,
33233 &xcoff_private_data_section_name);
33235 tls_data_section
33236 = get_unnamed_section (SECTION_TLS,
33237 rs6000_xcoff_output_tls_section_asm_op,
33238 &xcoff_tls_data_section_name);
33240 tls_private_data_section
33241 = get_unnamed_section (SECTION_TLS,
33242 rs6000_xcoff_output_tls_section_asm_op,
33243 &xcoff_private_data_section_name);
33245 read_only_private_data_section
33246 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33247 &xcoff_private_data_section_name);
33249 toc_section
33250 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33252 readonly_data_section = read_only_data_section;
33255 static int
33256 rs6000_xcoff_reloc_rw_mask (void)
33258 return 3;
33261 static void
33262 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33263 tree decl ATTRIBUTE_UNUSED)
33265 int smclass;
33266 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33268 if (flags & SECTION_EXCLUDE)
33269 smclass = 4;
33270 else if (flags & SECTION_DEBUG)
33272 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33273 return;
33275 else if (flags & SECTION_CODE)
33276 smclass = 0;
33277 else if (flags & SECTION_TLS)
33278 smclass = 3;
33279 else if (flags & SECTION_WRITE)
33280 smclass = 2;
33281 else
33282 smclass = 1;
33284 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
33285 (flags & SECTION_CODE) ? "." : "",
33286 name, suffix[smclass], flags & SECTION_ENTSIZE);
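/* For instance, a plain writable named section "mydata" (SECTION_WRITE
   set, no code/TLS/debug/exclude flags) maps to smclass 2 and is emitted
   as ".csect mydata[RW],0" when no entsize bits are present. */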
33289 #define IN_NAMED_SECTION(DECL) \
33290 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
33291 && DECL_SECTION_NAME (DECL) != NULL)
33293 static section *
33294 rs6000_xcoff_select_section (tree decl, int reloc,
33295 unsigned HOST_WIDE_INT align)
33297 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
33298 named section. */
33299 if (align > BIGGEST_ALIGNMENT)
33301 resolve_unique_section (decl, reloc, true);
33302 if (IN_NAMED_SECTION (decl))
33303 return get_named_section (decl, NULL, reloc);
33306 if (decl_readonly_section (decl, reloc))
33308 if (TREE_PUBLIC (decl))
33309 return read_only_data_section;
33310 else
33311 return read_only_private_data_section;
33313 else
33315 #if HAVE_AS_TLS
33316 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33318 if (TREE_PUBLIC (decl))
33319 return tls_data_section;
33320 else if (bss_initializer_p (decl))
33322 /* Convert to COMMON to emit in BSS. */
33323 DECL_COMMON (decl) = 1;
33324 return tls_comm_section;
33326 else
33327 return tls_private_data_section;
33329 else
33330 #endif
33331 if (TREE_PUBLIC (decl))
33332 return data_section;
33333 else
33334 return private_data_section;
33338 static void
33339 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
33341 const char *name;
33343 /* Use select_section for private data and uninitialized data with
33344 alignment <= BIGGEST_ALIGNMENT. */
33345 if (!TREE_PUBLIC (decl)
33346 || DECL_COMMON (decl)
33347 || (DECL_INITIAL (decl) == NULL_TREE
33348 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
33349 || DECL_INITIAL (decl) == error_mark_node
33350 || (flag_zero_initialized_in_bss
33351 && initializer_zerop (DECL_INITIAL (decl))))
33352 return;
33354 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
33355 name = (*targetm.strip_name_encoding) (name);
33356 set_decl_section_name (decl, name);
33359 /* Select section for constant in constant pool.
33361 On RS/6000, all constants are in the private read-only data area.
33362 However, if this is being placed in the TOC it must be output as a
33363 toc entry. */
33365 static section *
33366 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
33367 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
33369 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33370 return toc_section;
33371 else
33372 return read_only_private_data_section;
33375 /* Remove any trailing [DS] or the like from the symbol name. */
33377 static const char *
33378 rs6000_xcoff_strip_name_encoding (const char *name)
33380 size_t len;
33381 if (*name == '*')
33382 name++;
33383 len = strlen (name);
33384 if (name[len - 1] == ']')
33385 return ggc_alloc_string (name, len - 4);
33386 else
33387 return name;
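/* For example, "foo[DS]" and "*bar[RW]" both come back stripped, as "foo"
   and "bar"; note the code assumes a trailing "]" always closes a
   four-character mapping-class suffix such as "[DS]". */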
33390 /* Section attributes. AIX is always PIC. */
33392 static unsigned int
33393 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
33395 unsigned int align;
33396 unsigned int flags = default_section_type_flags (decl, name, reloc);
33398 /* Align to at least UNIT size. */
33399 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
33400 align = MIN_UNITS_PER_WORD;
33401 else
33402 /* Increase alignment of large objects if not already stricter. */
33403 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
33404 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
33405 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
33407 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
33410 /* Output at beginning of assembler file.
33412 Initialize the section names for the RS/6000 at this point.
33414 Specify filename, including full path, to assembler.
33416 We want to go into the TOC section so at least one .toc will be emitted.
33417 Also, in order to output proper .bs/.es pairs, we need at least one static
33418 [RW] section emitted.
33420 Finally, declare mcount when profiling to make the assembler happy. */
33422 static void
33423 rs6000_xcoff_file_start (void)
33425 rs6000_gen_section_name (&xcoff_bss_section_name,
33426 main_input_filename, ".bss_");
33427 rs6000_gen_section_name (&xcoff_private_data_section_name,
33428 main_input_filename, ".rw_");
33429 rs6000_gen_section_name (&xcoff_read_only_section_name,
33430 main_input_filename, ".ro_");
33431 rs6000_gen_section_name (&xcoff_tls_data_section_name,
33432 main_input_filename, ".tls_");
33433 rs6000_gen_section_name (&xcoff_tbss_section_name,
33434 main_input_filename, ".tbss_[UL]");
33436 fputs ("\t.file\t", asm_out_file);
33437 output_quoted_string (asm_out_file, main_input_filename);
33438 fputc ('\n', asm_out_file);
33439 if (write_symbols != NO_DEBUG)
33440 switch_to_section (private_data_section);
33441 switch_to_section (toc_section);
33442 switch_to_section (text_section);
33443 if (profile_flag)
33444 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
33445 rs6000_file_start ();
33448 /* Output at end of assembler file.
33449 On the RS/6000, referencing data should automatically pull in text. */
33451 static void
33452 rs6000_xcoff_file_end (void)
33454 switch_to_section (text_section);
33455 fputs ("_section_.text:\n", asm_out_file);
33456 switch_to_section (data_section);
33457 fputs (TARGET_32BIT
33458 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
33459 asm_out_file);
33462 struct declare_alias_data
33464 FILE *file;
33465 bool function_descriptor;
33468 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
33470 static bool
33471 rs6000_declare_alias (struct symtab_node *n, void *d)
33473 struct declare_alias_data *data = (struct declare_alias_data *)d;
33474 /* Main symbol is output specially, because varasm machinery does part of
33475 the job for us; we do not need to emit .globl/.lglobl directives and such. */
33476 if (!n->alias || n->weakref)
33477 return false;
33479 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
33480 return false;
33482 /* Prevent assemble_alias from trying to use .set pseudo operation
33483 that does not behave as expected by the middle-end. */
33484 TREE_ASM_WRITTEN (n->decl) = true;
33486 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
33487 char *buffer = (char *) alloca (strlen (name) + 2);
33488 char *p;
33489 int dollar_inside = 0;
33491 strcpy (buffer, name);
33492 p = strchr (buffer, '$');
33493 while (p) {
33494 *p = '_';
33495 dollar_inside++;
33496 p = strchr (p + 1, '$');
33498 if (TREE_PUBLIC (n->decl))
33500 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
33502 if (dollar_inside) {
33503 if (data->function_descriptor)
33504 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33505 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33507 if (data->function_descriptor)
33509 fputs ("\t.globl .", data->file);
33510 RS6000_OUTPUT_BASENAME (data->file, buffer);
33511 putc ('\n', data->file);
33513 fputs ("\t.globl ", data->file);
33514 RS6000_OUTPUT_BASENAME (data->file, buffer);
33515 putc ('\n', data->file);
33517 #ifdef ASM_WEAKEN_DECL
33518 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
33519 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
33520 #endif
33522 else
33524 if (dollar_inside)
33526 if (data->function_descriptor)
33527 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
33528 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
33530 if (data->function_descriptor)
33532 fputs ("\t.lglobl .", data->file);
33533 RS6000_OUTPUT_BASENAME (data->file, buffer);
33534 putc ('\n', data->file);
33536 fputs ("\t.lglobl ", data->file);
33537 RS6000_OUTPUT_BASENAME (data->file, buffer);
33538 putc ('\n', data->file);
33540 if (data->function_descriptor)
33541 fputs (".", data->file);
33542 RS6000_OUTPUT_BASENAME (data->file, buffer);
33543 fputs (":\n", data->file);
33544 return false;
33548 #ifdef HAVE_GAS_HIDDEN
33549 /* Helper function to calculate visibility of a DECL
33550 and return the value as a const string. */
33552 static const char *
33553 rs6000_xcoff_visibility (tree decl)
33555 static const char * const visibility_types[] = {
33556 "", ",protected", ",hidden", ",internal"
33559 enum symbol_visibility vis = DECL_VISIBILITY (decl);
33560 return visibility_types[vis];
33562 #endif
33565 /* This macro produces the initial definition of a function name.
33566 On the RS/6000, we need to place an extra '.' in the function name and
33567 output the function descriptor.
33568 Dollar signs are converted to underscores.
33570 The csect for the function will have already been created when
33571 text_section was selected. We do have to go back to that csect, however.
33573 The third and fourth parameters to the .function pseudo-op (16 and 044)
33574 are placeholders which no longer have any use.
33576 Because AIX assembler's .set command has unexpected semantics, we output
33577 all aliases as alternative labels in front of the definition. */
33579 void
33580 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
33582 char *buffer = (char *) alloca (strlen (name) + 1);
33583 char *p;
33584 int dollar_inside = 0;
33585 struct declare_alias_data data = {file, false};
33587 strcpy (buffer, name);
33588 p = strchr (buffer, '$');
33589 while (p) {
33590 *p = '_';
33591 dollar_inside++;
33592 p = strchr (p + 1, '$');
33594 if (TREE_PUBLIC (decl))
33596 if (!RS6000_WEAK || !DECL_WEAK (decl))
33598 if (dollar_inside) {
33599 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33600 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33602 fputs ("\t.globl .", file);
33603 RS6000_OUTPUT_BASENAME (file, buffer);
33604 #ifdef HAVE_GAS_HIDDEN
33605 fputs (rs6000_xcoff_visibility (decl), file);
33606 #endif
33607 putc ('\n', file);
33610 else
33612 if (dollar_inside) {
33613 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
33614 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
33616 fputs ("\t.lglobl .", file);
33617 RS6000_OUTPUT_BASENAME (file, buffer);
33618 putc ('\n', file);
33620 fputs ("\t.csect ", file);
33621 RS6000_OUTPUT_BASENAME (file, buffer);
33622 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
33623 RS6000_OUTPUT_BASENAME (file, buffer);
33624 fputs (":\n", file);
33625 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33626 &data, true);
33627 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
33628 RS6000_OUTPUT_BASENAME (file, buffer);
33629 fputs (", TOC[tc0], 0\n", file);
33630 in_section = NULL;
33631 switch_to_section (function_section (decl));
33632 putc ('.', file);
33633 RS6000_OUTPUT_BASENAME (file, buffer);
33634 fputs (":\n", file);
33635 data.function_descriptor = true;
33636 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33637 &data, true);
33638 if (!DECL_IGNORED_P (decl))
33640 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33641 xcoffout_declare_function (file, decl, buffer);
33642 else if (write_symbols == DWARF2_DEBUG)
33644 name = (*targetm.strip_name_encoding) (name);
33645 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
33648 return;
33652 /* Output assembly language to globalize a symbol from a DECL,
33653 possibly with visibility. */
33655 void
33656 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
33658 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
33659 fputs (GLOBAL_ASM_OP, stream);
33660 RS6000_OUTPUT_BASENAME (stream, name);
33661 #ifdef HAVE_GAS_HIDDEN
33662 fputs (rs6000_xcoff_visibility (decl), stream);
33663 #endif
33664 putc ('\n', stream);
33667 /* Output assembly language to define a symbol as COMMON from a DECL,
33668 possibly with visibility. */
33670 void
33671 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
33672 tree decl ATTRIBUTE_UNUSED,
33673 const char *name,
33674 unsigned HOST_WIDE_INT size,
33675 unsigned HOST_WIDE_INT align)
33677 unsigned HOST_WIDE_INT align2 = 2;
33679 if (align > 32)
33680 align2 = floor_log2 (align / BITS_PER_UNIT);
33681 else if (size > 4)
33682 align2 = 3;
33684 fputs (COMMON_ASM_OP, stream);
33685 RS6000_OUTPUT_BASENAME (stream, name);
33687 fprintf (stream,
33688 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
33689 size, align2);
33691 #ifdef HAVE_GAS_HIDDEN
33692 if (decl != NULL)
33693 fputs (rs6000_xcoff_visibility (decl), stream);
33694 #endif
33695 putc ('\n', stream);
33698 /* This macro produces the initial definition of an object (variable) name.
33699 Because AIX assembler's .set command has unexpected semantics, we output
33700 all aliases as alternative labels in front of the definition. */
33702 void
33703 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
33705 struct declare_alias_data data = {file, false};
33706 RS6000_OUTPUT_BASENAME (file, name);
33707 fputs (":\n", file);
33708 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
33709 &data, true);
33712 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
33714 void
33715 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
33717 fputs (integer_asm_op (size, FALSE), file);
33718 assemble_name (file, label);
33719 fputs ("-$", file);
33722 /* Output a symbol offset relative to the dbase for the current object.
33723 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
33724 signed offsets.
33726 __gcc_unwind_dbase is embedded in all executables/libraries through
33727 libgcc/config/rs6000/crtdbase.S. */
33729 void
33730 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
33732 fputs (integer_asm_op (size, FALSE), file);
33733 assemble_name (file, label);
33734 fputs("-__gcc_unwind_dbase", file);
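/* Illustrative output (the label spelling is hypothetical): for SIZE == 4
   this emits something like "\t.long\tLFB42-__gcc_unwind_dbase", i.e. the
   label's signed offset from the arbitrary unwind data base. */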
33737 #ifdef HAVE_AS_TLS
33738 static void
33739 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
33741 rtx symbol;
33742 int flags;
33743 const char *symname;
33745 default_encode_section_info (decl, rtl, first);
33747 /* Careful not to prod global register variables. */
33748 if (!MEM_P (rtl))
33749 return;
33750 symbol = XEXP (rtl, 0);
33751 if (GET_CODE (symbol) != SYMBOL_REF)
33752 return;
33754 flags = SYMBOL_REF_FLAGS (symbol);
33756 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
33757 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
33759 SYMBOL_REF_FLAGS (symbol) = flags;
33761 /* Append mapping class to extern decls. */
33762 symname = XSTR (symbol, 0);
33763 if (decl /* sync condition with assemble_external () */
33764 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
33765 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
33766 || TREE_CODE (decl) == FUNCTION_DECL)
33767 && symname[strlen (symname) - 1] != ']')
33769 char *newname = (char *) alloca (strlen (symname) + 5);
33770 strcpy (newname, symname);
33771 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
33772 ? "[DS]" : "[UA]"));
33773 XSTR (symbol, 0) = ggc_strdup (newname);
33776 #endif /* HAVE_AS_TLS */
33777 #endif /* TARGET_XCOFF */
33779 void
33780 rs6000_asm_weaken_decl (FILE *stream, tree decl,
33781 const char *name, const char *val)
33783 fputs ("\t.weak\t", stream);
33784 RS6000_OUTPUT_BASENAME (stream, name);
33785 if (decl && TREE_CODE (decl) == FUNCTION_DECL
33786 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
33788 if (TARGET_XCOFF)
33789 fputs ("[DS]", stream);
33790 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
33791 if (TARGET_XCOFF)
33792 fputs (rs6000_xcoff_visibility (decl), stream);
33793 #endif
33794 fputs ("\n\t.weak\t.", stream);
33795 RS6000_OUTPUT_BASENAME (stream, name);
33797 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
33798 if (TARGET_XCOFF)
33799 fputs (rs6000_xcoff_visibility (decl), stream);
33800 #endif
33801 fputc ('\n', stream);
33802 if (val)
33804 #ifdef ASM_OUTPUT_DEF
33805 ASM_OUTPUT_DEF (stream, name, val);
33806 #endif
33807 if (decl && TREE_CODE (decl) == FUNCTION_DECL
33808 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
33810 fputs ("\t.set\t.", stream);
33811 RS6000_OUTPUT_BASENAME (stream, name);
33812 fputs (",.", stream);
33813 RS6000_OUTPUT_BASENAME (stream, val);
33814 fputc ('\n', stream);
33820 /* Return true if INSN should not be copied. */
33822 static bool
33823 rs6000_cannot_copy_insn_p (rtx_insn *insn)
33825 return recog_memoized (insn) >= 0
33826 && get_attr_cannot_copy (insn);
33829 /* Compute a (partial) cost for rtx X. Return true if the complete
33830 cost has been computed, and false if subexpressions should be
33831 scanned. In either case, *TOTAL contains the cost result. */
33833 static bool
33834 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
33835 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
33837 int code = GET_CODE (x);
33839 switch (code)
33841 /* On the RS/6000, if it is valid in the insn, it is free. */
33842 case CONST_INT:
33843 if (((outer_code == SET
33844 || outer_code == PLUS
33845 || outer_code == MINUS)
33846 && (satisfies_constraint_I (x)
33847 || satisfies_constraint_L (x)))
33848 || (outer_code == AND
33849 && (satisfies_constraint_K (x)
33850 || (mode == SImode
33851 ? satisfies_constraint_L (x)
33852 : satisfies_constraint_J (x))))
33853 || ((outer_code == IOR || outer_code == XOR)
33854 && (satisfies_constraint_K (x)
33855 || (mode == SImode
33856 ? satisfies_constraint_L (x)
33857 : satisfies_constraint_J (x))))
33858 || outer_code == ASHIFT
33859 || outer_code == ASHIFTRT
33860 || outer_code == LSHIFTRT
33861 || outer_code == ROTATE
33862 || outer_code == ROTATERT
33863 || outer_code == ZERO_EXTRACT
33864 || (outer_code == MULT
33865 && satisfies_constraint_I (x))
33866 || ((outer_code == DIV || outer_code == UDIV
33867 || outer_code == MOD || outer_code == UMOD)
33868 && exact_log2 (INTVAL (x)) >= 0)
33869 || (outer_code == COMPARE
33870 && (satisfies_constraint_I (x)
33871 || satisfies_constraint_K (x)))
33872 || ((outer_code == EQ || outer_code == NE)
33873 && (satisfies_constraint_I (x)
33874 || satisfies_constraint_K (x)
33875 || (mode == SImode
33876 ? satisfies_constraint_L (x)
33877 : satisfies_constraint_J (x))))
33878 || (outer_code == GTU
33879 && satisfies_constraint_I (x))
33880 || (outer_code == LTU
33881 && satisfies_constraint_P (x)))
33883 *total = 0;
33884 return true;
33886 else if ((outer_code == PLUS
33887 && reg_or_add_cint_operand (x, VOIDmode))
33888 || (outer_code == MINUS
33889 && reg_or_sub_cint_operand (x, VOIDmode))
33890 || ((outer_code == SET
33891 || outer_code == IOR
33892 || outer_code == XOR)
33893 && (INTVAL (x)
33894 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
33896 *total = COSTS_N_INSNS (1);
33897 return true;
33899 /* FALLTHRU */
33901 case CONST_DOUBLE:
33902 case CONST_WIDE_INT:
33903 case CONST:
33904 case HIGH:
33905 case SYMBOL_REF:
33906 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33907 return true;
33909 case MEM:
33910 /* When optimizing for size, MEM should be slightly more expensive
33911 than generating the address, e.g., (plus (reg) (const)).
33912 L1 cache latency is about two instructions. */
33913 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
33914 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
33915 *total += COSTS_N_INSNS (100);
33916 return true;
33918 case LABEL_REF:
33919 *total = 0;
33920 return true;
33922 case PLUS:
33923 case MINUS:
33924 if (FLOAT_MODE_P (mode))
33925 *total = rs6000_cost->fp;
33926 else
33927 *total = COSTS_N_INSNS (1);
33928 return false;
33930 case MULT:
33931 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33932 && satisfies_constraint_I (XEXP (x, 1)))
33934 if (INTVAL (XEXP (x, 1)) >= -256
33935 && INTVAL (XEXP (x, 1)) <= 255)
33936 *total = rs6000_cost->mulsi_const9;
33937 else
33938 *total = rs6000_cost->mulsi_const;
33940 else if (mode == SFmode)
33941 *total = rs6000_cost->fp;
33942 else if (FLOAT_MODE_P (mode))
33943 *total = rs6000_cost->dmul;
33944 else if (mode == DImode)
33945 *total = rs6000_cost->muldi;
33946 else
33947 *total = rs6000_cost->mulsi;
33948 return false;
33950 case FMA:
33951 if (mode == SFmode)
33952 *total = rs6000_cost->fp;
33953 else
33954 *total = rs6000_cost->dmul;
33955 break;
33957 case DIV:
33958 case MOD:
33959 if (FLOAT_MODE_P (mode))
33961 *total = mode == DFmode ? rs6000_cost->ddiv
33962 : rs6000_cost->sdiv;
33963 return false;
33965 /* FALLTHRU */
33967 case UDIV:
33968 case UMOD:
33969 if (GET_CODE (XEXP (x, 1)) == CONST_INT
33970 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
33972 if (code == DIV || code == MOD)
33973 /* Shift, addze */
33974 *total = COSTS_N_INSNS (2);
33975 else
33976 /* Shift */
33977 *total = COSTS_N_INSNS (1);
33979 else
33981 if (GET_MODE (XEXP (x, 1)) == DImode)
33982 *total = rs6000_cost->divdi;
33983 else
33984 *total = rs6000_cost->divsi;
33986 /* Add in shift and subtract for MOD unless we have a mod instruction. */
33987 if (!TARGET_MODULO && (code == MOD || code == UMOD))
33988 *total += COSTS_N_INSNS (2);
33989 return false;
33991 case CTZ:
33992 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
33993 return false;
33995 case FFS:
33996 *total = COSTS_N_INSNS (4);
33997 return false;
33999 case POPCOUNT:
34000 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34001 return false;
34003 case PARITY:
34004 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34005 return false;
34007 case NOT:
34008 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34009 *total = 0;
34010 else
34011 *total = COSTS_N_INSNS (1);
34012 return false;
34014 case AND:
34015 if (CONST_INT_P (XEXP (x, 1)))
34017 rtx left = XEXP (x, 0);
34018 rtx_code left_code = GET_CODE (left);
34020 /* rotate-and-mask: 1 insn. */
34021 if ((left_code == ROTATE
34022 || left_code == ASHIFT
34023 || left_code == LSHIFTRT)
34024 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34026 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34027 if (!CONST_INT_P (XEXP (left, 1)))
34028 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34029 *total += COSTS_N_INSNS (1);
34030 return true;
34033 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34034 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34035 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34036 || (val & 0xffff) == val
34037 || (val & 0xffff0000) == val
34038 || ((val & 0xffff) == 0 && mode == SImode))
34040 *total = rtx_cost (left, mode, AND, 0, speed);
34041 *total += COSTS_N_INSNS (1);
34042 return true;
34045 /* 2 insns. */
34046 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34048 *total = rtx_cost (left, mode, AND, 0, speed);
34049 *total += COSTS_N_INSNS (2);
34050 return true;
34054 *total = COSTS_N_INSNS (1);
34055 return false;
34057 case IOR:
34058 /* FIXME */
34059 *total = COSTS_N_INSNS (1);
34060 return true;
34062 case CLZ:
34063 case XOR:
34064 case ZERO_EXTRACT:
34065 *total = COSTS_N_INSNS (1);
34066 return false;
34068 case ASHIFT:
34069 /* The EXTSWSLI instruction is a combined instruction. Don't count both
34070 the sign extend and shift separately within the insn. */
34071 if (TARGET_EXTSWSLI && mode == DImode
34072 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34073 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34075 *total = 0;
34076 return false;
34078 /* fall through */
34080 case ASHIFTRT:
34081 case LSHIFTRT:
34082 case ROTATE:
34083 case ROTATERT:
34084 /* Handle mul_highpart. */
34085 if (outer_code == TRUNCATE
34086 && GET_CODE (XEXP (x, 0)) == MULT)
34088 if (mode == DImode)
34089 *total = rs6000_cost->muldi;
34090 else
34091 *total = rs6000_cost->mulsi;
34092 return true;
34094 else if (outer_code == AND)
34095 *total = 0;
34096 else
34097 *total = COSTS_N_INSNS (1);
34098 return false;
34100 case SIGN_EXTEND:
34101 case ZERO_EXTEND:
34102 if (GET_CODE (XEXP (x, 0)) == MEM)
34103 *total = 0;
34104 else
34105 *total = COSTS_N_INSNS (1);
34106 return false;
34108 case COMPARE:
34109 case NEG:
34110 case ABS:
34111 if (!FLOAT_MODE_P (mode))
34113 *total = COSTS_N_INSNS (1);
34114 return false;
34116 /* FALLTHRU */
34118 case FLOAT:
34119 case UNSIGNED_FLOAT:
34120 case FIX:
34121 case UNSIGNED_FIX:
34122 case FLOAT_TRUNCATE:
34123 *total = rs6000_cost->fp;
34124 return false;
34126 case FLOAT_EXTEND:
34127 if (mode == DFmode)
34128 *total = rs6000_cost->sfdf_convert;
34129 else
34130 *total = rs6000_cost->fp;
34131 return false;
34133 case UNSPEC:
34134 switch (XINT (x, 1))
34136 case UNSPEC_FRSP:
34137 *total = rs6000_cost->fp;
34138 return true;
34140 default:
34141 break;
34143 break;
34145 case CALL:
34146 case IF_THEN_ELSE:
34147 if (!speed)
34149 *total = COSTS_N_INSNS (1);
34150 return true;
34152 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34154 *total = rs6000_cost->fp;
34155 return false;
34157 break;
34159 case NE:
34160 case EQ:
34161 case GTU:
34162 case LTU:
34163 /* Carry bit requires mode == Pmode.
34164 NEG or PLUS already counted so only add one. */
34165 if (mode == Pmode
34166 && (outer_code == NEG || outer_code == PLUS))
34168 *total = COSTS_N_INSNS (1);
34169 return true;
34171 /* FALLTHRU */
34173 case GT:
34174 case LT:
34175 case UNORDERED:
34176 if (outer_code == SET)
34178 if (XEXP (x, 1) == const0_rtx)
34180 *total = COSTS_N_INSNS (2);
34181 return true;
34183 else
34185 *total = COSTS_N_INSNS (3);
34186 return false;
34189 /* CC COMPARE. */
34190 if (outer_code == COMPARE)
34192 *total = 0;
34193 return true;
34195 break;
34197 default:
34198 break;
34201 return false;
34204 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34206 static bool
34207 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34208 int opno, int *total, bool speed)
34210 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34212 fprintf (stderr,
34213 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34214 "opno = %d, total = %d, speed = %s, x:\n",
34215 ret ? "complete" : "scan inner",
34216 GET_MODE_NAME (mode),
34217 GET_RTX_NAME (outer_code),
34218 opno,
34219 *total,
34220 speed ? "true" : "false");
34222 debug_rtx (x);
34224 return ret;
34227 static int
34228 rs6000_insn_cost (rtx_insn *insn, bool speed)
34230 if (recog_memoized (insn) < 0)
34231 return 0;
34233 if (!speed)
34234 return get_attr_length (insn);
34236 int cost = get_attr_cost (insn);
34237 if (cost > 0)
34238 return cost;
34240 int n = get_attr_length (insn) / 4;
34241 enum attr_type type = get_attr_type (insn);
34243 switch (type)
34245 case TYPE_LOAD:
34246 case TYPE_FPLOAD:
34247 case TYPE_VECLOAD:
34248 cost = COSTS_N_INSNS (n + 1);
34249 break;
34251 case TYPE_MUL:
34252 switch (get_attr_size (insn))
34254 case SIZE_8:
34255 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
34256 break;
34257 case SIZE_16:
34258 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
34259 break;
34260 case SIZE_32:
34261 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
34262 break;
34263 case SIZE_64:
34264 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
34265 break;
34266 default:
34267 gcc_unreachable ();
34269 break;
34270 case TYPE_DIV:
34271 switch (get_attr_size (insn))
34273 case SIZE_32:
34274 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
34275 break;
34276 case SIZE_64:
34277 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
34278 break;
34279 default:
34280 gcc_unreachable ();
34282 break;
34284 case TYPE_FP:
34285 cost = n * rs6000_cost->fp;
34286 break;
34287 case TYPE_DMUL:
34288 cost = n * rs6000_cost->dmul;
34289 break;
34290 case TYPE_SDIV:
34291 cost = n * rs6000_cost->sdiv;
34292 break;
34293 case TYPE_DDIV:
34294 cost = n * rs6000_cost->ddiv;
34295 break;
34297 case TYPE_SYNC:
34298 case TYPE_LOAD_L:
34299 case TYPE_MFCR:
34300 case TYPE_MFCRF:
34301 cost = COSTS_N_INSNS (n + 2);
34302 break;
34304 default:
34305 cost = COSTS_N_INSNS (n);
34308 return cost;
34311 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
34313 static int
34314 rs6000_debug_address_cost (rtx x, machine_mode mode,
34315 addr_space_t as, bool speed)
34317 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
34319 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
34320 ret, speed ? "true" : "false");
34321 debug_rtx (x);
34323 return ret;
34327 /* A C expression returning the cost of moving data from a register of class
34328 CLASS1 to one of CLASS2. */
34330 static int
34331 rs6000_register_move_cost (machine_mode mode,
34332 reg_class_t from, reg_class_t to)
34334 int ret;
34336 if (TARGET_DEBUG_COST)
34337 dbg_cost_ctrl++;
34339 /* Moves from/to GENERAL_REGS. */
34340 if (reg_classes_intersect_p (to, GENERAL_REGS)
34341 || reg_classes_intersect_p (from, GENERAL_REGS))
34343 reg_class_t rclass = from;
34345 if (! reg_classes_intersect_p (to, GENERAL_REGS))
34346 rclass = to;
34348 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
34349 ret = (rs6000_memory_move_cost (mode, rclass, false)
34350 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
34352 /* It's more expensive to move CR_REGS than CR0_REGS because of the
34353 shift. */
34354 else if (rclass == CR_REGS)
34355 ret = 4;
34357 /* For those processors that have slow LR/CTR moves, make them more
34358 expensive than memory in order to bias spills to memory. */
34359 else if ((rs6000_tune == PROCESSOR_POWER6
34360 || rs6000_tune == PROCESSOR_POWER7
34361 || rs6000_tune == PROCESSOR_POWER8
34362 || rs6000_tune == PROCESSOR_POWER9)
34363 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
34364 ret = 6 * hard_regno_nregs (0, mode);
34366 else
34367 /* A move will cost one instruction per GPR moved. */
34368 ret = 2 * hard_regno_nregs (0, mode);
34371 /* If we have VSX, we can easily move between FPR or Altivec registers. */
34372 else if (VECTOR_MEM_VSX_P (mode)
34373 && reg_classes_intersect_p (to, VSX_REGS)
34374 && reg_classes_intersect_p (from, VSX_REGS))
34375 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
34377 /* Moving between two similar registers is just one instruction. */
34378 else if (reg_classes_intersect_p (to, from))
34379 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
34381 /* Everything else has to go through GENERAL_REGS. */
34382 else
34383 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
34384 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
34386 if (TARGET_DEBUG_COST)
34388 if (dbg_cost_ctrl == 1)
34389 fprintf (stderr,
34390 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
34391 ret, GET_MODE_NAME (mode), reg_class_names[from],
34392 reg_class_names[to]);
34393 dbg_cost_ctrl--;
34396 return ret;
34399 /* A C expression returning the cost of moving data of MODE from a register
34400 to or from memory. */
34402 static int
34403 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
34404 bool in ATTRIBUTE_UNUSED)
34406 int ret;
34408 if (TARGET_DEBUG_COST)
34409 dbg_cost_ctrl++;
34411 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
34412 ret = 4 * hard_regno_nregs (0, mode);
34413 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
34414 || reg_classes_intersect_p (rclass, VSX_REGS)))
34415 ret = 4 * hard_regno_nregs (32, mode);
34416 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
34417 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
34418 else
34419 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
34421 if (TARGET_DEBUG_COST)
34423 if (dbg_cost_ctrl == 1)
34424 fprintf (stderr,
34425 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
34426 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
34427 dbg_cost_ctrl--;
34430 return ret;
34433 /* Returns a code for a target-specific builtin that implements the
34434 reciprocal of the function, or NULL_TREE if not available. */
34436 static tree
34437 rs6000_builtin_reciprocal (tree fndecl)
34439 switch (DECL_FUNCTION_CODE (fndecl))
34441 case VSX_BUILTIN_XVSQRTDP:
34442 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
34443 return NULL_TREE;
34445 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
34447 case VSX_BUILTIN_XVSQRTSP:
34448 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
34449 return NULL_TREE;
34451 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
34453 default:
34454 return NULL_TREE;
34458 /* Load up a constant. If the mode is a vector mode, splat the value across
34459 all of the vector elements. */
34461 static rtx
34462 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
34464 rtx reg;
34466 if (mode == SFmode || mode == DFmode)
34468 rtx d = const_double_from_real_value (dconst, mode);
34469 reg = force_reg (mode, d);
34471 else if (mode == V4SFmode)
34473 rtx d = const_double_from_real_value (dconst, SFmode);
34474 rtvec v = gen_rtvec (4, d, d, d, d);
34475 reg = gen_reg_rtx (mode);
34476 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34478 else if (mode == V2DFmode)
34480 rtx d = const_double_from_real_value (dconst, DFmode);
34481 rtvec v = gen_rtvec (2, d, d);
34482 reg = gen_reg_rtx (mode);
34483 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
34485 else
34486 gcc_unreachable ();
34488 return reg;
34491 /* Generate an FMA instruction. */
34493 static void
34494 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
34496 machine_mode mode = GET_MODE (target);
34497 rtx dst;
34499 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
34500 gcc_assert (dst != NULL);
34502 if (dst != target)
34503 emit_move_insn (target, dst);
34506 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
34508 static void
34509 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
34511 machine_mode mode = GET_MODE (dst);
34512 rtx r;
34514 /* This is a tad more complicated, since the fnma_optab is for
34515 a different expression: fma(-m1, m2, a), which is the same
34516 thing except in the case of signed zeros.
34518 Fortunately we know that if FMA is supported that FNMSUB is
34519 also supported in the ISA. Just expand it directly. */
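/* A sketch of that signed-zero corner: with m1*m2 == a == 1.0,
   fma (-m1, m2, a) yields 1.0 - 1.0 = +0.0, whereas the FNMSUB form
   -fma (m1, m2, -a) yields -(1.0 - 1.0) = -0.0; the two expressions
   agree everywhere except in the sign of an exact zero. */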
34521 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
34523 r = gen_rtx_NEG (mode, a);
34524 r = gen_rtx_FMA (mode, m1, m2, r);
34525 r = gen_rtx_NEG (mode, r);
34526 emit_insn (gen_rtx_SET (dst, r));
34529 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
34530 add a reg_note saying that this was a division. Support both scalar and
34531 vector divide. Assumes no trapping math and finite arguments. */
34533 void
34534 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
34536 machine_mode mode = GET_MODE (dst);
34537 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
34538 int i;
34540 /* Low precision estimates guarantee 5 bits of accuracy. High
34541 precision estimates guarantee 14 bits of accuracy. SFmode
34542 requires 23 bits of accuracy. DFmode requires 52 bits of
34543 accuracy. Each pass at least doubles the accuracy, leading
34544 to the following. */
34545 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34546 if (mode == DFmode || mode == V2DFmode)
34547 passes++;
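/* Worked out: a 5-bit low-precision estimate needs the three doublings
   above for SFmode (5 -> 10 -> 20 -> 40 >= 23) plus one more for DFmode
   (-> 80 >= 52); a 14-bit high-precision estimate needs one pass for
   SFmode (14 -> 28 >= 23) and two for DFmode (-> 56 >= 52). */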
34549 enum insn_code code = optab_handler (smul_optab, mode);
34550 insn_gen_fn gen_mul = GEN_FCN (code);
34552 gcc_assert (code != CODE_FOR_nothing);
34554 one = rs6000_load_constant_and_splat (mode, dconst1);
34556 /* x0 = 1./d estimate */
34557 x0 = gen_reg_rtx (mode);
34558 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
34559 UNSPEC_FRES)));
34561 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
34562 if (passes > 1) {
34564 /* e0 = 1. - d * x0 */
34565 e0 = gen_reg_rtx (mode);
34566 rs6000_emit_nmsub (e0, d, x0, one);
34568 /* x1 = x0 + e0 * x0 */
34569 x1 = gen_reg_rtx (mode);
34570 rs6000_emit_madd (x1, e0, x0, x0);
34572 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
34573 ++i, xprev = xnext, eprev = enext) {
34575 /* enext = eprev * eprev */
34576 enext = gen_reg_rtx (mode);
34577 emit_insn (gen_mul (enext, eprev, eprev));
34579 /* xnext = xprev + enext * xprev */
34580 xnext = gen_reg_rtx (mode);
34581 rs6000_emit_madd (xnext, enext, xprev, xprev);
34584 } else
34585 xprev = x0;
34587 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
34589 /* u = n * xprev */
34590 u = gen_reg_rtx (mode);
34591 emit_insn (gen_mul (u, n, xprev));
34593 /* v = n - (d * u) */
34594 v = gen_reg_rtx (mode);
34595 rs6000_emit_nmsub (v, d, u, n);
34597 /* dst = (v * xprev) + u */
34598 rs6000_emit_madd (dst, v, xprev, u);
34600 if (note_p)
34601 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
34604 /* Goldschmidt's Algorithm for single/double-precision floating point
34605 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
34607 void
34608 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
34610 machine_mode mode = GET_MODE (src);
34611 rtx e = gen_reg_rtx (mode);
34612 rtx g = gen_reg_rtx (mode);
34613 rtx h = gen_reg_rtx (mode);
34615 /* Low precision estimates guarantee 5 bits of accuracy. High
34616 precision estimates guarantee 14 bits of accuracy. SFmode
34617 requires 23 bits of accuracy. DFmode requires 52 bits of
34618 accuracy. Each pass at least doubles the accuracy, leading
34619 to the following. */
34620 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
34621 if (mode == DFmode || mode == V2DFmode)
34622 passes++;
34624 int i;
34625 rtx mhalf;
34626 enum insn_code code = optab_handler (smul_optab, mode);
34627 insn_gen_fn gen_mul = GEN_FCN (code);
34629 gcc_assert (code != CODE_FOR_nothing);
34631 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
34633 /* e = rsqrt estimate */
34634 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
34635 UNSPEC_RSQRT)));
34637 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
34638 if (!recip)
34640 rtx zero = force_reg (mode, CONST0_RTX (mode));
34642 if (mode == SFmode)
34644 rtx target = emit_conditional_move (e, GT, src, zero, mode,
34645 e, zero, mode, 0);
34646 if (target != e)
34647 emit_move_insn (e, target);
34649 else
34651 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
34652 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
34656 /* g = sqrt estimate. */
34657 emit_insn (gen_mul (g, e, src));
34658 /* h = 1/(2*sqrt) estimate. */
34659 emit_insn (gen_mul (h, e, mhalf));
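/* Each refinement pass below applies the Goldschmidt recurrence
   t = 1/2 - g*h, g' = g + g*t, h' = h + h*t, under which g converges
   to sqrt(src) and h to 1/(2*sqrt(src)). */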
34661 if (recip)
34663 if (passes == 1)
34665 rtx t = gen_reg_rtx (mode);
34666 rs6000_emit_nmsub (t, g, h, mhalf);
34667 /* Apply correction directly to 1/rsqrt estimate. */
34668 rs6000_emit_madd (dst, e, t, e);
34670 else
34672 for (i = 0; i < passes; i++)
34674 rtx t1 = gen_reg_rtx (mode);
34675 rtx g1 = gen_reg_rtx (mode);
34676 rtx h1 = gen_reg_rtx (mode);
34678 rs6000_emit_nmsub (t1, g, h, mhalf);
34679 rs6000_emit_madd (g1, g, t1, g);
34680 rs6000_emit_madd (h1, h, t1, h);
34682 g = g1;
34683 h = h1;
34685 /* Multiply by 2 for 1/rsqrt. */
34686 emit_insn (gen_add3_insn (dst, h, h));
34689 else
34691 rtx t = gen_reg_rtx (mode);
34692 rs6000_emit_nmsub (t, g, h, mhalf);
34693 rs6000_emit_madd (dst, g, t, g);
34696 return;
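/* A scalar sketch of the Goldschmidt iteration above (illustrative
   only; all names are hypothetical stand-ins for the RTL registers,
   and the passes == 1 recip shortcut refines e directly instead):

     e = frsqrte (src);         // rsqrt estimate
     g = e * src;               // sqrt estimate
     h = e * 0.5;               // 1/(2*sqrt) estimate
     repeat "passes" times:
       t = 0.5 - g * h;         // residual: nmsub with mhalf
       g = g + g * t;           // refine sqrt estimate
       h = h + h * t;           // refine 1/(2*sqrt) estimate
     sqrt:  dst = g
     rsqrt: dst = h + h         // double h to get 1/sqrt
*/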
34699 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
34700 (Power7) targets. DST is the target, and SRC is the argument operand. */
34702 void
34703 rs6000_emit_popcount (rtx dst, rtx src)
34705 machine_mode mode = GET_MODE (dst);
34706 rtx tmp1, tmp2;
34708 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
34709 if (TARGET_POPCNTD)
34711 if (mode == SImode)
34712 emit_insn (gen_popcntdsi2 (dst, src));
34713 else
34714 emit_insn (gen_popcntddi2 (dst, src));
34715 return;
34718 tmp1 = gen_reg_rtx (mode);
34720 if (mode == SImode)
34722 emit_insn (gen_popcntbsi2 (tmp1, src));
34723 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
34724 NULL_RTX, 0);
34725 tmp2 = force_reg (SImode, tmp2);
34726 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
34728 else
34730 emit_insn (gen_popcntbdi2 (tmp1, src));
34731 tmp2 = expand_mult (DImode, tmp1,
34732 GEN_INT ((HOST_WIDE_INT)
34733 0x01010101 << 32 | 0x01010101),
34734 NULL_RTX, 0);
34735 tmp2 = force_reg (DImode, tmp2);
34736 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
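/* Sketch of the popcntb fallback above: popcntb leaves a population
   count in each byte, the multiply by 0x01010101 (or its 64-bit
   equivalent) sums all the byte counts into the top byte, and the
   final shift extracts that byte.  E.g. for SImode:

     t = popcntb (x);                  // per-byte popcounts
     dst = (t * 0x01010101) >> 24;     // top byte = total count
*/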
34741 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
34742 target, and SRC is the argument operand. */
34744 void
34745 rs6000_emit_parity (rtx dst, rtx src)
34747 machine_mode mode = GET_MODE (dst);
34748 rtx tmp;
34750 tmp = gen_reg_rtx (mode);
34752 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
34753 if (TARGET_CMPB)
34755 if (mode == SImode)
34757 emit_insn (gen_popcntbsi2 (tmp, src));
34758 emit_insn (gen_paritysi2_cmpb (dst, tmp));
34760 else
34762 emit_insn (gen_popcntbdi2 (tmp, src));
34763 emit_insn (gen_paritydi2_cmpb (dst, tmp));
34765 return;
34768 if (mode == SImode)
34770 /* Is mult+shift >= shift+xor+shift+xor? */
34771 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
34773 rtx tmp1, tmp2, tmp3, tmp4;
34775 tmp1 = gen_reg_rtx (SImode);
34776 emit_insn (gen_popcntbsi2 (tmp1, src));
34778 tmp2 = gen_reg_rtx (SImode);
34779 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
34780 tmp3 = gen_reg_rtx (SImode);
34781 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
34783 tmp4 = gen_reg_rtx (SImode);
34784 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
34785 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
34787 else
34788 rs6000_emit_popcount (tmp, src);
34789 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
34791 else
34793 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
34794 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
34796 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
34798 tmp1 = gen_reg_rtx (DImode);
34799 emit_insn (gen_popcntbdi2 (tmp1, src));
34801 tmp2 = gen_reg_rtx (DImode);
34802 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
34803 tmp3 = gen_reg_rtx (DImode);
34804 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
34806 tmp4 = gen_reg_rtx (DImode);
34807 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
34808 tmp5 = gen_reg_rtx (DImode);
34809 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
34811 tmp6 = gen_reg_rtx (DImode);
34812 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
34813 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
34815 else
34816 rs6000_emit_popcount (tmp, src);
34817 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
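/* Sketch of the 64-bit xor-fold path above: starting from the
   per-byte counts, each shift/xor step folds the value in half while
   preserving the overall parity, and the final AND keeps bit 0:

     t = popcntb (x);
     t ^= t >> 32;  t ^= t >> 16;  t ^= t >> 8;
     dst = t & 1;
*/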
34821 /* Expand an Altivec constant permutation for little endian mode.
34822 OP0 and OP1 are the input vectors and TARGET is the output vector.
34823 SEL specifies the constant permutation vector.
34825 There are two issues: First, the two input operands must be
34826 swapped so that together they form a double-wide array in LE
34827 order. Second, the vperm instruction has surprising behavior
34828 in LE mode: it interprets the elements of the source vectors
34829 in BE mode ("left to right") and interprets the elements of
34830 the destination vector in LE mode ("right to left"). To
34831 correct for this, we must subtract each element of the permute
34832 control vector from 31.
34834 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
34835 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
34836 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
34837 serve as the permute control vector. Then, in BE mode,
34839 vperm 9,10,11,12
34841 places the desired result in vr9. However, in LE mode the
34842 vector contents will be
34844 vr10 = 00000003 00000002 00000001 00000000
34845 vr11 = 00000007 00000006 00000005 00000004
34847 The result of the vperm using the same permute control vector is
34849 vr9 = 05000000 07000000 01000000 03000000
34851 That is, the leftmost 4 bytes of vr10 are interpreted as the
34852 source for the rightmost 4 bytes of vr9, and so on.
34854 If we change the permute control vector to
34856    vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
34858 and issue
34860 vperm 9,11,10,12
34862 we get the desired
34864 vr9 = 00000006 00000004 00000002 00000000. */
34866 static void
34867 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
34868 const vec_perm_indices &sel)
34870 unsigned int i;
34871 rtx perm[16];
34872 rtx constv, unspec;
34874 /* Unpack and adjust the constant selector. */
34875 for (i = 0; i < 16; ++i)
34877 unsigned int elt = 31 - (sel[i] & 31);
34878 perm[i] = GEN_INT (elt);
34881 /* Expand to a permute, swapping the inputs and using the
34882 adjusted selector. */
34883 if (!REG_P (op0))
34884 op0 = force_reg (V16QImode, op0);
34885 if (!REG_P (op1))
34886 op1 = force_reg (V16QImode, op1);
34888 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
34889 constv = force_reg (V16QImode, constv);
34890 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
34891 UNSPEC_VPERM);
34892 if (!REG_P (target))
34894 rtx tmp = gen_reg_rtx (V16QImode);
34895 emit_move_insn (tmp, unspec);
34896 unspec = tmp;
34899 emit_move_insn (target, unspec);
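/* Put differently: for a constant selector the LE correction is just
   perm_le[i] = 31 - perm_be[i] together with swapping the two input
   operands, which is exactly what the loop and the swapped
   UNSPEC_VPERM operands above implement.  */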
34902 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
34903 permute control vector. But here it's not a constant, so we must
34904 generate a vector NAND or NOR to do the adjustment. */
34906 void
34907 altivec_expand_vec_perm_le (rtx operands[4])
34909 rtx notx, iorx, unspec;
34910 rtx target = operands[0];
34911 rtx op0 = operands[1];
34912 rtx op1 = operands[2];
34913 rtx sel = operands[3];
34914 rtx tmp = target;
34915 rtx norreg = gen_reg_rtx (V16QImode);
34916 machine_mode mode = GET_MODE (target);
34918 /* Get everything in regs so the pattern matches. */
34919 if (!REG_P (op0))
34920 op0 = force_reg (mode, op0);
34921 if (!REG_P (op1))
34922 op1 = force_reg (mode, op1);
34923 if (!REG_P (sel))
34924 sel = force_reg (V16QImode, sel);
34925 if (!REG_P (target))
34926 tmp = gen_reg_rtx (mode);
34928 if (TARGET_P9_VECTOR)
34930 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
34931 UNSPEC_VPERMR);
34933 else
34935 /* Invert the selector with a VNAND if available, else a VNOR.
34936 The VNAND is preferred for future fusion opportunities. */
34937 notx = gen_rtx_NOT (V16QImode, sel);
34938 iorx = (TARGET_P8_VECTOR
34939 ? gen_rtx_IOR (V16QImode, notx, notx)
34940 : gen_rtx_AND (V16QImode, notx, notx));
34941 emit_insn (gen_rtx_SET (norreg, iorx));
34943 /* Permute with operands reversed and adjusted selector. */
34944 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
34945 UNSPEC_VPERM);
34948 /* Copy into target, possibly by way of a register. */
34949 if (!REG_P (target))
34951 emit_move_insn (tmp, unspec);
34952 unspec = tmp;
34955 emit_move_insn (target, unspec);
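/* The selector inversion works because vperm only examines the low
   five bits of each selector byte, and on five bits the bitwise
   complement satisfies (~b & 31) == 31 - b; so a single VNAND or VNOR
   performs the "subtract from 31" adjustment without any
   arithmetic.  */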
34958 /* Expand an Altivec constant permutation. Return true if we match
34959 an efficient implementation; false to fall back to VPERM.
34961 OP0 and OP1 are the input vectors and TARGET is the output vector.
34962 SEL specifies the constant permutation vector. */
34964 static bool
34965 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
34966 const vec_perm_indices &sel)
34968 struct altivec_perm_insn {
34969 HOST_WIDE_INT mask;
34970 enum insn_code impl;
34971 unsigned char perm[16];
34973 static const struct altivec_perm_insn patterns[] = {
34974 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
34975 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
34976 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
34977 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
34978 { OPTION_MASK_ALTIVEC,
34979 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
34980 : CODE_FOR_altivec_vmrglb_direct),
34981 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
34982 { OPTION_MASK_ALTIVEC,
34983 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
34984 : CODE_FOR_altivec_vmrglh_direct),
34985 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
34986 { OPTION_MASK_ALTIVEC,
34987 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
34988 : CODE_FOR_altivec_vmrglw_direct),
34989 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
34990 { OPTION_MASK_ALTIVEC,
34991 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
34992 : CODE_FOR_altivec_vmrghb_direct),
34993 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
34994 { OPTION_MASK_ALTIVEC,
34995 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
34996 : CODE_FOR_altivec_vmrghh_direct),
34997 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
34998 { OPTION_MASK_ALTIVEC,
34999 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35000 : CODE_FOR_altivec_vmrghw_direct),
35001 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35002 { OPTION_MASK_P8_VECTOR,
35003 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
35004 : CODE_FOR_p8_vmrgow_v4sf_direct),
35005 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35006 { OPTION_MASK_P8_VECTOR,
35007 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
35008 : CODE_FOR_p8_vmrgew_v4sf_direct),
35009 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35012 unsigned int i, j, elt, which;
35013 unsigned char perm[16];
35014 rtx x;
35015 bool one_vec;
35017 /* Unpack the constant selector. */
35018 for (i = which = 0; i < 16; ++i)
35020 elt = sel[i] & 31;
35021 which |= (elt < 16 ? 1 : 2);
35022 perm[i] = elt;
35025 /* Simplify the constant selector based on operands. */
35026 switch (which)
35028 default:
35029 gcc_unreachable ();
35031 case 3:
35032 one_vec = false;
35033 if (!rtx_equal_p (op0, op1))
35034 break;
35035 /* FALLTHRU */
35037 case 2:
35038 for (i = 0; i < 16; ++i)
35039 perm[i] &= 15;
35040 op0 = op1;
35041 one_vec = true;
35042 break;
35044 case 1:
35045 op1 = op0;
35046 one_vec = true;
35047 break;
35050 /* Look for splat patterns. */
35051 if (one_vec)
35053 elt = perm[0];
35055 for (i = 0; i < 16; ++i)
35056 if (perm[i] != elt)
35057 break;
35058 if (i == 16)
35060 if (!BYTES_BIG_ENDIAN)
35061 elt = 15 - elt;
35062 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35063 return true;
35066 if (elt % 2 == 0)
35068 for (i = 0; i < 16; i += 2)
35069 if (perm[i] != elt || perm[i + 1] != elt + 1)
35070 break;
35071 if (i == 16)
35073 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35074 x = gen_reg_rtx (V8HImode);
35075 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35076 GEN_INT (field)));
35077 emit_move_insn (target, gen_lowpart (V16QImode, x));
35078 return true;
35082 if (elt % 4 == 0)
35084 for (i = 0; i < 16; i += 4)
35085 if (perm[i] != elt
35086 || perm[i + 1] != elt + 1
35087 || perm[i + 2] != elt + 2
35088 || perm[i + 3] != elt + 3)
35089 break;
35090 if (i == 16)
35092 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35093 x = gen_reg_rtx (V4SImode);
35094 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35095 GEN_INT (field)));
35096 emit_move_insn (target, gen_lowpart (V16QImode, x));
35097 return true;
35102 /* Look for merge and pack patterns. */
35103 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35105 bool swapped;
35107 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35108 continue;
35110 elt = patterns[j].perm[0];
35111 if (perm[0] == elt)
35112 swapped = false;
35113 else if (perm[0] == elt + 16)
35114 swapped = true;
35115 else
35116 continue;
35117 for (i = 1; i < 16; ++i)
35119 elt = patterns[j].perm[i];
35120 if (swapped)
35121 elt = (elt >= 16 ? elt - 16 : elt + 16);
35122 else if (one_vec && elt >= 16)
35123 elt -= 16;
35124 if (perm[i] != elt)
35125 break;
35127 if (i == 16)
35129 enum insn_code icode = patterns[j].impl;
35130 machine_mode omode = insn_data[icode].operand[0].mode;
35131 machine_mode imode = insn_data[icode].operand[1].mode;
35133 /* For little-endian, don't use vpkuwum and vpkuhum if the
35134 underlying vector type is not V4SI and V8HI, respectively.
35135 For example, using vpkuwum with a V8HI picks up the even
35136 halfwords (BE numbering) when the even halfwords (LE
35137 numbering) are what we need. */
35138 if (!BYTES_BIG_ENDIAN
35139 && icode == CODE_FOR_altivec_vpkuwum_direct
35140 && ((GET_CODE (op0) == REG
35141 && GET_MODE (op0) != V4SImode)
35142 || (GET_CODE (op0) == SUBREG
35143 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35144 continue;
35145 if (!BYTES_BIG_ENDIAN
35146 && icode == CODE_FOR_altivec_vpkuhum_direct
35147 && ((GET_CODE (op0) == REG
35148 && GET_MODE (op0) != V8HImode)
35149 || (GET_CODE (op0) == SUBREG
35150 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35151 continue;
35153 /* For little-endian, the two input operands must be swapped
35154 (or swapped back) to ensure proper right-to-left numbering
35155 from 0 to 2N-1. */
35156 if (swapped ^ !BYTES_BIG_ENDIAN)
35157 std::swap (op0, op1);
35158 if (imode != V16QImode)
35160 op0 = gen_lowpart (imode, op0);
35161 op1 = gen_lowpart (imode, op1);
35163 if (omode == V16QImode)
35164 x = target;
35165 else
35166 x = gen_reg_rtx (omode);
35167 emit_insn (GEN_FCN (icode) (x, op0, op1));
35168 if (omode != V16QImode)
35169 emit_move_insn (target, gen_lowpart (V16QImode, x));
35170 return true;
35174 if (!BYTES_BIG_ENDIAN)
35176 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
35177 return true;
35180 return false;
35183 /* Expand a VSX Permute Doubleword constant permutation.
35184 Return true if we match an efficient implementation. */
35186 static bool
35187 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35188 unsigned char perm0, unsigned char perm1)
35190 rtx x;
35192 /* If both selectors come from the same operand, fold to single op. */
35193 if ((perm0 & 2) == (perm1 & 2))
35195 if (perm0 & 2)
35196 op0 = op1;
35197 else
35198 op1 = op0;
35200 /* If both operands are equal, fold to simpler permutation. */
35201 if (rtx_equal_p (op0, op1))
35203 perm0 = perm0 & 1;
35204 perm1 = (perm1 & 1) + 2;
35206 /* If the first selector comes from the second operand, swap. */
35207 else if (perm0 & 2)
35209 if (perm1 & 2)
35210 return false;
35211 perm0 -= 2;
35212 perm1 += 2;
35213 std::swap (op0, op1);
35215 /* If the second selector does not come from the second operand, fail. */
35216 else if ((perm1 & 2) == 0)
35217 return false;
35219 /* Success! */
35220 if (target != NULL)
35222 machine_mode vmode, dmode;
35223 rtvec v;
35225 vmode = GET_MODE (target);
35226 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35227 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
35228 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35229 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35230 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35231 emit_insn (gen_rtx_SET (target, x));
35233 return true;
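/* Worked example: with the doublewords of the concatenation
   {op0[0], op0[1], op1[0], op1[1]} numbered 0..3, the selector pair
   (perm0, perm1) = (1, 2) picks op0[1] and op1[0].  The
   canonicalization above guarantees perm0 selects from op0 and perm1
   from op1, which maps directly onto what xxpermdi can encode.  */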
35236 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
35238 static bool
35239 rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
35240 rtx op1, const vec_perm_indices &sel)
35242 bool testing_p = !target;
35244 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35245 if (TARGET_ALTIVEC && testing_p)
35246 return true;
35248 /* Check for ps_merge* or xxpermdi insns. */
35249 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
35251 if (testing_p)
35253 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35254 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35256 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
35257 return true;
35260 if (TARGET_ALTIVEC)
35262 /* Force the target-independent code to lower to V16QImode. */
35263 if (vmode != V16QImode)
35264 return false;
35265 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
35266 return true;
35269 return false;
35272 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
35273 OP0 and OP1 are the input vectors and TARGET is the output vector.
35274 PERM specifies the constant permutation vector. */
35276 static void
35277 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
35278 machine_mode vmode, const vec_perm_builder &perm)
35280 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
35281 if (x != target)
35282 emit_move_insn (target, x);
35285 /* Expand an extract even operation. */
35287 void
35288 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
35290 machine_mode vmode = GET_MODE (target);
35291 unsigned i, nelt = GET_MODE_NUNITS (vmode);
35292 vec_perm_builder perm (nelt, nelt, 1);
35294 for (i = 0; i < nelt; i++)
35295 perm.quick_push (i * 2);
35297 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
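/* E.g. for a V4SI target this builds the selector {0, 2, 4, 6},
   i.e. the even elements of the double-wide {op0, op1} pair.  */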
35300 /* Expand a vector interleave operation. */
35302 void
35303 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
35305 machine_mode vmode = GET_MODE (target);
35306 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
35307 vec_perm_builder perm (nelt, nelt, 1);
35309 high = (highp ? 0 : nelt / 2);
35310 for (i = 0; i < nelt / 2; i++)
35312 perm.quick_push (i + high);
35313 perm.quick_push (i + nelt + high);
35316 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
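/* E.g. for V4SI, highp yields the selector {0, 4, 1, 5} and !highp
   yields {2, 6, 3, 7}, interleaving the high or low halves of op0
   and op1 respectively.  */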
35319 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
35320 void
35321 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
35323 HOST_WIDE_INT hwi_scale (scale);
35324 REAL_VALUE_TYPE r_pow;
35325 rtvec v = rtvec_alloc (2);
35326 rtx elt;
35327 rtx scale_vec = gen_reg_rtx (V2DFmode);
35328 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
35329 elt = const_double_from_real_value (r_pow, DFmode);
35330 RTVEC_ELT (v, 0) = elt;
35331 RTVEC_ELT (v, 1) = elt;
35332 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
35333 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
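/* In scalar terms (sketch): tgt[i] = src[i] * 2**scale for both
   elements, i.e. the same effect as ldexp (src[i], scale).  */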
35336 /* Return an RTX representing where to find the function value of a
35337 function returning MODE. */
35338 static rtx
35339 rs6000_complex_function_value (machine_mode mode)
35341 unsigned int regno;
35342 rtx r1, r2;
35343 machine_mode inner = GET_MODE_INNER (mode);
35344 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
35346 if (TARGET_FLOAT128_TYPE
35347 && (mode == KCmode
35348 || (mode == TCmode && TARGET_IEEEQUAD)))
35349 regno = ALTIVEC_ARG_RETURN;
35351 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35352 regno = FP_ARG_RETURN;
35354 else
35356 regno = GP_ARG_RETURN;
35358 /* 32-bit is OK since it'll go in r3/r4. */
35359 if (TARGET_32BIT && inner_bytes >= 4)
35360 return gen_rtx_REG (mode, regno);
35363 if (inner_bytes >= 8)
35364 return gen_rtx_REG (mode, regno);
35366 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
35367 const0_rtx);
35368 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
35369 GEN_INT (inner_bytes));
35370 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
35373 /* Return an rtx describing a return value of MODE as a PARALLEL
35374 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
35375 stride REG_STRIDE. */
35377 static rtx
35378 rs6000_parallel_return (machine_mode mode,
35379 int n_elts, machine_mode elt_mode,
35380 unsigned int regno, unsigned int reg_stride)
35382 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35384 int i;
35385 for (i = 0; i < n_elts; i++)
35387 rtx r = gen_rtx_REG (elt_mode, regno);
35388 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
35389 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
35390 regno += reg_stride;
35393 return par;
35396 /* Target hook for TARGET_FUNCTION_VALUE.
35398 An integer value is in r3 and a floating-point value is in fp1,
35399 unless -msoft-float. */
35401 static rtx
35402 rs6000_function_value (const_tree valtype,
35403 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
35404 bool outgoing ATTRIBUTE_UNUSED)
35406 machine_mode mode;
35407 unsigned int regno;
35408 machine_mode elt_mode;
35409 int n_elts;
35411 /* Special handling for structs in darwin64. */
35412 if (TARGET_MACHO
35413 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
35415 CUMULATIVE_ARGS valcum;
35416 rtx valret;
35418 valcum.words = 0;
35419 valcum.fregno = FP_ARG_MIN_REG;
35420 valcum.vregno = ALTIVEC_ARG_MIN_REG;
35421 /* Do a trial code generation as if this were going to be passed as
35422 an argument; if any part goes in memory, we return NULL. */
35423 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
35424 if (valret)
35425 return valret;
35426 /* Otherwise fall through to standard ABI rules. */
35429 mode = TYPE_MODE (valtype);
35431 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
35432 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
35434 int first_reg, n_regs;
35436 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
35438 /* _Decimal128 must use even/odd register pairs. */
35439 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35440 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
35442 else
35444 first_reg = ALTIVEC_ARG_RETURN;
35445 n_regs = 1;
35448 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
35451   /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64.  */
35452 if (TARGET_32BIT && TARGET_POWERPC64)
35453 switch (mode)
35455 default:
35456 break;
35457 case E_DImode:
35458 case E_SCmode:
35459 case E_DCmode:
35460 case E_TCmode:
35461 int count = GET_MODE_SIZE (mode) / 4;
35462 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
35465 if ((INTEGRAL_TYPE_P (valtype)
35466 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
35467 || POINTER_TYPE_P (valtype))
35468 mode = TARGET_32BIT ? SImode : DImode;
35470 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35471 /* _Decimal128 must use an even/odd register pair. */
35472 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35473 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
35474 && !FLOAT128_VECTOR_P (mode))
35475 regno = FP_ARG_RETURN;
35476 else if (TREE_CODE (valtype) == COMPLEX_TYPE
35477 && targetm.calls.split_complex_arg)
35478 return rs6000_complex_function_value (mode);
35479 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35480 return register is used in both cases, and we won't see V2DImode/V2DFmode
35481 for pure altivec, combine the two cases. */
35482 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
35483 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
35484 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
35485 regno = ALTIVEC_ARG_RETURN;
35486 else
35487 regno = GP_ARG_RETURN;
35489 return gen_rtx_REG (mode, regno);
35492 /* Define how to find the value returned by a library function
35493 assuming the value has mode MODE. */
35494 rtx
35495 rs6000_libcall_value (machine_mode mode)
35497 unsigned int regno;
35499   /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64.  */
35500 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
35501 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
35503 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
35504 /* _Decimal128 must use an even/odd register pair. */
35505 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
35506 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
35507 regno = FP_ARG_RETURN;
35508 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
35509 return register is used in both cases, and we won't see V2DImode/V2DFmode
35510 for pure altivec, combine the two cases. */
35511 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
35512 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
35513 regno = ALTIVEC_ARG_RETURN;
35514 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
35515 return rs6000_complex_function_value (mode);
35516 else
35517 regno = GP_ARG_RETURN;
35519 return gen_rtx_REG (mode, regno);
35522 /* Compute register pressure classes. We implement the target hook to avoid
35523 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
35524    lead to incorrect estimates of the number of available registers and therefore
35525 increased register pressure/spill. */
35526 static int
35527 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
35529 int n;
35531 n = 0;
35532 pressure_classes[n++] = GENERAL_REGS;
35533 if (TARGET_VSX)
35534 pressure_classes[n++] = VSX_REGS;
35535 else
35537 if (TARGET_ALTIVEC)
35538 pressure_classes[n++] = ALTIVEC_REGS;
35539 if (TARGET_HARD_FLOAT)
35540 pressure_classes[n++] = FLOAT_REGS;
35542 pressure_classes[n++] = CR_REGS;
35543 pressure_classes[n++] = SPECIAL_REGS;
35545 return n;
35548 /* Given FROM and TO register numbers, say whether this elimination is allowed.
35549 Frame pointer elimination is automatically handled.
35551 For the RS/6000, if frame pointer elimination is being done, we would like
35552 to convert ap into fp, not sp.
35554 We need r30 if -mminimal-toc was specified, and there are constant pool
35555 references. */
35557 static bool
35558 rs6000_can_eliminate (const int from, const int to)
35560 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
35561 ? ! frame_pointer_needed
35562 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
35563 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
35564 || constant_pool_empty_p ()
35565 : true);
35568 /* Define the offset between two registers, FROM to be eliminated and its
35569 replacement TO, at the start of a routine. */
35570 HOST_WIDE_INT
35571 rs6000_initial_elimination_offset (int from, int to)
35573 rs6000_stack_t *info = rs6000_stack_info ();
35574 HOST_WIDE_INT offset;
35576 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35577 offset = info->push_p ? 0 : -info->total_size;
35578 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35580 offset = info->push_p ? 0 : -info->total_size;
35581 if (FRAME_GROWS_DOWNWARD)
35582 offset += info->fixed_size + info->vars_size + info->parm_size;
35584 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35585 offset = FRAME_GROWS_DOWNWARD
35586 ? info->fixed_size + info->vars_size + info->parm_size
35587 : 0;
35588 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
35589 offset = info->total_size;
35590 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
35591 offset = info->push_p ? info->total_size : 0;
35592 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
35593 offset = 0;
35594 else
35595 gcc_unreachable ();
35597 return offset;
35600 /* Fill in sizes of registers used by unwinder. */
35602 static void
35603 rs6000_init_dwarf_reg_sizes_extra (tree address)
35605 if (TARGET_MACHO && ! TARGET_ALTIVEC)
35607 int i;
35608 machine_mode mode = TYPE_MODE (char_type_node);
35609 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
35610 rtx mem = gen_rtx_MEM (BLKmode, addr);
35611 rtx value = gen_int_mode (16, mode);
35613 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
35614 The unwinder still needs to know the size of Altivec registers. */
35616 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
35618 int column = DWARF_REG_TO_UNWIND_COLUMN
35619 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
35620 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
35622 emit_move_insn (adjust_address (mem, mode, offset), value);
35627 /* Map internal gcc register numbers to debug format register numbers.
35628 FORMAT specifies the type of debug register number to use:
35629 0 -- debug information, except for frame-related sections
35630 1 -- DWARF .debug_frame section
35631 2 -- DWARF .eh_frame section */
35633 unsigned int
35634 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
35636 /* Except for the above, we use the internal number for non-DWARF
35637 debug information, and also for .eh_frame. */
35638 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
35639 return regno;
35641 /* On some platforms, we use the standard DWARF register
35642 numbering for .debug_info and .debug_frame. */
35643 #ifdef RS6000_USE_DWARF_NUMBERING
35644 if (regno <= 63)
35645 return regno;
35646 if (regno == LR_REGNO)
35647 return 108;
35648 if (regno == CTR_REGNO)
35649 return 109;
35650 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
35651 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
35652 The actual code emitted saves the whole of CR, so we map CR2_REGNO
35653 to the DWARF reg for CR. */
35654 if (format == 1 && regno == CR2_REGNO)
35655 return 64;
35656 if (CR_REGNO_P (regno))
35657 return regno - CR0_REGNO + 86;
35658 if (regno == CA_REGNO)
35659 return 101; /* XER */
35660 if (ALTIVEC_REGNO_P (regno))
35661 return regno - FIRST_ALTIVEC_REGNO + 1124;
35662 if (regno == VRSAVE_REGNO)
35663 return 356;
35664 if (regno == VSCR_REGNO)
35665 return 67;
35666 #endif
35667 return regno;
35670 /* target hook eh_return_filter_mode */
35671 static scalar_int_mode
35672 rs6000_eh_return_filter_mode (void)
35674 return TARGET_32BIT ? SImode : word_mode;
35677 /* Target hook for scalar_mode_supported_p. */
35678 static bool
35679 rs6000_scalar_mode_supported_p (scalar_mode mode)
35681 /* -m32 does not support TImode. This is the default, from
35682 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
35683 same ABI as for -m32. But default_scalar_mode_supported_p allows
35684 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
35685 for -mpowerpc64. */
35686 if (TARGET_32BIT && mode == TImode)
35687 return false;
35689 if (DECIMAL_FLOAT_MODE_P (mode))
35690 return default_decimal_float_supported_p ();
35691 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
35692 return true;
35693 else
35694 return default_scalar_mode_supported_p (mode);
35697 /* Target hook for vector_mode_supported_p. */
35698 static bool
35699 rs6000_vector_mode_supported_p (machine_mode mode)
35701 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
35702 128-bit, the compiler might try to widen IEEE 128-bit to IBM
35703 double-double. */
35704 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
35705 return true;
35707 else
35708 return false;
35711 /* Target hook for floatn_mode. */
35712 static opt_scalar_float_mode
35713 rs6000_floatn_mode (int n, bool extended)
35715 if (extended)
35717 switch (n)
35719 case 32:
35720 return DFmode;
35722 case 64:
35723 if (TARGET_FLOAT128_TYPE)
35724 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35725 else
35726 return opt_scalar_float_mode ();
35728 case 128:
35729 return opt_scalar_float_mode ();
35731 default:
35732 /* Those are the only valid _FloatNx types. */
35733 gcc_unreachable ();
35736 else
35738 switch (n)
35740 case 32:
35741 return SFmode;
35743 case 64:
35744 return DFmode;
35746 case 128:
35747 if (TARGET_FLOAT128_TYPE)
35748 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35749 else
35750 return opt_scalar_float_mode ();
35752 default:
35753 return opt_scalar_float_mode ();
35759 /* Target hook for c_mode_for_suffix. */
35760 static machine_mode
35761 rs6000_c_mode_for_suffix (char suffix)
35763 if (TARGET_FLOAT128_TYPE)
35765 if (suffix == 'q' || suffix == 'Q')
35766 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
35768 /* At the moment, we are not defining a suffix for IBM extended double.
35769 If/when the default for -mabi=ieeelongdouble is changed, and we want
35770 to support __ibm128 constants in legacy library code, we may need to
35771      re-evaluate this decision.  Currently, c-lex.c only supports 'w' and
35772 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
35773 __float80 constants. */
35776 return VOIDmode;
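/* Usage sketch (hypothetical user code, assuming -mfloat128): a
   literal such as 1.0q or 1.0Q gets the mode returned above, i.e.
   KFmode, or TFmode when long double is IEEE 128-bit.  */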
35779 /* Target hook for invalid_arg_for_unprototyped_fn. */
35780 static const char *
35781 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
35783 return (!rs6000_darwin64_abi
35784 && typelist == 0
35785 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
35786 && (funcdecl == NULL_TREE
35787 || (TREE_CODE (funcdecl) == FUNCTION_DECL
35788 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
35789 ? N_("AltiVec argument passed to unprototyped function")
35790 : NULL;
35793 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
35794 setup by using __stack_chk_fail_local hidden function instead of
35795 calling __stack_chk_fail directly. Otherwise it is better to call
35796 __stack_chk_fail directly. */
35798 static tree ATTRIBUTE_UNUSED
35799 rs6000_stack_protect_fail (void)
35801 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
35802 ? default_hidden_stack_protect_fail ()
35803 : default_external_stack_protect_fail ();
35806 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
35808 #if TARGET_ELF
35809 static unsigned HOST_WIDE_INT
35810 rs6000_asan_shadow_offset (void)
35812 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
35814 #endif
35816 /* Mask options that we want to support inside of attribute((target)) and
35817 #pragma GCC target operations. Note, we do not include things like
35818 64/32-bit, endianness, hard/soft floating point, etc. that would have
35819 different calling sequences. */
35821 struct rs6000_opt_mask {
35822 const char *name; /* option name */
35823 HOST_WIDE_INT mask; /* mask to set */
35824 bool invert; /* invert sense of mask */
35825 bool valid_target; /* option is a target option */
35828 static struct rs6000_opt_mask const rs6000_opt_masks[] =
35830 { "altivec", OPTION_MASK_ALTIVEC, false, true },
35831 { "cmpb", OPTION_MASK_CMPB, false, true },
35832 { "crypto", OPTION_MASK_CRYPTO, false, true },
35833 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
35834 { "dlmzb", OPTION_MASK_DLMZB, false, true },
35835 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
35836 false, true },
35837 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
35838 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
35839 { "fprnd", OPTION_MASK_FPRND, false, true },
35840 { "hard-dfp", OPTION_MASK_DFP, false, true },
35841 { "htm", OPTION_MASK_HTM, false, true },
35842 { "isel", OPTION_MASK_ISEL, false, true },
35843 { "mfcrf", OPTION_MASK_MFCRF, false, true },
35844 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
35845 { "modulo", OPTION_MASK_MODULO, false, true },
35846 { "mulhw", OPTION_MASK_MULHW, false, true },
35847 { "multiple", OPTION_MASK_MULTIPLE, false, true },
35848 { "popcntb", OPTION_MASK_POPCNTB, false, true },
35849 { "popcntd", OPTION_MASK_POPCNTD, false, true },
35850 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
35851 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
35852 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
35853 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
35854 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
35855 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
35856 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
35857 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
35858 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
35859 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
35860 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
35861 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
35862 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
35863 { "string", 0, false, true },
35864 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
35865 { "update", OPTION_MASK_NO_UPDATE, true , true },
35866 { "vsx", OPTION_MASK_VSX, false, true },
35867 #ifdef OPTION_MASK_64BIT
35868 #if TARGET_AIX_OS
35869 { "aix64", OPTION_MASK_64BIT, false, false },
35870 { "aix32", OPTION_MASK_64BIT, true, false },
35871 #else
35872 { "64", OPTION_MASK_64BIT, false, false },
35873 { "32", OPTION_MASK_64BIT, true, false },
35874 #endif
35875 #endif
35876 #ifdef OPTION_MASK_EABI
35877 { "eabi", OPTION_MASK_EABI, false, false },
35878 #endif
35879 #ifdef OPTION_MASK_LITTLE_ENDIAN
35880 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
35881 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
35882 #endif
35883 #ifdef OPTION_MASK_RELOCATABLE
35884 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
35885 #endif
35886 #ifdef OPTION_MASK_STRICT_ALIGN
35887 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
35888 #endif
35889 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
35890 { "string", 0, false, false },
35893 /* Builtin mask mapping for printing the flags. */
35894 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
35896 { "altivec", RS6000_BTM_ALTIVEC, false, false },
35897 { "vsx", RS6000_BTM_VSX, false, false },
35898 { "fre", RS6000_BTM_FRE, false, false },
35899 { "fres", RS6000_BTM_FRES, false, false },
35900 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
35901 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
35902 { "popcntd", RS6000_BTM_POPCNTD, false, false },
35903 { "cell", RS6000_BTM_CELL, false, false },
35904 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
35905 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
35906 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
35907 { "crypto", RS6000_BTM_CRYPTO, false, false },
35908 { "htm", RS6000_BTM_HTM, false, false },
35909 { "hard-dfp", RS6000_BTM_DFP, false, false },
35910 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
35911 { "long-double-128", RS6000_BTM_LDBL128, false, false },
35912 { "powerpc64", RS6000_BTM_POWERPC64, false, false },
35913 { "float128", RS6000_BTM_FLOAT128, false, false },
35914 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
35917 /* Option variables that we want to support inside attribute((target)) and
35918 #pragma GCC target operations. */
35920 struct rs6000_opt_var {
35921 const char *name; /* option name */
35922 size_t global_offset; /* offset of the option in global_options. */
35923 size_t target_offset; /* offset of the option in target options. */
35926 static struct rs6000_opt_var const rs6000_opt_vars[] =
35928 { "friz",
35929 offsetof (struct gcc_options, x_TARGET_FRIZ),
35930 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
35931 { "avoid-indexed-addresses",
35932 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
35933 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
35934 { "longcall",
35935 offsetof (struct gcc_options, x_rs6000_default_long_calls),
35936 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
35937 { "optimize-swaps",
35938 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
35939 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
35940 { "allow-movmisalign",
35941 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
35942 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
35943 { "sched-groups",
35944 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
35945 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
35946 { "always-hint",
35947 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
35948 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
35949 { "align-branch-targets",
35950 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
35951 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
35952 { "tls-markers",
35953 offsetof (struct gcc_options, x_tls_markers),
35954 offsetof (struct cl_target_option, x_tls_markers), },
35955 { "sched-prolog",
35956 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
35957 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
35958 { "sched-epilog",
35959 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
35960 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
35961 { "speculate-indirect-jumps",
35962 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
35963 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
35966 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
35967 parsing. Return true if there were no errors. */
35969 static bool
35970 rs6000_inner_target_options (tree args, bool attr_p)
35972 bool ret = true;
35974 if (args == NULL_TREE)
35977 else if (TREE_CODE (args) == STRING_CST)
35979 char *p = ASTRDUP (TREE_STRING_POINTER (args));
35980 char *q;
35982 while ((q = strtok (p, ",")) != NULL)
35984 bool error_p = false;
35985 bool not_valid_p = false;
35986 const char *cpu_opt = NULL;
35988 p = NULL;
35989 if (strncmp (q, "cpu=", 4) == 0)
35991 int cpu_index = rs6000_cpu_name_lookup (q+4);
35992 if (cpu_index >= 0)
35993 rs6000_cpu_index = cpu_index;
35994 else
35996 error_p = true;
35997 cpu_opt = q+4;
36000 else if (strncmp (q, "tune=", 5) == 0)
36002 int tune_index = rs6000_cpu_name_lookup (q+5);
36003 if (tune_index >= 0)
36004 rs6000_tune_index = tune_index;
36005 else
36007 error_p = true;
36008 cpu_opt = q+5;
36011 else
36013 size_t i;
36014 bool invert = false;
36015 char *r = q;
36017 error_p = true;
36018 if (strncmp (r, "no-", 3) == 0)
36020 invert = true;
36021 r += 3;
36024 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36025 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36027 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36029 if (!rs6000_opt_masks[i].valid_target)
36030 not_valid_p = true;
36031 else
36033 error_p = false;
36034 rs6000_isa_flags_explicit |= mask;
36036 /* VSX needs altivec, so -mvsx automagically sets
36037 altivec and disables -mavoid-indexed-addresses. */
36038 if (!invert)
36040 if (mask == OPTION_MASK_VSX)
36042 mask |= OPTION_MASK_ALTIVEC;
36043 TARGET_AVOID_XFORM = 0;
36047 if (rs6000_opt_masks[i].invert)
36048 invert = !invert;
36050 if (invert)
36051 rs6000_isa_flags &= ~mask;
36052 else
36053 rs6000_isa_flags |= mask;
36055 break;
36058 if (error_p && !not_valid_p)
36060 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36061 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36063 size_t j = rs6000_opt_vars[i].global_offset;
36064 *((int *) ((char *)&global_options + j)) = !invert;
36065 error_p = false;
36066 not_valid_p = false;
36067 break;
36072 if (error_p)
36074 const char *eprefix, *esuffix;
36076 ret = false;
36077 if (attr_p)
36079 eprefix = "__attribute__((__target__(";
36080 esuffix = ")))";
36082 else
36084 eprefix = "#pragma GCC target ";
36085 esuffix = "";
36088 if (cpu_opt)
36089 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
36090 q, esuffix);
36091 else if (not_valid_p)
36092 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
36093 else
36094 error ("%s%qs%s is invalid", eprefix, q, esuffix);
36099 else if (TREE_CODE (args) == TREE_LIST)
36103 tree value = TREE_VALUE (args);
36104 if (value)
36106 bool ret2 = rs6000_inner_target_options (value, attr_p);
36107 if (!ret2)
36108 ret = false;
36110 args = TREE_CHAIN (args);
36112 while (args != NULL_TREE);
36115 else
36117 error ("attribute %<target%> argument not a string");
36118 return false;
36121 return ret;
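/* Examples of strings this parser accepts (hypothetical user code):

     #pragma GCC target ("cpu=power9,vsx")
     __attribute__ ((__target__ ("no-vsx,isel")))

   Each comma-separated token is either cpu=/tune=, or an entry from
   rs6000_opt_masks / rs6000_opt_vars with an optional "no-" prefix.  */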
36124 /* Print out the target options as a list for -mdebug=target. */
36126 static void
36127 rs6000_debug_target_options (tree args, const char *prefix)
36129 if (args == NULL_TREE)
36130 fprintf (stderr, "%s<NULL>", prefix);
36132 else if (TREE_CODE (args) == STRING_CST)
36134 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36135 char *q;
36137 while ((q = strtok (p, ",")) != NULL)
36139 p = NULL;
36140 fprintf (stderr, "%s\"%s\"", prefix, q);
36141 prefix = ", ";
36145 else if (TREE_CODE (args) == TREE_LIST)
36149 tree value = TREE_VALUE (args);
36150 if (value)
36152 rs6000_debug_target_options (value, prefix);
36153 prefix = ", ";
36155 args = TREE_CHAIN (args);
36157 while (args != NULL_TREE);
36160 else
36161 gcc_unreachable ();
36163 return;
36167 /* Hook to validate attribute((target("..."))). */
36169 static bool
36170 rs6000_valid_attribute_p (tree fndecl,
36171 tree ARG_UNUSED (name),
36172 tree args,
36173 int flags)
36175 struct cl_target_option cur_target;
36176 bool ret;
36177 tree old_optimize;
36178 tree new_target, new_optimize;
36179 tree func_optimize;
36181 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36183 if (TARGET_DEBUG_TARGET)
36185 tree tname = DECL_NAME (fndecl);
36186 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36187 if (tname)
36188 fprintf (stderr, "function: %.*s\n",
36189 (int) IDENTIFIER_LENGTH (tname),
36190 IDENTIFIER_POINTER (tname));
36191 else
36192 fprintf (stderr, "function: unknown\n");
36194 fprintf (stderr, "args:");
36195 rs6000_debug_target_options (args, " ");
36196 fprintf (stderr, "\n");
36198 if (flags)
36199 fprintf (stderr, "flags: 0x%x\n", flags);
36201 fprintf (stderr, "--------------------\n");
36204 /* attribute((target("default"))) does nothing, beyond
36205 affecting multi-versioning. */
36206 if (TREE_VALUE (args)
36207 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36208 && TREE_CHAIN (args) == NULL_TREE
36209 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36210 return true;
36212 old_optimize = build_optimization_node (&global_options);
36213 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36215 /* If the function changed the optimization levels as well as setting target
36216 options, start with the optimizations specified. */
36217 if (func_optimize && func_optimize != old_optimize)
36218 cl_optimization_restore (&global_options,
36219 TREE_OPTIMIZATION (func_optimize));
36221 /* The target attributes may also change some optimization flags, so update
36222 the optimization options if necessary. */
36223 cl_target_option_save (&cur_target, &global_options);
36224 rs6000_cpu_index = rs6000_tune_index = -1;
36225 ret = rs6000_inner_target_options (args, true);
36227 /* Set up any additional state. */
36228 if (ret)
36230 ret = rs6000_option_override_internal (false);
36231 new_target = build_target_option_node (&global_options);
36233 else
36234 new_target = NULL;
36236 new_optimize = build_optimization_node (&global_options);
36238 if (!new_target)
36239 ret = false;
36241 else if (fndecl)
36243 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36245 if (old_optimize != new_optimize)
36246 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36249 cl_target_option_restore (&global_options, &cur_target);
36251 if (old_optimize != new_optimize)
36252 cl_optimization_restore (&global_options,
36253 TREE_OPTIMIZATION (old_optimize));
36255 return ret;
36259 /* Hook to validate the current #pragma GCC target and set the state, and
36260 update the macros based on what was changed. If ARGS is NULL, then
36261 POP_TARGET is used to reset the options. */
36263 bool
36264 rs6000_pragma_target_parse (tree args, tree pop_target)
36266 tree prev_tree = build_target_option_node (&global_options);
36267 tree cur_tree;
36268 struct cl_target_option *prev_opt, *cur_opt;
36269 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
36270 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
36272 if (TARGET_DEBUG_TARGET)
36274 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
36275 fprintf (stderr, "args:");
36276 rs6000_debug_target_options (args, " ");
36277 fprintf (stderr, "\n");
36279 if (pop_target)
36281 fprintf (stderr, "pop_target:\n");
36282 debug_tree (pop_target);
36284 else
36285 fprintf (stderr, "pop_target: <NULL>\n");
36287 fprintf (stderr, "--------------------\n");
36290 if (! args)
36292 cur_tree = ((pop_target)
36293 ? pop_target
36294 : target_option_default_node);
36295 cl_target_option_restore (&global_options,
36296 TREE_TARGET_OPTION (cur_tree));
36298 else
36300 rs6000_cpu_index = rs6000_tune_index = -1;
36301 if (!rs6000_inner_target_options (args, false)
36302 || !rs6000_option_override_internal (false)
36303 || (cur_tree = build_target_option_node (&global_options))
36304 == NULL_TREE)
36306 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
36307 fprintf (stderr, "invalid pragma\n");
36309 return false;
36313 target_option_current_node = cur_tree;
36314 rs6000_activate_target_options (target_option_current_node);
36316 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
36317 change the macros that are defined. */
36318 if (rs6000_target_modify_macros_ptr)
36320 prev_opt = TREE_TARGET_OPTION (prev_tree);
36321 prev_bumask = prev_opt->x_rs6000_builtin_mask;
36322 prev_flags = prev_opt->x_rs6000_isa_flags;
36324 cur_opt = TREE_TARGET_OPTION (cur_tree);
36325 cur_flags = cur_opt->x_rs6000_isa_flags;
36326 cur_bumask = cur_opt->x_rs6000_builtin_mask;
36328 diff_bumask = (prev_bumask ^ cur_bumask);
36329 diff_flags = (prev_flags ^ cur_flags);
36331 if ((diff_flags != 0) || (diff_bumask != 0))
36333 /* Delete old macros. */
36334 rs6000_target_modify_macros_ptr (false,
36335 prev_flags & diff_flags,
36336 prev_bumask & diff_bumask);
36338 /* Define new macros. */
36339 rs6000_target_modify_macros_ptr (true,
36340 cur_flags & diff_flags,
36341 cur_bumask & diff_bumask);
36345 return true;
36349 /* Remember the last target of rs6000_set_current_function. */
36350 static GTY(()) tree rs6000_previous_fndecl;
36352 /* Restore target's globals from NEW_TREE and invalidate the
36353 rs6000_previous_fndecl cache. */
36355 void
36356 rs6000_activate_target_options (tree new_tree)
36358 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
36359 if (TREE_TARGET_GLOBALS (new_tree))
36360 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
36361 else if (new_tree == target_option_default_node)
36362 restore_target_globals (&default_target_globals);
36363 else
36364 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
36365 rs6000_previous_fndecl = NULL_TREE;
36368 /* Establish appropriate back-end context for processing the function
36369 FNDECL. The argument might be NULL to indicate processing at top
36370 level, outside of any function scope. */
36371 static void
36372 rs6000_set_current_function (tree fndecl)
36374 if (TARGET_DEBUG_TARGET)
36376 fprintf (stderr, "\n==================== rs6000_set_current_function");
36378 if (fndecl)
36379 fprintf (stderr, ", fndecl %s (%p)",
36380 (DECL_NAME (fndecl)
36381 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
36382 : "<unknown>"), (void *)fndecl);
36384 if (rs6000_previous_fndecl)
36385 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
36387 fprintf (stderr, "\n");
36390 /* Only change the context if the function changes. This hook is called
36391 several times in the course of compiling a function, and we don't want to
36392 slow things down too much or call target_reinit when it isn't safe. */
36393 if (fndecl == rs6000_previous_fndecl)
36394 return;
36396 tree old_tree;
36397 if (rs6000_previous_fndecl == NULL_TREE)
36398 old_tree = target_option_current_node;
36399 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
36400 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
36401 else
36402 old_tree = target_option_default_node;
36404 tree new_tree;
36405 if (fndecl == NULL_TREE)
36407 if (old_tree != target_option_current_node)
36408 new_tree = target_option_current_node;
36409 else
36410 new_tree = NULL_TREE;
36412 else
36414 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36415 if (new_tree == NULL_TREE)
36416 new_tree = target_option_default_node;
36419 if (TARGET_DEBUG_TARGET)
36421 if (new_tree)
36423 fprintf (stderr, "\nnew fndecl target specific options:\n");
36424 debug_tree (new_tree);
36427 if (old_tree)
36429 fprintf (stderr, "\nold fndecl target specific options:\n");
36430 debug_tree (old_tree);
36433 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
36434 fprintf (stderr, "--------------------\n");
36437 if (new_tree && old_tree != new_tree)
36438 rs6000_activate_target_options (new_tree);
36440 if (fndecl)
36441 rs6000_previous_fndecl = fndecl;
36445 /* Save the current options */
36447 static void
36448 rs6000_function_specific_save (struct cl_target_option *ptr,
36449 struct gcc_options *opts)
36451 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
36452 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
36455 /* Restore the current options */
36457 static void
36458 rs6000_function_specific_restore (struct gcc_options *opts,
36459 struct cl_target_option *ptr)
36462 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
36463 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
36464 (void) rs6000_option_override_internal (false);
36467 /* Print the current options */
36469 static void
36470 rs6000_function_specific_print (FILE *file, int indent,
36471 struct cl_target_option *ptr)
36473 rs6000_print_isa_options (file, indent, "Isa options set",
36474 ptr->x_rs6000_isa_flags);
36476 rs6000_print_isa_options (file, indent, "Isa options explicit",
36477 ptr->x_rs6000_isa_flags_explicit);
36480 /* Helper function to print the current isa or misc options on a line. */
36482 static void
36483 rs6000_print_options_internal (FILE *file,
36484 int indent,
36485 const char *string,
36486 HOST_WIDE_INT flags,
36487 const char *prefix,
36488 const struct rs6000_opt_mask *opts,
36489 size_t num_elements)
36491 size_t i;
36492 size_t start_column = 0;
36493 size_t cur_column;
36494 size_t max_column = 120;
36495 size_t prefix_len = strlen (prefix);
36496 size_t comma_len = 0;
36497 const char *comma = "";
36499 if (indent)
36500 start_column += fprintf (file, "%*s", indent, "");
36502 if (!flags)
36504 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
36505 return;
36508 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
36510 /* Print the various mask options. */
36511 cur_column = start_column;
36512 for (i = 0; i < num_elements; i++)
36514 bool invert = opts[i].invert;
36515 const char *name = opts[i].name;
36516 const char *no_str = "";
36517 HOST_WIDE_INT mask = opts[i].mask;
36518 size_t len = comma_len + prefix_len + strlen (name);
36520 if (!invert)
36522 if ((flags & mask) == 0)
36524 no_str = "no-";
36525 len += sizeof ("no-") - 1;
36528 flags &= ~mask;
36531 else
36533 if ((flags & mask) != 0)
36535 no_str = "no-";
36536 len += sizeof ("no-") - 1;
36539 flags |= mask;
36542 cur_column += len;
36543 if (cur_column > max_column)
36545 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
36546 cur_column = start_column + len;
36547 comma = "";
36550 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
36551 comma = ", ";
36552 comma_len = sizeof (", ") - 1;
36555 fputs ("\n", file);
36558 /* Helper function to print the current isa options on a line. */
36560 static void
36561 rs6000_print_isa_options (FILE *file, int indent, const char *string,
36562 HOST_WIDE_INT flags)
36564 rs6000_print_options_internal (file, indent, string, flags, "-m",
36565 &rs6000_opt_masks[0],
36566 ARRAY_SIZE (rs6000_opt_masks));
36569 static void
36570 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
36571 HOST_WIDE_INT flags)
36573 rs6000_print_options_internal (file, indent, string, flags, "",
36574 &rs6000_builtin_mask_names[0],
36575 ARRAY_SIZE (rs6000_builtin_mask_names));
36578 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
36579 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
36580 -mupper-regs-df, etc.).
36582 If the user used -mno-power8-vector, we need to turn off all of the implicit
36583 ISA 2.07 and 3.0 options that relate to the vector unit.
36585 If the user used -mno-power9-vector, we need to turn off all of the implicit
36586 ISA 3.0 options that relate to the vector unit.
36588 This function does not handle explicit options such as the user specifying
36589 -mdirect-move. These are handled in rs6000_option_override_internal, and
36590 the appropriate error is given if needed.
36592 We return a mask of all of the implicit options that should not be enabled
36593 by default. */
36595 static HOST_WIDE_INT
36596 rs6000_disable_incompatible_switches (void)
36598 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
36599 size_t i, j;
36601 static const struct {
36602 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
36603 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
36604 const char *const name; /* name of the switch. */
36605 } flags[] = {
36606 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
36607 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
36608 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
36611 for (i = 0; i < ARRAY_SIZE (flags); i++)
36613 HOST_WIDE_INT no_flag = flags[i].no_flag;
36615 if ((rs6000_isa_flags & no_flag) == 0
36616 && (rs6000_isa_flags_explicit & no_flag) != 0)
36618 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
36619 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
36620 & rs6000_isa_flags
36621 & dep_flags);
36623 if (set_flags)
36625 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
36626 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
36628 set_flags &= ~rs6000_opt_masks[j].mask;
36629 error ("%<-mno-%s%> turns off %<-m%s%>",
36630 flags[i].name,
36631 rs6000_opt_masks[j].name);
36634 gcc_assert (!set_flags);
36637 rs6000_isa_flags &= ~dep_flags;
36638 ignore_masks |= no_flag | dep_flags;
36642 return ignore_masks;
36646 /* Helper function for printing the function name when debugging. */
36648 static const char *
36649 get_decl_name (tree fn)
36651 tree name;
36653 if (!fn)
36654 return "<null>";
36656 name = DECL_NAME (fn);
36657 if (!name)
36658 return "<no-name>";
36660 return IDENTIFIER_POINTER (name);
36663 /* Return the clone id of the target we are compiling code for in a target
36664 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
36665 the priority list for the target clones (ordered from lowest to
36666 highest). */
36668 static int
36669 rs6000_clone_priority (tree fndecl)
36671 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
36672 HOST_WIDE_INT isa_masks;
36673 int ret = CLONE_DEFAULT;
36674 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
36675 const char *attrs_str = NULL;
36677 attrs = TREE_VALUE (TREE_VALUE (attrs));
36678 attrs_str = TREE_STRING_POINTER (attrs);
36680 /* Return priority zero for the default function. Return the ISA needed for the
36681 function if it is not the default. */
36682 if (strcmp (attrs_str, "default") != 0)
36684 if (fn_opts == NULL_TREE)
36685 fn_opts = target_option_default_node;
36687 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
36688 isa_masks = rs6000_isa_flags;
36689 else
36690 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
36692 for (ret = CLONE_MAX - 1; ret != 0; ret--)
36693 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
36694 break;
36697 if (TARGET_DEBUG_TARGET)
36698 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
36699 get_decl_name (fndecl), ret);
36701 return ret;
36704 /* This compares the priority of target features in function DECL1 and DECL2.
36705 It returns a positive value if DECL1 is higher priority, a negative value if
36706 DECL2 is higher priority, and 0 if they are the same. Note, priorities are
36707 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
36709 static int
36710 rs6000_compare_version_priority (tree decl1, tree decl2)
36712 int priority1 = rs6000_clone_priority (decl1);
36713 int priority2 = rs6000_clone_priority (decl2);
36714 int ret = priority1 - priority2;
36716 if (TARGET_DEBUG_TARGET)
36717 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
36718 get_decl_name (decl1), get_decl_name (decl2), ret);
36720 return ret;
36723 /* Make a dispatcher declaration for the multi-versioned function DECL.
36724 Calls to DECL function will be replaced with calls to the dispatcher
36725 by the front-end. Returns the decl of the dispatcher function. */
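/* An illustrative user-level trigger for this hook (sketch only; the
   function and names below are hypothetical):

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long foo (long a, long b) { return a + b; }

   The front end rewrites direct calls to foo as calls to the dispatcher
   decl returned here, which is resolved at load time via ifunc.  */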
36727 static tree
36728 rs6000_get_function_versions_dispatcher (void *decl)
36730 tree fn = (tree) decl;
36731 struct cgraph_node *node = NULL;
36732 struct cgraph_node *default_node = NULL;
36733 struct cgraph_function_version_info *node_v = NULL;
36734 struct cgraph_function_version_info *first_v = NULL;
36736 tree dispatch_decl = NULL;
36738 struct cgraph_function_version_info *default_version_info = NULL;
36739 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
36741 if (TARGET_DEBUG_TARGET)
36742 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
36743 get_decl_name (fn));
36745 node = cgraph_node::get (fn);
36746 gcc_assert (node != NULL);
36748 node_v = node->function_version ();
36749 gcc_assert (node_v != NULL);
36751 if (node_v->dispatcher_resolver != NULL)
36752 return node_v->dispatcher_resolver;
36754 /* Find the default version and make it the first node. */
36755 first_v = node_v;
36756 /* Go to the beginning of the chain. */
36757 while (first_v->prev != NULL)
36758 first_v = first_v->prev;
36760 default_version_info = first_v;
36761 while (default_version_info != NULL)
36763 const tree decl2 = default_version_info->this_node->decl;
36764 if (is_function_default_version (decl2))
36765 break;
36766 default_version_info = default_version_info->next;
36769 /* If there is no default node, just return NULL. */
36770 if (default_version_info == NULL)
36771 return NULL;
36773 /* Make default info the first node. */
36774 if (first_v != default_version_info)
36776 default_version_info->prev->next = default_version_info->next;
36777 if (default_version_info->next)
36778 default_version_info->next->prev = default_version_info->prev;
36779 first_v->prev = default_version_info;
36780 default_version_info->next = first_v;
36781 default_version_info->prev = NULL;
36784 default_node = default_version_info->this_node;
36786 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
36787 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36788 "target_clones attribute needs GLIBC (2.23 and newer) that "
36789 "exports hardware capability bits");
36790 #else
36792 if (targetm.has_ifunc_p ())
36794 struct cgraph_function_version_info *it_v = NULL;
36795 struct cgraph_node *dispatcher_node = NULL;
36796 struct cgraph_function_version_info *dispatcher_version_info = NULL;
36798 /* Right now, the dispatching is done via ifunc. */
36799 dispatch_decl = make_dispatcher_decl (default_node->decl);
36801 dispatcher_node = cgraph_node::get_create (dispatch_decl);
36802 gcc_assert (dispatcher_node != NULL);
36803 dispatcher_node->dispatcher_function = 1;
36804 dispatcher_version_info
36805 = dispatcher_node->insert_new_function_version ();
36806 dispatcher_version_info->next = default_version_info;
36807 dispatcher_node->definition = 1;
36809 /* Set the dispatcher for all the versions. */
36810 it_v = default_version_info;
36811 while (it_v != NULL)
36813 it_v->dispatcher_resolver = dispatch_decl;
36814 it_v = it_v->next;
36817 else
36819 error_at (DECL_SOURCE_LOCATION (default_node->decl),
36820 "multiversioning needs ifunc which is not supported "
36821 "on this target");
36823 #endif
36825 return dispatch_decl;
36828 /* Make the resolver function decl to dispatch the versions of a multi-
36829 versioned function, DEFAULT_DECL. Create an empty basic block in the
36830 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
36831 function. */
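/* Conceptually, the resolver built here and filled in by
   dispatch_function_versions below behaves like this C sketch
   (illustrative only; the real body is constructed directly as GIMPLE,
   and the foo_* names are hypothetical):

     static void *foo_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
	 return (void *) foo_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
	 return (void *) foo_power8;
       return (void *) foo_default;
     }
*/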
36833 static tree
36834 make_resolver_func (const tree default_decl,
36835 const tree dispatch_decl,
36836 basic_block *empty_bb)
36838 /* Make the resolver function static. The resolver function returns
36839 void *. */
36840 tree decl_name = clone_function_name (default_decl, "resolver");
36841 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
36842 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
36843 tree decl = build_fn_decl (resolver_name, type);
36844 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
36846 DECL_NAME (decl) = decl_name;
36847 TREE_USED (decl) = 1;
36848 DECL_ARTIFICIAL (decl) = 1;
36849 DECL_IGNORED_P (decl) = 0;
36850 TREE_PUBLIC (decl) = 0;
36851 DECL_UNINLINABLE (decl) = 1;
36853 /* Resolver is not external, body is generated. */
36854 DECL_EXTERNAL (decl) = 0;
36855 DECL_EXTERNAL (dispatch_decl) = 0;
36857 DECL_CONTEXT (decl) = NULL_TREE;
36858 DECL_INITIAL (decl) = make_node (BLOCK);
36859 DECL_STATIC_CONSTRUCTOR (decl) = 0;
36861 /* Build result decl and add to function_decl. */
36862 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
36863 DECL_ARTIFICIAL (t) = 1;
36864 DECL_IGNORED_P (t) = 1;
36865 DECL_RESULT (decl) = t;
36867 gimplify_function_tree (decl);
36868 push_cfun (DECL_STRUCT_FUNCTION (decl));
36869 *empty_bb = init_lowered_empty_function (decl, false,
36870 profile_count::uninitialized ());
36872 cgraph_node::add_new_function (decl, true);
36873 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
36875 pop_cfun ();
36877 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
36878 DECL_ATTRIBUTES (dispatch_decl)
36879 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
36881 cgraph_node::create_same_body_alias (dispatch_decl, decl);
36883 return decl;
36886 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
36887 return a pointer to VERSION_DECL if we are running on a machine that
36888 supports the index CLONE_ISA hardware architecture bits. This function will
36889 be called during version dispatch to decide which function version to
36890 execute. It returns the basic block at the end, to which more conditions
36891 can be added. */
36893 static basic_block
36894 add_condition_to_bb (tree function_decl, tree version_decl,
36895 int clone_isa, basic_block new_bb)
36897 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
36899 gcc_assert (new_bb != NULL);
36900 gimple_seq gseq = bb_seq (new_bb);
36903 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
36904 build_fold_addr_expr (version_decl));
36905 tree result_var = create_tmp_var (ptr_type_node);
36906 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
36907 gimple *return_stmt = gimple_build_return (result_var);
36909 if (clone_isa == CLONE_DEFAULT)
36911 gimple_seq_add_stmt (&gseq, convert_stmt);
36912 gimple_seq_add_stmt (&gseq, return_stmt);
36913 set_bb_seq (new_bb, gseq);
36914 gimple_set_bb (convert_stmt, new_bb);
36915 gimple_set_bb (return_stmt, new_bb);
36916 pop_cfun ();
36917 return new_bb;
36920 tree bool_zero = build_int_cst (bool_int_type_node, 0);
36921 tree cond_var = create_tmp_var (bool_int_type_node);
36922 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
36923 const char *arg_str = rs6000_clone_map[clone_isa].name;
36924 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
36925 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
36926 gimple_call_set_lhs (call_cond_stmt, cond_var);
36928 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
36929 gimple_set_bb (call_cond_stmt, new_bb);
36930 gimple_seq_add_stmt (&gseq, call_cond_stmt);
36932 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
36933 NULL_TREE, NULL_TREE);
36934 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
36935 gimple_set_bb (if_else_stmt, new_bb);
36936 gimple_seq_add_stmt (&gseq, if_else_stmt);
36938 gimple_seq_add_stmt (&gseq, convert_stmt);
36939 gimple_seq_add_stmt (&gseq, return_stmt);
36940 set_bb_seq (new_bb, gseq);
36942 basic_block bb1 = new_bb;
36943 edge e12 = split_block (bb1, if_else_stmt);
36944 basic_block bb2 = e12->dest;
36945 e12->flags &= ~EDGE_FALLTHRU;
36946 e12->flags |= EDGE_TRUE_VALUE;
36948 edge e23 = split_block (bb2, return_stmt);
36949 gimple_set_bb (convert_stmt, bb2);
36950 gimple_set_bb (return_stmt, bb2);
36952 basic_block bb3 = e23->dest;
36953 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
36955 remove_edge (e23);
36956 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
36958 pop_cfun ();
36959 return bb3;
36962 /* This function generates the dispatch function for multi-versioned functions.
36963 DISPATCH_DECL is the function which will contain the dispatch logic.
36964 FNDECLS are the function choices for dispatch, and is a tree chain.
36965 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
36966 code is generated. */
36968 static int
36969 dispatch_function_versions (tree dispatch_decl,
36970 void *fndecls_p,
36971 basic_block *empty_bb)
36973 int ix;
36974 tree ele;
36975 vec<tree> *fndecls;
36976 tree clones[CLONE_MAX];
36978 if (TARGET_DEBUG_TARGET)
36979 fputs ("dispatch_function_versions, top\n", stderr);
36981 gcc_assert (dispatch_decl != NULL
36982 && fndecls_p != NULL
36983 && empty_bb != NULL);
36985 /* fndecls_p is actually a vector. */
36986 fndecls = static_cast<vec<tree> *> (fndecls_p);
36988 /* At least one more version other than the default. */
36989 gcc_assert (fndecls->length () >= 2);
36991 /* The first version in the vector is the default decl. */
36992 memset ((void *) clones, '\0', sizeof (clones));
36993 clones[CLONE_DEFAULT] = (*fndecls)[0];
36995 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
36996 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
36997 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
36998 recent glibc. If we ever need to call __builtin_cpu_init, we would need
36999 to insert the code here to do the call. */
37001 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37003 int priority = rs6000_clone_priority (ele);
37004 if (!clones[priority])
37005 clones[priority] = ele;
37008 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37009 if (clones[ix])
37011 if (TARGET_DEBUG_TARGET)
37012 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37013 ix, get_decl_name (clones[ix]));
37015 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37016 *empty_bb);
37019 return 0;
37022 /* Generate the dispatching code body to dispatch multi-versioned function
37023 DECL. The target hook is called to process the "target" attributes and
37024 provide the code to dispatch the right function at run-time. NODE points
37025 to the dispatcher decl whose body will be created. */
37027 static tree
37028 rs6000_generate_version_dispatcher_body (void *node_p)
37030 tree resolver;
37031 basic_block empty_bb;
37032 struct cgraph_node *node = (cgraph_node *) node_p;
37033 struct cgraph_function_version_info *ninfo = node->function_version ();
37035 if (ninfo->dispatcher_resolver)
37036 return ninfo->dispatcher_resolver;
37038 /* node is going to be an alias, so remove the finalized bit. */
37039 node->definition = false;
37041 /* The first version in the chain corresponds to the default version. */
37042 ninfo->dispatcher_resolver = resolver
37043 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37045 if (TARGET_DEBUG_TARGET)
37046 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
37047 get_decl_name (resolver));
37049 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37050 auto_vec<tree, 2> fn_ver_vec;
37052 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37053 vinfo;
37054 vinfo = vinfo->next)
37056 struct cgraph_node *version = vinfo->this_node;
37057 /* Check for virtual functions here again, as by this time it should
37058 have been determined if this function needs a vtable index or
37059 not. This happens for methods in derived classes that override
37060 virtual methods in base classes but are not explicitly marked as
37061 virtual. */
37062 if (DECL_VINDEX (version->decl))
37063 sorry ("Virtual function multiversioning not supported");
37065 fn_ver_vec.safe_push (version->decl);
37068 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37069 cgraph_edge::rebuild_edges ();
37070 pop_cfun ();
37071 return resolver;
37075 /* Hook to determine if one function can safely inline another. */
37077 static bool
37078 rs6000_can_inline_p (tree caller, tree callee)
37080 bool ret = false;
37081 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37082 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37084 /* If callee has no option attributes, then it is ok to inline. */
37085 if (!callee_tree)
37086 ret = true;
37088 /* If caller has no option attributes, but callee does then it is not ok to
37089 inline. */
37090 else if (!caller_tree)
37091 ret = false;
37093 else
37095 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37096 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37098 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37099 can inline an altivec function but a non-vsx function can't inline a
37100 vsx function. */
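/* Worked example of the subset test below, with illustrative flag
   values: caller = VSX|ALTIVEC and callee = ALTIVEC gives
   (caller & callee) == callee, so inlining is allowed; with
   caller = ALTIVEC and callee = VSX|ALTIVEC the test fails.  */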
37101 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37102 == callee_opts->x_rs6000_isa_flags)
37103 ret = true;
37106 if (TARGET_DEBUG_TARGET)
37107 fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
37108 get_decl_name (caller), get_decl_name (callee),
37109 (ret ? "can" : "cannot"));
37111 return ret;
37114 /* Allocate a stack temp and fixup the address so it meets the particular
37115 memory requirements (either offsettable or REG+REG addressing). */
37118 rs6000_allocate_stack_temp (machine_mode mode,
37119 bool offsettable_p,
37120 bool reg_reg_p)
37122 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37123 rtx addr = XEXP (stack, 0);
37124 int strict_p = reload_completed;
37126 if (!legitimate_indirect_address_p (addr, strict_p))
37128 if (offsettable_p
37129 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37130 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37132 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37133 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37136 return stack;
37139 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37140 to such a form to deal with memory reference instructions like STFIWX that
37141 only take reg+reg addressing. */
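/* For example, an update-form address such as (pre_inc (reg Rb)) is
   handled below by first emitting an explicit add of the access size to
   Rb and then copying the address into a fresh pseudo, so the reference
   becomes a plain (mem (reg Rn)) that reg+reg instructions like stfiwx
   can use (a sketch of the intent, not additional code).  */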
37144 rs6000_address_for_fpconvert (rtx x)
37146 rtx addr;
37148 gcc_assert (MEM_P (x));
37149 addr = XEXP (x, 0);
37150 if (can_create_pseudo_p ()
37151 && ! legitimate_indirect_address_p (addr, reload_completed)
37152 && ! legitimate_indexed_address_p (addr, reload_completed))
37154 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37156 rtx reg = XEXP (addr, 0);
37157 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37158 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37159 gcc_assert (REG_P (reg));
37160 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37161 addr = reg;
37163 else if (GET_CODE (addr) == PRE_MODIFY)
37165 rtx reg = XEXP (addr, 0);
37166 rtx expr = XEXP (addr, 1);
37167 gcc_assert (REG_P (reg));
37168 gcc_assert (GET_CODE (expr) == PLUS);
37169 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37170 addr = reg;
37173 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37176 return x;
37179 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37181 On the RS/6000, all integer constants are acceptable, though most won't be
37182 valid for particular insns. Only easy FP constants are acceptable. */
37184 static bool
37185 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37187 if (TARGET_ELF && tls_referenced_p (x))
37188 return false;
37190 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37191 || GET_MODE (x) == VOIDmode
37192 || (TARGET_POWERPC64 && mode == DImode)
37193 || easy_fp_constant (x, mode)
37194 || easy_vector_constant (x, mode));
37198 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37200 static bool
37201 chain_already_loaded (rtx_insn *last)
37203 for (; last != NULL; last = PREV_INSN (last))
37205 if (NONJUMP_INSN_P (last))
37207 rtx patt = PATTERN (last);
37209 if (GET_CODE (patt) == SET)
37211 rtx lhs = XEXP (patt, 0);
37213 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37214 return true;
37218 return false;
37221 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37223 void
37224 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37226 const bool direct_call_p
37227 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37228 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37229 rtx toc_load = NULL_RTX;
37230 rtx toc_restore = NULL_RTX;
37231 rtx func_addr;
37232 rtx abi_reg = NULL_RTX;
37233 rtx call[4];
37234 int n_call;
37235 rtx insn;
37237 /* Handle longcall attributes. */
37238 if (INTVAL (cookie) & CALL_LONG)
37239 func_desc = rs6000_longcall_ref (func_desc);
37241 /* Handle indirect calls. */
37242 if (GET_CODE (func_desc) != SYMBOL_REF
37243 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
37245 /* Save the TOC into its reserved slot before the call,
37246 and prepare to restore it after the call. */
37247 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
37248 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
37249 rtx stack_toc_mem = gen_frame_mem (Pmode,
37250 gen_rtx_PLUS (Pmode, stack_ptr,
37251 stack_toc_offset));
37252 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
37253 gen_rtvec (1, stack_toc_offset),
37254 UNSPEC_TOCSLOT);
37255 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
37257 /* Can we optimize saving the TOC in the prologue or
37258 do we need to do it at every call? */
37259 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
37260 cfun->machine->save_toc_in_prologue = true;
37261 else
37263 MEM_VOLATILE_P (stack_toc_mem) = 1;
37264 emit_move_insn (stack_toc_mem, toc_reg);
37267 if (DEFAULT_ABI == ABI_ELFv2)
37269 /* A function pointer in the ELFv2 ABI is just a plain address, but
37270 the ABI requires it to be loaded into r12 before the call. */
37271 func_addr = gen_rtx_REG (Pmode, 12);
37272 emit_move_insn (func_addr, func_desc);
37273 abi_reg = func_addr;
37275 else
37277 /* A function pointer under AIX is a pointer to a data area whose
37278 first word contains the actual address of the function, whose
37279 second word contains a pointer to its TOC, and whose third word
37280 contains a value to place in the static chain register (r11).
37281 Note that if we load the static chain, our "trampoline" need
37282 not have any executable code. */
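/* Laid out as a C struct, an AIX-style function descriptor is roughly
   the following (illustrative sketch only):

     struct func_desc
     {
       void *code_addr;	    word 0: address of the actual code
       void *toc;	    word 1: TOC pointer for the callee
       void *static_chain;  word 2: value to load into r11
     };
*/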
37284 /* Load up address of the actual function. */
37285 func_desc = force_reg (Pmode, func_desc);
37286 func_addr = gen_reg_rtx (Pmode);
37287 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
37289 /* Prepare to load the TOC of the called function. Note that the
37290 TOC load must happen immediately before the actual call so
37291 that unwinding the TOC registers works correctly. See the
37292 comment in frob_update_context. */
37293 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
37294 rtx func_toc_mem = gen_rtx_MEM (Pmode,
37295 gen_rtx_PLUS (Pmode, func_desc,
37296 func_toc_offset));
37297 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
37299 /* If we have a static chain, load it up. But, if the call was
37300 originally direct, the 3rd word has not been written since no
37301 trampoline has been built, so we ought not to load it, lest we
37302 overwrite a static chain value. */
37303 if (!direct_call_p
37304 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
37305 && !chain_already_loaded (get_current_sequence ()->next->last))
37307 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
37308 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
37309 rtx func_sc_mem = gen_rtx_MEM (Pmode,
37310 gen_rtx_PLUS (Pmode, func_desc,
37311 func_sc_offset));
37312 emit_move_insn (sc_reg, func_sc_mem);
37313 abi_reg = sc_reg;
37317 else
37319 /* Direct calls use the TOC: for local calls, the callee will
37320 assume the TOC register is set; for non-local calls, the
37321 PLT stub needs the TOC register. */
37322 abi_reg = toc_reg;
37323 func_addr = func_desc;
37326 /* Create the call. */
37327 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
37328 if (value != NULL_RTX)
37329 call[0] = gen_rtx_SET (value, call[0]);
37330 n_call = 1;
37332 if (toc_load)
37333 call[n_call++] = toc_load;
37334 if (toc_restore)
37335 call[n_call++] = toc_restore;
37337 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
37339 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
37340 insn = emit_call_insn (insn);
37342 /* Mention all registers defined by the ABI to hold information
37343 as uses in CALL_INSN_FUNCTION_USAGE. */
37344 if (abi_reg)
37345 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
37348 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
37350 void
37351 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37353 rtx call[2];
37354 rtx insn;
37356 gcc_assert (INTVAL (cookie) == 0);
37358 /* Create the call. */
37359 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
37360 if (value != NULL_RTX)
37361 call[0] = gen_rtx_SET (value, call[0]);
37363 call[1] = simple_return_rtx;
37365 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
37366 insn = emit_call_insn (insn);
37368 /* Note use of the TOC register. */
37369 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
37372 /* Return whether we need to always update the saved TOC pointer when we update
37373 the stack pointer. */
37375 static bool
37376 rs6000_save_toc_in_prologue_p (void)
37378 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
37381 #ifdef HAVE_GAS_HIDDEN
37382 # define USE_HIDDEN_LINKONCE 1
37383 #else
37384 # define USE_HIDDEN_LINKONCE 0
37385 #endif
37387 /* Fills in the label name that should be used for a 476 link stack thunk. */
37389 void
37390 get_ppc476_thunk_name (char name[32])
37392 gcc_assert (TARGET_LINK_STACK);
37394 if (USE_HIDDEN_LINKONCE)
37395 sprintf (name, "__ppc476.get_thunk");
37396 else
37397 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
37400 /* This function emits the simple thunk routine that is used to preserve
37401 the link stack on the 476 cpu. */
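/* The emitted thunk is trivial (illustrative assembly):

     __ppc476.get_thunk:
	     blr

   so a bl to it has a matching blr, which keeps the 476 link stack
   balanced when code uses a call/return pair to materialize an
   address (a sketch of the intent).  */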
37403 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
37404 static void
37405 rs6000_code_end (void)
37407 char name[32];
37408 tree decl;
37410 if (!TARGET_LINK_STACK)
37411 return;
37413 get_ppc476_thunk_name (name);
37415 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
37416 build_function_type_list (void_type_node, NULL_TREE));
37417 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
37418 NULL_TREE, void_type_node);
37419 TREE_PUBLIC (decl) = 1;
37420 TREE_STATIC (decl) = 1;
37422 #if RS6000_WEAK
37423 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
37425 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
37426 targetm.asm_out.unique_section (decl, 0);
37427 switch_to_section (get_named_section (decl, NULL, 0));
37428 DECL_WEAK (decl) = 1;
37429 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
37430 targetm.asm_out.globalize_label (asm_out_file, name);
37431 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
37432 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
37434 else
37435 #endif
37437 switch_to_section (text_section);
37438 ASM_OUTPUT_LABEL (asm_out_file, name);
37441 DECL_INITIAL (decl) = make_node (BLOCK);
37442 current_function_decl = decl;
37443 allocate_struct_function (decl, false);
37444 init_function_start (decl);
37445 first_function_block_is_cold = false;
37446 /* Make sure unwind info is emitted for the thunk if needed. */
37447 final_start_function (emit_barrier (), asm_out_file, 1);
37449 fputs ("\tblr\n", asm_out_file);
37451 final_end_function ();
37452 init_insn_lengths ();
37453 free_after_compilation (cfun);
37454 set_cfun (NULL);
37455 current_function_decl = NULL;
37458 /* Add r30 to hard reg set if the prologue sets it up and it is not
37459 pic_offset_table_rtx. */
37461 static void
37462 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
37464 if (!TARGET_SINGLE_PIC_BASE
37465 && TARGET_TOC
37466 && TARGET_MINIMAL_TOC
37467 && !constant_pool_empty_p ())
37468 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
37469 if (cfun->machine->split_stack_argp_used)
37470 add_to_hard_reg_set (&set->set, Pmode, 12);
37472 /* Make sure the hard reg set doesn't include r2, which was possibly added
37473 via PIC_OFFSET_TABLE_REGNUM. */
37474 if (TARGET_TOC)
37475 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
37479 /* Helper function for rs6000_split_logical to emit a logical instruction after
37480 splitting the operation into single GPR registers.
37482 DEST is the destination register.
37483 OP1 and OP2 are the input source registers.
37484 CODE is the base operation (AND, IOR, XOR, NOT).
37485 MODE is the machine mode.
37486 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37487 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37488 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
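/* For instance, CODE == AND with COMPLEMENT_OP2_P emits a SET of
   (and (not op2) op1) after canonicalization (inverted operand first),
   which matches the PowerPC andc instruction, while COMPLEMENT_FINAL_P
   on an AND yields the nand form (an illustrative mapping; the actual
   instruction selection happens in the MD patterns).  */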
37490 static void
37491 rs6000_split_logical_inner (rtx dest,
37492 rtx op1,
37493 rtx op2,
37494 enum rtx_code code,
37495 machine_mode mode,
37496 bool complement_final_p,
37497 bool complement_op1_p,
37498 bool complement_op2_p)
37500 rtx bool_rtx;
37502 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
37503 if (op2 && GET_CODE (op2) == CONST_INT
37504 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
37505 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37507 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
37508 HOST_WIDE_INT value = INTVAL (op2) & mask;
37510 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
37511 if (code == AND)
37513 if (value == 0)
37515 emit_insn (gen_rtx_SET (dest, const0_rtx));
37516 return;
37519 else if (value == mask)
37521 if (!rtx_equal_p (dest, op1))
37522 emit_insn (gen_rtx_SET (dest, op1));
37523 return;
37527 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
37528 into separate ORI/ORIS or XORI/XORIS instructions. */
37529 else if (code == IOR || code == XOR)
37531 if (value == 0)
37533 if (!rtx_equal_p (dest, op1))
37534 emit_insn (gen_rtx_SET (dest, op1));
37535 return;
37540 if (code == AND && mode == SImode
37541 && !complement_final_p && !complement_op1_p && !complement_op2_p)
37543 emit_insn (gen_andsi3 (dest, op1, op2));
37544 return;
37547 if (complement_op1_p)
37548 op1 = gen_rtx_NOT (mode, op1);
37550 if (complement_op2_p)
37551 op2 = gen_rtx_NOT (mode, op2);
37553 /* For canonical RTL, if only one arm is inverted it is the first. */
37554 if (!complement_op1_p && complement_op2_p)
37555 std::swap (op1, op2);
37557 bool_rtx = ((code == NOT)
37558 ? gen_rtx_NOT (mode, op1)
37559 : gen_rtx_fmt_ee (code, mode, op1, op2));
37561 if (complement_final_p)
37562 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
37564 emit_insn (gen_rtx_SET (dest, bool_rtx));
37567 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
37568 operations are split immediately during RTL generation to allow for more
37569 optimizations of the AND/IOR/XOR.
37571 OPERANDS is an array containing the destination and two input operands.
37572 CODE is the base operation (AND, IOR, XOR, NOT).
37573 MODE is the machine mode.
37574 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37575 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37576 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
37577 CLOBBER_REG is either NULL or a scratch register of type CC to allow
37578 formation of the AND instructions. */
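/* As a hypothetical example, on a 32-bit target a DImode XOR with the
   constant 0x123456789abcdef0 is split into two SImode operations, and
   each 32-bit half that is not a valid logical_const_operand is further
   split into an xoris of the high 16 bits followed by an xori of the
   low 16 bits, as done in the loop below.  */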
37580 static void
37581 rs6000_split_logical_di (rtx operands[3],
37582 enum rtx_code code,
37583 bool complement_final_p,
37584 bool complement_op1_p,
37585 bool complement_op2_p)
37587 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
37588 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
37589 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
37590 enum hi_lo { hi = 0, lo = 1 };
37591 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
37592 size_t i;
37594 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
37595 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
37596 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
37597 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
37599 if (code == NOT)
37600 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
37601 else
37603 if (GET_CODE (operands[2]) != CONST_INT)
37605 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
37606 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
37608 else
37610 HOST_WIDE_INT value = INTVAL (operands[2]);
37611 HOST_WIDE_INT value_hi_lo[2];
37613 gcc_assert (!complement_final_p);
37614 gcc_assert (!complement_op1_p);
37615 gcc_assert (!complement_op2_p);
37617 value_hi_lo[hi] = value >> 32;
37618 value_hi_lo[lo] = value & lower_32bits;
37620 for (i = 0; i < 2; i++)
37622 HOST_WIDE_INT sub_value = value_hi_lo[i];
37624 if (sub_value & sign_bit)
37625 sub_value |= upper_32bits;
37627 op2_hi_lo[i] = GEN_INT (sub_value);
37629 /* If this is an AND instruction, check to see if we need to load
37630 the value in a register. */
37631 if (code == AND && sub_value != -1 && sub_value != 0
37632 && !and_operand (op2_hi_lo[i], SImode))
37633 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
37638 for (i = 0; i < 2; i++)
37640 /* Split large IOR/XOR operations. */
37641 if ((code == IOR || code == XOR)
37642 && GET_CODE (op2_hi_lo[i]) == CONST_INT
37643 && !complement_final_p
37644 && !complement_op1_p
37645 && !complement_op2_p
37646 && !logical_const_operand (op2_hi_lo[i], SImode))
37648 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
37649 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
37650 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
37651 rtx tmp = gen_reg_rtx (SImode);
37653 /* Make sure the constant is sign extended. */
37654 if ((hi_16bits & sign_bit) != 0)
37655 hi_16bits |= upper_32bits;
37657 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
37658 code, SImode, false, false, false);
37660 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
37661 code, SImode, false, false, false);
37663 else
37664 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
37665 code, SImode, complement_final_p,
37666 complement_op1_p, complement_op2_p);
37669 return;
37672 /* Split the insns that make up boolean operations operating on multiple GPR
37673 registers. The boolean MD patterns ensure that the inputs either are
37674 exactly the same as the output registers, or there is no overlap.
37676 OPERANDS is an array containing the destination and two input operands.
37677 CODE is the base operation (AND, IOR, XOR, NOT).
37678 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
37679 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
37680 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
37682 void
37683 rs6000_split_logical (rtx operands[3],
37684 enum rtx_code code,
37685 bool complement_final_p,
37686 bool complement_op1_p,
37687 bool complement_op2_p)
37689 machine_mode mode = GET_MODE (operands[0]);
37690 machine_mode sub_mode;
37691 rtx op0, op1, op2;
37692 int sub_size, regno0, regno1, nregs, i;
37694 /* If this is DImode, use the specialized version that can run before
37695 register allocation. */
37696 if (mode == DImode && !TARGET_POWERPC64)
37698 rs6000_split_logical_di (operands, code, complement_final_p,
37699 complement_op1_p, complement_op2_p);
37700 return;
37703 op0 = operands[0];
37704 op1 = operands[1];
37705 op2 = (code == NOT) ? NULL_RTX : operands[2];
37706 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
37707 sub_size = GET_MODE_SIZE (sub_mode);
37708 regno0 = REGNO (op0);
37709 regno1 = REGNO (op1);
37711 gcc_assert (reload_completed);
37712 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37713 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
37715 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
37716 gcc_assert (nregs > 1);
37718 if (op2 && REG_P (op2))
37719 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
37721 for (i = 0; i < nregs; i++)
37723 int offset = i * sub_size;
37724 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
37725 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
37726 rtx sub_op2 = ((code == NOT)
37727 ? NULL_RTX
37728 : simplify_subreg (sub_mode, op2, mode, offset));
37730 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
37731 complement_final_p, complement_op1_p,
37732 complement_op2_p);
37735 return;
37739 /* Return true if the peephole2 pass can combine an addis instruction and a
37740 load with an offset so that the pair can be fused together on
37741 a power8. */
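/* The fused sequence recognized here looks like this illustrative
   assembly:

     addis rT,rB,const@ha
     lwz   rT,const@l(rT)

   i.e. the register set by the addis is normally the register being
   loaded; otherwise it must be dead after the pair and unused in the
   memory address, which is what the checks below enforce.  */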
37743 bool
37744 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
37745 rtx addis_value, /* addis value. */
37746 rtx target, /* target register that is loaded. */
37747 rtx mem) /* bottom part of the memory addr. */
37749 rtx addr;
37750 rtx base_reg;
37752 /* Validate arguments. */
37753 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
37754 return false;
37756 if (!base_reg_operand (target, GET_MODE (target)))
37757 return false;
37759 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
37760 return false;
37762 /* Allow sign/zero extension. */
37763 if (GET_CODE (mem) == ZERO_EXTEND
37764 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
37765 mem = XEXP (mem, 0);
37767 if (!MEM_P (mem))
37768 return false;
37770 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
37771 return false;
37773 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
37774 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
37775 return false;
37777 /* Validate that the register used to load the high value is either the
37778 register being loaded, or we can safely replace its use.
37780 This function is only called from the peephole2 pass, and we assume that
37781 there are 2 instructions in the peephole (addis and load), so we want to
37782 check that the target register is not used in the memory address and that
37783 the register holding the addis result is dead after the peephole. */
37784 if (REGNO (addis_reg) != REGNO (target))
37786 if (reg_mentioned_p (target, mem))
37787 return false;
37789 if (!peep2_reg_dead_p (2, addis_reg))
37790 return false;
37792 /* If the target register being loaded is the stack pointer, we must
37793 avoid loading any other value into it, even temporarily. */
37794 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
37795 return false;
37798 base_reg = XEXP (addr, 0);
37799 return REGNO (addis_reg) == REGNO (base_reg);
37802 /* During the peephole2 pass, adjust and expand the insns for a load fusion
37803 sequence. We adjust the addis register to use the target register. If the
37804 load sign extends, we adjust the code to do a zero-extending load followed
37805 by an explicit sign extension, since the fusion only covers zero-extending
37806 loads.
37808 The operands are:
37809 operands[0] register set with addis (to be replaced with target)
37810 operands[1] value set via addis
37811 operands[2] target register being loaded
37812 operands[3] D-form memory reference using operands[0]. */
37814 void
37815 expand_fusion_gpr_load (rtx *operands)
37817 rtx addis_value = operands[1];
37818 rtx target = operands[2];
37819 rtx orig_mem = operands[3];
37820 rtx new_addr, new_mem, orig_addr, offset;
37821 enum rtx_code plus_or_lo_sum;
37822 machine_mode target_mode = GET_MODE (target);
37823 machine_mode extend_mode = target_mode;
37824 machine_mode ptr_mode = Pmode;
37825 enum rtx_code extend = UNKNOWN;
37827 if (GET_CODE (orig_mem) == ZERO_EXTEND
37828 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
37830 extend = GET_CODE (orig_mem);
37831 orig_mem = XEXP (orig_mem, 0);
37832 target_mode = GET_MODE (orig_mem);
37835 gcc_assert (MEM_P (orig_mem));
37837 orig_addr = XEXP (orig_mem, 0);
37838 plus_or_lo_sum = GET_CODE (orig_addr);
37839 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
37841 offset = XEXP (orig_addr, 1);
37842 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
37843 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
37845 if (extend != UNKNOWN)
37846 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
37848 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
37849 UNSPEC_FUSION_GPR);
37850 emit_insn (gen_rtx_SET (target, new_mem));
37852 if (extend == SIGN_EXTEND)
37854 int sub_off = ((BYTES_BIG_ENDIAN)
37855 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
37856 : 0);
37857 rtx sign_reg
37858 = simplify_subreg (target_mode, target, extend_mode, sub_off);
37860 emit_insn (gen_rtx_SET (target,
37861 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
37864 return;
37867 /* Emit the addis instruction that will be part of a fused instruction
37868 sequence. */
37870 void
37871 emit_fusion_addis (rtx target, rtx addis_value)
37873 rtx fuse_ops[10];
37874 const char *addis_str = NULL;
37876 /* Emit the addis instruction. */
37877 fuse_ops[0] = target;
37878 if (satisfies_constraint_L (addis_value))
37880 fuse_ops[1] = addis_value;
37881 addis_str = "lis %0,%v1";
37884 else if (GET_CODE (addis_value) == PLUS)
37886 rtx op0 = XEXP (addis_value, 0);
37887 rtx op1 = XEXP (addis_value, 1);
37889 if (REG_P (op0) && CONST_INT_P (op1)
37890 && satisfies_constraint_L (op1))
37892 fuse_ops[1] = op0;
37893 fuse_ops[2] = op1;
37894 addis_str = "addis %0,%1,%v2";
37898 else if (GET_CODE (addis_value) == HIGH)
37900 rtx value = XEXP (addis_value, 0);
37901 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
37903 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
37904 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
37905 if (TARGET_ELF)
37906 addis_str = "addis %0,%2,%1@toc@ha";
37908 else if (TARGET_XCOFF)
37909 addis_str = "addis %0,%1@u(%2)";
37911 else
37912 gcc_unreachable ();
37915 else if (GET_CODE (value) == PLUS)
37917 rtx op0 = XEXP (value, 0);
37918 rtx op1 = XEXP (value, 1);
37920 if (GET_CODE (op0) == UNSPEC
37921 && XINT (op0, 1) == UNSPEC_TOCREL
37922 && CONST_INT_P (op1))
37924 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
37925 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
37926 fuse_ops[3] = op1;
37927 if (TARGET_ELF)
37928 addis_str = "addis %0,%2,%1+%3@toc@ha";
37930 else if (TARGET_XCOFF)
37931 addis_str = "addis %0,%1+%3@u(%2)";
37933 else
37934 gcc_unreachable ();
37938 else if (satisfies_constraint_L (value))
37940 fuse_ops[1] = value;
37941 addis_str = "lis %0,%v1";
37944 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
37946 fuse_ops[1] = value;
37947 addis_str = "lis %0,%1@ha";
37951 if (!addis_str)
37952 fatal_insn ("Could not generate addis value for fusion", addis_value);
37954 output_asm_insn (addis_str, fuse_ops);
37957 /* Emit a D-form load or store instruction that is the second instruction
37958 of a fusion sequence. */
37960 void
37961 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
37962 const char *insn_str)
37964 rtx fuse_ops[10];
37965 char insn_template[80];
37967 fuse_ops[0] = load_store_reg;
37968 fuse_ops[1] = addis_reg;
37970 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
37972 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
37973 fuse_ops[2] = offset;
37974 output_asm_insn (insn_template, fuse_ops);
37977 else if (GET_CODE (offset) == UNSPEC
37978 && XINT (offset, 1) == UNSPEC_TOCREL)
37980 if (TARGET_ELF)
37981 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
37983 else if (TARGET_XCOFF)
37984 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
37986 else
37987 gcc_unreachable ();
37989 fuse_ops[2] = XVECEXP (offset, 0, 0);
37990 output_asm_insn (insn_template, fuse_ops);
37993 else if (GET_CODE (offset) == PLUS
37994 && GET_CODE (XEXP (offset, 0)) == UNSPEC
37995 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
37996 && CONST_INT_P (XEXP (offset, 1)))
37998 rtx tocrel_unspec = XEXP (offset, 0);
37999 if (TARGET_ELF)
38000 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38002 else if (TARGET_XCOFF)
38003 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38005 else
38006 gcc_unreachable ();
38008 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38009 fuse_ops[3] = XEXP (offset, 1);
38010 output_asm_insn (insn_template, fuse_ops);
38013 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38015 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38017 fuse_ops[2] = offset;
38018 output_asm_insn (insn_template, fuse_ops);
38021 else
38022 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38024 return;
38027 /* Wrap a TOC address that can be fused to indicate that special fusion
38028 processing is needed. */
38031 fusion_wrap_memory_address (rtx old_mem)
38033 rtx old_addr = XEXP (old_mem, 0);
38034 rtvec v = gen_rtvec (1, old_addr);
38035 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38036 return replace_equiv_address_nv (old_mem, new_addr, false);
38039 /* Given an address, convert it into the addis and load offset parts. Addresses
38040 created during the peephole2 process look like:
38041 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38042 (unspec [(...)] UNSPEC_TOCREL))
38044 Addresses created via toc fusion look like:
38045 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38047 static void
38048 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38050 rtx hi, lo;
38052 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38054 lo = XVECEXP (addr, 0, 0);
38055 hi = gen_rtx_HIGH (Pmode, lo);
38057 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38059 hi = XEXP (addr, 0);
38060 lo = XEXP (addr, 1);
38062 else
38063 gcc_unreachable ();
38065 *p_hi = hi;
38066 *p_lo = lo;
38069 /* Return a string to fuse an addis instruction with a gpr load into the same
38070 register that the addis instruction set. The address that is used
38071 is the logical address that was formed during peephole2:
38072 (lo_sum (high) (low-part))
38074 Or the address is the TOC address that is wrapped before register allocation:
38075 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38077 The code is complicated, so we call output_asm_insn directly, and just
38078 return "". */
38080 const char *
38081 emit_fusion_gpr_load (rtx target, rtx mem)
38083 rtx addis_value;
38084 rtx addr;
38085 rtx load_offset;
38086 const char *load_str = NULL;
38087 machine_mode mode;
38089 if (GET_CODE (mem) == ZERO_EXTEND)
38090 mem = XEXP (mem, 0);
38092 gcc_assert (REG_P (target) && MEM_P (mem));
38094 addr = XEXP (mem, 0);
38095 fusion_split_address (addr, &addis_value, &load_offset);
38097 /* Now emit the load instruction to the same register. */
38098 mode = GET_MODE (mem);
38099 switch (mode)
38101 case E_QImode:
38102 load_str = "lbz";
38103 break;
38105 case E_HImode:
38106 load_str = "lhz";
38107 break;
38109 case E_SImode:
38110 case E_SFmode:
38111 load_str = "lwz";
38112 break;
38114 case E_DImode:
38115 case E_DFmode:
38116 gcc_assert (TARGET_POWERPC64);
38117 load_str = "ld";
38118 break;
38120 default:
38121 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38124 /* Emit the addis instruction. */
38125 emit_fusion_addis (target, addis_value);
38127 /* Emit the D-form load instruction. */
38128 emit_fusion_load_store (target, target, load_offset, load_str);
38130 return "";
38134 /* Return true if the peephole2 pass can combine an addis instruction and a
38135 memory operation (load or store). This form of fusion was
38136 added to the ISA 3.0 (power9) hardware. */
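/* Unlike the power8 GPR fusion above, the ISA 3.0 form also covers
   stores and FP/vector registers, and the addis target may differ from
   the register being loaded or stored, as in this illustrative
   assembly:

     addis rA,rB,const@ha
     lfd   fD,const@l(rA)

   where rA is merely a scratch register that must be dead afterwards.  */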
38138 bool
38139 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38140 rtx addis_value, /* addis value. */
38141 rtx dest, /* destination (memory or register). */
38142 rtx src) /* source (register or memory). */
38144 rtx addr, mem, offset;
38145 machine_mode mode = GET_MODE (src);
38147 /* Validate arguments. */
38148 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38149 return false;
38151 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38152 return false;
38154 /* Ignore extend operations that are part of the load. */
38155 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38156 src = XEXP (src, 0);
38158 /* Test for memory<-register or register<-memory. */
38159 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38161 if (!MEM_P (dest))
38162 return false;
38164 mem = dest;
38167 else if (MEM_P (src))
38169 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38170 return false;
38172 mem = src;
38175 else
38176 return false;
38178 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38179 if (GET_CODE (addr) == PLUS)
38181 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38182 return false;
38184 return satisfies_constraint_I (XEXP (addr, 1));
38187 else if (GET_CODE (addr) == LO_SUM)
38189 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38190 return false;
38192 offset = XEXP (addr, 1);
38193 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38194 return small_toc_ref (offset, GET_MODE (offset));
38196 else if (TARGET_ELF && !TARGET_POWERPC64)
38197 return CONSTANT_P (offset);
38200 return false;
38203 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38204 load sequence.
38206 The operands are:
38207 operands[0] register set with addis
38208 operands[1] value set via addis
38209 operands[2] target register being loaded
38210 operands[3] D-form memory reference using operands[0].
38212 This is similar to the fusion introduced with power8, except it scales to
38213 both loads/stores and does not require the result register to be the same as
38214 the base register. At the moment, we only do this if the register set with addis
38215 is dead. */
38217 void
38218 expand_fusion_p9_load (rtx *operands)
38220 rtx tmp_reg = operands[0];
38221 rtx addis_value = operands[1];
38222 rtx target = operands[2];
38223 rtx orig_mem = operands[3];
38224 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38225 enum rtx_code plus_or_lo_sum;
38226 machine_mode target_mode = GET_MODE (target);
38227 machine_mode extend_mode = target_mode;
38228 machine_mode ptr_mode = Pmode;
38229 enum rtx_code extend = UNKNOWN;
38231 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
38233 extend = GET_CODE (orig_mem);
38234 orig_mem = XEXP (orig_mem, 0);
38235 target_mode = GET_MODE (orig_mem);
38238 gcc_assert (MEM_P (orig_mem));
38240 orig_addr = XEXP (orig_mem, 0);
38241 plus_or_lo_sum = GET_CODE (orig_addr);
38242 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38244 offset = XEXP (orig_addr, 1);
38245 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38246 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38248 if (extend != UNKNOWN)
38249 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
38251 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38252 UNSPEC_FUSION_P9);
38254 set = gen_rtx_SET (target, new_mem);
38255 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38256 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38257 emit_insn (insn);
38259 return;
38262 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38263 store sequence.
38265 The operands are:
38266 operands[0] register set with addis
38267 operands[1] value set via addis
38268 operands[2] target D-form memory being stored to
38269 operands[3] register being stored
38271 This is similar to the fusion introduced with power8, except it scales to
38272 both loads/stores and does not require the result register to be the same as
38273 the base register. At the moment, we only do this if the register set with addis
38274 is dead. */
38276 void
38277 expand_fusion_p9_store (rtx *operands)
38279 rtx tmp_reg = operands[0];
38280 rtx addis_value = operands[1];
38281 rtx orig_mem = operands[2];
38282 rtx src = operands[3];
38283 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
38284 enum rtx_code plus_or_lo_sum;
38285 machine_mode target_mode = GET_MODE (orig_mem);
38286 machine_mode ptr_mode = Pmode;
38288 gcc_assert (MEM_P (orig_mem));
38290 orig_addr = XEXP (orig_mem, 0);
38291 plus_or_lo_sum = GET_CODE (orig_addr);
38292 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38294 offset = XEXP (orig_addr, 1);
38295 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38296 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38298 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
38299 UNSPEC_FUSION_P9);
38301 set = gen_rtx_SET (new_mem, new_src);
38302 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
38303 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
38304 emit_insn (insn);
38306 return;
38309 /* Return a string to fuse an addis instruction with a load using extended
38310 fusion. The address that is used is the logical address that was formed
38311 during peephole2: (lo_sum (high) (low-part))
38313 The code is complicated, so we call output_asm_insn directly, and just
38314 return "". */
38316 const char *
38317 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
38319 machine_mode mode = GET_MODE (reg);
38320 rtx hi;
38321 rtx lo;
38322 rtx addr;
38323 const char *load_string;
38324 int r;
38326 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
38328 mem = XEXP (mem, 0);
38329 mode = GET_MODE (mem);
38332 if (GET_CODE (reg) == SUBREG)
38334 gcc_assert (SUBREG_BYTE (reg) == 0);
38335 reg = SUBREG_REG (reg);
38338 if (!REG_P (reg))
38339 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
38341 r = REGNO (reg);
38342 if (FP_REGNO_P (r))
38344 if (mode == SFmode)
38345 load_string = "lfs";
38346 else if (mode == DFmode || mode == DImode)
38347 load_string = "lfd";
38348 else
38349 gcc_unreachable ();
38351 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
38353 if (mode == SFmode)
38354 load_string = "lxssp";
38355 else if (mode == DFmode || mode == DImode)
38356 load_string = "lxsd";
38357 else
38358 gcc_unreachable ();
38360 else if (INT_REGNO_P (r))
38362 switch (mode)
38364 case E_QImode:
38365 load_string = "lbz";
38366 break;
38367 case E_HImode:
38368 load_string = "lhz";
38369 break;
38370 case E_SImode:
38371 case E_SFmode:
38372 load_string = "lwz";
38373 break;
38374 case E_DImode:
38375 case E_DFmode:
38376 if (!TARGET_POWERPC64)
38377 gcc_unreachable ();
38378 load_string = "ld";
38379 break;
38380 default:
38381 gcc_unreachable ();
38384 else
38385 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
38387 if (!MEM_P (mem))
38388 fatal_insn ("emit_fusion_p9_load not MEM", mem);
38390 addr = XEXP (mem, 0);
38391 fusion_split_address (addr, &hi, &lo);
38393 /* Emit the addis instruction. */
38394 emit_fusion_addis (tmp_reg, hi);
38396 /* Emit the D-form load instruction. */
38397 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
38399 return "";
38402 /* Return a string to fuse an addis instruction with a store using extended
38403 fusion. The address that is used is the logical address that was formed
38404 during peephole2: (lo_sum (high) (low-part))
38406 The code is complicated, so we call output_asm_insn directly, and just
38407 return "". */
38409 const char *
38410 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
38412 machine_mode mode = GET_MODE (reg);
38413 rtx hi;
38414 rtx lo;
38415 rtx addr;
38416 const char *store_string;
38417 int r;
38419 if (GET_CODE (reg) == SUBREG)
38421 gcc_assert (SUBREG_BYTE (reg) == 0);
38422 reg = SUBREG_REG (reg);
38425 if (!REG_P (reg))
38426 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
38428 r = REGNO (reg);
38429 if (FP_REGNO_P (r))
38431 if (mode == SFmode)
38432 store_string = "stfs";
38433 else if (mode == DFmode)
38434 store_string = "stfd";
38435 else
38436 gcc_unreachable ();
38438 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
38440 if (mode == SFmode)
38441 store_string = "stxssp";
38442 else if (mode == DFmode || mode == DImode)
38443 store_string = "stxsd";
38444 else
38445 gcc_unreachable ();
38447 else if (INT_REGNO_P (r))
38449 switch (mode)
38451 case E_QImode:
38452 store_string = "stb";
38453 break;
38454 case E_HImode:
38455 store_string = "sth";
38456 break;
38457 case E_SImode:
38458 case E_SFmode:
38459 store_string = "stw";
38460 break;
38461 case E_DImode:
38462 case E_DFmode:
38463 if (!TARGET_POWERPC64)
38464 gcc_unreachable ();
38465 store_string = "std";
38466 break;
38467 default:
38468 gcc_unreachable ();
38471 else
38472 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
38474 if (!MEM_P (mem))
38475 fatal_insn ("emit_fusion_p9_store not MEM", mem);
38477 addr = XEXP (mem, 0);
38478 fusion_split_address (addr, &hi, &lo);
38480 /* Emit the addis instruction. */
38481 emit_fusion_addis (tmp_reg, hi);
38483 /* Emit the D-form store instruction. */
38484 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
38486 return "";

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
        {
          atomic_hold_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feholdexcept"),
                          build_function_type_list (void_type_node,
                                                    double_ptr_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_hold_decl) = 1;
          DECL_EXTERNAL (atomic_hold_decl) = 1;
        }

      if (atomic_clear_decl == NULL_TREE)
        {
          atomic_clear_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feclearexcept"),
                          build_function_type_list (void_type_node,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_clear_decl) = 1;
          DECL_EXTERNAL (atomic_clear_decl) = 1;
        }

      tree const_double = build_qualified_type (double_type_node,
                                                TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
        {
          atomic_update_decl
            = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                          get_identifier ("__atomic_feupdateenv"),
                          build_function_type_list (void_type_node,
                                                    const_double_ptr,
                                                    NULL_TREE));
          TREE_PUBLIC (atomic_update_decl) = 1;
          DECL_EXTERNAL (atomic_update_decl) = 1;
        }

      tree fenv_var = create_tmp_var_raw (double_type_node);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
                                 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

	*fenv_var = __builtin_mffs ();
	double fenv_hold;
	*(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
	__builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var_raw (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                              build_int_cst (uint64_type_node,
                                             hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
                                     build_int_cst (unsigned_type_node, 0xff),
                                     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

	double fenv_clear = __builtin_mffs ();
	*(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
	__builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything in the lower 32 bits; unlike the hold mask
     above, this does not preserve the rounding modes or the non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var_raw (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
                                    fenv_clean_llu,
                                    build_int_cst (uint64_type_node,
                                                   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
                                      build_int_cst (unsigned_type_node, 0xff),
                                      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

	double old_fenv = __builtin_mffs ();
	double fenv_update;
	*(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
				   | (*(uint64_t*)fenv_var & 0x1ff80fff);
	__builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var_raw (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
                             build_int_cst (uint64_type_node,
                                            update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
                             build_int_cst (uint64_type_node,
                                            new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
                              old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
                                   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
                                       build_int_cst (unsigned_type_node, 0xff),
                                       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
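
/* Rough sketch (identifiers illustrative) of where the middle end places
   the three sequences built above when expanding a C11 atomic compound
   assignment such as "x *= y" with x an _Atomic double:

	HOLD;					   feholdexcept equivalent
	for (;;)
	  {
	    old = atomic_load (&x);
	    new = old * y;
	    if (atomic_compare_exchange (&x, &old, new))
	      break;
	    CLEAR;				   feclearexcept equivalent
	  }
	UPDATE;					   feupdateenv equivalent

   so exceptions raised by failed iterations are discarded and only those
   from the successfully stored computation survive.  */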

void
rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  /* The layout of the destination of the vmrgew instruction is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
                                           GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
                                           GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  emit_insn (gen_vsx_xvcdpsp (rtx_tmp2, rtx_tmp0));
  emit_insn (gen_vsx_xvcdpsp (rtx_tmp3, rtx_tmp1));

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}
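
/* Worked example for the code above, using big-endian element numbering
   with src1 = {a0, a1} and src2 = {b0, b1}: xxpermdi with immediate 0
   selects the high doubleword of each source, producing {a0, b0}, and
   immediate 3 selects the low doublewords, producing {a1, b1}.  Each half
   is then narrowed to single precision, and vmrgew interleaves the two
   partial results into dst.  The little-endian branch compensates for the
   reversed element numbering by swapping both the xxpermdi immediates and
   the vmrgew operand order.  */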

void
rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DImode);
  rtx_tmp1 = gen_reg_rtx (V2DImode);

  /* The layout of the destination of the vmrgew instruction is:
     rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
     Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after
     the vmrgew instruction will be correct.  */
  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
      emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
    }
  else
    {
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
      emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
    }

  rtx_tmp2 = gen_reg_rtx (V4SFmode);
  rtx_tmp3 = gen_reg_rtx (V4SFmode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
    }

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
  else
    emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
}

void
rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
                               rtx src2)
{
  rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;

  rtx_tmp0 = gen_reg_rtx (V2DFmode);
  rtx_tmp1 = gen_reg_rtx (V2DFmode);

  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
  emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));

  rtx_tmp2 = gen_reg_rtx (V4SImode);
  rtx_tmp3 = gen_reg_rtx (V4SImode);

  if (signed_convert)
    {
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
    }
  else
    {
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
      emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
    }

  emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
}

/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */

static bool
rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
                          optimization_type opt_type)
{
  switch (op)
    {
    case rsqrt_optab:
      return (opt_type == OPTIMIZE_FOR_SPEED
              && RS6000_RECIP_AUTO_RSQRTE_P (mode1));

    default:
      return true;
    }
}
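
/* Rough illustration of the policy above: with unsafe math optimizations
   enabled and optimizing for speed, 1.0 / sqrt (x) can be expanded via the
   reciprocal square-root estimate instructions (e.g. frsqrte) followed by
   Newton-Raphson refinement; the refinement sequence is larger than a
   plain sqrt + divide, so when optimizing for size the optab is reported
   as unsupported.  */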

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && (STRICT_ALIGNMENT || !optimize_size))
    return MAX (align, BITS_PER_WORD);
  return align;
}
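
/* Example of the effect: a string constant such as "hello world" is given
   at least word alignment (BITS_PER_WORD bits), the traditional rationale
   being that word-aligned strings let block-move and string expansions
   process the constant a full word at a time.  */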

/* Implement TARGET_STARTING_FRAME_OFFSET.  */

static HOST_WIDE_INT
rs6000_starting_frame_offset (void)
{
  if (FRAME_GROWS_DOWNWARD)
    return 0;
  return RS6000_STARTING_FRAME_OFFSET;
}

/* Create an alias for a mangled name where we have changed the mangling (in
   GCC 8.1, we used U10__float128, and now we use u9__ieee128).  This is
   called via the target hook TARGET_ASM_GLOBALIZE_DECL_NAME.  */

#if TARGET_ELF && RS6000_WEAK
static void
rs6000_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);

  targetm.asm_out.globalize_label (stream, name);

  if (rs6000_passes_ieee128 && name[0] == '_' && name[1] == 'Z')
    {
      tree save_asm_name = DECL_ASSEMBLER_NAME (decl);
      const char *old_name;

      ieee128_mangling_gcc_8_1 = true;
      lang_hooks.set_decl_assembler_name (decl);
      old_name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
      SET_DECL_ASSEMBLER_NAME (decl, save_asm_name);
      ieee128_mangling_gcc_8_1 = false;

      if (strcmp (name, old_name) != 0)
        {
          fprintf (stream, "\t.weak %s\n", old_name);
          fprintf (stream, "\t.set %s,%s\n", old_name, name);
        }
    }
}
#endif
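
/* Illustration with assumed mangled names: for a C++ function
   "void f (__float128)" the current mangling is _Z1fu9__ieee128, while
   GCC 8.1 produced _Z1fU10__float128, so the code above emits

	.weak _Z1fU10__float128
	.set _Z1fU10__float128,_Z1fu9__ieee128

   allowing objects compiled against the GCC 8.1 mangling to keep
   linking.  */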

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"