/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2017 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "ira.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "insn-attr.h"
#include "flags.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "explow.h"
#include "expr.h"
#include "output.h"
#include "dbxout.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "tree-vectorizer.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "except.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#include "case-cfn-macros.h"
#include "ppc-auxv.h"
#include "tree-ssa-propagate.h"

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Set -mabi=ieeelongdouble on some old targets.  In the future, power server
   systems will also set long double to be IEEE 128-bit.  AIX and Darwin
   explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
   those systems will not pick up this default.  This needs to be after all
   of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
   properly defined.  */
#ifndef TARGET_IEEEQUAD_DEFAULT
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
#define TARGET_IEEEQUAD_DEFAULT 1
#else
#define TARGET_IEEEQUAD_DEFAULT 0
#endif
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
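
/* Note that, like any function-style macro, min and max evaluate their
   arguments more than once: min (x++, y) increments x twice when x < y.
   Callers must therefore avoid side effects in the arguments.  */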

static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
  bool toc_is_wrapped_separately;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; we call to it so
   that we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	(RECIP_ALL) },
  { "none",	(RECIP_NONE) },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};

/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 },
  { "darn",		PPC_FEATURE2_DARN,		1 },
  { "scv",		PPC_FEATURE2_SCV,		1 }
};

/* On PowerPC, we have a limited number of target clones that we care about
   which means we can use an array to hold the options, rather than having more
   elaborate data structures to identify each possible variation.  Order the
   clones from the default to the highest ISA.  */
enum {
  CLONE_DEFAULT		= 0,		/* default clone.  */
  CLONE_ISA_2_05,			/* ISA 2.05 (power6).  */
  CLONE_ISA_2_06,			/* ISA 2.06 (power7).  */
  CLONE_ISA_2_07,			/* ISA 2.07 (power8).  */
  CLONE_ISA_3_00,			/* ISA 3.00 (power9).  */
  CLONE_MAX
};

/* Map compiler ISA bits into HWCAP names.  */
struct clone_map {
  HOST_WIDE_INT isa_mask;	/* rs6000_isa mask */
  const char *name;		/* name to use in __builtin_cpu_supports.  */
};

static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
  { 0,				"" },		/* Default options.  */
  { OPTION_MASK_CMPB,		"arch_2_05" },	/* ISA 2.05 (power6).  */
  { OPTION_MASK_POPCNTD,	"arch_2_06" },	/* ISA 2.06 (power7).  */
  { OPTION_MASK_P8_VECTOR,	"arch_2_07" },	/* ISA 2.07 (power8).  */
  { OPTION_MASK_P9_VECTOR,	"arch_3_00" },	/* ISA 3.00 (power9).  */
};

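/* This map backs the target_clones attribute.  For example (a sketch, with
   foo as a placeholder):

     __attribute__ ((target_clones ("cpu=power9,default")))
     long foo (long *p) { return *p; }

   compiles foo once per listed variant plus an ifunc resolver that selects
   the power9 version at load time when __builtin_cpu_supports ("arch_3_00")
   is true.  */
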
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* Quad offset is limited.  */
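
/* For example, a mode that is valid in GPRs with both reg+reg and
   reg+offset addressing would have an addr_mask of
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET)
   = 0x01 | 0x04 | 0x08 = 0x0d in the RELOAD_REG_GPR entry.  */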

/* Masks of the valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
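
/* These predicates gate generation of PowerPC update-form memory accesses.
   A PRE_MODIFY address such as

     (mem:DI (pre_modify:DI (reg:DI 9)
			    (plus:DI (reg:DI 9) (const_int 8))))

   is what update instructions like ldu/stdu (or lwzu/stwu in 32-bit code)
   implement: the new address is written back into the base register.  */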

/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

bool
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  return store_data_bypass_p (out_insn, in_insn);
}
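
/* This predicate is meant to be used as the guard of define_bypass entries
   in the machine description, along the lines of this sketch (producer and
   consumer reservation names are placeholders):

     (define_bypass 3 "some-store-producer" "some-store-consumer"
       "rs6000_store_data_bypass_p")

   where the generic store_data_bypass_p could ICE on the unusual PARALLELs
   filtered out above.  */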

/* Return true if we have D-form addressing in altivec registers.  */
static inline bool
mode_supports_vmx_dform (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
}

/* Return true if we have D-form addressing in VSX registers.  This addressing
   is more limited than normal d-form addressing in that the offset must be
   aligned on a 16-byte boundary.  */
static inline bool
mode_supports_vsx_dform_quad (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
	  != 0);
}
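
/* The quad (DQ-form) restriction comes from ISA 3.0 instructions such as
   lxv/stxv, whose displacement field is scaled by 16, so only offsets that
   are multiples of 16 can be encoded.  */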


/* Processor costs (relative to an add) */

const struct processor_costs *rs6000_cost;
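
/* COSTS_N_INSNS (1) is the cost of one add, so for example power9_cost
   below rates a 64-bit integer divide (divdi, COSTS_N_INSNS (12)) at
   twelve adds; the rtx cost machinery uses these relative weights when
   deciding, say, whether to replace a division by a cheaper sequence.  */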

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
  0,			    /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (8),	/* divsi */
  COSTS_N_INSNS (12),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (18),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  8,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};


/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_X
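
/* This is the usual X-macro pattern: rs6000-builtin.def is a list of
   RS6000_BUILTIN_* invocations, and each inclusion of it expands under a
   different set of macro definitions.  A minimal sketch of the idiom:

     #define LIST_OF_ITEMS ITEM (foo, 1) ITEM (bar, 2)
     #define ITEM(NAME, VALUE) { #NAME, VALUE },
     static const struct { const char *name; int value; } items[] = {
       LIST_OF_ITEMS
     };
     #undef ITEM

   The same .def file is also expanded elsewhere to build the
   enum rs6000_builtins values that index this table.  */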

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void htm_init_builtins (void);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;


/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9",  "10", "11", "12", "13", "14", "15",
      "16", "17", "18", "19", "20", "21", "22", "23",
      "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity, exclusions } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false, NULL },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false, NULL },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false, NULL },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false, NULL },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false, NULL }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
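
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 2), i.e. %v2, is
   0x80000000 >> 2 == 0x20000000, so the VRSAVE mask numbers the vector
   registers from the high-order bit down.  */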

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
1701 #undef TARGET_BUILTIN_DECL
1702 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1704 #undef TARGET_FOLD_BUILTIN
1705 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1706 #undef TARGET_GIMPLE_FOLD_BUILTIN
1707 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1709 #undef TARGET_EXPAND_BUILTIN
1710 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1712 #undef TARGET_MANGLE_TYPE
1713 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1715 #undef TARGET_INIT_LIBFUNCS
1716 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1718 #if TARGET_MACHO
1719 #undef TARGET_BINDS_LOCAL_P
1720 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1721 #endif
1723 #undef TARGET_MS_BITFIELD_LAYOUT_P
1724 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1726 #undef TARGET_ASM_OUTPUT_MI_THUNK
1727 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1729 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1730 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1732 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1733 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1735 #undef TARGET_REGISTER_MOVE_COST
1736 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1737 #undef TARGET_MEMORY_MOVE_COST
1738 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1739 #undef TARGET_CANNOT_COPY_INSN_P
1740 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1741 #undef TARGET_RTX_COSTS
1742 #define TARGET_RTX_COSTS rs6000_rtx_costs
1743 #undef TARGET_ADDRESS_COST
1744 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1745 #undef TARGET_INSN_COST
1746 #define TARGET_INSN_COST rs6000_insn_cost
1748 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1749 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1751 #undef TARGET_PROMOTE_FUNCTION_MODE
1752 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1754 #undef TARGET_RETURN_IN_MEMORY
1755 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1757 #undef TARGET_RETURN_IN_MSB
1758 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1760 #undef TARGET_SETUP_INCOMING_VARARGS
1761 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1763 /* Always strict argument naming on rs6000. */
1764 #undef TARGET_STRICT_ARGUMENT_NAMING
1765 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1766 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1767 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1768 #undef TARGET_SPLIT_COMPLEX_ARG
1769 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1770 #undef TARGET_MUST_PASS_IN_STACK
1771 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1772 #undef TARGET_PASS_BY_REFERENCE
1773 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1774 #undef TARGET_ARG_PARTIAL_BYTES
1775 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1776 #undef TARGET_FUNCTION_ARG_ADVANCE
1777 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1778 #undef TARGET_FUNCTION_ARG
1779 #define TARGET_FUNCTION_ARG rs6000_function_arg
1780 #undef TARGET_FUNCTION_ARG_PADDING
1781 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1782 #undef TARGET_FUNCTION_ARG_BOUNDARY
1783 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1785 #undef TARGET_BUILD_BUILTIN_VA_LIST
1786 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1788 #undef TARGET_EXPAND_BUILTIN_VA_START
1789 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1791 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1792 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1794 #undef TARGET_EH_RETURN_FILTER_MODE
1795 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1797 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1798 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1800 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1801 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1803 #undef TARGET_FLOATN_MODE
1804 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1806 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1807 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1809 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1810 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1812 #undef TARGET_MD_ASM_ADJUST
1813 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1815 #undef TARGET_OPTION_OVERRIDE
1816 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1818 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1819 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1820 rs6000_builtin_vectorized_function
1822 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1823 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1824 rs6000_builtin_md_vectorized_function
1826 #undef TARGET_STACK_PROTECT_GUARD
1827 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1829 #if !TARGET_MACHO
1830 #undef TARGET_STACK_PROTECT_FAIL
1831 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1832 #endif
1834 #ifdef HAVE_AS_TLS
1835 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1836 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1837 #endif
1839 /* Use a 32-bit anchor range. This leads to sequences like:
1841 addis tmp,anchor,high
1842 add dest,tmp,low
1844 where tmp itself acts as an anchor, and can be shared between
1845 accesses to the same 64k page. */
1846 #undef TARGET_MIN_ANCHOR_OFFSET
1847 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1848 #undef TARGET_MAX_ANCHOR_OFFSET
1849 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
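/* -0x7fffffff - 1 is the usual C idiom for the most negative 32-bit
   value: the literal 0x80000000 has unsigned type, so writing
   -0x80000000 would not produce a negative signed constant.  */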
1850 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1851 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1852 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1853 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1855 #undef TARGET_BUILTIN_RECIPROCAL
1856 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1858 #undef TARGET_SECONDARY_RELOAD
1859 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1860 #undef TARGET_SECONDARY_MEMORY_NEEDED
1861 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1862 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1863 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1865 #undef TARGET_LEGITIMATE_ADDRESS_P
1866 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1868 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1869 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1871 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1872 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1874 #undef TARGET_CAN_ELIMINATE
1875 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1877 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1878 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1880 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1881 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1883 #undef TARGET_TRAMPOLINE_INIT
1884 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1886 #undef TARGET_FUNCTION_VALUE
1887 #define TARGET_FUNCTION_VALUE rs6000_function_value
1889 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1890 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1892 #undef TARGET_OPTION_SAVE
1893 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1895 #undef TARGET_OPTION_RESTORE
1896 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1898 #undef TARGET_OPTION_PRINT
1899 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1901 #undef TARGET_CAN_INLINE_P
1902 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1904 #undef TARGET_SET_CURRENT_FUNCTION
1905 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1907 #undef TARGET_LEGITIMATE_CONSTANT_P
1908 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1910 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1911 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1913 #undef TARGET_CAN_USE_DOLOOP_P
1914 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1916 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1917 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1919 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1920 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1921 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1922 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1923 #undef TARGET_UNWIND_WORD_MODE
1924 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1926 #undef TARGET_OFFLOAD_OPTIONS
1927 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1929 #undef TARGET_C_MODE_FOR_SUFFIX
1930 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1932 #undef TARGET_INVALID_BINARY_OP
1933 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1935 #undef TARGET_OPTAB_SUPPORTED_P
1936 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1938 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1939 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1941 #undef TARGET_COMPARE_VERSION_PRIORITY
1942 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1944 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1945 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1946 rs6000_generate_version_dispatcher_body
1948 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1949 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1950 rs6000_get_function_versions_dispatcher
1952 #undef TARGET_OPTION_FUNCTION_VERSIONS
1953 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1955 #undef TARGET_HARD_REGNO_NREGS
1956 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1957 #undef TARGET_HARD_REGNO_MODE_OK
1958 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1960 #undef TARGET_MODES_TIEABLE_P
1961 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1963 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1964 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1965 rs6000_hard_regno_call_part_clobbered
1967 #undef TARGET_SLOW_UNALIGNED_ACCESS
1968 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1970 #undef TARGET_CAN_CHANGE_MODE_CLASS
1971 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1973 #undef TARGET_CONSTANT_ALIGNMENT
1974 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1976 #undef TARGET_STARTING_FRAME_OFFSET
1977 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1980 /* Processor table. */
1981 struct rs6000_ptt
1982 {
1983 const char *const name; /* Canonical processor name. */
1984 const enum processor_type processor; /* Processor type enum value. */
1985 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1986 };
1988 static struct rs6000_ptt const processor_target_table[] =
1989 {
1990 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1991 #include "rs6000-cpus.def"
1992 #undef RS6000_CPU
1993 };
1995 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1996 name is invalid. */
1998 static int
1999 rs6000_cpu_name_lookup (const char *name)
2001 size_t i;
2003 if (name != NULL)
2005 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
2006 if (! strcmp (name, processor_target_table[i].name))
2007 return (int)i;
2010 return -1;
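/* For example (hypothetical call; the names come from rs6000-cpus.def):

     int idx = rs6000_cpu_name_lookup ("power9");

   returns the table index if "power9" is a known cpu, and -1 for a
   misspelled -mcpu=/-mtune= value.  */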
2014 /* Return number of consecutive hard regs needed starting at reg REGNO
2015 to hold something of mode MODE.
2016 This is ordinarily the length in words of a value of mode MODE
2017 but can be less for certain modes in special long registers.
2019 POWER and PowerPC GPRs hold 32 bits worth;
2020 PowerPC64 GPRs and FPRs hold 64 bits worth. */
2022 static int
2023 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
2025 unsigned HOST_WIDE_INT reg_size;
2027 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
2028 128-bit floating point that can go in vector registers, which has VSX
2029 memory addressing. */
2030 if (FP_REGNO_P (regno))
2031 reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode)
2032 ? UNITS_PER_VSX_WORD
2033 : UNITS_PER_FP_WORD);
2035 else if (ALTIVEC_REGNO_P (regno))
2036 reg_size = UNITS_PER_ALTIVEC_WORD;
2038 else
2039 reg_size = UNITS_PER_WORD;
2041 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
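/* A worked example of the round-up division above: a 16-byte vector in
   32-bit GPRs (reg_size == 4) needs (16 + 4 - 1) / 4 == 4 registers,
   while the same value in a VSX register (reg_size == 16) needs
   (16 + 16 - 1) / 16 == 1.  */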
2044 /* Value is 1 if hard register REGNO can hold a value of machine-mode
2045 MODE. */
2046 static int
2047 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
2049 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
2051 if (COMPLEX_MODE_P (mode))
2052 mode = GET_MODE_INNER (mode);
2054 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
2055 register pairs, and we use PTImode where we need to deal with quad
2056 word memory operations. Don't allow quad words in the argument or frame
2057 pointer registers, just registers 0..31. */
2058 if (mode == PTImode)
2059 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2060 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
2061 && ((regno & 1) == 0));
2063 /* VSX registers that overlap the FPR registers are larger than for non-VSX
2064 implementations. Don't allow an item to be split between a FP register
2065 and an Altivec register. Allow TImode in all VSX registers if the user
2066 asked for it. */
2067 if (TARGET_VSX && VSX_REGNO_P (regno)
2068 && (VECTOR_MEM_VSX_P (mode)
2069 || FLOAT128_VECTOR_P (mode)
2070 || reg_addr[mode].scalar_in_vmx_p
2071 || mode == TImode
2072 || (TARGET_VADDUQM && mode == V1TImode)))
2074 if (FP_REGNO_P (regno))
2075 return FP_REGNO_P (last_regno);
2077 if (ALTIVEC_REGNO_P (regno))
2079 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
2080 return 0;
2082 return ALTIVEC_REGNO_P (last_regno);
2086 /* The GPRs can hold any mode, but values bigger than one register
2087 cannot go past R31. */
2088 if (INT_REGNO_P (regno))
2089 return INT_REGNO_P (last_regno);
2091 /* The float registers (except for VSX vector modes) can only hold floating
2092 modes and DImode. */
2093 if (FP_REGNO_P (regno))
2095 if (FLOAT128_VECTOR_P (mode))
2096 return false;
2098 if (SCALAR_FLOAT_MODE_P (mode)
2099 && (mode != TDmode || (regno % 2) == 0)
2100 && FP_REGNO_P (last_regno))
2101 return 1;
2103 if (GET_MODE_CLASS (mode) == MODE_INT)
2105 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
2106 return 1;
2108 if (TARGET_P8_VECTOR && (mode == SImode))
2109 return 1;
2111 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
2112 return 1;
2115 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
2116 && PAIRED_VECTOR_MODE (mode))
2117 return 1;
2119 return 0;
2122 /* The CR register can only hold CC modes. */
2123 if (CR_REGNO_P (regno))
2124 return GET_MODE_CLASS (mode) == MODE_CC;
2126 if (CA_REGNO_P (regno))
2127 return mode == Pmode || mode == SImode;
2129 /* AltiVec modes can only go in AltiVec registers. */
2130 if (ALTIVEC_REGNO_P (regno))
2131 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
2132 || mode == V1TImode);
2134 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
2135 and the value must fit within the register set. */
2137 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
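/* Two illustrative consequences of the checks above: TDmode, which
   needs an even/odd FPR pair, is accepted starting at an even FP
   register but rejected starting at an odd one; and on a 32-bit target
   a DImode value is accepted in r3 (it spans r3/r4) but rejected in
   r31, where last_regno would fall outside the GPRs.  */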
2140 /* Implement TARGET_HARD_REGNO_NREGS. */
2142 static unsigned int
2143 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
2145 return rs6000_hard_regno_nregs[mode][regno];
2148 /* Implement TARGET_HARD_REGNO_MODE_OK. */
2150 static bool
2151 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
2153 return rs6000_hard_regno_mode_ok_p[mode][regno];
2156 /* Implement TARGET_MODES_TIEABLE_P.
2158 PTImode cannot tie with other modes because PTImode is restricted to even
2159 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
2160 57744).
2162 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
2163 128-bit floating point on VSX systems ties with other vectors. */
2165 static bool
2166 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
2168 if (mode1 == PTImode)
2169 return mode2 == PTImode;
2170 if (mode2 == PTImode)
2171 return false;
2173 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
2174 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
2175 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
2176 return false;
2178 if (SCALAR_FLOAT_MODE_P (mode1))
2179 return SCALAR_FLOAT_MODE_P (mode2);
2180 if (SCALAR_FLOAT_MODE_P (mode2))
2181 return false;
2183 if (GET_MODE_CLASS (mode1) == MODE_CC)
2184 return GET_MODE_CLASS (mode2) == MODE_CC;
2185 if (GET_MODE_CLASS (mode2) == MODE_CC)
2186 return false;
2188 if (PAIRED_VECTOR_MODE (mode1))
2189 return PAIRED_VECTOR_MODE (mode2);
2190 if (PAIRED_VECTOR_MODE (mode2))
2191 return false;
2193 return true;
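/* For example, SFmode and DFmode tie (both scalar float), V4SImode and
   V2DFmode tie (both AltiVec/VSX vector modes), while PTImode ties only
   with itself, so a PTImode pseudo is never coalesced with a TImode
   one.  */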
2196 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
2198 static bool
2199 rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
2201 if (TARGET_32BIT
2202 && TARGET_POWERPC64
2203 && GET_MODE_SIZE (mode) > 4
2204 && INT_REGNO_P (regno))
2205 return true;
2207 if (TARGET_VSX
2208 && FP_REGNO_P (regno)
2209 && GET_MODE_SIZE (mode) > 8
2210 && !FLOAT128_2REG_P (mode))
2211 return true;
2213 return false;
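/* For example, with -m32 -mpowerpc64 a DImode value occupies a single
   64-bit GPR, but the 32-bit ABI only preserves the low 32 bits of
   GPRs across calls, so the register is reported as partially
   clobbered.  Likewise with VSX, a 16-byte value in a traditional FPR
   only has its low 64 bits preserved.  */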
2216 /* Print interesting facts about registers. */
2217 static void
2218 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2220 int r, m;
2222 for (r = first_regno; r <= last_regno; ++r)
2224 const char *comma = "";
2225 int len;
2227 if (first_regno == last_regno)
2228 fprintf (stderr, "%s:\t", reg_name);
2229 else
2230 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2232 len = 8;
2233 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2234 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2236 if (len > 70)
2238 fprintf (stderr, ",\n\t");
2239 len = 8;
2240 comma = "";
2243 if (rs6000_hard_regno_nregs[m][r] > 1)
2244 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2245 rs6000_hard_regno_nregs[m][r]);
2246 else
2247 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2249 comma = ", ";
2252 if (call_used_regs[r])
2254 if (len > 70)
2256 fprintf (stderr, ",\n\t");
2257 len = 8;
2258 comma = "";
2261 len += fprintf (stderr, "%s%s", comma, "call-used");
2262 comma = ", ";
2265 if (fixed_regs[r])
2267 if (len > 70)
2269 fprintf (stderr, ",\n\t");
2270 len = 8;
2271 comma = "";
2274 len += fprintf (stderr, "%s%s", comma, "fixed");
2275 comma = ", ";
2278 if (len > 70)
2280 fprintf (stderr, ",\n\t");
2281 comma = "";
2284 len += fprintf (stderr, "%sreg-class = %s", comma,
2285 reg_class_names[(int)rs6000_regno_regclass[r]]);
2286 comma = ", ";
2288 if (len > 70)
2290 fprintf (stderr, ",\n\t");
2291 comma = "";
2294 fprintf (stderr, "%sregno = %d\n", comma, r);
2298 static const char *
2299 rs6000_debug_vector_unit (enum rs6000_vector v)
2301 const char *ret;
2303 switch (v)
2305 case VECTOR_NONE: ret = "none"; break;
2306 case VECTOR_ALTIVEC: ret = "altivec"; break;
2307 case VECTOR_VSX: ret = "vsx"; break;
2308 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2309 case VECTOR_PAIRED: ret = "paired"; break;
2310 case VECTOR_OTHER: ret = "other"; break;
2311 default: ret = "unknown"; break;
2314 return ret;
2317 /* Inner function printing just the address mask for a particular reload
2318 register class. */
2319 DEBUG_FUNCTION char *
2320 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2322 static char ret[8];
2323 char *p = ret;
2325 if ((mask & RELOAD_REG_VALID) != 0)
2326 *p++ = 'v';
2327 else if (keep_spaces)
2328 *p++ = ' ';
2330 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2331 *p++ = 'm';
2332 else if (keep_spaces)
2333 *p++ = ' ';
2335 if ((mask & RELOAD_REG_INDEXED) != 0)
2336 *p++ = 'i';
2337 else if (keep_spaces)
2338 *p++ = ' ';
2340 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2341 *p++ = 'O';
2342 else if ((mask & RELOAD_REG_OFFSET) != 0)
2343 *p++ = 'o';
2344 else if (keep_spaces)
2345 *p++ = ' ';
2347 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2348 *p++ = '+';
2349 else if (keep_spaces)
2350 *p++ = ' ';
2352 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2353 *p++ = '+';
2354 else if (keep_spaces)
2355 *p++ = ' ';
2357 if ((mask & RELOAD_REG_AND_M16) != 0)
2358 *p++ = '&';
2359 else if (keep_spaces)
2360 *p++ = ' ';
2362 *p = '\0';
2364 return ret;
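/* For example, a mask of RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET comes back as "v io   " with KEEP_SPACES (unused
   positions padded with blanks) and as "vio" without.  */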
2367 /* Print the address masks in a human readable fashion. */
2368 DEBUG_FUNCTION void
2369 rs6000_debug_print_mode (ssize_t m)
2371 ssize_t rc;
2372 int spaces = 0;
2373 bool fuse_extra_p;
2375 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2376 for (rc = 0; rc < N_RELOAD_REG; rc++)
2377 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2378 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2380 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2381 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2382 fprintf (stderr, " Reload=%c%c",
2383 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2384 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2385 else
2386 spaces += sizeof (" Reload=sl") - 1;
2388 if (reg_addr[m].scalar_in_vmx_p)
2390 fprintf (stderr, "%*s Upper=y", spaces, "");
2391 spaces = 0;
2393 else
2394 spaces += sizeof (" Upper=y") - 1;
2396 fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2397 || reg_addr[m].fused_toc);
2398 if (!fuse_extra_p)
2400 for (rc = 0; rc < N_RELOAD_REG; rc++)
2402 if (rc != RELOAD_REG_ANY)
2404 if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing
2406 || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing
2407 || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing
2408 || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2410 fuse_extra_p = true;
2411 break;
2417 if (fuse_extra_p)
2419 fprintf (stderr, "%*s Fuse:", spaces, "");
2420 spaces = 0;
2422 for (rc = 0; rc < N_RELOAD_REG; rc++)
2424 if (rc != RELOAD_REG_ANY)
2426 char load, store;
2428 if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing)
2429 load = 'l';
2430 else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing)
2431 load = 'L';
2432 else
2433 load = '-';
2435 if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing)
2436 store = 's';
2437 else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing)
2438 store = 'S';
2439 else
2440 store = '-';
2442 if (load == '-' && store == '-')
2443 spaces += 5;
2444 else
2446 fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "",
2447 reload_reg_map[rc].name[0], load, store);
2448 spaces = 0;
2453 if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing)
2455 fprintf (stderr, "%*sP8gpr", (spaces + 1), "");
2456 spaces = 0;
2458 else
2459 spaces += sizeof (" P8gpr") - 1;
2461 if (reg_addr[m].fused_toc)
2463 fprintf (stderr, "%*sToc", (spaces + 1), "");
2464 spaces = 0;
2466 else
2467 spaces += sizeof (" Toc") - 1;
2469 else
2470 spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1;
2472 if (rs6000_vector_unit[m] != VECTOR_NONE
2473 || rs6000_vector_mem[m] != VECTOR_NONE)
2475 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2476 spaces, "",
2477 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2478 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2481 fputs ("\n", stderr);
2484 #define DEBUG_FMT_ID "%-32s= "
2485 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2486 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2487 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
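/* For instance, DEBUG_FMT_D expands to "%-32s= %d\n", so
   fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
   prints the label left-justified in a 32-column field followed by the
   value.  */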
2489 /* Print various interesting information with -mdebug=reg. */
2490 static void
2491 rs6000_debug_reg_global (void)
2493 static const char *const tf[2] = { "false", "true" };
2494 const char *nl = (const char *)0;
2495 int m;
2496 size_t m1, m2, v;
2497 char costly_num[20];
2498 char nop_num[20];
2499 char flags_buffer[40];
2500 const char *costly_str;
2501 const char *nop_str;
2502 const char *trace_str;
2503 const char *abi_str;
2504 const char *cmodel_str;
2505 struct cl_target_option cl_opts;
2507 /* Modes we want tieable information on. */
2508 static const machine_mode print_tieable_modes[] = {
2509 QImode,
2510 HImode,
2511 SImode,
2512 DImode,
2513 TImode,
2514 PTImode,
2515 SFmode,
2516 DFmode,
2517 TFmode,
2518 IFmode,
2519 KFmode,
2520 SDmode,
2521 DDmode,
2522 TDmode,
2523 V2SImode,
2524 V16QImode,
2525 V8HImode,
2526 V4SImode,
2527 V2DImode,
2528 V1TImode,
2529 V32QImode,
2530 V16HImode,
2531 V8SImode,
2532 V4DImode,
2533 V2TImode,
2534 V2SFmode,
2535 V4SFmode,
2536 V2DFmode,
2537 V8SFmode,
2538 V4DFmode,
2539 CCmode,
2540 CCUNSmode,
2541 CCEQmode,
2544 /* Virtual regs we are interested in. */
2545 static const struct {
2546 int regno; /* register number. */
2547 const char *name; /* register name. */
2548 } virtual_regs[] = {
2549 { STACK_POINTER_REGNUM, "stack pointer:" },
2550 { TOC_REGNUM, "toc: " },
2551 { STATIC_CHAIN_REGNUM, "static chain: " },
2552 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2553 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2554 { ARG_POINTER_REGNUM, "arg pointer: " },
2555 { FRAME_POINTER_REGNUM, "frame pointer:" },
2556 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2557 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2558 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2559 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2560 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2561 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2562 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2563 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2564 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2567 fputs ("\nHard register information:\n", stderr);
2568 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2569 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2570 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2571 LAST_ALTIVEC_REGNO,
2572 "vs");
2573 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2574 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2575 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2576 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2577 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2578 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2580 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2581 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2582 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2584 fprintf (stderr,
2585 "\n"
2586 "d reg_class = %s\n"
2587 "f reg_class = %s\n"
2588 "v reg_class = %s\n"
2589 "wa reg_class = %s\n"
2590 "wb reg_class = %s\n"
2591 "wd reg_class = %s\n"
2592 "we reg_class = %s\n"
2593 "wf reg_class = %s\n"
2594 "wg reg_class = %s\n"
2595 "wh reg_class = %s\n"
2596 "wi reg_class = %s\n"
2597 "wj reg_class = %s\n"
2598 "wk reg_class = %s\n"
2599 "wl reg_class = %s\n"
2600 "wm reg_class = %s\n"
2601 "wo reg_class = %s\n"
2602 "wp reg_class = %s\n"
2603 "wq reg_class = %s\n"
2604 "wr reg_class = %s\n"
2605 "ws reg_class = %s\n"
2606 "wt reg_class = %s\n"
2607 "wu reg_class = %s\n"
2608 "wv reg_class = %s\n"
2609 "ww reg_class = %s\n"
2610 "wx reg_class = %s\n"
2611 "wy reg_class = %s\n"
2612 "wz reg_class = %s\n"
2613 "wA reg_class = %s\n"
2614 "wH reg_class = %s\n"
2615 "wI reg_class = %s\n"
2616 "wJ reg_class = %s\n"
2617 "wK reg_class = %s\n"
2618 "\n",
2619 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2620 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2621 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2622 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2623 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
2624 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2625 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2626 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2627 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2628 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2629 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2630 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2631 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2632 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2633 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2634 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
2635 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2636 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2637 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2638 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2639 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2640 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2641 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2642 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2643 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2644 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2645 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
2646 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
2647 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
2648 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
2649 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
2650 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);
2652 nl = "\n";
2653 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2654 rs6000_debug_print_mode (m);
2656 fputs ("\n", stderr);
2658 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2660 machine_mode mode1 = print_tieable_modes[m1];
2661 bool first_time = true;
2663 nl = (const char *)0;
2664 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2666 machine_mode mode2 = print_tieable_modes[m2];
2667 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2669 if (first_time)
2671 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2672 nl = "\n";
2673 first_time = false;
2676 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2680 if (!first_time)
2681 fputs ("\n", stderr);
2684 if (nl)
2685 fputs (nl, stderr);
2687 if (rs6000_recip_control)
2689 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2691 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2692 if (rs6000_recip_bits[m])
2694 fprintf (stderr,
2695 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2696 GET_MODE_NAME (m),
2697 (RS6000_RECIP_AUTO_RE_P (m)
2698 ? "auto"
2699 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2700 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2701 ? "auto"
2702 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2705 fputs ("\n", stderr);
2708 if (rs6000_cpu_index >= 0)
2710 const char *name = processor_target_table[rs6000_cpu_index].name;
2711 HOST_WIDE_INT flags
2712 = processor_target_table[rs6000_cpu_index].target_enable;
2714 sprintf (flags_buffer, "-mcpu=%s flags", name);
2715 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2717 else
2718 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2720 if (rs6000_tune_index >= 0)
2722 const char *name = processor_target_table[rs6000_tune_index].name;
2723 HOST_WIDE_INT flags
2724 = processor_target_table[rs6000_tune_index].target_enable;
2726 sprintf (flags_buffer, "-mtune=%s flags", name);
2727 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2729 else
2730 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2732 cl_target_option_save (&cl_opts, &global_options);
2733 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2734 rs6000_isa_flags);
2736 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2737 rs6000_isa_flags_explicit);
2739 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2740 rs6000_builtin_mask);
2742 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2744 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2745 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2747 switch (rs6000_sched_costly_dep)
2749 case max_dep_latency:
2750 costly_str = "max_dep_latency";
2751 break;
2753 case no_dep_costly:
2754 costly_str = "no_dep_costly";
2755 break;
2757 case all_deps_costly:
2758 costly_str = "all_deps_costly";
2759 break;
2761 case true_store_to_load_dep_costly:
2762 costly_str = "true_store_to_load_dep_costly";
2763 break;
2765 case store_to_load_dep_costly:
2766 costly_str = "store_to_load_dep_costly";
2767 break;
2769 default:
2770 costly_str = costly_num;
2771 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2772 break;
2775 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2777 switch (rs6000_sched_insert_nops)
2779 case sched_finish_regroup_exact:
2780 nop_str = "sched_finish_regroup_exact";
2781 break;
2783 case sched_finish_pad_groups:
2784 nop_str = "sched_finish_pad_groups";
2785 break;
2787 case sched_finish_none:
2788 nop_str = "sched_finish_none";
2789 break;
2791 default:
2792 nop_str = nop_num;
2793 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2794 break;
2797 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2799 switch (rs6000_sdata)
2801 default:
2802 case SDATA_NONE:
2803 break;
2805 case SDATA_DATA:
2806 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2807 break;
2809 case SDATA_SYSV:
2810 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2811 break;
2813 case SDATA_EABI:
2814 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2815 break;
2819 switch (rs6000_traceback)
2821 case traceback_default: trace_str = "default"; break;
2822 case traceback_none: trace_str = "none"; break;
2823 case traceback_part: trace_str = "part"; break;
2824 case traceback_full: trace_str = "full"; break;
2825 default: trace_str = "unknown"; break;
2828 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2830 switch (rs6000_current_cmodel)
2832 case CMODEL_SMALL: cmodel_str = "small"; break;
2833 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2834 case CMODEL_LARGE: cmodel_str = "large"; break;
2835 default: cmodel_str = "unknown"; break;
2838 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2840 switch (rs6000_current_abi)
2842 case ABI_NONE: abi_str = "none"; break;
2843 case ABI_AIX: abi_str = "aix"; break;
2844 case ABI_ELFv2: abi_str = "ELFv2"; break;
2845 case ABI_V4: abi_str = "V4"; break;
2846 case ABI_DARWIN: abi_str = "darwin"; break;
2847 default: abi_str = "unknown"; break;
2850 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2852 if (rs6000_altivec_abi)
2853 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2855 if (rs6000_darwin64_abi)
2856 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2858 fprintf (stderr, DEBUG_FMT_S, "single_float",
2859 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2861 fprintf (stderr, DEBUG_FMT_S, "double_float",
2862 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2864 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2865 (TARGET_SOFT_FLOAT ? "true" : "false"));
2867 if (TARGET_LINK_STACK)
2868 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2870 if (TARGET_P8_FUSION)
2872 char options[80];
2874 strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
2875 if (TARGET_TOC_FUSION)
2876 strcat (options, ", toc");
2878 if (TARGET_P8_FUSION_SIGN)
2879 strcat (options, ", sign");
2881 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2884 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2885 TARGET_SECURE_PLT ? "secure" : "bss");
2886 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2887 aix_struct_return ? "aix" : "sysv");
2888 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2889 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2890 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2891 tf[!!rs6000_align_branch_targets]);
2892 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2893 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2894 rs6000_long_double_type_size);
2895 if (rs6000_long_double_type_size == 128)
2897 fprintf (stderr, DEBUG_FMT_S, "long double type",
2898 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2899 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2900 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2902 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2903 (int)rs6000_sched_restricted_insns_priority);
2904 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2905 (int)END_BUILTINS);
2906 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2907 (int)RS6000_BUILTIN_COUNT);
2909 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2910 (int)TARGET_FLOAT128_ENABLE_TYPE);
2912 if (TARGET_VSX)
2913 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2914 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2916 if (TARGET_DIRECT_MOVE_128)
2917 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2918 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2922 /* Update the addr mask bits in reg_addr to help secondary reload and the
2923 legitimate address support figure out the appropriate addressing to
2924 use. */
2926 static void
2927 rs6000_setup_reg_addr_masks (void)
2929 ssize_t rc, reg, m, nregs;
2930 addr_mask_type any_addr_mask, addr_mask;
2932 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2934 machine_mode m2 = (machine_mode) m;
2935 bool complex_p = false;
2936 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2937 size_t msize;
2939 if (COMPLEX_MODE_P (m2))
2941 complex_p = true;
2942 m2 = GET_MODE_INNER (m2);
2945 msize = GET_MODE_SIZE (m2);
2947 /* SDmode is special in that we want to access it only via REG+REG
2948 addressing on power7 and above, since we want to use the LFIWZX and
2949 STFIWZX instructions to load it. */
2950 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2952 any_addr_mask = 0;
2953 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2955 addr_mask = 0;
2956 reg = reload_reg_map[rc].reg;
2958 /* Can mode values go in the GPR/FPR/Altivec registers? */
2959 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2961 bool small_int_vsx_p = (small_int_p
2962 && (rc == RELOAD_REG_FPR
2963 || rc == RELOAD_REG_VMX));
2965 nregs = rs6000_hard_regno_nregs[m][reg];
2966 addr_mask |= RELOAD_REG_VALID;
2968 /* Indicate if the mode takes more than 1 physical register. If
2969 it takes a single register, indicate it can do REG+REG
2970 addressing. Small integers in VSX registers can only do
2971 REG+REG addressing. */
2972 if (small_int_vsx_p)
2973 addr_mask |= RELOAD_REG_INDEXED;
2974 else if (nregs > 1 || m == BLKmode || complex_p)
2975 addr_mask |= RELOAD_REG_MULTIPLE;
2976 else
2977 addr_mask |= RELOAD_REG_INDEXED;
2979 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2980 addressing. If we allow scalars into Altivec registers,
2981 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY. */
2983 if (TARGET_UPDATE
2984 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2985 && msize <= 8
2986 && !VECTOR_MODE_P (m2)
2987 && !FLOAT128_VECTOR_P (m2)
2988 && !complex_p
2989 && !small_int_vsx_p)
2991 addr_mask |= RELOAD_REG_PRE_INCDEC;
2993 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2994 we don't allow PRE_MODIFY for some multi-register
2995 operations. */
2996 switch (m)
2998 default:
2999 addr_mask |= RELOAD_REG_PRE_MODIFY;
3000 break;
3002 case E_DImode:
3003 if (TARGET_POWERPC64)
3004 addr_mask |= RELOAD_REG_PRE_MODIFY;
3005 break;
3007 case E_DFmode:
3008 case E_DDmode:
3009 if (TARGET_DF_INSN)
3010 addr_mask |= RELOAD_REG_PRE_MODIFY;
3011 break;
3016 /* GPR and FPR registers can do REG+OFFSET addressing, except
3017 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
3018 for 64-bit scalars and 32-bit SFmode to altivec registers. */
3019 if ((addr_mask != 0) && !indexed_only_p
3020 && msize <= 8
3021 && (rc == RELOAD_REG_GPR
3022 || ((msize == 8 || m2 == SFmode)
3023 && (rc == RELOAD_REG_FPR
3024 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
3025 addr_mask |= RELOAD_REG_OFFSET;
3027 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
3028 instructions are enabled. The offset for 128-bit VSX registers is
3029 only 12 bits. While GPRs can handle the full offset range, VSX
3030 registers can only handle the restricted range. */
3031 else if ((addr_mask != 0) && !indexed_only_p
3032 && msize == 16 && TARGET_P9_VECTOR
3033 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
3034 || (m2 == TImode && TARGET_VSX)))
3036 addr_mask |= RELOAD_REG_OFFSET;
3037 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
3038 addr_mask |= RELOAD_REG_QUAD_OFFSET;
3041 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
3042 addressing on 128-bit types. */
3043 if (rc == RELOAD_REG_VMX && msize == 16
3044 && (addr_mask & RELOAD_REG_VALID) != 0)
3045 addr_mask |= RELOAD_REG_AND_M16;
3047 reg_addr[m].addr_mask[rc] = addr_mask;
3048 any_addr_mask |= addr_mask;
3051 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
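/* RELOAD_REG_ANY is simply the union of the per-class masks.  As a
   hypothetical example, if for some mode the GPR mask were
   RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET and the FPR
   mask only RELOAD_REG_VALID | RELOAD_REG_INDEXED, the ANY mask would
   contain all three bits.  */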
3056 /* Initialize the various global tables that are based on register size. */
3057 static void
3058 rs6000_init_hard_regno_mode_ok (bool global_init_p)
3060 ssize_t r, m, c;
3061 int align64;
3062 int align32;
3064 /* Precalculate REGNO_REG_CLASS. */
3065 rs6000_regno_regclass[0] = GENERAL_REGS;
3066 for (r = 1; r < 32; ++r)
3067 rs6000_regno_regclass[r] = BASE_REGS;
3069 for (r = 32; r < 64; ++r)
3070 rs6000_regno_regclass[r] = FLOAT_REGS;
3072 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
3073 rs6000_regno_regclass[r] = NO_REGS;
3075 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
3076 rs6000_regno_regclass[r] = ALTIVEC_REGS;
3078 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
3079 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
3080 rs6000_regno_regclass[r] = CR_REGS;
3082 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
3083 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
3084 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
3085 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
3086 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
3087 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
3088 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
3089 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
3090 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
3091 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
3093 /* Precalculate register class to simpler reload register class. We don't
3094 need all of the register classes that are combinations of different
3095 classes, just the simple ones that have constraint letters. */
3096 for (c = 0; c < N_REG_CLASSES; c++)
3097 reg_class_to_reg_type[c] = NO_REG_TYPE;
3099 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
3100 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
3101 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
3102 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
3103 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
3104 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
3105 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
3106 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
3107 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
3108 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
3110 if (TARGET_VSX)
3112 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
3113 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
3115 else
3117 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
3118 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
3121 /* Precalculate the valid memory formats as well as the vector information,
3122 this must be set up before the rs6000_hard_regno_nregs_internal calls
3123 below. */
3124 gcc_assert ((int)VECTOR_NONE == 0);
3125 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
3126 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
3128 gcc_assert ((int)CODE_FOR_nothing == 0);
3129 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
3131 gcc_assert ((int)NO_REGS == 0);
3132 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
3134 /* The VSX hardware allows native alignment for vectors; control whether the
3135 compiler believes it can use native alignment or must still use 128-bit alignment. */
3136 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
3138 align64 = 64;
3139 align32 = 32;
3141 else
3143 align64 = 128;
3144 align32 = 128;
3147 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
3148 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
3149 if (TARGET_FLOAT128_TYPE)
3151 rs6000_vector_mem[KFmode] = VECTOR_VSX;
3152 rs6000_vector_align[KFmode] = 128;
3154 if (FLOAT128_IEEE_P (TFmode))
3156 rs6000_vector_mem[TFmode] = VECTOR_VSX;
3157 rs6000_vector_align[TFmode] = 128;
3161 /* V2DF mode, VSX only. */
3162 if (TARGET_VSX)
3164 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
3165 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
3166 rs6000_vector_align[V2DFmode] = align64;
3169 /* V4SF mode, either VSX or Altivec. */
3170 if (TARGET_VSX)
3172 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
3173 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
3174 rs6000_vector_align[V4SFmode] = align32;
3176 else if (TARGET_ALTIVEC)
3178 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
3179 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
3180 rs6000_vector_align[V4SFmode] = align32;
3183 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
3184 and stores. */
3185 if (TARGET_ALTIVEC)
3187 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
3188 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
3189 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
3190 rs6000_vector_align[V4SImode] = align32;
3191 rs6000_vector_align[V8HImode] = align32;
3192 rs6000_vector_align[V16QImode] = align32;
3194 if (TARGET_VSX)
3196 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
3197 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
3198 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
3200 else
3202 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
3203 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
3204 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
3208 /* V2DImode, full mode support depends on ISA 2.07 vector mode. Allow under
3209 VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
3210 if (TARGET_VSX)
3212 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
3213 rs6000_vector_unit[V2DImode]
3214 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3215 rs6000_vector_align[V2DImode] = align64;
3217 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
3218 rs6000_vector_unit[V1TImode]
3219 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
3220 rs6000_vector_align[V1TImode] = 128;
3223 /* DFmode, see if we want to use the VSX unit. Memory is handled
3224 differently, so don't set rs6000_vector_mem. */
3225 if (TARGET_VSX)
3227 rs6000_vector_unit[DFmode] = VECTOR_VSX;
3228 rs6000_vector_align[DFmode] = 64;
3231 /* SFmode, see if we want to use the VSX unit. */
3232 if (TARGET_P8_VECTOR)
3234 rs6000_vector_unit[SFmode] = VECTOR_VSX;
3235 rs6000_vector_align[SFmode] = 32;
3238 /* Allow TImode in VSX register and set the VSX memory macros. */
3239 if (TARGET_VSX)
3241 rs6000_vector_mem[TImode] = VECTOR_VSX;
3242 rs6000_vector_align[TImode] = align64;
3245 /* TODO add paired floating point vector support. */
3247 /* Register class constraints for the constraints that depend on compile
3248 switches. When the VSX code was added, different constraints were added
3249 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
3250 of the VSX registers are used. The register classes for scalar floating
3251 point types is set, based on whether we allow that type into the upper
3252 (Altivec) registers. GCC has register classes to target the Altivec
3253 registers for load/store operations, to select using a VSX memory
3254 operation instead of the traditional floating point operation. The
3255 constraints are:
3257 d - Register class to use with traditional DFmode instructions.
3258 f - Register class to use with traditional SFmode instructions.
3259 v - Altivec register.
3260 wa - Any VSX register.
3261 wc - Reserved to represent individual CR bits (used in LLVM).
3262 wd - Preferred register class for V2DFmode.
3263 wf - Preferred register class for V4SFmode.
3264 wg - Float register for power6x move insns.
3265 wh - FP register for direct move instructions.
3266 wi - FP or VSX register to hold 64-bit integers for VSX insns.
3267 wj - FP or VSX register to hold 64-bit integers for direct moves.
3268 wk - FP or VSX register to hold 64-bit doubles for direct moves.
3269 wl - Float register if we can do 32-bit signed int loads.
3270 wm - VSX register for ISA 2.07 direct move operations.
3271 wn - always NO_REGS.
3272 wr - GPR if 64-bit mode is permitted.
3273 ws - Register class to do ISA 2.06 DF operations.
3274 wt - VSX register for TImode in VSX registers.
3275 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
3276 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
3277 ww - Register class to do SF conversions in with VSX operations.
3278 wx - Float register if we can do 32-bit int stores.
3279 wy - Register class to do ISA 2.07 SF operations.
3280 wz - Float register if we can do 32-bit unsigned int loads.
3281 wH - Altivec register if SImode is allowed in VSX registers.
3282 wI - VSX register if SImode is allowed in VSX registers.
3283 wJ - VSX register if QImode/HImode are allowed in VSX registers.
3284 wK - Altivec register if QImode/HImode are allowed in VSX registers. */
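/* These letters are referenced as operand constraints in the machine
   description; an alternative written against, say, "wa" can only match
   when rs6000_constraints[RS6000_CONSTRAINT_wa] has been set to
   VSX_REGS below, since a register constraint that maps to NO_REGS
   never matches.  */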
3286 if (TARGET_HARD_FLOAT)
3287 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
3289 if (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
3290 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
3292 if (TARGET_VSX)
3294 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
3295 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
3296 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
3297 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; /* DFmode */
3298 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; /* DFmode */
3299 rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; /* DImode */
3300 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
3303 /* Add conditional constraints based on various options, to allow us to
3304 collapse multiple insn patterns. */
3305 if (TARGET_ALTIVEC)
3306 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
3308 if (TARGET_MFPGPR) /* DFmode */
3309 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
3311 if (TARGET_LFIWAX)
3312 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
3314 if (TARGET_DIRECT_MOVE)
3316 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
3317 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
3318 = rs6000_constraints[RS6000_CONSTRAINT_wi];
3319 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
3320 = rs6000_constraints[RS6000_CONSTRAINT_ws];
3321 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
3324 if (TARGET_POWERPC64)
3326 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
3327 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
3330 if (TARGET_P8_VECTOR) /* SFmode */
3332 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
3333 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
3334 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
3336 else if (TARGET_VSX)
3337 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
3339 if (TARGET_STFIWX)
3340 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
3342 if (TARGET_LFIWZX)
3343 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
3345 if (TARGET_FLOAT128_TYPE)
3347 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
3348 if (FLOAT128_IEEE_P (TFmode))
3349 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
3352 if (TARGET_P9_VECTOR)
3354 /* Support for new D-form instructions. */
3355 rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS;
3357 /* Support for ISA 3.0 (power9) vectors. */
3358 rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS;
3361 /* Support for new direct moves (ISA 3.0 + 64bit). */
3362 if (TARGET_DIRECT_MOVE_128)
3363 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
3365 /* Support small integers in VSX registers. */
3366 if (TARGET_P8_VECTOR)
3368 rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS;
3369 rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS;
3370 if (TARGET_P9_VECTOR)
3372 rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS;
3373 rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS;
3377 /* Set up the reload helper and direct move functions. */
3378 if (TARGET_VSX || TARGET_ALTIVEC)
3380 if (TARGET_64BIT)
3382 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
3383 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
3384 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
3385 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
3386 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
3387 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
3388 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
3389 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3390 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3391 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3392 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3393 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3394 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3395 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3396 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3397 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3398 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3399 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3400 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3401 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3403 if (FLOAT128_VECTOR_P (KFmode))
3405 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3406 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3409 if (FLOAT128_VECTOR_P (TFmode))
3411 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3412 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3415 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3416 available. */
3417 if (TARGET_NO_SDMODE_STACK)
3419 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3420 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3423 if (TARGET_VSX)
3425 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3426 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3429 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3431 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3432 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3433 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3434 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3435 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3436 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3437 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3438 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3439 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3441 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3442 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3443 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3444 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3445 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3446 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3447 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3448 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3449 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3451 if (FLOAT128_VECTOR_P (KFmode))
3453 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3454 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3457 if (FLOAT128_VECTOR_P (TFmode))
3459 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3460 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3464 else
3466 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3467 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3468 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3469 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3470 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3471 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3472 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3473 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3474 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3475 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3476 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3477 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3478 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3479 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3480 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3481 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3482 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3483 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3484 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3485 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3487 if (FLOAT128_VECTOR_P (KFmode))
3489 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3490 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3493 if (FLOAT128_IEEE_P (TFmode))
3495 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3496 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3499 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3500 available. */
3501 if (TARGET_NO_SDMODE_STACK)
3503 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3504 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3507 if (TARGET_VSX)
3509 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3510 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3513 if (TARGET_DIRECT_MOVE)
3515 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3516 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3517 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3521 reg_addr[DFmode].scalar_in_vmx_p = true;
3522 reg_addr[DImode].scalar_in_vmx_p = true;
3524 if (TARGET_P8_VECTOR)
3526 reg_addr[SFmode].scalar_in_vmx_p = true;
3527 reg_addr[SImode].scalar_in_vmx_p = true;
3529 if (TARGET_P9_VECTOR)
3531 reg_addr[HImode].scalar_in_vmx_p = true;
3532 reg_addr[QImode].scalar_in_vmx_p = true;
3537 /* Set up the fusion operations. */
3538 if (TARGET_P8_FUSION)
3540 reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi;
3541 reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi;
3542 reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si;
3543 if (TARGET_64BIT)
3544 reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di;
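/* Illustrative note: the insns recorded above target the addis + dependent
   load pairs that power8 can fuse, e.g. a medium code model TOC reference:

       addis 9,2,sym@toc@ha     # high adjusted part of the address
       lwz   3,sym@toc@l(9)     # dependent load, fused with the addis  */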
3547 if (TARGET_P9_FUSION)
3549 struct fuse_insns {
3550 enum machine_mode mode; /* mode of the fused type. */
3551 enum machine_mode pmode; /* pointer mode. */
3552 enum rs6000_reload_reg_type rtype; /* register type. */
3553 enum insn_code load; /* load insn. */
3554 enum insn_code store; /* store insn. */
3557 static const struct fuse_insns addis_insns[] = {
3558 { E_SFmode, E_DImode, RELOAD_REG_FPR,
3559 CODE_FOR_fusion_vsx_di_sf_load,
3560 CODE_FOR_fusion_vsx_di_sf_store },
3562 { E_SFmode, E_SImode, RELOAD_REG_FPR,
3563 CODE_FOR_fusion_vsx_si_sf_load,
3564 CODE_FOR_fusion_vsx_si_sf_store },
3566 { E_DFmode, E_DImode, RELOAD_REG_FPR,
3567 CODE_FOR_fusion_vsx_di_df_load,
3568 CODE_FOR_fusion_vsx_di_df_store },
3570 { E_DFmode, E_SImode, RELOAD_REG_FPR,
3571 CODE_FOR_fusion_vsx_si_df_load,
3572 CODE_FOR_fusion_vsx_si_df_store },
3574 { E_DImode, E_DImode, RELOAD_REG_FPR,
3575 CODE_FOR_fusion_vsx_di_di_load,
3576 CODE_FOR_fusion_vsx_di_di_store },
3578 { E_DImode, E_SImode, RELOAD_REG_FPR,
3579 CODE_FOR_fusion_vsx_si_di_load,
3580 CODE_FOR_fusion_vsx_si_di_store },
3582 { E_QImode, E_DImode, RELOAD_REG_GPR,
3583 CODE_FOR_fusion_gpr_di_qi_load,
3584 CODE_FOR_fusion_gpr_di_qi_store },
3586 { E_QImode, E_SImode, RELOAD_REG_GPR,
3587 CODE_FOR_fusion_gpr_si_qi_load,
3588 CODE_FOR_fusion_gpr_si_qi_store },
3590 { E_HImode, E_DImode, RELOAD_REG_GPR,
3591 CODE_FOR_fusion_gpr_di_hi_load,
3592 CODE_FOR_fusion_gpr_di_hi_store },
3594 { E_HImode, E_SImode, RELOAD_REG_GPR,
3595 CODE_FOR_fusion_gpr_si_hi_load,
3596 CODE_FOR_fusion_gpr_si_hi_store },
3598 { E_SImode, E_DImode, RELOAD_REG_GPR,
3599 CODE_FOR_fusion_gpr_di_si_load,
3600 CODE_FOR_fusion_gpr_di_si_store },
3602 { E_SImode, E_SImode, RELOAD_REG_GPR,
3603 CODE_FOR_fusion_gpr_si_si_load,
3604 CODE_FOR_fusion_gpr_si_si_store },
3606 { E_SFmode, E_DImode, RELOAD_REG_GPR,
3607 CODE_FOR_fusion_gpr_di_sf_load,
3608 CODE_FOR_fusion_gpr_di_sf_store },
3610 { E_SFmode, E_SImode, RELOAD_REG_GPR,
3611 CODE_FOR_fusion_gpr_si_sf_load,
3612 CODE_FOR_fusion_gpr_si_sf_store },
3614 { E_DImode, E_DImode, RELOAD_REG_GPR,
3615 CODE_FOR_fusion_gpr_di_di_load,
3616 CODE_FOR_fusion_gpr_di_di_store },
3618 { E_DFmode, E_DImode, RELOAD_REG_GPR,
3619 CODE_FOR_fusion_gpr_di_df_load,
3620 CODE_FOR_fusion_gpr_di_df_store },
3623 machine_mode cur_pmode = Pmode;
3624 size_t i;
3626 for (i = 0; i < ARRAY_SIZE (addis_insns); i++)
3628 machine_mode xmode = addis_insns[i].mode;
3629 enum rs6000_reload_reg_type rtype = addis_insns[i].rtype;
3631 if (addis_insns[i].pmode != cur_pmode)
3632 continue;
3634 if (rtype == RELOAD_REG_FPR && !TARGET_HARD_FLOAT)
3635 continue;
3637 reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
3638 reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
3640 if (rtype == RELOAD_REG_FPR && TARGET_P9_VECTOR)
3642 reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
3643 = addis_insns[i].load;
3644 reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
3645 = addis_insns[i].store;
3650 /* Note for which types we support fusing a TOC setup plus a memory insn.
3651 We only do fused TOCs for medium/large code models. */
3652 if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64
3653 && (TARGET_CMODEL != CMODEL_SMALL))
3655 reg_addr[QImode].fused_toc = true;
3656 reg_addr[HImode].fused_toc = true;
3657 reg_addr[SImode].fused_toc = true;
3658 reg_addr[DImode].fused_toc = true;
3659 if (TARGET_HARD_FLOAT)
3661 if (TARGET_SINGLE_FLOAT)
3662 reg_addr[SFmode].fused_toc = true;
3663 if (TARGET_DOUBLE_FLOAT)
3664 reg_addr[DFmode].fused_toc = true;
3668 /* Precalculate HARD_REGNO_NREGS. */
3669 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3670 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3671 rs6000_hard_regno_nregs[m][r]
3672 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
3674 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3675 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
3676 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3677 if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m))
3678 rs6000_hard_regno_mode_ok_p[m][r] = true;
3680 /* Precalculate CLASS_MAX_NREGS sizes. */
3681 for (c = 0; c < LIM_REG_CLASSES; ++c)
3683 int reg_size;
3685 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3686 reg_size = UNITS_PER_VSX_WORD;
3688 else if (c == ALTIVEC_REGS)
3689 reg_size = UNITS_PER_ALTIVEC_WORD;
3691 else if (c == FLOAT_REGS)
3692 reg_size = UNITS_PER_FP_WORD;
3694 else
3695 reg_size = UNITS_PER_WORD;
3697 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3699 machine_mode m2 = (machine_mode)m;
3700 int reg_size2 = reg_size;
3702 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3703 in VSX. */
3704 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3705 reg_size2 = UNITS_PER_FP_WORD;
3707 rs6000_class_max_nregs[m][c]
3708 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
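/* Worked example: V2DFmode is 16 bytes, so the ceiling division above gives
   (16 + 8 - 1) / 8 = 2 registers in FLOAT_REGS (8-byte FP words) but
   (16 + 16 - 1) / 16 = 1 register in VSX_REGS (16-byte words).  */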
3712 /* Calculate which modes to automatically generate code to use the
3713 reciprocal divide and square root instructions. In the future, possibly
3714 automatically generate the instructions even if the user did not specify
3715 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3716 not accurate enough. */
3717 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3718 if (TARGET_FRES)
3719 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3720 if (TARGET_FRE)
3721 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3722 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3723 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3724 if (VECTOR_UNIT_VSX_P (V2DFmode))
3725 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3727 if (TARGET_FRSQRTES)
3728 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3729 if (TARGET_FRSQRTE)
3730 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3731 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3732 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3733 if (VECTOR_UNIT_VSX_P (V2DFmode))
3734 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3736 if (rs6000_recip_control)
3738 if (!flag_finite_math_only)
3739 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3740 "-ffast-math");
3741 if (flag_trapping_math)
3742 warning (0, "%qs requires %qs or %qs", "-mrecip",
3743 "-fno-trapping-math", "-ffast-math");
3744 if (!flag_reciprocal_math)
3745 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3746 "-ffast-math");
3747 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3749 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3750 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3751 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3753 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3754 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3755 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3757 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3758 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3759 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3761 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3762 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3763 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3765 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3766 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3767 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3769 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3770 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3771 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3773 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3774 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3775 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3777 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3778 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3779 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
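/* Background sketch (not compiler code): the AUTO_RE/AUTO_RSQRTE bits let
   the expander replace a/b by a * e', where e = fre(b) estimates 1/b and
   one Newton-Raphson step

       e' = e * (2.0 - b * e)

   refines it, roughly doubling the number of correct bits.  That is why
   -mrecip is gated on the -ffast-math style flags checked above.  */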
3783 /* Update the addr mask bits in reg_addr to help the secondary reload and
3784 legitimate-address support figure out the appropriate addressing to
3785 use. */
3786 rs6000_setup_reg_addr_masks ();
3788 if (global_init_p || TARGET_DEBUG_TARGET)
3790 if (TARGET_DEBUG_REG)
3791 rs6000_debug_reg_global ();
3793 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3794 fprintf (stderr,
3795 "SImode variable mult cost = %d\n"
3796 "SImode constant mult cost = %d\n"
3797 "SImode short constant mult cost = %d\n"
3798 "DImode multipliciation cost = %d\n"
3799 "SImode division cost = %d\n"
3800 "DImode division cost = %d\n"
3801 "Simple fp operation cost = %d\n"
3802 "DFmode multiplication cost = %d\n"
3803 "SFmode division cost = %d\n"
3804 "DFmode division cost = %d\n"
3805 "cache line size = %d\n"
3806 "l1 cache size = %d\n"
3807 "l2 cache size = %d\n"
3808 "simultaneous prefetches = %d\n"
3809 "\n",
3810 rs6000_cost->mulsi,
3811 rs6000_cost->mulsi_const,
3812 rs6000_cost->mulsi_const9,
3813 rs6000_cost->muldi,
3814 rs6000_cost->divsi,
3815 rs6000_cost->divdi,
3816 rs6000_cost->fp,
3817 rs6000_cost->dmul,
3818 rs6000_cost->sdiv,
3819 rs6000_cost->ddiv,
3820 rs6000_cost->cache_line_size,
3821 rs6000_cost->l1_cache_size,
3822 rs6000_cost->l2_cache_size,
3823 rs6000_cost->simultaneous_prefetches);
3827 #if TARGET_MACHO
3828 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3830 static void
3831 darwin_rs6000_override_options (void)
3833 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3834 off. */
3835 rs6000_altivec_abi = 1;
3836 TARGET_ALTIVEC_VRSAVE = 1;
3837 rs6000_current_abi = ABI_DARWIN;
3839 if (DEFAULT_ABI == ABI_DARWIN
3840 && TARGET_64BIT)
3841 darwin_one_byte_bool = 1;
3843 if (TARGET_64BIT && ! TARGET_POWERPC64)
3845 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3846 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3848 if (flag_mkernel)
3850 rs6000_default_long_calls = 1;
3851 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3854 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3855 Altivec. */
3856 if (!flag_mkernel && !flag_apple_kext
3857 && TARGET_64BIT
3858 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3859 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3861 /* Unless the user (not the configurer) has explicitly overridden
3862 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3863 G4 unless targeting the kernel. */
3864 if (!flag_mkernel
3865 && !flag_apple_kext
3866 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3867 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3868 && ! global_options_set.x_rs6000_cpu_index)
3870 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3873 #endif
3875 /* If not otherwise specified by a target, make 'long double' equivalent to
3876 'double'. */
3878 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3879 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3880 #endif
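/* A subtarget header may override this default before this point; for
   example (hypothetical):

       #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 128

   would make 'long double' default to 128 bits.  */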
3882 /* Return the builtin mask of the various options that could affect which
3883 builtins are available. In the past we used target_flags, but we've run out of
3884 bits, and some options like PAIRED are no longer in target_flags. */
3886 HOST_WIDE_INT
3887 rs6000_builtin_mask_calculate (void)
3889 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3890 | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0)
3891 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3892 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3893 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3894 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3895 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3896 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3897 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3898 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3899 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3900 | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0)
3901 | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0)
3902 | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0)
3903 | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0)
3904 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3905 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3906 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3907 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3908 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0)
3909 | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)
3910 | ((TARGET_FLOAT128_HW) ? RS6000_BTM_FLOAT128_HW : 0));
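/* Minimal usage sketch (illustrative, not part of the port): code that
   expands builtins could test the computed mask like this to see whether
   the Altivec builtins are currently available.  */

static inline bool
example_altivec_builtins_available_p (void)
{
  return (rs6000_builtin_mask_calculate () & RS6000_BTM_ALTIVEC) != 0;
}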
3913 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3914 to clobber the XER[CA] bit because clobbering that bit without telling
3915 the compiler worked just fine with versions of GCC before GCC 5, and
3916 breaking a lot of older code in ways that are hard to track down is
3917 not such a great idea. */
3919 static rtx_insn *
3920 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3921 vec<const char *> &/*constraints*/,
3922 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3924 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3925 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3926 return NULL;
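/* Example of the kind of pre-GCC 5 inline asm this keeps working
   (illustrative): "addc" writes XER[CA] without declaring it,

       long r;
       __asm__ ("addc %0,%1,%2" : "=r" (r) : "r" (a), "r" (b));

   and the implicit clobber added above stops the compiler from keeping a
   live carry value across such a statement.  */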
3929 /* Override command line options.
3931 Combine build-specific configuration information with options
3932 specified on the command line to set various state variables which
3933 influence code generation, optimization, and expansion of built-in
3934 functions. Assure that command-line configuration preferences are
3935 compatible with each other and with the build configuration; issue
3936 warnings while adjusting configuration or error messages while
3937 rejecting configuration.
3939 Upon entry to this function:
3941 This function is called once at the beginning of
3942 compilation, and then again at the start and end of compiling
3943 each section of code that has a different configuration, as
3944 indicated, for example, by adding the
3946 __attribute__((__target__("cpu=power9")))
3948 qualifier to a function definition or, for example, by bracketing
3949 code between
3951 #pragma GCC target("altivec")
3955 #pragma GCC reset_options
3957 directives. Parameter global_init_p is true for the initial
3958 invocation, which initializes global variables, and false for all
3959 subsequent invocations.
3962 Various global state information is assumed to be valid. This
3963 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3964 default CPU specified at build configure time, TARGET_DEFAULT,
3965 representing the default set of option flags for the default
3966 target, and global_options_set.x_rs6000_isa_flags, representing
3967 which options were requested on the command line.
3969 Upon return from this function:
3971 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3972 was set by name on the command line. Additionally, if certain
3973 attributes are automatically enabled or disabled by this function
3974 in order to assure compatibility between options and
3975 configuration, the flags associated with those attributes are
3976 also set. By setting these "explicit bits", we avoid the risk
3977 that other code might accidentally overwrite these particular
3978 attributes with "default values".
3980 The various bits of rs6000_isa_flags are set to indicate the
3981 target options that have been selected for the most current
3982 compilation efforts. This has the effect of also turning on the
3983 associated TARGET_XXX values since these are macros which are
3984 generally defined to test the corresponding bit of the
3985 rs6000_isa_flags variable.
3987 The variable rs6000_builtin_mask is set to represent the target
3988 options for the most current compilation efforts, consistent with
3989 the current contents of rs6000_isa_flags. This variable controls
3990 expansion of built-in functions.
3992 Various other global variables and fields of global structures
3993 (over 50 in all) are initialized to reflect the desired options
3994 for the most current compilation efforts. */
3996 static bool
3997 rs6000_option_override_internal (bool global_init_p)
3999 bool ret = true;
4001 HOST_WIDE_INT set_masks;
4002 HOST_WIDE_INT ignore_masks;
4003 int cpu_index = -1;
4004 int tune_index;
4005 struct cl_target_option *main_target_opt
4006 = ((global_init_p || target_option_default_node == NULL)
4007 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
4009 /* Print defaults. */
4010 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
4011 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
4013 /* Remember the explicit arguments. */
4014 if (global_init_p)
4015 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
4017 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
4018 library functions, so warn about it. The flag may be useful for
4019 performance studies from time to time though, so don't disable it
4020 entirely. */
4021 if (global_options_set.x_rs6000_alignment_flags
4022 && rs6000_alignment_flags == MASK_ALIGN_POWER
4023 && DEFAULT_ABI == ABI_DARWIN
4024 && TARGET_64BIT)
4025 warning (0, "%qs is not supported for 64-bit Darwin;"
4026 " it is incompatible with the installed C and C++ libraries",
4027 "-malign-power");
4029 /* Numerous experiments show that IRA-based loop pressure
4030 calculation works better for RTL loop invariant motion on targets
4031 with enough (>= 32) registers. It is an expensive optimization.
4032 So it is enabled only for peak performance. */
4033 if (optimize >= 3 && global_init_p
4034 && !global_options_set.x_flag_ira_loop_pressure)
4035 flag_ira_loop_pressure = 1;
4037 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
4038 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
4039 options were already specified. */
4040 if (flag_sanitize & SANITIZE_USER_ADDRESS
4041 && !global_options_set.x_flag_asynchronous_unwind_tables)
4042 flag_asynchronous_unwind_tables = 1;
4044 /* Set the pointer size. */
4045 if (TARGET_64BIT)
4047 rs6000_pmode = DImode;
4048 rs6000_pointer_size = 64;
4050 else
4052 rs6000_pmode = SImode;
4053 rs6000_pointer_size = 32;
4056 /* Some OSs don't support saving the high part of 64-bit registers on context
4057 switch. Other OSs don't support saving Altivec registers. On those OSs,
4058 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
4059 if the user wants either, the user must explicitly specify them and we
4060 won't interfere with the user's specification. */
4062 set_masks = POWERPC_MASKS;
4063 #ifdef OS_MISSING_POWERPC64
4064 if (OS_MISSING_POWERPC64)
4065 set_masks &= ~OPTION_MASK_POWERPC64;
4066 #endif
4067 #ifdef OS_MISSING_ALTIVEC
4068 if (OS_MISSING_ALTIVEC)
4069 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
4070 | OTHER_VSX_VECTOR_MASKS);
4071 #endif
4073 /* Don't let the processor default override flags given explicitly. */
4074 set_masks &= ~rs6000_isa_flags_explicit;
4076 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
4077 the cpu in a target attribute or pragma, but did not specify a tuning
4078 option, use the cpu for the tuning option rather than the option specified
4079 with -mtune on the command line. Process a '--with-cpu' configuration
4080 request as an implicit --cpu. */
4081 if (rs6000_cpu_index >= 0)
4082 cpu_index = rs6000_cpu_index;
4083 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
4084 cpu_index = main_target_opt->x_rs6000_cpu_index;
4085 else if (OPTION_TARGET_CPU_DEFAULT)
4086 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
4088 if (cpu_index >= 0)
4090 const char *unavailable_cpu = NULL;
4091 switch (processor_target_table[cpu_index].processor)
4093 #ifndef HAVE_AS_POWER9
4094 case PROCESSOR_POWER9:
4095 unavailable_cpu = "power9";
4096 break;
4097 #endif
4098 #ifndef HAVE_AS_POWER8
4099 case PROCESSOR_POWER8:
4100 unavailable_cpu = "power8";
4101 break;
4102 #endif
4103 #ifndef HAVE_AS_POPCNTD
4104 case PROCESSOR_POWER7:
4105 unavailable_cpu = "power7";
4106 break;
4107 #endif
4108 #ifndef HAVE_AS_DFP
4109 case PROCESSOR_POWER6:
4110 unavailable_cpu = "power6";
4111 break;
4112 #endif
4113 #ifndef HAVE_AS_POPCNTB
4114 case PROCESSOR_POWER5:
4115 unavailable_cpu = "power5";
4116 break;
4117 #endif
4118 default:
4119 break;
4121 if (unavailable_cpu)
4123 cpu_index = -1;
4124 warning (0, "will not generate %qs instructions because "
4125 "assembler lacks %qs support", unavailable_cpu,
4126 unavailable_cpu);
4130 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
4131 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
4132 with those from the cpu, except for options that were explicitly set. If
4133 we don't have a cpu, do not override the target bits set in
4134 TARGET_DEFAULT. */
4135 if (cpu_index >= 0)
4137 rs6000_cpu_index = cpu_index;
4138 rs6000_isa_flags &= ~set_masks;
4139 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
4140 & set_masks);
4142 else
4144 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
4145 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
4146 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
4147 to using rs6000_isa_flags, we need to do the initialization here.
4149 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
4150 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
4151 HOST_WIDE_INT flags;
4152 if (TARGET_DEFAULT)
4153 flags = TARGET_DEFAULT;
4154 else
4156 /* PowerPC 64-bit LE requires at least ISA 2.07. */
4157 const char *default_cpu = (!TARGET_POWERPC64
4158 ? "powerpc"
4159 : (BYTES_BIG_ENDIAN
4160 ? "powerpc64"
4161 : "powerpc64le"));
4162 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
4163 flags = processor_target_table[default_cpu_index].target_enable;
4165 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
4168 if (rs6000_tune_index >= 0)
4169 tune_index = rs6000_tune_index;
4170 else if (cpu_index >= 0)
4171 rs6000_tune_index = tune_index = cpu_index;
4172 else
4174 size_t i;
4175 enum processor_type tune_proc
4176 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
4178 tune_index = -1;
4179 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
4180 if (processor_target_table[i].processor == tune_proc)
4182 tune_index = i;
4183 break;
4187 if (cpu_index >= 0)
4188 rs6000_cpu = processor_target_table[cpu_index].processor;
4189 else
4190 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
4192 gcc_assert (tune_index >= 0);
4193 rs6000_tune = processor_target_table[tune_index].processor;
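/* Precedence example: '-mcpu=power8' alone sets both cpu_index and
   tune_index to the power8 entry, so rs6000_tune is PROCESSOR_POWER8;
   adding '-mtune=power9' overrides only the tuning, not the ISA flags.  */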
4195 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
4196 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
4197 || rs6000_cpu == PROCESSOR_PPCE5500)
4199 if (TARGET_ALTIVEC)
4200 error ("AltiVec not supported in this target");
4203 /* If we are optimizing big endian systems for space, use the load/store
4204 multiple and string instructions. */
4205 if (BYTES_BIG_ENDIAN && optimize_size)
4206 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
4207 | OPTION_MASK_STRING);
4209 /* Don't allow -mmultiple or -mstring on little endian systems
4210 unless the cpu is a 750, because the hardware doesn't support the
4211 instructions used in little endian mode, and they cause an alignment
4212 trap. The 750 does not cause an alignment trap (except when the
4213 target is unaligned). */
4215 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
4217 if (TARGET_MULTIPLE)
4219 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
4220 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
4221 warning (0, "%qs is not supported on little endian systems",
4222 "-mmultiple");
4225 if (TARGET_STRING)
4227 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4228 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
4229 warning (0, "%qs is not supported on little endian systems",
4230 "-mstring");
4234 /* If little-endian, default to -mstrict-align on older processors.
4235 Testing for htm matches power8 and later. */
4236 if (!BYTES_BIG_ENDIAN
4237 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
4238 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
4240 /* -maltivec={le,be} implies -maltivec. */
4241 if (rs6000_altivec_element_order != 0)
4242 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
4244 /* Disallow -maltivec=le in big endian mode for now. This is not
4245 known to be useful for anyone. */
4246 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
4248 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
4249 rs6000_altivec_element_order = 0;
4252 if (!rs6000_fold_gimple)
4253 fprintf (stderr,
4254 "gimple folding of rs6000 builtins has been disabled.\n");
4256 /* Add some warnings for VSX. */
4257 if (TARGET_VSX)
4259 const char *msg = NULL;
4260 if (!TARGET_HARD_FLOAT || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
4262 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4263 msg = N_("-mvsx requires hardware floating point");
4264 else
4266 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4267 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4270 else if (TARGET_PAIRED_FLOAT)
4271 msg = N_("-mvsx and -mpaired are incompatible");
4272 else if (TARGET_AVOID_XFORM > 0)
4273 msg = N_("-mvsx needs indexed addressing");
4274 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
4275 & OPTION_MASK_ALTIVEC))
4277 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4278 msg = N_("-mvsx and -mno-altivec are incompatible");
4279 else
4280 msg = N_("-mno-altivec disables vsx");
4283 if (msg)
4285 warning (0, msg);
4286 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
4287 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4291 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
4292 the -mcpu setting to enable options that conflict. */
4293 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
4294 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
4295 | OPTION_MASK_ALTIVEC
4296 | OPTION_MASK_VSX)) != 0)
4297 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
4298 | OPTION_MASK_DIRECT_MOVE)
4299 & ~rs6000_isa_flags_explicit);
4301 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4302 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
4304 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
4305 off all of the options that depend on those flags. */
4306 ignore_masks = rs6000_disable_incompatible_switches ();
4308 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
4309 unless the user explicitly used the -mno-<option> to disable the code. */
4310 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
4311 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4312 else if (TARGET_P9_MINMAX)
4314 if (cpu_index >= 0)
4316 if (processor_target_table[cpu_index].processor == PROCESSOR_POWER9)
4318 /* legacy behavior: allow -mcpu=power9 with certain
4319 capabilities explicitly disabled. */
4320 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
4322 else
4323 error ("power9 target option is incompatible with %<%s=<xxx>%> "
4324 "for <xxx> less than power9", "-mcpu");
4326 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
4327 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
4328 & rs6000_isa_flags_explicit))
4329 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
4330 were explicitly cleared. */
4331 error ("%qs incompatible with explicitly disabled options",
4332 "-mpower9-minmax");
4333 else
4334 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
4336 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
4337 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
4338 else if (TARGET_VSX)
4339 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
4340 else if (TARGET_POPCNTD)
4341 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
4342 else if (TARGET_DFP)
4343 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
4344 else if (TARGET_CMPB)
4345 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
4346 else if (TARGET_FPRND)
4347 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
4348 else if (TARGET_POPCNTB)
4349 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
4350 else if (TARGET_ALTIVEC)
4351 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
4353 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
4355 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
4356 error ("%qs requires %qs", "-mcrypto", "-maltivec");
4357 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
4360 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
4362 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
4363 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
4364 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
4367 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
4369 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4370 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
4371 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4374 if (TARGET_P8_VECTOR && !TARGET_VSX)
4376 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4377 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
4378 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
4379 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
4381 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
4382 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
4383 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4385 else
4387 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
4388 not explicit. */
4389 rs6000_isa_flags |= OPTION_MASK_VSX;
4390 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
4394 if (TARGET_DFP && !TARGET_HARD_FLOAT)
4396 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
4397 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
4398 rs6000_isa_flags &= ~OPTION_MASK_DFP;
4401 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
4402 silently turn off quad memory mode. */
4403 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
4405 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4406 warning (0, N_("-mquad-memory requires 64-bit mode"));
4408 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
4409 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
4411 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
4412 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
4415 /* Non-atomic quad memory loads/stores are disabled for little endian, since
4416 the words are reversed, but atomic operations can still be done by
4417 swapping the words. */
4418 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
4420 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4421 warning (0, N_("-mquad-memory is not available in little endian "
4422 "mode"));
4424 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4427 /* Assume if the user asked for normal quad memory instructions, they want
4428 the atomic versions as well, unless they explicitly told us not to use quad
4429 word atomic instructions. */
4430 if (TARGET_QUAD_MEMORY
4431 && !TARGET_QUAD_MEMORY_ATOMIC
4432 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4433 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4435 /* If we can shrink-wrap the TOC register save separately, then use
4436 -msave-toc-indirect unless explicitly disabled. */
4437 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4438 && flag_shrink_wrap_separate
4439 && optimize_function_for_speed_p (cfun))
4440 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4442 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4443 generating power8 instructions. */
4444 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4445 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4446 & OPTION_MASK_P8_FUSION);
4448 /* Setting additional fusion flags turns on base fusion. */
4449 if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION))
4451 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4453 if (TARGET_P8_FUSION_SIGN)
4454 error ("%qs requires %qs", "-mpower8-fusion-sign",
4455 "-mpower8-fusion");
4457 if (TARGET_TOC_FUSION)
4458 error ("%qs requires %qs", "-mtoc-fusion", "-mpower8-fusion");
4460 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4462 else
4463 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4466 /* Power9 fusion is a superset of power8 fusion. */
4467 if (TARGET_P9_FUSION && !TARGET_P8_FUSION)
4469 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4471 /* We prefer to not mention undocumented options in
4472 error messages. However, if users have managed to select
4473 power9-fusion without selecting power8-fusion, they
4474 already know about undocumented flags. */
4475 error ("%qs requires %qs", "-mpower9-fusion", "-mpower8-fusion");
4476 rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION;
4478 else
4479 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4482 /* Enable power9 fusion if we are tuning for power9, even if we aren't
4483 generating power9 instructions. */
4484 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION))
4485 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
4486 & OPTION_MASK_P9_FUSION);
4488 /* Power8 does not fuse sign-extended loads with the addis. If we are
4489 optimizing at high levels for speed, convert a sign-extended load into a
4490 zero-extending load and an explicit sign extension. */
4491 if (TARGET_P8_FUSION
4492 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4493 && optimize_function_for_speed_p (cfun)
4494 && optimize >= 3)
4495 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
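/* Illustrative rewrite: under -mpower8-fusion-sign, a sign-extending load

       lwa 3,0(4)           # load word algebraic (sign-extending)

   is emitted instead as a fusible zero-extending load plus an explicit
   sign extension:

       lwz   3,0(4)         # zero-extending load, can fuse with an addis
       extsw 3,3            # explicit sign extension  */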
4497 /* TOC fusion requires 64-bit and medium/large code model. */
4498 if (TARGET_TOC_FUSION && !TARGET_POWERPC64)
4500 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4501 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4502 warning (0, N_("-mtoc-fusion requires 64-bit"));
4505 if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL))
4507 rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION;
4508 if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0)
4509 warning (0, N_("-mtoc-fusion requires medium/large code model"));
4512 /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code
4513 model. */
4514 if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64
4515 && (TARGET_CMODEL != CMODEL_SMALL)
4516 && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION))
4517 rs6000_isa_flags |= OPTION_MASK_TOC_FUSION;
4519 /* ISA 3.0 vector instructions include ISA 2.07. */
4520 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4522 /* We prefer to not mention undocumented options in
4523 error messages. However, if users have managed to select
4524 power9-vector without selecting power8-vector, they
4525 already know about undocumented flags. */
4526 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4527 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4528 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4529 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4531 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4532 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4533 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4535 else
4537 /* OPTION_MASK_P9_VECTOR is explicit and
4538 OPTION_MASK_P8_VECTOR is not explicit. */
4539 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4540 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4544 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4545 support. If we only have ISA 2.06 support, and the user did not specify
4546 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4547 but we don't enable the full vectorization support. */
4548 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4549 TARGET_ALLOW_MOVMISALIGN = 1;
4551 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4553 if (TARGET_ALLOW_MOVMISALIGN > 0
4554 && global_options_set.x_TARGET_ALLOW_MOVMISALIGN)
4555 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4557 TARGET_ALLOW_MOVMISALIGN = 0;
4560 /* Determine when unaligned vector accesses are permitted, and when
4561 they are preferred over masked Altivec loads. Note that if
4562 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4563 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4564 not true. */
4565 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4567 if (!TARGET_VSX)
4569 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4570 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4572 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4575 else if (!TARGET_ALLOW_MOVMISALIGN)
4577 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4578 error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4579 "-mallow-movmisalign");
4581 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4585 /* Set long double size before the IEEE 128-bit tests. */
4586 if (!global_options_set.x_rs6000_long_double_type_size)
4588 if (main_target_opt != NULL
4589 && (main_target_opt->x_rs6000_long_double_type_size
4590 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
4591 error ("target attribute or pragma changes long double size");
4592 else
4593 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
4596 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4597 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4598 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4599 those systems will not pick up this default. Warn if the user changes the
4600 default unless -Wno-psabi. */
4601 if (!global_options_set.x_rs6000_ieeequad)
4602 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4604 else if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128)
4606 static bool warned_change_long_double;
4607 if (!warned_change_long_double)
4609 warned_change_long_double = true;
4610 if (TARGET_IEEEQUAD)
4611 warning (OPT_Wpsabi, "Using IEEE extended precision long double");
4612 else
4613 warning (OPT_Wpsabi, "Using IBM extended precision long double");
4617 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4618 systems. In GCC 7, we would enable the IEEE 128-bit floating point
4619 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4620 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4621 the keyword and the type. */
4622 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4624 /* IEEE 128-bit floating point requires VSX support. */
4625 if (TARGET_FLOAT128_KEYWORD)
4627 if (!TARGET_VSX)
4629 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4630 error ("%qs requires VSX support", "-mfloat128");
4632 TARGET_FLOAT128_TYPE = 0;
4633 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4634 | OPTION_MASK_FLOAT128_HW);
4636 else if (!TARGET_FLOAT128_TYPE)
4638 TARGET_FLOAT128_TYPE = 1;
4639 warning (0, "The -mfloat128 option may not be fully supported");
4643 /* Enable the __float128 keyword under Linux by default. */
4644 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4645 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4646 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4648 /* If we are supporting the float128 type and have full ISA 3.0 support,
4649 enable -mfloat128-hardware by default. However, don't enable the
4650 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4651 because sometimes the compiler wants to put things in an integer
4652 container, and if we don't have __int128 support, it is impossible. */
4653 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4654 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4655 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4656 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4658 if (TARGET_FLOAT128_HW
4659 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4661 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4662 error ("%qs requires full ISA 3.0 support", "-mfloat128-hardware");
4664 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4667 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4669 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4670 error ("%qs requires %qs", "-mfloat128-hardware", "-m64");
4672 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4675 /* Print the options after updating the defaults. */
4676 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4677 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4679 /* E500mc does "better" if we inline more aggressively. Respect the
4680 user's opinion, though. */
4681 if (rs6000_block_move_inline_limit == 0
4682 && (rs6000_tune == PROCESSOR_PPCE500MC
4683 || rs6000_tune == PROCESSOR_PPCE500MC64
4684 || rs6000_tune == PROCESSOR_PPCE5500
4685 || rs6000_tune == PROCESSOR_PPCE6500))
4686 rs6000_block_move_inline_limit = 128;
4688 /* store_one_arg depends on expand_block_move to handle at least the
4689 size of reg_parm_stack_space. */
4690 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4691 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4693 if (global_init_p)
4695 /* If the appropriate debug option is enabled, replace the target hooks
4696 with debug versions that call the real version and then print
4697 debugging information. */
4698 if (TARGET_DEBUG_COST)
4700 targetm.rtx_costs = rs6000_debug_rtx_costs;
4701 targetm.address_cost = rs6000_debug_address_cost;
4702 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4705 if (TARGET_DEBUG_ADDR)
4707 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4708 targetm.legitimize_address = rs6000_debug_legitimize_address;
4709 rs6000_secondary_reload_class_ptr
4710 = rs6000_debug_secondary_reload_class;
4711 targetm.secondary_memory_needed
4712 = rs6000_debug_secondary_memory_needed;
4713 targetm.can_change_mode_class
4714 = rs6000_debug_can_change_mode_class;
4715 rs6000_preferred_reload_class_ptr
4716 = rs6000_debug_preferred_reload_class;
4717 rs6000_legitimize_reload_address_ptr
4718 = rs6000_debug_legitimize_reload_address;
4719 rs6000_mode_dependent_address_ptr
4720 = rs6000_debug_mode_dependent_address;
4723 if (rs6000_veclibabi_name)
4725 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4726 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4727 else
4729 error ("unknown vectorization library ABI type (%qs) for "
4730 "%qs switch", rs6000_veclibabi_name, "-mveclibabi=");
4731 ret = false;
4736 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
4737 target attribute or pragma which automatically enables both options,
4738 unless the altivec ABI was set. This is set by default for 64-bit, but
4739 not for 32-bit. */
4740 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4742 TARGET_FLOAT128_TYPE = 0;
4743 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC
4744 | OPTION_MASK_FLOAT128_KEYWORD)
4745 & ~rs6000_isa_flags_explicit);
4748 /* Enable Altivec ABI for AIX -maltivec. */
4749 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
4751 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4752 error ("target attribute or pragma changes AltiVec ABI");
4753 else
4754 rs6000_altivec_abi = 1;
4757 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4758 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4759 be explicitly overridden in either case. */
4760 if (TARGET_ELF)
4762 if (!global_options_set.x_rs6000_altivec_abi
4763 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4765 if (main_target_opt != NULL
4766 && !main_target_opt->x_rs6000_altivec_abi)
4767 error ("target attribute or pragma changes AltiVec ABI");
4768 else
4769 rs6000_altivec_abi = 1;
4773 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4774 So far, the only darwin64 targets are also MACH-O. */
4775 if (TARGET_MACHO
4776 && DEFAULT_ABI == ABI_DARWIN
4777 && TARGET_64BIT)
4779 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4780 error ("target attribute or pragma changes darwin64 ABI");
4781 else
4783 rs6000_darwin64_abi = 1;
4784 /* Default to natural alignment, for better performance. */
4785 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4789 /* Place FP constants in the constant pool instead of TOC
4790 if section anchors are enabled. */
4791 if (flag_section_anchors
4792 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
4793 TARGET_NO_FP_IN_TOC = 1;
4795 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4796 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4798 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4799 SUBTARGET_OVERRIDE_OPTIONS;
4800 #endif
4801 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4802 SUBSUBTARGET_OVERRIDE_OPTIONS;
4803 #endif
4804 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4805 SUB3TARGET_OVERRIDE_OPTIONS;
4806 #endif
4808 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4809 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4811 /* For the E500 family of cores, reset the single/double FP flags to let us
4812 check that they remain constant across attributes or pragmas. Also
4813 clear a possible request for string instructions, which are not supported
4814 and which we might have silently enabled above for -Os. */
4816 switch (rs6000_cpu)
4818 case PROCESSOR_PPC8540:
4819 case PROCESSOR_PPC8548:
4820 case PROCESSOR_PPCE500MC:
4821 case PROCESSOR_PPCE500MC64:
4822 case PROCESSOR_PPCE5500:
4823 case PROCESSOR_PPCE6500:
4824 rs6000_single_float = 0;
4825 rs6000_double_float = 0;
4826 rs6000_isa_flags &= ~OPTION_MASK_STRING;
4827 break;
4829 default:
4830 break;
4833 if (main_target_opt)
4835 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
4836 error ("target attribute or pragma changes single precision floating "
4837 "point");
4838 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
4839 error ("target attribute or pragma changes double precision floating "
4840 "point");
4843 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4844 && rs6000_tune != PROCESSOR_POWER5
4845 && rs6000_tune != PROCESSOR_POWER6
4846 && rs6000_tune != PROCESSOR_POWER7
4847 && rs6000_tune != PROCESSOR_POWER8
4848 && rs6000_tune != PROCESSOR_POWER9
4849 && rs6000_tune != PROCESSOR_PPCA2
4850 && rs6000_tune != PROCESSOR_CELL
4851 && rs6000_tune != PROCESSOR_PPC476);
4852 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4853 || rs6000_tune == PROCESSOR_POWER5
4854 || rs6000_tune == PROCESSOR_POWER7
4855 || rs6000_tune == PROCESSOR_POWER8);
4856 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4857 || rs6000_tune == PROCESSOR_POWER5
4858 || rs6000_tune == PROCESSOR_POWER6
4859 || rs6000_tune == PROCESSOR_POWER7
4860 || rs6000_tune == PROCESSOR_POWER8
4861 || rs6000_tune == PROCESSOR_POWER9
4862 || rs6000_tune == PROCESSOR_PPCE500MC
4863 || rs6000_tune == PROCESSOR_PPCE500MC64
4864 || rs6000_tune == PROCESSOR_PPCE5500
4865 || rs6000_tune == PROCESSOR_PPCE6500);
4867 /* Allow debug switches to override the above settings. These are set to -1
4868 in rs6000.opt to indicate the user hasn't directly set the switch. */
4869 if (TARGET_ALWAYS_HINT >= 0)
4870 rs6000_always_hint = TARGET_ALWAYS_HINT;
4872 if (TARGET_SCHED_GROUPS >= 0)
4873 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4875 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4876 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4878 rs6000_sched_restricted_insns_priority
4879 = (rs6000_sched_groups ? 1 : 0);
4881 /* Handle -msched-costly-dep option. */
4882 rs6000_sched_costly_dep
4883 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4885 if (rs6000_sched_costly_dep_str)
4887 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4888 rs6000_sched_costly_dep = no_dep_costly;
4889 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4890 rs6000_sched_costly_dep = all_deps_costly;
4891 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4892 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4893 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4894 rs6000_sched_costly_dep = store_to_load_dep_costly;
4895 else
4896 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4897 atoi (rs6000_sched_costly_dep_str));
4900 /* Handle -minsert-sched-nops option. */
4901 rs6000_sched_insert_nops
4902 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4904 if (rs6000_sched_insert_nops_str)
4906 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4907 rs6000_sched_insert_nops = sched_finish_none;
4908 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4909 rs6000_sched_insert_nops = sched_finish_pad_groups;
4910 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4911 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4912 else
4913 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4914 atoi (rs6000_sched_insert_nops_str));
4917 /* Handle the stack protector options. */
4918 if (!global_options_set.x_rs6000_stack_protector_guard)
4919 #ifdef TARGET_THREAD_SSP_OFFSET
4920 rs6000_stack_protector_guard = SSP_TLS;
4921 #else
4922 rs6000_stack_protector_guard = SSP_GLOBAL;
4923 #endif
4925 #ifdef TARGET_THREAD_SSP_OFFSET
4926 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4927 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4928 #endif
4930 if (global_options_set.x_rs6000_stack_protector_guard_offset_str)
4932 char *endp;
4933 const char *str = rs6000_stack_protector_guard_offset_str;
4935 errno = 0;
4936 long offset = strtol (str, &endp, 0);
4937 if (!*str || *endp || errno)
4938 error ("%qs is not a valid number in %qs", str,
4939 "-mstack-protector-guard-offset=");
4941 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4942 || (TARGET_64BIT && (offset & 3)))
4943 error ("%qs is not a valid offset in %qs", str,
4944 "-mstack-protector-guard-offset=");
4946 rs6000_stack_protector_guard_offset = offset;
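/* Examples of the checks above (illustrative):
       -mstack-protector-guard-offset=0x7000   accepted
       -mstack-protector-guard-offset=0x8000   rejected: outside
					       [-0x8000, 0x7fff]
       -mstack-protector-guard-offset=0x7ffe   rejected for -m64: not a
					       multiple of 4  */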
4949 if (global_options_set.x_rs6000_stack_protector_guard_reg_str)
4951 const char *str = rs6000_stack_protector_guard_reg_str;
4952 int reg = decode_reg_name (str);
4954 if (!IN_RANGE (reg, 1, 31))
4955 error ("%qs is not a valid base register in %qs", str,
4956 "-mstack-protector-guard-reg=");
4958 rs6000_stack_protector_guard_reg = reg;
4961 if (rs6000_stack_protector_guard == SSP_TLS
4962 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4963 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4965 if (global_init_p)
4967 #ifdef TARGET_REGNAMES
4968 /* If the user desires alternate register names, copy in the
4969 alternate names now. */
4970 if (TARGET_REGNAMES)
4971 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4972 #endif
4974 /* Set aix_struct_return last, after the ABI is determined.
4975 If -maix-struct-return or -msvr4-struct-return was explicitly
4976 used, don't override with the ABI default. */
4977 if (!global_options_set.x_aix_struct_return)
4978 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4980 #if 0
4981 /* IBM XL compiler defaults to unsigned bitfields. */
4982 if (TARGET_XL_COMPAT)
4983 flag_signed_bitfields = 0;
4984 #endif
4986 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4987 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4989 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4991 /* We can only guarantee the availability of DI pseudo-ops when
4992 assembling for 64-bit targets. */
4993 if (!TARGET_64BIT)
4995 targetm.asm_out.aligned_op.di = NULL;
4996 targetm.asm_out.unaligned_op.di = NULL;
5000 /* Set branch target alignment, if not optimizing for size. */
5001 if (!optimize_size)
5003 /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
5004 8-byte aligned to avoid misprediction by the branch predictor.  */
5005 if (rs6000_tune == PROCESSOR_TITAN
5006 || rs6000_tune == PROCESSOR_CELL)
5008 if (align_functions <= 0)
5009 align_functions = 8;
5010 if (align_jumps <= 0)
5011 align_jumps = 8;
5012 if (align_loops <= 0)
5013 align_loops = 8;
5015 if (rs6000_align_branch_targets)
5017 if (align_functions <= 0)
5018 align_functions = 16;
5019 if (align_jumps <= 0)
5020 align_jumps = 16;
5021 if (align_loops <= 0)
5023 can_override_loop_align = 1;
5024 align_loops = 16;
5027 if (align_jumps_max_skip <= 0)
5028 align_jumps_max_skip = 15;
5029 if (align_loops_max_skip <= 0)
5030 align_loops_max_skip = 15;
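/* These values are byte alignments: e.g. align_jumps == 16 with
   align_jumps_max_skip == 15 pads branch targets out to a 16-byte
   boundary unless more than 15 bytes of padding would be required.  */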
5033 /* Arrange to save and restore machine status around nested functions. */
5034 init_machine_status = rs6000_init_machine_status;
5036 /* We should always be splitting complex arguments, but we can't break
5037 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
5038 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
5039 targetm.calls.split_complex_arg = NULL;
5041 /* The AIX and ELFv1 ABIs define standard function descriptors. */
5042 if (DEFAULT_ABI == ABI_AIX)
5043 targetm.calls.custom_function_descriptors = 0;
5046 /* Initialize rs6000_cost with the appropriate target costs. */
5047 if (optimize_size)
5048 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
5049 else
5050 switch (rs6000_tune)
5052 case PROCESSOR_RS64A:
5053 rs6000_cost = &rs64a_cost;
5054 break;
5056 case PROCESSOR_MPCCORE:
5057 rs6000_cost = &mpccore_cost;
5058 break;
5060 case PROCESSOR_PPC403:
5061 rs6000_cost = &ppc403_cost;
5062 break;
5064 case PROCESSOR_PPC405:
5065 rs6000_cost = &ppc405_cost;
5066 break;
5068 case PROCESSOR_PPC440:
5069 rs6000_cost = &ppc440_cost;
5070 break;
5072 case PROCESSOR_PPC476:
5073 rs6000_cost = &ppc476_cost;
5074 break;
5076 case PROCESSOR_PPC601:
5077 rs6000_cost = &ppc601_cost;
5078 break;
5080 case PROCESSOR_PPC603:
5081 rs6000_cost = &ppc603_cost;
5082 break;
5084 case PROCESSOR_PPC604:
5085 rs6000_cost = &ppc604_cost;
5086 break;
5088 case PROCESSOR_PPC604e:
5089 rs6000_cost = &ppc604e_cost;
5090 break;
5092 case PROCESSOR_PPC620:
5093 rs6000_cost = &ppc620_cost;
5094 break;
5096 case PROCESSOR_PPC630:
5097 rs6000_cost = &ppc630_cost;
5098 break;
5100 case PROCESSOR_CELL:
5101 rs6000_cost = &ppccell_cost;
5102 break;
5104 case PROCESSOR_PPC750:
5105 case PROCESSOR_PPC7400:
5106 rs6000_cost = &ppc750_cost;
5107 break;
5109 case PROCESSOR_PPC7450:
5110 rs6000_cost = &ppc7450_cost;
5111 break;
5113 case PROCESSOR_PPC8540:
5114 case PROCESSOR_PPC8548:
5115 rs6000_cost = &ppc8540_cost;
5116 break;
5118 case PROCESSOR_PPCE300C2:
5119 case PROCESSOR_PPCE300C3:
5120 rs6000_cost = &ppce300c2c3_cost;
5121 break;
5123 case PROCESSOR_PPCE500MC:
5124 rs6000_cost = &ppce500mc_cost;
5125 break;
5127 case PROCESSOR_PPCE500MC64:
5128 rs6000_cost = &ppce500mc64_cost;
5129 break;
5131 case PROCESSOR_PPCE5500:
5132 rs6000_cost = &ppce5500_cost;
5133 break;
5135 case PROCESSOR_PPCE6500:
5136 rs6000_cost = &ppce6500_cost;
5137 break;
5139 case PROCESSOR_TITAN:
5140 rs6000_cost = &titan_cost;
5141 break;
5143 case PROCESSOR_POWER4:
5144 case PROCESSOR_POWER5:
5145 rs6000_cost = &power4_cost;
5146 break;
5148 case PROCESSOR_POWER6:
5149 rs6000_cost = &power6_cost;
5150 break;
5152 case PROCESSOR_POWER7:
5153 rs6000_cost = &power7_cost;
5154 break;
5156 case PROCESSOR_POWER8:
5157 rs6000_cost = &power8_cost;
5158 break;
5160 case PROCESSOR_POWER9:
5161 rs6000_cost = &power9_cost;
5162 break;
5164 case PROCESSOR_PPCA2:
5165 rs6000_cost = &ppca2_cost;
5166 break;
5168 default:
5169 gcc_unreachable ();
5172 if (global_init_p)
5174 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
5175 rs6000_cost->simultaneous_prefetches,
5176 global_options.x_param_values,
5177 global_options_set.x_param_values);
5178 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
5179 global_options.x_param_values,
5180 global_options_set.x_param_values);
5181 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
5182 rs6000_cost->cache_line_size,
5183 global_options.x_param_values,
5184 global_options_set.x_param_values);
5185 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
5186 global_options.x_param_values,
5187 global_options_set.x_param_values);
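/* Note that maybe_set_param_value only changes parameters the user has
   not set explicitly, so e.g. an explicit --param l1-cache-size=64 on
   the command line still takes precedence over these defaults.  */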
5189 /* Increase loop peeling limits based on performance analysis. */
5190 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
5191 global_options.x_param_values,
5192 global_options_set.x_param_values);
5193 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
5194 global_options.x_param_values,
5195 global_options_set.x_param_values);
5197 /* Use the 'model' -fsched-pressure algorithm by default. */
5198 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM,
5199 SCHED_PRESSURE_MODEL,
5200 global_options.x_param_values,
5201 global_options_set.x_param_values);
5203 /* If using typedef char *va_list, signal that
5204 __builtin_va_start (&ap, 0) can be optimized to
5205 ap = __builtin_next_arg (0). */
5206 if (DEFAULT_ABI != ABI_V4)
5207 targetm.expand_builtin_va_start = NULL;
5210 /* Set up single/double float flags.
5211 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
5212 then set both flags. */
5213 if (TARGET_HARD_FLOAT && rs6000_single_float == 0 && rs6000_double_float == 0)
5214 rs6000_single_float = rs6000_double_float = 1;
5216 /* If not explicitly specified via option, decide whether to generate indexed
5217 load/store instructions. A value of -1 indicates that the
5218 initial value of this variable has not been overwritten. During
5219 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
5220 if (TARGET_AVOID_XFORM == -1)
5221 /* Avoid indexed addressing when targeting Power6 in order to avoid the
5222 DERAT mispredict penalty.  However, the LVE and STVE altivec instructions
5223 need indexed accesses and the type used is the scalar type of the element
5224 being loaded or stored. */
5225 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
5226 && !TARGET_ALTIVEC);
5228 /* Set the -mrecip options. */
5229 if (rs6000_recip_name)
5231 char *p = ASTRDUP (rs6000_recip_name);
5232 char *q;
5233 unsigned int mask, i;
5234 bool invert;
5236 while ((q = strtok (p, ",")) != NULL)
5238 p = NULL;
5239 if (*q == '!')
5241 invert = true;
5242 q++;
5244 else
5245 invert = false;
5247 if (!strcmp (q, "default"))
5248 mask = ((TARGET_RECIP_PRECISION)
5249 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
5250 else
5252 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
5253 if (!strcmp (q, recip_options[i].string))
5255 mask = recip_options[i].mask;
5256 break;
5259 if (i == ARRAY_SIZE (recip_options))
5261 error ("unknown option for %<%s=%s%>", "-mrecip", q);
5262 invert = false;
5263 mask = 0;
5264 ret = false;
5268 if (invert)
5269 rs6000_recip_control &= ~mask;
5270 else
5271 rs6000_recip_control |= mask;
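/* For example, -mrecip=default enables the estimate set appropriate for
   the precision in use, while something like -mrecip=all,!rsqrtd (the
   exact keywords come from the recip_options table) enables every
   estimate except the double-precision rsqrt one.  */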
5275 /* Set the builtin mask for the various options that could affect which
5276 builtins are available.  In the past we used target_flags, but we've run
5277 out of bits, and some options like PAIRED are no longer in target_flags.  */
5278 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
5279 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
5280 rs6000_print_builtin_options (stderr, 0, "builtin mask",
5281 rs6000_builtin_mask);
5283 /* Initialize all of the registers. */
5284 rs6000_init_hard_regno_mode_ok (global_init_p);
5286 /* Save the initial options in case the user uses function-specific options.  */
5287 if (global_init_p)
5288 target_option_default_node = target_option_current_node
5289 = build_target_option_node (&global_options);
5291 /* If not explicitly specified via option, decide whether to generate the
5292 extra blr's required to preserve the link stack on some cpus (e.g., 476).  */
5293 if (TARGET_LINK_STACK == -1)
5294 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
5296 return ret;
5299 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
5300 define the target cpu type. */
5302 static void
5303 rs6000_option_override (void)
5305 (void) rs6000_option_override_internal (true);
5309 /* Implement targetm.vectorize.builtin_mask_for_load. */
5310 static tree
5311 rs6000_builtin_mask_for_load (void)
5313 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
5314 if ((TARGET_ALTIVEC && !TARGET_VSX)
5315 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
5316 return altivec_builtin_mask_for_load;
5317 else
5318 return 0;
5321 /* Implement LOOP_ALIGN. */
5323 rs6000_loop_align (rtx label)
5325 basic_block bb;
5326 int ninsns;
5328 /* Don't override loop alignment if -falign-loops was specified. */
5329 if (!can_override_loop_align)
5330 return align_loops_log;
5332 bb = BLOCK_FOR_INSN (label);
5333 ninsns = num_loop_insns (bb->loop_father);
5335 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
5336 if (ninsns > 4 && ninsns <= 8
5337 && (rs6000_tune == PROCESSOR_POWER4
5338 || rs6000_tune == PROCESSOR_POWER5
5339 || rs6000_tune == PROCESSOR_POWER6
5340 || rs6000_tune == PROCESSOR_POWER7
5341 || rs6000_tune == PROCESSOR_POWER8
5342 || rs6000_tune == PROCESSOR_POWER9))
5343 return 5;
5344 else
5345 return align_loops_log;
5348 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
5349 static int
5350 rs6000_loop_align_max_skip (rtx_insn *label)
5352 return (1 << rs6000_loop_align (label)) - 1;
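/* E.g. a loop of 5..8 insns on POWER8 gets rs6000_loop_align () == 5,
   i.e. a 2**5 == 32 byte boundary, so at most (1 << 5) - 1 == 31 bytes
   of padding are inserted in front of it.  */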
5355 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5356 after applying N iterations.  This routine does not determine
5357 how many iterations are required to reach the desired alignment.  */
5359 static bool
5360 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5362 if (is_packed)
5363 return false;
5365 if (TARGET_32BIT)
5367 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
5368 return true;
5370 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
5371 return true;
5373 return false;
5375 else
5377 if (TARGET_MACHO)
5378 return false;
5380 /* Assume that all other types are naturally aligned.  CHECKME!  */
5381 return true;
5385 /* Return true if the vector misalignment factor is supported by the
5386 target. */
5387 static bool
5388 rs6000_builtin_support_vector_misalignment (machine_mode mode,
5389 const_tree type,
5390 int misalignment,
5391 bool is_packed)
5393 if (TARGET_VSX)
5395 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5396 return true;
5398 /* Return false if the movmisalign pattern is not supported for this mode.  */
5399 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
5400 return false;
5402 if (misalignment == -1)
5404 /* Misalignment factor is unknown at compile time but we know
5405 it's word aligned. */
5406 if (rs6000_vector_alignment_reachable (type, is_packed))
5408 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
5410 if (element_size == 64 || element_size == 32)
5411 return true;
5414 return false;
5417 /* VSX supports word-aligned vectors.  */
5418 if (misalignment % 4 == 0)
5419 return true;
5421 return false;
5424 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5425 static int
5426 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5427 tree vectype, int misalign)
5429 unsigned elements;
5430 tree elem_type;
5432 switch (type_of_cost)
5434 case scalar_stmt:
5435 case scalar_load:
5436 case scalar_store:
5437 case vector_stmt:
5438 case vector_load:
5439 case vector_store:
5440 case vec_to_scalar:
5441 case scalar_to_vec:
5442 case cond_branch_not_taken:
5443 return 1;
5445 case vec_perm:
5446 if (TARGET_VSX)
5447 return 3;
5448 else
5449 return 1;
5451 case vec_promote_demote:
5452 if (TARGET_VSX)
5453 return 4;
5454 else
5455 return 1;
5457 case cond_branch_taken:
5458 return 3;
5460 case unaligned_load:
5461 case vector_gather_load:
5462 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5463 return 1;
5465 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5467 elements = TYPE_VECTOR_SUBPARTS (vectype);
5468 if (elements == 2)
5469 /* Double word aligned. */
5470 return 2;
5472 if (elements == 4)
5474 switch (misalign)
5476 case 8:
5477 /* Double word aligned. */
5478 return 2;
5480 case -1:
5481 /* Unknown misalignment. */
5482 case 4:
5483 case 12:
5484 /* Word aligned. */
5485 return 22;
5487 default:
5488 gcc_unreachable ();
5493 if (TARGET_ALTIVEC)
5494 /* Misaligned loads are not supported. */
5495 gcc_unreachable ();
5497 return 2;
5499 case unaligned_store:
5500 case vector_scatter_store:
5501 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5502 return 1;
5504 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5506 elements = TYPE_VECTOR_SUBPARTS (vectype);
5507 if (elements == 2)
5508 /* Double word aligned. */
5509 return 2;
5511 if (elements == 4)
5513 switch (misalign)
5515 case 8:
5516 /* Double word aligned. */
5517 return 2;
5519 case -1:
5520 /* Unknown misalignment. */
5521 case 4:
5522 case 12:
5523 /* Word aligned. */
5524 return 23;
5526 default:
5527 gcc_unreachable ();
5532 if (TARGET_ALTIVEC)
5533 /* Misaligned stores are not supported. */
5534 gcc_unreachable ();
5536 return 2;
5538 case vec_construct:
5539 /* This is a rough approximation assuming non-constant elements are
5540 constructed into a vector via element insertion.  FIXME:
5541 vec_construct is not granular enough for uniformly good
5542 decisions. If the initialization is a splat, this is
5543 cheaper than we estimate. Improve this someday. */
5544 elem_type = TREE_TYPE (vectype);
5545 /* 32-bit vectors loaded into registers are stored as double
5546 precision, so we need 2 permutes, 2 converts, and 1 merge
5547 to construct a vector of short floats from them. */
5548 if (SCALAR_FLOAT_TYPE_P (elem_type)
5549 && TYPE_PRECISION (elem_type) == 32)
5550 return 5;
5551 /* On POWER9, integer vector types are built up in GPRs and then
5552 use a direct move (2 cycles). For POWER8 this is even worse,
5553 as we need two direct moves and a merge, and the direct moves
5554 are five cycles. */
5555 else if (INTEGRAL_TYPE_P (elem_type))
5557 if (TARGET_P9_VECTOR)
5558 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5559 else
5560 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5562 else
5563 /* V2DFmode doesn't need a direct move. */
5564 return 2;
5566 default:
5567 gcc_unreachable ();
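/* For example, constructing a V4SImode vector from four distinct
   elements is costed at 4 - 1 + 2 == 5 on POWER9 and 4 - 1 + 5 == 8 on
   earlier VSX processors, while V2DFmode construction costs 2.  */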
5571 /* Implement targetm.vectorize.preferred_simd_mode. */
5573 static machine_mode
5574 rs6000_preferred_simd_mode (scalar_mode mode)
5576 if (TARGET_VSX)
5577 switch (mode)
5579 case E_DFmode:
5580 return V2DFmode;
5581 default:;
5583 if (TARGET_ALTIVEC || TARGET_VSX)
5584 switch (mode)
5586 case E_SFmode:
5587 return V4SFmode;
5588 case E_TImode:
5589 return V1TImode;
5590 case E_DImode:
5591 return V2DImode;
5592 case E_SImode:
5593 return V4SImode;
5594 case E_HImode:
5595 return V8HImode;
5596 case E_QImode:
5597 return V16QImode;
5598 default:;
5600 if (TARGET_PAIRED_FLOAT
5601 && mode == SFmode)
5602 return V2SFmode;
5603 return word_mode;
5606 typedef struct _rs6000_cost_data
5608 struct loop *loop_info;
5609 unsigned cost[3];
5610 } rs6000_cost_data;
5612 /* Test for likely overcommitment of vector hardware resources. If a
5613 loop iteration is relatively large, and too large a percentage of
5614 instructions in the loop are vectorized, the cost model may not
5615 adequately reflect delays from unavailable vector resources.
5616 Penalize the loop body cost for this case. */
5618 static void
5619 rs6000_density_test (rs6000_cost_data *data)
5621 const int DENSITY_PCT_THRESHOLD = 85;
5622 const int DENSITY_SIZE_THRESHOLD = 70;
5623 const int DENSITY_PENALTY = 10;
5624 struct loop *loop = data->loop_info;
5625 basic_block *bbs = get_loop_body (loop);
5626 int nbbs = loop->num_nodes;
5627 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
5628 int i, density_pct;
5630 for (i = 0; i < nbbs; i++)
5632 basic_block bb = bbs[i];
5633 gimple_stmt_iterator gsi;
5635 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5637 gimple *stmt = gsi_stmt (gsi);
5638 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5640 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5641 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5642 not_vec_cost++;
5646 free (bbs);
5647 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5649 if (density_pct > DENSITY_PCT_THRESHOLD
5650 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
5652 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_NOTE, vect_location,
5655 "density %d%%, cost %d exceeds threshold, penalizing "
5656 "loop body cost by %d%%", density_pct,
5657 vec_cost + not_vec_cost, DENSITY_PENALTY);
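/* Rough worked example: with vec_cost == 90 and not_vec_cost == 10,
   density_pct is 90 and the loop size is 100; both thresholds are
   exceeded, so the body cost becomes 90 * 110 / 100 == 99.  */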
5661 /* Implement targetm.vectorize.init_cost. */
5663 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5664 instruction is needed by the vectorization. */
5665 static bool rs6000_vect_nonmem;
5667 static void *
5668 rs6000_init_cost (struct loop *loop_info)
5670 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
5671 data->loop_info = loop_info;
5672 data->cost[vect_prologue] = 0;
5673 data->cost[vect_body] = 0;
5674 data->cost[vect_epilogue] = 0;
5675 rs6000_vect_nonmem = false;
5676 return data;
5679 /* Implement targetm.vectorize.add_stmt_cost. */
5681 static unsigned
5682 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5683 struct _stmt_vec_info *stmt_info, int misalign,
5684 enum vect_cost_model_location where)
5686 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5687 unsigned retval = 0;
5689 if (flag_vect_cost_model)
5691 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5692 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5693 misalign);
5694 /* Statements in an inner loop relative to the loop being
5695 vectorized are weighted more heavily. The value here is
5696 arbitrary and could potentially be improved with analysis. */
5697 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5698 count *= 50; /* FIXME. */
5700 retval = (unsigned) (count * stmt_cost);
5701 cost_data->cost[where] += retval;
5703 /* Check whether we're doing something other than just a copy loop.
5704 Not all such loops may be profitably vectorized; see
5705 rs6000_finish_cost. */
5706 if ((kind == vec_to_scalar || kind == vec_perm
5707 || kind == vec_promote_demote || kind == vec_construct
5708 || kind == scalar_to_vec)
5709 || (where == vect_body && kind == vector_stmt))
5710 rs6000_vect_nonmem = true;
5713 return retval;
5716 /* Implement targetm.vectorize.finish_cost. */
5718 static void
5719 rs6000_finish_cost (void *data, unsigned *prologue_cost,
5720 unsigned *body_cost, unsigned *epilogue_cost)
5722 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
5724 if (cost_data->loop_info)
5725 rs6000_density_test (cost_data);
5727 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5728 that require versioning for any reason. The vectorization is at
5729 best a wash inside the loop, and the versioning checks make
5730 profitability highly unlikely and potentially quite harmful. */
5731 if (cost_data->loop_info)
5733 loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info);
5734 if (!rs6000_vect_nonmem
5735 && LOOP_VINFO_VECT_FACTOR (vec_info) == 2
5736 && LOOP_REQUIRES_VERSIONING (vec_info))
5737 cost_data->cost[vect_body] += 10000;
5740 *prologue_cost = cost_data->cost[vect_prologue];
5741 *body_cost = cost_data->cost[vect_body];
5742 *epilogue_cost = cost_data->cost[vect_epilogue];
5745 /* Implement targetm.vectorize.destroy_cost_data. */
5747 static void
5748 rs6000_destroy_cost_data (void *data)
5750 free (data);
5753 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5754 library with vectorized intrinsics. */
5756 static tree
5757 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5758 tree type_in)
5760 char name[32];
5761 const char *suffix = NULL;
5762 tree fntype, new_fndecl, bdecl = NULL_TREE;
5763 int n_args = 1;
5764 const char *bname;
5765 machine_mode el_mode, in_mode;
5766 int n, in_n;
5768 /* Libmass is suitable for unsafe math only, as it does not correctly
5769 support parts of IEEE (such as denormals) with the required precision.
5770 Only support it if we have VSX to use the simd d2 or f4 functions.
5771 XXX: Add variable-length support.  */
5772 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5773 return NULL_TREE;
5775 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5776 n = TYPE_VECTOR_SUBPARTS (type_out);
5777 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5778 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5779 if (el_mode != in_mode
5780 || n != in_n)
5781 return NULL_TREE;
5783 switch (fn)
5785 CASE_CFN_ATAN2:
5786 CASE_CFN_HYPOT:
5787 CASE_CFN_POW:
5788 n_args = 2;
5789 gcc_fallthrough ();
5791 CASE_CFN_ACOS:
5792 CASE_CFN_ACOSH:
5793 CASE_CFN_ASIN:
5794 CASE_CFN_ASINH:
5795 CASE_CFN_ATAN:
5796 CASE_CFN_ATANH:
5797 CASE_CFN_CBRT:
5798 CASE_CFN_COS:
5799 CASE_CFN_COSH:
5800 CASE_CFN_ERF:
5801 CASE_CFN_ERFC:
5802 CASE_CFN_EXP2:
5803 CASE_CFN_EXP:
5804 CASE_CFN_EXPM1:
5805 CASE_CFN_LGAMMA:
5806 CASE_CFN_LOG10:
5807 CASE_CFN_LOG1P:
5808 CASE_CFN_LOG2:
5809 CASE_CFN_LOG:
5810 CASE_CFN_SIN:
5811 CASE_CFN_SINH:
5812 CASE_CFN_SQRT:
5813 CASE_CFN_TAN:
5814 CASE_CFN_TANH:
5815 if (el_mode == DFmode && n == 2)
5817 bdecl = mathfn_built_in (double_type_node, fn);
5818 suffix = "d2"; /* pow -> powd2 */
5820 else if (el_mode == SFmode && n == 4)
5822 bdecl = mathfn_built_in (float_type_node, fn);
5823 suffix = "4"; /* powf -> powf4 */
5825 else
5826 return NULL_TREE;
5827 if (!bdecl)
5828 return NULL_TREE;
5829 break;
5831 default:
5832 return NULL_TREE;
5835 gcc_assert (suffix != NULL);
5836 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5837 if (!bname)
5838 return NULL_TREE;
5840 strcpy (name, bname + sizeof ("__builtin_") - 1);
5841 strcat (name, suffix);
5843 if (n_args == 1)
5844 fntype = build_function_type_list (type_out, type_in, NULL);
5845 else if (n_args == 2)
5846 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5847 else
5848 gcc_unreachable ();
5850 /* Build a function declaration for the vectorized function. */
5851 new_fndecl = build_decl (BUILTINS_LOCATION,
5852 FUNCTION_DECL, get_identifier (name), fntype);
5853 TREE_PUBLIC (new_fndecl) = 1;
5854 DECL_EXTERNAL (new_fndecl) = 1;
5855 DECL_IS_NOVOPS (new_fndecl) = 1;
5856 TREE_READONLY (new_fndecl) = 1;
5858 return new_fndecl;
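/* For example, a V2DFmode pow is vectorized by taking "__builtin_pow",
   stripping the "__builtin_" prefix, and appending "d2", yielding a
   call to "powd2" of type V2DF (V2DF, V2DF).  */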
5861 /* Returns a function decl for a vectorized version of the builtin function
5862 with builtin function code FN and the result vector type TYPE_OUT, or
5863 NULL_TREE if it is not available.  */
5865 static tree
5866 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5867 tree type_in)
5869 machine_mode in_mode, out_mode;
5870 int in_n, out_n;
5872 if (TARGET_DEBUG_BUILTIN)
5873 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5874 combined_fn_name (combined_fn (fn)),
5875 GET_MODE_NAME (TYPE_MODE (type_out)),
5876 GET_MODE_NAME (TYPE_MODE (type_in)));
5878 if (TREE_CODE (type_out) != VECTOR_TYPE
5879 || TREE_CODE (type_in) != VECTOR_TYPE)
5880 return NULL_TREE;
5882 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5883 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5884 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5885 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5887 switch (fn)
5889 CASE_CFN_COPYSIGN:
5890 if (VECTOR_UNIT_VSX_P (V2DFmode)
5891 && out_mode == DFmode && out_n == 2
5892 && in_mode == DFmode && in_n == 2)
5893 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
5894 if (VECTOR_UNIT_VSX_P (V4SFmode)
5895 && out_mode == SFmode && out_n == 4
5896 && in_mode == SFmode && in_n == 4)
5897 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
5898 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5899 && out_mode == SFmode && out_n == 4
5900 && in_mode == SFmode && in_n == 4)
5901 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
5902 break;
5903 CASE_CFN_CEIL:
5904 if (VECTOR_UNIT_VSX_P (V2DFmode)
5905 && out_mode == DFmode && out_n == 2
5906 && in_mode == DFmode && in_n == 2)
5907 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5908 if (VECTOR_UNIT_VSX_P (V4SFmode)
5909 && out_mode == SFmode && out_n == 4
5910 && in_mode == SFmode && in_n == 4)
5911 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5912 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5913 && out_mode == SFmode && out_n == 4
5914 && in_mode == SFmode && in_n == 4)
5915 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5916 break;
5917 CASE_CFN_FLOOR:
5918 if (VECTOR_UNIT_VSX_P (V2DFmode)
5919 && out_mode == DFmode && out_n == 2
5920 && in_mode == DFmode && in_n == 2)
5921 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5922 if (VECTOR_UNIT_VSX_P (V4SFmode)
5923 && out_mode == SFmode && out_n == 4
5924 && in_mode == SFmode && in_n == 4)
5925 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5926 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5927 && out_mode == SFmode && out_n == 4
5928 && in_mode == SFmode && in_n == 4)
5929 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5930 break;
5931 CASE_CFN_FMA:
5932 if (VECTOR_UNIT_VSX_P (V2DFmode)
5933 && out_mode == DFmode && out_n == 2
5934 && in_mode == DFmode && in_n == 2)
5935 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5936 if (VECTOR_UNIT_VSX_P (V4SFmode)
5937 && out_mode == SFmode && out_n == 4
5938 && in_mode == SFmode && in_n == 4)
5939 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5940 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5941 && out_mode == SFmode && out_n == 4
5942 && in_mode == SFmode && in_n == 4)
5943 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5944 break;
5945 CASE_CFN_TRUNC:
5946 if (VECTOR_UNIT_VSX_P (V2DFmode)
5947 && out_mode == DFmode && out_n == 2
5948 && in_mode == DFmode && in_n == 2)
5949 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5950 if (VECTOR_UNIT_VSX_P (V4SFmode)
5951 && out_mode == SFmode && out_n == 4
5952 && in_mode == SFmode && in_n == 4)
5953 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5954 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5955 && out_mode == SFmode && out_n == 4
5956 && in_mode == SFmode && in_n == 4)
5957 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5958 break;
5959 CASE_CFN_NEARBYINT:
5960 if (VECTOR_UNIT_VSX_P (V2DFmode)
5961 && flag_unsafe_math_optimizations
5962 && out_mode == DFmode && out_n == 2
5963 && in_mode == DFmode && in_n == 2)
5964 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5965 if (VECTOR_UNIT_VSX_P (V4SFmode)
5966 && flag_unsafe_math_optimizations
5967 && out_mode == SFmode && out_n == 4
5968 && in_mode == SFmode && in_n == 4)
5969 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5970 break;
5971 CASE_CFN_RINT:
5972 if (VECTOR_UNIT_VSX_P (V2DFmode)
5973 && !flag_trapping_math
5974 && out_mode == DFmode && out_n == 2
5975 && in_mode == DFmode && in_n == 2)
5976 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5977 if (VECTOR_UNIT_VSX_P (V4SFmode)
5978 && !flag_trapping_math
5979 && out_mode == SFmode && out_n == 4
5980 && in_mode == SFmode && in_n == 4)
5981 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5982 break;
5983 default:
5984 break;
5987 /* Generate calls to libmass if appropriate. */
5988 if (rs6000_veclib_handler)
5989 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5991 return NULL_TREE;
5994 /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */
5996 static tree
5997 rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
5998 tree type_in)
6000 machine_mode in_mode, out_mode;
6001 int in_n, out_n;
6003 if (TARGET_DEBUG_BUILTIN)
6004 fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
6005 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
6006 GET_MODE_NAME (TYPE_MODE (type_out)),
6007 GET_MODE_NAME (TYPE_MODE (type_in)));
6009 if (TREE_CODE (type_out) != VECTOR_TYPE
6010 || TREE_CODE (type_in) != VECTOR_TYPE)
6011 return NULL_TREE;
6013 out_mode = TYPE_MODE (TREE_TYPE (type_out));
6014 out_n = TYPE_VECTOR_SUBPARTS (type_out);
6015 in_mode = TYPE_MODE (TREE_TYPE (type_in));
6016 in_n = TYPE_VECTOR_SUBPARTS (type_in);
6018 enum rs6000_builtins fn
6019 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
6020 switch (fn)
6022 case RS6000_BUILTIN_RSQRTF:
6023 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6024 && out_mode == SFmode && out_n == 4
6025 && in_mode == SFmode && in_n == 4)
6026 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
6027 break;
6028 case RS6000_BUILTIN_RSQRT:
6029 if (VECTOR_UNIT_VSX_P (V2DFmode)
6030 && out_mode == DFmode && out_n == 2
6031 && in_mode == DFmode && in_n == 2)
6032 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
6033 break;
6034 case RS6000_BUILTIN_RECIPF:
6035 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
6036 && out_mode == SFmode && out_n == 4
6037 && in_mode == SFmode && in_n == 4)
6038 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
6039 break;
6040 case RS6000_BUILTIN_RECIP:
6041 if (VECTOR_UNIT_VSX_P (V2DFmode)
6042 && out_mode == DFmode && out_n == 2
6043 && in_mode == DFmode && in_n == 2)
6044 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
6045 break;
6046 default:
6047 break;
6049 return NULL_TREE;
6052 /* Default CPU string for rs6000*_file_start functions. */
6053 static const char *rs6000_default_cpu;
6055 /* Do anything needed at the start of the asm file. */
6057 static void
6058 rs6000_file_start (void)
6060 char buffer[80];
6061 const char *start = buffer;
6062 FILE *file = asm_out_file;
6064 rs6000_default_cpu = TARGET_CPU_DEFAULT;
6066 default_file_start ();
6068 if (flag_verbose_asm)
6070 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
6072 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
6074 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
6075 start = "";
6078 if (global_options_set.x_rs6000_cpu_index)
6080 fprintf (file, "%s -mcpu=%s", start,
6081 processor_target_table[rs6000_cpu_index].name);
6082 start = "";
6085 if (global_options_set.x_rs6000_tune_index)
6087 fprintf (file, "%s -mtune=%s", start,
6088 processor_target_table[rs6000_tune_index].name);
6089 start = "";
6092 if (PPC405_ERRATUM77)
6094 fprintf (file, "%s PPC405CR_ERRATUM77", start);
6095 start = "";
6098 #ifdef USING_ELFOS_H
6099 switch (rs6000_sdata)
6101 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
6102 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
6103 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
6104 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
6107 if (rs6000_sdata && g_switch_value)
6109 fprintf (file, "%s -G %d", start,
6110 g_switch_value);
6111 start = "";
6113 #endif
6115 if (*start == '\0')
6116 putc ('\n', file);
6119 #ifdef USING_ELFOS_H
6120 if (!(rs6000_default_cpu && rs6000_default_cpu[0])
6121 && !global_options_set.x_rs6000_cpu_index)
6123 fputs ("\t.machine ", asm_out_file);
6124 if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
6125 fputs ("power9\n", asm_out_file);
6126 else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
6127 fputs ("power8\n", asm_out_file);
6128 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
6129 fputs ("power7\n", asm_out_file);
6130 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
6131 fputs ("power6\n", asm_out_file);
6132 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
6133 fputs ("power5\n", asm_out_file);
6134 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
6135 fputs ("power4\n", asm_out_file);
6136 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
6137 fputs ("ppc64\n", asm_out_file);
6138 else
6139 fputs ("ppc\n", asm_out_file);
6141 #endif
6143 if (DEFAULT_ABI == ABI_ELFv2)
6144 fprintf (file, "\t.abiversion 2\n");
6148 /* Return nonzero if this function is known to have a null epilogue. */
6151 direct_return (void)
6153 if (reload_completed)
6155 rs6000_stack_t *info = rs6000_stack_info ();
6157 if (info->first_gp_reg_save == 32
6158 && info->first_fp_reg_save == 64
6159 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6160 && ! info->lr_save_p
6161 && ! info->cr_save_p
6162 && info->vrsave_size == 0
6163 && ! info->push_p)
6164 return 1;
6167 return 0;
6170 /* Return the number of instructions it takes to form a constant in an
6171 integer register. */
6174 num_insns_constant_wide (HOST_WIDE_INT value)
6176 /* signed constant loadable with addi */
6177 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
6178 return 1;
6180 /* constant loadable with addis */
6181 else if ((value & 0xffff) == 0
6182 && (value >> 31 == -1 || value >> 31 == 0))
6183 return 1;
6185 else if (TARGET_POWERPC64)
6187 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
6188 HOST_WIDE_INT high = value >> 31;
6190 if (high == 0 || high == -1)
6191 return 2;
6193 high >>= 1;
6195 if (low == 0)
6196 return num_insns_constant_wide (high) + 1;
6197 else if (high == 0)
6198 return num_insns_constant_wide (low) + 1;
6199 else
6200 return (num_insns_constant_wide (high)
6201 + num_insns_constant_wide (low) + 1);
6204 else
6205 return 2;
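/* Rough worked examples: 100 is loaded with a single addi; 0x12340000
   with a single addis; 0x12345678 needs addis+ori, i.e. two insns; and
   a 64-bit constant whose high and low 32-bit halves are both nonzero
   costs insns (high) + insns (low) + 1 for the combining insn.  */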
6209 num_insns_constant (rtx op, machine_mode mode)
6211 HOST_WIDE_INT low, high;
6213 switch (GET_CODE (op))
6215 case CONST_INT:
6216 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
6217 && rs6000_is_valid_and_mask (op, mode))
6218 return 2;
6219 else
6220 return num_insns_constant_wide (INTVAL (op));
6222 case CONST_WIDE_INT:
6224 int i;
6225 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
6226 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6227 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
6228 return ins;
6231 case CONST_DOUBLE:
6232 if (mode == SFmode || mode == SDmode)
6234 long l;
6236 if (DECIMAL_FLOAT_MODE_P (mode))
6237 REAL_VALUE_TO_TARGET_DECIMAL32
6238 (*CONST_DOUBLE_REAL_VALUE (op), l);
6239 else
6240 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6241 return num_insns_constant_wide ((HOST_WIDE_INT) l);
6244 long l[2];
6245 if (DECIMAL_FLOAT_MODE_P (mode))
6246 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
6247 else
6248 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
6249 high = l[WORDS_BIG_ENDIAN == 0];
6250 low = l[WORDS_BIG_ENDIAN != 0];
6252 if (TARGET_32BIT)
6253 return (num_insns_constant_wide (low)
6254 + num_insns_constant_wide (high));
6255 else
6257 if ((high == 0 && low >= 0)
6258 || (high == -1 && low < 0))
6259 return num_insns_constant_wide (low);
6261 else if (rs6000_is_valid_and_mask (op, mode))
6262 return 2;
6264 else if (low == 0)
6265 return num_insns_constant_wide (high) + 1;
6267 else
6268 return (num_insns_constant_wide (high)
6269 + num_insns_constant_wide (low) + 1);
6272 default:
6273 gcc_unreachable ();
6277 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6278 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6279 corresponding element of the vector, but for V4SFmode and V2SFmode,
6280 the corresponding "float" is interpreted as an SImode integer. */
6282 HOST_WIDE_INT
6283 const_vector_elt_as_int (rtx op, unsigned int elt)
6285 rtx tmp;
6287 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6288 gcc_assert (GET_MODE (op) != V2DImode
6289 && GET_MODE (op) != V2DFmode);
6291 tmp = CONST_VECTOR_ELT (op, elt);
6292 if (GET_MODE (op) == V4SFmode
6293 || GET_MODE (op) == V2SFmode)
6294 tmp = gen_lowpart (SImode, tmp);
6295 return INTVAL (tmp);
6298 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6299 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6300 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6301 all items are set to the same value and contain COPIES replicas of the
6302 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6303 operand and the others are set to the value of the operand's msb. */
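/* For example, a V8HImode constant with every element 0x0505 succeeds
   with step == 1 and copies == 2: each halfword is two copies of the
   QImode value 5, so a single vspltisb 5 generates it.  */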
6305 static bool
6306 vspltis_constant (rtx op, unsigned step, unsigned copies)
6308 machine_mode mode = GET_MODE (op);
6309 machine_mode inner = GET_MODE_INNER (mode);
6311 unsigned i;
6312 unsigned nunits;
6313 unsigned bitsize;
6314 unsigned mask;
6316 HOST_WIDE_INT val;
6317 HOST_WIDE_INT splat_val;
6318 HOST_WIDE_INT msb_val;
6320 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6321 return false;
6323 nunits = GET_MODE_NUNITS (mode);
6324 bitsize = GET_MODE_BITSIZE (inner);
6325 mask = GET_MODE_MASK (inner);
6327 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6328 splat_val = val;
6329 msb_val = val >= 0 ? 0 : -1;
6331 /* Construct the value to be splatted, if possible. If not, return 0. */
6332 for (i = 2; i <= copies; i *= 2)
6334 HOST_WIDE_INT small_val;
6335 bitsize /= 2;
6336 small_val = splat_val >> bitsize;
6337 mask >>= bitsize;
6338 if (splat_val != ((HOST_WIDE_INT)
6339 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6340 | (small_val & mask)))
6341 return false;
6342 splat_val = small_val;
6345 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6346 if (EASY_VECTOR_15 (splat_val))
6349 /* Also check if we can splat, and then add the result to itself. Do so if
6350 the value is positive, or if the splat instruction is using OP's mode;
6351 for splat_val < 0, the splat and the add should use the same mode. */
6352 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6353 && (splat_val >= 0 || (step == 1 && copies == 1)))
6356 /* Also check if we are loading up the most significant bit, which can be
6357 done by loading up -1 and shifting the value left by -1.  */
6358 else if (EASY_VECTOR_MSB (splat_val, inner))
6361 else
6362 return false;
6364 /* Check if VAL is present in every STEP-th element, and the
6365 other elements are filled with its most significant bit. */
6366 for (i = 1; i < nunits; ++i)
6368 HOST_WIDE_INT desired_val;
6369 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6370 if ((i & (step - 1)) == 0)
6371 desired_val = val;
6372 else
6373 desired_val = msb_val;
6375 if (desired_val != const_vector_elt_as_int (op, elt))
6376 return false;
6379 return true;
6382 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6383 instruction, filling in the bottom elements with 0 or -1.
6385 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6386 for the number of zeroes to shift in, or negative for the number of 0xff
6387 bytes to shift in.
6389 OP is a CONST_VECTOR. */
6392 vspltis_shifted (rtx op)
6394 machine_mode mode = GET_MODE (op);
6395 machine_mode inner = GET_MODE_INNER (mode);
6397 unsigned i, j;
6398 unsigned nunits;
6399 unsigned mask;
6401 HOST_WIDE_INT val;
6403 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6404 return false;
6406 /* We need to create pseudo registers to do the shift, so don't recognize
6407 shift vector constants after reload. */
6408 if (!can_create_pseudo_p ())
6409 return false;
6411 nunits = GET_MODE_NUNITS (mode);
6412 mask = GET_MODE_MASK (inner);
6414 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6416 /* Check if the value can really be the operand of a vspltis[bhw]. */
6417 if (EASY_VECTOR_15 (val))
6420 /* Also check if we are loading up the most significant bit which can be done
6421 by loading up -1 and shifting the value left by -1. */
6422 else if (EASY_VECTOR_MSB (val, inner))
6425 else
6426 return 0;
6428 /* Check if VAL is present in every STEP-th element until we find elements
6429 that are 0 or all 1 bits. */
6430 for (i = 1; i < nunits; ++i)
6432 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6433 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6435 /* If the value isn't the splat value, check for the remaining elements
6436 being 0/-1. */
6437 if (val != elt_val)
6439 if (elt_val == 0)
6441 for (j = i+1; j < nunits; ++j)
6443 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6444 if (const_vector_elt_as_int (op, elt2) != 0)
6445 return 0;
6448 return (nunits - i) * GET_MODE_SIZE (inner);
6451 else if ((elt_val & mask) == mask)
6453 for (j = i+1; j < nunits; ++j)
6455 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6456 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6457 return 0;
6460 return -((nunits - i) * GET_MODE_SIZE (inner));
6463 else
6464 return 0;
6468 /* If all elements are equal, we don't need to do VSLDOI.  */
6469 return 0;
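/* For example, on a big-endian target the V4SImode constant
   { 5, 0, 0, 0 } is accepted: word 0 is a valid vspltisw operand and
   the remaining words are zero, so the return value is 3 * 4 == 12
   bytes of zeroes to shift in.  */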
6473 /* Return true if OP is of the given MODE and can be synthesized
6474 with a vspltisb, vspltish or vspltisw. */
6476 bool
6477 easy_altivec_constant (rtx op, machine_mode mode)
6479 unsigned step, copies;
6481 if (mode == VOIDmode)
6482 mode = GET_MODE (op);
6483 else if (mode != GET_MODE (op))
6484 return false;
6486 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6487 constants. */
6488 if (mode == V2DFmode)
6489 return zero_constant (op, mode);
6491 else if (mode == V2DImode)
6493 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
6494 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
6495 return false;
6497 if (zero_constant (op, mode))
6498 return true;
6500 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6501 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6502 return true;
6504 return false;
6507 /* V1TImode is a special container for TImode. Ignore for now. */
6508 else if (mode == V1TImode)
6509 return false;
6511 /* Start with a vspltisw. */
6512 step = GET_MODE_NUNITS (mode) / 4;
6513 copies = 1;
6515 if (vspltis_constant (op, step, copies))
6516 return true;
6518 /* Then try with a vspltish. */
6519 if (step == 1)
6520 copies <<= 1;
6521 else
6522 step >>= 1;
6524 if (vspltis_constant (op, step, copies))
6525 return true;
6527 /* And finally a vspltisb. */
6528 if (step == 1)
6529 copies <<= 1;
6530 else
6531 step >>= 1;
6533 if (vspltis_constant (op, step, copies))
6534 return true;
6536 if (vspltis_shifted (op) != 0)
6537 return true;
6539 return false;
6542 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6543 result is OP. Abort if it is not possible. */
6546 gen_easy_altivec_constant (rtx op)
6548 machine_mode mode = GET_MODE (op);
6549 int nunits = GET_MODE_NUNITS (mode);
6550 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6551 unsigned step = nunits / 4;
6552 unsigned copies = 1;
6554 /* Start with a vspltisw. */
6555 if (vspltis_constant (op, step, copies))
6556 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6558 /* Then try with a vspltish. */
6559 if (step == 1)
6560 copies <<= 1;
6561 else
6562 step >>= 1;
6564 if (vspltis_constant (op, step, copies))
6565 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6567 /* And finally a vspltisb. */
6568 if (step == 1)
6569 copies <<= 1;
6570 else
6571 step >>= 1;
6573 if (vspltis_constant (op, step, copies))
6574 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6576 gcc_unreachable ();
6579 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6580 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6582 Return the number of instructions needed (1 or 2) via NUM_INSNS_PTR.
6585 Return the constant being splatted via CONSTANT_PTR.  */
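/* Rough examples: a V16QImode splat of 100 is a single xxspltib; a
   V8HImode splat of 100 is xxspltib plus a sign-extend (two insns);
   and a V8HImode splat of 5 is rejected here because a single
   vspltish is preferable.  */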
6587 bool
6588 xxspltib_constant_p (rtx op,
6589 machine_mode mode,
6590 int *num_insns_ptr,
6591 int *constant_ptr)
6593 size_t nunits = GET_MODE_NUNITS (mode);
6594 size_t i;
6595 HOST_WIDE_INT value;
6596 rtx element;
6598 /* Set the returned values to out of bound values. */
6599 *num_insns_ptr = -1;
6600 *constant_ptr = 256;
6602 if (!TARGET_P9_VECTOR)
6603 return false;
6605 if (mode == VOIDmode)
6606 mode = GET_MODE (op);
6608 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6609 return false;
6611 /* Handle (vec_duplicate <constant>). */
6612 if (GET_CODE (op) == VEC_DUPLICATE)
6614 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6615 && mode != V2DImode)
6616 return false;
6618 element = XEXP (op, 0);
6619 if (!CONST_INT_P (element))
6620 return false;
6622 value = INTVAL (element);
6623 if (!IN_RANGE (value, -128, 127))
6624 return false;
6627 /* Handle (const_vector [...]). */
6628 else if (GET_CODE (op) == CONST_VECTOR)
6630 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6631 && mode != V2DImode)
6632 return false;
6634 element = CONST_VECTOR_ELT (op, 0);
6635 if (!CONST_INT_P (element))
6636 return false;
6638 value = INTVAL (element);
6639 if (!IN_RANGE (value, -128, 127))
6640 return false;
6642 for (i = 1; i < nunits; i++)
6644 element = CONST_VECTOR_ELT (op, i);
6645 if (!CONST_INT_P (element))
6646 return false;
6648 if (value != INTVAL (element))
6649 return false;
6653 /* Handle integer constants being loaded into the upper part of the VSX
6654 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6655 can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLTIB.  */
6656 else if (CONST_INT_P (op))
6658 if (!SCALAR_INT_MODE_P (mode))
6659 return false;
6661 value = INTVAL (op);
6662 if (!IN_RANGE (value, -128, 127))
6663 return false;
6665 if (!IN_RANGE (value, -1, 0))
6667 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6668 return false;
6670 if (EASY_VECTOR_15 (value))
6671 return false;
6675 else
6676 return false;
6678 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6679 sign extend. Special case 0/-1 to allow getting any VSX register instead
6680 of an Altivec register. */
6681 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6682 && EASY_VECTOR_15 (value))
6683 return false;
6685 /* Return # of instructions and the constant byte for XXSPLTIB. */
6686 if (mode == V16QImode)
6687 *num_insns_ptr = 1;
6689 else if (IN_RANGE (value, -1, 0))
6690 *num_insns_ptr = 1;
6692 else
6693 *num_insns_ptr = 2;
6695 *constant_ptr = (int) value;
6696 return true;
6699 const char *
6700 output_vec_const_move (rtx *operands)
6702 int shift;
6703 machine_mode mode;
6704 rtx dest, vec;
6706 dest = operands[0];
6707 vec = operands[1];
6708 mode = GET_MODE (dest);
6710 if (TARGET_VSX)
6712 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6713 int xxspltib_value = 256;
6714 int num_insns = -1;
6716 if (zero_constant (vec, mode))
6718 if (TARGET_P9_VECTOR)
6719 return "xxspltib %x0,0";
6721 else if (dest_vmx_p)
6722 return "vspltisw %0,0";
6724 else
6725 return "xxlxor %x0,%x0,%x0";
6728 if (all_ones_constant (vec, mode))
6730 if (TARGET_P9_VECTOR)
6731 return "xxspltib %x0,255";
6733 else if (dest_vmx_p)
6734 return "vspltisw %0,-1";
6736 else if (TARGET_P8_VECTOR)
6737 return "xxlorc %x0,%x0,%x0";
6739 else
6740 gcc_unreachable ();
6743 if (TARGET_P9_VECTOR
6744 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6746 if (num_insns == 1)
6748 operands[2] = GEN_INT (xxspltib_value & 0xff);
6749 return "xxspltib %x0,%2";
6752 return "#";
6756 if (TARGET_ALTIVEC)
6758 rtx splat_vec;
6760 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6761 if (zero_constant (vec, mode))
6762 return "vspltisw %0,0";
6764 if (all_ones_constant (vec, mode))
6765 return "vspltisw %0,-1";
6767 /* Do we need to construct a value using VSLDOI? */
6768 shift = vspltis_shifted (vec);
6769 if (shift != 0)
6770 return "#";
6772 splat_vec = gen_easy_altivec_constant (vec);
6773 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6774 operands[1] = XEXP (splat_vec, 0);
6775 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6776 return "#";
6778 switch (GET_MODE (splat_vec))
6780 case E_V4SImode:
6781 return "vspltisw %0,%1";
6783 case E_V8HImode:
6784 return "vspltish %0,%1";
6786 case E_V16QImode:
6787 return "vspltisb %0,%1";
6789 default:
6790 gcc_unreachable ();
6794 gcc_unreachable ();
6797 /* Initialize the paired-float vector TARGET to VALS.  */
6799 void
6800 paired_expand_vector_init (rtx target, rtx vals)
6802 machine_mode mode = GET_MODE (target);
6803 int n_elts = GET_MODE_NUNITS (mode);
6804 int n_var = 0;
6805 rtx x, new_rtx, tmp, constant_op, op1, op2;
6806 int i;
6808 for (i = 0; i < n_elts; ++i)
6810 x = XVECEXP (vals, 0, i);
6811 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6812 ++n_var;
6814 if (n_var == 0)
6816 /* Load from constant pool. */
6817 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6818 return;
6821 if (n_var == 2)
6823 /* The vector is initialized only with non-constants. */
6824 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
6825 XVECEXP (vals, 0, 1));
6827 emit_move_insn (target, new_rtx);
6828 return;
6831 /* One field is non-constant and the other one is a constant. Load the
6832 constant from the constant pool and use the ps_merge instruction to
6833 construct the whole vector. */
6834 op1 = XVECEXP (vals, 0, 0);
6835 op2 = XVECEXP (vals, 0, 1);
6837 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
6839 tmp = gen_reg_rtx (GET_MODE (constant_op));
6840 emit_move_insn (tmp, constant_op);
6842 if (CONSTANT_P (op1))
6843 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
6844 else
6845 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
6847 emit_move_insn (target, new_rtx);
6850 void
6851 paired_expand_vector_move (rtx operands[])
6853 rtx op0 = operands[0], op1 = operands[1];
6855 emit_move_insn (op0, op1);
6858 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
6859 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
6860 operands for the relation operation RCODE.  This is a recursive
6861 function. */
6863 static void
6864 paired_emit_vector_compare (enum rtx_code rcode,
6865 rtx dest, rtx op0, rtx op1,
6866 rtx cc_op0, rtx cc_op1)
6868 rtx tmp = gen_reg_rtx (V2SFmode);
6869 rtx tmp1, max, min;
6871 gcc_assert (TARGET_PAIRED_FLOAT);
6872 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6874 switch (rcode)
6876 case LT:
6877 case LTU:
6878 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6879 return;
6880 case GE:
6881 case GEU:
6882 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6883 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
6884 return;
6885 case LE:
6886 case LEU:
6887 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
6888 return;
6889 case GT:
6890 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6891 return;
6892 case EQ:
6893 tmp1 = gen_reg_rtx (V2SFmode);
6894 max = gen_reg_rtx (V2SFmode);
6895 min = gen_reg_rtx (V2SFmode);
6898 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
6899 emit_insn (gen_selv2sf4
6900 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6901 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
6902 emit_insn (gen_selv2sf4
6903 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
6904 emit_insn (gen_subv2sf3 (tmp1, min, max));
6905 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
6906 return;
6907 case NE:
6908 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
6909 return;
6910 case UNLE:
6911 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
6912 return;
6913 case UNLT:
6914 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
6915 return;
6916 case UNGE:
6917 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
6918 return;
6919 case UNGT:
6920 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
6921 return;
6922 default:
6923 gcc_unreachable ();
6926 return;
6929 /* Emit vector conditional expression.
6930 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6931 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6934 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6935 rtx cond, rtx cc_op0, rtx cc_op1)
6937 enum rtx_code rcode = GET_CODE (cond);
6939 if (!TARGET_PAIRED_FLOAT)
6940 return 0;
6942 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
6944 return 1;
6947 /* Initialize vector TARGET to VALS. */
6949 void
6950 rs6000_expand_vector_init (rtx target, rtx vals)
6952 machine_mode mode = GET_MODE (target);
6953 machine_mode inner_mode = GET_MODE_INNER (mode);
6954 int n_elts = GET_MODE_NUNITS (mode);
6955 int n_var = 0, one_var = -1;
6956 bool all_same = true, all_const_zero = true;
6957 rtx x, mem;
6958 int i;
6960 for (i = 0; i < n_elts; ++i)
6962 x = XVECEXP (vals, 0, i);
6963 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6964 ++n_var, one_var = i;
6965 else if (x != CONST0_RTX (inner_mode))
6966 all_const_zero = false;
6968 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6969 all_same = false;
6972 if (n_var == 0)
6974 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6975 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6976 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6978 /* Zero register. */
6979 emit_move_insn (target, CONST0_RTX (mode));
6980 return;
6982 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6984 /* Splat immediate. */
6985 emit_insn (gen_rtx_SET (target, const_vec));
6986 return;
6988 else
6990 /* Load from constant pool. */
6991 emit_move_insn (target, const_vec);
6992 return;
6996 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6997 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6999 rtx op[2];
7000 size_t i;
7001 size_t num_elements = all_same ? 1 : 2;
7002 for (i = 0; i < num_elements; i++)
7004 op[i] = XVECEXP (vals, 0, i);
7005 /* Just in case there is a SUBREG with a smaller mode, do a
7006 conversion. */
7007 if (GET_MODE (op[i]) != inner_mode)
7009 rtx tmp = gen_reg_rtx (inner_mode);
7010 convert_move (tmp, op[i], 0);
7011 op[i] = tmp;
7013 /* Allow load with splat double word. */
7014 else if (MEM_P (op[i]))
7016 if (!all_same)
7017 op[i] = force_reg (inner_mode, op[i]);
7019 else if (!REG_P (op[i]))
7020 op[i] = force_reg (inner_mode, op[i]);
7023 if (all_same)
7025 if (mode == V2DFmode)
7026 emit_insn (gen_vsx_splat_v2df (target, op[0]));
7027 else
7028 emit_insn (gen_vsx_splat_v2di (target, op[0]));
7030 else
7032 if (mode == V2DFmode)
7033 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
7034 else
7035 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
7037 return;
7040 /* Special case initializing vector int if we are on 64-bit systems with
7041 direct move or we have the ISA 3.0 instructions. */
7042 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
7043 && TARGET_DIRECT_MOVE_64BIT)
7045 if (all_same)
7047 rtx element0 = XVECEXP (vals, 0, 0);
7048 if (MEM_P (element0))
7049 element0 = rs6000_address_for_fpconvert (element0);
7050 else
7051 element0 = force_reg (SImode, element0);
7053 if (TARGET_P9_VECTOR)
7054 emit_insn (gen_vsx_splat_v4si (target, element0));
7055 else
7057 rtx tmp = gen_reg_rtx (DImode);
7058 emit_insn (gen_zero_extendsidi2 (tmp, element0));
7059 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
7061 return;
7063 else
7065 rtx elements[4];
7066 size_t i;
7068 for (i = 0; i < 4; i++)
7070 elements[i] = XVECEXP (vals, 0, i);
7071 if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
7072 elements[i] = copy_to_mode_reg (SImode, elements[i]);
7075 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
7076 elements[2], elements[3]));
7077 return;
7081 /* With single-precision floating point on VSX, we know that internally
7082 single precision is actually represented as a double.  Either make two
7083 V2DF vectors and convert them to single precision, or do one
7084 conversion and splat the result to the other elements.  */
7085 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
7087 if (all_same)
7089 rtx element0 = XVECEXP (vals, 0, 0);
7091 if (TARGET_P9_VECTOR)
7093 if (MEM_P (element0))
7094 element0 = rs6000_address_for_fpconvert (element0);
7096 emit_insn (gen_vsx_splat_v4sf (target, element0));
7099 else
7101 rtx freg = gen_reg_rtx (V4SFmode);
7102 rtx sreg = force_reg (SFmode, element0);
7103 rtx cvt = (TARGET_XSCVDPSPN
7104 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
7105 : gen_vsx_xscvdpsp_scalar (freg, sreg));
7107 emit_insn (cvt);
7108 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
7109 const0_rtx));
7112 else
7114 rtx dbl_even = gen_reg_rtx (V2DFmode);
7115 rtx dbl_odd = gen_reg_rtx (V2DFmode);
7116 rtx flt_even = gen_reg_rtx (V4SFmode);
7117 rtx flt_odd = gen_reg_rtx (V4SFmode);
7118 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7119 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7120 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7121 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7123 /* Use VMRGEW if we can instead of doing a permute. */
7124 if (TARGET_P8_VECTOR)
7126 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
7127 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
7128 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7129 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7130 if (BYTES_BIG_ENDIAN)
7131 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
7132 else
7133 emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
7135 else
7137 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7138 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7139 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7140 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7141 rs6000_expand_extract_even (target, flt_even, flt_odd);
7144 return;
7147 /* Special case initializing vector short/char that are splats if we are on
7148 64-bit systems with direct move. */
7149 if (all_same && TARGET_DIRECT_MOVE_64BIT
7150 && (mode == V16QImode || mode == V8HImode))
7152 rtx op0 = XVECEXP (vals, 0, 0);
7153 rtx di_tmp = gen_reg_rtx (DImode);
7155 if (!REG_P (op0))
7156 op0 = force_reg (GET_MODE_INNER (mode), op0);
7158 if (mode == V16QImode)
7160 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7161 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7162 return;
7165 if (mode == V8HImode)
7167 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7168 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7169 return;
7173 /* Store value to stack temp. Load vector element. Splat. However, splat
7174 of 64-bit items is not supported on Altivec. */
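/* Illustrative sketch of the generated sequence (hypothetical V4SI splat
   of X): store X to a stack slot; an lvewx-style element load (the
   UNSPEC_LVE below) brings it into one lane of the vector register; the
   VEC_DUPLICATE then splats that lane to all elements.  */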
7175 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7177 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7178 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7179 XVECEXP (vals, 0, 0));
7180 x = gen_rtx_UNSPEC (VOIDmode,
7181 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7182 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7183 gen_rtvec (2,
7184 gen_rtx_SET (target, mem),
7185 x)));
7186 x = gen_rtx_VEC_SELECT (inner_mode, target,
7187 gen_rtx_PARALLEL (VOIDmode,
7188 gen_rtvec (1, const0_rtx)));
7189 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7190 return;
7193 /* One field is non-constant. Load constant then overwrite
7194 varying field. */
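/* For example (hypothetical values): for { 1, 2, X, 4 } with X in a
   register, we first materialize { 1, 2, 4, 4 } (the varying slot is
   filled with its neighbor so the constant is easy to load), then
   insert X into element 2 with rs6000_expand_vector_set.  */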
7195 if (n_var == 1)
7197 rtx copy = copy_rtx (vals);
7199 /* Load constant part of vector, substitute neighboring value for
7200 varying element. */
7201 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7202 rs6000_expand_vector_init (target, copy);
7204 /* Insert variable. */
7205 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
7206 return;
7209 /* Construct the vector in memory one field at a time
7210 and load the whole vector. */
7211 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7212 for (i = 0; i < n_elts; i++)
7213 emit_move_insn (adjust_address_nv (mem, inner_mode,
7214 i * GET_MODE_SIZE (inner_mode)),
7215 XVECEXP (vals, 0, i));
7216 emit_move_insn (target, mem);
7219 /* Set field ELT of TARGET to VAL. */
7221 void
7222 rs6000_expand_vector_set (rtx target, rtx val, int elt)
7224 machine_mode mode = GET_MODE (target);
7225 machine_mode inner_mode = GET_MODE_INNER (mode);
7226 rtx reg = gen_reg_rtx (mode);
7227 rtx mask, mem, x;
7228 int width = GET_MODE_SIZE (inner_mode);
7229 int i;
7231 val = force_reg (GET_MODE (val), val);
7233 if (VECTOR_MEM_VSX_P (mode))
7235 rtx insn = NULL_RTX;
7236 rtx elt_rtx = GEN_INT (elt);
7238 if (mode == V2DFmode)
7239 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7241 else if (mode == V2DImode)
7242 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7244 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7246 if (mode == V4SImode)
7247 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7248 else if (mode == V8HImode)
7249 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7250 else if (mode == V16QImode)
7251 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7252 else if (mode == V4SFmode)
7253 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7256 if (insn)
7258 emit_insn (insn);
7259 return;
7263 /* Simplify setting single element vectors like V1TImode. */
7264 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
7266 emit_move_insn (target, gen_lowpart (mode, val));
7267 return;
7270 /* Load single variable value. */
7271 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7272 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7273 x = gen_rtx_UNSPEC (VOIDmode,
7274 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7275 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7276 gen_rtvec (2,
7277 gen_rtx_SET (reg, mem),
7278 x)));
7280 /* Linear sequence. */
7281 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7282 for (i = 0; i < 16; ++i)
7283 XVECEXP (mask, 0, i) = GEN_INT (i);
7285 /* Set permute mask to insert element into target. */
7286 for (i = 0; i < width; ++i)
7287 XVECEXP (mask, 0, elt*width + i)
7288 = GEN_INT (i + 0x10);
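/* Sketch of the resulting selector (hypothetical case: V4SI, ELT 1,
   big endian): bytes { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 },
   i.e. element 1 is taken from the second permute input (the new
   value) and every other element from TARGET.  */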
7289 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7291 if (BYTES_BIG_ENDIAN)
7292 x = gen_rtx_UNSPEC (mode,
7293 gen_rtvec (3, target, reg,
7294 force_reg (V16QImode, x)),
7295 UNSPEC_VPERM);
7296 else
7298 if (TARGET_P9_VECTOR)
7299 x = gen_rtx_UNSPEC (mode,
7300 gen_rtvec (3, target, reg,
7301 force_reg (V16QImode, x)),
7302 UNSPEC_VPERMR);
7303 else
7305 /* Invert selector. We prefer to generate VNAND on P8 so
7306 that future fusion opportunities can kick in, but must
7307 generate VNOR elsewhere. */
7308 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7309 rtx iorx = (TARGET_P8_VECTOR
7310 ? gen_rtx_IOR (V16QImode, notx, notx)
7311 : gen_rtx_AND (V16QImode, notx, notx));
7312 rtx tmp = gen_reg_rtx (V16QImode);
7313 emit_insn (gen_rtx_SET (tmp, iorx));
7315 /* Permute with operands reversed and adjusted selector. */
7316 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7317 UNSPEC_VPERM);
7321 emit_insn (gen_rtx_SET (target, x));
7324 /* Extract field ELT from VEC into TARGET. */
7326 void
7327 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7329 machine_mode mode = GET_MODE (vec);
7330 machine_mode inner_mode = GET_MODE_INNER (mode);
7331 rtx mem;
7333 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7335 switch (mode)
7337 default:
7338 break;
7339 case E_V1TImode:
7340 gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode);
7341 emit_move_insn (target, gen_lowpart (TImode, vec));
7342 break;
7343 case E_V2DFmode:
7344 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7345 return;
7346 case E_V2DImode:
7347 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7348 return;
7349 case E_V4SFmode:
7350 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7351 return;
7352 case E_V16QImode:
7353 if (TARGET_DIRECT_MOVE_64BIT)
7355 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7356 return;
7358 else
7359 break;
7360 case E_V8HImode:
7361 if (TARGET_DIRECT_MOVE_64BIT)
7363 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7364 return;
7366 else
7367 break;
7368 case E_V4SImode:
7369 if (TARGET_DIRECT_MOVE_64BIT)
7371 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7372 return;
7374 break;
7377 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7378 && TARGET_DIRECT_MOVE_64BIT)
7380 if (GET_MODE (elt) != DImode)
7382 rtx tmp = gen_reg_rtx (DImode);
7383 convert_move (tmp, elt, 0);
7384 elt = tmp;
7386 else if (!REG_P (elt))
7387 elt = force_reg (DImode, elt);
7389 switch (mode)
7391 case E_V2DFmode:
7392 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7393 return;
7395 case E_V2DImode:
7396 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7397 return;
7399 case E_V4SFmode:
7400 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7401 return;
7403 case E_V4SImode:
7404 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7405 return;
7407 case E_V8HImode:
7408 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7409 return;
7411 case E_V16QImode:
7412 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7413 return;
7415 default:
7416 gcc_unreachable ();
7420 gcc_assert (CONST_INT_P (elt));
7422 /* Allocate mode-sized buffer. */
7423 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7425 emit_move_insn (mem, vec);
7427 /* Add offset to field within buffer matching vector element. */
7428 mem = adjust_address_nv (mem, inner_mode,
7429 INTVAL (elt) * GET_MODE_SIZE (inner_mode));
7431 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7434 /* Helper function to return the register number of an RTX. */
7435 static inline int
7436 regno_or_subregno (rtx op)
7438 if (REG_P (op))
7439 return REGNO (op);
7440 else if (SUBREG_P (op))
7441 return subreg_regno (op);
7442 else
7443 gcc_unreachable ();
7446 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7447 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7448 temporary (BASE_TMP) to fix up the address. Return the new memory address
7449 that is valid for reads or writes to a given register (SCALAR_REG). */
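/* For instance (hypothetical operands): a V4SI access to mem[r9+16]
   with constant ELEMENT 2 and SImode scalars becomes mem[r9+24], since
   the element offset 2*4 folds into the existing displacement; a
   variable ELEMENT is instead shifted left by log2 (size) into
   BASE_TMP and added to the base register.  */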
7451 rtx
7452 rs6000_adjust_vec_address (rtx scalar_reg,
7453 rtx mem,
7454 rtx element,
7455 rtx base_tmp,
7456 machine_mode scalar_mode)
7458 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7459 rtx addr = XEXP (mem, 0);
7460 rtx element_offset;
7461 rtx new_addr;
7462 bool valid_addr_p;
7464 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7465 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7467 /* Calculate what we need to add to the address to get the element
7468 address. */
7469 if (CONST_INT_P (element))
7470 element_offset = GEN_INT (INTVAL (element) * scalar_size);
7471 else
7473 int byte_shift = exact_log2 (scalar_size);
7474 gcc_assert (byte_shift >= 0);
7476 if (byte_shift == 0)
7477 element_offset = element;
7479 else
7481 if (TARGET_POWERPC64)
7482 emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
7483 else
7484 emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));
7486 element_offset = base_tmp;
7490 /* Create the new address pointing to the element within the vector. If we
7491 are adding 0, we don't have to change the address. */
7492 if (element_offset == const0_rtx)
7493 new_addr = addr;
7495 /* A simple indirect address can be converted into a reg + offset
7496 address. */
7497 else if (REG_P (addr) || SUBREG_P (addr))
7498 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7500 /* Optimize D-FORM addresses with constant offset with a constant element, to
7501 include the element offset in the address directly. */
7502 else if (GET_CODE (addr) == PLUS)
7504 rtx op0 = XEXP (addr, 0);
7505 rtx op1 = XEXP (addr, 1);
7506 rtx insn;
7508 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7509 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7511 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7512 rtx offset_rtx = GEN_INT (offset);
7514 if (IN_RANGE (offset, -32768, 32767)
7515 && (scalar_size < 8 || (offset & 0x3) == 0))
7516 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7517 else
7519 emit_move_insn (base_tmp, offset_rtx);
7520 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7523 else
7525 bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
7526 bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));
7528 /* Note, ADDI requires the register being added to be a base
7529 register. If the register was R0, load it up into the temporary
7530 and do the add. */
7531 if (op1_reg_p
7532 && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
7534 insn = gen_add3_insn (base_tmp, op1, element_offset);
7535 gcc_assert (insn != NULL_RTX);
7536 emit_insn (insn);
7539 else if (ele_reg_p
7540 && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
7542 insn = gen_add3_insn (base_tmp, element_offset, op1);
7543 gcc_assert (insn != NULL_RTX);
7544 emit_insn (insn);
7547 else
7549 emit_move_insn (base_tmp, op1);
7550 emit_insn (gen_add2_insn (base_tmp, element_offset));
7553 new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
7557 else
7559 emit_move_insn (base_tmp, addr);
7560 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7563 /* If we have a PLUS, we need to see whether the particular register class
7564 allows for D-FORM or X-FORM addressing. */
7565 if (GET_CODE (new_addr) == PLUS)
7567 rtx op1 = XEXP (new_addr, 1);
7568 addr_mask_type addr_mask;
7569 int scalar_regno = regno_or_subregno (scalar_reg);
7571 gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
7572 if (INT_REGNO_P (scalar_regno))
7573 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];
7575 else if (FP_REGNO_P (scalar_regno))
7576 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];
7578 else if (ALTIVEC_REGNO_P (scalar_regno))
7579 addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];
7581 else
7582 gcc_unreachable ();
7584 if (REG_P (op1) || SUBREG_P (op1))
7585 valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
7586 else
7587 valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
7590 else if (REG_P (new_addr) || SUBREG_P (new_addr))
7591 valid_addr_p = true;
7593 else
7594 valid_addr_p = false;
7596 if (!valid_addr_p)
7598 emit_move_insn (base_tmp, new_addr);
7599 new_addr = base_tmp;
7602 return change_address (mem, scalar_mode, new_addr);
7605 /* Split a variable vec_extract operation into the component instructions. */
7607 void
7608 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7609 rtx tmp_altivec)
7611 machine_mode mode = GET_MODE (src);
7612 machine_mode scalar_mode = GET_MODE (dest);
7613 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7614 int byte_shift = exact_log2 (scalar_size);
7616 gcc_assert (byte_shift >= 0);
7618 /* If we are given a memory address, optimize to load just the element. We
7619 don't have to adjust the vector element number on little endian
7620 systems. */
7621 if (MEM_P (src))
7623 gcc_assert (REG_P (tmp_gpr));
7624 emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
7625 tmp_gpr, scalar_mode));
7626 return;
7629 else if (REG_P (src) || SUBREG_P (src))
7631 int bit_shift = byte_shift + 3;
7632 rtx element2;
7633 int dest_regno = regno_or_subregno (dest);
7634 int src_regno = regno_or_subregno (src);
7635 int element_regno = regno_or_subregno (element);
7637 gcc_assert (REG_P (tmp_gpr));
7639 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7640 a general purpose register. */
7641 if (TARGET_P9_VECTOR
7642 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7643 && INT_REGNO_P (dest_regno)
7644 && ALTIVEC_REGNO_P (src_regno)
7645 && INT_REGNO_P (element_regno))
7647 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7648 rtx element_si = gen_rtx_REG (SImode, element_regno);
7650 if (mode == V16QImode)
7651 emit_insn (VECTOR_ELT_ORDER_BIG
7652 ? gen_vextublx (dest_si, element_si, src)
7653 : gen_vextubrx (dest_si, element_si, src));
7655 else if (mode == V8HImode)
7657 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7658 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7659 emit_insn (VECTOR_ELT_ORDER_BIG
7660 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7661 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7665 else
7667 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7668 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7669 emit_insn (VECTOR_ELT_ORDER_BIG
7670 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7671 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7674 return;
7678 gcc_assert (REG_P (tmp_altivec));
7680 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7681 an XOR, otherwise we need to subtract. The shift amount is chosen so VSLO
7682 will shift the element into the upper position (adding 3 to convert a
7683 byte shift into a bit shift). */
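/* Example of the little-endian fixups (hypothetical element numbers):
   for V2DI, element 1 becomes 1 ^ 1 = 0; for V4SI, element 1 becomes
   (4 - 1) - 1 = 2, computed with the AND/SUB pair below, before being
   converted into a bit shift for VSLO.  */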
7684 if (scalar_size == 8)
7686 if (!VECTOR_ELT_ORDER_BIG)
7688 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7689 element2 = tmp_gpr;
7691 else
7692 element2 = element;
7694 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
7695 bit. */
7696 emit_insn (gen_rtx_SET (tmp_gpr,
7697 gen_rtx_AND (DImode,
7698 gen_rtx_ASHIFT (DImode,
7699 element2,
7700 GEN_INT (6)),
7701 GEN_INT (64))));
7703 else
7705 if (!VECTOR_ELT_ORDER_BIG)
7707 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7709 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
7710 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
7711 element2 = tmp_gpr;
7713 else
7714 element2 = element;
7716 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
7719 /* Get the value into the lower byte of the Altivec register where VSLO
7720 expects it. */
7721 if (TARGET_P9_VECTOR)
7722 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
7723 else if (can_create_pseudo_p ())
7724 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
7725 else
7727 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7728 emit_move_insn (tmp_di, tmp_gpr);
7729 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
7732 /* Do the VSLO to get the value into the final location. */
7733 switch (mode)
7735 case E_V2DFmode:
7736 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
7737 return;
7739 case E_V2DImode:
7740 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
7741 return;
7743 case E_V4SFmode:
7745 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7746 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
7747 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7748 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7749 tmp_altivec));
7751 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
7752 return;
7755 case E_V4SImode:
7756 case E_V8HImode:
7757 case E_V16QImode:
7759 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
7760 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
7761 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
7762 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
7763 tmp_altivec));
7764 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
7765 emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
7766 GEN_INT (64 - (8 * scalar_size))));
7767 return;
7770 default:
7771 gcc_unreachable ();
7774 return;
7776 else
7777 gcc_unreachable ();
7780 /* Helper function for rs6000_split_v4si_init to build up a DImode value from
7781 two SImode values. */
7783 static void
7784 rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
7786 const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);
7788 if (CONST_INT_P (si1) && CONST_INT_P (si2))
7790 unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
7791 unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;
7793 emit_move_insn (dest, GEN_INT (const1 | const2));
7794 return;
7797 /* Put si1 into upper 32-bits of dest. */
7798 if (CONST_INT_P (si1))
7799 emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
7800 else
7802 /* Generate RLDIC. */
7803 rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
7804 rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
7805 rtx mask_rtx = GEN_INT (mask_32bit << 32);
7806 rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
7807 gcc_assert (!reg_overlap_mentioned_p (dest, si1));
7808 emit_insn (gen_rtx_SET (dest, and_rtx));
7811 /* Put si2 into the temporary. */
7812 gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
7813 if (CONST_INT_P (si2))
7814 emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
7815 else
7816 emit_insn (gen_zero_extendsidi2 (tmp, si2));
7818 /* Combine the two parts. */
7819 emit_insn (gen_iordi3 (dest, dest, tmp));
7820 return;
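/* Worked example (hypothetical constants): si1 = 0x1 and si2 = 0x2
   give dest = 0x0000000100000002, i.e. si1 in the upper 32 bits and
   si2 zero-extended into the lower 32 bits.  */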
7823 /* Split a V4SI initialization. */
7825 void
7826 rs6000_split_v4si_init (rtx operands[])
7828 rtx dest = operands[0];
7830 /* Destination is a GPR, build up the two DImode parts in place. */
7831 if (REG_P (dest) || SUBREG_P (dest))
7833 int d_regno = regno_or_subregno (dest);
7834 rtx scalar1 = operands[1];
7835 rtx scalar2 = operands[2];
7836 rtx scalar3 = operands[3];
7837 rtx scalar4 = operands[4];
7838 rtx tmp1 = operands[5];
7839 rtx tmp2 = operands[6];
7841 /* Even though we only need one temporary (plus the destination, which
7842 has an early clobber constraint), try to use two temporaries, one for
7843 each double word created. That way the 2nd insn scheduling pass can
7844 rearrange things so the two parts are done in parallel. */
7845 if (BYTES_BIG_ENDIAN)
7847 rtx di_lo = gen_rtx_REG (DImode, d_regno);
7848 rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
7849 rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
7850 rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
7852 else
7854 rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
7855 rtx di_hi = gen_rtx_REG (DImode, d_regno);
7856 gcc_assert (!VECTOR_ELT_ORDER_BIG);
7857 rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
7858 rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
7860 return;
7863 else
7864 gcc_unreachable ();
7867 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
7868 selects whether the alignment is ABI-mandated, optional, or
7869 both ABI-mandated and optional alignment. */
7871 unsigned int
7872 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
7874 if (how != align_opt)
7876 if (TREE_CODE (type) == VECTOR_TYPE)
7878 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))
7880 if (align < 64)
7881 align = 64;
7883 else if (align < 128)
7884 align = 128;
7888 if (how != align_abi)
7890 if (TREE_CODE (type) == ARRAY_TYPE
7891 && TYPE_MODE (TREE_TYPE (type)) == QImode)
7893 if (align < BITS_PER_WORD)
7894 align = BITS_PER_WORD;
7898 return align;
7901 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
7902 instructions simply ignore the low bits; VSX memory instructions
7903 are aligned to 4 or 8 bytes. */
7905 static bool
7906 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
7908 return (STRICT_ALIGNMENT
7909 || (!TARGET_EFFICIENT_UNALIGNED_VSX
7910 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
7911 || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode))
7912 && (int) align < VECTOR_ALIGN (mode)))));
7915 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
7917 bool
7918 rs6000_special_adjust_field_align_p (tree type, unsigned int computed)
7920 if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE)
7922 if (computed != 128)
7924 static bool warned;
7925 if (!warned && warn_psabi)
7927 warned = true;
7928 inform (input_location,
7929 "the layout of aggregates containing vectors with"
7930 " %d-byte alignment has changed in GCC 5",
7931 computed / BITS_PER_UNIT);
7934 /* In current GCC there is no special case. */
7935 return false;
7938 return false;
7941 /* AIX increases natural record alignment to doubleword if the first
7942 field is an FP double while the FP fields remain word aligned. */
7944 unsigned int
7945 rs6000_special_round_type_align (tree type, unsigned int computed,
7946 unsigned int specified)
7948 unsigned int align = MAX (computed, specified);
7949 tree field = TYPE_FIELDS (type);
7951 /* Skip all non-field decls. */
7952 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7953 field = DECL_CHAIN (field);
7955 if (field != NULL && field != type)
7957 type = TREE_TYPE (field);
7958 while (TREE_CODE (type) == ARRAY_TYPE)
7959 type = TREE_TYPE (type);
7961 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
7962 align = MAX (align, 64);
7965 return align;
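/* Hypothetical example: for struct { double d; int i; } the first
   field has DFmode, so the returned alignment is raised to at least
   64 bits, even though the FP fields themselves stay word aligned.  */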
7968 /* Darwin increases record alignment to the natural alignment of
7969 the first field. */
7971 unsigned int
7972 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
7973 unsigned int specified)
7975 unsigned int align = MAX (computed, specified);
7977 if (TYPE_PACKED (type))
7978 return align;
7980 /* Find the first field, looking down into aggregates. */
7981 do {
7982 tree field = TYPE_FIELDS (type);
7984 /* Skip all non-field decls. */
7984 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
7985 field = DECL_CHAIN (field);
7986 if (! field)
7987 break;
7988 /* A packed field does not contribute any extra alignment. */
7989 if (DECL_PACKED (field))
7990 return align;
7991 type = TREE_TYPE (field);
7992 while (TREE_CODE (type) == ARRAY_TYPE)
7993 type = TREE_TYPE (type);
7994 } while (AGGREGATE_TYPE_P (type));
7996 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
7997 align = MAX (align, TYPE_ALIGN (type));
7999 return align;
8002 /* Return 1 for an operand in small memory on V.4/eabi. */
8004 int
8005 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8006 machine_mode mode ATTRIBUTE_UNUSED)
8008 #if TARGET_ELF
8009 rtx sym_ref;
8011 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8012 return 0;
8014 if (DEFAULT_ABI != ABI_V4)
8015 return 0;
8017 if (GET_CODE (op) == SYMBOL_REF)
8018 sym_ref = op;
8020 else if (GET_CODE (op) != CONST
8021 || GET_CODE (XEXP (op, 0)) != PLUS
8022 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
8023 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
8024 return 0;
8026 else
8028 rtx sum = XEXP (op, 0);
8029 HOST_WIDE_INT summand;
8031 /* We have to be careful here, because it is the referenced address
8032 that must be 32k from _SDA_BASE_, not just the symbol. */
8033 summand = INTVAL (XEXP (sum, 1));
8034 if (summand < 0 || summand > g_switch_value)
8035 return 0;
8037 sym_ref = XEXP (sum, 0);
8040 return SYMBOL_REF_SMALL_P (sym_ref);
8041 #else
8042 return 0;
8043 #endif
8046 /* Return true if either operand is a general purpose register. */
8048 bool
8049 gpr_or_gpr_p (rtx op0, rtx op1)
8051 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8052 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8055 /* Return true if this is a move direct operation between GPR registers and
8056 floating point/VSX registers. */
8058 bool
8059 direct_move_p (rtx op0, rtx op1)
8061 int regno0, regno1;
8063 if (!REG_P (op0) || !REG_P (op1))
8064 return false;
8066 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
8067 return false;
8069 regno0 = REGNO (op0);
8070 regno1 = REGNO (op1);
8071 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
8072 return false;
8074 if (INT_REGNO_P (regno0))
8075 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
8077 else if (INT_REGNO_P (regno1))
8079 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
8080 return true;
8082 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
8083 return true;
8086 return false;
8089 /* Return true if the OFFSET is valid for the quad address instructions that
8090 use d-form (register + offset) addressing. */
8092 static inline bool
8093 quad_address_offset_p (HOST_WIDE_INT offset)
8095 return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0);
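/* Illustrative offsets: 0, 16 and -32768 are accepted; 8 is rejected
   (not a multiple of 16) and 32768 is rejected (outside the signed
   16-bit displacement range).  */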
8098 /* Return true if the ADDR is an acceptable address for a quad memory
8099 operation of mode MODE (either LQ/STQ for general purpose registers, or
8100 LXV/STXV for vector registers under ISA 3.0). STRICT requests strict
8101 checking of the base register, as after register allocation. */
8104 bool
8105 quad_address_p (rtx addr, machine_mode mode, bool strict)
8107 rtx op0, op1;
8109 if (GET_MODE_SIZE (mode) != 16)
8110 return false;
8112 if (legitimate_indirect_address_p (addr, strict))
8113 return true;
8115 if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode))
8116 return false;
8118 if (GET_CODE (addr) != PLUS)
8119 return false;
8121 op0 = XEXP (addr, 0);
8122 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8123 return false;
8125 op1 = XEXP (addr, 1);
8126 if (!CONST_INT_P (op1))
8127 return false;
8129 return quad_address_offset_p (INTVAL (op1));
8132 /* Return true if this is a load or store quad operation. This function does
8133 not handle the atomic quad memory instructions. */
8135 bool
8136 quad_load_store_p (rtx op0, rtx op1)
8138 bool ret;
8140 if (!TARGET_QUAD_MEMORY)
8141 ret = false;
8143 else if (REG_P (op0) && MEM_P (op1))
8144 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8145 && quad_memory_operand (op1, GET_MODE (op1))
8146 && !reg_overlap_mentioned_p (op0, op1));
8148 else if (MEM_P (op0) && REG_P (op1))
8149 ret = (quad_memory_operand (op0, GET_MODE (op0))
8150 && quad_int_reg_operand (op1, GET_MODE (op1)));
8152 else
8153 ret = false;
8155 if (TARGET_DEBUG_ADDR)
8157 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8158 ret ? "true" : "false");
8159 debug_rtx (gen_rtx_SET (op0, op1));
8162 return ret;
8165 /* Given an address, return a constant offset term if one exists. */
8167 static rtx
8168 address_offset (rtx op)
8170 if (GET_CODE (op) == PRE_INC
8171 || GET_CODE (op) == PRE_DEC)
8172 op = XEXP (op, 0);
8173 else if (GET_CODE (op) == PRE_MODIFY
8174 || GET_CODE (op) == LO_SUM)
8175 op = XEXP (op, 1);
8177 if (GET_CODE (op) == CONST)
8178 op = XEXP (op, 0);
8180 if (GET_CODE (op) == PLUS)
8181 op = XEXP (op, 1);
8183 if (CONST_INT_P (op))
8184 return op;
8186 return NULL_RTX;
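/* Illustrative cases (hypothetical RTL): (plus (reg) (const_int 8))
   yields (const_int 8); (lo_sum (reg) (const (plus (symbol_ref)
   (const_int 4)))) yields (const_int 4); a plain (reg) yields
   NULL_RTX.  */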
8189 /* Return true if the MEM operand is a memory operand suitable for use
8190 with a (full width, possibly multiple) gpr load/store. On
8191 powerpc64 this means the offset must be divisible by 4.
8192 Implements 'Y' constraint.
8194 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8195 a constraint function we know the operand has satisfied a suitable
8196 memory predicate. Also accept some odd rtl generated by reload
8197 (see rs6000_legitimize_reload_address for various forms). It is
8198 important that reload rtl be accepted by appropriate constraints
8199 but not by the operand predicate.
8201 Offsetting a lo_sum should not be allowed, except where we know by
8202 alignment that a 32k boundary is not crossed, but see the ???
8203 comment in rs6000_legitimize_reload_address. Note that by
8204 "offsetting" here we mean a further offset to access parts of the
8205 MEM. It's fine to have a lo_sum where the inner address is offset
8206 from a sym, since the same sym+offset will appear in the high part
8207 of the address calculation. */
8209 bool
8210 mem_operand_gpr (rtx op, machine_mode mode)
8212 unsigned HOST_WIDE_INT offset;
8213 int extra;
8214 rtx addr = XEXP (op, 0);
8216 op = address_offset (addr);
8217 if (op == NULL_RTX)
8218 return true;
8220 offset = INTVAL (op);
8221 if (TARGET_POWERPC64 && (offset & 3) != 0)
8222 return false;
8224 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8225 if (extra < 0)
8226 extra = 0;
8228 if (GET_CODE (addr) == LO_SUM)
8229 /* For lo_sum addresses, we must allow any offset except one that
8230 causes a wrap, so test only the low 16 bits. */
8231 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8233 return offset + 0x8000 < 0x10000u - extra;
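/* Worked example (hypothetical numbers): a TImode access on powerpc64
   has extra = 8, since it needs a second doubleword.  Offset 32764 is
   rejected: 32764 + 0x8000 = 65532 is not below 0x10000 - 8, so the
   high doubleword at offset 32772 would overflow the 16-bit
   displacement.  Offset 32752 is accepted.  */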
8236 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8237 enforce an offset divisible by 4 even for 32-bit. */
8239 bool
8240 mem_operand_ds_form (rtx op, machine_mode mode)
8242 unsigned HOST_WIDE_INT offset;
8243 int extra;
8244 rtx addr = XEXP (op, 0);
8246 if (!offsettable_address_p (false, mode, addr))
8247 return false;
8249 op = address_offset (addr);
8250 if (op == NULL_RTX)
8251 return true;
8253 offset = INTVAL (op);
8254 if ((offset & 3) != 0)
8255 return false;
8257 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8258 if (extra < 0)
8259 extra = 0;
8261 if (GET_CODE (addr) == LO_SUM)
8262 /* For lo_sum addresses, we must allow any offset except one that
8263 causes a wrap, so test only the low 16 bits. */
8264 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
8266 return offset + 0x8000 < 0x10000u - extra;
8269 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8271 static bool
8272 reg_offset_addressing_ok_p (machine_mode mode)
8274 switch (mode)
8276 case E_V16QImode:
8277 case E_V8HImode:
8278 case E_V4SFmode:
8279 case E_V4SImode:
8280 case E_V2DFmode:
8281 case E_V2DImode:
8282 case E_V1TImode:
8283 case E_TImode:
8284 case E_TFmode:
8285 case E_KFmode:
8286 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8287 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8288 a vector mode, if we want to use the VSX registers to move it around,
8289 we need to restrict ourselves to reg+reg addressing. Similarly for
8290 IEEE 128-bit floating point that is passed in a single vector
8291 register. */
8292 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8293 return mode_supports_vsx_dform_quad (mode);
8294 break;
8296 case E_V2SImode:
8297 case E_V2SFmode:
8298 /* Paired vector modes. Only reg+reg addressing is valid. */
8299 if (TARGET_PAIRED_FLOAT)
8300 return false;
8301 break;
8303 case E_SDmode:
8304 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8305 addressing for the LFIWZX and STFIWX instructions. */
8306 if (TARGET_NO_SDMODE_STACK)
8307 return false;
8308 break;
8310 default:
8311 break;
8314 return true;
8317 static bool
8318 virtual_stack_registers_memory_p (rtx op)
8320 int regnum;
8322 if (GET_CODE (op) == REG)
8323 regnum = REGNO (op);
8325 else if (GET_CODE (op) == PLUS
8326 && GET_CODE (XEXP (op, 0)) == REG
8327 && GET_CODE (XEXP (op, 1)) == CONST_INT)
8328 regnum = REGNO (XEXP (op, 0));
8330 else
8331 return false;
8333 return (regnum >= FIRST_VIRTUAL_REGISTER
8334 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8337 /* Return true if a MODE-sized memory access to OP plus OFFSET
8338 is known to not straddle a 32k boundary. This function is used
8339 to determine whether -mcmodel=medium code can use TOC pointer
8340 relative addressing for OP. This means the alignment of the TOC
8341 pointer must also be taken into account, and unfortunately that is
8342 only 8 bytes. */
8344 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8345 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8346 #endif
8348 static bool
8349 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8350 machine_mode mode)
8352 tree decl;
8353 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8355 if (GET_CODE (op) != SYMBOL_REF)
8356 return false;
8358 /* ISA 3.0 vector d-form addressing is restricted; don't allow
8359 SYMBOL_REF. */
8360 if (mode_supports_vsx_dform_quad (mode))
8361 return false;
8363 dsize = GET_MODE_SIZE (mode);
8364 decl = SYMBOL_REF_DECL (op);
8365 if (!decl)
8367 if (dsize == 0)
8368 return false;
8370 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8371 replacing memory addresses with an anchor plus offset. We
8372 could find the decl by rummaging around in the block->objects
8373 VEC for the given offset but that seems like too much work. */
8374 dalign = BITS_PER_UNIT;
8375 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8376 && SYMBOL_REF_ANCHOR_P (op)
8377 && SYMBOL_REF_BLOCK (op) != NULL)
8379 struct object_block *block = SYMBOL_REF_BLOCK (op);
8381 dalign = block->alignment;
8382 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8384 else if (CONSTANT_POOL_ADDRESS_P (op))
8386 /* It would be nice to have get_pool_align()... */
8387 machine_mode cmode = get_pool_mode (op);
8389 dalign = GET_MODE_ALIGNMENT (cmode);
8392 else if (DECL_P (decl))
8394 dalign = DECL_ALIGN (decl);
8396 if (dsize == 0)
8398 /* Allow BLKmode when the entire object is known to not
8399 cross a 32k boundary. */
8400 if (!DECL_SIZE_UNIT (decl))
8401 return false;
8403 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8404 return false;
8406 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8407 if (dsize > 32768)
8408 return false;
8410 dalign /= BITS_PER_UNIT;
8411 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8412 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8413 return dalign >= dsize;
8416 else
8417 gcc_unreachable ();
8419 /* Find how many bits of the alignment we know for this access. */
8420 dalign /= BITS_PER_UNIT;
8421 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8422 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8423 mask = dalign - 1;
8424 lsb = offset & -offset;
8425 mask &= lsb - 1;
8426 dalign = mask + 1;
8428 return dalign >= dsize;
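/* Worked example (hypothetical numbers): the TOC pointer cap limits
   usable alignment to 8 bytes.  For an 8-byte access, offset 40 has
   lsb = 8, giving dalign = 8 >= dsize, so the access cannot straddle
   a 32k boundary; offset 36 has lsb = 4, giving dalign = 4 < 8, so we
   must assume it could.  */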
8431 static bool
8432 constant_pool_expr_p (rtx op)
8434 rtx base, offset;
8436 split_const (op, &base, &offset);
8437 return (GET_CODE (base) == SYMBOL_REF
8438 && CONSTANT_POOL_ADDRESS_P (base)
8439 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8442 /* These are only used to pass through from print_operand/print_operand_address
8443 to rs6000_output_addr_const_extra over the intervening function
8444 output_addr_const which is not target code. */
8445 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8447 /* Return true if OP is a toc pointer relative address (the output
8448 of create_TOC_reference). If STRICT, do not match non-split
8449 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8450 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8451 TOCREL_OFFSET_RET respectively. */
8453 bool
8454 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8455 const_rtx *tocrel_offset_ret)
8457 if (!TARGET_TOC)
8458 return false;
8460 if (TARGET_CMODEL != CMODEL_SMALL)
8462 /* When strict, ensure we have everything tidy. */
8463 if (strict
8464 && !(GET_CODE (op) == LO_SUM
8465 && REG_P (XEXP (op, 0))
8466 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8467 return false;
8469 /* When not strict, allow non-split TOC addresses and also allow
8470 (lo_sum (high ..)) TOC addresses created during reload. */
8471 if (GET_CODE (op) == LO_SUM)
8472 op = XEXP (op, 1);
8475 const_rtx tocrel_base = op;
8476 const_rtx tocrel_offset = const0_rtx;
8478 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8480 tocrel_base = XEXP (op, 0);
8481 tocrel_offset = XEXP (op, 1);
8484 if (tocrel_base_ret)
8485 *tocrel_base_ret = tocrel_base;
8486 if (tocrel_offset_ret)
8487 *tocrel_offset_ret = tocrel_offset;
8489 return (GET_CODE (tocrel_base) == UNSPEC
8490 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
8493 /* Return true if X is a constant pool address, and also for cmodel=medium
8494 if X is a toc-relative address known to be offsettable within MODE. */
8496 bool
8497 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8498 bool strict)
8500 const_rtx tocrel_base, tocrel_offset;
8501 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8502 && (TARGET_CMODEL != CMODEL_MEDIUM
8503 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8504 || mode == QImode
8505 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8506 INTVAL (tocrel_offset), mode)));
8509 static bool
8510 legitimate_small_data_p (machine_mode mode, rtx x)
8512 return (DEFAULT_ABI == ABI_V4
8513 && !flag_pic && !TARGET_TOC
8514 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
8515 && small_data_operand (x, mode));
8518 bool
8519 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8520 bool strict, bool worst_case)
8522 unsigned HOST_WIDE_INT offset;
8523 unsigned int extra;
8525 if (GET_CODE (x) != PLUS)
8526 return false;
8527 if (!REG_P (XEXP (x, 0)))
8528 return false;
8529 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8530 return false;
8531 if (mode_supports_vsx_dform_quad (mode))
8532 return quad_address_p (x, mode, strict);
8533 if (!reg_offset_addressing_ok_p (mode))
8534 return virtual_stack_registers_memory_p (x);
8535 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8536 return true;
8537 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8538 return false;
8540 offset = INTVAL (XEXP (x, 1));
8541 extra = 0;
8542 switch (mode)
8544 case E_V2SImode:
8545 case E_V2SFmode:
8546 /* Paired single modes: offset addressing isn't valid. */
8547 return false;
8549 case E_DFmode:
8550 case E_DDmode:
8551 case E_DImode:
8552 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8553 addressing. */
8554 if (VECTOR_MEM_VSX_P (mode))
8555 return false;
8557 if (!worst_case)
8558 break;
8559 if (!TARGET_POWERPC64)
8560 extra = 4;
8561 else if (offset & 3)
8562 return false;
8563 break;
8565 case E_TFmode:
8566 case E_IFmode:
8567 case E_KFmode:
8568 case E_TDmode:
8569 case E_TImode:
8570 case E_PTImode:
8571 extra = 8;
8572 if (!worst_case)
8573 break;
8574 if (!TARGET_POWERPC64)
8575 extra = 12;
8576 else if (offset & 3)
8577 return false;
8578 break;
8580 default:
8581 break;
8584 offset += 0x8000;
8585 return offset < 0x10000 - extra;
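/* Worked example (hypothetical numbers): TFmode with WORST_CASE on a
   32-bit target uses extra = 12, so offset 0x7ff0 is still accepted
   while 0x7ff4 is not: the last word of the value, at +12, must also
   be reachable within the 16-bit displacement range.  */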
8588 bool
8589 legitimate_indexed_address_p (rtx x, int strict)
8591 rtx op0, op1;
8593 if (GET_CODE (x) != PLUS)
8594 return false;
8596 op0 = XEXP (x, 0);
8597 op1 = XEXP (x, 1);
8599 return (REG_P (op0) && REG_P (op1)
8600 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
8601 && INT_REG_OK_FOR_INDEX_P (op1, strict))
8602 || (INT_REG_OK_FOR_BASE_P (op1, strict)
8603 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
8606 bool
8607 avoiding_indexed_address_p (machine_mode mode)
8609 /* Avoid indexed addressing for modes that have non-indexed
8610 load/store instruction forms. */
8611 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
8614 bool
8615 legitimate_indirect_address_p (rtx x, int strict)
8617 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
8620 bool
8621 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
8623 if (!TARGET_MACHO || !flag_pic
8624 || mode != SImode || GET_CODE (x) != MEM)
8625 return false;
8626 x = XEXP (x, 0);
8628 if (GET_CODE (x) != LO_SUM)
8629 return false;
8630 if (GET_CODE (XEXP (x, 0)) != REG)
8631 return false;
8632 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
8633 return false;
8634 x = XEXP (x, 1);
8636 return CONSTANT_P (x);
8639 static bool
8640 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
8642 if (GET_CODE (x) != LO_SUM)
8643 return false;
8644 if (GET_CODE (XEXP (x, 0)) != REG)
8645 return false;
8646 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8647 return false;
8648 /* Quad word addresses are restricted, and we can't use LO_SUM. */
8649 if (mode_supports_vsx_dform_quad (mode))
8650 return false;
8651 x = XEXP (x, 1);
8653 if (TARGET_ELF || TARGET_MACHO)
8655 bool large_toc_ok;
8657 if (DEFAULT_ABI == ABI_V4 && flag_pic)
8658 return false;
8659 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
8660 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
8661 recognizes some LO_SUM addresses as valid although this
8662 function says the opposite. In most cases LRA can, through
8663 different transformations, generate correct code for address
8664 reloads; it fails only for some LO_SUM cases. So we need to add
8665 code analogous to that in rs6000_legitimize_reload_address for
8666 LO_SUM here saying that some addresses are still valid. */
8667 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
8668 && small_toc_ref (x, VOIDmode));
8669 if (TARGET_TOC && ! large_toc_ok)
8670 return false;
8671 if (GET_MODE_NUNITS (mode) != 1)
8672 return false;
8673 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
8674 && !(/* ??? Assume floating point reg based on mode? */
8675 TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
8676 && (mode == DFmode || mode == DDmode)))
8677 return false;
8679 return CONSTANT_P (x) || large_toc_ok;
8682 return false;
8686 /* Try machine-dependent ways of modifying an illegitimate address
8687 to be legitimate. If we find one, return the new, valid address.
8688 This is used from only one place: `memory_address' in explow.c.
8690 OLDX is the address as it was before break_out_memory_refs was
8691 called. In some cases it is useful to look at this to decide what
8692 needs to be done.
8694 It is always safe for this function to do nothing. It exists to
8695 recognize opportunities to optimize the output.
8697 On RS/6000, first check for the sum of a register with a constant
8698 integer that is out of range. If so, generate code to add the
8699 constant with the low-order 16 bits masked to the register and force
8700 this result into another register (this can be done with `cau').
8701 Then generate an address of REG+(CONST&0xffff), allowing for the
8702 possibility of bit 16 being a one.
8704 Then check for the sum of a register and something not constant, try to
8705 load the other things into a register and return the sum. */
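/* Worked example (hypothetical values): legitimizing r3 + 0x12348000
   splits the constant as low_int = -0x8000 and
   high_int = 0x12350000, adds the high part into a new register, and
   returns (plus (reg) -0x8000), which is a valid D-form
   displacement.  */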
8707 static rtx
8708 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
8709 machine_mode mode)
8711 unsigned int extra;
8713 if (!reg_offset_addressing_ok_p (mode)
8714 || mode_supports_vsx_dform_quad (mode))
8716 if (virtual_stack_registers_memory_p (x))
8717 return x;
8719 /* In theory we should not be seeing addresses of the form reg+0,
8720 but just in case it is generated, optimize it away. */
8721 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
8722 return force_reg (Pmode, XEXP (x, 0));
8724 /* For TImode with load/store quad, restrict addresses to just a single
8725 pointer, so it works with both GPRs and VSX registers. */
8726 /* Make sure both operands are registers. */
8727 else if (GET_CODE (x) == PLUS
8728 && (mode != TImode || !TARGET_VSX))
8729 return gen_rtx_PLUS (Pmode,
8730 force_reg (Pmode, XEXP (x, 0)),
8731 force_reg (Pmode, XEXP (x, 1)));
8732 else
8733 return force_reg (Pmode, x);
8735 if (GET_CODE (x) == SYMBOL_REF)
8737 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
8738 if (model != 0)
8739 return rs6000_legitimize_tls_address (x, model);
8742 extra = 0;
8743 switch (mode)
8745 case E_TFmode:
8746 case E_TDmode:
8747 case E_TImode:
8748 case E_PTImode:
8749 case E_IFmode:
8750 case E_KFmode:
8751 /* As in legitimate_offset_address_p we do not assume
8752 worst-case. The mode here is just a hint as to the registers
8753 used. A TImode is usually in gprs, but may actually be in
8754 fprs. Leave worst-case scenario for reload to handle via
8755 insn constraints. PTImode is only GPRs. */
8756 extra = 8;
8757 break;
8758 default:
8759 break;
8762 if (GET_CODE (x) == PLUS
8763 && GET_CODE (XEXP (x, 0)) == REG
8764 && GET_CODE (XEXP (x, 1)) == CONST_INT
8765 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
8766 >= 0x10000 - extra)
8767 && !PAIRED_VECTOR_MODE (mode))
8769 HOST_WIDE_INT high_int, low_int;
8770 rtx sum;
8771 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
8772 if (low_int >= 0x8000 - extra)
8773 low_int = 0;
8774 high_int = INTVAL (XEXP (x, 1)) - low_int;
8775 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
8776 GEN_INT (high_int)), 0);
8777 return plus_constant (Pmode, sum, low_int);
8779 else if (GET_CODE (x) == PLUS
8780 && GET_CODE (XEXP (x, 0)) == REG
8781 && GET_CODE (XEXP (x, 1)) != CONST_INT
8782 && GET_MODE_NUNITS (mode) == 1
8783 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8784 || (/* ??? Assume floating point reg based on mode? */
8785 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8786 && (mode == DFmode || mode == DDmode)))
8787 && !avoiding_indexed_address_p (mode))
8789 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
8790 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
8792 else if (PAIRED_VECTOR_MODE (mode))
8794 if (mode == DImode)
8795 return x;
8796 /* We accept [reg + reg]. */
8798 if (GET_CODE (x) == PLUS)
8800 rtx op1 = XEXP (x, 0);
8801 rtx op2 = XEXP (x, 1);
8802 rtx y;
8804 op1 = force_reg (Pmode, op1);
8805 op2 = force_reg (Pmode, op2);
8807 /* We can't always do [reg + reg] for these, because [reg +
8808 reg + offset] is not a legitimate addressing mode. */
8809 y = gen_rtx_PLUS (Pmode, op1, op2);
8811 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
8812 return force_reg (Pmode, y);
8813 else
8814 return y;
8817 return force_reg (Pmode, x);
8819 else if ((TARGET_ELF
8820 #if TARGET_MACHO
8821 || !MACHO_DYNAMIC_NO_PIC_P
8822 #endif
8824 && TARGET_32BIT
8825 && TARGET_NO_TOC
8826 && ! flag_pic
8827 && GET_CODE (x) != CONST_INT
8828 && GET_CODE (x) != CONST_WIDE_INT
8829 && GET_CODE (x) != CONST_DOUBLE
8830 && CONSTANT_P (x)
8831 && GET_MODE_NUNITS (mode) == 1
8832 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
8833 || (/* ??? Assume floating point reg based on mode? */
8834 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
8835 && (mode == DFmode || mode == DDmode))))
8837 rtx reg = gen_reg_rtx (Pmode);
8838 if (TARGET_ELF)
8839 emit_insn (gen_elf_high (reg, x));
8840 else
8841 emit_insn (gen_macho_high (reg, x));
8842 return gen_rtx_LO_SUM (Pmode, reg, x);
8844 else if (TARGET_TOC
8845 && GET_CODE (x) == SYMBOL_REF
8846 && constant_pool_expr_p (x)
8847 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
8848 return create_TOC_reference (x, NULL_RTX);
8849 else
8850 return x;
8853 /* Debug version of rs6000_legitimize_address. */
8854 static rtx
8855 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
8857 rtx ret;
8858 rtx_insn *insns;
8860 start_sequence ();
8861 ret = rs6000_legitimize_address (x, oldx, mode);
8862 insns = get_insns ();
8863 end_sequence ();
8865 if (ret != x)
8867 fprintf (stderr,
8868 "\nrs6000_legitimize_address: mode %s, old code %s, "
8869 "new code %s, modified\n",
8870 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
8871 GET_RTX_NAME (GET_CODE (ret)));
8873 fprintf (stderr, "Original address:\n");
8874 debug_rtx (x);
8876 fprintf (stderr, "oldx:\n");
8877 debug_rtx (oldx);
8879 fprintf (stderr, "New address:\n");
8880 debug_rtx (ret);
8882 if (insns)
8884 fprintf (stderr, "Insns added:\n");
8885 debug_rtx_list (insns, 20);
8888 else
8890 fprintf (stderr,
8891 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
8892 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
8894 debug_rtx (x);
8897 if (insns)
8898 emit_insn (insns);
8900 return ret;
8903 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8904 We need to emit DTP-relative relocations. */
8906 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
8907 static void
8908 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
8910 switch (size)
8912 case 4:
8913 fputs ("\t.long\t", file);
8914 break;
8915 case 8:
8916 fputs (DOUBLE_INT_ASM_OP, file);
8917 break;
8918 default:
8919 gcc_unreachable ();
8921 output_addr_const (file, x);
8922 if (TARGET_ELF)
8923 fputs ("@dtprel+0x8000", file);
8924 else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF)
8926 switch (SYMBOL_REF_TLS_MODEL (x))
8928 case 0:
8929 break;
8930 case TLS_MODEL_LOCAL_EXEC:
8931 fputs ("@le", file);
8932 break;
8933 case TLS_MODEL_INITIAL_EXEC:
8934 fputs ("@ie", file);
8935 break;
8936 case TLS_MODEL_GLOBAL_DYNAMIC:
8937 case TLS_MODEL_LOCAL_DYNAMIC:
8938 fputs ("@m", file);
8939 break;
8940 default:
8941 gcc_unreachable ();
8946 /* Return true if X is a symbol that refers to real (rather than emulated)
8947 TLS. */
8949 static bool
8950 rs6000_real_tls_symbol_ref_p (rtx x)
8952 return (GET_CODE (x) == SYMBOL_REF
8953 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
8956 /* In the name of slightly smaller debug output, and to cater to
8957 general assembler lossage, recognize various UNSPEC sequences
8958 and turn them back into a direct symbol reference. */
8960 static rtx
8961 rs6000_delegitimize_address (rtx orig_x)
8963 rtx x, y, offset;
8965 orig_x = delegitimize_mem_from_attrs (orig_x);
8966 x = orig_x;
8967 if (MEM_P (x))
8968 x = XEXP (x, 0);
8970 y = x;
8971 if (TARGET_CMODEL != CMODEL_SMALL
8972 && GET_CODE (y) == LO_SUM)
8973 y = XEXP (y, 1);
8975 offset = NULL_RTX;
8976 if (GET_CODE (y) == PLUS
8977 && GET_MODE (y) == Pmode
8978 && CONST_INT_P (XEXP (y, 1)))
8980 offset = XEXP (y, 1);
8981 y = XEXP (y, 0);
8984 if (GET_CODE (y) == UNSPEC
8985 && XINT (y, 1) == UNSPEC_TOCREL)
8987 y = XVECEXP (y, 0, 0);
8989 #ifdef HAVE_AS_TLS
8990 /* Do not associate thread-local symbols with the original
8991 constant pool symbol. */
8992 if (TARGET_XCOFF
8993 && GET_CODE (y) == SYMBOL_REF
8994 && CONSTANT_POOL_ADDRESS_P (y)
8995 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
8996 return orig_x;
8997 #endif
8999 if (offset != NULL_RTX)
9000 y = gen_rtx_PLUS (Pmode, y, offset);
9001 if (!MEM_P (orig_x))
9002 return y;
9003 else
9004 return replace_equiv_address_nv (orig_x, y);
9007 if (TARGET_MACHO
9008 && GET_CODE (orig_x) == LO_SUM
9009 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9011 y = XEXP (XEXP (orig_x, 1), 0);
9012 if (GET_CODE (y) == UNSPEC
9013 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9014 return XVECEXP (y, 0, 0);
9017 return orig_x;
9020 /* Return true if X shouldn't be emitted into the debug info.
9021 The linker doesn't like .toc section references from
9022 .debug_* sections, so reject .toc section symbols. */
9024 static bool
9025 rs6000_const_not_ok_for_debug_p (rtx x)
9027 if (GET_CODE (x) == UNSPEC)
9028 return true;
9029 if (GET_CODE (x) == SYMBOL_REF
9030 && CONSTANT_POOL_ADDRESS_P (x))
9032 rtx c = get_pool_constant (x);
9033 machine_mode cmode = get_pool_mode (x);
9034 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9035 return true;
9038 return false;
9042 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9044 static bool
9045 rs6000_legitimate_combined_insn (rtx_insn *insn)
9047 int icode = INSN_CODE (insn);
9049 /* Reject creating doloop insns. Combine should not be allowed
9050 to create these for a number of reasons:
9051 1) In a nested loop, if combine creates one of these in an
9052 outer loop and the register allocator happens to allocate ctr
9053 to the outer loop insn, then the inner loop can't use ctr.
9054 Inner loops ought to be more highly optimized.
9055 2) Combine often wants to create one of these from what was
9056 originally a three insn sequence, first combining the three
9057 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9058 allocated ctr, the splitter takes us back to the three insn
9059 sequence. It's better to stop combine at the two insn
9060 sequence.
9061 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9062 insns, the register allocator sometimes uses floating point
9063 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9064 jump insn and output reloads are not implemented for jumps,
9065 the ctrsi/ctrdi splitters need to handle all possible cases.
9066 That's a pain, and it gets to be seriously difficult when a
9067 splitter that runs after reload needs memory to transfer from
9068 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9069 for the difficult case. It's better to not create problems
9070 in the first place. */
9071 if (icode != CODE_FOR_nothing
9072 && (icode == CODE_FOR_ctrsi_internal1
9073 || icode == CODE_FOR_ctrdi_internal1
9074 || icode == CODE_FOR_ctrsi_internal2
9075 || icode == CODE_FOR_ctrdi_internal2))
9076 return false;
9078 return true;
9081 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9083 static GTY(()) rtx rs6000_tls_symbol;
9084 static rtx
9085 rs6000_tls_get_addr (void)
9087 if (!rs6000_tls_symbol)
9088 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9090 return rs6000_tls_symbol;
9093 /* Construct the SYMBOL_REF for TLS GOT references. */
9095 static GTY(()) rtx rs6000_got_symbol;
9096 static rtx
9097 rs6000_got_sym (void)
9099 if (!rs6000_got_symbol)
9101 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9102 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9103 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9106 return rs6000_got_symbol;
9109 /* AIX Thread-Local Address support. */
9111 static rtx
9112 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9114 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
9115 const char *name;
9116 char *tlsname;
9118 name = XSTR (addr, 0);
9119 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
9120 or the symbol will be in the TLS private data section. */
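/* E.g. (hypothetical symbol): "foo" becomes "foo[TL]" for initialized
   TLS data, or "foo[UL]" for a bss-style initializer, matching the
   XCOFF csect naming convention.  */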
9121 if (name[strlen (name) - 1] != ']'
9122 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
9123 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
9125 tlsname = XALLOCAVEC (char, strlen (name) + 4);
9126 strcpy (tlsname, name);
9127 strcat (tlsname,
9128 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
9129 tlsaddr = copy_rtx (addr);
9130 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
9132 else
9133 tlsaddr = addr;
9135 /* Place addr into TOC constant pool. */
9136 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
9138 /* Output the TOC entry and create the MEM referencing the value. */
9139 if (constant_pool_expr_p (XEXP (sym, 0))
9140 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9142 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9143 mem = gen_const_mem (Pmode, tocref);
9144 set_mem_alias_set (mem, get_TOC_alias_set ());
9146 else
9147 return sym;
9149 /* Use global-dynamic for local-dynamic. */
9150 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9151 || model == TLS_MODEL_LOCAL_DYNAMIC)
9153 /* Create new TOC reference for @m symbol. */
9154 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9155 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9156 strcpy (tlsname, "*LCM");
9157 strcat (tlsname, name + 3);
9158 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9159 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9160 tocref = create_TOC_reference (modaddr, NULL_RTX);
9161 rtx modmem = gen_const_mem (Pmode, tocref);
9162 set_mem_alias_set (modmem, get_TOC_alias_set ());
9164 rtx modreg = gen_reg_rtx (Pmode);
9165 emit_insn (gen_rtx_SET (modreg, modmem));
9167 tmpreg = gen_reg_rtx (Pmode);
9168 emit_insn (gen_rtx_SET (tmpreg, mem));
9170 dest = gen_reg_rtx (Pmode);
9171 if (TARGET_32BIT)
9172 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9173 else
9174 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9175 return dest;
9177 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9178 else if (TARGET_32BIT)
9180 tlsreg = gen_reg_rtx (SImode);
9181 emit_insn (gen_tls_get_tpointer (tlsreg));
9183 else
9184 tlsreg = gen_rtx_REG (DImode, 13);
9186 /* Load the TOC value into temporary register. */
9187 tmpreg = gen_reg_rtx (Pmode);
9188 emit_insn (gen_rtx_SET (tmpreg, mem));
9189 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9190 gen_rtx_MINUS (Pmode, addr, tlsreg));
9192 /* Add TOC symbol value to TLS pointer. */
9193 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9195 return dest;
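/* Editorial example (hypothetical source): a TLS access such as

	__thread int counter;
	int get (void) { return counter; }

   reaches rs6000_legitimize_tls_address_aix with ADDR being the
   SYMBOL_REF for "counter"; the code above appends the [TL] or [UL]
   CSECT qualifier, materializes the TOC entries, and for the dynamic
   models calls __tls_get_addr. */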
9198 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9199 this (thread-local) address. */
9201 static rtx
9202 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9204 rtx dest, insn;
9206 if (TARGET_XCOFF)
9207 return rs6000_legitimize_tls_address_aix (addr, model);
9209 dest = gen_reg_rtx (Pmode);
9210 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
9212 rtx tlsreg;
9214 if (TARGET_64BIT)
9216 tlsreg = gen_rtx_REG (Pmode, 13);
9217 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9219 else
9221 tlsreg = gen_rtx_REG (Pmode, 2);
9222 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9224 emit_insn (insn);
9226 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9228 rtx tlsreg, tmp;
9230 tmp = gen_reg_rtx (Pmode);
9231 if (TARGET_64BIT)
9233 tlsreg = gen_rtx_REG (Pmode, 13);
9234 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9236 else
9238 tlsreg = gen_rtx_REG (Pmode, 2);
9239 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9241 emit_insn (insn);
9242 if (TARGET_64BIT)
9243 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9244 else
9245 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9246 emit_insn (insn);
9248 else
9250 rtx r3, got, tga, tmp1, tmp2, call_insn;
9252 /* We currently use relocations like @got@tlsgd for tls, which
9253 means the linker will handle allocation of tls entries, placing
9254 them in the .got section. So use a pointer to the .got section,
9255 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9256 or to secondary GOT sections used by 32-bit -fPIC. */
9257 if (TARGET_64BIT)
9258 got = gen_rtx_REG (Pmode, 2);
9259 else
9261 if (flag_pic == 1)
9262 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9263 else
9265 rtx gsym = rs6000_got_sym ();
9266 got = gen_reg_rtx (Pmode);
9267 if (flag_pic == 0)
9268 rs6000_emit_move (got, gsym, Pmode);
9269 else
9271 rtx mem, lab;
9273 tmp1 = gen_reg_rtx (Pmode);
9274 tmp2 = gen_reg_rtx (Pmode);
9275 mem = gen_const_mem (Pmode, tmp1);
9276 lab = gen_label_rtx ();
9277 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9278 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9279 if (TARGET_LINK_STACK)
9280 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9281 emit_move_insn (tmp2, mem);
9282 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9283 set_unique_reg_note (last, REG_EQUAL, gsym);
9288 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9290 tga = rs6000_tls_get_addr ();
9291 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
9292 const0_rtx, Pmode);
9294 r3 = gen_rtx_REG (Pmode, 3);
9295 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9297 if (TARGET_64BIT)
9298 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
9299 else
9300 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
9302 else if (DEFAULT_ABI == ABI_V4)
9303 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
9304 else
9305 gcc_unreachable ();
9306 call_insn = last_call_insn ();
9307 PATTERN (call_insn) = insn;
9308 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9309 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9310 pic_offset_table_rtx);
9312 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9314 tga = rs6000_tls_get_addr ();
9315 tmp1 = gen_reg_rtx (Pmode);
9316 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
9317 const0_rtx, Pmode);
9319 r3 = gen_rtx_REG (Pmode, 3);
9320 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9322 if (TARGET_64BIT)
9323 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
9324 else
9325 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
9327 else if (DEFAULT_ABI == ABI_V4)
9328 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
9329 else
9330 gcc_unreachable ();
9331 call_insn = last_call_insn ();
9332 PATTERN (call_insn) = insn;
9333 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
9334 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
9335 pic_offset_table_rtx);
9337 if (rs6000_tls_size == 16)
9339 if (TARGET_64BIT)
9340 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9341 else
9342 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9344 else if (rs6000_tls_size == 32)
9346 tmp2 = gen_reg_rtx (Pmode);
9347 if (TARGET_64BIT)
9348 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9349 else
9350 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9351 emit_insn (insn);
9352 if (TARGET_64BIT)
9353 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9354 else
9355 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9357 else
9359 tmp2 = gen_reg_rtx (Pmode);
9360 if (TARGET_64BIT)
9361 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9362 else
9363 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9364 emit_insn (insn);
9365 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9367 emit_insn (insn);
9369 else
9371 /* IE, or 64-bit offset LE. */
9372 tmp2 = gen_reg_rtx (Pmode);
9373 if (TARGET_64BIT)
9374 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9375 else
9376 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9377 emit_insn (insn);
9378 if (TARGET_64BIT)
9379 insn = gen_tls_tls_64 (dest, tmp2, addr);
9380 else
9381 insn = gen_tls_tls_32 (dest, tmp2, addr);
9382 emit_insn (insn);
9386 return dest;
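/* Editorial note, hedged reading of the dispatch above: rs6000_tls_size
   reflects the -mtls-size=16/32/64 option, selecting between the single
   tprel/dtprel form (16) and the high/low-adjusted pair (32); larger
   offsets fall back to a GOT load of the offset. */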
9389 /* Only create the global variable for the stack protect guard if we are using
9390 the global flavor of that guard. */
9391 static tree
9392 rs6000_init_stack_protect_guard (void)
9394 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9395 return default_stack_protect_guard ();
9397 return NULL_TREE;
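/* Editorial note (hedged summary based on the option names): SSP_GLOBAL
   corresponds to -mstack-protector-guard=global.  With the TLS flavor
   the canary is instead read at a fixed offset from the thread-pointer
   register, so no global __stack_chk_guard tree is needed and NULL_TREE
   is returned; offsets are ABI-specific. */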
9400 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9402 static bool
9403 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9405 if (GET_CODE (x) == HIGH
9406 && GET_CODE (XEXP (x, 0)) == UNSPEC)
9407 return true;
9409 /* A TLS symbol in the TOC cannot contain a sum. */
9410 if (GET_CODE (x) == CONST
9411 && GET_CODE (XEXP (x, 0)) == PLUS
9412 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
9413 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9414 return true;
9416 /* Do not place an ELF TLS symbol in the constant pool. */
9417 return TARGET_ELF && tls_referenced_p (x);
9420 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9421 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9422 can be addressed relative to the toc pointer. */
9424 static bool
9425 use_toc_relative_ref (rtx sym, machine_mode mode)
9427 return ((constant_pool_expr_p (sym)
9428 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9429 get_pool_mode (sym)))
9430 || (TARGET_CMODEL == CMODEL_MEDIUM
9431 && SYMBOL_REF_LOCAL_P (sym)
9432 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9435 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
9436 replace the input X, or the original X if no replacement is called for.
9437 The output parameter *WIN is 1 if the calling macro should goto WIN,
9438 0 if it should not.
9440 For RS/6000, we wish to handle large displacements off a base
9441 register by splitting the addend across an addi/addis and the mem insn.
9442 This cuts the number of extra insns needed from 3 to 1.
9444 On Darwin, we use this to generate code for floating point constants.
9445 A movsf_low is generated so we wind up with 2 instructions rather than 3.
9446 The Darwin code is inside #if TARGET_MACHO because only then are the
9447 machopic_* functions defined. */
9448 static rtx
9449 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
9450 int opnum, int type,
9451 int ind_levels ATTRIBUTE_UNUSED, int *win)
9453 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9454 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9456 /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
9457 DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
9458 if (reg_offset_p
9459 && opnum == 1
9460 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
9461 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
9462 || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
9463 && TARGET_P9_VECTOR)
9464 || (mode == SImode && recog_data.operand_mode[0] == V4SImode
9465 && TARGET_P9_VECTOR)))
9466 reg_offset_p = false;
9468 /* We must recognize output that we have already generated ourselves. */
9469 if (GET_CODE (x) == PLUS
9470 && GET_CODE (XEXP (x, 0)) == PLUS
9471 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9472 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9473 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9475 if (TARGET_DEBUG_ADDR)
9477 fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
9478 debug_rtx (x);
9480 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9481 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9482 opnum, (enum reload_type) type);
9483 *win = 1;
9484 return x;
9487 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
9488 if (GET_CODE (x) == LO_SUM
9489 && GET_CODE (XEXP (x, 0)) == HIGH)
9491 if (TARGET_DEBUG_ADDR)
9493 fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
9494 debug_rtx (x);
9496 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9497 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9498 opnum, (enum reload_type) type);
9499 *win = 1;
9500 return x;
9503 #if TARGET_MACHO
9504 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
9505 && GET_CODE (x) == LO_SUM
9506 && GET_CODE (XEXP (x, 0)) == PLUS
9507 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
9508 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
9509 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
9510 && machopic_operand_p (XEXP (x, 1)))
9512 /* Result of previous invocation of this function on Darwin
9513 floating point constant. */
9514 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9515 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9516 opnum, (enum reload_type) type);
9517 *win = 1;
9518 return x;
9520 #endif
9522 if (TARGET_CMODEL != CMODEL_SMALL
9523 && reg_offset_p
9524 && !quad_offset_p
9525 && small_toc_ref (x, VOIDmode))
9527 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
9528 x = gen_rtx_LO_SUM (Pmode, hi, x);
9529 if (TARGET_DEBUG_ADDR)
9531 fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
9532 debug_rtx (x);
9534 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9535 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9536 opnum, (enum reload_type) type);
9537 *win = 1;
9538 return x;
9541 if (GET_CODE (x) == PLUS
9542 && REG_P (XEXP (x, 0))
9543 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
9544 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
9545 && CONST_INT_P (XEXP (x, 1))
9546 && reg_offset_p
9547 && !PAIRED_VECTOR_MODE (mode)
9548 && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
9550 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
9551 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
9552 HOST_WIDE_INT high
9553 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
9555 /* Check for 32-bit overflow or quad addresses with one of the
9556 four least significant bits set. */
9557 if (high + low != val
9558 || (quad_offset_p && (low & 0xf)))
9560 *win = 0;
9561 return x;
9564 /* Reload the high part into a base reg; leave the low part
9565 in the mem directly. */
9567 x = gen_rtx_PLUS (GET_MODE (x),
9568 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
9569 GEN_INT (high)),
9570 GEN_INT (low));
9572 if (TARGET_DEBUG_ADDR)
9574 fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
9575 debug_rtx (x);
9577 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9578 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
9579 opnum, (enum reload_type) type);
9580 *win = 1;
9581 return x;
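/* Editorial worked example for the split above: for val = 0x12348000,
   low = ((0x8000 ^ 0x8000) - 0x8000) = -0x8000 and high = 0x12350000,
   so high + low == val.  The high part is reloaded into a base reg
   (an addis of 0x1235) while -0x8000 stays as the 16-bit signed
   offset in the mem. */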
9584 if (GET_CODE (x) == SYMBOL_REF
9585 && reg_offset_p
9586 && !quad_offset_p
9587 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
9588 && !PAIRED_VECTOR_MODE (mode)
9589 #if TARGET_MACHO
9590 && DEFAULT_ABI == ABI_DARWIN
9591 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
9592 && machopic_symbol_defined_p (x)
9593 #else
9594 && DEFAULT_ABI == ABI_V4
9595 && !flag_pic
9596 #endif
9597 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
9598 The same goes for DImode without 64-bit gprs and DFmode and DDmode
9599 without fprs.
9600 ??? Assume floating point reg based on mode? This assumption is
9601 violated by e.g. the powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
9602 where reload ends up doing a DFmode load of a constant from
9603 mem using two gprs. Unfortunately, at this point reload
9604 hasn't yet selected regs so poking around in reload data
9605 won't help and even if we could figure out the regs reliably,
9606 we'd still want to allow this transformation when the mem is
9607 naturally aligned. Since we say the address is good here, we
9608 can't disable offsets from LO_SUMs in mem_operand_gpr.
9609 FIXME: Allow offset from lo_sum for other modes too, when
9610 mem is sufficiently aligned.
9612 Also disallow this if the type can go in VMX/Altivec registers, since
9613 those registers do not have d-form (reg+offset) address modes. */
9614 && !reg_addr[mode].scalar_in_vmx_p
9615 && mode != TFmode
9616 && mode != TDmode
9617 && mode != IFmode
9618 && mode != KFmode
9619 && (mode != TImode || !TARGET_VSX)
9620 && mode != PTImode
9621 && (mode != DImode || TARGET_POWERPC64)
9622 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
9623 || (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)))
9625 #if TARGET_MACHO
9626 if (flag_pic)
9628 rtx offset = machopic_gen_offset (x);
9629 x = gen_rtx_LO_SUM (GET_MODE (x),
9630 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
9631 gen_rtx_HIGH (Pmode, offset)), offset);
9633 else
9634 #endif
9635 x = gen_rtx_LO_SUM (GET_MODE (x),
9636 gen_rtx_HIGH (Pmode, x), x);
9638 if (TARGET_DEBUG_ADDR)
9640 fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
9641 debug_rtx (x);
9643 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9644 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9645 opnum, (enum reload_type) type);
9646 *win = 1;
9647 return x;
9650 /* Reload an offset address wrapped by an AND that represents the
9651 masking of the lower bits. Strip the outer AND and let reload
9652 convert the offset address into an indirect address. For VSX,
9653 force reload to create the address with an AND in a separate
9654 register, because we can't guarantee an altivec register will
9655 be used. */
9656 if (VECTOR_MEM_ALTIVEC_P (mode)
9657 && GET_CODE (x) == AND
9658 && GET_CODE (XEXP (x, 0)) == PLUS
9659 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
9660 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
9661 && GET_CODE (XEXP (x, 1)) == CONST_INT
9662 && INTVAL (XEXP (x, 1)) == -16)
9664 x = XEXP (x, 0);
9665 *win = 1;
9666 return x;
9669 if (TARGET_TOC
9670 && reg_offset_p
9671 && !quad_offset_p
9672 && GET_CODE (x) == SYMBOL_REF
9673 && use_toc_relative_ref (x, mode))
9675 x = create_TOC_reference (x, NULL_RTX);
9676 if (TARGET_CMODEL != CMODEL_SMALL)
9678 if (TARGET_DEBUG_ADDR)
9680 fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
9681 debug_rtx (x);
9683 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
9684 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
9685 opnum, (enum reload_type) type);
9687 *win = 1;
9688 return x;
9690 *win = 0;
9691 return x;
9694 /* Debug version of rs6000_legitimize_reload_address. */
9695 static rtx
9696 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
9697 int opnum, int type,
9698 int ind_levels, int *win)
9700 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
9701 ind_levels, win);
9702 fprintf (stderr,
9703 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
9704 "type = %d, ind_levels = %d, win = %d, original addr:\n",
9705 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
9706 debug_rtx (x);
9708 if (x == ret)
9709 fprintf (stderr, "Same address returned\n");
9710 else if (!ret)
9711 fprintf (stderr, "NULL returned\n");
9712 else
9714 fprintf (stderr, "New address:\n");
9715 debug_rtx (ret);
9718 return ret;
9721 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9722 that is a valid memory address for an instruction.
9723 The MODE argument is the machine mode for the MEM expression
9724 that wants to use this address.
9726 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
9727 refers to a constant pool entry of an address (or the sum of it
9728 plus a constant), a short (16-bit signed) constant plus a register,
9729 the sum of two registers, or a register indirect, possibly with an
9730 auto-increment. For DFmode, DDmode and DImode with a constant plus
9731 register, we must ensure that both words are addressable, or on
9732 PowerPC64 that the offset is word aligned.
9734 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9735 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9736 because adjacent memory cells are accessed by adding word-sized offsets
9737 during assembly output. */
9738 static bool
9739 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
9741 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9742 bool quad_offset_p = mode_supports_vsx_dform_quad (mode);
9744 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
9745 if (VECTOR_MEM_ALTIVEC_P (mode)
9746 && GET_CODE (x) == AND
9747 && GET_CODE (XEXP (x, 1)) == CONST_INT
9748 && INTVAL (XEXP (x, 1)) == -16)
9749 x = XEXP (x, 0);
9751 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9752 return 0;
9753 if (legitimate_indirect_address_p (x, reg_ok_strict))
9754 return 1;
9755 if (TARGET_UPDATE
9756 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9757 && mode_supports_pre_incdec_p (mode)
9758 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9759 return 1;
9760 /* Handle restricted vector d-form offsets in ISA 3.0. */
9761 if (quad_offset_p)
9763 if (quad_address_p (x, mode, reg_ok_strict))
9764 return 1;
9766 else if (virtual_stack_registers_memory_p (x))
9767 return 1;
9769 else if (reg_offset_p)
9771 if (legitimate_small_data_p (mode, x))
9772 return 1;
9773 if (legitimate_constant_pool_address_p (x, mode,
9774 reg_ok_strict || lra_in_progress))
9775 return 1;
9776 if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
9777 && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
9778 return 1;
9781 /* For TImode, if we have TImode in VSX registers, only allow register
9782 indirect addresses. This will allow the values to go in either GPRs
9783 or VSX registers without reloading. The vector types would tend to
9784 go into VSX registers, so we allow REG+REG, while TImode seems
9785 somewhat split, in that some uses are GPR based, and some VSX based. */
9786 /* FIXME: We could loosen this by changing the following to
9787 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9788 but currently we cannot allow REG+REG addressing for TImode. See
9789 PR72827 for complete details on how this ends up hoodwinking DSE. */
9790 if (mode == TImode && TARGET_VSX)
9791 return 0;
9792 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9793 if (! reg_ok_strict
9794 && reg_offset_p
9795 && GET_CODE (x) == PLUS
9796 && GET_CODE (XEXP (x, 0)) == REG
9797 && (XEXP (x, 0) == virtual_stack_vars_rtx
9798 || XEXP (x, 0) == arg_pointer_rtx)
9799 && GET_CODE (XEXP (x, 1)) == CONST_INT)
9800 return 1;
9801 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9802 return 1;
9803 if (!FLOAT128_2REG_P (mode)
9804 && ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
9805 || TARGET_POWERPC64
9806 || (mode != DFmode && mode != DDmode))
9807 && (TARGET_POWERPC64 || mode != DImode)
9808 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9809 && mode != PTImode
9810 && !avoiding_indexed_address_p (mode)
9811 && legitimate_indexed_address_p (x, reg_ok_strict))
9812 return 1;
9813 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9814 && mode_supports_pre_modify_p (mode)
9815 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9816 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9817 reg_ok_strict, false)
9818 || (!avoiding_indexed_address_p (mode)
9819 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9820 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9821 return 1;
9822 if (reg_offset_p && !quad_offset_p
9823 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9824 return 1;
9825 return 0;
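/* Editorial sketch of the four address forms described before the
   function above (illustrative PowerPC mnemonics only):

	ld   r3,.LC0@toc(r2)	# TOC-relative constant pool SYMBOL_REF
	lwz  r3,8(r4)		# 16-bit signed constant + register
	lwzx r3,r4,r5		# sum of two registers (indexed)
	lwzu r3,4(r4)		# register indirect with update  */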
9828 /* Debug version of rs6000_legitimate_address_p. */
9829 static bool
9830 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
9831 bool reg_ok_strict)
9833 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
9834 fprintf (stderr,
9835 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
9836 "strict = %d, reload = %s, code = %s\n",
9837 ret ? "true" : "false",
9838 GET_MODE_NAME (mode),
9839 reg_ok_strict,
9840 (reload_completed ? "after" : "before"),
9841 GET_RTX_NAME (GET_CODE (x)));
9842 debug_rtx (x);
9844 return ret;
9847 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
9849 static bool
9850 rs6000_mode_dependent_address_p (const_rtx addr,
9851 addr_space_t as ATTRIBUTE_UNUSED)
9853 return rs6000_mode_dependent_address_ptr (addr);
9856 /* Go to LABEL if ADDR (a legitimate address expression)
9857 has an effect that depends on the machine mode it is used for.
9859 On the RS/6000 this is true of all integral offsets (since AltiVec
9860 and VSX modes don't allow them) and of any pre-increment or decrement.
9862 ??? Except that due to conceptual problems in offsettable_address_p
9863 we can't really report the problems of integral offsets. So leave
9864 this assuming that the adjustable offset must be valid for the
9865 sub-words of a TFmode operand, which is what we had before. */
9867 static bool
9868 rs6000_mode_dependent_address (const_rtx addr)
9870 switch (GET_CODE (addr))
9872 case PLUS:
9873 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
9874 is considered a legitimate address before reload, so there
9875 are no offset restrictions in that case. Note that this
9876 condition is safe in strict mode because any address involving
9877 virtual_stack_vars_rtx or arg_pointer_rtx would already have
9878 been rejected as illegitimate. */
9879 if (XEXP (addr, 0) != virtual_stack_vars_rtx
9880 && XEXP (addr, 0) != arg_pointer_rtx
9881 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
9883 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
9884 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
9886 break;
9888 case LO_SUM:
9889 /* Anything in the constant pool is sufficiently aligned that
9890 all bytes have the same high part address. */
9891 return !legitimate_constant_pool_address_p (addr, QImode, false);
9893 /* Auto-increment cases are now treated generically in recog.c. */
9894 case PRE_MODIFY:
9895 return TARGET_UPDATE;
9897 /* AND is only allowed in Altivec loads. */
9898 case AND:
9899 return true;
9901 default:
9902 break;
9905 return false;
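/* Editorial worked example: on 64-bit, an offset of 32760 (0x7ff8)
   trips the PLUS check above because 0x7ff8 + 0x8000 >= 0x10000 - 8;
   a second doubleword at offset 32768 would no longer fit the 16-bit
   signed displacement, so the address is mode dependent. */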
9908 /* Debug version of rs6000_mode_dependent_address. */
9909 static bool
9910 rs6000_debug_mode_dependent_address (const_rtx addr)
9912 bool ret = rs6000_mode_dependent_address (addr);
9914 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
9915 ret ? "true" : "false");
9916 debug_rtx (addr);
9918 return ret;
9921 /* Implement FIND_BASE_TERM. */
9924 rs6000_find_base_term (rtx op)
9926 rtx base;
9928 base = op;
9929 if (GET_CODE (base) == CONST)
9930 base = XEXP (base, 0);
9931 if (GET_CODE (base) == PLUS)
9932 base = XEXP (base, 0);
9933 if (GET_CODE (base) == UNSPEC)
9934 switch (XINT (base, 1))
9936 case UNSPEC_TOCREL:
9937 case UNSPEC_MACHOPIC_OFFSET:
9938 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
9939 for aliasing purposes. */
9940 return XVECEXP (base, 0, 0);
9943 return op;
9946 /* More elaborate version of recog's offsettable_memref_p predicate
9947 that works around the ??? note of rs6000_mode_dependent_address.
9948 In particular it accepts
9950 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
9952 in 32-bit mode, which the recog predicate rejects.
9954 static bool
9955 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
9957 bool worst_case;
9959 if (!MEM_P (op))
9960 return false;
9962 /* First mimic offsettable_memref_p. */
9963 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
9964 return true;
9966 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
9967 the latter predicate knows nothing about the mode of the memory
9968 reference and, therefore, assumes that it is the largest supported
9969 mode (TFmode). As a consequence, legitimate offsettable memory
9970 references are rejected. rs6000_legitimate_offset_address_p contains
9971 the correct logic for the PLUS case of rs6000_mode_dependent_address,
9972 at least with a little bit of help here given that we know the
9973 actual registers used. */
9974 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
9975 || GET_MODE_SIZE (reg_mode) == 4);
9976 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
9977 true, worst_case);
9980 /* Determine the reassociation width to be used in reassociate_bb.
9981 This takes into account how many parallel operations we
9982 can actually do of a given type, and also the latency.
9984 int add/sub 6/cycle
9985 mul 2/cycle
9986 vect add/sub/mul 2/cycle
9987 fp add/sub/mul 2/cycle
9988 dfp 1/cycle
9991 static int
9992 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
9993 machine_mode mode)
9995 switch (rs6000_tune)
9997 case PROCESSOR_POWER8:
9998 case PROCESSOR_POWER9:
9999 if (DECIMAL_FLOAT_MODE_P (mode))
10000 return 1;
10001 if (VECTOR_MODE_P (mode))
10002 return 4;
10003 if (INTEGRAL_MODE_P (mode))
10004 return opc == MULT_EXPR ? 4 : 6;
10005 if (FLOAT_MODE_P (mode))
10006 return 4;
10007 break;
10008 default:
10009 break;
10011 return 1;
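/* Editorial example: with the POWER8/POWER9 width of 6 for integer
   add/sub, a reduction such as

	s = a + b + c + d + e + f + g + h;

   may be rebalanced by reassociate_bb into several independent partial
   sums that issue in parallel, instead of one serial chain. */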
10014 /* Change register usage conditional on target flags. */
10015 static void
10016 rs6000_conditional_register_usage (void)
10018 int i;
10020 if (TARGET_DEBUG_TARGET)
10021 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10023 /* Set MQ register fixed (already call_used) so that it will not be
10024 allocated. */
10025 fixed_regs[64] = 1;
10027 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10028 if (TARGET_64BIT)
10029 fixed_regs[13] = call_used_regs[13]
10030 = call_really_used_regs[13] = 1;
10032 /* Conditionally disable FPRs. */
10033 if (TARGET_SOFT_FLOAT)
10034 for (i = 32; i < 64; i++)
10035 fixed_regs[i] = call_used_regs[i]
10036 = call_really_used_regs[i] = 1;
10038 /* The TOC register is not killed across calls in a way that is
10039 visible to the compiler. */
10040 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10041 call_really_used_regs[2] = 0;
10043 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10044 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10046 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10047 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10048 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10049 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10051 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10052 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10053 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10054 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10056 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10057 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10058 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10060 if (!TARGET_ALTIVEC && !TARGET_VSX)
10062 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10063 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10064 call_really_used_regs[VRSAVE_REGNO] = 1;
10067 if (TARGET_ALTIVEC || TARGET_VSX)
10068 global_regs[VSCR_REGNO] = 1;
10070 if (TARGET_ALTIVEC_ABI)
10072 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10073 call_used_regs[i] = call_really_used_regs[i] = 1;
10075 /* AIX reserves VR20:31 in non-extended ABI mode. */
10076 if (TARGET_XCOFF)
10077 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10078 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
10083 /* Output insns to set DEST equal to the constant SOURCE as a series of
10084 lis, ori and shl instructions and return TRUE. */
10086 bool
10087 rs6000_emit_set_const (rtx dest, rtx source)
10089 machine_mode mode = GET_MODE (dest);
10090 rtx temp, set;
10091 rtx_insn *insn;
10092 HOST_WIDE_INT c;
10094 gcc_checking_assert (CONST_INT_P (source));
10095 c = INTVAL (source);
10096 switch (mode)
10098 case E_QImode:
10099 case E_HImode:
10100 emit_insn (gen_rtx_SET (dest, source));
10101 return true;
10103 case E_SImode:
10104 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10106 emit_insn (gen_rtx_SET (copy_rtx (temp),
10107 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10108 emit_insn (gen_rtx_SET (dest,
10109 gen_rtx_IOR (SImode, copy_rtx (temp),
10110 GEN_INT (c & 0xffff))));
10111 break;
10113 case E_DImode:
10114 if (!TARGET_POWERPC64)
10116 rtx hi, lo;
10118 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
10119 DImode);
10120 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
10121 DImode);
10122 emit_move_insn (hi, GEN_INT (c >> 32));
10123 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
10124 emit_move_insn (lo, GEN_INT (c));
10126 else
10127 rs6000_emit_set_long_const (dest, c);
10128 break;
10130 default:
10131 gcc_unreachable ();
10134 insn = get_last_insn ();
10135 set = single_set (insn);
10136 if (! CONSTANT_P (SET_SRC (set)))
10137 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10139 return true;
10142 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10143 Output insns to set DEST equal to the constant C as a series of
10144 lis, ori and shl instructions. */
10146 static void
10147 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10149 rtx temp;
10150 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10152 ud1 = c & 0xffff;
10153 c = c >> 16;
10154 ud2 = c & 0xffff;
10155 c = c >> 16;
10156 ud3 = c & 0xffff;
10157 c = c >> 16;
10158 ud4 = c & 0xffff;
10160 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10161 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10162 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
10164 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10165 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10167 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10169 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10170 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10171 if (ud1 != 0)
10172 emit_move_insn (dest,
10173 gen_rtx_IOR (DImode, copy_rtx (temp),
10174 GEN_INT (ud1)));
10176 else if (ud3 == 0 && ud4 == 0)
10178 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10180 gcc_assert (ud2 & 0x8000);
10181 emit_move_insn (copy_rtx (temp),
10182 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
10183 if (ud1 != 0)
10184 emit_move_insn (copy_rtx (temp),
10185 gen_rtx_IOR (DImode, copy_rtx (temp),
10186 GEN_INT (ud1)));
10187 emit_move_insn (dest,
10188 gen_rtx_ZERO_EXTEND (DImode,
10189 gen_lowpart (SImode,
10190 copy_rtx (temp))));
10192 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10193 || (ud4 == 0 && ! (ud3 & 0x8000)))
10195 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10197 emit_move_insn (copy_rtx (temp),
10198 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
10199 if (ud2 != 0)
10200 emit_move_insn (copy_rtx (temp),
10201 gen_rtx_IOR (DImode, copy_rtx (temp),
10202 GEN_INT (ud2)));
10203 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10204 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10205 GEN_INT (16)));
10206 if (ud1 != 0)
10207 emit_move_insn (dest,
10208 gen_rtx_IOR (DImode, copy_rtx (temp),
10209 GEN_INT (ud1)));
10211 else
10213 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10215 emit_move_insn (copy_rtx (temp),
10216 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
10217 if (ud3 != 0)
10218 emit_move_insn (copy_rtx (temp),
10219 gen_rtx_IOR (DImode, copy_rtx (temp),
10220 GEN_INT (ud3)));
10222 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
10223 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
10224 GEN_INT (32)));
10225 if (ud2 != 0)
10226 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
10227 gen_rtx_IOR (DImode, copy_rtx (temp),
10228 GEN_INT (ud2 << 16)));
10229 if (ud1 != 0)
10230 emit_move_insn (dest,
10231 gen_rtx_IOR (DImode, copy_rtx (temp),
10232 GEN_INT (ud1)));
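/* Editorial worked example for the general case above: for
   c = 0x123456789ABCDEF0 (ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc,
   ud1 = 0xdef0) the emitted RTL corresponds to

	lis  rT,0x1234
	ori  rT,rT,0x5678
	sldi rT,rT,32
	oris rT,rT,0x9abc
	ori  rD,rT,0xdef0  */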
10236 /* Helper for the following function. Get rid of [r+r] memory refs
10237 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10239 static void
10240 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10242 if (GET_CODE (operands[0]) == MEM
10243 && GET_CODE (XEXP (operands[0], 0)) != REG
10244 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10245 GET_MODE (operands[0]), false))
10246 operands[0]
10247 = replace_equiv_address (operands[0],
10248 copy_addr_to_reg (XEXP (operands[0], 0)));
10250 if (GET_CODE (operands[1]) == MEM
10251 && GET_CODE (XEXP (operands[1], 0)) != REG
10252 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10253 GET_MODE (operands[1]), false))
10254 operands[1]
10255 = replace_equiv_address (operands[1],
10256 copy_addr_to_reg (XEXP (operands[1], 0)));
10259 /* Generate a vector of constants to permute MODE for a little-endian
10260 storage operation by swapping the two halves of a vector. */
10261 static rtvec
10262 rs6000_const_vec (machine_mode mode)
10264 int i, subparts;
10265 rtvec v;
10267 switch (mode)
10269 case E_V1TImode:
10270 subparts = 1;
10271 break;
10272 case E_V2DFmode:
10273 case E_V2DImode:
10274 subparts = 2;
10275 break;
10276 case E_V4SFmode:
10277 case E_V4SImode:
10278 subparts = 4;
10279 break;
10280 case E_V8HImode:
10281 subparts = 8;
10282 break;
10283 case E_V16QImode:
10284 subparts = 16;
10285 break;
10286 default:
10287 gcc_unreachable();
10290 v = rtvec_alloc (subparts);
10292 for (i = 0; i < subparts / 2; ++i)
10293 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10294 for (i = subparts / 2; i < subparts; ++i)
10295 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10297 return v;
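/* Editorial worked example: for V4SImode, subparts is 4 and the
   returned selector is (2 3 0 1), i.e. the two doubleword halves of
   the vector are exchanged, which is exactly the swap that
   lxvd2x/stxvd2x perform. */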
10300 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10301 store operation. */
10302 void
10303 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10305 /* Scalar permutations are easier to express in integer modes rather than
10306 floating-point modes, so cast them here. We use V1TImode instead
10307 of TImode to ensure that the values don't go through GPRs. */
10308 if (FLOAT128_VECTOR_P (mode))
10310 dest = gen_lowpart (V1TImode, dest);
10311 source = gen_lowpart (V1TImode, source);
10312 mode = V1TImode;
10315 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10316 scalar. */
10317 if (mode == TImode || mode == V1TImode)
10318 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10319 GEN_INT (64))));
10320 else
10322 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10323 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10327 /* Emit a little-endian load from vector memory location SOURCE to VSX
10328 register DEST in mode MODE. The load is done with two permuting
10329 insns that represent an lxvd2x and an xxpermdi. */
10330 void
10331 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10333 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10334 V1TImode). */
10335 if (mode == TImode || mode == V1TImode)
10337 mode = V2DImode;
10338 dest = gen_lowpart (V2DImode, dest);
10339 source = adjust_address (source, V2DImode, 0);
10342 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10343 rs6000_emit_le_vsx_permute (tmp, source, mode);
10344 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10347 /* Emit a little-endian store to vector memory location DEST from VSX
10348 register SOURCE in mode MODE. The store is done with two permuting
10349 insns that represent an xxpermdi and an stxvd2x. */
10350 void
10351 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10353 /* This should never be called during or after LRA, because it does
10354 not re-permute the source register. It is intended only for use
10355 during expand. */
10356 gcc_assert (!lra_in_progress && !reload_completed);
10358 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10359 V1TImode). */
10360 if (mode == TImode || mode == V1TImode)
10362 mode = V2DImode;
10363 dest = adjust_address (dest, V2DImode, 0);
10364 source = gen_lowpart (V2DImode, source);
10367 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
10368 rs6000_emit_le_vsx_permute (tmp, source, mode);
10369 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10372 /* Emit a sequence representing a little-endian VSX load or store,
10373 moving data from SOURCE to DEST in mode MODE. This is done
10374 separately from rs6000_emit_move to ensure it is called only
10375 during expand. LE VSX loads and stores introduced later are
10376 handled with a split. The expand-time RTL generation allows
10377 us to optimize away redundant pairs of register-permutes. */
10378 void
10379 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10381 gcc_assert (!BYTES_BIG_ENDIAN
10382 && VECTOR_MEM_VSX_P (mode)
10383 && !TARGET_P9_VECTOR
10384 && !gpr_or_gpr_p (dest, source)
10385 && (MEM_P (source) ^ MEM_P (dest)));
10387 if (MEM_P (source))
10389 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
10390 rs6000_emit_le_vsx_load (dest, source, mode);
10392 else
10394 if (!REG_P (source))
10395 source = force_reg (mode, source);
10396 rs6000_emit_le_vsx_store (dest, source, mode);
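/* Editorial note on the three helpers above: each permute swaps the
   two doubleword halves, so the swap performed by lxvd2x/stxvd2x
   composed with the xxpermdi restores big-endian element order in the
   register; doing this at expand time lets later passes cancel
   redundant permute pairs, as the comments above describe. */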
10400 /* Return whether a SFmode or SImode move can be done without converting one
10401 mode to another. This arises when we have:
10403 (SUBREG:SF (REG:SI ...))
10404 (SUBREG:SI (REG:SF ...))
10406 and one of the values is in a floating point/vector register, where SFmode
10407 scalars are stored in DFmode format. */
10409 bool
10410 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10412 if (TARGET_ALLOW_SF_SUBREG)
10413 return true;
10415 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10416 return true;
10418 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10419 return true;
10421 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10422 if (SUBREG_P (dest))
10424 rtx dest_subreg = SUBREG_REG (dest);
10425 rtx src_subreg = SUBREG_REG (src);
10426 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10429 return false;
10433 /* Helper function to change moves with:
10435 (SUBREG:SF (REG:SI)) and
10436 (SUBREG:SI (REG:SF))
10438 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10439 values are stored as DFmode values in the VSX registers. We need to convert
10440 the bits before we can use a direct move or operate on the bits in the
10441 vector register as an integer type.
10443 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10445 static bool
10446 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10448 if (TARGET_DIRECT_MOVE_64BIT && !lra_in_progress && !reload_completed
10449 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10450 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10452 rtx inner_source = SUBREG_REG (source);
10453 machine_mode inner_mode = GET_MODE (inner_source);
10455 if (mode == SImode && inner_mode == SFmode)
10457 emit_insn (gen_movsi_from_sf (dest, inner_source));
10458 return true;
10461 if (mode == SFmode && inner_mode == SImode)
10463 emit_insn (gen_movsf_from_si (dest, inner_source));
10464 return true;
10468 return false;
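/* Editorial example (hypothetical source): type punning such as

	union { float f; int i; } u;
	u.f = x;
	n = u.i;

   can surface as (set (reg:SI) (subreg:SI (reg:SF) 0)); the helper
   above rewrites that to movsi_from_sf so the SFmode value, held in
   DFmode format in a VSX register, is converted before the bits are
   moved. */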
10471 /* Emit a move from SOURCE to DEST in mode MODE. */
10472 void
10473 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10475 rtx operands[2];
10476 operands[0] = dest;
10477 operands[1] = source;
10479 if (TARGET_DEBUG_ADDR)
10481 fprintf (stderr,
10482 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10483 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10484 GET_MODE_NAME (mode),
10485 lra_in_progress,
10486 reload_completed,
10487 can_create_pseudo_p ());
10488 debug_rtx (dest);
10489 fprintf (stderr, "source:\n");
10490 debug_rtx (source);
10493 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
10494 if (CONST_WIDE_INT_P (operands[1])
10495 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10497 /* This should be fixed with the introduction of CONST_WIDE_INT. */
10498 gcc_unreachable ();
10501 /* See if we need to special case SImode/SFmode SUBREG moves. */
10502 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10503 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10504 return;
10506 /* Check if GCC is setting up a block move that will end up using FP
10507 registers as temporaries. We must make sure this is acceptable. */
10508 if (GET_CODE (operands[0]) == MEM
10509 && GET_CODE (operands[1]) == MEM
10510 && mode == DImode
10511 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10512 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10513 && ! (rs6000_slow_unaligned_access (SImode,
10514 (MEM_ALIGN (operands[0]) > 32
10515 ? 32 : MEM_ALIGN (operands[0])))
10516 || rs6000_slow_unaligned_access (SImode,
10517 (MEM_ALIGN (operands[1]) > 32
10518 ? 32 : MEM_ALIGN (operands[1]))))
10519 && ! MEM_VOLATILE_P (operands [0])
10520 && ! MEM_VOLATILE_P (operands [1]))
10522 emit_move_insn (adjust_address (operands[0], SImode, 0),
10523 adjust_address (operands[1], SImode, 0));
10524 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10525 adjust_address (copy_rtx (operands[1]), SImode, 4));
10526 return;
10529 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
10530 && !gpc_reg_operand (operands[1], mode))
10531 operands[1] = force_reg (mode, operands[1]);
10533 /* Recognize the case where operand[1] is a reference to thread-local
10534 data and load its address to a register. */
10535 if (tls_referenced_p (operands[1]))
10537 enum tls_model model;
10538 rtx tmp = operands[1];
10539 rtx addend = NULL;
10541 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10543 addend = XEXP (XEXP (tmp, 0), 1);
10544 tmp = XEXP (XEXP (tmp, 0), 0);
10547 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
10548 model = SYMBOL_REF_TLS_MODEL (tmp);
10549 gcc_assert (model != 0);
10551 tmp = rs6000_legitimize_tls_address (tmp, model);
10552 if (addend)
10554 tmp = gen_rtx_PLUS (mode, tmp, addend);
10555 tmp = force_operand (tmp, operands[0]);
10557 operands[1] = tmp;
10560 /* 128-bit constant floating-point values on Darwin should really be loaded
10561 as two parts. However, this premature splitting is a problem when DFmode
10562 values can go into Altivec registers. */
10563 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
10564 && GET_CODE (operands[1]) == CONST_DOUBLE)
10566 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10567 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10568 DFmode);
10569 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10570 GET_MODE_SIZE (DFmode)),
10571 simplify_gen_subreg (DFmode, operands[1], mode,
10572 GET_MODE_SIZE (DFmode)),
10573 DFmode);
10574 return;
10577 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10578 p1:SD) if p1 is not of floating point class and p0 is spilled as
10579 we can have no analogous movsd_store for this. */
10580 if (lra_in_progress && mode == DDmode
10581 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10582 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10583 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
10584 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10586 enum reg_class cl;
10587 int regno = REGNO (SUBREG_REG (operands[1]));
10589 if (regno >= FIRST_PSEUDO_REGISTER)
10591 cl = reg_preferred_class (regno);
10592 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10594 if (regno >= 0 && ! FP_REGNO_P (regno))
10596 mode = SDmode;
10597 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10598 operands[1] = SUBREG_REG (operands[1]);
10601 if (lra_in_progress
10602 && mode == SDmode
10603 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
10604 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10605 && (REG_P (operands[1])
10606 || (GET_CODE (operands[1]) == SUBREG
10607 && REG_P (SUBREG_REG (operands[1])))))
10609 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
10610 ? SUBREG_REG (operands[1]) : operands[1]);
10611 enum reg_class cl;
10613 if (regno >= FIRST_PSEUDO_REGISTER)
10615 cl = reg_preferred_class (regno);
10616 gcc_assert (cl != NO_REGS);
10617 regno = ira_class_hard_regs[cl][0];
10619 if (FP_REGNO_P (regno))
10621 if (GET_MODE (operands[0]) != DDmode)
10622 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10623 emit_insn (gen_movsd_store (operands[0], operands[1]));
10625 else if (INT_REGNO_P (regno))
10626 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10627 else
10628 gcc_unreachable();
10629 return;
10631 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10632 p:DD)) if p0 is not of floating point class and p1 is spilled as
10633 we can have no analogous movsd_load for this. */
10634 if (lra_in_progress && mode == DDmode
10635 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
10636 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10637 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10638 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10640 enum reg_class cl;
10641 int regno = REGNO (SUBREG_REG (operands[0]));
10643 if (regno >= FIRST_PSEUDO_REGISTER)
10645 cl = reg_preferred_class (regno);
10646 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10648 if (regno >= 0 && ! FP_REGNO_P (regno))
10650 mode = SDmode;
10651 operands[0] = SUBREG_REG (operands[0]);
10652 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10655 if (lra_in_progress
10656 && mode == SDmode
10657 && (REG_P (operands[0])
10658 || (GET_CODE (operands[0]) == SUBREG
10659 && REG_P (SUBREG_REG (operands[0]))))
10660 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
10661 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10663 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
10664 ? SUBREG_REG (operands[0]) : operands[0]);
10665 enum reg_class cl;
10667 if (regno >= FIRST_PSEUDO_REGISTER)
10669 cl = reg_preferred_class (regno);
10670 gcc_assert (cl != NO_REGS);
10671 regno = ira_class_hard_regs[cl][0];
10673 if (FP_REGNO_P (regno))
10675 if (GET_MODE (operands[1]) != DDmode)
10676 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10677 emit_insn (gen_movsd_load (operands[0], operands[1]));
10679 else if (INT_REGNO_P (regno))
10680 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10681 else
10682 gcc_unreachable();
10683 return;
10686 /* FIXME: In the long term, this switch statement should go away
10687 and be replaced by a sequence of tests based on things like
10688 mode == Pmode. */
10689 switch (mode)
10691 case E_HImode:
10692 case E_QImode:
10693 if (CONSTANT_P (operands[1])
10694 && GET_CODE (operands[1]) != CONST_INT)
10695 operands[1] = force_const_mem (mode, operands[1]);
10696 break;
10698 case E_TFmode:
10699 case E_TDmode:
10700 case E_IFmode:
10701 case E_KFmode:
10702 if (FLOAT128_2REG_P (mode))
10703 rs6000_eliminate_indexed_memrefs (operands);
10704 /* fall through */
10706 case E_DFmode:
10707 case E_DDmode:
10708 case E_SFmode:
10709 case E_SDmode:
10710 if (CONSTANT_P (operands[1])
10711 && ! easy_fp_constant (operands[1], mode))
10712 operands[1] = force_const_mem (mode, operands[1]);
10713 break;
10715 case E_V16QImode:
10716 case E_V8HImode:
10717 case E_V4SFmode:
10718 case E_V4SImode:
10719 case E_V2SFmode:
10720 case E_V2SImode:
10721 case E_V2DFmode:
10722 case E_V2DImode:
10723 case E_V1TImode:
10724 if (CONSTANT_P (operands[1])
10725 && !easy_vector_constant (operands[1], mode))
10726 operands[1] = force_const_mem (mode, operands[1]);
10727 break;
10729 case E_SImode:
10730 case E_DImode:
10731 /* Use the default pattern for the address of ELF small data. */
10732 if (TARGET_ELF
10733 && mode == Pmode
10734 && DEFAULT_ABI == ABI_V4
10735 && (GET_CODE (operands[1]) == SYMBOL_REF
10736 || GET_CODE (operands[1]) == CONST)
10737 && small_data_operand (operands[1], mode))
10739 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10740 return;
10743 if (DEFAULT_ABI == ABI_V4
10744 && mode == Pmode && mode == SImode
10745 && flag_pic == 1 && got_operand (operands[1], mode))
10747 emit_insn (gen_movsi_got (operands[0], operands[1]));
10748 return;
10751 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
10752 && TARGET_NO_TOC
10753 && ! flag_pic
10754 && mode == Pmode
10755 && CONSTANT_P (operands[1])
10756 && GET_CODE (operands[1]) != HIGH
10757 && GET_CODE (operands[1]) != CONST_INT)
10759 rtx target = (!can_create_pseudo_p ()
10760 ? operands[0]
10761 : gen_reg_rtx (mode));
10763 /* If this is a function address on -mcall-aixdesc,
10764 convert it to the address of the descriptor. */
10765 if (DEFAULT_ABI == ABI_AIX
10766 && GET_CODE (operands[1]) == SYMBOL_REF
10767 && XSTR (operands[1], 0)[0] == '.')
10769 const char *name = XSTR (operands[1], 0);
10770 rtx new_ref;
10771 while (*name == '.')
10772 name++;
10773 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
10774 CONSTANT_POOL_ADDRESS_P (new_ref)
10775 = CONSTANT_POOL_ADDRESS_P (operands[1]);
10776 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
10777 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
10778 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
10779 operands[1] = new_ref;
10782 if (DEFAULT_ABI == ABI_DARWIN)
10784 #if TARGET_MACHO
10785 if (MACHO_DYNAMIC_NO_PIC_P)
10787 /* Take care of any required data indirection. */
10788 operands[1] = rs6000_machopic_legitimize_pic_address (
10789 operands[1], mode, operands[0]);
10790 if (operands[0] != operands[1])
10791 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10792 return;
10794 #endif
10795 emit_insn (gen_macho_high (target, operands[1]));
10796 emit_insn (gen_macho_low (operands[0], target, operands[1]));
10797 return;
10800 emit_insn (gen_elf_high (target, operands[1]));
10801 emit_insn (gen_elf_low (operands[0], target, operands[1]));
10802 return;
10805 /* If this is a SYMBOL_REF that refers to a constant pool entry,
10806 and we have put it in the TOC, we just need to make a TOC-relative
10807 reference to it. */
10808 if (TARGET_TOC
10809 && GET_CODE (operands[1]) == SYMBOL_REF
10810 && use_toc_relative_ref (operands[1], mode))
10811 operands[1] = create_TOC_reference (operands[1], operands[0]);
10812 else if (mode == Pmode
10813 && CONSTANT_P (operands[1])
10814 && GET_CODE (operands[1]) != HIGH
10815 && ((GET_CODE (operands[1]) != CONST_INT
10816 && ! easy_fp_constant (operands[1], mode))
10817 || (GET_CODE (operands[1]) == CONST_INT
10818 && (num_insns_constant (operands[1], mode)
10819 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
10820 || (GET_CODE (operands[0]) == REG
10821 && FP_REGNO_P (REGNO (operands[0]))))
10822 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
10823 && (TARGET_CMODEL == CMODEL_SMALL
10824 || can_create_pseudo_p ()
10825 || (REG_P (operands[0])
10826 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
10829 #if TARGET_MACHO
10830 /* Darwin uses a special PIC legitimizer. */
10831 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
10833 operands[1] =
10834 rs6000_machopic_legitimize_pic_address (operands[1], mode,
10835 operands[0]);
10836 if (operands[0] != operands[1])
10837 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10838 return;
10840 #endif
10842 /* If we are to limit the number of things we put in the TOC and
10843 this is a symbol plus a constant we can add in one insn,
10844 just put the symbol in the TOC and add the constant. */
10845 if (GET_CODE (operands[1]) == CONST
10846 && TARGET_NO_SUM_IN_TOC
10847 && GET_CODE (XEXP (operands[1], 0)) == PLUS
10848 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
10849 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
10850 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
10851 && ! side_effects_p (operands[0]))
10853 rtx sym =
10854 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
10855 rtx other = XEXP (XEXP (operands[1], 0), 1);
10857 sym = force_reg (mode, sym);
10858 emit_insn (gen_add3_insn (operands[0], sym, other));
10859 return;
10862 operands[1] = force_const_mem (mode, operands[1]);
10864 if (TARGET_TOC
10865 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10866 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
10868 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
10869 operands[0]);
10870 operands[1] = gen_const_mem (mode, tocref);
10871 set_mem_alias_set (operands[1], get_TOC_alias_set ());
10874 break;
10876 case E_TImode:
10877 if (!VECTOR_MEM_VSX_P (TImode))
10878 rs6000_eliminate_indexed_memrefs (operands);
10879 break;
10881 case E_PTImode:
10882 rs6000_eliminate_indexed_memrefs (operands);
10883 break;
10885 default:
10886 fatal_insn ("bad move", gen_rtx_SET (dest, source));
10889 /* Above, we may have called force_const_mem which may have returned
10890 an invalid address. If we can, fix this up; otherwise, reload will
10891 have to deal with it. */
10892 if (GET_CODE (operands[1]) == MEM)
10893 operands[1] = validize_mem (operands[1]);
10895 emit_insn (gen_rtx_SET (operands[0], operands[1]));
10898 /* Nonzero if we can use a floating-point register to pass this arg. */
10899 #define USE_FP_FOR_ARG_P(CUM,MODE) \
10900 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
10901 && (CUM)->fregno <= FP_ARG_MAX_REG \
10902 && TARGET_HARD_FLOAT)
10904 /* Nonzero if we can use an AltiVec register to pass this arg. */
10905 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
10906 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
10907 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
10908 && TARGET_ALTIVEC_ABI \
10909 && (NAMED))
10911 /* Walk down the type tree of TYPE counting consecutive base elements.
10912 If *MODEP is VOIDmode, then set it to the first valid floating point
10913 or vector type. If a non-floating point or vector type is found, or
10914 if a floating point or vector type that doesn't match a non-VOIDmode
10915 *MODEP is found, then return -1, otherwise return the count in the
10916 sub-tree. */
10918 static int
10919 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
10921 machine_mode mode;
10922 HOST_WIDE_INT size;
10924 switch (TREE_CODE (type))
10926 case REAL_TYPE:
10927 mode = TYPE_MODE (type);
10928 if (!SCALAR_FLOAT_MODE_P (mode))
10929 return -1;
10931 if (*modep == VOIDmode)
10932 *modep = mode;
10934 if (*modep == mode)
10935 return 1;
10937 break;
10939 case COMPLEX_TYPE:
10940 mode = TYPE_MODE (TREE_TYPE (type));
10941 if (!SCALAR_FLOAT_MODE_P (mode))
10942 return -1;
10944 if (*modep == VOIDmode)
10945 *modep = mode;
10947 if (*modep == mode)
10948 return 2;
10950 break;
10952 case VECTOR_TYPE:
10953 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
10954 return -1;
10956 /* Use V4SImode as representative of all 128-bit vector types. */
10957 size = int_size_in_bytes (type);
10958 switch (size)
10960 case 16:
10961 mode = V4SImode;
10962 break;
10963 default:
10964 return -1;
10967 if (*modep == VOIDmode)
10968 *modep = mode;
10970 /* Vector modes are considered to be opaque: two vectors are
10971 equivalent for the purposes of being homogeneous aggregates
10972 if they are the same size. */
10973 if (*modep == mode)
10974 return 1;
10976 break;
10978 case ARRAY_TYPE:
10980 int count;
10981 tree index = TYPE_DOMAIN (type);
10983 /* Can't handle incomplete types or sizes that are not
10984 fixed. */
10985 if (!COMPLETE_TYPE_P (type)
10986 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
10987 return -1;
10989 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
10990 if (count == -1
10991 || !index
10992 || !TYPE_MAX_VALUE (index)
10993 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
10994 || !TYPE_MIN_VALUE (index)
10995 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
10996 || count < 0)
10997 return -1;
10999 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
11000 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
11002 /* There must be no padding. */
11003 if (wi::to_wide (TYPE_SIZE (type))
11004 != count * GET_MODE_BITSIZE (*modep))
11005 return -1;
11007 return count;
11010 case RECORD_TYPE:
11012 int count = 0;
11013 int sub_count;
11014 tree field;
11016 /* Can't handle incomplete types or sizes that are not
11017 fixed. */
11018 if (!COMPLETE_TYPE_P (type)
11019 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11020 return -1;
11022 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11024 if (TREE_CODE (field) != FIELD_DECL)
11025 continue;
11027 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11028 if (sub_count < 0)
11029 return -1;
11030 count += sub_count;
11033 /* There must be no padding. */
11034 if (wi::to_wide (TYPE_SIZE (type))
11035 != count * GET_MODE_BITSIZE (*modep))
11036 return -1;
11038 return count;
11041 case UNION_TYPE:
11042 case QUAL_UNION_TYPE:
11044 /* These aren't very interesting except in a degenerate case. */
11045 int count = 0;
11046 int sub_count;
11047 tree field;
11049 /* Can't handle incomplete types or sizes that are not
11050 fixed. */
11051 if (!COMPLETE_TYPE_P (type)
11052 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
11053 return -1;
11055 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
11057 if (TREE_CODE (field) != FIELD_DECL)
11058 continue;
11060 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
11061 if (sub_count < 0)
11062 return -1;
11063 count = count > sub_count ? count : sub_count;
11066 /* There must be no padding. */
11067 if (wi::to_wide (TYPE_SIZE (type))
11068 != count * GET_MODE_BITSIZE (*modep))
11069 return -1;
11071 return count;
11074 default:
11075 break;
11078 return -1;
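/* Illustrative examples (not part of GCC itself): given the walk
above, the first type below yields a count of 4 with *MODEP == DFmode
(the complex double contributes 2), while the second yields -1
because an integer field appears among the floats. */
#if 0
struct hfa_ok { _Complex double c; double d[2]; };
struct hfa_bad { double d; int tag; };
#endif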
11081 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
11082 float or vector aggregate that shall be passed in FP/vector registers
11083 according to the ELFv2 ABI, return the homogeneous element mode in
11084 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
11086 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
11088 static bool
11089 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
11090 machine_mode *elt_mode,
11091 int *n_elts)
11093 /* Note that we do not accept complex types at the top level as
11094 homogeneous aggregates; these types are handled via the
11095 targetm.calls.split_complex_arg mechanism. Complex types
11096 can be elements of homogeneous aggregates, however. */
11097 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
11099 machine_mode field_mode = VOIDmode;
11100 int field_count = rs6000_aggregate_candidate (type, &field_mode);
11102 if (field_count > 0)
11104 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
11105 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
11107 /* The ELFv2 ABI allows homogeneous aggregates to occupy
11108 up to AGGR_ARG_NUM_REG registers. */
11109 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
11111 if (elt_mode)
11112 *elt_mode = field_mode;
11113 if (n_elts)
11114 *n_elts = field_count;
11115 return true;
11120 if (elt_mode)
11121 *elt_mode = mode;
11122 if (n_elts)
11123 *n_elts = 1;
11124 return false;
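/* Worked example (illustration only, ELFv2): for
"struct v { float f[4]; }" the candidate walk finds field_mode ==
SFmode and field_count == 4; each SFmode element needs one FPR
(n_regs == 1), and 4 * 1 <= AGGR_ARG_NUM_REG, so *ELT_MODE is set to
SFmode, *N_ELTS to 4, and the function returns true. */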
11127 /* Return a nonzero value to say to return the function value in
11128 memory, just as large structures are always returned. TYPE will be
11129 the data type of the value, and FNTYPE will be the type of the
11130 function doing the returning, or @code{NULL} for libcalls.
11132 The AIX ABI for the RS/6000 specifies that all structures are
11133 returned in memory. The Darwin ABI does the same.
11135 For the Darwin 64 Bit ABI, a function result can be returned in
11136 registers or in memory, depending on the size of the return data
11137 type. If it is returned in registers, the value occupies the same
11138 registers as it would if it were the first and only function
11139 argument. Otherwise, the function places its result in memory at
11140 the location pointed to by GPR3.
11142 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
11143 but a draft put them in memory, and GCC used to implement the draft
11144 instead of the final standard. Therefore, aix_struct_return
11145 controls this instead of DEFAULT_ABI; V.4 targets needing backward
11146 compatibility can change DRAFT_V4_STRUCT_RET to override the
11147 default, and -m switches get the final word. See
11148 rs6000_option_override_internal for more details.
11150 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
11151 long double support is enabled. These values are returned in memory.
11153 int_size_in_bytes returns -1 for variable size objects, which go in
11154 memory always. The cast to unsigned makes -1 > 8. */
11156 static bool
11157 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
11159 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
11160 if (TARGET_MACHO
11161 && rs6000_darwin64_abi
11162 && TREE_CODE (type) == RECORD_TYPE
11163 && int_size_in_bytes (type) > 0)
11165 CUMULATIVE_ARGS valcum;
11166 rtx valret;
11168 valcum.words = 0;
11169 valcum.fregno = FP_ARG_MIN_REG;
11170 valcum.vregno = ALTIVEC_ARG_MIN_REG;
11171 /* Do a trial code generation as if this were going to be passed
11172 as an argument; if any part goes in memory, we return NULL. */
11173 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
11174 if (valret)
11175 return false;
11176 /* Otherwise fall through to more conventional ABI rules. */
11179 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
11180 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
11181 NULL, NULL))
11182 return false;
11184 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
11185 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
11186 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
11187 return false;
11189 if (AGGREGATE_TYPE_P (type)
11190 && (aix_struct_return
11191 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
11192 return true;
11194 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
11195 modes only exist for GCC vector types if -maltivec. */
11196 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
11197 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11198 return false;
11200 /* Return synthetic vectors in memory. */
11201 if (TREE_CODE (type) == VECTOR_TYPE
11202 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
11204 static bool warned_for_return_big_vectors = false;
11205 if (!warned_for_return_big_vectors)
11207 warning (OPT_Wpsabi, "GCC vector returned by reference: "
11208 "non-standard ABI extension with no compatibility "
11209 "guarantee");
11210 warned_for_return_big_vectors = true;
11212 return true;
11215 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
11216 && FLOAT128_IEEE_P (TYPE_MODE (type)))
11217 return true;
11219 return false;
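/* Worked examples (illustration only): under ELFv2,
"struct { double x, y; }" is a homogeneous aggregate and is returned
in FPRs; "struct { long a, b; }" is 16 bytes and is returned in
GPRs; under the AIX rules (aix_struct_return) both are returned in
memory. */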
11222 /* Specify whether values returned in registers should be at the most
11223 significant end of a register. We want aggregates returned by
11224 value to match the way aggregates are passed to functions. */
11226 static bool
11227 rs6000_return_in_msb (const_tree valtype)
11229 return (DEFAULT_ABI == ABI_ELFv2
11230 && BYTES_BIG_ENDIAN
11231 && AGGREGATE_TYPE_P (valtype)
11232 && (rs6000_function_arg_padding (TYPE_MODE (valtype), valtype)
11233 == PAD_UPWARD));
11236 #ifdef HAVE_AS_GNU_ATTRIBUTE
11237 /* Return TRUE if a call to function FNDECL may be one that
11238 potentially affects the function calling ABI of the object file. */
11240 static bool
11241 call_ABI_of_interest (tree fndecl)
11243 if (rs6000_gnu_attr && symtab->state == EXPANSION)
11245 struct cgraph_node *c_node;
11247 /* Libcalls are always interesting. */
11248 if (fndecl == NULL_TREE)
11249 return true;
11251 /* Any call to an external function is interesting. */
11252 if (DECL_EXTERNAL (fndecl))
11253 return true;
11255 /* Interesting functions that we are emitting in this object file. */
11256 c_node = cgraph_node::get (fndecl);
11257 c_node = c_node->ultimate_alias_target ();
11258 return !c_node->only_called_directly_p ();
11260 return false;
11262 #endif
11264 /* Initialize a variable CUM of type CUMULATIVE_ARGS
11265 for a call to a function whose data type is FNTYPE.
11266 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
11268 For incoming args we set the number of prototyped arguments large
11269 enough that we never return a PARALLEL. */
11271 void
11272 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
11273 rtx libname ATTRIBUTE_UNUSED, int incoming,
11274 int libcall, int n_named_args,
11275 tree fndecl ATTRIBUTE_UNUSED,
11276 machine_mode return_mode ATTRIBUTE_UNUSED)
11278 static CUMULATIVE_ARGS zero_cumulative;
11280 *cum = zero_cumulative;
11281 cum->words = 0;
11282 cum->fregno = FP_ARG_MIN_REG;
11283 cum->vregno = ALTIVEC_ARG_MIN_REG;
11284 cum->prototype = (fntype && prototype_p (fntype));
11285 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
11286 ? CALL_LIBCALL : CALL_NORMAL);
11287 cum->sysv_gregno = GP_ARG_MIN_REG;
11288 cum->stdarg = stdarg_p (fntype);
11289 cum->libcall = libcall;
11291 cum->nargs_prototype = 0;
11292 if (incoming || cum->prototype)
11293 cum->nargs_prototype = n_named_args;
11295 /* Check for a longcall attribute. */
11296 if ((!fntype && rs6000_default_long_calls)
11297 || (fntype
11298 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
11299 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
11300 cum->call_cookie |= CALL_LONG;
11302 if (TARGET_DEBUG_ARG)
11304 fprintf (stderr, "\ninit_cumulative_args:");
11305 if (fntype)
11307 tree ret_type = TREE_TYPE (fntype);
11308 fprintf (stderr, " ret code = %s,",
11309 get_tree_code_name (TREE_CODE (ret_type)));
11312 if (cum->call_cookie & CALL_LONG)
11313 fprintf (stderr, " longcall,");
11315 fprintf (stderr, " proto = %d, nargs = %d\n",
11316 cum->prototype, cum->nargs_prototype);
11319 #ifdef HAVE_AS_GNU_ATTRIBUTE
11320 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
11322 cum->escapes = call_ABI_of_interest (fndecl);
11323 if (cum->escapes)
11325 tree return_type;
11327 if (fntype)
11329 return_type = TREE_TYPE (fntype);
11330 return_mode = TYPE_MODE (return_type);
11332 else
11333 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
11335 if (return_type != NULL)
11337 if (TREE_CODE (return_type) == RECORD_TYPE
11338 && TYPE_TRANSPARENT_AGGR (return_type))
11340 return_type = TREE_TYPE (first_field (return_type));
11341 return_mode = TYPE_MODE (return_type);
11343 if (AGGREGATE_TYPE_P (return_type)
11344 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
11345 <= 8))
11346 rs6000_returns_struct = true;
11348 if (SCALAR_FLOAT_MODE_P (return_mode))
11350 rs6000_passes_float = true;
11351 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11352 && (FLOAT128_IBM_P (return_mode)
11353 || FLOAT128_IEEE_P (return_mode)
11354 || (return_type != NULL
11355 && (TYPE_MAIN_VARIANT (return_type)
11356 == long_double_type_node))))
11357 rs6000_passes_long_double = true;
11359 if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
11360 || PAIRED_VECTOR_MODE (return_mode))
11361 rs6000_passes_vector = true;
11364 #endif
11366 if (fntype
11367 && !TARGET_ALTIVEC
11368 && TARGET_ALTIVEC_ABI
11369 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
11371 error ("cannot return value in vector register because"
11372 " altivec instructions are disabled, use %qs"
11373 " to enable them", "-maltivec");
11377 /* The mode the ABI uses for a word. This is not the same as word_mode
11378 for -m32 -mpowerpc64. This is used to implement various target hooks. */
11380 static scalar_int_mode
11381 rs6000_abi_word_mode (void)
11383 return TARGET_32BIT ? SImode : DImode;
11386 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
11387 static char *
11388 rs6000_offload_options (void)
11390 if (TARGET_64BIT)
11391 return xstrdup ("-foffload-abi=lp64");
11392 else
11393 return xstrdup ("-foffload-abi=ilp32");
11396 /* On rs6000, function arguments are promoted, as are function return
11397 values. */
11399 static machine_mode
11400 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
11401 machine_mode mode,
11402 int *punsignedp ATTRIBUTE_UNUSED,
11403 const_tree, int)
11405 PROMOTE_MODE (mode, *punsignedp, type);
11407 return mode;
11410 /* Return true if TYPE must be passed on the stack and not in registers. */
11412 static bool
11413 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
11415 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
11416 return must_pass_in_stack_var_size (mode, type);
11417 else
11418 return must_pass_in_stack_var_size_or_pad (mode, type);
11421 static inline bool
11422 is_complex_IBM_long_double (machine_mode mode)
11424 return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode);
11427 /* Whether ABI_V4 passes MODE args to a function in floating point
11428 registers. */
11430 static bool
11431 abi_v4_pass_in_fpr (machine_mode mode)
11433 if (!TARGET_HARD_FLOAT)
11434 return false;
11435 if (TARGET_SINGLE_FLOAT && mode == SFmode)
11436 return true;
11437 if (TARGET_DOUBLE_FLOAT && mode == DFmode)
11438 return true;
11439 /* ABI_V4 passes complex IBM long double in 8 gprs.
11440 Stupid, but we can't change the ABI now. */
11441 if (is_complex_IBM_long_double (mode))
11442 return false;
11443 if (FLOAT128_2REG_P (mode))
11444 return true;
11445 if (DECIMAL_FLOAT_MODE_P (mode))
11446 return true;
11447 return false;
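/* For example (illustration only): with hard float and the matching
single/double support, SFmode and DFmode go in FPRs, as do the
decimal float modes and IBM extended long double (FLOAT128_2REG_P);
complex IBM long double (TCmode) is the historical exception and is
passed in eight GPRs instead. */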
11450 /* Implement TARGET_FUNCTION_ARG_PADDING.
11452 For the AIX ABI structs are always stored left shifted in their
11453 argument slot. */
11455 static pad_direction
11456 rs6000_function_arg_padding (machine_mode mode, const_tree type)
11458 #ifndef AGGREGATE_PADDING_FIXED
11459 #define AGGREGATE_PADDING_FIXED 0
11460 #endif
11461 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
11462 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
11463 #endif
11465 if (!AGGREGATE_PADDING_FIXED)
11467 /* GCC used to pass structures of the same size as integer types as
11468 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
11469 i.e. structures of size 1 or 2 (or 4 when TARGET_64BIT) were
11470 passed padded downward, except that -mstrict-align further
11471 muddied the water in that multi-component structures of 2 and 4
11472 bytes in size were passed padded upward.
11474 The following arranges for best compatibility with previous
11475 versions of gcc, but removes the -mstrict-align dependency. */
11476 if (BYTES_BIG_ENDIAN)
11478 HOST_WIDE_INT size = 0;
11480 if (mode == BLKmode)
11482 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
11483 size = int_size_in_bytes (type);
11485 else
11486 size = GET_MODE_SIZE (mode);
11488 if (size == 1 || size == 2 || size == 4)
11489 return PAD_DOWNWARD;
11491 return PAD_UPWARD;
11494 if (AGGREGATES_PAD_UPWARD_ALWAYS)
11496 if (type != 0 && AGGREGATE_TYPE_P (type))
11497 return PAD_UPWARD;
11500 /* Fall back to the default. */
11501 return default_function_arg_padding (mode, type);
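/* Worked example (illustration only): on a big-endian target without
AGGREGATE_PADDING_FIXED, a 2-byte struct is passed padded downward
(sizes 1, 2, and 4 mimic the old integer-style passing), while a
3-byte packed struct is passed padded upward. */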
11504 /* If defined, a C expression that gives the alignment boundary, in bits,
11505 of an argument with the specified mode and type. If it is not defined,
11506 PARM_BOUNDARY is used for all arguments.
11508 V.4 wants long longs and doubles to be double word aligned. Just
11509 testing the mode size is a boneheaded way to do this as it means
11510 that other types such as complex int are also double word aligned.
11511 However, we're stuck with this because changing the ABI might break
11512 existing library interfaces.
11514 Quadword align Altivec/VSX vectors.
11515 Quadword align large synthetic vector types. */
11517 static unsigned int
11518 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
11520 machine_mode elt_mode;
11521 int n_elts;
11523 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11525 if (DEFAULT_ABI == ABI_V4
11526 && (GET_MODE_SIZE (mode) == 8
11527 || (TARGET_HARD_FLOAT
11528 && !is_complex_IBM_long_double (mode)
11529 && FLOAT128_2REG_P (mode))))
11530 return 64;
11531 else if (FLOAT128_VECTOR_P (mode))
11532 return 128;
11533 else if (PAIRED_VECTOR_MODE (mode)
11534 || (type && TREE_CODE (type) == VECTOR_TYPE
11535 && int_size_in_bytes (type) >= 8
11536 && int_size_in_bytes (type) < 16))
11537 return 64;
11538 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11539 || (type && TREE_CODE (type) == VECTOR_TYPE
11540 && int_size_in_bytes (type) >= 16))
11541 return 128;
11543 /* Aggregate types that need > 8 byte alignment are quadword-aligned
11544 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
11545 -mcompat-align-parm is used. */
11546 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
11547 || DEFAULT_ABI == ABI_ELFv2)
11548 && type && TYPE_ALIGN (type) > 64)
11550 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
11551 or homogeneous float/vector aggregates here. We already handled
11552 vector aggregates above, but still need to check for float here. */
11553 bool aggregate_p = (AGGREGATE_TYPE_P (type)
11554 && !SCALAR_FLOAT_MODE_P (elt_mode));
11556 /* We used to check for BLKmode instead of the above aggregate type
11557 check. Warn when this results in any difference to the ABI. */
11558 if (aggregate_p != (mode == BLKmode))
11560 static bool warned;
11561 if (!warned && warn_psabi)
11563 warned = true;
11564 inform (input_location,
11565 "the ABI of passing aggregates with %d-byte alignment"
11566 " has changed in GCC 5",
11567 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
11571 if (aggregate_p)
11572 return 128;
11575 /* Similar for the Darwin64 ABI. Note that for historical reasons we
11576 implement the "aggregate type" check as a BLKmode check here; this
11577 means certain aggregate types are in fact not aligned. */
11578 if (TARGET_MACHO && rs6000_darwin64_abi
11579 && mode == BLKmode
11580 && type && TYPE_ALIGN (type) > 64)
11581 return 128;
11583 return PARM_BOUNDARY;
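/* Worked examples (illustration only): under ABI_V4 a "long long"
(GET_MODE_SIZE == 8) is aligned to 64 bits; a VSX "vector double" is
aligned to 128 bits; and under ELFv2 an aggregate whose TYPE_ALIGN
exceeds 64 bits is quadword-aligned in the parameter save area. */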
11586 /* The offset in words to the start of the parameter save area. */
11588 static unsigned int
11589 rs6000_parm_offset (void)
11591 return (DEFAULT_ABI == ABI_V4 ? 2
11592 : DEFAULT_ABI == ABI_ELFv2 ? 4
11593 : 6);
11596 /* For a function parm of MODE and TYPE, return the starting word in
11597 the parameter area. NWORDS of the parameter area are already used. */
11599 static unsigned int
11600 rs6000_parm_start (machine_mode mode, const_tree type,
11601 unsigned int nwords)
11603 unsigned int align;
11605 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
11606 return nwords + (-(rs6000_parm_offset () + nwords) & align);
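/* Worked example (illustration only, 64-bit ELFv2, where
PARM_BOUNDARY is 64): the save area starts rs6000_parm_offset () == 4
doublewords above the stack pointer. For a 16-byte-aligned argument
with NWORDS == 1, ALIGN is 128 / 64 - 1 == 1, so the argument starts
at word 1 + (-(4 + 1) & 1) == 2, which is a 16-byte-aligned
address. */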
11609 /* Compute the size (in words) of a function argument. */
11611 static unsigned long
11612 rs6000_arg_size (machine_mode mode, const_tree type)
11614 unsigned long size;
11616 if (mode != BLKmode)
11617 size = GET_MODE_SIZE (mode);
11618 else
11619 size = int_size_in_bytes (type);
11621 if (TARGET_32BIT)
11622 return (size + 3) >> 2;
11623 else
11624 return (size + 7) >> 3;
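/* Worked example (illustration only): a 20-byte BLKmode struct
occupies (20 + 3) >> 2 == 5 words under TARGET_32BIT and
(20 + 7) >> 3 == 3 doublewords otherwise. */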
11627 /* Use this to flush pending int fields. */
11629 static void
11630 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
11631 HOST_WIDE_INT bitpos, int final)
11633 unsigned int startbit, endbit;
11634 int intregs, intoffset;
11636 /* Handle the situation where a float takes up the first half
11637 of the GPR, and the other half is empty (typically due to
11638 alignment restrictions). We can detect this by an 8-byte-aligned
11639 int field, or by seeing that this is the final flush for this
11640 argument. Count the word and continue on. */
11641 if (cum->floats_in_gpr == 1
11642 && (cum->intoffset % 64 == 0
11643 || (cum->intoffset == -1 && final)))
11645 cum->words++;
11646 cum->floats_in_gpr = 0;
11649 if (cum->intoffset == -1)
11650 return;
11652 intoffset = cum->intoffset;
11653 cum->intoffset = -1;
11654 cum->floats_in_gpr = 0;
11656 if (intoffset % BITS_PER_WORD != 0)
11658 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
11659 if (!int_mode_for_size (bits, 0).exists ())
11661 /* We couldn't find an appropriate mode, which happens,
11662 e.g., in packed structs when there are 3 bytes to load.
11663 Move intoffset back to the beginning of the word in this
11664 case. */
11665 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
11669 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
11670 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
11671 intregs = (endbit - startbit) / BITS_PER_WORD;
11672 cum->words += intregs;
11673 /* words should be unsigned. */
11674 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
11676 int pad = (endbit/BITS_PER_WORD) - cum->words;
11677 cum->words += pad;
11681 /* The darwin64 ABI calls for us to recurse down through structs,
11682 looking for elements passed in registers. Unfortunately, we have
11683 to track int register count here also because of misalignments
11684 in powerpc alignment mode. */
11686 static void
11687 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
11688 const_tree type,
11689 HOST_WIDE_INT startbitpos)
11691 tree f;
11693 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
11694 if (TREE_CODE (f) == FIELD_DECL)
11696 HOST_WIDE_INT bitpos = startbitpos;
11697 tree ftype = TREE_TYPE (f);
11698 machine_mode mode;
11699 if (ftype == error_mark_node)
11700 continue;
11701 mode = TYPE_MODE (ftype);
11703 if (DECL_SIZE (f) != 0
11704 && tree_fits_uhwi_p (bit_position (f)))
11705 bitpos += int_bit_position (f);
11707 /* ??? FIXME: else assume zero offset. */
11709 if (TREE_CODE (ftype) == RECORD_TYPE)
11710 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
11711 else if (USE_FP_FOR_ARG_P (cum, mode))
11713 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
11714 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11715 cum->fregno += n_fpregs;
11716 /* Single-precision floats present a special problem for
11717 us, because they are smaller than an 8-byte GPR, and so
11718 the structure-packing rules combined with the standard
11719 varargs behavior mean that we want to pack float/float
11720 and float/int combinations into a single register's
11721 space. This is complicated by the arg advance flushing,
11722 which works on arbitrarily large groups of int-type
11723 fields. */
11724 if (mode == SFmode)
11726 if (cum->floats_in_gpr == 1)
11728 /* Two floats in a word; count the word and reset
11729 the float count. */
11730 cum->words++;
11731 cum->floats_in_gpr = 0;
11733 else if (bitpos % 64 == 0)
11735 /* A float at the beginning of an 8-byte word;
11736 count it and put off adjusting cum->words until
11737 we see if an arg advance flush is going to do it
11738 for us. */
11739 cum->floats_in_gpr++;
11741 else
11743 /* The float is at the end of a word, preceded
11744 by integer fields, so the arg advance flush
11745 just above has already set cum->words and
11746 everything is taken care of. */
11749 else
11750 cum->words += n_fpregs;
11752 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
11754 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
11755 cum->vregno++;
11756 cum->words += 2;
11758 else if (cum->intoffset == -1)
11759 cum->intoffset = bitpos;
11763 /* Check for an item that needs to be considered specially under the Darwin
11764 64-bit ABI. These are record types where the mode is BLKmode or the
11765 structure is 8 bytes in size. */
11766 static int
11767 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
11769 return rs6000_darwin64_abi
11770 && ((mode == BLKmode
11771 && TREE_CODE (type) == RECORD_TYPE
11772 && int_size_in_bytes (type) > 0)
11773 || (type && TREE_CODE (type) == RECORD_TYPE
11774 && int_size_in_bytes (type) == 8)) ? 1 : 0;
11777 /* Update the data in CUM to advance over an argument
11778 of mode MODE and data type TYPE.
11779 (TYPE is null for libcalls where that information may not be available.)
11781 Note that for args passed by reference, function_arg will be called
11782 with MODE and TYPE set to that of the pointer to the arg, not the arg
11783 itself. */
11785 static void
11786 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
11787 const_tree type, bool named, int depth)
11789 machine_mode elt_mode;
11790 int n_elts;
11792 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
11794 /* Only tick off an argument if we're not recursing. */
11795 if (depth == 0)
11796 cum->nargs_prototype--;
11798 #ifdef HAVE_AS_GNU_ATTRIBUTE
11799 if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
11800 && cum->escapes)
11802 if (SCALAR_FLOAT_MODE_P (mode))
11804 rs6000_passes_float = true;
11805 if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
11806 && (FLOAT128_IBM_P (mode)
11807 || FLOAT128_IEEE_P (mode)
11808 || (type != NULL
11809 && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
11810 rs6000_passes_long_double = true;
11812 if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
11813 || (PAIRED_VECTOR_MODE (mode)
11814 && !cum->stdarg
11815 && cum->sysv_gregno <= GP_ARG_MAX_REG))
11816 rs6000_passes_vector = true;
11818 #endif
11820 if (TARGET_ALTIVEC_ABI
11821 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
11822 || (type && TREE_CODE (type) == VECTOR_TYPE
11823 && int_size_in_bytes (type) == 16)))
11825 bool stack = false;
11827 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
11829 cum->vregno += n_elts;
11831 if (!TARGET_ALTIVEC)
11832 error ("cannot pass argument in vector register because"
11833 " altivec instructions are disabled, use %qs"
11834 " to enable them", "-maltivec");
11836 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
11837 even if it is going to be passed in a vector register.
11838 Darwin does the same for variable-argument functions. */
11839 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
11840 && TARGET_64BIT)
11841 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
11842 stack = true;
11844 else
11845 stack = true;
11847 if (stack)
11849 int align;
11851 /* Vector parameters must be 16-byte aligned. In 32-bit
11852 mode this means we need to take into account the offset
11853 to the parameter save area. In 64-bit mode, they just
11854 have to start on an even word, since the parameter save
11855 area is 16-byte aligned. */
11856 if (TARGET_32BIT)
11857 align = -(rs6000_parm_offset () + cum->words) & 3;
11858 else
11859 align = cum->words & 1;
11860 cum->words += align + rs6000_arg_size (mode, type);
11862 if (TARGET_DEBUG_ARG)
11864 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
11865 cum->words, align);
11866 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
11867 cum->nargs_prototype, cum->prototype,
11868 GET_MODE_NAME (mode));
11872 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
11874 int size = int_size_in_bytes (type);
11875 /* Variable sized types have size == -1 and are
11876 treated as if consisting entirely of ints.
11877 Pad to 16 byte boundary if needed. */
11878 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
11879 && (cum->words % 2) != 0)
11880 cum->words++;
11881 /* For varargs, we can just go up by the size of the struct. */
11882 if (!named)
11883 cum->words += (size + 7) / 8;
11884 else
11886 /* It is tempting to say int register count just goes up by
11887 sizeof(type)/8, but this is wrong in a case such as
11888 { int; double; int; } [powerpc alignment]. We have to
11889 grovel through the fields for these too. */
11890 cum->intoffset = 0;
11891 cum->floats_in_gpr = 0;
11892 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
11893 rs6000_darwin64_record_arg_advance_flush (cum,
11894 size * BITS_PER_UNIT, 1);
11896 if (TARGET_DEBUG_ARG)
11898 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
11899 cum->words, TYPE_ALIGN (type), size);
11900 fprintf (stderr,
11901 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
11902 cum->nargs_prototype, cum->prototype,
11903 GET_MODE_NAME (mode));
11906 else if (DEFAULT_ABI == ABI_V4)
11908 if (abi_v4_pass_in_fpr (mode))
11910 /* _Decimal128 must use an even/odd register pair. This assumes
11911 that the register number is odd when fregno is odd. */
11912 if (mode == TDmode && (cum->fregno % 2) == 1)
11913 cum->fregno++;
11915 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
11916 <= FP_ARG_V4_MAX_REG)
11917 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
11918 else
11920 cum->fregno = FP_ARG_V4_MAX_REG + 1;
11921 if (mode == DFmode || FLOAT128_IBM_P (mode)
11922 || mode == DDmode || mode == TDmode)
11923 cum->words += cum->words & 1;
11924 cum->words += rs6000_arg_size (mode, type);
11927 else
11929 int n_words = rs6000_arg_size (mode, type);
11930 int gregno = cum->sysv_gregno;
11932 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
11933 As is any other 2-word item such as complex int, due to a
11934 historical mistake. */
11935 if (n_words == 2)
11936 gregno += (1 - gregno) & 1;
11938 /* Multi-reg args are not split between registers and stack. */
11939 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
11941 /* Long long is aligned on the stack. So are other 2 word
11942 items such as complex int due to a historical mistake. */
11943 if (n_words == 2)
11944 cum->words += cum->words & 1;
11945 cum->words += n_words;
11948 /* Note: we keep accumulating gregno even after we have started
11949 spilling to the stack; this is how expand_builtin_saveregs
11950 learns that spilling has started. */
11951 cum->sysv_gregno = gregno + n_words;
11954 if (TARGET_DEBUG_ARG)
11956 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11957 cum->words, cum->fregno);
11958 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
11959 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
11960 fprintf (stderr, "mode = %4s, named = %d\n",
11961 GET_MODE_NAME (mode), named);
11964 else
11966 int n_words = rs6000_arg_size (mode, type);
11967 int start_words = cum->words;
11968 int align_words = rs6000_parm_start (mode, type, start_words);
11970 cum->words = align_words + n_words;
11972 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT)
11974 /* _Decimal128 must be passed in an even/odd float register pair.
11975 This assumes that the register number is odd when fregno is
11976 odd. */
11977 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
11978 cum->fregno++;
11979 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
11982 if (TARGET_DEBUG_ARG)
11984 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
11985 cum->words, cum->fregno);
11986 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
11987 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
11988 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
11989 named, align_words - start_words, depth);
11994 static void
11995 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
11996 const_tree type, bool named)
11998 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
12002 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
12003 structure between cum->intoffset and bitpos to integer registers. */
12005 static void
12006 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
12007 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
12009 machine_mode mode;
12010 unsigned int regno;
12011 unsigned int startbit, endbit;
12012 int this_regno, intregs, intoffset;
12013 rtx reg;
12015 if (cum->intoffset == -1)
12016 return;
12018 intoffset = cum->intoffset;
12019 cum->intoffset = -1;
12021 /* If this is the trailing part of a word, try to only load that
12022 much into the register. Otherwise load the whole register. Note
12023 that in the latter case we may pick up unwanted bits. It's not a
12024 problem at the moment, but we may wish to revisit it. */
12026 if (intoffset % BITS_PER_WORD != 0)
12028 unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
12029 if (!int_mode_for_size (bits, 0).exists (&mode))
12031 /* We couldn't find an appropriate mode, which happens,
12032 e.g., in packed structs when there are 3 bytes to load.
12033 Move intoffset back to the beginning of the word in this
12034 case. */
12035 intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
12036 mode = word_mode;
12039 else
12040 mode = word_mode;
12042 startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
12043 endbit = ROUND_UP (bitpos, BITS_PER_WORD);
12044 intregs = (endbit - startbit) / BITS_PER_WORD;
12045 this_regno = cum->words + intoffset / BITS_PER_WORD;
12047 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
12048 cum->use_stack = 1;
12050 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
12051 if (intregs <= 0)
12052 return;
12054 intoffset /= BITS_PER_UNIT;
12057 regno = GP_ARG_MIN_REG + this_regno;
12058 reg = gen_rtx_REG (mode, regno);
12059 rvec[(*k)++] =
12060 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
12062 this_regno += 1;
12063 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
12064 mode = word_mode;
12065 intregs -= 1;
12067 while (intregs > 0);
12070 /* Recursive workhorse for rs6000_darwin64_record_arg, below. */
12072 static void
12073 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
12074 HOST_WIDE_INT startbitpos, rtx rvec[],
12075 int *k)
12077 tree f;
12079 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
12080 if (TREE_CODE (f) == FIELD_DECL)
12082 HOST_WIDE_INT bitpos = startbitpos;
12083 tree ftype = TREE_TYPE (f);
12084 machine_mode mode;
12085 if (ftype == error_mark_node)
12086 continue;
12087 mode = TYPE_MODE (ftype);
12089 if (DECL_SIZE (f) != 0
12090 && tree_fits_uhwi_p (bit_position (f)))
12091 bitpos += int_bit_position (f);
12093 /* ??? FIXME: else assume zero offset. */
12095 if (TREE_CODE (ftype) == RECORD_TYPE)
12096 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
12097 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
12099 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
12100 #if 0
12101 switch (mode)
12103 case E_SCmode: mode = SFmode; break;
12104 case E_DCmode: mode = DFmode; break;
12105 case E_TCmode: mode = TFmode; break;
12106 default: break;
12108 #endif
12109 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12110 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
12112 gcc_assert (cum->fregno == FP_ARG_MAX_REG
12113 && (mode == TFmode || mode == TDmode));
12114 /* Long double or _Decimal128 split over regs and memory. */
12115 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
12116 cum->use_stack = 1;
12118 rvec[(*k)++]
12119 = gen_rtx_EXPR_LIST (VOIDmode,
12120 gen_rtx_REG (mode, cum->fregno++),
12121 GEN_INT (bitpos / BITS_PER_UNIT));
12122 if (FLOAT128_2REG_P (mode))
12123 cum->fregno++;
12125 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
12127 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
12128 rvec[(*k)++]
12129 = gen_rtx_EXPR_LIST (VOIDmode,
12130 gen_rtx_REG (mode, cum->vregno++),
12131 GEN_INT (bitpos / BITS_PER_UNIT));
12133 else if (cum->intoffset == -1)
12134 cum->intoffset = bitpos;
12138 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
12139 the register(s) to be used for each field and subfield of a struct
12140 being passed by value, along with the offset of where the
12141 register's value may be found in the block. FP fields go in FP
12142 register, vector fields go in vector registers, and everything
12143 else goes in int registers, packed as in memory.
12145 This code is also used for function return values. RETVAL indicates
12146 whether this is the case.
12148 Much of this is taken from the SPARC V9 port, which has a similar
12149 calling convention. */
12151 static rtx
12152 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
12153 bool named, bool retval)
12155 rtx rvec[FIRST_PSEUDO_REGISTER];
12156 int k = 1, kbase = 1;
12157 HOST_WIDE_INT typesize = int_size_in_bytes (type);
12158 /* This is a copy; modifications are not visible to our caller. */
12159 CUMULATIVE_ARGS copy_cum = *orig_cum;
12160 CUMULATIVE_ARGS *cum = &copy_cum;
12162 /* Pad to 16 byte boundary if needed. */
12163 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
12164 && (cum->words % 2) != 0)
12165 cum->words++;
12167 cum->intoffset = 0;
12168 cum->use_stack = 0;
12169 cum->named = named;
12171 /* Put entries into rvec[] for individual FP and vector fields, and
12172 for the chunks of memory that go in int regs. Note we start at
12173 element 1; 0 is reserved for an indication of using memory, and
12174 may or may not be filled in below. */
12175 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
12176 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
12178 /* If any part of the struct went on the stack put all of it there.
12179 This hack is because the generic code for
12180 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
12181 parts of the struct are not at the beginning. */
12182 if (cum->use_stack)
12184 if (retval)
12185 return NULL_RTX; /* doesn't go in registers at all */
12186 kbase = 0;
12187 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12189 if (k > 1 || cum->use_stack)
12190 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
12191 else
12192 return NULL_RTX;
12195 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
12197 static rtx
12198 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
12199 int align_words)
12201 int n_units;
12202 int i, k;
12203 rtx rvec[GP_ARG_NUM_REG + 1];
12205 if (align_words >= GP_ARG_NUM_REG)
12206 return NULL_RTX;
12208 n_units = rs6000_arg_size (mode, type);
12210 /* Optimize the simple case where the arg fits in one gpr, except in
12211 the case of BLKmode due to assign_parms assuming that registers are
12212 BITS_PER_WORD wide. */
12213 if (n_units == 0
12214 || (n_units == 1 && mode != BLKmode))
12215 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12217 k = 0;
12218 if (align_words + n_units > GP_ARG_NUM_REG)
12219 /* Not all of the arg fits in gprs. Say that it goes in memory too,
12220 using a magic NULL_RTX component.
12221 This is not strictly correct. Only some of the arg belongs in
12222 memory, not all of it. However, the normal scheme using
12223 function_arg_partial_nregs can result in unusual subregs, eg.
12224 (subreg:SI (reg:DF) 4), which are not handled well. The code to
12225 store the whole arg to memory is often more efficient than code
12226 to store pieces, and we know that space is available in the right
12227 place for the whole arg. */
12228 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12230 i = 0;
12233 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
12234 rtx off = GEN_INT (i++ * 4);
12235 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12237 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
12239 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
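/* Worked example (illustration only, -m32 -mpowerpc64): a DFmode
argument with ALIGN_WORDS == 7 has n_units == 2 but only r10 left, so
the PARALLEL holds the magic (NULL_RTX, 0) memory element plus
(reg:SI 10, 0): the first half goes in r10 and the whole value is
also stored to memory. */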
12242 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
12243 but must also be copied into the parameter save area starting at
12244 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
12245 to the GPRs and/or memory. Return the number of elements used. */
12247 static int
12248 rs6000_psave_function_arg (machine_mode mode, const_tree type,
12249 int align_words, rtx *rvec)
12251 int k = 0;
12253 if (align_words < GP_ARG_NUM_REG)
12255 int n_words = rs6000_arg_size (mode, type);
12257 if (align_words + n_words > GP_ARG_NUM_REG
12258 || mode == BLKmode
12259 || (TARGET_32BIT && TARGET_POWERPC64))
12261 /* If this is partially on the stack, then we only
12262 include the portion actually in registers here. */
12263 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12264 int i = 0;
12266 if (align_words + n_words > GP_ARG_NUM_REG)
12268 /* Not all of the arg fits in gprs. Say that it goes in memory
12269 too, using a magic NULL_RTX component. Also see comment in
12270 rs6000_mixed_function_arg for why the normal
12271 function_arg_partial_nregs scheme doesn't work in this case. */
12272 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12277 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12278 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
12279 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12281 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12283 else
12285 /* The whole arg fits in gprs. */
12286 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12287 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
12290 else
12292 /* It's entirely in memory. */
12293 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
12296 return k;
12299 /* RVEC is a vector of K components of an argument of mode MODE.
12300 Construct the final function_arg return value from it. */
12302 static rtx
12303 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
12305 gcc_assert (k >= 1);
12307 /* Avoid returning a PARALLEL in the trivial cases. */
12308 if (k == 1)
12310 if (XEXP (rvec[0], 0) == NULL_RTX)
12311 return NULL_RTX;
12313 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
12314 return XEXP (rvec[0], 0);
12317 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
12320 /* Determine where to put an argument to a function.
12321 Value is zero to push the argument on the stack,
12322 or a hard register in which to store the argument.
12324 MODE is the argument's machine mode.
12325 TYPE is the data type of the argument (as a tree).
12326 This is null for libcalls where that information may
12327 not be available.
12328 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12329 the preceding args and about the function being called. It is
12330 not modified in this routine.
12331 NAMED is nonzero if this argument is a named parameter
12332 (otherwise it is an extra parameter matching an ellipsis).
12334 On RS/6000 the first eight words of non-FP are normally in registers
12335 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
12336 Under V.4, the first 8 FP args are in registers.
12338 If this is floating-point and no prototype is specified, we use
12339 both an FP and integer register (or possibly FP reg and stack). Library
12340 functions (when CALL_LIBCALL is set) always have the proper types for args,
12341 so we can pass the FP value just in one register. emit_library_function
12342 doesn't support PARALLEL anyway.
12344 Note that for args passed by reference, function_arg will be called
12345 with MODE and TYPE set to that of the pointer to the arg, not the arg
12346 itself. */
12348 static rtx
12349 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
12350 const_tree type, bool named)
12352 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12353 enum rs6000_abi abi = DEFAULT_ABI;
12354 machine_mode elt_mode;
12355 int n_elts;
12357 /* Return a marker to indicate whether CR1 needs to set or clear the
12358 bit that V.4 uses to say fp args were passed in registers.
12359 Assume that we don't need the marker for software floating point,
12360 or compiler generated library calls. */
12361 if (mode == VOIDmode)
12363 if (abi == ABI_V4
12364 && (cum->call_cookie & CALL_LIBCALL) == 0
12365 && (cum->stdarg
12366 || (cum->nargs_prototype < 0
12367 && (cum->prototype || TARGET_NO_PROTOTYPE)))
12368 && TARGET_HARD_FLOAT)
12369 return GEN_INT (cum->call_cookie
12370 | ((cum->fregno == FP_ARG_MIN_REG)
12371 ? CALL_V4_SET_FP_ARGS
12372 : CALL_V4_CLEAR_FP_ARGS));
12374 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
12377 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12379 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12381 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
12382 if (rslt != NULL_RTX)
12383 return rslt;
12384 /* Else fall through to usual handling. */
12387 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12389 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12390 rtx r, off;
12391 int i, k = 0;
12393 /* Do we also need to pass this argument in the parameter save area?
12394 Library support functions for IEEE 128-bit are assumed to not need the
12395 value passed both in GPRs and in vector registers. */
12396 if (TARGET_64BIT && !cum->prototype
12397 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12399 int align_words = ROUND_UP (cum->words, 2);
12400 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12403 /* Describe where this argument goes in the vector registers. */
12404 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
12406 r = gen_rtx_REG (elt_mode, cum->vregno + i);
12407 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12408 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12411 return rs6000_finish_function_arg (mode, rvec, k);
12413 else if (TARGET_ALTIVEC_ABI
12414 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
12415 || (type && TREE_CODE (type) == VECTOR_TYPE
12416 && int_size_in_bytes (type) == 16)))
12418 if (named || abi == ABI_V4)
12419 return NULL_RTX;
12420 else
12422 /* Vector parameters to varargs functions under AIX or Darwin
12423 get passed in memory and possibly also in GPRs. */
12424 int align, align_words, n_words;
12425 machine_mode part_mode;
12427 /* Vector parameters must be 16-byte aligned. In 32-bit
12428 mode this means we need to take into account the offset
12429 to the parameter save area. In 64-bit mode, they just
12430 have to start on an even word, since the parameter save
12431 area is 16-byte aligned. */
12432 if (TARGET_32BIT)
12433 align = -(rs6000_parm_offset () + cum->words) & 3;
12434 else
12435 align = cum->words & 1;
12436 align_words = cum->words + align;
12438 /* Out of registers? Memory, then. */
12439 if (align_words >= GP_ARG_NUM_REG)
12440 return NULL_RTX;
12442 if (TARGET_32BIT && TARGET_POWERPC64)
12443 return rs6000_mixed_function_arg (mode, type, align_words);
12445 /* The vector value goes in GPRs. Only the part of the
12446 value in GPRs is reported here. */
12447 part_mode = mode;
12448 n_words = rs6000_arg_size (mode, type);
12449 if (align_words + n_words > GP_ARG_NUM_REG)
12450 /* Fortunately, there are only two possibilities, the value
12451 is either wholly in GPRs or half in GPRs and half not. */
12452 part_mode = DImode;
12454 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
12458 else if (abi == ABI_V4)
12460 if (abi_v4_pass_in_fpr (mode))
12462 /* _Decimal128 must use an even/odd register pair. This assumes
12463 that the register number is odd when fregno is odd. */
12464 if (mode == TDmode && (cum->fregno % 2) == 1)
12465 cum->fregno++;
12467 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
12468 <= FP_ARG_V4_MAX_REG)
12469 return gen_rtx_REG (mode, cum->fregno);
12470 else
12471 return NULL_RTX;
12473 else
12475 int n_words = rs6000_arg_size (mode, type);
12476 int gregno = cum->sysv_gregno;
12478 /* Long long is put in (r3,r4), (r5,r6), (r7,r8) or (r9,r10).
12479 As is any other 2-word item such as complex int, due to a
12480 historical mistake. */
12481 if (n_words == 2)
12482 gregno += (1 - gregno) & 1;
12484 /* Multi-reg args are not split between registers and stack. */
12485 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
12486 return NULL_RTX;
12488 if (TARGET_32BIT && TARGET_POWERPC64)
12489 return rs6000_mixed_function_arg (mode, type,
12490 gregno - GP_ARG_MIN_REG);
12491 return gen_rtx_REG (mode, gregno);
12494 else
12496 int align_words = rs6000_parm_start (mode, type, cum->words);
12498 /* _Decimal128 must be passed in an even/odd float register pair.
12499 This assumes that the register number is odd when fregno is odd. */
12500 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
12501 cum->fregno++;
12503 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12505 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
12506 rtx r, off;
12507 int i, k = 0;
12508 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12509 int fpr_words;
12511 /* Do we also need to pass this argument in the parameter
12512 save area? */
12513 if (type && (cum->nargs_prototype <= 0
12514 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12515 && TARGET_XL_COMPAT
12516 && align_words >= GP_ARG_NUM_REG)))
12517 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
12519 /* Describe where this argument goes in the fprs. */
12520 for (i = 0; i < n_elts
12521 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
12523 /* Check if the argument is split over registers and memory.
12524 This can only ever happen for long double or _Decimal128;
12525 complex types are handled via split_complex_arg. */
12526 machine_mode fmode = elt_mode;
12527 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
12529 gcc_assert (FLOAT128_2REG_P (fmode));
12530 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
12533 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
12534 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
12535 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12538 /* If there were not enough FPRs to hold the argument, the rest
12539 usually goes into memory. However, if the current position
12540 is still within the register parameter area, a portion may
12541 actually have to go into GPRs.
12543 Note that it may happen that the portion of the argument
12544 passed in the first "half" of the first GPR was already
12545 passed in the last FPR as well.
12547 For unnamed arguments, we already set up GPRs to cover the
12548 whole argument in rs6000_psave_function_arg, so there is
12549 nothing further to do at this point. */
12550 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
12551 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
12552 && cum->nargs_prototype > 0)
12554 static bool warned;
12556 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
12557 int n_words = rs6000_arg_size (mode, type);
12559 align_words += fpr_words;
12560 n_words -= fpr_words;
12564 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
12565 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
12566 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
12568 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
12570 if (!warned && warn_psabi)
12572 warned = true;
12573 inform (input_location,
12574 "the ABI of passing homogeneous float aggregates"
12575 " has changed in GCC 5");
12579 return rs6000_finish_function_arg (mode, rvec, k);
12581 else if (align_words < GP_ARG_NUM_REG)
12583 if (TARGET_32BIT && TARGET_POWERPC64)
12584 return rs6000_mixed_function_arg (mode, type, align_words);
12586 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
12588 else
12589 return NULL_RTX;
12593 /* For an arg passed partly in registers and partly in memory, this is
12594 the number of bytes passed in registers. For args passed entirely in
12595 registers or entirely in memory, zero. When an arg is described by a
12596 PARALLEL, perhaps using more than one register type, this function
12597 returns the number of bytes used by the first element of the PARALLEL. */
12599 static int
12600 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
12601 tree type, bool named)
12603 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12604 bool passed_in_gprs = true;
12605 int ret = 0;
12606 int align_words;
12607 machine_mode elt_mode;
12608 int n_elts;
12610 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
12612 if (DEFAULT_ABI == ABI_V4)
12613 return 0;
12615 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
12617 /* If we are passing this arg in the fixed parameter save area (gprs or
12618 memory) as well as VRs, we do not use the partial bytes mechanism;
12619 instead, rs6000_function_arg will return a PARALLEL including a memory
12620 element as necessary. Library support functions for IEEE 128-bit are
12621 assumed to not need the value passed both in GPRs and in vector
12622 registers. */
12623 if (TARGET_64BIT && !cum->prototype
12624 && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
12625 return 0;
12627 /* Otherwise, we pass in VRs only. Check for partial copies. */
12628 passed_in_gprs = false;
12629 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
12630 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
12633 /* In this complicated case we just disable the partial_nregs code. */
12634 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
12635 return 0;
12637 align_words = rs6000_parm_start (mode, type, cum->words);
12639 if (USE_FP_FOR_ARG_P (cum, elt_mode))
12641 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
12643 /* If we are passing this arg in the fixed parameter save area
12644 (gprs or memory) as well as FPRs, we do not use the partial
12645 bytes mechanism; instead, rs6000_function_arg will return a
12646 PARALLEL including a memory element as necessary. */
12647 if (type
12648 && (cum->nargs_prototype <= 0
12649 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
12650 && TARGET_XL_COMPAT
12651 && align_words >= GP_ARG_NUM_REG)))
12652 return 0;
12654 /* Otherwise, we pass in FPRs only. Check for partial copies. */
12655 passed_in_gprs = false;
12656 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
12658 /* Compute number of bytes / words passed in FPRs. If there
12659 is still space available in the register parameter area
12660 *after* that amount, a part of the argument will be passed
12661 in GPRs. In that case, the total amount passed in any
12662 registers is equal to the amount that would have been passed
12663 in GPRs if everything were passed there, so we fall back to
12664 the GPR code below to compute the appropriate value. */
12665 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
12666 * MIN (8, GET_MODE_SIZE (elt_mode)));
12667 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
12669 if (align_words + fpr_words < GP_ARG_NUM_REG)
12670 passed_in_gprs = true;
12671 else
12672 ret = fpr;
12676 if (passed_in_gprs
12677 && align_words < GP_ARG_NUM_REG
12678 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
12679 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
12681 if (ret != 0 && TARGET_DEBUG_ARG)
12682 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
12684 return ret;
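/* Worked example (illustration only, 64-bit ELFv2): a homogeneous
aggregate of four doubles arriving when only three FPRs remain free
fits fpr == 3 * 8 == 24 bytes in FPRs. If GPR slots remain in the
parameter save area past those three doublewords, the trailing GPR
check recomputes the value; otherwise 24 is returned. */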
12687 /* A C expression that indicates when an argument must be passed by
12688 reference. If nonzero for an argument, a copy of that argument is
12689 made in memory and a pointer to the argument is passed instead of
12690 the argument itself. The pointer is passed in whatever way is
12691 appropriate for passing a pointer to that type.
12693 Under V.4, aggregates and long double are passed by reference.
12695 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
12696 reference unless the AltiVec vector extension ABI is in force.
12698 As an extension to all ABIs, variable sized types are passed by
12699 reference. */
12701 static bool
12702 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
12703 machine_mode mode, const_tree type,
12704 bool named ATTRIBUTE_UNUSED)
12706 if (!type)
12707 return 0;
12709 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
12710 && FLOAT128_IEEE_P (TYPE_MODE (type)))
12712 if (TARGET_DEBUG_ARG)
12713 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
12714 return 1;
12717 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
12719 if (TARGET_DEBUG_ARG)
12720 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
12721 return 1;
12724 if (int_size_in_bytes (type) < 0)
12726 if (TARGET_DEBUG_ARG)
12727 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
12728 return 1;
12731 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
12732 modes only exist for GCC vector types if -maltivec. */
12733 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
12735 if (TARGET_DEBUG_ARG)
12736 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
12737 return 1;
12740 /* Pass synthetic vectors in memory. */
12741 if (TREE_CODE (type) == VECTOR_TYPE
12742 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
12744 static bool warned_for_pass_big_vectors = false;
12745 if (TARGET_DEBUG_ARG)
12746 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
12747 if (!warned_for_pass_big_vectors)
12749 warning (OPT_Wpsabi, "GCC vector passed by reference: "
12750 "non-standard ABI extension with no compatibility "
12751 "guarantee");
12752 warned_for_pass_big_vectors = true;
12754 return 1;
12757 return 0;
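/* Illustrative sketch (annotation, not part of the original source):
   under ABI_V4 the aggregate below is not copied into registers; the
   caller materializes a temporary and passes its address:

       struct pair { int a, b; };
       void f (struct pair p);    // callee receives &tmp on V.4

   Variable-sized types get the same treatment under every ABI.  */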
12760 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
12761 already processed.  Return true if the parameter must be passed
12762 (fully or partially) on the stack. */
12764 static bool
12765 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
12767 machine_mode mode;
12768 int unsignedp;
12769 rtx entry_parm;
12771 /* Catch errors. */
12772 if (type == NULL || type == error_mark_node)
12773 return true;
12775 /* Handle types with no storage requirement. */
12776 if (TYPE_MODE (type) == VOIDmode)
12777 return false;
12779 /* Handle complex types. */
12780 if (TREE_CODE (type) == COMPLEX_TYPE)
12781 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
12782 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
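/* Note (annotation, not part of the original source): both operands of
   the || above are deliberately the same call -- the two components of
   a complex value share one element type, and each call also advances
   ARGS_SO_FAR past one component when it returns false.  */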
12784 /* Handle transparent aggregates. */
12785 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
12786 && TYPE_TRANSPARENT_AGGR (type))
12787 type = TREE_TYPE (first_field (type));
12789 /* See if this arg was passed by invisible reference. */
12790 if (pass_by_reference (get_cumulative_args (args_so_far),
12791 TYPE_MODE (type), type, true))
12792 type = build_pointer_type (type);
12794 /* Find mode as it is passed by the ABI. */
12795 unsignedp = TYPE_UNSIGNED (type);
12796 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
12798 /* If we must pass in stack, we need a stack. */
12799 if (rs6000_must_pass_in_stack (mode, type))
12800 return true;
12802 /* If there is no incoming register, we need a stack. */
12803 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
12804 if (entry_parm == NULL)
12805 return true;
12807 /* Likewise if we need to pass both in registers and on the stack. */
12808 if (GET_CODE (entry_parm) == PARALLEL
12809 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
12810 return true;
12812 /* Also true if we're partially in registers and partially not. */
12813 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
12814 return true;
12816 /* Update info on where next arg arrives in registers. */
12817 rs6000_function_arg_advance (args_so_far, mode, type, true);
12818 return false;
12821 /* Return true if FUN has no prototype, has a variable argument
12822 list, or passes any parameter in memory. */
12824 static bool
12825 rs6000_function_parms_need_stack (tree fun, bool incoming)
12827 tree fntype, result;
12828 CUMULATIVE_ARGS args_so_far_v;
12829 cumulative_args_t args_so_far;
12831 if (!fun)
12832 /* Must be a libcall, all of which only use reg parms. */
12833 return false;
12835 fntype = fun;
12836 if (!TYPE_P (fun))
12837 fntype = TREE_TYPE (fun);
12839 /* Varargs functions need the parameter save area. */
12840 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
12841 return true;
12843 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
12844 args_so_far = pack_cumulative_args (&args_so_far_v);
12846 /* When incoming, we will have been passed the function decl.
12847 It is necessary to use the decl to handle K&R style functions,
12848 where TYPE_ARG_TYPES may not be available. */
12849 if (incoming)
12851 gcc_assert (DECL_P (fun));
12852 result = DECL_RESULT (fun);
12854 else
12855 result = TREE_TYPE (fntype);
12857 if (result && aggregate_value_p (result, fntype))
12859 if (!TYPE_P (result))
12860 result = TREE_TYPE (result);
12861 result = build_pointer_type (result);
12862 rs6000_parm_needs_stack (args_so_far, result);
12865 if (incoming)
12867 tree parm;
12869 for (parm = DECL_ARGUMENTS (fun);
12870 parm && parm != void_list_node;
12871 parm = TREE_CHAIN (parm))
12872 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
12873 return true;
12875 else
12877 function_args_iterator args_iter;
12878 tree arg_type;
12880 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
12881 if (rs6000_parm_needs_stack (args_so_far, arg_type))
12882 return true;
12885 return false;
12888 /* Return the size of the REG_PARM_STACK_SPACE area for FUN.  This is
12889 usually a constant depending on the ABI. However, in the ELFv2 ABI
12890 the register parameter area is optional when calling a function that
12891 has a prototype in scope, has no variable argument list, and passes
12892 all parameters in registers. */
12894 int
12895 rs6000_reg_parm_stack_space (tree fun, bool incoming)
12897 int reg_parm_stack_space;
12899 switch (DEFAULT_ABI)
12901 default:
12902 reg_parm_stack_space = 0;
12903 break;
12905 case ABI_AIX:
12906 case ABI_DARWIN:
12907 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12908 break;
12910 case ABI_ELFv2:
12911 /* ??? Recomputing this every time is a bit expensive. Is there
12912 a place to cache this information? */
12913 if (rs6000_function_parms_need_stack (fun, incoming))
12914 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
12915 else
12916 reg_parm_stack_space = 0;
12917 break;
12920 return reg_parm_stack_space;
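/* Illustrative sketch (annotation, not part of the original source):
   under ELFv2 a call such as

       extern int g (int, double);   // prototype in scope, not variadic
       ... g (1, 2.5) ...

   passes everything in registers, so rs6000_function_parms_need_stack
   returns false and the 64-byte register parameter save area is
   omitted from the caller's frame.  */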
12923 static void
12924 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
12926 int i;
12927 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
12929 if (nregs == 0)
12930 return;
12932 for (i = 0; i < nregs; i++)
12934 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
12935 if (reload_completed)
12937 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
12938 tem = NULL_RTX;
12939 else
12940 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
12941 i * GET_MODE_SIZE (reg_mode));
12943 else
12944 tem = replace_equiv_address (tem, XEXP (tem, 0));
12946 gcc_assert (tem);
12948 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
12952 /* Perform any actions needed for a function that is receiving a
12953 variable number of arguments.
12955 CUM is as above.
12957 MODE and TYPE are the mode and type of the current parameter.
12959 PRETEND_SIZE is a variable that should be set to the amount of stack
12960 that must be pushed by the prolog to pretend that our caller pushed
12961 it.
12963 Normally, this macro will push all remaining incoming registers on the
12964 stack and set PRETEND_SIZE to the length of the registers pushed. */
12966 static void
12967 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
12968 tree type, int *pretend_size ATTRIBUTE_UNUSED,
12969 int no_rtl)
12971 CUMULATIVE_ARGS next_cum;
12972 int reg_size = TARGET_32BIT ? 4 : 8;
12973 rtx save_area = NULL_RTX, mem;
12974 int first_reg_offset;
12975 alias_set_type set;
12977 /* Skip the last named argument. */
12978 next_cum = *get_cumulative_args (cum);
12979 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
12981 if (DEFAULT_ABI == ABI_V4)
12983 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
12985 if (! no_rtl)
12987 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
12988 HOST_WIDE_INT offset = 0;
12990 /* Try to optimize the size of the varargs save area.
12991 The ABI requires that ap.reg_save_area is doubleword
12992 aligned, but we don't need to allocate space for all
12993 the bytes, only those to which we actually will save
12994 anything. */
12995 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
12996 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
12997 if (TARGET_HARD_FLOAT
12998 && next_cum.fregno <= FP_ARG_V4_MAX_REG
12999 && cfun->va_list_fpr_size)
13001 if (gpr_reg_num)
13002 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
13003 * UNITS_PER_FP_WORD;
13004 if (cfun->va_list_fpr_size
13005 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13006 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
13007 else
13008 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
13009 * UNITS_PER_FP_WORD;
13011 if (gpr_reg_num)
13013 offset = -((first_reg_offset * reg_size) & ~7);
13014 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
13016 gpr_reg_num = cfun->va_list_gpr_size;
13017 if (reg_size == 4 && (first_reg_offset & 1))
13018 gpr_reg_num++;
13020 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
13022 else if (fpr_size)
13023 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
13024 * UNITS_PER_FP_WORD
13025 - (int) (GP_ARG_NUM_REG * reg_size);
13027 if (gpr_size + fpr_size)
13029 rtx reg_save_area
13030 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
13031 gcc_assert (GET_CODE (reg_save_area) == MEM);
13032 reg_save_area = XEXP (reg_save_area, 0);
13033 if (GET_CODE (reg_save_area) == PLUS)
13035 gcc_assert (XEXP (reg_save_area, 0)
13036 == virtual_stack_vars_rtx);
13037 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
13038 offset += INTVAL (XEXP (reg_save_area, 1));
13040 else
13041 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
13044 cfun->machine->varargs_save_offset = offset;
13045 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
13048 else
13050 first_reg_offset = next_cum.words;
13051 save_area = crtl->args.internal_arg_pointer;
13053 if (targetm.calls.must_pass_in_stack (mode, type))
13054 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
13057 set = get_varargs_alias_set ();
13058 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
13059 && cfun->va_list_gpr_size)
13061 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
13063 if (va_list_gpr_counter_field)
13064 /* V4 va_list_gpr_size counts number of registers needed. */
13065 n_gpr = cfun->va_list_gpr_size;
13066 else
13067 /* char * va_list instead counts number of bytes needed. */
13068 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
13070 if (nregs > n_gpr)
13071 nregs = n_gpr;
13073 mem = gen_rtx_MEM (BLKmode,
13074 plus_constant (Pmode, save_area,
13075 first_reg_offset * reg_size));
13076 MEM_NOTRAP_P (mem) = 1;
13077 set_mem_alias_set (mem, set);
13078 set_mem_align (mem, BITS_PER_WORD);
13080 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
13081 nregs);
13084 /* Save FP registers if needed. */
13085 if (DEFAULT_ABI == ABI_V4
13086 && TARGET_HARD_FLOAT
13087 && ! no_rtl
13088 && next_cum.fregno <= FP_ARG_V4_MAX_REG
13089 && cfun->va_list_fpr_size)
13091 int fregno = next_cum.fregno, nregs;
13092 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
13093 rtx lab = gen_label_rtx ();
13094 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
13095 * UNITS_PER_FP_WORD);
13097 emit_jump_insn
13098 (gen_rtx_SET (pc_rtx,
13099 gen_rtx_IF_THEN_ELSE (VOIDmode,
13100 gen_rtx_NE (VOIDmode, cr1,
13101 const0_rtx),
13102 gen_rtx_LABEL_REF (VOIDmode, lab),
13103 pc_rtx)));
13105 for (nregs = 0;
13106 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
13107 fregno++, off += UNITS_PER_FP_WORD, nregs++)
13109 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13110 ? DFmode : SFmode,
13111 plus_constant (Pmode, save_area, off));
13112 MEM_NOTRAP_P (mem) = 1;
13113 set_mem_alias_set (mem, set);
13114 set_mem_align (mem, GET_MODE_ALIGNMENT (
13115 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13116 ? DFmode : SFmode));
13117 emit_move_insn (mem, gen_rtx_REG (
13118 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
13119 ? DFmode : SFmode, fregno));
13122 emit_label (lab);
13126 /* Create the va_list data type. */
13128 static tree
13129 rs6000_build_builtin_va_list (void)
13131 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
13133 /* For AIX, prefer 'char *' because that's what the system
13134 header files like. */
13135 if (DEFAULT_ABI != ABI_V4)
13136 return build_pointer_type (char_type_node);
13138 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
13139 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
13140 get_identifier ("__va_list_tag"), record);
13142 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
13143 unsigned_char_type_node);
13144 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
13145 unsigned_char_type_node);
13146 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
13147 every user file. */
13148 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13149 get_identifier ("reserved"), short_unsigned_type_node);
13150 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13151 get_identifier ("overflow_arg_area"),
13152 ptr_type_node);
13153 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
13154 get_identifier ("reg_save_area"),
13155 ptr_type_node);
13157 va_list_gpr_counter_field = f_gpr;
13158 va_list_fpr_counter_field = f_fpr;
13160 DECL_FIELD_CONTEXT (f_gpr) = record;
13161 DECL_FIELD_CONTEXT (f_fpr) = record;
13162 DECL_FIELD_CONTEXT (f_res) = record;
13163 DECL_FIELD_CONTEXT (f_ovf) = record;
13164 DECL_FIELD_CONTEXT (f_sav) = record;
13166 TYPE_STUB_DECL (record) = type_decl;
13167 TYPE_NAME (record) = type_decl;
13168 TYPE_FIELDS (record) = f_gpr;
13169 DECL_CHAIN (f_gpr) = f_fpr;
13170 DECL_CHAIN (f_fpr) = f_res;
13171 DECL_CHAIN (f_res) = f_ovf;
13172 DECL_CHAIN (f_ovf) = f_sav;
13174 layout_type (record);
13176 /* The correct type is an array type of one element. */
13177 return build_array_type (record, build_index_type (size_zero_node));
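/* Illustrative sketch (annotation, not part of the original source):
   the record built above corresponds to the V.4 declaration

       struct __va_list_tag {
         unsigned char gpr;         // index of next GP register
         unsigned char fpr;         // index of next FP register
         unsigned short reserved;   // named padding for -Wpadded
         void *overflow_arg_area;   // arguments passed on the stack
         void *reg_save_area;       // block spilled by the prologue
       };

   with va_list being an array of one such record, while every other
   ABI simply uses a 'char *' cursor.  */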
13180 /* Implement va_start. */
13182 static void
13183 rs6000_va_start (tree valist, rtx nextarg)
13185 HOST_WIDE_INT words, n_gpr, n_fpr;
13186 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13187 tree gpr, fpr, ovf, sav, t;
13189 /* Only SVR4 needs something special. */
13190 if (DEFAULT_ABI != ABI_V4)
13192 std_expand_builtin_va_start (valist, nextarg);
13193 return;
13196 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13197 f_fpr = DECL_CHAIN (f_gpr);
13198 f_res = DECL_CHAIN (f_fpr);
13199 f_ovf = DECL_CHAIN (f_res);
13200 f_sav = DECL_CHAIN (f_ovf);
13202 valist = build_simple_mem_ref (valist);
13203 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13204 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13205 f_fpr, NULL_TREE);
13206 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13207 f_ovf, NULL_TREE);
13208 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13209 f_sav, NULL_TREE);
13211 /* Count number of gp and fp argument registers used. */
13212 words = crtl->args.info.words;
13213 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
13214 GP_ARG_NUM_REG);
13215 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
13216 FP_ARG_NUM_REG);
13218 if (TARGET_DEBUG_ARG)
13219 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
13220 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
13221 words, n_gpr, n_fpr);
13223 if (cfun->va_list_gpr_size)
13225 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
13226 build_int_cst (NULL_TREE, n_gpr));
13227 TREE_SIDE_EFFECTS (t) = 1;
13228 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13231 if (cfun->va_list_fpr_size)
13233 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
13234 build_int_cst (NULL_TREE, n_fpr));
13235 TREE_SIDE_EFFECTS (t) = 1;
13236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13238 #ifdef HAVE_AS_GNU_ATTRIBUTE
13239 if (call_ABI_of_interest (cfun->decl))
13240 rs6000_passes_float = true;
13241 #endif
13244 /* Find the overflow area. */
13245 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
13246 if (words != 0)
13247 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
13248 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
13249 TREE_SIDE_EFFECTS (t) = 1;
13250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
13252 /* If there were no va_arg invocations, don't set up the register
13253 save area. */
13254 if (!cfun->va_list_gpr_size
13255 && !cfun->va_list_fpr_size
13256 && n_gpr < GP_ARG_NUM_REG
13257 && n_fpr < FP_ARG_V4_MAX_REG)
13258 return;
13260 /* Find the register save area. */
13261 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
13262 if (cfun->machine->varargs_save_offset)
13263 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
13264 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
13265 TREE_SIDE_EFFECTS (t) = 1;
13266 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
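/* Illustrative sketch (annotation, not part of the original source):
   for

       int f (int a, ...)

   on V.4, the named argument consumes one GP register, so the code
   above stores gpr = 1 and fpr = 0 into the va_list, points
   overflow_arg_area at the first stack-passed argument, and points
   reg_save_area at the block spilled by the varargs prologue.  */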
13269 /* Implement va_arg. */
13271 static tree
13272 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
13273 gimple_seq *post_p)
13275 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
13276 tree gpr, fpr, ovf, sav, reg, t, u;
13277 int size, rsize, n_reg, sav_ofs, sav_scale;
13278 tree lab_false, lab_over, addr;
13279 int align;
13280 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
13281 int regalign = 0;
13282 gimple *stmt;
13284 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
13286 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
13287 return build_va_arg_indirect_ref (t);
13290 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
13291 earlier version of gcc, with the property that it always applied alignment
13292 adjustments to the va-args (even for zero-sized types). The cheapest way
13293 to deal with this is to replicate the effect of the part of
13294 std_gimplify_va_arg_expr that carries out the align adjust, for the case
13295 of relevance.
13296 We don't need to check for pass-by-reference because of the test above.
13297 We can return a simplified answer, since we know there's no offset to add.  */
13299 if (((TARGET_MACHO
13300 && rs6000_darwin64_abi)
13301 || DEFAULT_ABI == ABI_ELFv2
13302 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
13303 && integer_zerop (TYPE_SIZE (type)))
13305 unsigned HOST_WIDE_INT align, boundary;
13306 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
13307 align = PARM_BOUNDARY / BITS_PER_UNIT;
13308 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
13309 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
13310 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
13311 boundary /= BITS_PER_UNIT;
13312 if (boundary > align)
13314 tree t;
13315 /* This updates arg ptr by the amount that would be necessary
13316 to align the zero-sized (but not zero-alignment) item. */
13317 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13318 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
13319 gimplify_and_add (t, pre_p);
13321 t = fold_convert (sizetype, valist_tmp);
13322 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
13323 fold_convert (TREE_TYPE (valist),
13324 fold_build2 (BIT_AND_EXPR, sizetype, t,
13325 size_int (-boundary))));
13326 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
13327 gimplify_and_add (t, pre_p);
13329 /* Since it is zero-sized there's no increment for the item itself. */
13330 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
13331 return build_va_arg_indirect_ref (valist_tmp);
13334 if (DEFAULT_ABI != ABI_V4)
13336 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
13338 tree elem_type = TREE_TYPE (type);
13339 machine_mode elem_mode = TYPE_MODE (elem_type);
13340 int elem_size = GET_MODE_SIZE (elem_mode);
13342 if (elem_size < UNITS_PER_WORD)
13344 tree real_part, imag_part;
13345 gimple_seq post = NULL;
13347 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13348 &post);
13349 /* Copy the value into a temporary, lest the formal temporary
13350 be reused out from under us. */
13351 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
13352 gimple_seq_add_seq (pre_p, post);
13354 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
13355 post_p);
13357 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
13361 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
13364 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
13365 f_fpr = DECL_CHAIN (f_gpr);
13366 f_res = DECL_CHAIN (f_fpr);
13367 f_ovf = DECL_CHAIN (f_res);
13368 f_sav = DECL_CHAIN (f_ovf);
13370 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
13371 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
13372 f_fpr, NULL_TREE);
13373 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
13374 f_ovf, NULL_TREE);
13375 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
13376 f_sav, NULL_TREE);
13378 size = int_size_in_bytes (type);
13379 rsize = (size + 3) / 4;
13380 int pad = 4 * rsize - size;
13381 align = 1;
13383 machine_mode mode = TYPE_MODE (type);
13384 if (abi_v4_pass_in_fpr (mode))
13386 /* FP args go in FP registers, if present. */
13387 reg = fpr;
13388 n_reg = (size + 7) / 8;
13389 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
13390 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
13391 if (mode != SFmode && mode != SDmode)
13392 align = 8;
13394 else
13396 /* Otherwise into GP registers. */
13397 reg = gpr;
13398 n_reg = rsize;
13399 sav_ofs = 0;
13400 sav_scale = 4;
13401 if (n_reg == 2)
13402 align = 8;
13405 /* Pull the value out of the saved registers.... */
13407 lab_over = NULL;
13408 addr = create_tmp_var (ptr_type_node, "addr");
13410 /* AltiVec vectors never go in registers when -mabi=altivec. */
13411 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
13412 align = 16;
13413 else
13415 lab_false = create_artificial_label (input_location);
13416 lab_over = create_artificial_label (input_location);
13418 /* Long long is aligned in the registers.  So is any other 2-gpr
13419 item, such as complex int, due to a historical mistake. */
13420 u = reg;
13421 if (n_reg == 2 && reg == gpr)
13423 regalign = 1;
13424 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13425 build_int_cst (TREE_TYPE (reg), n_reg - 1));
13426 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
13427 unshare_expr (reg), u);
13429 /* _Decimal128 is passed in even/odd fpr pairs; the stored
13430 reg number is 0 for f1, so we want to make it odd. */
13431 else if (reg == fpr && mode == TDmode)
13433 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13434 build_int_cst (TREE_TYPE (reg), 1));
13435 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
13438 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
13439 t = build2 (GE_EXPR, boolean_type_node, u, t);
13440 u = build1 (GOTO_EXPR, void_type_node, lab_false);
13441 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
13442 gimplify_and_add (t, pre_p);
13444 t = sav;
13445 if (sav_ofs)
13446 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
13448 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
13449 build_int_cst (TREE_TYPE (reg), n_reg));
13450 u = fold_convert (sizetype, u);
13451 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
13452 t = fold_build_pointer_plus (t, u);
13454 /* _Decimal32 varargs are located in the second word of the 64-bit
13455 FP register for 32-bit binaries. */
13456 if (TARGET_32BIT && TARGET_HARD_FLOAT && mode == SDmode)
13457 t = fold_build_pointer_plus_hwi (t, size);
13459 /* Args are passed right-aligned. */
13460 if (BYTES_BIG_ENDIAN)
13461 t = fold_build_pointer_plus_hwi (t, pad);
13463 gimplify_assign (addr, t, pre_p);
13465 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
13467 stmt = gimple_build_label (lab_false);
13468 gimple_seq_add_stmt (pre_p, stmt);
13470 if ((n_reg == 2 && !regalign) || n_reg > 2)
13472 /* Ensure that we don't find any more args in regs.
13473 Alignment has been taken care of for special cases. */
13474 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
13478 /* ... otherwise out of the overflow area. */
13480 /* Care for on-stack alignment if needed. */
13481 t = ovf;
13482 if (align != 1)
13484 t = fold_build_pointer_plus_hwi (t, align - 1);
13485 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
13486 build_int_cst (TREE_TYPE (t), -align));
13489 /* Args are passed right-aligned. */
13490 if (BYTES_BIG_ENDIAN)
13491 t = fold_build_pointer_plus_hwi (t, pad);
13493 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
13495 gimplify_assign (unshare_expr (addr), t, pre_p);
13497 t = fold_build_pointer_plus_hwi (t, size);
13498 gimplify_assign (unshare_expr (ovf), t, pre_p);
13500 if (lab_over)
13502 stmt = gimple_build_label (lab_over);
13503 gimple_seq_add_stmt (pre_p, stmt);
13506 if (STRICT_ALIGNMENT
13507 && (TYPE_ALIGN (type)
13508 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
13510 /* The value (of type complex double, for example) may not be
13511 aligned in memory in the saved registers, so copy via a
13512 temporary. (This is the same code as used for SPARC.) */
13513 tree tmp = create_tmp_var (type, "va_arg_tmp");
13514 tree dest_addr = build_fold_addr_expr (tmp);
13516 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
13517 3, dest_addr, addr, size_int (rsize * 4));
13519 gimplify_and_add (copy, pre_p);
13520 addr = dest_addr;
13523 addr = fold_convert (ptrtype, addr);
13524 return build_va_arg_indirect_ref (addr);
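/* Illustrative sketch (annotation, not part of the original source):
   for a 4-byte int on V.4 (n_reg == 1, sav_scale == 4) the gimple
   emitted above behaves like

       if (ap->gpr >= 8) goto lab_false;          // GPRs exhausted
       addr = ap->reg_save_area + ap->gpr++ * 4;
       goto lab_over;
     lab_false:
       addr = ap->overflow_arg_area;              // fetch from stack
       ap->overflow_arg_area = addr + 4;
     lab_over:
       result = *(int *) addr;  */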
13527 /* Builtins. */
13529 static void
13530 def_builtin (const char *name, tree type, enum rs6000_builtins code)
13532 tree t;
13533 unsigned classify = rs6000_builtin_info[(int)code].attr;
13534 const char *attr_string = "";
13536 gcc_assert (name != NULL);
13537 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
13539 if (rs6000_builtin_decls[(int)code])
13540 fatal_error (input_location,
13541 "internal error: builtin function %qs already processed",
13542 name);
13544 rs6000_builtin_decls[(int)code] = t =
13545 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
13547 /* Set any special attributes. */
13548 if ((classify & RS6000_BTC_CONST) != 0)
13550 /* const function, function only depends on the inputs. */
13551 TREE_READONLY (t) = 1;
13552 TREE_NOTHROW (t) = 1;
13553 attr_string = ", const";
13555 else if ((classify & RS6000_BTC_PURE) != 0)
13557 /* pure function, function can read global memory, but does not set any
13558 external state. */
13559 DECL_PURE_P (t) = 1;
13560 TREE_NOTHROW (t) = 1;
13561 attr_string = ", pure";
13563 else if ((classify & RS6000_BTC_FP) != 0)
13565 /* Function is a math function. If rounding mode is on, then treat the
13566 function as not reading global memory, but it can have arbitrary side
13567 effects. If it is off, then assume the function is a const function.
13568 This mimics the ATTR_MATHFN_FPROUNDING attribute in
13569 builtin-attribute.def that is used for the math functions. */
13570 TREE_NOTHROW (t) = 1;
13571 if (flag_rounding_math)
13573 DECL_PURE_P (t) = 1;
13574 DECL_IS_NOVOPS (t) = 1;
13575 attr_string = ", fp, pure";
13577 else
13579 TREE_READONLY (t) = 1;
13580 attr_string = ", fp, const";
13583 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
13584 gcc_unreachable ();
13586 if (TARGET_DEBUG_BUILTIN)
13587 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
13588 (int)code, name, attr_string);
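/* Illustrative sketch (annotation, not part of the original source;
   the type variable name is illustrative): a typical registration is

       def_builtin ("__builtin_altivec_vmaxsw", v4si_ftype_v4si_v4si,
                    ALTIVEC_BUILTIN_VMAXSW);

   and the RS6000_BTC_* bits attached to the enum value decide which
   of the const / pure / fp attributes are applied above.  */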
13591 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
13593 #undef RS6000_BUILTIN_0
13594 #undef RS6000_BUILTIN_1
13595 #undef RS6000_BUILTIN_2
13596 #undef RS6000_BUILTIN_3
13597 #undef RS6000_BUILTIN_A
13598 #undef RS6000_BUILTIN_D
13599 #undef RS6000_BUILTIN_H
13600 #undef RS6000_BUILTIN_P
13601 #undef RS6000_BUILTIN_Q
13602 #undef RS6000_BUILTIN_X
13604 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13605 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13606 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13607 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
13608 { MASK, ICODE, NAME, ENUM },
13610 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13611 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13612 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13613 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13614 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13615 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13617 static const struct builtin_description bdesc_3arg[] =
13619 #include "rs6000-builtin.def"
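/* Note (annotation, not part of the original source): this is the
   classic X-macro pattern.  rs6000-builtin.def expands one
   RS6000_BUILTIN_* macro per builtin; before each table only the
   macro for the wanted kind is defined to emit an initializer, so a
   single .def file populates every bdesc_* array.  A minimal
   analogue (file name hypothetical):

       #define ENTRY_UNARY(NAME, CODE)            // emit nothing
       #define ENTRY_BINARY(NAME, CODE) { NAME, CODE },
       static const struct desc binary_table[] = {
       #include "entries.def"
       };

   followed by #undef/#define cycles for the next table.  */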
13622 /* DST operations: void foo (void *, const int, const char). */
13624 #undef RS6000_BUILTIN_0
13625 #undef RS6000_BUILTIN_1
13626 #undef RS6000_BUILTIN_2
13627 #undef RS6000_BUILTIN_3
13628 #undef RS6000_BUILTIN_A
13629 #undef RS6000_BUILTIN_D
13630 #undef RS6000_BUILTIN_H
13631 #undef RS6000_BUILTIN_P
13632 #undef RS6000_BUILTIN_Q
13633 #undef RS6000_BUILTIN_X
13635 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13636 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13637 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13638 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13639 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13640 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
13641 { MASK, ICODE, NAME, ENUM },
13643 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13644 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13645 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13646 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13648 static const struct builtin_description bdesc_dst[] =
13650 #include "rs6000-builtin.def"
13653 /* Simple binary operations: VECc = foo (VECa, VECb). */
13655 #undef RS6000_BUILTIN_0
13656 #undef RS6000_BUILTIN_1
13657 #undef RS6000_BUILTIN_2
13658 #undef RS6000_BUILTIN_3
13659 #undef RS6000_BUILTIN_A
13660 #undef RS6000_BUILTIN_D
13661 #undef RS6000_BUILTIN_H
13662 #undef RS6000_BUILTIN_P
13663 #undef RS6000_BUILTIN_Q
13664 #undef RS6000_BUILTIN_X
13666 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13667 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13668 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
13669 { MASK, ICODE, NAME, ENUM },
13671 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13672 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13673 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13674 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13675 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13676 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13677 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13679 static const struct builtin_description bdesc_2arg[] =
13681 #include "rs6000-builtin.def"
13684 #undef RS6000_BUILTIN_0
13685 #undef RS6000_BUILTIN_1
13686 #undef RS6000_BUILTIN_2
13687 #undef RS6000_BUILTIN_3
13688 #undef RS6000_BUILTIN_A
13689 #undef RS6000_BUILTIN_D
13690 #undef RS6000_BUILTIN_H
13691 #undef RS6000_BUILTIN_P
13692 #undef RS6000_BUILTIN_Q
13693 #undef RS6000_BUILTIN_X
13695 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13696 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13697 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13698 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13699 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13700 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13701 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13702 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
13703 { MASK, ICODE, NAME, ENUM },
13705 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13706 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13708 /* AltiVec predicates. */
13710 static const struct builtin_description bdesc_altivec_preds[] =
13712 #include "rs6000-builtin.def"
13715 /* PAIRED predicates. */
13716 #undef RS6000_BUILTIN_0
13717 #undef RS6000_BUILTIN_1
13718 #undef RS6000_BUILTIN_2
13719 #undef RS6000_BUILTIN_3
13720 #undef RS6000_BUILTIN_A
13721 #undef RS6000_BUILTIN_D
13722 #undef RS6000_BUILTIN_H
13723 #undef RS6000_BUILTIN_P
13724 #undef RS6000_BUILTIN_Q
13725 #undef RS6000_BUILTIN_X
13727 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13728 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13729 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13730 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13731 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13732 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13733 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13734 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13735 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
13736 { MASK, ICODE, NAME, ENUM },
13738 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13740 static const struct builtin_description bdesc_paired_preds[] =
13742 #include "rs6000-builtin.def"
13745 /* ABS* operations. */
13747 #undef RS6000_BUILTIN_0
13748 #undef RS6000_BUILTIN_1
13749 #undef RS6000_BUILTIN_2
13750 #undef RS6000_BUILTIN_3
13751 #undef RS6000_BUILTIN_A
13752 #undef RS6000_BUILTIN_D
13753 #undef RS6000_BUILTIN_H
13754 #undef RS6000_BUILTIN_P
13755 #undef RS6000_BUILTIN_Q
13756 #undef RS6000_BUILTIN_X
13758 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13759 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13760 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13761 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13762 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
13763 { MASK, ICODE, NAME, ENUM },
13765 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13766 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13767 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13768 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13769 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13771 static const struct builtin_description bdesc_abs[] =
13773 #include "rs6000-builtin.def"
13776 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
13777 foo (VECa). */
13779 #undef RS6000_BUILTIN_0
13780 #undef RS6000_BUILTIN_1
13781 #undef RS6000_BUILTIN_2
13782 #undef RS6000_BUILTIN_3
13783 #undef RS6000_BUILTIN_A
13784 #undef RS6000_BUILTIN_D
13785 #undef RS6000_BUILTIN_H
13786 #undef RS6000_BUILTIN_P
13787 #undef RS6000_BUILTIN_Q
13788 #undef RS6000_BUILTIN_X
13790 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13791 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
13792 { MASK, ICODE, NAME, ENUM },
13794 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13795 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13796 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13797 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13798 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13799 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13800 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13801 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13803 static const struct builtin_description bdesc_1arg[] =
13805 #include "rs6000-builtin.def"
13808 /* Simple no-argument operations: result = __builtin_darn_32 () */
13810 #undef RS6000_BUILTIN_0
13811 #undef RS6000_BUILTIN_1
13812 #undef RS6000_BUILTIN_2
13813 #undef RS6000_BUILTIN_3
13814 #undef RS6000_BUILTIN_A
13815 #undef RS6000_BUILTIN_D
13816 #undef RS6000_BUILTIN_H
13817 #undef RS6000_BUILTIN_P
13818 #undef RS6000_BUILTIN_Q
13819 #undef RS6000_BUILTIN_X
13821 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
13822 { MASK, ICODE, NAME, ENUM },
13824 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13825 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13826 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13827 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13828 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13829 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
13830 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13831 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13832 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13834 static const struct builtin_description bdesc_0arg[] =
13836 #include "rs6000-builtin.def"
13839 /* HTM builtins. */
13840 #undef RS6000_BUILTIN_0
13841 #undef RS6000_BUILTIN_1
13842 #undef RS6000_BUILTIN_2
13843 #undef RS6000_BUILTIN_3
13844 #undef RS6000_BUILTIN_A
13845 #undef RS6000_BUILTIN_D
13846 #undef RS6000_BUILTIN_H
13847 #undef RS6000_BUILTIN_P
13848 #undef RS6000_BUILTIN_Q
13849 #undef RS6000_BUILTIN_X
13851 #define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
13852 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
13853 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
13854 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
13855 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
13856 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
13857 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
13858 { MASK, ICODE, NAME, ENUM },
13860 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
13861 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
13862 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
13864 static const struct builtin_description bdesc_htm[] =
13866 #include "rs6000-builtin.def"
13869 #undef RS6000_BUILTIN_0
13870 #undef RS6000_BUILTIN_1
13871 #undef RS6000_BUILTIN_2
13872 #undef RS6000_BUILTIN_3
13873 #undef RS6000_BUILTIN_A
13874 #undef RS6000_BUILTIN_D
13875 #undef RS6000_BUILTIN_H
13876 #undef RS6000_BUILTIN_P
13877 #undef RS6000_BUILTIN_Q
13878 #undef RS6000_BUILTIN_X
13879 /* Return true if a builtin function is overloaded. */
13880 bool
13881 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
13883 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
13886 const char *
13887 rs6000_overloaded_builtin_name (enum rs6000_builtins fncode)
13889 return rs6000_builtin_info[(int)fncode].name;
13892 /* Expand an expression EXP that calls a builtin without arguments. */
13893 static rtx
13894 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
13896 rtx pat;
13897 machine_mode tmode = insn_data[icode].operand[0].mode;
13899 if (icode == CODE_FOR_nothing)
13900 /* Builtin not supported on this processor. */
13901 return 0;
13903 if (target == 0
13904 || GET_MODE (target) != tmode
13905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13906 target = gen_reg_rtx (tmode);
13908 pat = GEN_FCN (icode) (target);
13909 if (! pat)
13910 return 0;
13911 emit_insn (pat);
13913 return target;
13917 static rtx
13918 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
13920 rtx pat;
13921 tree arg0 = CALL_EXPR_ARG (exp, 0);
13922 tree arg1 = CALL_EXPR_ARG (exp, 1);
13923 rtx op0 = expand_normal (arg0);
13924 rtx op1 = expand_normal (arg1);
13925 machine_mode mode0 = insn_data[icode].operand[0].mode;
13926 machine_mode mode1 = insn_data[icode].operand[1].mode;
13928 if (icode == CODE_FOR_nothing)
13929 /* Builtin not supported on this processor. */
13930 return 0;
13932 /* If we got invalid arguments bail out before generating bad rtl. */
13933 if (arg0 == error_mark_node || arg1 == error_mark_node)
13934 return const0_rtx;
13936 if (GET_CODE (op0) != CONST_INT
13937 || INTVAL (op0) > 255
13938 || INTVAL (op0) < 0)
13940 error ("argument 1 must be an 8-bit field value");
13941 return const0_rtx;
13944 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13945 op0 = copy_to_mode_reg (mode0, op0);
13947 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13948 op1 = copy_to_mode_reg (mode1, op1);
13950 pat = GEN_FCN (icode) (op0, op1);
13951 if (! pat)
13952 return const0_rtx;
13953 emit_insn (pat);
13955 return NULL_RTX;
13958 static rtx
13959 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
13961 rtx pat;
13962 tree arg0 = CALL_EXPR_ARG (exp, 0);
13963 rtx op0 = expand_normal (arg0);
13964 machine_mode tmode = insn_data[icode].operand[0].mode;
13965 machine_mode mode0 = insn_data[icode].operand[1].mode;
13967 if (icode == CODE_FOR_nothing)
13968 /* Builtin not supported on this processor. */
13969 return 0;
13971 /* If we got invalid arguments bail out before generating bad rtl. */
13972 if (arg0 == error_mark_node)
13973 return const0_rtx;
13975 if (icode == CODE_FOR_altivec_vspltisb
13976 || icode == CODE_FOR_altivec_vspltish
13977 || icode == CODE_FOR_altivec_vspltisw)
13979 /* Only allow 5-bit *signed* literals. */
13980 if (GET_CODE (op0) != CONST_INT
13981 || INTVAL (op0) > 15
13982 || INTVAL (op0) < -16)
13984 error ("argument 1 must be a 5-bit signed literal");
13985 return CONST0_RTX (tmode);
13989 if (target == 0
13990 || GET_MODE (target) != tmode
13991 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13992 target = gen_reg_rtx (tmode);
13994 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13995 op0 = copy_to_mode_reg (mode0, op0);
13997 pat = GEN_FCN (icode) (target, op0);
13998 if (! pat)
13999 return 0;
14000 emit_insn (pat);
14002 return target;
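/* Illustrative sketch (annotation, not part of the original source):
   the range check above is what rejects, e.g.,

       vector signed char v = vec_splat_s8 (42);  // error, not in [-16,15]

   while vec_splat_s8 (-7) expands to a single vspltisb immediate.  */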
14005 static rtx
14006 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
14008 rtx pat, scratch1, scratch2;
14009 tree arg0 = CALL_EXPR_ARG (exp, 0);
14010 rtx op0 = expand_normal (arg0);
14011 machine_mode tmode = insn_data[icode].operand[0].mode;
14012 machine_mode mode0 = insn_data[icode].operand[1].mode;
14014 /* If we have invalid arguments, bail out before generating bad rtl. */
14015 if (arg0 == error_mark_node)
14016 return const0_rtx;
14018 if (target == 0
14019 || GET_MODE (target) != tmode
14020 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14021 target = gen_reg_rtx (tmode);
14023 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14024 op0 = copy_to_mode_reg (mode0, op0);
14026 scratch1 = gen_reg_rtx (mode0);
14027 scratch2 = gen_reg_rtx (mode0);
14029 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
14030 if (! pat)
14031 return 0;
14032 emit_insn (pat);
14034 return target;
14037 static rtx
14038 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
14040 rtx pat;
14041 tree arg0 = CALL_EXPR_ARG (exp, 0);
14042 tree arg1 = CALL_EXPR_ARG (exp, 1);
14043 rtx op0 = expand_normal (arg0);
14044 rtx op1 = expand_normal (arg1);
14045 machine_mode tmode = insn_data[icode].operand[0].mode;
14046 machine_mode mode0 = insn_data[icode].operand[1].mode;
14047 machine_mode mode1 = insn_data[icode].operand[2].mode;
14049 if (icode == CODE_FOR_nothing)
14050 /* Builtin not supported on this processor. */
14051 return 0;
14053 /* If we got invalid arguments bail out before generating bad rtl. */
14054 if (arg0 == error_mark_node || arg1 == error_mark_node)
14055 return const0_rtx;
14057 if (icode == CODE_FOR_altivec_vcfux
14058 || icode == CODE_FOR_altivec_vcfsx
14059 || icode == CODE_FOR_altivec_vctsxs
14060 || icode == CODE_FOR_altivec_vctuxs
14061 || icode == CODE_FOR_altivec_vspltb
14062 || icode == CODE_FOR_altivec_vsplth
14063 || icode == CODE_FOR_altivec_vspltw)
14065 /* Only allow 5-bit unsigned literals. */
14066 STRIP_NOPS (arg1);
14067 if (TREE_CODE (arg1) != INTEGER_CST
14068 || TREE_INT_CST_LOW (arg1) & ~0x1f)
14070 error ("argument 2 must be a 5-bit unsigned literal");
14071 return CONST0_RTX (tmode);
14074 else if (icode == CODE_FOR_dfptstsfi_eq_dd
14075 || icode == CODE_FOR_dfptstsfi_lt_dd
14076 || icode == CODE_FOR_dfptstsfi_gt_dd
14077 || icode == CODE_FOR_dfptstsfi_unordered_dd
14078 || icode == CODE_FOR_dfptstsfi_eq_td
14079 || icode == CODE_FOR_dfptstsfi_lt_td
14080 || icode == CODE_FOR_dfptstsfi_gt_td
14081 || icode == CODE_FOR_dfptstsfi_unordered_td)
14083 /* Only allow 6-bit unsigned literals. */
14084 STRIP_NOPS (arg0);
14085 if (TREE_CODE (arg0) != INTEGER_CST
14086 || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
14088 error ("argument 1 must be a 6-bit unsigned literal");
14089 return CONST0_RTX (tmode);
14092 else if (icode == CODE_FOR_xststdcqp_kf
14093 || icode == CODE_FOR_xststdcqp_tf
14094 || icode == CODE_FOR_xststdcdp
14095 || icode == CODE_FOR_xststdcsp
14096 || icode == CODE_FOR_xvtstdcdp
14097 || icode == CODE_FOR_xvtstdcsp)
14099 /* Only allow 7-bit unsigned literals. */
14100 STRIP_NOPS (arg1);
14101 if (TREE_CODE (arg1) != INTEGER_CST
14102 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
14104 error ("argument 2 must be a 7-bit unsigned literal");
14105 return CONST0_RTX (tmode);
14108 else if (icode == CODE_FOR_unpackv1ti
14109 || icode == CODE_FOR_unpackkf
14110 || icode == CODE_FOR_unpacktf
14111 || icode == CODE_FOR_unpackif
14112 || icode == CODE_FOR_unpacktd)
14114 /* Only allow 1-bit unsigned literals. */
14115 STRIP_NOPS (arg1);
14116 if (TREE_CODE (arg1) != INTEGER_CST
14117 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
14119 error ("argument 2 must be a 1-bit unsigned literal");
14120 return CONST0_RTX (tmode);
14124 if (target == 0
14125 || GET_MODE (target) != tmode
14126 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14127 target = gen_reg_rtx (tmode);
14129 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14130 op0 = copy_to_mode_reg (mode0, op0);
14131 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14132 op1 = copy_to_mode_reg (mode1, op1);
14134 pat = GEN_FCN (icode) (target, op0, op1);
14135 if (! pat)
14136 return 0;
14137 emit_insn (pat);
14139 return target;
14142 static rtx
14143 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14145 rtx pat, scratch;
14146 tree cr6_form = CALL_EXPR_ARG (exp, 0);
14147 tree arg0 = CALL_EXPR_ARG (exp, 1);
14148 tree arg1 = CALL_EXPR_ARG (exp, 2);
14149 rtx op0 = expand_normal (arg0);
14150 rtx op1 = expand_normal (arg1);
14151 machine_mode tmode = SImode;
14152 machine_mode mode0 = insn_data[icode].operand[1].mode;
14153 machine_mode mode1 = insn_data[icode].operand[2].mode;
14154 int cr6_form_int;
14156 if (TREE_CODE (cr6_form) != INTEGER_CST)
14158 error ("argument 1 of %qs must be a constant",
14159 "__builtin_altivec_predicate");
14160 return const0_rtx;
14162 else
14163 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
14165 gcc_assert (mode0 == mode1);
14167 /* If we have invalid arguments, bail out before generating bad rtl. */
14168 if (arg0 == error_mark_node || arg1 == error_mark_node)
14169 return const0_rtx;
14171 if (target == 0
14172 || GET_MODE (target) != tmode
14173 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14174 target = gen_reg_rtx (tmode);
14176 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14177 op0 = copy_to_mode_reg (mode0, op0);
14178 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14179 op1 = copy_to_mode_reg (mode1, op1);
14181 /* Note that for many of the relevant operations (e.g. cmpne or
14182 cmpeq) with float or double operands, it makes more sense for the
14183 mode of the allocated scratch register to select a vector of
14184 integer. But the choice to copy the mode of operand 0 was made
14185 long ago and there are no plans to change it. */
14186 scratch = gen_reg_rtx (mode0);
14188 pat = GEN_FCN (icode) (scratch, op0, op1);
14189 if (! pat)
14190 return 0;
14191 emit_insn (pat);
14193 /* The vec_any* and vec_all* predicates use the same opcodes for two
14194 different operations, but the bits in CR6 will be different
14195 depending on what information we want. So we have to play tricks
14196 with CR6 to get the right bits out.
14198 If you think this is disgusting, look at the specs for the
14199 AltiVec predicates. */
14201 switch (cr6_form_int)
14203 case 0:
14204 emit_insn (gen_cr6_test_for_zero (target));
14205 break;
14206 case 1:
14207 emit_insn (gen_cr6_test_for_zero_reverse (target));
14208 break;
14209 case 2:
14210 emit_insn (gen_cr6_test_for_lt (target));
14211 break;
14212 case 3:
14213 emit_insn (gen_cr6_test_for_lt_reverse (target));
14214 break;
14215 default:
14216 error ("argument 1 of %qs is out of range",
14217 "__builtin_altivec_predicate");
14218 break;
14221 return target;
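/* Illustrative sketch (annotation, not part of the original source):
   in user code

       vector int a, b;
       if (vec_all_eq (a, b)) ...
       if (vec_any_eq (a, b)) ...

   both forms expand to the same vcmpequw. instruction, differing only
   in the leading constant that selects which CR6 test above produces
   the boolean result.  */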
14224 static rtx
14225 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
14227 rtx pat, addr;
14228 tree arg0 = CALL_EXPR_ARG (exp, 0);
14229 tree arg1 = CALL_EXPR_ARG (exp, 1);
14230 machine_mode tmode = insn_data[icode].operand[0].mode;
14231 machine_mode mode0 = Pmode;
14232 machine_mode mode1 = Pmode;
14233 rtx op0 = expand_normal (arg0);
14234 rtx op1 = expand_normal (arg1);
14236 if (icode == CODE_FOR_nothing)
14237 /* Builtin not supported on this processor. */
14238 return 0;
14240 /* If we got invalid arguments bail out before generating bad rtl. */
14241 if (arg0 == error_mark_node || arg1 == error_mark_node)
14242 return const0_rtx;
14244 if (target == 0
14245 || GET_MODE (target) != tmode
14246 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14247 target = gen_reg_rtx (tmode);
14249 op1 = copy_to_mode_reg (mode1, op1);
14251 if (op0 == const0_rtx)
14253 addr = gen_rtx_MEM (tmode, op1);
14255 else
14257 op0 = copy_to_mode_reg (mode0, op0);
14258 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
14261 pat = GEN_FCN (icode) (target, addr);
14263 if (! pat)
14264 return 0;
14265 emit_insn (pat);
14267 return target;
14270 /* Return a constant vector for use as a little-endian permute control vector
14271 to reverse the order of elements of the given vector mode. */
14272 static rtx
14273 swap_selector_for_mode (machine_mode mode)
14275 /* These are little endian vectors, so their elements are reversed
14276 from what you would normally expect for a permute control vector. */
14277 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14278 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14279 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14280 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
14281 unsigned int *swaparray, i;
14282 rtx perm[16];
14284 switch (mode)
14286 case E_V2DFmode:
14287 case E_V2DImode:
14288 swaparray = swap2;
14289 break;
14290 case E_V4SFmode:
14291 case E_V4SImode:
14292 swaparray = swap4;
14293 break;
14294 case E_V8HImode:
14295 swaparray = swap8;
14296 break;
14297 case E_V16QImode:
14298 swaparray = swap16;
14299 break;
14300 default:
14301 gcc_unreachable ();
14304 for (i = 0; i < 16; ++i)
14305 perm[i] = GEN_INT (swaparray[i]);
14307 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
14310 rtx
14311 swap_endian_selector_for_mode (machine_mode mode)
14313 unsigned int swap1[16] = {15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0};
14314 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
14315 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
14316 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
14318 unsigned int *swaparray, i;
14319 rtx perm[16];
14321 switch (mode)
14323 case E_V1TImode:
14324 swaparray = swap1;
14325 break;
14326 case E_V2DFmode:
14327 case E_V2DImode:
14328 swaparray = swap2;
14329 break;
14330 case E_V4SFmode:
14331 case E_V4SImode:
14332 swaparray = swap4;
14333 break;
14334 case E_V8HImode:
14335 swaparray = swap8;
14336 break;
14337 default:
14338 gcc_unreachable ();
14341 for (i = 0; i < 16; ++i)
14342 perm[i] = GEN_INT (swaparray[i]);
14344 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode,
14345 gen_rtvec_v (16, perm)));
14348 /* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
14349 with -maltivec=be specified. Issue the load followed by an element-
14350 reversing permute. */
14351 void
14352 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14354 rtx tmp = gen_reg_rtx (mode);
14355 rtx load = gen_rtx_SET (tmp, op1);
14356 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14357 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
14358 rtx sel = swap_selector_for_mode (mode);
14359 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
14361 gcc_assert (REG_P (op0));
14362 emit_insn (par);
14363 emit_insn (gen_rtx_SET (op0, vperm));
14366 /* Generate code for a "stvxl" built-in for a little endian target with
14367 -maltivec=be specified. Issue the store preceded by an element-reversing
14368 permute. */
14369 void
14370 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14372 rtx tmp = gen_reg_rtx (mode);
14373 rtx store = gen_rtx_SET (op0, tmp);
14374 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
14375 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
14376 rtx sel = swap_selector_for_mode (mode);
14377 rtx vperm;
14379 gcc_assert (REG_P (op1));
14380 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14381 emit_insn (gen_rtx_SET (tmp, vperm));
14382 emit_insn (par);
14385 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
14386 specified. Issue the store preceded by an element-reversing permute. */
14387 void
14388 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
14390 machine_mode inner_mode = GET_MODE_INNER (mode);
14391 rtx tmp = gen_reg_rtx (mode);
14392 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
14393 rtx sel = swap_selector_for_mode (mode);
14394 rtx vperm;
14396 gcc_assert (REG_P (op1));
14397 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
14398 emit_insn (gen_rtx_SET (tmp, vperm));
14399 emit_insn (gen_rtx_SET (op0, stvx));
14402 static rtx
14403 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
14405 rtx pat, addr;
14406 tree arg0 = CALL_EXPR_ARG (exp, 0);
14407 tree arg1 = CALL_EXPR_ARG (exp, 1);
14408 machine_mode tmode = insn_data[icode].operand[0].mode;
14409 machine_mode mode0 = Pmode;
14410 machine_mode mode1 = Pmode;
14411 rtx op0 = expand_normal (arg0);
14412 rtx op1 = expand_normal (arg1);
14414 if (icode == CODE_FOR_nothing)
14415 /* Builtin not supported on this processor. */
14416 return 0;
14418 /* If we got invalid arguments bail out before generating bad rtl. */
14419 if (arg0 == error_mark_node || arg1 == error_mark_node)
14420 return const0_rtx;
14422 if (target == 0
14423 || GET_MODE (target) != tmode
14424 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14425 target = gen_reg_rtx (tmode);
14427 op1 = copy_to_mode_reg (mode1, op1);
14429 /* For LVX, express the RTL accurately by ANDing the address with -16.
14430 LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
14431 so the raw address is fine. */
14432 if (icode == CODE_FOR_altivec_lvx_v2df_2op
14433 || icode == CODE_FOR_altivec_lvx_v2di_2op
14434 || icode == CODE_FOR_altivec_lvx_v4sf_2op
14435 || icode == CODE_FOR_altivec_lvx_v4si_2op
14436 || icode == CODE_FOR_altivec_lvx_v8hi_2op
14437 || icode == CODE_FOR_altivec_lvx_v16qi_2op)
14439 rtx rawaddr;
14440 if (op0 == const0_rtx)
14441 rawaddr = op1;
14442 else
14444 op0 = copy_to_mode_reg (mode0, op0);
14445 rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
14447 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14448 addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);
14450 /* For -maltivec=be, emit the load and follow it up with a
14451 permute to swap the elements. */
14452 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14454 rtx temp = gen_reg_rtx (tmode);
14455 emit_insn (gen_rtx_SET (temp, addr));
14457 rtx sel = swap_selector_for_mode (tmode);
14458 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
14459 UNSPEC_VPERM);
14460 emit_insn (gen_rtx_SET (target, vperm));
14462 else
14463 emit_insn (gen_rtx_SET (target, addr));
14465 else
14467 if (op0 == const0_rtx)
14468 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
14469 else
14471 op0 = copy_to_mode_reg (mode0, op0);
14472 addr = gen_rtx_MEM (blk ? BLKmode : tmode,
14473 gen_rtx_PLUS (Pmode, op1, op0));
14476 pat = GEN_FCN (icode) (target, addr);
14477 if (! pat)
14478 return 0;
14479 emit_insn (pat);
14482 return target;
14485 static rtx
14486 paired_expand_stv_builtin (enum insn_code icode, tree exp)
14488 tree arg0 = CALL_EXPR_ARG (exp, 0);
14489 tree arg1 = CALL_EXPR_ARG (exp, 1);
14490 tree arg2 = CALL_EXPR_ARG (exp, 2);
14491 rtx op0 = expand_normal (arg0);
14492 rtx op1 = expand_normal (arg1);
14493 rtx op2 = expand_normal (arg2);
14494 rtx pat, addr;
14495 machine_mode tmode = insn_data[icode].operand[0].mode;
14496 machine_mode mode1 = Pmode;
14497 machine_mode mode2 = Pmode;
14499 /* Invalid arguments. Bail before doing anything stoopid! */
14500 if (arg0 == error_mark_node
14501 || arg1 == error_mark_node
14502 || arg2 == error_mark_node)
14503 return const0_rtx;
14505 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
14506 op0 = copy_to_mode_reg (tmode, op0);
14508 op2 = copy_to_mode_reg (mode2, op2);
14510 if (op1 == const0_rtx)
14512 addr = gen_rtx_MEM (tmode, op2);
14514 else
14516 op1 = copy_to_mode_reg (mode1, op1);
14517 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
14520 pat = GEN_FCN (icode) (addr, op0);
14521 if (pat)
14522 emit_insn (pat);
14523 return NULL_RTX;
14526 static rtx
14527 altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
14529 rtx pat;
14530 tree arg0 = CALL_EXPR_ARG (exp, 0);
14531 tree arg1 = CALL_EXPR_ARG (exp, 1);
14532 tree arg2 = CALL_EXPR_ARG (exp, 2);
14533 rtx op0 = expand_normal (arg0);
14534 rtx op1 = expand_normal (arg1);
14535 rtx op2 = expand_normal (arg2);
14536 machine_mode mode0 = insn_data[icode].operand[0].mode;
14537 machine_mode mode1 = insn_data[icode].operand[1].mode;
14538 machine_mode mode2 = insn_data[icode].operand[2].mode;
14540 if (icode == CODE_FOR_nothing)
14541 /* Builtin not supported on this processor. */
14542 return NULL_RTX;
14544 /* If we got invalid arguments, bail out before generating bad rtl. */
14545 if (arg0 == error_mark_node
14546 || arg1 == error_mark_node
14547 || arg2 == error_mark_node)
14548 return NULL_RTX;
14550 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14551 op0 = copy_to_mode_reg (mode0, op0);
14552 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14553 op1 = copy_to_mode_reg (mode1, op1);
14554 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14555 op2 = copy_to_mode_reg (mode2, op2);
14557 pat = GEN_FCN (icode) (op0, op1, op2);
14558 if (pat)
14559 emit_insn (pat);
14561 return NULL_RTX;
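/* Editorial sketch, assuming an ISA 3.0 (-mcpu=power9) target: the stxvl
   expander above backs the variable-length store intrinsic vec_xst_len,
   which stores only the first N bytes of a vector.  The exact intrinsic
   signature is an assumption here; illustrative use:

     #include <altivec.h>
     #include <stddef.h>

     void
     store_prefix (vector unsigned char v, unsigned char *p, size_t n)
     {
       vec_xst_len (v, p, n);
     }
*/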
14564 static rtx
14565 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
14567 tree arg0 = CALL_EXPR_ARG (exp, 0);
14568 tree arg1 = CALL_EXPR_ARG (exp, 1);
14569 tree arg2 = CALL_EXPR_ARG (exp, 2);
14570 rtx op0 = expand_normal (arg0);
14571 rtx op1 = expand_normal (arg1);
14572 rtx op2 = expand_normal (arg2);
14573 rtx pat, addr, rawaddr;
14574 machine_mode tmode = insn_data[icode].operand[0].mode;
14575 machine_mode smode = insn_data[icode].operand[1].mode;
14576 machine_mode mode1 = Pmode;
14577 machine_mode mode2 = Pmode;
14579 /* Invalid arguments. Bail out before doing anything stupid. */
14580 if (arg0 == error_mark_node
14581 || arg1 == error_mark_node
14582 || arg2 == error_mark_node)
14583 return const0_rtx;
14585 op2 = copy_to_mode_reg (mode2, op2);
14587 /* For STVX, express the RTL accurately by ANDing the address with -16.
14588 STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
14589 so the raw address is fine. */
14590 if (icode == CODE_FOR_altivec_stvx_v2df_2op
14591 || icode == CODE_FOR_altivec_stvx_v2di_2op
14592 || icode == CODE_FOR_altivec_stvx_v4sf_2op
14593 || icode == CODE_FOR_altivec_stvx_v4si_2op
14594 || icode == CODE_FOR_altivec_stvx_v8hi_2op
14595 || icode == CODE_FOR_altivec_stvx_v16qi_2op)
14597 if (op1 == const0_rtx)
14598 rawaddr = op2;
14599 else
14601 op1 = copy_to_mode_reg (mode1, op1);
14602 rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
14605 addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
14606 addr = gen_rtx_MEM (tmode, addr);
14608 op0 = copy_to_mode_reg (tmode, op0);
14610 /* For -maltivec=be, emit a permute to swap the elements, followed
14611 by the store. */
14612 if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
14614 rtx temp = gen_reg_rtx (tmode);
14615 rtx sel = swap_selector_for_mode (tmode);
14616 rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
14617 UNSPEC_VPERM);
14618 emit_insn (gen_rtx_SET (temp, vperm));
14619 emit_insn (gen_rtx_SET (addr, temp));
14621 else
14622 emit_insn (gen_rtx_SET (addr, op0));
14624 else
14626 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
14627 op0 = copy_to_mode_reg (smode, op0);
14629 if (op1 == const0_rtx)
14630 addr = gen_rtx_MEM (tmode, op2);
14631 else
14633 op1 = copy_to_mode_reg (mode1, op1);
14634 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
14637 pat = GEN_FCN (icode) (addr, op0);
14638 if (pat)
14639 emit_insn (pat);
14642 return NULL_RTX;
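/* Editorial example, not part of GCC: the stvx path mirrors the load
   case, masking the address with -16, and for -maltivec=be on a
   little-endian target it emits a vperm element swap before the store.
   A user-level sketch assuming <altivec.h>; vec_st reaches this
   expander via ALTIVEC_BUILTIN_STVX:

     #include <altivec.h>

     void
     store_v4si (vector signed int v, signed int *p)
     {
       vec_st (v, 0, p);
     }
*/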
14645 /* Return the appropriate SPR number associated with the given builtin. */
14646 static inline HOST_WIDE_INT
14647 htm_spr_num (enum rs6000_builtins code)
14649 if (code == HTM_BUILTIN_GET_TFHAR
14650 || code == HTM_BUILTIN_SET_TFHAR)
14651 return TFHAR_SPR;
14652 else if (code == HTM_BUILTIN_GET_TFIAR
14653 || code == HTM_BUILTIN_SET_TFIAR)
14654 return TFIAR_SPR;
14655 else if (code == HTM_BUILTIN_GET_TEXASR
14656 || code == HTM_BUILTIN_SET_TEXASR)
14657 return TEXASR_SPR;
14658 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
14659 || code == HTM_BUILTIN_SET_TEXASRU);
14660 return TEXASRU_SPR;
14663 /* Return the appropriate SPR regno associated with the given builtin. */
14664 static inline HOST_WIDE_INT
14665 htm_spr_regno (enum rs6000_builtins code)
14667 if (code == HTM_BUILTIN_GET_TFHAR
14668 || code == HTM_BUILTIN_SET_TFHAR)
14669 return TFHAR_REGNO;
14670 else if (code == HTM_BUILTIN_GET_TFIAR
14671 || code == HTM_BUILTIN_SET_TFIAR)
14672 return TFIAR_REGNO;
14673 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
14674 || code == HTM_BUILTIN_SET_TEXASR
14675 || code == HTM_BUILTIN_GET_TEXASRU
14676 || code == HTM_BUILTIN_SET_TEXASRU);
14677 return TEXASR_REGNO;
14680 /* Return the correct ICODE value depending on whether we are
14681 setting or reading the HTM SPRs. */
14682 static inline enum insn_code
14683 rs6000_htm_spr_icode (bool nonvoid)
14685 if (nonvoid)
14686 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
14687 else
14688 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
14691 /* Expand the HTM builtin in EXP and store the result in TARGET.
14692 Store true in *EXPANDEDP if we found a builtin to expand. */
14693 static rtx
14694 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
14696 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14697 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
14698 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
14699 const struct builtin_description *d;
14700 size_t i;
14702 *expandedp = true;
14704 if (!TARGET_POWERPC64
14705 && (fcode == HTM_BUILTIN_TABORTDC
14706 || fcode == HTM_BUILTIN_TABORTDCI))
14708 size_t uns_fcode = (size_t)fcode;
14709 const char *name = rs6000_builtin_info[uns_fcode].name;
14710 error ("builtin %qs is only valid in 64-bit mode", name);
14711 return const0_rtx;
14714 /* Expand the HTM builtins. */
14715 d = bdesc_htm;
14716 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14717 if (d->code == fcode)
14719 rtx op[MAX_HTM_OPERANDS], pat;
14720 int nopnds = 0;
14721 tree arg;
14722 call_expr_arg_iterator iter;
14723 unsigned attr = rs6000_builtin_info[fcode].attr;
14724 enum insn_code icode = d->icode;
14725 const struct insn_operand_data *insn_op;
14726 bool uses_spr = (attr & RS6000_BTC_SPR);
14727 rtx cr = NULL_RTX;
14729 if (uses_spr)
14730 icode = rs6000_htm_spr_icode (nonvoid);
14731 insn_op = &insn_data[icode].operand[0];
14733 if (nonvoid)
14735 machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
14736 if (!target
14737 || GET_MODE (target) != tmode
14738 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
14739 target = gen_reg_rtx (tmode);
14740 if (uses_spr)
14741 op[nopnds++] = target;
14744 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
14746 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
14747 return const0_rtx;
14749 insn_op = &insn_data[icode].operand[nopnds];
14751 op[nopnds] = expand_normal (arg);
14753 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
14755 if (!strcmp (insn_op->constraint, "n"))
14757 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
14758 if (!CONST_INT_P (op[nopnds]))
14759 error ("argument %d must be an unsigned literal", arg_num);
14760 else
14761 error ("argument %d is an unsigned literal that is "
14762 "out of range", arg_num);
14763 return const0_rtx;
14765 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
14768 nopnds++;
14771 /* Handle the builtins for extended mnemonics. These accept
14772 no arguments, but map to builtins that take arguments. */
14773 switch (fcode)
14775 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
14776 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
14777 op[nopnds++] = GEN_INT (1);
14778 if (flag_checking)
14779 attr |= RS6000_BTC_UNARY;
14780 break;
14781 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
14782 op[nopnds++] = GEN_INT (0);
14783 if (flag_checking)
14784 attr |= RS6000_BTC_UNARY;
14785 break;
14786 default:
14787 break;
14790 /* If this builtin accesses SPRs, then pass in the appropriate
14791 SPR number and SPR regno as the last two operands. */
14792 if (uses_spr)
14794 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
14795 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
14796 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
14798 /* If this builtin accesses a CR, then pass in a scratch
14799 CR as the last operand. */
14800 else if (attr & RS6000_BTC_CR)
14801 { cr = gen_reg_rtx (CCmode);
14802 op[nopnds++] = cr;
14805 if (flag_checking)
14807 int expected_nopnds = 0;
14808 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
14809 expected_nopnds = 1;
14810 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
14811 expected_nopnds = 2;
14812 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
14813 expected_nopnds = 3;
14814 if (!(attr & RS6000_BTC_VOID))
14815 expected_nopnds += 1;
14816 if (uses_spr)
14817 expected_nopnds += 2;
14819 gcc_assert (nopnds == expected_nopnds
14820 && nopnds <= MAX_HTM_OPERANDS);
14823 switch (nopnds)
14825 case 1:
14826 pat = GEN_FCN (icode) (op[0]);
14827 break;
14828 case 2:
14829 pat = GEN_FCN (icode) (op[0], op[1]);
14830 break;
14831 case 3:
14832 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
14833 break;
14834 case 4:
14835 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
14836 break;
14837 default:
14838 gcc_unreachable ();
14840 if (!pat)
14841 return NULL_RTX;
14842 emit_insn (pat);
14844 if (attr & RS6000_BTC_CR)
14846 if (fcode == HTM_BUILTIN_TBEGIN)
14848 /* Emit code to set TARGET to true or false depending on
14849 whether the tbegin. instruction succeeded or failed
14850 to start a transaction. We do this by placing the one's
14851 complement of CR's EQ bit into TARGET. */
14852 rtx scratch = gen_reg_rtx (SImode);
14853 emit_insn (gen_rtx_SET (scratch,
14854 gen_rtx_EQ (SImode, cr,
14855 const0_rtx)));
14856 emit_insn (gen_rtx_SET (target,
14857 gen_rtx_XOR (SImode, scratch,
14858 GEN_INT (1))));
14860 else
14862 /* Emit code to copy the 4-bit condition register field
14863 CR into the least significant end of register TARGET. */
14864 rtx scratch1 = gen_reg_rtx (SImode);
14865 rtx scratch2 = gen_reg_rtx (SImode);
14866 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
14867 emit_insn (gen_movcc (subreg, cr));
14868 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
14869 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
14873 if (nonvoid)
14874 return target;
14875 return const0_rtx;
14878 *expandedp = false;
14879 return NULL_RTX;
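/* Editorial sketch, assuming -mhtm: the CR handling above is what makes
   __builtin_tbegin return nonzero when a transaction starts (the one's
   complement of CR0's EQ bit) and what gives the other CR-setting HTM
   builtins their 4-bit CR field result.  Illustrative use:

     long
     add_transactionally (long *counter)
     {
       if (__builtin_tbegin (0))
	 {
	   *counter += 1;
	   __builtin_tend (0);
	   return 1;
	 }
       return 0;
     }
*/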
14882 /* Expand the CPU builtin in FCODE and store the result in TARGET. */
14884 static rtx
14885 cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
14886 rtx target)
14888 /* __builtin_cpu_init () is a nop, so expand to nothing. */
14889 if (fcode == RS6000_BUILTIN_CPU_INIT)
14890 return const0_rtx;
14892 if (target == 0 || GET_MODE (target) != SImode)
14893 target = gen_reg_rtx (SImode);
14895 #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
14896 tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
14897 /* The target-clones pass creates an ARRAY_REF instead of a STRING_CST;
14898 convert it back to a STRING_CST. */
14899 if (TREE_CODE (arg) == ARRAY_REF
14900 && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST
14901 && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST
14902 && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0)
14903 arg = TREE_OPERAND (arg, 0);
14905 if (TREE_CODE (arg) != STRING_CST)
14907 error ("builtin %qs only accepts a string argument",
14908 rs6000_builtin_info[(size_t) fcode].name);
14909 return const0_rtx;
14912 if (fcode == RS6000_BUILTIN_CPU_IS)
14914 const char *cpu = TREE_STRING_POINTER (arg);
14915 rtx cpuid = NULL_RTX;
14916 for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
14917 if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
14919 /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */
14920 cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
14921 break;
14923 if (cpuid == NULL_RTX)
14925 /* Invalid CPU argument. */
14926 error ("cpu %qs is an invalid argument to builtin %qs",
14927 cpu, rs6000_builtin_info[(size_t) fcode].name);
14928 return const0_rtx;
14931 rtx platform = gen_reg_rtx (SImode);
14932 rtx tcbmem = gen_const_mem (SImode,
14933 gen_rtx_PLUS (Pmode,
14934 gen_rtx_REG (Pmode, TLS_REGNUM),
14935 GEN_INT (TCB_PLATFORM_OFFSET)));
14936 emit_move_insn (platform, tcbmem);
14937 emit_insn (gen_eqsi3 (target, platform, cpuid));
14939 else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
14941 const char *hwcap = TREE_STRING_POINTER (arg);
14942 rtx mask = NULL_RTX;
14943 int hwcap_offset;
14944 for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
14945 if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
14947 mask = GEN_INT (cpu_supports_info[i].mask);
14948 hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
14949 break;
14951 if (mask == NULL_RTX)
14953 /* Invalid HWCAP argument. */
14954 error ("%s %qs is an invalid argument to builtin %qs",
14955 "hwcap", hwcap, rs6000_builtin_info[(size_t) fcode].name);
14956 return const0_rtx;
14959 rtx tcb_hwcap = gen_reg_rtx (SImode);
14960 rtx tcbmem = gen_const_mem (SImode,
14961 gen_rtx_PLUS (Pmode,
14962 gen_rtx_REG (Pmode, TLS_REGNUM),
14963 GEN_INT (hwcap_offset)));
14964 emit_move_insn (tcb_hwcap, tcbmem);
14965 rtx scratch1 = gen_reg_rtx (SImode);
14966 emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
14967 rtx scratch2 = gen_reg_rtx (SImode);
14968 emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
14969 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
14971 else
14972 gcc_unreachable ();
14974 /* Record that we have expanded a CPU builtin, so that we can later
14975 emit a reference to the special symbol exported by LIBC to ensure we
14976 do not link against an old LIBC that doesn't support this feature. */
14977 cpu_builtin_p = true;
14979 #else
14980 warning (0, "builtin %qs requires GLIBC (2.23 or newer) that exports the "
14981 "hardware capability bits", rs6000_builtin_info[(size_t) fcode].name);
14983 /* For old LIBCs, always return FALSE. */
14984 emit_move_insn (target, GEN_INT (0));
14985 #endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */
14987 return target;
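/* Editorial sketch: the code above turns __builtin_cpu_is and
   __builtin_cpu_supports into a load from the glibc TCB plus a compare
   or mask test.  Illustrative use ("power9" and "arch_3_00" are real
   glibc platform/hwcap strings; the function itself is hypothetical):

     int
     have_power9 (void)
     {
       __builtin_cpu_init ();
       return __builtin_cpu_is ("power9")
	      || __builtin_cpu_supports ("arch_3_00");
     }
*/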
14990 static rtx
14991 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
14993 rtx pat;
14994 tree arg0 = CALL_EXPR_ARG (exp, 0);
14995 tree arg1 = CALL_EXPR_ARG (exp, 1);
14996 tree arg2 = CALL_EXPR_ARG (exp, 2);
14997 rtx op0 = expand_normal (arg0);
14998 rtx op1 = expand_normal (arg1);
14999 rtx op2 = expand_normal (arg2);
15000 machine_mode tmode = insn_data[icode].operand[0].mode;
15001 machine_mode mode0 = insn_data[icode].operand[1].mode;
15002 machine_mode mode1 = insn_data[icode].operand[2].mode;
15003 machine_mode mode2 = insn_data[icode].operand[3].mode;
15005 if (icode == CODE_FOR_nothing)
15006 /* Builtin not supported on this processor. */
15007 return 0;
15009 /* If we got invalid arguments, bail out before generating bad rtl. */
15010 if (arg0 == error_mark_node
15011 || arg1 == error_mark_node
15012 || arg2 == error_mark_node)
15013 return const0_rtx;
15015 /* Check and prepare arguments depending on the instruction code.
15017 Note that a switch statement instead of this sequence of tests
15018 would be incorrect, as many of the CODE_FOR values could be
15019 CODE_FOR_nothing, and that would yield multiple case labels
15020 with identical values. We would never reach here at runtime
15021 in that case. */
15022 if (icode == CODE_FOR_altivec_vsldoi_v4sf
15023 || icode == CODE_FOR_altivec_vsldoi_v2df
15024 || icode == CODE_FOR_altivec_vsldoi_v4si
15025 || icode == CODE_FOR_altivec_vsldoi_v8hi
15026 || icode == CODE_FOR_altivec_vsldoi_v16qi)
15028 /* Only allow 4-bit unsigned literals. */
15029 STRIP_NOPS (arg2);
15030 if (TREE_CODE (arg2) != INTEGER_CST
15031 || TREE_INT_CST_LOW (arg2) & ~0xf)
15033 error ("argument 3 must be a 4-bit unsigned literal");
15034 return CONST0_RTX (tmode);
15037 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
15038 || icode == CODE_FOR_vsx_xxpermdi_v2di
15039 || icode == CODE_FOR_vsx_xxpermdi_v2df_be
15040 || icode == CODE_FOR_vsx_xxpermdi_v2di_be
15041 || icode == CODE_FOR_vsx_xxpermdi_v1ti
15042 || icode == CODE_FOR_vsx_xxpermdi_v4sf
15043 || icode == CODE_FOR_vsx_xxpermdi_v4si
15044 || icode == CODE_FOR_vsx_xxpermdi_v8hi
15045 || icode == CODE_FOR_vsx_xxpermdi_v16qi
15046 || icode == CODE_FOR_vsx_xxsldwi_v16qi
15047 || icode == CODE_FOR_vsx_xxsldwi_v8hi
15048 || icode == CODE_FOR_vsx_xxsldwi_v4si
15049 || icode == CODE_FOR_vsx_xxsldwi_v4sf
15050 || icode == CODE_FOR_vsx_xxsldwi_v2di
15051 || icode == CODE_FOR_vsx_xxsldwi_v2df)
15053 /* Only allow 2-bit unsigned literals. */
15054 STRIP_NOPS (arg2);
15055 if (TREE_CODE (arg2) != INTEGER_CST
15056 || TREE_INT_CST_LOW (arg2) & ~0x3)
15058 error ("argument 3 must be a 2-bit unsigned literal");
15059 return CONST0_RTX (tmode);
15062 else if (icode == CODE_FOR_vsx_set_v2df
15063 || icode == CODE_FOR_vsx_set_v2di
15064 || icode == CODE_FOR_bcdadd
15065 || icode == CODE_FOR_bcdadd_lt
15066 || icode == CODE_FOR_bcdadd_eq
15067 || icode == CODE_FOR_bcdadd_gt
15068 || icode == CODE_FOR_bcdsub
15069 || icode == CODE_FOR_bcdsub_lt
15070 || icode == CODE_FOR_bcdsub_eq
15071 || icode == CODE_FOR_bcdsub_gt)
15073 /* Only allow 1-bit unsigned literals. */
15074 STRIP_NOPS (arg2);
15075 if (TREE_CODE (arg2) != INTEGER_CST
15076 || TREE_INT_CST_LOW (arg2) & ~0x1)
15078 error ("argument 3 must be a 1-bit unsigned literal");
15079 return CONST0_RTX (tmode);
15082 else if (icode == CODE_FOR_dfp_ddedpd_dd
15083 || icode == CODE_FOR_dfp_ddedpd_td)
15085 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
15086 STRIP_NOPS (arg0);
15087 if (TREE_CODE (arg0) != INTEGER_CST
15088 || TREE_INT_CST_LOW (arg0) & ~0x3)
15090 error ("argument 1 must be 0 or 2");
15091 return CONST0_RTX (tmode);
15094 else if (icode == CODE_FOR_dfp_denbcd_dd
15095 || icode == CODE_FOR_dfp_denbcd_td)
15097 /* Only allow 1-bit unsigned literals. */
15098 STRIP_NOPS (arg0);
15099 if (TREE_CODE (arg0) != INTEGER_CST
15100 || TREE_INT_CST_LOW (arg0) & ~0x1)
15102 error ("argument 1 must be a 1-bit unsigned literal");
15103 return CONST0_RTX (tmode);
15106 else if (icode == CODE_FOR_dfp_dscli_dd
15107 || icode == CODE_FOR_dfp_dscli_td
15108 || icode == CODE_FOR_dfp_dscri_dd
15109 || icode == CODE_FOR_dfp_dscri_td)
15111 /* Only allow 6-bit unsigned literals. */
15112 STRIP_NOPS (arg1);
15113 if (TREE_CODE (arg1) != INTEGER_CST
15114 || TREE_INT_CST_LOW (arg1) & ~0x3f)
15116 error ("argument 2 must be a 6-bit unsigned literal");
15117 return CONST0_RTX (tmode);
15120 else if (icode == CODE_FOR_crypto_vshasigmaw
15121 || icode == CODE_FOR_crypto_vshasigmad)
15123 /* Check whether the 2nd and 3rd arguments are integer constants and in
15124 range and prepare arguments. */
15125 STRIP_NOPS (arg1);
15126 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2))
15128 error ("argument 2 must be 0 or 1");
15129 return CONST0_RTX (tmode);
15132 STRIP_NOPS (arg2);
15133 if (TREE_CODE (arg2) != INTEGER_CST
15134 || wi::geu_p (wi::to_wide (arg2), 16))
15136 error ("argument 3 must be in the range 0..15");
15137 return CONST0_RTX (tmode);
15141 if (target == 0
15142 || GET_MODE (target) != tmode
15143 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15144 target = gen_reg_rtx (tmode);
15146 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15147 op0 = copy_to_mode_reg (mode0, op0);
15148 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15149 op1 = copy_to_mode_reg (mode1, op1);
15150 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15151 op2 = copy_to_mode_reg (mode2, op2);
15153 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
15154 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
15155 else
15156 pat = GEN_FCN (icode) (target, op0, op1, op2);
15157 if (! pat)
15158 return 0;
15159 emit_insn (pat);
15161 return target;
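/* Editorial example: the literal checks above are why intrinsics such as
   vec_sld demand a compile-time constant in range.  Assuming <altivec.h>,
   the call below expands via CODE_FOR_altivec_vsldoi_*, while a
   non-constant or out-of-range shift draws the "4-bit unsigned literal"
   error at expand time:

     #include <altivec.h>

     vector signed int
     shift_merge (vector signed int a, vector signed int b)
     {
       return vec_sld (a, b, 4);
     }
*/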
15164 /* Expand the lvx builtins. */
15165 static rtx
15166 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
15168 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15169 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15170 tree arg0;
15171 machine_mode tmode, mode0;
15172 rtx pat, op0;
15173 enum insn_code icode;
15175 switch (fcode)
15177 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
15178 icode = CODE_FOR_vector_altivec_load_v16qi;
15179 break;
15180 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
15181 icode = CODE_FOR_vector_altivec_load_v8hi;
15182 break;
15183 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
15184 icode = CODE_FOR_vector_altivec_load_v4si;
15185 break;
15186 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
15187 icode = CODE_FOR_vector_altivec_load_v4sf;
15188 break;
15189 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
15190 icode = CODE_FOR_vector_altivec_load_v2df;
15191 break;
15192 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
15193 icode = CODE_FOR_vector_altivec_load_v2di;
15194 break;
15195 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
15196 icode = CODE_FOR_vector_altivec_load_v1ti;
15197 break;
15198 default:
15199 *expandedp = false;
15200 return NULL_RTX;
15203 *expandedp = true;
15205 arg0 = CALL_EXPR_ARG (exp, 0);
15206 op0 = expand_normal (arg0);
15207 tmode = insn_data[icode].operand[0].mode;
15208 mode0 = insn_data[icode].operand[1].mode;
15210 if (target == 0
15211 || GET_MODE (target) != tmode
15212 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15213 target = gen_reg_rtx (tmode);
15215 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15216 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15218 pat = GEN_FCN (icode) (target, op0);
15219 if (! pat)
15220 return 0;
15221 emit_insn (pat);
15222 return target;
15225 /* Expand the stvx builtins. */
15226 static rtx
15227 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15228 bool *expandedp)
15230 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15231 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15232 tree arg0, arg1;
15233 machine_mode mode0, mode1;
15234 rtx pat, op0, op1;
15235 enum insn_code icode;
15237 switch (fcode)
15239 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
15240 icode = CODE_FOR_vector_altivec_store_v16qi;
15241 break;
15242 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
15243 icode = CODE_FOR_vector_altivec_store_v8hi;
15244 break;
15245 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
15246 icode = CODE_FOR_vector_altivec_store_v4si;
15247 break;
15248 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
15249 icode = CODE_FOR_vector_altivec_store_v4sf;
15250 break;
15251 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
15252 icode = CODE_FOR_vector_altivec_store_v2df;
15253 break;
15254 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
15255 icode = CODE_FOR_vector_altivec_store_v2di;
15256 break;
15257 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
15258 icode = CODE_FOR_vector_altivec_store_v1ti;
15259 break;
15260 default:
15261 *expandedp = false;
15262 return NULL_RTX;
15265 arg0 = CALL_EXPR_ARG (exp, 0);
15266 arg1 = CALL_EXPR_ARG (exp, 1);
15267 op0 = expand_normal (arg0);
15268 op1 = expand_normal (arg1);
15269 mode0 = insn_data[icode].operand[0].mode;
15270 mode1 = insn_data[icode].operand[1].mode;
15272 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15273 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15274 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15275 op1 = copy_to_mode_reg (mode1, op1);
15277 pat = GEN_FCN (icode) (op0, op1);
15278 if (pat)
15279 emit_insn (pat);
15281 *expandedp = true;
15282 return NULL_RTX;
15285 /* Expand the dst builtins. */
15286 static rtx
15287 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
15288 bool *expandedp)
15290 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15291 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15292 tree arg0, arg1, arg2;
15293 machine_mode mode0, mode1;
15294 rtx pat, op0, op1, op2;
15295 const struct builtin_description *d;
15296 size_t i;
15298 *expandedp = false;
15300 /* Handle DST variants. */
15301 d = bdesc_dst;
15302 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15303 if (d->code == fcode)
15305 arg0 = CALL_EXPR_ARG (exp, 0);
15306 arg1 = CALL_EXPR_ARG (exp, 1);
15307 arg2 = CALL_EXPR_ARG (exp, 2);
15308 op0 = expand_normal (arg0);
15309 op1 = expand_normal (arg1);
15310 op2 = expand_normal (arg2);
15311 mode0 = insn_data[d->icode].operand[0].mode;
15312 mode1 = insn_data[d->icode].operand[1].mode;
15314 /* Invalid arguments, bail out before generating bad rtl. */
15315 if (arg0 == error_mark_node
15316 || arg1 == error_mark_node
15317 || arg2 == error_mark_node)
15318 return const0_rtx;
15320 *expandedp = true;
15321 STRIP_NOPS (arg2);
15322 if (TREE_CODE (arg2) != INTEGER_CST
15323 || TREE_INT_CST_LOW (arg2) & ~0x3)
15325 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
15326 return const0_rtx;
15329 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15330 op0 = copy_to_mode_reg (Pmode, op0);
15331 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15332 op1 = copy_to_mode_reg (mode1, op1);
15334 pat = GEN_FCN (d->icode) (op0, op1, op2);
15335 if (pat != 0)
15336 emit_insn (pat);
15338 return NULL_RTX;
15341 return NULL_RTX;
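/* Editorial example: the dst expander enforces the 2-bit tag operand.
   Assuming <altivec.h>, vec_dst takes an address, a control word
   encoding block size, count and stride (the value below is purely
   illustrative), and a tag that must be a literal 0..3:

     #include <altivec.h>

     void
     prefetch_block (const signed int *p)
     {
       vec_dst (p, 0x10010, 0);
     }
*/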
15344 /* Expand vec_init builtin. */
15345 static rtx
15346 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
15348 machine_mode tmode = TYPE_MODE (type);
15349 machine_mode inner_mode = GET_MODE_INNER (tmode);
15350 int i, n_elt = GET_MODE_NUNITS (tmode);
15352 gcc_assert (VECTOR_MODE_P (tmode));
15353 gcc_assert (n_elt == call_expr_nargs (exp));
15355 if (!target || !register_operand (target, tmode))
15356 target = gen_reg_rtx (tmode);
15358 /* If we have a vector comprised of a single element, such as V1TImode, do
15359 the initialization directly. */
15360 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
15362 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
15363 emit_move_insn (target, gen_lowpart (tmode, x));
15365 else
15367 rtvec v = rtvec_alloc (n_elt);
15369 for (i = 0; i < n_elt; ++i)
15371 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
15372 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15375 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
15378 return target;
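/* Editorial sketch, assuming -maltivec: one source-level form that
   reaches rs6000_expand_vector_init via the VEC_INIT builtins is a
   vector constructed from scalar values:

     vector int
     make_v4si (int a, int b, int c, int d)
     {
       return (vector int) {a, b, c, d};
     }
*/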
15381 /* Return the integer constant in ARG. Constrain it to be in the range
15382 of the subparts of VEC_TYPE; issue an error if not. */
15384 static int
15385 get_element_number (tree vec_type, tree arg)
15387 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15389 if (!tree_fits_uhwi_p (arg)
15390 || (elt = tree_to_uhwi (arg), elt > max))
15392 error ("selector must be an integer constant in the range 0..%wi", max);
15393 return 0;
15396 return elt;
15399 /* Expand vec_set builtin. */
15400 static rtx
15401 altivec_expand_vec_set_builtin (tree exp)
15403 machine_mode tmode, mode1;
15404 tree arg0, arg1, arg2;
15405 int elt;
15406 rtx op0, op1;
15408 arg0 = CALL_EXPR_ARG (exp, 0);
15409 arg1 = CALL_EXPR_ARG (exp, 1);
15410 arg2 = CALL_EXPR_ARG (exp, 2);
15412 tmode = TYPE_MODE (TREE_TYPE (arg0));
15413 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15414 gcc_assert (VECTOR_MODE_P (tmode));
15416 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
15417 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
15418 elt = get_element_number (TREE_TYPE (arg0), arg2);
15420 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15421 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15423 op0 = force_reg (tmode, op0);
15424 op1 = force_reg (mode1, op1);
15426 rs6000_expand_vector_set (op0, op1, elt);
15428 return op0;
15431 /* Expand vec_ext builtin. */
15432 static rtx
15433 altivec_expand_vec_ext_builtin (tree exp, rtx target)
15435 machine_mode tmode, mode0;
15436 tree arg0, arg1;
15437 rtx op0;
15438 rtx op1;
15440 arg0 = CALL_EXPR_ARG (exp, 0);
15441 arg1 = CALL_EXPR_ARG (exp, 1);
15443 op0 = expand_normal (arg0);
15444 op1 = expand_normal (arg1);
15446 /* Call get_element_number to validate arg1 if it is a constant. */
15447 if (TREE_CODE (arg1) == INTEGER_CST)
15448 (void) get_element_number (TREE_TYPE (arg0), arg1);
15450 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15451 mode0 = TYPE_MODE (TREE_TYPE (arg0));
15452 gcc_assert (VECTOR_MODE_P (mode0));
15454 op0 = force_reg (mode0, op0);
15456 if (optimize || !target || !register_operand (target, tmode))
15457 target = gen_reg_rtx (tmode);
15459 rs6000_expand_vector_extract (target, op0, op1);
15461 return target;
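/* Editorial example: vec_extract and vec_insert funnel through the two
   expanders above; a constant selector is range-checked by
   get_element_number against the subpart count (0..3 for V4SI).
   Assuming <altivec.h>:

     #include <altivec.h>

     int
     third_element (vector signed int v)
     {
       return vec_extract (v, 2);
     }
*/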
15464 /* Expand the builtin in EXP and store the result in TARGET. Store
15465 true in *EXPANDEDP if we found a builtin to expand. */
15466 static rtx
15467 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
15469 const struct builtin_description *d;
15470 size_t i;
15471 enum insn_code icode;
15472 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15473 tree arg0, arg1, arg2;
15474 rtx op0, pat;
15475 machine_mode tmode, mode0;
15476 enum rs6000_builtins fcode
15477 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15479 if (rs6000_overloaded_builtin_p (fcode))
15481 *expandedp = true;
15482 error ("unresolved overload for Altivec builtin %qF", fndecl);
15484 /* Since it is invalid, just generate a normal call. */
15485 return expand_call (exp, target, false);
15488 target = altivec_expand_ld_builtin (exp, target, expandedp);
15489 if (*expandedp)
15490 return target;
15492 target = altivec_expand_st_builtin (exp, target, expandedp);
15493 if (*expandedp)
15494 return target;
15496 target = altivec_expand_dst_builtin (exp, target, expandedp);
15497 if (*expandedp)
15498 return target;
15500 *expandedp = true;
15502 switch (fcode)
15504 case ALTIVEC_BUILTIN_STVX_V2DF:
15505 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp);
15506 case ALTIVEC_BUILTIN_STVX_V2DI:
15507 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp);
15508 case ALTIVEC_BUILTIN_STVX_V4SF:
15509 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp);
15510 case ALTIVEC_BUILTIN_STVX:
15511 case ALTIVEC_BUILTIN_STVX_V4SI:
15512 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp);
15513 case ALTIVEC_BUILTIN_STVX_V8HI:
15514 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp);
15515 case ALTIVEC_BUILTIN_STVX_V16QI:
15516 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp);
15517 case ALTIVEC_BUILTIN_STVEBX:
15518 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
15519 case ALTIVEC_BUILTIN_STVEHX:
15520 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
15521 case ALTIVEC_BUILTIN_STVEWX:
15522 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
15523 case ALTIVEC_BUILTIN_STVXL_V2DF:
15524 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
15525 case ALTIVEC_BUILTIN_STVXL_V2DI:
15526 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
15527 case ALTIVEC_BUILTIN_STVXL_V4SF:
15528 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
15529 case ALTIVEC_BUILTIN_STVXL:
15530 case ALTIVEC_BUILTIN_STVXL_V4SI:
15531 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
15532 case ALTIVEC_BUILTIN_STVXL_V8HI:
15533 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
15534 case ALTIVEC_BUILTIN_STVXL_V16QI:
15535 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
15537 case ALTIVEC_BUILTIN_STVLX:
15538 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
15539 case ALTIVEC_BUILTIN_STVLXL:
15540 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
15541 case ALTIVEC_BUILTIN_STVRX:
15542 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
15543 case ALTIVEC_BUILTIN_STVRXL:
15544 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
15546 case P9V_BUILTIN_STXVL:
15547 return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp);
15549 case P9V_BUILTIN_XST_LEN_R:
15550 return altivec_expand_stxvl_builtin (CODE_FOR_xst_len_r, exp);
15552 case VSX_BUILTIN_STXVD2X_V1TI:
15553 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
15554 case VSX_BUILTIN_STXVD2X_V2DF:
15555 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
15556 case VSX_BUILTIN_STXVD2X_V2DI:
15557 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
15558 case VSX_BUILTIN_STXVW4X_V4SF:
15559 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
15560 case VSX_BUILTIN_STXVW4X_V4SI:
15561 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
15562 case VSX_BUILTIN_STXVW4X_V8HI:
15563 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
15564 case VSX_BUILTIN_STXVW4X_V16QI:
15565 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
15567 /* For the following on big endian, it's ok to use any appropriate
15568 unaligned-supporting store, so use a generic expander. For
15569 little-endian, the exact element-reversing instruction must
15570 be used. */
15571 case VSX_BUILTIN_ST_ELEMREV_V2DF:
15573 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df
15574 : CODE_FOR_vsx_st_elemrev_v2df);
15575 return altivec_expand_stv_builtin (code, exp);
15577 case VSX_BUILTIN_ST_ELEMREV_V2DI:
15579 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di
15580 : CODE_FOR_vsx_st_elemrev_v2di);
15581 return altivec_expand_stv_builtin (code, exp);
15583 case VSX_BUILTIN_ST_ELEMREV_V4SF:
15585 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf
15586 : CODE_FOR_vsx_st_elemrev_v4sf);
15587 return altivec_expand_stv_builtin (code, exp);
15589 case VSX_BUILTIN_ST_ELEMREV_V4SI:
15591 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si
15592 : CODE_FOR_vsx_st_elemrev_v4si);
15593 return altivec_expand_stv_builtin (code, exp);
15595 case VSX_BUILTIN_ST_ELEMREV_V8HI:
15597 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi
15598 : CODE_FOR_vsx_st_elemrev_v8hi);
15599 return altivec_expand_stv_builtin (code, exp);
15601 case VSX_BUILTIN_ST_ELEMREV_V16QI:
15603 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi
15604 : CODE_FOR_vsx_st_elemrev_v16qi);
15605 return altivec_expand_stv_builtin (code, exp);
15608 case ALTIVEC_BUILTIN_MFVSCR:
15609 icode = CODE_FOR_altivec_mfvscr;
15610 tmode = insn_data[icode].operand[0].mode;
15612 if (target == 0
15613 || GET_MODE (target) != tmode
15614 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15615 target = gen_reg_rtx (tmode);
15617 pat = GEN_FCN (icode) (target);
15618 if (! pat)
15619 return 0;
15620 emit_insn (pat);
15621 return target;
15623 case ALTIVEC_BUILTIN_MTVSCR:
15624 icode = CODE_FOR_altivec_mtvscr;
15625 arg0 = CALL_EXPR_ARG (exp, 0);
15626 op0 = expand_normal (arg0);
15627 mode0 = insn_data[icode].operand[0].mode;
15629 /* If we got invalid arguments, bail out before generating bad rtl. */
15630 if (arg0 == error_mark_node)
15631 return const0_rtx;
15633 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15634 op0 = copy_to_mode_reg (mode0, op0);
15636 pat = GEN_FCN (icode) (op0);
15637 if (pat)
15638 emit_insn (pat);
15639 return NULL_RTX;
15641 case ALTIVEC_BUILTIN_DSSALL:
15642 emit_insn (gen_altivec_dssall ());
15643 return NULL_RTX;
15645 case ALTIVEC_BUILTIN_DSS:
15646 icode = CODE_FOR_altivec_dss;
15647 arg0 = CALL_EXPR_ARG (exp, 0);
15648 STRIP_NOPS (arg0);
15649 op0 = expand_normal (arg0);
15650 mode0 = insn_data[icode].operand[0].mode;
15652 /* If we got invalid arguments, bail out before generating bad rtl. */
15653 if (arg0 == error_mark_node)
15654 return const0_rtx;
15656 if (TREE_CODE (arg0) != INTEGER_CST
15657 || TREE_INT_CST_LOW (arg0) & ~0x3)
15659 error ("argument to %qs must be a 2-bit unsigned literal", "dss");
15660 return const0_rtx;
15663 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15664 op0 = copy_to_mode_reg (mode0, op0);
15666 emit_insn (gen_altivec_dss (op0));
15667 return NULL_RTX;
15669 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
15670 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
15671 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
15672 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
15673 case VSX_BUILTIN_VEC_INIT_V2DF:
15674 case VSX_BUILTIN_VEC_INIT_V2DI:
15675 case VSX_BUILTIN_VEC_INIT_V1TI:
15676 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
15678 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
15679 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
15680 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
15681 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
15682 case VSX_BUILTIN_VEC_SET_V2DF:
15683 case VSX_BUILTIN_VEC_SET_V2DI:
15684 case VSX_BUILTIN_VEC_SET_V1TI:
15685 return altivec_expand_vec_set_builtin (exp);
15687 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
15688 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
15689 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
15690 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
15691 case VSX_BUILTIN_VEC_EXT_V2DF:
15692 case VSX_BUILTIN_VEC_EXT_V2DI:
15693 case VSX_BUILTIN_VEC_EXT_V1TI:
15694 return altivec_expand_vec_ext_builtin (exp, target);
15696 case P9V_BUILTIN_VEXTRACT4B:
15697 case P9V_BUILTIN_VEC_VEXTRACT4B:
15698 arg1 = CALL_EXPR_ARG (exp, 1);
15699 STRIP_NOPS (arg1);
15701 /* Generate a normal call if it is invalid. */
15702 if (arg1 == error_mark_node)
15703 return expand_call (exp, target, false);
15705 if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12)
15707 error ("second argument to %qs must be 0..12", "vec_vextract4b");
15708 return expand_call (exp, target, false);
15710 break;
15712 case P9V_BUILTIN_VINSERT4B:
15713 case P9V_BUILTIN_VINSERT4B_DI:
15714 case P9V_BUILTIN_VEC_VINSERT4B:
15715 arg2 = CALL_EXPR_ARG (exp, 2);
15716 STRIP_NOPS (arg2);
15718 /* Generate a normal call if it is invalid. */
15719 if (arg2 == error_mark_node)
15720 return expand_call (exp, target, false);
15722 if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12)
15724 error ("third argument to %qs must be 0..12", "vec_vinsert4b");
15725 return expand_call (exp, target, false);
15727 break;
15729 default:
15730 break;
15731 /* Fall through. */
15734 /* Expand abs* operations. */
15735 d = bdesc_abs;
15736 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15737 if (d->code == fcode)
15738 return altivec_expand_abs_builtin (d->icode, exp, target);
15740 /* Expand the AltiVec predicates. */
15741 d = bdesc_altivec_preds;
15742 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15743 if (d->code == fcode)
15744 return altivec_expand_predicate_builtin (d->icode, exp, target);
15746 /* LV* are funky; they are initialized differently from the other builtins. */
15747 switch (fcode)
15749 case ALTIVEC_BUILTIN_LVSL:
15750 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
15751 exp, target, false);
15752 case ALTIVEC_BUILTIN_LVSR:
15753 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
15754 exp, target, false);
15755 case ALTIVEC_BUILTIN_LVEBX:
15756 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
15757 exp, target, false);
15758 case ALTIVEC_BUILTIN_LVEHX:
15759 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
15760 exp, target, false);
15761 case ALTIVEC_BUILTIN_LVEWX:
15762 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
15763 exp, target, false);
15764 case ALTIVEC_BUILTIN_LVXL_V2DF:
15765 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
15766 exp, target, false);
15767 case ALTIVEC_BUILTIN_LVXL_V2DI:
15768 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
15769 exp, target, false);
15770 case ALTIVEC_BUILTIN_LVXL_V4SF:
15771 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
15772 exp, target, false);
15773 case ALTIVEC_BUILTIN_LVXL:
15774 case ALTIVEC_BUILTIN_LVXL_V4SI:
15775 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
15776 exp, target, false);
15777 case ALTIVEC_BUILTIN_LVXL_V8HI:
15778 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
15779 exp, target, false);
15780 case ALTIVEC_BUILTIN_LVXL_V16QI:
15781 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
15782 exp, target, false);
15783 case ALTIVEC_BUILTIN_LVX_V2DF:
15784 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op,
15785 exp, target, false);
15786 case ALTIVEC_BUILTIN_LVX_V2DI:
15787 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op,
15788 exp, target, false);
15789 case ALTIVEC_BUILTIN_LVX_V4SF:
15790 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op,
15791 exp, target, false);
15792 case ALTIVEC_BUILTIN_LVX:
15793 case ALTIVEC_BUILTIN_LVX_V4SI:
15794 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op,
15795 exp, target, false);
15796 case ALTIVEC_BUILTIN_LVX_V8HI:
15797 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op,
15798 exp, target, false);
15799 case ALTIVEC_BUILTIN_LVX_V16QI:
15800 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op,
15801 exp, target, false);
15802 case ALTIVEC_BUILTIN_LVLX:
15803 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
15804 exp, target, true);
15805 case ALTIVEC_BUILTIN_LVLXL:
15806 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
15807 exp, target, true);
15808 case ALTIVEC_BUILTIN_LVRX:
15809 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
15810 exp, target, true);
15811 case ALTIVEC_BUILTIN_LVRXL:
15812 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
15813 exp, target, true);
15814 case VSX_BUILTIN_LXVD2X_V1TI:
15815 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
15816 exp, target, false);
15817 case VSX_BUILTIN_LXVD2X_V2DF:
15818 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
15819 exp, target, false);
15820 case VSX_BUILTIN_LXVD2X_V2DI:
15821 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
15822 exp, target, false);
15823 case VSX_BUILTIN_LXVW4X_V4SF:
15824 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
15825 exp, target, false);
15826 case VSX_BUILTIN_LXVW4X_V4SI:
15827 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
15828 exp, target, false);
15829 case VSX_BUILTIN_LXVW4X_V8HI:
15830 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
15831 exp, target, false);
15832 case VSX_BUILTIN_LXVW4X_V16QI:
15833 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
15834 exp, target, false);
15835 /* For the following on big endian, it's ok to use any appropriate
15836 unaligned-supporting load, so use a generic expander. For
15837 little-endian, the exact element-reversing instruction must
15838 be used. */
15839 case VSX_BUILTIN_LD_ELEMREV_V2DF:
15841 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df
15842 : CODE_FOR_vsx_ld_elemrev_v2df);
15843 return altivec_expand_lv_builtin (code, exp, target, false);
15845 case VSX_BUILTIN_LD_ELEMREV_V2DI:
15847 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di
15848 : CODE_FOR_vsx_ld_elemrev_v2di);
15849 return altivec_expand_lv_builtin (code, exp, target, false);
15851 case VSX_BUILTIN_LD_ELEMREV_V4SF:
15853 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf
15854 : CODE_FOR_vsx_ld_elemrev_v4sf);
15855 return altivec_expand_lv_builtin (code, exp, target, false);
15857 case VSX_BUILTIN_LD_ELEMREV_V4SI:
15859 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si
15860 : CODE_FOR_vsx_ld_elemrev_v4si);
15861 return altivec_expand_lv_builtin (code, exp, target, false);
15863 case VSX_BUILTIN_LD_ELEMREV_V8HI:
15865 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi
15866 : CODE_FOR_vsx_ld_elemrev_v8hi);
15867 return altivec_expand_lv_builtin (code, exp, target, false);
15869 case VSX_BUILTIN_LD_ELEMREV_V16QI:
15871 enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi
15872 : CODE_FOR_vsx_ld_elemrev_v16qi);
15873 return altivec_expand_lv_builtin (code, exp, target, false);
15875 break;
15876 default:
15877 break;
15878 /* Fall through. */
15881 *expandedp = false;
15882 return NULL_RTX;
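/* Editorial note: the LD_ELEMREV/ST_ELEMREV cases above are what give
   vec_xl and vec_xst their endian-correct semantics: on big-endian any
   unaligned-capable load/store suffices, while on little-endian the
   element-reversing instruction is required.  Illustrative use,
   assuming a VSX target and <altivec.h>:

     #include <altivec.h>

     vector signed int
     load_unaligned (const signed int *p)
     {
       return vec_xl (0, p);
     }
*/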
15885 /* Expand the builtin in EXP and store the result in TARGET. Store
15886 true in *EXPANDEDP if we found a builtin to expand. */
15887 static rtx
15888 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
15890 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15891 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
15892 const struct builtin_description *d;
15893 size_t i;
15895 *expandedp = true;
15897 switch (fcode)
15899 case PAIRED_BUILTIN_STX:
15900 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
15901 case PAIRED_BUILTIN_LX:
15902 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
15903 default:
15904 break;
15905 /* Fall through. */
15908 /* Expand the paired predicates. */
15909 d = bdesc_paired_preds;
15910 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
15911 if (d->code == fcode)
15912 return paired_expand_predicate_builtin (d->icode, exp, target);
15914 *expandedp = false;
15915 return NULL_RTX;
15918 static rtx
15919 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
15921 rtx pat, scratch, tmp;
15922 tree form = CALL_EXPR_ARG (exp, 0);
15923 tree arg0 = CALL_EXPR_ARG (exp, 1);
15924 tree arg1 = CALL_EXPR_ARG (exp, 2);
15925 rtx op0 = expand_normal (arg0);
15926 rtx op1 = expand_normal (arg1);
15927 machine_mode mode0 = insn_data[icode].operand[1].mode;
15928 machine_mode mode1 = insn_data[icode].operand[2].mode;
15929 int form_int;
15930 enum rtx_code code;
15932 if (TREE_CODE (form) != INTEGER_CST)
15934 error ("argument 1 of %qs must be a constant",
15935 "__builtin_paired_predicate");
15936 return const0_rtx;
15938 else
15939 form_int = TREE_INT_CST_LOW (form);
15941 gcc_assert (mode0 == mode1);
15943 if (arg0 == error_mark_node || arg1 == error_mark_node)
15944 return const0_rtx;
15946 if (target == 0
15947 || GET_MODE (target) != SImode
15948 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
15949 target = gen_reg_rtx (SImode);
15950 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15951 op0 = copy_to_mode_reg (mode0, op0);
15952 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15953 op1 = copy_to_mode_reg (mode1, op1);
15955 scratch = gen_reg_rtx (CCFPmode);
15957 pat = GEN_FCN (icode) (scratch, op0, op1);
15958 if (!pat)
15959 return const0_rtx;
15961 emit_insn (pat);
15963 switch (form_int)
15965 /* LT bit. */
15966 case 0:
15967 code = LT;
15968 break;
15969 /* GT bit. */
15970 case 1:
15971 code = GT;
15972 break;
15973 /* EQ bit. */
15974 case 2:
15975 code = EQ;
15976 break;
15977 /* UN bit. */
15978 case 3:
15979 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
15980 return target;
15981 default:
15982 error ("argument 1 of %qs is out of range",
15983 "__builtin_paired_predicate");
15984 return const0_rtx;
15987 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
15988 emit_move_insn (target, tmp);
15989 return target;
15992 /* Issue an error for a builtin function that is called without the
15993 appropriate target options being enabled. */
15995 static void
15996 rs6000_invalid_builtin (enum rs6000_builtins fncode)
15998 size_t uns_fncode = (size_t) fncode;
15999 const char *name = rs6000_builtin_info[uns_fncode].name;
16000 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
16002 gcc_assert (name != NULL);
16003 if ((fnmask & RS6000_BTM_CELL) != 0)
16004 error ("builtin function %qs is only valid for the cell processor", name);
16005 else if ((fnmask & RS6000_BTM_VSX) != 0)
16006 error ("builtin function %qs requires the %qs option", name, "-mvsx");
16007 else if ((fnmask & RS6000_BTM_HTM) != 0)
16008 error ("builtin function %qs requires the %qs option", name, "-mhtm");
16009 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
16010 error ("builtin function %qs requires the %qs option", name, "-maltivec");
16011 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
16012 error ("builtin function %qs requires the %qs option", name, "-mpaired");
16013 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16014 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
16015 error ("builtin function %qs requires the %qs and %qs options",
16016 name, "-mhard-dfp", "-mpower8-vector");
16017 else if ((fnmask & RS6000_BTM_DFP) != 0)
16018 error ("builtin function %qs requires the %qs option", name, "-mhard-dfp");
16019 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
16020 error ("builtin function %qs requires the %qs option", name,
16021 "-mpower8-vector");
16022 else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16023 == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT))
16024 error ("builtin function %qs requires the %qs and %qs options",
16025 name, "-mcpu=power9", "-m64");
16026 else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0)
16027 error ("builtin function %qs requires the %qs option", name,
16028 "-mcpu=power9");
16029 else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16030 == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT))
16031 error ("builtin function %qs requires the %qs and %qs options",
16032 name, "-mcpu=power9", "-m64");
16033 else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC)
16034 error ("builtin function %qs requires the %qs option", name,
16035 "-mcpu=power9");
16036 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16037 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
16038 error ("builtin function %qs requires the %qs and %qs options",
16039 name, "-mhard-float", "-mlong-double-128");
16040 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
16041 error ("builtin function %qs requires the %qs option", name,
16042 "-mhard-float");
16043 else if ((fnmask & RS6000_BTM_FLOAT128_HW) != 0)
16044 error ("builtin function %qs requires ISA 3.0 IEEE 128-bit floating point",
16045 name);
16046 else if ((fnmask & RS6000_BTM_FLOAT128) != 0)
16047 error ("builtin function %qs requires the %qs option", name, "-mfloat128");
16048 else
16049 error ("builtin function %qs is not supported with the current options",
16050 name);
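/* Editorial sketch: for example, compiling a call to a VSX-only builtin
   without -mvsx reaches the RS6000_BTM_VSX branch above and produces a
   diagnostic of the form (the builtin name here is illustrative; the
   wording comes from the format strings above):

     error: builtin function '__builtin_vsx_xvadddp' requires the
     '-mvsx' option
*/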
16053 /* Target hook for early folding of built-ins, shamelessly stolen
16054 from ia64.c. */
16056 static tree
16057 rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
16058 int n_args ATTRIBUTE_UNUSED,
16059 tree *args ATTRIBUTE_UNUSED,
16060 bool ignore ATTRIBUTE_UNUSED)
16062 #ifdef SUBTARGET_FOLD_BUILTIN
16063 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
16064 #else
16065 return NULL_TREE;
16066 #endif
16069 /* Helper function to determine which built-ins may be valid without
16070 having a LHS. */
16071 static bool
16072 rs6000_builtin_valid_without_lhs (enum rs6000_builtins fn_code)
16074 switch (fn_code)
16076 case ALTIVEC_BUILTIN_STVX_V16QI:
16077 case ALTIVEC_BUILTIN_STVX_V8HI:
16078 case ALTIVEC_BUILTIN_STVX_V4SI:
16079 case ALTIVEC_BUILTIN_STVX_V4SF:
16080 case ALTIVEC_BUILTIN_STVX_V2DI:
16081 case ALTIVEC_BUILTIN_STVX_V2DF:
16082 return true;
16083 default:
16084 return false;
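/* Editorial note: the STVX entries are listed because a store intrinsic
   is a statement with no result; e.g. the call below has no LHS yet
   must still be eligible for gimple folding (sketch assuming
   <altivec.h>):

     #include <altivec.h>

     void
     flush (vector signed int v, signed int *p)
     {
       vec_st (v, 0, p);
     }
*/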
16088 /* Helper function to handle the gimple folding of a vector compare
16089 operation. This sets up true/false vectors, and uses the
16090 VEC_COND_EXPR operation.
16091 CODE indicates which comparison is to be made. (EQ, GT, ...).
16092 TYPE indicates the type of the result. */
16093 static tree
16094 fold_build_vec_cmp (tree_code code, tree type,
16095 tree arg0, tree arg1)
16097 tree cmp_type = build_same_sized_truth_vector_type (type);
16098 tree zero_vec = build_zero_cst (type);
16099 tree minus_one_vec = build_minus_one_cst (type);
16100 tree cmp = fold_build2 (code, cmp_type, arg0, arg1);
16101 return fold_build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec);
16104 /* Helper function to handle the in-between steps for the
16105 vector compare built-ins. */
16106 static void
16107 fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt)
16109 tree arg0 = gimple_call_arg (stmt, 0);
16110 tree arg1 = gimple_call_arg (stmt, 1);
16111 tree lhs = gimple_call_lhs (stmt);
16112 tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1);
16113 gimple *g = gimple_build_assign (lhs, cmp);
16114 gimple_set_location (g, gimple_location (stmt));
16115 gsi_replace (gsi, g, true);
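/* Editorial sketch: fold_build_vec_cmp rewrites a vector-compare builtin
   into a VEC_COND_EXPR selecting between all-ones and all-zero vectors,
   i.e. the same lane-wise 0/-1 result that GCC's generic vector
   extension produces for a comparison:

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si
     lanes_equal (v4si a, v4si b)
     {
       return a == b;
     }
*/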
16118 /* Fold a machine-dependent built-in in GIMPLE. (For folding into
16119 a constant, use rs6000_fold_builtin.) */
16121 bool
16122 rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
16124 gimple *stmt = gsi_stmt (*gsi);
16125 tree fndecl = gimple_call_fndecl (stmt);
16126 gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD);
16127 enum rs6000_builtins fn_code
16128 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
16129 tree arg0, arg1, lhs, temp;
16130 gimple *g;
16132 size_t uns_fncode = (size_t) fn_code;
16133 enum insn_code icode = rs6000_builtin_info[uns_fncode].icode;
16134 const char *fn_name1 = rs6000_builtin_info[uns_fncode].name;
16135 const char *fn_name2 = (icode != CODE_FOR_nothing)
16136 ? get_insn_name ((int) icode)
16137 : "nothing";
16139 if (TARGET_DEBUG_BUILTIN)
16140 fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n",
16141 fn_code, fn_name1, fn_name2);
16143 if (!rs6000_fold_gimple)
16144 return false;
16146 /* Prevent gimple folding for code that does not have a LHS, unless it is
16147 allowed per the rs6000_builtin_valid_without_lhs helper function. */
16148 if (!gimple_call_lhs (stmt) && !rs6000_builtin_valid_without_lhs (fn_code))
16149 return false;
16151 /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */
16152 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fncode].mask;
16153 bool func_valid_p = (rs6000_builtin_mask & mask) == mask;
16154 if (!func_valid_p)
16155 return false;
16157 switch (fn_code)
16159 /* Flavors of vec_add. We deliberately don't expand
16160 P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to
16161 TImode, resulting in much poorer code generation. */
16162 case ALTIVEC_BUILTIN_VADDUBM:
16163 case ALTIVEC_BUILTIN_VADDUHM:
16164 case ALTIVEC_BUILTIN_VADDUWM:
16165 case P8V_BUILTIN_VADDUDM:
16166 case ALTIVEC_BUILTIN_VADDFP:
16167 case VSX_BUILTIN_XVADDDP:
16168 arg0 = gimple_call_arg (stmt, 0);
16169 arg1 = gimple_call_arg (stmt, 1);
16170 lhs = gimple_call_lhs (stmt);
16171 g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1);
16172 gimple_set_location (g, gimple_location (stmt));
16173 gsi_replace (gsi, g, true);
16174 return true;
16175 /* Flavors of vec_sub. We deliberately don't expand
16176 P8V_BUILTIN_VSUBUQM. */
16177 case ALTIVEC_BUILTIN_VSUBUBM:
16178 case ALTIVEC_BUILTIN_VSUBUHM:
16179 case ALTIVEC_BUILTIN_VSUBUWM:
16180 case P8V_BUILTIN_VSUBUDM:
16181 case ALTIVEC_BUILTIN_VSUBFP:
16182 case VSX_BUILTIN_XVSUBDP:
16183 arg0 = gimple_call_arg (stmt, 0);
16184 arg1 = gimple_call_arg (stmt, 1);
16185 lhs = gimple_call_lhs (stmt);
16186 g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1);
16187 gimple_set_location (g, gimple_location (stmt));
16188 gsi_replace (gsi, g, true);
16189 return true;
16190 case VSX_BUILTIN_XVMULSP:
16191 case VSX_BUILTIN_XVMULDP:
16192 arg0 = gimple_call_arg (stmt, 0);
16193 arg1 = gimple_call_arg (stmt, 1);
16194 lhs = gimple_call_lhs (stmt);
16195 g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1);
16196 gimple_set_location (g, gimple_location (stmt));
16197 gsi_replace (gsi, g, true);
16198 return true;
16199 /* Even element flavors of vec_mul (signed). */
16200 case ALTIVEC_BUILTIN_VMULESB:
16201 case ALTIVEC_BUILTIN_VMULESH:
16202 case ALTIVEC_BUILTIN_VMULESW:
16203 /* Even element flavors of vec_mul (unsigned). */
16204 case ALTIVEC_BUILTIN_VMULEUB:
16205 case ALTIVEC_BUILTIN_VMULEUH:
16206 case ALTIVEC_BUILTIN_VMULEUW:
16207 arg0 = gimple_call_arg (stmt, 0);
16208 arg1 = gimple_call_arg (stmt, 1);
16209 lhs = gimple_call_lhs (stmt);
16210 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1);
16211 gimple_set_location (g, gimple_location (stmt));
16212 gsi_replace (gsi, g, true);
16213 return true;
16214 /* Odd element flavors of vec_mul (signed). */
16215 case ALTIVEC_BUILTIN_VMULOSB:
16216 case ALTIVEC_BUILTIN_VMULOSH:
16217 case ALTIVEC_BUILTIN_VMULOSW:
16218 /* Odd element flavors of vec_mul (unsigned). */
16219 case ALTIVEC_BUILTIN_VMULOUB:
16220 case ALTIVEC_BUILTIN_VMULOUH:
16221 case ALTIVEC_BUILTIN_VMULOUW:
16222 arg0 = gimple_call_arg (stmt, 0);
16223 arg1 = gimple_call_arg (stmt, 1);
16224 lhs = gimple_call_lhs (stmt);
16225 g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1);
16226 gimple_set_location (g, gimple_location (stmt));
16227 gsi_replace (gsi, g, true);
16228 return true;
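/* For reference, restating the AltiVec semantics these tree codes rely
   on: vec_mule multiplies the even-numbered elements and vec_mulo the
   odd-numbered elements, each product having twice the input element
   width; VEC_WIDEN_MULT_EVEN_EXPR and VEC_WIDEN_MULT_ODD_EXPR are the
   GIMPLE codes with exactly that meaning.  */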
16229 /* Flavors of vec_div (Integer). */
16230 case VSX_BUILTIN_DIV_V2DI:
16231 case VSX_BUILTIN_UDIV_V2DI:
16232 arg0 = gimple_call_arg (stmt, 0);
16233 arg1 = gimple_call_arg (stmt, 1);
16234 lhs = gimple_call_lhs (stmt);
16235 g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1);
16236 gimple_set_location (g, gimple_location (stmt));
16237 gsi_replace (gsi, g, true);
16238 return true;
16239 /* Flavors of vec_div (Float). */
16240 case VSX_BUILTIN_XVDIVSP:
16241 case VSX_BUILTIN_XVDIVDP:
16242 arg0 = gimple_call_arg (stmt, 0);
16243 arg1 = gimple_call_arg (stmt, 1);
16244 lhs = gimple_call_lhs (stmt);
16245 g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1);
16246 gimple_set_location (g, gimple_location (stmt));
16247 gsi_replace (gsi, g, true);
16248 return true;
16249 /* Flavors of vec_and. */
16250 case ALTIVEC_BUILTIN_VAND:
16251 arg0 = gimple_call_arg (stmt, 0);
16252 arg1 = gimple_call_arg (stmt, 1);
16253 lhs = gimple_call_lhs (stmt);
16254 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1);
16255 gimple_set_location (g, gimple_location (stmt));
16256 gsi_replace (gsi, g, true);
16257 return true;
16258 /* Flavors of vec_andc. */
16259 case ALTIVEC_BUILTIN_VANDC:
16260 arg0 = gimple_call_arg (stmt, 0);
16261 arg1 = gimple_call_arg (stmt, 1);
16262 lhs = gimple_call_lhs (stmt);
16263 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16264 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16265 gimple_set_location (g, gimple_location (stmt));
16266 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16267 g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp);
16268 gimple_set_location (g, gimple_location (stmt));
16269 gsi_replace (gsi, g, true);
16270 return true;
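/* Sketch of the lowering above (names invented): because GIMPLE has no
   single and-with-complement operator, vec_andc (a, b) becomes the pair

       tmp = ~b;
       lhs = a & tmp;

   with the BIT_NOT_EXPR inserted before the call and the call itself
   replaced by the BIT_AND_EXPR.  The vec_nand, vec_orc, vec_nor and
   vec_eqv cases below use the same two-statement pattern.  */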
16271 /* Flavors of vec_nand. */
16272 case P8V_BUILTIN_VEC_NAND:
16273 case P8V_BUILTIN_NAND_V16QI:
16274 case P8V_BUILTIN_NAND_V8HI:
16275 case P8V_BUILTIN_NAND_V4SI:
16276 case P8V_BUILTIN_NAND_V4SF:
16277 case P8V_BUILTIN_NAND_V2DF:
16278 case P8V_BUILTIN_NAND_V2DI:
16279 arg0 = gimple_call_arg (stmt, 0);
16280 arg1 = gimple_call_arg (stmt, 1);
16281 lhs = gimple_call_lhs (stmt);
16282 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16283 g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1);
16284 gimple_set_location (g, gimple_location (stmt));
16285 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16286 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16287 gimple_set_location (g, gimple_location (stmt));
16288 gsi_replace (gsi, g, true);
16289 return true;
16290 /* Flavors of vec_or. */
16291 case ALTIVEC_BUILTIN_VOR:
16292 arg0 = gimple_call_arg (stmt, 0);
16293 arg1 = gimple_call_arg (stmt, 1);
16294 lhs = gimple_call_lhs (stmt);
16295 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1);
16296 gimple_set_location (g, gimple_location (stmt));
16297 gsi_replace (gsi, g, true);
16298 return true;
16299 /* Flavors of vec_orc. */
16300 case P8V_BUILTIN_ORC_V16QI:
16301 case P8V_BUILTIN_ORC_V8HI:
16302 case P8V_BUILTIN_ORC_V4SI:
16303 case P8V_BUILTIN_ORC_V4SF:
16304 case P8V_BUILTIN_ORC_V2DF:
16305 case P8V_BUILTIN_ORC_V2DI:
16306 arg0 = gimple_call_arg (stmt, 0);
16307 arg1 = gimple_call_arg (stmt, 1);
16308 lhs = gimple_call_lhs (stmt);
16309 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16310 g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1);
16311 gimple_set_location (g, gimple_location (stmt));
16312 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16313 g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp);
16314 gimple_set_location (g, gimple_location (stmt));
16315 gsi_replace (gsi, g, true);
16316 return true;
16317 /* Flavors of vec_xor. */
16318 case ALTIVEC_BUILTIN_VXOR:
16319 arg0 = gimple_call_arg (stmt, 0);
16320 arg1 = gimple_call_arg (stmt, 1);
16321 lhs = gimple_call_lhs (stmt);
16322 g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1);
16323 gimple_set_location (g, gimple_location (stmt));
16324 gsi_replace (gsi, g, true);
16325 return true;
16326 /* Flavors of vec_nor. */
16327 case ALTIVEC_BUILTIN_VNOR:
16328 arg0 = gimple_call_arg (stmt, 0);
16329 arg1 = gimple_call_arg (stmt, 1);
16330 lhs = gimple_call_lhs (stmt);
16331 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16332 g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1);
16333 gimple_set_location (g, gimple_location (stmt));
16334 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16335 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16336 gimple_set_location (g, gimple_location (stmt));
16337 gsi_replace (gsi, g, true);
16338 return true;
16339 /* Flavors of vec_abs. */
16340 case ALTIVEC_BUILTIN_ABS_V16QI:
16341 case ALTIVEC_BUILTIN_ABS_V8HI:
16342 case ALTIVEC_BUILTIN_ABS_V4SI:
16343 case ALTIVEC_BUILTIN_ABS_V4SF:
16344 case P8V_BUILTIN_ABS_V2DI:
16345 case VSX_BUILTIN_XVABSDP:
16346 arg0 = gimple_call_arg (stmt, 0);
16347 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16348 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16349 return false;
16350 lhs = gimple_call_lhs (stmt);
16351 g = gimple_build_assign (lhs, ABS_EXPR, arg0);
16352 gimple_set_location (g, gimple_location (stmt));
16353 gsi_replace (gsi, g, true);
16354 return true;
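/* The TYPE_OVERFLOW_WRAPS check above is what keeps this fold from
   firing for ordinary signed integer vectors: ABS_EXPR of the most
   negative element would be signed overflow, which is undefined in
   GIMPLE unless the type is known to wrap (e.g. with -fwrapv).  */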
16355 /* Flavors of vec_min. */
16356 case VSX_BUILTIN_XVMINDP:
16357 case P8V_BUILTIN_VMINSD:
16358 case P8V_BUILTIN_VMINUD:
16359 case ALTIVEC_BUILTIN_VMINSB:
16360 case ALTIVEC_BUILTIN_VMINSH:
16361 case ALTIVEC_BUILTIN_VMINSW:
16362 case ALTIVEC_BUILTIN_VMINUB:
16363 case ALTIVEC_BUILTIN_VMINUH:
16364 case ALTIVEC_BUILTIN_VMINUW:
16365 case ALTIVEC_BUILTIN_VMINFP:
16366 arg0 = gimple_call_arg (stmt, 0);
16367 arg1 = gimple_call_arg (stmt, 1);
16368 lhs = gimple_call_lhs (stmt);
16369 g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1);
16370 gimple_set_location (g, gimple_location (stmt));
16371 gsi_replace (gsi, g, true);
16372 return true;
16373 /* Flavors of vec_max. */
16374 case VSX_BUILTIN_XVMAXDP:
16375 case P8V_BUILTIN_VMAXSD:
16376 case P8V_BUILTIN_VMAXUD:
16377 case ALTIVEC_BUILTIN_VMAXSB:
16378 case ALTIVEC_BUILTIN_VMAXSH:
16379 case ALTIVEC_BUILTIN_VMAXSW:
16380 case ALTIVEC_BUILTIN_VMAXUB:
16381 case ALTIVEC_BUILTIN_VMAXUH:
16382 case ALTIVEC_BUILTIN_VMAXUW:
16383 case ALTIVEC_BUILTIN_VMAXFP:
16384 arg0 = gimple_call_arg (stmt, 0);
16385 arg1 = gimple_call_arg (stmt, 1);
16386 lhs = gimple_call_lhs (stmt);
16387 g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1);
16388 gimple_set_location (g, gimple_location (stmt));
16389 gsi_replace (gsi, g, true);
16390 return true;
16391 /* Flavors of vec_eqv. */
16392 case P8V_BUILTIN_EQV_V16QI:
16393 case P8V_BUILTIN_EQV_V8HI:
16394 case P8V_BUILTIN_EQV_V4SI:
16395 case P8V_BUILTIN_EQV_V4SF:
16396 case P8V_BUILTIN_EQV_V2DF:
16397 case P8V_BUILTIN_EQV_V2DI:
16398 arg0 = gimple_call_arg (stmt, 0);
16399 arg1 = gimple_call_arg (stmt, 1);
16400 lhs = gimple_call_lhs (stmt);
16401 temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1));
16402 g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1);
16403 gimple_set_location (g, gimple_location (stmt));
16404 gsi_insert_before (gsi, g, GSI_SAME_STMT);
16405 g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp);
16406 gimple_set_location (g, gimple_location (stmt));
16407 gsi_replace (gsi, g, true);
16408 return true;
16409 /* Flavors of vec_rotate_left. */
16410 case ALTIVEC_BUILTIN_VRLB:
16411 case ALTIVEC_BUILTIN_VRLH:
16412 case ALTIVEC_BUILTIN_VRLW:
16413 case P8V_BUILTIN_VRLD:
16414 arg0 = gimple_call_arg (stmt, 0);
16415 arg1 = gimple_call_arg (stmt, 1);
16416 lhs = gimple_call_lhs (stmt);
16417 g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1);
16418 gimple_set_location (g, gimple_location (stmt));
16419 gsi_replace (gsi, g, true);
16420 return true;
16421 /* Flavors of vector shift right algebraic.
16422 vec_sra{b,h,w} -> vsra{b,h,w}. */
16423 case ALTIVEC_BUILTIN_VSRAB:
16424 case ALTIVEC_BUILTIN_VSRAH:
16425 case ALTIVEC_BUILTIN_VSRAW:
16426 case P8V_BUILTIN_VSRAD:
16427 arg0 = gimple_call_arg (stmt, 0);
16428 arg1 = gimple_call_arg (stmt, 1);
16429 lhs = gimple_call_lhs (stmt);
16430 g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
16431 gimple_set_location (g, gimple_location (stmt));
16432 gsi_replace (gsi, g, true);
16433 return true;
16434 /* Flavors of vector shift left.
16435 builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. */
16436 case ALTIVEC_BUILTIN_VSLB:
16437 case ALTIVEC_BUILTIN_VSLH:
16438 case ALTIVEC_BUILTIN_VSLW:
16439 case P8V_BUILTIN_VSLD:
16440 arg0 = gimple_call_arg (stmt, 0);
16441 if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0)))
16442 && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0))))
16443 return false;
16444 arg1 = gimple_call_arg (stmt, 1);
16445 lhs = gimple_call_lhs (stmt);
16446 g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, arg1);
16447 gimple_set_location (g, gimple_location (stmt));
16448 gsi_replace (gsi, g, true);
16449 return true;
16450 /* Flavors of vector shift right. */
16451 case ALTIVEC_BUILTIN_VSRB:
16452 case ALTIVEC_BUILTIN_VSRH:
16453 case ALTIVEC_BUILTIN_VSRW:
16454 case P8V_BUILTIN_VSRD:
16455 {
16456 arg0 = gimple_call_arg (stmt, 0);
16457 arg1 = gimple_call_arg (stmt, 1);
16458 lhs = gimple_call_lhs (stmt);
16459 gimple_seq stmts = NULL;
16460 /* Convert arg0 to unsigned. */
16461 tree arg0_unsigned
16462 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
16463 unsigned_type_for (TREE_TYPE (arg0)), arg0);
16464 tree res
16465 = gimple_build (&stmts, RSHIFT_EXPR,
16466 TREE_TYPE (arg0_unsigned), arg0_unsigned, arg1);
16467 /* Convert result back to the lhs type. */
16468 res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
16469 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16470 update_call_from_tree (gsi, res);
16471 return true;
16472 }
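/* Rough sketch of the sequence just built (types invented for the
   example): for a signed input,

       vector signed int r = vec_sr (v, s);

   becomes approximately

       uv = VIEW_CONVERT_EXPR<vector unsigned int> (v);
       us = uv >> s;
       r  = VIEW_CONVERT_EXPR<vector signed int> (us);

   since RSHIFT_EXPR on an unsigned type is a logical (zero-filling)
   shift, which matches the vsr{b,h,w,d} instructions; a plain signed
   RSHIFT_EXPR would be an arithmetic shift instead.  */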
16473 /* Vector loads. */
16474 case ALTIVEC_BUILTIN_LVX_V16QI:
16475 case ALTIVEC_BUILTIN_LVX_V8HI:
16476 case ALTIVEC_BUILTIN_LVX_V4SI:
16477 case ALTIVEC_BUILTIN_LVX_V4SF:
16478 case ALTIVEC_BUILTIN_LVX_V2DI:
16479 case ALTIVEC_BUILTIN_LVX_V2DF:
16480 {
16481 arg0 = gimple_call_arg (stmt, 0); // offset
16482 arg1 = gimple_call_arg (stmt, 1); // address
16483 /* Do not fold for -maltivec=be on LE targets. */
16484 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
16485 return false;
16486 lhs = gimple_call_lhs (stmt);
16487 location_t loc = gimple_location (stmt);
16488 /* Since arg1 may be cast to a different type, just use ptr_type_node
16489 here instead of trying to enforce TBAA on pointer types. */
16490 tree arg1_type = ptr_type_node;
16491 tree lhs_type = TREE_TYPE (lhs);
16492 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
16493 the tree using the value from arg0. The resulting type will match
16494 the type of arg1. */
16495 gimple_seq stmts = NULL;
16496 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
16497 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
16498 arg1_type, arg1, temp_offset);
16499 /* Mask off any lower bits from the address. */
16500 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
16501 arg1_type, temp_addr,
16502 build_int_cst (arg1_type, -16));
16503 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16504 /* Use the build2 helper to set up the mem_ref. The MEM_REF could also
16505 take an offset, but since we've already incorporated the offset
16506 above, here we just pass in a zero. */
16507 gimple *g
16508 = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
16509 build_int_cst (arg1_type, 0)));
16510 gimple_set_location (g, loc);
16511 gsi_replace (gsi, g, true);
16512 return true;
16513 }
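/* The GIMPLE emitted above is roughly (temporaries invented):

       t1  = (sizetype) arg0;
       t2  = arg1 p+ t1;              // POINTER_PLUS_EXPR
       t3  = t2 & -16;                // lvx ignores the low 4 address bits
       lhs = MEM[(lhs_type *) t3];

   i.e. the lvx builtin becomes an ordinary 16-byte-aligned vector load
   that the rest of the middle end understands.  */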
16514 /* Vector stores. */
16515 case ALTIVEC_BUILTIN_STVX_V16QI:
16516 case ALTIVEC_BUILTIN_STVX_V8HI:
16517 case ALTIVEC_BUILTIN_STVX_V4SI:
16518 case ALTIVEC_BUILTIN_STVX_V4SF:
16519 case ALTIVEC_BUILTIN_STVX_V2DI:
16520 case ALTIVEC_BUILTIN_STVX_V2DF:
16521 {
16522 /* Do not fold for -maltivec=be on LE targets. */
16523 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
16524 return false;
16525 arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */
16526 arg1 = gimple_call_arg (stmt, 1); /* Offset. */
16527 tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */
16528 location_t loc = gimple_location (stmt);
16529 tree arg0_type = TREE_TYPE (arg0);
16530 /* Use ptr_type_node (no TBAA) for the arg2_type.
16531 FIXME: (Richard) "A proper fix would be to transition this type as
16532 seen from the frontend to GIMPLE, for example in a similar way we
16533 do for MEM_REFs by piggy-backing that on an extra argument, a
16534 constant zero pointer of the alias pointer type to use (which would
16535 also serve as a type indicator of the store itself). I'd use a
16536 target specific internal function for this (not sure if we can have
16537 those target specific, but I guess if it's folded away then that's
16538 fine) and get away with the overload set." */
16539 tree arg2_type = ptr_type_node;
16540 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create
16541 the tree using the value from arg1. The resulting type will match
16542 the type of arg2. */
16543 gimple_seq stmts = NULL;
16544 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1);
16545 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
16546 arg2_type, arg2, temp_offset);
16547 /* Mask off any lower bits from the address. */
16548 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
16549 arg2_type, temp_addr,
16550 build_int_cst (arg2_type, -16));
16551 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
16552 /* The desired gimple result should be similar to:
16553 MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */
16554 gimple *g
16555 = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr,
16556 build_int_cst (arg2_type, 0)), arg0);
16557 gimple_set_location (g, loc);
16558 gsi_replace (gsi, g, true);
16559 return true;
16560 }
16562 /* Vector fused multiply-add (fma). */
16563 case ALTIVEC_BUILTIN_VMADDFP:
16564 case VSX_BUILTIN_XVMADDDP:
16565 case ALTIVEC_BUILTIN_VMLADDUHM:
16566 {
16567 arg0 = gimple_call_arg (stmt, 0);
16568 arg1 = gimple_call_arg (stmt, 1);
16569 tree arg2 = gimple_call_arg (stmt, 2);
16570 lhs = gimple_call_lhs (stmt);
16571 gimple *g = gimple_build_assign (lhs, FMA_EXPR, arg0, arg1, arg2);
16572 gimple_set_location (g, gimple_location (stmt));
16573 gsi_replace (gsi, g, true);
16574 return true;
16575 }
16577 /* Vector compares: EQ, NE, GE, GT, LE. */
16578 case ALTIVEC_BUILTIN_VCMPEQUB:
16579 case ALTIVEC_BUILTIN_VCMPEQUH:
16580 case ALTIVEC_BUILTIN_VCMPEQUW:
16581 case P8V_BUILTIN_VCMPEQUD:
16582 fold_compare_helper (gsi, EQ_EXPR, stmt);
16583 return true;
16585 case P9V_BUILTIN_CMPNEB:
16586 case P9V_BUILTIN_CMPNEH:
16587 case P9V_BUILTIN_CMPNEW:
16588 fold_compare_helper (gsi, NE_EXPR, stmt);
16589 return true;
16591 case VSX_BUILTIN_CMPGE_16QI:
16592 case VSX_BUILTIN_CMPGE_U16QI:
16593 case VSX_BUILTIN_CMPGE_8HI:
16594 case VSX_BUILTIN_CMPGE_U8HI:
16595 case VSX_BUILTIN_CMPGE_4SI:
16596 case VSX_BUILTIN_CMPGE_U4SI:
16597 case VSX_BUILTIN_CMPGE_2DI:
16598 case VSX_BUILTIN_CMPGE_U2DI:
16599 fold_compare_helper (gsi, GE_EXPR, stmt);
16600 return true;
16602 case ALTIVEC_BUILTIN_VCMPGTSB:
16603 case ALTIVEC_BUILTIN_VCMPGTUB:
16604 case ALTIVEC_BUILTIN_VCMPGTSH:
16605 case ALTIVEC_BUILTIN_VCMPGTUH:
16606 case ALTIVEC_BUILTIN_VCMPGTSW:
16607 case ALTIVEC_BUILTIN_VCMPGTUW:
16608 case P8V_BUILTIN_VCMPGTUD:
16609 case P8V_BUILTIN_VCMPGTSD:
16610 fold_compare_helper (gsi, GT_EXPR, stmt);
16611 return true;
16613 case VSX_BUILTIN_CMPLE_16QI:
16614 case VSX_BUILTIN_CMPLE_U16QI:
16615 case VSX_BUILTIN_CMPLE_8HI:
16616 case VSX_BUILTIN_CMPLE_U8HI:
16617 case VSX_BUILTIN_CMPLE_4SI:
16618 case VSX_BUILTIN_CMPLE_U4SI:
16619 case VSX_BUILTIN_CMPLE_2DI:
16620 case VSX_BUILTIN_CMPLE_U2DI:
16621 fold_compare_helper (gsi, LE_EXPR, stmt);
16622 return true;
16624 default:
16625 if (TARGET_DEBUG_BUILTIN)
16626 fprintf (stderr, "gimple builtin intrinsic not matched: %d %s %s\n",
16627 fn_code, fn_name1, fn_name2);
16628 break;
16629 }
16631 return false;
16632 }
16634 /* Expand an expression EXP that calls a built-in function,
16635 with result going to TARGET if that's convenient
16636 (and in mode MODE if that's convenient).
16637 SUBTARGET may be used as the target for computing one of EXP's operands.
16638 IGNORE is nonzero if the value is to be ignored. */
16640 static rtx
16641 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
16642 machine_mode mode ATTRIBUTE_UNUSED,
16643 int ignore ATTRIBUTE_UNUSED)
16644 {
16645 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
16646 enum rs6000_builtins fcode
16647 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
16648 size_t uns_fcode = (size_t)fcode;
16649 const struct builtin_description *d;
16650 size_t i;
16651 rtx ret;
16652 bool success;
16653 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
16654 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
16655 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
16657 /* We have two different modes (KFmode, TFmode), either of which can be the
16658 IEEE 128-bit floating point type, depending on whether long double is the
16659 IBM extended double (KFmode) or long double is IEEE 128-bit (TFmode). It
16660 is simpler to define only one variant of each built-in function and to
16661 switch the insn code here when expanding it, rather than defining two
16662 built-ins and using the overload table in rs6000-c.c to switch between
16663 the two. If we don't have the proper assembler, don't do this switch
16664 because CODE_FOR_*kf* and CODE_FOR_*tf* will be CODE_FOR_nothing. */
16665 #ifdef HAVE_AS_POWER9
16666 if (FLOAT128_IEEE_P (TFmode))
16667 switch (icode)
16668 {
16669 default:
16670 break;
16672 case CODE_FOR_sqrtkf2_odd: icode = CODE_FOR_sqrttf2_odd; break;
16673 case CODE_FOR_trunckfdf2_odd: icode = CODE_FOR_trunctfdf2_odd; break;
16674 case CODE_FOR_addkf3_odd: icode = CODE_FOR_addtf3_odd; break;
16675 case CODE_FOR_subkf3_odd: icode = CODE_FOR_subtf3_odd; break;
16676 case CODE_FOR_mulkf3_odd: icode = CODE_FOR_multf3_odd; break;
16677 case CODE_FOR_divkf3_odd: icode = CODE_FOR_divtf3_odd; break;
16678 case CODE_FOR_fmakf4_odd: icode = CODE_FOR_fmatf4_odd; break;
16679 case CODE_FOR_xsxexpqp_kf: icode = CODE_FOR_xsxexpqp_tf; break;
16680 case CODE_FOR_xsxsigqp_kf: icode = CODE_FOR_xsxsigqp_tf; break;
16681 case CODE_FOR_xststdcnegqp_kf: icode = CODE_FOR_xststdcnegqp_tf; break;
16682 case CODE_FOR_xsiexpqp_kf: icode = CODE_FOR_xsiexpqp_tf; break;
16683 case CODE_FOR_xsiexpqpf_kf: icode = CODE_FOR_xsiexpqpf_tf; break;
16684 case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
16685 }
16686 #endif
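/* Concrete example of the remapping above (configuration assumed for
   illustration): with -mabi=ieeelongdouble, TFmode is the IEEE 128-bit
   mode, so a built-in recorded with the KFmode pattern such as
   CODE_FOR_sqrtkf2_odd must be expanded through its TFmode twin
   CODE_FOR_sqrttf2_odd instead.  */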
16688 if (TARGET_DEBUG_BUILTIN)
16689 {
16690 const char *name1 = rs6000_builtin_info[uns_fcode].name;
16691 const char *name2 = (icode != CODE_FOR_nothing)
16692 ? get_insn_name ((int) icode)
16693 : "nothing";
16694 const char *name3;
16696 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
16697 {
16698 default: name3 = "unknown"; break;
16699 case RS6000_BTC_SPECIAL: name3 = "special"; break;
16700 case RS6000_BTC_UNARY: name3 = "unary"; break;
16701 case RS6000_BTC_BINARY: name3 = "binary"; break;
16702 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
16703 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
16704 case RS6000_BTC_ABS: name3 = "abs"; break;
16705 case RS6000_BTC_DST: name3 = "dst"; break;
16706 }
16709 fprintf (stderr,
16710 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
16711 (name1) ? name1 : "---", fcode,
16712 (name2) ? name2 : "---", (int) icode,
16713 name3,
16714 func_valid_p ? "" : ", not valid");
16715 }
16717 if (!func_valid_p)
16718 {
16719 rs6000_invalid_builtin (fcode);
16721 /* Given it is invalid, just generate a normal call. */
16722 return expand_call (exp, target, ignore);
16723 }
16725 switch (fcode)
16726 {
16727 case RS6000_BUILTIN_RECIP:
16728 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
16730 case RS6000_BUILTIN_RECIPF:
16731 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
16733 case RS6000_BUILTIN_RSQRTF:
16734 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
16736 case RS6000_BUILTIN_RSQRT:
16737 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
16739 case POWER7_BUILTIN_BPERMD:
16740 return rs6000_expand_binop_builtin (((TARGET_64BIT)
16741 ? CODE_FOR_bpermd_di
16742 : CODE_FOR_bpermd_si), exp, target);
16744 case RS6000_BUILTIN_GET_TB:
16745 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
16746 target);
16748 case RS6000_BUILTIN_MFTB:
16749 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
16750 ? CODE_FOR_rs6000_mftb_di
16751 : CODE_FOR_rs6000_mftb_si),
16752 target);
16754 case RS6000_BUILTIN_MFFS:
16755 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
16757 case RS6000_BUILTIN_MTFSF:
16758 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
16760 case RS6000_BUILTIN_CPU_INIT:
16761 case RS6000_BUILTIN_CPU_IS:
16762 case RS6000_BUILTIN_CPU_SUPPORTS:
16763 return cpu_expand_builtin (fcode, exp, target);
16765 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
16766 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
16767 {
16768 int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
16769 : (int) CODE_FOR_altivec_lvsl_direct);
16770 machine_mode tmode = insn_data[icode2].operand[0].mode;
16771 machine_mode mode = insn_data[icode2].operand[1].mode;
16772 tree arg;
16773 rtx op, addr, pat;
16775 gcc_assert (TARGET_ALTIVEC);
16777 arg = CALL_EXPR_ARG (exp, 0);
16778 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
16779 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
16780 addr = memory_address (mode, op);
16781 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
16782 op = addr;
16783 else
16784 {
16785 /* For the load case we need to negate the address. */
16786 op = gen_reg_rtx (GET_MODE (addr));
16787 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
16788 }
16789 op = gen_rtx_MEM (mode, op);
16791 if (target == 0
16792 || GET_MODE (target) != tmode
16793 || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
16794 target = gen_reg_rtx (tmode);
16796 pat = GEN_FCN (icode2) (target, op);
16797 if (!pat)
16798 return 0;
16799 emit_insn (pat);
16801 return target;
16802 }
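/* A note on the negation above, paraphrasing the ISA rather than this
   file: lvsl and lvsr build their permute mask purely from the low four
   bits of the effective address, and lvsl on a negated address yields,
   for misaligned addresses, the same mask lvsr would yield on the
   address itself.  The load realignment scheme wants that complementary
   mask, while the store mask uses the address directly.  */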
16804 case ALTIVEC_BUILTIN_VCFUX:
16805 case ALTIVEC_BUILTIN_VCFSX:
16806 case ALTIVEC_BUILTIN_VCTUXS:
16807 case ALTIVEC_BUILTIN_VCTSXS:
16808 /* FIXME: There's got to be a nicer way to handle this case than
16809 constructing a new CALL_EXPR. */
16810 if (call_expr_nargs (exp) == 1)
16811 {
16812 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
16813 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
16814 }
16815 break;
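/* In other words, a call that arrives with a single operand, e.g.
   __builtin_altivec_vcfsx (v), is rewritten here as the two-operand
   call __builtin_altivec_vcfsx (v, 0), defaulting the scale operand to
   zero so the normal binary expansion path below can handle it.  */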
16817 default:
16818 break;
16819 }
16821 if (TARGET_ALTIVEC)
16822 {
16823 ret = altivec_expand_builtin (exp, target, &success);
16825 if (success)
16826 return ret;
16827 }
16828 if (TARGET_PAIRED_FLOAT)
16829 {
16830 ret = paired_expand_builtin (exp, target, &success);
16832 if (success)
16833 return ret;
16834 }
16835 if (TARGET_HTM)
16836 {
16837 ret = htm_expand_builtin (exp, target, &success);
16839 if (success)
16840 return ret;
16841 }
16843 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
16844 /* RS6000_BTC_SPECIAL represents no-operand operators. */
16845 gcc_assert (attr == RS6000_BTC_UNARY
16846 || attr == RS6000_BTC_BINARY
16847 || attr == RS6000_BTC_TERNARY
16848 || attr == RS6000_BTC_SPECIAL);
16850 /* Handle simple unary operations. */
16851 d = bdesc_1arg;
16852 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16853 if (d->code == fcode)
16854 return rs6000_expand_unop_builtin (icode, exp, target);
16856 /* Handle simple binary operations. */
16857 d = bdesc_2arg;
16858 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16859 if (d->code == fcode)
16860 return rs6000_expand_binop_builtin (icode, exp, target);
16862 /* Handle simple ternary operations. */
16863 d = bdesc_3arg;
16864 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
16865 if (d->code == fcode)
16866 return rs6000_expand_ternop_builtin (icode, exp, target);
16868 /* Handle simple no-argument operations. */
16869 d = bdesc_0arg;
16870 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
16871 if (d->code == fcode)
16872 return rs6000_expand_zeroop_builtin (icode, target);
16874 gcc_unreachable ();
16875 }
16877 /* Create a builtin vector type with a name, taking care not to give
16878 the canonical type a name. */
16880 static tree
16881 rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts)
16882 {
16883 tree result = build_vector_type (elt_type, num_elts);
16885 /* Copy so we don't give the canonical type a name. */
16886 result = build_variant_type_copy (result);
16888 add_builtin_type (name, result);
16890 return result;
16891 }
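/* Example of why the variant copy matters (informal): the copy lets
   "__vector double" appear as the type name in debug info and
   diagnostics while the canonical V2DF vector type stays anonymous, so
   structurally identical vector types built elsewhere still compare
   equal to it.  */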
16893 static void
16894 rs6000_init_builtins (void)
16895 {
16896 tree tdecl;
16897 tree ftype;
16898 machine_mode mode;
16900 if (TARGET_DEBUG_BUILTIN)
16901 fprintf (stderr, "rs6000_init_builtins%s%s%s\n",
16902 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
16903 (TARGET_ALTIVEC) ? ", altivec" : "",
16904 (TARGET_VSX) ? ", vsx" : "");
16906 V2SI_type_node = build_vector_type (intSI_type_node, 2);
16907 V2SF_type_node = build_vector_type (float_type_node, 2);
16908 V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long"
16909 : "__vector long long",
16910 intDI_type_node, 2);
16911 V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2);
16912 V4SI_type_node = rs6000_vector_type ("__vector signed int",
16913 intSI_type_node, 4);
16914 V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4);
16915 V8HI_type_node = rs6000_vector_type ("__vector signed short",
16916 intHI_type_node, 8);
16917 V16QI_type_node = rs6000_vector_type ("__vector signed char",
16918 intQI_type_node, 16);
16920 unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char",
16921 unsigned_intQI_type_node, 16);
16922 unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short",
16923 unsigned_intHI_type_node, 8);
16924 unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int",
16925 unsigned_intSI_type_node, 4);
16926 unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
16927 ? "__vector unsigned long"
16928 : "__vector unsigned long long",
16929 unsigned_intDI_type_node, 2);
16931 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
16932 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
16933 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
16934 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
16936 const_str_type_node
16937 = build_pointer_type (build_qualified_type (char_type_node,
16938 TYPE_QUAL_CONST));
16940 /* We use V1TI mode as a special container to hold __int128_t items that
16941 must live in VSX registers. */
16942 if (intTI_type_node)
16943 {
16944 V1TI_type_node = rs6000_vector_type ("__vector __int128",
16945 intTI_type_node, 1);
16946 unsigned_V1TI_type_node
16947 = rs6000_vector_type ("__vector unsigned __int128",
16948 unsigned_intTI_type_node, 1);
16949 }
16951 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
16952 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
16953 'vector unsigned short'. */
16955 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
16956 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
16957 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
16958 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
16959 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
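/* A sketch of why distinct type nodes matter here (example invented):
   in C++,

       void f (__vector __bool char);
       void f (__vector unsigned char);

   must be two distinct overloads with two distinct mangled names, which
   only works if the element types are separate type nodes rather than
   mere typedefs of each other.  */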
16961 long_integer_type_internal_node = long_integer_type_node;
16962 long_unsigned_type_internal_node = long_unsigned_type_node;
16963 long_long_integer_type_internal_node = long_long_integer_type_node;
16964 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
16965 intQI_type_internal_node = intQI_type_node;
16966 uintQI_type_internal_node = unsigned_intQI_type_node;
16967 intHI_type_internal_node = intHI_type_node;
16968 uintHI_type_internal_node = unsigned_intHI_type_node;
16969 intSI_type_internal_node = intSI_type_node;
16970 uintSI_type_internal_node = unsigned_intSI_type_node;
16971 intDI_type_internal_node = intDI_type_node;
16972 uintDI_type_internal_node = unsigned_intDI_type_node;
16973 intTI_type_internal_node = intTI_type_node;
16974 uintTI_type_internal_node = unsigned_intTI_type_node;
16975 float_type_internal_node = float_type_node;
16976 double_type_internal_node = double_type_node;
16977 long_double_type_internal_node = long_double_type_node;
16978 dfloat64_type_internal_node = dfloat64_type_node;
16979 dfloat128_type_internal_node = dfloat128_type_node;
16980 void_type_internal_node = void_type_node;
16982 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
16983 IFmode is the IBM extended 128-bit format that is a pair of doubles.
16984 TFmode will be either IEEE 128-bit floating point or the IBM double-double
16985 format that uses a pair of doubles, depending on the switches and
16986 defaults.
16988 If we don't have support for either 128-bit IBM double double or IEEE
16989 128-bit floating point, we need to make sure the type is non-zero or
16990 else the self-test fails during bootstrap.
16992 We don't register a built-in type for __ibm128 if the type is the same as
16993 long double. Instead rs6000_cpu_cpp_builtins adds a #define mapping
16994 __ibm128 to long double.
16996 For IEEE 128-bit floating point, always create the type __ieee128. If the
16997 user used -mfloat128, rs6000-c.c will create a define from __float128 to
16998 __ieee128. */
16999 if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode))
17000 {
17001 ibm128_float_type_node = make_node (REAL_TYPE);
17002 TYPE_PRECISION (ibm128_float_type_node) = 128;
17003 SET_TYPE_MODE (ibm128_float_type_node, IFmode);
17004 layout_type (ibm128_float_type_node);
17006 lang_hooks.types.register_builtin_type (ibm128_float_type_node,
17007 "__ibm128");
17008 }
17009 else
17010 ibm128_float_type_node = long_double_type_node;
17012 if (TARGET_FLOAT128_TYPE)
17013 {
17014 ieee128_float_type_node = float128_type_node;
17015 lang_hooks.types.register_builtin_type (ieee128_float_type_node,
17016 "__ieee128");
17017 }
17019 else
17020 ieee128_float_type_node = long_double_type_node;
17022 /* Initialize the modes for builtin_function_type, mapping a machine mode to
17023 tree type node. */
17024 builtin_mode_to_type[QImode][0] = integer_type_node;
17025 builtin_mode_to_type[HImode][0] = integer_type_node;
17026 builtin_mode_to_type[SImode][0] = intSI_type_node;
17027 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
17028 builtin_mode_to_type[DImode][0] = intDI_type_node;
17029 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
17030 builtin_mode_to_type[TImode][0] = intTI_type_node;
17031 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
17032 builtin_mode_to_type[SFmode][0] = float_type_node;
17033 builtin_mode_to_type[DFmode][0] = double_type_node;
17034 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
17035 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
17036 builtin_mode_to_type[TFmode][0] = long_double_type_node;
17037 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
17038 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
17039 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
17040 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
17041 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
17042 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
17043 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
17044 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
17045 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
17046 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
17047 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
17048 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
17049 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
17050 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
17051 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
17052 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
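/* This table is what builtin_function_type (used just below) consults
   to turn machine modes into tree types; e.g. the (DFmode, DFmode,
   DFmode) triple for __builtin_recipdiv becomes the signature
   double (double, double), and index [1] supplies the unsigned variant
   of a mode where one exists.  */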
17054 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
17055 TYPE_NAME (bool_char_type_node) = tdecl;
17057 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
17058 TYPE_NAME (bool_short_type_node) = tdecl;
17060 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
17061 TYPE_NAME (bool_int_type_node) = tdecl;
17063 tdecl = add_builtin_type ("__pixel", pixel_type_node);
17064 TYPE_NAME (pixel_type_node) = tdecl;
17066 bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char",
17067 bool_char_type_node, 16);
17068 bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short",
17069 bool_short_type_node, 8);
17070 bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int",
17071 bool_int_type_node, 4);
17072 bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64
17073 ? "__vector __bool long"
17074 : "__vector __bool long long",
17075 bool_long_type_node, 2);
17076 pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel",
17077 pixel_type_node, 8);
17079 /* Paired builtins are only available if the compiler was built with the
17080 appropriate options, so only create them in that case. Create Altivec
17081 and VSX builtins on machines with at least the general purpose
17082 extensions (970 and newer) so that they can be enabled via the
17083 target attribute. */
17084 if (TARGET_PAIRED_FLOAT)
17085 paired_init_builtins ();
17086 if (TARGET_EXTRA_BUILTINS)
17087 altivec_init_builtins ();
17088 if (TARGET_HTM)
17089 htm_init_builtins ();
17091 if (TARGET_EXTRA_BUILTINS || TARGET_PAIRED_FLOAT)
17092 rs6000_common_init_builtins ();
17094 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
17095 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
17096 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
17098 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
17099 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
17100 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
17102 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
17103 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
17104 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
17106 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
17107 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
17108 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
17110 mode = (TARGET_64BIT) ? DImode : SImode;
17111 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
17112 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
17113 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
17115 ftype = build_function_type_list (unsigned_intDI_type_node,
17116 NULL_TREE);
17117 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
17119 if (TARGET_64BIT)
17120 ftype = build_function_type_list (unsigned_intDI_type_node,
17121 NULL_TREE);
17122 else
17123 ftype = build_function_type_list (unsigned_intSI_type_node,
17124 NULL_TREE);
17125 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
17127 ftype = build_function_type_list (double_type_node, NULL_TREE);
17128 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
17130 ftype = build_function_type_list (void_type_node,
17131 intSI_type_node, double_type_node,
17132 NULL_TREE);
17133 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
17135 ftype = build_function_type_list (void_type_node, NULL_TREE);
17136 def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT);
17138 ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node,
17139 NULL_TREE);
17140 def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS);
17141 def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS);
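/* Source-level use of the three CPU built-ins defined above, for
   illustration (the argument strings are examples):

       __builtin_cpu_init ();
       if (__builtin_cpu_is ("power9")
           || __builtin_cpu_supports ("arch_3_00"))
         ... use POWER9 code paths ...

   They are expanded by cpu_expand_builtin into checks of the platform
   and hwcap information the C library records at startup.  */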
17143 /* AIX libm provides clog as __clog. */
17144 if (TARGET_XCOFF &&
17145 (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
17146 set_user_assembler_name (tdecl, "__clog");
17148 #ifdef SUBTARGET_INIT_BUILTINS
17149 SUBTARGET_INIT_BUILTINS;
17150 #endif
17151 }
17153 /* Returns the rs6000 builtin decl for CODE. */
17155 static tree
17156 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
17157 {
17158 HOST_WIDE_INT fnmask;
17160 if (code >= RS6000_BUILTIN_COUNT)
17161 return error_mark_node;
17163 fnmask = rs6000_builtin_info[code].mask;
17164 if ((fnmask & rs6000_builtin_mask) != fnmask)
17165 {
17166 rs6000_invalid_builtin ((enum rs6000_builtins)code);
17167 return error_mark_node;
17168 }
17170 return rs6000_builtin_decls[code];
17171 }
17173 static void
17174 paired_init_builtins (void)
17175 {
17176 const struct builtin_description *d;
17177 size_t i;
17178 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17180 tree int_ftype_int_v2sf_v2sf
17181 = build_function_type_list (integer_type_node,
17182 integer_type_node,
17183 V2SF_type_node,
17184 V2SF_type_node,
17185 NULL_TREE);
17186 tree pcfloat_type_node =
17187 build_pointer_type (build_qualified_type
17188 (float_type_node, TYPE_QUAL_CONST));
17190 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
17191 long_integer_type_node,
17192 pcfloat_type_node,
17193 NULL_TREE);
17194 tree void_ftype_v2sf_long_pcfloat =
17195 build_function_type_list (void_type_node,
17196 V2SF_type_node,
17197 long_integer_type_node,
17198 pcfloat_type_node,
17199 NULL_TREE);
17202 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
17203 PAIRED_BUILTIN_LX);
17206 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
17207 PAIRED_BUILTIN_STX);
17209 /* Predicates. */
17210 d = bdesc_paired_preds;
17211 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
17212 {
17213 tree type;
17214 HOST_WIDE_INT mask = d->mask;
17216 if ((mask & builtin_mask) != mask)
17217 {
17218 if (TARGET_DEBUG_BUILTIN)
17219 fprintf (stderr, "paired_init_builtins, skip predicate %s\n",
17220 d->name);
17221 continue;
17222 }
17224 /* Cannot define builtin if the instruction is disabled. */
17225 gcc_assert (d->icode != CODE_FOR_nothing);
17227 if (TARGET_DEBUG_BUILTIN)
17228 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
17229 (int)i, get_insn_name (d->icode), (int)d->icode,
17230 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
17232 switch (insn_data[d->icode].operand[1].mode)
17233 {
17234 case E_V2SFmode:
17235 type = int_ftype_int_v2sf_v2sf;
17236 break;
17237 default:
17238 gcc_unreachable ();
17239 }
17241 def_builtin (d->name, type, d->code);
17242 }
17243 }
17245 static void
17246 altivec_init_builtins (void)
17247 {
17248 const struct builtin_description *d;
17249 size_t i;
17250 tree ftype;
17251 tree decl;
17252 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17254 tree pvoid_type_node = build_pointer_type (void_type_node);
17256 tree pcvoid_type_node
17257 = build_pointer_type (build_qualified_type (void_type_node,
17258 TYPE_QUAL_CONST));
17260 tree int_ftype_opaque
17261 = build_function_type_list (integer_type_node,
17262 opaque_V4SI_type_node, NULL_TREE);
17263 tree opaque_ftype_opaque
17264 = build_function_type_list (integer_type_node, NULL_TREE);
17265 tree opaque_ftype_opaque_int
17266 = build_function_type_list (opaque_V4SI_type_node,
17267 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
17268 tree opaque_ftype_opaque_opaque_int
17269 = build_function_type_list (opaque_V4SI_type_node,
17270 opaque_V4SI_type_node, opaque_V4SI_type_node,
17271 integer_type_node, NULL_TREE);
17272 tree opaque_ftype_opaque_opaque_opaque
17273 = build_function_type_list (opaque_V4SI_type_node,
17274 opaque_V4SI_type_node, opaque_V4SI_type_node,
17275 opaque_V4SI_type_node, NULL_TREE);
17276 tree opaque_ftype_opaque_opaque
17277 = build_function_type_list (opaque_V4SI_type_node,
17278 opaque_V4SI_type_node, opaque_V4SI_type_node,
17279 NULL_TREE);
17280 tree int_ftype_int_opaque_opaque
17281 = build_function_type_list (integer_type_node,
17282 integer_type_node, opaque_V4SI_type_node,
17283 opaque_V4SI_type_node, NULL_TREE);
17284 tree int_ftype_int_v4si_v4si
17285 = build_function_type_list (integer_type_node,
17286 integer_type_node, V4SI_type_node,
17287 V4SI_type_node, NULL_TREE);
17288 tree int_ftype_int_v2di_v2di
17289 = build_function_type_list (integer_type_node,
17290 integer_type_node, V2DI_type_node,
17291 V2DI_type_node, NULL_TREE);
17292 tree void_ftype_v4si
17293 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
17294 tree v8hi_ftype_void
17295 = build_function_type_list (V8HI_type_node, NULL_TREE);
17296 tree void_ftype_void
17297 = build_function_type_list (void_type_node, NULL_TREE);
17298 tree void_ftype_int
17299 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
17301 tree opaque_ftype_long_pcvoid
17302 = build_function_type_list (opaque_V4SI_type_node,
17303 long_integer_type_node, pcvoid_type_node,
17304 NULL_TREE);
17305 tree v16qi_ftype_long_pcvoid
17306 = build_function_type_list (V16QI_type_node,
17307 long_integer_type_node, pcvoid_type_node,
17308 NULL_TREE);
17309 tree v8hi_ftype_long_pcvoid
17310 = build_function_type_list (V8HI_type_node,
17311 long_integer_type_node, pcvoid_type_node,
17312 NULL_TREE);
17313 tree v4si_ftype_long_pcvoid
17314 = build_function_type_list (V4SI_type_node,
17315 long_integer_type_node, pcvoid_type_node,
17316 NULL_TREE);
17317 tree v4sf_ftype_long_pcvoid
17318 = build_function_type_list (V4SF_type_node,
17319 long_integer_type_node, pcvoid_type_node,
17320 NULL_TREE);
17321 tree v2df_ftype_long_pcvoid
17322 = build_function_type_list (V2DF_type_node,
17323 long_integer_type_node, pcvoid_type_node,
17324 NULL_TREE);
17325 tree v2di_ftype_long_pcvoid
17326 = build_function_type_list (V2DI_type_node,
17327 long_integer_type_node, pcvoid_type_node,
17328 NULL_TREE);
17330 tree void_ftype_opaque_long_pvoid
17331 = build_function_type_list (void_type_node,
17332 opaque_V4SI_type_node, long_integer_type_node,
17333 pvoid_type_node, NULL_TREE);
17334 tree void_ftype_v4si_long_pvoid
17335 = build_function_type_list (void_type_node,
17336 V4SI_type_node, long_integer_type_node,
17337 pvoid_type_node, NULL_TREE);
17338 tree void_ftype_v16qi_long_pvoid
17339 = build_function_type_list (void_type_node,
17340 V16QI_type_node, long_integer_type_node,
17341 pvoid_type_node, NULL_TREE);
17343 tree void_ftype_v16qi_pvoid_long
17344 = build_function_type_list (void_type_node,
17345 V16QI_type_node, pvoid_type_node,
17346 long_integer_type_node, NULL_TREE);
17348 tree void_ftype_v8hi_long_pvoid
17349 = build_function_type_list (void_type_node,
17350 V8HI_type_node, long_integer_type_node,
17351 pvoid_type_node, NULL_TREE);
17352 tree void_ftype_v4sf_long_pvoid
17353 = build_function_type_list (void_type_node,
17354 V4SF_type_node, long_integer_type_node,
17355 pvoid_type_node, NULL_TREE);
17356 tree void_ftype_v2df_long_pvoid
17357 = build_function_type_list (void_type_node,
17358 V2DF_type_node, long_integer_type_node,
17359 pvoid_type_node, NULL_TREE);
17360 tree void_ftype_v2di_long_pvoid
17361 = build_function_type_list (void_type_node,
17362 V2DI_type_node, long_integer_type_node,
17363 pvoid_type_node, NULL_TREE);
17364 tree int_ftype_int_v8hi_v8hi
17365 = build_function_type_list (integer_type_node,
17366 integer_type_node, V8HI_type_node,
17367 V8HI_type_node, NULL_TREE);
17368 tree int_ftype_int_v16qi_v16qi
17369 = build_function_type_list (integer_type_node,
17370 integer_type_node, V16QI_type_node,
17371 V16QI_type_node, NULL_TREE);
17372 tree int_ftype_int_v4sf_v4sf
17373 = build_function_type_list (integer_type_node,
17374 integer_type_node, V4SF_type_node,
17375 V4SF_type_node, NULL_TREE);
17376 tree int_ftype_int_v2df_v2df
17377 = build_function_type_list (integer_type_node,
17378 integer_type_node, V2DF_type_node,
17379 V2DF_type_node, NULL_TREE);
17380 tree v2di_ftype_v2di
17381 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
17382 tree v4si_ftype_v4si
17383 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17384 tree v8hi_ftype_v8hi
17385 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17386 tree v16qi_ftype_v16qi
17387 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17388 tree v4sf_ftype_v4sf
17389 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17390 tree v2df_ftype_v2df
17391 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17392 tree void_ftype_pcvoid_int_int
17393 = build_function_type_list (void_type_node,
17394 pcvoid_type_node, integer_type_node,
17395 integer_type_node, NULL_TREE);
17397 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
17398 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
17399 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
17400 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
17401 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
17402 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
17403 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
17404 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
17405 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
17406 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
17407 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
17408 ALTIVEC_BUILTIN_LVXL_V2DF);
17409 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
17410 ALTIVEC_BUILTIN_LVXL_V2DI);
17411 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
17412 ALTIVEC_BUILTIN_LVXL_V4SF);
17413 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
17414 ALTIVEC_BUILTIN_LVXL_V4SI);
17415 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
17416 ALTIVEC_BUILTIN_LVXL_V8HI);
17417 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
17418 ALTIVEC_BUILTIN_LVXL_V16QI);
17419 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
17420 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
17421 ALTIVEC_BUILTIN_LVX_V2DF);
17422 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
17423 ALTIVEC_BUILTIN_LVX_V2DI);
17424 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
17425 ALTIVEC_BUILTIN_LVX_V4SF);
17426 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
17427 ALTIVEC_BUILTIN_LVX_V4SI);
17428 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
17429 ALTIVEC_BUILTIN_LVX_V8HI);
17430 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
17431 ALTIVEC_BUILTIN_LVX_V16QI);
17432 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
17433 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
17434 ALTIVEC_BUILTIN_STVX_V2DF);
17435 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
17436 ALTIVEC_BUILTIN_STVX_V2DI);
17437 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
17438 ALTIVEC_BUILTIN_STVX_V4SF);
17439 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
17440 ALTIVEC_BUILTIN_STVX_V4SI);
17441 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
17442 ALTIVEC_BUILTIN_STVX_V8HI);
17443 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
17444 ALTIVEC_BUILTIN_STVX_V16QI);
17445 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
17446 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
17447 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
17448 ALTIVEC_BUILTIN_STVXL_V2DF);
17449 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
17450 ALTIVEC_BUILTIN_STVXL_V2DI);
17451 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
17452 ALTIVEC_BUILTIN_STVXL_V4SF);
17453 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
17454 ALTIVEC_BUILTIN_STVXL_V4SI);
17455 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
17456 ALTIVEC_BUILTIN_STVXL_V8HI);
17457 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
17458 ALTIVEC_BUILTIN_STVXL_V16QI);
17459 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
17460 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
17461 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
17462 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
17463 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
17464 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
17465 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
17466 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
17467 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
17468 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
17469 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
17470 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
17471 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
17472 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
17473 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
17474 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
17476 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
17477 VSX_BUILTIN_LXVD2X_V2DF);
17478 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
17479 VSX_BUILTIN_LXVD2X_V2DI);
17480 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
17481 VSX_BUILTIN_LXVW4X_V4SF);
17482 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
17483 VSX_BUILTIN_LXVW4X_V4SI);
17484 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
17485 VSX_BUILTIN_LXVW4X_V8HI);
17486 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
17487 VSX_BUILTIN_LXVW4X_V16QI);
17488 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
17489 VSX_BUILTIN_STXVD2X_V2DF);
17490 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
17491 VSX_BUILTIN_STXVD2X_V2DI);
17492 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
17493 VSX_BUILTIN_STXVW4X_V4SF);
17494 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
17495 VSX_BUILTIN_STXVW4X_V4SI);
17496 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
17497 VSX_BUILTIN_STXVW4X_V8HI);
17498 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
17499 VSX_BUILTIN_STXVW4X_V16QI);
17501 def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid,
17502 VSX_BUILTIN_LD_ELEMREV_V2DF);
17503 def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid,
17504 VSX_BUILTIN_LD_ELEMREV_V2DI);
17505 def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid,
17506 VSX_BUILTIN_LD_ELEMREV_V4SF);
17507 def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid,
17508 VSX_BUILTIN_LD_ELEMREV_V4SI);
17509 def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid,
17510 VSX_BUILTIN_LD_ELEMREV_V8HI);
17511 def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid,
17512 VSX_BUILTIN_LD_ELEMREV_V16QI);
17513 def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid,
17514 VSX_BUILTIN_ST_ELEMREV_V2DF);
17515 def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid,
17516 VSX_BUILTIN_ST_ELEMREV_V2DI);
17517 def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid,
17518 VSX_BUILTIN_ST_ELEMREV_V4SF);
17519 def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid,
17520 VSX_BUILTIN_ST_ELEMREV_V4SI);
17521 def_builtin ("__builtin_vsx_st_elemrev_v8hi", void_ftype_v8hi_long_pvoid,
17522 VSX_BUILTIN_ST_ELEMREV_V8HI);
17523 def_builtin ("__builtin_vsx_st_elemrev_v16qi", void_ftype_v16qi_long_pvoid,
17524 VSX_BUILTIN_ST_ELEMREV_V16QI);
17526 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
17527 VSX_BUILTIN_VEC_LD);
17528 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
17529 VSX_BUILTIN_VEC_ST);
17530 def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid,
17531 VSX_BUILTIN_VEC_XL);
17532 def_builtin ("__builtin_vec_xl_be", opaque_ftype_long_pcvoid,
17533 VSX_BUILTIN_VEC_XL_BE);
17534 def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid,
17535 VSX_BUILTIN_VEC_XST);
17536 def_builtin ("__builtin_vec_xst_be", void_ftype_opaque_long_pvoid,
17537 VSX_BUILTIN_VEC_XST_BE);
17539 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
17540 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
17541 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
17543 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
17544 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
17545 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
17546 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
17547 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
17548 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
17549 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
17550 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
17551 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
17552 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
17553 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
17554 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
17556 def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque,
17557 ALTIVEC_BUILTIN_VEC_ADDE);
17558 def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque,
17559 ALTIVEC_BUILTIN_VEC_ADDEC);
17560 def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque,
17561 ALTIVEC_BUILTIN_VEC_CMPNE);
17562 def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque,
17563 ALTIVEC_BUILTIN_VEC_MUL);
17564 def_builtin ("__builtin_vec_sube", opaque_ftype_opaque_opaque_opaque,
17565 ALTIVEC_BUILTIN_VEC_SUBE);
17566 def_builtin ("__builtin_vec_subec", opaque_ftype_opaque_opaque_opaque,
17567 ALTIVEC_BUILTIN_VEC_SUBEC);
17569 /* Cell builtins. */
17570 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
17571 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
17572 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
17573 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
17575 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
17576 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
17577 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
17578 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
17580 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
17581 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
17582 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
17583 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
17585 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
17586 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
17587 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
17588 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
17590 if (TARGET_P9_VECTOR)
17592 def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
17593 P9V_BUILTIN_STXVL);
17594 def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
17595 P9V_BUILTIN_XST_LEN_R);
17598 /* Add the DST variants. */
17599 d = bdesc_dst;
17600 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
17602 HOST_WIDE_INT mask = d->mask;
17604 /* It is expected that these dst built-in functions may have
17605 d->icode equal to CODE_FOR_nothing. */
17606 if ((mask & builtin_mask) != mask)
17608 if (TARGET_DEBUG_BUILTIN)
17609 fprintf (stderr, "altivec_init_builtins, skip dst %s\n",
17610 d->name);
17611 continue;
17613 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
17616 /* Initialize the predicates. */
17617 d = bdesc_altivec_preds;
17618 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
17620 machine_mode mode1;
17621 tree type;
17622 HOST_WIDE_INT mask = d->mask;
17624 if ((mask & builtin_mask) != mask)
17626 if (TARGET_DEBUG_BUILTIN)
17627 fprintf (stderr, "altivec_init_builtins, skip predicate %s\n",
17628 d->name);
17629 continue;
17632 if (rs6000_overloaded_builtin_p (d->code))
17633 mode1 = VOIDmode;
17634 else
17636 /* Cannot define builtin if the instruction is disabled. */
17637 gcc_assert (d->icode != CODE_FOR_nothing);
17638 mode1 = insn_data[d->icode].operand[1].mode;
17641 switch (mode1)
17643 case E_VOIDmode:
17644 type = int_ftype_int_opaque_opaque;
17645 break;
17646 case E_V2DImode:
17647 type = int_ftype_int_v2di_v2di;
17648 break;
17649 case E_V4SImode:
17650 type = int_ftype_int_v4si_v4si;
17651 break;
17652 case E_V8HImode:
17653 type = int_ftype_int_v8hi_v8hi;
17654 break;
17655 case E_V16QImode:
17656 type = int_ftype_int_v16qi_v16qi;
17657 break;
17658 case E_V4SFmode:
17659 type = int_ftype_int_v4sf_v4sf;
17660 break;
17661 case E_V2DFmode:
17662 type = int_ftype_int_v2df_v2df;
17663 break;
17664 default:
17665 gcc_unreachable ();
17668 def_builtin (d->name, type, d->code);
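/* To illustrate (a sketch; the exact expansion lives in altivec.h, not
   here): the predicate builtins above all take a CR6 selector as their
   first argument, so a source-level use such as

       vec_all_eq (a, b)

   is expected to reach this code as something like

       __builtin_vec_vcmpeq_p (__CR6_LT, a, b)

   matching the int (int, vector, vector) function types built here.  */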
17671 /* Initialize the abs* operators. */
17672 d = bdesc_abs;
17673 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
17675 machine_mode mode0;
17676 tree type;
17677 HOST_WIDE_INT mask = d->mask;
17679 if ((mask & builtin_mask) != mask)
17681 if (TARGET_DEBUG_BUILTIN)
17682 fprintf (stderr, "altivec_init_builtins, skip abs %s\n",
17683 d->name);
17684 continue;
17687 /* Cannot define builtin if the instruction is disabled. */
17688 gcc_assert (d->icode != CODE_FOR_nothing);
17689 mode0 = insn_data[d->icode].operand[0].mode;
17691 switch (mode0)
17693 case E_V2DImode:
17694 type = v2di_ftype_v2di;
17695 break;
17696 case E_V4SImode:
17697 type = v4si_ftype_v4si;
17698 break;
17699 case E_V8HImode:
17700 type = v8hi_ftype_v8hi;
17701 break;
17702 case E_V16QImode:
17703 type = v16qi_ftype_v16qi;
17704 break;
17705 case E_V4SFmode:
17706 type = v4sf_ftype_v4sf;
17707 break;
17708 case E_V2DFmode:
17709 type = v2df_ftype_v2df;
17710 break;
17711 default:
17712 gcc_unreachable ();
17715 def_builtin (d->name, type, d->code);
17718 /* Initialize target builtin that implements
17719 targetm.vectorize.builtin_mask_for_load. */
17721 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
17722 v16qi_ftype_long_pcvoid,
17723 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
17724 BUILT_IN_MD, NULL, NULL_TREE);
17725 TREE_READONLY (decl) = 1;
17726 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
17727 altivec_builtin_mask_for_load = decl;
17729 /* Access to the vec_init patterns. */
17730 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
17731 integer_type_node, integer_type_node,
17732 integer_type_node, NULL_TREE);
17733 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
17735 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
17736 short_integer_type_node,
17737 short_integer_type_node,
17738 short_integer_type_node,
17739 short_integer_type_node,
17740 short_integer_type_node,
17741 short_integer_type_node,
17742 short_integer_type_node, NULL_TREE);
17743 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
17745 ftype = build_function_type_list (V16QI_type_node, char_type_node,
17746 char_type_node, char_type_node,
17747 char_type_node, char_type_node,
17748 char_type_node, char_type_node,
17749 char_type_node, char_type_node,
17750 char_type_node, char_type_node,
17751 char_type_node, char_type_node,
17752 char_type_node, char_type_node,
17753 char_type_node, NULL_TREE);
17754 def_builtin ("__builtin_vec_init_v16qi", ftype,
17755 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
17757 ftype = build_function_type_list (V4SF_type_node, float_type_node,
17758 float_type_node, float_type_node,
17759 float_type_node, NULL_TREE);
17760 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
17762 /* VSX builtins. */
17763 ftype = build_function_type_list (V2DF_type_node, double_type_node,
17764 double_type_node, NULL_TREE);
17765 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
17767 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
17768 intDI_type_node, NULL_TREE);
17769 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
17771 /* Access to the vec_set patterns. */
17772 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17773 intSI_type_node,
17774 integer_type_node, NULL_TREE);
17775 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
17777 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17778 intHI_type_node,
17779 integer_type_node, NULL_TREE);
17780 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
17782 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17783 intQI_type_node,
17784 integer_type_node, NULL_TREE);
17785 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
17787 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17788 float_type_node,
17789 integer_type_node, NULL_TREE);
17790 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
17792 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
17793 double_type_node,
17794 integer_type_node, NULL_TREE);
17795 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
17797 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17798 intDI_type_node,
17799 integer_type_node, NULL_TREE);
17800 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
17802 /* Access to the vec_extract patterns. */
17803 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17804 integer_type_node, NULL_TREE);
17805 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
17807 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17808 integer_type_node, NULL_TREE);
17809 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
17811 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17812 integer_type_node, NULL_TREE);
17813 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
17815 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17816 integer_type_node, NULL_TREE);
17817 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
17819 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17820 integer_type_node, NULL_TREE);
17821 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
17823 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
17824 integer_type_node, NULL_TREE);
17825 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
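/* For example, given the function types built just above, one would
   expect (illustrative only, not required by anything in this file):

       vector double vd = __builtin_vec_init_v2df (x, y);
       vd = __builtin_vec_set_v2df (vd, z, 0);
       double d = __builtin_vec_ext_v2df (vd, 1);

   where the trailing integer of the set/ext builtins selects the
   element.  */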
17828 if (V1TI_type_node)
17830 tree v1ti_ftype_long_pcvoid
17831 = build_function_type_list (V1TI_type_node,
17832 long_integer_type_node, pcvoid_type_node,
17833 NULL_TREE);
17834 tree void_ftype_v1ti_long_pvoid
17835 = build_function_type_list (void_type_node,
17836 V1TI_type_node, long_integer_type_node,
17837 pvoid_type_node, NULL_TREE);
17838 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
17839 VSX_BUILTIN_LXVD2X_V1TI);
17840 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
17841 VSX_BUILTIN_STXVD2X_V1TI);
17842 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
17843 NULL_TREE, NULL_TREE);
17844 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
17845 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
17846 intTI_type_node,
17847 integer_type_node, NULL_TREE);
17848 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
17849 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
17850 integer_type_node, NULL_TREE);
17851 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
17856 static void
17857 htm_init_builtins (void)
17859 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
17860 const struct builtin_description *d;
17861 size_t i;
17863 d = bdesc_htm;
17864 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
17866 tree op[MAX_HTM_OPERANDS], type;
17867 HOST_WIDE_INT mask = d->mask;
17868 unsigned attr = rs6000_builtin_info[d->code].attr;
17869 bool void_func = (attr & RS6000_BTC_VOID);
17870 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
17871 int nopnds = 0;
17872 tree gpr_type_node;
17873 tree rettype;
17874 tree argtype;
17876 /* It is expected that these htm built-in functions may have
17877 d->icode equal to CODE_FOR_nothing. */
17879 if (TARGET_32BIT && TARGET_POWERPC64)
17880 gpr_type_node = long_long_unsigned_type_node;
17881 else
17882 gpr_type_node = long_unsigned_type_node;
17884 if (attr & RS6000_BTC_SPR)
17886 rettype = gpr_type_node;
17887 argtype = gpr_type_node;
17889 else if (d->code == HTM_BUILTIN_TABORTDC
17890 || d->code == HTM_BUILTIN_TABORTDCI)
17892 rettype = unsigned_type_node;
17893 argtype = gpr_type_node;
17895 else
17897 rettype = unsigned_type_node;
17898 argtype = unsigned_type_node;
17901 if ((mask & builtin_mask) != mask)
17903 if (TARGET_DEBUG_BUILTIN)
17904 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
17905 continue;
17908 if (d->name == 0)
17910 if (TARGET_DEBUG_BUILTIN)
17911 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
17912 (long unsigned) i);
17913 continue;
17916 op[nopnds++] = (void_func) ? void_type_node : rettype;
17918 if (attr_args == RS6000_BTC_UNARY)
17919 op[nopnds++] = argtype;
17920 else if (attr_args == RS6000_BTC_BINARY)
17922 op[nopnds++] = argtype;
17923 op[nopnds++] = argtype;
17925 else if (attr_args == RS6000_BTC_TERNARY)
17927 op[nopnds++] = argtype;
17928 op[nopnds++] = argtype;
17929 op[nopnds++] = argtype;
17932 switch (nopnds)
17934 case 1:
17935 type = build_function_type_list (op[0], NULL_TREE);
17936 break;
17937 case 2:
17938 type = build_function_type_list (op[0], op[1], NULL_TREE);
17939 break;
17940 case 3:
17941 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
17942 break;
17943 case 4:
17944 type = build_function_type_list (op[0], op[1], op[2], op[3],
17945 NULL_TREE);
17946 break;
17947 default:
17948 gcc_unreachable ();
17951 def_builtin (d->name, type, d->code);
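/* To illustrate the typing above: __builtin_tbegin is a unary HTM
   builtin without RS6000_BTC_SPR, so it gets the type
   "unsigned int (unsigned int)", and a typical (sketched) use is

       if (__builtin_tbegin (0))
	 {
	   ... transactional code ...
	   __builtin_tend (0);
	 }
*/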
17955 /* Hash function for builtin functions with up to 3 arguments and a return
17956 type. */
17957 hashval_t
17958 builtin_hasher::hash (builtin_hash_struct *bh)
17960 unsigned ret = 0;
17961 int i;
17963 for (i = 0; i < 4; i++)
17965 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
17966 ret = (ret * 2) + bh->uns_p[i];
17969 return ret;
17972 /* Compare builtin hash entries P1 and P2 for equivalence. */
17973 bool
17974 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
17976 return ((p1->mode[0] == p2->mode[0])
17977 && (p1->mode[1] == p2->mode[1])
17978 && (p1->mode[2] == p2->mode[2])
17979 && (p1->mode[3] == p2->mode[3])
17980 && (p1->uns_p[0] == p2->uns_p[0])
17981 && (p1->uns_p[1] == p2->uns_p[1])
17982 && (p1->uns_p[2] == p2->uns_p[2])
17983 && (p1->uns_p[3] == p2->uns_p[3]));
17986 /* Map types for builtin functions with an explicit return type and up to 3
17987 arguments. Functions with fewer than 3 arguments pass VOIDmode in the
17988 unused argument slots. */
17989 static tree
17990 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
17991 machine_mode mode_arg1, machine_mode mode_arg2,
17992 enum rs6000_builtins builtin, const char *name)
17994 struct builtin_hash_struct h;
17995 struct builtin_hash_struct *h2;
17996 int num_args = 3;
17997 int i;
17998 tree ret_type = NULL_TREE;
17999 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
18001 /* Create builtin_hash_table. */
18002 if (builtin_hash_table == NULL)
18003 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
18005 h.type = NULL_TREE;
18006 h.mode[0] = mode_ret;
18007 h.mode[1] = mode_arg0;
18008 h.mode[2] = mode_arg1;
18009 h.mode[3] = mode_arg2;
18010 h.uns_p[0] = 0;
18011 h.uns_p[1] = 0;
18012 h.uns_p[2] = 0;
18013 h.uns_p[3] = 0;
18015 /* If the builtin produces unsigned results or takes unsigned arguments,
18016 and it may be returned as a decl for the vectorizer (such as the
18017 widening multiplies and permutes), make sure the arguments and return
18018 value have the correct signedness. */
18019 switch (builtin)
18021 /* unsigned 1 argument functions. */
18022 case CRYPTO_BUILTIN_VSBOX:
18023 case P8V_BUILTIN_VGBBD:
18024 case MISC_BUILTIN_CDTBCD:
18025 case MISC_BUILTIN_CBCDTD:
18026 h.uns_p[0] = 1;
18027 h.uns_p[1] = 1;
18028 break;
18030 /* unsigned 2 argument functions. */
18031 case ALTIVEC_BUILTIN_VMULEUB:
18032 case ALTIVEC_BUILTIN_VMULEUH:
18033 case ALTIVEC_BUILTIN_VMULEUW:
18034 case ALTIVEC_BUILTIN_VMULOUB:
18035 case ALTIVEC_BUILTIN_VMULOUH:
18036 case ALTIVEC_BUILTIN_VMULOUW:
18037 case CRYPTO_BUILTIN_VCIPHER:
18038 case CRYPTO_BUILTIN_VCIPHERLAST:
18039 case CRYPTO_BUILTIN_VNCIPHER:
18040 case CRYPTO_BUILTIN_VNCIPHERLAST:
18041 case CRYPTO_BUILTIN_VPMSUMB:
18042 case CRYPTO_BUILTIN_VPMSUMH:
18043 case CRYPTO_BUILTIN_VPMSUMW:
18044 case CRYPTO_BUILTIN_VPMSUMD:
18045 case CRYPTO_BUILTIN_VPMSUM:
18046 case MISC_BUILTIN_ADDG6S:
18047 case MISC_BUILTIN_DIVWEU:
18048 case MISC_BUILTIN_DIVWEUO:
18049 case MISC_BUILTIN_DIVDEU:
18050 case MISC_BUILTIN_DIVDEUO:
18051 case VSX_BUILTIN_UDIV_V2DI:
18052 case ALTIVEC_BUILTIN_VMAXUB:
18053 case ALTIVEC_BUILTIN_VMINUB:
18054 case ALTIVEC_BUILTIN_VMAXUH:
18055 case ALTIVEC_BUILTIN_VMINUH:
18056 case ALTIVEC_BUILTIN_VMAXUW:
18057 case ALTIVEC_BUILTIN_VMINUW:
18058 case P8V_BUILTIN_VMAXUD:
18059 case P8V_BUILTIN_VMINUD:
18060 h.uns_p[0] = 1;
18061 h.uns_p[1] = 1;
18062 h.uns_p[2] = 1;
18063 break;
18065 /* unsigned 3 argument functions. */
18066 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
18067 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
18068 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
18069 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
18070 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
18071 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
18072 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
18073 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
18074 case VSX_BUILTIN_VPERM_16QI_UNS:
18075 case VSX_BUILTIN_VPERM_8HI_UNS:
18076 case VSX_BUILTIN_VPERM_4SI_UNS:
18077 case VSX_BUILTIN_VPERM_2DI_UNS:
18078 case VSX_BUILTIN_XXSEL_16QI_UNS:
18079 case VSX_BUILTIN_XXSEL_8HI_UNS:
18080 case VSX_BUILTIN_XXSEL_4SI_UNS:
18081 case VSX_BUILTIN_XXSEL_2DI_UNS:
18082 case CRYPTO_BUILTIN_VPERMXOR:
18083 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
18084 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
18085 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
18086 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
18087 case CRYPTO_BUILTIN_VSHASIGMAW:
18088 case CRYPTO_BUILTIN_VSHASIGMAD:
18089 case CRYPTO_BUILTIN_VSHASIGMA:
18090 h.uns_p[0] = 1;
18091 h.uns_p[1] = 1;
18092 h.uns_p[2] = 1;
18093 h.uns_p[3] = 1;
18094 break;
18096 /* signed permute functions with unsigned char mask. */
18097 case ALTIVEC_BUILTIN_VPERM_16QI:
18098 case ALTIVEC_BUILTIN_VPERM_8HI:
18099 case ALTIVEC_BUILTIN_VPERM_4SI:
18100 case ALTIVEC_BUILTIN_VPERM_4SF:
18101 case ALTIVEC_BUILTIN_VPERM_2DI:
18102 case ALTIVEC_BUILTIN_VPERM_2DF:
18103 case VSX_BUILTIN_VPERM_16QI:
18104 case VSX_BUILTIN_VPERM_8HI:
18105 case VSX_BUILTIN_VPERM_4SI:
18106 case VSX_BUILTIN_VPERM_4SF:
18107 case VSX_BUILTIN_VPERM_2DI:
18108 case VSX_BUILTIN_VPERM_2DF:
18109 h.uns_p[3] = 1;
18110 break;
18112 /* unsigned args, signed return. */
18113 case VSX_BUILTIN_XVCVUXDSP:
18114 case VSX_BUILTIN_XVCVUXDDP_UNS:
18115 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
18116 h.uns_p[1] = 1;
18117 break;
18119 /* signed args, unsigned return. */
18120 case VSX_BUILTIN_XVCVDPUXDS_UNS:
18121 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
18122 case MISC_BUILTIN_UNPACK_TD:
18123 case MISC_BUILTIN_UNPACK_V1TI:
18124 h.uns_p[0] = 1;
18125 break;
18127 /* unsigned arguments, bool return (compares). */
18128 case ALTIVEC_BUILTIN_VCMPEQUB:
18129 case ALTIVEC_BUILTIN_VCMPEQUH:
18130 case ALTIVEC_BUILTIN_VCMPEQUW:
18131 case P8V_BUILTIN_VCMPEQUD:
18132 case VSX_BUILTIN_CMPGE_U16QI:
18133 case VSX_BUILTIN_CMPGE_U8HI:
18134 case VSX_BUILTIN_CMPGE_U4SI:
18135 case VSX_BUILTIN_CMPGE_U2DI:
18136 case ALTIVEC_BUILTIN_VCMPGTUB:
18137 case ALTIVEC_BUILTIN_VCMPGTUH:
18138 case ALTIVEC_BUILTIN_VCMPGTUW:
18139 case P8V_BUILTIN_VCMPGTUD:
18140 h.uns_p[1] = 1;
18141 h.uns_p[2] = 1;
18142 break;
18144 /* unsigned arguments for 128-bit pack instructions. */
18145 case MISC_BUILTIN_PACK_TD:
18146 case MISC_BUILTIN_PACK_V1TI:
18147 h.uns_p[1] = 1;
18148 h.uns_p[2] = 1;
18149 break;
18151 /* unsigned second arguments (vector shift right). */
18152 case ALTIVEC_BUILTIN_VSRB:
18153 case ALTIVEC_BUILTIN_VSRH:
18154 case ALTIVEC_BUILTIN_VSRW:
18155 case P8V_BUILTIN_VSRD:
18156 h.uns_p[2] = 1;
18157 break;
18159 default:
18160 break;
18163 /* Figure out how many args are present. */
18164 while (num_args > 0 && h.mode[num_args] == VOIDmode)
18165 num_args--;
18167 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
18168 if (!ret_type && h.uns_p[0])
18169 ret_type = builtin_mode_to_type[h.mode[0]][0];
18171 if (!ret_type)
18172 fatal_error (input_location,
18173 "internal error: builtin function %qs had an unexpected "
18174 "return type %qs", name, GET_MODE_NAME (h.mode[0]));
18176 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
18177 arg_type[i] = NULL_TREE;
18179 for (i = 0; i < num_args; i++)
18181 int m = (int) h.mode[i+1];
18182 int uns_p = h.uns_p[i+1];
18184 arg_type[i] = builtin_mode_to_type[m][uns_p];
18185 if (!arg_type[i] && uns_p)
18186 arg_type[i] = builtin_mode_to_type[m][0];
18188 if (!arg_type[i])
18189 fatal_error (input_location,
18190 "internal error: builtin function %qs, argument %d "
18191 "had unexpected argument type %qs", name, i,
18192 GET_MODE_NAME (m));
18195 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
18196 if (*found == NULL)
18198 h2 = ggc_alloc<builtin_hash_struct> ();
18199 *h2 = h;
18200 *found = h2;
18202 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
18203 arg_type[2], NULL_TREE);
18206 return (*found)->type;
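/* For instance, ALTIVEC_BUILTIN_VMULEUB (one of the "unsigned 2 argument"
   cases above) is expected to come in with modes (V8HI, V16QI, V16QI,
   VOID); with all three uns_p flags set, the cached type is roughly

       vector unsigned short (vector unsigned char, vector unsigned char)

   rather than the signed variants the raw modes alone would give.  */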
18209 static void
18210 rs6000_common_init_builtins (void)
18212 const struct builtin_description *d;
18213 size_t i;
18215 tree opaque_ftype_opaque = NULL_TREE;
18216 tree opaque_ftype_opaque_opaque = NULL_TREE;
18217 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
18218 tree v2si_ftype = NULL_TREE;
18219 tree v2si_ftype_qi = NULL_TREE;
18220 tree v2si_ftype_v2si_qi = NULL_TREE;
18221 tree v2si_ftype_int_qi = NULL_TREE;
18222 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
18224 if (!TARGET_PAIRED_FLOAT)
18226 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
18227 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
18230 /* Paired builtins are only available if the compiler was built with the
18231 appropriate options, so only create them in that case. Create Altivec
18232 and VSX builtins on machines with at least the general purpose
18233 extensions (970 and newer) to allow the use of the target
18234 attribute. */
18236 if (TARGET_EXTRA_BUILTINS)
18237 builtin_mask |= RS6000_BTM_COMMON;
18239 /* Add the ternary operators. */
18240 d = bdesc_3arg;
18241 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
18243 tree type;
18244 HOST_WIDE_INT mask = d->mask;
18246 if ((mask & builtin_mask) != mask)
18248 if (TARGET_DEBUG_BUILTIN)
18249 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
18250 continue;
18253 if (rs6000_overloaded_builtin_p (d->code))
18255 if (! (type = opaque_ftype_opaque_opaque_opaque))
18256 type = opaque_ftype_opaque_opaque_opaque
18257 = build_function_type_list (opaque_V4SI_type_node,
18258 opaque_V4SI_type_node,
18259 opaque_V4SI_type_node,
18260 opaque_V4SI_type_node,
18261 NULL_TREE);
18263 else
18265 enum insn_code icode = d->icode;
18266 if (d->name == 0)
18268 if (TARGET_DEBUG_BUILTIN)
18269 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
18270 (long unsigned)i);
18272 continue;
18275 if (icode == CODE_FOR_nothing)
18277 if (TARGET_DEBUG_BUILTIN)
18278 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
18279 d->name);
18281 continue;
18284 type = builtin_function_type (insn_data[icode].operand[0].mode,
18285 insn_data[icode].operand[1].mode,
18286 insn_data[icode].operand[2].mode,
18287 insn_data[icode].operand[3].mode,
18288 d->code, d->name);
18291 def_builtin (d->name, type, d->code);
18294 /* Add the binary operators. */
18295 d = bdesc_2arg;
18296 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18298 machine_mode mode0, mode1, mode2;
18299 tree type;
18300 HOST_WIDE_INT mask = d->mask;
18302 if ((mask & builtin_mask) != mask)
18304 if (TARGET_DEBUG_BUILTIN)
18305 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
18306 continue;
18309 if (rs6000_overloaded_builtin_p (d->code))
18311 if (! (type = opaque_ftype_opaque_opaque))
18312 type = opaque_ftype_opaque_opaque
18313 = build_function_type_list (opaque_V4SI_type_node,
18314 opaque_V4SI_type_node,
18315 opaque_V4SI_type_node,
18316 NULL_TREE);
18318 else
18320 enum insn_code icode = d->icode;
18321 if (d->name == 0)
18323 if (TARGET_DEBUG_BUILTIN)
18324 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
18325 (long unsigned)i);
18327 continue;
18330 if (icode == CODE_FOR_nothing)
18332 if (TARGET_DEBUG_BUILTIN)
18333 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
18334 d->name);
18336 continue;
18339 mode0 = insn_data[icode].operand[0].mode;
18340 mode1 = insn_data[icode].operand[1].mode;
18341 mode2 = insn_data[icode].operand[2].mode;
18343 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
18345 if (! (type = v2si_ftype_v2si_qi))
18346 type = v2si_ftype_v2si_qi
18347 = build_function_type_list (opaque_V2SI_type_node,
18348 opaque_V2SI_type_node,
18349 char_type_node,
18350 NULL_TREE);
18353 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
18354 && mode2 == QImode)
18356 if (! (type = v2si_ftype_int_qi))
18357 type = v2si_ftype_int_qi
18358 = build_function_type_list (opaque_V2SI_type_node,
18359 integer_type_node,
18360 char_type_node,
18361 NULL_TREE);
18364 else
18365 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
18366 d->code, d->name);
18369 def_builtin (d->name, type, d->code);
18372 /* Add the simple unary operators. */
18373 d = bdesc_1arg;
18374 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18376 machine_mode mode0, mode1;
18377 tree type;
18378 HOST_WIDE_INT mask = d->mask;
18380 if ((mask & builtin_mask) != mask)
18382 if (TARGET_DEBUG_BUILTIN)
18383 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
18384 continue;
18387 if (rs6000_overloaded_builtin_p (d->code))
18389 if (! (type = opaque_ftype_opaque))
18390 type = opaque_ftype_opaque
18391 = build_function_type_list (opaque_V4SI_type_node,
18392 opaque_V4SI_type_node,
18393 NULL_TREE);
18395 else
18397 enum insn_code icode = d->icode;
18398 if (d->name == 0)
18400 if (TARGET_DEBUG_BUILTIN)
18401 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
18402 (long unsigned)i);
18404 continue;
18407 if (icode == CODE_FOR_nothing)
18409 if (TARGET_DEBUG_BUILTIN)
18410 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
18411 d->name);
18413 continue;
18416 mode0 = insn_data[icode].operand[0].mode;
18417 mode1 = insn_data[icode].operand[1].mode;
18419 if (mode0 == V2SImode && mode1 == QImode)
18421 if (! (type = v2si_ftype_qi))
18422 type = v2si_ftype_qi
18423 = build_function_type_list (opaque_V2SI_type_node,
18424 char_type_node,
18425 NULL_TREE);
18428 else
18429 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
18430 d->code, d->name);
18433 def_builtin (d->name, type, d->code);
18436 /* Add the simple no-argument operators. */
18437 d = bdesc_0arg;
18438 for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
18440 machine_mode mode0;
18441 tree type;
18442 HOST_WIDE_INT mask = d->mask;
18444 if ((mask & builtin_mask) != mask)
18446 if (TARGET_DEBUG_BUILTIN)
18447 fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name);
18448 continue;
18450 if (rs6000_overloaded_builtin_p (d->code))
18452 if (!opaque_ftype_opaque)
18453 opaque_ftype_opaque
18454 = build_function_type_list (opaque_V4SI_type_node, NULL_TREE);
18455 type = opaque_ftype_opaque;
18457 else
18459 enum insn_code icode = d->icode;
18460 if (d->name == 0)
18462 if (TARGET_DEBUG_BUILTIN)
18463 fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n",
18464 (long unsigned) i);
18465 continue;
18467 if (icode == CODE_FOR_nothing)
18469 if (TARGET_DEBUG_BUILTIN)
18470 fprintf (stderr,
18471 "rs6000_builtin, skip no-argument %s (no code)\n",
18472 d->name);
18473 continue;
18475 mode0 = insn_data[icode].operand[0].mode;
18476 if (mode0 == V2SImode)
18478 /* Code for paired single. */
18479 if (! (type = v2si_ftype))
18481 v2si_ftype
18482 = build_function_type_list (opaque_V2SI_type_node,
18483 NULL_TREE);
18484 type = v2si_ftype;
18487 else
18488 type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode,
18489 d->code, d->name);
18491 def_builtin (d->name, type, d->code);
18495 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
18496 static void
18497 init_float128_ibm (machine_mode mode)
18499 if (!TARGET_XL_COMPAT)
18501 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
18502 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
18503 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
18504 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
18506 if (!TARGET_HARD_FLOAT)
18508 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
18509 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
18510 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
18511 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
18512 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
18513 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
18514 set_optab_libfunc (le_optab, mode, "__gcc_qle");
18515 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
18517 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
18518 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
18519 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
18520 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
18521 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
18522 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
18523 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
18524 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
18527 else
18529 set_optab_libfunc (add_optab, mode, "_xlqadd");
18530 set_optab_libfunc (sub_optab, mode, "_xlqsub");
18531 set_optab_libfunc (smul_optab, mode, "_xlqmul");
18532 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
18535 /* Add various conversions for IFmode to use the traditional TFmode
18536 names. */
18537 if (mode == IFmode)
18539 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
18540 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
18541 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
18542 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
18543 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
18544 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");
18546 if (TARGET_POWERPC64)
18548 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
18549 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
18550 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
18551 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
18556 /* Set up IEEE 128-bit floating point routines. Use different names if the
18557 arguments can be passed in a vector register. The historical PowerPC
18558 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
18559 continue to use that if we aren't using vector registers to pass IEEE
18560 128-bit floating point. */
18562 static void
18563 init_float128_ieee (machine_mode mode)
18565 if (FLOAT128_VECTOR_P (mode))
18567 set_optab_libfunc (add_optab, mode, "__addkf3");
18568 set_optab_libfunc (sub_optab, mode, "__subkf3");
18569 set_optab_libfunc (neg_optab, mode, "__negkf2");
18570 set_optab_libfunc (smul_optab, mode, "__mulkf3");
18571 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
18572 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
18573 set_optab_libfunc (abs_optab, mode, "__abstkf2");
18575 set_optab_libfunc (eq_optab, mode, "__eqkf2");
18576 set_optab_libfunc (ne_optab, mode, "__nekf2");
18577 set_optab_libfunc (gt_optab, mode, "__gtkf2");
18578 set_optab_libfunc (ge_optab, mode, "__gekf2");
18579 set_optab_libfunc (lt_optab, mode, "__ltkf2");
18580 set_optab_libfunc (le_optab, mode, "__lekf2");
18581 set_optab_libfunc (unord_optab, mode, "__unordkf2");
18583 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
18584 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
18585 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
18586 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
18588 set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
18589 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18590 set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");
18592 set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
18593 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
18594 set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");
18596 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
18597 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
18598 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
18599 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
18600 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
18601 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");
18603 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
18604 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
18605 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
18606 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
18608 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
18609 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
18610 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
18611 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
18613 if (TARGET_POWERPC64)
18615 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
18616 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
18617 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
18618 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
18622 else
18624 set_optab_libfunc (add_optab, mode, "_q_add");
18625 set_optab_libfunc (sub_optab, mode, "_q_sub");
18626 set_optab_libfunc (neg_optab, mode, "_q_neg");
18627 set_optab_libfunc (smul_optab, mode, "_q_mul");
18628 set_optab_libfunc (sdiv_optab, mode, "_q_div");
18629 if (TARGET_PPC_GPOPT)
18630 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
18632 set_optab_libfunc (eq_optab, mode, "_q_feq");
18633 set_optab_libfunc (ne_optab, mode, "_q_fne");
18634 set_optab_libfunc (gt_optab, mode, "_q_fgt");
18635 set_optab_libfunc (ge_optab, mode, "_q_fge");
18636 set_optab_libfunc (lt_optab, mode, "_q_flt");
18637 set_optab_libfunc (le_optab, mode, "_q_fle");
18639 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
18640 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
18641 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
18642 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
18643 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
18644 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
18645 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
18646 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
18650 static void
18651 rs6000_init_libfuncs (void)
18653 /* __float128 support. */
18654 if (TARGET_FLOAT128_TYPE)
18656 init_float128_ibm (IFmode);
18657 init_float128_ieee (KFmode);
18660 /* AIX/Darwin/64-bit Linux quad floating point routines. */
18661 if (TARGET_LONG_DOUBLE_128)
18663 if (!TARGET_IEEEQUAD)
18664 init_float128_ibm (TFmode);
18666 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
18667 else
18668 init_float128_ieee (TFmode);
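/* So, for example, a TFmode addition is expected to become a call to
   __gcc_qadd under the IBM extended double ABI, but to __addkf3 (or to
   _q_add when IEEE 128-bit values are not passed in vector registers)
   when long double is IEEE 128-bit.  */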
18672 /* Emit a potentially record-form instruction, setting DST from SRC.
18673 If DOT is 0, that is all; otherwise, set CCREG to the result of the
18674 signed comparison of DST with zero. If DOT is 1, the generated RTL
18675 doesn't care about the DST result; if DOT is 2, it does. If CCREG
18676 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
18677 a separate COMPARE. */
18679 void
18680 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
18682 if (dot == 0)
18684 emit_move_insn (dst, src);
18685 return;
18688 if (cc_reg_not_cr0_operand (ccreg, CCmode))
18690 emit_move_insn (dst, src);
18691 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
18692 return;
18695 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
18696 if (dot == 1)
18698 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
18699 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
18701 else
18703 rtx set = gen_rtx_SET (dst, src);
18704 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
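/* For example, with DOT == 2 and CCREG being CR0 this emits the PARALLEL

       [(set (reg:CC <cr0>) (compare:CC <src> (const_int 0)))
	(set (<dst>) (<src>))]

   which the machine description prints as a single record-form insn,
   e.g. "and. dst,a,b" (a sketch; the exact insn depends on SRC).  */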
18709 /* A validation routine: say whether CODE, a condition code, and MODE
18710 match. The other alternatives either don't make sense or should
18711 never be generated. */
18713 void
18714 validate_condition_mode (enum rtx_code code, machine_mode mode)
18716 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
18717 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
18718 && GET_MODE_CLASS (mode) == MODE_CC);
18720 /* These don't make sense. */
18721 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
18722 || mode != CCUNSmode);
18724 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
18725 || mode == CCUNSmode);
18727 gcc_assert (mode == CCFPmode
18728 || (code != ORDERED && code != UNORDERED
18729 && code != UNEQ && code != LTGT
18730 && code != UNGT && code != UNLT
18731 && code != UNGE && code != UNLE));
18733 /* These should never be generated except for
18734 flag_finite_math_only. */
18735 gcc_assert (mode != CCFPmode
18736 || flag_finite_math_only
18737 || (code != LE && code != GE
18738 && code != UNEQ && code != LTGT
18739 && code != UNGT && code != UNLT));
18741 /* These are invalid; the information is not there. */
18742 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
18746 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
18747 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
18748 not zero, store there the bit offset (counted from the right) where
18749 the single stretch of 1 bits begins; and similarly for B, the bit
18750 offset where it ends. */
18752 bool
18753 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
18755 unsigned HOST_WIDE_INT val = INTVAL (mask);
18756 unsigned HOST_WIDE_INT bit;
18757 int nb, ne;
18758 int n = GET_MODE_PRECISION (mode);
18760 if (mode != DImode && mode != SImode)
18761 return false;
18763 if (INTVAL (mask) >= 0)
18765 bit = val & -val;
18766 ne = exact_log2 (bit);
18767 nb = exact_log2 (val + bit);
18769 else if (val + 1 == 0)
18771 nb = n;
18772 ne = 0;
18774 else if (val & 1)
18776 val = ~val;
18777 bit = val & -val;
18778 nb = exact_log2 (bit);
18779 ne = exact_log2 (val + bit);
18781 else
18783 bit = val & -val;
18784 ne = exact_log2 (bit);
18785 if (val + bit == 0)
18786 nb = n;
18787 else
18788 nb = 0;
18791 nb--;
18793 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
18794 return false;
18796 if (b)
18797 *b = nb;
18798 if (e)
18799 *e = ne;
18801 return true;
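/* Two worked examples (illustrative only): for MASK 0x0ff0 in SImode the
   lowest 1 bit gives E = 4 and the highest gives B = 11; for an all-ones
   DImode mask (VAL + 1 == 0) we get B = 63, E = 0.  A value such as
   0x0f0f is rejected, since it has more than one stretch of 1 bits.  */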
18804 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
18805 or rldicr instruction, to implement an AND with it in mode MODE. */
18807 bool
18808 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
18810 int nb, ne;
18812 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18813 return false;
18815 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
18816 does not wrap. */
18817 if (mode == DImode)
18818 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
18820 /* For SImode, rlwinm can do everything. */
18821 if (mode == SImode)
18822 return (nb < 32 && ne < 32);
18824 return false;
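/* Illustrative cases: in DImode, 0xffff (NE == 0) is a valid rldicl mask
   and 0xffff000000000000 (NB == 63) a valid rldicr mask, while a middle
   run such as 0x00ffffffff000000 (NB == 55, NE == 24) would need more
   than one insn and is rejected here.  */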
18827 /* Return the instruction template for an AND with mask in mode MODE, with
18828 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18830 const char *
18831 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
18833 int nb, ne;
18835 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
18836 gcc_unreachable ();
18838 if (mode == DImode && ne == 0)
18840 operands[3] = GEN_INT (63 - nb);
18841 if (dot)
18842 return "rldicl. %0,%1,0,%3";
18843 return "rldicl %0,%1,0,%3";
18846 if (mode == DImode && nb == 63)
18848 operands[3] = GEN_INT (63 - ne);
18849 if (dot)
18850 return "rldicr. %0,%1,0,%3";
18851 return "rldicr %0,%1,0,%3";
18854 if (nb < 32 && ne < 32)
18856 operands[3] = GEN_INT (31 - nb);
18857 operands[4] = GEN_INT (31 - ne);
18858 if (dot)
18859 return "rlwinm. %0,%1,0,%3,%4";
18860 return "rlwinm %0,%1,0,%3,%4";
18863 gcc_unreachable ();
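/* For instance, an AND with 0xffff in DImode has NB == 15, NE == 0, so
   the first template above becomes "rldicl %0,%1,0,48" -- clear the top
   48 bits, the classic clrldi.  */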
18866 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
18867 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
18868 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
18870 bool
18871 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
18873 int nb, ne;
18875 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
18876 return false;
18878 int n = GET_MODE_PRECISION (mode);
18879 int sh = -1;
18881 if (CONST_INT_P (XEXP (shift, 1)))
18883 sh = INTVAL (XEXP (shift, 1));
18884 if (sh < 0 || sh >= n)
18885 return false;
18888 rtx_code code = GET_CODE (shift);
18890 /* Convert any shift by 0 to a rotate, to simplify the code below. */
18891 if (sh == 0)
18892 code = ROTATE;
18894 /* Convert rotate to simple shift if we can, to make analysis simpler. */
18895 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
18896 code = ASHIFT;
18897 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
18899 code = LSHIFTRT;
18900 sh = n - sh;
18903 /* DImode rotates need rld*. */
18904 if (mode == DImode && code == ROTATE)
18905 return (nb == 63 || ne == 0 || ne == sh);
18907 /* SImode rotates need rlw*. */
18908 if (mode == SImode && code == ROTATE)
18909 return (nb < 32 && ne < 32 && sh < 32);
18911 /* Wrap-around masks are only okay for rotates. */
18912 if (ne > nb)
18913 return false;
18915 /* Variable shifts are only okay for rotates. */
18916 if (sh < 0)
18917 return false;
18919 /* Don't allow ASHIFT if the mask is wrong for that. */
18920 if (code == ASHIFT && ne < sh)
18921 return false;
18923 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
18924 if the mask is wrong for that. */
18925 if (nb < 32 && ne < 32 && sh < 32
18926 && !(code == LSHIFTRT && nb >= 32 - sh))
18927 return true;
18929 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
18930 if the mask is wrong for that. */
18931 if (code == LSHIFTRT)
18932 sh = 64 - sh;
18933 if (nb == 63 || ne == 0 || ne == sh)
18934 return !(code == LSHIFTRT && nb >= sh);
18936 return false;
18939 /* Return the instruction template for a shift with mask in mode MODE, with
18940 operands OPERANDS. If DOT is true, make it a record-form instruction. */
18942 const char *
18943 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
18945 int nb, ne;
18947 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
18948 gcc_unreachable ();
18950 if (mode == DImode && ne == 0)
18952 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18953 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
18954 operands[3] = GEN_INT (63 - nb);
18955 if (dot)
18956 return "rld%I2cl. %0,%1,%2,%3";
18957 return "rld%I2cl %0,%1,%2,%3";
18960 if (mode == DImode && nb == 63)
18962 operands[3] = GEN_INT (63 - ne);
18963 if (dot)
18964 return "rld%I2cr. %0,%1,%2,%3";
18965 return "rld%I2cr %0,%1,%2,%3";
18968 if (mode == DImode
18969 && GET_CODE (operands[4]) != LSHIFTRT
18970 && CONST_INT_P (operands[2])
18971 && ne == INTVAL (operands[2]))
18973 operands[3] = GEN_INT (63 - nb);
18974 if (dot)
18975 return "rld%I2c. %0,%1,%2,%3";
18976 return "rld%I2c %0,%1,%2,%3";
18979 if (nb < 32 && ne < 32)
18981 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
18982 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
18983 operands[3] = GEN_INT (31 - nb);
18984 operands[4] = GEN_INT (31 - ne);
18985 /* This insn can also be a 64-bit rotate with mask that really makes
18986 it just a shift right (with mask); the %h in the templates below
18987 adjusts for that situation (the shift count is >= 32 in that case). */
18988 if (dot)
18989 return "rlw%I2nm. %0,%1,%h2,%3,%4";
18990 return "rlw%I2nm %0,%1,%h2,%3,%4";
18993 gcc_unreachable ();
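/* As a sketch: a DImode LSHIFTRT by 48 whose result is masked with
   0xffff (NB == 15, NE == 0) takes the first branch; the shift count is
   rewritten to 64 - 48 = 16 and we output "rldicl %0,%1,16,48", which
   is exactly the canonical srdi by 48.  */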
18996 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
18997 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
18998 ASHIFT, or LSHIFTRT) in mode MODE. */
19000 bool
19001 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
19003 int nb, ne;
19005 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
19006 return false;
19008 int n = GET_MODE_PRECISION (mode);
19010 int sh = INTVAL (XEXP (shift, 1));
19011 if (sh < 0 || sh >= n)
19012 return false;
19014 rtx_code code = GET_CODE (shift);
19016 /* Convert any shift by 0 to a rotate, to simplify the code below. */
19017 if (sh == 0)
19018 code = ROTATE;
19020 /* Convert rotate to simple shift if we can, to make analysis simpler. */
19021 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
19022 code = ASHIFT;
19023 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
19025 code = LSHIFTRT;
19026 sh = n - sh;
19029 /* DImode rotates need rldimi. */
19030 if (mode == DImode && code == ROTATE)
19031 return (ne == sh);
19033 /* SImode rotates need rlwimi. */
19034 if (mode == SImode && code == ROTATE)
19035 return (nb < 32 && ne < 32 && sh < 32);
19037 /* Wrap-around masks are only okay for rotates. */
19038 if (ne > nb)
19039 return false;
19041 /* Don't allow ASHIFT if the mask is wrong for that. */
19042 if (code == ASHIFT && ne < sh)
19043 return false;
19045 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
19046 if the mask is wrong for that. */
19047 if (nb < 32 && ne < 32 && sh < 32
19048 && !(code == LSHIFTRT && nb >= 32 - sh))
19049 return true;
19051 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
19052 if the mask is wrong for that. */
19053 if (code == LSHIFTRT)
19054 sh = 64 - sh;
19055 if (ne == sh)
19056 return !(code == LSHIFTRT && nb >= sh);
19058 return false;
19061 /* Return the instruction template for an insert with mask in mode MODE, with
19062 operands OPERANDS. If DOT is true, make it a record-form instruction. */
19064 const char *
19065 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
19067 int nb, ne;
19069 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
19070 gcc_unreachable ();
19072 /* Prefer rldimi because rlwimi is cracked. */
19073 if (TARGET_POWERPC64
19074 && (!dot || mode == DImode)
19075 && GET_CODE (operands[4]) != LSHIFTRT
19076 && ne == INTVAL (operands[2]))
19078 operands[3] = GEN_INT (63 - nb);
19079 if (dot)
19080 return "rldimi. %0,%1,%2,%3";
19081 return "rldimi %0,%1,%2,%3";
19084 if (nb < 32 && ne < 32)
19086 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
19087 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
19088 operands[3] = GEN_INT (31 - nb);
19089 operands[4] = GEN_INT (31 - ne);
19090 if (dot)
19091 return "rlwimi. %0,%1,%2,%3,%4";
19092 return "rlwimi %0,%1,%2,%3,%4";
19095 gcc_unreachable ();
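/* For example, inserting a value shifted left by 32 under the mask
   0x0000ffff00000000 (NB == 47, NE == 32) satisfies NE == SH, so on a
   64-bit target this prints as "rldimi %0,%1,32,16" (a sketch; operand
   numbering as in the templates above).  */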
19098 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
19099 using two machine instructions. */
19101 bool
19102 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
19104 /* There are two kinds of AND we can handle with two insns:
19105 1) those we can do with two rl* insns;
19106 2) ori[s];xori[s].
19108 We do not handle that last case yet. */
19110 /* If there is just one stretch of ones, we can do it. */
19111 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
19112 return true;
19114 /* Otherwise, fill in the lowest "hole"; if we can do the result with
19115 one insn, we can do the whole thing with two. */
19116 unsigned HOST_WIDE_INT val = INTVAL (c);
19117 unsigned HOST_WIDE_INT bit1 = val & -val;
19118 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19119 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19120 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19121 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
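/* Worked example (illustration): for C == 0xff00ff00 in SImode, BIT1 is
   0x100, BIT2 is 0x10000 and BIT3 is 0x1000000, so the test above checks
   0xffffff00 -- the constant with its lowest hole filled -- which is a
   single stretch of ones; the remaining mask 0xff00ffff is a wrap-around
   rlwinm mask, so the AND is doable in two insns.  */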
19124 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
19125 If EXPAND is true, split rotate-and-mask instructions we generate to
19126 their constituent parts as well (this is used during expand); if DOT
19127 is 1, make the last insn a record-form instruction clobbering the
19128 destination GPR and setting the CC reg (from operands[3]); if 2, set
19129 that GPR as well as the CC reg. */
19131 void
19132 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
19134 gcc_assert (!(expand && dot));
19136 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
19138 /* If it is one stretch of ones, it is DImode; shift left, mask, then
19139 shift right. This generates better code than doing the masks without
19140 shifts, or shifting first right and then left. */
19141 int nb, ne;
19142 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
19144 gcc_assert (mode == DImode);
19146 int shift = 63 - nb;
19147 if (expand)
19149 rtx tmp1 = gen_reg_rtx (DImode);
19150 rtx tmp2 = gen_reg_rtx (DImode);
19151 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
19152 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
19153 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
19155 else
19157 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
19158 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
19159 emit_move_insn (operands[0], tmp);
19160 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
19161 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19163 return;
19166 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
19167 that does the rest. */
19168 unsigned HOST_WIDE_INT bit1 = val & -val;
19169 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
19170 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
19171 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
19173 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
19174 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
19176 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
19178 /* Two "no-rotate"-and-mask instructions, for SImode. */
19179 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
19181 gcc_assert (mode == SImode);
19183 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19184 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
19185 emit_move_insn (reg, tmp);
19186 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19187 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19188 return;
19191 gcc_assert (mode == DImode);
19193 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
19194 insns; we have to do the first in SImode, because it wraps. */
19195 if (mask2 <= 0xffffffff
19196 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
19198 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
19199 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
19200 GEN_INT (mask1));
19201 rtx reg_low = gen_lowpart (SImode, reg);
19202 emit_move_insn (reg_low, tmp);
19203 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
19204 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19205 return;
19208 /* Two rld* insns: rotate, clear the hole in the middle (which now is
19209 at the top end), rotate back and clear the other hole. */
19210 int right = exact_log2 (bit3);
19211 int left = 64 - right;
19213 /* Rotate the mask too. */
19214 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
19216 if (expand)
19218 rtx tmp1 = gen_reg_rtx (DImode);
19219 rtx tmp2 = gen_reg_rtx (DImode);
19220 rtx tmp3 = gen_reg_rtx (DImode);
19221 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
19222 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
19223 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
19224 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
19226 else
19228 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
19229 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
19230 emit_move_insn (operands[0], tmp);
19231 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
19232 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
19233 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
19237 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
19238 for lfq and stfq insns iff the registers are hard registers. */
19240 int
19241 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
19243 /* We might have been passed a SUBREG. */
19244 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
19245 return 0;
19247 /* We might have been passed non-floating-point registers. */
19248 if (!FP_REGNO_P (REGNO (reg1))
19249 || !FP_REGNO_P (REGNO (reg2)))
19250 return 0;
19252 return (REGNO (reg1) == REGNO (reg2) - 1);
19255 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
19256 addr1 and addr2 must be in consecutive memory locations
19257 (addr2 == addr1 + 8). */
19259 int
19260 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
19262 rtx addr1, addr2;
19263 unsigned int reg1, reg2;
19264 int offset1, offset2;
19266 /* The mems cannot be volatile. */
19267 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
19268 return 0;
19270 addr1 = XEXP (mem1, 0);
19271 addr2 = XEXP (mem2, 0);
19273 /* Extract an offset (if used) from the first addr. */
19274 if (GET_CODE (addr1) == PLUS)
19276 /* If not a REG, return zero. */
19277 if (GET_CODE (XEXP (addr1, 0)) != REG)
19278 return 0;
19279 else
19281 reg1 = REGNO (XEXP (addr1, 0));
19282 /* The offset must be constant! */
19283 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
19284 return 0;
19285 offset1 = INTVAL (XEXP (addr1, 1));
19288 else if (GET_CODE (addr1) != REG)
19289 return 0;
19290 else
19292 reg1 = REGNO (addr1);
19293 /* This was a simple (mem (reg)) expression. Offset is 0. */
19294 offset1 = 0;
19297 /* And now for the second addr. */
19298 if (GET_CODE (addr2) == PLUS)
19300 /* If not a REG, return zero. */
19301 if (GET_CODE (XEXP (addr2, 0)) != REG)
19302 return 0;
19303 else
19305 reg2 = REGNO (XEXP (addr2, 0));
19306 /* The offset must be constant. */
19307 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
19308 return 0;
19309 offset2 = INTVAL (XEXP (addr2, 1));
19312 else if (GET_CODE (addr2) != REG)
19313 return 0;
19314 else
19316 reg2 = REGNO (addr2);
19317 /* This was a simple (mem (reg)) expression. Offset is 0. */
19318 offset2 = 0;
19321 /* Both of these must have the same base register. */
19322 if (reg1 != reg2)
19323 return 0;
19325 /* The offset for the second addr must be 8 more than the first addr. */
19326 if (offset2 != offset1 + 8)
19327 return 0;
19329 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
19330 instructions. */
19331 return 1;
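/* E.g. the pair (mem:DF (reg 3)) and (mem:DF (plus (reg 3) (const_int 8)))
   passes: the same base register with offsets 0 and 8 (a sketch of the
   RTL the peepholes feed in here).  */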
19334 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
19335 need to use DDmode, in all other cases we can use the same mode. */
19336 static machine_mode
19337 rs6000_secondary_memory_needed_mode (machine_mode mode)
19339 if (lra_in_progress && mode == SDmode)
19340 return DDmode;
19341 return mode;
19344 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
19345 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
19346 only work on the traditional Altivec registers, note whether an Altivec
19347 register was chosen. */
19349 static enum rs6000_reg_type
19350 register_to_reg_type (rtx reg, bool *is_altivec)
19352 HOST_WIDE_INT regno;
19353 enum reg_class rclass;
19355 if (GET_CODE (reg) == SUBREG)
19356 reg = SUBREG_REG (reg);
19358 if (!REG_P (reg))
19359 return NO_REG_TYPE;
19361 regno = REGNO (reg);
19362 if (regno >= FIRST_PSEUDO_REGISTER)
19364 if (!lra_in_progress && !reload_completed)
19365 return PSEUDO_REG_TYPE;
19367 regno = true_regnum (reg);
19368 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
19369 return PSEUDO_REG_TYPE;
19372 gcc_assert (regno >= 0);
19374 if (is_altivec && ALTIVEC_REGNO_P (regno))
19375 *is_altivec = true;
19377 rclass = rs6000_regno_regclass[regno];
19378 return reg_class_to_reg_type[(int)rclass];
19381 /* Helper function to return the cost of adding a TOC entry address. */
19383 static inline int
19384 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
19386 int ret;
19388 if (TARGET_CMODEL != CMODEL_SMALL)
19389 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
19391 else
19392 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
19394 return ret;
19397 /* Helper function for rs6000_secondary_reload to determine whether the memory
19398 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
19399 needs reloading. Return negative if the memory is not handled by the memory
19400 helper functions (so the caller should try a different reload method), 0 if
19401 no additional instructions are needed, and positive to give the extra cost
19402 for the memory. */
19404 static int
19405 rs6000_secondary_reload_memory (rtx addr,
19406 enum reg_class rclass,
19407 machine_mode mode)
19409 int extra_cost = 0;
19410 rtx reg, and_arg, plus_arg0, plus_arg1;
19411 addr_mask_type addr_mask;
19412 const char *type = NULL;
19413 const char *fail_msg = NULL;
19415 if (GPR_REG_CLASS_P (rclass))
19416 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
19418 else if (rclass == FLOAT_REGS)
19419 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
19421 else if (rclass == ALTIVEC_REGS)
19422 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
19424 /* For the combined VSX_REGS, turn off Altivec AND -16. */
19425 else if (rclass == VSX_REGS)
19426 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
19427 & ~RELOAD_REG_AND_M16);
19429 /* If the register allocator hasn't made up its mind yet on the register
19430 class to use, fall back to sensible defaults. */
19431 else if (rclass == NO_REGS)
19433 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
19434 & ~RELOAD_REG_AND_M16);
19436 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
19437 addr_mask &= ~(RELOAD_REG_INDEXED
19438 | RELOAD_REG_PRE_INCDEC
19439 | RELOAD_REG_PRE_MODIFY);
19442 else
19443 addr_mask = 0;
19445 /* If the register isn't valid in this register class, just return now. */
19446 if ((addr_mask & RELOAD_REG_VALID) == 0)
19448 if (TARGET_DEBUG_ADDR)
19450 fprintf (stderr,
19451 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19452 "not valid in class\n",
19453 GET_MODE_NAME (mode), reg_class_names[rclass]);
19454 debug_rtx (addr);
19457 return -1;
19460 switch (GET_CODE (addr))
19462 /* Does the register class support auto-update forms for this mode? We
19463 don't need a scratch register, since PowerPC only supports
19464 PRE_INC, PRE_DEC, and PRE_MODIFY. */
19465 case PRE_INC:
19466 case PRE_DEC:
19467 reg = XEXP (addr, 0);
19468 if (!base_reg_operand (addr, GET_MODE (reg)))
19470 fail_msg = "no base register #1";
19471 extra_cost = -1;
19474 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
19476 extra_cost = 1;
19477 type = "update";
19479 break;
19481 case PRE_MODIFY:
19482 reg = XEXP (addr, 0);
19483 plus_arg1 = XEXP (addr, 1);
19484 if (!base_reg_operand (reg, GET_MODE (reg))
19485 || GET_CODE (plus_arg1) != PLUS
19486 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
19488 fail_msg = "bad PRE_MODIFY";
19489 extra_cost = -1;
19492 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
19494 extra_cost = 1;
19495 type = "update";
19497 break;
19499 /* Do we need to simulate AND -16 to clear the bottom address bits used
19500 in VMX load/stores? Only allow the AND for vector sizes. */
19501 case AND:
19502 and_arg = XEXP (addr, 0);
19503 if (GET_MODE_SIZE (mode) != 16
19504 || GET_CODE (XEXP (addr, 1)) != CONST_INT
19505 || INTVAL (XEXP (addr, 1)) != -16)
19507 fail_msg = "bad Altivec AND #1";
19508 extra_cost = -1;
19511 if (rclass != ALTIVEC_REGS)
19513 if (legitimate_indirect_address_p (and_arg, false))
19514 extra_cost = 1;
19516 else if (legitimate_indexed_address_p (and_arg, false))
19517 extra_cost = 2;
19519 else
19521 fail_msg = "bad Altivec AND #2";
19522 extra_cost = -1;
19525 type = "and";
19527 break;
19529 /* If this is an indirect address, make sure it is a base register. */
19530 case REG:
19531 case SUBREG:
19532 if (!legitimate_indirect_address_p (addr, false))
19534 extra_cost = 1;
19535 type = "move";
19537 break;
19539 /* If this is an indexed address, make sure the register class can handle
19540 indexed addresses for this mode. */
19541 case PLUS:
19542 plus_arg0 = XEXP (addr, 0);
19543 plus_arg1 = XEXP (addr, 1);
19545 /* (plus (plus (reg) (constant)) (constant)) is generated during
19546 push_reload processing, so handle it now. */
19547 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
19549 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19551 extra_cost = 1;
19552 type = "offset";
19556 /* (plus (plus (reg) (constant)) (reg)) is also generated during
19557 push_reload processing, so handle it now. */
19558 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
19560 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
19562 extra_cost = 1;
19563 type = "indexed #2";
19567 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
19569 fail_msg = "no base register #2";
19570 extra_cost = -1;
19573 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
19575 if ((addr_mask & RELOAD_REG_INDEXED) == 0
19576 || !legitimate_indexed_address_p (addr, false))
19578 extra_cost = 1;
19579 type = "indexed";
19583 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
19584 && CONST_INT_P (plus_arg1))
19586 if (!quad_address_offset_p (INTVAL (plus_arg1)))
19588 extra_cost = 1;
19589 type = "vector d-form offset";
19593 /* Make sure the register class can handle offset addresses. */
19594 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
19596 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19598 extra_cost = 1;
19599 type = "offset #2";
19603 else
19605 fail_msg = "bad PLUS";
19606 extra_cost = -1;
19609 break;
19611 case LO_SUM:
19612 /* Quad offsets are restricted and can't handle normal addresses. */
19613 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19615 extra_cost = -1;
19616 type = "vector d-form lo_sum";
19619 else if (!legitimate_lo_sum_address_p (mode, addr, false))
19621 fail_msg = "bad LO_SUM";
19622 extra_cost = -1;
19625 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19627 extra_cost = 1;
19628 type = "lo_sum";
19630 break;
19632 /* Static addresses need to create a TOC entry. */
19633 case CONST:
19634 case SYMBOL_REF:
19635 case LABEL_REF:
19636 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19638 extra_cost = -1;
19639 type = "vector d-form lo_sum #2";
19642 else
19644 type = "address";
19645 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
19647 break;
19649 /* TOC references look like offsetable memory. */
19650 case UNSPEC:
19651 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
19653 fail_msg = "bad UNSPEC";
19654 extra_cost = -1;
19657 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
19659 extra_cost = -1;
19660 type = "vector d-form lo_sum #3";
19663 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
19665 extra_cost = 1;
19666 type = "toc reference";
19668 break;
19670 default:
19672 fail_msg = "bad address";
19673 extra_cost = -1;
19677 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
19679 if (extra_cost < 0)
19680 fprintf (stderr,
19681 "rs6000_secondary_reload_memory error: mode = %s, "
19682 "class = %s, addr_mask = '%s', %s\n",
19683 GET_MODE_NAME (mode),
19684 reg_class_names[rclass],
19685 rs6000_debug_addr_mask (addr_mask, false),
19686 (fail_msg != NULL) ? fail_msg : "<bad address>");
19688 else
19689 fprintf (stderr,
19690 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
19691 "addr_mask = '%s', extra cost = %d, %s\n",
19692 GET_MODE_NAME (mode),
19693 reg_class_names[rclass],
19694 rs6000_debug_addr_mask (addr_mask, false),
19695 extra_cost,
19696 (type) ? type : "<none>");
19698 debug_rtx (addr);
19701 return extra_cost;
19704 /* Helper function for rs6000_secondary_reload to return true if a move to a
19705 different register class is really a simple move. */
19707 static bool
19708 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
19709 enum rs6000_reg_type from_type,
19710 machine_mode mode)
19712 int size = GET_MODE_SIZE (mode);
19714 /* Add support for various direct moves available. In this function, we only
19715 look at cases where we don't need any extra registers, and one or more
19716 simple move insns are issued. Small integers are not allowed
19717 in FPR/VSX registers. Single precision binary floating point is not a simple
19718 move because we need to convert to the single precision memory layout.
19719 The 4-byte SDmode can be moved. TDmode values are disallowed since they
19720 need special direct move handling, which we do not support yet. */
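/* E.g. (illustrative): a DImode GPR <-> VSX move on a 64-bit ISA 2.07 target
   is a single mtvsrd/mfvsrd instruction, so it is "simple"; SFmode is not,
   since the value must also be converted between the register's double
   format and the single precision memory format. */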
19721 if (TARGET_DIRECT_MOVE
19722 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19723 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
19725 if (TARGET_POWERPC64)
19727 /* ISA 2.07: MTVSRD or MFVSRD. */
19728 if (size == 8)
19729 return true;
19731 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
19732 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
19733 return true;
19736 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19737 if (TARGET_P8_VECTOR)
19739 if (mode == SImode)
19740 return true;
19742 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
19743 return true;
19746 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
19747 if (mode == SDmode)
19748 return true;
19751 /* Power6+: MFTGPR or MFFGPR. */
19752 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
19753 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
19754 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19755 return true;
19757 /* Move to/from SPR. */
19758 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
19759 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
19760 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
19761 return true;
19763 return false;
19766 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
19767 special direct moves that involve allocating an extra register. Return true
19768 if there is such a helper pattern, recording its insn code and extra cost
19769 in SRI; return false otherwise.
19771 static bool
19772 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
19773 enum rs6000_reg_type from_type,
19774 machine_mode mode,
19775 secondary_reload_info *sri,
19776 bool altivec_p)
19778 bool ret = false;
19779 enum insn_code icode = CODE_FOR_nothing;
19780 int cost = 0;
19781 int size = GET_MODE_SIZE (mode);
19783 if (TARGET_POWERPC64 && size == 16)
19785 /* Handle moving 128-bit values from GPRs to VSX registers on
19786 ISA 2.07 (power8, power9) when running in 64-bit mode using
19787 XXPERMDI to glue the two 64-bit values back together. */
19788 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19790 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
19791 icode = reg_addr[mode].reload_vsx_gpr;
19794 /* Handle moving 128-bit values from VSX registers to GPRs on
19795 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
19796 bottom 64-bit value. */
19797 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19799 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
19800 icode = reg_addr[mode].reload_gpr_vsx;
19804 else if (TARGET_POWERPC64 && mode == SFmode)
19806 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
19808 cost = 3; /* xscvdpspn, mfvsrd, and. */
19809 icode = reg_addr[mode].reload_gpr_vsx;
19812 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
19814 cost = 2; /* mtvsrwz, xscvspdpn. */
19815 icode = reg_addr[mode].reload_vsx_gpr;
19819 else if (!TARGET_POWERPC64 && size == 8)
19821 /* Handle moving 64-bit values from GPRs to floating point registers on
19822 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
19823 32-bit values back together. Altivec register classes must be handled
19824 specially since a different instruction is used, and the secondary
19825 reload support requires a single instruction class in the scratch
19826 register constraint. However, right now TFmode is not allowed in
19827 Altivec registers, so the pattern will never match. */
19828 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
19830 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
19831 icode = reg_addr[mode].reload_fpr_gpr;
19835 if (icode != CODE_FOR_nothing)
19837 ret = true;
19838 if (sri)
19840 sri->icode = icode;
19841 sri->extra_cost = cost;
19845 return ret;
19848 /* Return whether a move between two register classes can be done either
19849 directly (simple move) or via a pattern that uses a single extra temporary
19850 (using ISA 2.07's direct move in this case). */
19852 static bool
19853 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
19854 enum rs6000_reg_type from_type,
19855 machine_mode mode,
19856 secondary_reload_info *sri,
19857 bool altivec_p)
19859 /* Fall back to load/store reloads if either type is not a register. */
19860 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
19861 return false;
19863 /* If we haven't allocated registers yet, assume the move can be done for the
19864 standard register types. */
19865 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
19866 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
19867 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
19868 return true;
19870 /* Moves within the same set of registers are simple moves for non-specialized
19871 registers. */
19872 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
19873 return true;
19875 /* Check whether a simple move can be done directly. */
19876 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
19878 if (sri)
19880 sri->icode = CODE_FOR_nothing;
19881 sri->extra_cost = 0;
19883 return true;
19886 /* Now check if we can do it in a few steps. */
19887 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
19888 altivec_p);
19891 /* Inform reload about cases where moving X with a mode MODE to a register in
19892 RCLASS requires an extra scratch or immediate register. Return the class
19893 needed for the immediate register.
19895 For VSX and Altivec, we may need a register to convert sp+offset into
19896 reg+reg addressing.
19898 For misaligned 64-bit gpr loads and stores we need a register to
19899 convert an offset address to indirect. */
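/* For example (illustrative): before ISA 3.0, Altivec loads and stores such
   as lvx/stvx only support reg or reg+reg addressing, so spilling a vector
   to sp+offset needs a GPR scratch to materialize the offset part of the
   address. */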
19901 static reg_class_t
19902 rs6000_secondary_reload (bool in_p,
19903 rtx x,
19904 reg_class_t rclass_i,
19905 machine_mode mode,
19906 secondary_reload_info *sri)
19908 enum reg_class rclass = (enum reg_class) rclass_i;
19909 reg_class_t ret = ALL_REGS;
19910 enum insn_code icode;
19911 bool default_p = false;
19912 bool done_p = false;
19914 /* Allow subreg of memory before/during reload. */
19915 bool memory_p = (MEM_P (x)
19916 || (!reload_completed && GET_CODE (x) == SUBREG
19917 && MEM_P (SUBREG_REG (x))));
19919 sri->icode = CODE_FOR_nothing;
19920 sri->t_icode = CODE_FOR_nothing;
19921 sri->extra_cost = 0;
19922 icode = ((in_p)
19923 ? reg_addr[mode].reload_load
19924 : reg_addr[mode].reload_store);
19926 if (REG_P (x) || register_operand (x, mode))
19928 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
19929 bool altivec_p = (rclass == ALTIVEC_REGS);
19930 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
19932 if (!in_p)
19933 std::swap (to_type, from_type);
19935 /* Can we do a direct move of some sort? */
19936 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
19937 altivec_p))
19939 icode = (enum insn_code)sri->icode;
19940 default_p = false;
19941 done_p = true;
19942 ret = NO_REGS;
19946 /* Make sure 0.0 is not reloaded or forced into memory. */
19947 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
19949 ret = NO_REGS;
19950 default_p = false;
19951 done_p = true;
19954 /* If this is a scalar floating point value and we want to load it into the
19955 traditional Altivec registers, do it via a traditional floating
19956 point register, unless we have D-form addressing. Also make sure that
19957 non-zero constants use a FPR. */
19958 if (!done_p && reg_addr[mode].scalar_in_vmx_p
19959 && !mode_supports_vmx_dform (mode)
19960 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
19961 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
19963 ret = FLOAT_REGS;
19964 default_p = false;
19965 done_p = true;
19968 /* Handle reload of load/stores if we have reload helper functions. */
19969 if (!done_p && icode != CODE_FOR_nothing && memory_p)
19971 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
19972 mode);
19974 if (extra_cost >= 0)
19976 done_p = true;
19977 ret = NO_REGS;
19978 if (extra_cost > 0)
19980 sri->extra_cost = extra_cost;
19981 sri->icode = icode;
19986 /* Handle unaligned loads and stores of integer registers. */
19987 if (!done_p && TARGET_POWERPC64
19988 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
19989 && memory_p
19990 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
19992 rtx addr = XEXP (x, 0);
19993 rtx off = address_offset (addr);
19995 if (off != NULL_RTX)
19997 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
19998 unsigned HOST_WIDE_INT offset = INTVAL (off);
20000 /* We need a secondary reload when our legitimate_address_p
20001 says the address is good (as otherwise the entire address
20002 will be reloaded), and the offset is not a multiple of
20003 four or we have an address wrap. Address wrap will only
20004 occur for LO_SUMs since legitimate_offset_address_p
20005 rejects addresses for 16-byte mems that will wrap. */
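/* Worked example (illustrative): a DImode access at (plus (reg) 0x7ffd) can
   be a legitimate address (e.g. FPR lfd/stfd accept any 16-bit
   displacement), but if the value lands in a GPR the DS-form ld/std cannot
   encode an offset that is not a multiple of 4, so the scratch register is
   needed to build the address. */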
20006 if (GET_CODE (addr) == LO_SUM
20007 ? (1 /* legitimate_address_p allows any offset for lo_sum */
20008 && ((offset & 3) != 0
20009 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
20010 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
20011 && (offset & 3) != 0))
20013 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
20014 if (in_p)
20015 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
20016 : CODE_FOR_reload_di_load);
20017 else
20018 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
20019 : CODE_FOR_reload_di_store);
20020 sri->extra_cost = 2;
20021 ret = NO_REGS;
20022 done_p = true;
20024 else
20025 default_p = true;
20027 else
20028 default_p = true;
20031 if (!done_p && !TARGET_POWERPC64
20032 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
20033 && memory_p
20034 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
20036 rtx addr = XEXP (x, 0);
20037 rtx off = address_offset (addr);
20039 if (off != NULL_RTX)
20041 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
20042 unsigned HOST_WIDE_INT offset = INTVAL (off);
20044 /* We need a secondary reload when our legitimate_address_p
20045 says the address is good (as otherwise the entire address
20046 will be reloaded), and we have a wrap.
20048 legitimate_lo_sum_address_p allows LO_SUM addresses to
20049 have any offset so test for wrap in the low 16 bits.
20051 legitimate_offset_address_p checks for the range
20052 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
20053 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
20054 [0x7ff4,0x7fff] respectively, so test for the
20055 intersection of these ranges, [0x7ffc,0x7fff] and
20056 [0x7ff4,0x7ff7] respectively.
20058 Note that the address we see here may have been
20059 manipulated by legitimize_reload_address. */
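/* Derivation of the ranges above: the component words live at offset,
   offset+4, ..., offset+extra, so the 16-bit displacement wraps once
   offset >= 0x8000 - extra; the unsigned comparison below accepts exactly
   offsets in [0x8000 - extra, 0x8000 - extra + 3]. */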
20060 if (GET_CODE (addr) == LO_SUM
20061 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
20062 : offset - (0x8000 - extra) < UNITS_PER_WORD)
20064 if (in_p)
20065 sri->icode = CODE_FOR_reload_si_load;
20066 else
20067 sri->icode = CODE_FOR_reload_si_store;
20068 sri->extra_cost = 2;
20069 ret = NO_REGS;
20070 done_p = true;
20072 else
20073 default_p = true;
20075 else
20076 default_p = true;
20079 if (!done_p)
20080 default_p = true;
20082 if (default_p)
20083 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
20085 gcc_assert (ret != ALL_REGS);
20087 if (TARGET_DEBUG_ADDR)
20089 fprintf (stderr,
20090 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
20091 "mode = %s",
20092 reg_class_names[ret],
20093 in_p ? "true" : "false",
20094 reg_class_names[rclass],
20095 GET_MODE_NAME (mode));
20097 if (reload_completed)
20098 fputs (", after reload", stderr);
20100 if (!done_p)
20101 fputs (", done_p not set", stderr);
20103 if (default_p)
20104 fputs (", default secondary reload", stderr);
20106 if (sri->icode != CODE_FOR_nothing)
20107 fprintf (stderr, ", reload func = %s, extra cost = %d",
20108 insn_data[sri->icode].name, sri->extra_cost);
20110 else if (sri->extra_cost > 0)
20111 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
20113 fputs ("\n", stderr);
20114 debug_rtx (x);
20117 return ret;
20120 /* Better tracing for rs6000_secondary_reload_inner. */
20122 static void
20123 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
20124 bool store_p)
20126 rtx set, clobber;
20128 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
20130 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
20131 store_p ? "store" : "load");
20133 if (store_p)
20134 set = gen_rtx_SET (mem, reg);
20135 else
20136 set = gen_rtx_SET (reg, mem);
20138 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
20139 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
20142 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
20143 ATTRIBUTE_NORETURN;
20145 static void
20146 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
20147 bool store_p)
20149 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
20150 gcc_unreachable ();
20153 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
20154 reload helper functions. These were identified in
20155 rs6000_secondary_reload_memory, and if reload decided to use the secondary
20156 reload, it calls the insns:
20157 reload_<RELOAD:mode>_<P:mptrsize>_store
20158 reload_<RELOAD:mode>_<P:mptrsize>_load
20160 which in turn calls this function, to do whatever is necessary to create
20161 valid addresses. */
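/* For example (illustrative): a DFmode reload on a 64-bit target arrives via
   reload_df_di_load or reload_df_di_store. If the MEM's address is an
   indirect reference through r0 (which cannot serve as a base register),
   the code below copies the address into the scratch register and rewrites
   the MEM to use it. */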
20163 void
20164 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
20166 int regno = true_regnum (reg);
20167 machine_mode mode = GET_MODE (reg);
20168 addr_mask_type addr_mask;
20169 rtx addr;
20170 rtx new_addr;
20171 rtx op_reg, op0, op1;
20172 rtx and_op;
20173 rtx cc_clobber;
20174 rtvec rv;
20176 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
20177 || !base_reg_operand (scratch, GET_MODE (scratch)))
20178 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20180 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
20181 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
20183 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
20184 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
20186 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
20187 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
20189 else
20190 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20192 /* Make sure the mode is valid in this register class. */
20193 if ((addr_mask & RELOAD_REG_VALID) == 0)
20194 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20196 if (TARGET_DEBUG_ADDR)
20197 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
20199 new_addr = addr = XEXP (mem, 0);
20200 switch (GET_CODE (addr))
20202 /* Does the register class support auto update forms for this mode? If
20203 not, do the update now. We don't need a scratch register, since the
20204 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
20205 case PRE_INC:
20206 case PRE_DEC:
20207 op_reg = XEXP (addr, 0);
20208 if (!base_reg_operand (op_reg, Pmode))
20209 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20211 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
20213 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
20214 new_addr = op_reg;
20216 break;
20218 case PRE_MODIFY:
20219 op0 = XEXP (addr, 0);
20220 op1 = XEXP (addr, 1);
20221 if (!base_reg_operand (op0, Pmode)
20222 || GET_CODE (op1) != PLUS
20223 || !rtx_equal_p (op0, XEXP (op1, 0)))
20224 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20226 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
20228 emit_insn (gen_rtx_SET (op0, op1));
20229 new_addr = op0; /* Use the updated base register. */
20231 break;
20233 /* Do we need to simulate AND -16 to clear the bottom address bits used
20234 in VMX load/stores? */
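/* E.g. (illustrative): given (and (plus (reg r9) (reg r10)) (const_int -16))
   for a class without the AND capability, we add r9+r10 into the scratch,
   mask it with -16 (the emitted pattern carries a condition-code clobber),
   and use the scratch as the address. */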
20235 case AND:
20236 op0 = XEXP (addr, 0);
20237 op1 = XEXP (addr, 1);
20238 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
20240 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
20241 op_reg = op0;
20243 else if (GET_CODE (op0) == PLUS)
20245 emit_insn (gen_rtx_SET (scratch, op0));
20246 op_reg = scratch;
20249 else
20250 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20252 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
20253 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
20254 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
20255 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
20256 new_addr = scratch;
20258 break;
20260 /* If this is an indirect address, make sure it is a base register. */
20261 case REG:
20262 case SUBREG:
20263 if (!base_reg_operand (addr, GET_MODE (addr)))
20265 emit_insn (gen_rtx_SET (scratch, addr));
20266 new_addr = scratch;
20268 break;
20270 /* If this is an indexed address, make sure the register class can handle
20271 indexed addresses for this mode. */
20272 case PLUS:
20273 op0 = XEXP (addr, 0);
20274 op1 = XEXP (addr, 1);
20275 if (!base_reg_operand (op0, Pmode))
20276 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20278 else if (int_reg_operand (op1, Pmode))
20280 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20282 emit_insn (gen_rtx_SET (scratch, addr));
20283 new_addr = scratch;
20287 else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
20289 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
20290 || !quad_address_p (addr, mode, false))
20292 emit_insn (gen_rtx_SET (scratch, addr));
20293 new_addr = scratch;
20297 /* Make sure the register class can handle offset addresses. */
20298 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
20300 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20302 emit_insn (gen_rtx_SET (scratch, addr));
20303 new_addr = scratch;
20307 else
20308 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20310 break;
20312 case LO_SUM:
20313 op0 = XEXP (addr, 0);
20314 op1 = XEXP (addr, 1);
20315 if (!base_reg_operand (op0, Pmode))
20316 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20318 else if (int_reg_operand (op1, Pmode))
20320 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
20322 emit_insn (gen_rtx_SET (scratch, addr));
20323 new_addr = scratch;
20327 /* Quad offsets are restricted and can't handle normal addresses. */
20328 else if (mode_supports_vsx_dform_quad (mode))
20330 emit_insn (gen_rtx_SET (scratch, addr));
20331 new_addr = scratch;
20334 /* Make sure the register class can handle offset addresses. */
20335 else if (legitimate_lo_sum_address_p (mode, addr, false))
20337 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
20339 emit_insn (gen_rtx_SET (scratch, addr));
20340 new_addr = scratch;
20344 else
20345 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20347 break;
20349 case SYMBOL_REF:
20350 case CONST:
20351 case LABEL_REF:
20352 rs6000_emit_move (scratch, addr, Pmode);
20353 new_addr = scratch;
20354 break;
20356 default:
20357 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
20360 /* Adjust the address if it changed. */
20361 if (addr != new_addr)
20363 mem = replace_equiv_address_nv (mem, new_addr);
20364 if (TARGET_DEBUG_ADDR)
20365 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
20368 /* Now create the move. */
20369 if (store_p)
20370 emit_insn (gen_rtx_SET (mem, reg));
20371 else
20372 emit_insn (gen_rtx_SET (reg, mem));
20374 return;
20377 /* Convert reloads involving 64-bit gprs and misaligned offset
20378 addressing, or multiple 32-bit gprs and offsets that are too large,
20379 to use indirect addressing. */
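/* E.g. (illustrative): a 64-bit std to offset 0x7ffd cannot be encoded in
   the DS instruction form (the offset must be a multiple of 4), so the full
   address is computed into the scratch register and the access becomes an
   indirect std 0(scratch). */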
20381 void
20382 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
20384 int regno = true_regnum (reg);
20385 enum reg_class rclass;
20386 rtx addr;
20387 rtx scratch_or_premodify = scratch;
20389 if (TARGET_DEBUG_ADDR)
20391 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
20392 store_p ? "store" : "load");
20393 fprintf (stderr, "reg:\n");
20394 debug_rtx (reg);
20395 fprintf (stderr, "mem:\n");
20396 debug_rtx (mem);
20397 fprintf (stderr, "scratch:\n");
20398 debug_rtx (scratch);
20401 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
20402 gcc_assert (GET_CODE (mem) == MEM);
20403 rclass = REGNO_REG_CLASS (regno);
20404 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
20405 addr = XEXP (mem, 0);
20407 if (GET_CODE (addr) == PRE_MODIFY)
20409 gcc_assert (REG_P (XEXP (addr, 0))
20410 && GET_CODE (XEXP (addr, 1)) == PLUS
20411 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
20412 scratch_or_premodify = XEXP (addr, 0);
20413 if (!HARD_REGISTER_P (scratch_or_premodify))
20414 /* If we have a pseudo here then reload will have arranged
20415 to have it replaced, but only in the original insn.
20416 Use the replacement here too. */
20417 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
20419 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
20420 expressions from the original insn, without unsharing them.
20421 Any RTL that points into the original insn will of course
20422 have register replacements applied. That is why we don't
20423 need to look for replacements under the PLUS. */
20424 addr = XEXP (addr, 1);
20426 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
20428 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
20430 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
20432 /* Now create the move. */
20433 if (store_p)
20434 emit_insn (gen_rtx_SET (mem, reg));
20435 else
20436 emit_insn (gen_rtx_SET (reg, mem));
20438 return;
20441 /* Given an rtx X being reloaded into a reg required to be
20442 in class CLASS, return the class of reg to actually use.
20443 In general this is just CLASS; but on some machines
20444 in some cases it is preferable to use a more restrictive class.
20446 On the RS/6000, we have to return NO_REGS when we want to reload a
20447 floating-point CONST_DOUBLE to force it to be copied to memory.
20449 We also don't want to reload integer values into floating-point
20450 registers if we can at all help it. In fact, this can
20451 cause reload to die, if it tries to generate a reload of CTR
20452 into a FP register and discovers it doesn't have the memory location
20453 required.
20455 ??? Would it be a good idea to have reload do the converse, that is
20456 try to reload floating modes into FP registers if possible?
20459 static enum reg_class
20460 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
20462 machine_mode mode = GET_MODE (x);
20463 bool is_constant = CONSTANT_P (x);
20465 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
20466 reload class for it. */
20467 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20468 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
20469 return NO_REGS;
20471 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
20472 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
20473 return NO_REGS;
20475 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
20476 the reloading of address expressions using PLUS into floating point
20477 registers. */
20478 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
20480 if (is_constant)
20482 /* Zero is always allowed in all VSX registers. */
20483 if (x == CONST0_RTX (mode))
20484 return rclass;
20486 /* If this is a vector constant that can be formed with a few Altivec
20487 instructions, we want altivec registers. */
20488 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
20489 return ALTIVEC_REGS;
20491 /* If this is an integer constant that can easily be loaded into
20492 vector registers, allow it. */
20493 if (CONST_INT_P (x))
20495 HOST_WIDE_INT value = INTVAL (x);
20497 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
20498 2.06 can generate it in the Altivec registers with
20499 VSPLTI<x>. */
20500 if (value == -1)
20502 if (TARGET_P8_VECTOR)
20503 return rclass;
20504 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
20505 return ALTIVEC_REGS;
20506 else
20507 return NO_REGS;
20510 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
20511 a sign extend in the Altivec registers. */
20512 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
20513 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
20514 return ALTIVEC_REGS;
20517 /* Force constant to memory. */
20518 return NO_REGS;
20521 /* D-form addressing can easily reload the value. */
20522 if (mode_supports_vmx_dform (mode)
20523 || mode_supports_vsx_dform_quad (mode))
20524 return rclass;
20526 /* If this is a scalar floating point value and we don't have D-form
20527 addressing, prefer the traditional floating point registers so that we
20528 can use D-form (register+offset) addressing. */
20529 if (rclass == VSX_REGS
20530 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
20531 return FLOAT_REGS;
20533 /* Prefer the Altivec registers if Altivec is handling the vector
20534 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
20535 loads. */
20536 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
20537 || mode == V1TImode)
20538 return ALTIVEC_REGS;
20540 return rclass;
20543 if (is_constant || GET_CODE (x) == PLUS)
20545 if (reg_class_subset_p (GENERAL_REGS, rclass))
20546 return GENERAL_REGS;
20547 if (reg_class_subset_p (BASE_REGS, rclass))
20548 return BASE_REGS;
20549 return NO_REGS;
20552 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
20553 return GENERAL_REGS;
20555 return rclass;
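/* Examples (illustrative) for the constant handling above: 0.0 stays in the
   requested VSX class since zeros are easily generated there (e.g. with
   xxlxor), most other scalar floating point constants return NO_REGS and are
   forced to memory, and (const_int -1) keeps VSX_REGS on ISA 2.07 because
   XXLORC can generate all-ones in any VSX register. */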
20558 /* Debug version of rs6000_preferred_reload_class. */
20559 static enum reg_class
20560 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
20562 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
20564 fprintf (stderr,
20565 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
20566 "mode = %s, x:\n",
20567 reg_class_names[ret], reg_class_names[rclass],
20568 GET_MODE_NAME (GET_MODE (x)));
20569 debug_rtx (x);
20571 return ret;
20574 /* If we are copying between FP or AltiVec registers and anything else, we need
20575 a memory location. The exception is when we are targeting ppc64 and the
20576 move to/from fpr to gpr instructions are available. Also, under VSX, you
20577 can copy vector registers from the FP register set to the Altivec register
20578 set and vice versa. */
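/* E.g. (illustrative): without the direct move instructions, copying a
   DFmode value from an FPR to a GPR has no register-to-register path, so
   the value must travel through a stack slot (a store then a load). */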
20580 static bool
20581 rs6000_secondary_memory_needed (machine_mode mode,
20582 reg_class_t from_class,
20583 reg_class_t to_class)
20585 enum rs6000_reg_type from_type, to_type;
20586 bool altivec_p = ((from_class == ALTIVEC_REGS)
20587 || (to_class == ALTIVEC_REGS));
20589 /* If a simple/direct move is available, we don't need secondary memory. */
20590 from_type = reg_class_to_reg_type[(int)from_class];
20591 to_type = reg_class_to_reg_type[(int)to_class];
20593 if (rs6000_secondary_reload_move (to_type, from_type, mode,
20594 (secondary_reload_info *)0, altivec_p))
20595 return false;
20597 /* If we have a floating point or vector register class, we need to use
20598 memory to transfer the data. */
20599 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
20600 return true;
20602 return false;
20605 /* Debug version of rs6000_secondary_memory_needed. */
20606 static bool
20607 rs6000_debug_secondary_memory_needed (machine_mode mode,
20608 reg_class_t from_class,
20609 reg_class_t to_class)
20611 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
20613 fprintf (stderr,
20614 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
20615 "to_class = %s, mode = %s\n",
20616 ret ? "true" : "false",
20617 reg_class_names[from_class],
20618 reg_class_names[to_class],
20619 GET_MODE_NAME (mode));
20621 return ret;
20624 /* Return the register class of a scratch register needed to copy IN into
20625 or out of a register in RCLASS in MODE. If it can be done directly,
20626 NO_REGS is returned. */
20628 static enum reg_class
20629 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
20630 rtx in)
20632 int regno;
20634 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
20635 #if TARGET_MACHO
20636 && MACHOPIC_INDIRECT
20637 #endif
20640 /* We cannot copy a symbolic operand directly into anything
20641 other than BASE_REGS for TARGET_ELF. So indicate that a
20642 register from BASE_REGS is needed as an intermediate
20643 register.
20645 On Darwin, pic addresses require a load from memory, which
20646 needs a base register. */
20647 if (rclass != BASE_REGS
20648 && (GET_CODE (in) == SYMBOL_REF
20649 || GET_CODE (in) == HIGH
20650 || GET_CODE (in) == LABEL_REF
20651 || GET_CODE (in) == CONST))
20652 return BASE_REGS;
20655 if (GET_CODE (in) == REG)
20657 regno = REGNO (in);
20658 if (regno >= FIRST_PSEUDO_REGISTER)
20660 regno = true_regnum (in);
20661 if (regno >= FIRST_PSEUDO_REGISTER)
20662 regno = -1;
20665 else if (GET_CODE (in) == SUBREG)
20667 regno = true_regnum (in);
20668 if (regno >= FIRST_PSEUDO_REGISTER)
20669 regno = -1;
20671 else
20672 regno = -1;
20674 /* If we have VSX register moves, prefer moving scalar values between
20675 Altivec registers and GPRs by going via an FPR (and then via memory)
20676 instead of reloading the secondary memory address for Altivec moves. */
20677 if (TARGET_VSX
20678 && GET_MODE_SIZE (mode) < 16
20679 && !mode_supports_vmx_dform (mode)
20680 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
20681 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
20682 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
20683 && (regno >= 0 && INT_REGNO_P (regno)))))
20684 return FLOAT_REGS;
20686 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
20687 into anything. */
20688 if (rclass == GENERAL_REGS || rclass == BASE_REGS
20689 || (regno >= 0 && INT_REGNO_P (regno)))
20690 return NO_REGS;
20692 /* Constants, memory, and VSX registers can go into VSX registers (both the
20693 traditional floating point and the altivec registers). */
20694 if (rclass == VSX_REGS
20695 && (regno == -1 || VSX_REGNO_P (regno)))
20696 return NO_REGS;
20698 /* Constants, memory, and FP registers can go into FP registers. */
20699 if ((regno == -1 || FP_REGNO_P (regno))
20700 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
20701 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
20703 /* Memory and AltiVec registers can go into AltiVec registers. */
20704 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
20705 && rclass == ALTIVEC_REGS)
20706 return NO_REGS;
20708 /* We can copy among the CR registers. */
20709 if ((rclass == CR_REGS || rclass == CR0_REGS)
20710 && regno >= 0 && CR_REGNO_P (regno))
20711 return NO_REGS;
20713 /* Otherwise, we need GENERAL_REGS. */
20714 return GENERAL_REGS;
20717 /* Debug version of rs6000_secondary_reload_class. */
20718 static enum reg_class
20719 rs6000_debug_secondary_reload_class (enum reg_class rclass,
20720 machine_mode mode, rtx in)
20722 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
20723 fprintf (stderr,
20724 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
20725 "mode = %s, input rtx:\n",
20726 reg_class_names[ret], reg_class_names[rclass],
20727 GET_MODE_NAME (mode));
20728 debug_rtx (in);
20730 return ret;
20733 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
20735 static bool
20736 rs6000_can_change_mode_class (machine_mode from,
20737 machine_mode to,
20738 reg_class_t rclass)
20740 unsigned from_size = GET_MODE_SIZE (from);
20741 unsigned to_size = GET_MODE_SIZE (to);
20743 if (from_size != to_size)
20745 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
20747 if (reg_classes_intersect_p (xclass, rclass))
20749 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
20750 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
20751 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
20752 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
20754 /* Don't allow 64-bit types to overlap with 128-bit types that take a
20755 single register under VSX because the scalar part of the register
20756 is in the upper 64-bits, and not the lower 64-bits. Types like
20757 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
20758 IEEE floating point can't overlap, and neither can small
20759 values. */
20761 if (to_float128_vector_p && from_float128_vector_p)
20762 return true;
20764 else if (to_float128_vector_p || from_float128_vector_p)
20765 return false;
20767 /* TDmode in floating-mode registers must always go into a register
20768 pair with the most significant word in the even-numbered register
20769 to match ISA requirements. In little-endian mode, this does not
20770 match subreg numbering, so we cannot allow subregs. */
20771 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
20772 return false;
20774 if (from_size < 8 || to_size < 8)
20775 return false;
20777 if (from_size == 8 && (8 * to_nregs) != to_size)
20778 return false;
20780 if (to_size == 8 && (8 * from_nregs) != from_size)
20781 return false;
20783 return true;
20785 else
20786 return true;
20789 /* Since the VSX register set includes traditional floating point registers
20790 and altivec registers, just check for the size being different instead of
20791 trying to check whether the modes are vector modes. Otherwise it won't
20792 allow say DF and DI to change classes. For types like TFmode and TDmode
20793 that take 2 64-bit registers, rather than a single 128-bit register, don't
20794 allow subregs of those types to other 128 bit types. */
20795 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
20797 unsigned num_regs = (from_size + 15) / 16;
20798 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
20799 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
20800 return false;
20802 return (from_size == 8 || from_size == 16);
20805 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
20806 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
20807 return false;
20809 return true;
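/* E.g. (illustrative): a DImode subreg of a KFmode (IEEE 128-bit float)
   value is rejected for VSX classes above, since 128-bit IEEE floating
   point occupies a single register and may not overlap smaller modes. */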
20812 /* Debug version of rs6000_can_change_mode_class. */
20813 static bool
20814 rs6000_debug_can_change_mode_class (machine_mode from,
20815 machine_mode to,
20816 reg_class_t rclass)
20818 bool ret = rs6000_can_change_mode_class (from, to, rclass);
20820 fprintf (stderr,
20821 "rs6000_can_change_mode_class, return %s, from = %s, "
20822 "to = %s, rclass = %s\n",
20823 ret ? "true" : "false",
20824 GET_MODE_NAME (from), GET_MODE_NAME (to),
20825 reg_class_names[rclass]);
20827 return ret;
20830 /* Return a string to do a move operation of 128 bits of data. */
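/* In the templates below, returning "#" tells final that the insn cannot be
   emitted as one machine instruction and must be split after reload (see
   e.g. rs6000_split_128bit_ok_p further down). */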
20832 const char *
20833 rs6000_output_move_128bit (rtx operands[])
20835 rtx dest = operands[0];
20836 rtx src = operands[1];
20837 machine_mode mode = GET_MODE (dest);
20838 int dest_regno;
20839 int src_regno;
20840 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
20841 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
20843 if (REG_P (dest))
20845 dest_regno = REGNO (dest);
20846 dest_gpr_p = INT_REGNO_P (dest_regno);
20847 dest_fp_p = FP_REGNO_P (dest_regno);
20848 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
20849 dest_vsx_p = dest_fp_p | dest_vmx_p;
20851 else
20853 dest_regno = -1;
20854 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
20857 if (REG_P (src))
20859 src_regno = REGNO (src);
20860 src_gpr_p = INT_REGNO_P (src_regno);
20861 src_fp_p = FP_REGNO_P (src_regno);
20862 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
20863 src_vsx_p = src_fp_p | src_vmx_p;
20865 else
20867 src_regno = -1;
20868 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
20871 /* Register moves. */
20872 if (dest_regno >= 0 && src_regno >= 0)
20874 if (dest_gpr_p)
20876 if (src_gpr_p)
20877 return "#";
20879 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
20880 return (WORDS_BIG_ENDIAN
20881 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
20882 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
20884 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
20885 return "#";
20888 else if (TARGET_VSX && dest_vsx_p)
20890 if (src_vsx_p)
20891 return "xxlor %x0,%x1,%x1";
20893 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
20894 return (WORDS_BIG_ENDIAN
20895 ? "mtvsrdd %x0,%1,%L1"
20896 : "mtvsrdd %x0,%L1,%1");
20898 else if (TARGET_DIRECT_MOVE && src_gpr_p)
20899 return "#";
20902 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
20903 return "vor %0,%1,%1";
20905 else if (dest_fp_p && src_fp_p)
20906 return "#";
20909 /* Loads. */
20910 else if (dest_regno >= 0 && MEM_P (src))
20912 if (dest_gpr_p)
20914 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20915 return "lq %0,%1";
20916 else
20917 return "#";
20920 else if (TARGET_ALTIVEC && dest_vmx_p
20921 && altivec_indexed_or_indirect_operand (src, mode))
20922 return "lvx %0,%y1";
20924 else if (TARGET_VSX && dest_vsx_p)
20926 if (mode_supports_vsx_dform_quad (mode)
20927 && quad_address_p (XEXP (src, 0), mode, true))
20928 return "lxv %x0,%1";
20930 else if (TARGET_P9_VECTOR)
20931 return "lxvx %x0,%y1";
20933 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20934 return "lxvw4x %x0,%y1";
20936 else
20937 return "lxvd2x %x0,%y1";
20940 else if (TARGET_ALTIVEC && dest_vmx_p)
20941 return "lvx %0,%y1";
20943 else if (dest_fp_p)
20944 return "#";
20947 /* Stores. */
20948 else if (src_regno >= 0 && MEM_P (dest))
20950 if (src_gpr_p)
20952 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
20953 return "stq %1,%0";
20954 else
20955 return "#";
20958 else if (TARGET_ALTIVEC && src_vmx_p
20959 && altivec_indexed_or_indirect_operand (dest, mode))
20960 return "stvx %1,%y0";
20962 else if (TARGET_VSX && src_vsx_p)
20964 if (mode_supports_vsx_dform_quad (mode)
20965 && quad_address_p (XEXP (dest, 0), mode, true))
20966 return "stxv %x1,%0";
20968 else if (TARGET_P9_VECTOR)
20969 return "stxvx %x1,%y0";
20971 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
20972 return "stxvw4x %x1,%y0";
20974 else
20975 return "stxvd2x %x1,%y0";
20978 else if (TARGET_ALTIVEC && src_vmx_p)
20979 return "stvx %1,%y0";
20981 else if (src_fp_p)
20982 return "#";
20985 /* Constants. */
20986 else if (dest_regno >= 0
20987 && (GET_CODE (src) == CONST_INT
20988 || GET_CODE (src) == CONST_WIDE_INT
20989 || GET_CODE (src) == CONST_DOUBLE
20990 || GET_CODE (src) == CONST_VECTOR))
20992 if (dest_gpr_p)
20993 return "#";
20995 else if ((dest_vmx_p && TARGET_ALTIVEC)
20996 || (dest_vsx_p && TARGET_VSX))
20997 return output_vec_const_move (operands);
21000 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
21003 /* Validate a 128-bit move. */
21004 bool
21005 rs6000_move_128bit_ok_p (rtx operands[])
21007 machine_mode mode = GET_MODE (operands[0]);
21008 return (gpc_reg_operand (operands[0], mode)
21009 || gpc_reg_operand (operands[1], mode));
21012 /* Return true if a 128-bit move needs to be split. */
21013 bool
21014 rs6000_split_128bit_ok_p (rtx operands[])
21016 if (!reload_completed)
21017 return false;
21019 if (!gpr_or_gpr_p (operands[0], operands[1]))
21020 return false;
21022 if (quad_load_store_p (operands[0], operands[1]))
21023 return false;
21025 return true;
21029 /* Given a comparison operation, return the bit number in CCR to test. We
21030 know this is a valid comparison.
21032 SCC_P is 1 if this is for an scc. That means that %D will have been
21033 used instead of %C, so the bits will be in different places.
21035 Return -1 if OP isn't a valid comparison for some reason. */
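/* E.g.: each CR field holds four bits (LT, GT, EQ, SO/UN), so an EQ test
   against CR6 yields bit 4*6 + 2 == 26. */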
21038 ccr_bit (rtx op, int scc_p)
21040 enum rtx_code code = GET_CODE (op);
21041 machine_mode cc_mode;
21042 int cc_regnum;
21043 int base_bit;
21044 rtx reg;
21046 if (!COMPARISON_P (op))
21047 return -1;
21049 reg = XEXP (op, 0);
21051 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
21053 cc_mode = GET_MODE (reg);
21054 cc_regnum = REGNO (reg);
21055 base_bit = 4 * (cc_regnum - CR0_REGNO);
21057 validate_condition_mode (code, cc_mode);
21059 /* When generating a sCOND operation, only positive conditions are
21060 allowed. */
21061 gcc_assert (!scc_p
21062 || code == EQ || code == GT || code == LT || code == UNORDERED
21063 || code == GTU || code == LTU);
21065 switch (code)
21067 case NE:
21068 return scc_p ? base_bit + 3 : base_bit + 2;
21069 case EQ:
21070 return base_bit + 2;
21071 case GT: case GTU: case UNLE:
21072 return base_bit + 1;
21073 case LT: case LTU: case UNGE:
21074 return base_bit;
21075 case ORDERED: case UNORDERED:
21076 return base_bit + 3;
21078 case GE: case GEU:
21079 /* If scc, we will have done a cror to put the bit in the
21080 unordered position. So test that bit. For integer, this is ! LT
21081 unless this is an scc insn. */
21082 return scc_p ? base_bit + 3 : base_bit;
21084 case LE: case LEU:
21085 return scc_p ? base_bit + 3 : base_bit + 1;
21087 default:
21088 gcc_unreachable ();
21092 /* Return the GOT register. */
21095 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
21097 /* The second flow pass currently (June 1999) can't update
21098 regs_ever_live without disturbing other parts of the compiler, so
21099 update it here to make the prolog/epilogue code happy. */
21100 if (!can_create_pseudo_p ()
21101 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21102 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
21104 crtl->uses_pic_offset_table = 1;
21106 return pic_offset_table_rtx;
21109 static rs6000_stack_t stack_info;
21111 /* Function to init struct machine_function.
21112 This will be called, via a pointer variable,
21113 from push_function_context. */
21115 static struct machine_function *
21116 rs6000_init_machine_status (void)
21118 stack_info.reload_completed = 0;
21119 return ggc_cleared_alloc<machine_function> ();
21122 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
21124 /* Write out a function code label. */
21126 void
21127 rs6000_output_function_entry (FILE *file, const char *fname)
21129 if (fname[0] != '.')
21131 switch (DEFAULT_ABI)
21133 default:
21134 gcc_unreachable ();
21136 case ABI_AIX:
21137 if (DOT_SYMBOLS)
21138 putc ('.', file);
21139 else
21140 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
21141 break;
21143 case ABI_ELFv2:
21144 case ABI_V4:
21145 case ABI_DARWIN:
21146 break;
21150 RS6000_OUTPUT_BASENAME (file, fname);
21153 /* Print an operand. Recognize special options, documented below. */
21155 #if TARGET_ELF
21156 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
21157 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
21158 #else
21159 #define SMALL_DATA_RELOC "sda21"
21160 #define SMALL_DATA_REG 0
21161 #endif
21163 void
21164 print_operand (FILE *file, rtx x, int code)
21166 int i;
21167 unsigned HOST_WIDE_INT uval;
21169 switch (code)
21171 /* %a is output_address. */
21173 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
21174 output_operand. */
21176 case 'D':
21177 /* Like 'J' but get to the GT bit only. */
21178 gcc_assert (REG_P (x));
21180 /* Bit 1 is GT bit. */
21181 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
21183 /* Add one for shift count in rlinm for scc. */
21184 fprintf (file, "%d", i + 1);
21185 return;
21187 case 'e':
21188 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
21189 if (! INT_P (x))
21191 output_operand_lossage ("invalid %%e value");
21192 return;
21195 uval = INTVAL (x);
21196 if ((uval & 0xffff) == 0 && uval != 0)
21197 putc ('s', file);
21198 return;
21200 case 'E':
21201 /* X is a CR register. Print the number of the EQ bit of the CR. */
21202 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21203 output_operand_lossage ("invalid %%E value");
21204 else
21205 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
21206 return;
21208 case 'f':
21209 /* X is a CR register. Print the shift count needed to move it
21210 to the high-order four bits. */
21211 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21212 output_operand_lossage ("invalid %%f value");
21213 else
21214 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
21215 return;
21217 case 'F':
21218 /* Similar, but print the count for the rotate in the opposite
21219 direction. */
21220 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21221 output_operand_lossage ("invalid %%F value");
21222 else
21223 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
21224 return;
21226 case 'G':
21227 /* X is a constant integer. If it is negative, print "m",
21228 otherwise print "z". This is to make an aze or ame insn. */
21229 if (GET_CODE (x) != CONST_INT)
21230 output_operand_lossage ("invalid %%G value");
21231 else if (INTVAL (x) >= 0)
21232 putc ('z', file);
21233 else
21234 putc ('m', file);
21235 return;
21237 case 'h':
21238 /* If constant, output low-order five bits. Otherwise, write
21239 normally. */
21240 if (INT_P (x))
21241 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
21242 else
21243 print_operand (file, x, 0);
21244 return;
21246 case 'H':
21247 /* If constant, output low-order six bits. Otherwise, write
21248 normally. */
21249 if (INT_P (x))
21250 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
21251 else
21252 print_operand (file, x, 0);
21253 return;
21255 case 'I':
21256 /* Print `i' if this is a constant, else nothing. */
21257 if (INT_P (x))
21258 putc ('i', file);
21259 return;
21261 case 'j':
21262 /* Write the bit number in CCR for jump. */
21263 i = ccr_bit (x, 0);
21264 if (i == -1)
21265 output_operand_lossage ("invalid %%j code");
21266 else
21267 fprintf (file, "%d", i);
21268 return;
21270 case 'J':
21271 /* Similar, but add one for shift count in rlinm for scc and pass
21272 scc flag to `ccr_bit'. */
21273 i = ccr_bit (x, 1);
21274 if (i == -1)
21275 output_operand_lossage ("invalid %%J code");
21276 else
21277 /* If we want bit 31, write a shift count of zero, not 32. */
21278 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21279 return;
21281 case 'k':
21282 /* X must be a constant. Write the 1's complement of the
21283 constant. */
21284 if (! INT_P (x))
21285 output_operand_lossage ("invalid %%k value");
21286 else
21287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
21288 return;
21290 case 'K':
21291 /* X must be a symbolic constant on ELF. Write an
21292 expression suitable for an 'addi' that adds in the low 16
21293 bits of the MEM. */
21294 if (GET_CODE (x) == CONST)
21296 if (GET_CODE (XEXP (x, 0)) != PLUS
21297 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
21298 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
21299 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
21300 output_operand_lossage ("invalid %%K value");
21302 print_operand_address (file, x);
21303 fputs ("@l", file);
21304 return;
21306 /* %l is output_asm_label. */
21308 case 'L':
21309 /* Write second word of DImode or DFmode reference. Works on register
21310 or non-indexed memory only. */
21311 if (REG_P (x))
21312 fputs (reg_names[REGNO (x) + 1], file);
21313 else if (MEM_P (x))
21315 machine_mode mode = GET_MODE (x);
21316 /* Handle possible auto-increment. Since it is pre-increment and
21317 we have already done it, we can just use an offset of one word. */
21318 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21319 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21320 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21321 UNITS_PER_WORD));
21322 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21323 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
21324 UNITS_PER_WORD));
21325 else
21326 output_address (mode, XEXP (adjust_address_nv (x, SImode,
21327 UNITS_PER_WORD),
21328 0));
21330 if (small_data_operand (x, GET_MODE (x)))
21331 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21332 reg_names[SMALL_DATA_REG]);
21334 return;
21336 case 'N':
21337 /* Write the number of elements in the vector times 4. */
21338 if (GET_CODE (x) != PARALLEL)
21339 output_operand_lossage ("invalid %%N value");
21340 else
21341 fprintf (file, "%d", XVECLEN (x, 0) * 4);
21342 return;
21344 case 'O':
21345 /* Similar, but subtract 1 first. */
21346 if (GET_CODE (x) != PARALLEL)
21347 output_operand_lossage ("invalid %%O value");
21348 else
21349 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
21350 return;
21352 case 'p':
21353 /* X is a CONST_INT that is a power of two. Output the logarithm. */
21354 if (! INT_P (x)
21355 || INTVAL (x) < 0
21356 || (i = exact_log2 (INTVAL (x))) < 0)
21357 output_operand_lossage ("invalid %%p value");
21358 else
21359 fprintf (file, "%d", i);
21360 return;
21362 case 'P':
21363 /* The operand must be an indirect memory reference. The result
21364 is the register name. */
21365 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
21366 || REGNO (XEXP (x, 0)) >= 32)
21367 output_operand_lossage ("invalid %%P value");
21368 else
21369 fputs (reg_names[REGNO (XEXP (x, 0))], file);
21370 return;
21372 case 'q':
21373 /* This outputs the logical code corresponding to a boolean
21374 expression. The expression may have one or both operands
21375 negated (if one, only the first one). For condition register
21376 logical operations, it will also treat the negated
21377 CR codes as NOTs, but not handle NOTs of them. */
21379 const char *const *t = 0;
21380 const char *s;
21381 enum rtx_code code = GET_CODE (x);
21382 static const char * const tbl[3][3] = {
21383 { "and", "andc", "nor" },
21384 { "or", "orc", "nand" },
21385 { "xor", "eqv", "xor" } };
21387 if (code == AND)
21388 t = tbl[0];
21389 else if (code == IOR)
21390 t = tbl[1];
21391 else if (code == XOR)
21392 t = tbl[2];
21393 else
21394 output_operand_lossage ("invalid %%q value");
21396 if (GET_CODE (XEXP (x, 0)) != NOT)
21397 s = t[0];
21398 else
21400 if (GET_CODE (XEXP (x, 1)) == NOT)
21401 s = t[2];
21402 else
21403 s = t[1];
21406 fputs (s, file);
21408 return;
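/* E.g. for %q above: (and (reg) (reg)) prints "and", (and (not (reg)) (reg))
   prints "andc", and (and (not (reg)) (not (reg))) prints "nor", per
   De Morgan's law. */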
21410 case 'Q':
21411 if (! TARGET_MFCRF)
21412 return;
21413 fputc (',', file);
21414 /* FALLTHRU */
21416 case 'R':
21417 /* X is a CR register. Print the mask for `mtcrf'. */
21418 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
21419 output_operand_lossage ("invalid %%R value");
21420 else
21421 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
21422 return;
21424 case 's':
21425 /* Low 5 bits of 32 - value. */
21426 if (! INT_P (x))
21427 output_operand_lossage ("invalid %%s value");
21428 else
21429 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
21430 return;
21432 case 't':
21433 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
21434 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
21436 /* Bit 3 is OV bit. */
21437 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
21439 /* If we want bit 31, write a shift count of zero, not 32. */
21440 fprintf (file, "%d", i == 31 ? 0 : i + 1);
21441 return;
21443 case 'T':
21444 /* Print the symbolic name of a branch target register. */
21445 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
21446 && REGNO (x) != CTR_REGNO))
21447 output_operand_lossage ("invalid %%T value");
21448 else if (REGNO (x) == LR_REGNO)
21449 fputs ("lr", file);
21450 else
21451 fputs ("ctr", file);
21452 return;
21454 case 'u':
21455 /* High-order or low-order 16 bits of constant, whichever is non-zero,
21456 for use in unsigned operand. */
21457 if (! INT_P (x))
21459 output_operand_lossage ("invalid %%u value");
21460 return;
21463 uval = INTVAL (x);
21464 if ((uval & 0xffff) == 0)
21465 uval >>= 16;
21467 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
21468 return;
21470 case 'v':
21471 /* High-order 16 bits of constant for use in signed operand. */
21472 if (! INT_P (x))
21473 output_operand_lossage ("invalid %%v value");
21474 else
21475 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
21476 (INTVAL (x) >> 16) & 0xffff);
21477 return;
21479 case 'U':
21480 /* Print `u' if this has an auto-increment or auto-decrement. */
21481 if (MEM_P (x)
21482 && (GET_CODE (XEXP (x, 0)) == PRE_INC
21483 || GET_CODE (XEXP (x, 0)) == PRE_DEC
21484 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
21485 putc ('u', file);
21486 return;
21488 case 'V':
21489 /* Print the trap code for this operand. */
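/* The number in the comment after each mnemonic below is the matching
   TO-field encoding: lt=16, gt=8, eq=4, llt=2, lgt=1; the composite
   conditions are ORs of those bits.  */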
21490 switch (GET_CODE (x))
21492 case EQ:
21493 fputs ("eq", file); /* 4 */
21494 break;
21495 case NE:
21496 fputs ("ne", file); /* 24 */
21497 break;
21498 case LT:
21499 fputs ("lt", file); /* 16 */
21500 break;
21501 case LE:
21502 fputs ("le", file); /* 20 */
21503 break;
21504 case GT:
21505 fputs ("gt", file); /* 8 */
21506 break;
21507 case GE:
21508 fputs ("ge", file); /* 12 */
21509 break;
21510 case LTU:
21511 fputs ("llt", file); /* 2 */
21512 break;
21513 case LEU:
21514 fputs ("lle", file); /* 6 */
21515 break;
21516 case GTU:
21517 fputs ("lgt", file); /* 1 */
21518 break;
21519 case GEU:
21520 fputs ("lge", file); /* 5 */
21521 break;
21522 default:
21523 gcc_unreachable ();
21525 break;
21527 case 'w':
21528 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
21529 normally. */
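/* ((v & 0xffff) ^ 0x8000) - 0x8000 sign-extends the low 16 bits, so
   values 0x8000 through 0xffff print as negative numbers.  */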
21530 if (INT_P (x))
21531 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
21532 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
21533 else
21534 print_operand (file, x, 0);
21535 return;
21537 case 'x':
21538 /* X is a FPR or Altivec register used in a VSX context. */
21539 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
21540 output_operand_lossage ("invalid %%x value");
21541 else
21543 int reg = REGNO (x);
21544 int vsx_reg = (FP_REGNO_P (reg)
21545 ? reg - 32
21546 : reg - FIRST_ALTIVEC_REGNO + 32);
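/* The unified VSX register file numbers the FPRs as vs0-vs31 and the
   Altivec registers as vs32-vs63.  */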
21548 #ifdef TARGET_REGNAMES
21549 if (TARGET_REGNAMES)
21550 fprintf (file, "%%vs%d", vsx_reg);
21551 else
21552 #endif
21553 fprintf (file, "%d", vsx_reg);
21555 return;
21557 case 'X':
21558 if (MEM_P (x)
21559 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
21560 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
21561 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
21562 putc ('x', file);
21563 return;
21565 case 'Y':
21566 /* Like 'L', for third word of TImode/PTImode. */
21567 if (REG_P (x))
21568 fputs (reg_names[REGNO (x) + 2], file);
21569 else if (MEM_P (x))
21571 machine_mode mode = GET_MODE (x);
21572 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21573 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21574 output_address (mode, plus_constant (Pmode,
21575 XEXP (XEXP (x, 0), 0), 8));
21576 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21577 output_address (mode, plus_constant (Pmode,
21578 XEXP (XEXP (x, 0), 0), 8));
21579 else
21580 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
21581 if (small_data_operand (x, GET_MODE (x)))
21582 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21583 reg_names[SMALL_DATA_REG]);
21585 return;
21587 case 'z':
21588 /* X is a SYMBOL_REF. Write out the name preceded by a
21589 period and without any trailing data in brackets. Used for function
21590 names. If we are configured for System V (or the embedded ABI) on
21591 the PowerPC, do not emit the period, since those systems do not use
21592 TOCs and the like. */
21593 gcc_assert (GET_CODE (x) == SYMBOL_REF);
21595 /* For macho, check to see if we need a stub. */
21596 if (TARGET_MACHO)
21598 const char *name = XSTR (x, 0);
21599 #if TARGET_MACHO
21600 if (darwin_emit_branch_islands
21601 && MACHOPIC_INDIRECT
21602 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
21603 name = machopic_indirection_name (x, /*stub_p=*/true);
21604 #endif
21605 assemble_name (file, name);
21607 else if (!DOT_SYMBOLS)
21608 assemble_name (file, XSTR (x, 0));
21609 else
21610 rs6000_output_function_entry (file, XSTR (x, 0));
21611 return;
21613 case 'Z':
21614 /* Like 'L', for last word of TImode/PTImode. */
21615 if (REG_P (x))
21616 fputs (reg_names[REGNO (x) + 3], file);
21617 else if (MEM_P (x))
21619 machine_mode mode = GET_MODE (x);
21620 if (GET_CODE (XEXP (x, 0)) == PRE_INC
21621 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
21622 output_address (mode, plus_constant (Pmode,
21623 XEXP (XEXP (x, 0), 0), 12));
21624 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21625 output_address (mode, plus_constant (Pmode,
21626 XEXP (XEXP (x, 0), 0), 12));
21627 else
21628 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
21629 if (small_data_operand (x, GET_MODE (x)))
21630 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21631 reg_names[SMALL_DATA_REG]);
21633 return;
21635 /* Print AltiVec memory operand. */
21636 case 'y':
21638 rtx tmp;
21640 gcc_assert (MEM_P (x));
21642 tmp = XEXP (x, 0);
21644 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
21645 && GET_CODE (tmp) == AND
21646 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
21647 && INTVAL (XEXP (tmp, 1)) == -16)
21648 tmp = XEXP (tmp, 0);
21649 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
21650 && GET_CODE (tmp) == PRE_MODIFY)
21651 tmp = XEXP (tmp, 1);
21652 if (REG_P (tmp))
21653 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
21654 else
21656 if (GET_CODE (tmp) != PLUS
21657 || !REG_P (XEXP (tmp, 0))
21658 || !REG_P (XEXP (tmp, 1)))
21660 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
21661 break;
21664 if (REGNO (XEXP (tmp, 0)) == 0)
21665 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
21666 reg_names[ REGNO (XEXP (tmp, 0)) ]);
21667 else
21668 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
21669 reg_names[ REGNO (XEXP (tmp, 1)) ]);
21671 break;
21674 case 0:
21675 if (REG_P (x))
21676 fprintf (file, "%s", reg_names[REGNO (x)]);
21677 else if (MEM_P (x))
21679 /* We need to handle PRE_INC and PRE_DEC here, since we need to
21680 know the width from the mode. */
21681 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
21682 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
21683 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21684 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
21685 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
21686 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
21687 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
21688 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
21689 else
21690 output_address (GET_MODE (x), XEXP (x, 0));
21692 else
21694 if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21695 /* This hack along with a corresponding hack in
21696 rs6000_output_addr_const_extra arranges to output addends
21697 where the assembler expects to find them. E.g.
21698 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
21699 without this hack would be output as "x@toc+4". We
21700 want "x+4@toc". */
21701 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21702 else
21703 output_addr_const (file, x);
21705 return;
21707 case '&':
21708 if (const char *name = get_some_local_dynamic_name ())
21709 assemble_name (file, name);
21710 else
21711 output_operand_lossage ("'%%&' used without any "
21712 "local dynamic TLS references");
21713 return;
21715 default:
21716 output_operand_lossage ("invalid %%xn code");
21720 /* Print the address of an operand. */
21722 void
21723 print_operand_address (FILE *file, rtx x)
21725 if (REG_P (x))
21726 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
21727 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
21728 || GET_CODE (x) == LABEL_REF)
21730 output_addr_const (file, x);
21731 if (small_data_operand (x, GET_MODE (x)))
21732 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
21733 reg_names[SMALL_DATA_REG]);
21734 else
21735 gcc_assert (!TARGET_TOC);
21737 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21738 && REG_P (XEXP (x, 1)))
21740 if (REGNO (XEXP (x, 0)) == 0)
21741 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
21742 reg_names[ REGNO (XEXP (x, 0)) ]);
21743 else
21744 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
21745 reg_names[ REGNO (XEXP (x, 1)) ]);
21747 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
21748 && GET_CODE (XEXP (x, 1)) == CONST_INT)
21749 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
21750 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
21751 #if TARGET_MACHO
21752 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21753 && CONSTANT_P (XEXP (x, 1)))
21755 fprintf (file, "lo16(");
21756 output_addr_const (file, XEXP (x, 1));
21757 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21759 #endif
21760 #if TARGET_ELF
21761 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
21762 && CONSTANT_P (XEXP (x, 1)))
21764 output_addr_const (file, XEXP (x, 1));
21765 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
21767 #endif
21768 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
21770 /* This hack along with a corresponding hack in
21771 rs6000_output_addr_const_extra arranges to output addends
21772 where the assembler expects to find them. E.g.
21773 (lo_sum (reg 9)
21774 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
21775 without this hack would be output as "x@toc+8@l(9)". We
21776 want "x+8@toc@l(9)". */
21777 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
21778 if (GET_CODE (x) == LO_SUM)
21779 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
21780 else
21781 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
21783 else
21784 gcc_unreachable ();
21787 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
21789 static bool
21790 rs6000_output_addr_const_extra (FILE *file, rtx x)
21792 if (GET_CODE (x) == UNSPEC)
21793 switch (XINT (x, 1))
21795 case UNSPEC_TOCREL:
21796 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
21797 && REG_P (XVECEXP (x, 0, 1))
21798 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
21799 output_addr_const (file, XVECEXP (x, 0, 0));
21800 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
21802 if (INTVAL (tocrel_offset_oac) >= 0)
21803 fprintf (file, "+");
21804 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
21806 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
21808 putc ('-', file);
21809 assemble_name (file, toc_label_name);
21810 need_toc_init = 1;
21812 else if (TARGET_ELF)
21813 fputs ("@toc", file);
21814 return true;
21816 #if TARGET_MACHO
21817 case UNSPEC_MACHOPIC_OFFSET:
21818 output_addr_const (file, XVECEXP (x, 0, 0));
21819 putc ('-', file);
21820 machopic_output_function_base_name (file);
21821 return true;
21822 #endif
21824 return false;
21827 /* Target hook for assembling integer objects. The PowerPC version has
21828 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
21829 is defined. It also needs to handle DI-mode objects on 64-bit
21830 targets. */
21832 static bool
21833 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
21835 #ifdef RELOCATABLE_NEEDS_FIXUP
21836 /* Special handling for SI values. */
21837 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
21839 static int recurse = 0;
21841 /* For -mrelocatable, we mark all addresses that need to be fixed up in
21842 the .fixup section. Since the TOC section is already relocated, we
21843 don't need to mark it here. We used to skip the text section, but it
21844 should never be valid for relocated addresses to be placed in the text
21845 section. */
21846 if (DEFAULT_ABI == ABI_V4
21847 && (TARGET_RELOCATABLE || flag_pic > 1)
21848 && in_section != toc_section
21849 && !recurse
21850 && !CONST_SCALAR_INT_P (x)
21851 && CONSTANT_P (x))
21853 char buf[256];
21855 recurse = 1;
21856 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
21857 fixuplabelno++;
21858 ASM_OUTPUT_LABEL (asm_out_file, buf);
21859 fprintf (asm_out_file, "\t.long\t(");
21860 output_addr_const (asm_out_file, x);
21861 fprintf (asm_out_file, ")@fixup\n");
21862 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
21863 ASM_OUTPUT_ALIGN (asm_out_file, 2);
21864 fprintf (asm_out_file, "\t.long\t");
21865 assemble_name (asm_out_file, buf);
21866 fprintf (asm_out_file, "\n\t.previous\n");
21867 recurse = 0;
21868 return true;
21870 /* Remove initial .'s to turn a -mcall-aixdesc function
21871 address into the address of the descriptor, not the function
21872 itself. */
21873 else if (GET_CODE (x) == SYMBOL_REF
21874 && XSTR (x, 0)[0] == '.'
21875 && DEFAULT_ABI == ABI_AIX)
21877 const char *name = XSTR (x, 0);
21878 while (*name == '.')
21879 name++;
21881 fprintf (asm_out_file, "\t.long\t%s\n", name);
21882 return true;
21885 #endif /* RELOCATABLE_NEEDS_FIXUP */
21886 return default_assemble_integer (x, size, aligned_p);
21889 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
21890 /* Emit an assembler directive to set symbol visibility for DECL to
21891 VISIBILITY_TYPE. */
21893 static void
21894 rs6000_assemble_visibility (tree decl, int vis)
21896 if (TARGET_XCOFF)
21897 return;
21899 /* Functions need to have their entry point symbol visibility set as
21900 well as their descriptor symbol visibility. */
21901 if (DEFAULT_ABI == ABI_AIX
21902 && DOT_SYMBOLS
21903 && TREE_CODE (decl) == FUNCTION_DECL)
21905 static const char * const visibility_types[] = {
21906 NULL, "protected", "hidden", "internal"
21909 const char *name, *type;
21911 name = ((* targetm.strip_name_encoding)
21912 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
21913 type = visibility_types[vis];
21915 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
21916 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
21918 else
21919 default_assemble_visibility (decl, vis);
21921 #endif
21923 enum rtx_code
21924 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
21926 /* Reversal of FP compares requires care -- an ordered compare
21927 becomes an unordered compare and vice versa. */
21928 if (mode == CCFPmode
21929 && (!flag_finite_math_only
21930 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
21931 || code == UNEQ || code == LTGT))
21932 return reverse_condition_maybe_unordered (code);
21933 else
21934 return reverse_condition (code);
21937 /* Generate a compare for CODE. Return a brand-new rtx that
21938 represents the result of the compare. */
21940 static rtx
21941 rs6000_generate_compare (rtx cmp, machine_mode mode)
21943 machine_mode comp_mode;
21944 rtx compare_result;
21945 enum rtx_code code = GET_CODE (cmp);
21946 rtx op0 = XEXP (cmp, 0);
21947 rtx op1 = XEXP (cmp, 1);
21949 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21950 comp_mode = CCmode;
21951 else if (FLOAT_MODE_P (mode))
21952 comp_mode = CCFPmode;
21953 else if (code == GTU || code == LTU
21954 || code == GEU || code == LEU)
21955 comp_mode = CCUNSmode;
21956 else if ((code == EQ || code == NE)
21957 && unsigned_reg_p (op0)
21958 && (unsigned_reg_p (op1)
21959 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
21960 /* These are unsigned values; perhaps there will be a later
21961 ordering compare that can be shared with this one. */
21962 comp_mode = CCUNSmode;
21963 else
21964 comp_mode = CCmode;
21966 /* If we have an unsigned compare, make sure we don't have a signed value as
21967 an immediate. */
21968 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
21969 && INTVAL (op1) < 0)
21971 op0 = copy_rtx_if_shared (op0);
21972 op1 = force_reg (GET_MODE (op0), op1);
21973 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
21976 /* First, the compare. */
21977 compare_result = gen_reg_rtx (comp_mode);
21979 /* IEEE 128-bit support in VSX registers when we do not have hardware
21980 support. */
21981 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
21983 rtx libfunc = NULL_RTX;
21984 bool check_nan = false;
21985 rtx dest;
21987 switch (code)
21989 case EQ:
21990 case NE:
21991 libfunc = optab_libfunc (eq_optab, mode);
21992 break;
21994 case GT:
21995 case GE:
21996 libfunc = optab_libfunc (ge_optab, mode);
21997 break;
21999 case LT:
22000 case LE:
22001 libfunc = optab_libfunc (le_optab, mode);
22002 break;
22004 case UNORDERED:
22005 case ORDERED:
22006 libfunc = optab_libfunc (unord_optab, mode);
22007 code = (code == UNORDERED) ? NE : EQ;
22008 break;
22010 case UNGE:
22011 case UNGT:
22012 check_nan = true;
22013 libfunc = optab_libfunc (ge_optab, mode);
22014 code = (code == UNGE) ? GE : GT;
22015 break;
22017 case UNLE:
22018 case UNLT:
22019 check_nan = true;
22020 libfunc = optab_libfunc (le_optab, mode);
22021 code = (code == UNLE) ? LE : LT;
22022 break;
22024 case UNEQ:
22025 case LTGT:
22026 check_nan = true;
22027 libfunc = optab_libfunc (eq_optab, mode);
22028 code = (code == UNEQ) ? EQ : NE;
22029 break;
22031 default:
22032 gcc_unreachable ();
22035 gcc_assert (libfunc);
22037 if (!check_nan)
22038 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22039 SImode, op0, mode, op1, mode);
22041 /* The library signals an exception for signalling NaNs, so we need to
22042 handle isgreater, etc. by first checking isordered. */
22043 else
22045 rtx ne_rtx, normal_dest, unord_dest;
22046 rtx unord_func = optab_libfunc (unord_optab, mode);
22047 rtx join_label = gen_label_rtx ();
22048 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
22049 rtx unord_cmp = gen_reg_rtx (comp_mode);
22052 /* Test for either value being a NaN. */
22053 gcc_assert (unord_func);
22054 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
22055 SImode, op0, mode, op1, mode);
22057 /* Set value (1) if either value is a NaN, and jump to the join
22058 label. */
22059 dest = gen_reg_rtx (SImode);
22060 emit_move_insn (dest, const1_rtx);
22061 emit_insn (gen_rtx_SET (unord_cmp,
22062 gen_rtx_COMPARE (comp_mode, unord_dest,
22063 const0_rtx)));
22065 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
22066 emit_jump_insn (gen_rtx_SET (pc_rtx,
22067 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
22068 join_ref,
22069 pc_rtx)));
22071 /* Do the normal comparison, knowing that the values are not
22072 NaNs. */
22073 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
22074 SImode, op0, mode, op1, mode);
22076 emit_insn (gen_cstoresi4 (dest,
22077 gen_rtx_fmt_ee (code, SImode, normal_dest,
22078 const0_rtx),
22079 normal_dest, const0_rtx));
22081 /* Join NaN and non-NaN paths. Compare dest against 0. */
22082 emit_label (join_label);
22083 code = NE;
22086 emit_insn (gen_rtx_SET (compare_result,
22087 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
22090 else
22092 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
22093 CLOBBERs to match cmptf_internal2 pattern. */
22094 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
22095 && FLOAT128_IBM_P (GET_MODE (op0))
22096 && TARGET_HARD_FLOAT)
22097 emit_insn (gen_rtx_PARALLEL (VOIDmode,
22098 gen_rtvec (10,
22099 gen_rtx_SET (compare_result,
22100 gen_rtx_COMPARE (comp_mode, op0, op1)),
22101 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22102 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22103 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22104 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22105 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22106 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22107 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22108 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
22109 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
22110 else if (GET_CODE (op1) == UNSPEC
22111 && XINT (op1, 1) == UNSPEC_SP_TEST)
22113 rtx op1b = XVECEXP (op1, 0, 0);
22114 comp_mode = CCEQmode;
22115 compare_result = gen_reg_rtx (CCEQmode);
22116 if (TARGET_64BIT)
22117 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
22118 else
22119 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
22121 else
22122 emit_insn (gen_rtx_SET (compare_result,
22123 gen_rtx_COMPARE (comp_mode, op0, op1)));
22126 /* Some kinds of FP comparisons need an OR operation;
22127 under flag_finite_math_only we don't bother. */
22128 if (FLOAT_MODE_P (mode)
22129 && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW)
22130 && !flag_finite_math_only
22131 && (code == LE || code == GE
22132 || code == UNEQ || code == LTGT
22133 || code == UNGT || code == UNLT))
22135 enum rtx_code or1, or2;
22136 rtx or1_rtx, or2_rtx, compare2_rtx;
22137 rtx or_result = gen_reg_rtx (CCEQmode);
22139 switch (code)
22141 case LE: or1 = LT; or2 = EQ; break;
22142 case GE: or1 = GT; or2 = EQ; break;
22143 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
22144 case LTGT: or1 = LT; or2 = GT; break;
22145 case UNGT: or1 = UNORDERED; or2 = GT; break;
22146 case UNLT: or1 = UNORDERED; or2 = LT; break;
22147 default: gcc_unreachable ();
22149 validate_condition_mode (or1, comp_mode);
22150 validate_condition_mode (or2, comp_mode);
22151 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
22152 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
22153 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
22154 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
22155 const_true_rtx);
22156 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
22158 compare_result = or_result;
22159 code = EQ;
22162 validate_condition_mode (code, GET_MODE (compare_result));
22164 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
22168 /* Return the diagnostic message string if the binary operation OP is
22169 not permitted on TYPE1 and TYPE2, NULL otherwise. */
22171 static const char*
22172 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
22173 const_tree type1,
22174 const_tree type2)
22176 machine_mode mode1 = TYPE_MODE (type1);
22177 machine_mode mode2 = TYPE_MODE (type2);
22179 /* For complex modes, use the inner type. */
22180 if (COMPLEX_MODE_P (mode1))
22181 mode1 = GET_MODE_INNER (mode1);
22183 if (COMPLEX_MODE_P (mode2))
22184 mode2 = GET_MODE_INNER (mode2);
22186 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
22187 double to intermix unless -mfloat128-convert. */
22188 if (mode1 == mode2)
22189 return NULL;
22191 if (!TARGET_FLOAT128_CVT)
22193 if ((mode1 == KFmode && mode2 == IFmode)
22194 || (mode1 == IFmode && mode2 == KFmode))
22195 return N_("__float128 and __ibm128 cannot be used in the same "
22196 "expression");
22198 if (TARGET_IEEEQUAD
22199 && ((mode1 == IFmode && mode2 == TFmode)
22200 || (mode1 == TFmode && mode2 == IFmode)))
22201 return N_("__ibm128 and long double cannot be used in the same "
22202 "expression");
22204 if (!TARGET_IEEEQUAD
22205 && ((mode1 == KFmode && mode2 == TFmode)
22206 || (mode1 == TFmode && mode2 == KFmode)))
22207 return N_("__float128 and long double cannot be used in the same "
22208 "expression");
22211 return NULL;
22215 /* Expand floating point conversion to/from __float128 and __ibm128. */
22217 void
22218 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
22220 machine_mode dest_mode = GET_MODE (dest);
22221 machine_mode src_mode = GET_MODE (src);
22222 convert_optab cvt = unknown_optab;
22223 bool do_move = false;
22224 rtx libfunc = NULL_RTX;
22225 rtx dest2;
22226 typedef rtx (*rtx_2func_t) (rtx, rtx);
22227 rtx_2func_t hw_convert = (rtx_2func_t)0;
22228 size_t kf_or_tf;
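/* Index into hw_conversions below: 0 selects the KFmode entries,
   1 the TFmode entries.  */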
22230 struct hw_conv_t {
22231 rtx_2func_t from_df;
22232 rtx_2func_t from_sf;
22233 rtx_2func_t from_si_sign;
22234 rtx_2func_t from_si_uns;
22235 rtx_2func_t from_di_sign;
22236 rtx_2func_t from_di_uns;
22237 rtx_2func_t to_df;
22238 rtx_2func_t to_sf;
22239 rtx_2func_t to_si_sign;
22240 rtx_2func_t to_si_uns;
22241 rtx_2func_t to_di_sign;
22242 rtx_2func_t to_di_uns;
22243 } hw_conversions[2] = {
22244 /* conversions to/from KFmode */
22246 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
22247 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
22248 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
22249 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
22250 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
22251 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
22252 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
22253 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
22254 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
22255 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
22256 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
22257 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
22260 /* conversions to/from TFmode */
22262 gen_extenddftf2_hw, /* TFmode <- DFmode. */
22263 gen_extendsftf2_hw, /* TFmode <- SFmode. */
22264 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
22265 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
22266 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
22267 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
22268 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
22269 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
22270 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
22271 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
22272 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
22273 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
22277 if (dest_mode == src_mode)
22278 gcc_unreachable ();
22280 /* Eliminate memory operations. */
22281 if (MEM_P (src))
22282 src = force_reg (src_mode, src);
22284 if (MEM_P (dest))
22286 rtx tmp = gen_reg_rtx (dest_mode);
22287 rs6000_expand_float128_convert (tmp, src, unsigned_p);
22288 rs6000_emit_move (dest, tmp, dest_mode);
22289 return;
22292 /* Convert to IEEE 128-bit floating point. */
22293 if (FLOAT128_IEEE_P (dest_mode))
22295 if (dest_mode == KFmode)
22296 kf_or_tf = 0;
22297 else if (dest_mode == TFmode)
22298 kf_or_tf = 1;
22299 else
22300 gcc_unreachable ();
22302 switch (src_mode)
22304 case E_DFmode:
22305 cvt = sext_optab;
22306 hw_convert = hw_conversions[kf_or_tf].from_df;
22307 break;
22309 case E_SFmode:
22310 cvt = sext_optab;
22311 hw_convert = hw_conversions[kf_or_tf].from_sf;
22312 break;
22314 case E_KFmode:
22315 case E_IFmode:
22316 case E_TFmode:
22317 if (FLOAT128_IBM_P (src_mode))
22318 cvt = sext_optab;
22319 else
22320 do_move = true;
22321 break;
22323 case E_SImode:
22324 if (unsigned_p)
22326 cvt = ufloat_optab;
22327 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
22329 else
22331 cvt = sfloat_optab;
22332 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
22334 break;
22336 case E_DImode:
22337 if (unsigned_p)
22339 cvt = ufloat_optab;
22340 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
22342 else
22344 cvt = sfloat_optab;
22345 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
22347 break;
22349 default:
22350 gcc_unreachable ();
22354 /* Convert from IEEE 128-bit floating point. */
22355 else if (FLOAT128_IEEE_P (src_mode))
22357 if (src_mode == KFmode)
22358 kf_or_tf = 0;
22359 else if (src_mode == TFmode)
22360 kf_or_tf = 1;
22361 else
22362 gcc_unreachable ();
22364 switch (dest_mode)
22366 case E_DFmode:
22367 cvt = trunc_optab;
22368 hw_convert = hw_conversions[kf_or_tf].to_df;
22369 break;
22371 case E_SFmode:
22372 cvt = trunc_optab;
22373 hw_convert = hw_conversions[kf_or_tf].to_sf;
22374 break;
22376 case E_KFmode:
22377 case E_IFmode:
22378 case E_TFmode:
22379 if (FLOAT128_IBM_P (dest_mode))
22380 cvt = trunc_optab;
22381 else
22382 do_move = true;
22383 break;
22385 case E_SImode:
22386 if (unsigned_p)
22388 cvt = ufix_optab;
22389 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
22391 else
22393 cvt = sfix_optab;
22394 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
22396 break;
22398 case E_DImode:
22399 if (unsigned_p)
22401 cvt = ufix_optab;
22402 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
22404 else
22406 cvt = sfix_optab;
22407 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
22409 break;
22411 default:
22412 gcc_unreachable ();
22416 /* Both IBM format. */
22417 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
22418 do_move = true;
22420 else
22421 gcc_unreachable ();
22423 /* Handle conversion between TFmode/KFmode. */
22424 if (do_move)
22425 emit_move_insn (dest, gen_lowpart (dest_mode, src));
22427 /* Handle conversion if we have hardware support. */
22428 else if (TARGET_FLOAT128_HW && hw_convert)
22429 emit_insn ((hw_convert) (dest, src));
22431 /* Call an external function to do the conversion. */
22432 else if (cvt != unknown_optab)
22434 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
22435 gcc_assert (libfunc != NULL_RTX);
22437 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
22438 src, src_mode);
22440 gcc_assert (dest2 != NULL_RTX);
22441 if (!rtx_equal_p (dest, dest2))
22442 emit_move_insn (dest, dest2);
22445 else
22446 gcc_unreachable ();
22448 return;
22452 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
22453 can be used as that dest register. Return the dest register. */
22456 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
22458 if (op2 == const0_rtx)
22459 return op1;
22461 if (GET_CODE (scratch) == SCRATCH)
22462 scratch = gen_reg_rtx (mode);
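/* XOR works when OP2 is a register or an immediate usable by the logical
   insns; otherwise compute OP1 - OP2.  Either way SCRATCH becomes zero
   exactly when OP1 == OP2.  */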
22464 if (logical_operand (op2, mode))
22465 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
22466 else
22467 emit_insn (gen_rtx_SET (scratch,
22468 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
22470 return scratch;
22473 void
22474 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
22476 rtx condition_rtx;
22477 machine_mode op_mode;
22478 enum rtx_code cond_code;
22479 rtx result = operands[0];
22481 condition_rtx = rs6000_generate_compare (operands[1], mode);
22482 cond_code = GET_CODE (condition_rtx);
22484 if (cond_code == NE
22485 || cond_code == GE || cond_code == LE
22486 || cond_code == GEU || cond_code == LEU
22487 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
22489 rtx not_result = gen_reg_rtx (CCEQmode);
22490 rtx not_op, rev_cond_rtx;
22491 machine_mode cc_mode;
22493 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
22495 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
22496 SImode, XEXP (condition_rtx, 0), const0_rtx);
22497 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
22498 emit_insn (gen_rtx_SET (not_result, not_op));
22499 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
22502 op_mode = GET_MODE (XEXP (operands[1], 0));
22503 if (op_mode == VOIDmode)
22504 op_mode = GET_MODE (XEXP (operands[1], 1));
22506 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
22508 PUT_MODE (condition_rtx, DImode);
22509 convert_move (result, condition_rtx, 0);
22511 else
22513 PUT_MODE (condition_rtx, SImode);
22514 emit_insn (gen_rtx_SET (result, condition_rtx));
22518 /* Emit a conditional branch: test the comparison in OPERANDS[0] (in MODE) and jump to the label in OPERANDS[3] when it holds. */
22520 void
22521 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
22523 rtx condition_rtx, loc_ref;
22525 condition_rtx = rs6000_generate_compare (operands[0], mode);
22526 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
22527 emit_jump_insn (gen_rtx_SET (pc_rtx,
22528 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
22529 loc_ref, pc_rtx)));
22532 /* Return the string to output a conditional branch to LABEL, which is
22533 the operand template of the label, or NULL if the branch is really a
22534 conditional return.
22536 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
22537 condition code register and its mode specifies what kind of
22538 comparison we made.
22540 REVERSED is nonzero if we should reverse the sense of the comparison.
22542 INSN is the insn. */
22544 char *
22545 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
22547 static char string[64];
22548 enum rtx_code code = GET_CODE (op);
22549 rtx cc_reg = XEXP (op, 0);
22550 machine_mode mode = GET_MODE (cc_reg);
22551 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
22552 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
22553 int really_reversed = reversed ^ need_longbranch;
22554 char *s = string;
22555 const char *ccode;
22556 const char *pred;
22557 rtx note;
22559 validate_condition_mode (code, mode);
22561 /* Work out which way this really branches. We could use
22562 reverse_condition_maybe_unordered here always but this
22563 makes the resulting assembler clearer. */
22564 if (really_reversed)
22566 /* Reversal of FP compares requires care -- an ordered compare
22567 becomes an unordered compare and vice versa. */
22568 if (mode == CCFPmode)
22569 code = reverse_condition_maybe_unordered (code);
22570 else
22571 code = reverse_condition (code);
22574 switch (code)
22576 /* Not all of these are actually distinct opcodes, but
22577 we distinguish them for clarity of the resulting assembler. */
22578 case NE: case LTGT:
22579 ccode = "ne"; break;
22580 case EQ: case UNEQ:
22581 ccode = "eq"; break;
22582 case GE: case GEU:
22583 ccode = "ge"; break;
22584 case GT: case GTU: case UNGT:
22585 ccode = "gt"; break;
22586 case LE: case LEU:
22587 ccode = "le"; break;
22588 case LT: case LTU: case UNLT:
22589 ccode = "lt"; break;
22590 case UNORDERED: ccode = "un"; break;
22591 case ORDERED: ccode = "nu"; break;
22592 case UNGE: ccode = "nl"; break;
22593 case UNLE: ccode = "ng"; break;
22594 default:
22595 gcc_unreachable ();
22598 /* Maybe we have a guess as to how likely the branch is. */
22599 pred = "";
22600 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
22601 if (note != NULL_RTX)
22603 /* PROB is the difference from 50%. */
22604 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
22605 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
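/* With REG_BR_PROB_BASE of 10000, PROB ranges over [-5000, 5000], so the
   48% test below only fires for branches taken at least 98% or at most
   2% of the time.  */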
22607 /* Only hint for highly probable/improbable branches on newer cpus when
22608 we have real profile data, as static prediction overrides processor
22609 dynamic prediction. For older cpus we may as well always hint, but
22610 assume not taken for branches that are very close to 50% as a
22611 mispredicted taken branch is more expensive than a
22612 mispredicted not-taken branch. */
22613 if (rs6000_always_hint
22614 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
22615 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
22616 && br_prob_note_reliable_p (note)))
22618 if (abs (prob) > REG_BR_PROB_BASE / 20
22619 && ((prob > 0) ^ need_longbranch))
22620 pred = "+";
22621 else
22622 pred = "-";
22626 if (label == NULL)
22627 s += sprintf (s, "b%slr%s ", ccode, pred);
22628 else
22629 s += sprintf (s, "b%s%s ", ccode, pred);
22631 /* We need to escape any '%' characters in the reg_names string.
22632 Assume they'd only be the first character.... */
22633 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
22634 *s++ = '%';
22635 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
22637 if (label != NULL)
22639 /* If the branch distance was too far, we may have to use an
22640 unconditional branch to go the distance. */
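/* The condition was reversed above when NEED_LONGBRANCH was set, so this
   emits a conditional branch around an unconditional `b' that can reach
   the distant label.  */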
22641 if (need_longbranch)
22642 s += sprintf (s, ",$+8\n\tb %s", label);
22643 else
22644 s += sprintf (s, ",%s", label);
22647 return string;
22650 /* Return insn for VSX or Altivec comparisons. */
22652 static rtx
22653 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
22655 rtx mask;
22656 machine_mode mode = GET_MODE (op0);
22658 switch (code)
22660 default:
22661 break;
22663 case GE:
22664 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22665 return NULL_RTX;
22666 /* FALLTHRU */
22668 case EQ:
22669 case GT:
22670 case GTU:
22671 case ORDERED:
22672 case UNORDERED:
22673 case UNEQ:
22674 case LTGT:
22675 mask = gen_reg_rtx (mode);
22676 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
22677 return mask;
22680 return NULL_RTX;
22683 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
22684 DMODE is expected destination mode. This is a recursive function. */
22686 static rtx
22687 rs6000_emit_vector_compare (enum rtx_code rcode,
22688 rtx op0, rtx op1,
22689 machine_mode dmode)
22691 rtx mask;
22692 bool swap_operands = false;
22693 bool try_again = false;
22695 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
22696 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
22698 /* See if the comparison works as is. */
22699 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22700 if (mask)
22701 return mask;
22703 switch (rcode)
22705 case LT:
22706 rcode = GT;
22707 swap_operands = true;
22708 try_again = true;
22709 break;
22710 case LTU:
22711 rcode = GTU;
22712 swap_operands = true;
22713 try_again = true;
22714 break;
22715 case NE:
22716 case UNLE:
22717 case UNLT:
22718 case UNGE:
22719 case UNGT:
22720 /* Invert condition and try again.
22721 e.g., A != B becomes ~(A==B). */
22723 enum rtx_code rev_code;
22724 enum insn_code nor_code;
22725 rtx mask2;
22727 rev_code = reverse_condition_maybe_unordered (rcode);
22728 if (rev_code == UNKNOWN)
22729 return NULL_RTX;
22731 nor_code = optab_handler (one_cmpl_optab, dmode);
22732 if (nor_code == CODE_FOR_nothing)
22733 return NULL_RTX;
22735 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
22736 if (!mask2)
22737 return NULL_RTX;
22739 mask = gen_reg_rtx (dmode);
22740 emit_insn (GEN_FCN (nor_code) (mask, mask2));
22741 return mask;
22743 break;
22744 case GE:
22745 case GEU:
22746 case LE:
22747 case LEU:
22748 /* Try GT/GTU/LT/LTU OR EQ */
22750 rtx c_rtx, eq_rtx;
22751 enum insn_code ior_code;
22752 enum rtx_code new_code;
22754 switch (rcode)
22756 case GE:
22757 new_code = GT;
22758 break;
22760 case GEU:
22761 new_code = GTU;
22762 break;
22764 case LE:
22765 new_code = LT;
22766 break;
22768 case LEU:
22769 new_code = LTU;
22770 break;
22772 default:
22773 gcc_unreachable ();
22776 ior_code = optab_handler (ior_optab, dmode);
22777 if (ior_code == CODE_FOR_nothing)
22778 return NULL_RTX;
22780 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
22781 if (!c_rtx)
22782 return NULL_RTX;
22784 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
22785 if (!eq_rtx)
22786 return NULL_RTX;
22788 mask = gen_reg_rtx (dmode);
22789 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
22790 return mask;
22792 break;
22793 default:
22794 return NULL_RTX;
22797 if (try_again)
22799 if (swap_operands)
22800 std::swap (op0, op1);
22802 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
22803 if (mask)
22804 return mask;
22807 /* You only get two chances. */
22808 return NULL_RTX;
22811 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
22812 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
22813 operands for the relation operation COND. */
22816 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
22817 rtx cond, rtx cc_op0, rtx cc_op1)
22819 machine_mode dest_mode = GET_MODE (dest);
22820 machine_mode mask_mode = GET_MODE (cc_op0);
22821 enum rtx_code rcode = GET_CODE (cond);
22822 machine_mode cc_mode = CCmode;
22823 rtx mask;
22824 rtx cond2;
22825 bool invert_move = false;
22827 if (VECTOR_UNIT_NONE_P (dest_mode))
22828 return 0;
22830 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
22831 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
22833 switch (rcode)
22835 /* Swap operands if we can, and fall back to doing the operation as
22836 specified, and doing a NOR to invert the test. */
22837 case NE:
22838 case UNLE:
22839 case UNLT:
22840 case UNGE:
22841 case UNGT:
22842 /* Invert condition and try again.
22843 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
22844 invert_move = true;
22845 rcode = reverse_condition_maybe_unordered (rcode);
22846 if (rcode == UNKNOWN)
22847 return 0;
22848 break;
22850 case GE:
22851 case LE:
22852 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
22854 /* Invert condition to avoid compound test. */
22855 invert_move = true;
22856 rcode = reverse_condition (rcode);
22858 break;
22860 case GTU:
22861 case GEU:
22862 case LTU:
22863 case LEU:
22864 /* Mark unsigned tests with CCUNSmode. */
22865 cc_mode = CCUNSmode;
22867 /* Invert condition to avoid compound test if necessary. */
22868 if (rcode == GEU || rcode == LEU)
22870 invert_move = true;
22871 rcode = reverse_condition (rcode);
22873 break;
22875 default:
22876 break;
22879 /* Get the vector mask for the given relational operations. */
22880 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
22882 if (!mask)
22883 return 0;
22885 if (invert_move)
22886 std::swap (op_true, op_false);
22888 /* Optimize vec1 == vec2, to know the mask generates -1/0. */
22889 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
22890 && (GET_CODE (op_true) == CONST_VECTOR
22891 || GET_CODE (op_false) == CONST_VECTOR))
22893 rtx constant_0 = CONST0_RTX (dest_mode);
22894 rtx constant_m1 = CONSTM1_RTX (dest_mode);
22896 if (op_true == constant_m1 && op_false == constant_0)
22898 emit_move_insn (dest, mask);
22899 return 1;
22902 else if (op_true == constant_0 && op_false == constant_m1)
22904 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
22905 return 1;
22908 /* If we can't use the vector comparison directly, perhaps we can use
22909 the mask for the true or false fields, instead of loading up a
22910 constant. */
22911 if (op_true == constant_m1)
22912 op_true = mask;
22914 if (op_false == constant_0)
22915 op_false = mask;
22918 if (!REG_P (op_true) && !SUBREG_P (op_true))
22919 op_true = force_reg (dest_mode, op_true);
22921 if (!REG_P (op_false) && !SUBREG_P (op_false))
22922 op_false = force_reg (dest_mode, op_false);
22924 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
22925 CONST0_RTX (dest_mode));
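/* Express the select as (mask != 0 ? op_true : op_false) element-wise;
   this if_then_else form is what the vsel/xxsel patterns are written
   to match.  */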
22926 emit_insn (gen_rtx_SET (dest,
22927 gen_rtx_IF_THEN_ELSE (dest_mode,
22928 cond2,
22929 op_true,
22930 op_false)));
22931 return 1;
22934 /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction
22935 for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the operands of the last
22936 comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the
22937 hardware has no such operation. */
22939 static int
22940 rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22942 enum rtx_code code = GET_CODE (op);
22943 rtx op0 = XEXP (op, 0);
22944 rtx op1 = XEXP (op, 1);
22945 machine_mode compare_mode = GET_MODE (op0);
22946 machine_mode result_mode = GET_MODE (dest);
22947 bool max_p = false;
22949 if (result_mode != compare_mode)
22950 return 0;
22952 if (code == GE || code == GT)
22953 max_p = true;
22954 else if (code == LE || code == LT)
22955 max_p = false;
22956 else
22957 return 0;
22959 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
22962 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond))
22963 max_p = !max_p;
22965 else
22966 return 0;
22968 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
22969 return 1;
22972 /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and
22973 XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP applied to the
22974 operands of the last comparison is nonzero/true, FALSE_COND if it is
22975 zero/false. Return 0 if the hardware has no such operation. */
22977 static int
22978 rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
22980 enum rtx_code code = GET_CODE (op);
22981 rtx op0 = XEXP (op, 0);
22982 rtx op1 = XEXP (op, 1);
22983 machine_mode result_mode = GET_MODE (dest);
22984 rtx compare_rtx;
22985 rtx cmove_rtx;
22986 rtx clobber_rtx;
22988 if (!can_create_pseudo_p ())
22989 return 0;
22991 switch (code)
22993 case EQ:
22994 case GE:
22995 case GT:
22996 break;
22998 case NE:
22999 case LT:
23000 case LE:
23001 code = swap_condition (code);
23002 std::swap (op0, op1);
23003 break;
23005 default:
23006 return 0;
23009 /* Generate: [(parallel [(set (dest)
23010 (if_then_else (op (cmp1) (cmp2))
23011 (true)
23012 (false)))
23013 (clobber (scratch))])]. */
23015 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
23016 cmove_rtx = gen_rtx_SET (dest,
23017 gen_rtx_IF_THEN_ELSE (result_mode,
23018 compare_rtx,
23019 true_cond,
23020 false_cond));
23022 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
23023 emit_insn (gen_rtx_PARALLEL (VOIDmode,
23024 gen_rtvec (2, cmove_rtx, clobber_rtx)));
23026 return 1;
23029 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
23030 operands of the last comparison is nonzero/true, FALSE_COND if it
23031 is zero/false. Return 0 if the hardware has no such operation. */
23034 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23036 enum rtx_code code = GET_CODE (op);
23037 rtx op0 = XEXP (op, 0);
23038 rtx op1 = XEXP (op, 1);
23039 machine_mode compare_mode = GET_MODE (op0);
23040 machine_mode result_mode = GET_MODE (dest);
23041 rtx temp;
23042 bool is_against_zero;
23044 /* These modes should always match. */
23045 if (GET_MODE (op1) != compare_mode
23046 /* In the isel case however, we can use a compare immediate, so
23047 op1 may be a small constant. */
23048 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
23049 return 0;
23050 if (GET_MODE (true_cond) != result_mode)
23051 return 0;
23052 if (GET_MODE (false_cond) != result_mode)
23053 return 0;
23055 /* See if we can use the ISA 3.0 (power9) min/max/compare functions. */
23056 if (TARGET_P9_MINMAX
23057 && (compare_mode == SFmode || compare_mode == DFmode)
23058 && (result_mode == SFmode || result_mode == DFmode))
23060 if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
23061 return 1;
23063 if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
23064 return 1;
23067 /* Don't allow using floating point comparisons for integer results for
23068 now. */
23069 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
23070 return 0;
23072 /* First, work out if the hardware can do this at all, or
23073 if it's too slow.... */
23074 if (!FLOAT_MODE_P (compare_mode))
23076 if (TARGET_ISEL)
23077 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
23078 return 0;
23081 is_against_zero = op1 == CONST0_RTX (compare_mode);
23083 /* A floating-point subtract might overflow, underflow, or produce
23084 an inexact result, thus changing the floating-point flags, so it
23085 can't be generated if we care about that. It's safe if one side
23086 of the construct is zero, since then no subtract will be
23087 generated. */
23088 if (SCALAR_FLOAT_MODE_P (compare_mode)
23089 && flag_trapping_math && ! is_against_zero)
23090 return 0;
23092 /* Eliminate half of the comparisons by switching operands; this
23093 makes the remaining code simpler. */
23094 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
23095 || code == LTGT || code == LT || code == UNLE)
23097 code = reverse_condition_maybe_unordered (code);
23098 temp = true_cond;
23099 true_cond = false_cond;
23100 false_cond = temp;
23103 /* UNEQ and LTGT take four instructions for a comparison with zero,
23104 so it'll probably be faster to use a branch here too. */
23105 if (code == UNEQ && HONOR_NANS (compare_mode))
23106 return 0;
23108 /* We're going to try to implement comparisons by performing
23109 a subtract, then comparing against zero. Unfortunately,
23110 Inf - Inf is NaN which is not zero, and so if we don't
23111 know that the operand is finite and the comparison
23112 would treat EQ differently from UNORDERED, we can't do it. */
23113 if (HONOR_INFINITIES (compare_mode)
23114 && code != GT && code != UNGE
23115 && (GET_CODE (op1) != CONST_DOUBLE
23116 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
23117 /* Constructs of the form (a OP b ? a : b) are safe. */
23118 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
23119 || (! rtx_equal_p (op0, true_cond)
23120 && ! rtx_equal_p (op1, true_cond))))
23121 return 0;
23123 /* At this point we know we can use fsel. */
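/* fsel selects its first data input when the compare operand is >= 0.0,
   so the cases below rewrite each condition into a GE test against
   zero.  */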
23125 /* Reduce the comparison to a comparison against zero. */
23126 if (! is_against_zero)
23128 temp = gen_reg_rtx (compare_mode);
23129 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
23130 op0 = temp;
23131 op1 = CONST0_RTX (compare_mode);
23134 /* If we don't care about NaNs we can reduce some of the comparisons
23135 down to faster ones. */
23136 if (! HONOR_NANS (compare_mode))
23137 switch (code)
23139 case GT:
23140 code = LE;
23141 temp = true_cond;
23142 true_cond = false_cond;
23143 false_cond = temp;
23144 break;
23145 case UNGE:
23146 code = GE;
23147 break;
23148 case UNEQ:
23149 code = EQ;
23150 break;
23151 default:
23152 break;
23155 /* Now, reduce everything down to a GE. */
23156 switch (code)
23158 case GE:
23159 break;
23161 case LE:
23162 temp = gen_reg_rtx (compare_mode);
23163 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23164 op0 = temp;
23165 break;
23167 case ORDERED:
23168 temp = gen_reg_rtx (compare_mode);
23169 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
23170 op0 = temp;
23171 break;
23173 case EQ:
23174 temp = gen_reg_rtx (compare_mode);
23175 emit_insn (gen_rtx_SET (temp,
23176 gen_rtx_NEG (compare_mode,
23177 gen_rtx_ABS (compare_mode, op0))));
23178 op0 = temp;
23179 break;
23181 case UNGE:
23182 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
23183 temp = gen_reg_rtx (result_mode);
23184 emit_insn (gen_rtx_SET (temp,
23185 gen_rtx_IF_THEN_ELSE (result_mode,
23186 gen_rtx_GE (VOIDmode,
23187 op0, op1),
23188 true_cond, false_cond)));
23189 false_cond = true_cond;
23190 true_cond = temp;
23192 temp = gen_reg_rtx (compare_mode);
23193 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23194 op0 = temp;
23195 break;
23197 case GT:
23198 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
23199 temp = gen_reg_rtx (result_mode);
23200 emit_insn (gen_rtx_SET (temp,
23201 gen_rtx_IF_THEN_ELSE (result_mode,
23202 gen_rtx_GE (VOIDmode,
23203 op0, op1),
23204 true_cond, false_cond)));
23205 true_cond = false_cond;
23206 false_cond = temp;
23208 temp = gen_reg_rtx (compare_mode);
23209 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
23210 op0 = temp;
23211 break;
23213 default:
23214 gcc_unreachable ();
23217 emit_insn (gen_rtx_SET (dest,
23218 gen_rtx_IF_THEN_ELSE (result_mode,
23219 gen_rtx_GE (VOIDmode,
23220 op0, op1),
23221 true_cond, false_cond)));
23222 return 1;
23225 /* Same as above, but for ints (isel). */
23228 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
23230 rtx condition_rtx, cr;
23231 machine_mode mode = GET_MODE (dest);
23232 enum rtx_code cond_code;
23233 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
23234 bool signedp;
23236 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
23237 return 0;
23239 /* We still have to do the compare, because isel doesn't do a
23240 compare; it just looks at the CRx bits set by a previous compare
23241 instruction. */
23242 condition_rtx = rs6000_generate_compare (op, mode);
23243 cond_code = GET_CODE (condition_rtx);
23244 cr = XEXP (condition_rtx, 0);
23245 signedp = GET_MODE (cr) == CCmode;
23247 isel_func = (mode == SImode
23248 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
23249 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
23251 switch (cond_code)
23253 case LT: case GT: case LTU: case GTU: case EQ:
23254 /* isel handles these directly. */
23255 break;
23257 default:
23258 /* We need to swap the sense of the comparison. */
23260 std::swap (false_cond, true_cond);
23261 PUT_CODE (condition_rtx, reverse_condition (cond_code));
23263 break;
23266 false_cond = force_reg (mode, false_cond);
23267 if (true_cond != const0_rtx)
23268 true_cond = force_reg (mode, true_cond);
23270 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
23272 return 1;
23275 void
23276 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
23278 machine_mode mode = GET_MODE (op0);
23279 enum rtx_code c;
23280 rtx target;
23282 /* VSX/altivec have direct min/max insns. */
23283 if ((code == SMAX || code == SMIN)
23284 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
23285 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
23287 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
23288 return;
23291 if (code == SMAX || code == SMIN)
23292 c = GE;
23293 else
23294 c = GEU;
23296 if (code == SMAX || code == UMAX)
23297 target = emit_conditional_move (dest, c, op0, op1, mode,
23298 op0, op1, mode, 0);
23299 else
23300 target = emit_conditional_move (dest, c, op0, op1, mode,
23301 op1, op0, mode, 0);
23302 gcc_assert (target);
23303 if (target != dest)
23304 emit_move_insn (dest, target);
23307 /* Split a signbit operation on 64-bit machines with direct move. Also allow
23308 for the value to come from memory or to be already loaded into a GPR. */
23310 void
23311 rs6000_split_signbit (rtx dest, rtx src)
23313 machine_mode d_mode = GET_MODE (dest);
23314 machine_mode s_mode = GET_MODE (src);
23315 rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
23316 rtx shift_reg = dest_di;
23318 gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);
23320 if (MEM_P (src))
23322 rtx mem = (WORDS_BIG_ENDIAN
23323 ? adjust_address (src, DImode, 0)
23324 : adjust_address (src, DImode, 8));
23325 emit_insn (gen_rtx_SET (dest_di, mem));
23328 else
23330 unsigned int r = reg_or_subregno (src);
23332 if (INT_REGNO_P (r))
23333 shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));
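/* Pick the GPR of the pair that holds the most-significant doubleword,
   and hence the sign bit: r on big-endian, r + 1 on little-endian.  */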
23335 else
23337 /* Generate the special mfvsrd instruction to get it in a GPR. */
23338 gcc_assert (VSX_REGNO_P (r));
23339 if (s_mode == KFmode)
23340 emit_insn (gen_signbitkf2_dm2 (dest_di, src));
23341 else
23342 emit_insn (gen_signbittf2_dm2 (dest_di, src));
23346 emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
23347 return;
23350 /* A subroutine of the atomic operation splitters. Jump to LABEL if
23351 COND is true. Mark the jump as unlikely to be taken. */
23353 static void
23354 emit_unlikely_jump (rtx cond, rtx label)
23356 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
23357 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
23358 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
23361 /* A subroutine of the atomic operation splitters. Emit a load-locked
23362 instruction in MODE. For QI/HImode, possibly use a pattern that includes
23363 the zero_extend operation. */
23365 static void
23366 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
23368 rtx (*fn) (rtx, rtx) = NULL;
23370 switch (mode)
23372 case E_QImode:
23373 fn = gen_load_lockedqi;
23374 break;
23375 case E_HImode:
23376 fn = gen_load_lockedhi;
23377 break;
23378 case E_SImode:
23379 if (GET_MODE (mem) == QImode)
23380 fn = gen_load_lockedqi_si;
23381 else if (GET_MODE (mem) == HImode)
23382 fn = gen_load_lockedhi_si;
23383 else
23384 fn = gen_load_lockedsi;
23385 break;
23386 case E_DImode:
23387 fn = gen_load_lockeddi;
23388 break;
23389 case E_TImode:
23390 fn = gen_load_lockedti;
23391 break;
23392 default:
23393 gcc_unreachable ();
23395 emit_insn (fn (reg, mem));
23398 /* A subroutine of the atomic operation splitters. Emit a store-conditional
23399 instruction in MODE. */
23401 static void
23402 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
23404 rtx (*fn) (rtx, rtx, rtx) = NULL;
23406 switch (mode)
23408 case E_QImode:
23409 fn = gen_store_conditionalqi;
23410 break;
23411 case E_HImode:
23412 fn = gen_store_conditionalhi;
23413 break;
23414 case E_SImode:
23415 fn = gen_store_conditionalsi;
23416 break;
23417 case E_DImode:
23418 fn = gen_store_conditionaldi;
23419 break;
23420 case E_TImode:
23421 fn = gen_store_conditionalti;
23422 break;
23423 default:
23424 gcc_unreachable ();
23427 /* Emit sync before stwcx. to address PPC405 Erratum. */
23428 if (PPC405_ERRATUM77)
23429 emit_insn (gen_hwsync ());
23431 emit_insn (fn (res, mem, val));
23434 /* Expand barriers before and after a load_locked/store_cond sequence. */
23436 static rtx
23437 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
23439 rtx addr = XEXP (mem, 0);
23441 if (!legitimate_indirect_address_p (addr, reload_completed)
23442 && !legitimate_indexed_address_p (addr, reload_completed))
23444 addr = force_reg (Pmode, addr);
23445 mem = replace_equiv_address_nv (mem, addr);
23448 switch (model)
23450 case MEMMODEL_RELAXED:
23451 case MEMMODEL_CONSUME:
23452 case MEMMODEL_ACQUIRE:
23453 break;
23454 case MEMMODEL_RELEASE:
23455 case MEMMODEL_ACQ_REL:
23456 emit_insn (gen_lwsync ());
23457 break;
23458 case MEMMODEL_SEQ_CST:
23459 emit_insn (gen_hwsync ());
23460 break;
23461 default:
23462 gcc_unreachable ();
23464 return mem;
23467 static void
23468 rs6000_post_atomic_barrier (enum memmodel model)
23470 switch (model)
23472 case MEMMODEL_RELAXED:
23473 case MEMMODEL_CONSUME:
23474 case MEMMODEL_RELEASE:
23475 break;
23476 case MEMMODEL_ACQUIRE:
23477 case MEMMODEL_ACQ_REL:
23478 case MEMMODEL_SEQ_CST:
23479 emit_insn (gen_isync ());
23480 break;
23481 default:
23482 gcc_unreachable ();
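/* The isync above, executed after the conditional branch that tests
   the load-reserve result, is the classic PowerPC acquire idiom: it
   keeps later accesses from being satisfied before the atomic
   operation completes.  */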
23486 /* A subroutine of the various atomic expanders. For sub-word operations,
23487 we must adjust things to operate on SImode. Given the original MEM,
23488 return a new aligned memory. Also build and return the quantities by
23489 which to shift and mask. */
23491 static rtx
23492 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
23494 rtx addr, align, shift, mask, mem;
23495 HOST_WIDE_INT shift_mask;
23496 machine_mode mode = GET_MODE (orig_mem);
23498 /* For smaller modes, we have to implement this via SImode. */
23499 shift_mask = (mode == QImode ? 0x18 : 0x10);
23501 addr = XEXP (orig_mem, 0);
23502 addr = force_reg (GET_MODE (addr), addr);
23504 /* Generate a new MEM for the aligned word containing the subword. We
23505 do not want any of the existing MEM_ATTR data, as we're now
23506 accessing memory outside the original object. */
23507 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
23508 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23509 mem = gen_rtx_MEM (SImode, align);
23510 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
23511 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
23512 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
23514 /* Shift amount for subword relative to aligned word. */
23515 shift = gen_reg_rtx (SImode);
23516 addr = gen_lowpart (SImode, addr);
23517 rtx tmp = gen_reg_rtx (SImode);
23518 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
23519 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
23520 if (BYTES_BIG_ENDIAN)
23521 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
23522 shift, 1, OPTAB_LIB_WIDEN);
23523 *pshift = shift;
23525 /* Mask for insertion. */
23526 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
23527 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
23528 *pmask = mask;
23530 return mem;
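/* Worked example, little-endian HImode with ADDR % 4 == 2 (values
   illustrative):
     align = addr & -4              the containing aligned word
     shift = (addr << 3) & 0x10     = 16, bit offset of the halfword
     mask  = 0xffff << 16           selects the halfword in the word
   On big-endian targets SHIFT is XORed with the shift mask, since the
   sub-word at the lowest address then sits at the most significant
   end of the word.  */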
23533 /* A subroutine of the various atomic expanders. For sub-word operands,
23534 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
23536 static rtx
23537 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
23539 rtx x;
23541 x = gen_reg_rtx (SImode);
23542 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
23543 gen_rtx_NOT (SImode, mask),
23544 oldval)));
23546 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
23548 return x;
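/* I.e. the value built above is (OLDVAL & ~MASK) | NEWVAL, where
   NEWVAL is assumed to be already shifted into position with no bits
   set outside MASK.  */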
23551 /* A subroutine of the various atomic expanders. For sub-word operands,
23552 extract WIDE to NARROW via SHIFT. */
23554 static void
23555 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
23557 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
23558 wide, 1, OPTAB_LIB_WIDEN);
23559 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
23562 /* Expand an atomic compare and swap operation. */
23564 void
23565 rs6000_expand_atomic_compare_and_swap (rtx operands[])
23567 rtx boolval, retval, mem, oldval, newval, cond;
23568 rtx label1, label2, x, mask, shift;
23569 machine_mode mode, orig_mode;
23570 enum memmodel mod_s, mod_f;
23571 bool is_weak;
23573 boolval = operands[0];
23574 retval = operands[1];
23575 mem = operands[2];
23576 oldval = operands[3];
23577 newval = operands[4];
23578 is_weak = (INTVAL (operands[5]) != 0);
23579 mod_s = memmodel_base (INTVAL (operands[6]));
23580 mod_f = memmodel_base (INTVAL (operands[7]));
23581 orig_mode = mode = GET_MODE (mem);
23583 mask = shift = NULL_RTX;
23584 if (mode == QImode || mode == HImode)
23586 /* Before power8, we didn't have access to lbarx/lharx, so generate a
23587 lwarx and use shift/mask operations. With power8, we need to do the
23588 comparison in SImode, but the store is still done in QI/HImode. */
23589 oldval = convert_modes (SImode, mode, oldval, 1);
23591 if (!TARGET_SYNC_HI_QI)
23593 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23595 /* Shift and mask OLDVAL into position within the word. */
23596 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
23597 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23599 /* Shift and mask NEWVAL into position within the word. */
23600 newval = convert_modes (SImode, mode, newval, 1);
23601 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
23602 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23605 /* Prepare to adjust the return value. */
23606 retval = gen_reg_rtx (SImode);
23607 mode = SImode;
23609 else if (reg_overlap_mentioned_p (retval, oldval))
23610 oldval = copy_to_reg (oldval);
23612 if (mode != TImode && !reg_or_short_operand (oldval, mode))
23613 oldval = copy_to_mode_reg (mode, oldval);
23615 if (reg_overlap_mentioned_p (retval, newval))
23616 newval = copy_to_reg (newval);
23618 mem = rs6000_pre_atomic_barrier (mem, mod_s);
23620 label1 = NULL_RTX;
23621 if (!is_weak)
23623 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23624 emit_label (XEXP (label1, 0));
23626 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23628 emit_load_locked (mode, retval, mem);
23630 x = retval;
23631 if (mask)
23632 x = expand_simple_binop (SImode, AND, retval, mask,
23633 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23635 cond = gen_reg_rtx (CCmode);
23636 /* If we have TImode, synthesize a comparison. */
23637 if (mode != TImode)
23638 x = gen_rtx_COMPARE (CCmode, x, oldval);
23639 else
23641 rtx xor1_result = gen_reg_rtx (DImode);
23642 rtx xor2_result = gen_reg_rtx (DImode);
23643 rtx or_result = gen_reg_rtx (DImode);
23644 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
23645 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
23646 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
23647 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
23649 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
23650 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
23651 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
23652 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
23655 emit_insn (gen_rtx_SET (cond, x));
23657 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23658 emit_unlikely_jump (x, label2);
23660 x = newval;
23661 if (mask)
23662 x = rs6000_mask_atomic_subword (retval, newval, mask);
23664 emit_store_conditional (orig_mode, cond, mem, x);
23666 if (!is_weak)
23668 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23669 emit_unlikely_jump (x, label1);
23672 if (!is_mm_relaxed (mod_f))
23673 emit_label (XEXP (label2, 0));
23675 rs6000_post_atomic_barrier (mod_s);
23677 if (is_mm_relaxed (mod_f))
23678 emit_label (XEXP (label2, 0));
23680 if (shift)
23681 rs6000_finish_atomic_subword (operands[1], retval, shift);
23682 else if (mode != GET_MODE (operands[1]))
23683 convert_move (operands[1], retval, 1);
23685 /* In all cases, CR0 contains EQ on success, and NE on failure. */
23686 x = gen_rtx_EQ (SImode, cond, const0_rtx);
23687 emit_insn (gen_rtx_SET (boolval, x));
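/* For reference, a sketch of the word-sized strong compare-and-swap
   loop emitted above (register numbers and labels are illustrative
   only; barriers depend on the memory model as described earlier):

	loop:	lwarx   r9,0,r3		# load-reserve current value
		cmpw    cr0,r9,r4	# compare against OLDVAL
		bne-    cr0,fail	# mismatch: fail (unlikely)
		stwcx.  r5,0,r3		# try to store NEWVAL
		bne-    cr0,loop	# lost reservation: retry
	fail:

   On exit CR0 holds EQ on success and NE on failure, which the final
   SET copies into BOOLVAL.  */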
23690 /* Expand an atomic exchange operation. */
23692 void
23693 rs6000_expand_atomic_exchange (rtx operands[])
23695 rtx retval, mem, val, cond;
23696 machine_mode mode;
23697 enum memmodel model;
23698 rtx label, x, mask, shift;
23700 retval = operands[0];
23701 mem = operands[1];
23702 val = operands[2];
23703 model = memmodel_base (INTVAL (operands[3]));
23704 mode = GET_MODE (mem);
23706 mask = shift = NULL_RTX;
23707 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
23709 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23711 /* Shift and mask VAL into position within the word. */
23712 val = convert_modes (SImode, mode, val, 1);
23713 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23714 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23716 /* Prepare to adjust the return value. */
23717 retval = gen_reg_rtx (SImode);
23718 mode = SImode;
23721 mem = rs6000_pre_atomic_barrier (mem, model);
23723 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
23724 emit_label (XEXP (label, 0));
23726 emit_load_locked (mode, retval, mem);
23728 x = val;
23729 if (mask)
23730 x = rs6000_mask_atomic_subword (retval, val, mask);
23732 cond = gen_reg_rtx (CCmode);
23733 emit_store_conditional (mode, cond, mem, x);
23735 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23736 emit_unlikely_jump (x, label);
23738 rs6000_post_atomic_barrier (model);
23740 if (shift)
23741 rs6000_finish_atomic_subword (operands[0], retval, shift);
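/* A sketch of the word-sized exchange loop emitted above
   (illustrative registers):

	loop:	lwarx   r9,0,r3		# RETVAL = *MEM
		stwcx.  r4,0,r3		# attempt to store VAL
		bne-    loop		# retry if reservation lost

   RETVAL ends up holding the pre-exchange contents of MEM.  */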
23744 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
23745 to perform. MEM is the memory on which to operate. VAL is the second
23746 operand of the binary operator. BEFORE and AFTER are optional locations to
23747 return the value of MEM either before or after the operation. MODEL_RTX
23748 is a CONST_INT containing the memory model to use. */
23750 void
23751 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
23752 rtx orig_before, rtx orig_after, rtx model_rtx)
23754 enum memmodel model = memmodel_base (INTVAL (model_rtx));
23755 machine_mode mode = GET_MODE (mem);
23756 machine_mode store_mode = mode;
23757 rtx label, x, cond, mask, shift;
23758 rtx before = orig_before, after = orig_after;
23760 mask = shift = NULL_RTX;
23761 /* On power8, we want to use SImode for the operation. On earlier systems,
23762 do the operation on the containing word and shift/mask to get the proper
23763 byte or halfword. */
23764 if (mode == QImode || mode == HImode)
23766 if (TARGET_SYNC_HI_QI)
23768 val = convert_modes (SImode, mode, val, 1);
23770 /* Prepare to adjust the return value. */
23771 before = gen_reg_rtx (SImode);
23772 if (after)
23773 after = gen_reg_rtx (SImode);
23774 mode = SImode;
23776 else
23778 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
23780 /* Shift and mask VAL into position within the word. */
23781 val = convert_modes (SImode, mode, val, 1);
23782 val = expand_simple_binop (SImode, ASHIFT, val, shift,
23783 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23785 switch (code)
23787 case IOR:
23788 case XOR:
23789 /* We've already zero-extended VAL. That is sufficient to
23790 make certain that it does not affect other bits. */
23791 mask = NULL;
23792 break;
23794 case AND:
23795 /* If we make certain that all of the other bits in VAL are
23796 set, that will be sufficient to not affect other bits. */
23797 x = gen_rtx_NOT (SImode, mask);
23798 x = gen_rtx_IOR (SImode, x, val);
23799 emit_insn (gen_rtx_SET (val, x));
23800 mask = NULL;
23801 break;
23803 case NOT:
23804 case PLUS:
23805 case MINUS:
23806 /* These will all affect bits outside the field and need
23807 adjustment via MASK within the loop. */
23808 break;
23810 default:
23811 gcc_unreachable ();
23814 /* Prepare to adjust the return value. */
23815 before = gen_reg_rtx (SImode);
23816 if (after)
23817 after = gen_reg_rtx (SImode);
23818 store_mode = mode = SImode;
23822 mem = rs6000_pre_atomic_barrier (mem, model);
23824 label = gen_label_rtx ();
23825 emit_label (label);
23826 label = gen_rtx_LABEL_REF (VOIDmode, label);
23828 if (before == NULL_RTX)
23829 before = gen_reg_rtx (mode);
23831 emit_load_locked (mode, before, mem);
23833 if (code == NOT)
23835 x = expand_simple_binop (mode, AND, before, val,
23836 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23837 after = expand_simple_unop (mode, NOT, x, after, 1);
23839 else
23841 after = expand_simple_binop (mode, code, before, val,
23842 after, 1, OPTAB_LIB_WIDEN);
23845 x = after;
23846 if (mask)
23848 x = expand_simple_binop (SImode, AND, after, mask,
23849 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23850 x = rs6000_mask_atomic_subword (before, x, mask);
23852 else if (store_mode != mode)
23853 x = convert_modes (store_mode, mode, x, 1);
23855 cond = gen_reg_rtx (CCmode);
23856 emit_store_conditional (store_mode, cond, mem, x);
23858 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
23859 emit_unlikely_jump (x, label);
23861 rs6000_post_atomic_barrier (model);
23863 if (shift)
23865 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
23866 then do the calculations in a SImode register. */
23867 if (orig_before)
23868 rs6000_finish_atomic_subword (orig_before, before, shift);
23869 if (orig_after)
23870 rs6000_finish_atomic_subword (orig_after, after, shift);
23872 else if (store_mode != mode)
23874 /* QImode/HImode on machines with lbarx/lharx where we do the native
23875 operation and then do the calculations in a SImode register. */
23876 if (orig_before)
23877 convert_move (orig_before, before, 1);
23878 if (orig_after)
23879 convert_move (orig_after, after, 1);
23881 else if (orig_after && after != orig_after)
23882 emit_move_insn (orig_after, after);
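/* E.g. for a word-sized atomic fetch-and-add (CODE == PLUS) the loop
   above is essentially (illustrative registers):

	loop:	lwarx   r9,0,r3		# BEFORE = *MEM
		add     r10,r9,r4	# AFTER = BEFORE + VAL
		stwcx.  r10,0,r3	# try to store AFTER
		bne-    loop		# retry on lost reservation

   with barriers placed around the loop according to MODEL.  */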
23885 /* Emit instructions to move SRC to DST. Called by splitters for
23886 multi-register moves. It will emit at most one instruction for
23887 each register that is accessed; that is, it won't emit li/lis pairs
23888 (or equivalent for 64-bit code). One of SRC or DST must be a hard
23889 register. */
23891 void
23892 rs6000_split_multireg_move (rtx dst, rtx src)
23894 /* The register number of the first register being moved. */
23895 int reg;
23896 /* The mode that is to be moved. */
23897 machine_mode mode;
23898 /* The mode that the move is being done in, and its size. */
23899 machine_mode reg_mode;
23900 int reg_mode_size;
23901 /* The number of registers that will be moved. */
23902 int nregs;
23904 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
23905 mode = GET_MODE (dst);
23906 nregs = hard_regno_nregs (reg, mode);
23907 if (FP_REGNO_P (reg))
23908 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
23909 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
23910 else if (ALTIVEC_REGNO_P (reg))
23911 reg_mode = V16QImode;
23912 else
23913 reg_mode = word_mode;
23914 reg_mode_size = GET_MODE_SIZE (reg_mode);
23916 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
23918 /* TDmode residing in FP registers is special, since the ISA requires that
23919 the lower-numbered word of a register pair is always the most significant
23920 word, even in little-endian mode. This does not match the usual subreg
23921 semantics, so we cannot use simplify_gen_subreg in those cases. Access
23922 the appropriate constituent registers "by hand" in little-endian mode.
23924 Note we do not need to check for destructive overlap here since TDmode
23925 can only reside in even/odd register pairs. */
23926 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
23928 rtx p_src, p_dst;
23929 int i;
23931 for (i = 0; i < nregs; i++)
23933 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
23934 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
23935 else
23936 p_src = simplify_gen_subreg (reg_mode, src, mode,
23937 i * reg_mode_size);
23939 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
23940 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
23941 else
23942 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
23943 i * reg_mode_size);
23945 emit_insn (gen_rtx_SET (p_dst, p_src));
23948 return;
23951 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
23953 /* Move register range backwards, if we might have destructive
23954 overlap. */
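/* Example: a TImode move from r3:r4 to r4:r5.  Copying r3 to r4
   first would clobber the r4 that still has to be copied to r5, so
   emit r4->r5 first, then r3->r4.  Register numbers here are
   illustrative.  */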
23955 int i;
23956 for (i = nregs - 1; i >= 0; i--)
23957 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
23958 i * reg_mode_size),
23959 simplify_gen_subreg (reg_mode, src, mode,
23960 i * reg_mode_size)));
23962 else
23964 int i;
23965 int j = -1;
23966 bool used_update = false;
23967 rtx restore_basereg = NULL_RTX;
23969 if (MEM_P (src) && INT_REGNO_P (reg))
23971 rtx breg;
23973 if (GET_CODE (XEXP (src, 0)) == PRE_INC
23974 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
23976 rtx delta_rtx;
23977 breg = XEXP (XEXP (src, 0), 0);
23978 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
23979 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
23980 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
23981 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
23982 src = replace_equiv_address (src, breg);
23984 else if (! rs6000_offsettable_memref_p (src, reg_mode))
23986 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
23988 rtx basereg = XEXP (XEXP (src, 0), 0);
23989 if (TARGET_UPDATE)
23991 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
23992 emit_insn (gen_rtx_SET (ndst,
23993 gen_rtx_MEM (reg_mode,
23994 XEXP (src, 0))));
23995 used_update = true;
23997 else
23998 emit_insn (gen_rtx_SET (basereg,
23999 XEXP (XEXP (src, 0), 1)));
24000 src = replace_equiv_address (src, basereg);
24002 else
24004 rtx basereg = gen_rtx_REG (Pmode, reg);
24005 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
24006 src = replace_equiv_address (src, basereg);
24010 breg = XEXP (src, 0);
24011 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
24012 breg = XEXP (breg, 0);
24014 /* If the base register we are using to address memory is
24015 also a destination reg, then change that register last. */
24016 if (REG_P (breg)
24017 && REGNO (breg) >= REGNO (dst)
24018 && REGNO (breg) < REGNO (dst) + nregs)
24019 j = REGNO (breg) - REGNO (dst);
24021 else if (MEM_P (dst) && INT_REGNO_P (reg))
24023 rtx breg;
24025 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
24026 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
24028 rtx delta_rtx;
24029 breg = XEXP (XEXP (dst, 0), 0);
24030 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
24031 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
24032 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
24034 /* We have to update the breg before doing the store.
24035 Use store with update, if available. */
24037 if (TARGET_UPDATE)
24039 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24040 emit_insn (TARGET_32BIT
24041 ? (TARGET_POWERPC64
24042 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
24043 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
24044 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
24045 used_update = true;
24047 else
24048 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
24049 dst = replace_equiv_address (dst, breg);
24051 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
24052 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
24054 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
24056 rtx basereg = XEXP (XEXP (dst, 0), 0);
24057 if (TARGET_UPDATE)
24059 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
24060 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
24061 XEXP (dst, 0)),
24062 nsrc));
24063 used_update = true;
24065 else
24066 emit_insn (gen_rtx_SET (basereg,
24067 XEXP (XEXP (dst, 0), 1)));
24068 dst = replace_equiv_address (dst, basereg);
24070 else
24072 rtx basereg = XEXP (XEXP (dst, 0), 0);
24073 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
24074 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
24075 && REG_P (basereg)
24076 && REG_P (offsetreg)
24077 && REGNO (basereg) != REGNO (offsetreg));
24078 if (REGNO (basereg) == 0)
24080 rtx tmp = offsetreg;
24081 offsetreg = basereg;
24082 basereg = tmp;
24084 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
24085 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
24086 dst = replace_equiv_address (dst, basereg);
24089 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
24090 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
24093 for (i = 0; i < nregs; i++)
24095 /* Calculate index to next subword. */
24096 ++j;
24097 if (j == nregs)
24098 j = 0;
24100 /* If the compiler already emitted the move of the first word by a
24101 store with update, there is no need to do anything. */
24102 if (j == 0 && used_update)
24103 continue;
24105 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
24106 j * reg_mode_size),
24107 simplify_gen_subreg (reg_mode, src, mode,
24108 j * reg_mode_size)));
24110 if (restore_basereg != NULL_RTX)
24111 emit_insn (restore_basereg);
24116 /* This page contains routines that are used to determine what the
24117 function prologue and epilogue code will do and write them out. */
24119 /* Determine whether the REG is really used. */
24121 static bool
24122 save_reg_p (int reg)
24124 /* We need to mark the PIC offset register live for the same conditions
24125 as it is set up, or otherwise it won't be saved before we clobber it. */
24127 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
24129 /* When calling eh_return, we must return true for all the cases
24130 where conditional_register_usage marks the PIC offset reg
24131 call used. */
24132 if (TARGET_TOC && TARGET_MINIMAL_TOC
24133 && (crtl->calls_eh_return
24134 || df_regs_ever_live_p (reg)
24135 || !constant_pool_empty_p ()))
24136 return true;
24138 if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
24139 && flag_pic)
24140 return true;
24143 return !call_used_regs[reg] && df_regs_ever_live_p (reg);
24146 /* Return the first fixed-point register that is required to be
24147 saved. 32 if none. */
24150 first_reg_to_save (void)
24152 int first_reg;
24154 /* Find lowest numbered live register. */
24155 for (first_reg = 13; first_reg <= 31; first_reg++)
24156 if (save_reg_p (first_reg))
24157 break;
24159 #if TARGET_MACHO
24160 if (flag_pic
24161 && crtl->uses_pic_offset_table
24162 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
24163 return RS6000_PIC_OFFSET_TABLE_REGNUM;
24164 #endif
24166 return first_reg;
24169 /* Similar, for FP regs. */
24172 first_fp_reg_to_save (void)
24174 int first_reg;
24176 /* Find lowest numbered live register. */
24177 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
24178 if (save_reg_p (first_reg))
24179 break;
24181 return first_reg;
24184 /* Similar, for AltiVec regs. */
24186 static int
24187 first_altivec_reg_to_save (void)
24189 int i;
24191 /* Stack frame remains as is unless we are in AltiVec ABI. */
24192 if (! TARGET_ALTIVEC_ABI)
24193 return LAST_ALTIVEC_REGNO + 1;
24195 /* On Darwin, the unwind routines are compiled without
24196 TARGET_ALTIVEC, and use save_world to save/restore the
24197 altivec registers when necessary. */
24198 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24199 && ! TARGET_ALTIVEC)
24200 return FIRST_ALTIVEC_REGNO + 20;
24202 /* Find lowest numbered live register. */
24203 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
24204 if (save_reg_p (i))
24205 break;
24207 return i;
24210 /* Return a 32-bit mask of the AltiVec registers we need to set in
24211 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
24212 the 32-bit word is 0. */
24214 static unsigned int
24215 compute_vrsave_mask (void)
24217 unsigned int i, mask = 0;
24219 /* On Darwin, the unwind routines are compiled without
24220 TARGET_ALTIVEC, and use save_world to save/restore the
24221 call-saved altivec registers when necessary. */
24222 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
24223 && ! TARGET_ALTIVEC)
24224 mask |= 0xFFF;
24226 /* First, find out if we use _any_ altivec registers. */
24227 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
24228 if (df_regs_ever_live_p (i))
24229 mask |= ALTIVEC_REG_BIT (i);
24231 if (mask == 0)
24232 return mask;
24234 /* Next, remove the argument registers from the set. These must
24235 be in the VRSAVE mask set by the caller, so we don't need to add
24236 them in again. More importantly, the mask we compute here is
24237 used to generate CLOBBERs in the set_vrsave insn, and we do not
24238 wish the argument registers to die. */
24239 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
24240 mask &= ~ALTIVEC_REG_BIT (i);
24242 /* Similarly, remove the return value from the set. */
24244 bool yes = false;
24245 diddle_return_value (is_altivec_return_reg, &yes);
24246 if (yes)
24247 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
24250 return mask;
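/* Illustration of the bit layout: V0 maps to the most significant
   bit, so a function whose only live vector registers are the
   call-saved v20..v31 yields mask == 0x00000fff, matching the
   constant used for the Darwin save-world case above.  */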
24253 /* For a very restricted set of circumstances, we can cut down the
24254 size of prologues/epilogues by calling our own save/restore-the-world
24255 routines. */
24257 static void
24258 compute_save_world_info (rs6000_stack_t *info)
24260 info->world_save_p = 1;
24261 info->world_save_p
24262 = (WORLD_SAVE_P (info)
24263 && DEFAULT_ABI == ABI_DARWIN
24264 && !cfun->has_nonlocal_label
24265 && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
24266 && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
24267 && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
24268 && info->cr_save_p);
24270 /* This will not work in conjunction with sibcalls. Make sure there
24271 are none. (This check is expensive, but seldom executed.) */
24272 if (WORLD_SAVE_P (info))
24274 rtx_insn *insn;
24275 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
24276 if (CALL_P (insn) && SIBLING_CALL_P (insn))
24278 info->world_save_p = 0;
24279 break;
24283 if (WORLD_SAVE_P (info))
24285 /* Even if we're not touching VRsave, make sure there's room on the
24286 stack for it, if it looks like we're calling SAVE_WORLD, which
24287 will attempt to save it. */
24288 info->vrsave_size = 4;
24290 /* If we are going to save the world, we need to save the link register too. */
24291 info->lr_save_p = 1;
24293 /* "Save" the VRsave register too if we're saving the world. */
24294 if (info->vrsave_mask == 0)
24295 info->vrsave_mask = compute_vrsave_mask ();
24297 /* Because the Darwin register save/restore routines only handle
24298 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
24299 check. */
24300 gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
24301 && (info->first_altivec_reg_save
24302 >= FIRST_SAVED_ALTIVEC_REGNO));
24305 return;
24309 static void
24310 is_altivec_return_reg (rtx reg, void *xyes)
24312 bool *yes = (bool *) xyes;
24313 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
24314 *yes = true;
24318 /* Return whether REG is a global user reg or has been specified by
24319 -ffixed-REG. We should not restore these, and so cannot use
24320 lmw or out-of-line restore functions if there are any. We also
24321 can't save them (well, emit frame notes for them), because frame
24322 unwinding during exception handling will restore saved registers. */
24324 static bool
24325 fixed_reg_p (int reg)
24327 /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the
24328 backend sets it, overriding anything the user might have given. */
24329 if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24330 && ((DEFAULT_ABI == ABI_V4 && flag_pic)
24331 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
24332 || (TARGET_TOC && TARGET_MINIMAL_TOC)))
24333 return false;
24335 return fixed_regs[reg];
24338 /* Determine the strategy for saving/restoring registers. */
24340 enum {
24341 SAVE_MULTIPLE = 0x1,
24342 SAVE_INLINE_GPRS = 0x2,
24343 SAVE_INLINE_FPRS = 0x4,
24344 SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
24345 SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
24346 SAVE_INLINE_VRS = 0x20,
24347 REST_MULTIPLE = 0x100,
24348 REST_INLINE_GPRS = 0x200,
24349 REST_INLINE_FPRS = 0x400,
24350 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
24351 REST_INLINE_VRS = 0x1000
24354 static int
24355 rs6000_savres_strategy (rs6000_stack_t *info,
24356 bool using_static_chain_p)
24358 int strategy = 0;
24360 /* Select between in-line and out-of-line save and restore of regs.
24361 First, all the obvious cases where we don't use out-of-line. */
24362 if (crtl->calls_eh_return
24363 || cfun->machine->ra_need_lr)
24364 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
24365 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
24366 | SAVE_INLINE_VRS | REST_INLINE_VRS);
24368 if (info->first_gp_reg_save == 32)
24369 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24371 if (info->first_fp_reg_save == 64
24372 /* The out-of-line FP routines use double-precision stores;
24373 we can't use those routines if we don't have such stores. */
24374 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
24375 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24377 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
24378 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24380 /* Define cutoff for using out-of-line functions to save registers. */
24381 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
24383 if (!optimize_size)
24385 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24386 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24387 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24389 else
24391 /* Prefer out-of-line restore if it will exit. */
24392 if (info->first_fp_reg_save > 61)
24393 strategy |= SAVE_INLINE_FPRS;
24394 if (info->first_gp_reg_save > 29)
24396 if (info->first_fp_reg_save == 64)
24397 strategy |= SAVE_INLINE_GPRS;
24398 else
24399 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24401 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
24402 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24405 else if (DEFAULT_ABI == ABI_DARWIN)
24407 if (info->first_fp_reg_save > 60)
24408 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24409 if (info->first_gp_reg_save > 29)
24410 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24411 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24413 else
24415 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24416 if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
24417 || info->first_fp_reg_save > 61)
24418 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
24419 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24420 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
24423 /* Don't bother to try to save things out-of-line if r11 is occupied
24424 by the static chain. It would require too much fiddling and the
24425 static chain is rarely used anyway. FPRs are saved w.r.t the stack
24426 pointer on Darwin, and AIX uses r1 or r12. */
24427 if (using_static_chain_p
24428 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
24429 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
24430 | SAVE_INLINE_GPRS
24431 | SAVE_INLINE_VRS);
24433 /* Don't ever restore fixed regs. That means we can't use the
24434 out-of-line register restore functions if a fixed reg is in the
24435 range of regs restored. */
24436 if (!(strategy & REST_INLINE_FPRS))
24437 for (int i = info->first_fp_reg_save; i < 64; i++)
24438 if (fixed_regs[i])
24440 strategy |= REST_INLINE_FPRS;
24441 break;
24444 /* We can only use the out-of-line routines to restore fprs if we've
24445 saved all the registers from first_fp_reg_save in the prologue.
24446 Otherwise, we risk loading garbage. Of course, if we have saved
24447 out-of-line then we know we haven't skipped any fprs. */
24448 if ((strategy & SAVE_INLINE_FPRS)
24449 && !(strategy & REST_INLINE_FPRS))
24450 for (int i = info->first_fp_reg_save; i < 64; i++)
24451 if (!save_reg_p (i))
24453 strategy |= REST_INLINE_FPRS;
24454 break;
24457 /* Similarly, for altivec regs. */
24458 if (!(strategy & REST_INLINE_VRS))
24459 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24460 if (fixed_regs[i])
24462 strategy |= REST_INLINE_VRS;
24463 break;
24466 if ((strategy & SAVE_INLINE_VRS)
24467 && !(strategy & REST_INLINE_VRS))
24468 for (int i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
24469 if (!save_reg_p (i))
24471 strategy |= REST_INLINE_VRS;
24472 break;
24475 /* info->lr_save_p isn't yet set if the only reason lr needs to be
24476 saved is an out-of-line save or restore. Set up the value for
24477 the next test (excluding out-of-line gprs). */
24478 bool lr_save_p = (info->lr_save_p
24479 || !(strategy & SAVE_INLINE_FPRS)
24480 || !(strategy & SAVE_INLINE_VRS)
24481 || !(strategy & REST_INLINE_FPRS)
24482 || !(strategy & REST_INLINE_VRS));
24484 if (TARGET_MULTIPLE
24485 && !TARGET_POWERPC64
24486 && info->first_gp_reg_save < 31
24487 && !(flag_shrink_wrap
24488 && flag_shrink_wrap_separate
24489 && optimize_function_for_speed_p (cfun)))
24491 int count = 0;
24492 for (int i = info->first_gp_reg_save; i < 32; i++)
24493 if (save_reg_p (i))
24494 count++;
24496 if (count <= 1)
24497 /* Don't use store multiple if only one reg needs to be
24498 saved. This can occur for example when the ABI_V4 pic reg
24499 (r30) needs to be saved to make calls, but r31 is not
24500 used. */
24501 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24502 else
24504 /* Prefer store multiple for saves over out-of-line
24505 routines, since the store-multiple instruction will
24506 always be smaller. */
24507 strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;
24509 /* The situation is more complicated with load multiple.
24510 We'd prefer to use the out-of-line routines for restores,
24511 since the "exit" out-of-line routines can handle the
24512 restore of LR and the frame teardown. However, it doesn't
24513 make sense to use the out-of-line routine if that is the
24514 only reason we'd need to save LR, and we can't use the
24515 "exit" out-of-line gpr restore if we have saved some
24516 fprs; in those cases it is advantageous to use load
24517 multiple when available. */
24518 if (info->first_fp_reg_save != 64 || !lr_save_p)
24519 strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
24523 /* Using the "exit" out-of-line routine does not improve code size
24524 if using it would require lr to be saved and if only saving one
24525 or two gprs. */
24526 else if (!lr_save_p && info->first_gp_reg_save > 29)
24527 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
24529 /* Don't ever restore fixed regs. */
24530 if ((strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24531 for (int i = info->first_gp_reg_save; i < 32; i++)
24532 if (fixed_reg_p (i))
24534 strategy |= REST_INLINE_GPRS;
24535 strategy &= ~REST_MULTIPLE;
24536 break;
24539 /* We can only use load multiple or the out-of-line routines to
24540 restore gprs if we've saved all the registers from
24541 first_gp_reg_save. Otherwise, we risk loading garbage.
24542 Of course, if we have saved out-of-line or used stmw then we know
24543 we haven't skipped any gprs. */
24544 if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
24545 && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
24546 for (int i = info->first_gp_reg_save; i < 32; i++)
24547 if (!save_reg_p (i))
24549 strategy |= REST_INLINE_GPRS;
24550 strategy &= ~REST_MULTIPLE;
24551 break;
24554 if (TARGET_ELF && TARGET_64BIT)
24556 if (!(strategy & SAVE_INLINE_FPRS))
24557 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24558 else if (!(strategy & SAVE_INLINE_GPRS)
24559 && info->first_fp_reg_save == 64)
24560 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
24562 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
24563 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
24565 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
24566 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
24568 return strategy;
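/* Illustrative reading of the heuristics above: a 32-bit -Os V.4
   function compiled with -mmultiple that saves r28..r31 and no FPRs
   or VRs would typically get SAVE_INLINE_GPRS | SAVE_MULTIPLE (stmw)
   and, if LR need not be saved, REST_INLINE_GPRS | REST_MULTIPLE
   (lmw).  This is a sketch of the common outcome, not a guarantee.  */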
24571 /* Calculate the stack information for the current function. This is
24572 complicated by having two separate calling sequences, the AIX calling
24573 sequence and the V.4 calling sequence.
24575 AIX (and Darwin/Mac OS X) stack frames look like:
24576 32-bit 64-bit
24577 SP----> +---------------------------------------+
24578 | back chain to caller | 0 0
24579 +---------------------------------------+
24580 | saved CR | 4 8 (8-11)
24581 +---------------------------------------+
24582 | saved LR | 8 16
24583 +---------------------------------------+
24584 | reserved for compilers | 12 24
24585 +---------------------------------------+
24586 | reserved for binders | 16 32
24587 +---------------------------------------+
24588 | saved TOC pointer | 20 40
24589 +---------------------------------------+
24590 | Parameter save area (+padding*) (P) | 24 48
24591 +---------------------------------------+
24592 | Alloca space (A) | 24+P etc.
24593 +---------------------------------------+
24594 | Local variable space (L) | 24+P+A
24595 +---------------------------------------+
24596 | Float/int conversion temporary (X) | 24+P+A+L
24597 +---------------------------------------+
24598 | Save area for AltiVec registers (W) | 24+P+A+L+X
24599 +---------------------------------------+
24600 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
24601 +---------------------------------------+
24602 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
24603 +---------------------------------------+
24604 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
24605 +---------------------------------------+
24606 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
24607 +---------------------------------------+
24608 old SP->| back chain to caller's caller |
24609 +---------------------------------------+
24611 * If the alloca area is present, the parameter save area is
24612 padded so that the former starts 16-byte aligned.
24614 The required alignment for AIX configurations is two words (i.e., 8
24615 or 16 bytes).
24617 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
24619 SP----> +---------------------------------------+
24620 | Back chain to caller | 0
24621 +---------------------------------------+
24622 | Save area for CR | 8
24623 +---------------------------------------+
24624 | Saved LR | 16
24625 +---------------------------------------+
24626 | Saved TOC pointer | 24
24627 +---------------------------------------+
24628 | Parameter save area (+padding*) (P) | 32
24629 +---------------------------------------+
24630 | Alloca space (A) | 32+P
24631 +---------------------------------------+
24632 | Local variable space (L) | 32+P+A
24633 +---------------------------------------+
24634 | Save area for AltiVec registers (W) | 32+P+A+L
24635 +---------------------------------------+
24636 | AltiVec alignment padding (Y) | 32+P+A+L+W
24637 +---------------------------------------+
24638 | Save area for GP registers (G) | 32+P+A+L+W+Y
24639 +---------------------------------------+
24640 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
24641 +---------------------------------------+
24642 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
24643 +---------------------------------------+
24645 * If the alloca area is present, the parameter save area is
24646 padded so that the former starts 16-byte aligned.
24648 V.4 stack frames look like:
24650 SP----> +---------------------------------------+
24651 | back chain to caller | 0
24652 +---------------------------------------+
24653 | caller's saved LR | 4
24654 +---------------------------------------+
24655 | Parameter save area (+padding*) (P) | 8
24656 +---------------------------------------+
24657 | Alloca space (A) | 8+P
24658 +---------------------------------------+
24659 | Varargs save area (V) | 8+P+A
24660 +---------------------------------------+
24661 | Local variable space (L) | 8+P+A+V
24662 +---------------------------------------+
24663 | Float/int conversion temporary (X) | 8+P+A+V+L
24664 +---------------------------------------+
24665 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
24666 +---------------------------------------+
24667 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
24668 +---------------------------------------+
24669 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
24670 +---------------------------------------+
24671 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
24672 +---------------------------------------+
24673 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
24674 +---------------------------------------+
24675 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
24676 +---------------------------------------+
24677 old SP->| back chain to caller's caller |
24678 +---------------------------------------+
24680 * If the alloca area is present and the required alignment is
24681 16 bytes, the parameter save area is padded so that the
24682 alloca area starts 16-byte aligned.
24684 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
24685 given. (But note below and in sysv4.h that we require only 8 and
24686 may round up the size of our stack frame anyway. The historical
24687 reason is early versions of powerpc-linux which didn't properly
24688 align the stack at program startup. A happy side-effect is that
24689 -mno-eabi libraries can be used with -meabi programs.)
24691 The EABI configuration defaults to the V.4 layout. However,
24692 the stack alignment requirements may differ. If -mno-eabi is not
24693 given, the required stack alignment is 8 bytes; if -mno-eabi is
24694 given, the required alignment is 16 bytes. (But see V.4 comment
24695 above.) */
24697 #ifndef ABI_STACK_BOUNDARY
24698 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
24699 #endif
24701 static rs6000_stack_t *
24702 rs6000_stack_info (void)
24704 /* We should never be called for thunks, we are not set up for that. */
24705 gcc_assert (!cfun->is_thunk);
24707 rs6000_stack_t *info = &stack_info;
24708 int reg_size = TARGET_32BIT ? 4 : 8;
24709 int ehrd_size;
24710 int ehcr_size;
24711 int save_align;
24712 int first_gp;
24713 HOST_WIDE_INT non_fixed_size;
24714 bool using_static_chain_p;
24716 if (reload_completed && info->reload_completed)
24717 return info;
24719 memset (info, 0, sizeof (*info));
24720 info->reload_completed = reload_completed;
24722 /* Select which calling sequence. */
24723 info->abi = DEFAULT_ABI;
24725 /* Calculate which registers need to be saved & save area size. */
24726 info->first_gp_reg_save = first_reg_to_save ();
24727 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
24728 even if it currently looks like we won't. Reload may need it to
24729 get at a constant; if so, it will have already created a constant
24730 pool entry for it. */
24731 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
24732 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
24733 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
24734 && crtl->uses_const_pool
24735 && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
24736 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
24737 else
24738 first_gp = info->first_gp_reg_save;
24740 info->gp_size = reg_size * (32 - first_gp);
24742 info->first_fp_reg_save = first_fp_reg_to_save ();
24743 info->fp_size = 8 * (64 - info->first_fp_reg_save);
24745 info->first_altivec_reg_save = first_altivec_reg_to_save ();
24746 info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
24747 - info->first_altivec_reg_save);
24749 /* Does this function call anything? */
24750 info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
24752 /* Determine if we need to save the condition code registers. */
24753 if (save_reg_p (CR2_REGNO)
24754 || save_reg_p (CR3_REGNO)
24755 || save_reg_p (CR4_REGNO))
24757 info->cr_save_p = 1;
24758 if (DEFAULT_ABI == ABI_V4)
24759 info->cr_size = reg_size;
24762 /* If the current function calls __builtin_eh_return, then we need
24763 to allocate stack space for registers that will hold data for
24764 the exception handler. */
24765 if (crtl->calls_eh_return)
24767 unsigned int i;
24768 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
24769 continue;
24771 ehrd_size = i * UNITS_PER_WORD;
24773 else
24774 ehrd_size = 0;
24776 /* In the ELFv2 ABI, we also need to allocate space for separate
24777 CR field save areas if the function calls __builtin_eh_return. */
24778 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24780 /* This hard-codes that we have three call-saved CR fields. */
24781 ehcr_size = 3 * reg_size;
24782 /* We do *not* use the regular CR save mechanism. */
24783 info->cr_save_p = 0;
24785 else
24786 ehcr_size = 0;
24788 /* Determine various sizes. */
24789 info->reg_size = reg_size;
24790 info->fixed_size = RS6000_SAVE_AREA;
24791 info->vars_size = RS6000_ALIGN (get_frame_size (), 8);
24792 if (cfun->calls_alloca)
24793 info->parm_size =
24794 RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size,
24795 STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size;
24796 else
24797 info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
24798 TARGET_ALTIVEC ? 16 : 8);
24799 if (FRAME_GROWS_DOWNWARD)
24800 info->vars_size
24801 += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size,
24802 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
24803 - (info->fixed_size + info->vars_size + info->parm_size);
24805 if (TARGET_ALTIVEC_ABI)
24806 info->vrsave_mask = compute_vrsave_mask ();
24808 if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask)
24809 info->vrsave_size = 4;
24811 compute_save_world_info (info);
24813 /* Calculate the offsets. */
24814 switch (DEFAULT_ABI)
24816 case ABI_NONE:
24817 default:
24818 gcc_unreachable ();
24820 case ABI_AIX:
24821 case ABI_ELFv2:
24822 case ABI_DARWIN:
24823 info->fp_save_offset = -info->fp_size;
24824 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24826 if (TARGET_ALTIVEC_ABI)
24828 info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size;
24830 /* Align stack so vector save area is on a quadword boundary.
24831 The padding goes above the vectors. */
24832 if (info->altivec_size != 0)
24833 info->altivec_padding_size = info->vrsave_save_offset & 0xF;
24835 info->altivec_save_offset = info->vrsave_save_offset
24836 - info->altivec_padding_size
24837 - info->altivec_size;
24838 gcc_assert (info->altivec_size == 0
24839 || info->altivec_save_offset % 16 == 0);
24841 /* Adjust for AltiVec case. */
24842 info->ehrd_offset = info->altivec_save_offset - ehrd_size;
24844 else
24845 info->ehrd_offset = info->gp_save_offset - ehrd_size;
24847 info->ehcr_offset = info->ehrd_offset - ehcr_size;
24848 info->cr_save_offset = reg_size; /* first word when 64-bit. */
24849 info->lr_save_offset = 2*reg_size;
24850 break;
24852 case ABI_V4:
24853 info->fp_save_offset = -info->fp_size;
24854 info->gp_save_offset = info->fp_save_offset - info->gp_size;
24855 info->cr_save_offset = info->gp_save_offset - info->cr_size;
24857 if (TARGET_ALTIVEC_ABI)
24859 info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size;
24861 /* Align stack so vector save area is on a quadword boundary. */
24862 if (info->altivec_size != 0)
24863 info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16);
24865 info->altivec_save_offset = info->vrsave_save_offset
24866 - info->altivec_padding_size
24867 - info->altivec_size;
24869 /* Adjust for AltiVec case. */
24870 info->ehrd_offset = info->altivec_save_offset;
24872 else
24873 info->ehrd_offset = info->cr_save_offset;
24875 info->ehrd_offset -= ehrd_size;
24876 info->lr_save_offset = reg_size;
24879 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
24880 info->save_size = RS6000_ALIGN (info->fp_size
24881 + info->gp_size
24882 + info->altivec_size
24883 + info->altivec_padding_size
24884 + ehrd_size
24885 + ehcr_size
24886 + info->cr_size
24887 + info->vrsave_size,
24888 save_align);
24890 non_fixed_size = info->vars_size + info->parm_size + info->save_size;
24892 info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size,
24893 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
24895 /* Determine if we need to save the link register. */
24896 if (info->calls_p
24897 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24898 && crtl->profile
24899 && !TARGET_PROFILE_KERNEL)
24900 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
24901 #ifdef TARGET_RELOCATABLE
24902 || (DEFAULT_ABI == ABI_V4
24903 && (TARGET_RELOCATABLE || flag_pic > 1)
24904 && !constant_pool_empty_p ())
24905 #endif
24906 || rs6000_ra_ever_killed ())
24907 info->lr_save_p = 1;
24909 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24910 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24911 && call_used_regs[STATIC_CHAIN_REGNUM]);
24912 info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p);
24914 if (!(info->savres_strategy & SAVE_INLINE_GPRS)
24915 || !(info->savres_strategy & SAVE_INLINE_FPRS)
24916 || !(info->savres_strategy & SAVE_INLINE_VRS)
24917 || !(info->savres_strategy & REST_INLINE_GPRS)
24918 || !(info->savres_strategy & REST_INLINE_FPRS)
24919 || !(info->savres_strategy & REST_INLINE_VRS))
24920 info->lr_save_p = 1;
24922 if (info->lr_save_p)
24923 df_set_regs_ever_live (LR_REGNO, true);
24925 /* Determine if we need to allocate any stack frame:
24927 For AIX we need to push the stack if a frame pointer is needed
24928 (because the stack might be dynamically adjusted), if we are
24929 debugging, if we make calls, or if the sum of fp_save, gp_save,
24930 and local variables are more than the space needed to save all
24931 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
24932 + 18*8 = 288 (GPR13 reserved).
24934 For V.4 we don't have the stack cushion that AIX uses, but assume
24935 that the debugger can handle stackless frames. */
24937 if (info->calls_p)
24938 info->push_p = 1;
24940 else if (DEFAULT_ABI == ABI_V4)
24941 info->push_p = non_fixed_size != 0;
24943 else if (frame_pointer_needed)
24944 info->push_p = 1;
24946 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
24947 info->push_p = 1;
24949 else
24950 info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
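/* E.g. a 64-bit AIX leaf function whose register saves and locals fit
   within 288 bytes can leave the stack pointer alone and use the
   protected area below the caller's frame instead of pushing a new
   one.  */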
24952 return info;
24955 static void
24956 debug_stack_info (rs6000_stack_t *info)
24958 const char *abi_string;
24960 if (! info)
24961 info = rs6000_stack_info ();
24963 fprintf (stderr, "\nStack information for function %s:\n",
24964 ((current_function_decl && DECL_NAME (current_function_decl))
24965 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
24966 : "<unknown>"));
24968 switch (info->abi)
24970 default: abi_string = "Unknown"; break;
24971 case ABI_NONE: abi_string = "NONE"; break;
24972 case ABI_AIX: abi_string = "AIX"; break;
24973 case ABI_ELFv2: abi_string = "ELFv2"; break;
24974 case ABI_DARWIN: abi_string = "Darwin"; break;
24975 case ABI_V4: abi_string = "V.4"; break;
24978 fprintf (stderr, "\tABI = %5s\n", abi_string);
24980 if (TARGET_ALTIVEC_ABI)
24981 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
24983 if (info->first_gp_reg_save != 32)
24984 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
24986 if (info->first_fp_reg_save != 64)
24987 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
24989 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
24990 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
24991 info->first_altivec_reg_save);
24993 if (info->lr_save_p)
24994 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
24996 if (info->cr_save_p)
24997 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
24999 if (info->vrsave_mask)
25000 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
25002 if (info->push_p)
25003 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
25005 if (info->calls_p)
25006 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
25008 if (info->gp_size)
25009 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
25011 if (info->fp_size)
25012 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
25014 if (info->altivec_size)
25015 fprintf (stderr, "\taltivec_save_offset = %5d\n",
25016 info->altivec_save_offset);
25018 if (info->vrsave_size)
25019 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
25020 info->vrsave_save_offset);
25022 if (info->lr_save_p)
25023 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
25025 if (info->cr_save_p)
25026 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
25028 if (info->varargs_save_offset)
25029 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
25031 if (info->total_size)
25032 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25033 info->total_size);
25035 if (info->vars_size)
25036 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
25037 info->vars_size);
25039 if (info->parm_size)
25040 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
25042 if (info->fixed_size)
25043 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
25045 if (info->gp_size)
25046 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
25048 if (info->fp_size)
25049 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
25051 if (info->altivec_size)
25052 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
25054 if (info->vrsave_size)
25055 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
25057 if (info->altivec_padding_size)
25058 fprintf (stderr, "\taltivec_padding_size= %5d\n",
25059 info->altivec_padding_size);
25061 if (info->cr_size)
25062 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
25064 if (info->save_size)
25065 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
25067 if (info->reg_size != 4)
25068 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
25070 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
25072 fprintf (stderr, "\n");
25076 rs6000_return_addr (int count, rtx frame)
25078 /* We can't use get_hard_reg_initial_val for LR when count == 0 if LR
25079 is trashed by the prologue, as it is for PIC on ABI_V4 and Darwin. */
25080 if (count != 0
25081 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
25083 cfun->machine->ra_needs_full_frame = 1;
25085 if (count == 0)
25086 /* FRAME is set to frame_pointer_rtx by the generic code, but that
25087 is good for loading 0(r1) only when !FRAME_GROWS_DOWNWARD. */
25088 frame = stack_pointer_rtx;
25089 rtx prev_frame_addr = memory_address (Pmode, frame);
25090 rtx prev_frame = copy_to_reg (gen_rtx_MEM (Pmode, prev_frame_addr));
25091 rtx lr_save_off = plus_constant (Pmode,
25092 prev_frame, RETURN_ADDRESS_OFFSET);
25093 rtx lr_save_addr = memory_address (Pmode, lr_save_off);
25094 return gen_rtx_MEM (Pmode, lr_save_addr);
25097 cfun->machine->ra_need_lr = 1;
25098 return get_hard_reg_initial_val (Pmode, LR_REGNO);
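/* E.g. for __builtin_return_address with COUNT > 0 (or any PIC case),
   the code above follows one back-chain link from FRAME and loads the
   LR save word at RETURN_ADDRESS_OFFSET from it, which is where the
   PowerPC ABIs keep a function's saved return address.  */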
25101 /* Say whether a function is a candidate for sibcall handling or not. */
25103 static bool
25104 rs6000_function_ok_for_sibcall (tree decl, tree exp)
25106 tree fntype;
25108 if (decl)
25109 fntype = TREE_TYPE (decl);
25110 else
25111 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
25113 /* We can't do it if the called function has more vector parameters
25114 than the current function; there's nowhere to put the VRsave code. */
25115 if (TARGET_ALTIVEC_ABI
25116 && TARGET_ALTIVEC_VRSAVE
25117 && !(decl && decl == current_function_decl))
25119 function_args_iterator args_iter;
25120 tree type;
25121 int nvreg = 0;
25123 /* Functions with vector parameters are required to have a
25124 prototype, so the argument type info must be available
25125 here. */
25126 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
25127 if (TREE_CODE (type) == VECTOR_TYPE
25128 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25129 nvreg++;
25131 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
25132 if (TREE_CODE (type) == VECTOR_TYPE
25133 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
25134 nvreg--;
25136 if (nvreg > 0)
25137 return false;
25140 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
25141 functions, because the callee may have a different TOC pointer to
25142 the caller and there's no way to ensure we restore the TOC when
25143 we return. With the secure-plt SYSV ABI we can't make non-local
25144 calls when -fpic/PIC because the plt call stubs use r30. */
25145 if (DEFAULT_ABI == ABI_DARWIN
25146 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25147 && decl
25148 && !DECL_EXTERNAL (decl)
25149 && !DECL_WEAK (decl)
25150 && (*targetm.binds_local_p) (decl))
25151 || (DEFAULT_ABI == ABI_V4
25152 && (!TARGET_SECURE_PLT
25153 || !flag_pic
25154 || (decl
25155 && (*targetm.binds_local_p) (decl)))))
25157 tree attr_list = TYPE_ATTRIBUTES (fntype);
25159 if (!lookup_attribute ("longcall", attr_list)
25160 || lookup_attribute ("shortcall", attr_list))
25161 return true;
25164 return false;
25167 static int
25168 rs6000_ra_ever_killed (void)
25170 rtx_insn *top;
25171 rtx reg;
25172 rtx_insn *insn;
25174 if (cfun->is_thunk)
25175 return 0;
25177 if (cfun->machine->lr_save_state)
25178 return cfun->machine->lr_save_state - 1;
25180 /* regs_ever_live has LR marked as used if any sibcalls are present,
25181 but this should not force saving and restoring in the
25182 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
25183 clobbers LR, so that is inappropriate. */
25185 /* Also, the prologue can generate a store into LR that
25186 doesn't really count, like this:
25188 move LR->R0
25189 bcl to set PIC register
25190 move LR->R31
25191 move R0->LR
25193 When we're called from the epilogue, we need to avoid counting
25194 this as a store. */
25196 push_topmost_sequence ();
25197 top = get_insns ();
25198 pop_topmost_sequence ();
25199 reg = gen_rtx_REG (Pmode, LR_REGNO);
25201 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
25203 if (INSN_P (insn))
25205 if (CALL_P (insn))
25207 if (!SIBLING_CALL_P (insn))
25208 return 1;
25210 else if (find_regno_note (insn, REG_INC, LR_REGNO))
25211 return 1;
25212 else if (set_of (reg, insn) != NULL_RTX
25213 && !prologue_epilogue_contains (insn))
25214 return 1;
25217 return 0;
25220 /* Emit instructions needed to load the TOC register.
25221 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
25222 a constant pool; or for SVR4 -fpic. */
25224 void
25225 rs6000_emit_load_toc_table (int fromprolog)
25227 rtx dest;
25228 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
25230 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
25232 char buf[30];
25233 rtx lab, tmp1, tmp2, got;
25235 lab = gen_label_rtx ();
25236 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
25237 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25238 if (flag_pic == 2)
25240 got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25241 need_toc_init = 1;
25243 else
25244 got = rs6000_got_sym ();
25245 tmp1 = tmp2 = dest;
25246 if (!fromprolog)
25248 tmp1 = gen_reg_rtx (Pmode);
25249 tmp2 = gen_reg_rtx (Pmode);
25251 emit_insn (gen_load_toc_v4_PIC_1 (lab));
25252 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
25253 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
25254 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
25256 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
25258 emit_insn (gen_load_toc_v4_pic_si ());
25259 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25261 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
25263 char buf[30];
25264 rtx temp0 = (fromprolog
25265 ? gen_rtx_REG (Pmode, 0)
25266 : gen_reg_rtx (Pmode));
25268 if (fromprolog)
25270 rtx symF, symL;
25272 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
25273 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25275 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
25276 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
25278 emit_insn (gen_load_toc_v4_PIC_1 (symF));
25279 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25280 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
25282 else
25284 rtx tocsym, lab;
25286 tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25287 need_toc_init = 1;
25288 lab = gen_label_rtx ();
25289 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
25290 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
25291 if (TARGET_LINK_STACK)
25292 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
25293 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
25295 emit_insn (gen_addsi3 (dest, temp0, dest));
25297 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
25299 /* This is for AIX code running in non-PIC ELF32. */
25300 rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
25302 need_toc_init = 1;
25303 emit_insn (gen_elf_high (dest, realsym));
25304 emit_insn (gen_elf_low (dest, dest, realsym));
25306 else
25308 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25310 if (TARGET_32BIT)
25311 emit_insn (gen_load_toc_aix_si (dest));
25312 else
25313 emit_insn (gen_load_toc_aix_di (dest));
25317 /* Emit instructions to restore the link register after determining where
25318 its value has been stored. */
25320 void
25321 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
25323 rs6000_stack_t *info = rs6000_stack_info ();
25324 rtx operands[2];
25326 operands[0] = source;
25327 operands[1] = scratch;
25329 if (info->lr_save_p)
25331 rtx frame_rtx = stack_pointer_rtx;
25332 HOST_WIDE_INT sp_offset = 0;
25333 rtx tmp;
25335 if (frame_pointer_needed
25336 || cfun->calls_alloca
25337 || info->total_size > 32767)
25339 tmp = gen_frame_mem (Pmode, frame_rtx);
25340 emit_move_insn (operands[1], tmp);
25341 frame_rtx = operands[1];
25343 else if (info->push_p)
25344 sp_offset = info->total_size;
25346 tmp = plus_constant (Pmode, frame_rtx,
25347 info->lr_save_offset + sp_offset);
25348 tmp = gen_frame_mem (Pmode, tmp);
25349 emit_move_insn (tmp, operands[0]);
25351 else
25352 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
25354 /* Freeze lr_save_p. We've just emitted rtl that depends on the
25355 state of lr_save_p so any change from here on would be a bug. In
25356 particular, stop rs6000_ra_ever_killed from considering the SET
25357 of lr we may have added just above. */
25358 cfun->machine->lr_save_state = info->lr_save_p + 1;
25361 static GTY(()) alias_set_type set = -1;
25363 alias_set_type
25364 get_TOC_alias_set (void)
25366 if (set == -1)
25367 set = new_alias_set ();
25368 return set;
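/* Editor's sketch (illustrative, not GCC API): get_TOC_alias_set above
   is the usual allocate-on-first-use idiom for a process-wide id.  The
   generic shape, with int standing in for alias_set_type and 42 as a
   stand-in for whatever the allocator would hand back:  */
static int
lazy_id_example (void)
{
  static int id = -1;   /* -1 means "not allocated yet" */
  if (id == -1)
    id = 42;            /* stand-in for the real allocation call */
  return id;
}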
25371 /* This returns nonzero if the current function uses the TOC. This is
25372 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
25373 is generated by the ABI_V4 load_toc_* patterns.
25374 Return 2 instead of 1 if the load_toc_* pattern is in the function
25375 partition that doesn't start the function. */
25376 #if TARGET_ELF
25377 static int
25378 uses_TOC (void)
25380 rtx_insn *insn;
25381 int ret = 1;
25383 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
25385 if (INSN_P (insn))
25387 rtx pat = PATTERN (insn);
25388 int i;
25390 if (GET_CODE (pat) == PARALLEL)
25391 for (i = 0; i < XVECLEN (pat, 0); i++)
25393 rtx sub = XVECEXP (pat, 0, i);
25394 if (GET_CODE (sub) == USE)
25396 sub = XEXP (sub, 0);
25397 if (GET_CODE (sub) == UNSPEC
25398 && XINT (sub, 1) == UNSPEC_TOC)
25399 return ret;
25403 else if (crtl->has_bb_partition
25404 && NOTE_P (insn)
25405 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
25406 ret = 2;
25408 return 0;
25410 #endif
25412 rtx
25413 create_TOC_reference (rtx symbol, rtx largetoc_reg)
25415 rtx tocrel, tocreg, hi;
25417 if (TARGET_DEBUG_ADDR)
25419 if (GET_CODE (symbol) == SYMBOL_REF)
25420 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
25421 XSTR (symbol, 0));
25422 else
25424 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
25425 GET_RTX_NAME (GET_CODE (symbol)));
25426 debug_rtx (symbol);
25430 if (!can_create_pseudo_p ())
25431 df_set_regs_ever_live (TOC_REGISTER, true);
25433 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
25434 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
25435 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
25436 return tocrel;
25438 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
25439 if (largetoc_reg != NULL)
25441 emit_move_insn (largetoc_reg, hi);
25442 hi = largetoc_reg;
25444 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
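/* Editor's sketch (not GCC code): the HIGH/LO_SUM pair returned above
   splits a displacement the way addis/addi do -- the low 16 bits are
   sign-extended, so the high part must absorb the carry:  */
static void
high_low_split_example (long long offset, long long *hi16, long long *lo16)
{
  *lo16 = (short) (offset & 0xffff);   /* sign-extended low 16 bits */
  *hi16 = (offset - *lo16) >> 16;      /* high part, carry folded in */
  /* Invariant: (*hi16 << 16) + *lo16 == offset.  */
}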
25447 /* Issue assembly directives that create a reference to the given DWARF
25448 FRAME_TABLE_LABEL from the current function section. */
25449 void
25450 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
25452 fprintf (asm_out_file, "\t.ref %s\n",
25453 (* targetm.strip_name_encoding) (frame_table_label));
25456 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
25457 and the change to the stack pointer. */
25459 static void
25460 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
25462 rtvec p;
25463 int i;
25464 rtx regs[3];
25466 i = 0;
25467 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25468 if (hard_frame_needed)
25469 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
25470 if (!(REGNO (fp) == STACK_POINTER_REGNUM
25471 || (hard_frame_needed
25472 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
25473 regs[i++] = fp;
25475 p = rtvec_alloc (i);
25476 while (--i >= 0)
25478 rtx mem = gen_frame_mem (BLKmode, regs[i]);
25479 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
25482 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
25485 /* Allocate SIZE_INT bytes on the stack using a store-with-update style insn
25486 and set the appropriate attributes for the generated insn. Return the
25487 first insn which adjusts the stack pointer or the last insn before
25488 the stack adjustment loop.
25490 SIZE_INT is used to create the CFI note for the allocation.
25492 SIZE_RTX is an rtx containing the size of the adjustment. Note that
25493 since stacks grow to lower addresses, its runtime value is -SIZE_INT.
25495 ORIG_SP contains the backchain value that must be stored at *sp. */
25497 static rtx_insn *
25498 rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
25500 rtx_insn *insn;
25502 rtx size_rtx = GEN_INT (-size_int);
25503 if (size_int > 32767)
25505 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25506 /* Need a note here so that try_split doesn't get confused. */
25507 if (get_last_insn () == NULL_RTX)
25508 emit_note (NOTE_INSN_DELETED);
25509 insn = emit_move_insn (tmp_reg, size_rtx);
25510 try_split (PATTERN (insn), insn, 0);
25511 size_rtx = tmp_reg;
25514 if (Pmode == SImode)
25515 insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
25516 stack_pointer_rtx,
25517 size_rtx,
25518 orig_sp));
25519 else
25520 insn = emit_insn (gen_movdi_di_update_stack (stack_pointer_rtx,
25521 stack_pointer_rtx,
25522 size_rtx,
25523 orig_sp));
25524 rtx par = PATTERN (insn);
25525 gcc_assert (GET_CODE (par) == PARALLEL);
25526 rtx set = XVECEXP (par, 0, 0);
25527 gcc_assert (GET_CODE (set) == SET);
25528 rtx mem = SET_DEST (set);
25529 gcc_assert (MEM_P (mem));
25530 MEM_NOTRAP_P (mem) = 1;
25531 set_mem_alias_set (mem, get_frame_alias_set ());
25533 RTX_FRAME_RELATED_P (insn) = 1;
25534 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25535 gen_rtx_SET (stack_pointer_rtx,
25536 gen_rtx_PLUS (Pmode,
25537 stack_pointer_rtx,
25538 GEN_INT (-size_int))));
25540 /* Emit a blockage to ensure the allocation/probing insns are
25541 not optimized, combined, removed, etc. Add REG_STACK_CHECK
25542 note for similar reasons. */
25543 if (flag_stack_clash_protection)
25545 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
25546 emit_insn (gen_blockage ());
25549 return insn;
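/* Editor's sketch: the store-with-update insn emitted above behaves,
   in scalar terms, like one combined decrement-and-store that keeps
   the backchain at *sp valid at every instant:  */
static char *
stdu_semantics_example (char *sp, long size, char *backchain)
{
  sp -= size;                  /* decrement the stack pointer */
  *(char **) sp = backchain;   /* store ORIG_SP at the new *sp */
  return sp;
}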
25552 static HOST_WIDE_INT
25553 get_stack_clash_protection_probe_interval (void)
25555 return (HOST_WIDE_INT_1U
25556 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL));
25559 static HOST_WIDE_INT
25560 get_stack_clash_protection_guard_size (void)
25562 return (HOST_WIDE_INT_1U
25563 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE));
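/* Editor's sketch: both getters above decode a log2-valued --param,
   e.g. a parameter value of 12 yields a 4 KiB probe interval:  */
static long
param_to_bytes_example (int log2_value)
{
  return 1L << log2_value;   /* 12 -> 4096, 16 -> 65536 */
}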
25566 /* Allocate ORIG_SIZE bytes on the stack and probe the newly
25567 allocated space every STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes.
25569 COPY_REG, if non-null, should contain a copy of the original
25570 stack pointer at exit from this function.
25572 This is subtly different from the Ada probing in that it tries hard to
25573 prevent attacks that jump the stack guard. Thus it is never allowed to
25574 allocate more than STACK_CLASH_PROTECTION_PROBE_INTERVAL bytes of stack
25575 space without a suitable probe. */
25576 static rtx_insn *
25577 rs6000_emit_probe_stack_range_stack_clash (HOST_WIDE_INT orig_size,
25578 rtx copy_reg)
25580 rtx orig_sp = copy_reg;
25582 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25584 /* Round the size down to a multiple of PROBE_INTERVAL. */
25585 HOST_WIDE_INT rounded_size = ROUND_DOWN (orig_size, probe_interval);
25587 /* If a copy was explicitly requested,
25588 or the rounded size is not the same as the original size,
25589 or the rounded size is greater than a page,
25590 then we will need a copy of the original stack pointer. */
25591 if (rounded_size != orig_size
25592 || rounded_size > probe_interval
25593 || copy_reg)
25595 /* If the caller did not request a copy of the incoming stack
25596 pointer, then we use r0 to hold the copy. */
25597 if (!copy_reg)
25598 orig_sp = gen_rtx_REG (Pmode, 0);
25599 emit_move_insn (orig_sp, stack_pointer_rtx);
25602 /* There are three cases here.
25604 One is a single probe, which is the most common and most efficiently
25605 implemented as it does not need a copy of the original
25606 stack pointer if there are no residuals.
25608 Second is unrolled allocation/probes, which we use if there are just
25609 a few of them. It needs to save the original stack pointer into a
25610 temporary for use as a source register in the allocation/probe.
25612 Last is a loop. This is the least common case and least efficient. */
25613 rtx_insn *retval = NULL;
25614 if (rounded_size == probe_interval)
25616 retval = rs6000_emit_allocate_stack_1 (probe_interval, stack_pointer_rtx);
25618 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
25620 else if (rounded_size <= 8 * probe_interval)
25622 /* The ABI requires using the store-with-update insns to allocate
25623 space and store the backchain into the stack.
25625 So we save the current stack pointer into a temporary, then
25626 emit the store-with-update insns to store the saved stack pointer
25627 into the right location in each new page. */
25628 for (int i = 0; i < rounded_size; i += probe_interval)
25630 rtx_insn *insn
25631 = rs6000_emit_allocate_stack_1 (probe_interval, orig_sp);
25633 /* Save the first stack adjustment in RETVAL. */
25634 if (i == 0)
25635 retval = insn;
25638 dump_stack_clash_frame_info (PROBE_INLINE, rounded_size != orig_size);
25640 else
25642 /* Compute the ending address. */
25643 rtx end_addr
25644 = copy_reg ? gen_rtx_REG (Pmode, 0) : gen_rtx_REG (Pmode, 12);
25645 rtx rs = GEN_INT (-rounded_size);
25646 rtx_insn *insn;
25647 if (add_operand (rs, Pmode))
25648 insn = emit_insn (gen_add3_insn (end_addr, stack_pointer_rtx, rs));
25649 else
25651 emit_move_insn (end_addr, GEN_INT (-rounded_size));
25652 insn = emit_insn (gen_add3_insn (end_addr, end_addr,
25653 stack_pointer_rtx));
25654 /* Describe the effect of INSN to the CFI engine. */
25655 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
25656 gen_rtx_SET (end_addr,
25657 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25658 rs)));
25660 RTX_FRAME_RELATED_P (insn) = 1;
25662 /* Emit the loop. */
25663 if (TARGET_64BIT)
25664 retval = emit_insn (gen_probe_stack_rangedi (stack_pointer_rtx,
25665 stack_pointer_rtx, orig_sp,
25666 end_addr));
25667 else
25668 retval = emit_insn (gen_probe_stack_rangesi (stack_pointer_rtx,
25669 stack_pointer_rtx, orig_sp,
25670 end_addr));
25671 RTX_FRAME_RELATED_P (retval) = 1;
25672 /* Describe the effect of INSN to the CFI engine. */
25673 add_reg_note (retval, REG_FRAME_RELATED_EXPR,
25674 gen_rtx_SET (stack_pointer_rtx, end_addr));
25676 /* Emit a blockage to ensure the allocation/probing insns are
25677 not optimized, combined, removed, etc. Other cases handle this
25678 within their call to rs6000_emit_allocate_stack_1. */
25679 emit_insn (gen_blockage ());
25681 dump_stack_clash_frame_info (PROBE_LOOP, rounded_size != orig_size);
25684 if (orig_size != rounded_size)
25686 /* Allocate (and implicitly probe) any residual space. */
25687 HOST_WIDE_INT residual = orig_size - rounded_size;
25689 rtx_insn *insn = rs6000_emit_allocate_stack_1 (residual, orig_sp);
25691 /* If the residual was the only allocation, then we can return the
25692 allocating insn. */
25693 if (!retval)
25694 retval = insn;
25697 return retval;
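/* Editor's sketch (not GCC code): the three-way choice above, restated
   as a standalone predicate over the rounded allocation size:  */
enum probe_plan_example { PLAN_SINGLE, PLAN_UNROLL, PLAN_LOOP };

static enum probe_plan_example
choose_probe_plan_example (long rounded_size, long probe_interval)
{
  if (rounded_size == probe_interval)
    return PLAN_SINGLE;               /* one store-with-update */
  if (rounded_size <= 8 * probe_interval)
    return PLAN_UNROLL;               /* a few inline allocate/probes */
  return PLAN_LOOP;                   /* emit the probe loop */
}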
25700 /* Emit the correct code for allocating stack space, as insns.
25701 If COPY_REG, make sure a copy of the old frame is left there.
25702 The generated code may use hard register 0 as a temporary. */
25704 static rtx_insn *
25705 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
25707 rtx_insn *insn;
25708 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25709 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
25710 rtx todec = gen_int_mode (-size, Pmode);
25712 if (INTVAL (todec) != -size)
25714 warning (0, "stack frame too large");
25715 emit_insn (gen_trap ());
25716 return 0;
25719 if (crtl->limit_stack)
25721 if (REG_P (stack_limit_rtx)
25722 && REGNO (stack_limit_rtx) > 1
25723 && REGNO (stack_limit_rtx) <= 31)
25725 rtx_insn *insn
25726 = gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size));
25727 gcc_assert (insn);
25728 emit_insn (insn);
25729 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg, const0_rtx));
25731 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
25732 && TARGET_32BIT
25733 && DEFAULT_ABI == ABI_V4
25734 && !flag_pic)
25736 rtx toload = gen_rtx_CONST (VOIDmode,
25737 gen_rtx_PLUS (Pmode,
25738 stack_limit_rtx,
25739 GEN_INT (size)));
25741 emit_insn (gen_elf_high (tmp_reg, toload));
25742 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
25743 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
25744 const0_rtx));
25746 else
25747 warning (0, "stack limit expression is not supported");
25750 if (flag_stack_clash_protection)
25752 if (size < get_stack_clash_protection_guard_size ())
25753 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
25754 else
25756 rtx_insn *insn = rs6000_emit_probe_stack_range_stack_clash (size,
25757 copy_reg);
25759 /* If we asked for a copy with an offset, then we still need to add in
25760 the offset. */
25761 if (copy_reg && copy_off)
25762 emit_insn (gen_add3_insn (copy_reg, copy_reg, GEN_INT (copy_off)));
25763 return insn;
25767 if (copy_reg)
25769 if (copy_off != 0)
25770 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
25771 else
25772 emit_move_insn (copy_reg, stack_reg);
25775 /* Since we didn't use gen_frame_mem to generate the MEM, grab
25776 it now and set the alias set/attributes. The above gen_*_update
25777 calls will generate a PARALLEL with the MEM set being the first
25778 operation. */
25779 insn = rs6000_emit_allocate_stack_1 (size, stack_reg);
25780 return insn;
25783 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
25785 #if PROBE_INTERVAL > 32768
25786 #error Cannot use indexed addressing mode for stack probing
25787 #endif
25789 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
25790 inclusive. These are offsets from the current stack pointer. */
25792 static void
25793 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
25795 /* See if we have a constant small number of probes to generate. If so,
25796 that's the easy case. */
25797 if (first + size <= 32768)
25799 HOST_WIDE_INT i;
25801 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
25802 it exceeds SIZE. If only one probe is needed, this will not
25803 generate any code. Then probe at FIRST + SIZE. */
25804 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
25805 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25806 -(first + i)));
25808 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
25809 -(first + size)));
25812 /* Otherwise, do the same as above, but in a loop. Note that we must be
25813 extra careful with variables wrapping around because we might be at
25814 the very top (or the very bottom) of the address space and we have
25815 to be able to handle this case properly; in particular, we use an
25816 equality test for the loop condition. */
25817 else
25819 HOST_WIDE_INT rounded_size;
25820 rtx r12 = gen_rtx_REG (Pmode, 12);
25821 rtx r0 = gen_rtx_REG (Pmode, 0);
25823 /* Sanity check for the addressing mode we're going to use. */
25824 gcc_assert (first <= 32768);
25826 /* Step 1: round SIZE to the previous multiple of the interval. */
25828 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
25831 /* Step 2: compute initial and final value of the loop counter. */
25833 /* TEST_ADDR = SP + FIRST. */
25834 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
25835 -first)));
25837 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
25838 if (rounded_size > 32768)
25840 emit_move_insn (r0, GEN_INT (-rounded_size));
25841 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
25843 else
25844 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
25845 -rounded_size)));
25848 /* Step 3: the loop
25852 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
25853 probe at TEST_ADDR
25855 while (TEST_ADDR != LAST_ADDR)
25857 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
25858 until it is equal to ROUNDED_SIZE. */
25860 if (TARGET_64BIT)
25861 emit_insn (gen_probe_stack_rangedi (r12, r12, stack_pointer_rtx, r0));
25862 else
25863 emit_insn (gen_probe_stack_rangesi (r12, r12, stack_pointer_rtx, r0));
25866 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
25867 that SIZE is equal to ROUNDED_SIZE. */
25869 if (size != rounded_size)
25870 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
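/* Editor's sketch: for the small-size case above, the probed offsets
   below the incoming stack pointer are simply the following sequence
   (assuming the default 4 KiB PROBE_INTERVAL; probe_at is a
   hypothetical stand-in for emit_stack_probe):  */
static void
probe_offsets_example (long first, long size,
                       void (*probe_at) (long sp_relative_offset))
{
  long i;
  for (i = 4096; i < size; i += 4096)
    probe_at (-(first + i));
  probe_at (-(first + size));   /* final probe at FIRST + SIZE */
}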
25874 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
25875 addresses, not offsets. */
25877 static const char *
25878 output_probe_stack_range_1 (rtx reg1, rtx reg2)
25880 static int labelno = 0;
25881 char loop_lab[32];
25882 rtx xops[2];
25884 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25886 /* Loop. */
25887 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25889 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
25890 xops[0] = reg1;
25891 xops[1] = GEN_INT (-PROBE_INTERVAL);
25892 output_asm_insn ("addi %0,%0,%1", xops);
25894 /* Probe at TEST_ADDR. */
25895 xops[1] = gen_rtx_REG (Pmode, 0);
25896 output_asm_insn ("stw %1,0(%0)", xops);
25898 /* Test if TEST_ADDR == LAST_ADDR. */
25899 xops[1] = reg2;
25900 if (TARGET_64BIT)
25901 output_asm_insn ("cmpd 0,%0,%1", xops);
25902 else
25903 output_asm_insn ("cmpw 0,%0,%1", xops);
25905 /* Branch. */
25906 fputs ("\tbne 0,", asm_out_file);
25907 assemble_name_raw (asm_out_file, loop_lab);
25908 fputc ('\n', asm_out_file);
25910 return "";
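/* Editor's note: assuming the default 4 KiB PROBE_INTERVAL, 32-bit code,
   and the r12/r0 register choices made by the caller above, the loop
   emitted here reads roughly:

       .LPSRL0:
           addi 12,12,-4096
           stw  0,0(12)
           cmpw 0,12,0
           bne  0,.LPSRL0
*/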
25913 /* This function is called when rs6000_frame_related is processing
25914 SETs within a PARALLEL, and returns whether the REGNO save ought to
25915 be marked RTX_FRAME_RELATED_P. The PARALLELs involved are those
25916 for out-of-line register save functions, store multiple, and the
25917 Darwin world_save. They may contain registers that don't really
25918 need saving. */
25920 static bool
25921 interesting_frame_related_regno (unsigned int regno)
25923 /* Apparent saves of r0 are actually saving LR. It doesn't make
25924 sense to substitute the regno here to test save_reg_p (LR_REGNO).
25925 We *know* LR needs saving, and dwarf2cfi.c is able to deduce that
25926 (set (mem) (r0)) is saving LR from a prior (set (r0) (lr)) marked
25927 as frame related. */
25928 if (regno == 0)
25929 return true;
25930 /* If we see CR2 then we are here on a Darwin world save. Saves of
25931 CR2 signify the whole CR is being saved. This is a long-standing
25932 ABI wart fixed by ELFv2. As for r0/lr there is no need to check
25933 that CR needs to be saved. */
25934 if (regno == CR2_REGNO)
25935 return true;
25936 /* Omit frame info for any user-defined global regs. If frame info
25937 is supplied for them, frame unwinding will restore a user reg.
25938 Also omit frame info for any reg we don't need to save, as that
25939 bloats frame info and can cause problems with shrink wrapping.
25940 Since global regs won't be seen as needing to be saved, both of
25941 these conditions are covered by save_reg_p. */
25942 return save_reg_p (regno);
25945 /* Probe a range of stack addresses from REG1 to REG3 inclusive. These are
25946 addresses, not offsets.
25948 REG2 contains the backchain that must be stored into *sp at each allocation.
25950 This is subtly different from the Ada probing above in that it tries hard
25951 to prevent attacks that jump the stack guard. Thus, it is never allowed
25952 to allocate more than PROBE_INTERVAL bytes of stack space without a
25953 suitable probe. */
25955 static const char *
25956 output_probe_stack_range_stack_clash (rtx reg1, rtx reg2, rtx reg3)
25958 static int labelno = 0;
25959 char loop_lab[32];
25960 rtx xops[3];
25962 HOST_WIDE_INT probe_interval = get_stack_clash_protection_probe_interval ();
25964 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
25966 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
25968 /* This allocates and probes. */
25969 xops[0] = reg1;
25970 xops[1] = reg2;
25971 xops[2] = GEN_INT (-probe_interval);
25972 if (TARGET_64BIT)
25973 output_asm_insn ("stdu %1,%2(%0)", xops);
25974 else
25975 output_asm_insn ("stwu %1,%2(%0)", xops);
25977 /* Jump to LOOP_LAB if TEST_ADDR != LAST_ADDR. */
25978 xops[0] = reg1;
25979 xops[1] = reg3;
25980 if (TARGET_64BIT)
25981 output_asm_insn ("cmpd 0,%0,%1", xops);
25982 else
25983 output_asm_insn ("cmpw 0,%0,%1", xops);
25985 fputs ("\tbne 0,", asm_out_file);
25986 assemble_name_raw (asm_out_file, loop_lab);
25987 fputc ('\n', asm_out_file);
25989 return "";
25992 /* Wrapper around the output_probe_stack_range routines. */
25993 const char *
25994 output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
25996 if (flag_stack_clash_protection)
25997 return output_probe_stack_range_stack_clash (reg1, reg2, reg3);
25998 else
25999 return output_probe_stack_range_1 (reg1, reg3);
26002 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
26003 with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
26004 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
26005 deduce these equivalences by itself so it wasn't necessary to hold
26006 its hand so much. Don't be tempted to always supply d2_f_d_e with
26007 the actual cfa register, i.e. r31 when we are using a hard frame
26008 pointer. That fails when saving regs off r1, and sched moves the
26009 r31 setup past the reg saves. */
26011 static rtx_insn *
26012 rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
26013 rtx reg2, rtx repl2)
26015 rtx repl;
26017 if (REGNO (reg) == STACK_POINTER_REGNUM)
26019 gcc_checking_assert (val == 0);
26020 repl = NULL_RTX;
26022 else
26023 repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
26024 GEN_INT (val));
26026 rtx pat = PATTERN (insn);
26027 if (!repl && !reg2)
26029 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
26030 if (GET_CODE (pat) == PARALLEL)
26031 for (int i = 0; i < XVECLEN (pat, 0); i++)
26032 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26034 rtx set = XVECEXP (pat, 0, i);
26036 if (!REG_P (SET_SRC (set))
26037 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
26038 RTX_FRAME_RELATED_P (set) = 1;
26040 RTX_FRAME_RELATED_P (insn) = 1;
26041 return insn;
26044 /* We expect that 'pat' is either a SET or a PARALLEL containing
26045 SETs (and possibly other stuff). In a PARALLEL, all the SETs
26046 are important so they all have to be marked RTX_FRAME_RELATED_P.
26047 Call simplify_replace_rtx on the SETs rather than the whole insn
26048 so as to leave the other stuff alone (for example USE of r12). */
26050 set_used_flags (pat);
26051 if (GET_CODE (pat) == SET)
26053 if (repl)
26054 pat = simplify_replace_rtx (pat, reg, repl);
26055 if (reg2)
26056 pat = simplify_replace_rtx (pat, reg2, repl2);
26058 else if (GET_CODE (pat) == PARALLEL)
26060 pat = shallow_copy_rtx (pat);
26061 XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
26063 for (int i = 0; i < XVECLEN (pat, 0); i++)
26064 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26066 rtx set = XVECEXP (pat, 0, i);
26068 if (repl)
26069 set = simplify_replace_rtx (set, reg, repl);
26070 if (reg2)
26071 set = simplify_replace_rtx (set, reg2, repl2);
26072 XVECEXP (pat, 0, i) = set;
26074 if (!REG_P (SET_SRC (set))
26075 || interesting_frame_related_regno (REGNO (SET_SRC (set))))
26076 RTX_FRAME_RELATED_P (set) = 1;
26079 else
26080 gcc_unreachable ();
26082 RTX_FRAME_RELATED_P (insn) = 1;
26083 add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
26085 return insn;
26088 /* Returns an insn that has a vrsave set operation with the
26089 appropriate CLOBBERs. */
26091 static rtx
26092 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
26094 int nclobs, i;
26095 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
26096 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26098 clobs[0]
26099 = gen_rtx_SET (vrsave,
26100 gen_rtx_UNSPEC_VOLATILE (SImode,
26101 gen_rtvec (2, reg, vrsave),
26102 UNSPECV_SET_VRSAVE));
26104 nclobs = 1;
26106 /* We need to clobber the registers in the mask so the scheduler
26107 does not move sets to VRSAVE before sets of AltiVec registers.
26109 However, if the function receives nonlocal gotos, reload will set
26110 all call saved registers live. We will end up with:
26112 (set (reg 999) (mem))
26113 (parallel [ (set (reg vrsave) (unspec blah))
26114 (clobber (reg 999))])
26116 The clobber will cause the store into reg 999 to be dead, and
26117 flow will attempt to delete an epilogue insn. In this case, we
26118 need an unspec use/set of the register. */
26120 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
26121 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
26123 if (!epiloguep || call_used_regs [i])
26124 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
26125 gen_rtx_REG (V4SImode, i));
26126 else
26128 rtx reg = gen_rtx_REG (V4SImode, i);
26130 clobs[nclobs++]
26131 = gen_rtx_SET (reg,
26132 gen_rtx_UNSPEC (V4SImode,
26133 gen_rtvec (1, reg), 27));
26137 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
26139 for (i = 0; i < nclobs; ++i)
26140 XVECEXP (insn, 0, i) = clobs[i];
26142 return insn;
26145 static rtx
26146 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
26148 rtx addr, mem;
26150 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
26151 mem = gen_frame_mem (GET_MODE (reg), addr);
26152 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
26155 static rtx
26156 gen_frame_load (rtx reg, rtx frame_reg, int offset)
26158 return gen_frame_set (reg, frame_reg, offset, false);
26161 static rtx
26162 gen_frame_store (rtx reg, rtx frame_reg, int offset)
26164 return gen_frame_set (reg, frame_reg, offset, true);
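/* Editor's note: for example, gen_frame_store (reg, sp, 16) produces
   roughly (set (mem (plus sp (const_int 16))) reg), with the MEM
   carrying the frame alias set from gen_frame_mem; gen_frame_load is
   the mirror image with source and destination swapped.  */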
26167 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
26168 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
26170 static rtx_insn *
26171 emit_frame_save (rtx frame_reg, machine_mode mode,
26172 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
26174 rtx reg;
26176 /* Some cases that need register indexed addressing. */
26177 gcc_checking_assert (!(TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
26178 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode)));
26180 reg = gen_rtx_REG (mode, regno);
26181 rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
26182 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
26183 NULL_RTX, NULL_RTX);
26186 /* Emit an offset memory reference suitable for a frame store, while
26187 converting to a valid addressing mode. */
26189 static rtx
26190 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
26192 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, GEN_INT (offset)));
26195 #ifndef TARGET_FIX_AND_CONTINUE
26196 #define TARGET_FIX_AND_CONTINUE 0
26197 #endif
26199 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
26200 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
26201 #define LAST_SAVRES_REGISTER 31
26202 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
26204 enum {
26205 SAVRES_LR = 0x1,
26206 SAVRES_SAVE = 0x2,
26207 SAVRES_REG = 0x0c,
26208 SAVRES_GPR = 0,
26209 SAVRES_FPR = 4,
26210 SAVRES_VR = 8
26213 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
26215 /* Temporary holding space for an out-of-line register save/restore
26216 routine name. */
26217 static char savres_routine_name[30];
26219 /* Return the name for an out-of-line register save/restore routine.
26220 SEL selects the register class and whether we are saving or restoring. */
26222 static char *
26223 rs6000_savres_routine_name (int regno, int sel)
26225 const char *prefix = "";
26226 const char *suffix = "";
26228 /* Different targets are supposed to define
26229 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
26230 routine name could be defined with:
26232 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
26234 This is a nice idea in theory, but in reality, things are
26235 complicated in several ways:
26237 - ELF targets have save/restore routines for GPRs.
26239 - PPC64 ELF targets have routines for save/restore of GPRs that
26240 differ in what they do with the link register, so having a set
26241 prefix doesn't work. (We only use one of the save routines at
26242 the moment, though.)
26244 - PPC32 elf targets have "exit" versions of the restore routines
26245 that restore the link register and can save some extra space.
26246 These require an extra suffix. (There are also "tail" versions
26247 of the restore routines and "GOT" versions of the save routines,
26248 but we don't generate those at present. Same problems apply,
26249 though.)
26251 We deal with all this by synthesizing our own prefix/suffix and
26252 using that for the simple sprintf call shown above. */
26253 if (DEFAULT_ABI == ABI_V4)
26255 if (TARGET_64BIT)
26256 goto aix_names;
26258 if ((sel & SAVRES_REG) == SAVRES_GPR)
26259 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
26260 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26261 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
26262 else if ((sel & SAVRES_REG) == SAVRES_VR)
26263 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26264 else
26265 abort ();
26267 if ((sel & SAVRES_LR))
26268 suffix = "_x";
26270 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26272 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
26273 /* No out-of-line save/restore routines for GPRs on AIX. */
26274 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
26275 #endif
26277 aix_names:
26278 if ((sel & SAVRES_REG) == SAVRES_GPR)
26279 prefix = ((sel & SAVRES_SAVE)
26280 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
26281 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
26282 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26284 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
26285 if ((sel & SAVRES_LR))
26286 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
26287 else
26288 #endif
26290 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
26291 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
26294 else if ((sel & SAVRES_REG) == SAVRES_VR)
26295 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
26296 else
26297 abort ();
26300 if (DEFAULT_ABI == ABI_DARWIN)
26302 /* The Darwin approach is (slightly) different, in order to be
26303 compatible with code generated by the system toolchain. There is a
26304 single symbol for the start of save sequence, and the code here
26305 embeds an offset into that code on the basis of the first register
26306 to be saved. */
26307 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
26308 if ((sel & SAVRES_REG) == SAVRES_GPR)
26309 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
26310 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
26311 (regno - 13) * 4, prefix, regno);
26312 else if ((sel & SAVRES_REG) == SAVRES_FPR)
26313 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
26314 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
26315 else if ((sel & SAVRES_REG) == SAVRES_VR)
26316 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
26317 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
26318 else
26319 abort ();
26321 else
26322 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
26324 return savres_routine_name;
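/* Editor's note: for illustration, saving GPRs starting at r29 yields
   "_savegpr_29" (or "_savegpr_29_x" with the LR variant) on 32-bit ELF,
   and "_savegpr0_29" / "_savegpr1_29" (with / without the LR save)
   under the AIX-style names used by 64-bit and ELFv2.  */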
26327 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
26328 SEL selects the register class and whether we are saving or restoring. */
26330 static rtx
26331 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
26333 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
26334 ? info->first_gp_reg_save
26335 : (sel & SAVRES_REG) == SAVRES_FPR
26336 ? info->first_fp_reg_save - 32
26337 : (sel & SAVRES_REG) == SAVRES_VR
26338 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
26339 : -1);
26340 rtx sym;
26341 int select = sel;
26343 /* Don't generate bogus routine names. */
26344 gcc_assert (FIRST_SAVRES_REGISTER <= regno
26345 && regno <= LAST_SAVRES_REGISTER
26346 && select >= 0 && select <= 12);
26348 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
26350 if (sym == NULL)
26352 char *name;
26354 name = rs6000_savres_routine_name (regno, sel);
26356 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
26357 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
26358 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
26361 return sym;
26364 /* Emit a sequence of insns, including a stack tie if needed, for
26365 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
26366 reset the stack pointer, but move the base of the frame into
26367 reg UPDT_REGNO for use by out-of-line register restore routines. */
26369 static rtx
26370 rs6000_emit_stack_reset (rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
26371 unsigned updt_regno)
26373 /* If there is nothing to do, don't do anything. */
26374 if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno)
26375 return NULL_RTX;
26377 rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
26379 /* This blockage is needed so that sched doesn't decide to move
26380 the sp change before the register restores. */
26381 if (DEFAULT_ABI == ABI_V4)
26382 return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx,
26383 GEN_INT (frame_off)));
26385 /* If we are restoring registers out-of-line, we will be using the
26386 "exit" variants of the restore routines, which will reset the
26387 stack for us. But we do need to point updt_reg into the
26388 right place for those routines. */
26389 if (frame_off != 0)
26390 return emit_insn (gen_add3_insn (updt_reg_rtx,
26391 frame_reg_rtx, GEN_INT (frame_off)));
26392 else
26393 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
26395 return NULL_RTX;
26398 /* Return the register number used as a pointer by out-of-line
26399 save/restore functions. */
26401 static inline unsigned
26402 ptr_regno_for_savres (int sel)
26404 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26405 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
26406 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
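/* Editor's note: tabulated, the pointer register chosen above is r1
   for AIX/ELFv2 FPR or LR variants, r12 for other AIX/ELFv2 variants,
   r1 for Darwin FPR variants, and r11 otherwise (including V4).  */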
26409 /* Construct a parallel rtx describing the effect of a call to an
26410 out-of-line register save/restore routine, and emit the insn
26411 or jump_insn as appropriate. */
26413 static rtx_insn *
26414 rs6000_emit_savres_rtx (rs6000_stack_t *info,
26415 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
26416 machine_mode reg_mode, int sel)
26418 int i;
26419 int offset, start_reg, end_reg, n_regs, use_reg;
26420 int reg_size = GET_MODE_SIZE (reg_mode);
26421 rtx sym;
26422 rtvec p;
26423 rtx par;
26424 rtx_insn *insn;
26426 offset = 0;
26427 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26428 ? info->first_gp_reg_save
26429 : (sel & SAVRES_REG) == SAVRES_FPR
26430 ? info->first_fp_reg_save
26431 : (sel & SAVRES_REG) == SAVRES_VR
26432 ? info->first_altivec_reg_save
26433 : -1);
26434 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
26435 ? 32
26436 : (sel & SAVRES_REG) == SAVRES_FPR
26437 ? 64
26438 : (sel & SAVRES_REG) == SAVRES_VR
26439 ? LAST_ALTIVEC_REGNO + 1
26440 : -1);
26441 n_regs = end_reg - start_reg;
26442 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
26443 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
26444 + n_regs);
26446 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26447 RTVEC_ELT (p, offset++) = ret_rtx;
26449 RTVEC_ELT (p, offset++)
26450 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
26452 sym = rs6000_savres_routine_sym (info, sel);
26453 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
26455 use_reg = ptr_regno_for_savres (sel);
26456 if ((sel & SAVRES_REG) == SAVRES_VR)
26458 /* Vector regs are saved/restored using [reg+reg] addressing. */
26459 RTVEC_ELT (p, offset++)
26460 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26461 RTVEC_ELT (p, offset++)
26462 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
26464 else
26465 RTVEC_ELT (p, offset++)
26466 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
26468 for (i = 0; i < end_reg - start_reg; i++)
26469 RTVEC_ELT (p, i + offset)
26470 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
26471 frame_reg_rtx, save_area_offset + reg_size * i,
26472 (sel & SAVRES_SAVE) != 0);
26474 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26475 RTVEC_ELT (p, i + offset)
26476 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
26478 par = gen_rtx_PARALLEL (VOIDmode, p);
26480 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
26482 insn = emit_jump_insn (par);
26483 JUMP_LABEL (insn) = ret_rtx;
26485 else
26486 insn = emit_insn (par);
26487 return insn;
26490 /* Emit prologue code to store CR fields that need to be saved into REG. This
26491 function should only be called when moving the non-volatile CRs to REG, it
26492 is not a general purpose routine to move the entire set of CRs to REG.
26493 Specifically, gen_prologue_movesi_from_cr() does not contain uses of the
26494 volatile CRs. */
26496 static void
26497 rs6000_emit_prologue_move_from_cr (rtx reg)
26499 /* Only the ELFv2 ABI allows storing only selected fields. */
26500 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
26502 int i, cr_reg[8], count = 0;
26504 /* Collect CR fields that must be saved. */
26505 for (i = 0; i < 8; i++)
26506 if (save_reg_p (CR0_REGNO + i))
26507 cr_reg[count++] = i;
26509 /* If it's just a single one, use mfcrf. */
26510 if (count == 1)
26512 rtvec p = rtvec_alloc (1);
26513 rtvec r = rtvec_alloc (2);
26514 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
26515 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
26516 RTVEC_ELT (p, 0)
26517 = gen_rtx_SET (reg,
26518 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
26520 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26521 return;
26524 /* ??? It might be better to handle count == 2 / 3 cases here
26525 as well, using logical operations to combine the values. */
26528 emit_insn (gen_prologue_movesi_from_cr (reg));
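/* Editor's sketch: the single-field mfcrf mask built above selects CR
   field N with bit 7-N of an 8-bit field mask, e.g. CR2 -> 0x20:  */
static int
mfcrf_field_mask_example (int cr_field)
{
  return 1 << (7 - cr_field);   /* CR0 -> 0x80, ..., CR7 -> 0x01 */
}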
26531 /* Return whether the split-stack arg pointer (r12) is used. */
26533 static bool
26534 split_stack_arg_pointer_used_p (void)
26536 /* If the pseudo holding the arg pointer is no longer a pseudo,
26537 then the arg pointer is used. */
26538 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
26539 && (!REG_P (cfun->machine->split_stack_arg_pointer)
26540 || (REGNO (cfun->machine->split_stack_arg_pointer)
26541 < FIRST_PSEUDO_REGISTER)))
26542 return true;
26544 /* Unfortunately we also need to do some code scanning, since
26545 r12 may have been substituted for the pseudo. */
26546 rtx_insn *insn;
26547 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
26548 FOR_BB_INSNS (bb, insn)
26549 if (NONDEBUG_INSN_P (insn))
26551 /* A call destroys r12. */
26552 if (CALL_P (insn))
26553 return false;
26555 df_ref use;
26556 FOR_EACH_INSN_USE (use, insn)
26558 rtx x = DF_REF_REG (use);
26559 if (REG_P (x) && REGNO (x) == 12)
26560 return true;
26562 df_ref def;
26563 FOR_EACH_INSN_DEF (def, insn)
26565 rtx x = DF_REF_REG (def);
26566 if (REG_P (x) && REGNO (x) == 12)
26567 return false;
26570 return bitmap_bit_p (DF_LR_OUT (bb), 12);
26573 /* Return whether we need to emit an ELFv2 global entry point prologue. */
26575 static bool
26576 rs6000_global_entry_point_needed_p (void)
26578 /* Only needed for the ELFv2 ABI. */
26579 if (DEFAULT_ABI != ABI_ELFv2)
26580 return false;
26582 /* With -msingle-pic-base, we assume the whole program shares the same
26583 TOC, so no global entry point prologues are needed anywhere. */
26584 if (TARGET_SINGLE_PIC_BASE)
26585 return false;
26587 /* Ensure we have a global entry point for thunks. ??? We could
26588 avoid that if the target routine doesn't need a global entry point,
26589 but we do not know whether this is the case at this point. */
26590 if (cfun->is_thunk)
26591 return true;
26593 /* For regular functions, rs6000_emit_prologue sets this flag if the
26594 routine ever uses the TOC pointer. */
26595 return cfun->machine->r2_setup_needed;
26598 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS. */
26599 static sbitmap
26600 rs6000_get_separate_components (void)
26602 rs6000_stack_t *info = rs6000_stack_info ();
26604 if (WORLD_SAVE_P (info))
26605 return NULL;
26607 gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
26608 && !(info->savres_strategy & REST_MULTIPLE));
26610 /* Component 0 is the save/restore of LR (done via GPR0).
26611 Component 2 is the save of the TOC (GPR2).
26612 Components 13..31 are the save/restore of GPR13..GPR31.
26613 Components 46..63 are the save/restore of FPR14..FPR31. */
26615 cfun->machine->n_components = 64;
26617 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26618 bitmap_clear (components);
26620 int reg_size = TARGET_32BIT ? 4 : 8;
26621 int fp_reg_size = 8;
26623 /* The GPRs we need saved to the frame. */
26624 if ((info->savres_strategy & SAVE_INLINE_GPRS)
26625 && (info->savres_strategy & REST_INLINE_GPRS))
26627 int offset = info->gp_save_offset;
26628 if (info->push_p)
26629 offset += info->total_size;
26631 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26633 if (IN_RANGE (offset, -0x8000, 0x7fff)
26634 && save_reg_p (regno))
26635 bitmap_set_bit (components, regno);
26637 offset += reg_size;
26641 /* Don't mess with the hard frame pointer. */
26642 if (frame_pointer_needed)
26643 bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
26645 /* Don't mess with the fixed TOC register. */
26646 if ((TARGET_TOC && TARGET_MINIMAL_TOC)
26647 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
26648 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
26649 bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);
26651 /* The FPRs we need saved to the frame. */
26652 if ((info->savres_strategy & SAVE_INLINE_FPRS)
26653 && (info->savres_strategy & REST_INLINE_FPRS))
26655 int offset = info->fp_save_offset;
26656 if (info->push_p)
26657 offset += info->total_size;
26659 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26661 if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
26662 bitmap_set_bit (components, regno);
26664 offset += fp_reg_size;
26668 /* Optimize LR save and restore if we can. This is component 0. Any
26669 out-of-line register save/restore routines need LR. */
26670 if (info->lr_save_p
26671 && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
26672 && (info->savres_strategy & SAVE_INLINE_GPRS)
26673 && (info->savres_strategy & REST_INLINE_GPRS)
26674 && (info->savres_strategy & SAVE_INLINE_FPRS)
26675 && (info->savres_strategy & REST_INLINE_FPRS)
26676 && (info->savres_strategy & SAVE_INLINE_VRS)
26677 && (info->savres_strategy & REST_INLINE_VRS))
26679 int offset = info->lr_save_offset;
26680 if (info->push_p)
26681 offset += info->total_size;
26682 if (IN_RANGE (offset, -0x8000, 0x7fff))
26683 bitmap_set_bit (components, 0);
26686 /* Optimize saving the TOC. This is component 2. */
26687 if (cfun->machine->save_toc_in_prologue)
26688 bitmap_set_bit (components, 2);
26690 return components;
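/* Editor's note: the component numbering above is simply the hard
   register number where one exists -- GPR13..GPR31 are components
   13..31, and FPR14..FPR31 (hard regs 46..63) are components 46..63 --
   with 0 and 2 recycled for the LR and TOC saves, since those GPRs are
   never themselves separately wrapped.  */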
26693 /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB. */
26694 static sbitmap
26695 rs6000_components_for_bb (basic_block bb)
26697 rs6000_stack_t *info = rs6000_stack_info ();
26699 bitmap in = DF_LIVE_IN (bb);
26700 bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
26701 bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
26703 sbitmap components = sbitmap_alloc (cfun->machine->n_components);
26704 bitmap_clear (components);
26706 /* A register is used in a bb if it is in the IN, GEN, or KILL sets. */
26708 /* GPRs. */
26709 for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
26710 if (bitmap_bit_p (in, regno)
26711 || bitmap_bit_p (gen, regno)
26712 || bitmap_bit_p (kill, regno))
26713 bitmap_set_bit (components, regno);
26715 /* FPRs. */
26716 for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
26717 if (bitmap_bit_p (in, regno)
26718 || bitmap_bit_p (gen, regno)
26719 || bitmap_bit_p (kill, regno))
26720 bitmap_set_bit (components, regno);
26722 /* The link register. */
26723 if (bitmap_bit_p (in, LR_REGNO)
26724 || bitmap_bit_p (gen, LR_REGNO)
26725 || bitmap_bit_p (kill, LR_REGNO))
26726 bitmap_set_bit (components, 0);
26728 /* The TOC save. */
26729 if (bitmap_bit_p (in, TOC_REGNUM)
26730 || bitmap_bit_p (gen, TOC_REGNUM)
26731 || bitmap_bit_p (kill, TOC_REGNUM))
26732 bitmap_set_bit (components, 2);
26734 return components;
26737 /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */
26738 static void
26739 rs6000_disqualify_components (sbitmap components, edge e,
26740 sbitmap edge_components, bool /*is_prologue*/)
26742 /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be
26743 live where we want to place that code. */
26744 if (bitmap_bit_p (edge_components, 0)
26745 && bitmap_bit_p (DF_LIVE_IN (e->dest), 0))
26747 if (dump_file)
26748 fprintf (dump_file, "Disqualifying LR because GPR0 is live "
26749 "on entry to bb %d\n", e->dest->index);
26750 bitmap_clear_bit (components, 0);
26754 /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */
26755 static void
26756 rs6000_emit_prologue_components (sbitmap components)
26758 rs6000_stack_t *info = rs6000_stack_info ();
26759 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26760 ? HARD_FRAME_POINTER_REGNUM
26761 : STACK_POINTER_REGNUM);
26763 machine_mode reg_mode = Pmode;
26764 int reg_size = TARGET_32BIT ? 4 : 8;
26765 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26766 ? DFmode : SFmode;
26767 int fp_reg_size = 8;
26769 /* Prologue for LR. */
26770 if (bitmap_bit_p (components, 0))
26772 rtx reg = gen_rtx_REG (reg_mode, 0);
26773 rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO));
26774 RTX_FRAME_RELATED_P (insn) = 1;
26775 add_reg_note (insn, REG_CFA_REGISTER, NULL);
26777 int offset = info->lr_save_offset;
26778 if (info->push_p)
26779 offset += info->total_size;
26781 insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26782 RTX_FRAME_RELATED_P (insn) = 1;
26783 rtx lr = gen_rtx_REG (reg_mode, LR_REGNO);
26784 rtx mem = copy_rtx (SET_DEST (single_set (insn)));
26785 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr));
26788 /* Prologue for TOC. */
26789 if (bitmap_bit_p (components, 2))
26791 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
26792 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26793 emit_insn (gen_frame_store (reg, sp_reg, RS6000_TOC_SAVE_SLOT));
26796 /* Prologue for the GPRs. */
26797 int offset = info->gp_save_offset;
26798 if (info->push_p)
26799 offset += info->total_size;
26801 for (int i = info->first_gp_reg_save; i < 32; i++)
26803 if (bitmap_bit_p (components, i))
26805 rtx reg = gen_rtx_REG (reg_mode, i);
26806 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26807 RTX_FRAME_RELATED_P (insn) = 1;
26808 rtx set = copy_rtx (single_set (insn));
26809 add_reg_note (insn, REG_CFA_OFFSET, set);
26812 offset += reg_size;
26815 /* Prologue for the FPRs. */
26816 offset = info->fp_save_offset;
26817 if (info->push_p)
26818 offset += info->total_size;
26820 for (int i = info->first_fp_reg_save; i < 64; i++)
26822 if (bitmap_bit_p (components, i))
26824 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26825 rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset));
26826 RTX_FRAME_RELATED_P (insn) = 1;
26827 rtx set = copy_rtx (single_set (insn));
26828 add_reg_note (insn, REG_CFA_OFFSET, set);
26831 offset += fp_reg_size;
26835 /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */
26836 static void
26837 rs6000_emit_epilogue_components (sbitmap components)
26839 rs6000_stack_t *info = rs6000_stack_info ();
26840 rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
26841 ? HARD_FRAME_POINTER_REGNUM
26842 : STACK_POINTER_REGNUM);
26844 machine_mode reg_mode = Pmode;
26845 int reg_size = TARGET_32BIT ? 4 : 8;
26847 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
26848 ? DFmode : SFmode;
26849 int fp_reg_size = 8;
26851 /* Epilogue for the FPRs. */
26852 int offset = info->fp_save_offset;
26853 if (info->push_p)
26854 offset += info->total_size;
26856 for (int i = info->first_fp_reg_save; i < 64; i++)
26858 if (bitmap_bit_p (components, i))
26860 rtx reg = gen_rtx_REG (fp_reg_mode, i);
26861 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26862 RTX_FRAME_RELATED_P (insn) = 1;
26863 add_reg_note (insn, REG_CFA_RESTORE, reg);
26866 offset += fp_reg_size;
26869 /* Epilogue for the GPRs. */
26870 offset = info->gp_save_offset;
26871 if (info->push_p)
26872 offset += info->total_size;
26874 for (int i = info->first_gp_reg_save; i < 32; i++)
26876 if (bitmap_bit_p (components, i))
26878 rtx reg = gen_rtx_REG (reg_mode, i);
26879 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26880 RTX_FRAME_RELATED_P (insn) = 1;
26881 add_reg_note (insn, REG_CFA_RESTORE, reg);
26884 offset += reg_size;
26887 /* Epilogue for LR. */
26888 if (bitmap_bit_p (components, 0))
26890 int offset = info->lr_save_offset;
26891 if (info->push_p)
26892 offset += info->total_size;
26894 rtx reg = gen_rtx_REG (reg_mode, 0);
26895 rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset));
26897 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
26898 insn = emit_move_insn (lr, reg);
26899 RTX_FRAME_RELATED_P (insn) = 1;
26900 add_reg_note (insn, REG_CFA_RESTORE, lr);
26904 /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */
26905 static void
26906 rs6000_set_handled_components (sbitmap components)
26908 rs6000_stack_t *info = rs6000_stack_info ();
26910 for (int i = info->first_gp_reg_save; i < 32; i++)
26911 if (bitmap_bit_p (components, i))
26912 cfun->machine->gpr_is_wrapped_separately[i] = true;
26914 for (int i = info->first_fp_reg_save; i < 64; i++)
26915 if (bitmap_bit_p (components, i))
26916 cfun->machine->fpr_is_wrapped_separately[i - 32] = true;
26918 if (bitmap_bit_p (components, 0))
26919 cfun->machine->lr_is_wrapped_separately = true;
26921 if (bitmap_bit_p (components, 2))
26922 cfun->machine->toc_is_wrapped_separately = true;
26925 /* VRSAVE is a bit vector representing which AltiVec registers
26926 are used. The OS uses this to determine which vector
26927 registers to save on a context switch. We need to save
26928 VRSAVE on the stack frame, add whatever AltiVec registers we
26929 used in this function, and do the corresponding magic in the
26930 epilogue. */
26931 static void
26932 emit_vrsave_prologue (rs6000_stack_t *info, int save_regno,
26933 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26935 /* Get VRSAVE into a GPR. */
26936 rtx reg = gen_rtx_REG (SImode, save_regno);
26937 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
26938 if (TARGET_MACHO)
26939 emit_insn (gen_get_vrsave_internal (reg));
26940 else
26941 emit_insn (gen_rtx_SET (reg, vrsave));
26943 /* Save VRSAVE. */
26944 int offset = info->vrsave_save_offset + frame_off;
26945 emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
26947 /* Include the registers in the mask. */
26948 emit_insn (gen_iorsi3 (reg, reg, GEN_INT (info->vrsave_mask)));
26950 emit_insn (generate_set_vrsave (reg, info, 0));
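/* Editor's sketch: in scalar terms, the prologue sequence above is:  */
static unsigned int
vrsave_prologue_example (unsigned int vrsave, unsigned int *save_slot,
                         unsigned int vrsave_mask)
{
  *save_slot = vrsave;            /* save the incoming VRSAVE */
  return vrsave | vrsave_mask;    /* then OR in this function's regs */
}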
26953 /* Set up the arg pointer (r12) for -fsplit-stack code. If __morestack was
26954 called, it left the arg pointer to the old stack in r29. Otherwise, the
26955 arg pointer is the top of the current frame. */
26956 static void
26957 emit_split_stack_prologue (rs6000_stack_t *info, rtx_insn *sp_adjust,
26958 HOST_WIDE_INT frame_off, rtx frame_reg_rtx)
26960 cfun->machine->split_stack_argp_used = true;
26962 if (sp_adjust)
26964 rtx r12 = gen_rtx_REG (Pmode, 12);
26965 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26966 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
26967 emit_insn_before (set_r12, sp_adjust);
26969 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
26971 rtx r12 = gen_rtx_REG (Pmode, 12);
26972 if (frame_off == 0)
26973 emit_move_insn (r12, frame_reg_rtx);
26974 else
26975 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
26978 if (info->push_p)
26980 rtx r12 = gen_rtx_REG (Pmode, 12);
26981 rtx r29 = gen_rtx_REG (Pmode, 29);
26982 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26983 rtx not_more = gen_label_rtx ();
26984 rtx jump;
26986 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26987 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
26988 gen_rtx_LABEL_REF (VOIDmode, not_more),
26989 pc_rtx);
26990 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26991 JUMP_LABEL (jump) = not_more;
26992 LABEL_NUSES (not_more) += 1;
26993 emit_move_insn (r12, r29);
26994 emit_label (not_more);
26998 /* Emit function prologue as insns. */
27000 void
27001 rs6000_emit_prologue (void)
27003 rs6000_stack_t *info = rs6000_stack_info ();
27004 machine_mode reg_mode = Pmode;
27005 int reg_size = TARGET_32BIT ? 4 : 8;
27006 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
27007 ? DFmode : SFmode;
27008 int fp_reg_size = 8;
27009 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
27010 rtx frame_reg_rtx = sp_reg_rtx;
27011 unsigned int cr_save_regno;
27012 rtx cr_save_rtx = NULL_RTX;
27013 rtx_insn *insn;
27014 int strategy;
27015 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
27016 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
27017 && call_used_regs[STATIC_CHAIN_REGNUM]);
27018 int using_split_stack = (flag_split_stack
27019 && (lookup_attribute ("no_split_stack",
27020 DECL_ATTRIBUTES (cfun->decl))
27021 == NULL));
27023 /* Offset to top of frame for frame_reg and sp respectively. */
27024 HOST_WIDE_INT frame_off = 0;
27025 HOST_WIDE_INT sp_off = 0;
27026 /* sp_adjust is the stack adjusting instruction, tracked so that the
27027 insn setting up the split-stack arg pointer can be emitted just
27028 prior to it, when r12 is not used here for other purposes. */
27029 rtx_insn *sp_adjust = 0;
27031 #if CHECKING_P
27032 /* Track and check usage of r0, r11, r12. */
27033 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
27034 #define START_USE(R) do \
27036 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27037 reg_inuse |= 1 << (R); \
27038 } while (0)
27039 #define END_USE(R) do \
27041 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
27042 reg_inuse &= ~(1 << (R)); \
27043 } while (0)
27044 #define NOT_INUSE(R) do \
27046 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
27047 } while (0)
27048 #else
27049 #define START_USE(R) do {} while (0)
27050 #define END_USE(R) do {} while (0)
27051 #define NOT_INUSE(R) do {} while (0)
27052 #endif
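/* Example usage of the checking macros above: a scratch use of r0
   is bracketed as
	START_USE (0);
	... emit insns using r0 ...
	END_USE (0);
   so overlapping uses of r0, r11 and r12 trip the asserts in
   checking builds; in release builds the macros expand to nothing. */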
27054 if (DEFAULT_ABI == ABI_ELFv2
27055 && !TARGET_SINGLE_PIC_BASE)
27057 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
27059 /* With -mminimal-toc we may generate an extra use of r2 below. */
27060 if (TARGET_TOC && TARGET_MINIMAL_TOC
27061 && !constant_pool_empty_p ())
27062 cfun->machine->r2_setup_needed = true;
27066 if (flag_stack_usage_info)
27067 current_function_static_stack_size = info->total_size;
27069 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
27071 HOST_WIDE_INT size = info->total_size;
27073 if (crtl->is_leaf && !cfun->calls_alloca)
27075 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27076 rs6000_emit_probe_stack_range (get_stack_check_protect (),
27077 size - get_stack_check_protect ());
27079 else if (size > 0)
27080 rs6000_emit_probe_stack_range (get_stack_check_protect (), size);
27083 if (TARGET_FIX_AND_CONTINUE)
27085 /* GDB on Darwin arranges to forward a function from the old
27086 address by modifying the first 5 instructions of the function
27087 to branch to the overriding function. This is necessary to
27088 permit function pointers that point to the old function to
27089 actually forward to the new function. */
27090 emit_insn (gen_nop ());
27091 emit_insn (gen_nop ());
27092 emit_insn (gen_nop ());
27093 emit_insn (gen_nop ());
27094 emit_insn (gen_nop ());
27097 /* Handle world saves specially here. */
27098 if (WORLD_SAVE_P (info))
27100 int i, j, sz;
27101 rtx treg;
27102 rtvec p;
27103 rtx reg0;
27105 /* save_world expects lr in r0. */
27106 reg0 = gen_rtx_REG (Pmode, 0);
27107 if (info->lr_save_p)
27109 insn = emit_move_insn (reg0,
27110 gen_rtx_REG (Pmode, LR_REGNO));
27111 RTX_FRAME_RELATED_P (insn) = 1;
27114 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
27115 assumptions about the offsets of various bits of the stack
27116 frame. */
27117 gcc_assert (info->gp_save_offset == -220
27118 && info->fp_save_offset == -144
27119 && info->lr_save_offset == 8
27120 && info->cr_save_offset == 4
27121 && info->push_p
27122 && info->lr_save_p
27123 && (!crtl->calls_eh_return
27124 || info->ehrd_offset == -432)
27125 && info->vrsave_save_offset == -224
27126 && info->altivec_save_offset == -416);
27128 treg = gen_rtx_REG (SImode, 11);
27129 emit_move_insn (treg, GEN_INT (-info->total_size));
27131 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
27132 in R11. It also clobbers R12, so beware! */
27134 /* Preserve CR2 for save_world prologues. */
27135 sz = 5;
27136 sz += 32 - info->first_gp_reg_save;
27137 sz += 64 - info->first_fp_reg_save;
27138 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
27139 p = rtvec_alloc (sz);
27140 j = 0;
27141 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
27142 gen_rtx_REG (SImode,
27143 LR_REGNO));
27144 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
27145 gen_rtx_SYMBOL_REF (Pmode,
27146 "*save_world"));
27147 /* We do floats first so that the instruction pattern matches
27148 properly. */
27149 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
27150 RTVEC_ELT (p, j++)
27151 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
27152 ? DFmode : SFmode,
27153 info->first_fp_reg_save + i),
27154 frame_reg_rtx,
27155 info->fp_save_offset + frame_off + 8 * i);
27156 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
27157 RTVEC_ELT (p, j++)
27158 = gen_frame_store (gen_rtx_REG (V4SImode,
27159 info->first_altivec_reg_save + i),
27160 frame_reg_rtx,
27161 info->altivec_save_offset + frame_off + 16 * i);
27162 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27163 RTVEC_ELT (p, j++)
27164 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27165 frame_reg_rtx,
27166 info->gp_save_offset + frame_off + reg_size * i);
27168 /* CR register traditionally saved as CR2. */
27169 RTVEC_ELT (p, j++)
27170 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
27171 frame_reg_rtx, info->cr_save_offset + frame_off);
27172 /* Explain the use of R0. */
27173 if (info->lr_save_p)
27174 RTVEC_ELT (p, j++)
27175 = gen_frame_store (reg0,
27176 frame_reg_rtx, info->lr_save_offset + frame_off);
27177 /* Explain what happens to the stack pointer. */
27179 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
27180 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
27183 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27184 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27185 treg, GEN_INT (-info->total_size));
27186 sp_off = frame_off = info->total_size;
27189 strategy = info->savres_strategy;
27191 /* For V.4, update the stack before doing any saving and set the back pointer. */
27192 if (! WORLD_SAVE_P (info)
27193 && info->push_p
27194 && (DEFAULT_ABI == ABI_V4
27195 || crtl->calls_eh_return))
27197 bool need_r11 = (!(strategy & SAVE_INLINE_FPRS)
27198 || !(strategy & SAVE_INLINE_GPRS)
27199 || !(strategy & SAVE_INLINE_VRS));
27200 int ptr_regno = -1;
27201 rtx ptr_reg = NULL_RTX;
27202 int ptr_off = 0;
27204 if (info->total_size < 32767)
27205 frame_off = info->total_size;
27206 else if (need_r11)
27207 ptr_regno = 11;
27208 else if (info->cr_save_p
27209 || info->lr_save_p
27210 || info->first_fp_reg_save < 64
27211 || info->first_gp_reg_save < 32
27212 || info->altivec_size != 0
27213 || info->vrsave_size != 0
27214 || crtl->calls_eh_return)
27215 ptr_regno = 12;
27216 else
27218 /* The prologue won't be saving any regs so there is no need
27219 to set up a frame register to access any frame save area.
27220 We also won't be using frame_off anywhere below, but set
27221 the correct value anyway to protect against future
27222 changes to this function. */
27223 frame_off = info->total_size;
27225 if (ptr_regno != -1)
27227 /* Set up the frame offset to that needed by the first
27228 out-of-line save function. */
27229 START_USE (ptr_regno);
27230 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27231 frame_reg_rtx = ptr_reg;
27232 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
27233 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
27234 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
27235 ptr_off = info->gp_save_offset + info->gp_size;
27236 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
27237 ptr_off = info->altivec_save_offset + info->altivec_size;
27238 frame_off = -ptr_off;
27240 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27241 ptr_reg, ptr_off);
27242 if (REGNO (frame_reg_rtx) == 12)
27243 sp_adjust = 0;
27244 sp_off = info->total_size;
27245 if (frame_reg_rtx != sp_reg_rtx)
27246 rs6000_emit_stack_tie (frame_reg_rtx, false);
27249 /* If we use the link register, get it into r0. */
27250 if (!WORLD_SAVE_P (info) && info->lr_save_p
27251 && !cfun->machine->lr_is_wrapped_separately)
27253 rtx addr, reg, mem;
27255 reg = gen_rtx_REG (Pmode, 0);
27256 START_USE (0);
27257 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
27258 RTX_FRAME_RELATED_P (insn) = 1;
27260 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
27261 | SAVE_NOINLINE_FPRS_SAVES_LR)))
27263 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27264 GEN_INT (info->lr_save_offset + frame_off));
27265 mem = gen_rtx_MEM (Pmode, addr);
27266 /* This mem should not use rs6000_sr_alias_set, because of
27267 __builtin_return_address. */
27269 insn = emit_move_insn (mem, reg);
27270 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27271 NULL_RTX, NULL_RTX);
27272 END_USE (0);
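/* In the common case the two moves above assemble to something like
	mflr 0
	std 0,16(1)
   (an lr_save_offset of 16 is typical for the 64-bit ABIs; 32-bit
   ABIs use a smaller offset and an stw). */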
27276 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
27277 r12 will be needed by out-of-line gpr restore. */
27278 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27279 && !(strategy & (SAVE_INLINE_GPRS
27280 | SAVE_NOINLINE_GPRS_SAVES_LR))
27281 ? 11 : 12);
27282 if (!WORLD_SAVE_P (info)
27283 && info->cr_save_p
27284 && REGNO (frame_reg_rtx) != cr_save_regno
27285 && !(using_static_chain_p && cr_save_regno == 11)
27286 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
27288 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
27289 START_USE (cr_save_regno);
27290 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27293 /* Do any required saving of FPRs. If only one or two to save, do
27294 it ourselves. Otherwise, call an out-of-line save function. */
27295 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
27297 int offset = info->fp_save_offset + frame_off;
27298 for (int i = info->first_fp_reg_save; i < 64; i++)
27300 if (save_reg_p (i)
27301 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
27302 emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset,
27303 sp_off - frame_off);
27305 offset += fp_reg_size;
27308 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
27310 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27311 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27312 unsigned ptr_regno = ptr_regno_for_savres (sel);
27313 rtx ptr_reg = frame_reg_rtx;
27315 if (REGNO (frame_reg_rtx) == ptr_regno)
27316 gcc_checking_assert (frame_off == 0);
27317 else
27319 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27320 NOT_INUSE (ptr_regno);
27321 emit_insn (gen_add3_insn (ptr_reg,
27322 frame_reg_rtx, GEN_INT (frame_off)));
27324 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27325 info->fp_save_offset,
27326 info->lr_save_offset,
27327 DFmode, sel);
27328 rs6000_frame_related (insn, ptr_reg, sp_off,
27329 NULL_RTX, NULL_RTX);
27330 if (lr)
27331 END_USE (0);
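/* The out-of-line path above instead branches to one of the
   _savefpr-style routines named by rs6000_savres_routine_name,
   which saves f<first_fp_reg_save>..f31 (and optionally LR) with a
   single call. */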
27334 /* Save GPRs. This is done as a PARALLEL if we are using
27335 the store-multiple instructions. */
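/* When the SAVE_MULTIPLE strategy is chosen (32-bit only), the
   PARALLEL built below matches the store-multiple pattern and
   assembles to a single insn, roughly
	stmw r<first_gp_reg_save>,<gp_save_offset>(r1).  */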
27336 if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
27338 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
27339 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
27340 unsigned ptr_regno = ptr_regno_for_savres (sel);
27341 rtx ptr_reg = frame_reg_rtx;
27342 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
27343 int end_save = info->gp_save_offset + info->gp_size;
27344 int ptr_off;
27346 if (ptr_regno == 12)
27347 sp_adjust = 0;
27348 if (!ptr_set_up)
27349 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27351 /* Need to adjust r11 (r12) if we saved any FPRs. */
27352 if (end_save + frame_off != 0)
27354 rtx offset = GEN_INT (end_save + frame_off);
27356 if (ptr_set_up)
27357 frame_off = -end_save;
27358 else
27359 NOT_INUSE (ptr_regno);
27360 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27362 else if (!ptr_set_up)
27364 NOT_INUSE (ptr_regno);
27365 emit_move_insn (ptr_reg, frame_reg_rtx);
27367 ptr_off = -end_save;
27368 insn = rs6000_emit_savres_rtx (info, ptr_reg,
27369 info->gp_save_offset + ptr_off,
27370 info->lr_save_offset + ptr_off,
27371 reg_mode, sel);
27372 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
27373 NULL_RTX, NULL_RTX);
27374 if (lr)
27375 END_USE (0);
27377 else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE))
27379 rtvec p;
27380 int i;
27381 p = rtvec_alloc (32 - info->first_gp_reg_save);
27382 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
27383 RTVEC_ELT (p, i)
27384 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
27385 frame_reg_rtx,
27386 info->gp_save_offset + frame_off + reg_size * i);
27387 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27388 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27389 NULL_RTX, NULL_RTX);
27391 else if (!WORLD_SAVE_P (info))
27393 int offset = info->gp_save_offset + frame_off;
27394 for (int i = info->first_gp_reg_save; i < 32; i++)
27396 if (save_reg_p (i)
27397 && !cfun->machine->gpr_is_wrapped_separately[i])
27398 emit_frame_save (frame_reg_rtx, reg_mode, i, offset,
27399 sp_off - frame_off);
27401 offset += reg_size;
27405 if (crtl->calls_eh_return)
27407 unsigned int i;
27408 rtvec p;
27410 for (i = 0; ; ++i)
27412 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27413 if (regno == INVALID_REGNUM)
27414 break;
27417 p = rtvec_alloc (i);
27419 for (i = 0; ; ++i)
27421 unsigned int regno = EH_RETURN_DATA_REGNO (i);
27422 if (regno == INVALID_REGNUM)
27423 break;
27425 rtx set
27426 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
27427 sp_reg_rtx,
27428 info->ehrd_offset + sp_off + reg_size * (int) i);
27429 RTVEC_ELT (p, i) = set;
27430 RTX_FRAME_RELATED_P (set) = 1;
27433 insn = emit_insn (gen_blockage ());
27434 RTX_FRAME_RELATED_P (insn) = 1;
27435 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
27438 /* In the AIX ABI we need to make sure r2 is really saved. */
27439 if (TARGET_AIX && crtl->calls_eh_return)
27441 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
27442 rtx join_insn, note;
27443 rtx_insn *save_insn;
27444 long toc_restore_insn;
27446 tmp_reg = gen_rtx_REG (Pmode, 11);
27447 tmp_reg_si = gen_rtx_REG (SImode, 11);
27448 if (using_static_chain_p)
27450 START_USE (0);
27451 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
27453 else
27454 START_USE (11);
27455 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
27456 /* Peek at the instruction to which this function returns. If it's
27457 restoring r2, then we know we've already saved r2. We can't
27458 unconditionally save r2 because the value we have will already
27459 be updated if we arrived at this function via a PLT call or
27460 TOC-adjusting stub. */
27461 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
27462 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
27463 + RS6000_TOC_SAVE_SLOT);
27464 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
27465 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
27466 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
27467 validate_condition_mode (EQ, CCUNSmode);
27468 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
27469 emit_insn (gen_rtx_SET (compare_result,
27470 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
27471 toc_save_done = gen_label_rtx ();
27472 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
27473 gen_rtx_EQ (VOIDmode, compare_result,
27474 const0_rtx),
27475 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
27476 pc_rtx);
27477 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
27478 JUMP_LABEL (jump) = toc_save_done;
27479 LABEL_NUSES (toc_save_done) += 1;
27481 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
27482 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
27483 sp_off - frame_off);
27485 emit_label (toc_save_done);
27487 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
27488 have a CFG that has different saves along different paths.
27489 Move the note to a dummy blockage insn, which describes that
27490 R2 is unconditionally saved after the label. */
27491 /* ??? An alternate representation might be a special insn pattern
27492 containing both the branch and the store. That might give the
27493 code that minimizes the number of DW_CFA_advance opcodes more
27494 freedom in placing the annotations. */
27495 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
27496 if (note)
27497 remove_note (save_insn, note);
27498 else
27499 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
27500 copy_rtx (PATTERN (save_insn)), NULL_RTX);
27501 RTX_FRAME_RELATED_P (save_insn) = 0;
27503 join_insn = emit_insn (gen_blockage ());
27504 REG_NOTES (join_insn) = note;
27505 RTX_FRAME_RELATED_P (join_insn) = 1;
27507 if (using_static_chain_p)
27509 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
27510 END_USE (0);
27512 else
27513 END_USE (11);
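/* By way of example, with a 64-bit TOC save slot of 24 the insn word
   compared against above is 0xe8410018, i.e. "ld 2,24(1)"; seeing
   that at the return address means a stub already saved r2. */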
27516 /* Save CR if we use any that must be preserved. */
27517 if (!WORLD_SAVE_P (info) && info->cr_save_p)
27519 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
27520 GEN_INT (info->cr_save_offset + frame_off));
27521 rtx mem = gen_frame_mem (SImode, addr);
27523 /* If we didn't copy cr before, do so now using r0. */
27524 if (cr_save_rtx == NULL_RTX)
27526 START_USE (0);
27527 cr_save_rtx = gen_rtx_REG (SImode, 0);
27528 rs6000_emit_prologue_move_from_cr (cr_save_rtx);
27531 /* Saving CR requires a two-instruction sequence: one instruction
27532 to move the CR to a general-purpose register, and a second
27533 instruction that stores the GPR to memory.
27535 We do not emit any DWARF CFI records for the first of these,
27536 because we cannot properly represent the fact that CR is saved in
27537 a register. One reason is that we cannot express that multiple
27538 CR fields are saved; another reason is that on 64-bit, the size
27539 of the CR register in DWARF (4 bytes) differs from the size of
27540 a general-purpose register.
27542 This means if any intervening instruction were to clobber one of
27543 the call-saved CR fields, we'd have incorrect CFI. To prevent
27544 this from happening, we mark the store to memory as a use of
27545 those CR fields, which prevents any such instruction from being
27546 scheduled in between the two instructions. */
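/* Concretely, the two-insn sequence is along the lines of
	mfcr rN
	stw rN,<cr_save_offset>(frame_reg)
   with USEs of the live CR fields tying the store to the mfcr. */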
27547 rtx crsave_v[9];
27548 int n_crsave = 0;
27549 int i;
27551 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
27552 for (i = 0; i < 8; i++)
27553 if (save_reg_p (CR0_REGNO + i))
27554 crsave_v[n_crsave++]
27555 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27557 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
27558 gen_rtvec_v (n_crsave, crsave_v)));
27559 END_USE (REGNO (cr_save_rtx));
27561 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
27562 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
27563 so we need to construct a frame expression manually. */
27564 RTX_FRAME_RELATED_P (insn) = 1;
27566 /* Update address to be stack-pointer relative, like
27567 rs6000_frame_related would do. */
27568 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
27569 GEN_INT (info->cr_save_offset + sp_off));
27570 mem = gen_frame_mem (SImode, addr);
27572 if (DEFAULT_ABI == ABI_ELFv2)
27574 /* In the ELFv2 ABI we generate separate CFI records for each
27575 CR field that was actually saved. They all point to the
27576 same 32-bit stack slot. */
27577 rtx crframe[8];
27578 int n_crframe = 0;
27580 for (i = 0; i < 8; i++)
27581 if (save_reg_p (CR0_REGNO + i))
27583 crframe[n_crframe]
27584 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
27586 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
27587 n_crframe++;
27590 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27591 gen_rtx_PARALLEL (VOIDmode,
27592 gen_rtvec_v (n_crframe, crframe)));
27594 else
27596 /* In other ABIs, by convention, we use a single CR regnum to
27597 represent the fact that all call-saved CR fields are saved.
27598 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
27599 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
27600 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
27604 /* In the ELFv2 ABI we need to save all call-saved CR fields into
27605 *separate* slots if the routine calls __builtin_eh_return, so
27606 that they can be independently restored by the unwinder. */
27607 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
27609 int i, cr_off = info->ehcr_offset;
27610 rtx crsave;
27612 /* ??? We might get better performance by using multiple mfocrf
27613 instructions. */
27614 crsave = gen_rtx_REG (SImode, 0);
27615 emit_insn (gen_prologue_movesi_from_cr (crsave));
27617 for (i = 0; i < 8; i++)
27618 if (!call_used_regs[CR0_REGNO + i])
27620 rtvec p = rtvec_alloc (2);
27621 RTVEC_ELT (p, 0)
27622 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
27623 RTVEC_ELT (p, 1)
27624 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
27626 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
27628 RTX_FRAME_RELATED_P (insn) = 1;
27629 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
27630 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
27631 sp_reg_rtx, cr_off + sp_off));
27633 cr_off += reg_size;
27637 /* If we are emitting stack probes but allocating no stack, just
27638 note that in the dump file. */
27639 if (flag_stack_clash_protection
27640 && dump_file
27641 && !info->push_p)
27642 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
27644 /* Update the stack and set the back pointer unless this is V.4,
27645 for which it was done previously. */
27646 if (!WORLD_SAVE_P (info) && info->push_p
27647 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
27649 rtx ptr_reg = NULL;
27650 int ptr_off = 0;
27652 /* If saving altivec regs we need to be able to address all save
27653 locations using a 16-bit offset. */
27654 if ((strategy & SAVE_INLINE_VRS) == 0
27655 || (info->altivec_size != 0
27656 && (info->altivec_save_offset + info->altivec_size - 16
27657 + info->total_size - frame_off) > 32767)
27658 || (info->vrsave_size != 0
27659 && (info->vrsave_save_offset
27660 + info->total_size - frame_off) > 32767))
27662 int sel = SAVRES_SAVE | SAVRES_VR;
27663 unsigned ptr_regno = ptr_regno_for_savres (sel);
27665 if (using_static_chain_p
27666 && ptr_regno == STATIC_CHAIN_REGNUM)
27667 ptr_regno = 12;
27668 if (REGNO (frame_reg_rtx) != ptr_regno)
27669 START_USE (ptr_regno);
27670 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
27671 frame_reg_rtx = ptr_reg;
27672 ptr_off = info->altivec_save_offset + info->altivec_size;
27673 frame_off = -ptr_off;
27675 else if (REGNO (frame_reg_rtx) == 1)
27676 frame_off = info->total_size;
27677 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
27678 ptr_reg, ptr_off);
27679 if (REGNO (frame_reg_rtx) == 12)
27680 sp_adjust = 0;
27681 sp_off = info->total_size;
27682 if (frame_reg_rtx != sp_reg_rtx)
27683 rs6000_emit_stack_tie (frame_reg_rtx, false);
27686 /* Set frame pointer, if needed. */
27687 if (frame_pointer_needed)
27689 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
27690 sp_reg_rtx);
27691 RTX_FRAME_RELATED_P (insn) = 1;
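/* In practice this is a single "mr 31,1", r31 being the hard frame
   pointer register on this port. */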
27694 /* Save AltiVec registers if needed. Save here because the red zone does
27695 not always include AltiVec registers. */
27696 if (!WORLD_SAVE_P (info)
27697 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
27699 int end_save = info->altivec_save_offset + info->altivec_size;
27700 int ptr_off;
27701 /* Oddly, the vector save/restore functions point r0 at the end
27702 of the save area, then use r11 or r12 to load offsets for
27703 [reg+reg] addressing. */
27704 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
27705 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
27706 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
27708 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
27709 NOT_INUSE (0);
27710 if (scratch_regno == 12)
27711 sp_adjust = 0;
27712 if (end_save + frame_off != 0)
27714 rtx offset = GEN_INT (end_save + frame_off);
27716 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
27718 else
27719 emit_move_insn (ptr_reg, frame_reg_rtx);
27721 ptr_off = -end_save;
27722 insn = rs6000_emit_savres_rtx (info, scratch_reg,
27723 info->altivec_save_offset + ptr_off,
27724 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
27725 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
27726 NULL_RTX, NULL_RTX);
27727 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
27729 /* The oddity mentioned above clobbered our frame reg. */
27730 emit_move_insn (frame_reg_rtx, ptr_reg);
27731 frame_off = ptr_off;
27734 else if (!WORLD_SAVE_P (info)
27735 && info->altivec_size != 0)
27737 int i;
27739 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
27740 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
27742 rtx areg, savereg, mem;
27743 HOST_WIDE_INT offset;
27745 offset = (info->altivec_save_offset + frame_off
27746 + 16 * (i - info->first_altivec_reg_save));
27748 savereg = gen_rtx_REG (V4SImode, i);
27750 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
27752 mem = gen_frame_mem (V4SImode,
27753 gen_rtx_PLUS (Pmode, frame_reg_rtx,
27754 GEN_INT (offset)));
27755 insn = emit_insn (gen_rtx_SET (mem, savereg));
27756 areg = NULL_RTX;
27758 else
27760 NOT_INUSE (0);
27761 areg = gen_rtx_REG (Pmode, 0);
27762 emit_move_insn (areg, GEN_INT (offset));
27764 /* AltiVec addressing mode is [reg+reg]. */
27765 mem = gen_frame_mem (V4SImode,
27766 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
27768 /* Rather than emitting a generic move, force use of the stvx
27769 instruction, which we always want on ISA 2.07 (power8) systems.
27770 In particular we don't want xxpermdi/stxvd2x for little
27771 endian. */
27772 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
27775 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
27776 areg, GEN_INT (offset));
27780 /* Save VRSAVE and fold in this function's AltiVec usage; see the
27781 comment before emit_vrsave_prologue above. */
27787 if (!WORLD_SAVE_P (info) && info->vrsave_size != 0)
27789 /* Get VRSAVE into a GPR. Note that ABI_V4 and ABI_DARWIN might
27790 be using r12 as frame_reg_rtx and r11 as the static chain
27791 pointer for nested functions. */
27792 int save_regno = 12;
27793 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27794 && !using_static_chain_p)
27795 save_regno = 11;
27796 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
27798 save_regno = 11;
27799 if (using_static_chain_p)
27800 save_regno = 0;
27802 NOT_INUSE (save_regno);
27804 emit_vrsave_prologue (info, save_regno, frame_off, frame_reg_rtx);
27807 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
27808 if (!TARGET_SINGLE_PIC_BASE
27809 && ((TARGET_TOC && TARGET_MINIMAL_TOC
27810 && !constant_pool_empty_p ())
27811 || (DEFAULT_ABI == ABI_V4
27812 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
27813 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
27815 /* If emit_load_toc_table will use the link register, we need to save
27816 it. We use R12 for this purpose because emit_load_toc_table
27817 can use register 0. This allows us to use a plain 'blr' to return
27818 from the procedure more often. */
27819 int save_LR_around_toc_setup = (TARGET_ELF
27820 && DEFAULT_ABI == ABI_V4
27821 && flag_pic
27822 && ! info->lr_save_p
27823 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
27824 if (save_LR_around_toc_setup)
27826 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27827 rtx tmp = gen_rtx_REG (Pmode, 12);
27829 sp_adjust = 0;
27830 insn = emit_move_insn (tmp, lr);
27831 RTX_FRAME_RELATED_P (insn) = 1;
27833 rs6000_emit_load_toc_table (TRUE);
27835 insn = emit_move_insn (lr, tmp);
27836 add_reg_note (insn, REG_CFA_RESTORE, lr);
27837 RTX_FRAME_RELATED_P (insn) = 1;
27839 else
27840 rs6000_emit_load_toc_table (TRUE);
27843 #if TARGET_MACHO
27844 if (!TARGET_SINGLE_PIC_BASE
27845 && DEFAULT_ABI == ABI_DARWIN
27846 && flag_pic && crtl->uses_pic_offset_table)
27848 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
27849 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
27851 /* Save and restore LR locally around this call (in R0). */
27852 if (!info->lr_save_p)
27853 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
27855 emit_insn (gen_load_macho_picbase (src));
27857 emit_move_insn (gen_rtx_REG (Pmode,
27858 RS6000_PIC_OFFSET_TABLE_REGNUM),
27859 lr);
27861 if (!info->lr_save_p)
27862 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
27864 #endif
27866 /* If we need to, save the TOC register after doing the stack setup.
27867 Do not emit eh frame info for this save. The unwinder wants info,
27868 conceptually attached to instructions in this function, about
27869 register values in the caller of this function. This R2 may have
27870 already been changed from the value in the caller.
27871 We don't attempt to write accurate DWARF EH frame info for R2
27872 because code emitted by gcc for a (non-pointer) function call
27873 doesn't save and restore R2. Instead, R2 is managed out-of-line
27874 by a linker generated plt call stub when the function resides in
27875 a shared library. This behavior is costly to describe in DWARF,
27876 both in terms of the size of DWARF info and the time taken in the
27877 unwinder to interpret it. R2 changes, apart from the
27878 calls_eh_return case earlier in this function, are handled by
27879 linux-unwind.h frob_update_context. */
27880 if (rs6000_save_toc_in_prologue_p ()
27881 && !cfun->machine->toc_is_wrapped_separately)
27883 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
27884 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
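/* E.g. "std 2,24(1)" under ELFv2, where RS6000_TOC_SAVE_SLOT is 24;
   the other 64-bit ABIs use a 40-byte slot. */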
27887 /* Set up the arg pointer (r12) for -fsplit-stack code. */
27888 if (using_split_stack && split_stack_arg_pointer_used_p ())
27889 emit_split_stack_prologue (info, sp_adjust, frame_off, frame_reg_rtx);
27892 /* Output .extern statements for the save/restore routines we use. */
27894 static void
27895 rs6000_output_savres_externs (FILE *file)
27897 rs6000_stack_t *info = rs6000_stack_info ();
27899 if (TARGET_DEBUG_STACK)
27900 debug_stack_info (info);
27902 /* Write .extern for any function we will call to save and restore
27903 fp values. */
27904 if (info->first_fp_reg_save < 64
27905 && !TARGET_MACHO
27906 && !TARGET_ELF)
27908 char *name;
27909 int regno = info->first_fp_reg_save - 32;
27911 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
27913 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
27914 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
27915 name = rs6000_savres_routine_name (regno, sel);
27916 fprintf (file, "\t.extern %s\n", name);
27918 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
27920 bool lr = (info->savres_strategy
27921 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
27922 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
27923 name = rs6000_savres_routine_name (regno, sel);
27924 fprintf (file, "\t.extern %s\n", name);
27929 /* Write function prologue. */
27931 static void
27932 rs6000_output_function_prologue (FILE *file)
27934 if (!cfun->is_thunk)
27935 rs6000_output_savres_externs (file);
27937 /* ELFv2 ABI r2 setup code and local entry point. This must follow
27938 immediately after the global entry point label. */
27939 if (rs6000_global_entry_point_needed_p ())
27941 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
27943 (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);
27945 if (TARGET_CMODEL != CMODEL_LARGE)
27947 /* In the small and medium code models, we assume the TOC is less
27948 than 2 GB away from the text section, so it can be computed via
27949 the following two-instruction sequence. */
27950 char buf[256];
27952 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27953 fprintf (file, "0:\taddis 2,12,.TOC.-");
27954 assemble_name (file, buf);
27955 fprintf (file, "@ha\n");
27956 fprintf (file, "\taddi 2,2,.TOC.-");
27957 assemble_name (file, buf);
27958 fprintf (file, "@l\n");
27960 else
27962 /* In the large code model, we allow arbitrary offsets between the
27963 TOC and the text section, so we have to load the offset from
27964 memory. The data field is emitted directly before the global
27965 entry point in rs6000_elf_declare_function_name. */
27966 char buf[256];
27968 #ifdef HAVE_AS_ENTRY_MARKERS
27969 /* If supported by the linker, emit a marker relocation. If the
27970 total code size of the final executable or shared library
27971 happens to fit into 2 GB after all, the linker will replace
27972 this code sequence with the sequence for the small or medium
27973 code model. */
27974 fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
27975 #endif
27976 fprintf (file, "\tld 2,");
27977 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
27978 assemble_name (file, buf);
27979 fprintf (file, "-");
27980 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27981 assemble_name (file, buf);
27982 fprintf (file, "(12)\n");
27983 fprintf (file, "\tadd 2,2,12\n");
27986 fputs ("\t.localentry\t", file);
27987 assemble_name (file, name);
27988 fputs (",.-", file);
27989 assemble_name (file, name);
27990 fputs ("\n", file);
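/* The .localentry directive records the distance between the global
   and local entry points; local callers that already have r2 set up
   enter past the TOC setup emitted above. */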
27993 /* Output -mprofile-kernel code. This needs to be done here instead of
27994 in output_function_profile since it must go after the ELFv2 ABI
27995 local entry point. */
27996 if (TARGET_PROFILE_KERNEL && crtl->profile)
27998 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
27999 gcc_assert (!TARGET_32BIT);
28001 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
28003 /* In the ELFv2 ABI we have no compiler stack word. It must be
28004 the responsibility of _mcount to preserve the static chain
28005 register if required. */
28006 if (DEFAULT_ABI != ABI_ELFv2
28007 && cfun->static_chain_decl != NULL)
28009 asm_fprintf (file, "\tstd %s,24(%s)\n",
28010 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28011 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28012 asm_fprintf (file, "\tld %s,24(%s)\n",
28013 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
28015 else
28016 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
28019 rs6000_pic_labelno++;
28022 /* -mprofile-kernel code calls mcount before the function prologue,
28023 so a profiled leaf function should stay a leaf function. */
28024 static bool
28025 rs6000_keep_leaf_when_profiled ()
28027 return TARGET_PROFILE_KERNEL;
28030 /* Non-zero if vmx regs are restored before the frame pop, zero if
28031 we restore after the pop when possible. */
28032 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
28034 /* Restoring cr is a two-step process: loading a reg from the frame
28035 save, then moving the reg to cr. For ABI_V4 we must let the
28036 unwinder know that the stack location is no longer valid at or
28037 before the stack deallocation, but we can't emit a cfa_restore for
28038 cr at the stack deallocation like we do for other registers.
28039 The trouble is that it is possible for the move to cr to be
28040 scheduled after the stack deallocation. So say exactly where cr
28041 is located on each of the two insns. */
28043 static rtx
28044 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
28046 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
28047 rtx reg = gen_rtx_REG (SImode, regno);
28048 rtx_insn *insn = emit_move_insn (reg, mem);
28050 if (!exit_func && DEFAULT_ABI == ABI_V4)
28052 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28053 rtx set = gen_rtx_SET (reg, cr);
28055 add_reg_note (insn, REG_CFA_REGISTER, set);
28056 RTX_FRAME_RELATED_P (insn) = 1;
28058 return reg;
28061 /* Reload CR from REG. */
28063 static void
28064 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
28066 int count = 0;
28067 int i;
28069 if (using_mfcr_multiple)
28071 for (i = 0; i < 8; i++)
28072 if (save_reg_p (CR0_REGNO + i))
28073 count++;
28074 gcc_assert (count);
28077 if (using_mfcr_multiple && count > 1)
28079 rtx_insn *insn;
28080 rtvec p;
28081 int ndx;
28083 p = rtvec_alloc (count);
28085 ndx = 0;
28086 for (i = 0; i < 8; i++)
28087 if (save_reg_p (CR0_REGNO + i))
28089 rtvec r = rtvec_alloc (2);
28090 RTVEC_ELT (r, 0) = reg;
28091 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
28092 RTVEC_ELT (p, ndx) =
28093 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
28094 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
28095 ndx++;
28097 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28098 gcc_assert (ndx == count);
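/* The PARALLEL built above matches a pattern restoring several CR
   fields at once, roughly a single "mtcrf <mask>,rN" with one mask
   bit per field being restored. */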
28100 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28101 CR field separately. */
28102 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28104 for (i = 0; i < 8; i++)
28105 if (save_reg_p (CR0_REGNO + i))
28106 add_reg_note (insn, REG_CFA_RESTORE,
28107 gen_rtx_REG (SImode, CR0_REGNO + i));
28109 RTX_FRAME_RELATED_P (insn) = 1;
28112 else
28113 for (i = 0; i < 8; i++)
28114 if (save_reg_p (CR0_REGNO + i))
28116 rtx insn = emit_insn (gen_movsi_to_cr_one
28117 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28119 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
28120 CR field separately, attached to the insn that in fact
28121 restores this particular CR field. */
28122 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
28124 add_reg_note (insn, REG_CFA_RESTORE,
28125 gen_rtx_REG (SImode, CR0_REGNO + i));
28127 RTX_FRAME_RELATED_P (insn) = 1;
28131 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
28132 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
28133 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
28135 rtx_insn *insn = get_last_insn ();
28136 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
28138 add_reg_note (insn, REG_CFA_RESTORE, cr);
28139 RTX_FRAME_RELATED_P (insn) = 1;
28143 /* Like cr, the move to lr instruction can be scheduled after the
28144 stack deallocation, but unlike cr, its stack frame save is still
28145 valid. So we only need to emit the cfa_restore on the correct
28146 instruction. */
28148 static void
28149 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
28151 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
28152 rtx reg = gen_rtx_REG (Pmode, regno);
28154 emit_move_insn (reg, mem);
28157 static void
28158 restore_saved_lr (int regno, bool exit_func)
28160 rtx reg = gen_rtx_REG (Pmode, regno);
28161 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
28162 rtx_insn *insn = emit_move_insn (lr, reg);
28164 if (!exit_func && flag_shrink_wrap)
28166 add_reg_note (insn, REG_CFA_RESTORE, lr);
28167 RTX_FRAME_RELATED_P (insn) = 1;
28171 static rtx
28172 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
28174 if (DEFAULT_ABI == ABI_ELFv2)
28176 int i;
28177 for (i = 0; i < 8; i++)
28178 if (save_reg_p (CR0_REGNO + i))
28180 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
28181 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
28182 cfa_restores);
28185 else if (info->cr_save_p)
28186 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28187 gen_rtx_REG (SImode, CR2_REGNO),
28188 cfa_restores);
28190 if (info->lr_save_p)
28191 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28192 gen_rtx_REG (Pmode, LR_REGNO),
28193 cfa_restores);
28194 return cfa_restores;
28197 /* Return true if OFFSET from stack pointer can be clobbered by signals.
28198 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288
28199 bytes below the stack pointer that are not clobbered by signals. */
28201 static inline bool
28202 offset_below_red_zone_p (HOST_WIDE_INT offset)
28204 return offset < (DEFAULT_ABI == ABI_V4
28205 ? 0
28206 : TARGET_32BIT ? -220 : -288);
28209 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
28211 static void
28212 emit_cfa_restores (rtx cfa_restores)
28214 rtx_insn *insn = get_last_insn ();
28215 rtx *loc = &REG_NOTES (insn);
28217 while (*loc)
28218 loc = &XEXP (*loc, 1);
28219 *loc = cfa_restores;
28220 RTX_FRAME_RELATED_P (insn) = 1;
28223 /* Emit function epilogue as insns. */
28225 void
28226 rs6000_emit_epilogue (int sibcall)
28228 rs6000_stack_t *info;
28229 int restoring_GPRs_inline;
28230 int restoring_FPRs_inline;
28231 int using_load_multiple;
28232 int using_mtcr_multiple;
28233 int use_backchain_to_restore_sp;
28234 int restore_lr;
28235 int strategy;
28236 HOST_WIDE_INT frame_off = 0;
28237 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
28238 rtx frame_reg_rtx = sp_reg_rtx;
28239 rtx cfa_restores = NULL_RTX;
28240 rtx insn;
28241 rtx cr_save_reg = NULL_RTX;
28242 machine_mode reg_mode = Pmode;
28243 int reg_size = TARGET_32BIT ? 4 : 8;
28244 machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
28245 ? DFmode : SFmode;
28246 int fp_reg_size = 8;
28247 int i;
28248 bool exit_func;
28249 unsigned ptr_regno;
28251 info = rs6000_stack_info ();
28253 strategy = info->savres_strategy;
28254 using_load_multiple = strategy & REST_MULTIPLE;
28255 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
28256 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
28257 using_mtcr_multiple = (rs6000_tune == PROCESSOR_PPC601
28258 || rs6000_tune == PROCESSOR_PPC603
28259 || rs6000_tune == PROCESSOR_PPC750
28260 || optimize_size);
28261 /* Restore via the backchain when we have a large frame, since this
28262 is more efficient than an addis, addi pair. The second condition
28263 here will not trigger at the moment; we don't actually need a
28264 frame pointer for alloca, but the generic parts of the compiler
28265 give us one anyway. */
28266 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
28267 ? info->lr_save_offset
28268 : 0) > 32767
28269 || (cfun->calls_alloca
28270 && !frame_pointer_needed));
28271 restore_lr = (info->lr_save_p
28272 && (restoring_FPRs_inline
28273 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
28274 && (restoring_GPRs_inline
28275 || info->first_fp_reg_save < 64)
28276 && !cfun->machine->lr_is_wrapped_separately);
28279 if (WORLD_SAVE_P (info))
28281 int i, j;
28282 char rname[30];
28283 const char *alloc_rname;
28284 rtvec p;
28286 /* eh_rest_world_r10 will return to the location saved in the LR
28287 stack slot (which is not likely to be our caller).
28288 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
28289 rest_world is similar, except any R10 parameter is ignored.
28290 The exception-handling stuff that was here in 2.95 is no
28291 longer necessary. */
28293 p = rtvec_alloc (9
28294 + 32 - info->first_gp_reg_save
28295 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
28296 + 63 + 1 - info->first_fp_reg_save);
28298 strcpy (rname, ((crtl->calls_eh_return) ?
28299 "*eh_rest_world_r10" : "*rest_world"));
28300 alloc_rname = ggc_strdup (rname);
28302 j = 0;
28303 RTVEC_ELT (p, j++) = ret_rtx;
28304 RTVEC_ELT (p, j++)
28305 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
28306 /* The instruction pattern requires a clobber here;
28307 it is shared with the restVEC helper. */
28308 RTVEC_ELT (p, j++)
28309 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
28312 /* CR register traditionally saved as CR2. */
28313 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
28314 RTVEC_ELT (p, j++)
28315 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
28316 if (flag_shrink_wrap)
28318 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
28319 gen_rtx_REG (Pmode, LR_REGNO),
28320 cfa_restores);
28321 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28325 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28327 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
28328 RTVEC_ELT (p, j++)
28329 = gen_frame_load (reg,
28330 frame_reg_rtx, info->gp_save_offset + reg_size * i);
28331 if (flag_shrink_wrap
28332 && save_reg_p (info->first_gp_reg_save + i))
28333 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28335 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
28337 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
28338 RTVEC_ELT (p, j++)
28339 = gen_frame_load (reg,
28340 frame_reg_rtx, info->altivec_save_offset + 16 * i);
28341 if (flag_shrink_wrap
28342 && save_reg_p (info->first_altivec_reg_save + i))
28343 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28345 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
28347 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
28348 ? DFmode : SFmode),
28349 info->first_fp_reg_save + i);
28350 RTVEC_ELT (p, j++)
28351 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
28352 if (flag_shrink_wrap
28353 && save_reg_p (info->first_fp_reg_save + i))
28354 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28356 RTVEC_ELT (p, j++)
28357 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
28358 RTVEC_ELT (p, j++)
28359 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
28360 RTVEC_ELT (p, j++)
28361 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
28362 RTVEC_ELT (p, j++)
28363 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
28364 RTVEC_ELT (p, j++)
28365 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
28366 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
28368 if (flag_shrink_wrap)
28370 REG_NOTES (insn) = cfa_restores;
28371 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28372 RTX_FRAME_RELATED_P (insn) = 1;
28374 return;
28377 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
28378 if (info->push_p)
28379 frame_off = info->total_size;
28381 /* Restore AltiVec registers if we must do so before adjusting the
28382 stack. */
28383 if (info->altivec_size != 0
28384 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28385 || (DEFAULT_ABI != ABI_V4
28386 && offset_below_red_zone_p (info->altivec_save_offset))))
28388 int i;
28389 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28391 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
28392 if (use_backchain_to_restore_sp)
28394 int frame_regno = 11;
28396 if ((strategy & REST_INLINE_VRS) == 0)
28398 /* Of r11 and r12, select the one not clobbered by an
28399 out-of-line restore function for the frame register. */
28400 frame_regno = 11 + 12 - scratch_regno;
28402 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
28403 emit_move_insn (frame_reg_rtx,
28404 gen_rtx_MEM (Pmode, sp_reg_rtx));
28405 frame_off = 0;
28407 else if (frame_pointer_needed)
28408 frame_reg_rtx = hard_frame_pointer_rtx;
28410 if ((strategy & REST_INLINE_VRS) == 0)
28412 int end_save = info->altivec_save_offset + info->altivec_size;
28413 int ptr_off;
28414 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28415 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28417 if (end_save + frame_off != 0)
28419 rtx offset = GEN_INT (end_save + frame_off);
28421 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28423 else
28424 emit_move_insn (ptr_reg, frame_reg_rtx);
28426 ptr_off = -end_save;
28427 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28428 info->altivec_save_offset + ptr_off,
28429 0, V4SImode, SAVRES_VR);
28431 else
28433 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28434 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28436 rtx addr, areg, mem, insn;
28437 rtx reg = gen_rtx_REG (V4SImode, i);
28438 HOST_WIDE_INT offset
28439 = (info->altivec_save_offset + frame_off
28440 + 16 * (i - info->first_altivec_reg_save));
28442 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28444 mem = gen_frame_mem (V4SImode,
28445 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28446 GEN_INT (offset)));
28447 insn = gen_rtx_SET (reg, mem);
28449 else
28451 areg = gen_rtx_REG (Pmode, 0);
28452 emit_move_insn (areg, GEN_INT (offset));
28454 /* AltiVec addressing mode is [reg+reg]. */
28455 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28456 mem = gen_frame_mem (V4SImode, addr);
28458 /* Rather than emitting a generic move, force use of the
28459 lvx instruction, which we always want. In particular we
28460 don't want lxvd2x/xxpermdi for little endian. */
28461 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28464 (void) emit_insn (insn);
28468 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28469 if (((strategy & REST_INLINE_VRS) == 0
28470 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28471 && (flag_shrink_wrap
28472 || (offset_below_red_zone_p
28473 (info->altivec_save_offset
28474 + 16 * (i - info->first_altivec_reg_save))))
28475 && save_reg_p (i))
28477 rtx reg = gen_rtx_REG (V4SImode, i);
28478 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28482 /* Restore VRSAVE if we must do so before adjusting the stack. */
28483 if (info->vrsave_size != 0
28484 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28485 || (DEFAULT_ABI != ABI_V4
28486 && offset_below_red_zone_p (info->vrsave_save_offset))))
28488 rtx reg;
28490 if (frame_reg_rtx == sp_reg_rtx)
28492 if (use_backchain_to_restore_sp)
28494 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28495 emit_move_insn (frame_reg_rtx,
28496 gen_rtx_MEM (Pmode, sp_reg_rtx));
28497 frame_off = 0;
28499 else if (frame_pointer_needed)
28500 frame_reg_rtx = hard_frame_pointer_rtx;
28503 reg = gen_rtx_REG (SImode, 12);
28504 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28505 info->vrsave_save_offset + frame_off));
28507 emit_insn (generate_set_vrsave (reg, info, 1));
28510 insn = NULL_RTX;
28511 /* If we have a large stack frame, restore the old stack pointer
28512 using the backchain. */
28513 if (use_backchain_to_restore_sp)
28515 if (frame_reg_rtx == sp_reg_rtx)
28517 /* Under V.4, don't reset the stack pointer until after we're done
28518 loading the saved registers. */
28519 if (DEFAULT_ABI == ABI_V4)
28520 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28522 insn = emit_move_insn (frame_reg_rtx,
28523 gen_rtx_MEM (Pmode, sp_reg_rtx));
28524 frame_off = 0;
28526 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28527 && DEFAULT_ABI == ABI_V4)
28528 /* frame_reg_rtx has been set up by the altivec restore. */
28530 else
28532 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
28533 frame_reg_rtx = sp_reg_rtx;
28536 /* If we have a frame pointer, we can restore the old stack pointer
28537 from it. */
28538 else if (frame_pointer_needed)
28540 frame_reg_rtx = sp_reg_rtx;
28541 if (DEFAULT_ABI == ABI_V4)
28542 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
28543 /* Prevent reordering memory accesses against stack pointer restore. */
28544 else if (cfun->calls_alloca
28545 || offset_below_red_zone_p (-info->total_size))
28546 rs6000_emit_stack_tie (frame_reg_rtx, true);
28548 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
28549 GEN_INT (info->total_size)));
28550 frame_off = 0;
28552 else if (info->push_p
28553 && DEFAULT_ABI != ABI_V4
28554 && !crtl->calls_eh_return)
28556 /* Prevent reordering memory accesses against stack pointer restore. */
28557 if (cfun->calls_alloca
28558 || offset_below_red_zone_p (-info->total_size))
28559 rs6000_emit_stack_tie (frame_reg_rtx, false);
28560 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
28561 GEN_INT (info->total_size)));
28562 frame_off = 0;
28564 if (insn && frame_reg_rtx == sp_reg_rtx)
28566 if (cfa_restores)
28568 REG_NOTES (insn) = cfa_restores;
28569 cfa_restores = NULL_RTX;
28571 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28572 RTX_FRAME_RELATED_P (insn) = 1;
28575 /* Restore AltiVec registers if we have not done so already. */
28576 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28577 && info->altivec_size != 0
28578 && (DEFAULT_ABI == ABI_V4
28579 || !offset_below_red_zone_p (info->altivec_save_offset)))
28581 int i;
28583 if ((strategy & REST_INLINE_VRS) == 0)
28585 int end_save = info->altivec_save_offset + info->altivec_size;
28586 int ptr_off;
28587 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
28588 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
28589 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
28591 if (end_save + frame_off != 0)
28593 rtx offset = GEN_INT (end_save + frame_off);
28595 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
28597 else
28598 emit_move_insn (ptr_reg, frame_reg_rtx);
28600 ptr_off = -end_save;
28601 insn = rs6000_emit_savres_rtx (info, scratch_reg,
28602 info->altivec_save_offset + ptr_off,
28603 0, V4SImode, SAVRES_VR);
28604 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
28606 /* Frame reg was clobbered by the out-of-line vector restore.
28607 Restore it from ptr_reg, and if we are calling an out-of-line
28608 gpr or fpr restore, set up the correct pointer and offset. */
28609 unsigned newptr_regno = 1;
28610 if (!restoring_GPRs_inline)
28612 bool lr = info->gp_save_offset + info->gp_size == 0;
28613 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28614 newptr_regno = ptr_regno_for_savres (sel);
28615 end_save = info->gp_save_offset + info->gp_size;
28617 else if (!restoring_FPRs_inline)
28619 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
28620 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28621 newptr_regno = ptr_regno_for_savres (sel);
28622 end_save = info->fp_save_offset + info->fp_size;
28625 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
28626 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
28628 if (end_save + ptr_off != 0)
28630 rtx offset = GEN_INT (end_save + ptr_off);
28632 frame_off = -end_save;
28633 if (TARGET_32BIT)
28634 emit_insn (gen_addsi3_carry (frame_reg_rtx,
28635 ptr_reg, offset));
28636 else
28637 emit_insn (gen_adddi3_carry (frame_reg_rtx,
28638 ptr_reg, offset));
28640 else
28642 frame_off = ptr_off;
28643 emit_move_insn (frame_reg_rtx, ptr_reg);
28647 else
28649 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28650 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
28652 rtx addr, areg, mem, insn;
28653 rtx reg = gen_rtx_REG (V4SImode, i);
28654 HOST_WIDE_INT offset
28655 = (info->altivec_save_offset + frame_off
28656 + 16 * (i - info->first_altivec_reg_save));
28658 if (TARGET_P9_VECTOR && quad_address_offset_p (offset))
28660 mem = gen_frame_mem (V4SImode,
28661 gen_rtx_PLUS (Pmode, frame_reg_rtx,
28662 GEN_INT (offset)));
28663 insn = gen_rtx_SET (reg, mem);
28665 else
28667 areg = gen_rtx_REG (Pmode, 0);
28668 emit_move_insn (areg, GEN_INT (offset));
28670 /* AltiVec addressing mode is [reg+reg]. */
28671 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
28672 mem = gen_frame_mem (V4SImode, addr);
28674 /* Rather than emitting a generic move, force use of the
28675 lvx instruction, which we always want. In particular we
28676 don't want lxvd2x/xxpermdi for little endian. */
28677 insn = gen_altivec_lvx_v4si_internal (reg, mem);
28680 (void) emit_insn (insn);
28684 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
28685 if (((strategy & REST_INLINE_VRS) == 0
28686 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
28687 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28688 && save_reg_p (i))
28690 rtx reg = gen_rtx_REG (V4SImode, i);
28691 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28695 /* Restore VRSAVE if we have not done so already. */
28696 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
28697 && info->vrsave_size != 0
28698 && (DEFAULT_ABI == ABI_V4
28699 || !offset_below_red_zone_p (info->vrsave_save_offset)))
28701 rtx reg;
28703 reg = gen_rtx_REG (SImode, 12);
28704 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28705 info->vrsave_save_offset + frame_off));
28707 emit_insn (generate_set_vrsave (reg, info, 1));
28710 /* If we exit by an out-of-line restore function on ABI_V4 then that
28711 function will deallocate the stack, so we don't need to worry
28712 about the unwinder restoring cr from an invalid stack frame
28713 location. */
28714 exit_func = (!restoring_FPRs_inline
28715 || (!restoring_GPRs_inline
28716 && info->first_fp_reg_save == 64));
28718 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
28719 *separate* slots if the routine calls __builtin_eh_return, so
28720 that they can be independently restored by the unwinder. */
28721 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
28723 int i, cr_off = info->ehcr_offset;
28725 for (i = 0; i < 8; i++)
28726 if (!call_used_regs[CR0_REGNO + i])
28728 rtx reg = gen_rtx_REG (SImode, 0);
28729 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28730 cr_off + frame_off));
28732 insn = emit_insn (gen_movsi_to_cr_one
28733 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
28735 if (!exit_func && flag_shrink_wrap)
28737 add_reg_note (insn, REG_CFA_RESTORE,
28738 gen_rtx_REG (SImode, CR0_REGNO + i));
28740 RTX_FRAME_RELATED_P (insn) = 1;
28743 cr_off += reg_size;
28747 /* Get the old lr if we saved it. If we are restoring registers
28748 out-of-line, then the out-of-line routines can do this for us. */
28749 if (restore_lr && restoring_GPRs_inline)
28750 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28752 /* Get the old cr if we saved it. */
28753 if (info->cr_save_p)
28755 unsigned cr_save_regno = 12;
28757 if (!restoring_GPRs_inline)
28759 /* Ensure we don't use the register used by the out-of-line
28760 gpr register restore below. */
28761 bool lr = info->gp_save_offset + info->gp_size == 0;
28762 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
28763 int gpr_ptr_regno = ptr_regno_for_savres (sel);
28765 if (gpr_ptr_regno == 12)
28766 cr_save_regno = 11;
28767 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
28769 else if (REGNO (frame_reg_rtx) == 12)
28770 cr_save_regno = 11;
28772 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
28773 info->cr_save_offset + frame_off,
28774 exit_func);
28777 /* Set LR here to try to overlap restores below. */
28778 if (restore_lr && restoring_GPRs_inline)
28779 restore_saved_lr (0, exit_func);
28781 /* Load exception handler data registers, if needed. */
28782 if (crtl->calls_eh_return)
28784 unsigned int i, regno;
28786 if (TARGET_AIX)
28788 rtx reg = gen_rtx_REG (reg_mode, 2);
28789 emit_insn (gen_frame_load (reg, frame_reg_rtx,
28790 frame_off + RS6000_TOC_SAVE_SLOT));
28793 for (i = 0; ; ++i)
28795 rtx mem;
28797 regno = EH_RETURN_DATA_REGNO (i);
28798 if (regno == INVALID_REGNUM)
28799 break;
28801 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
28802 info->ehrd_offset + frame_off
28803 + reg_size * (int) i);
28805 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
28809 /* Restore GPRs. This is done as a PARALLEL if we are using
28810 the load-multiple instructions. */
28811 if (!restoring_GPRs_inline)
28813 /* We are jumping to an out-of-line function. */
28814 rtx ptr_reg;
28815 int end_save = info->gp_save_offset + info->gp_size;
28816 bool can_use_exit = end_save == 0;
28817 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
28818 int ptr_off;
28820 /* Emit stack reset code if we need it. */
28821 ptr_regno = ptr_regno_for_savres (sel);
28822 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
28823 if (can_use_exit)
28824 rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28825 else if (end_save + frame_off != 0)
28826 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
28827 GEN_INT (end_save + frame_off)));
28828 else if (REGNO (frame_reg_rtx) != ptr_regno)
28829 emit_move_insn (ptr_reg, frame_reg_rtx);
28830 if (REGNO (frame_reg_rtx) == ptr_regno)
28831 frame_off = -end_save;
28833 if (can_use_exit && info->cr_save_p)
28834 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
28836 ptr_off = -end_save;
28837 rs6000_emit_savres_rtx (info, ptr_reg,
28838 info->gp_save_offset + ptr_off,
28839 info->lr_save_offset + ptr_off,
28840 reg_mode, sel);
28842 else if (using_load_multiple)
28844 rtvec p;
28845 p = rtvec_alloc (32 - info->first_gp_reg_save);
28846 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
28847 RTVEC_ELT (p, i)
28848 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
28849 frame_reg_rtx,
28850 info->gp_save_offset + frame_off + reg_size * i);
28851 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
28853 else
28855 int offset = info->gp_save_offset + frame_off;
28856 for (i = info->first_gp_reg_save; i < 32; i++)
28858 if (save_reg_p (i)
28859 && !cfun->machine->gpr_is_wrapped_separately[i])
28861 rtx reg = gen_rtx_REG (reg_mode, i);
28862 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28865 offset += reg_size;
28869 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28871 /* If the frame pointer was used then we can't delay emitting
28872 a REG_CFA_DEF_CFA note. This must happen on the insn that
28873 restores the frame pointer, r31. We may have already emitted
28874 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
28875 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
28876 be harmless if emitted. */
28877 if (frame_pointer_needed)
28879 insn = get_last_insn ();
28880 add_reg_note (insn, REG_CFA_DEF_CFA,
28881 plus_constant (Pmode, frame_reg_rtx, frame_off));
28882 RTX_FRAME_RELATED_P (insn) = 1;
28885 /* Set up cfa_restores. We always need these when
28886 shrink-wrapping. If not shrink-wrapping then we only need
28887 the cfa_restore when the stack location is no longer valid.
28888 The cfa_restores must be emitted on or before the insn that
28889 invalidates the stack, and of course must not be emitted
28890 before the insn that actually does the restore. The latter
28891 is why it is a bad idea to emit the cfa_restores as a group
28892 on the last instruction here that actually does a restore:
28893 That insn may be reordered with respect to others doing
28894 restores. */
28895 if (flag_shrink_wrap
28896 && !restoring_GPRs_inline
28897 && info->first_fp_reg_save == 64)
28898 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
28900 for (i = info->first_gp_reg_save; i < 32; i++)
28901 if (save_reg_p (i)
28902 && !cfun->machine->gpr_is_wrapped_separately[i])
28904 rtx reg = gen_rtx_REG (reg_mode, i);
28905 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
28909 if (!restoring_GPRs_inline
28910 && info->first_fp_reg_save == 64)
28912 /* We are jumping to an out-of-line function. */
28913 if (cfa_restores)
28914 emit_cfa_restores (cfa_restores);
28915 return;
28918 if (restore_lr && !restoring_GPRs_inline)
28920 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
28921 restore_saved_lr (0, exit_func);
28924 /* Restore fpr's if we need to do it without calling a function. */
28925 if (restoring_FPRs_inline)
28927 int offset = info->fp_save_offset + frame_off;
28928 for (i = info->first_fp_reg_save; i < 64; i++)
28930 if (save_reg_p (i)
28931 && !cfun->machine->fpr_is_wrapped_separately[i - 32])
28933 rtx reg = gen_rtx_REG (fp_reg_mode, i);
28934 emit_insn (gen_frame_load (reg, frame_reg_rtx, offset));
28935 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
28936 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
28937 cfa_restores);
28940 offset += fp_reg_size;
28944 /* If we saved cr, restore it here. Just those that were used. */
28945 if (info->cr_save_p)
28946 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
28948 /* If this is V.4, unwind the stack pointer after all of the loads
28949 have been done, or set up r11 if we are restoring fp out of line. */
28950 ptr_regno = 1;
28951 if (!restoring_FPRs_inline)
28953 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
28954 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
28955 ptr_regno = ptr_regno_for_savres (sel);
28958 insn = rs6000_emit_stack_reset (frame_reg_rtx, frame_off, ptr_regno);
28959 if (REGNO (frame_reg_rtx) == ptr_regno)
28960 frame_off = 0;
28962 if (insn && restoring_FPRs_inline)
28964 if (cfa_restores)
28966 REG_NOTES (insn) = cfa_restores;
28967 cfa_restores = NULL_RTX;
28969 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
28970 RTX_FRAME_RELATED_P (insn) = 1;
28973 if (crtl->calls_eh_return)
28975 rtx sa = EH_RETURN_STACKADJ_RTX;
28976 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
28979 if (!sibcall && restoring_FPRs_inline)
28981 if (cfa_restores)
28983 /* We can't hang the cfa_restores off a simple return,
28984 since the shrink-wrap code sometimes uses an existing
28985 return. This means there might be a path from
28986 pre-prologue code to this return, and dwarf2cfi code
28987 wants the eh_frame unwinder state to be the same on
28988 all paths to any point. So we need to emit the
28989 cfa_restores before the return. For -m64 we really
28990 don't need epilogue cfa_restores at all, except for
28991 this irritating dwarf2cfi requirement when shrink-wrapping;
28992 the stack red-zone means eh_frame info
28993 from the prologue telling the unwinder to restore
28994 from the stack is perfectly good right to the end of
28995 the function. */
28996 emit_insn (gen_blockage ());
28997 emit_cfa_restores (cfa_restores);
28998 cfa_restores = NULL_RTX;
29001 emit_jump_insn (targetm.gen_simple_return ());
29004 if (!sibcall && !restoring_FPRs_inline)
29006 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
29007 rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save);
29008 int elt = 0;
29009 RTVEC_ELT (p, elt++) = ret_rtx;
29010 if (lr)
29011 RTVEC_ELT (p, elt++)
29012 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
29014 /* We have to restore more than two FP registers, so branch to the
29015 restore function. It will return to our caller. */
29016 int i;
29017 int reg;
29018 rtx sym;
29020 if (flag_shrink_wrap)
29021 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
29023 sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0));
29024 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym);
29025 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
29026 RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
29028 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
29030 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
29032 RTVEC_ELT (p, elt++)
29033 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
29034 if (flag_shrink_wrap
29035 && save_reg_p (info->first_fp_reg_save + i))
29036 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
29039 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
29042 if (cfa_restores)
29044 if (sibcall)
29045 /* Ensure the cfa_restores are hung off an insn that won't
29046 be reordered above other restores. */
29047 emit_insn (gen_blockage ());
29049 emit_cfa_restores (cfa_restores);
29053 /* Write function epilogue. */
29055 static void
29056 rs6000_output_function_epilogue (FILE *file)
29058 #if TARGET_MACHO
29059 macho_branch_islands ();
29062 rtx_insn *insn = get_last_insn ();
29063 rtx_insn *deleted_debug_label = NULL;
29065 /* Mach-O doesn't support labels at the end of objects, so if
29066 it looks like we might want one, take special action.
29068 First, collect any sequence of deleted debug labels. */
29069 while (insn
29070 && NOTE_P (insn)
29071 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
29073 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
29074 a nop; instead set their CODE_LABEL_NUMBER to -1.  Otherwise
29075 there would be code generation differences between
29076 -g and -g0. */
29077 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29078 deleted_debug_label = insn;
29079 insn = PREV_INSN (insn);
29082 /* Second, if we have:
29083 label:
29084 barrier
29085 then this needs to be detected, so skip past the barrier. */
29087 if (insn && BARRIER_P (insn))
29088 insn = PREV_INSN (insn);
29090 /* Up to now we've only seen notes or barriers. */
29091 if (insn)
29093 if (LABEL_P (insn)
29094 || (NOTE_P (insn)
29095 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
29096 /* Trailing label: <barrier>. */
29097 fputs ("\tnop\n", file);
29098 else
29100 /* Lastly, see if we have a completely empty function body. */
29101 while (insn && ! INSN_P (insn))
29102 insn = PREV_INSN (insn);
29103 /* If we don't find any insns, we've got an empty function body;
29104 i.e. completely empty, without a return or branch. This is
29105 taken as the case where a function body has been removed
29106 because it contains an inline __builtin_unreachable(). GCC
29107 states that reaching __builtin_unreachable() means UB so we're
29108 not obliged to do anything special; however, we want
29109 non-zero-sized function bodies. To meet this, and help the
29110 user out, let's trap the case. */
29111 if (insn == NULL)
29112 fputs ("\ttrap\n", file);
29115 else if (deleted_debug_label)
29116 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
29117 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
29118 CODE_LABEL_NUMBER (insn) = -1;
29120 #endif
29122 /* Output a traceback table here. See /usr/include/sys/debug.h for info
29123 on its format.
29125 We don't output a traceback table if -finhibit-size-directive was
29126 used. The documentation for -finhibit-size-directive reads
29127 ``don't output a @code{.size} assembler directive, or anything
29128 else that would cause trouble if the function is split in the
29129 middle, and the two halves are placed at locations far apart in
29130 memory.'' The traceback table has this property, since it
29131 includes the offset from the start of the function to the
29132 traceback table itself.
29134 System V.4 PowerPC (and the embedded ABI derived from it) uses a
29135 different traceback table. */
29136 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29137 && ! flag_inhibit_size_directive
29138 && rs6000_traceback != traceback_none && !cfun->is_thunk)
29140 const char *fname = NULL;
29141 const char *language_string = lang_hooks.name;
29142 int fixed_parms = 0, float_parms = 0, parm_info = 0;
29143 int i;
29144 int optional_tbtab;
29145 rs6000_stack_t *info = rs6000_stack_info ();
29147 if (rs6000_traceback == traceback_full)
29148 optional_tbtab = 1;
29149 else if (rs6000_traceback == traceback_part)
29150 optional_tbtab = 0;
29151 else
29152 optional_tbtab = !optimize_size && !TARGET_ELF;
29154 if (optional_tbtab)
29156 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
29157 while (*fname == '.') /* V.4 encodes . in the name */
29158 fname++;
29160 /* Need label immediately before tbtab, so we can compute
29161 its offset from the function start. */
29162 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29163 ASM_OUTPUT_LABEL (file, fname);
29166 /* The .tbtab pseudo-op can only be used for the first eight
29167 expressions, since it can't handle the possibly variable
29168 length fields that follow. However, if you omit the optional
29169 fields, the assembler outputs zeros for all optional fields
29170 anyway, giving each variable length field its minimum length
29171 (as defined in sys/debug.h). Thus we cannot use the .tbtab
29172 pseudo-op at all. */
29174 /* An all-zero word flags the start of the tbtab, for debuggers
29175 that have to find it by searching forward from the entry
29176 point or from the current pc. */
29177 fputs ("\t.long 0\n", file);
29179 /* Tbtab format type. Use format type 0. */
29180 fputs ("\t.byte 0,", file);
29182 /* Language type. Unfortunately, there does not seem to be any
29183 official way to discover the language being compiled, so we
29184 use language_string.
29185 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
29186 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
29187 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
29188 either, so for now use 0. */
29189 if (lang_GNU_C ()
29190 || ! strcmp (language_string, "GNU GIMPLE")
29191 || ! strcmp (language_string, "GNU Go")
29192 || ! strcmp (language_string, "libgccjit"))
29193 i = 0;
29194 else if (! strcmp (language_string, "GNU F77")
29195 || lang_GNU_Fortran ())
29196 i = 1;
29197 else if (! strcmp (language_string, "GNU Pascal"))
29198 i = 2;
29199 else if (! strcmp (language_string, "GNU Ada"))
29200 i = 3;
29201 else if (lang_GNU_CXX ()
29202 || ! strcmp (language_string, "GNU Objective-C++"))
29203 i = 9;
29204 else if (! strcmp (language_string, "GNU Java"))
29205 i = 13;
29206 else if (! strcmp (language_string, "GNU Objective-C"))
29207 i = 14;
29208 else
29209 gcc_unreachable ();
29210 fprintf (file, "%d,", i);
29212 /* 8 single bit fields: global linkage (not set for C extern linkage,
29213 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
29214 from start of procedure stored in tbtab, internal function, function
29215 has controlled storage, function has no toc, function uses fp,
29216 function logs/aborts fp operations. */
29217 /* Assume that fp operations are used if any fp reg must be saved. */
29218 fprintf (file, "%d,",
29219 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
29221 /* 6 bitfields: function is interrupt handler, name present in
29222 proc table, function calls alloca, on condition directives
29223 (controls stack walks, 3 bits), saves condition reg, saves
29224 link reg. */
29225 /* The `function calls alloca' bit seems to be set whenever reg 31 is
29226 set up as a frame pointer, even when there is no alloca call. */
29227 fprintf (file, "%d,",
29228 ((optional_tbtab << 6)
29229 | ((optional_tbtab & frame_pointer_needed) << 5)
29230 | (info->cr_save_p << 1)
29231 | (info->lr_save_p)));
29233 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
29234 (6 bits). */
29235 fprintf (file, "%d,",
29236 (info->push_p << 7) | (64 - info->first_fp_reg_save));
29238 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
29239 fprintf (file, "%d,", (32 - first_reg_to_save ()));
29241 if (optional_tbtab)
29243 /* Compute the parameter info from the function decl argument
29244 list. */
29245 tree decl;
29246 int next_parm_info_bit = 31;
29248 for (decl = DECL_ARGUMENTS (current_function_decl);
29249 decl; decl = DECL_CHAIN (decl))
29251 rtx parameter = DECL_INCOMING_RTL (decl);
29252 machine_mode mode = GET_MODE (parameter);
29254 if (GET_CODE (parameter) == REG)
29256 if (SCALAR_FLOAT_MODE_P (mode))
29258 int bits;
29260 float_parms++;
29262 switch (mode)
29264 case E_SFmode:
29265 case E_SDmode:
29266 bits = 0x2;
29267 break;
29269 case E_DFmode:
29270 case E_DDmode:
29271 case E_TFmode:
29272 case E_TDmode:
29273 case E_IFmode:
29274 case E_KFmode:
29275 bits = 0x3;
29276 break;
29278 default:
29279 gcc_unreachable ();
29282 /* If only one bit will fit, don't or in this entry. */
29283 if (next_parm_info_bit > 0)
29284 parm_info |= (bits << (next_parm_info_bit - 1));
29285 next_parm_info_bit -= 2;
29287 else
29289 fixed_parms += ((GET_MODE_SIZE (mode)
29290 + (UNITS_PER_WORD - 1))
29291 / UNITS_PER_WORD);
29292 next_parm_info_bit -= 1;
29298 /* Number of fixed point parameters. */
29299 /* This is actually the number of words of fixed point parameters; thus
29300 an 8-byte struct counts as 2, and the maximum value is 8. */
29301 fprintf (file, "%d,", fixed_parms);
29303 /* 2 bitfields: number of floating point parameters (7 bits), parameters
29304 all on stack. */
29305 /* This is actually the number of fp registers that hold parameters;
29306 and thus the maximum value is 13. */
29307 /* Set parameters on stack bit if parameters are not in their original
29308 registers, regardless of whether they are on the stack? Xlc
29309 seems to set the bit when not optimizing. */
29310 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
29312 if (optional_tbtab)
29314 /* Optional fields follow. Some are variable length. */
29316 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
29317 float, 11 double float. */
29318 /* There is an entry for each parameter in a register, in the order
29319 that they occur in the parameter list. Any intervening arguments
29320 on the stack are ignored. If the list overflows a long (max
29321 possible length 34 bits) then completely leave off all elements
29322 that don't fit. */
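/* Worked example (illustrative, for a hypothetical signature): for a
   function taking (double, int) with both arguments in registers,
   the double contributes the pair 11 at bits 31-30 and the int one
   0 bit at bit 29, so parm_info above is 0xc0000000 and fixed_parms
   is 1.  */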
29323 /* Only emit this long if there was at least one parameter. */
29324 if (fixed_parms || float_parms)
29325 fprintf (file, "\t.long %d\n", parm_info);
29327 /* Offset from start of code to tb table. */
29328 fputs ("\t.long ", file);
29329 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
29330 RS6000_OUTPUT_BASENAME (file, fname);
29331 putc ('-', file);
29332 rs6000_output_function_entry (file, fname);
29333 putc ('\n', file);
29335 /* Interrupt handler mask. */
29336 /* Omit this long, since we never set the interrupt handler bit
29337 above. */
29339 /* Number of CTL (controlled storage) anchors. */
29340 /* Omit this long, since the has_ctl bit is never set above. */
29342 /* Displacement into stack of each CTL anchor. */
29343 /* Omit this list of longs, because there are no CTL anchors. */
29345 /* Length of function name. */
29346 if (*fname == '*')
29347 ++fname;
29348 fprintf (file, "\t.short %d\n", (int) strlen (fname));
29350 /* Function name. */
29351 assemble_string (fname, strlen (fname));
29353 /* Register for alloca automatic storage; this is always reg 31.
29354 Only emit this if the alloca bit was set above. */
29355 if (frame_pointer_needed)
29356 fputs ("\t.byte 31\n", file);
29358 fputs ("\t.align 2\n", file);
29362 /* Arrange to define .LCTOC1 label, if not already done. */
29363 if (need_toc_init)
29365 need_toc_init = 0;
29366 if (!toc_initialized)
29368 switch_to_section (toc_section);
29369 switch_to_section (current_function_section ());
29374 /* -fsplit-stack support. */
29376 /* A SYMBOL_REF for __morestack. */
29377 static GTY(()) rtx morestack_ref;
29379 static rtx
29380 gen_add3_const (rtx rt, rtx ra, long c)
29382 if (TARGET_64BIT)
29383 return gen_adddi3 (rt, ra, GEN_INT (c));
29384 else
29385 return gen_addsi3 (rt, ra, GEN_INT (c));
29388 /* Emit -fsplit-stack prologue, which goes before the regular function
29389 prologue (at local entry point in the case of ELFv2). */
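/* A sketch (illustrative only; the exact registers, the limit slot
   and the save offsets depend on the target) of the sequence emitted
   below, assuming a 64-bit Linux target:

	ld    r0,<limit>(r13)   # load the stack limit from the TCB
	addis r12,r1,-N_hi      # two insns compute r1 - frame_size so
	addi  r12,r12,-N_lo     #  the linker can edit them
	cmpld cr7,r12,r0
	bge+  cr7,.Lok          # enough stack, continue with prologue
	mflr  r0
	std   r0,<lr_save>(r1)
	bl    __morestack       # allocate a new stack segment
	ld    r0,<lr_save>(r1)
	mtlr  r0
	blr                     # split_stack_return to our caller
   .Lok:  */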
29391 void
29392 rs6000_expand_split_stack_prologue (void)
29394 rs6000_stack_t *info = rs6000_stack_info ();
29395 unsigned HOST_WIDE_INT allocate;
29396 long alloc_hi, alloc_lo;
29397 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
29398 rtx_insn *insn;
29400 gcc_assert (flag_split_stack && reload_completed);
29402 if (!info->push_p)
29403 return;
29405 if (global_regs[29])
29407 error ("%qs uses register r29", "-fsplit-stack");
29408 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
29409 "conflicts with %qD", global_regs_decl[29]);
29412 allocate = info->total_size;
29413 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
29415 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
29416 return;
29418 if (morestack_ref == NULL_RTX)
29420 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
29421 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
29422 | SYMBOL_FLAG_FUNCTION);
29425 r0 = gen_rtx_REG (Pmode, 0);
29426 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29427 r12 = gen_rtx_REG (Pmode, 12);
29428 emit_insn (gen_load_split_stack_limit (r0));
29429 /* Always emit two insns here to calculate the requested stack,
29430 so that the linker can edit them when adjusting size for calling
29431 non-split-stack code. */
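/* Worked example (illustrative): for allocate == 0x12340 the two
   halves are alloc_hi == -0x10000 and alloc_lo == -0x2340, since
   (-0x12340 + 0x8000) & ~0xffff == -0x10000; the rounding by 0x8000
   guarantees alloc_lo always fits in a signed 16-bit addi field.  */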
29432 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
29433 alloc_lo = -allocate - alloc_hi;
29434 if (alloc_hi != 0)
29436 emit_insn (gen_add3_const (r12, r1, alloc_hi));
29437 if (alloc_lo != 0)
29438 emit_insn (gen_add3_const (r12, r12, alloc_lo));
29439 else
29440 emit_insn (gen_nop ());
29442 else
29444 emit_insn (gen_add3_const (r12, r1, alloc_lo));
29445 emit_insn (gen_nop ());
29448 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
29449 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
29450 ok_label = gen_label_rtx ();
29451 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29452 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
29453 gen_rtx_LABEL_REF (VOIDmode, ok_label),
29454 pc_rtx);
29455 insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29456 JUMP_LABEL (insn) = ok_label;
29457 /* Mark the jump as very likely to be taken. */
29458 add_reg_br_prob_note (insn, profile_probability::very_likely ());
29460 lr = gen_rtx_REG (Pmode, LR_REGNO);
29461 insn = emit_move_insn (r0, lr);
29462 RTX_FRAME_RELATED_P (insn) = 1;
29463 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
29464 RTX_FRAME_RELATED_P (insn) = 1;
29466 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
29467 const0_rtx, const0_rtx));
29468 call_fusage = NULL_RTX;
29469 use_reg (&call_fusage, r12);
29470 /* Say the call uses r0, even though it doesn't, to stop regrename
29471 from twiddling with the insns saving lr, trashing args for cfun.
29472 The insns restoring lr are similarly protected by making
29473 split_stack_return use r0. */
29474 use_reg (&call_fusage, r0);
29475 add_function_usage_to (insn, call_fusage);
29476 /* Indicate that this function can't jump to non-local gotos. */
29477 make_reg_eh_region_note_nothrow_nononlocal (insn);
29478 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
29479 insn = emit_move_insn (lr, r0);
29480 add_reg_note (insn, REG_CFA_RESTORE, lr);
29481 RTX_FRAME_RELATED_P (insn) = 1;
29482 emit_insn (gen_split_stack_return ());
29484 emit_label (ok_label);
29485 LABEL_NUSES (ok_label) = 1;
29488 /* Return the internal arg pointer used for function incoming
29489 arguments. When -fsplit-stack, the arg pointer is r12 so we need
29490 to copy it to a pseudo in order for it to be preserved over calls
29491 and suchlike. We'd really like to use a pseudo here for the
29492 internal arg pointer but data-flow analysis is not prepared to
29493 accept pseudos as live at the beginning of a function. */
29495 static rtx
29496 rs6000_internal_arg_pointer (void)
29498 if (flag_split_stack
29499 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
29500 == NULL))
29503 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
29505 rtx pat;
29507 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
29508 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
29510 /* Put the pseudo initialization right after the note at the
29511 beginning of the function. */
29512 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
29513 gen_rtx_REG (Pmode, 12));
29514 push_topmost_sequence ();
29515 emit_insn_after (pat, get_insns ());
29516 pop_topmost_sequence ();
29518 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
29519 FIRST_PARM_OFFSET (current_function_decl));
29521 return virtual_incoming_args_rtx;
29524 /* We may have to tell the dataflow pass that the split stack prologue
29525 is initializing a register. */
29527 static void
29528 rs6000_live_on_entry (bitmap regs)
29530 if (flag_split_stack)
29531 bitmap_set_bit (regs, 12);
29534 /* Emit -fsplit-stack dynamic stack allocation space check. */
29536 void
29537 rs6000_split_stack_space_check (rtx size, rtx label)
29539 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
29540 rtx limit = gen_reg_rtx (Pmode);
29541 rtx requested = gen_reg_rtx (Pmode);
29542 rtx cmp = gen_reg_rtx (CCUNSmode);
29543 rtx jump;
29545 emit_insn (gen_load_split_stack_limit (limit));
29546 if (CONST_INT_P (size))
29547 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
29548 else
29550 size = force_reg (Pmode, size);
29551 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
29553 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
29554 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
29555 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
29556 gen_rtx_LABEL_REF (VOIDmode, label),
29557 pc_rtx);
29558 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
29559 JUMP_LABEL (jump) = label;
29562 /* A C compound statement that outputs the assembler code for a thunk
29563 function, used to implement C++ virtual function calls with
29564 multiple inheritance. The thunk acts as a wrapper around a virtual
29565 function, adjusting the implicit object parameter before handing
29566 control off to the real function.
29568 First, emit code to add the integer DELTA to the location that
29569 contains the incoming first argument. Assume that this argument
29570 contains a pointer, and is the one used to pass the `this' pointer
29571 in C++. This is the incoming argument *before* the function
29572 prologue, e.g. `%o0' on a sparc. The addition must preserve the
29573 values of all other incoming arguments.
29575 After the addition, emit code to jump to FUNCTION, which is a
29576 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
29577 not touch the return address. Hence returning from FUNCTION will
29578 return to whoever called the current `thunk'.
29580 The effect must be as if FUNCTION had been called directly with the
29581 adjusted first argument. This macro is responsible for emitting
29582 all of the code for a thunk function; output_function_prologue()
29583 and output_function_epilogue() are not invoked.
29585 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
29586 been extracted from it.) It might possibly be useful on some
29587 targets, but probably not.
29589 If you do not define this macro, the target-independent code in the
29590 C++ frontend will generate a less efficient heavyweight thunk that
29591 calls FUNCTION instead of jumping to it. The generic approach does
29592 not support varargs. */
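/* For illustration (a hypothetical C++ example, not code from this
   file): given
     struct A { virtual void f (); int a; };
     struct B { virtual void f (); int b; };
     struct C : A, B { void f (); };
   the thunk emitted for the B-in-C entry of C::f receives `this'
   pointing at the B subobject, adds the (typically negative) DELTA
   to rewind to the start of the C object, and then tail-calls the
   real C::f.  A non-zero VCALL_OFFSET additionally loads the
   adjustment from the vtable, as needed for virtual bases.  */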
29594 static void
29595 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
29596 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
29597 tree function)
29599 rtx this_rtx, funexp;
29600 rtx_insn *insn;
29602 reload_completed = 1;
29603 epilogue_completed = 1;
29605 /* Mark the end of the (empty) prologue. */
29606 emit_note (NOTE_INSN_PROLOGUE_END);
29608 /* Find the "this" pointer. If the function returns a structure,
29609 the structure return pointer is in r3. */
29610 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
29611 this_rtx = gen_rtx_REG (Pmode, 4);
29612 else
29613 this_rtx = gen_rtx_REG (Pmode, 3);
29615 /* Apply the constant offset, if required. */
29616 if (delta)
29617 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
29619 /* Apply the offset from the vtable, if required. */
29620 if (vcall_offset)
29622 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
29623 rtx tmp = gen_rtx_REG (Pmode, 12);
29625 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
29626 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
29628 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
29629 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
29631 else
29633 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
29635 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
29637 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
29640 /* Generate a tail call to the target function. */
29641 if (!TREE_USED (function))
29643 assemble_external (function);
29644 TREE_USED (function) = 1;
29646 funexp = XEXP (DECL_RTL (function), 0);
29647 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
29649 #if TARGET_MACHO
29650 if (MACHOPIC_INDIRECT)
29651 funexp = machopic_indirect_call_target (funexp);
29652 #endif
29654 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
29655 generate sibcall RTL explicitly. */
29656 insn = emit_call_insn (
29657 gen_rtx_PARALLEL (VOIDmode,
29658 gen_rtvec (3,
29659 gen_rtx_CALL (VOIDmode,
29660 funexp, const0_rtx),
29661 gen_rtx_USE (VOIDmode, const0_rtx),
29662 simple_return_rtx)));
29663 SIBLING_CALL_P (insn) = 1;
29664 emit_barrier ();
29666 /* Run just enough of rest_of_compilation to get the insns emitted.
29667 There's not really enough bulk here to make other passes such as
29668 instruction scheduling worthwhile. Note that use_thunk calls
29669 assemble_start_function and assemble_end_function. */
29670 insn = get_insns ();
29671 shorten_branches (insn);
29672 final_start_function (insn, file, 1);
29673 final (insn, file, 1);
29674 final_end_function ();
29676 reload_completed = 0;
29677 epilogue_completed = 0;
29680 /* A quick summary of the various types of 'constant-pool tables'
29681 under PowerPC:
29683 Target       Flags            Name             One table per
29684 AIX          (none)           AIX TOC          object file
29685 AIX          -mfull-toc       AIX TOC          object file
29686 AIX          -mminimal-toc    AIX minimal TOC  translation unit
29687 SVR4/EABI    (none)           SVR4 SDATA       object file
29688 SVR4/EABI    -fpic            SVR4 pic         object file
29689 SVR4/EABI    -fPIC            SVR4 PIC         translation unit
29690 SVR4/EABI    -mrelocatable    EABI TOC         function
29691 SVR4/EABI    -maix            AIX TOC          object file
29692 SVR4/EABI    -maix -mminimal-toc
29693                               AIX minimal TOC  translation unit
29695 Name             Reg.  Set by   entries  contains:
29696                                 made by  addrs?   fp?      sum?
29698 AIX TOC           2    crt0     as       Y        option   option
29699 AIX minimal TOC  30    prolog   gcc      Y        Y        option
29700 SVR4 SDATA       13    crt0     gcc      N        Y        N
29701 SVR4 pic         30    prolog   ld       Y        not yet  N
29702 SVR4 PIC         30    prolog   gcc      Y        option   option
29703 EABI TOC         30    prolog   gcc      Y        option   option  */
29707 /* Hash functions for the hash table. */
29709 static unsigned
29710 rs6000_hash_constant (rtx k)
29712 enum rtx_code code = GET_CODE (k);
29713 machine_mode mode = GET_MODE (k);
29714 unsigned result = (code << 3) ^ mode;
29715 const char *format;
29716 int flen, fidx;
29718 format = GET_RTX_FORMAT (code);
29719 flen = strlen (format);
29720 fidx = 0;
29722 switch (code)
29724 case LABEL_REF:
29725 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
29727 case CONST_WIDE_INT:
29729 int i;
29730 flen = CONST_WIDE_INT_NUNITS (k);
29731 for (i = 0; i < flen; i++)
29732 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
29733 return result;
29736 case CONST_DOUBLE:
29737 if (mode != VOIDmode)
29738 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
29739 flen = 2;
29740 break;
29742 case CODE_LABEL:
29743 fidx = 3;
29744 break;
29746 default:
29747 break;
29750 for (; fidx < flen; fidx++)
29751 switch (format[fidx])
29753 case 's':
29755 unsigned i, len;
29756 const char *str = XSTR (k, fidx);
29757 len = strlen (str);
29758 result = result * 613 + len;
29759 for (i = 0; i < len; i++)
29760 result = result * 613 + (unsigned) str[i];
29761 break;
29763 case 'u':
29764 case 'e':
29765 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
29766 break;
29767 case 'i':
29768 case 'n':
29769 result = result * 613 + (unsigned) XINT (k, fidx);
29770 break;
29771 case 'w':
29772 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
29773 result = result * 613 + (unsigned) XWINT (k, fidx);
29774 else
29776 size_t i;
29777 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
29778 result = result * 613 + (unsigned) (XWINT (k, fidx)
29779 >> CHAR_BIT * i);
29781 break;
29782 case '0':
29783 break;
29784 default:
29785 gcc_unreachable ();
29788 return result;
29791 hashval_t
29792 toc_hasher::hash (toc_hash_struct *thc)
29794 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
29797 /* Compare H1 and H2 for equivalence. */
29799 bool
29800 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
29802 rtx r1 = h1->key;
29803 rtx r2 = h2->key;
29805 if (h1->key_mode != h2->key_mode)
29806 return 0;
29808 return rtx_equal_p (r1, r2);
29811 /* These are the names given by the C++ front-end to vtables and
29812 vtable-like objects. Ideally, this logic should not be here;
29813 instead, there should be some programmatic way of inquiring as
29814 to whether or not an object is a vtable. */
29816 #define VTABLE_NAME_P(NAME) \
29817 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
29818 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
29819 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
29820 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
29821 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
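/* For example (a hypothetical class name): "_ZTV3Foo", the Itanium
   C++ ABI vtable for a class Foo, matches, as does the corresponding
   VTT "_ZTT3Foo".  */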
29823 #ifdef NO_DOLLAR_IN_LABEL
29824 /* Return a GGC-allocated character string translating dollar signs in
29825 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
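/* For example (hypothetical name), "foo$inner$1" becomes
   "foo_inner_1"; a name without '$', or one that starts with '$',
   is returned unchanged.  */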
29827 const char *
29828 rs6000_xcoff_strip_dollar (const char *name)
29830 char *strip, *p;
29831 const char *q;
29832 size_t len;
29834 q = (const char *) strchr (name, '$');
29836 if (q == 0 || q == name)
29837 return name;
29839 len = strlen (name);
29840 strip = XALLOCAVEC (char, len + 1);
29841 strcpy (strip, name);
29842 p = strip + (q - name);
29843 while (p)
29845 *p = '_';
29846 p = strchr (p + 1, '$');
29849 return ggc_alloc_string (strip, len);
29851 #endif
29853 void
29854 rs6000_output_symbol_ref (FILE *file, rtx x)
29856 const char *name = XSTR (x, 0);
29858 /* Currently C++ toc references to vtables can be emitted before it
29859 is decided whether the vtable is public or private. If this is
29860 the case, then the linker will eventually complain that there is
29861 a reference to an unknown section. Thus, for vtables only,
29862 we emit the TOC reference to reference the identifier and not the
29863 symbol. */
29864 if (VTABLE_NAME_P (name))
29866 RS6000_OUTPUT_BASENAME (file, name);
29868 else
29869 assemble_name (file, name);
29872 /* Output a TOC entry. We derive the entry name from what is being
29873 written. */
29875 void
29876 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
29878 char buf[256];
29879 const char *name = buf;
29880 rtx base = x;
29881 HOST_WIDE_INT offset = 0;
29883 gcc_assert (!TARGET_NO_TOC);
29885 /* When the linker won't eliminate them, don't output duplicate
29886 TOC entries (this happens on AIX if there is any kind of TOC,
29887 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
29888 CODE_LABELs. */
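/* As a sketch of the duplicate case (the label numbers are made up):
   on XCOFF, where the internal label prefix expands to "LC..", a
   repeated constant simply emits something like
	.set LC..12,LC..5
   instead of a second TOC entry.  */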
29889 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
29891 struct toc_hash_struct *h;
29893 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
29894 time because GGC is not initialized at that point. */
29895 if (toc_hash_table == NULL)
29896 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
29898 h = ggc_alloc<toc_hash_struct> ();
29899 h->key = x;
29900 h->key_mode = mode;
29901 h->labelno = labelno;
29903 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
29904 if (*found == NULL)
29905 *found = h;
29906 else /* This is indeed a duplicate.
29907 Set this label equal to that label. */
29909 fputs ("\t.set ", file);
29910 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29911 fprintf (file, "%d,", labelno);
29912 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
29913 fprintf (file, "%d\n", ((*found)->labelno));
29915 #ifdef HAVE_AS_TLS
29916 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
29917 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
29918 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
29920 fputs ("\t.set ", file);
29921 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29922 fprintf (file, "%d,", labelno);
29923 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
29924 fprintf (file, "%d\n", ((*found)->labelno));
29926 #endif
29927 return;
29931 /* If we're going to put a double constant in the TOC, make sure it's
29932 aligned properly when strict alignment is on. */
29933 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
29934 && STRICT_ALIGNMENT
29935 && GET_MODE_BITSIZE (mode) >= 64
29936 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
29937 ASM_OUTPUT_ALIGN (file, 3);
29940 (*targetm.asm_out.internal_label) (file, "LC", labelno);
29942 /* Handle FP constants specially. Note that if we have a minimal
29943 TOC, things we put here aren't actually in the TOC, so we can allow
29944 FP constants. */
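/* For instance (illustrative), on 64-bit AIX the DFmode constant 1.0
   is emitted by the DFmode branch below as
	.tc FD_3ff00000_0[TC],0x3ff0000000000000
   since its target image is the words 0x3ff00000 and 0x00000000.  */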
29945 if (GET_CODE (x) == CONST_DOUBLE
29946 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
29947 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
29949 long k[4];
29951 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29952 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
29953 else
29954 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29956 if (TARGET_64BIT)
29958 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29959 fputs (DOUBLE_INT_ASM_OP, file);
29960 else
29961 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29962 k[0] & 0xffffffff, k[1] & 0xffffffff,
29963 k[2] & 0xffffffff, k[3] & 0xffffffff);
29964 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
29965 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
29966 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
29967 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
29968 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
29969 return;
29971 else
29973 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29974 fputs ("\t.long ", file);
29975 else
29976 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
29977 k[0] & 0xffffffff, k[1] & 0xffffffff,
29978 k[2] & 0xffffffff, k[3] & 0xffffffff);
29979 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
29980 k[0] & 0xffffffff, k[1] & 0xffffffff,
29981 k[2] & 0xffffffff, k[3] & 0xffffffff);
29982 return;
29985 else if (GET_CODE (x) == CONST_DOUBLE
29986 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
29988 long k[2];
29990 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
29991 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
29992 else
29993 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
29995 if (TARGET_64BIT)
29997 if (TARGET_ELF || TARGET_MINIMAL_TOC)
29998 fputs (DOUBLE_INT_ASM_OP, file);
29999 else
30000 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30001 k[0] & 0xffffffff, k[1] & 0xffffffff);
30002 fprintf (file, "0x%lx%08lx\n",
30003 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
30004 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
30005 return;
30007 else
30009 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30010 fputs ("\t.long ", file);
30011 else
30012 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
30013 k[0] & 0xffffffff, k[1] & 0xffffffff);
30014 fprintf (file, "0x%lx,0x%lx\n",
30015 k[0] & 0xffffffff, k[1] & 0xffffffff);
30016 return;
30019 else if (GET_CODE (x) == CONST_DOUBLE
30020 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
30022 long l;
30024 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
30025 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
30026 else
30027 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
30029 if (TARGET_64BIT)
30031 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30032 fputs (DOUBLE_INT_ASM_OP, file);
30033 else
30034 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30035 if (WORDS_BIG_ENDIAN)
30036 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
30037 else
30038 fprintf (file, "0x%lx\n", l & 0xffffffff);
30039 return;
30041 else
30043 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30044 fputs ("\t.long ", file);
30045 else
30046 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
30047 fprintf (file, "0x%lx\n", l & 0xffffffff);
30048 return;
30051 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
30053 unsigned HOST_WIDE_INT low;
30054 HOST_WIDE_INT high;
30056 low = INTVAL (x) & 0xffffffff;
30057 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
30059 /* TOC entries are always Pmode-sized, so on big-endian targets
30060 smaller integer constants in the TOC need to be padded.
30061 (This is still a win over putting the constants in
30062 a separate constant pool, because then we'd have
30063 to have both a TOC entry _and_ the actual constant.)
30065 For a 32-bit target, CONST_INT values are loaded and shifted
30066 entirely within `low' and can be stored in one TOC entry. */
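/* Worked example (illustrative): an SImode constant 0x12345678 on a
   64-bit big-endian target has low == 0x12345678 and high == 0; the
   shift below left-justifies it so the emitted doubleword is
   0x1234567800000000.  */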
30068 /* It would be easy to make this work, but it doesn't now. */
30069 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
30071 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
30073 low |= high << 32;
30074 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
30075 high = (HOST_WIDE_INT) low >> 32;
30076 low &= 0xffffffff;
30079 if (TARGET_64BIT)
30081 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30082 fputs (DOUBLE_INT_ASM_OP, file);
30083 else
30084 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30085 (long) high & 0xffffffff, (long) low & 0xffffffff);
30086 fprintf (file, "0x%lx%08lx\n",
30087 (long) high & 0xffffffff, (long) low & 0xffffffff);
30088 return;
30090 else
30092 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
30094 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30095 fputs ("\t.long ", file);
30096 else
30097 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
30098 (long) high & 0xffffffff, (long) low & 0xffffffff);
30099 fprintf (file, "0x%lx,0x%lx\n",
30100 (long) high & 0xffffffff, (long) low & 0xffffffff);
30102 else
30104 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30105 fputs ("\t.long ", file);
30106 else
30107 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
30108 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
30110 return;
30114 if (GET_CODE (x) == CONST)
30116 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
30117 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
30119 base = XEXP (XEXP (x, 0), 0);
30120 offset = INTVAL (XEXP (XEXP (x, 0), 1));
30123 switch (GET_CODE (base))
30125 case SYMBOL_REF:
30126 name = XSTR (base, 0);
30127 break;
30129 case LABEL_REF:
30130 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
30131 CODE_LABEL_NUMBER (XEXP (base, 0)));
30132 break;
30134 case CODE_LABEL:
30135 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
30136 break;
30138 default:
30139 gcc_unreachable ();
30142 if (TARGET_ELF || TARGET_MINIMAL_TOC)
30143 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
30144 else
30146 fputs ("\t.tc ", file);
30147 RS6000_OUTPUT_BASENAME (file, name);
30149 if (offset < 0)
30150 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
30151 else if (offset)
30152 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
30154 /* Mark large TOC symbols on AIX with [TE] so they are mapped
30155 after other TOC symbols, reducing overflow of small TOC access
30156 to [TC] symbols. */
30157 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
30158 ? "[TE]," : "[TC],", file);
30161 /* Currently C++ toc references to vtables can be emitted before it
30162 is decided whether the vtable is public or private. If this is
30163 the case, then the linker will eventually complain that there is
30164 a TOC reference to an unknown section. Thus, for vtables only,
30165 we emit the TOC reference to reference the symbol and not the
30166 section. */
30167 if (VTABLE_NAME_P (name))
30169 RS6000_OUTPUT_BASENAME (file, name);
30170 if (offset < 0)
30171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
30172 else if (offset > 0)
30173 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
30175 else
30176 output_addr_const (file, x);
30178 #if HAVE_AS_TLS
30179 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF)
30181 switch (SYMBOL_REF_TLS_MODEL (base))
30183 case 0:
30184 break;
30185 case TLS_MODEL_LOCAL_EXEC:
30186 fputs ("@le", file);
30187 break;
30188 case TLS_MODEL_INITIAL_EXEC:
30189 fputs ("@ie", file);
30190 break;
30191 /* Use global-dynamic for local-dynamic. */
30192 case TLS_MODEL_GLOBAL_DYNAMIC:
30193 case TLS_MODEL_LOCAL_DYNAMIC:
30194 putc ('\n', file);
30195 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
30196 fputs ("\t.tc .", file);
30197 RS6000_OUTPUT_BASENAME (file, name);
30198 fputs ("[TC],", file);
30199 output_addr_const (file, x);
30200 fputs ("@m", file);
30201 break;
30202 default:
30203 gcc_unreachable ();
30206 #endif
30208 putc ('\n', file);
30211 /* Output an assembler pseudo-op to write an ASCII string of N characters
30212 starting at P to FILE.
30214 On the RS/6000, we have to do this using the .byte operation and
30215 write out special characters outside the quoted string.
30216 Also, the assembler is broken; very long strings are truncated,
30217 so we must artificially break them up early. */
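/* Sample output (illustrative): for the five input bytes
   'H' 'i' '"' '5' '\n' this emits
	.byte "Hi""5"
	.byte 10
   with the quote doubled inside the string and the newline written
   as a decimal.  */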
30219 void
30220 output_ascii (FILE *file, const char *p, int n)
30222 char c;
30223 int i, count_string;
30224 const char *for_string = "\t.byte \"";
30225 const char *for_decimal = "\t.byte ";
30226 const char *to_close = NULL;
30228 count_string = 0;
30229 for (i = 0; i < n; i++)
30231 c = *p++;
30232 if (c >= ' ' && c < 0177)
30234 if (for_string)
30235 fputs (for_string, file);
30236 putc (c, file);
30238 /* Write two quotes to get one. */
30239 if (c == '"')
30241 putc (c, file);
30242 ++count_string;
30245 for_string = NULL;
30246 for_decimal = "\"\n\t.byte ";
30247 to_close = "\"\n";
30248 ++count_string;
30250 if (count_string >= 512)
30252 fputs (to_close, file);
30254 for_string = "\t.byte \"";
30255 for_decimal = "\t.byte ";
30256 to_close = NULL;
30257 count_string = 0;
30260 else
30262 if (for_decimal)
30263 fputs (for_decimal, file);
30264 fprintf (file, "%d", c);
30266 for_string = "\n\t.byte \"";
30267 for_decimal = ", ";
30268 to_close = "\n";
30269 count_string = 0;
30273 /* Now close the string if we have written one. Then end the line. */
30274 if (to_close)
30275 fputs (to_close, file);
30278 /* Generate a unique section name for FILENAME for a section type
30279 represented by SECTION_DESC. Output goes into BUF.
30281 SECTION_DESC can be any string, as long as it is different for each
30282 possible section type.
30284 We name the section in the same manner as xlc. The name begins with an
30285 underscore followed by the filename (after stripping any leading directory
30286 names) with the last period replaced by the string SECTION_DESC. If
30287 FILENAME does not contain a period, SECTION_DESC is appended to the end of
30288 the name. */
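/* For example (hypothetical arguments),
     rs6000_gen_section_name (&buf, "../src/foo.c", "bss_")
   produces "_foobss_": the directories are stripped, the '_' is
   prepended, the last period is replaced by SECTION_DESC, and
   non-alphanumeric characters other than that period are dropped.  */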
30290 void
30291 rs6000_gen_section_name (char **buf, const char *filename,
30292 const char *section_desc)
30294 const char *q, *after_last_slash, *last_period = 0;
30295 char *p;
30296 int len;
30298 after_last_slash = filename;
30299 for (q = filename; *q; q++)
30301 if (*q == '/')
30302 after_last_slash = q + 1;
30303 else if (*q == '.')
30304 last_period = q;
30307 len = strlen (after_last_slash) + strlen (section_desc) + 2;
30308 *buf = (char *) xmalloc (len);
30310 p = *buf;
30311 *p++ = '_';
30313 for (q = after_last_slash; *q; q++)
30315 if (q == last_period)
30317 strcpy (p, section_desc);
30318 p += strlen (section_desc);
30319 break;
30322 else if (ISALNUM (*q))
30323 *p++ = *q;
30326 if (last_period == 0)
30327 strcpy (p, section_desc);
30328 else
30329 *p = '\0';
30332 /* Emit profile function. */
30334 void
30335 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
30337 /* Non-standard profiling for kernels, which just saves LR then calls
30338 _mcount without worrying about arg saves. The idea is to change
30339 the function prologue as little as possible, since it isn't easy to
30340 account for arg save/restore code added just for _mcount. */
30341 if (TARGET_PROFILE_KERNEL)
30342 return;
30344 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30346 #ifndef NO_PROFILE_COUNTERS
30347 # define NO_PROFILE_COUNTERS 0
30348 #endif
30349 if (NO_PROFILE_COUNTERS)
30350 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30351 LCT_NORMAL, VOIDmode);
30352 else
30354 char buf[30];
30355 const char *label_name;
30356 rtx fun;
30358 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30359 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
30360 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
30362 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
30363 LCT_NORMAL, VOIDmode, fun, Pmode);
30366 else if (DEFAULT_ABI == ABI_DARWIN)
30368 const char *mcount_name = RS6000_MCOUNT;
30369 int caller_addr_regno = LR_REGNO;
30371 /* Be conservative and always set this, at least for now. */
30372 crtl->uses_pic_offset_table = 1;
30374 #if TARGET_MACHO
30375 /* For PIC code, set up a stub and collect the caller's address
30376 from r0, which is where the prologue puts it. */
30377 if (MACHOPIC_INDIRECT
30378 && crtl->uses_pic_offset_table)
30379 caller_addr_regno = 0;
30380 #endif
30381 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
30382 LCT_NORMAL, VOIDmode,
30383 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
30387 /* Write function profiler code. */
30389 void
30390 output_function_profiler (FILE *file, int labelno)
30392 char buf[100];
30394 switch (DEFAULT_ABI)
30396 default:
30397 gcc_unreachable ();
30399 case ABI_V4:
30400 if (!TARGET_32BIT)
30402 warning (0, "no profiling of 64-bit code for this ABI");
30403 return;
30405 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
30406 fprintf (file, "\tmflr %s\n", reg_names[0]);
30407 if (NO_PROFILE_COUNTERS)
30409 asm_fprintf (file, "\tstw %s,4(%s)\n",
30410 reg_names[0], reg_names[1]);
30412 else if (TARGET_SECURE_PLT && flag_pic)
30414 if (TARGET_LINK_STACK)
30416 char name[32];
30417 get_ppc476_thunk_name (name);
30418 asm_fprintf (file, "\tbl %s\n", name);
30420 else
30421 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
30422 asm_fprintf (file, "\tstw %s,4(%s)\n",
30423 reg_names[0], reg_names[1]);
30424 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30425 asm_fprintf (file, "\taddis %s,%s,",
30426 reg_names[12], reg_names[12]);
30427 assemble_name (file, buf);
30428 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
30429 assemble_name (file, buf);
30430 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
30432 else if (flag_pic == 1)
30434 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
30435 asm_fprintf (file, "\tstw %s,4(%s)\n",
30436 reg_names[0], reg_names[1]);
30437 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
30438 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
30439 assemble_name (file, buf);
30440 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
30442 else if (flag_pic > 1)
30444 asm_fprintf (file, "\tstw %s,4(%s)\n",
30445 reg_names[0], reg_names[1]);
30446 /* Now, we need to get the address of the label. */
30447 if (TARGET_LINK_STACK)
30449 char name[32];
30450 get_ppc476_thunk_name (name);
30451 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
30452 assemble_name (file, buf);
30453 fputs ("-.\n1:", file);
30454 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30455 asm_fprintf (file, "\taddi %s,%s,4\n",
30456 reg_names[11], reg_names[11]);
30458 else
30460 fputs ("\tbcl 20,31,1f\n\t.long ", file);
30461 assemble_name (file, buf);
30462 fputs ("-.\n1:", file);
30463 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
30465 asm_fprintf (file, "\tlwz %s,0(%s)\n",
30466 reg_names[0], reg_names[11]);
30467 asm_fprintf (file, "\tadd %s,%s,%s\n",
30468 reg_names[0], reg_names[0], reg_names[11]);
30470 else
30472 asm_fprintf (file, "\tlis %s,", reg_names[12]);
30473 assemble_name (file, buf);
30474 fputs ("@ha\n", file);
30475 asm_fprintf (file, "\tstw %s,4(%s)\n",
30476 reg_names[0], reg_names[1]);
30477 asm_fprintf (file, "\tla %s,", reg_names[0]);
30478 assemble_name (file, buf);
30479 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
30482 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
30483 fprintf (file, "\tbl %s%s\n",
30484 RS6000_MCOUNT, flag_pic ? "@plt" : "");
30485 break;
30487 case ABI_AIX:
30488 case ABI_ELFv2:
30489 case ABI_DARWIN:
30490 /* Don't do anything, done in output_profile_hook (). */
30491 break;
30497 /* The following variable holds the last issued insn. */
30499 static rtx_insn *last_scheduled_insn;
30501 /* The following variable helps to balance issuing of load and
30502 store instructions. */
30504 static int load_store_pendulum;
30506 /* The following variable helps pair divide insns during scheduling. */
30507 static int divide_cnt;
30508 /* The following variable helps pair and alternate vector and vector load
30509 insns during scheduling. */
30510 static int vec_pairing;
30513 /* Power4 load update and store update instructions are cracked into a
30514 load or store and an integer insn which are executed in the same cycle.
30515 Branches have their own dispatch slot which does not count against the
30516 GCC issue rate, but it changes the program flow so there are no other
30517 instructions to issue in this cycle. */
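/* For example (illustrative), a load-with-update such as
	lwzu r3,4(r4)
   is cracked into the load of r3 plus the addi that updates r4, and
   both pieces execute in the same cycle as described above.  */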
30519 static int
30520 rs6000_variable_issue_1 (rtx_insn *insn, int more)
30522 last_scheduled_insn = insn;
30523 if (GET_CODE (PATTERN (insn)) == USE
30524 || GET_CODE (PATTERN (insn)) == CLOBBER)
30526 cached_can_issue_more = more;
30527 return cached_can_issue_more;
30530 if (insn_terminates_group_p (insn, current_group))
30532 cached_can_issue_more = 0;
30533 return cached_can_issue_more;
30537 /* If the insn has no reservation (it was not recognized), leave the issue count unchanged. */
30537 if (recog_memoized (insn) < 0)
30538 return more;
30540 if (rs6000_sched_groups)
30542 if (is_microcoded_insn (insn))
30543 cached_can_issue_more = 0;
30544 else if (is_cracked_insn (insn))
30545 cached_can_issue_more = more > 2 ? more - 2 : 0;
30546 else
30547 cached_can_issue_more = more - 1;
30549 return cached_can_issue_more;
30552 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
30553 return 0;
30555 cached_can_issue_more = more - 1;
30556 return cached_can_issue_more;
30559 static int
30560 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
30562 int r = rs6000_variable_issue_1 (insn, more);
30563 if (verbose)
30564 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
30565 return r;
30568 /* Adjust the cost of a scheduling dependency. Return the new cost of
30569 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
30571 static int
30572 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
30573 unsigned int)
30575 enum attr_type attr_type;
30577 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
30578 return cost;
30580 switch (dep_type)
30582 case REG_DEP_TRUE:
30584 /* Data dependency; DEP_INSN writes a register that INSN reads
30585 some cycles later. */
30587 /* Separate a load from a narrower, dependent store. */
30588 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9)
30589 && GET_CODE (PATTERN (insn)) == SET
30590 && GET_CODE (PATTERN (dep_insn)) == SET
30591 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
30592 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
30593 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
30594 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
30595 return cost + 14;
30597 attr_type = get_attr_type (insn);
30599 switch (attr_type)
30601 case TYPE_JMPREG:
30602 /* Tell the first scheduling pass about the latency between
30603 a mtctr and bctr (and mtlr and br/blr). The first
30604 scheduling pass will not know about this latency since
30605 the mtctr instruction, which has the latency associated
30606 to it, will be generated by reload. */
30607 return 4;
30608 case TYPE_BRANCH:
30609 /* Leave some extra cycles between a compare and its
30610 dependent branch, to inhibit expensive mispredicts. */
30611 if ((rs6000_tune == PROCESSOR_PPC603
30612 || rs6000_tune == PROCESSOR_PPC604
30613 || rs6000_tune == PROCESSOR_PPC604e
30614 || rs6000_tune == PROCESSOR_PPC620
30615 || rs6000_tune == PROCESSOR_PPC630
30616 || rs6000_tune == PROCESSOR_PPC750
30617 || rs6000_tune == PROCESSOR_PPC7400
30618 || rs6000_tune == PROCESSOR_PPC7450
30619 || rs6000_tune == PROCESSOR_PPCE5500
30620 || rs6000_tune == PROCESSOR_PPCE6500
30621 || rs6000_tune == PROCESSOR_POWER4
30622 || rs6000_tune == PROCESSOR_POWER5
30623 || rs6000_tune == PROCESSOR_POWER7
30624 || rs6000_tune == PROCESSOR_POWER8
30625 || rs6000_tune == PROCESSOR_POWER9
30626 || rs6000_tune == PROCESSOR_CELL)
30627 && recog_memoized (dep_insn)
30628 && (INSN_CODE (dep_insn) >= 0))
30630 switch (get_attr_type (dep_insn))
30632 case TYPE_CMP:
30633 case TYPE_FPCOMPARE:
30634 case TYPE_CR_LOGICAL:
30635 case TYPE_DELAYED_CR:
30636 return cost + 2;
30637 case TYPE_EXTS:
30638 case TYPE_MUL:
30639 if (get_attr_dot (dep_insn) == DOT_YES)
30640 return cost + 2;
30641 else
30642 break;
30643 case TYPE_SHIFT:
30644 if (get_attr_dot (dep_insn) == DOT_YES
30645 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
30646 return cost + 2;
30647 else
30648 break;
30649 default:
30650 break;
30652 break;
30654 case TYPE_STORE:
30655 case TYPE_FPSTORE:
30656 if ((rs6000_tune == PROCESSOR_POWER6)
30657 && recog_memoized (dep_insn)
30658 && (INSN_CODE (dep_insn) >= 0))
30661 if (GET_CODE (PATTERN (insn)) != SET)
30662 /* If this happens, we have to extend this to schedule
30663 optimally. Return default for now. */
30664 return cost;
30666 /* Adjust the cost for the case where the value written
30667 by a fixed point operation is used as the address
30668 gen value on a store. */
30669 switch (get_attr_type (dep_insn))
30671 case TYPE_LOAD:
30672 case TYPE_CNTLZ:
30674 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30675 return get_attr_sign_extend (dep_insn)
30676 == SIGN_EXTEND_YES ? 6 : 4;
30677 break;
30679 case TYPE_SHIFT:
30681 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30682 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30683 6 : 3;
30684 break;
30686 case TYPE_INTEGER:
30687 case TYPE_ADD:
30688 case TYPE_LOGICAL:
30689 case TYPE_EXTS:
30690 case TYPE_INSERT:
30692 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30693 return 3;
30694 break;
30696 case TYPE_STORE:
30697 case TYPE_FPLOAD:
30698 case TYPE_FPSTORE:
30700 if (get_attr_update (dep_insn) == UPDATE_YES
30701 && ! rs6000_store_data_bypass_p (dep_insn, insn))
30702 return 3;
30703 break;
30705 case TYPE_MUL:
30707 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30708 return 17;
30709 break;
30711 case TYPE_DIV:
30713 if (! rs6000_store_data_bypass_p (dep_insn, insn))
30714 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30715 break;
30717 default:
30718 break;
30721 break;
30723 case TYPE_LOAD:
30724 if ((rs6000_tune == PROCESSOR_POWER6)
30725 && recog_memoized (dep_insn)
30726 && (INSN_CODE (dep_insn) >= 0))
30729 /* Adjust the cost for the case where the value written
30730 by a fixed point instruction is used within the address
30731 gen portion of a subsequent load(u)(x). */
30732 switch (get_attr_type (dep_insn))
30734 case TYPE_LOAD:
30735 case TYPE_CNTLZ:
30737 if (set_to_load_agen (dep_insn, insn))
30738 return get_attr_sign_extend (dep_insn)
30739 == SIGN_EXTEND_YES ? 6 : 4;
30740 break;
30742 case TYPE_SHIFT:
30744 if (set_to_load_agen (dep_insn, insn))
30745 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
30746 6 : 3;
30747 break;
30749 case TYPE_INTEGER:
30750 case TYPE_ADD:
30751 case TYPE_LOGICAL:
30752 case TYPE_EXTS:
30753 case TYPE_INSERT:
30755 if (set_to_load_agen (dep_insn, insn))
30756 return 3;
30757 break;
30759 case TYPE_STORE:
30760 case TYPE_FPLOAD:
30761 case TYPE_FPSTORE:
30763 if (get_attr_update (dep_insn) == UPDATE_YES
30764 && set_to_load_agen (dep_insn, insn))
30765 return 3;
30766 break;
30768 case TYPE_MUL:
30770 if (set_to_load_agen (dep_insn, insn))
30771 return 17;
30772 break;
30774 case TYPE_DIV:
30776 if (set_to_load_agen (dep_insn, insn))
30777 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
30778 break;
30780 default:
30781 break;
30784 break;
30786 case TYPE_FPLOAD:
30787 if ((rs6000_tune == PROCESSOR_POWER6)
30788 && get_attr_update (insn) == UPDATE_NO
30789 && recog_memoized (dep_insn)
30790 && (INSN_CODE (dep_insn) >= 0)
30791 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
30792 return 2;
30794 default:
30795 break;
30798 /* Fall out to return default cost. */
30800 break;
30802 case REG_DEP_OUTPUT:
30803 /* Output dependency; DEP_INSN writes a register that INSN writes some
30804 cycles later. */
30805 if ((rs6000_tune == PROCESSOR_POWER6)
30806 && recog_memoized (dep_insn)
30807 && (INSN_CODE (dep_insn) >= 0))
30809 attr_type = get_attr_type (insn);
30811 switch (attr_type)
30813 case TYPE_FP:
30814 case TYPE_FPSIMPLE:
30815 if (get_attr_type (dep_insn) == TYPE_FP
30816 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
30817 return 1;
30818 break;
30819 case TYPE_FPLOAD:
30820 if (get_attr_update (insn) == UPDATE_NO
30821 && get_attr_type (dep_insn) == TYPE_MFFGPR)
30822 return 2;
30823 break;
30824 default:
30825 break;
30828 /* Fall through, no cost for output dependency. */
30829 /* FALLTHRU */
30831 case REG_DEP_ANTI:
30832 /* Anti dependency; DEP_INSN reads a register that INSN writes some
30833 cycles later. */
30834 return 0;
30836 default:
30837 gcc_unreachable ();
30840 return cost;
30843 /* Debug version of rs6000_adjust_cost. */
30845 static int
30846 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
30847 int cost, unsigned int dw)
30849 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
30851 if (ret != cost)
30853 const char *dep;
30855 switch (dep_type)
30857 default: dep = "unknown dependency"; break;
30858 case REG_DEP_TRUE: dep = "data dependency"; break;
30859 case REG_DEP_OUTPUT: dep = "output dependency"; break;
30860 case REG_DEP_ANTI: dep = "anti dependency"; break;
30863 fprintf (stderr,
30864 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
30865 "%s, insn:\n", ret, cost, dep);
30867 debug_rtx (insn);
30870 return ret;
30873 /* The function returns true if INSN is microcoded.
30874 Return false otherwise. */
30876 static bool
30877 is_microcoded_insn (rtx_insn *insn)
30879 if (!insn || !NONDEBUG_INSN_P (insn)
30880 || GET_CODE (PATTERN (insn)) == USE
30881 || GET_CODE (PATTERN (insn)) == CLOBBER)
30882 return false;
30884 if (rs6000_tune == PROCESSOR_CELL)
30885 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
30887 if (rs6000_sched_groups
30888 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30890 enum attr_type type = get_attr_type (insn);
30891 if ((type == TYPE_LOAD
30892 && get_attr_update (insn) == UPDATE_YES
30893 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
30894 || ((type == TYPE_LOAD || type == TYPE_STORE)
30895 && get_attr_update (insn) == UPDATE_YES
30896 && get_attr_indexed (insn) == INDEXED_YES)
30897 || type == TYPE_MFCR)
30898 return true;
30901 return false;
30904 /* The function returns true if INSN is cracked into 2 instructions
30905 by the processor (and therefore occupies 2 issue slots). */
30907 static bool
30908 is_cracked_insn (rtx_insn *insn)
30910 if (!insn || !NONDEBUG_INSN_P (insn)
30911 || GET_CODE (PATTERN (insn)) == USE
30912 || GET_CODE (PATTERN (insn)) == CLOBBER)
30913 return false;
30915 if (rs6000_sched_groups
30916 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
30918 enum attr_type type = get_attr_type (insn);
30919 if ((type == TYPE_LOAD
30920 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
30921 && get_attr_update (insn) == UPDATE_NO)
30922 || (type == TYPE_LOAD
30923 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
30924 && get_attr_update (insn) == UPDATE_YES
30925 && get_attr_indexed (insn) == INDEXED_NO)
30926 || (type == TYPE_STORE
30927 && get_attr_update (insn) == UPDATE_YES
30928 && get_attr_indexed (insn) == INDEXED_NO)
30929 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
30930 && get_attr_update (insn) == UPDATE_YES)
30931 || type == TYPE_DELAYED_CR
30932 || (type == TYPE_EXTS
30933 && get_attr_dot (insn) == DOT_YES)
30934 || (type == TYPE_SHIFT
30935 && get_attr_dot (insn) == DOT_YES
30936 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
30937 || (type == TYPE_MUL
30938 && get_attr_dot (insn) == DOT_YES)
30939 || type == TYPE_DIV
30940 || (type == TYPE_INSERT
30941 && get_attr_size (insn) == SIZE_32))
30942 return true;
30945 return false;
30948 /* The function returns true if INSN can be issued only from
30949 the branch slot. */
30951 static bool
30952 is_branch_slot_insn (rtx_insn *insn)
30954 if (!insn || !NONDEBUG_INSN_P (insn)
30955 || GET_CODE (PATTERN (insn)) == USE
30956 || GET_CODE (PATTERN (insn)) == CLOBBER)
30957 return false;
30959 if (rs6000_sched_groups)
30961 enum attr_type type = get_attr_type (insn);
30962 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
30963 return true;
30964 return false;
30967 return false;
30970 /* The function returns true if out_insn sets a value that is
30971 used in the address generation computation of in_insn. */
30972 static bool
30973 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
30975 rtx out_set, in_set;
30977 /* For performance reasons, only handle the simple case where
30978 both loads are a single_set. */
30979 out_set = single_set (out_insn);
30980 if (out_set)
30982 in_set = single_set (in_insn);
30983 if (in_set)
30984 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
30987 return false;
30990 /* Try to determine base/offset/size parts of the given MEM.
30991 Return true if successful, false if the values couldn't
30992 all be determined.
30994 This function only looks for REG or REG+CONST address forms.
30995 REG+REG address form will return false. */
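/* For illustration: a MEM such as (mem:DI (plus:DI (reg 9)
   (const_int 16))) decomposes into *base = (reg 9), *offset = 16 and
   *size = 8, whereas a REG+REG address like (plus (reg 9) (reg 10))
   fails the final REG_P test and the function returns false. */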
30997 static bool
30998 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
30999 HOST_WIDE_INT *size)
31001 rtx addr_rtx;
31002 if (MEM_SIZE_KNOWN_P (mem))
31003 *size = MEM_SIZE (mem);
31004 else
31005 return false;
31007 addr_rtx = XEXP (mem, 0);
31008 if (GET_CODE (addr_rtx) == PRE_MODIFY)
31009 addr_rtx = XEXP (addr_rtx, 1);
31011 *offset = 0;
31012 while (GET_CODE (addr_rtx) == PLUS
31013 && CONST_INT_P (XEXP (addr_rtx, 1)))
31015 *offset += INTVAL (XEXP (addr_rtx, 1));
31016 addr_rtx = XEXP (addr_rtx, 0);
31018 if (!REG_P (addr_rtx))
31019 return false;
31021 *base = addr_rtx;
31022 return true;
31025 /* The function returns true if the target storage location of
31026 mem1 is adjacent to the target storage location of mem2. */
31029 static bool
31030 adjacent_mem_locations (rtx mem1, rtx mem2)
31032 rtx reg1, reg2;
31033 HOST_WIDE_INT off1, size1, off2, size2;
31035 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31036 && get_memref_parts (mem2, &reg2, &off2, &size2))
31037 return ((REGNO (reg1) == REGNO (reg2))
31038 && ((off1 + size1 == off2)
31039 || (off2 + size2 == off1)));
31041 return false;
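/* For illustration: accesses off the same base register at r1+16 with
   size 8 and r1+24 with size 8 are adjacent (16 + 8 == 24); the same
   pair at offsets 16 and 32 would leave a gap and is not. */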
31044 /* This function returns true if it can be determined that the two MEM
31045 locations overlap by at least 1 byte based on base reg/offset/size. */
31047 static bool
31048 mem_locations_overlap (rtx mem1, rtx mem2)
31050 rtx reg1, reg2;
31051 HOST_WIDE_INT off1, size1, off2, size2;
31053 if (get_memref_parts (mem1, &reg1, &off1, &size1)
31054 && get_memref_parts (mem2, &reg2, &off2, &size2))
31055 return ((REGNO (reg1) == REGNO (reg2))
31056 && (((off1 <= off2) && (off1 + size1 > off2))
31057 || ((off2 <= off1) && (off2 + size2 > off1))));
31059 return false;
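/* For illustration: accesses at r1+0 of size 8 and r1+4 of size 4
   overlap, since 0 <= 4 and 0 + 8 > 4; accesses at r1+0 of size 4 and
   r1+4 of size 4 merely abut and do not overlap. */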
31062 /* A C statement (sans semicolon) to update the integer scheduling
31063 priority INSN_PRIORITY (INSN). Increase the priority to execute the
31064 INSN earlier, reduce the priority to execute INSN later. Do not
31065 define this macro if you do not need to adjust the scheduling
31066 priorities of insns. */
31068 static int
31069 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
31071 rtx load_mem, str_mem;
31072 /* On machines (like the 750) which have asymmetric integer units,
31073 where one integer unit can do multiply and divides and the other
31074 can't, reduce the priority of multiply/divide so that other
31075 integer operations are scheduled ahead of it. */
31077 #if 0
31078 if (! INSN_P (insn))
31079 return priority;
31081 if (GET_CODE (PATTERN (insn)) == USE)
31082 return priority;
31084 switch (rs6000_tune) {
31085 case PROCESSOR_PPC750:
31086 switch (get_attr_type (insn))
31088 default:
31089 break;
31091 case TYPE_MUL:
31092 case TYPE_DIV:
31093 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
31094 priority, priority);
31095 if (priority >= 0 && priority < 0x01000000)
31096 priority >>= 3;
31097 break;
31100 #endif
31102 if (insn_must_be_first_in_group (insn)
31103 && reload_completed
31104 && current_sched_info->sched_max_insns_priority
31105 && rs6000_sched_restricted_insns_priority)
31108 /* Prioritize insns that can be dispatched only in the first
31109 dispatch slot. */
31110 if (rs6000_sched_restricted_insns_priority == 1)
31111 /* Attach highest priority to insn. This means that in
31112 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
31113 precede 'priority' (critical path) considerations. */
31114 return current_sched_info->sched_max_insns_priority;
31115 else if (rs6000_sched_restricted_insns_priority == 2)
31116 /* Increase priority of insn by a minimal amount. This means that in
31117 haifa-sched.c:ready_sort(), only 'priority' (critical path)
31118 considerations precede dispatch-slot restriction considerations. */
31119 return (priority + 1);
31122 if (rs6000_tune == PROCESSOR_POWER6
31123 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
31124 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
31125 /* Attach highest priority to insn if the scheduler has just issued two
31126 stores and this instruction is a load, or two loads and this instruction
31127 is a store. Power6 wants loads and stores scheduled alternately
31128 when possible. */
31129 return current_sched_info->sched_max_insns_priority;
31131 return priority;
31134 /* Return true if the instruction is nonpipelined on the Cell. */
31135 static bool
31136 is_nonpipeline_insn (rtx_insn *insn)
31138 enum attr_type type;
31139 if (!insn || !NONDEBUG_INSN_P (insn)
31140 || GET_CODE (PATTERN (insn)) == USE
31141 || GET_CODE (PATTERN (insn)) == CLOBBER)
31142 return false;
31144 type = get_attr_type (insn);
31145 if (type == TYPE_MUL
31146 || type == TYPE_DIV
31147 || type == TYPE_SDIV
31148 || type == TYPE_DDIV
31149 || type == TYPE_SSQRT
31150 || type == TYPE_DSQRT
31151 || type == TYPE_MFCR
31152 || type == TYPE_MFCRF
31153 || type == TYPE_MFJMPR)
31155 return true;
31157 return false;
31161 /* Return how many instructions the machine can issue per cycle. */
31163 static int
31164 rs6000_issue_rate (void)
31166 /* Unless scheduling for register pressure, use issue rate of 1 for
31167 first scheduling pass to decrease degradation. */
31168 if (!reload_completed && !flag_sched_pressure)
31169 return 1;
31171 switch (rs6000_tune) {
31172 case PROCESSOR_RS64A:
31173 case PROCESSOR_PPC601: /* ? */
31174 case PROCESSOR_PPC7450:
31175 return 3;
31176 case PROCESSOR_PPC440:
31177 case PROCESSOR_PPC603:
31178 case PROCESSOR_PPC750:
31179 case PROCESSOR_PPC7400:
31180 case PROCESSOR_PPC8540:
31181 case PROCESSOR_PPC8548:
31182 case PROCESSOR_CELL:
31183 case PROCESSOR_PPCE300C2:
31184 case PROCESSOR_PPCE300C3:
31185 case PROCESSOR_PPCE500MC:
31186 case PROCESSOR_PPCE500MC64:
31187 case PROCESSOR_PPCE5500:
31188 case PROCESSOR_PPCE6500:
31189 case PROCESSOR_TITAN:
31190 return 2;
31191 case PROCESSOR_PPC476:
31192 case PROCESSOR_PPC604:
31193 case PROCESSOR_PPC604e:
31194 case PROCESSOR_PPC620:
31195 case PROCESSOR_PPC630:
31196 return 4;
31197 case PROCESSOR_POWER4:
31198 case PROCESSOR_POWER5:
31199 case PROCESSOR_POWER6:
31200 case PROCESSOR_POWER7:
31201 return 5;
31202 case PROCESSOR_POWER8:
31203 return 7;
31204 case PROCESSOR_POWER9:
31205 return 6;
31206 default:
31207 return 1;
31211 /* Return how many instructions to look ahead for better insn
31212 scheduling. */
31214 static int
31215 rs6000_use_sched_lookahead (void)
31217 switch (rs6000_tune)
31219 case PROCESSOR_PPC8540:
31220 case PROCESSOR_PPC8548:
31221 return 4;
31223 case PROCESSOR_CELL:
31224 return (reload_completed ? 8 : 0);
31226 default:
31227 return 0;
31231 /* We are choosing an insn from the ready queue. Return zero if INSN can be
31232 chosen. */
31233 static int
31234 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
31236 if (ready_index == 0)
31237 return 0;
31239 if (rs6000_tune != PROCESSOR_CELL)
31240 return 0;
31242 gcc_assert (insn != NULL_RTX && INSN_P (insn));
31244 if (!reload_completed
31245 || is_nonpipeline_insn (insn)
31246 || is_microcoded_insn (insn))
31247 return 1;
31249 return 0;
31252 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
31253 and return true. */
31255 static bool
31256 find_mem_ref (rtx pat, rtx *mem_ref)
31258 const char * fmt;
31259 int i, j;
31261 /* stack_tie does not produce any real memory traffic. */
31262 if (tie_operand (pat, VOIDmode))
31263 return false;
31265 if (GET_CODE (pat) == MEM)
31267 *mem_ref = pat;
31268 return true;
31271 /* Recursively process the pattern. */
31272 fmt = GET_RTX_FORMAT (GET_CODE (pat));
31274 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
31276 if (fmt[i] == 'e')
31278 if (find_mem_ref (XEXP (pat, i), mem_ref))
31279 return true;
31281 else if (fmt[i] == 'E')
31282 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
31284 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
31285 return true;
31289 return false;
31292 /* Determine if PAT is a PATTERN of a load insn. */
31294 static bool
31295 is_load_insn1 (rtx pat, rtx *load_mem)
31297 if (!pat)
31298 return false;
31300 if (GET_CODE (pat) == SET)
31301 return find_mem_ref (SET_SRC (pat), load_mem);
31303 if (GET_CODE (pat) == PARALLEL)
31305 int i;
31307 for (i = 0; i < XVECLEN (pat, 0); i++)
31308 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
31309 return true;
31312 return false;
31315 /* Determine if INSN loads from memory. */
31317 static bool
31318 is_load_insn (rtx insn, rtx *load_mem)
31320 if (!insn || !INSN_P (insn))
31321 return false;
31323 if (CALL_P (insn))
31324 return false;
31326 return is_load_insn1 (PATTERN (insn), load_mem);
31329 /* Determine if PAT is a PATTERN of a store insn. */
31331 static bool
31332 is_store_insn1 (rtx pat, rtx *str_mem)
31334 if (!pat)
31335 return false;
31337 if (GET_CODE (pat) == SET)
31338 return find_mem_ref (SET_DEST (pat), str_mem);
31340 if (GET_CODE (pat) == PARALLEL)
31342 int i;
31344 for (i = 0; i < XVECLEN (pat, 0); i++)
31345 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
31346 return true;
31349 return false;
31352 /* Determine if INSN stores to memory. */
31354 static bool
31355 is_store_insn (rtx insn, rtx *str_mem)
31357 if (!insn || !INSN_P (insn))
31358 return false;
31360 return is_store_insn1 (PATTERN (insn), str_mem);
31363 /* Return whether TYPE is a Power9 pairable vector instruction type. */
31365 static bool
31366 is_power9_pairable_vec_type (enum attr_type type)
31368 switch (type)
31370 case TYPE_VECSIMPLE:
31371 case TYPE_VECCOMPLEX:
31372 case TYPE_VECDIV:
31373 case TYPE_VECCMP:
31374 case TYPE_VECPERM:
31375 case TYPE_VECFLOAT:
31376 case TYPE_VECFDIV:
31377 case TYPE_VECDOUBLE:
31378 return true;
31379 default:
31380 break;
31382 return false;
31385 /* Returns whether the dependence between INSN and NEXT is considered
31386 costly by the given target. */
31388 static bool
31389 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
31391 rtx insn;
31392 rtx next;
31393 rtx load_mem, str_mem;
31395 /* If the flag is not enabled - no dependence is considered costly;
31396 allow all dependent insns in the same group.
31397 This is the most aggressive option. */
31398 if (rs6000_sched_costly_dep == no_dep_costly)
31399 return false;
31401 /* If the flag is set to 1 - a dependence is always considered costly;
31402 do not allow dependent instructions in the same group.
31403 This is the most conservative option. */
31404 if (rs6000_sched_costly_dep == all_deps_costly)
31405 return true;
31407 insn = DEP_PRO (dep);
31408 next = DEP_CON (dep);
31410 if (rs6000_sched_costly_dep == store_to_load_dep_costly
31411 && is_load_insn (next, &load_mem)
31412 && is_store_insn (insn, &str_mem))
31413 /* Prevent load after store in the same group. */
31414 return true;
31416 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
31417 && is_load_insn (next, &load_mem)
31418 && is_store_insn (insn, &str_mem)
31419 && DEP_TYPE (dep) == REG_DEP_TRUE
31420 && mem_locations_overlap(str_mem, load_mem))
31421 /* Prevent load after store in the same group if it is a true
31422 dependence. */
31423 return true;
31425 /* The flag is set to X; dependences with latency >= X are considered costly,
31426 and will not be scheduled in the same group. */
31427 if (rs6000_sched_costly_dep <= max_dep_latency
31428 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
31429 return true;
31431 return false;
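/* For illustration: if -msched-costly-dep gives rs6000_sched_costly_dep
   the numeric value 3, then a dependence of cost 4 at distance 0
   satisfies (4 - 0) >= 3 and is costly, so the two insns will not be
   placed in the same dispatch group. */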
31434 /* Return the next insn after INSN that is found before TAIL is reached,
31435 skipping any "non-active" insns - insns that will not actually occupy
31436 an issue slot. Return NULL_RTX if such an insn is not found. */
31438 static rtx_insn *
31439 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
31441 if (insn == NULL_RTX || insn == tail)
31442 return NULL;
31444 while (1)
31446 insn = NEXT_INSN (insn);
31447 if (insn == NULL_RTX || insn == tail)
31448 return NULL;
31450 if (CALL_P (insn)
31451 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
31452 || (NONJUMP_INSN_P (insn)
31453 && GET_CODE (PATTERN (insn)) != USE
31454 && GET_CODE (PATTERN (insn)) != CLOBBER
31455 && INSN_CODE (insn) != CODE_FOR_stack_tie))
31456 break;
31458 return insn;
31461 /* Do Power9 specific sched_reorder2 reordering of ready list. */
31463 static int
31464 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
31466 int pos;
31467 int i;
31468 rtx_insn *tmp;
31469 enum attr_type type, type2;
31471 type = get_attr_type (last_scheduled_insn);
31473 /* Try to issue fixed point divides back-to-back in pairs so they will be
31474 routed to separate execution units and execute in parallel. */
31475 if (type == TYPE_DIV && divide_cnt == 0)
31477 /* First divide has been scheduled. */
31478 divide_cnt = 1;
31480 /* Scan the ready list looking for another divide, if found move it
31481 to the end of the list so it is chosen next. */
31482 pos = lastpos;
31483 while (pos >= 0)
31485 if (recog_memoized (ready[pos]) >= 0
31486 && get_attr_type (ready[pos]) == TYPE_DIV)
31488 tmp = ready[pos];
31489 for (i = pos; i < lastpos; i++)
31490 ready[i] = ready[i + 1];
31491 ready[lastpos] = tmp;
31492 break;
31494 pos--;
31497 else
31499 /* Last insn was the 2nd divide or not a divide, reset the counter. */
31500 divide_cnt = 0;
31502 /* The best dispatch throughput for vector and vector load insns can be
31503 achieved by interleaving a vector and vector load such that they'll
31504 dispatch to the same superslice. If this pairing cannot be achieved
31505 then it is best to pair vector insns together and vector load insns
31506 together.
31508 To aid in this pairing, vec_pairing maintains the current state with
31509 the following values:
31511 0 : Initial state, no vecload/vector pairing has been started.
31513 1 : A vecload or vector insn has been issued and a candidate for
31514 pairing has been found and moved to the end of the ready
31515 list. */
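/* For illustration: after a vecload issues with vec_pairing == 0, the
   first pairable vector insn (e.g. a TYPE_VECSIMPLE) found on the
   ready list is rotated to ready[lastpos] so it is chosen next; if
   only another vecload is found, that one is used instead. */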
31516 if (type == TYPE_VECLOAD)
31518 /* Issued a vecload. */
31519 if (vec_pairing == 0)
31521 int vecload_pos = -1;
31522 /* We issued a single vecload, look for a vector insn to pair it
31523 with. If one isn't found, try to pair another vecload. */
31524 pos = lastpos;
31525 while (pos >= 0)
31527 if (recog_memoized (ready[pos]) >= 0)
31529 type2 = get_attr_type (ready[pos]);
31530 if (is_power9_pairable_vec_type (type2))
31532 /* Found a vector insn to pair with, move it to the
31533 end of the ready list so it is scheduled next. */
31534 tmp = ready[pos];
31535 for (i = pos; i < lastpos; i++)
31536 ready[i] = ready[i + 1];
31537 ready[lastpos] = tmp;
31538 vec_pairing = 1;
31539 return cached_can_issue_more;
31541 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
31542 /* Remember position of first vecload seen. */
31543 vecload_pos = pos;
31545 pos--;
31547 if (vecload_pos >= 0)
31549 /* Didn't find a vector to pair with but did find a vecload,
31550 move it to the end of the ready list. */
31551 tmp = ready[vecload_pos];
31552 for (i = vecload_pos; i < lastpos; i++)
31553 ready[i] = ready[i + 1];
31554 ready[lastpos] = tmp;
31555 vec_pairing = 1;
31556 return cached_can_issue_more;
31560 else if (is_power9_pairable_vec_type (type))
31562 /* Issued a vector operation. */
31563 if (vec_pairing == 0)
31565 int vec_pos = -1;
31566 /* We issued a single vector insn, look for a vecload to pair it
31567 with. If one isn't found, try to pair another vector. */
31568 pos = lastpos;
31569 while (pos >= 0)
31571 if (recog_memoized (ready[pos]) >= 0)
31573 type2 = get_attr_type (ready[pos]);
31574 if (type2 == TYPE_VECLOAD)
31576 /* Found a vecload insn to pair with, move it to the
31577 end of the ready list so it is scheduled next. */
31578 tmp = ready[pos];
31579 for (i = pos; i < lastpos; i++)
31580 ready[i] = ready[i + 1];
31581 ready[lastpos] = tmp;
31582 vec_pairing = 1;
31583 return cached_can_issue_more;
31585 else if (is_power9_pairable_vec_type (type2)
31586 && vec_pos == -1)
31587 /* Remember position of first vector insn seen. */
31588 vec_pos = pos;
31590 pos--;
31592 if (vec_pos >= 0)
31594 /* Didn't find a vecload to pair with but did find a vector
31595 insn, move it to the end of the ready list. */
31596 tmp = ready[vec_pos];
31597 for (i = vec_pos; i < lastpos; i++)
31598 ready[i] = ready[i + 1];
31599 ready[lastpos] = tmp;
31600 vec_pairing = 1;
31601 return cached_can_issue_more;
31606 /* We've either finished a vec/vecload pair, couldn't find an insn to
31607 continue the current pair, or the last insn had nothing to do
31608 with pairing. In any case, reset the state. */
31609 vec_pairing = 0;
31612 return cached_can_issue_more;
31615 /* We are about to begin issuing insns for this clock cycle. */
31617 static int
31618 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
31619 rtx_insn **ready ATTRIBUTE_UNUSED,
31620 int *pn_ready ATTRIBUTE_UNUSED,
31621 int clock_var ATTRIBUTE_UNUSED)
31623 int n_ready = *pn_ready;
31625 if (sched_verbose)
31626 fprintf (dump, "// rs6000_sched_reorder :\n");
31628 /* Reorder the ready list, if the second to last ready insn
31629 is a nonpipeline insn. */
31630 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
31632 if (is_nonpipeline_insn (ready[n_ready - 1])
31633 && (recog_memoized (ready[n_ready - 2]) > 0))
31634 /* Simply swap first two insns. */
31635 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
31638 if (rs6000_tune == PROCESSOR_POWER6)
31639 load_store_pendulum = 0;
31641 return rs6000_issue_rate ();
31644 /* Like rs6000_sched_reorder, but called after issuing each insn. */
31646 static int
31647 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
31648 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
31650 if (sched_verbose)
31651 fprintf (dump, "// rs6000_sched_reorder2 :\n");
31653 /* For Power6, we need to handle some special cases to try and keep the
31654 store queue from overflowing and triggering expensive flushes.
31656 This code monitors how load and store instructions are being issued
31657 and skews the ready list one way or the other to increase the likelihood
31658 that a desired instruction is issued at the proper time.
31660 A couple of things are done. First, we maintain a "load_store_pendulum"
31661 to track the current state of load/store issue.
31663 - If the pendulum is at zero, then no loads or stores have been
31664 issued in the current cycle so we do nothing.
31666 - If the pendulum is 1, then a single load has been issued in this
31667 cycle and we attempt to locate another load in the ready list to
31668 issue with it.
31670 - If the pendulum is -2, then two stores have already been
31671 issued in this cycle, so we increase the priority of the first load
31672 in the ready list to increase its likelihood of being chosen first
31673 in the next cycle.
31675 - If the pendulum is -1, then a single store has been issued in this
31676 cycle and we attempt to locate another store in the ready list to
31677 issue with it, preferring a store to an adjacent memory location to
31678 facilitate store pairing in the store queue.
31680 - If the pendulum is 2, then two loads have already been
31681 issued in this cycle, so we increase the priority of the first store
31682 in the ready list to increase its likelihood of being chosen first
31683 in the next cycle.
31685 - If the pendulum < -2 or > 2, then do nothing.
31687 Note: This code covers the most common scenarios. There exist non
31688 load/store instructions which make use of the LSU and which
31689 would need to be accounted for to strictly model the behavior
31690 of the machine. Those instructions are currently unaccounted
31691 for to help minimize compile time overhead of this code. */
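/* For illustration: issuing two stores in one cycle swings the
   pendulum 0 -> -1 -> -2; at -1 the list is scanned for a second
   (preferably adjacent) store, and at -2 the first load found gets
   its INSN_PRIORITY bumped so loads and stores keep alternating. */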
31693 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
31695 int pos;
31696 int i;
31697 rtx_insn *tmp;
31698 rtx load_mem, str_mem;
31700 if (is_store_insn (last_scheduled_insn, &str_mem))
31701 /* Issuing a store, swing the load_store_pendulum to the left */
31702 load_store_pendulum--;
31703 else if (is_load_insn (last_scheduled_insn, &load_mem))
31704 /* Issuing a load, swing the load_store_pendulum to the right */
31705 load_store_pendulum++;
31706 else
31707 return cached_can_issue_more;
31709 /* If the pendulum is balanced, or there is only one instruction on
31710 the ready list, then all is well, so return. */
31711 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
31712 return cached_can_issue_more;
31714 if (load_store_pendulum == 1)
31716 /* A load has been issued in this cycle. Scan the ready list
31717 for another load to issue with it */
31718 pos = *pn_ready-1;
31720 while (pos >= 0)
31722 if (is_load_insn (ready[pos], &load_mem))
31724 /* Found a load. Move it to the head of the ready list,
31725 and adjust its priority so that it is more likely to
31726 stay there */
31727 tmp = ready[pos];
31728 for (i=pos; i<*pn_ready-1; i++)
31729 ready[i] = ready[i + 1];
31730 ready[*pn_ready-1] = tmp;
31732 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31733 INSN_PRIORITY (tmp)++;
31734 break;
31736 pos--;
31739 else if (load_store_pendulum == -2)
31741 /* Two stores have been issued in this cycle. Increase the
31742 priority of the first load in the ready list to favor it for
31743 issuing in the next cycle. */
31744 pos = *pn_ready-1;
31746 while (pos >= 0)
31748 if (is_load_insn (ready[pos], &load_mem)
31749 && !sel_sched_p ()
31750 && INSN_PRIORITY_KNOWN (ready[pos]))
31752 INSN_PRIORITY (ready[pos])++;
31754 /* Adjust the pendulum to account for the fact that a load
31755 was found and increased in priority. This is to prevent
31756 increasing the priority of multiple loads */
31757 load_store_pendulum--;
31759 break;
31761 pos--;
31764 else if (load_store_pendulum == -1)
31766 /* A store has been issued in this cycle. Scan the ready list for
31767 another store to issue with it, preferring a store to an adjacent
31768 memory location */
31769 int first_store_pos = -1;
31771 pos = *pn_ready-1;
31773 while (pos >= 0)
31775 if (is_store_insn (ready[pos], &str_mem))
31777 rtx str_mem2;
31778 /* Maintain the index of the first store found on the
31779 list */
31780 if (first_store_pos == -1)
31781 first_store_pos = pos;
31783 if (is_store_insn (last_scheduled_insn, &str_mem2)
31784 && adjacent_mem_locations (str_mem, str_mem2))
31786 /* Found an adjacent store. Move it to the head of the
31787 ready list, and adjust its priority so that it is
31788 more likely to stay there */
31789 tmp = ready[pos];
31790 for (i=pos; i<*pn_ready-1; i++)
31791 ready[i] = ready[i + 1];
31792 ready[*pn_ready-1] = tmp;
31794 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31795 INSN_PRIORITY (tmp)++;
31797 first_store_pos = -1;
31799 break;
31802 pos--;
31805 if (first_store_pos >= 0)
31807 /* An adjacent store wasn't found, but a non-adjacent store was,
31808 so move the non-adjacent store to the front of the ready
31809 list, and adjust its priority so that it is more likely to
31810 stay there. */
31811 tmp = ready[first_store_pos];
31812 for (i=first_store_pos; i<*pn_ready-1; i++)
31813 ready[i] = ready[i + 1];
31814 ready[*pn_ready-1] = tmp;
31815 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
31816 INSN_PRIORITY (tmp)++;
31819 else if (load_store_pendulum == 2)
31821 /* Two loads have been issued in this cycle. Increase the priority
31822 of the first store in the ready list to favor it for issuing in
31823 the next cycle. */
31824 pos = *pn_ready-1;
31826 while (pos >= 0)
31828 if (is_store_insn (ready[pos], &str_mem)
31829 && !sel_sched_p ()
31830 && INSN_PRIORITY_KNOWN (ready[pos]))
31832 INSN_PRIORITY (ready[pos])++;
31834 /* Adjust the pendulum to account for the fact that a store
31835 was found and increased in priority. This is to prevent
31836 increasing the priority of multiple stores */
31837 load_store_pendulum++;
31839 break;
31841 pos--;
31846 /* Do Power9 dependent reordering if necessary. */
31847 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
31848 && recog_memoized (last_scheduled_insn) >= 0)
31849 return power9_sched_reorder2 (ready, *pn_ready - 1);
31851 return cached_can_issue_more;
31854 /* Return whether the presence of INSN causes a dispatch group termination
31855 of group WHICH_GROUP.
31857 If WHICH_GROUP == current_group, this function will return true if INSN
31858 causes the termination of the current group (i.e., the dispatch group to
31859 which INSN belongs). This means that INSN will be the last insn in the
31860 group it belongs to.
31862 If WHICH_GROUP == previous_group, this function will return true if INSN
31863 causes the termination of the previous group (i.e., the dispatch group that
31864 precedes the group to which INSN belongs). This means that INSN will be
31865 the first insn in the group it belongs to. */
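/* For illustration: on Power4/5 a TYPE_SYNC insn must begin a group,
   so it terminates the previous group, while a branch can only occupy
   the final (branch) slot and therefore terminates the current
   group. */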
31867 static bool
31868 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
31870 bool first, last;
31872 if (! insn)
31873 return false;
31875 first = insn_must_be_first_in_group (insn);
31876 last = insn_must_be_last_in_group (insn);
31878 if (first && last)
31879 return true;
31881 if (which_group == current_group)
31882 return last;
31883 else if (which_group == previous_group)
31884 return first;
31886 return false;
31890 static bool
31891 insn_must_be_first_in_group (rtx_insn *insn)
31893 enum attr_type type;
31895 if (!insn
31896 || NOTE_P (insn)
31897 || DEBUG_INSN_P (insn)
31898 || GET_CODE (PATTERN (insn)) == USE
31899 || GET_CODE (PATTERN (insn)) == CLOBBER)
31900 return false;
31902 switch (rs6000_tune)
31904 case PROCESSOR_POWER5:
31905 if (is_cracked_insn (insn))
31906 return true;
31907 /* FALLTHRU */
31908 case PROCESSOR_POWER4:
31909 if (is_microcoded_insn (insn))
31910 return true;
31912 if (!rs6000_sched_groups)
31913 return false;
31915 type = get_attr_type (insn);
31917 switch (type)
31919 case TYPE_MFCR:
31920 case TYPE_MFCRF:
31921 case TYPE_MTCR:
31922 case TYPE_DELAYED_CR:
31923 case TYPE_CR_LOGICAL:
31924 case TYPE_MTJMPR:
31925 case TYPE_MFJMPR:
31926 case TYPE_DIV:
31927 case TYPE_LOAD_L:
31928 case TYPE_STORE_C:
31929 case TYPE_ISYNC:
31930 case TYPE_SYNC:
31931 return true;
31932 default:
31933 break;
31935 break;
31936 case PROCESSOR_POWER6:
31937 type = get_attr_type (insn);
31939 switch (type)
31941 case TYPE_EXTS:
31942 case TYPE_CNTLZ:
31943 case TYPE_TRAP:
31944 case TYPE_MUL:
31945 case TYPE_INSERT:
31946 case TYPE_FPCOMPARE:
31947 case TYPE_MFCR:
31948 case TYPE_MTCR:
31949 case TYPE_MFJMPR:
31950 case TYPE_MTJMPR:
31951 case TYPE_ISYNC:
31952 case TYPE_SYNC:
31953 case TYPE_LOAD_L:
31954 case TYPE_STORE_C:
31955 return true;
31956 case TYPE_SHIFT:
31957 if (get_attr_dot (insn) == DOT_NO
31958 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
31959 return true;
31960 else
31961 break;
31962 case TYPE_DIV:
31963 if (get_attr_size (insn) == SIZE_32)
31964 return true;
31965 else
31966 break;
31967 case TYPE_LOAD:
31968 case TYPE_STORE:
31969 case TYPE_FPLOAD:
31970 case TYPE_FPSTORE:
31971 if (get_attr_update (insn) == UPDATE_YES)
31972 return true;
31973 else
31974 break;
31975 default:
31976 break;
31978 break;
31979 case PROCESSOR_POWER7:
31980 type = get_attr_type (insn);
31982 switch (type)
31984 case TYPE_CR_LOGICAL:
31985 case TYPE_MFCR:
31986 case TYPE_MFCRF:
31987 case TYPE_MTCR:
31988 case TYPE_DIV:
31989 case TYPE_ISYNC:
31990 case TYPE_LOAD_L:
31991 case TYPE_STORE_C:
31992 case TYPE_MFJMPR:
31993 case TYPE_MTJMPR:
31994 return true;
31995 case TYPE_MUL:
31996 case TYPE_SHIFT:
31997 case TYPE_EXTS:
31998 if (get_attr_dot (insn) == DOT_YES)
31999 return true;
32000 else
32001 break;
32002 case TYPE_LOAD:
32003 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32004 || get_attr_update (insn) == UPDATE_YES)
32005 return true;
32006 else
32007 break;
32008 case TYPE_STORE:
32009 case TYPE_FPLOAD:
32010 case TYPE_FPSTORE:
32011 if (get_attr_update (insn) == UPDATE_YES)
32012 return true;
32013 else
32014 break;
32015 default:
32016 break;
32018 break;
32019 case PROCESSOR_POWER8:
32020 type = get_attr_type (insn);
32022 switch (type)
32024 case TYPE_CR_LOGICAL:
32025 case TYPE_DELAYED_CR:
32026 case TYPE_MFCR:
32027 case TYPE_MFCRF:
32028 case TYPE_MTCR:
32029 case TYPE_SYNC:
32030 case TYPE_ISYNC:
32031 case TYPE_LOAD_L:
32032 case TYPE_STORE_C:
32033 case TYPE_VECSTORE:
32034 case TYPE_MFJMPR:
32035 case TYPE_MTJMPR:
32036 return true;
32037 case TYPE_SHIFT:
32038 case TYPE_EXTS:
32039 case TYPE_MUL:
32040 if (get_attr_dot (insn) == DOT_YES)
32041 return true;
32042 else
32043 break;
32044 case TYPE_LOAD:
32045 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32046 || get_attr_update (insn) == UPDATE_YES)
32047 return true;
32048 else
32049 break;
32050 case TYPE_STORE:
32051 if (get_attr_update (insn) == UPDATE_YES
32052 && get_attr_indexed (insn) == INDEXED_YES)
32053 return true;
32054 else
32055 break;
32056 default:
32057 break;
32059 break;
32060 default:
32061 break;
32064 return false;
32067 static bool
32068 insn_must_be_last_in_group (rtx_insn *insn)
32070 enum attr_type type;
32072 if (!insn
32073 || NOTE_P (insn)
32074 || DEBUG_INSN_P (insn)
32075 || GET_CODE (PATTERN (insn)) == USE
32076 || GET_CODE (PATTERN (insn)) == CLOBBER)
32077 return false;
32079 switch (rs6000_tune) {
32080 case PROCESSOR_POWER4:
32081 case PROCESSOR_POWER5:
32082 if (is_microcoded_insn (insn))
32083 return true;
32085 if (is_branch_slot_insn (insn))
32086 return true;
32088 break;
32089 case PROCESSOR_POWER6:
32090 type = get_attr_type (insn);
32092 switch (type)
32094 case TYPE_EXTS:
32095 case TYPE_CNTLZ:
32096 case TYPE_TRAP:
32097 case TYPE_MUL:
32098 case TYPE_FPCOMPARE:
32099 case TYPE_MFCR:
32100 case TYPE_MTCR:
32101 case TYPE_MFJMPR:
32102 case TYPE_MTJMPR:
32103 case TYPE_ISYNC:
32104 case TYPE_SYNC:
32105 case TYPE_LOAD_L:
32106 case TYPE_STORE_C:
32107 return true;
32108 case TYPE_SHIFT:
32109 if (get_attr_dot (insn) == DOT_NO
32110 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
32111 return true;
32112 else
32113 break;
32114 case TYPE_DIV:
32115 if (get_attr_size (insn) == SIZE_32)
32116 return true;
32117 else
32118 break;
32119 default:
32120 break;
32122 break;
32123 case PROCESSOR_POWER7:
32124 type = get_attr_type (insn);
32126 switch (type)
32128 case TYPE_ISYNC:
32129 case TYPE_SYNC:
32130 case TYPE_LOAD_L:
32131 case TYPE_STORE_C:
32132 return true;
32133 case TYPE_LOAD:
32134 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32135 && get_attr_update (insn) == UPDATE_YES)
32136 return true;
32137 else
32138 break;
32139 case TYPE_STORE:
32140 if (get_attr_update (insn) == UPDATE_YES
32141 && get_attr_indexed (insn) == INDEXED_YES)
32142 return true;
32143 else
32144 break;
32145 default:
32146 break;
32148 break;
32149 case PROCESSOR_POWER8:
32150 type = get_attr_type (insn);
32152 switch (type)
32154 case TYPE_MFCR:
32155 case TYPE_MTCR:
32156 case TYPE_ISYNC:
32157 case TYPE_SYNC:
32158 case TYPE_LOAD_L:
32159 case TYPE_STORE_C:
32160 return true;
32161 case TYPE_LOAD:
32162 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
32163 && get_attr_update (insn) == UPDATE_YES)
32164 return true;
32165 else
32166 break;
32167 case TYPE_STORE:
32168 if (get_attr_update (insn) == UPDATE_YES
32169 && get_attr_indexed (insn) == INDEXED_YES)
32170 return true;
32171 else
32172 break;
32173 default:
32174 break;
32176 break;
32177 default:
32178 break;
32181 return false;
32184 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
32185 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
32187 static bool
32188 is_costly_group (rtx *group_insns, rtx next_insn)
32190 int i;
32191 int issue_rate = rs6000_issue_rate ();
32193 for (i = 0; i < issue_rate; i++)
32195 sd_iterator_def sd_it;
32196 dep_t dep;
32197 rtx insn = group_insns[i];
32199 if (!insn)
32200 continue;
32202 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
32204 rtx next = DEP_CON (dep);
32206 if (next == next_insn
32207 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
32208 return true;
32212 return false;
32215 /* Utility of the function redefine_groups.
32216 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
32217 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
32218 to keep it "far" (in a separate group) from GROUP_INSNS, following
32219 one of the following schemes, depending on the value of the flag
32220 -minsert-sched-nops = X:
32221 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
32222 in order to force NEXT_INSN into a separate group.
32223 (2) X < sched_finish_regroup_exact: insert exactly X nops.
32224 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
32225 insertion (has a group just ended, how many vacant issue slots remain in the
32226 last group, and how many dispatch groups were encountered so far). */
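/* For illustration: under sched_finish_regroup_exact on a 5-wide
   Power4 group, if a costly dependence is found while three issue
   slots remain and NEXT_INSN is not a branch, two nops are emitted so
   that NEXT_INSN is forced into a fresh group. */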
32228 static int
32229 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
32230 rtx_insn *next_insn, bool *group_end, int can_issue_more,
32231 int *group_count)
32233 rtx nop;
32234 bool force;
32235 int issue_rate = rs6000_issue_rate ();
32236 bool end = *group_end;
32237 int i;
32239 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
32240 return can_issue_more;
32242 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
32243 return can_issue_more;
32245 force = is_costly_group (group_insns, next_insn);
32246 if (!force)
32247 return can_issue_more;
32249 if (sched_verbose > 6)
32250 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
32251 *group_count ,can_issue_more);
32253 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
32255 if (*group_end)
32256 can_issue_more = 0;
32258 /* Since only a branch can be issued in the last issue_slot, it is
32259 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
32260 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
32261 in this case the last nop will start a new group and the branch
32262 will be forced to the new group. */
32263 if (can_issue_more && !is_branch_slot_insn (next_insn))
32264 can_issue_more--;
32266 /* Do we have a special group ending nop? */
32267 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
32268 || rs6000_tune == PROCESSOR_POWER8)
32270 nop = gen_group_ending_nop ();
32271 emit_insn_before (nop, next_insn);
32272 can_issue_more = 0;
32274 else
32275 while (can_issue_more > 0)
32277 nop = gen_nop ();
32278 emit_insn_before (nop, next_insn);
32279 can_issue_more--;
32282 *group_end = true;
32283 return 0;
32286 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
32288 int n_nops = rs6000_sched_insert_nops;
32290 /* Nops can't be issued from the branch slot, so the effective
32291 issue_rate for nops is 'issue_rate - 1'. */
32292 if (can_issue_more == 0)
32293 can_issue_more = issue_rate;
32294 can_issue_more--;
32295 if (can_issue_more == 0)
32297 can_issue_more = issue_rate - 1;
32298 (*group_count)++;
32299 end = true;
32300 for (i = 0; i < issue_rate; i++)
32302 group_insns[i] = 0;
32306 while (n_nops > 0)
32308 nop = gen_nop ();
32309 emit_insn_before (nop, next_insn);
32310 if (can_issue_more == issue_rate - 1) /* new group begins */
32311 end = false;
32312 can_issue_more--;
32313 if (can_issue_more == 0)
32315 can_issue_more = issue_rate - 1;
32316 (*group_count)++;
32317 end = true;
32318 for (i = 0; i < issue_rate; i++)
32320 group_insns[i] = 0;
32323 n_nops--;
32326 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
32327 can_issue_more++;
32329 /* Is next_insn going to start a new group? */
32330 *group_end
32331 = (end
32332 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32333 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32334 || (can_issue_more < issue_rate &&
32335 insn_terminates_group_p (next_insn, previous_group)));
32336 if (*group_end && end)
32337 (*group_count)--;
32339 if (sched_verbose > 6)
32340 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
32341 *group_count, can_issue_more);
32342 return can_issue_more;
32345 return can_issue_more;
32348 /* This function tries to synch the dispatch groups that the compiler "sees"
32349 with the dispatch groups that the processor dispatcher is expected to
32350 form in practice. It tries to achieve this synchronization by forcing the
32351 estimated processor grouping on the compiler (as opposed to the function
32352 'pad_groups' which tries to force the scheduler's grouping on the processor).
32354 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
32355 examines the (estimated) dispatch groups that will be formed by the processor
32356 dispatcher. It marks these group boundaries to reflect the estimated
32357 processor grouping, overriding the grouping that the scheduler had marked.
32358 Depending on the value of the flag '-minsert-sched-nops' this function can
32359 force certain insns into separate groups or force a certain distance between
32360 them by inserting nops, for example, if there exists a "costly dependence"
32361 between the insns.
32363 The function estimates the group boundaries that the processor will form as
32364 follows: It keeps track of how many vacant issue slots are available after
32365 each insn. A subsequent insn will start a new group if one of the following
32366 4 cases applies:
32367 - no more vacant issue slots remain in the current dispatch group.
32368 - only the last issue slot, which is the branch slot, is vacant, but the next
32369 insn is not a branch.
32370 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
32371 which means that a cracked insn (which occupies two issue slots) can't be
32372 issued in this group.
32373 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
32374 start a new group. */
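/* For illustration: with an issue rate of 5, after four ordinary
   insns only the branch slot remains; if the next active insn is an
   add rather than a branch, it is marked (by giving it TImode) as the
   start of a new dispatch group. */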
32376 static int
32377 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32378 rtx_insn *tail)
32380 rtx_insn *insn, *next_insn;
32381 int issue_rate;
32382 int can_issue_more;
32383 int slot, i;
32384 bool group_end;
32385 int group_count = 0;
32386 rtx *group_insns;
32388 /* Initialize. */
32389 issue_rate = rs6000_issue_rate ();
32390 group_insns = XALLOCAVEC (rtx, issue_rate);
32391 for (i = 0; i < issue_rate; i++)
32393 group_insns[i] = 0;
32395 can_issue_more = issue_rate;
32396 slot = 0;
32397 insn = get_next_active_insn (prev_head_insn, tail);
32398 group_end = false;
32400 while (insn != NULL_RTX)
32402 slot = (issue_rate - can_issue_more);
32403 group_insns[slot] = insn;
32404 can_issue_more =
32405 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32406 if (insn_terminates_group_p (insn, current_group))
32407 can_issue_more = 0;
32409 next_insn = get_next_active_insn (insn, tail);
32410 if (next_insn == NULL_RTX)
32411 return group_count + 1;
32413 /* Is next_insn going to start a new group? */
32414 group_end
32415 = (can_issue_more == 0
32416 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
32417 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
32418 || (can_issue_more < issue_rate &&
32419 insn_terminates_group_p (next_insn, previous_group)));
32421 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
32422 next_insn, &group_end, can_issue_more,
32423 &group_count);
32425 if (group_end)
32427 group_count++;
32428 can_issue_more = 0;
32429 for (i = 0; i < issue_rate; i++)
32431 group_insns[i] = 0;
32435 if (GET_MODE (next_insn) == TImode && can_issue_more)
32436 PUT_MODE (next_insn, VOIDmode);
32437 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
32438 PUT_MODE (next_insn, TImode);
32440 insn = next_insn;
32441 if (can_issue_more == 0)
32442 can_issue_more = issue_rate;
32443 } /* while */
32445 return group_count;
32448 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
32449 dispatch group boundaries that the scheduler had marked. Pad with nops
32450 any dispatch groups which have vacant issue slots, in order to force the
32451 scheduler's grouping on the processor dispatcher. The function
32452 returns the number of dispatch groups found. */
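/* For illustration: if the scheduler marked a boundary (TImode on the
   next insn) while three slots are still vacant and the next insn is
   not a branch, two nops are emitted so the hardware dispatcher ends
   the group exactly where the scheduler did. */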
32454 static int
32455 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
32456 rtx_insn *tail)
32458 rtx_insn *insn, *next_insn;
32459 rtx nop;
32460 int issue_rate;
32461 int can_issue_more;
32462 int group_end;
32463 int group_count = 0;
32465 /* Initialize issue_rate. */
32466 issue_rate = rs6000_issue_rate ();
32467 can_issue_more = issue_rate;
32469 insn = get_next_active_insn (prev_head_insn, tail);
32470 next_insn = get_next_active_insn (insn, tail);
32472 while (insn != NULL_RTX)
32474 can_issue_more =
32475 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
32477 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
32479 if (next_insn == NULL_RTX)
32480 break;
32482 if (group_end)
32484 /* If the scheduler had marked group termination at this location
32485 (between insn and next_insn), and neither insn nor next_insn will
32486 force group termination, pad the group with nops to force group
32487 termination. */
32488 if (can_issue_more
32489 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
32490 && !insn_terminates_group_p (insn, current_group)
32491 && !insn_terminates_group_p (next_insn, previous_group))
32493 if (!is_branch_slot_insn (next_insn))
32494 can_issue_more--;
32496 while (can_issue_more)
32498 nop = gen_nop ();
32499 emit_insn_before (nop, next_insn);
32500 can_issue_more--;
32504 can_issue_more = issue_rate;
32505 group_count++;
32508 insn = next_insn;
32509 next_insn = get_next_active_insn (insn, tail);
32512 return group_count;
32515 /* We're beginning a new block. Initialize data structures as necessary. */
32517 static void
32518 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
32519 int sched_verbose ATTRIBUTE_UNUSED,
32520 int max_ready ATTRIBUTE_UNUSED)
32522 last_scheduled_insn = NULL;
32523 load_store_pendulum = 0;
32524 divide_cnt = 0;
32525 vec_pairing = 0;
32528 /* The following function is called at the end of scheduling BB.
32529 After reload, it inserts nops to enforce insn group bundling. */
32531 static void
32532 rs6000_sched_finish (FILE *dump, int sched_verbose)
32534 int n_groups;
32536 if (sched_verbose)
32537 fprintf (dump, "=== Finishing schedule.\n");
32539 if (reload_completed && rs6000_sched_groups)
32541 /* Do not run sched_finish hook when selective scheduling enabled. */
32542 if (sel_sched_p ())
32543 return;
32545 if (rs6000_sched_insert_nops == sched_finish_none)
32546 return;
32548 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
32549 n_groups = pad_groups (dump, sched_verbose,
32550 current_sched_info->prev_head,
32551 current_sched_info->next_tail);
32552 else
32553 n_groups = redefine_groups (dump, sched_verbose,
32554 current_sched_info->prev_head,
32555 current_sched_info->next_tail);
32557 if (sched_verbose >= 6)
32559 fprintf (dump, "ngroups = %d\n", n_groups);
32560 print_rtl (dump, current_sched_info->prev_head);
32561 fprintf (dump, "Done finish_sched\n");
32566 struct rs6000_sched_context
32568 short cached_can_issue_more;
32569 rtx_insn *last_scheduled_insn;
32570 int load_store_pendulum;
32571 int divide_cnt;
32572 int vec_pairing;
32575 typedef struct rs6000_sched_context rs6000_sched_context_def;
32576 typedef rs6000_sched_context_def *rs6000_sched_context_t;
32578 /* Allocate storage for a new scheduling context. */
32579 static void *
32580 rs6000_alloc_sched_context (void)
32582 return xmalloc (sizeof (rs6000_sched_context_def));
32585 /* If CLEAN_P is true then initialize _SC with clean data;
32586 otherwise initialize it from the global context. */
32587 static void
32588 rs6000_init_sched_context (void *_sc, bool clean_p)
32590 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32592 if (clean_p)
32594 sc->cached_can_issue_more = 0;
32595 sc->last_scheduled_insn = NULL;
32596 sc->load_store_pendulum = 0;
32597 sc->divide_cnt = 0;
32598 sc->vec_pairing = 0;
32600 else
32602 sc->cached_can_issue_more = cached_can_issue_more;
32603 sc->last_scheduled_insn = last_scheduled_insn;
32604 sc->load_store_pendulum = load_store_pendulum;
32605 sc->divide_cnt = divide_cnt;
32606 sc->vec_pairing = vec_pairing;
32610 /* Sets the global scheduling context to the one pointed to by _SC. */
32611 static void
32612 rs6000_set_sched_context (void *_sc)
32614 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
32616 gcc_assert (sc != NULL);
32618 cached_can_issue_more = sc->cached_can_issue_more;
32619 last_scheduled_insn = sc->last_scheduled_insn;
32620 load_store_pendulum = sc->load_store_pendulum;
32621 divide_cnt = sc->divide_cnt;
32622 vec_pairing = sc->vec_pairing;
32625 /* Free _SC. */
32626 static void
32627 rs6000_free_sched_context (void *_sc)
32629 gcc_assert (_sc != NULL);
32631 free (_sc);
32634 static bool
32635 rs6000_sched_can_speculate_insn (rtx_insn *insn)
32637 switch (get_attr_type (insn))
32639 case TYPE_DIV:
32640 case TYPE_SDIV:
32641 case TYPE_DDIV:
32642 case TYPE_VECDIV:
32643 case TYPE_SSQRT:
32644 case TYPE_DSQRT:
32645 return false;
32647 default:
32648 return true;
32652 /* Length in units of the trampoline for entering a nested function. */
32655 rs6000_trampoline_size (void)
32657 int ret = 0;
32659 switch (DEFAULT_ABI)
32661 default:
32662 gcc_unreachable ();
32664 case ABI_AIX:
32665 ret = (TARGET_32BIT) ? 12 : 24;
32666 break;
32668 case ABI_ELFv2:
32669 gcc_assert (!TARGET_32BIT);
32670 ret = 32;
32671 break;
32673 case ABI_DARWIN:
32674 case ABI_V4:
32675 ret = (TARGET_32BIT) ? 40 : 48;
32676 break;
32679 return ret;
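/* For illustration: under AIX the trampoline is a 3-word function
   descriptor (code address, TOC pointer, static chain), i.e. 12 bytes
   for 32-bit and 24 for 64-bit; the ELFv2 (32-byte) and V.4/Darwin
   (40- or 48-byte) trampolines are initialized at run time by
   __trampoline_setup. */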
32682 /* Emit RTL insns to initialize the variable parts of a trampoline.
32683 FNADDR is an RTX for the address of the function's pure code.
32684 CXT is an RTX for the static chain value for the function. */
32686 static void
32687 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
32689 int regsize = (TARGET_32BIT) ? 4 : 8;
32690 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
32691 rtx ctx_reg = force_reg (Pmode, cxt);
32692 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
32694 switch (DEFAULT_ABI)
32696 default:
32697 gcc_unreachable ();
32699 /* Under AIX, just build the 3-word function descriptor. */
32700 case ABI_AIX:
32702 rtx fnmem, fn_reg, toc_reg;
32704 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32705 error ("you cannot take the address of a nested function if you use "
32706 "the %qs option", "-mno-pointers-to-nested-functions");
32708 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
32709 fn_reg = gen_reg_rtx (Pmode);
32710 toc_reg = gen_reg_rtx (Pmode);
32712 /* Macro to shorten the code expansions below. */
32713 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
32715 m_tramp = replace_equiv_address (m_tramp, addr);
32717 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
32718 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
32719 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
32720 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
32721 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
32723 # undef MEM_PLUS
32725 break;
32727 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
32728 case ABI_ELFv2:
32729 case ABI_DARWIN:
32730 case ABI_V4:
32731 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
32732 LCT_NORMAL, VOIDmode,
32733 addr, Pmode,
32734 GEN_INT (rs6000_trampoline_size ()), SImode,
32735 fnaddr, Pmode,
32736 ctx_reg, Pmode);
32737 break;
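/* Illustrative layout of the AIX-style trampoline initialized above:
   it is simply a three-word function descriptor, with regsize = 4
   (32-bit) or 8 (64-bit):

       offset 0*regsize: code address   (word 0 of FNADDR's descriptor)
       offset 1*regsize: TOC pointer    (word 1 of FNADDR's descriptor)
       offset 2*regsize: static chain   (CXT)

   Calling through the trampoline therefore behaves like calling FNADDR
   with the static chain slot of the descriptor filled in.  */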
32742 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
32743 identifier as an argument, so the front end shouldn't look it up. */
32745 static bool
32746 rs6000_attribute_takes_identifier_p (const_tree attr_id)
32748 return is_attribute_p ("altivec", attr_id);
32751 /* Handle the "altivec" attribute. The attribute may have
32752 arguments as follows:
32754 __attribute__((altivec(vector__)))
32755 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
32756 __attribute__((altivec(bool__))) (always followed by 'unsigned')
32758 and may appear more than once (e.g., 'vector bool char') in a
32759 given declaration. */
32761 static tree
32762 rs6000_handle_altivec_attribute (tree *node,
32763 tree name ATTRIBUTE_UNUSED,
32764 tree args,
32765 int flags ATTRIBUTE_UNUSED,
32766 bool *no_add_attrs)
32768 tree type = *node, result = NULL_TREE;
32769 machine_mode mode;
32770 int unsigned_p;
32771 char altivec_type
32772 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
32773 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
32774 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
32775 : '?');
32777 while (POINTER_TYPE_P (type)
32778 || TREE_CODE (type) == FUNCTION_TYPE
32779 || TREE_CODE (type) == METHOD_TYPE
32780 || TREE_CODE (type) == ARRAY_TYPE)
32781 type = TREE_TYPE (type);
32783 mode = TYPE_MODE (type);
32785 /* Check for invalid AltiVec type qualifiers. */
32786 if (type == long_double_type_node)
32787 error ("use of %<long double%> in AltiVec types is invalid");
32788 else if (type == boolean_type_node)
32789 error ("use of boolean types in AltiVec types is invalid");
32790 else if (TREE_CODE (type) == COMPLEX_TYPE)
32791 error ("use of %<complex%> in AltiVec types is invalid");
32792 else if (DECIMAL_FLOAT_MODE_P (mode))
32793 error ("use of decimal floating point types in AltiVec types is invalid");
32794 else if (!TARGET_VSX)
32796 if (type == long_unsigned_type_node || type == long_integer_type_node)
32798 if (TARGET_64BIT)
32799 error ("use of %<long%> in AltiVec types is invalid for "
32800 "64-bit code without %qs", "-mvsx");
32801 else if (rs6000_warn_altivec_long)
32802 warning (0, "use of %<long%> in AltiVec types is deprecated; "
32803 "use %<int%>");
32805 else if (type == long_long_unsigned_type_node
32806 || type == long_long_integer_type_node)
32807 error ("use of %<long long%> in AltiVec types is invalid without %qs",
32808 "-mvsx");
32809 else if (type == double_type_node)
32810 error ("use of %<double%> in AltiVec types is invalid without %qs",
32811 "-mvsx");
32814 switch (altivec_type)
32816 case 'v':
32817 unsigned_p = TYPE_UNSIGNED (type);
32818 switch (mode)
32820 case E_TImode:
32821 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
32822 break;
32823 case E_DImode:
32824 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
32825 break;
32826 case E_SImode:
32827 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
32828 break;
32829 case E_HImode:
32830 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
32831 break;
32832 case E_QImode:
32833 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
32834 break;
32835 case E_SFmode: result = V4SF_type_node; break;
32836 case E_DFmode: result = V2DF_type_node; break;
32837 /* If the user says 'vector int bool', we may be handed the 'bool'
32838 attribute _before_ the 'vector' attribute, and so select the
32839 proper type in the 'b' case below. */
32840 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
32841 case E_V2DImode: case E_V2DFmode:
32842 result = type;
32843 default: break;
32845 break;
32846 case 'b':
32847 switch (mode)
32849 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
32850 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
32851 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
32852 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
32853 default: break;
32855 break;
32856 case 'p':
32857 switch (mode)
32859 case E_V8HImode: result = pixel_V8HI_type_node;
32860 default: break;
32862 default: break;
32865 /* Propagate qualifiers attached to the element type
32866 onto the vector type. */
32867 if (result && result != type && TYPE_QUALS (type))
32868 result = build_qualified_type (result, TYPE_QUALS (type));
32870 *no_add_attrs = true; /* No need to hang on to the attribute. */
32872 if (result)
32873 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
32875 return NULL_TREE;
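/* A minimal user-level sketch of the attribute forms handled above;
   the vector/pixel/bool keywords expand to these, so they are rarely
   written by hand (the typedef name is illustrative):

       typedef int v4si __attribute__ ((altivec (vector__)));

   selects V4SI_type_node via the E_SImode case.  "vector bool int"
   arrives as altivec (bool__) on unsigned int (or on the already-built
   V4SI vector, depending on attribute application order) and yields
   bool_V4SI_type_node via the 'b' case.  */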
32878 /* AltiVec defines four built-in scalar types that serve as vector
32879 elements; we must teach the compiler how to mangle them. */
32881 static const char *
32882 rs6000_mangle_type (const_tree type)
32884 type = TYPE_MAIN_VARIANT (type);
32886 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
32887 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
32888 return NULL;
32890 if (type == bool_char_type_node) return "U6__boolc";
32891 if (type == bool_short_type_node) return "U6__bools";
32892 if (type == pixel_type_node) return "u7__pixel";
32893 if (type == bool_int_type_node) return "U6__booli";
32894 if (type == bool_long_type_node) return "U6__booll";
32896 /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
32897 "g" for IBM extended double, no matter whether it is long double (using
32898 -mabi=ibmlongdouble) or the distinct __ibm128 type. */
32899 if (TARGET_FLOAT128_TYPE)
32901 if (type == ieee128_float_type_node)
32902 return "U10__float128";
32904 if (TARGET_LONG_DOUBLE_128)
32906 if (type == long_double_type_node)
32907 return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
32909 if (type == ibm128_float_type_node)
32910 return "g";
32914 /* Mangle IBM extended float long double as `g' (__float128) on
32915 powerpc*-linux where long-double-64 previously was the default. */
32916 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
32917 && TARGET_ELF
32918 && TARGET_LONG_DOUBLE_128
32919 && !TARGET_IEEEQUAD)
32920 return "g";
32922 /* For all other types, use normal C++ mangling. */
32923 return NULL;
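/* Worked examples of the rules above: the element type of
   "vector bool char" mangles as "U6__boolc", the element of
   "vector pixel" as "u7__pixel", and with -mlong-double-128 in the
   IBM extended format, "long double" mangles as "g".  These element
   manglings then feed the C++ front end's normal vector mangling.  */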
32926 /* Handle a "longcall" or "shortcall" attribute; arguments as in
32927 struct attribute_spec.handler. */
32929 static tree
32930 rs6000_handle_longcall_attribute (tree *node, tree name,
32931 tree args ATTRIBUTE_UNUSED,
32932 int flags ATTRIBUTE_UNUSED,
32933 bool *no_add_attrs)
32935 if (TREE_CODE (*node) != FUNCTION_TYPE
32936 && TREE_CODE (*node) != FIELD_DECL
32937 && TREE_CODE (*node) != TYPE_DECL)
32939 warning (OPT_Wattributes, "%qE attribute only applies to functions",
32940 name);
32941 *no_add_attrs = true;
32944 return NULL_TREE;
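/* User-level sketch of the attributes validated above (function names
   are illustrative):

       void far_func (void) __attribute__ ((longcall));
       void near_func (void) __attribute__ ((shortcall));

   "longcall" forces the indirect call sequence built by
   rs6000_longcall_ref below; "shortcall" exempts one function when
   -mlongcall (rs6000_default_long_calls) is in effect.  */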
32947 /* Set longcall attributes on all functions declared when
32948 rs6000_default_long_calls is true. */
32949 static void
32950 rs6000_set_default_type_attributes (tree type)
32952 if (rs6000_default_long_calls
32953 && (TREE_CODE (type) == FUNCTION_TYPE
32954 || TREE_CODE (type) == METHOD_TYPE))
32955 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
32956 NULL_TREE,
32957 TYPE_ATTRIBUTES (type));
32959 #if TARGET_MACHO
32960 darwin_set_default_type_attributes (type);
32961 #endif
32964 /* Return a reference suitable for calling a function with the
32965 longcall attribute. */
32968 rs6000_longcall_ref (rtx call_ref)
32970 const char *call_name;
32971 tree node;
32973 if (GET_CODE (call_ref) != SYMBOL_REF)
32974 return call_ref;
32976 /* System V adds '.' to the internal name, so skip any leading periods. */
32977 call_name = XSTR (call_ref, 0);
32978 if (*call_name == '.')
32980 while (*call_name == '.')
32981 call_name++;
32983 node = get_identifier (call_name);
32984 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
32987 return force_reg (Pmode, call_ref);
32990 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
32991 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
32992 #endif
32994 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
32995 struct attribute_spec.handler. */
32996 static tree
32997 rs6000_handle_struct_attribute (tree *node, tree name,
32998 tree args ATTRIBUTE_UNUSED,
32999 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
33001 tree *type = NULL;
33002 if (DECL_P (*node))
33004 if (TREE_CODE (*node) == TYPE_DECL)
33005 type = &TREE_TYPE (*node);
33007 else
33008 type = node;
33010 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
33011 || TREE_CODE (*type) == UNION_TYPE)))
33013 warning (OPT_Wattributes, "%qE attribute ignored", name);
33014 *no_add_attrs = true;
33017 else if ((is_attribute_p ("ms_struct", name)
33018 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
33019 || ((is_attribute_p ("gcc_struct", name)
33020 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
33022 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
33023 name);
33024 *no_add_attrs = true;
33027 return NULL_TREE;
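/* Illustrative usage of the attributes checked above:

       struct s1 { char c; int i : 8; } __attribute__ ((ms_struct));
       struct s2 { char c; int i : 8; } __attribute__ ((gcc_struct));

   Applying both to the same type triggers the "incompatible attribute
   ignored" warning; either attribute on a non-struct/union type draws
   the plain "attribute ignored" warning.  */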
33030 static bool
33031 rs6000_ms_bitfield_layout_p (const_tree record_type)
33033 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
33034 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
33035 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
33038 #ifdef USING_ELFOS_H
33040 /* A get_unnamed_section callback, used for switching to toc_section. */
33042 static void
33043 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33045 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33046 && TARGET_MINIMAL_TOC)
33048 if (!toc_initialized)
33050 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33051 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33052 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
33053 fprintf (asm_out_file, "\t.tc ");
33054 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
33055 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33056 fprintf (asm_out_file, "\n");
33058 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33059 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33060 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33061 fprintf (asm_out_file, " = .+32768\n");
33062 toc_initialized = 1;
33064 else
33065 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33067 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33069 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
33070 if (!toc_initialized)
33072 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33073 toc_initialized = 1;
33076 else
33078 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33079 if (!toc_initialized)
33081 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
33082 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
33083 fprintf (asm_out_file, " = .+32768\n");
33084 toc_initialized = 1;
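/* The "LCTOC1 = .+32768" bias emitted above is what lets the minimal
   TOC span a full 64KB with signed 16-bit displacements: entries from
   the start of the table to +65535 bytes fall in [-32768, +32767]
   relative to LCTOC1.  */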
33089 /* Implement TARGET_ASM_INIT_SECTIONS. */
33091 static void
33092 rs6000_elf_asm_init_sections (void)
33094 toc_section
33095 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
33097 sdata2_section
33098 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
33099 SDATA2_SECTION_ASM_OP);
33102 /* Implement TARGET_SELECT_RTX_SECTION. */
33104 static section *
33105 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
33106 unsigned HOST_WIDE_INT align)
33108 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
33109 return toc_section;
33110 else
33111 return default_elf_select_rtx_section (mode, x, align);
33114 /* For a SYMBOL_REF, set generic flags and then perform some
33115 target-specific processing.
33117 When the AIX ABI is requested on a non-AIX system, replace the
33118 function name with the real name (with a leading .) rather than the
33119 function descriptor name. This saves a lot of overriding code to
33120 read the prefixes. */
33122 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
33123 static void
33124 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
33126 default_encode_section_info (decl, rtl, first);
33128 if (first
33129 && TREE_CODE (decl) == FUNCTION_DECL
33130 && !TARGET_AIX
33131 && DEFAULT_ABI == ABI_AIX)
33133 rtx sym_ref = XEXP (rtl, 0);
33134 size_t len = strlen (XSTR (sym_ref, 0));
33135 char *str = XALLOCAVEC (char, len + 2);
33136 str[0] = '.';
33137 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
33138 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
33142 static inline bool
33143 compare_section_name (const char *section, const char *templ)
33145 int len;
33147 len = strlen (templ);
33148 return (strncmp (section, templ, len) == 0
33149 && (section[len] == 0 || section[len] == '.'));
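/* For instance, compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") are true, while
   compare_section_name (".sdata2", ".sdata") is false: the character
   after the matched prefix must be '.' or the terminating NUL.  */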
33152 bool
33153 rs6000_elf_in_small_data_p (const_tree decl)
33155 if (rs6000_sdata == SDATA_NONE)
33156 return false;
33158 /* We want to merge strings, so we never consider them small data. */
33159 if (TREE_CODE (decl) == STRING_CST)
33160 return false;
33162 /* Functions are never in the small data area. */
33163 if (TREE_CODE (decl) == FUNCTION_DECL)
33164 return false;
33166 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
33168 const char *section = DECL_SECTION_NAME (decl);
33169 if (compare_section_name (section, ".sdata")
33170 || compare_section_name (section, ".sdata2")
33171 || compare_section_name (section, ".gnu.linkonce.s")
33172 || compare_section_name (section, ".sbss")
33173 || compare_section_name (section, ".sbss2")
33174 || compare_section_name (section, ".gnu.linkonce.sb")
33175 || strcmp (section, ".PPC.EMB.sdata0") == 0
33176 || strcmp (section, ".PPC.EMB.sbss0") == 0)
33177 return true;
33179 else
33181 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
33183 if (size > 0
33184 && size <= g_switch_value
33185 /* If it's not public, and we aren't going to reference it through
33186 the small data area anyway, there's no need to put it in the
33186 small data section. */
33187 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
33188 return true;
33191 return false;
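/* e.g. with -G 8 (g_switch_value == 8), a 4-byte global variable
   qualifies as small data, a 16-byte one does not, and a variable
   explicitly placed in section ".sdata.foo" qualifies through
   compare_section_name above regardless of its size.  */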
33194 #endif /* USING_ELFOS_H */
33196 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
33198 static bool
33199 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
33201 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
33204 /* Do not place thread-local symbols refs in the object blocks. */
33206 static bool
33207 rs6000_use_blocks_for_decl_p (const_tree decl)
33209 return !DECL_THREAD_LOCAL_P (decl);
33212 /* Return a REG that occurs in ADDR with coefficient 1.
33213 ADDR can be effectively incremented by incrementing REG.
33215 r0 is special and we must not select it as an address
33216 register by this routine since our caller will try to
33217 increment the returned register via an "la" instruction. */
33220 find_addr_reg (rtx addr)
33222 while (GET_CODE (addr) == PLUS)
33224 if (GET_CODE (XEXP (addr, 0)) == REG
33225 && REGNO (XEXP (addr, 0)) != 0)
33226 addr = XEXP (addr, 0);
33227 else if (GET_CODE (XEXP (addr, 1)) == REG
33228 && REGNO (XEXP (addr, 1)) != 0)
33229 addr = XEXP (addr, 1);
33230 else if (CONSTANT_P (XEXP (addr, 0)))
33231 addr = XEXP (addr, 1);
33232 else if (CONSTANT_P (XEXP (addr, 1)))
33233 addr = XEXP (addr, 0);
33234 else
33235 gcc_unreachable ();
33237 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
33238 return addr;
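/* For example, find_addr_reg on (plus (reg r9) (const_int 8)) returns
   (reg r9); on (plus (plus (reg r9) (reg r10)) (const_int 8)) it
   first strips the constant term, then descends into the inner PLUS
   and returns r9, preferring operand 0 and skipping r0 as documented
   above.  */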
33241 void
33242 rs6000_fatal_bad_address (rtx op)
33244 fatal_insn ("bad address", op);
33247 #if TARGET_MACHO
33249 typedef struct branch_island_d {
33250 tree function_name;
33251 tree label_name;
33252 int line_number;
33253 } branch_island;
33256 static vec<branch_island, va_gc> *branch_islands;
33258 /* Remember to generate a branch island for far calls to the given
33259 function. */
33261 static void
33262 add_compiler_branch_island (tree label_name, tree function_name,
33263 int line_number)
33265 branch_island bi = {function_name, label_name, line_number};
33266 vec_safe_push (branch_islands, bi);
33269 /* Generate far-jump branch islands for everything recorded in
33270 branch_islands. Invoked immediately after the last instruction of
33271 the epilogue has been emitted; the branch islands must be appended
33272 to, and contiguous with, the function body. Mach-O stubs are
33273 generated in machopic_output_stub(). */
33275 static void
33276 macho_branch_islands (void)
33278 char tmp_buf[512];
33280 while (!vec_safe_is_empty (branch_islands))
33282 branch_island *bi = &branch_islands->last ();
33283 const char *label = IDENTIFIER_POINTER (bi->label_name);
33284 const char *name = IDENTIFIER_POINTER (bi->function_name);
33285 char name_buf[512];
33286 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
33287 if (name[0] == '*' || name[0] == '&')
33288 strcpy (name_buf, name+1);
33289 else
33291 name_buf[0] = '_';
33292 strcpy (name_buf+1, name);
33294 strcpy (tmp_buf, "\n");
33295 strcat (tmp_buf, label);
33296 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33297 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33298 dbxout_stabd (N_SLINE, bi->line_number);
33299 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33300 if (flag_pic)
33302 if (TARGET_LINK_STACK)
33304 char name[32];
33305 get_ppc476_thunk_name (name);
33306 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
33307 strcat (tmp_buf, name);
33308 strcat (tmp_buf, "\n");
33309 strcat (tmp_buf, label);
33310 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33312 else
33314 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
33315 strcat (tmp_buf, label);
33316 strcat (tmp_buf, "_pic\n");
33317 strcat (tmp_buf, label);
33318 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
33321 strcat (tmp_buf, "\taddis r11,r11,ha16(");
33322 strcat (tmp_buf, name_buf);
33323 strcat (tmp_buf, " - ");
33324 strcat (tmp_buf, label);
33325 strcat (tmp_buf, "_pic)\n");
33327 strcat (tmp_buf, "\tmtlr r0\n");
33329 strcat (tmp_buf, "\taddi r12,r11,lo16(");
33330 strcat (tmp_buf, name_buf);
33331 strcat (tmp_buf, " - ");
33332 strcat (tmp_buf, label);
33333 strcat (tmp_buf, "_pic)\n");
33335 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
33337 else
33339 strcat (tmp_buf, ":\nlis r12,hi16(");
33340 strcat (tmp_buf, name_buf);
33341 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
33342 strcat (tmp_buf, name_buf);
33343 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
33345 output_asm_insn (tmp_buf, 0);
33346 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
33347 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
33348 dbxout_stabd (N_SLINE, bi->line_number);
33349 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
33350 branch_islands->pop ();
33354 /* NO_PREVIOUS_DEF checks whether FUNCTION_NAME is already present
33355 in the branch_islands list. */
33357 static int
33358 no_previous_def (tree function_name)
33360 branch_island *bi;
33361 unsigned ix;
33363 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33364 if (function_name == bi->function_name)
33365 return 0;
33366 return 1;
33369 /* GET_PREV_LABEL gets the label name from the previous definition of
33370 the function. */
33372 static tree
33373 get_prev_label (tree function_name)
33375 branch_island *bi;
33376 unsigned ix;
33378 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
33379 if (function_name == bi->function_name)
33380 return bi->label_name;
33381 return NULL_TREE;
33384 /* INSN is either a function call or a millicode call. It may have an
33385 unconditional jump in its delay slot.
33387 CALL_DEST is the routine we are calling. */
33389 char *
33390 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
33391 int cookie_operand_number)
33393 static char buf[256];
33394 if (darwin_emit_branch_islands
33395 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
33396 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
33398 tree labelname;
33399 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
33401 if (no_previous_def (funname))
33403 rtx label_rtx = gen_label_rtx ();
33404 char *label_buf, temp_buf[256];
33405 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
33406 CODE_LABEL_NUMBER (label_rtx));
33407 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
33408 labelname = get_identifier (label_buf);
33409 add_compiler_branch_island (labelname, funname, insn_line (insn));
33411 else
33412 labelname = get_prev_label (funname);
33414 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
33415 instruction will reach 'foo', otherwise link as 'bl L42'".
33416 "L42" should be a 'branch island', that will do a far jump to
33417 'foo'. Branch islands are generated in
33418 macho_branch_islands(). */
33419 sprintf (buf, "jbsr %%z%d,%.246s",
33420 dest_operand_number, IDENTIFIER_POINTER (labelname));
33422 else
33423 sprintf (buf, "bl %%z%d", dest_operand_number);
33424 return buf;
33427 /* Generate PIC and indirect symbol stubs. */
33429 void
33430 machopic_output_stub (FILE *file, const char *symb, const char *stub)
33432 unsigned int length;
33433 char *symbol_name, *lazy_ptr_name;
33434 char *local_label_0;
33435 static int label = 0;
33437 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
33438 symb = (*targetm.strip_name_encoding) (symb);
33441 length = strlen (symb);
33442 symbol_name = XALLOCAVEC (char, length + 32);
33443 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
33445 lazy_ptr_name = XALLOCAVEC (char, length + 32);
33446 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
33448 if (flag_pic == 2)
33449 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
33450 else
33451 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
33453 if (flag_pic == 2)
33455 fprintf (file, "\t.align 5\n");
33457 fprintf (file, "%s:\n", stub);
33458 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33460 label++;
33461 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
33462 sprintf (local_label_0, "\"L%011d$spb\"", label);
33464 fprintf (file, "\tmflr r0\n");
33465 if (TARGET_LINK_STACK)
33467 char name[32];
33468 get_ppc476_thunk_name (name);
33469 fprintf (file, "\tbl %s\n", name);
33470 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33472 else
33474 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
33475 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
33477 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
33478 lazy_ptr_name, local_label_0);
33479 fprintf (file, "\tmtlr r0\n");
33480 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
33481 (TARGET_64BIT ? "ldu" : "lwzu"),
33482 lazy_ptr_name, local_label_0);
33483 fprintf (file, "\tmtctr r12\n");
33484 fprintf (file, "\tbctr\n");
33486 else
33488 fprintf (file, "\t.align 4\n");
33490 fprintf (file, "%s:\n", stub);
33491 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33493 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
33494 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
33495 (TARGET_64BIT ? "ldu" : "lwzu"),
33496 lazy_ptr_name);
33497 fprintf (file, "\tmtctr r12\n");
33498 fprintf (file, "\tbctr\n");
33501 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
33502 fprintf (file, "%s:\n", lazy_ptr_name);
33503 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
33504 fprintf (file, "%sdyld_stub_binding_helper\n",
33505 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
33508 /* Legitimize PIC addresses. If the address is already
33509 position-independent, we return ORIG. Newly generated
33510 position-independent addresses go into a reg. This is REG if
33511 nonzero; otherwise we allocate register(s) as necessary. */
33513 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
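/* SMALL_INT tests for a signed 16-bit displacement without branching:
   -0x8000 + 0x8000 == 0x0000 and 0x7fff + 0x8000 == 0xffff both pass,
   while 0x8000 + 0x8000 == 0x10000 does not.  */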
33516 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
33517 rtx reg)
33519 rtx base, offset;
33521 if (reg == NULL && !reload_completed)
33522 reg = gen_reg_rtx (Pmode);
33524 if (GET_CODE (orig) == CONST)
33526 rtx reg_temp;
33528 if (GET_CODE (XEXP (orig, 0)) == PLUS
33529 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
33530 return orig;
33532 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
33534 /* Use a different reg for the intermediate value, as
33535 it will be marked UNCHANGING. */
33536 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
33537 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
33538 Pmode, reg_temp);
33539 offset =
33540 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
33541 Pmode, reg);
33543 if (GET_CODE (offset) == CONST_INT)
33545 if (SMALL_INT (offset))
33546 return plus_constant (Pmode, base, INTVAL (offset));
33547 else if (!reload_completed)
33548 offset = force_reg (Pmode, offset);
33549 else
33551 rtx mem = force_const_mem (Pmode, orig);
33552 return machopic_legitimize_pic_address (mem, Pmode, reg);
33555 return gen_rtx_PLUS (Pmode, base, offset);
33558 /* Fall back on generic machopic code. */
33559 return machopic_legitimize_pic_address (orig, mode, reg);
33562 /* Output a .machine directive for the Darwin assembler, and call
33563 the generic start_file routine. */
33565 static void
33566 rs6000_darwin_file_start (void)
33568 static const struct
33570 const char *arg;
33571 const char *name;
33572 HOST_WIDE_INT if_set;
33573 } mapping[] = {
33574 { "ppc64", "ppc64", MASK_64BIT },
33575 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
33576 { "power4", "ppc970", 0 },
33577 { "G5", "ppc970", 0 },
33578 { "7450", "ppc7450", 0 },
33579 { "7400", "ppc7400", MASK_ALTIVEC },
33580 { "G4", "ppc7400", 0 },
33581 { "750", "ppc750", 0 },
33582 { "740", "ppc750", 0 },
33583 { "G3", "ppc750", 0 },
33584 { "604e", "ppc604e", 0 },
33585 { "604", "ppc604", 0 },
33586 { "603e", "ppc603", 0 },
33587 { "603", "ppc603", 0 },
33588 { "601", "ppc601", 0 },
33589 { NULL, "ppc", 0 } };
33590 const char *cpu_id = "";
33591 size_t i;
33593 rs6000_file_start ();
33594 darwin_file_start ();
33596 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
33598 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
33599 cpu_id = rs6000_default_cpu;
33601 if (global_options_set.x_rs6000_cpu_index)
33602 cpu_id = processor_target_table[rs6000_cpu_index].name;
33604 /* Look through the mapping array. Pick the first name that either
33605 matches the argument, has a bit set in IF_SET that is also set
33606 in the target flags, or has a NULL name. */
33608 i = 0;
33609 while (mapping[i].arg != NULL
33610 && strcmp (mapping[i].arg, cpu_id) != 0
33611 && (mapping[i].if_set & rs6000_isa_flags) == 0)
33612 i++;
33614 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
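/* Worked example of the scan above: -mcpu=G5 stops at the "970" entry
   (if its mask bits are enabled) or at "G5" itself, either way
   emitting "\t.machine ppc970"; with no match at all, the
   NULL-terminated final entry yields "\t.machine ppc".  */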
33617 #endif /* TARGET_MACHO */
33619 #if TARGET_ELF
33620 static int
33621 rs6000_elf_reloc_rw_mask (void)
33623 if (flag_pic)
33624 return 3;
33625 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
33626 return 2;
33627 else
33628 return 0;
33631 /* Record an element in the table of global constructors. SYMBOL is
33632 a SYMBOL_REF of the function to be called; PRIORITY is a number
33633 between 0 and MAX_INIT_PRIORITY.
33635 This differs from default_named_section_asm_out_constructor in
33636 that we have special handling for -mrelocatable. */
33638 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
33639 static void
33640 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
33642 const char *section = ".ctors";
33643 char buf[18];
33645 if (priority != DEFAULT_INIT_PRIORITY)
33647 sprintf (buf, ".ctors.%.5u",
33648 /* Invert the numbering so the linker puts us in the proper
33649 order; constructors are run from right to left, and the
33650 linker sorts in increasing order. */
33651 MAX_INIT_PRIORITY - priority);
33652 section = buf;
33655 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33656 assemble_align (POINTER_SIZE);
33658 if (DEFAULT_ABI == ABI_V4
33659 && (TARGET_RELOCATABLE || flag_pic > 1))
33661 fputs ("\t.long (", asm_out_file);
33662 output_addr_const (asm_out_file, symbol);
33663 fputs (")@fixup\n", asm_out_file);
33665 else
33666 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
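/* Worked example of the inversion above: with MAX_INIT_PRIORITY ==
   65535, priority 100 lands in ".ctors.65435" and priority 200 in
   ".ctors.65335".  The linker sorts 65335 first, and since .ctors
   entries execute right to left, priority 100 still runs before
   priority 200, as the priority model requires.  */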
33669 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
33670 static void
33671 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
33673 const char *section = ".dtors";
33674 char buf[18];
33676 if (priority != DEFAULT_INIT_PRIORITY)
33678 sprintf (buf, ".dtors.%.5u",
33679 /* Invert the numbering so the linker puts us in the proper
33680 order; constructors are run from right to left, and the
33681 linker sorts in increasing order. */
33682 MAX_INIT_PRIORITY - priority);
33683 section = buf;
33686 switch_to_section (get_section (section, SECTION_WRITE, NULL));
33687 assemble_align (POINTER_SIZE);
33689 if (DEFAULT_ABI == ABI_V4
33690 && (TARGET_RELOCATABLE || flag_pic > 1))
33692 fputs ("\t.long (", asm_out_file);
33693 output_addr_const (asm_out_file, symbol);
33694 fputs (")@fixup\n", asm_out_file);
33696 else
33697 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
33700 void
33701 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
33703 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
33705 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
33706 ASM_OUTPUT_LABEL (file, name);
33707 fputs (DOUBLE_INT_ASM_OP, file);
33708 rs6000_output_function_entry (file, name);
33709 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
33710 if (DOT_SYMBOLS)
33712 fputs ("\t.size\t", file);
33713 assemble_name (file, name);
33714 fputs (",24\n\t.type\t.", file);
33715 assemble_name (file, name);
33716 fputs (",@function\n", file);
33717 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
33719 fputs ("\t.globl\t.", file);
33720 assemble_name (file, name);
33721 putc ('\n', file);
33724 else
33725 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33726 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33727 rs6000_output_function_entry (file, name);
33728 fputs (":\n", file);
33729 return;
33732 int uses_toc;
33733 if (DEFAULT_ABI == ABI_V4
33734 && (TARGET_RELOCATABLE || flag_pic > 1)
33735 && !TARGET_SECURE_PLT
33736 && (!constant_pool_empty_p () || crtl->profile)
33737 && (uses_toc = uses_TOC ()))
33739 char buf[256];
33741 if (uses_toc == 2)
33742 switch_to_other_text_partition ();
33743 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33745 fprintf (file, "\t.long ");
33746 assemble_name (file, toc_label_name);
33747 need_toc_init = 1;
33748 putc ('-', file);
33749 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33750 assemble_name (file, buf);
33751 putc ('\n', file);
33752 if (uses_toc == 2)
33753 switch_to_other_text_partition ();
33756 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
33757 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
33759 if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
33761 char buf[256];
33763 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
33765 fprintf (file, "\t.quad .TOC.-");
33766 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
33767 assemble_name (file, buf);
33768 putc ('\n', file);
33771 if (DEFAULT_ABI == ABI_AIX)
33773 const char *desc_name, *orig_name;
33775 orig_name = (*targetm.strip_name_encoding) (name);
33776 desc_name = orig_name;
33777 while (*desc_name == '.')
33778 desc_name++;
33780 if (TREE_PUBLIC (decl))
33781 fprintf (file, "\t.globl %s\n", desc_name);
33783 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
33784 fprintf (file, "%s:\n", desc_name);
33785 fprintf (file, "\t.long %s\n", orig_name);
33786 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
33787 fputs ("\t.long 0\n", file);
33788 fprintf (file, "\t.previous\n");
33790 ASM_OUTPUT_LABEL (file, name);
33793 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
33794 static void
33795 rs6000_elf_file_end (void)
33797 #ifdef HAVE_AS_GNU_ATTRIBUTE
33798 /* ??? The value emitted depends on options active at file end.
33799 Assume anyone using #pragma or attributes that might change
33800 options knows what they are doing. */
33801 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
33802 && rs6000_passes_float)
33804 int fp;
33806 if (TARGET_DF_FPR)
33807 fp = 1;
33808 else if (TARGET_SF_FPR)
33809 fp = 3;
33810 else
33811 fp = 2;
33812 if (rs6000_passes_long_double)
33814 if (!TARGET_LONG_DOUBLE_128)
33815 fp |= 2 * 4;
33816 else if (TARGET_IEEEQUAD)
33817 fp |= 3 * 4;
33818 else
33819 fp |= 1 * 4;
33821 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
33823 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
33825 if (rs6000_passes_vector)
33826 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
33827 (TARGET_ALTIVEC_ABI ? 2 : 1));
33828 if (rs6000_returns_struct)
33829 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
33830 aix_struct_return ? 2 : 1);
33832 #endif
33833 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
33834 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
33835 file_end_indicate_exec_stack ();
33836 #endif
33838 if (flag_split_stack)
33839 file_end_indicate_split_stack ();
33841 if (cpu_builtin_p)
33843 /* We have expanded a CPU builtin, so we need to emit a reference to
33844 the special symbol that libc uses to declare that it supports the
33845 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 fields in the TCB. */
33846 switch_to_section (data_section);
33847 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
33848 fprintf (asm_out_file, "\t%s %s\n",
33849 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
33852 #endif
33854 #if TARGET_XCOFF
33856 #ifndef HAVE_XCOFF_DWARF_EXTRAS
33857 #define HAVE_XCOFF_DWARF_EXTRAS 0
33858 #endif
33860 static enum unwind_info_type
33861 rs6000_xcoff_debug_unwind_info (void)
33863 return UI_NONE;
33866 static void
33867 rs6000_xcoff_asm_output_anchor (rtx symbol)
33869 char buffer[100];
33871 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
33872 SYMBOL_REF_BLOCK_OFFSET (symbol));
33873 fprintf (asm_out_file, "%s", SET_ASM_OP);
33874 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
33875 fprintf (asm_out_file, ",");
33876 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
33877 fprintf (asm_out_file, "\n");
33880 static void
33881 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
33883 fputs (GLOBAL_ASM_OP, stream);
33884 RS6000_OUTPUT_BASENAME (stream, name);
33885 putc ('\n', stream);
33888 /* A get_unnamed_section callback, used for read-only sections.
33889 DIRECTIVE points to the section string variable. */
33891 static void
33892 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
33894 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
33895 *(const char *const *) directive,
33896 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33899 /* Likewise for read-write sections. */
33901 static void
33902 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
33904 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
33905 *(const char *const *) directive,
33906 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33909 static void
33910 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
33912 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
33913 *(const char *const *) directive,
33914 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
33917 /* A get_unnamed_section callback, used for switching to toc_section. */
33919 static void
33920 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
33922 if (TARGET_MINIMAL_TOC)
33924 /* toc_section is always selected at least once from
33925 rs6000_xcoff_file_start, so this is guaranteed to
33926 always be defined once and only once in each file. */
33927 if (!toc_initialized)
33929 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
33930 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
33931 toc_initialized = 1;
33933 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
33934 (TARGET_32BIT ? "" : ",3"));
33936 else
33937 fputs ("\t.toc\n", asm_out_file);
33940 /* Implement TARGET_ASM_INIT_SECTIONS. */
33942 static void
33943 rs6000_xcoff_asm_init_sections (void)
33945 read_only_data_section
33946 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33947 &xcoff_read_only_section_name);
33949 private_data_section
33950 = get_unnamed_section (SECTION_WRITE,
33951 rs6000_xcoff_output_readwrite_section_asm_op,
33952 &xcoff_private_data_section_name);
33954 tls_data_section
33955 = get_unnamed_section (SECTION_TLS,
33956 rs6000_xcoff_output_tls_section_asm_op,
33957 &xcoff_tls_data_section_name);
33959 tls_private_data_section
33960 = get_unnamed_section (SECTION_TLS,
33961 rs6000_xcoff_output_tls_section_asm_op,
33962 &xcoff_private_data_section_name);
33964 read_only_private_data_section
33965 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
33966 &xcoff_private_data_section_name);
33968 toc_section
33969 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
33971 readonly_data_section = read_only_data_section;
33974 static int
33975 rs6000_xcoff_reloc_rw_mask (void)
33977 return 3;
33980 static void
33981 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
33982 tree decl ATTRIBUTE_UNUSED)
33984 int smclass;
33985 static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" };
33987 if (flags & SECTION_EXCLUDE)
33988 smclass = 4;
33989 else if (flags & SECTION_DEBUG)
33991 fprintf (asm_out_file, "\t.dwsect %s\n", name);
33992 return;
33994 else if (flags & SECTION_CODE)
33995 smclass = 0;
33996 else if (flags & SECTION_TLS)
33997 smclass = 3;
33998 else if (flags & SECTION_WRITE)
33999 smclass = 2;
34000 else
34001 smclass = 1;
34003 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
34004 (flags & SECTION_CODE) ? "." : "",
34005 name, suffix[smclass], flags & SECTION_ENTSIZE);
34008 #define IN_NAMED_SECTION(DECL) \
34009 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
34010 && DECL_SECTION_NAME (DECL) != NULL)
34012 static section *
34013 rs6000_xcoff_select_section (tree decl, int reloc,
34014 unsigned HOST_WIDE_INT align)
34016 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
34017 named section. */
34018 if (align > BIGGEST_ALIGNMENT)
34020 resolve_unique_section (decl, reloc, true);
34021 if (IN_NAMED_SECTION (decl))
34022 return get_named_section (decl, NULL, reloc);
34025 if (decl_readonly_section (decl, reloc))
34027 if (TREE_PUBLIC (decl))
34028 return read_only_data_section;
34029 else
34030 return read_only_private_data_section;
34032 else
34034 #if HAVE_AS_TLS
34035 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34037 if (TREE_PUBLIC (decl))
34038 return tls_data_section;
34039 else if (bss_initializer_p (decl))
34041 /* Convert to COMMON to emit in BSS. */
34042 DECL_COMMON (decl) = 1;
34043 return tls_comm_section;
34045 else
34046 return tls_private_data_section;
34048 else
34049 #endif
34050 if (TREE_PUBLIC (decl))
34051 return data_section;
34052 else
34053 return private_data_section;
34057 static void
34058 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
34060 const char *name;
34062 /* Use select_section for private data and uninitialized data with
34063 alignment <= BIGGEST_ALIGNMENT. */
34064 if (!TREE_PUBLIC (decl)
34065 || DECL_COMMON (decl)
34066 || (DECL_INITIAL (decl) == NULL_TREE
34067 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
34068 || DECL_INITIAL (decl) == error_mark_node
34069 || (flag_zero_initialized_in_bss
34070 && initializer_zerop (DECL_INITIAL (decl))))
34071 return;
34073 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
34074 name = (*targetm.strip_name_encoding) (name);
34075 set_decl_section_name (decl, name);
34078 /* Select section for constant in constant pool.
34080 On RS/6000, all constants are in the private read-only data area.
34081 However, if this is being placed in the TOC it must be output as a
34082 toc entry. */
34084 static section *
34085 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
34086 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
34088 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
34089 return toc_section;
34090 else
34091 return read_only_private_data_section;
34094 /* Remove any trailing [DS] or the like from the symbol name. */
34096 static const char *
34097 rs6000_xcoff_strip_name_encoding (const char *name)
34099 size_t len;
34100 if (*name == '*')
34101 name++;
34102 len = strlen (name);
34103 if (name[len - 1] == ']')
34104 return ggc_alloc_string (name, len - 4);
34105 else
34106 return name;
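/* e.g. "*foo[DS]" -> "foo": the leading '*' is skipped and the last
   four characters drop the "[DS]" mapping class.  The "len - 4"
   arithmetic assumes every bracketed suffix is a two-letter class plus
   brackets, as appended by rs6000_xcoff_encode_section_info below.  */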
34109 /* Section attributes. AIX is always PIC. */
34111 static unsigned int
34112 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
34114 unsigned int align;
34115 unsigned int flags = default_section_type_flags (decl, name, reloc);
34117 /* Align to at least UNIT size. */
34118 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
34119 align = MIN_UNITS_PER_WORD;
34120 else
34121 /* Increase alignment of large objects if not already stricter. */
34122 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
34123 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
34124 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
34126 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
34129 /* Output at beginning of assembler file.
34131 Initialize the section names for the RS/6000 at this point.
34133 Specify filename, including full path, to assembler.
34135 We want to go into the TOC section so at least one .toc will be emitted.
34136 Also, in order to output proper .bs/.es pairs, we need at least one static
34137 [RW] section emitted.
34139 Finally, declare mcount when profiling to make the assembler happy. */
34141 static void
34142 rs6000_xcoff_file_start (void)
34144 rs6000_gen_section_name (&xcoff_bss_section_name,
34145 main_input_filename, ".bss_");
34146 rs6000_gen_section_name (&xcoff_private_data_section_name,
34147 main_input_filename, ".rw_");
34148 rs6000_gen_section_name (&xcoff_read_only_section_name,
34149 main_input_filename, ".ro_");
34150 rs6000_gen_section_name (&xcoff_tls_data_section_name,
34151 main_input_filename, ".tls_");
34152 rs6000_gen_section_name (&xcoff_tbss_section_name,
34153 main_input_filename, ".tbss_[UL]");
34155 fputs ("\t.file\t", asm_out_file);
34156 output_quoted_string (asm_out_file, main_input_filename);
34157 fputc ('\n', asm_out_file);
34158 if (write_symbols != NO_DEBUG)
34159 switch_to_section (private_data_section);
34160 switch_to_section (toc_section);
34161 switch_to_section (text_section);
34162 if (profile_flag)
34163 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
34164 rs6000_file_start ();
34167 /* Output at end of assembler file.
34168 On the RS/6000, referencing data should automatically pull in text. */
34170 static void
34171 rs6000_xcoff_file_end (void)
34173 switch_to_section (text_section);
34174 fputs ("_section_.text:\n", asm_out_file);
34175 switch_to_section (data_section);
34176 fputs (TARGET_32BIT
34177 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
34178 asm_out_file);
34181 struct declare_alias_data
34183 FILE *file;
34184 bool function_descriptor;
34187 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
34189 static bool
34190 rs6000_declare_alias (struct symtab_node *n, void *d)
34192 struct declare_alias_data *data = (struct declare_alias_data *)d;
34193 /* Main symbol is output specially, because varasm machinery does part of
34194 the job for us - we do not need to declare .globl/lglobs and such. */
34195 if (!n->alias || n->weakref)
34196 return false;
34198 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
34199 return false;
34201 /* Prevent assemble_alias from trying to use .set pseudo operation
34202 that does not behave as expected by the middle-end. */
34203 TREE_ASM_WRITTEN (n->decl) = true;
34205 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
34206 char *buffer = (char *) alloca (strlen (name) + 2);
34207 char *p;
34208 int dollar_inside = 0;
34210 strcpy (buffer, name);
34211 p = strchr (buffer, '$');
34212 while (p) {
34213 *p = '_';
34214 dollar_inside++;
34215 p = strchr (p + 1, '$');
34217 if (TREE_PUBLIC (n->decl))
34219 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
34221 if (dollar_inside) {
34222 if (data->function_descriptor)
34223 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34224 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34226 if (data->function_descriptor)
34228 fputs ("\t.globl .", data->file);
34229 RS6000_OUTPUT_BASENAME (data->file, buffer);
34230 putc ('\n', data->file);
34232 fputs ("\t.globl ", data->file);
34233 RS6000_OUTPUT_BASENAME (data->file, buffer);
34234 putc ('\n', data->file);
34236 #ifdef ASM_WEAKEN_DECL
34237 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
34238 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
34239 #endif
34241 else
34243 if (dollar_inside)
34245 if (data->function_descriptor)
34246 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
34247 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
34249 if (data->function_descriptor)
34251 fputs ("\t.lglobl .", data->file);
34252 RS6000_OUTPUT_BASENAME (data->file, buffer);
34253 putc ('\n', data->file);
34255 fputs ("\t.lglobl ", data->file);
34256 RS6000_OUTPUT_BASENAME (data->file, buffer);
34257 putc ('\n', data->file);
34259 if (data->function_descriptor)
34260 fputs (".", data->file);
34261 RS6000_OUTPUT_BASENAME (data->file, buffer);
34262 fputs (":\n", data->file);
34263 return false;
34267 #ifdef HAVE_GAS_HIDDEN
34268 /* Helper function to calculate visibility of a DECL
34269 and return the value as a const string. */
34271 static const char *
34272 rs6000_xcoff_visibility (tree decl)
34274 static const char * const visibility_types[] = {
34275 "", ",protected", ",hidden", ",internal"
34278 enum symbol_visibility vis = DECL_VISIBILITY (decl);
34280 if (TREE_CODE (decl) == FUNCTION_DECL
34281 && cgraph_node::get (decl)
34282 && cgraph_node::get (decl)->instrumentation_clone
34283 && cgraph_node::get (decl)->instrumented_version)
34284 vis = DECL_VISIBILITY (cgraph_node::get (decl)->instrumented_version->decl);
34286 return visibility_types[vis];
34288 #endif
34291 /* This macro produces the initial definition of a function name.
34292 On the RS/6000, we need to place an extra '.' in the function name and
34293 output the function descriptor.
34294 Dollar signs are converted to underscores.
34296 The csect for the function will have already been created when
34297 text_section was selected. We do have to go back to that csect, however.
34299 The third and fourth parameters to the .function pseudo-op (16 and 044)
34300 are placeholders which no longer have any use.
34302 Because AIX assembler's .set command has unexpected semantics, we output
34303 all aliases as alternative labels in front of the definition. */
34305 void
34306 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
34308 char *buffer = (char *) alloca (strlen (name) + 1);
34309 char *p;
34310 int dollar_inside = 0;
34311 struct declare_alias_data data = {file, false};
34313 strcpy (buffer, name);
34314 p = strchr (buffer, '$');
34315 while (p) {
34316 *p = '_';
34317 dollar_inside++;
34318 p = strchr (p + 1, '$');
34320 if (TREE_PUBLIC (decl))
34322 if (!RS6000_WEAK || !DECL_WEAK (decl))
34324 if (dollar_inside) {
34325 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34326 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34328 fputs ("\t.globl .", file);
34329 RS6000_OUTPUT_BASENAME (file, buffer);
34330 #ifdef HAVE_GAS_HIDDEN
34331 fputs (rs6000_xcoff_visibility (decl), file);
34332 #endif
34333 putc ('\n', file);
34336 else
34338 if (dollar_inside) {
34339 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
34340 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
34342 fputs ("\t.lglobl .", file);
34343 RS6000_OUTPUT_BASENAME (file, buffer);
34344 putc ('\n', file);
34346 fputs ("\t.csect ", file);
34347 RS6000_OUTPUT_BASENAME (file, buffer);
34348 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
34349 RS6000_OUTPUT_BASENAME (file, buffer);
34350 fputs (":\n", file);
34351 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34352 &data, true);
34353 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
34354 RS6000_OUTPUT_BASENAME (file, buffer);
34355 fputs (", TOC[tc0], 0\n", file);
34356 in_section = NULL;
34357 switch_to_section (function_section (decl));
34358 putc ('.', file);
34359 RS6000_OUTPUT_BASENAME (file, buffer);
34360 fputs (":\n", file);
34361 data.function_descriptor = true;
34362 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34363 &data, true);
34364 if (!DECL_IGNORED_P (decl))
34366 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
34367 xcoffout_declare_function (file, decl, buffer);
34368 else if (write_symbols == DWARF2_DEBUG)
34370 name = (*targetm.strip_name_encoding) (name);
34371 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
34374 return;
34378 /* Output assembly language to globalize a symbol from a DECL,
34379 possibly with visibility. */
34381 void
34382 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
34384 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
34385 fputs (GLOBAL_ASM_OP, stream);
34386 RS6000_OUTPUT_BASENAME (stream, name);
34387 #ifdef HAVE_GAS_HIDDEN
34388 fputs (rs6000_xcoff_visibility (decl), stream);
34389 #endif
34390 putc ('\n', stream);
34393 /* Output assembly language to define a symbol as COMMON from a DECL,
34394 possibly with visibility. */
34396 void
34397 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
34398 tree decl ATTRIBUTE_UNUSED,
34399 const char *name,
34400 unsigned HOST_WIDE_INT size,
34401 unsigned HOST_WIDE_INT align)
34403 unsigned HOST_WIDE_INT align2 = 2;
34405 if (align > 32)
34406 align2 = floor_log2 (align / BITS_PER_UNIT);
34407 else if (size > 4)
34408 align2 = 3;
34410 fputs (COMMON_ASM_OP, stream);
34411 RS6000_OUTPUT_BASENAME (stream, name);
34413 fprintf (stream,
34414 "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
34415 size, align2);
34417 #ifdef HAVE_GAS_HIDDEN
34418 if (decl != NULL)
34419 fputs (rs6000_xcoff_visibility (decl), stream);
34420 #endif
34421 putc ('\n', stream);
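/* Worked example of the alignment encoding above: a 16-byte object
   with 128-bit alignment gets align2 = floor_log2 (128 / 8) = 4 and
   is emitted roughly as ".comm name,16,4" (modulo the exact
   COMMON_ASM_OP spelling); a 4-byte object with default (<= 32-bit)
   alignment keeps the initial align2 of 2.  */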
34424 /* This macro produces the initial definition of an object (variable) name.
34425 Because AIX assembler's .set command has unexpected semantics, we output
34426 all aliases as alternative labels in front of the definition. */
34428 void
34429 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
34431 struct declare_alias_data data = {file, false};
34432 RS6000_OUTPUT_BASENAME (file, name);
34433 fputs (":\n", file);
34434 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
34435 &data, true);
34438 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
34440 void
34441 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
34443 fputs (integer_asm_op (size, FALSE), file);
34444 assemble_name (file, label);
34445 fputs ("-$", file);
34448 /* Output a symbol offset relative to the dbase for the current object.
34449 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
34450 signed offsets.
34452 __gcc_unwind_dbase is embedded in all executables/libraries through
34453 libgcc/config/rs6000/crtdbase.S. */
34455 void
34456 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
34458 fputs (integer_asm_op (size, FALSE), file);
34459 assemble_name (file, label);
34460 fputs("-__gcc_unwind_dbase", file);
34463 #ifdef HAVE_AS_TLS
34464 static void
34465 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
34467 rtx symbol;
34468 int flags;
34469 const char *symname;
34471 default_encode_section_info (decl, rtl, first);
34473 /* Careful not to prod global register variables. */
34474 if (!MEM_P (rtl))
34475 return;
34476 symbol = XEXP (rtl, 0);
34477 if (GET_CODE (symbol) != SYMBOL_REF)
34478 return;
34480 flags = SYMBOL_REF_FLAGS (symbol);
34482 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
34483 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
34485 SYMBOL_REF_FLAGS (symbol) = flags;
34487 /* Append mapping class to extern decls. */
34488 symname = XSTR (symbol, 0);
34489 if (decl /* sync condition with assemble_external () */
34490 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
34491 && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
34492 || TREE_CODE (decl) == FUNCTION_DECL)
34493 && symname[strlen (symname) - 1] != ']')
34495 char *newname = (char *) alloca (strlen (symname) + 5);
34496 strcpy (newname, symname);
34497 strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
34498 ? "[DS]" : "[UA]"));
34499 XSTR (symbol, 0) = ggc_strdup (newname);
34502 #endif /* HAVE_AS_TLS */
34503 #endif /* TARGET_XCOFF */
34505 void
34506 rs6000_asm_weaken_decl (FILE *stream, tree decl,
34507 const char *name, const char *val)
34509 fputs ("\t.weak\t", stream);
34510 RS6000_OUTPUT_BASENAME (stream, name);
34511 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34512 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34514 if (TARGET_XCOFF)
34515 fputs ("[DS]", stream);
34516 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34517 if (TARGET_XCOFF)
34518 fputs (rs6000_xcoff_visibility (decl), stream);
34519 #endif
34520 fputs ("\n\t.weak\t.", stream);
34521 RS6000_OUTPUT_BASENAME (stream, name);
34523 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
34524 if (TARGET_XCOFF)
34525 fputs (rs6000_xcoff_visibility (decl), stream);
34526 #endif
34527 fputc ('\n', stream);
34528 if (val)
34530 #ifdef ASM_OUTPUT_DEF
34531 ASM_OUTPUT_DEF (stream, name, val);
34532 #endif
34533 if (decl && TREE_CODE (decl) == FUNCTION_DECL
34534 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
34536 fputs ("\t.set\t.", stream);
34537 RS6000_OUTPUT_BASENAME (stream, name);
34538 fputs (",.", stream);
34539 RS6000_OUTPUT_BASENAME (stream, val);
34540 fputc ('\n', stream);
34546 /* Return true if INSN should not be copied. */
34548 static bool
34549 rs6000_cannot_copy_insn_p (rtx_insn *insn)
34551 return recog_memoized (insn) >= 0
34552 && get_attr_cannot_copy (insn);
34555 /* Compute a (partial) cost for rtx X. Return true if the complete
34556 cost has been computed, and false if subexpressions should be
34557 scanned. In either case, *TOTAL contains the cost result. */
34559 static bool
34560 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
34561 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
34563 int code = GET_CODE (x);
34565 switch (code)
34567 /* On the RS/6000, if it is valid in the insn, it is free. */
34568 case CONST_INT:
34569 if (((outer_code == SET
34570 || outer_code == PLUS
34571 || outer_code == MINUS)
34572 && (satisfies_constraint_I (x)
34573 || satisfies_constraint_L (x)))
34574 || (outer_code == AND
34575 && (satisfies_constraint_K (x)
34576 || (mode == SImode
34577 ? satisfies_constraint_L (x)
34578 : satisfies_constraint_J (x))))
34579 || ((outer_code == IOR || outer_code == XOR)
34580 && (satisfies_constraint_K (x)
34581 || (mode == SImode
34582 ? satisfies_constraint_L (x)
34583 : satisfies_constraint_J (x))))
34584 || outer_code == ASHIFT
34585 || outer_code == ASHIFTRT
34586 || outer_code == LSHIFTRT
34587 || outer_code == ROTATE
34588 || outer_code == ROTATERT
34589 || outer_code == ZERO_EXTRACT
34590 || (outer_code == MULT
34591 && satisfies_constraint_I (x))
34592 || ((outer_code == DIV || outer_code == UDIV
34593 || outer_code == MOD || outer_code == UMOD)
34594 && exact_log2 (INTVAL (x)) >= 0)
34595 || (outer_code == COMPARE
34596 && (satisfies_constraint_I (x)
34597 || satisfies_constraint_K (x)))
34598 || ((outer_code == EQ || outer_code == NE)
34599 && (satisfies_constraint_I (x)
34600 || satisfies_constraint_K (x)
34601 || (mode == SImode
34602 ? satisfies_constraint_L (x)
34603 : satisfies_constraint_J (x))))
34604 || (outer_code == GTU
34605 && satisfies_constraint_I (x))
34606 || (outer_code == LTU
34607 && satisfies_constraint_P (x)))
34609 *total = 0;
34610 return true;
34612 else if ((outer_code == PLUS
34613 && reg_or_add_cint_operand (x, VOIDmode))
34614 || (outer_code == MINUS
34615 && reg_or_sub_cint_operand (x, VOIDmode))
34616 || ((outer_code == SET
34617 || outer_code == IOR
34618 || outer_code == XOR)
34619 && (INTVAL (x)
34620 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
34622 *total = COSTS_N_INSNS (1);
34623 return true;
34625 /* FALLTHRU */
34627 case CONST_DOUBLE:
34628 case CONST_WIDE_INT:
34629 case CONST:
34630 case HIGH:
34631 case SYMBOL_REF:
34632 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34633 return true;
34635 case MEM:
34636 /* When optimizing for size, MEM should be slightly more expensive
34637 than generating an address, e.g., (plus (reg) (const)).
34638 L1 cache latency is about two instructions. */
34639 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
34640 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
34641 *total += COSTS_N_INSNS (100);
34642 return true;
34644 case LABEL_REF:
34645 *total = 0;
34646 return true;
34648 case PLUS:
34649 case MINUS:
34650 if (FLOAT_MODE_P (mode))
34651 *total = rs6000_cost->fp;
34652 else
34653 *total = COSTS_N_INSNS (1);
34654 return false;
34656 case MULT:
34657 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34658 && satisfies_constraint_I (XEXP (x, 1)))
34660 if (INTVAL (XEXP (x, 1)) >= -256
34661 && INTVAL (XEXP (x, 1)) <= 255)
34662 *total = rs6000_cost->mulsi_const9;
34663 else
34664 *total = rs6000_cost->mulsi_const;
34666 else if (mode == SFmode)
34667 *total = rs6000_cost->fp;
34668 else if (FLOAT_MODE_P (mode))
34669 *total = rs6000_cost->dmul;
34670 else if (mode == DImode)
34671 *total = rs6000_cost->muldi;
34672 else
34673 *total = rs6000_cost->mulsi;
34674 return false;
34676 case FMA:
34677 if (mode == SFmode)
34678 *total = rs6000_cost->fp;
34679 else
34680 *total = rs6000_cost->dmul;
34681 break;
34683 case DIV:
34684 case MOD:
34685 if (FLOAT_MODE_P (mode))
34687 *total = mode == DFmode ? rs6000_cost->ddiv
34688 : rs6000_cost->sdiv;
34689 return false;
34691 /* FALLTHRU */
34693 case UDIV:
34694 case UMOD:
34695 if (GET_CODE (XEXP (x, 1)) == CONST_INT
34696 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
34698 if (code == DIV || code == MOD)
34699 /* Shift, addze */
34700 *total = COSTS_N_INSNS (2);
34701 else
34702 /* Shift */
34703 *total = COSTS_N_INSNS (1);
34705 else
34707 if (GET_MODE (XEXP (x, 1)) == DImode)
34708 *total = rs6000_cost->divdi;
34709 else
34710 *total = rs6000_cost->divsi;
34712 /* Add in shift and subtract for MOD unless we have a mod instruction. */
34713 if (!TARGET_MODULO && (code == MOD || code == UMOD))
34714 *total += COSTS_N_INSNS (2);
34715 return false;
34717 case CTZ:
34718 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
34719 return false;
34721 case FFS:
34722 *total = COSTS_N_INSNS (4);
34723 return false;
34725 case POPCOUNT:
34726 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
34727 return false;
34729 case PARITY:
34730 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
34731 return false;
34733 case NOT:
34734 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
34735 *total = 0;
34736 else
34737 *total = COSTS_N_INSNS (1);
34738 return false;
34740 case AND:
34741 if (CONST_INT_P (XEXP (x, 1)))
34743 rtx left = XEXP (x, 0);
34744 rtx_code left_code = GET_CODE (left);
34746 /* rotate-and-mask: 1 insn. */
34747 if ((left_code == ROTATE
34748 || left_code == ASHIFT
34749 || left_code == LSHIFTRT)
34750 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
34752 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
34753 if (!CONST_INT_P (XEXP (left, 1)))
34754 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
34755 *total += COSTS_N_INSNS (1);
34756 return true;
34759 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
34760 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
34761 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
34762 || (val & 0xffff) == val
34763 || (val & 0xffff0000) == val
34764 || ((val & 0xffff) == 0 && mode == SImode))
34766 *total = rtx_cost (left, mode, AND, 0, speed);
34767 *total += COSTS_N_INSNS (1);
34768 return true;
34771 /* 2 insns. */
34772 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
34774 *total = rtx_cost (left, mode, AND, 0, speed);
34775 *total += COSTS_N_INSNS (2);
34776 return true;
34780 *total = COSTS_N_INSNS (1);
34781 return false;
34783 case IOR:
34784 /* FIXME */
34785 *total = COSTS_N_INSNS (1);
34786 return true;
34788 case CLZ:
34789 case XOR:
34790 case ZERO_EXTRACT:
34791 *total = COSTS_N_INSNS (1);
34792 return false;
34794 case ASHIFT:
34795 /* The EXTSWSLI instruction is a combined instruction.  Don't count
34796 the sign extend and the shift separately within the insn. */
34797 if (TARGET_EXTSWSLI && mode == DImode
34798 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
34799 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
34801 *total = 0;
34802 return false;
34804 /* fall through */
34806 case ASHIFTRT:
34807 case LSHIFTRT:
34808 case ROTATE:
34809 case ROTATERT:
34810 /* Handle mul_highpart. */
34811 if (outer_code == TRUNCATE
34812 && GET_CODE (XEXP (x, 0)) == MULT)
34814 if (mode == DImode)
34815 *total = rs6000_cost->muldi;
34816 else
34817 *total = rs6000_cost->mulsi;
34818 return true;
34820 else if (outer_code == AND)
34821 *total = 0;
34822 else
34823 *total = COSTS_N_INSNS (1);
34824 return false;
34826 case SIGN_EXTEND:
34827 case ZERO_EXTEND:
34828 if (GET_CODE (XEXP (x, 0)) == MEM)
34829 *total = 0;
34830 else
34831 *total = COSTS_N_INSNS (1);
34832 return false;
34834 case COMPARE:
34835 case NEG:
34836 case ABS:
34837 if (!FLOAT_MODE_P (mode))
34839 *total = COSTS_N_INSNS (1);
34840 return false;
34842 /* FALLTHRU */
34844 case FLOAT:
34845 case UNSIGNED_FLOAT:
34846 case FIX:
34847 case UNSIGNED_FIX:
34848 case FLOAT_TRUNCATE:
34849 *total = rs6000_cost->fp;
34850 return false;
34852 case FLOAT_EXTEND:
34853 if (mode == DFmode)
34854 *total = rs6000_cost->sfdf_convert;
34855 else
34856 *total = rs6000_cost->fp;
34857 return false;
34859 case UNSPEC:
34860 switch (XINT (x, 1))
34862 case UNSPEC_FRSP:
34863 *total = rs6000_cost->fp;
34864 return true;
34866 default:
34867 break;
34869 break;
34871 case CALL:
34872 case IF_THEN_ELSE:
34873 if (!speed)
34875 *total = COSTS_N_INSNS (1);
34876 return true;
34878 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
34880 *total = rs6000_cost->fp;
34881 return false;
34883 break;
34885 case NE:
34886 case EQ:
34887 case GTU:
34888 case LTU:
34889 /* Carry bit requires mode == Pmode.
34890 NEG or PLUS already counted so only add one. */
34891 if (mode == Pmode
34892 && (outer_code == NEG || outer_code == PLUS))
34894 *total = COSTS_N_INSNS (1);
34895 return true;
34897 /* FALLTHRU */
34899 case GT:
34900 case LT:
34901 case UNORDERED:
34902 if (outer_code == SET)
34904 if (XEXP (x, 1) == const0_rtx)
34906 *total = COSTS_N_INSNS (2);
34907 return true;
34909 else
34911 *total = COSTS_N_INSNS (3);
34912 return false;
34915 /* CC COMPARE. */
34916 if (outer_code == COMPARE)
34918 *total = 0;
34919 return true;
34921 break;
34923 default:
34924 break;
34927 return false;
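/* Illustrative reading of the CONST_INT case above (a sketch): in
   (plus (reg) (const_int 100)) the constant satisfies the "I"
   constraint (a signed 16-bit immediate), fits directly into an addi,
   and is therefore costed as free; constants that instead match the
   PLUS/MINUS cint operands or the 32-bit SET/IOR/XOR test are charged
   one insn, and anything larger falls through to the generic
   constant cost.  */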
34930 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
34932 static bool
34933 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
34934 int opno, int *total, bool speed)
34936 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
34938 fprintf (stderr,
34939 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
34940 "opno = %d, total = %d, speed = %s, x:\n",
34941 ret ? "complete" : "scan inner",
34942 GET_MODE_NAME (mode),
34943 GET_RTX_NAME (outer_code),
34944 opno,
34945 *total,
34946 speed ? "true" : "false");
34948 debug_rtx (x);
34950 return ret;
34953 static int
34954 rs6000_insn_cost (rtx_insn *insn, bool speed)
34956 if (recog_memoized (insn) < 0)
34957 return 0;
34959 if (!speed)
34960 return get_attr_length (insn);
34962 int cost = get_attr_cost (insn);
34963 if (cost > 0)
34964 return cost;
34966 int n = get_attr_length (insn) / 4;
34967 enum attr_type type = get_attr_type (insn);
34969 switch (type)
34971 case TYPE_LOAD:
34972 case TYPE_FPLOAD:
34973 case TYPE_VECLOAD:
34974 cost = COSTS_N_INSNS (n + 1);
34975 break;
34977 case TYPE_MUL:
34978 switch (get_attr_size (insn))
34980 case SIZE_8:
34981 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
34982 break;
34983 case SIZE_16:
34984 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
34985 break;
34986 case SIZE_32:
34987 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
34988 break;
34989 case SIZE_64:
34990 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
34991 break;
34992 default:
34993 gcc_unreachable ();
34995 break;
34996 case TYPE_DIV:
34997 switch (get_attr_size (insn))
34999 case SIZE_32:
35000 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
35001 break;
35002 case SIZE_64:
35003 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
35004 break;
35005 default:
35006 gcc_unreachable ();
35008 break;
35010 case TYPE_FP:
35011 cost = n * rs6000_cost->fp;
35012 break;
35013 case TYPE_DMUL:
35014 cost = n * rs6000_cost->dmul;
35015 break;
35016 case TYPE_SDIV:
35017 cost = n * rs6000_cost->sdiv;
35018 break;
35019 case TYPE_DDIV:
35020 cost = n * rs6000_cost->ddiv;
35021 break;
35023 case TYPE_SYNC:
35024 case TYPE_LOAD_L:
35025 case TYPE_MFCR:
35026 case TYPE_MFCRF:
35027 cost = COSTS_N_INSNS (n + 2);
35028 break;
35030 default:
35031 cost = COSTS_N_INSNS (n);
35034 return cost;
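/* Worked example (a sketch): a load that expands to two machine
   instructions has get_attr_length () == 8, so n == 2 and the
   TYPE_LOAD case above yields COSTS_N_INSNS (3), one extra insn's
   worth to reflect load latency.  */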
35037 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
35039 static int
35040 rs6000_debug_address_cost (rtx x, machine_mode mode,
35041 addr_space_t as, bool speed)
35043 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
35045 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
35046 ret, speed ? "true" : "false");
35047 debug_rtx (x);
35049 return ret;
35053 /* A C expression returning the cost of moving data from a register of class
35054 CLASS1 to one of CLASS2. */
35056 static int
35057 rs6000_register_move_cost (machine_mode mode,
35058 reg_class_t from, reg_class_t to)
35060 int ret;
35062 if (TARGET_DEBUG_COST)
35063 dbg_cost_ctrl++;
35065 /* Moves from/to GENERAL_REGS. */
35066 if (reg_classes_intersect_p (to, GENERAL_REGS)
35067 || reg_classes_intersect_p (from, GENERAL_REGS))
35069 reg_class_t rclass = from;
35071 if (! reg_classes_intersect_p (to, GENERAL_REGS))
35072 rclass = to;
35074 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
35075 ret = (rs6000_memory_move_cost (mode, rclass, false)
35076 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
35078 /* It's more expensive to move CR_REGS than CR0_REGS because of the
35079 shift. */
35080 else if (rclass == CR_REGS)
35081 ret = 4;
35083 /* For those processors that have slow LR/CTR moves, make them more
35084 expensive than memory in order to bias spills to memory. */
35085 else if ((rs6000_tune == PROCESSOR_POWER6
35086 || rs6000_tune == PROCESSOR_POWER7
35087 || rs6000_tune == PROCESSOR_POWER8
35088 || rs6000_tune == PROCESSOR_POWER9)
35089 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
35090 ret = 6 * hard_regno_nregs (0, mode);
35092 else
35093 /* A move will cost one instruction per GPR moved. */
35094 ret = 2 * hard_regno_nregs (0, mode);
35097 /* If we have VSX, we can easily move between FPR or Altivec registers. */
35098 else if (VECTOR_MEM_VSX_P (mode)
35099 && reg_classes_intersect_p (to, VSX_REGS)
35100 && reg_classes_intersect_p (from, VSX_REGS))
35101 ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);
35103 /* Moving between two similar registers is just one instruction. */
35104 else if (reg_classes_intersect_p (to, from))
35105 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
35107 /* Everything else has to go through GENERAL_REGS. */
35108 else
35109 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
35110 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
35112 if (TARGET_DEBUG_COST)
35114 if (dbg_cost_ctrl == 1)
35115 fprintf (stderr,
35116 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
35117 ret, GET_MODE_NAME (mode), reg_class_names[from],
35118 reg_class_names[to]);
35119 dbg_cost_ctrl--;
35122 return ret;
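/* Example of the costing above (a sketch): a GPR<->FPR transfer is
   priced as a store from one register file plus a load into the
   other (the two memory_move_cost terms), which biases the allocator
   toward keeping a value within one register file rather than
   bouncing it between them.  */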
35125 /* A C expression returning the cost of moving data of MODE from a register to
35126 or from memory. */
35128 static int
35129 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
35130 bool in ATTRIBUTE_UNUSED)
35132 int ret;
35134 if (TARGET_DEBUG_COST)
35135 dbg_cost_ctrl++;
35137 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
35138 ret = 4 * hard_regno_nregs (0, mode);
35139 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
35140 || reg_classes_intersect_p (rclass, VSX_REGS)))
35141 ret = 4 * hard_regno_nregs (32, mode);
35142 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
35143 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
35144 else
35145 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
35147 if (TARGET_DEBUG_COST)
35149 if (dbg_cost_ctrl == 1)
35150 fprintf (stderr,
35151 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
35152 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
35153 dbg_cost_ctrl--;
35156 return ret;
35159 /* Returns a code for a target-specific builtin that implements
35160 reciprocal of the function, or NULL_TREE if not available. */
35162 static tree
35163 rs6000_builtin_reciprocal (tree fndecl)
35165 switch (DECL_FUNCTION_CODE (fndecl))
35167 case VSX_BUILTIN_XVSQRTDP:
35168 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
35169 return NULL_TREE;
35171 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
35173 case VSX_BUILTIN_XVSQRTSP:
35174 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
35175 return NULL_TREE;
35177 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
35179 default:
35180 return NULL_TREE;
35184 /* Load up a constant. If the mode is a vector mode, splat the value across
35185 all of the vector elements. */
35187 static rtx
35188 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
35190 rtx reg;
35192 if (mode == SFmode || mode == DFmode)
35194 rtx d = const_double_from_real_value (dconst, mode);
35195 reg = force_reg (mode, d);
35197 else if (mode == V4SFmode)
35199 rtx d = const_double_from_real_value (dconst, SFmode);
35200 rtvec v = gen_rtvec (4, d, d, d, d);
35201 reg = gen_reg_rtx (mode);
35202 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35204 else if (mode == V2DFmode)
35206 rtx d = const_double_from_real_value (dconst, DFmode);
35207 rtvec v = gen_rtvec (2, d, d);
35208 reg = gen_reg_rtx (mode);
35209 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
35211 else
35212 gcc_unreachable ();
35214 return reg;
35217 /* Generate an FMA instruction. */
35219 static void
35220 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
35222 machine_mode mode = GET_MODE (target);
35223 rtx dst;
35225 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
35226 gcc_assert (dst != NULL);
35228 if (dst != target)
35229 emit_move_insn (target, dst);
35232 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
35234 static void
35235 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
35237 machine_mode mode = GET_MODE (dst);
35238 rtx r;
35240 /* This is a tad more complicated, since the fnma_optab is for
35241 a different expression: fma(-m1, m2, a), which is the same
35242 thing except in the case of signed zeros.
35244 Fortunately we know that if FMA is supported that FNMSUB is
35245 also supported in the ISA. Just expand it directly. */
35247 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
35249 r = gen_rtx_NEG (mode, a);
35250 r = gen_rtx_FMA (mode, m1, m2, r);
35251 r = gen_rtx_NEG (mode, r);
35252 emit_insn (gen_rtx_SET (dst, r));
35255 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
35256 add a reg_note saying that this was a division. Support both scalar and
35257 vector divide. Assumes no trapping math and finite arguments. */
35259 void
35260 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
35262 machine_mode mode = GET_MODE (dst);
35263 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
35264 int i;
35266 /* Low precision estimates guarantee 5 bits of accuracy. High
35267 precision estimates guarantee 14 bits of accuracy. SFmode
35268 requires 23 bits of accuracy. DFmode requires 52 bits of
35269 accuracy. Each pass at least doubles the accuracy, leading
35270 to the following. */
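/* (Worked out: from a 5-bit estimate the accuracy per pass is
   5 -> 10 -> 20 -> 40 bits, so SFmode needs 3 passes and DFmode 4;
   from a 14-bit TARGET_RECIP_PRECISION estimate, 14 -> 28 covers
   SFmode in 1 pass and 14 -> 28 -> 56 covers DFmode in 2.)  */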
35271 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35272 if (mode == DFmode || mode == V2DFmode)
35273 passes++;
35275 enum insn_code code = optab_handler (smul_optab, mode);
35276 insn_gen_fn gen_mul = GEN_FCN (code);
35278 gcc_assert (code != CODE_FOR_nothing);
35280 one = rs6000_load_constant_and_splat (mode, dconst1);
35282 /* x0 = 1./d estimate */
35283 x0 = gen_reg_rtx (mode);
35284 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
35285 UNSPEC_FRES)));
35287 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
35288 if (passes > 1) {
35290 /* e0 = 1. - d * x0 */
35291 e0 = gen_reg_rtx (mode);
35292 rs6000_emit_nmsub (e0, d, x0, one);
35294 /* x1 = x0 + e0 * x0 */
35295 x1 = gen_reg_rtx (mode);
35296 rs6000_emit_madd (x1, e0, x0, x0);
35298 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
35299 ++i, xprev = xnext, eprev = enext) {
35301 /* enext = eprev * eprev */
35302 enext = gen_reg_rtx (mode);
35303 emit_insn (gen_mul (enext, eprev, eprev));
35305 /* xnext = xprev + enext * xprev */
35306 xnext = gen_reg_rtx (mode);
35307 rs6000_emit_madd (xnext, enext, xprev, xprev);
35310 } else
35311 xprev = x0;
35313 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
35315 /* u = n * xprev */
35316 u = gen_reg_rtx (mode);
35317 emit_insn (gen_mul (u, n, xprev));
35319 /* v = n - (d * u) */
35320 v = gen_reg_rtx (mode);
35321 rs6000_emit_nmsub (v, d, u, n);
35323 /* dst = (v * xprev) + u */
35324 rs6000_emit_madd (dst, v, xprev, u);
35326 if (note_p)
35327 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
35330 /* Goldschmidt's Algorithm for single/double-precision floating point
35331 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
35333 void
35334 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
35336 machine_mode mode = GET_MODE (src);
35337 rtx e = gen_reg_rtx (mode);
35338 rtx g = gen_reg_rtx (mode);
35339 rtx h = gen_reg_rtx (mode);
35341 /* Low precision estimates guarantee 5 bits of accuracy. High
35342 precision estimates guarantee 14 bits of accuracy. SFmode
35343 requires 23 bits of accuracy. DFmode requires 52 bits of
35344 accuracy. Each pass at least doubles the accuracy, leading
35345 to the following. */
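/* (Sketch of the Goldschmidt step below: with g ~= sqrt(src) and
   h ~= 1/(2*sqrt(src)), the residual t = 1/2 - g*h measures the
   remaining error; g' = g + t*g and h' = h + t*h roughly square the
   relative error each pass, and the final 2*h recovers 1/sqrt for
   the reciprocal case.)  */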
35346 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
35347 if (mode == DFmode || mode == V2DFmode)
35348 passes++;
35350 int i;
35351 rtx mhalf;
35352 enum insn_code code = optab_handler (smul_optab, mode);
35353 insn_gen_fn gen_mul = GEN_FCN (code);
35355 gcc_assert (code != CODE_FOR_nothing);
35357 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
35359 /* e = rsqrt estimate */
35360 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
35361 UNSPEC_RSQRT)));
35363 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
35364 if (!recip)
35366 rtx zero = force_reg (mode, CONST0_RTX (mode));
35368 if (mode == SFmode)
35370 rtx target = emit_conditional_move (e, GT, src, zero, mode,
35371 e, zero, mode, 0);
35372 if (target != e)
35373 emit_move_insn (e, target);
35375 else
35377 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
35378 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
35382 /* g = sqrt estimate. */
35383 emit_insn (gen_mul (g, e, src));
35384 /* h = 1/(2*sqrt) estimate. */
35385 emit_insn (gen_mul (h, e, mhalf));
35387 if (recip)
35389 if (passes == 1)
35391 rtx t = gen_reg_rtx (mode);
35392 rs6000_emit_nmsub (t, g, h, mhalf);
35393 /* Apply correction directly to 1/rsqrt estimate. */
35394 rs6000_emit_madd (dst, e, t, e);
35396 else
35398 for (i = 0; i < passes; i++)
35400 rtx t1 = gen_reg_rtx (mode);
35401 rtx g1 = gen_reg_rtx (mode);
35402 rtx h1 = gen_reg_rtx (mode);
35404 rs6000_emit_nmsub (t1, g, h, mhalf);
35405 rs6000_emit_madd (g1, g, t1, g);
35406 rs6000_emit_madd (h1, h, t1, h);
35408 g = g1;
35409 h = h1;
35411 /* Multiply by 2 for 1/rsqrt. */
35412 emit_insn (gen_add3_insn (dst, h, h));
35415 else
35417 rtx t = gen_reg_rtx (mode);
35418 rs6000_emit_nmsub (t, g, h, mhalf);
35419 rs6000_emit_madd (dst, g, t, g);
35422 return;
35425 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
35426 (Power7) targets. DST is the target, and SRC is the argument operand. */
35428 void
35429 rs6000_emit_popcount (rtx dst, rtx src)
35431 machine_mode mode = GET_MODE (dst);
35432 rtx tmp1, tmp2;
35434 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
35435 if (TARGET_POPCNTD)
35437 if (mode == SImode)
35438 emit_insn (gen_popcntdsi2 (dst, src));
35439 else
35440 emit_insn (gen_popcntddi2 (dst, src));
35441 return;
35444 tmp1 = gen_reg_rtx (mode);
35446 if (mode == SImode)
35448 emit_insn (gen_popcntbsi2 (tmp1, src));
35449 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
35450 NULL_RTX, 0);
35451 tmp2 = force_reg (SImode, tmp2);
35452 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
35454 else
35456 emit_insn (gen_popcntbdi2 (tmp1, src));
35457 tmp2 = expand_mult (DImode, tmp1,
35458 GEN_INT ((HOST_WIDE_INT)
35459 0x01010101 << 32 | 0x01010101),
35460 NULL_RTX, 0);
35461 tmp2 = force_reg (DImode, tmp2);
35462 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
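/* Illustration of the multiply trick above (SImode, a sketch):
   popcntb leaves one per-byte population count in each byte, e.g.
   0x01020304 for per-byte counts 1,2,3,4; multiplying by 0x01010101
   accumulates all four counts into the most significant byte, and
   the shift right by 24 extracts the total (here 1+2+3+4 = 10).  */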
35467 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
35468 target, and SRC is the argument operand. */
35470 void
35471 rs6000_emit_parity (rtx dst, rtx src)
35473 machine_mode mode = GET_MODE (dst);
35474 rtx tmp;
35476 tmp = gen_reg_rtx (mode);
35478 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
35479 if (TARGET_CMPB)
35481 if (mode == SImode)
35483 emit_insn (gen_popcntbsi2 (tmp, src));
35484 emit_insn (gen_paritysi2_cmpb (dst, tmp));
35486 else
35488 emit_insn (gen_popcntbdi2 (tmp, src));
35489 emit_insn (gen_paritydi2_cmpb (dst, tmp));
35491 return;
35494 if (mode == SImode)
35496 /* Is mult+shift >= shift+xor+shift+xor? */
35497 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
35499 rtx tmp1, tmp2, tmp3, tmp4;
35501 tmp1 = gen_reg_rtx (SImode);
35502 emit_insn (gen_popcntbsi2 (tmp1, src));
35504 tmp2 = gen_reg_rtx (SImode);
35505 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
35506 tmp3 = gen_reg_rtx (SImode);
35507 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
35509 tmp4 = gen_reg_rtx (SImode);
35510 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
35511 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
35513 else
35514 rs6000_emit_popcount (tmp, src);
35515 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
35517 else
35519 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
35520 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
35522 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
35524 tmp1 = gen_reg_rtx (DImode);
35525 emit_insn (gen_popcntbdi2 (tmp1, src));
35527 tmp2 = gen_reg_rtx (DImode);
35528 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
35529 tmp3 = gen_reg_rtx (DImode);
35530 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
35532 tmp4 = gen_reg_rtx (DImode);
35533 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
35534 tmp5 = gen_reg_rtx (DImode);
35535 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
35537 tmp6 = gen_reg_rtx (DImode);
35538 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
35539 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
35541 else
35542 rs6000_emit_popcount (tmp, src);
35543 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
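/* Sketch of the xor-fold above (DImode): after popcntb each byte
   holds its own population count; xor-folding the halves (shifts by
   32, 16, then 8) preserves the parity of the total in the low
   byte's low bit, which the final AND with 1 extracts.  */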
35547 /* Expand an Altivec constant permutation for little endian mode.
35548 There are two issues: First, the two input operands must be
35549 swapped so that together they form a double-wide array in LE
35550 order. Second, the vperm instruction has surprising behavior
35551 in LE mode: it interprets the elements of the source vectors
35552 in BE mode ("left to right") and interprets the elements of
35553 the destination vector in LE mode ("right to left"). To
35554 correct for this, we must subtract each element of the permute
35555 control vector from 31.
35557 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
35558 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
35559 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
35560 serve as the permute control vector. Then, in BE mode,
35562 vperm 9,10,11,12
35564 places the desired result in vr9. However, in LE mode the
35565 vector contents will be
35567 vr10 = 00000003 00000002 00000001 00000000
35568 vr11 = 00000007 00000006 00000005 00000004
35570 The result of the vperm using the same permute control vector is
35572 vr9 = 05000000 07000000 01000000 03000000
35574 That is, the leftmost 4 bytes of vr10 are interpreted as the
35575 source for the rightmost 4 bytes of vr9, and so on.
35577 If we change the permute control vector to
35579 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
35581 and issue
35583 vperm 9,11,10,12
35585 we get the desired
35587 vr9 = 00000006 00000004 00000002 00000000. */
35589 void
35590 altivec_expand_vec_perm_const_le (rtx operands[4])
35592 unsigned int i;
35593 rtx perm[16];
35594 rtx constv, unspec;
35595 rtx target = operands[0];
35596 rtx op0 = operands[1];
35597 rtx op1 = operands[2];
35598 rtx sel = operands[3];
35600 /* Unpack and adjust the constant selector. */
35601 for (i = 0; i < 16; ++i)
35603 rtx e = XVECEXP (sel, 0, i);
35604 unsigned int elt = 31 - (INTVAL (e) & 31);
35605 perm[i] = GEN_INT (elt);
35608 /* Expand to a permute, swapping the inputs and using the
35609 adjusted selector. */
35610 if (!REG_P (op0))
35611 op0 = force_reg (V16QImode, op0);
35612 if (!REG_P (op1))
35613 op1 = force_reg (V16QImode, op1);
35615 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
35616 constv = force_reg (V16QImode, constv);
35617 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
35618 UNSPEC_VPERM);
35619 if (!REG_P (target))
35621 rtx tmp = gen_reg_rtx (V16QImode);
35622 emit_move_insn (tmp, unspec);
35623 unspec = tmp;
35626 emit_move_insn (target, unspec);
35629 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
35630 permute control vector. But here it's not a constant, so we must
35631 generate a vector NAND or NOR to do the adjustment. */
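/* (Why a bitwise NOT suffices here: vperm reads only the low five
   bits of each selector byte, and for a 5-bit index e we have
   31 - e == ~e mod 32, so NOT performs the same 31 - e adjustment
   as the constant case.)  */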
35633 void
35634 altivec_expand_vec_perm_le (rtx operands[4])
35636 rtx notx, iorx, unspec;
35637 rtx target = operands[0];
35638 rtx op0 = operands[1];
35639 rtx op1 = operands[2];
35640 rtx sel = operands[3];
35641 rtx tmp = target;
35642 rtx norreg = gen_reg_rtx (V16QImode);
35643 machine_mode mode = GET_MODE (target);
35645 /* Get everything in regs so the pattern matches. */
35646 if (!REG_P (op0))
35647 op0 = force_reg (mode, op0);
35648 if (!REG_P (op1))
35649 op1 = force_reg (mode, op1);
35650 if (!REG_P (sel))
35651 sel = force_reg (V16QImode, sel);
35652 if (!REG_P (target))
35653 tmp = gen_reg_rtx (mode);
35655 if (TARGET_P9_VECTOR)
35657 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
35658 UNSPEC_VPERMR);
35660 else
35662 /* Invert the selector with a VNAND if available, else a VNOR.
35663 The VNAND is preferred for future fusion opportunities. */
35664 notx = gen_rtx_NOT (V16QImode, sel);
35665 iorx = (TARGET_P8_VECTOR
35666 ? gen_rtx_IOR (V16QImode, notx, notx)
35667 : gen_rtx_AND (V16QImode, notx, notx));
35668 emit_insn (gen_rtx_SET (norreg, iorx));
35670 /* Permute with operands reversed and adjusted selector. */
35671 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
35672 UNSPEC_VPERM);
35675 /* Copy into target, possibly by way of a register. */
35676 if (!REG_P (target))
35678 emit_move_insn (tmp, unspec);
35679 unspec = tmp;
35682 emit_move_insn (target, unspec);
35685 /* Expand an Altivec constant permutation. Return true if we match
35686 an efficient implementation; false to fall back to VPERM. */
35688 bool
35689 altivec_expand_vec_perm_const (rtx operands[4])
35691 struct altivec_perm_insn {
35692 HOST_WIDE_INT mask;
35693 enum insn_code impl;
35694 unsigned char perm[16];
35696 static const struct altivec_perm_insn patterns[] = {
35697 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
35698 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
35699 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
35700 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
35701 { OPTION_MASK_ALTIVEC,
35702 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
35703 : CODE_FOR_altivec_vmrglb_direct),
35704 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
35705 { OPTION_MASK_ALTIVEC,
35706 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
35707 : CODE_FOR_altivec_vmrglh_direct),
35708 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
35709 { OPTION_MASK_ALTIVEC,
35710 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
35711 : CODE_FOR_altivec_vmrglw_direct),
35712 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
35713 { OPTION_MASK_ALTIVEC,
35714 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
35715 : CODE_FOR_altivec_vmrghb_direct),
35716 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
35717 { OPTION_MASK_ALTIVEC,
35718 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
35719 : CODE_FOR_altivec_vmrghh_direct),
35720 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
35721 { OPTION_MASK_ALTIVEC,
35722 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
35723 : CODE_FOR_altivec_vmrghw_direct),
35724 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
35725 { OPTION_MASK_P8_VECTOR,
35726 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
35727 : CODE_FOR_p8_vmrgow_v4sf_direct),
35728 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
35729 { OPTION_MASK_P8_VECTOR,
35730 (BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
35731 : CODE_FOR_p8_vmrgew_v4sf_direct),
35732 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
35735 unsigned int i, j, elt, which;
35736 unsigned char perm[16];
35737 rtx target, op0, op1, sel, x;
35738 bool one_vec;
35740 target = operands[0];
35741 op0 = operands[1];
35742 op1 = operands[2];
35743 sel = operands[3];
35745 /* Unpack the constant selector. */
35746 for (i = which = 0; i < 16; ++i)
35748 rtx e = XVECEXP (sel, 0, i);
35749 elt = INTVAL (e) & 31;
35750 which |= (elt < 16 ? 1 : 2);
35751 perm[i] = elt;
35754 /* Simplify the constant selector based on operands. */
35755 switch (which)
35757 default:
35758 gcc_unreachable ();
35760 case 3:
35761 one_vec = false;
35762 if (!rtx_equal_p (op0, op1))
35763 break;
35764 /* FALLTHRU */
35766 case 2:
35767 for (i = 0; i < 16; ++i)
35768 perm[i] &= 15;
35769 op0 = op1;
35770 one_vec = true;
35771 break;
35773 case 1:
35774 op1 = op0;
35775 one_vec = true;
35776 break;
35779 /* Look for splat patterns. */
35780 if (one_vec)
35782 elt = perm[0];
35784 for (i = 0; i < 16; ++i)
35785 if (perm[i] != elt)
35786 break;
35787 if (i == 16)
35789 if (!BYTES_BIG_ENDIAN)
35790 elt = 15 - elt;
35791 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
35792 return true;
35795 if (elt % 2 == 0)
35797 for (i = 0; i < 16; i += 2)
35798 if (perm[i] != elt || perm[i + 1] != elt + 1)
35799 break;
35800 if (i == 16)
35802 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
35803 x = gen_reg_rtx (V8HImode);
35804 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
35805 GEN_INT (field)));
35806 emit_move_insn (target, gen_lowpart (V16QImode, x));
35807 return true;
35811 if (elt % 4 == 0)
35813 for (i = 0; i < 16; i += 4)
35814 if (perm[i] != elt
35815 || perm[i + 1] != elt + 1
35816 || perm[i + 2] != elt + 2
35817 || perm[i + 3] != elt + 3)
35818 break;
35819 if (i == 16)
35821 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
35822 x = gen_reg_rtx (V4SImode);
35823 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
35824 GEN_INT (field)));
35825 emit_move_insn (target, gen_lowpart (V16QImode, x));
35826 return true;
35831 /* Look for merge and pack patterns. */
35832 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
35834 bool swapped;
35836 if ((patterns[j].mask & rs6000_isa_flags) == 0)
35837 continue;
35839 elt = patterns[j].perm[0];
35840 if (perm[0] == elt)
35841 swapped = false;
35842 else if (perm[0] == elt + 16)
35843 swapped = true;
35844 else
35845 continue;
35846 for (i = 1; i < 16; ++i)
35848 elt = patterns[j].perm[i];
35849 if (swapped)
35850 elt = (elt >= 16 ? elt - 16 : elt + 16);
35851 else if (one_vec && elt >= 16)
35852 elt -= 16;
35853 if (perm[i] != elt)
35854 break;
35856 if (i == 16)
35858 enum insn_code icode = patterns[j].impl;
35859 machine_mode omode = insn_data[icode].operand[0].mode;
35860 machine_mode imode = insn_data[icode].operand[1].mode;
35862 /* For little-endian, don't use vpkuwum and vpkuhum if the
35863 underlying vector type is not V4SI and V8HI, respectively.
35864 For example, using vpkuwum with a V8HI picks up the even
35865 halfwords (BE numbering) when the even halfwords (LE
35866 numbering) are what we need. */
35867 if (!BYTES_BIG_ENDIAN
35868 && icode == CODE_FOR_altivec_vpkuwum_direct
35869 && ((GET_CODE (op0) == REG
35870 && GET_MODE (op0) != V4SImode)
35871 || (GET_CODE (op0) == SUBREG
35872 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
35873 continue;
35874 if (!BYTES_BIG_ENDIAN
35875 && icode == CODE_FOR_altivec_vpkuhum_direct
35876 && ((GET_CODE (op0) == REG
35877 && GET_MODE (op0) != V8HImode)
35878 || (GET_CODE (op0) == SUBREG
35879 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
35880 continue;
35882 /* For little-endian, the two input operands must be swapped
35883 (or swapped back) to ensure proper right-to-left numbering
35884 from 0 to 2N-1. */
35885 if (swapped ^ !BYTES_BIG_ENDIAN)
35886 std::swap (op0, op1);
35887 if (imode != V16QImode)
35889 op0 = gen_lowpart (imode, op0);
35890 op1 = gen_lowpart (imode, op1);
35892 if (omode == V16QImode)
35893 x = target;
35894 else
35895 x = gen_reg_rtx (omode);
35896 emit_insn (GEN_FCN (icode) (x, op0, op1));
35897 if (omode != V16QImode)
35898 emit_move_insn (target, gen_lowpart (V16QImode, x));
35899 return true;
35903 if (!BYTES_BIG_ENDIAN)
35905 altivec_expand_vec_perm_const_le (operands);
35906 return true;
35909 return false;
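/* Example of the splat matching above (a sketch): the selector
   {3,3,...,3} is a byte splat, so instead of falling back to vperm
   with a loaded control vector it is emitted as a single vspltb of
   element 3 (adjusted to 15 - 3 == 12 on little endian).  */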
35912 /* Expand a Paired Single or VSX Permute Doubleword constant permutation.
35913 Return true if we match an efficient implementation. */
35915 static bool
35916 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
35917 unsigned char perm0, unsigned char perm1)
35919 rtx x;
35921 /* If both selectors come from the same operand, fold to single op. */
35922 if ((perm0 & 2) == (perm1 & 2))
35924 if (perm0 & 2)
35925 op0 = op1;
35926 else
35927 op1 = op0;
35929 /* If both operands are equal, fold to simpler permutation. */
35930 if (rtx_equal_p (op0, op1))
35932 perm0 = perm0 & 1;
35933 perm1 = (perm1 & 1) + 2;
35935 /* If the first selector comes from the second operand, swap. */
35936 else if (perm0 & 2)
35938 if (perm1 & 2)
35939 return false;
35940 perm0 -= 2;
35941 perm1 += 2;
35942 std::swap (op0, op1);
35944 /* If the second selector does not come from the second operand, fail. */
35945 else if ((perm1 & 2) == 0)
35946 return false;
35948 /* Success! */
35949 if (target != NULL)
35951 machine_mode vmode, dmode;
35952 rtvec v;
35954 vmode = GET_MODE (target);
35955 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
35956 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
35957 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
35958 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
35959 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
35960 emit_insn (gen_rtx_SET (target, x));
35962 return true;
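/* Example (a sketch): for a two-element vector, selector values 0-1
   pick an element of op0 and 2-3 pick from op1, so perm {0, 3}
   becomes a VEC_SELECT of {op0[0], op1[1]} from the VEC_CONCAT of
   the two operands.  */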
35965 bool
35966 rs6000_expand_vec_perm_const (rtx operands[4])
35968 rtx target, op0, op1, sel;
35969 unsigned char perm0, perm1;
35971 target = operands[0];
35972 op0 = operands[1];
35973 op1 = operands[2];
35974 sel = operands[3];
35976 /* Unpack the constant selector. */
35977 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
35978 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
35980 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
35983 /* Test whether a constant permutation is supported. */
35985 static bool
35986 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
35988 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
35989 if (TARGET_ALTIVEC)
35990 return true;
35992 /* Check for ps_merge* or evmerge* insns. */
35993 if (TARGET_PAIRED_FLOAT && vmode == V2SFmode)
35995 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
35996 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
35997 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
36000 return false;
36003 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
36005 static void
36006 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
36007 machine_mode vmode, unsigned nelt, rtx perm[])
36009 machine_mode imode;
36010 rtx x;
36012 imode = vmode;
36013 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
36014 imode = mode_for_int_vector (vmode).require ();
36016 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
36017 x = expand_vec_perm (vmode, op0, op1, x, target);
36018 if (x != target)
36019 emit_move_insn (target, x);
36022 /* Expand an extract even operation. */
36024 void
36025 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
36027 machine_mode vmode = GET_MODE (target);
36028 unsigned i, nelt = GET_MODE_NUNITS (vmode);
36029 rtx perm[16];
36031 for (i = 0; i < nelt; i++)
36032 perm[i] = GEN_INT (i * 2);
36034 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
36037 /* Expand a vector interleave operation. */
36039 void
36040 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
36042 machine_mode vmode = GET_MODE (target);
36043 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
36044 rtx perm[16];
36046 high = (highp ? 0 : nelt / 2);
36047 for (i = 0; i < nelt / 2; i++)
36049 perm[i * 2] = GEN_INT (i + high);
36050 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
36053 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
36056 /* Scale a V2DF vector SRC by two to the SCALE and place in TGT. */
36057 void
36058 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
36060 HOST_WIDE_INT hwi_scale (scale);
36061 REAL_VALUE_TYPE r_pow;
36062 rtvec v = rtvec_alloc (2);
36063 rtx elt;
36064 rtx scale_vec = gen_reg_rtx (V2DFmode);
36065 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
36066 elt = const_double_from_real_value (r_pow, DFmode);
36067 RTVEC_ELT (v, 0) = elt;
36068 RTVEC_ELT (v, 1) = elt;
36069 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
36070 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
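/* Usage sketch: rs6000_scale_v2df (tgt, src, 3) splats 2^3 == 8.0
   into both lanes of a V2DF constant and multiplies, scaling each
   element of SRC by eight.  */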
36073 /* Return an RTX representing where to find the function value of a
36074 function returning MODE. */
36075 static rtx
36076 rs6000_complex_function_value (machine_mode mode)
36078 unsigned int regno;
36079 rtx r1, r2;
36080 machine_mode inner = GET_MODE_INNER (mode);
36081 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
36083 if (TARGET_FLOAT128_TYPE
36084 && (mode == KCmode
36085 || (mode == TCmode && TARGET_IEEEQUAD)))
36086 regno = ALTIVEC_ARG_RETURN;
36088 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36089 regno = FP_ARG_RETURN;
36091 else
36093 regno = GP_ARG_RETURN;
36095 /* 32-bit is OK since it'll go in r3/r4. */
36096 if (TARGET_32BIT && inner_bytes >= 4)
36097 return gen_rtx_REG (mode, regno);
36100 if (inner_bytes >= 8)
36101 return gen_rtx_REG (mode, regno);
36103 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
36104 const0_rtx);
36105 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
36106 GEN_INT (inner_bytes));
36107 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
36110 /* Return an rtx describing a return value of MODE as a PARALLEL
36111 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
36112 stride REG_STRIDE. */
36114 static rtx
36115 rs6000_parallel_return (machine_mode mode,
36116 int n_elts, machine_mode elt_mode,
36117 unsigned int regno, unsigned int reg_stride)
36119 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
36121 int i;
36122 for (i = 0; i < n_elts; i++)
36124 rtx r = gen_rtx_REG (elt_mode, regno);
36125 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
36126 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
36127 regno += reg_stride;
36130 return par;
36133 /* Target hook for TARGET_FUNCTION_VALUE.
36135 An integer value is in r3 and a floating-point value is in fp1,
36136 unless -msoft-float. */
36138 static rtx
36139 rs6000_function_value (const_tree valtype,
36140 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
36141 bool outgoing ATTRIBUTE_UNUSED)
36143 machine_mode mode;
36144 unsigned int regno;
36145 machine_mode elt_mode;
36146 int n_elts;
36148 /* Special handling for structs in darwin64. */
36149 if (TARGET_MACHO
36150 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
36152 CUMULATIVE_ARGS valcum;
36153 rtx valret;
36155 valcum.words = 0;
36156 valcum.fregno = FP_ARG_MIN_REG;
36157 valcum.vregno = ALTIVEC_ARG_MIN_REG;
36158 /* Do a trial code generation as if this were going to be passed as
36159 an argument; if any part goes in memory, we return NULL. */
36160 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
36161 if (valret)
36162 return valret;
36163 /* Otherwise fall through to standard ABI rules. */
36166 mode = TYPE_MODE (valtype);
36168 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
36169 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
36171 int first_reg, n_regs;
36173 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
36175 /* _Decimal128 must use even/odd register pairs. */
36176 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36177 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
36179 else
36181 first_reg = ALTIVEC_ARG_RETURN;
36182 n_regs = 1;
36185 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
36188 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64. */
36189 if (TARGET_32BIT && TARGET_POWERPC64)
36190 switch (mode)
36192 default:
36193 break;
36194 case E_DImode:
36195 case E_SCmode:
36196 case E_DCmode:
36197 case E_TCmode:
36198 int count = GET_MODE_SIZE (mode) / 4;
36199 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
36202 if ((INTEGRAL_TYPE_P (valtype)
36203 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
36204 || POINTER_TYPE_P (valtype))
36205 mode = TARGET_32BIT ? SImode : DImode;
36207 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36208 /* _Decimal128 must use an even/odd register pair. */
36209 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36210 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
36211 && !FLOAT128_VECTOR_P (mode)
36212 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
36213 regno = FP_ARG_RETURN;
36214 else if (TREE_CODE (valtype) == COMPLEX_TYPE
36215 && targetm.calls.split_complex_arg)
36216 return rs6000_complex_function_value (mode);
36217 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36218 return register is used in both cases, and we won't see V2DImode/V2DFmode
36219 for pure altivec, combine the two cases. */
36220 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
36221 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
36222 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
36223 regno = ALTIVEC_ARG_RETURN;
36224 else
36225 regno = GP_ARG_RETURN;
36227 return gen_rtx_REG (mode, regno);
36230 /* Define how to find the value returned by a library function
36231 assuming the value has mode MODE. */
36232 static rtx
36233 rs6000_libcall_value (machine_mode mode)
36235 unsigned int regno;
36237 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
36238 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
36239 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
36241 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
36242 /* _Decimal128 must use an even/odd register pair. */
36243 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
36244 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
36245 && TARGET_HARD_FLOAT
36246 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
36247 regno = FP_ARG_RETURN;
36248 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
36249 return register is used in both cases, and we won't see V2DImode/V2DFmode
36250 for pure altivec, combine the two cases. */
36251 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
36252 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
36253 regno = ALTIVEC_ARG_RETURN;
36254 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
36255 return rs6000_complex_function_value (mode);
36256 else
36257 regno = GP_ARG_RETURN;
36259 return gen_rtx_REG (mode, regno);
36262 /* Compute register pressure classes. We implement the target hook to avoid
36263 IRA picking something like NON_SPECIAL_REGS as a pressure class, which can
36264 lead to incorrect estimates of the number of available registers and therefore
36265 increased register pressure/spill. */
36266 static int
36267 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
36269 int n;
36271 n = 0;
36272 pressure_classes[n++] = GENERAL_REGS;
36273 if (TARGET_VSX)
36274 pressure_classes[n++] = VSX_REGS;
36275 else
36277 if (TARGET_ALTIVEC)
36278 pressure_classes[n++] = ALTIVEC_REGS;
36279 if (TARGET_HARD_FLOAT)
36280 pressure_classes[n++] = FLOAT_REGS;
36282 pressure_classes[n++] = CR_REGS;
36283 pressure_classes[n++] = SPECIAL_REGS;
36285 return n;
36288 /* Given FROM and TO register numbers, say whether this elimination is allowed.
36289 Frame pointer elimination is automatically handled.
36291 For the RS/6000, if frame pointer elimination is being done, we would like
36292 to convert ap into fp, not sp.
36294 We need r30 if -mminimal-toc was specified and there are constant pool
36295 references. */
36297 static bool
36298 rs6000_can_eliminate (const int from, const int to)
36300 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
36301 ? ! frame_pointer_needed
36302 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
36303 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
36304 || constant_pool_empty_p ()
36305 : true);
36308 /* Define the offset between two registers, FROM to be eliminated and its
36309 replacement TO, at the start of a routine. */
36310 HOST_WIDE_INT
36311 rs6000_initial_elimination_offset (int from, int to)
36313 rs6000_stack_t *info = rs6000_stack_info ();
36314 HOST_WIDE_INT offset;
36316 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36317 offset = info->push_p ? 0 : -info->total_size;
36318 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36320 offset = info->push_p ? 0 : -info->total_size;
36321 if (FRAME_GROWS_DOWNWARD)
36322 offset += info->fixed_size + info->vars_size + info->parm_size;
36324 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36325 offset = FRAME_GROWS_DOWNWARD
36326 ? info->fixed_size + info->vars_size + info->parm_size
36327 : 0;
36328 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
36329 offset = info->total_size;
36330 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
36331 offset = info->push_p ? info->total_size : 0;
36332 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
36333 offset = 0;
36334 else
36335 gcc_unreachable ();
36337 return offset;
36340 /* Fill in sizes of registers used by unwinder. */
36342 static void
36343 rs6000_init_dwarf_reg_sizes_extra (tree address)
36345 if (TARGET_MACHO && ! TARGET_ALTIVEC)
36347 int i;
36348 machine_mode mode = TYPE_MODE (char_type_node);
36349 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
36350 rtx mem = gen_rtx_MEM (BLKmode, addr);
36351 rtx value = gen_int_mode (16, mode);
36353 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
36354 The unwinder still needs to know the size of Altivec registers. */
36356 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
36358 int column = DWARF_REG_TO_UNWIND_COLUMN
36359 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
36360 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
36362 emit_move_insn (adjust_address (mem, mode, offset), value);
36367 /* Map internal gcc register numbers to debug format register numbers.
36368 FORMAT specifies the type of debug register number to use:
36369 0 -- debug information, except for frame-related sections
36370 1 -- DWARF .debug_frame section
36371 2 -- DWARF .eh_frame section */
36373 unsigned int
36374 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
36376 /* Except for the above, we use the internal number for non-DWARF
36377 debug information, and also for .eh_frame. */
36378 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
36379 return regno;
36381 /* On some platforms, we use the standard DWARF register
36382 numbering for .debug_info and .debug_frame. */
36383 #ifdef RS6000_USE_DWARF_NUMBERING
36384 if (regno <= 63)
36385 return regno;
36386 if (regno == LR_REGNO)
36387 return 108;
36388 if (regno == CTR_REGNO)
36389 return 109;
36390 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
36391 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
36392 The actual code emitted saves the whole of CR, so we map CR2_REGNO
36393 to the DWARF reg for CR. */
36394 if (format == 1 && regno == CR2_REGNO)
36395 return 64;
36396 if (CR_REGNO_P (regno))
36397 return regno - CR0_REGNO + 86;
36398 if (regno == CA_REGNO)
36399 return 101; /* XER */
36400 if (ALTIVEC_REGNO_P (regno))
36401 return regno - FIRST_ALTIVEC_REGNO + 1124;
36402 if (regno == VRSAVE_REGNO)
36403 return 356;
36404 if (regno == VSCR_REGNO)
36405 return 67;
36406 #endif
36407 return regno;
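/* Examples of the mapping above: under RS6000_USE_DWARF_NUMBERING,
   GPRs keep their numbers (r3 -> 3), LR maps to 108, CTR to 109,
   and for .debug_frame (format == 1) CR2 maps to 64, the DWARF
   number for CR as a whole; .eh_frame (format == 2) always uses the
   internal numbers.  */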
36410 /* target hook eh_return_filter_mode */
36411 static scalar_int_mode
36412 rs6000_eh_return_filter_mode (void)
36414 return TARGET_32BIT ? SImode : word_mode;
36417 /* Target hook for scalar_mode_supported_p. */
36418 static bool
36419 rs6000_scalar_mode_supported_p (scalar_mode mode)
36421 /* -m32 does not support TImode. This is the default, from
36422 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
36423 same ABI as for -m32. But default_scalar_mode_supported_p allows
36424 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
36425 for -mpowerpc64. */
36426 if (TARGET_32BIT && mode == TImode)
36427 return false;
36429 if (DECIMAL_FLOAT_MODE_P (mode))
36430 return default_decimal_float_supported_p ();
36431 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
36432 return true;
36433 else
36434 return default_scalar_mode_supported_p (mode);
36437 /* Target hook for vector_mode_supported_p. */
36438 static bool
36439 rs6000_vector_mode_supported_p (machine_mode mode)
36442 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
36443 return true;
36445 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
36446 128-bit, the compiler might try to widen IEEE 128-bit to IBM
36447 double-double. */
36448 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
36449 return true;
36451 else
36452 return false;
36455 /* Target hook for floatn_mode. */
36456 static opt_scalar_float_mode
36457 rs6000_floatn_mode (int n, bool extended)
36459 if (extended)
36461 switch (n)
36463 case 32:
36464 return DFmode;
36466 case 64:
36467 if (TARGET_FLOAT128_TYPE)
36468 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36469 else
36470 return opt_scalar_float_mode ();
36472 case 128:
36473 return opt_scalar_float_mode ();
36475 default:
36476 /* Those are the only valid _FloatNx types. */
36477 gcc_unreachable ();
36480 else
36482 switch (n)
36484 case 32:
36485 return SFmode;
36487 case 64:
36488 return DFmode;
36490 case 128:
36491 if (TARGET_FLOAT128_TYPE)
36492 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36493 else
36494 return opt_scalar_float_mode ();
36496 default:
36497 return opt_scalar_float_mode ();
36503 /* Target hook for c_mode_for_suffix. */
36504 static machine_mode
36505 rs6000_c_mode_for_suffix (char suffix)
36507 if (TARGET_FLOAT128_TYPE)
36509 if (suffix == 'q' || suffix == 'Q')
36510 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
36512 /* At the moment, we are not defining a suffix for IBM extended double.
36513 If/when the default for -mabi=ieeelongdouble is changed, and we want
36514 to support __ibm128 constants in legacy library code, we may need to
36515 re-evaluate this decision. Currently, c-lex.c only supports 'w' and
36516 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
36517 __float80 constants. */
36520 return VOIDmode;
36523 /* Target hook for invalid_arg_for_unprototyped_fn. */
36524 static const char *
36525 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
36527 return (!rs6000_darwin64_abi
36528 && typelist == 0
36529 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
36530 && (funcdecl == NULL_TREE
36531 || (TREE_CODE (funcdecl) == FUNCTION_DECL
36532 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
36533 ? N_("AltiVec argument passed to unprototyped function")
36534 : NULL;
36537 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
36538 setup by using the hidden function __stack_chk_fail_local instead of
36539 calling __stack_chk_fail directly. Otherwise it is better to call
36540 __stack_chk_fail directly. */
36542 static tree ATTRIBUTE_UNUSED
36543 rs6000_stack_protect_fail (void)
36545 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
36546 ? default_hidden_stack_protect_fail ()
36547 : default_external_stack_protect_fail ();
36550 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
36552 #if TARGET_ELF
36553 static unsigned HOST_WIDE_INT
36554 rs6000_asan_shadow_offset (void)
36556 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
36558 #endif
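/* (For reference, a sketch of how the offset is used: AddressSanitizer
   computes shadow = (addr >> 3) + rs6000_asan_shadow_offset (), so the
   shadow region starts at 1 << 41 for 64-bit and 1 << 29 for 32-bit
   ELF targets.)  */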
36560 /* Mask options that we want to support inside of attribute((target)) and
36561 #pragma GCC target operations. Note, we do not include things like
36562 64/32-bit, endianness, hard/soft floating point, etc. that would have
36563 different calling sequences. */
36565 struct rs6000_opt_mask {
36566 const char *name; /* option name */
36567 HOST_WIDE_INT mask; /* mask to set */
36568 bool invert; /* invert sense of mask */
36569 bool valid_target; /* option is a target option */
36572 static struct rs6000_opt_mask const rs6000_opt_masks[] =
36574 { "altivec", OPTION_MASK_ALTIVEC, false, true },
36575 { "cmpb", OPTION_MASK_CMPB, false, true },
36576 { "crypto", OPTION_MASK_CRYPTO, false, true },
36577 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
36578 { "dlmzb", OPTION_MASK_DLMZB, false, true },
36579 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
36580 false, true },
36581 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
36582 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
36583 { "fprnd", OPTION_MASK_FPRND, false, true },
36584 { "hard-dfp", OPTION_MASK_DFP, false, true },
36585 { "htm", OPTION_MASK_HTM, false, true },
36586 { "isel", OPTION_MASK_ISEL, false, true },
36587 { "mfcrf", OPTION_MASK_MFCRF, false, true },
36588 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
36589 { "modulo", OPTION_MASK_MODULO, false, true },
36590 { "mulhw", OPTION_MASK_MULHW, false, true },
36591 { "multiple", OPTION_MASK_MULTIPLE, false, true },
36592 { "popcntb", OPTION_MASK_POPCNTB, false, true },
36593 { "popcntd", OPTION_MASK_POPCNTD, false, true },
36594 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
36595 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
36596 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
36597 { "power9-fusion", OPTION_MASK_P9_FUSION, false, true },
36598 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
36599 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
36600 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
36601 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
36602 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
36603 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
36604 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
36605 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
36606 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
36607 { "string", OPTION_MASK_STRING, false, true },
36608 { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true },
36609 { "update", OPTION_MASK_NO_UPDATE, true , true },
36610 { "vsx", OPTION_MASK_VSX, false, true },
36611 #ifdef OPTION_MASK_64BIT
36612 #if TARGET_AIX_OS
36613 { "aix64", OPTION_MASK_64BIT, false, false },
36614 { "aix32", OPTION_MASK_64BIT, true, false },
36615 #else
36616 { "64", OPTION_MASK_64BIT, false, false },
36617 { "32", OPTION_MASK_64BIT, true, false },
36618 #endif
36619 #endif
36620 #ifdef OPTION_MASK_EABI
36621 { "eabi", OPTION_MASK_EABI, false, false },
36622 #endif
36623 #ifdef OPTION_MASK_LITTLE_ENDIAN
36624 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
36625 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
36626 #endif
36627 #ifdef OPTION_MASK_RELOCATABLE
36628 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
36629 #endif
36630 #ifdef OPTION_MASK_STRICT_ALIGN
36631 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
36632 #endif
36633 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
36634 { "string", OPTION_MASK_STRING, false, false },
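/* Usage sketch (user-level code, assumed for illustration): the names in
   rs6000_opt_masks are the strings accepted inside the attribute/pragma,
   optionally prefixed with "no-":

     __attribute__((__target__("vsx,no-multiple")))
     void vector_kernel (void);

     #pragma GCC target ("altivec,htm")  */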
36637 /* Builtin mask mapping for printing the flags. */
36638 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
36640 { "altivec", RS6000_BTM_ALTIVEC, false, false },
36641 { "vsx", RS6000_BTM_VSX, false, false },
36642 { "paired", RS6000_BTM_PAIRED, false, false },
36643 { "fre", RS6000_BTM_FRE, false, false },
36644 { "fres", RS6000_BTM_FRES, false, false },
36645 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
36646 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
36647 { "popcntd", RS6000_BTM_POPCNTD, false, false },
36648 { "cell", RS6000_BTM_CELL, false, false },
36649 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
36650 { "power9-vector", RS6000_BTM_P9_VECTOR, false, false },
36651 { "power9-misc", RS6000_BTM_P9_MISC, false, false },
36652 { "crypto", RS6000_BTM_CRYPTO, false, false },
36653 { "htm", RS6000_BTM_HTM, false, false },
36654 { "hard-dfp", RS6000_BTM_DFP, false, false },
36655 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
36656 { "long-double-128", RS6000_BTM_LDBL128, false, false },
36657 { "float128", RS6000_BTM_FLOAT128, false, false },
36658 { "float128-hw", RS6000_BTM_FLOAT128_HW,false, false },
36661 /* Option variables that we want to support inside attribute((target)) and
36662 #pragma GCC target operations. */
36664 struct rs6000_opt_var {
36665 const char *name; /* option name */
36666 size_t global_offset; /* offset of the option in global_options. */
36667 size_t target_offset; /* offset of the option in target options. */
36670 static struct rs6000_opt_var const rs6000_opt_vars[] =
36672 { "friz",
36673 offsetof (struct gcc_options, x_TARGET_FRIZ),
36674 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
36675 { "avoid-indexed-addresses",
36676 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
36677 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
36678 { "paired",
36679 offsetof (struct gcc_options, x_rs6000_paired_float),
36680 offsetof (struct cl_target_option, x_rs6000_paired_float), },
36681 { "longcall",
36682 offsetof (struct gcc_options, x_rs6000_default_long_calls),
36683 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
36684 { "optimize-swaps",
36685 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
36686 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
36687 { "allow-movmisalign",
36688 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
36689 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
36690 { "sched-groups",
36691 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
36692 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
36693 { "always-hint",
36694 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
36695 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
36696 { "align-branch-targets",
36697 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
36698 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
36699 { "tls-markers",
36700 offsetof (struct gcc_options, x_tls_markers),
36701 offsetof (struct cl_target_option, x_tls_markers), },
36702 { "sched-prolog",
36703 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36704 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
36705 { "sched-epilog",
36706 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
36707 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
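/* How the offsets recorded above are consumed (this mirrors the matching
   loop in rs6000_inner_target_options below):

     size_t j = rs6000_opt_vars[i].global_offset;
     *((int *) ((char *) &global_options + j)) = !invert;

   i.e. the option variable is written directly through its byte offset
   into global_options.  */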
36710 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
36711 parsing. Return true if there were no errors. */
36713 static bool
36714 rs6000_inner_target_options (tree args, bool attr_p)
36716 bool ret = true;
36718 if (args == NULL_TREE)
36721 else if (TREE_CODE (args) == STRING_CST)
36723 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36724 char *q;
36726 while ((q = strtok (p, ",")) != NULL)
36728 bool error_p = false;
36729 bool not_valid_p = false;
36730 const char *cpu_opt = NULL;
36732 p = NULL;
36733 if (strncmp (q, "cpu=", 4) == 0)
36735 int cpu_index = rs6000_cpu_name_lookup (q+4);
36736 if (cpu_index >= 0)
36737 rs6000_cpu_index = cpu_index;
36738 else
36740 error_p = true;
36741 cpu_opt = q+4;
36744 else if (strncmp (q, "tune=", 5) == 0)
36746 int tune_index = rs6000_cpu_name_lookup (q+5);
36747 if (tune_index >= 0)
36748 rs6000_tune_index = tune_index;
36749 else
36751 error_p = true;
36752 cpu_opt = q+5;
36755 else
36757 size_t i;
36758 bool invert = false;
36759 char *r = q;
36761 error_p = true;
36762 if (strncmp (r, "no-", 3) == 0)
36764 invert = true;
36765 r += 3;
36768 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
36769 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
36771 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
36773 if (!rs6000_opt_masks[i].valid_target)
36774 not_valid_p = true;
36775 else
36777 error_p = false;
36778 rs6000_isa_flags_explicit |= mask;
36780 /* VSX needs altivec, so -mvsx automagically sets
36781 altivec and disables -mavoid-indexed-addresses. */
36782 if (!invert)
36784 if (mask == OPTION_MASK_VSX)
36786 mask |= OPTION_MASK_ALTIVEC;
36787 TARGET_AVOID_XFORM = 0;
36791 if (rs6000_opt_masks[i].invert)
36792 invert = !invert;
36794 if (invert)
36795 rs6000_isa_flags &= ~mask;
36796 else
36797 rs6000_isa_flags |= mask;
36799 break;
36802 if (error_p && !not_valid_p)
36804 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
36805 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
36807 size_t j = rs6000_opt_vars[i].global_offset;
36808 *((int *) ((char *)&global_options + j)) = !invert;
36809 error_p = false;
36810 not_valid_p = false;
36811 break;
36816 if (error_p)
36818 const char *eprefix, *esuffix;
36820 ret = false;
36821 if (attr_p)
36823 eprefix = "__attribute__((__target__(";
36824 esuffix = ")))";
36826 else
36828 eprefix = "#pragma GCC target ";
36829 esuffix = "";
36832 if (cpu_opt)
36833 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
36834 q, esuffix);
36835 else if (not_valid_p)
36836 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
36837 else
36838 error ("%s%qs%s is invalid", eprefix, q, esuffix);
36843 else if (TREE_CODE (args) == TREE_LIST)
36847 tree value = TREE_VALUE (args);
36848 if (value)
36850 bool ret2 = rs6000_inner_target_options (value, attr_p);
36851 if (!ret2)
36852 ret = false;
36854 args = TREE_CHAIN (args);
36856 while (args != NULL_TREE);
36859 else
36861 error ("attribute %<target%> argument not a string");
36862 return false;
36865 return ret;
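/* Examples of strings the parser above accepts (illustrative):

     #pragma GCC target ("cpu=power9")     sets rs6000_cpu_index
     #pragma GCC target ("tune=power8")    sets rs6000_tune_index
     #pragma GCC target ("no-vsx,friz")    mask and variable options

   Unknown names take the "is invalid" error path; names whose valid_target
   field is false take the "is not allowed" path.  */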
36868 /* Print out the target options as a list for -mdebug=target. */
36870 static void
36871 rs6000_debug_target_options (tree args, const char *prefix)
36873 if (args == NULL_TREE)
36874 fprintf (stderr, "%s<NULL>", prefix);
36876 else if (TREE_CODE (args) == STRING_CST)
36878 char *p = ASTRDUP (TREE_STRING_POINTER (args));
36879 char *q;
36881 while ((q = strtok (p, ",")) != NULL)
36883 p = NULL;
36884 fprintf (stderr, "%s\"%s\"", prefix, q);
36885 prefix = ", ";
36889 else if (TREE_CODE (args) == TREE_LIST)
36893 tree value = TREE_VALUE (args);
36894 if (value)
36896 rs6000_debug_target_options (value, prefix);
36897 prefix = ", ";
36899 args = TREE_CHAIN (args);
36901 while (args != NULL_TREE);
36904 else
36905 gcc_unreachable ();
36907 return;
36911 /* Hook to validate attribute((target("..."))). */
36913 static bool
36914 rs6000_valid_attribute_p (tree fndecl,
36915 tree ARG_UNUSED (name),
36916 tree args,
36917 int flags)
36919 struct cl_target_option cur_target;
36920 bool ret;
36921 tree old_optimize;
36922 tree new_target, new_optimize;
36923 tree func_optimize;
36925 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
36927 if (TARGET_DEBUG_TARGET)
36929 tree tname = DECL_NAME (fndecl);
36930 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
36931 if (tname)
36932 fprintf (stderr, "function: %.*s\n",
36933 (int) IDENTIFIER_LENGTH (tname),
36934 IDENTIFIER_POINTER (tname));
36935 else
36936 fprintf (stderr, "function: unknown\n");
36938 fprintf (stderr, "args:");
36939 rs6000_debug_target_options (args, " ");
36940 fprintf (stderr, "\n");
36942 if (flags)
36943 fprintf (stderr, "flags: 0x%x\n", flags);
36945 fprintf (stderr, "--------------------\n");
36948 /* attribute((target("default"))) does nothing, beyond
36949 affecting multi-versioning. */
36950 if (TREE_VALUE (args)
36951 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
36952 && TREE_CHAIN (args) == NULL_TREE
36953 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
36954 return true;
36956 old_optimize = build_optimization_node (&global_options);
36957 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
36959 /* If the function changed the optimization levels as well as setting target
36960 options, start with the optimizations specified. */
36961 if (func_optimize && func_optimize != old_optimize)
36962 cl_optimization_restore (&global_options,
36963 TREE_OPTIMIZATION (func_optimize));
36965 /* The target attributes may also change some optimization flags, so update
36966 the optimization options if necessary. */
36967 cl_target_option_save (&cur_target, &global_options);
36968 rs6000_cpu_index = rs6000_tune_index = -1;
36969 ret = rs6000_inner_target_options (args, true);
36971 /* Set up any additional state. */
36972 if (ret)
36974 ret = rs6000_option_override_internal (false);
36975 new_target = build_target_option_node (&global_options);
36977 else
36978 new_target = NULL;
36980 new_optimize = build_optimization_node (&global_options);
36982 if (!new_target)
36983 ret = false;
36985 else if (fndecl)
36987 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
36989 if (old_optimize != new_optimize)
36990 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
36993 cl_target_option_restore (&global_options, &cur_target);
36995 if (old_optimize != new_optimize)
36996 cl_optimization_restore (&global_options,
36997 TREE_OPTIMIZATION (old_optimize));
36999 return ret;
37003 /* Hook to validate the current #pragma GCC target and set the state, and
37004 update the macros based on what was changed. If ARGS is NULL, then
37005 POP_TARGET is used to reset the options. */
37007 bool
37008 rs6000_pragma_target_parse (tree args, tree pop_target)
37010 tree prev_tree = build_target_option_node (&global_options);
37011 tree cur_tree;
37012 struct cl_target_option *prev_opt, *cur_opt;
37013 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
37014 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
37016 if (TARGET_DEBUG_TARGET)
37018 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
37019 fprintf (stderr, "args:");
37020 rs6000_debug_target_options (args, " ");
37021 fprintf (stderr, "\n");
37023 if (pop_target)
37025 fprintf (stderr, "pop_target:\n");
37026 debug_tree (pop_target);
37028 else
37029 fprintf (stderr, "pop_target: <NULL>\n");
37031 fprintf (stderr, "--------------------\n");
37034 if (! args)
37036 cur_tree = ((pop_target)
37037 ? pop_target
37038 : target_option_default_node);
37039 cl_target_option_restore (&global_options,
37040 TREE_TARGET_OPTION (cur_tree));
37042 else
37044 rs6000_cpu_index = rs6000_tune_index = -1;
37045 if (!rs6000_inner_target_options (args, false)
37046 || !rs6000_option_override_internal (false)
37047 || (cur_tree = build_target_option_node (&global_options))
37048 == NULL_TREE)
37050 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
37051 fprintf (stderr, "invalid pragma\n");
37053 return false;
37057 target_option_current_node = cur_tree;
37058 rs6000_activate_target_options (target_option_current_node);
37060 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
37061 change the macros that are defined. */
37062 if (rs6000_target_modify_macros_ptr)
37064 prev_opt = TREE_TARGET_OPTION (prev_tree);
37065 prev_bumask = prev_opt->x_rs6000_builtin_mask;
37066 prev_flags = prev_opt->x_rs6000_isa_flags;
37068 cur_opt = TREE_TARGET_OPTION (cur_tree);
37069 cur_flags = cur_opt->x_rs6000_isa_flags;
37070 cur_bumask = cur_opt->x_rs6000_builtin_mask;
37072 diff_bumask = (prev_bumask ^ cur_bumask);
37073 diff_flags = (prev_flags ^ cur_flags);
37075 if ((diff_flags != 0) || (diff_bumask != 0))
37077 /* Delete old macros. */
37078 rs6000_target_modify_macros_ptr (false,
37079 prev_flags & diff_flags,
37080 prev_bumask & diff_bumask);
37082 /* Define new macros. */
37083 rs6000_target_modify_macros_ptr (true,
37084 cur_flags & diff_flags,
37085 cur_bumask & diff_bumask);
37089 return true;
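/* Illustration of the macro updates done through
   rs6000_target_modify_macros_ptr (the macro name shown is an assumption
   for the example):

     #pragma GCC target ("vsx")        __VSX__ becomes defined
     #pragma GCC target ("no-vsx")     __VSX__ is undefined again  */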
37093 /* Remember the last target of rs6000_set_current_function. */
37094 static GTY(()) tree rs6000_previous_fndecl;
37096 /* Restore target's globals from NEW_TREE and invalidate the
37097 rs6000_previous_fndecl cache. */
37099 void
37100 rs6000_activate_target_options (tree new_tree)
37102 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
37103 if (TREE_TARGET_GLOBALS (new_tree))
37104 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
37105 else if (new_tree == target_option_default_node)
37106 restore_target_globals (&default_target_globals);
37107 else
37108 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
37109 rs6000_previous_fndecl = NULL_TREE;
37112 /* Establish appropriate back-end context for processing the function
37113 FNDECL. The argument might be NULL to indicate processing at top
37114 level, outside of any function scope. */
37115 static void
37116 rs6000_set_current_function (tree fndecl)
37118 if (TARGET_DEBUG_TARGET)
37120 fprintf (stderr, "\n==================== rs6000_set_current_function");
37122 if (fndecl)
37123 fprintf (stderr, ", fndecl %s (%p)",
37124 (DECL_NAME (fndecl)
37125 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
37126 : "<unknown>"), (void *)fndecl);
37128 if (rs6000_previous_fndecl)
37129 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
37131 fprintf (stderr, "\n");
37134 /* Only change the context if the function changes. This hook is called
37135 several times in the course of compiling a function, and we don't want to
37136 slow things down too much or call target_reinit when it isn't safe. */
37137 if (fndecl == rs6000_previous_fndecl)
37138 return;
37140 tree old_tree;
37141 if (rs6000_previous_fndecl == NULL_TREE)
37142 old_tree = target_option_current_node;
37143 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
37144 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
37145 else
37146 old_tree = target_option_default_node;
37148 tree new_tree;
37149 if (fndecl == NULL_TREE)
37151 if (old_tree != target_option_current_node)
37152 new_tree = target_option_current_node;
37153 else
37154 new_tree = NULL_TREE;
37156 else
37158 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37159 if (new_tree == NULL_TREE)
37160 new_tree = target_option_default_node;
37163 if (TARGET_DEBUG_TARGET)
37165 if (new_tree)
37167 fprintf (stderr, "\nnew fndecl target specific options:\n");
37168 debug_tree (new_tree);
37171 if (old_tree)
37173 fprintf (stderr, "\nold fndecl target specific options:\n");
37174 debug_tree (old_tree);
37177 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
37178 fprintf (stderr, "--------------------\n");
37181 if (new_tree && old_tree != new_tree)
37182 rs6000_activate_target_options (new_tree);
37184 if (fndecl)
37185 rs6000_previous_fndecl = fndecl;
37189 /* Save the current options */
37191 static void
37192 rs6000_function_specific_save (struct cl_target_option *ptr,
37193 struct gcc_options *opts)
37195 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
37196 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
37199 /* Restore the current options */
37201 static void
37202 rs6000_function_specific_restore (struct gcc_options *opts,
37203 struct cl_target_option *ptr)
37206 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
37207 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
37208 (void) rs6000_option_override_internal (false);
37211 /* Print the current options */
37213 static void
37214 rs6000_function_specific_print (FILE *file, int indent,
37215 struct cl_target_option *ptr)
37217 rs6000_print_isa_options (file, indent, "Isa options set",
37218 ptr->x_rs6000_isa_flags);
37220 rs6000_print_isa_options (file, indent, "Isa options explicit",
37221 ptr->x_rs6000_isa_flags_explicit);
37224 /* Helper function to print the current isa or misc options on a line. */
37226 static void
37227 rs6000_print_options_internal (FILE *file,
37228 int indent,
37229 const char *string,
37230 HOST_WIDE_INT flags,
37231 const char *prefix,
37232 const struct rs6000_opt_mask *opts,
37233 size_t num_elements)
37235 size_t i;
37236 size_t start_column = 0;
37237 size_t cur_column;
37238 size_t max_column = 120;
37239 size_t prefix_len = strlen (prefix);
37240 size_t comma_len = 0;
37241 const char *comma = "";
37243 if (indent)
37244 start_column += fprintf (file, "%*s", indent, "");
37246 if (!flags)
37248 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
37249 return;
37252 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
37254 /* Print the various mask options. */
37255 cur_column = start_column;
37256 for (i = 0; i < num_elements; i++)
37258 bool invert = opts[i].invert;
37259 const char *name = opts[i].name;
37260 const char *no_str = "";
37261 HOST_WIDE_INT mask = opts[i].mask;
37262 size_t len = comma_len + prefix_len + strlen (name);
37264 if (!invert)
37266 if ((flags & mask) == 0)
37268 no_str = "no-";
37269 len += sizeof ("no-") - 1;
37272 flags &= ~mask;
37275 else
37277 if ((flags & mask) != 0)
37279 no_str = "no-";
37280 len += sizeof ("no-") - 1;
37283 flags |= mask;
37286 cur_column += len;
37287 if (cur_column > max_column)
37289 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
37290 cur_column = start_column + len;
37291 comma = "";
37294 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
37295 comma = ", ";
37296 comma_len = sizeof (", ") - 1;
37299 fputs ("\n", file);
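/* Sample of the emitted debug output (the exact header format comes from
   DEBUG_FMT_WX; the lines below are illustrative): long option lists are
   wrapped before column 120 with a trailing backslash, e.g.

     Isa options set = 0x...: -maltivec, -mvsx, -mno-string, \
                              -mhard-dfp, ...  */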
37302 /* Helper function to print the current isa options on a line. */
37304 static void
37305 rs6000_print_isa_options (FILE *file, int indent, const char *string,
37306 HOST_WIDE_INT flags)
37308 rs6000_print_options_internal (file, indent, string, flags, "-m",
37309 &rs6000_opt_masks[0],
37310 ARRAY_SIZE (rs6000_opt_masks));
37313 static void
37314 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
37315 HOST_WIDE_INT flags)
37317 rs6000_print_options_internal (file, indent, string, flags, "",
37318 &rs6000_builtin_mask_names[0],
37319 ARRAY_SIZE (rs6000_builtin_mask_names));
37322 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
37323 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
37324 -mupper-regs-df, etc.).
37326 If the user used -mno-power8-vector, we need to turn off all of the implicit
37327 ISA 2.07 and 3.0 options that relate to the vector unit.
37329 If the user used -mno-power9-vector, we need to turn off all of the implicit
37330 ISA 3.0 options that relate to the vector unit.
37332 This function does not handle explicit options such as the user specifying
37333 -mdirect-move. These are handled in rs6000_option_override_internal, and
37334 the appropriate error is given if needed.
37336 We return a mask of all of the implicit options that should not be enabled
37337 by default. */
37339 static HOST_WIDE_INT
37340 rs6000_disable_incompatible_switches (void)
37342 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
37343 size_t i, j;
37345 static const struct {
37346 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
37347 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
37348 const char *const name; /* name of the switch. */
37349 } flags[] = {
37350 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
37351 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
37352 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
37355 for (i = 0; i < ARRAY_SIZE (flags); i++)
37357 HOST_WIDE_INT no_flag = flags[i].no_flag;
37359 if ((rs6000_isa_flags & no_flag) == 0
37360 && (rs6000_isa_flags_explicit & no_flag) != 0)
37362 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
37363 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
37364 & rs6000_isa_flags
37365 & dep_flags);
37367 if (set_flags)
37369 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
37370 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
37372 set_flags &= ~rs6000_opt_masks[j].mask;
37373 error ("%<-mno-%s%> turns off %<-m%s%>",
37374 flags[i].name,
37375 rs6000_opt_masks[j].name);
37378 gcc_assert (!set_flags);
37381 rs6000_isa_flags &= ~dep_flags;
37382 ignore_masks |= no_flag | dep_flags;
37386 return ignore_masks;
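/* Example of the interaction handled above (illustrative): plain
   -mcpu=power8 -mno-vsx silently masks off the implicit vector options,
   while an explicit combination such as

     -mcpu=power8 -mno-vsx -mpower8-vector

   reaches the error call above and is diagnosed as
   "-mno-vsx turns off -mpower8-vector".  */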
37390 /* Helper function for printing the function name when debugging. */
37392 static const char *
37393 get_decl_name (tree fn)
37395 tree name;
37397 if (!fn)
37398 return "<null>";
37400 name = DECL_NAME (fn);
37401 if (!name)
37402 return "<no-name>";
37404 return IDENTIFIER_POINTER (name);
37407 /* Return the clone id of the target we are compiling code for in a target
37408 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
37409 the priority list for the target clones (ordered from lowest to
37410 highest). */
37412 static int
37413 rs6000_clone_priority (tree fndecl)
37415 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
37416 HOST_WIDE_INT isa_masks;
37417 int ret = CLONE_DEFAULT;
37418 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
37419 const char *attrs_str = NULL;
37421 attrs = TREE_VALUE (TREE_VALUE (attrs));
37422 attrs_str = TREE_STRING_POINTER (attrs);
37424 /* Return priority zero for default function. Return the ISA needed for the
37425 function if it is not the default. */
37426 if (strcmp (attrs_str, "default") != 0)
37428 if (fn_opts == NULL_TREE)
37429 fn_opts = target_option_default_node;
37431 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
37432 isa_masks = rs6000_isa_flags;
37433 else
37434 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
37436 for (ret = CLONE_MAX - 1; ret != 0; ret--)
37437 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
37438 break;
37441 if (TARGET_DEBUG_TARGET)
37442 fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
37443 get_decl_name (fndecl), ret);
37445 return ret;
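/* Usage sketch for the clone priorities (attribute string assumed from the
   target_clones support):

     __attribute__((target_clones ("cpu=power9,cpu=power8,default")))
     long sum (long *p, long n);

   The "default" clone gets CLONE_DEFAULT (0); the other clones get the
   highest rs6000_clone_map entry whose ISA mask they satisfy.  */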
37448 /* This compares the priority of target features in function DECL1 and DECL2.
37449 It returns positive value if DECL1 is higher priority, negative value if
37450 DECL2 is higher priority and 0 if they are the same. Note, priorities are
37451 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
37453 static int
37454 rs6000_compare_version_priority (tree decl1, tree decl2)
37456 int priority1 = rs6000_clone_priority (decl1);
37457 int priority2 = rs6000_clone_priority (decl2);
37458 int ret = priority1 - priority2;
37460 if (TARGET_DEBUG_TARGET)
37461 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
37462 get_decl_name (decl1), get_decl_name (decl2), ret);
37464 return ret;
37467 /* Make a dispatcher declaration for the multi-versioned function DECL.
37468 Calls to DECL function will be replaced with calls to the dispatcher
37469 by the front-end. Returns the decl of the dispatcher function. */
37471 static tree
37472 rs6000_get_function_versions_dispatcher (void *decl)
37474 tree fn = (tree) decl;
37475 struct cgraph_node *node = NULL;
37476 struct cgraph_node *default_node = NULL;
37477 struct cgraph_function_version_info *node_v = NULL;
37478 struct cgraph_function_version_info *first_v = NULL;
37480 tree dispatch_decl = NULL;
37482 struct cgraph_function_version_info *default_version_info = NULL;
37483 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
37485 if (TARGET_DEBUG_TARGET)
37486 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
37487 get_decl_name (fn));
37489 node = cgraph_node::get (fn);
37490 gcc_assert (node != NULL);
37492 node_v = node->function_version ();
37493 gcc_assert (node_v != NULL);
37495 if (node_v->dispatcher_resolver != NULL)
37496 return node_v->dispatcher_resolver;
37498 /* Find the default version and make it the first node. */
37499 first_v = node_v;
37500 /* Go to the beginning of the chain. */
37501 while (first_v->prev != NULL)
37502 first_v = first_v->prev;
37504 default_version_info = first_v;
37505 while (default_version_info != NULL)
37507 const tree decl2 = default_version_info->this_node->decl;
37508 if (is_function_default_version (decl2))
37509 break;
37510 default_version_info = default_version_info->next;
37513 /* If there is no default node, just return NULL. */
37514 if (default_version_info == NULL)
37515 return NULL;
37517 /* Make default info the first node. */
37518 if (first_v != default_version_info)
37520 default_version_info->prev->next = default_version_info->next;
37521 if (default_version_info->next)
37522 default_version_info->next->prev = default_version_info->prev;
37523 first_v->prev = default_version_info;
37524 default_version_info->next = first_v;
37525 default_version_info->prev = NULL;
37528 default_node = default_version_info->this_node;
37530 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
37531 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37532 "target_clones attribute needs GLIBC (2.23 and newer) that "
37533 "exports hardware capability bits");
37534 #else
37536 if (targetm.has_ifunc_p ())
37538 struct cgraph_function_version_info *it_v = NULL;
37539 struct cgraph_node *dispatcher_node = NULL;
37540 struct cgraph_function_version_info *dispatcher_version_info = NULL;
37542 /* Right now, the dispatching is done via ifunc. */
37543 dispatch_decl = make_dispatcher_decl (default_node->decl);
37545 dispatcher_node = cgraph_node::get_create (dispatch_decl);
37546 gcc_assert (dispatcher_node != NULL);
37547 dispatcher_node->dispatcher_function = 1;
37548 dispatcher_version_info
37549 = dispatcher_node->insert_new_function_version ();
37550 dispatcher_version_info->next = default_version_info;
37551 dispatcher_node->definition = 1;
37553 /* Set the dispatcher for all the versions. */
37554 it_v = default_version_info;
37555 while (it_v != NULL)
37557 it_v->dispatcher_resolver = dispatch_decl;
37558 it_v = it_v->next;
37561 else
37563 error_at (DECL_SOURCE_LOCATION (default_node->decl),
37564 "multiversioning needs ifunc which is not supported "
37565 "on this target");
37567 #endif
37569 return dispatch_decl;
37572 /* Make the resolver function decl to dispatch the versions of a multi-
37573 versioned function, DEFAULT_DECL. Create an empty basic block in the
37574 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
37575 function. */
37577 static tree
37578 make_resolver_func (const tree default_decl,
37579 const tree dispatch_decl,
37580 basic_block *empty_bb)
37582 /* Make the resolver function static. The resolver function returns
37583 void *. */
37584 tree decl_name = clone_function_name (default_decl, "resolver");
37585 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
37586 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
37587 tree decl = build_fn_decl (resolver_name, type);
37588 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
37590 DECL_NAME (decl) = decl_name;
37591 TREE_USED (decl) = 1;
37592 DECL_ARTIFICIAL (decl) = 1;
37593 DECL_IGNORED_P (decl) = 0;
37594 TREE_PUBLIC (decl) = 0;
37595 DECL_UNINLINABLE (decl) = 1;
37597 /* Resolver is not external, body is generated. */
37598 DECL_EXTERNAL (decl) = 0;
37599 DECL_EXTERNAL (dispatch_decl) = 0;
37601 DECL_CONTEXT (decl) = NULL_TREE;
37602 DECL_INITIAL (decl) = make_node (BLOCK);
37603 DECL_STATIC_CONSTRUCTOR (decl) = 0;
37605 /* Build result decl and add to function_decl. */
37606 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
37607 DECL_ARTIFICIAL (t) = 1;
37608 DECL_IGNORED_P (t) = 1;
37609 DECL_RESULT (decl) = t;
37611 gimplify_function_tree (decl);
37612 push_cfun (DECL_STRUCT_FUNCTION (decl));
37613 *empty_bb = init_lowered_empty_function (decl, false,
37614 profile_count::uninitialized ());
37616 cgraph_node::add_new_function (decl, true);
37617 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
37619 pop_cfun ();
37621 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
37622 DECL_ATTRIBUTES (dispatch_decl)
37623 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
37625 cgraph_node::create_same_body_alias (dispatch_decl, decl);
37627 return decl;
37630 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
37631 return a pointer to VERSION_DECL if we are running on a machine that
37632 supports the index CLONE_ISA hardware architecture bits. This function will
37633 be called during version dispatch to decide which function version to
37634 execute. It returns the basic block at the end, to which more conditions
37635 can be added. */
37637 static basic_block
37638 add_condition_to_bb (tree function_decl, tree version_decl,
37639 int clone_isa, basic_block new_bb)
37641 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
37643 gcc_assert (new_bb != NULL);
37644 gimple_seq gseq = bb_seq (new_bb);
37647 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
37648 build_fold_addr_expr (version_decl));
37649 tree result_var = create_tmp_var (ptr_type_node);
37650 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
37651 gimple *return_stmt = gimple_build_return (result_var);
37653 if (clone_isa == CLONE_DEFAULT)
37655 gimple_seq_add_stmt (&gseq, convert_stmt);
37656 gimple_seq_add_stmt (&gseq, return_stmt);
37657 set_bb_seq (new_bb, gseq);
37658 gimple_set_bb (convert_stmt, new_bb);
37659 gimple_set_bb (return_stmt, new_bb);
37660 pop_cfun ();
37661 return new_bb;
37664 tree bool_zero = build_int_cst (bool_int_type_node, 0);
37665 tree cond_var = create_tmp_var (bool_int_type_node);
37666 tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
37667 const char *arg_str = rs6000_clone_map[clone_isa].name;
37668 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
37669 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
37670 gimple_call_set_lhs (call_cond_stmt, cond_var);
37672 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
37673 gimple_set_bb (call_cond_stmt, new_bb);
37674 gimple_seq_add_stmt (&gseq, call_cond_stmt);
37676 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
37677 NULL_TREE, NULL_TREE);
37678 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
37679 gimple_set_bb (if_else_stmt, new_bb);
37680 gimple_seq_add_stmt (&gseq, if_else_stmt);
37682 gimple_seq_add_stmt (&gseq, convert_stmt);
37683 gimple_seq_add_stmt (&gseq, return_stmt);
37684 set_bb_seq (new_bb, gseq);
37686 basic_block bb1 = new_bb;
37687 edge e12 = split_block (bb1, if_else_stmt);
37688 basic_block bb2 = e12->dest;
37689 e12->flags &= ~EDGE_FALLTHRU;
37690 e12->flags |= EDGE_TRUE_VALUE;
37692 edge e23 = split_block (bb2, return_stmt);
37693 gimple_set_bb (convert_stmt, bb2);
37694 gimple_set_bb (return_stmt, bb2);
37696 basic_block bb3 = e23->dest;
37697 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
37699 remove_edge (e23);
37700 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
37702 pop_cfun ();
37703 return bb3;
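/* The resolver assembled from these blocks behaves roughly like the
   following sketch (the predicate strings actually come from
   rs6000_clone_map; the ones shown are plausible examples):

     void *resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) f_power9;
       if (__builtin_cpu_supports ("arch_2_07"))
         return (void *) f_power8;
       return (void *) f_default;
     }  */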
37706 /* This function generates the dispatch function for multi-versioned functions.
37707 DISPATCH_DECL is the function which will contain the dispatch logic.
37708 FNDECLS are the function choices for dispatch, and is a tree chain.
37709 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
37710 code is generated. */
37712 static int
37713 dispatch_function_versions (tree dispatch_decl,
37714 void *fndecls_p,
37715 basic_block *empty_bb)
37717 int ix;
37718 tree ele;
37719 vec<tree> *fndecls;
37720 tree clones[CLONE_MAX];
37722 if (TARGET_DEBUG_TARGET)
37723 fputs ("dispatch_function_versions, top\n", stderr);
37725 gcc_assert (dispatch_decl != NULL
37726 && fndecls_p != NULL
37727 && empty_bb != NULL);
37729 /* fndecls_p is actually a vector. */
37730 fndecls = static_cast<vec<tree> *> (fndecls_p);
37732 /* At least one more version other than the default. */
37733 gcc_assert (fndecls->length () >= 2);
37735 /* The first version in the vector is the default decl. */
37736 memset ((void *) clones, '\0', sizeof (clones));
37737 clones[CLONE_DEFAULT] = (*fndecls)[0];
37739 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
37740 on the PowerPC (on the x86_64, it is not a NOP). The builtin function
37741 __builtin_cpu_supports ensures that the TOC fields are set up by requiring a
37742 recent glibc. If we ever need to call __builtin_cpu_init, we would need
37743 to insert the code here to do the call. */
37745 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
37747 int priority = rs6000_clone_priority (ele);
37748 if (!clones[priority])
37749 clones[priority] = ele;
37752 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
37753 if (clones[ix])
37755 if (TARGET_DEBUG_TARGET)
37756 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
37757 ix, get_decl_name (clones[ix]));
37759 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
37760 *empty_bb);
37763 return 0;
37766 /* Generate the dispatching code body to dispatch multi-versioned function
37767 DECL. The target hook is called to process the "target" attributes and
37768 provide the code to dispatch the right function at run-time. NODE points
37769 to the dispatcher decl whose body will be created. */
37771 static tree
37772 rs6000_generate_version_dispatcher_body (void *node_p)
37774 tree resolver;
37775 basic_block empty_bb;
37776 struct cgraph_node *node = (cgraph_node *) node_p;
37777 struct cgraph_function_version_info *ninfo = node->function_version ();
37779 if (ninfo->dispatcher_resolver)
37780 return ninfo->dispatcher_resolver;
37782 /* node is going to be an alias, so remove the finalized bit. */
37783 node->definition = false;
37785 /* The first version in the chain corresponds to the default version. */
37786 ninfo->dispatcher_resolver = resolver
37787 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
37789 if (TARGET_DEBUG_TARGET)
37790 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
37791 get_decl_name (resolver));
37793 push_cfun (DECL_STRUCT_FUNCTION (resolver));
37794 auto_vec<tree, 2> fn_ver_vec;
37796 for (struct cgraph_function_version_info *vinfo = ninfo->next;
37797 vinfo;
37798 vinfo = vinfo->next)
37800 struct cgraph_node *version = vinfo->this_node;
37801 /* Check for virtual functions here again, as by this time it should
37802 have been determined if this function needs a vtable index or
37803 not. This happens for methods in derived classes that override
37804 virtual methods in base classes but are not explicitly marked as
37805 virtual. */
37806 if (DECL_VINDEX (version->decl))
37807 sorry ("virtual function multiversioning not supported");
37809 fn_ver_vec.safe_push (version->decl);
37812 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
37813 cgraph_edge::rebuild_edges ();
37814 pop_cfun ();
37815 return resolver;
37819 /* Hook to determine if one function can safely inline another. */
37821 static bool
37822 rs6000_can_inline_p (tree caller, tree callee)
37824 bool ret = false;
37825 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
37826 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
37828 /* If callee has no option attributes, then it is ok to inline. */
37829 if (!callee_tree)
37830 ret = true;
37832 /* If caller has no option attributes, but callee does then it is not ok to
37833 inline. */
37834 else if (!caller_tree)
37835 ret = false;
37837 else
37839 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
37840 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
37842 /* Callee's options should be a subset of the caller's, i.e. a vsx function
37843 can inline an altivec function but a non-vsx function can't inline a
37844 vsx function. */
37845 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
37846 == callee_opts->x_rs6000_isa_flags)
37847 ret = true;
37850 if (TARGET_DEBUG_TARGET)
37851 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
37852 get_decl_name (caller), get_decl_name (callee),
37853 (ret ? "can" : "cannot"));
37855 return ret;
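/* Example of the subset rule (illustrative declarations):

     __attribute__((target ("altivec"))) int callee (void);
     __attribute__((target ("vsx")))     int caller (void);

   caller may inline callee because -mvsx implies -maltivec, so callee's
   ISA flags are a subset of caller's; a function with no target attribute
   at all may not inline callee.  */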
37858 /* Allocate a stack temp and fix up the address so it meets the particular
37859 memory requirements (either offsettable or REG+REG addressing). */
37861 rtx
37862 rs6000_allocate_stack_temp (machine_mode mode,
37863 bool offsettable_p,
37864 bool reg_reg_p)
37866 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
37867 rtx addr = XEXP (stack, 0);
37868 int strict_p = reload_completed;
37870 if (!legitimate_indirect_address_p (addr, strict_p))
37872 if (offsettable_p
37873 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
37874 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37876 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
37877 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
37880 return stack;
37883 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
37884 to such a form to deal with memory reference instructions like STFIWX that
37885 only take reg+reg addressing. */
37887 rtx
37888 rs6000_address_for_fpconvert (rtx x)
37890 rtx addr;
37892 gcc_assert (MEM_P (x));
37893 addr = XEXP (x, 0);
37894 if (can_create_pseudo_p ()
37895 && ! legitimate_indirect_address_p (addr, reload_completed)
37896 && ! legitimate_indexed_address_p (addr, reload_completed))
37898 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
37900 rtx reg = XEXP (addr, 0);
37901 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
37902 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
37903 gcc_assert (REG_P (reg));
37904 emit_insn (gen_add3_insn (reg, reg, size_rtx));
37905 addr = reg;
37907 else if (GET_CODE (addr) == PRE_MODIFY)
37909 rtx reg = XEXP (addr, 0);
37910 rtx expr = XEXP (addr, 1);
37911 gcc_assert (REG_P (reg));
37912 gcc_assert (GET_CODE (expr) == PLUS);
37913 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
37914 addr = reg;
37917 x = replace_equiv_address (x, copy_addr_to_reg (addr));
37920 return x;
37923 /* Given a memory reference, if it is not in the form for altivec memory
37924 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
37925 convert to the altivec format. */
37927 rtx
37928 rs6000_address_for_altivec (rtx x)
37930 gcc_assert (MEM_P (x));
37931 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
37933 rtx addr = XEXP (x, 0);
37935 if (!legitimate_indexed_address_p (addr, reload_completed)
37936 && !legitimate_indirect_address_p (addr, reload_completed))
37937 addr = copy_to_mode_reg (Pmode, addr);
37939 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
37940 x = change_address (x, GET_MODE (x), addr);
37943 return x;
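/* Resulting address shape (RTL sketch):

     (mem:V4SI (and:DI (reg:DI 9) (const_int -16)))

   The altivec move patterns accept this form and emit plain lvx/stvx,
   which already ignore the low four address bits.  */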
37946 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
37948 On the RS/6000, all integer constants are acceptable, most won't be valid
37949 for particular insns, though. Only easy FP constants are acceptable. */
37951 static bool
37952 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
37954 if (TARGET_ELF && tls_referenced_p (x))
37955 return false;
37957 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
37958 || GET_MODE (x) == VOIDmode
37959 || (TARGET_POWERPC64 && mode == DImode)
37960 || easy_fp_constant (x, mode)
37961 || easy_vector_constant (x, mode));
37965 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
37967 static bool
37968 chain_already_loaded (rtx_insn *last)
37970 for (; last != NULL; last = PREV_INSN (last))
37972 if (NONJUMP_INSN_P (last))
37974 rtx patt = PATTERN (last);
37976 if (GET_CODE (patt) == SET)
37978 rtx lhs = XEXP (patt, 0);
37980 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
37981 return true;
37985 return false;
37988 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
37990 void
37991 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
37993 const bool direct_call_p
37994 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
37995 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
37996 rtx toc_load = NULL_RTX;
37997 rtx toc_restore = NULL_RTX;
37998 rtx func_addr;
37999 rtx abi_reg = NULL_RTX;
38000 rtx call[4];
38001 int n_call;
38002 rtx insn;
38004 /* Handle longcall attributes. */
38005 if (INTVAL (cookie) & CALL_LONG)
38006 func_desc = rs6000_longcall_ref (func_desc);
38008 /* Handle indirect calls. */
38009 if (GET_CODE (func_desc) != SYMBOL_REF
38010 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
38012 /* Save the TOC into its reserved slot before the call,
38013 and prepare to restore it after the call. */
38014 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
38015 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
38016 rtx stack_toc_mem = gen_frame_mem (Pmode,
38017 gen_rtx_PLUS (Pmode, stack_ptr,
38018 stack_toc_offset));
38019 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
38020 gen_rtvec (1, stack_toc_offset),
38021 UNSPEC_TOCSLOT);
38022 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
38024 /* Can we optimize saving the TOC in the prologue or
38025 do we need to do it at every call? */
38026 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
38027 cfun->machine->save_toc_in_prologue = true;
38028 else
38030 MEM_VOLATILE_P (stack_toc_mem) = 1;
38031 emit_move_insn (stack_toc_mem, toc_reg);
38034 if (DEFAULT_ABI == ABI_ELFv2)
38036 /* A function pointer in the ELFv2 ABI is just a plain address, but
38037 the ABI requires it to be loaded into r12 before the call. */
38038 func_addr = gen_rtx_REG (Pmode, 12);
38039 emit_move_insn (func_addr, func_desc);
38040 abi_reg = func_addr;
38042 else
38044 /* A function pointer under AIX is a pointer to a data area whose
38045 first word contains the actual address of the function, whose
38046 second word contains a pointer to its TOC, and whose third word
38047 contains a value to place in the static chain register (r11).
38048 Note that if we load the static chain, our "trampoline" need
38049 not have any executable code. */
38051 /* Load up address of the actual function. */
38052 func_desc = force_reg (Pmode, func_desc);
38053 func_addr = gen_reg_rtx (Pmode);
38054 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
38056 /* Prepare to load the TOC of the called function. Note that the
38057 TOC load must happen immediately before the actual call so
38058 that unwinding the TOC registers works correctly. See the
38059 comment in frob_update_context. */
38060 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
38061 rtx func_toc_mem = gen_rtx_MEM (Pmode,
38062 gen_rtx_PLUS (Pmode, func_desc,
38063 func_toc_offset));
38064 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
38066 /* If we have a static chain, load it up. But, if the call was
38067 originally direct, the 3rd word has not been written since no
38068 trampoline has been built, so we ought not to load it, lest we
38069 override a static chain value. */
38070 if (!direct_call_p
38071 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
38072 && !chain_already_loaded (get_current_sequence ()->next->last))
38074 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
38075 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
38076 rtx func_sc_mem = gen_rtx_MEM (Pmode,
38077 gen_rtx_PLUS (Pmode, func_desc,
38078 func_sc_offset));
38079 emit_move_insn (sc_reg, func_sc_mem);
38080 abi_reg = sc_reg;
38084 else
38086 /* Direct calls use the TOC: for local calls, the callee will
38087 assume the TOC register is set; for non-local calls, the
38088 PLT stub needs the TOC register. */
38089 abi_reg = toc_reg;
38090 func_addr = func_desc;
38093 /* Create the call. */
38094 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
38095 if (value != NULL_RTX)
38096 call[0] = gen_rtx_SET (value, call[0]);
38097 n_call = 1;
38099 if (toc_load)
38100 call[n_call++] = toc_load;
38101 if (toc_restore)
38102 call[n_call++] = toc_restore;
38104 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
38106 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
38107 insn = emit_call_insn (insn);
38109 /* Mention all registers defined by the ABI to hold information
38110 as uses in CALL_INSN_FUNCTION_USAGE. */
38111 if (abi_reg)
38112 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
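/* Function-descriptor layout assumed by the AIX indirect-call code above
   (each word is Pmode-sized):

     word 0: entry-point address of the function (loaded into func_addr)
     word 1: TOC pointer of the callee (loaded right before the call)
     word 2: static chain / environment value (loaded into r11 if used)  */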
38115 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
38117 void
38118 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
38120 rtx call[2];
38121 rtx insn;
38123 gcc_assert (INTVAL (cookie) == 0);
38125 /* Create the call. */
38126 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
38127 if (value != NULL_RTX)
38128 call[0] = gen_rtx_SET (value, call[0]);
38130 call[1] = simple_return_rtx;
38132 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
38133 insn = emit_call_insn (insn);
38135 /* Note use of the TOC register. */
38136 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
38139 /* Return whether we need to always update the saved TOC pointer when we update
38140 the stack pointer. */
38142 static bool
38143 rs6000_save_toc_in_prologue_p (void)
38145 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
38148 #ifdef HAVE_GAS_HIDDEN
38149 # define USE_HIDDEN_LINKONCE 1
38150 #else
38151 # define USE_HIDDEN_LINKONCE 0
38152 #endif
38154 /* Fills in the label name that should be used for a 476 link stack thunk. */
38156 void
38157 get_ppc476_thunk_name (char name[32])
38159 gcc_assert (TARGET_LINK_STACK);
38161 if (USE_HIDDEN_LINKONCE)
38162 sprintf (name, "__ppc476.get_thunk");
38163 else
38164 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
38167 /* This function emits the simple thunk routine that is used to preserve
38168 the link stack on the 476 cpu. */
38170 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
38171 static void
38172 rs6000_code_end (void)
38174 char name[32];
38175 tree decl;
38177 if (!TARGET_LINK_STACK)
38178 return;
38180 get_ppc476_thunk_name (name);
38182 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
38183 build_function_type_list (void_type_node, NULL_TREE));
38184 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
38185 NULL_TREE, void_type_node);
38186 TREE_PUBLIC (decl) = 1;
38187 TREE_STATIC (decl) = 1;
38189 #if RS6000_WEAK
38190 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
38192 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
38193 targetm.asm_out.unique_section (decl, 0);
38194 switch_to_section (get_named_section (decl, NULL, 0));
38195 DECL_WEAK (decl) = 1;
38196 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
38197 targetm.asm_out.globalize_label (asm_out_file, name);
38198 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
38199 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
38201 else
38202 #endif
38204 switch_to_section (text_section);
38205 ASM_OUTPUT_LABEL (asm_out_file, name);
38208 DECL_INITIAL (decl) = make_node (BLOCK);
38209 current_function_decl = decl;
38210 allocate_struct_function (decl, false);
38211 init_function_start (decl);
38212 first_function_block_is_cold = false;
38213 /* Make sure unwind info is emitted for the thunk if needed. */
38214 final_start_function (emit_barrier (), asm_out_file, 1);
38216 fputs ("\tblr\n", asm_out_file);
38218 final_end_function ();
38219 init_insn_lengths ();
38220 free_after_compilation (cfun);
38221 set_cfun (NULL);
38222 current_function_decl = NULL;
38225 /* Add r30 to hard reg set if the prologue sets it up and it is not
38226 pic_offset_table_rtx. */
38228 static void
38229 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
38231 if (!TARGET_SINGLE_PIC_BASE
38232 && TARGET_TOC
38233 && TARGET_MINIMAL_TOC
38234 && !constant_pool_empty_p ())
38235 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
38236 if (cfun->machine->split_stack_argp_used)
38237 add_to_hard_reg_set (&set->set, Pmode, 12);
38239 /* Make sure the hard reg set doesn't include r2, which was possibly added
38240 via PIC_OFFSET_TABLE_REGNUM. */
38241 if (TARGET_TOC)
38242 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
38246 /* Helper function for rs6000_split_logical to emit a logical instruction after
38247 splitting the operation into individual GPR registers.
38249 DEST is the destination register.
38250 OP1 and OP2 are the input source registers.
38251 CODE is the base operation (AND, IOR, XOR, NOT).
38252 MODE is the machine mode.
38253 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38254 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38255 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38257 static void
38258 rs6000_split_logical_inner (rtx dest,
38259 rtx op1,
38260 rtx op2,
38261 enum rtx_code code,
38262 machine_mode mode,
38263 bool complement_final_p,
38264 bool complement_op1_p,
38265 bool complement_op2_p)
38267 rtx bool_rtx;
38269 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
38270 if (op2 && GET_CODE (op2) == CONST_INT
38271 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
38272 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38274 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
38275 HOST_WIDE_INT value = INTVAL (op2) & mask;
38277 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
38278 if (code == AND)
38280 if (value == 0)
38282 emit_insn (gen_rtx_SET (dest, const0_rtx));
38283 return;
38286 else if (value == mask)
38288 if (!rtx_equal_p (dest, op1))
38289 emit_insn (gen_rtx_SET (dest, op1));
38290 return;
38294 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
38295 into separate ORI/ORIS or XORI/XORIS instructions. */
38296 else if (code == IOR || code == XOR)
38298 if (value == 0)
38300 if (!rtx_equal_p (dest, op1))
38301 emit_insn (gen_rtx_SET (dest, op1));
38302 return;
38307 if (code == AND && mode == SImode
38308 && !complement_final_p && !complement_op1_p && !complement_op2_p)
38310 emit_insn (gen_andsi3 (dest, op1, op2));
38311 return;
38314 if (complement_op1_p)
38315 op1 = gen_rtx_NOT (mode, op1);
38317 if (complement_op2_p)
38318 op2 = gen_rtx_NOT (mode, op2);
38320 /* For canonical RTL, if only one arm is inverted it is the first. */
38321 if (!complement_op1_p && complement_op2_p)
38322 std::swap (op1, op2);
38324 bool_rtx = ((code == NOT)
38325 ? gen_rtx_NOT (mode, op1)
38326 : gen_rtx_fmt_ee (code, mode, op1, op2));
38328 if (complement_final_p)
38329 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
38331 emit_insn (gen_rtx_SET (dest, bool_rtx));
38334 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
38335 operations are split immediately during RTL generation to allow for more
38336 optimizations of the AND/IOR/XOR.
38338 OPERANDS is an array containing the destination and two input operands.
38339 CODE is the base operation (AND, IOR, XOR, NOT).
38341 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38342 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38343 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38347 static void
38348 rs6000_split_logical_di (rtx operands[3],
38349 enum rtx_code code,
38350 bool complement_final_p,
38351 bool complement_op1_p,
38352 bool complement_op2_p)
38354 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
38355 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
38356 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
38357 enum hi_lo { hi = 0, lo = 1 };
38358 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
38359 size_t i;
38361 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
38362 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
38363 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
38364 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
38366 if (code == NOT)
38367 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
38368 else
38370 if (GET_CODE (operands[2]) != CONST_INT)
38372 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
38373 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
38375 else
38377 HOST_WIDE_INT value = INTVAL (operands[2]);
38378 HOST_WIDE_INT value_hi_lo[2];
38380 gcc_assert (!complement_final_p);
38381 gcc_assert (!complement_op1_p);
38382 gcc_assert (!complement_op2_p);
38384 value_hi_lo[hi] = value >> 32;
38385 value_hi_lo[lo] = value & lower_32bits;
38387 for (i = 0; i < 2; i++)
38389 HOST_WIDE_INT sub_value = value_hi_lo[i];
38391 if (sub_value & sign_bit)
38392 sub_value |= upper_32bits;
38394 op2_hi_lo[i] = GEN_INT (sub_value);
38396 /* If this is an AND instruction, check to see if we need to load
38397 the value in a register. */
38398 if (code == AND && sub_value != -1 && sub_value != 0
38399 && !and_operand (op2_hi_lo[i], SImode))
38400 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
38405 for (i = 0; i < 2; i++)
38407 /* Split large IOR/XOR operations. */
38408 if ((code == IOR || code == XOR)
38409 && GET_CODE (op2_hi_lo[i]) == CONST_INT
38410 && !complement_final_p
38411 && !complement_op1_p
38412 && !complement_op2_p
38413 && !logical_const_operand (op2_hi_lo[i], SImode))
38415 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
38416 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
38417 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
38418 rtx tmp = gen_reg_rtx (SImode);
38420 /* Make sure the constant is sign extended. */
38421 if ((hi_16bits & sign_bit) != 0)
38422 hi_16bits |= upper_32bits;
38424 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
38425 code, SImode, false, false, false);
38427 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
38428 code, SImode, false, false, false);
38430 else
38431 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
38432 code, SImode, complement_final_p,
38433 complement_op1_p, complement_op2_p);
38436 return;
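/* Worked example (illustrative): with -m32, a DImode operation such as

     x ^= 0x12345678ffff0000ULL;

   is split per 32-bit half.  The high half 0x12345678 is not a
   logical_const_operand, so the code above emits it as two instructions,

     xoris rT,rA,0x1234
     xori  rD,rT,0x5678

   while the low half 0xffff0000 fits a single xoris directly.  */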
38439 /* Split the insns that make up boolean operations operating on multiple GPR
38440 registers. The boolean MD patterns ensure that the inputs either are
38441 exactly the same as the output registers, or there is no overlap.
38443 OPERANDS is an array containing the destination and two input operands.
38444 CODE is the base operation (AND, IOR, XOR, NOT).
38445 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
38446 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
38447 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
38449 void
38450 rs6000_split_logical (rtx operands[3],
38451 enum rtx_code code,
38452 bool complement_final_p,
38453 bool complement_op1_p,
38454 bool complement_op2_p)
38456 machine_mode mode = GET_MODE (operands[0]);
38457 machine_mode sub_mode;
38458 rtx op0, op1, op2;
38459 int sub_size, regno0, regno1, nregs, i;
38461 /* If this is DImode, use the specialized version that can run before
38462 register allocation. */
38463 if (mode == DImode && !TARGET_POWERPC64)
38465 rs6000_split_logical_di (operands, code, complement_final_p,
38466 complement_op1_p, complement_op2_p);
38467 return;
38470 op0 = operands[0];
38471 op1 = operands[1];
38472 op2 = (code == NOT) ? NULL_RTX : operands[2];
38473 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
38474 sub_size = GET_MODE_SIZE (sub_mode);
38475 regno0 = REGNO (op0);
38476 regno1 = REGNO (op1);
38478 gcc_assert (reload_completed);
38479 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38480 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
38482 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
38483 gcc_assert (nregs > 1);
38485 if (op2 && REG_P (op2))
38486 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
38488 for (i = 0; i < nregs; i++)
38490 int offset = i * sub_size;
38491 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
38492 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
38493 rtx sub_op2 = ((code == NOT)
38494 ? NULL_RTX
38495 : simplify_subreg (sub_mode, op2, mode, offset));
38497 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
38498 complement_final_p, complement_op1_p,
38499 complement_op2_p);
38502 return;
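/* An illustrative sketch, not part of the source: after reload on a 64-bit
   target, a TImode operation such as

	(set (reg:TI 10) (ior:TI (reg:TI 10) (reg:TI 8)))

   occupies two GPRs per operand, so the loop above emits one DImode insn
   per hard register, e.g.

	or 10,10,8
	or 11,11,9  */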
38506 /* Return true if the peephole2 pass can combine an addis instruction and a
38507 load that uses the addis result with an offset, so that the pair can be
38508 fused together on a power8. */
38510 bool
38511 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
38512 rtx addis_value, /* addis value. */
38513 rtx target, /* target register that is loaded. */
38514 rtx mem) /* bottom part of the memory addr. */
38516 rtx addr;
38517 rtx base_reg;
38519 /* Validate arguments. */
38520 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38521 return false;
38523 if (!base_reg_operand (target, GET_MODE (target)))
38524 return false;
38526 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38527 return false;
38529 /* Allow sign/zero extension. */
38530 if (GET_CODE (mem) == ZERO_EXTEND
38531 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
38532 mem = XEXP (mem, 0);
38534 if (!MEM_P (mem))
38535 return false;
38537 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
38538 return false;
38540 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38541 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
38542 return false;
38544 /* Validate that the register used to load the high value is either the
38545 register being loaded, or we can safely replace its use.
38547 This function is only called from the peephole2 pass, and we assume that
38548 the peephole contains 2 insns (addis and load), so we check that the
38549 target register is not used in the memory address and that the register
38550 holding the addis result is dead after the peephole. */
38551 if (REGNO (addis_reg) != REGNO (target))
38553 if (reg_mentioned_p (target, mem))
38554 return false;
38556 if (!peep2_reg_dead_p (2, addis_reg))
38557 return false;
38559 /* If the target register being loaded is the stack pointer, we must
38560 avoid loading any other value into it, even temporarily. */
38561 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
38562 return false;
38565 base_reg = XEXP (addr, 0);
38566 return REGNO (addis_reg) == REGNO (base_reg);
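/* An illustrative sketch, not part of the source: the peephole2 patterns
   guarded by this predicate match an addis/load pair such as

	addis 9,2,.LC0@toc@ha
	lwz 9,.LC0@toc@l(9)

   where either both insns set the same register, or the addis result
   register is dead after the load.  */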
38569 /* During the peephole2 pass, adjust and expand the insns for a load fusion
38570 sequence. We adjust the addis register to use the target register. If the
38571 load sign extends, we change it into a zero-extending load followed by an
38572 explicit sign extension, since the fusion only covers zero-extending
38573 loads.
38575 The operands are:
38576 operands[0] register set with addis (to be replaced with target)
38577 operands[1] value set via addis
38578 operands[2] target register being loaded
38579 operands[3] D-form memory reference using operands[0]. */
38581 void
38582 expand_fusion_gpr_load (rtx *operands)
38584 rtx addis_value = operands[1];
38585 rtx target = operands[2];
38586 rtx orig_mem = operands[3];
38587 rtx new_addr, new_mem, orig_addr, offset;
38588 enum rtx_code plus_or_lo_sum;
38589 machine_mode target_mode = GET_MODE (target);
38590 machine_mode extend_mode = target_mode;
38591 machine_mode ptr_mode = Pmode;
38592 enum rtx_code extend = UNKNOWN;
38594 if (GET_CODE (orig_mem) == ZERO_EXTEND
38595 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
38597 extend = GET_CODE (orig_mem);
38598 orig_mem = XEXP (orig_mem, 0);
38599 target_mode = GET_MODE (orig_mem);
38602 gcc_assert (MEM_P (orig_mem));
38604 orig_addr = XEXP (orig_mem, 0);
38605 plus_or_lo_sum = GET_CODE (orig_addr);
38606 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
38608 offset = XEXP (orig_addr, 1);
38609 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
38610 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
38612 if (extend != UNKNOWN)
38613 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
38615 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
38616 UNSPEC_FUSION_GPR);
38617 emit_insn (gen_rtx_SET (target, new_mem));
38619 if (extend == SIGN_EXTEND)
38621 int sub_off = ((BYTES_BIG_ENDIAN)
38622 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
38623 : 0);
38624 rtx sign_reg
38625 = simplify_subreg (target_mode, target, extend_mode, sub_off);
38627 emit_insn (gen_rtx_SET (target,
38628 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
38631 return;
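/* An illustrative sketch, not part of the source: because power8 fusion only
   covers zero-extending loads, a sign-extending HImode load is expanded above
   into a fused zero-extending load plus an explicit sign extension:

	addis 9,2,sym@toc@ha
	lhz 9,sym@toc@l(9)
	extsh 9,9  */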
38634 /* Emit the addis instruction that will be part of a fused instruction
38635 sequence. */
38637 void
38638 emit_fusion_addis (rtx target, rtx addis_value)
38640 rtx fuse_ops[10];
38641 const char *addis_str = NULL;
38643 /* Emit the addis instruction. */
38644 fuse_ops[0] = target;
38645 if (satisfies_constraint_L (addis_value))
38647 fuse_ops[1] = addis_value;
38648 addis_str = "lis %0,%v1";
38651 else if (GET_CODE (addis_value) == PLUS)
38653 rtx op0 = XEXP (addis_value, 0);
38654 rtx op1 = XEXP (addis_value, 1);
38656 if (REG_P (op0) && CONST_INT_P (op1)
38657 && satisfies_constraint_L (op1))
38659 fuse_ops[1] = op0;
38660 fuse_ops[2] = op1;
38661 addis_str = "addis %0,%1,%v2";
38665 else if (GET_CODE (addis_value) == HIGH)
38667 rtx value = XEXP (addis_value, 0);
38668 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
38670 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
38671 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
38672 if (TARGET_ELF)
38673 addis_str = "addis %0,%2,%1@toc@ha";
38675 else if (TARGET_XCOFF)
38676 addis_str = "addis %0,%1@u(%2)";
38678 else
38679 gcc_unreachable ();
38682 else if (GET_CODE (value) == PLUS)
38684 rtx op0 = XEXP (value, 0);
38685 rtx op1 = XEXP (value, 1);
38687 if (GET_CODE (op0) == UNSPEC
38688 && XINT (op0, 1) == UNSPEC_TOCREL
38689 && CONST_INT_P (op1))
38691 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
38692 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
38693 fuse_ops[3] = op1;
38694 if (TARGET_ELF)
38695 addis_str = "addis %0,%2,%1+%3@toc@ha";
38697 else if (TARGET_XCOFF)
38698 addis_str = "addis %0,%1+%3@u(%2)";
38700 else
38701 gcc_unreachable ();
38705 else if (satisfies_constraint_L (value))
38707 fuse_ops[1] = value;
38708 addis_str = "lis %0,%v1";
38711 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
38713 fuse_ops[1] = value;
38714 addis_str = "lis %0,%1@ha";
38718 if (!addis_str)
38719 fatal_insn ("Could not generate addis value for fusion", addis_value);
38721 output_asm_insn (addis_str, fuse_ops);
38724 /* Emit a D-form load or store instruction that is the second instruction
38725 of a fusion sequence. */
38727 void
38728 emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
38729 const char *insn_str)
38731 rtx fuse_ops[10];
38732 char insn_template[80];
38734 fuse_ops[0] = load_store_reg;
38735 fuse_ops[1] = addis_reg;
38737 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
38739 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
38740 fuse_ops[2] = offset;
38741 output_asm_insn (insn_template, fuse_ops);
38744 else if (GET_CODE (offset) == UNSPEC
38745 && XINT (offset, 1) == UNSPEC_TOCREL)
38747 if (TARGET_ELF)
38748 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
38750 else if (TARGET_XCOFF)
38751 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38753 else
38754 gcc_unreachable ();
38756 fuse_ops[2] = XVECEXP (offset, 0, 0);
38757 output_asm_insn (insn_template, fuse_ops);
38760 else if (GET_CODE (offset) == PLUS
38761 && GET_CODE (XEXP (offset, 0)) == UNSPEC
38762 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
38763 && CONST_INT_P (XEXP (offset, 1)))
38765 rtx tocrel_unspec = XEXP (offset, 0);
38766 if (TARGET_ELF)
38767 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
38769 else if (TARGET_XCOFF)
38770 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
38772 else
38773 gcc_unreachable ();
38775 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
38776 fuse_ops[3] = XEXP (offset, 1);
38777 output_asm_insn (insn_template, fuse_ops);
38780 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
38782 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
38784 fuse_ops[2] = offset;
38785 output_asm_insn (insn_template, fuse_ops);
38788 else
38789 fatal_insn ("Unable to generate load/store offset for fusion", offset);
38791 return;
38794 /* Wrap a TOC address that can be fused to indicate that special fusion
38795 processing is needed. */
38797 static rtx
38798 fusion_wrap_memory_address (rtx old_mem)
38800 rtx old_addr = XEXP (old_mem, 0);
38801 rtvec v = gen_rtvec (1, old_addr);
38802 rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS);
38803 return replace_equiv_address_nv (old_mem, new_addr, false);
38806 /* Given an address, convert it into the addis and load offset parts. Addresses
38807 created during the peephole2 process look like:
38808 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
38809 (unspec [(...)] UNSPEC_TOCREL))
38811 Addresses created via toc fusion look like:
38812 (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */
38814 static void
38815 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
38817 rtx hi, lo;
38819 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS)
38821 lo = XVECEXP (addr, 0, 0);
38822 hi = gen_rtx_HIGH (Pmode, lo);
38824 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
38826 hi = XEXP (addr, 0);
38827 lo = XEXP (addr, 1);
38829 else
38830 gcc_unreachable ();
38832 *p_hi = hi;
38833 *p_lo = lo;
38836 /* Return a string to fuse an addis instruction with a GPR load into the same
38837 register that the addis instruction set. The address that is used is the
38838 logical address that was formed during peephole2:
38839 (lo_sum (high) (low-part))
38841 Or the address is the TOC address that is wrapped before register allocation:
38842 (unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)
38844 The code is complicated, so we call output_asm_insn directly, and just
38845 return "". */
38847 const char *
38848 emit_fusion_gpr_load (rtx target, rtx mem)
38850 rtx addis_value;
38851 rtx addr;
38852 rtx load_offset;
38853 const char *load_str = NULL;
38854 machine_mode mode;
38856 if (GET_CODE (mem) == ZERO_EXTEND)
38857 mem = XEXP (mem, 0);
38859 gcc_assert (REG_P (target) && MEM_P (mem));
38861 addr = XEXP (mem, 0);
38862 fusion_split_address (addr, &addis_value, &load_offset);
38864 /* Now emit the load instruction to the same register. */
38865 mode = GET_MODE (mem);
38866 switch (mode)
38868 case E_QImode:
38869 load_str = "lbz";
38870 break;
38872 case E_HImode:
38873 load_str = "lhz";
38874 break;
38876 case E_SImode:
38877 case E_SFmode:
38878 load_str = "lwz";
38879 break;
38881 case E_DImode:
38882 case E_DFmode:
38883 gcc_assert (TARGET_POWERPC64);
38884 load_str = "ld";
38885 break;
38887 default:
38888 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
38891 /* Emit the addis instruction. */
38892 emit_fusion_addis (target, addis_value);
38894 /* Emit the D-form load instruction. */
38895 emit_fusion_load_store (target, target, load_offset, load_str);
38897 return "";
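/* An illustrative sketch, not part of the source: for a SImode TOC-relative
   load, the two helper calls above typically print the fused pair

	addis 9,2,var@toc@ha
	lwz 9,var@toc@l(9)  */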
38901 /* Return true if the peephole2 pass can combine an addis instruction and a
38902 dependent load or store into a fused pair. This form of fusion was added
38903 in the ISA 3.0 (power9) hardware. */
38905 bool
38906 fusion_p9_p (rtx addis_reg, /* register set via addis. */
38907 rtx addis_value, /* addis value. */
38908 rtx dest, /* destination (memory or register). */
38909 rtx src) /* source (register or memory). */
38911 rtx addr, mem, offset;
38912 machine_mode mode = GET_MODE (src);
38914 /* Validate arguments. */
38915 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
38916 return false;
38918 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
38919 return false;
38921 /* Ignore extend operations that are part of the load. */
38922 if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
38923 src = XEXP (src, 0);
38925 /* Test for memory<-register or register<-memory. */
38926 if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
38928 if (!MEM_P (dest))
38929 return false;
38931 mem = dest;
38934 else if (MEM_P (src))
38936 if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
38937 return false;
38939 mem = src;
38942 else
38943 return false;
38945 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
38946 if (GET_CODE (addr) == PLUS)
38948 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38949 return false;
38951 return satisfies_constraint_I (XEXP (addr, 1));
38954 else if (GET_CODE (addr) == LO_SUM)
38956 if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
38957 return false;
38959 offset = XEXP (addr, 1);
38960 if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
38961 return small_toc_ref (offset, GET_MODE (offset));
38963 else if (TARGET_ELF && !TARGET_POWERPC64)
38964 return CONSTANT_P (offset);
38967 return false;
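/* An illustrative sketch, not part of the source: unlike the power8 GPR form,
   p9 fusion also covers stores and FP/vector registers, for example

	addis 9,2,x@toc@ha
	stfd 1,x@toc@l(9)

   where r9 is only a scratch register and must be dead afterwards.  */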
38970 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
38971 load sequence.
38973 The operands are:
38974 operands[0] register set with addis
38975 operands[1] value set via addis
38976 operands[2] target register being loaded
38977 operands[3] D-form memory reference using operands[0].
38979 This is similar to the fusion introduced with power8, except it applies to
38980 both loads and stores and does not require the result register to be the
38981 same as the base register. At the moment, we only do this if the register
38982 set with addis is dead. */
38984 void
38985 expand_fusion_p9_load (rtx *operands)
38987 rtx tmp_reg = operands[0];
38988 rtx addis_value = operands[1];
38989 rtx target = operands[2];
38990 rtx orig_mem = operands[3];
38991 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
38992 enum rtx_code plus_or_lo_sum;
38993 machine_mode target_mode = GET_MODE (target);
38994 machine_mode extend_mode = target_mode;
38995 machine_mode ptr_mode = Pmode;
38996 enum rtx_code extend = UNKNOWN;
38998 if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
39000 extend = GET_CODE (orig_mem);
39001 orig_mem = XEXP (orig_mem, 0);
39002 target_mode = GET_MODE (orig_mem);
39005 gcc_assert (MEM_P (orig_mem));
39007 orig_addr = XEXP (orig_mem, 0);
39008 plus_or_lo_sum = GET_CODE (orig_addr);
39009 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39011 offset = XEXP (orig_addr, 1);
39012 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39013 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39015 if (extend != UNKNOWN)
39016 new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);
39018 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
39019 UNSPEC_FUSION_P9);
39021 set = gen_rtx_SET (target, new_mem);
39022 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39023 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39024 emit_insn (insn);
39026 return;
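/* An illustrative sketch, not part of the source: the insn emitted above is a
   PARALLEL of the fused load and a clobber of the addis scratch register,
   roughly

	(parallel [(set (reg:DF 33)
			(unspec:DF [(mem:DF (lo_sum (reg:DI 9) (...)))]
				   UNSPEC_FUSION_P9))
		   (clobber (reg:DI 9))])  */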
39029 /* During the peephole2 pass, adjust and expand the insns for an extended fusion
39030 store sequence.
39032 The operands are:
39033 operands[0] register set with addis
39034 operands[1] value set via addis
39035 operands[2] target D-form memory being stored to
39036 operands[3] register being stored
39038 This is similar to the fusion introduced with power8, except it applies to
39039 both loads and stores and does not require the result register to be the
39040 same as the base register. At the moment, we only do this if the register
39041 set with addis is dead. */
39043 void
39044 expand_fusion_p9_store (rtx *operands)
39046 rtx tmp_reg = operands[0];
39047 rtx addis_value = operands[1];
39048 rtx orig_mem = operands[2];
39049 rtx src = operands[3];
39050 rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
39051 enum rtx_code plus_or_lo_sum;
39052 machine_mode target_mode = GET_MODE (orig_mem);
39053 machine_mode ptr_mode = Pmode;
39055 gcc_assert (MEM_P (orig_mem));
39057 orig_addr = XEXP (orig_mem, 0);
39058 plus_or_lo_sum = GET_CODE (orig_addr);
39059 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
39061 offset = XEXP (orig_addr, 1);
39062 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
39063 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
39065 new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
39066 UNSPEC_FUSION_P9);
39068 set = gen_rtx_SET (new_mem, new_src);
39069 clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
39070 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
39071 emit_insn (insn);
39073 return;
39076 /* Return a string to fuse an addis instruction with a load using extended
39077 fusion. The address that is used is the logical address that was formed
39078 during peephole2: (lo_sum (high) (low-part))
39080 The code is complicated, so we call output_asm_insn directly, and just
39081 return "". */
39083 const char *
39084 emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
39086 machine_mode mode = GET_MODE (reg);
39087 rtx hi;
39088 rtx lo;
39089 rtx addr;
39090 const char *load_string;
39091 int r;
39093 if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
39095 mem = XEXP (mem, 0);
39096 mode = GET_MODE (mem);
39099 if (GET_CODE (reg) == SUBREG)
39101 gcc_assert (SUBREG_BYTE (reg) == 0);
39102 reg = SUBREG_REG (reg);
39105 if (!REG_P (reg))
39106 fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);
39108 r = REGNO (reg);
39109 if (FP_REGNO_P (r))
39111 if (mode == SFmode)
39112 load_string = "lfs";
39113 else if (mode == DFmode || mode == DImode)
39114 load_string = "lfd";
39115 else
39116 gcc_unreachable ();
39118 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
39120 if (mode == SFmode)
39121 load_string = "lxssp";
39122 else if (mode == DFmode || mode == DImode)
39123 load_string = "lxsd";
39124 else
39125 gcc_unreachable ();
39127 else if (INT_REGNO_P (r))
39129 switch (mode)
39131 case E_QImode:
39132 load_string = "lbz";
39133 break;
39134 case E_HImode:
39135 load_string = "lhz";
39136 break;
39137 case E_SImode:
39138 case E_SFmode:
39139 load_string = "lwz";
39140 break;
39141 case E_DImode:
39142 case E_DFmode:
39143 if (!TARGET_POWERPC64)
39144 gcc_unreachable ();
39145 load_string = "ld";
39146 break;
39147 default:
39148 gcc_unreachable ();
39151 else
39152 fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);
39154 if (!MEM_P (mem))
39155 fatal_insn ("emit_fusion_p9_load not MEM", mem);
39157 addr = XEXP (mem, 0);
39158 fusion_split_address (addr, &hi, &lo);
39160 /* Emit the addis instruction. */
39161 emit_fusion_addis (tmp_reg, hi);
39163 /* Emit the D-form load instruction. */
39164 emit_fusion_load_store (reg, tmp_reg, lo, load_string);
39166 return "";
39169 /* Return a string to fuse an addis instruction with a store using extended
39170 fusion. The address that is used is the logical address that was formed
39171 during peephole2: (lo_sum (high) (low-part))
39173 The code is complicated, so we call output_asm_insn directly, and just
39174 return "". */
39176 const char *
39177 emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
39179 machine_mode mode = GET_MODE (reg);
39180 rtx hi;
39181 rtx lo;
39182 rtx addr;
39183 const char *store_string;
39184 int r;
39186 if (GET_CODE (reg) == SUBREG)
39188 gcc_assert (SUBREG_BYTE (reg) == 0);
39189 reg = SUBREG_REG (reg);
39192 if (!REG_P (reg))
39193 fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);
39195 r = REGNO (reg);
39196 if (FP_REGNO_P (r))
39198 if (mode == SFmode)
39199 store_string = "stfs";
39200 else if (mode == DFmode)
39201 store_string = "stfd";
39202 else
39203 gcc_unreachable ();
39205 else if (ALTIVEC_REGNO_P (r) && TARGET_P9_VECTOR)
39207 if (mode == SFmode)
39208 store_string = "stxssp";
39209 else if (mode == DFmode || mode == DImode)
39210 store_string = "stxsd";
39211 else
39212 gcc_unreachable ();
39214 else if (INT_REGNO_P (r))
39216 switch (mode)
39218 case E_QImode:
39219 store_string = "stb";
39220 break;
39221 case E_HImode:
39222 store_string = "sth";
39223 break;
39224 case E_SImode:
39225 case E_SFmode:
39226 store_string = "stw";
39227 break;
39228 case E_DImode:
39229 case E_DFmode:
39230 if (!TARGET_POWERPC64)
39231 gcc_unreachable ();
39232 store_string = "std";
39233 break;
39234 default:
39235 gcc_unreachable ();
39238 else
39239 fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);
39241 if (!MEM_P (mem))
39242 fatal_insn ("emit_fusion_p9_store not MEM", mem);
39244 addr = XEXP (mem, 0);
39245 fusion_split_address (addr, &hi, &lo);
39247 /* Emit the addis instruction. */
39248 emit_fusion_addis (tmp_reg, hi);
39250 /* Emit the D-form store instruction. */
39251 emit_fusion_load_store (reg, tmp_reg, lo, store_string);
39253 return "";
39256 #ifdef RS6000_GLIBC_ATOMIC_FENV
39257 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
39258 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
39259 #endif
39261 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
39263 static void
39264 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
39266 if (!TARGET_HARD_FLOAT)
39268 #ifdef RS6000_GLIBC_ATOMIC_FENV
39269 if (atomic_hold_decl == NULL_TREE)
39271 atomic_hold_decl
39272 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39273 get_identifier ("__atomic_feholdexcept"),
39274 build_function_type_list (void_type_node,
39275 double_ptr_type_node,
39276 NULL_TREE));
39277 TREE_PUBLIC (atomic_hold_decl) = 1;
39278 DECL_EXTERNAL (atomic_hold_decl) = 1;
39281 if (atomic_clear_decl == NULL_TREE)
39283 atomic_clear_decl
39284 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39285 get_identifier ("__atomic_feclearexcept"),
39286 build_function_type_list (void_type_node,
39287 NULL_TREE));
39288 TREE_PUBLIC (atomic_clear_decl) = 1;
39289 DECL_EXTERNAL (atomic_clear_decl) = 1;
39292 tree const_double = build_qualified_type (double_type_node,
39293 TYPE_QUAL_CONST);
39294 tree const_double_ptr = build_pointer_type (const_double);
39295 if (atomic_update_decl == NULL_TREE)
39297 atomic_update_decl
39298 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
39299 get_identifier ("__atomic_feupdateenv"),
39300 build_function_type_list (void_type_node,
39301 const_double_ptr,
39302 NULL_TREE));
39303 TREE_PUBLIC (atomic_update_decl) = 1;
39304 DECL_EXTERNAL (atomic_update_decl) = 1;
39307 tree fenv_var = create_tmp_var_raw (double_type_node);
39308 TREE_ADDRESSABLE (fenv_var) = 1;
39309 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
39311 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
39312 *clear = build_call_expr (atomic_clear_decl, 0);
39313 *update = build_call_expr (atomic_update_decl, 1,
39314 fold_convert (const_double_ptr, fenv_addr));
39315 #endif
39316 return;
39319 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
39320 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
39321 tree call_mffs = build_call_expr (mffs, 0);
39323 /* Generates the equivalent of feholdexcept (&fenv_var)
39325 fenv_var = __builtin_mffs ();
39326 double fenv_hold;
39327 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
39328 __builtin_mtfsf (0xff, fenv_hold); */
39330 /* Mask to clear everything except for the rounding modes and non-IEEE
39331 arithmetic flag. */
39332 const unsigned HOST_WIDE_INT hold_exception_mask =
39333 HOST_WIDE_INT_C (0xffffffff00000007);
39335 tree fenv_var = create_tmp_var_raw (double_type_node);
39337 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
39339 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
39340 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39341 build_int_cst (uint64_type_node,
39342 hold_exception_mask));
39344 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39345 fenv_llu_and);
39347 tree hold_mtfsf = build_call_expr (mtfsf, 2,
39348 build_int_cst (unsigned_type_node, 0xff),
39349 fenv_hold_mtfsf);
39351 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
39353 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
39355 double fenv_clear = __builtin_mffs ();
39356 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
39357 __builtin_mtfsf (0xff, fenv_clear); */
39359 /* Mask to clear the entire lower 32-bit word that holds the FPSCR image,
39360 including all of the exception bits. */
39361 const unsigned HOST_WIDE_INT clear_exception_mask =
39362 HOST_WIDE_INT_C (0xffffffff00000000);
39364 tree fenv_clear = create_tmp_var_raw (double_type_node);
39366 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
39368 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
39369 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
39370 fenv_clean_llu,
39371 build_int_cst (uint64_type_node,
39372 clear_exception_mask));
39374 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39375 fenv_clear_llu_and);
39377 tree clear_mtfsf = build_call_expr (mtfsf, 2,
39378 build_int_cst (unsigned_type_node, 0xff),
39379 fenv_clear_mtfsf);
39381 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
39383 /* Generates the equivalent of feupdateenv (&fenv_var)
39385 double old_fenv = __builtin_mffs ();
39386 double fenv_update;
39387 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
39388 (*(uint64_t*)&fenv_var & 0x1ff80fff);
39389 __builtin_mtfsf (0xff, fenv_update); */
39391 const unsigned HOST_WIDE_INT update_exception_mask =
39392 HOST_WIDE_INT_C (0xffffffff1fffff00);
39393 const unsigned HOST_WIDE_INT new_exception_mask =
39394 HOST_WIDE_INT_C (0x1ff80fff);
39396 tree old_fenv = create_tmp_var_raw (double_type_node);
39397 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
39399 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
39400 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
39401 build_int_cst (uint64_type_node,
39402 update_exception_mask));
39404 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
39405 build_int_cst (uint64_type_node,
39406 new_exception_mask));
39408 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
39409 old_llu_and, new_llu_and);
39411 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
39412 new_llu_mask);
39414 tree update_mtfsf = build_call_expr (mtfsf, 2,
39415 build_int_cst (unsigned_type_node, 0xff),
39416 fenv_update_mtfsf);
39418 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
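/* A descriptive note on the masks above: mffs returns the FPSCR in the low 32
   bits of the double, where bits 0:1 are RN (the rounding mode) and bit 2 is
   NI (non-IEEE mode); hence the 0x7 kept by the hold mask, while the clear
   mask zeroes the entire FPSCR word.  */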
39421 void
39422 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
39424 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39426 rtx_tmp0 = gen_reg_rtx (V2DImode);
39427 rtx_tmp1 = gen_reg_rtx (V2DImode);
39429 /* The destination layout of the vmrgew instruction is:
39430 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
39431 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
39432 vmrgew instruction will be correct. */
39433 if (VECTOR_ELT_ORDER_BIG)
39435 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
39436 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
39438 else
39440 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
39441 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
39444 rtx_tmp2 = gen_reg_rtx (V4SFmode);
39445 rtx_tmp3 = gen_reg_rtx (V4SFmode);
39447 if (signed_convert)
39449 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
39450 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
39452 else
39454 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
39455 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
39458 if (VECTOR_ELT_ORDER_BIG)
39459 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
39460 else
39461 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
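/* An illustrative note, not part of the source: with big-endian element
   order, float2 of {a0,a1} and {b0,b1} yields the V4SF {a0, a1, b0, b1}:
   the xxpermdi pair forms {a0,b0} and {a1,b1}, the converts leave the
   single-precision results in the even words, and vmrgew interleaves those
   even words.  */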
39464 void
39465 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
39466 rtx src2)
39468 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
39470 rtx_tmp0 = gen_reg_rtx (V2DFmode);
39471 rtx_tmp1 = gen_reg_rtx (V2DFmode);
39473 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
39474 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
39476 rtx_tmp2 = gen_reg_rtx (V4SImode);
39477 rtx_tmp3 = gen_reg_rtx (V4SImode);
39479 if (signed_convert)
39481 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
39482 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
39484 else
39486 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
39487 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
39490 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
39493 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
39495 static bool
39496 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
39497 optimization_type opt_type)
39499 switch (op)
39501 case rsqrt_optab:
39502 return (opt_type == OPTIMIZE_FOR_SPEED
39503 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
39505 default:
39506 return true;
39510 /* Implement TARGET_CONSTANT_ALIGNMENT. */
39512 static HOST_WIDE_INT
39513 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
39515 if (TREE_CODE (exp) == STRING_CST
39516 && (STRICT_ALIGNMENT || !optimize_size))
39517 return MAX (align, BITS_PER_WORD);
39518 return align;
39521 /* Implement TARGET_STARTING_FRAME_OFFSET. */
39523 static HOST_WIDE_INT
39524 rs6000_starting_frame_offset (void)
39526 if (FRAME_GROWS_DOWNWARD)
39527 return 0;
39528 return RS6000_STARTING_FRAME_OFFSET;
39531 struct gcc_target targetm = TARGET_INITIALIZER;
39533 #include "gt-rs6000.h"